Catching errors when scraping with Selenium - python

As part of a scraping job, I am trying to catch errors and bypass them. I want to keep a while: loop going in spite of these errors being raised. I have this code:
logger = logging.getLogger(__name__)
# ...
except (httplib.HTTPException, IOError) as e:
logger.exception('Ignoring exception, sleeping for 20 seconds')
time.sleep(20)
But, this still throws the same socket error as before:
Traceback (most recent call last):
File "/Users/aa/Box Sync/Work/PythonCode/TWPh/dev.py", line 54, in <module>
old_length = len(driver.page_source)
File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 438, in page_source
return self.execute(Command.GET_PAGE_SOURCE)['value']
File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 173, in execute
response = self.command_executor.execute(driver_command, params)
File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 349, in execute
return self._request(command_info[0], url, body=data)
File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 417, in _request
resp = opener.open(request)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 431, in open
response = self._open(req, data)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 449, in _open
'_open', req)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1227, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1200, in do_open
r = h.getresponse(buffering=True)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1073, in getresponse
response.begin()
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 415, in begin
version, status, reason = self._read_status()
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 371, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 476, in readline
data = self._sock.recv(self._rbufsize)
socket.error: [Errno 54] Connection reset by peer
[Finished in 24639.7s with exit code 1]

Related

Can't continue a program when binance api(connection) error occured

I'm fetching OHLCV data from Binance via Binance api on AWS-EC2.
But sometimes (once every couple of days) the error "104, 'ECONNRESET'" occurs and the program stops.
The program is written in Python 3.7.3 and runs on AWS-EC2.
The code is as follows, but it cannot continue after the error.
I want the program to keep running when any API or connection error occurs.
How should I handle this error?
while True:
..............
try :
klines = client.get_historical_klines(ticker_symbol, Client.KLINE_INTERVAL_1MINUTE, _from_str, _until_str)
except BinanceAPIException as e:
print(e)
print('Something went wrong. Error occured at %s. Wait for 1 hour.' % (datetime.datetime.now().astimezone(timezone('UTC'))))
sleep(3600)
continue
..............
The full error message is as follows.
Traceback (most recent call last):
File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 600, in urlopen
chunked=chunked) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 384, in _make_request
six.raise_from(e, None) File "", line 2, in raise_from File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 380, in _make_request
httplib_response = conn.getresponse() File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/http/client.py",
line 1321, in getresponse
response.begin() File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/http/client.py",
line 296, in begin
version, status, reason = self._read_status() File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/http/client.py",
line 257, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/socket.py", line
589, in readinto
return self._sock.recv_into(b) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/contrib/pyopenssl.py",
line 312, in recv_into
return self.recv_into(*args, **kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/contrib/pyopenssl.py",
line 302, in recv_into
raise SocketError(str(e)) OSError: (104, 'ECONNRESET')
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/requests/adapters.py",
line 449, in send
timeout=timeout File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 638, in urlopen
_stacktrace=sys.exc_info()[2]) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/util/retry.py",
line 368, in increment
raise six.reraise(type(error), error, _stacktrace) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/packages/six.py",
line 685, in reraise
raise value.with_traceback(tb) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 600, in urlopen
chunked=chunked) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 384, in _make_request
six.raise_from(e, None) File "", line 2, in raise_from File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/connectionpool.py",
line 380, in _make_request
httplib_response = conn.getresponse() File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/http/client.py",
line 1321, in getresponse
response.begin() File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/http/client.py",
line 296, in begin
version, status, reason = self._read_status() File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/http/client.py",
line 257, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/socket.py", line
589, in readinto
return self._sock.recv_into(b) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/contrib/pyopenssl.py",
line 312, in recv_into
return self.recv_into(*args, **kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/urllib3/contrib/pyopenssl.py",
line 302, in recv_into
raise SocketError(str(e)) urllib3.exceptions.ProtocolError: ('Connection aborted.', OSError("(104, 'ECONNRESET')"))
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"BTC_USDT_BINANCE_minutes.py", line 54, in
klines = client.get_historical_klines(ticker_symbol, Client.KLINE_INTERVAL_1MINUTE, _from_str, _until_str) File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/binance/client.py",
line 765, in get_historical_klines
first_valid_ts = self._get_earliest_valid_timestamp(symbol, interval) File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/binance/client.py",
line 723, in _get_earliest_valid_timestamp
endTime=None File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/binance/client.py",
line 705, in get_klines
return self._get('klines', data=params) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/binance/client.py",
line 207, in _get
return self._request_api('get', path, signed, version, **kwargs) File
"/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/binance/client.py",
line 181, in _request_api
return self._request(method, uri, signed, **kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/binance/client.py",
line 175, in _request
response = getattr(self.session, method)(uri, **kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/requests/sessions.py",
line 546, in get
return self.request('GET', url, **kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/requests/sessions.py",
line 533, in request
resp = self.send(prep, **send_kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/requests/sessions.py",
line 646, in send
r = adapter.send(request, **kwargs) File "/home/ec2-user/.pyenv/versions/3.7.3/lib/python3.7/site-packages/requests/adapters.py",
line 498, in send
raise ConnectionError(err, request=request) requests.exceptions.ConnectionError: ('Connection aborted.',
OSError("(104, 'ECONNRESET')"))
Assuming that you are using the python-binance module, the error most likely happens because you are trying to reuse the client after a long time (server timeout). You can try recreating the client first:
...
client = Client(api_key, api_secret)
while True:
...
try :
klines = client.get_historical_klines(ticker_symbol, Client.KLINE_INTERVAL_1MINUTE, _from_str, _until_str)
except BinanceAPIException as e:
print(e)
print('Something went wrong. Error occured at %s. Wait for 1 hour.' % (datetime.datetime.now().astimezone(timezone('UTC'))))
sleep(3600)
client = Client(api_key, api_secret)
continue
...
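The reason is that the [Client][2] uses a requests Session underneath, which implies the use of urllib3's connection pooling. A pooled connection cannot be reused after the server has timed it out. Note also that the traceback in the question ends in requests.exceptions.ConnectionError, which the except BinanceAPIException clause did not catch, so that exception must be listed in the except clause as well for the loop to continue.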

urllib2 httplib.BadStatusLine

I am using Python's urllib2 to connect to an HTTP server. Sometimes I get the response: httplib.BadStatusLine: ''.
My code :
response = None
try:
request = urllib2.Request(http_url,params)
response = urllib2.urlopen(request,timeout=5000)
return str(response.read())
except urllib2.HTTPError :
return ""
except urllib2.URLError:
return ""
response error :
File "/usr/lib64/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib64/python2.7/urllib2.py", line 431, in open
response = self._open(req, data)
File "/usr/lib64/python2.7/urllib2.py", line 449, in _open
'_open', req)
File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib64/python2.7/urllib2.py", line 1244, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib64/python2.7/urllib2.py", line 1217, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib64/python2.7/httplib.py", line 1051, in getresponse
response.begin()
File "/usr/lib64/python2.7/httplib.py", line 415, in begin
version, status, reason = self._read_status()
File "/usr/lib64/python2.7/httplib.py", line 379, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
I don't know how to fix this bug, and I don't know why I get this error response.

Python error escape "socket.error: [Errno 54] Connection reset by peer"

I'm running a scraper that's going through a few domains and it's getting hung up on http://www.1000markets.com/
Checking from different sources it seems to be down. That's totally fine, but I'm getting the error mentioned in the title.
How can I escape this? I'm catching HTTPError and URLError but it's still getting hung up.
Any help on this would be great
def get_html(link):
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError
try:
res = urllib2.urlopen(link)
html = res.read()
except URLError as e:
return link
except HTTPError as e:
return link
edit: Attached is the error
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1187, in do_open
r = h.getresponse(buffering=True)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1045, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 476, in readline
data = self._sock.recv(self._rbufsize)
socket.error: [Errno 54] Connection reset by peer

can calling pythons urllib2.info() cause an exception?

I'm getting a couple of exceptions popping up from time to time but can't think of the cause.
Here's a snippet:
try:
r = urllib2.urlopen(url)
except urllib2.URLError, e:
if hasattr(e, 'code'):
# unauthorized
print('UA: %s' % url)
elif hasattr(e, 'reason'):
print('TO: %s' % url)
# timeout
else:
i = r.info()
try:
server = i['server']
except:
pass
else:
if not 'authenticate' in server:
print('NA: %s' % url)
I'm thinking perhaps that r.info() is causing an exception but not sure why it would as the r = urllib2.urlopen(url) is covered with the try.
The errors are:
Traceback (most recent call last):
File "C:\Python27\lib\threading.py", line 551, in __bootstrap_inner
self.run()
File "C:\Users\anthony\Scripts\checker.py", line 35, in run
r = urllib2.urlopen(url)
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 400, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 418, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1180, in do_open
r = h.getresponse(buffering=True)
File "C:\Python27\lib\httplib.py", line 1030, in getresponse
response.begin()
File "C:\Python27\lib\httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "C:\Python27\lib\httplib.py", line 371, in _read_status
raise BadStatusLine(line)
BadStatusLine: ''
and
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 400, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 418, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1180, in do_open
r = h.getresponse(buffering=True)
File "C:\Python27\lib\httplib.py", line 1030, in getresponse
response.begin()
File "C:\Python27\lib\httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "C:\Python27\lib\httplib.py", line 365, in _read_status
line = self.fp.readline()
File "C:\Python27\lib\socket.py", line 447, in readline
data = self._sock.recv(self._rbufsize)
error: [Errno 10054] An existing connection was forcibly closed by the remote host
I've read a bit of information on the [Errno 10054] but have no idea how to prevent it.
Any help would be appreciated.
I'm thinking perhaps that r.info() is causing an exception but not
sure why it would as the r = urllib2.urlopen(url) is covered with the
try.
Nope. The first exception has nothing to do with r.info() - exception is raised on urllib2.urlopen(url), as you may see in the traceback.
BadStatusLine exception is defined in httplib and your except urllib2.URLError simply doesn't catch it. You should probably improve your exception handling logic like:
except (httplib.HTTPException, urllib2.URLError) as err:
...

Timeouts while deploying application to Google App Engine

I have a problem while deploying application to GAE. I use ubuntu. When I type command to update application this error occurs:
2011-07-22 20:13:28,598 ERROR appcfg.py:2064 An unexpected error occurred. Aborting.
Traceback (most recent call last):
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 2005, in DoUpload
missing_files = self.Begin()
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 1674, in Begin
self.Send('/api/appversion/create', payload=self.config.ToYAML())
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 1632, in Send
return self.rpcserver.Send(url, payload=payload, **self.params)
File "/home/grzegorz/google_appengine/google/appengine/tools/appengine_rpc.py", line 365, in Send
f = self.opener.open(req)
File "/usr/lib/python2.7/urllib2.py", line 391, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 409, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 369, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1193, in https_open
return self.do_open(httplib.HTTPSConnection, req)
File "/home/grzegorz/google_appengine/lib/fancy_urllib/fancy_urllib/__init__.py", line 367, in do_open
raise url_error
URLError: <urlopen error [Errno 110] Connection timed out>
Traceback (most recent call last):
File "./google_appengine/appcfg.py", line 76, in <module>
run_file(__file__, globals())
File "./google_appengine/appcfg.py", line 72, in run_file
execfile(script_path, globals_)
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 3708, in <module>
main(sys.argv)
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 3699, in main
result = AppCfgApp(argv).Run()
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 2345, in Run
self.action(self)
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 3484, in __call__
return method()
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 2745, in Update
app_summary = self.UpdateVersion(rpcserver, self.basepath, appyaml)
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 2734, in UpdateVersion
lambda path: self.opener(os.path.join(basepath, path), 'rb'))
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 2005, in DoUpload
missing_files = self.Begin()
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 1674, in Begin
self.Send('/api/appversion/create', payload=self.config.ToYAML())
File "/home/grzegorz/google_appengine/google/appengine/tools/appcfg.py", line 1632, in Send
return self.rpcserver.Send(url, payload=payload, **self.params)
File "/home/grzegorz/google_appengine/google/appengine/tools/appengine_rpc.py", line 365, in Send
f = self.opener.open(req)
File "/usr/lib/python2.7/urllib2.py", line 391, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 409, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 369, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1193, in https_open
return self.do_open(httplib.HTTPSConnection, req)
File "/home/grzegorz/google_appengine/lib/fancy_urllib/fancy_urllib/__init__.py", line 367, in do_open
raise url_error
urllib2.URLError: <urlopen error [Errno 110] Connection timed out>
I tried to figure out why this happens, especially since it didn't happen to me before.
Nothing I've tried has worked. What's weird is that if I keep trying, then after about 20-30 attempts it just magically deploys.
In my opinion there might be some Wi-Fi problems. For example, when I start xChat, it can't connect to the server. I think that these problems might be connected.
Nevertheless I can't figure out anything, so I would be thankful for any help.

Categories