urllib2 httplib.BadStatusLine - python

I am using Python's urllib2 to connect to an HTTP server. Sometimes I get the exception httplib.BadStatusLine: ''.
My code:
response = None
try:
    request = urllib2.Request(http_url, params)
    response = urllib2.urlopen(request, timeout=5000)
    return str(response.read())
except urllib2.HTTPError:
    return ""
except urllib2.URLError:
    return ""
The error traceback:
File "/usr/lib64/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib64/python2.7/urllib2.py", line 431, in open
response = self._open(req, data)
File "/usr/lib64/python2.7/urllib2.py", line 449, in _open
'_open', req)
File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib64/python2.7/urllib2.py", line 1244, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib64/python2.7/urllib2.py", line 1217, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib64/python2.7/httplib.py", line 1051, in getresponse
response.begin()
File "/usr/lib64/python2.7/httplib.py", line 415, in begin
version, status, reason = self._read_status()
File "/usr/lib64/python2.7/httplib.py", line 379, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
I don't know how to fix this, and I don't know why I get this error response.
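For what it's worth, httplib raises BadStatusLine when the server closes the connection without sending a valid HTTP status line, and since BadStatusLine subclasses httplib.HTTPException rather than urllib2.URLError, neither except clause above catches it. A minimal sketch of a broader net, wrapped in a hypothetical fetch() function:

import httplib
import urllib2

def fetch(http_url, params):
    # Sketch only: BadStatusLine subclasses httplib.HTTPException,
    # which urllib2.URLError does not cover. HTTPError subclasses
    # URLError, so one urllib2 clause handles both.
    try:
        request = urllib2.Request(http_url, params)
        response = urllib2.urlopen(request, timeout=5000)
        return str(response.read())
    except (urllib2.URLError, httplib.HTTPException):
        return ""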

Related

urlopen error [Errno 111] Connection refused in a very simple script

I already checked whether proxy settings were enabled, reset winsock, and uninstalled the firewall and antivirus, and I still get the same error message; but if I type the URL into Firefox or IE it works just fine. Thanks for any help.
Here is the script:
import urllib2
import ssl

# ssl._create_unverified_context() disables certificate verification,
# e.g. for a device serving a self-signed certificate.
gcontext = ssl._create_unverified_context()
url = 'https://192.168.1.1:8000/cgi-bin/login.html'
html = urllib2.urlopen(url=url, context=gcontext).read()
print html
and here is the traceback:
File "get_url.py", line 6, in <module>
f = urllib2.urlopen("https://192.168.1.1:8000/cgi-bin/login.html",context=gcontext, timeout=3000)
File "/usr/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 435, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 548, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 467, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 654, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1241, in https_open
context=self._context)
File "/usr/lib/python2.7/urllib2.py", line 1198, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>
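One possibility, given that the URL works in a browser but not from the script: urllib2 picks up http_proxy/https_proxy environment settings by default, while browsers often bypass the proxy for LAN addresses such as 192.168.1.1. A hedged sketch that disables proxy detection (assumes Python 2.7.9+, where HTTPSHandler accepts a context):

import ssl
import urllib2

# An empty ProxyHandler stops urllib2 from reading proxy settings
# from the environment; HTTPSHandler carries the unverified context.
gcontext = ssl._create_unverified_context()
opener = urllib2.build_opener(urllib2.ProxyHandler({}),
                              urllib2.HTTPSHandler(context=gcontext))
html = opener.open('https://192.168.1.1:8000/cgi-bin/login.html',
                   timeout=30).read()
print html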

Catching errors when scraping with Selenium

As part of a scraping job, I am trying to catch errors and bypass them. I want to keep a while loop going in spite of these errors being raised. I have this code:
import httplib
import logging
import time

logger = logging.getLogger(__name__)
# ...
except (httplib.HTTPException, IOError) as e:
    logger.exception('Ignoring exception, sleeping for 20 seconds')
    time.sleep(20)
But this still throws the same socket error as before:
Traceback (most recent call last):
  File "/Users/aa/Box Sync/Work/PythonCode/TWPh/dev.py", line 54, in <module>
    old_length = len(driver.page_source)
  File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 438, in page_source
    return self.execute(Command.GET_PAGE_SOURCE)['value']
  File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 173, in execute
    response = self.command_executor.execute(driver_command, params)
  File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 349, in execute
    return self._request(command_info[0], url, body=data)
  File "/usr/local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 417, in _request
    resp = opener.open(request)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 431, in open
    response = self._open(req, data)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 449, in _open
    '_open', req)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 409, in _call_chain
    result = func(*args)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1227, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1200, in do_open
    r = h.getresponse(buffering=True)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1073, in getresponse
    response.begin()
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 415, in begin
    version, status, reason = self._read_status()
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 371, in _read_status
    line = self.fp.readline(_MAXLINE + 1)
  File "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 476, in readline
    data = self._sock.recv(self._rbufsize)
socket.error: [Errno 54] Connection reset by peer
[Finished in 24639.7s with exit code 1]
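A note on why the handler above may not fire: in Python 2.7, socket.error is a subclass of IOError, so the (httplib.HTTPException, IOError) tuple does cover it; the traceback suggests the failing call (driver.page_source, dev.py line 54) simply isn't inside the try block. A hedged sketch that keeps the loop alive, assuming driver and logger exist in the surrounding code:

import httplib
import time

# Sketch only: the call that raises must sit inside the try; in 2.7
# socket.error already subclasses IOError, so the tuple covers it.
while True:
    try:
        old_length = len(driver.page_source)
    except (httplib.HTTPException, IOError):
        logger.exception('Ignoring exception, sleeping for 20 seconds')
        time.sleep(20)
        continue
    break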

Python error escape "socket.error: [Errno 54] Connection reset by peer"

I'm running a scraper that's going through a few domains, and it's getting hung up on http://www.1000markets.com/
Checking from different sources, the site seems to be down. That's totally fine, but I'm getting the error mentioned in the title.
How can I escape this? I'm catching HTTPError and URLError, but it's still getting hung up.
Any help on this would be great.
def get_html(link):
    import urllib2
    from urllib2 import Request, urlopen, URLError, HTTPError
    try:
        res = urllib2.urlopen(link)
        html = res.read()
    except URLError as e:
        return link
    except HTTPError as e:
        return link
Edit: attached is the error:
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1187, in do_open
r = h.getresponse(buffering=True)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1045, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 476, in readline
data = self._sock.recv(self._rbufsize)
socket.error: [Errno 54] Connection reset by peer
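The traceback ends in socket.error, which is not a subclass of urllib2.URLError (both derive independently from IOError in Python 2.7), so neither except clause above ever sees it; note also that HTTPError subclasses URLError, which makes the second clause unreachable. A hedged sketch of get_html with the socket error covered (the timeout value is an assumption):

import socket
import urllib2
from urllib2 import URLError

def get_html(link):
    # Sketch only: socket.error is raised straight from httplib's
    # read path and needs its own clause alongside URLError.
    try:
        res = urllib2.urlopen(link, timeout=30)
        return res.read()
    except (URLError, socket.error):
        return link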

Python nonnumeric port exception

I'm trying to send a POST request with a JSON body using urllib2:
request = urllib2.Request('http://localhost:8090/api/', jdata, {'Content-Type': 'application/json'})
f = urllib2.urlopen(request)
But I get this exception:
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 407, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 520, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 439, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 626, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 401, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 419, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1211, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1150, in do_open
h = http_class(host, timeout=req.timeout) # will parse host:port
File "/usr/lib/python2.7/httplib.py", line 693, in __init__
self._set_hostport(host, port)
File "/usr/lib/python2.7/httplib.py", line 721, in _set_hostport
raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
httplib.InvalidURL: nonnumeric port: '8090��>��hbWb�ba'
How can I fix it?
Thank you.
It seems that urllib2 isn't parsing your request correctly. Why don't you try pycurl?
import pycurl
c = pycurl.Curl()
c.setopt(c.URL, 'http://myfavpizzaplace.com/order')
c.setopt(c.POSTFIELDS, 'pizza=Quattro+Stagioni&extra=cheese')
c.setopt(c.VERBOSE, True)
c.perform()
Example from http://www.angryobjects.com/2011/10/15/http-with-python-pycurl-by-example/
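The garbage bytes after 8090 suggest the URL string itself is being corrupted in memory before urllib2 ever sees it, so the root fix is upstream of the request. As a hedged sanity check (the helper name is hypothetical), the standard urlparse module can reject a corrupted port early:

import urlparse

def assert_numeric_port(url):
    # SplitResult.port calls int() on the port portion of the netloc
    # and raises ValueError if it is not purely numeric, so a
    # corrupted URL fails here instead of deep inside httplib.
    urlparse.urlsplit(url).port
    return url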

Can calling Python urllib2's info() cause an exception?

I'm getting a couple of exceptions popping up from time to time but can't think of the cause.
Here's a snippet:
import urllib2

try:
    r = urllib2.urlopen(url)
except urllib2.URLError as e:
    if hasattr(e, 'code'):
        # unauthorized
        print('UA: %s' % url)
    elif hasattr(e, 'reason'):
        # timeout
        print('TO: %s' % url)
else:
    i = r.info()
    try:
        server = i['server']
    except KeyError:
        pass
    else:
        if 'authenticate' not in server:
            print('NA: %s' % url)
I'm thinking that perhaps r.info() is causing an exception, but I'm not sure why it would, since r = urllib2.urlopen(url) is covered by the try.
The errors are:
Traceback (most recent call last):
  File "C:\Python27\lib\threading.py", line 551, in __bootstrap_inner
    self.run()
  File "C:\Users\anthony\Scripts\checker.py", line 35, in run
    r = urllib2.urlopen(url)
  File "C:\Python27\lib\urllib2.py", line 126, in urlopen
    return _opener.open(url, data, timeout)
  File "C:\Python27\lib\urllib2.py", line 400, in open
    response = self._open(req, data)
  File "C:\Python27\lib\urllib2.py", line 418, in _open
    '_open', req)
  File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
    result = func(*args)
  File "C:\Python27\lib\urllib2.py", line 1207, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "C:\Python27\lib\urllib2.py", line 1180, in do_open
    r = h.getresponse(buffering=True)
  File "C:\Python27\lib\httplib.py", line 1030, in getresponse
    response.begin()
  File "C:\Python27\lib\httplib.py", line 407, in begin
    version, status, reason = self._read_status()
  File "C:\Python27\lib\httplib.py", line 371, in _read_status
    raise BadStatusLine(line)
BadStatusLine: ''
and
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 400, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 418, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1180, in do_open
r = h.getresponse(buffering=True)
File "C:\Python27\lib\httplib.py", line 1030, in getresponse
response.begin()
File "C:\Python27\lib\httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "C:\Python27\lib\httplib.py", line 365, in _read_status
line = self.fp.readline()
File "C:\Python27\lib\socket.py", line 447, in readline
data = self._sock.recv(self._rbufsize)
error: [Errno 10054] An existing connection was forcibly closed by the remote host
I've read a bit of information on [Errno 10054] but have no idea how to prevent it.
Any help would be appreciated.
I'm thinking that perhaps r.info() is causing an exception, but I'm not sure why it would, since r = urllib2.urlopen(url) is covered by the try.
Nope. The first exception has nothing to do with r.info(); the exception is raised by urllib2.urlopen(url), as you can see in the traceback.
The BadStatusLine exception is defined in httplib, and your except urllib2.URLError simply doesn't catch it. You should probably broaden your exception handling logic, for example:
except (httplib.HTTPException, urllib2.URLError) as err:
    ...
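Note that the second traceback ends in a raw socket.error ([Errno 10054]), which that tuple does not cover, so a fuller net might look like this (sketch only; url is a placeholder):

import httplib
import socket
import urllib2

url = 'http://example.com/'  # placeholder
try:
    r = urllib2.urlopen(url, timeout=30)
except (httplib.HTTPException, urllib2.URLError, socket.error) as err:
    # [Errno 10054] escapes urllib2's own socket.error wrapping because
    # it is raised while reading the response, not while sending the
    # request, so it must be caught explicitly here.
    print('request failed: %s' % err)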
