I am trying to connect to an API using python 2.7.
Code:
from urllib import urlencode
import urllib2
def http_post(url, data):
    """POST form-encoded ``data`` to ``url`` and return the response body.

    Args:
        url: Address the request is sent to.
        data: Mapping (or sequence of pairs) accepted by ``urlencode``.

    Returns:
        The value of ``response.read()`` for the reply.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request cannot be completed.
    """
    post = urlencode(data)
    # Passing a body as the second argument makes urllib2 send a POST.
    req = urllib2.Request(url, post)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the connection, even if reading the body fails.
        response.close()
Error:
>>> r = http_post(LOGIN_URL, PARAMS)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 4, in http_post
File "/usr/local/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/local/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/usr/local/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/usr/local/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/usr/local/lib/python2.7/urllib2.py", line 1222, in https_open
return self.do_open(httplib.HTTPSConnection, req)
File "/usr/local/lib/python2.7/urllib2.py", line 1184, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno -5] No address associated with hostname>
Similar code in python 3.5 is running.
It looks like the url is not being found.
Have you defined LOGIN_URL just above the output we see in your "Error:" extract?
Related
I am having trouble accessing HTTPS sites using urllib2. Here's what I've got:
import urllib2
import ssl
# Route both schemes through the authenticating proxy. Credentials are
# separated from the proxy host by "@": scheme://user:password@host:port.
proxy = urllib2.ProxyHandler({
    'https': 'http://username:password@proxy:port',
    'http': 'http://username:password@proxy:port',
})
opener = urllib2.build_opener(urllib2.HTTPHandler(),
                              urllib2.HTTPSHandler(),
                              proxy)
# install_opener() makes this opener the one urllib2.urlopen() uses.
urllib2.install_opener(opener)

url_secure = 'https://www.google.com'
url_nonsecure = 'http://www.google.com'

response = urllib2.urlopen(url_nonsecure)
try:
    d = response.read()
finally:
    # Close the response even if reading the body fails.
    response.close()
print(d)
The above runs without issue. However, if I try to run the above using
response = urllib2.urlopen(url_secure)
I get
Traceback (most recent call last):
File "google_maps.py", line 25, in <module>
response = urllib2.urlopen(url_secure)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 1241, in https_open
context=self._context)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 1198, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:590)>
From this and this it seemed like the problem could be solved by creating a new context. So I tried both:
ctx=ssl._create_unverified_context()
and
ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
and ran
response = urllib2.urlopen(url_secure, context = ctx)
d = response.read()
response.close()
print d
which both threw the following error:
Traceback (most recent call last):
File "google_maps.py", line 25, in <module>
response = urllib2.urlopen(url_secure, context=ctx)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 1241, in https_open
context=self._context)
File "/Users/username/anaconda2/lib/python2.7/urllib2.py", line 1198, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>
How can I access https sites?
I am using common_crawl_index lib for python from here to get some data from S3.
When I run the command cci_lookup com.abc to get data, it reports the error below.
I still get the output, which is the list of URLs for the lookup domain, but I do not know why the error happens.
MainThread:2014-12-19 01:48:16,150:ERROR:utils:224 Caught exception reading instance data
Traceback (most recent call last):
File "/home/deploy/anaconda/lib/python2.7/site-packages/boto/utils.py", line 211, in retry_url
r = opener.open(req)
File "/home/deploy/anaconda/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/home/deploy/anaconda/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/home/deploy/anaconda/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/home/deploy/anaconda/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/home/deploy/anaconda/lib/python2.7/urllib2.py", line 1184, in do_open
raise URLError(err)
URLError: <urlopen error timed out>
com.abc.www/forum/index.php/groupcp.php:http
com.abc.www/forum/index.php/includes/MLOtvHD:http
com.abc.www/forum/index.php/includes/blog.php:http
com.abc.www/forum/index.php/includes/content.php:http
com.abc.www/forum/index.php/includes/index.php:http
Hope anyone can help!
I am pulling websites from a list and want to test whether they are up or down. The code below works fine as long as they are up, but as soon as something is wrong with one of these URLs, I get an error message and the whole script stops.
What I want to achieve: Error message == website not working therefore print down and move to next item in list.
import urllib2
from urllib2 import Request, urlopen, HTTPError, URLError
def checkurl(z):
    """Request ``http://<z>`` and print whether the site answered.

    Prints ``up`` when ``urlopen`` returns, and ``down`` when it raises
    ``HTTPError`` or ``URLError``. Exceptions of any other type are not
    handled here and propagate to the caller.

    Args:
        z: Host (optionally with a path) without a scheme; ``http://`` is
            prepended before the request is made.
    """
    user_agent = 'Mozilla/20.0.1 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    link = "http://" + z
    req = Request(link, headers=headers)
    try:
        urlopen(req)
    except (HTTPError, URLError):
        # HTTPError covers error status codes; URLError covers failures to
        # reach the server at all. Both mean the site is not usable.
        print("down")
    else:
        print('up')
Traceback (most recent call last):
File "/home/user/Videos/python/onion/qweqweqweq.py", line 48, in <module>
checkurl(x)
File "/home/user/Videos/python/onion/qweqweqweq.py", line 23, in checkurl
page_open = urlopen(req)
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 401, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 419, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1211, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1178, in do_open
h.request(req.get_method(), req.get_selector(), req.data, headers)
File "/usr/lib/python2.7/httplib.py", line 962, in request
self._send_request(method, url, body, headers)
File "/usr/lib/python2.7/httplib.py", line 996, in _send_request
self.endheaders(body)
File "/usr/lib/python2.7/httplib.py", line 958, in endheaders
self._send_output(message_body)
File "/usr/lib/python2.7/httplib.py", line 818, in _send_output
self.send(msg)
File "/usr/lib/python2.7/httplib.py", line 780, in send
self.connect()
File "/usr/lib/python2.7/httplib.py", line 761, in connect
self.timeout, self.source_address)
File "/home/user/Videos/python/onion/qweqweqweq.py", line 5, in create_connection
sock.connect(address)
File "/usr/lib/python2.7/dist-packages/socks.py", line 369, in connect
self.__negotiatesocks5(destpair[0],destpair[1])
File "/usr/lib/python2.7/dist-packages/socks.py", line 236, in __negotiatesocks5
raise Socks5Error(ord(resp[1]),_generalerrors[ord(resp[1])])
TypeError: __init__() takes exactly 2 arguments (3 given)
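You are catching HTTPError, but what is thrown is not an HTTPError: the traceback ends in a TypeError that was raised while socks.py was trying to construct a Socks5Error.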
You're missing Socks5Error in your except clause. Look at the traceback:
raise Socks5Error(ord(resp[1]),_generalerrors[ord(resp[1])])
Note that this wouldn't have happened if you used requests instead of urllib2. The interface is a lot clearer, the documentation better.
In answer to "would it be possible to assume that the website is down regardless of the error", then this will do it:
req = Request(link, headers=headers)
try:
    page_open = urlopen(req)
except Exception:
    # Treat any failure to open the URL as "down". Catching Exception
    # instead of using a bare except still lets KeyboardInterrupt and
    # SystemExit stop the script.
    print("down")
else:
    print('up')
I built my project on a Mac and then tried to run the same thing on a BeagleBone Black (BBB).
However, I couldn't use urllib on the BBB, so I am stuck and cannot move forward (it works well on my Mac).
I tried this simple code as an example:
import urllib
# Open the URL with urllib and keep the returned response object in conn.
conn = urllib.urlopen('http://stackoverflow.com/questions/8479736/using-python-urllib-how-to-avoid-non-html-content')
then this Error occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib.py", line 86, in urlopen
return opener.open(url)
File "/usr/lib/python2.7/urllib.py", line 207, in open
return getattr(self, name)(url)
File "/usr/lib/python2.7/urllib.py", line 351, in open_http
'got a bad status line', None)
IOError: ('http protocol error', 0, 'got a bad status line', None)
I need to fetch HTML data for my project.
How can I solve this problem? Do you have any ideas ?
Thank you.
When I tried urllib2
I got this:
>>> import urllib2
>>> conn = urllib2.urlopen('http://stackoverflow.com/questions/8479736/using-python-urllib-how-to-avoid-non-html-content')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 400, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 418, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1180, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 371, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
Also I tried this:
curl http://stackoverflow.com/questions/8479736/using-python-urllib-how-to-avoid-non-html-content
curl: (52) Empty reply from server
and this:
wget http://stackoverflow.com/questions/8479736/using-python-urllib-how-to-avoid-non-html-content
Connecting to stackoverflow.com (198.252.206.16:80)
wget: error getting response
but they didn't work
At home I also tried, and it failed too, but with a different error:
conn = urllib2.urlopen('http://stackoverflow.com/questions/8479736/using-python-urllib-how-to-avoid-non-html-content')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 400, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 418, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1215, in https_open
return self.do_open(httplib.HTTPSConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1177, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno -2] Name or service not known>
environment
BBB: Linux beaglebone 3.8.13 #1 SMP Tue Jun 18 02:11:09 EDT 2013 armv7l GNU/Linux
python version: 2.7.3
I really want to recommend the requests library to you:
>>> r = requests.get('https://api.github.com/user', auth=('user', 'pass'))
>>> r.status_code
200
>>> r.headers['content-type']
'application/json; charset=utf8'
>>> r.encoding
'utf-8'
>>> r.text
u'{"type":"User"...'
http://www.python-requests.org/en/latest/
How to install:
sudo pip install requests
For some reason I can't get this to work, although with a single proxy everything seems fine.
#This works
import socks
import urllib2
# Register one SOCKS5 proxy (host, port) as the socks module's default.
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, '68.xx.193.xx', 666)
# NOTE(review): presumably this patches urllib2 so its sockets go through
# the default proxy -- confirm against the socks module documentation.
socks.wrapmodule(urllib2)
# Fetch the page and print the body that comes back.
print urllib2.urlopen('http://automation.whatismyip.com/n09230945.asp').read()
&
#This doesn't
import socks
import urllib2
proxies = ['68.xx.193.xx', 'xx.178.xx.70', '98.xx.84.xx', '83.xx.86.xx']
ports = [666, 1080, 859, 910]
# Try each proxy in turn, pairing every address with the port at the same
# position in the ports list.
for host, port in zip(proxies, ports):
    # NOTE(review): repr() of a str includes the surrounding quote
    # characters, so the host passed here is "'68.xx.193.xx'" rather than
    # the bare address. Kept as posted: this is the failing call.
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, repr(host), port)
    socks.wrapmodule(urllib2)
    print(urllib2.urlopen('http://automation.whatismyip.com/n09230945.asp').read())
Error:
Traceback (most recent call last):
File "/home/zer0/Aptana Studio 3 Workspace/sy/src/test.py", line 38, in <module>
print urllib2.urlopen('http://automation.whatismyip.com/n09230945.asp').read()
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 391, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 409, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 369, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1185, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1160, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno -5] No address associated with hostname>
Run this:
proxies = ['68.xx.193.xx', 'xx.178.xx.70', '98.xx.84.xx', '83.xx.86.xx']
ports = [666, 1080, 859, 910]
# Show exactly what repr() turns each address into, next to its port.
for host, port in zip(proxies, ports):
    # Printing a single tuple gives the same text on Python 2 and 3.
    print((repr(host), port))
You'll get
("'68.xx.193.xx'", 666)
("'xx.178.xx.70'", 1080)
("'98.xx.84.xx'", 859)
("'83.xx.86.xx'", 910)
You're adding quotes you don't want with the repr call, so urllib2 thinks it's a hostname instead of an IP address. Get rid of it and you should be fine.