I am getting this error at qr_image = urllib2.urlopen(url).read(). I have gone through all the questions similar to this here and here Answers to these questions suggest that there is a proxy connection problem. However, I am not using any proxies. I may be doing a simple mistake but I am totally stuck here.
I have cleared the proxy setting that I was using earlier in internet options>connections>LAN settings. I have also cleared the http_proxy ans https_proxy environmental variables.
EDIT: I was previously using a proxy connection but now switched to dialup modem.
import urllib2
import urllib
import gspread
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
w = 420
gc = gspread.login('panxxxxxx#gmail.com', 'xxxxxxxx')
wks = gc.open("Spreadsheet").sheet1 # "Spreadsheet" is the name of the Google Spreadsheet made in the account
val = wks.acell('A1').value
print val
# print len(wks.row_values(1))
list_of_lists = wks.get_all_values()
print list_of_lists
url = "https://api.qrserver.com/v1/create-qr-code?data=BEGIN%3AVCARD%0AVERSION%3A2.1%0AFN%3APraveen+Sridhar%0AN%3A%3BPraveen+Sridhar%0ATEL%3BHOME%3BVOICE%3A9544344104%0AEMAIL%3BHOME%3BINTERNET%3Aprvn431%40gmail.com%0AEND%3AVCARD%0A&size=220x220&margin=0"
for i in range(5):
idi = list_of_lists[i][0]
name = list_of_lists[i][1]
name_url = urllib.quote_plus(list_of_lists[i][1]) # url encoded name for api call
email = urllib.quote_plus(list_of_lists[i][2])
number = urllib.quote_plus(list_of_lists[i][3])
url = "https://api.qrserver.com/v1/create-qr-code/?data=BEGIN%3AVCARD%0AVERSION%3A2.1%0AFN%3A" + name_url + "%0AN%3A%3B" +\
name_url + "%0ATEL%3BHOME%3BVOICE%3A" + number + "%0AEMAIL%3BHOME%3BINTERNET%3A" +\
email + "%0AEND%3AVCARD%0A&size=50x50"
qr_image = urllib2.urlopen(url).read()
name_qr = name + ".png"
outfile = open(name_qr, 'wb')
outfile.write(qr_image)
outfile.close()
qr = Image.open(name_qr)
qr.thumbnail((120, 120))
thid = Image.open("tinkerhub_card.png") # Blank ID Card over which the QR code has to be placed
thid_new = Image.new('RGB', (420, 680))
thid_new.paste(thid, (0, 0))
thid_new.paste(qr, (150, 220))
id_usr_font = ImageFont.truetype("resources/OpenSans-Regular.ttf", 25)
id_usr = ImageDraw.Draw(thid_new)
w1, h1 = id_usr_font.getsize(idi)
id_usr = id_usr.text(((w - w1) / 2, 150), idi, (0, 0, 0), font=id_usr_font)
name_usr_font = ImageFont.truetype("resources/OpenSans-Regular.ttf", 30)
name_usr = ImageDraw.Draw(thid_new)
name_usr = name_usr.text((90, 365), name, (0, 0, 0), font=name_usr_font)
thid_new.show()
Traceback
Traceback (most recent call last):
File "C:/Users/Harshil/Downloads/id-card-tinkerhub.py", line 41, in <module>
qr_image = urllib2.urlopen(url).read()
File "C:\Anaconda\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Anaconda\lib\urllib2.py", line 431, in open
response = self._open(req, data)
File "C:\Anaconda\lib\urllib2.py", line 449, in _open
'_open', req)
File "C:\Anaconda\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Anaconda\lib\urllib2.py", line 1240, in https_open
context=self._context)
File "C:\Anaconda\lib\urllib2.py", line 1197, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond>
Process finished with exit code 1
Related
I was trying to run a script in a docker container and got this.
Traceback (most recent call last):
File "experiments/caffemodel2pytorch.py", line 387, in <module>
net_param = initialize(args.caffe_proto).NetParameter()
File "experiments/caffemodel2pytorch.py", line 35, in initialize
mybytes = urlopen(caffe_proto).read()
File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.6/urllib/request.py", line 526, in open
response = self._open(req, data)
File "/usr/lib/python3.6/urllib/request.py", line 544, in _open
'_open', req)
File "/usr/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/usr/lib/python3.6/urllib/request.py", line 1361, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/usr/lib/python3.6/urllib/request.py", line 1320, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 99] Cannot assign requested address>
Part of the code is here.
def initialize(caffe_proto = 'https://raw.githubusercontent.com/BVLC/caffe/master/src/caffe/proto/caffe.proto', codegen_dir = tempfile.mkdtemp(), shadow_caffe = True):
global caffe_pb2
if caffe_pb2 is None:
local_caffe_proto = os.path.join(codegen_dir, os.path.basename(caffe_proto))
with open(local_caffe_proto, 'w') as f:
mybytes = urlopen(caffe_proto).read()
mystr = mybytes.decode('ascii', 'ignore')
f.write(mystr)
#f.write((urlopen if 'http' in caffe_proto else open)(caffe_proto).read())
subprocess.check_call(['protoc', '--proto_path', os.path.dirname(local_caffe_proto), '--python_out', codegen_dir, local_caffe_proto])
sys.path.insert(0, codegen_dir)
old_pool = google.protobuf.descriptor._message.default_pool
old_symdb = google.protobuf.symbol_database._DEFAULT
google.protobuf.descriptor._message.default_pool = google.protobuf.descriptor_pool.DescriptorPool()
google.protobuf.symbol_database._DEFAULT = google.protobuf.symbol_database.SymbolDatabase(pool = google.protobuf.descriptor._message.default_pool)
import caffe_pb2 as caffe_pb2
google.protobuf.descriptor._message.default_pool = old_pool
google.protobuf.symbol_database._DEFAULT = old_symdb
sys.modules[__name__ + '.proto'] = sys.modules[__name__]
if shadow_caffe:
sys.modules['caffe'] = sys.modules[__name__]
sys.modules['caffe.proto'] = sys.modules[__name__]
return caffe_pb2
I think it has something to do with the urlopen, but i don't know how to fix this in a docker container, any help would be appreciated, plz.
BTW, I start the container like this:
sudo nvidia-docker run -itv /home/ljh/mobilepose:/home/mobilepose -p 7777:8888 ufoym/deepo:all-py36-jupyter /bin/bash
I am just a newly self-learned programmer. I was hoping to upload some numbers to my website by python But somewhat I failed. Could you help me figure out what is wrong?
Here is my original python code.
#!/usr/bin/python
import time
import urllib.request
import random
import datetime
from urllib.request import Request,urlopen
basic_web = 'http://ihome.ust.hk/~xxxxx/cgi-bin/datafile.php?'
message=""
while(True):
local_time= time.time()
web_x = basic_web
file1 = open("datalist1.txt", "r")
queue1 = file1.read()
file1.close()
web_x += "&queue1=" + queue1
file2 = open("datalist2.txt", "r")
queue2 = file2.read()
file2.close()
web_x += "&queue2=" + queue2
web_x += "&local_time=" + str (local_time)
print (web_x)
#req = Request (web_x)
#html = urlopen(req).read()
response = urllib.request.urlopen(web_x, timeout = 1)
html = response.read()
print(html)
time.sleep(0.1)
print ("hehe")
And here is the output error that I got:
Traceback (most recent call last):
File "C:\web bus stop\local\datauploader.py", line 25, in <module>
response = urllib.request.urlopen(web_x)
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 465, in open
response = self._open(req, data)
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 483, in _open
'_open', req)
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 443, in _call_chain
result = func(*args)
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1268, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1243, in do_open
r = h.getresponse()
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1174, in getresponse
response.begin()
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 282, in begin
version, status, reason = self._read_status()
File "C:\Users\ad\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 264, in _read_status
raise BadStatusLine(line)
http.client.BadStatusLine: connected! queue1queue2 finish sir!
I would really appreciate it if you guys could help me figure out what is the bug.
Never mind.
I changed a computer to run it and it worked now.
Could anyone tell me what am I doing wrong? I keep getting an error with this code.
I'm trying to download all of the swf's from primaryschoolgames just as an experiment but I can't seem to do it:
#!/usr/bin/env python
# encoding: utf-8
import sys, getopt
import os, urllib, urllib2, re, string, math
help_message = '''
'''
no_param = '''
'''
verbose = False
fakeMode = False
curPath = os.getcwd() + "/"
urlRegex = ''
FileRegex = ''
outputPath = ''
currentFile = ''
def removeDuplicates(seq):
# Not order preserving
keys = {}
for e in seq:
keys[e] = 1
return keys.keys()
def go(filename):
print "Having a look at " + string.capwords(filename)
global urlRegex, FileRegex, outputPath, currentFile
url = 'http://cdn.primarygames.com' + filename
urlRegex = '/'+filename+'/.+/download'
FileRegex = '/'+filename+'/(.*?)/download'
outputPath = curPath+"Swfs"+"/"
if not os.path.exists(outputPath):
os.makedirs(outputPath)
filelist = []
while(len(url)):
# looping system
newlist, url = scrapePage(url, filename)
filelist.extend(newlist)
print 'Found %s Files.' % len(filelist)
for swf in filelist:
swfurl = swf['url']
name = swf['name']
currentFile = name
#print 'Downloading '+name,
if not fakeMode:
#print ''
urllib.urlretrieve('http://cdn.primarygames.com' + swfurl, outputPath+name)
else:
print 'Not downloading %s.' % name
print "All done with %s!" % filename
def scrapePage(url, filename):
print 'Looking through '+url
html = urllib2.urlopen(url).read()
swflist = re.findall(urlRegex, html)
swflist = removeDuplicates(swflist)
swfs = []
for swfurl in swflist:
r = re.compile(FileRegex)
swfname = r.search(swfurl).group(1)
swfname = swfname.replace('-', ' ')
name = filename + "/" + swfname + ".swf"
name = string.capwords(name)
swf.append({'name':name,'url':swfurl})
r = re.compile(nextRegex)
result = r.search(html)
if result:
nextUrl = 'http://cdn.primarygames.com' + result.group(1)
else:
nextUrl = ''
return swfs, nextUrl
def main(argv=None):
global verbose, fakeMode
if argv is None:
argv = sys.argv
try:
try:
opts, args = getopt.getopt(argv[1:], "ho:vf", ["help", "output="])
except getopt.error, msg:
raise Usage(msg)
# option processing
for option, value in opts:
if option == "-v":
verbose = True
if option in ("-f", "--fake"):
fakeMode = True
if option in ("-h", "--help"):
raise Usage(help_message)
if option in ("-o", "--output"):
output = value
if len(args):
swfs = args
else:
raise Usage(no_param)
except Usage, err:
print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
if err.msg != help_message:
print >> sys.stderr, "\t for help use --help"
return 2
for swf in swfs:
go(swf)
if __name__ == "__main__":
sys.exit(main())
This is the error I keep getting:
Having a look at *
Looking through http://cdn.primarygames.com/*
Traceback (most recent call last):
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 129, in <module>
sys.exit(main())
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 125, in main
go(swf)
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 48, in go
newlist, url = scrapePage(url, filename)
File "C:\Users\Terrii\Desktop\VB Extra's\PrimarySchoolGames Swf Downloader.py"
, line 67, in scrapePage
html = urllib2.urlopen(url).read()
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 400, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 418, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1177, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 11004] getaddrinfo failed>
A failed getaddrinfo normally suggests that something is wrong with the URL you're providing. Since I am able to resolve the address are you sure you aren't behind a proxy server? This could result in a failed DNS lookup which results in exactly this message.
How Python determines which proxy to use on Windows:
In a Windows environment, if no proxy environment variables are set,
proxy settings are obtained from the registry’s Internet Settings
section.
For more help I concurr with #MikeHunter. I tried to fix your code, but since I had to implement your Exception-Class to get the code running at all I think you should re-indent your code and provide more information. Sorry.
I'm trying to make a https get request from behind a squid proxy with cac card authentication. Loading the opensc engine and grabbing the cert and private key seem to work fine. Below is the traceback and the code.
Any help is greatly appreciated.
Traceback
Traceback (most recent call last):
File "testM2Crypto.py", line 64, in <module>
res = m2urllib2.urlopen(req)
File "c:\Python27\lib\urllib2.py", line 135, in urlopen
return _opener.open(url, data, timeout)
File "c:\Python27\lib\urllib2.py", line 415, in open
response = self._open(req, data)
File "c:\Python27\lib\urllib2.py", line 433, in _open
'_open', req)
File "c:\Python27\lib\urllib2.py", line 387, in _call_chain
result = func(*args)
File "c:\Python27\lib\site-packages\M2Crypto\m2urllib2.py", line 94, in https_open
h.request(req.get_method(), req.get_selector(), req.data, headers)
File "c:\Python27\lib\httplib.py", line 963, in request
self._send_request(method, url, body, headers)
File "c:\Python27\lib\httplib.py", line 994, in _send_request
self.putrequest(method, url, **skips)
File "c:\Python27\lib\site-packages\M2Crypto\httpslib.py", line 140, in putrequest
raise ValueError, "unknown URL type: %s" % url
ValueError: unknown URL type: /index.asp?site=SomeSite
Code
from M2Crypto import httpslib, m2urllib2, m2, SSL, Engine
import urllib2
url = 'https://some.domain.com/index.asp?site=SomeSite'
e = Engine.load_dynamic_engine("pkcs11", "c:/windows/syswow64/engine_pkcs11.dll")
pk = Engine.Engine("pkcs11")
pk.ctrl_cmd_string("MODULE_PATH", "c:/windows/syswow64/opensc-pkcs11.dll")
m2.engine_init(m2.engine_by_id("pkcs11"))
cert = e.load_certificate("slot_01-id_01")
privatekey = e.load_private_key("slot_01-id_01")
ctx = SSL.Context("sslv23")
ctx.set_cipher_list("HIGH:!aNULL:!eNULL:#STRENGTH")
ctx.set_session_id_ctx("foobar")
m2.ssl_ctx_use_x509(ctx.ctx, cert.x509)
m2.ssl_ctx_use_pkey_privkey(ctx.ctx, privatekey.pkey)
proxy_support=urllib2.ProxyHandler({'https':'https://proxy:3128'})
opener = m2urllib2.build_opener(ctx, proxy_support)
m2urllib2.install_opener(opener)
req = m2urllib2.Request(url)
res = m2urllib2.urlopen(req)
I finally was able to solve the problem yesterday. I had to make a few modifications to the code. I also had to patch a bug in the M2Crypto library that was preventing https through a proxy(Thanks to Miloslav Trmač from redhat for the patch). The solution is below for anyone else who might be running into a similar problem. Hope this helps.
Code
from M2Crypto import httpslib, m2urllib2, m2, SSL, Engine
import urllib2
userPin = "123456"
rootCertPath = 'd:/path/to/rootCert.pem'
url = 'https://some.domain.com/index.asp?site=SomeSite'
e = Engine.load_dynamic_engine("pkcs11", "c:/windows/syswow64/engine_pkcs11.dll")
pk = Engine.Engine("pkcs11")
pk.ctrl_cmd_string("MODULE_PATH", "c:/windows/syswow64/opensc-pkcs11.dll")
if len(userPin) > 0: pk.ctrl_cmd_string("PIN", userPin)
m2.engine_init(m2.engine_by_id("pkcs11"))
rootcert = X509.load_cert(rootCertPath)
cert = e.load_certificate("slot_01-id_01")
privatekey = e.load_private_key("slot_01-id_01")
ctx = SSL.Context("sslv23")
ctx.set_cipher_list("HIGH:!aNULL:!eNULL:#STRENGTH")
ctx.set_session_id_ctx("foobar")
ctx.load_verify_locations(cafile=rootcert)
m2.ssl_ctx_use_x509(ctx.ctx, cert.x509)
m2.ssl_ctx_use_pkey_privkey(ctx.ctx, privatekey.pkey)
proxy_support=urllib2.ProxyHandler({'https':'https://proxy:3128'})
opener = m2urllib2.build_opener(ctx, proxy_support)
m2urllib2.install_opener(opener)
req = m2urllib2.Request(url)
try:
res = m2urllib2.urlopen(req)
print '\nsuccess'
except urllib2.HTTPError, err:
print '\nerror'
print 'err.code: '+str(err.code)
print 'err.reason: '+str(err.reason)
print 'err.read(): '+str(err.read())
Thanks to Miloslav Trmač from redhat for the patch. I found this patch at the following url, http://arm.koji.fedoraproject.org/koji/buildinfo?buildID=61225 .
M2Crypto Patch
diff -urN M2Crypto/M2Crypto/httpslib.py M2Crypto-0.21.1/M2Crypto/httpslib.py
--- M2Crypto/M2Crypto/httpslib.py 2012-03-15 03:27:22.181524406 +0100
+++ M2Crypto-0.21.1/M2Crypto/httpslib.py 2012-03-15 03:27:40.467485033 +0100
## -182,14 +182,14 ##
else:
HTTPSConnection.putheader(self, header, value)
- def endheaders(self):
+ def endheaders(self, *args, **kwargs):
# We've recieved all of hte headers. Use the supplied username
# and password for authorization, possibly overriding the authstring
# supplied in the headers.
if not self._proxy_auth:
self._proxy_auth = self._encode_auth()
- HTTPSConnection.endheaders(self)
+ HTTPSConnection.endheaders(self, *args, **kwargs)
def connect(self):
HTTPConnection.connect(self)
diff -urN M2Crypto/M2Crypto/m2urllib2.py M2Crypto-0.21.1/M2Crypto/m2urllib2.py
--- M2Crypto/M2Crypto/m2urllib2.py 2011-01-15 20:10:05.000000000 +0100
+++ M2Crypto-0.21.1/M2Crypto/m2urllib2.py 2012-03-15 03:27:40.467485033 +0100
## -64,8 +64,10 ##
target_host = urlparse.urlparse(full_url)[1]
if (target_host != host):
+ request_uri = urlparse.urldefrag(full_url)[0]
h = httpslib.ProxyHTTPSConnection(host = host, ssl_context = self.ctx)
else:
+ request_uri = req.get_selector()
h = httpslib.HTTPSConnection(host = host, ssl_context = self.ctx)
# End our change
h.set_debuglevel(self._debuglevel)
## -80,7 +82,7 ##
# request.
headers["Connection"] = "close"
try:
- h.request(req.get_method(), req.get_selector(), req.data, headers)
+ h.request(req.get_method(), request_uri, req.data, headers)
r = h.getresponse()
except socket.error, err: # XXX what error?
raise URLError(err)
I need a python script that gets the google adsense earnings and I found adsense scraper:
http://pypi.python.org/pypi/adsense_scraper/0.5
It uses Twill and html5lib to scrape google adsense earnings data. When I use it I get this error message:
Traceback (most recent call last):
File "adsense_scraper.py", line 163, in <module>
data = main()
File "adsense_scraper.py", line 154, in main
b = get_adsense(login, password)
File "adsense_scraper.py", line 128, in get_adsense
b.submit()
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\browser.py", line 467, in submit
self._journey('open', request)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\browser.py", line 523, in _journey
r = func(*args, **kwargs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 192, in open
response = meth(req, response)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 590, in http_response
"http", request, response, code, msg, hdrs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 209, in error
result = apply(self._call_chain, args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 135, in http_error_302
return self.parent.open(new)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 192, in open
response = meth(req, response)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\utils.py", line 442, in http_response
"refresh", msg, hdrs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 209, in error
result = apply(self._call_chain, args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 135, in http_error_302
return self.parent.open(new)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 181, in open
response = urlopen(self, req, data)
File "C:\Python26\lib\urllib2.py", line 406, in _open 'unknown_open', req)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain result = func(*args)
File "C:\Python26\lib\urllib2.py", line 1163, in unknown_open raise URLError('unknown url type: %s' % type)
urllib2.URLError: <urlopen error unknown url type: 'http>
So the important thing is:
urllib2.URLError: <urlopen error unknown url type: 'http>
Can somebody tell me where the error is? Is there even a better way to get the data via python? Thanks
there are several errors with the package, you mentioned only the first one
1) twill package does not handle google's redirects correctly, adding
newurl = newurl.strip( "'" )
to twill/other_packages/_mechanize_dist/_http.py:108 before
newurl = _rfc3986.clean_url(newurl, "latin-1")
fixes that
2) you have to have the correct language set in adsense - English
3) there are several problems in the orignal adsense_scraper
#!/usr/bin/env python
"""Scrapes Google AdSense data with Python using Twill
Current canonical location of this module is here:
http://github.com/etrepum/adsense_scraper/tree/master
Usage::
from adsense_scraper import get_adsense, get_time_period
b = get_adsense('YOUR_ADSENSE_LOGIN', 'YOUR_ADSENSE_PASSWORD')
rows = get_time_period(b, 'yesterday')
# The summary data is always the first row with channel == ''
print 'I earned this much yesterday: $%(earnings)s' % rows[0]
"""
# requires html5lib, twill
import sys
import pprint
import decimal
from cStringIO import StringIO
from xml.etree import cElementTree
try:
from html5lib import HTMLParser
import twill.commands
except ImportError:
print >>sys.stderr, """\
adsense_scraper has dependencies::
Twill 0.9 http://twill.idyll.org/
html5lib 0.11 http://code.google.com/p/html5lib/
Try this::
$ easy_install twill html5lib
"""
raise SystemExit()
__version__ = '0.5'
SERVICE_LOGIN_BOX_URL = "https://www.google.com/accounts/ServiceLogin?service=adsense&rm=hide&fpui=3&nui=15&alwf=true<mpl=adsense&passive=true&continue=https%3A%2F%2Fwww.google.com%2Fadsense%2Fgaiaauth2&followup=https%3A%2F%2Fwww.google.com%2Fadsense%2Fgaiaauth2&hl=en_US"
OVERVIEW_URL = "https://www.google.com/adsense/report/overview?timePeriod="
TIME_PERIODS = [
'today',
'yesterday',
'thismonth',
'lastmonth',
'sincelastpayment',
]
def parse_decimal(s):
"""Return an int or decimal.Decimal given a human-readable number
"""
light_stripped = s.strip(u'\u20ac')
stripped = light_stripped.replace(',', '.').rstrip('%').lstrip('$')
try:
int(stripped)
return light_stripped
except ValueError:
pass
try:
float(stripped)
return light_stripped
except ValueError:
return decimal.Decimal(stripped)
def parse_summary_table(doc):
"""
Parse the etree doc for summarytable, returns::
[{'channel': unicode,
'impressions': int,
'clicks': int,
'ctr': decimal.Decimal,
'ecpm': decimal.Decimal,
'earnings': decimal.Decimal}]
"""
for t in doc.findall('.//table'):
if t.attrib.get('id') == 'summarytable':
break
else:
raise ValueError("summary table not found")
res = []
FIELDS = ['impressions', 'clicks', 'ctr', 'ecpm', 'earnings']
for row in t.findall('.//tr'):
celltext = []
for c in row.findall('td'):
tail = ''
# adsense inserts an empty span if a row has a period in it, so
# get the children and find the tail element to append to the text
if c.find('a') and c.find('a').getchildren():
tail = c.find('a').getchildren()[0].tail or ''
celltext.append('%s%s' % ((c.text or c.findtext('a') or '').strip(), tail.strip()))
celltext = filter( lambda x: x != "" , celltext )
if len(celltext) != len(FIELDS):
continue
try:
value_cols = map(parse_decimal, celltext)
except decimal.InvalidOperation:
continue
res.append(dict(zip(FIELDS, value_cols)))
return res
def get_adsense(login, password):
"""Returns a twill browser instance after having logged in to AdSense
with *login* and *password*.
The returned browser will have all of the appropriate cookies set but may
not be at the exact page that you want data from.
"""
b = twill.commands.get_browser()
b.go(SERVICE_LOGIN_BOX_URL)
for form in b.get_all_forms():
try:
form['Email'] = login
form['Passwd'] = password
except ValueError:
continue
else:
break
else:
raise ValueError("Could not find login form on page")
b._browser.select_form(predicate=lambda f: f is form)
b.submit()
return b
def get_time_period(b, period):
"""Returns the parsed summarytable for the time period *period* given
*b* which should be the result of a get_adsense call. *period* must be
a time period that AdSense supports:
``'today'``, ``'yesterday'``, ``'thismonth'``,
``'lastmonth'``, ``'sincelastpayment'``.
"""
b.go(OVERVIEW_URL + period)
# The cElementTree treebuilder doesn't work reliably enough
# to use directly, so we parse and then dump into cElementTree.
doc = cElementTree.fromstring(HTMLParser().parse(b.get_html()).toxml())
return parse_summary_table(doc)
def main():
try:
login, password = sys.argv[1:]
except ValueError:
raise SystemExit("usage: %s LOGIN PASSWORD" % (sys.argv[0],))
twill.set_output(StringIO())
twill.commands.reset_browser()
b = get_adsense(login, password)
data = {}
for period in TIME_PERIODS:
data[period] = get_time_period(b, period)
pprint.pprint(data)
twill.set_output(None)
return data
if __name__ == '__main__':
data = main()