urllib2.HTTPError - Getting data from a website in Python

I have a script, and when I run it I get this error message:
urllib2.HTTPError: HTTP Error 400: Bad Request
Can you help me?
from lxml import html
import urllib2
import urllib

ip_list = []
port_list = []
protocol_list = []
array = [20, 40]
ck = True
i = 0
while i < len(array):
    h = urllib2.urlopen('http://proxylist.me/proxys/index/' + str(array[i]))
    HTML_CODE = h.read()
    tree = html.fromstring(HTML_CODE)
    for block in tree.xpath('//tbody/tr'):
        ip, port, _, protocol, _, _, _, _, _ = [
            x.strip()
            for x in block.xpath('.//text()')
            if x.strip() not in ""
        ]
        ip_l = "{}".format(ip)
        port_l = "{}".format(port)
        protocol_l = "{}".format(protocol)
        if ip_l != {}:
            ck = True
            ip_list.append(ip_l)
            port_list.append(port_l)
            protocol_list.append(protocol_l)
            i = i + 1
        else:
            ck = False
print ip_list
I am getting this error:
Traceback (most recent call last):
File "C:/Users/PC0308-PC/Desktop/get_data_html.py", line 11, in <module>
h = urllib2.urlopen('http://proxylist.me/proxys/index/'+str(i))
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 475, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request

array = [0, 20, 40]
ck = True
for item in array:
    h = urllib2.urlopen('http://proxylist.me/proxys/index/%s' % (item))
    HTML_CODE = h.read()
    tree = html.fromstring(HTML_CODE)
    for block in tree.xpath('//tbody/tr'):
        ip, port, _, protocol, _, _, _, _, _ = [
            x.strip()
            for x in block.xpath('.//text()')
            if x.strip() not in ""
        ]
        ip_l = "{}".format(ip)
        port_l = "{}".format(port)
        protocol_l = "{}".format(protocol)
        if ip_l != {}:
            ck = True
            ip_list.append(ip_l)
            port_list.append(port_l)
            protocol_list.append(protocol_l)
        else:
            ck = False
print ip_list
This works on my Windows machine and parses the first three pages from http://proxylist.me/proxys/index.
BTW, your code worked well from the beginning, but it only parsed the first page.
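If a page can still return a 400, one option (a sketch, not part of the original answer) is to guard each request with a try/except so a bad page is skipped instead of crashing the loop:

import urllib2

for item in array:
    try:
        h = urllib2.urlopen('http://proxylist.me/proxys/index/%s' % item)
    except urllib2.HTTPError as e:
        # e.g. HTTP Error 400: Bad Request for a page that does not exist
        print 'skipping page %s: %s' % (item, e)
        continue
    HTML_CODE = h.read()
    # ... parse HTML_CODE with lxml as above ...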

Related

Python requests.get() function issue - Failed to establish a new connection

I have written a simple Python script that reads a list of domains from a text file and checks whether each one is a WordPress site, based on the returned result.
The code is as follows:
import requests

# Loop over the domains list
with open('domains2') as f:
    for line in f:
        domain = line
        source = requests.get(domain)
        if "wp-include" in source:
            results = 'Yes'
        else:
            results = 'No'
        print(line, ' : ', results)
The errors are as follows:
Traceback (most recent call last):
File "./test4.py", line 8, in <module>
source = requests.get(domain)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 516, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='testing.com%0a', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd5a00c4d50>: Failed to establish a new connection: [Errno -2] Name or service not known',))
I was able to run my code only when I set the value of source manually as follows, instead of reading the domains from the list; then the results were correct:
source = requests.get(domain).text
import requests

# Loop over the domains list
with open('domains2') as f:
    for line in f:
        domain = line.rstrip()
        source = requests.get(domain)
        if "wp-include" in source.text:
            results = 'Yes'
        else:
            results = 'No'
        print(line, ' : ', results)
Use source.text to get the body of the requests response, and rstrip() to remove the trailing \n.
Here is a version with domain transformation to a valid URL (for requests), in Python 3:
#!/usr/bin/env python
import requests
import re
from urllib import parse


def get_domains(file):
    res = []
    with open(file) as f:
        for x in f:
            url = x.strip()
            p = parse.urlparse(url, 'http')
            netloc = p.netloc or p.path
            path = p.path if p.netloc else ''
            if not netloc.startswith('www.'):
                netloc = 'www.' + netloc
            p = parse.ParseResult('http', netloc, path, *p[3:])
            res.append(p.geturl())
    return res


def is_wordpress(url):
    print(f"getting: {url}")
    content = requests.get(url).text
    if re.search('wp-include', content):
        return True
    else:
        return False


def main():
    result = {}
    for domain in get_domains('domain.txt'):
        result[domain] = is_wordpress(domain)
    print(result)


if __name__ == '__main__':
    main()
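One caveat: a single unresolvable domain still raises requests.exceptions.ConnectionError, as in the original traceback. A hedged variant of is_wordpress (an addition, not from the answer) that records the failure instead of aborting, assuming a 10-second timeout is acceptable:

import requests

def is_wordpress(url):
    print(f"getting: {url}")
    try:
        # timeout is an assumption; without it a dead host can hang the loop
        content = requests.get(url, timeout=10).text
    except requests.exceptions.RequestException as e:
        # covers DNS failures, refused connections, and timeouts
        print(f"could not fetch {url}: {e}")
        return False
    return 'wp-include' in content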

Python Twitter API trying to retrieve tweet but error: AttributeError: 'int' object has no attribute 'encode'

Why am I getting AttributeError: 'int' object has no attribute 'encode'?
I am trying to retrieve a tweet using the Twitter API in Python. Full traceback here:
Traceback (most recent call last):
File "C:/Python27/lol.py", line 34, in <module>
headers = req.to_header()
File "build\bdist.win-amd64\egg\oauth2\__init__.py", line 398, in to_header
params_header = ', '.join(header_params)
File "build\bdist.win-amd64\egg\oauth2\__init__.py", line 397, in <genexpr>
header_params = ('%s="%s"' % (k, v) for k, v in stringy_params)
File "build\bdist.win-amd64\egg\oauth2\__init__.py", line 396, in <genexpr>
stringy_params = ((k, escape(v)) for k, v in oauth_params)
File "build\bdist.win-amd64\egg\oauth2\__init__.py", line 163, in escape
s = s.encode('utf-8')
AttributeError: 'int' object has no attribute 'encode'
Below is the code I'm using.
import oauth2
import time
import urllib2
import json

url1 = "https://api.twitter.com/1.1/search/tweets.json"
params = {
    "oauth_version": "1.9.0",
    "oauth_nonce": oauth2.generate_nonce(),
    "oauth_timestamp": int(time.time())
}
consumer = oauth2.Consumer(key="*********", secret="*********")
token = oauth2.Token(key="*********", secret="*********")
params["oauth_consumer_key"] = consumer.key
params["oauth_token"] = token.key

for i in range(1):
    url = url1
    req = oauth2.Request(method="GET", url=url, parameters=params)
    signature_method = oauth2.SignatureMethod_HMAC_SHA1()
    req.sign_request(signature_method, consumer, token)
    headers = req.to_url()
    print headers
    print url

for i in range(1):
    url = url1
    params["q"] = "pictorial"
    params["count"] = 2
    req = oauth2.Request(method="GET", url=url, parameters=params)
    signature_method = oauth2.SignatureMethod_HMAC_SHA1()
    req.sign_request(signature_method, consumer, token)
    headers = req.to_header()
    url = req.to_url()
    response = urllib2.Request(url)
    data = json.load(urllib2.urlopen(response))
    if data["statuses"] == []:
        print "end of data"
        break
    else:
        print data
And if I change int(time.time()) into str(time.time())
I get the following error:
Traceback (most recent call last):
File "C:/Python27/lol.py", line 37, in <module>
data = json.load(urllib2.urlopen(response))
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 475, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 400: Bad Request
"oauth_timestamp": int(time.time())
Here you use an int, but that field must be a string.
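A minimal sketch of the corrected parameters; the string timestamp is the fix named above, while changing oauth_version to "1.0" is an additional assumption ("1.9.0" is not a valid OAuth version string):

import time
import oauth2

params = {
    "oauth_version": "1.0",                    # assumption: OAuth only defines version "1.0"
    "oauth_nonce": oauth2.generate_nonce(),
    "oauth_timestamp": str(int(time.time())),  # a string, so escape() can call .encode('utf-8')
}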

Python Traceback Issue

When I execute it, it says "Traceback (most recent call last)" and points at line 22, and also line 410 in the library, and so on inside Python. The error is raised on home = opener.open():
Traceback (most recent call last):
File "", line 1, in
File "C:\u\u\d\WinPython-32bit-2.7.6.2\python-2.7.6\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 540, in runfile
execfile(filename, namespace)
File "C:/u/u/d/WinPython-32bit-2.7.6.2/python-2.7.6/Scripts/ox.py", line 20, in
home = opener.open('', data)
File "C:\u\u\d\WinPython-32bit-2.7.6.2\python-2.7.6\lib\urllib2.py", line 410, in open
response = meth(req, response)
File "C:\u\u\d\WinPython-32bit-2.7.6.2\python-2.7.6\lib\urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "C:\u\u\d\WinPython-32bit-2.7.6.2\python-2.7.6\lib\urllib2.py", line 448, in error
return self._call_chain(*args)
File "C:\u\u\d\WinPython-32bit-2.7.6.2\python-2.7.6\lib\urllib2.py", line 382, in _call_chain
result = func(*args)
File "C:\u\u\d\WinPython-32bit-2.7.6.2\python-2.7.6\lib\urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 500: Internal Server Error
import urllib
import urllib2
import cookielib
import re
import os
from random import choice

cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [
    ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0"),
    ("Cookie", ".SECURITY=232")
]
f = 58454700
print "",
url = 'http://www.myhost.com/increment'
x = 0
while x < 1000000000:
    f = f + 1
    z = str(f)
    url = 'http://www.myhost.com/Send/SentIncrement'
    data = urllib.urlencode({"Increment": z, "VerificationToken": "Verified"})
    home = opener.open('http://www.myhost.com/Send/SentIncrement', data)
    os.system("cls")
    print f
    print data
    x = x + 1
raw_input("")

Proxy handler in Python

When I disable the proxy server in the web browser settings and comment out the proxy handler code, the code below works fine.
import urllib2
import urllib2_file
import urllib
import random
import mimetypes
import string
from os import listdir
import time
from google.refine import refine
from google.refine import facet

proxy = urllib2.ProxyHandler({'http': '10.200.1.26'})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)


def encode_multipart(fields, files, boundary=None):
    def escape_quote(s):
        return s.replace('"', '\\"')

    if boundary is None:
        boundary = ''.join(random.choice(_BOUNDARY_CHARS) for i in range(30))
    lines = []
    for name, value in fields.items():
        lines.extend((
            '--{0}'.format(boundary),
            'Content-Disposition: form-data; name="upload"',  # .format(escape_quote(name)),
            '',
            str(value),
        ))
    for name, value in files.items():
        filename = value['filename']
        if 'mimetype' in value:
            mimetype = value['mimetype']
        else:
            mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
        lines.extend((
            '--{0}'.format(boundary),
            'Content-Disposition: form-data; name="upload"; filename="{0}"'.format(escape_quote(filename)),
            'Content-Type: {0}'.format(mimetype),
            '',
            value['content'],
        ))
    lines.extend((
        '--{0}--'.format(boundary),
        '',
    ))
    body = '\r\n'.join(lines)
    headers = {
        'Content-Type': 'multipart/form-data; boundary={0}'.format(boundary),
        'Content-Length': str(len(body)),
    }
    return (body, headers)


_BOUNDARY_CHARS = string.digits + string.ascii_letters

u = urllib2.urlopen("http://127.0.0.1:3333/command/core/create-importing-job", data=urllib.urlencode({"test": ""}))
a = u.read()
id = ""
for i in a:
    if i.isdigit():
        id += str(i)
# sample output '{ "jobID" : 1393566803991 }'

files = {}
pathtoXML = r"C:\75"
#pathtoXML = r"C:\AM\trial"
for i in listdir(pathtoXML):
    files[i] = {'filename': i, 'content': open(pathtoXML + "\\" + i).read()}

#load raw data using the job id found in
url = "http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobI=" + id + "&subCommand=load-raw-data"
data, headers = encode_multipart({}, files)
#print len(data)
#print headers
req = urllib2.Request(url, data=data, headers=headers)
f = urllib2.urlopen(req)
f.read()

# get job status
u = urllib2.urlopen("http://127.0.0.1:3333/command/core/get-importing-job-status?jobID=" + id + "", "test")
u.read()

#from fileSelection update file selection
u = urllib2.urlopen("http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&subCommand=update-file-selection&jobID=" + id + "", "fileSelection=%5B0%2C1%2C2%2C3%5D")
u.read()

#init parser format text
u = urllib2.urlopen("http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID=" + id + "&subCommand=initialize-parser-ui&format=text%2Fxml")
u.read()

#update format and options
updateformatoptionurl = "http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID=" + id + "&subCommand=update-format-and-options"
d = urllib.urlencode({"format": "text/xml", "options": {"recordPath": ["ArrayOfAfiles", "Afiles"], "limit": -1, "includeFileSources": "false", "guessCellValueTypes": "false"}})
u = urllib2.urlopen(updateformatoptionurl, d)
u.read()
# '{"status":"ok"}'

#get-models
u = urllib2.urlopen("http://127.0.0.1:3333/command/core/get-models?importingJobID=" + id)
u.read()

# create project from import job
createfromimporturl = "http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID=" + id + "&subCommand=create-project"
d = urllib.urlencode({"format": "text/xml", "options": {"recordPath": ["ArrayOfAfiles", "Afiles"], "limit": -1, "includeFileSources": "false", "projectName": time.ctime()}})
u = urllib2.urlopen(createfromimporturl, d)
r = u.read()
After adding the proxy handler code it stops working; when I run it, it complains:
Traceback (most recent call last):
File "C:\hari\trial.py", line 87, in <module>
u = urllib2.urlopen("http://127.0.0.1:3333/command/core/create-importing-job",data=urllib.urlencode({"test":""}))
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 391, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 409, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 369, in _call_chain
result = func(*args)
File "C:\Python27\urllib2_file.py", line 207, in http_open
return self.do_open(httplib.HTTP, req)
File "C:\Python27\urllib2_file.py", line 298, in do_open
return self.parent.error('http', req, fp, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 435, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 369, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 518, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 404: Not Found
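No answer is recorded in this thread; a hedged observation is that installing the proxy opener globally routes even the 127.0.0.1 requests through 10.200.1.26, which can easily produce the 404. A minimal sketch that keeps a separate proxy-free opener for the local Refine server:

import urllib
import urllib2

# Opener for external hosts that must go through the corporate proxy.
proxy_opener = urllib2.build_opener(urllib2.ProxyHandler({'http': '10.200.1.26'}))

# Opener with an empty proxy map: it connects directly, bypassing the proxy.
direct_opener = urllib2.build_opener(urllib2.ProxyHandler({}))

# Talk to the local Refine server directly instead of through the proxy.
u = direct_opener.open("http://127.0.0.1:3333/command/core/create-importing-job",
                       urllib.urlencode({"test": ""}))
print u.read()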

Python: get Google AdSense earnings report

I need a Python script that gets the Google AdSense earnings, and I found adsense_scraper:
http://pypi.python.org/pypi/adsense_scraper/0.5
It uses Twill and html5lib to scrape the AdSense earnings data. When I use it I get this error message:
Traceback (most recent call last):
File "adsense_scraper.py", line 163, in <module>
data = main()
File "adsense_scraper.py", line 154, in main
b = get_adsense(login, password)
File "adsense_scraper.py", line 128, in get_adsense
b.submit()
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\browser.py", line 467, in submit
self._journey('open', request)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\browser.py", line 523, in _journey
r = func(*args, **kwargs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 192, in open
response = meth(req, response)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 590, in http_response
"http", request, response, code, msg, hdrs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 209, in error
result = apply(self._call_chain, args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 135, in http_error_302
return self.parent.open(new)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 192, in open
response = meth(req, response)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\utils.py", line 442, in http_response
"refresh", msg, hdrs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 209, in error
result = apply(self._call_chain, args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 135, in http_error_302
return self.parent.open(new)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 181, in open
response = urlopen(self, req, data)
File "C:\Python26\lib\urllib2.py", line 406, in _open 'unknown_open', req)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain result = func(*args)
File "C:\Python26\lib\urllib2.py", line 1163, in unknown_open raise URLError('unknown url type: %s' % type)
urllib2.URLError: <urlopen error unknown url type: 'http>
So the important thing is:
urllib2.URLError: <urlopen error unknown url type: 'http>
Can somebody tell me where the error is? Is there a better way to get the data via Python? Thanks.
There are several errors with the package; you mentioned only the first one.
1) The twill package does not handle Google's redirects correctly. Adding
newurl = newurl.strip("'")
to twill/other_packages/_mechanize_dist/_http.py:108, before
newurl = _rfc3986.clean_url(newurl, "latin-1")
fixes that (the patched region is sketched just below).
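The patched region would then look roughly like this (a sketch; the line number is per twill 0.9):

# twill/other_packages/_mechanize_dist/_http.py, around line 108
newurl = newurl.strip("'")                      # added: strip the stray quote from Google's redirect URL
newurl = _rfc3986.clean_url(newurl, "latin-1")  # existing line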
2) You have to have the correct language set in AdSense: English.
3) There are several problems in the original adsense_scraper. Here is a fixed version:
#!/usr/bin/env python
"""Scrapes Google AdSense data with Python using Twill

Current canonical location of this module is here:
http://github.com/etrepum/adsense_scraper/tree/master

Usage::

    from adsense_scraper import get_adsense, get_time_period
    b = get_adsense('YOUR_ADSENSE_LOGIN', 'YOUR_ADSENSE_PASSWORD')
    rows = get_time_period(b, 'yesterday')
    # The summary data is always the first row with channel == ''
    print 'I earned this much yesterday: $%(earnings)s' % rows[0]

"""
# requires html5lib, twill
import sys
import pprint
import decimal
from cStringIO import StringIO
from xml.etree import cElementTree

try:
    from html5lib import HTMLParser
    import twill.commands
except ImportError:
    print >>sys.stderr, """\
adsense_scraper has dependencies::

    Twill 0.9 http://twill.idyll.org/
    html5lib 0.11 http://code.google.com/p/html5lib/

Try this::

    $ easy_install twill html5lib
"""
    raise SystemExit()

__version__ = '0.5'

SERVICE_LOGIN_BOX_URL = "https://www.google.com/accounts/ServiceLogin?service=adsense&rm=hide&fpui=3&nui=15&alwf=true&ltmpl=adsense&passive=true&continue=https%3A%2F%2Fwww.google.com%2Fadsense%2Fgaiaauth2&followup=https%3A%2F%2Fwww.google.com%2Fadsense%2Fgaiaauth2&hl=en_US"
OVERVIEW_URL = "https://www.google.com/adsense/report/overview?timePeriod="

TIME_PERIODS = [
    'today',
    'yesterday',
    'thismonth',
    'lastmonth',
    'sincelastpayment',
]


def parse_decimal(s):
    """Return an int or decimal.Decimal given a human-readable number
    """
    light_stripped = s.strip(u'\u20ac')
    stripped = light_stripped.replace(',', '.').rstrip('%').lstrip('$')
    try:
        int(stripped)
        return light_stripped
    except ValueError:
        pass
    try:
        float(stripped)
        return light_stripped
    except ValueError:
        return decimal.Decimal(stripped)


def parse_summary_table(doc):
    """
    Parse the etree doc for summarytable, returns::

        [{'channel': unicode,
          'impressions': int,
          'clicks': int,
          'ctr': decimal.Decimal,
          'ecpm': decimal.Decimal,
          'earnings': decimal.Decimal}]

    """
    for t in doc.findall('.//table'):
        if t.attrib.get('id') == 'summarytable':
            break
    else:
        raise ValueError("summary table not found")

    res = []
    FIELDS = ['impressions', 'clicks', 'ctr', 'ecpm', 'earnings']
    for row in t.findall('.//tr'):
        celltext = []
        for c in row.findall('td'):
            tail = ''
            # adsense inserts an empty span if a row has a period in it, so
            # get the children and find the tail element to append to the text
            if c.find('a') and c.find('a').getchildren():
                tail = c.find('a').getchildren()[0].tail or ''
            celltext.append('%s%s' % ((c.text or c.findtext('a') or '').strip(), tail.strip()))
        celltext = filter(lambda x: x != "", celltext)
        if len(celltext) != len(FIELDS):
            continue
        try:
            value_cols = map(parse_decimal, celltext)
        except decimal.InvalidOperation:
            continue
        res.append(dict(zip(FIELDS, value_cols)))
    return res


def get_adsense(login, password):
    """Returns a twill browser instance after having logged in to AdSense
    with *login* and *password*.

    The returned browser will have all of the appropriate cookies set but may
    not be at the exact page that you want data from.
    """
    b = twill.commands.get_browser()
    b.go(SERVICE_LOGIN_BOX_URL)
    for form in b.get_all_forms():
        try:
            form['Email'] = login
            form['Passwd'] = password
        except ValueError:
            continue
        else:
            break
    else:
        raise ValueError("Could not find login form on page")
    b._browser.select_form(predicate=lambda f: f is form)
    b.submit()
    return b


def get_time_period(b, period):
    """Returns the parsed summarytable for the time period *period* given
    *b* which should be the result of a get_adsense call. *period* must be
    a time period that AdSense supports:
    ``'today'``, ``'yesterday'``, ``'thismonth'``,
    ``'lastmonth'``, ``'sincelastpayment'``.
    """
    b.go(OVERVIEW_URL + period)
    # The cElementTree treebuilder doesn't work reliably enough
    # to use directly, so we parse and then dump into cElementTree.
    doc = cElementTree.fromstring(HTMLParser().parse(b.get_html()).toxml())
    return parse_summary_table(doc)


def main():
    try:
        login, password = sys.argv[1:]
    except ValueError:
        raise SystemExit("usage: %s LOGIN PASSWORD" % (sys.argv[0],))
    twill.set_output(StringIO())
    twill.commands.reset_browser()
    b = get_adsense(login, password)
    data = {}
    for period in TIME_PERIODS:
        data[period] = get_time_period(b, period)
    pprint.pprint(data)
    twill.set_output(None)
    return data


if __name__ == '__main__':
    data = main()
