Custom user agents wrongly formatted in Python

Today I tried to make simple requests to a website and to set custom headers that I defined in a separate file called useragents.txt. I have now wasted a lot of time trying to get it to work. The issue is that Python won't request the site, raising a ValueError: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
I'm not sure why it adds a b'' and a \n there. If I print the variable, the output is without these symbols. Here is a little code snippet that might show better what I mean:
import random
import time
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup

def get_soup(url, header):
    time.sleep(random.choice([1, 2, 3]))
    return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")

with open("useragents.txt", "r") as user_agents_file:
    user_agents_lines = user_agents_file.read().splitlines()

print(user_agents_lines)

# count
user_agent = random.choice(user_agents_lines)
print(f"USER-AGENT: {user_agent}")
# for user_agent in user_agents_lines:
#     count += 1
#     print(f"Line{count}: {user_agent.strip()}")
The full error is:
Traceback (most recent call last):
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 118, in <module>
scraper() # run the function
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 69, in scraper
soup = get_soup(surveyingurl, testheader)
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 43, in get_soup
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1389, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1257, in request
self._send_request(method, url, body, headers, encode_chunked)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1298, in _send_request
self.putheader(hdr, value)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1235, in putheader
raise ValueError('Invalid header value %r' % (values[i],))
ValueError: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
Process finished with exit code 1
Below is a screenshot that shows what my useragents.txt file looks like:

with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
user_agent = random.choice(user_agents_lines)
user_agent = user_agent.replace(b'\n', b'')
print(f"USER-AGENT: {user_agent}")

Related

python requests issue 429 bad request

Why does it work when I call a website with curl, but always return 429 when I call it with Python? I tried setting a lot of different user agents and cookies...
curl call:
curl "https://query2.finance.yahoo.com/v10/finance/quoteSummary/GLW?formatted=true&crumb=8ldhetOu7RJ&lang=en-US&region=US&modules=summaryDetail&corsDomain=finance.yahoo.com"
response: {"quoteSummary":{"result":[{"summaryDetail":{"maxAge":1,"priceHint":{"raw":2,"fmt":"2","longFmt":"2"},"previousClose":{"raw":37.12,"fmt":"37.12"},"open":{"raw":37.19,"fmt":"37.19"},"dayLow":{"raw":37.12,"fmt":"37.12"},"dayHigh":{"raw":37.95,"fmt":"37.95"},"regularMarketPreviousClose":{"raw":37.12,"fmt":"37.12"},"regularMarketOpen":{"raw":37.19,"fmt":"37.19"},"regularMarketDayLow":{"raw":37.12,"fmt":"37.12"},"regularMarketDayHigh":{"raw":37.95,"fmt":"37.95"},"dividendRate":{"raw":0.88,"fmt":"0.88"},"dividendYield":{"raw":0.0232,"fmt":"2.32%"},"exDividendDate":{"raw":1605139200,"fmt":"2020-11-12"},"payoutRatio":{"raw":3.3077,"fmt":"330.77%"},"fiveYearAvgDividendYield":{"raw":2.43,"fmt":"2.43"},"beta":{"raw":1.173753,"fmt":"1.17"},"trailingPE":{"raw":148.82353,"fmt":"148.82"},"forwardPE":{"raw":20.294119,"fmt":"20.29"},"volume":{"raw":3372416,"fmt":"3.37M","longFmt":"3,372,416"},"regularMarketVolume":{"raw":3372416,"fmt":"3.37M","longFmt":"3,372,416"},"averageVolume":{"raw":4245485,"fmt":"4.25M","longFmt":"4,245,485"},"averageVolume10days":{"raw":3351485,"fmt":"3.35M","longFmt":"3,351,485"},"averageDailyVolume10Day":{"raw":3351485,"fmt":"3.35M","longFmt":"3,351,485"},"bid":{"raw":37.88,"fmt":"37.88"},"ask":{"raw":37.89,"fmt":"37.89"},"bidSize":{"raw":1100,"fmt":"1.1k","longFmt":"1,100"},"askSize":{"raw":800,"fmt":"800","longFmt":"800"},"marketCap":{"raw":28994179072,"fmt":"28.99B","longFmt":"28,994,179,072"},"yield":{},"ytdReturn":{},"totalAssets":{},"expireDate":{},"strikePrice":{},"openInterest":{},"fiftyTwoWeekLow":{"raw":17.44,"fmt":"17.44"},"fiftyTwoWeekHigh":{"raw":37.95,"fmt":"37.95"},"priceToSalesTrailing12Months":{"raw":2.6921244,"fmt":"2.69"},"fiftyDayAverage":{"raw":35.406857,"fmt":"35.41"},"twoHundredDayAverage":{"raw":31.052786,"fmt":"31.05"},"trailingAnnualDividendRate":{"raw":0.86,"fmt":"0.86"},"trailingAnnualDividendYield":{"raw":0.023168104,"fmt":"2.32%"},"navPrice":{},"currency":"USD","fromCurrency":null,"toCurrency":null,"lastMarket":null,"volume24Hr":{},"volumeAllCurrencies":{},"circulatingSupply":{},"algorithm":null,"maxSupply":{},"startDate":{},"tradeable":false}}],"error":null}}
With Python:
import requests

headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
result = requests.get('https://query2.finance.yahoo.com/v10/finance/quoteSummary/GLW?formatted=true&crumb=8ldhetOu7RJ&lang=en-US&region=US&modules=summaryDetail&corsDomain=finance.yahoo.com', headers=headers)
print(result.content)
response:
Traceback (most recent call last):
File "a.py", line 35, in <module>
response = urllib.request.urlopen(req, jsondataasbytes)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 429: Too Many Requests
OK, solved it by passing the GET parameters via params:
import requests
payload = {"modules": "summaryDetail"}
response = requests.get("https://query2.finance.yahoo.com/v10/finance/quoteSummary/GLW", params=payload)
print(response.json())
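If the endpoint still returns 429, here is a hedged variant that keeps params but also sends the User-Agent header from the question and surfaces the status code; nothing here beyond standard requests usage:

import requests

headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
payload = {"modules": "summaryDetail"}
response = requests.get(
    "https://query2.finance.yahoo.com/v10/finance/quoteSummary/GLW",
    params=payload,
    headers=headers,
)
response.raise_for_status()  # raises on 429/4xx instead of failing later on bad JSON
print(response.json())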

BeautifulSoup cannot retrieve webpage links

I'm trying to detect the URLs on a website's listing page, but BeautifulSoup cannot do that. I get the following exception, even when I try with a header:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 384, in _make_request
six.raise_from(e, None)
File "<string>", line 2, in raise_from
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 380, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1321, in getresponse
response.begin()
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 296, in begin
version, status, reason = self._read_status()
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 257, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/socket.py", line 589, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "/usr/local/lib/python3.7/site-packages/urllib3/util/retry.py", line 368, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.7/site-packages/urllib3/packages/six.py", line 686, in reraise
raise value
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 386, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 317, in _raise_timeout
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
urllib3.exceptions.ReadTimeoutError: HTTPConnectionPool(host='www.sahibinden.com', port=80): Read timed out. (read timeout=None)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/soner/PycharmProjects/bitirme2/main.py", line 8, in <module>
r = requests.get(url)
File "/usr/local/lib/python3.7/site-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python3.7/site-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python3.7/site-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.7/site-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python3.7/site-packages/requests/adapters.py", line 529, in send
raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPConnectionPool(host='www.sahibinden.com', port=80): Read timed out. (read timeout=None)
Process finished with exit code 1
But when I try the URL from the code with https://hackertarget.com/extract-links/, it returns the URLs.
import requests
from bs4 import BeautifulSoup

url = 'http://www.sahibinden.com/satilik/istanbul-kartal?pagingOffset=50&pagingSize=50'
url2 = 'http://www.stackoverflow.com'
r = requests.get(url)
html_content = r.text
soup = BeautifulSoup(html_content, 'lxml')
for link in soup.find_all("a", {"class": "classifiedTitle"}):
    print(link.get('href'))
'''
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
print(requests.get(url, headers=headers, timeout=5).text)
'''
As a note, it is possible that you will find yourself blocked by the website (sahibinden). I haven't researched using BeautifulSoup with a proxy list, but a rough sketch of routing the request through a proxy follows below.
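This is untested on my side, and the proxy addresses are placeholders rather than working endpoints; requests accepts a dict mapping each URL scheme to a proxy URL:

import requests

# Hypothetical proxy endpoints; substitute entries from your own proxy list.
proxies = {
    'http': 'http://10.10.1.10:3128',
    'https': 'http://10.10.1.10:1080',
}
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
url = 'http://www.sahibinden.com/satilik/istanbul-kartal?pagingOffset=50&pagingSize=50'
r = requests.get(url, proxies=proxies, headers=headers, timeout=5)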
This is the code snippet I ran, and it worked as expected:
import requests
from bs4 import BeautifulSoup
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
}
url = 'http://www.sahibinden.com/satilik/istanbul-kartal?pagingOffset=50&pagingSize=50'
r = requests.get(url, headers=headers)
if r.ok:
    soup = BeautifulSoup(r.text, 'lxml')
    for a in soup('a', 'classifiedTitle'):
        print(a.get('href'))
And here is the output of the code above:
/ilan/emlak-konut-satilik-directten%2Ccift-wc-li%2Cgenis-m2de%2Ciskanli%2Culasimi-kolay-sik-3-plus1-671049902/detay
/ilan/emlak-konut-satilik-nesrin-den-kartal-ugurmumcuda-satilik-3-plus1-yunus-emre-caddesinde-692133846/detay
/ilan/emlak-konut-satilik-akelden-karliktepe-de-genis-m2-li-krediye-uygun-daire-659458837/detay
/ilan/emlak-konut-satilik-ikea-ve-metro-yani-teknik-yapi-uprise-elite_mukemmel-firsat-3-plus1-692131163/detay
/ilan/emlak-konut-satilik-kartal-atalar-da-iskanli-5-plus1-dubleks-satilik-daire-692125302/detay
/ilan/emlak-konut-satilik-satilik-daire-kartal-atalar-da-2-plus1-lux-100-m2-671083034/detay
/ilan/emlak-konut-satilik-kartal-ugurmumcuda-3-plus1-genis-masrafsiz-satilik-daire-681180607/detay
/ilan/emlak-konut-satilik-soner-den-manzara-adalar-da-satilik-kacirilmayacak-kelepir-daire-653973723/detay
/ilan/emlak-konut-satilik-mertcan-dan-tarihi-ayazma-caddesinde-2-plus1-satilik-ters-dubleks-692122837/detay
/ilan/emlak-konut-satilik-cinar-emlak%2Ctan-hurriyet-mah-105-m2-toprak-tapulu-692117031/detay
/ilan/emlak-konut-satilik-kartal-cumhuriyet-te-arsa-hisseli-yuksek-giris-daire-692116930/detay
/ilan/emlak-konut-satilik-temiz-emlaktan-petroliste-2-plus1-satilik-sifir-deniz-manzarali-671086029/detay
/ilan/emlak-konut-satilik-cemal-yalcin-dan-ozel-mimarili-luks-satilik-dubleks-623158476/detay
/ilan/emlak-konut-satilik-la-marin-kartal-da-site-icerisinde-ozel-bahce-kati-sifir-daire-645480180/detay
/ilan/emlak-konut-satilik-sen-kardeslerden-merkezde-3-plus1%2Ccok-temiz-satilik-daire%2C350.000tl-692103788/detay
/ilan/emlak-konut-satilik-kartal-petrol-is-mah-de-3-plus1-deniz-manzarali-yatirimlik-daire-619762304/detay
/ilan/emlak-konut-satilik-remax-red-rukiye-korkmaz-dan-panorama-velpark-ta-esyali-1-plus1-616596826/detay
/ilan/emlak-konut-satilik-yakacik-demirli-twinstar-sitesi-ultra-luks-174-m2-3-plus1-daire-692104680/detay
/ilan/emlak-konut-satilik-kartal-soganlikta-yatirimlik-kiracili-firsat-2-plus1-daire-682793715/detay
/ilan/emlak-konut-satilik-istmarinada-devirli-taksitli-satilik-studyo-gulsen-yanmazdan-638548163/detay
/ilan/emlak-konut-satilik-sahibinden-satilik-kartal-merkezde-kaymakamligin-karsisinda-2-plus1-692054497/detay
/ilan/emlak-konut-satilik-petrolis______ara-kat-2-plus1-110-m2-lux-panjurlu_____carsiya-yakin-692100683/detay
/ilan/emlak-konut-satilik-ful-deniz-manzarali-3-plus1-ana-yola-cok-yakin-115m2-sifir-daire-585807696/detay
/ilan/emlak-konut-satilik-kartal-karlitepe-de-ters-dublek-2-plus2-satilik-daire-692085141/detay
/ilan/emlak-konut-satilik-kartal-dap-yapi-istmarina-full-deniz-manzarali-2-plus1-satilik-621795699/detay
/ilan/emlak-konut-satilik-aybars-dan-site-icinde-havuzlu-satilik-daire-671063936/detay
/ilan/emlak-konut-satilik-soganlik-yeni-mah-5-yillik-binada-adalar-manzarali-satilik-dair-679308838/detay
/ilan/emlak-konut-satilik-kartal-soganlik-orta-mah-e-5-yani-yeni-bina-kelepir-daire-573785719/detay
/ilan/emlak-konut-satilik-sahibinden-site-icerisinde-1-plus1-644746509/detay
/ilan/emlak-konut-satilik-3-plus1-luks-sitede-646420303/detay
/ilan/emlak-konut-satilik-mirac-dan-ayazma-koru-da-lux-yapili-3-plus1-135m2-masrafsiz-daire-535382195/detay
/ilan/emlak-konut-satilik-sahibinden-site-icerisinde-3-plus1-644729603/detay
/ilan/emlak-konut-satilik-cevizli-de-satilik-daire-2-plus1-lux-85-m2-671030197/detay
/ilan/emlak-konut-satilik-esentepe-de-bahceli-acik-otoparkli-125m2-ferah-kullansli-daire-670847710/detay
/ilan/emlak-konut-satilik-atalarda-ara-katta-sifir-binada-2-plus1-85-m2-otoparkli-510436215/detay
/ilan/emlak-konut-satilik-sahil-mesa-marmara-10.kat-122m2-deniz-manzarali-0-satilik-3-plus1-692085951/detay
/ilan/emlak-konut-satilik-kartal-da-sifir-ara-kat-3-plus1-satilik-daire-692090351/detay
/ilan/emlak-konut-satilik-pega-kartal-satis-ofisinden-2-plus1-kat-mulkiyetli-hemen-teslim-644626657/detay
/ilan/emlak-konut-satilik-adalilar-dan-kartal-hurriyet-mah-de-satilik-kelepir-3-plus1-dublex-682761629/detay
/ilan/emlak-konut-satilik-kartal-kordonboyunda-2-plus1-sifir-daire-647037679/detay
/ilan/emlak-konut-satilik-aklife-den_yakacik_carsi_mah_ultra_lux_katta_tek_sifir_2-plus1-654883140/detay
/ilan/emlak-konut-satilik-aklife-den_yakacik_da_mukanbel_yapi_kaliteli_3-plus1_arakat_sifir-657772595/detay
/ilan/emlak-konut-satilik-ciceksan-insaat-dan-3-plus1-daireler-hemen-tapu-hemen-teslim-682770303/detay
/ilan/emlak-konut-satilik-satilik-daire-ofis-2-1-85-mt-klepir-634724740/detay
/ilan/emlak-konut-satilik-ricar-dan%2C7-24-guvenlik%2Cyuzme-havuzu%2Ckapali-otopark%2Csifir%2Csitede-682744629/detay
/ilan/emlak-konut-satilik-ricar-dan%2Cana-cadde-uzeri%2Cgenis%2Cferah%2Csifir%2Clux%2Cara-kat-649504313/detay
/ilan/emlak-konut-satilik-mertcan-dan-e5-e-yurume-mesafesinde-iskanli-2-plus1-sifir-daire-692078490/detay
/ilan/emlak-konut-satilik-kartal-atalar-da-sahile-yurume-mesafesinde-iskanli-masrafsiz-3-plus1-454709956/detay
/ilan/emlak-konut-satilik-tugcan-pala-dan-mesa-kartall-da-satilik-2-kat-buyuk-tip-2-plus1-670434988/detay
/ilan/emlak-konut-satilik-satilik-sifir-daire-soganlik-yeni-mah-2-plus1-kat-mulkiyetli-682522237/detay

'set' object has no attribute 'setdefault'. Error in scraping data using Requests

I am trying to hit a website using Python Requests, but it's giving me an error.
import requests
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'}
URL = ""
PROXY = {'https://surfproxy.de.db.com:8080' }
response = requests.get(URL , proxies = PROXY, headers: headers)
The error logs:
File "", line 1, in
runfile('C:/Users/vermanjb/JiraScrapping.py', wdir='C:/Users/vermanjb')
File "C:\Program
Files\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py",
line 699, in runfile
execfile(filename, namespace)
File "C:\Program
Files\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py",
line 88, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "C:/Users/vermanjb/JiraScrapping.py", line 12, in
response = requests.get(URL , proxies = PROXY)
File "C:\Program Files\Anaconda3\lib\site-packages\requests\api.py",
line 67, in get
return request('get', url, params=params, **kwargs)
File "C:\Program Files\Anaconda3\lib\site-packages\requests\api.py",
line 53, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Program
Files\Anaconda3\lib\site-packages\requests\sessions.py", line 459, in
request
prep.url, proxies, stream, verify, cert
File "C:\Program
Files\Anaconda3\lib\site-packages\requests\sessions.py", line 619, in
merge_environment_settings
proxies.setdefault(k, v)
AttributeError: 'set' object has no attribute 'setdefault'
There are at least two problems you should fix:
The proxies parameter should be a dict, not a set.
You have a syntax error in your headers parameter.
Try this instead:
import requests

headers = {'User-Agent': 'Mozilla/5.0...'}
url = 'http://www.yoursite.com/'
proxies = {
    'http': 'http://surfproxy.de.db.com:8080',
    'https': 'http://surfproxy.de.db.com:8080'
}
response = requests.get(url, proxies=proxies, headers=headers)
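As a quick sanity check (my addition, not part of the original answer), you can confirm the proxied request succeeded before parsing anything:

print(response.status_code)  # expect 200 if the proxy and headers were accepted
print(response.text[:200])   # the first part of the response body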

Embedding Bokeh Server as a library: Tornado error

I am following Bokeh's User Guide.
In "Embedding Bokeh Server as a Library" at http://docs.bokeh.org/en/latest/docs/user_guide/server.html#embedding-bokeh-server-as-a-library
it refers to a demo where a Bokeh server is embedded in Flask (https://github.com/bokeh/bokeh/blob/0.12.6/examples/howto/server_embed/flask_embed.py)
It should be straightforward, but I get a Tornado error if I launch it with python flask_embed.py. Does anybody have an idea why?
The page on the browser is correctly launched but there is no plot.
This is the short error message:
ERROR:tornado.application:Uncaught exception GET /bkapp/autoload.js?bokeh-autoload-element=3a711948-3668-4f63-8d0c-8cd1584fb92d&bokeh-app-path=/bkapp&bokeh-absolute-url=http://localhost:5006/bkapp (127.0.0.1)
HTTPServerRequest(protocol='http', host='localhost:5006', method='GET', uri='/bkapp/autoload.js?bokeh-autoload-element=3a711948-3668-4f63-8d0c-8cd1584fb92d&bokeh-app-path=/bkapp&bokeh-absolute-url=http://localhost:5006/bkapp', version='HTTP/1.1', remote_ip='127.0.0.1', headers={'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate', 'Host': 'localhost:5006', 'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0', 'Connection': 'keep-alive', 'Referer': 'http://localhost:8080/', 'Cookie': 'username-localhost-8888="2|1:0|10:1501067928|23:username-localhost-8888|44:Y2EwOTUzN2YzNWRiNGQyMDgxZWEyOGMzZDJkOTI4ZWY=|f4f981dd915dc777c70e605b7135bcbbc076b3fe3482999e5ca557cb4abd518e"; _xsrf=2|c711b8e7|f913ccc5c9cc32532c1e67bbd75b6051|1500889250'})
...
HTTPError: HTTP Error 400: Bad Request
ERROR:tornado.access:500 GET /bkapp/autoload.js?bokeh-autoload-element=3a711948-3668-4f63-8d0c-8cd1584fb92d&bokeh-app-path=/bkapp&bokeh-absolute-url=http://localhost:5006/bkapp (127.0.0.1)
And here is the whole traceback:
Opening Flask app with embedded Bokeh application on http://localhost:8080/
ERROR:tornado.application:Uncaught exception GET /bkapp/autoload.js?bokeh-autoload-element=3a711948-3668-4f63-8d0c-8cd1584fb92d&bokeh-app-path=/bkapp&bokeh-absolute-url=http://localhost:5006/bkapp (127.0.0.1)
HTTPServerRequest(protocol='http', host='localhost:5006', method='GET', uri='/bkapp/autoload.js?bokeh-autoload-element=3a711948-3668-4f63-8d0c-8cd1584fb92d&bokeh-app-path=/bkapp&bokeh-absolute-url=http://localhost:5006/bkapp', version='HTTP/1.1', remote_ip='127.0.0.1', headers={'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate', 'Host': 'localhost:5006', 'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0', 'Connection': 'keep-alive', 'Referer': 'http://localhost:8080/', 'Cookie': 'username-localhost-8888="2|1:0|10:1501067928|23:username-localhost-8888|44:Y2EwOTUzN2YzNWRiNGQyMDgxZWEyOGMzZDJkOTI4ZWY=|f4f981dd915dc777c70e605b7135bcbbc076b3fe3482999e5ca557cb4abd518e"; _xsrf=2|c711b8e7|f913ccc5c9cc32532c1e67bbd75b6051|1500889250'})
Traceback (most recent call last):
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/web.py", line 1511, in _execute
result = yield result
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/concurrent.py", line 238, in result
raise_exc_info(self._exc_info)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/gen.py", line 1063, in run
yielded = self.gen.throw(*exc_info)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/bokeh/server/views/autoload_js_handler.py", line 31, in get
session = yield self.get_session()
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/concurrent.py", line 238, in result
raise_exc_info(self._exc_info)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/gen.py", line 1063, in run
yielded = self.gen.throw(*exc_info)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/bokeh/server/views/session_handler.py", line 40, in get_session
session = yield self.application_context.create_session_if_needed(session_id, self.request)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/concurrent.py", line 238, in result
raise_exc_info(self._exc_info)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/tornado/gen.py", line 1069, in run
yielded = self.gen.send(value)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/bokeh/server/application_context.py", line 177, in create_session_if_needed
self._application.initialize_document(doc)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/bokeh/application/application.py", line 121, in initialize_document
h.modify_document(doc)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/bokeh/application/handlers/function.py", line 16, in modify_document
self._func(doc)
File "main.py", line 22, in modify_doc
df = pd.read_csv(data_url, parse_dates=True, index_col=0)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/pandas/io/parsers.py", line 655, in parser_f
return _read(filepath_or_buffer, kwds)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/pandas/io/parsers.py", line 392, in _read
filepath_or_buffer, encoding, compression)
File "/home/alessandro/git-files/python/study_graph2/env/local/lib/python2.7/site-packages/pandas/io/common.py", line 186, in get_filepath_or_buffer
req = _urlopen(url)
File "/usr/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 435, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 548, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 473, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 556, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 400: Bad Request
ERROR:tornado.access:500 GET /bkapp/autoload.js?bokeh-autoload-element=3a711948-3668-4f63-8d0c-8cd1584fb92d&bokeh-app-path=/bkapp&bokeh-absolute-url=http://localhost:5006/bkapp (127.0.0.1) 425.75ms
When the page is served, the server tries to load CSV data from an external URL using pandas. I'm not sure whether this example worked before, but right now it seems that pd.read_csv does not encode the URL query, so the server is unable to handle the characters < and >. You can either replace the characters manually (see https://en.wikipedia.org/wiki/Percent-encoding) or use a library for it, such as Python's urllib.
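For example, a minimal sketch of percent-encoding the query before handing the URL to pandas; the data_url value here is a made-up placeholder with < and > in the query, not the URL from the demo:

import pandas as pd
from urllib.parse import quote

data_url = "http://example.com/data.csv?range=<2017>"  # hypothetical URL with unsafe characters
safe_url = quote(data_url, safe=":/?&=")  # encode '<' and '>' but keep the URL structure intact
df = pd.read_csv(safe_url, parse_dates=True, index_col=0)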

500 error with urllib.request.urlopen

The following code:
req = urllib.request.Request(url=r"http://borel.slu.edu/cgi-bin/cc.cgi?foirm_ionchur=im&foirm=Seol&hits=1&format=xml",headers={'User-Agent':' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'})
handler = urllib.request.urlopen(req)
is giving me the following exception:
Traceback (most recent call last):
File "C:/Users/Foo/lang/old/test.py", line 46, in <module>
rip()
File "C:/Users/Foo/lang/old/test.py", line 36, in rip
handler = urllib.request.urlopen(req)
File "C:\Python32\lib\urllib\request.py", line 138, in urlopen
return opener.open(url, data, timeout)
File "C:\Python32\lib\urllib\request.py", line 375, in open
response = meth(req, response)
File "C:\Python32\lib\urllib\request.py", line 487, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python32\lib\urllib\request.py", line 413, in error
return self._call_chain(*args)
File "C:\Python32\lib\urllib\request.py", line 347, in _call_chain
result = func(*args)
File "C:\Python32\lib\urllib\request.py", line 495, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: Internal Server Error
but it works fine in my browser. What's the issue?
The server is rather b0rken. It responds with a 500 error in the browser as well.
You can catch the exception and still read the response:
import urllib.request
from urllib.error import HTTPError

req = urllib.request.Request(
    url="http://borel.slu.edu/cgi-bin/cc.cgi?foirm_ionchur=im&foirm=Seol&hits=1&format=xml",
    headers={'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'},
)
try:
    handler = urllib.request.urlopen(req)
except HTTPError as e:
    content = e.read()
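As a usage note (my extension, reusing req and the imports from the snippet above and only the standard HTTPError attributes), the status code and error body are both still available:

try:
    handler = urllib.request.urlopen(req)
    content = handler.read()
except HTTPError as e:
    print(e.code)       # 500 in this case
    content = e.read()  # the server's error page is still readable
print(content[:200])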
