from fake_useragent import UserAgent  # pip install fake-useragent
import requests

ua = UserAgent()
headers = {
    'User-Agent': str(ua.random),  # pick a random User-Agent on each run
    'Connection': 'close',
}
r = requests.get(url, headers=headers, timeout=5)  # url is set elsewhere in the script
I want to scrape some information from a website, but requests.get() raises an exception occasionally (sometimes it succeeds, sometimes it doesn't). I've tried many things: a random User-Agent, a timeout, time.sleep(), a maximum number of retries, but none of it helped.
Is there something wrong with my code, or is this the website's fault, perhaps some anti-scraping system?
Here is the full exception:
Traceback (most recent call last):
File "d:\AAA临时文档\抢课app\爬虫\run2.py", line 7, in <module>
r=requests.get(url=url,headers=headers,timeout=20)
File "C:\Users\86153\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\86153\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\86153\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\86153\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Users\86153\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\adapters.py", line 504, in send
raise ConnectTimeout(e, request=request)
requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='www.dy2018.com', port=443): Max retries exceeded with url: /i/103887.html (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x04046A18>, 'Connection to www.dy2018.com timed out. (connect timeout=20)'))
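If the timeouts are only occasional, one common mitigation is to let urllib3 retry with exponential backoff rather than hand-rolling a retry loop. A minimal sketch, not from the original post, assuming url and headers are defined as above:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
# Retry up to 5 times with increasing delays between attempts; also
# retry on common transient HTTP status codes.
retries = Retry(total=5, backoff_factor=1,
                status_forcelist=[429, 500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))
r = session.get(url, headers=headers, timeout=20)
If retries from one network keep failing while another network succeeds, the block is more likely server-side rate limiting or an anti-scraper than a bug in the code.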
Related
I need to get a connection to this URI:
uri = 'https://www.wfs.nrw.de/geobasis/wfs_nw_inspire-flurstuecke_alkis?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&STOREDQUERY_ID=urn:ogc:def:query:OGC-WFS::GetFeatureById&ID=CadastralParcel_05518904700963______'
Because it doesn't work, I tried this to find my mistakes:
import requests
import urllib.request

uri = 'http://www.google.de'
uriS = 'https://www.google.de'
myproxies = urllib.request.getproxies()  # read the system proxy settings

try:
    r = requests.get(uri)
    print('HTTP OK')
except requests.exceptions.RequestException:
    print('HTTP NOT OK')

try:
    r = requests.get(uri, proxies=myproxies)
    print('HTTP WITH PROXIES OK')
except requests.exceptions.RequestException:
    print('HTTP WITH PROXIES NOT OK')

try:
    r = requests.get(uriS)
    print('HTTPS OK')
except requests.exceptions.RequestException:
    print('HTTPS NOT OK')

try:
    r = requests.get(uriS, proxies=myproxies)
    print('HTTPS WITH PROXIES OK')
except requests.exceptions.RequestException:
    print('HTTPS WITH PROXIES NOT OK')
This is the result:
HTTP OK
HTTP WITH PROXIES OK
HTTPS NOT OK
HTTPS WITH PROXIES NOT OK
This happens when I do it in the console:
>>> import requests
>>> uri = 'https://www.google.de'
>>> r = requests.get(uri)
Traceback (most recent call last):
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\urllib3\connectionpool.py", line 594, in urlopen
self._prepare_proxy(conn)
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\urllib3\connectionpool.py", line 815, in _prepare_proxy
conn.connect()
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\urllib3\connection.py", line 324, in connect
self._tunnel()
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\http\client.py", line 911, in _tunnel
message.strip()))
OSError: Tunnel connection failed: 407 Proxy Authorization Required
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\requests\adapters.py", line 445, in send
timeout=timeout
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\urllib3\connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\urllib3\util\retry.py", line 398, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.google.de', port=443): Max retries exceeded with url: / (Caused by ProxyError('Cannot connect to proxy.', OSError('Tunnel connection failed: 407 Proxy Authorization Required')))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\code.py", line 90, in runcode
exec(code, self.locals)
File "<input>", line 1, in <module>
File "<string>", line 3, in <module>
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\requests\api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\requests\api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\requests\sessions.py", line 512, in request
resp = self.send(prep, **send_kwargs)
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\requests\sessions.py", line 622, in send
r = adapter.send(request, **kwargs)
File "C:\PROGRA~1\QGIS3~1.8\apps\Python37\lib\site-packages\requests\adapters.py", line 507, in send
raise ProxyError(e, request=request)
requests.exceptions.ProxyError: HTTPSConnectionPool(host='www.google.de', port=443): Max retries exceeded with url: / (Caused by ProxyError('Cannot connect to proxy.', OSError('Tunnel connection failed: 407 Proxy Authorization Required')))
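The 407 in this traceback means the proxy itself is demanding credentials before it will tunnel HTTPS. One way to supply them explicitly, a minimal sketch with a hypothetical user, password, and proxy address:
import requests

proxies = {
    'http': 'http://user:password@proxy.example.com:8080',
    'https': 'http://user:password@proxy.example.com:8080',
}
r = requests.get('https://www.google.de', proxies=proxies)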
We solved the problem!
The code was correct.
Our network admin put the URI "https://www.wfs.nrw.de" on a list of URIs that need no proxy authentication.
import requests
import urllib.request
myproxies = urllib.request.getproxies()
uri = 'https://www.wfs.nrw.de/geobasis/wfs_nw_inspire-flurstuecke_alkis?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&STOREDQUERY_ID=urn:ogc:def:query:OGC-WFS::GetFeatureById&ID=CadastralParcel_05518904700963______'
r = requests.get(uri, proxies = myproxies)
print(r.text)
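For reference, urllib.request.getproxies() reads the proxy configuration from the environment variables (HTTP_PROXY, HTTPS_PROXY) or from platform-specific settings, so the script simply reuses whatever proxy the system is already configured with.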
Using a proxy connection in the Python Requests module to browse a URL
I added the proxy IP and port to the HTTPS_PROXY and HTTP_PROXY environment variables to route traffic through the proxy:
import os
import requests

# Route all traffic through the corporate proxy via environment variables.
os.environ["HTTPS_PROXY"] = "proxy1.xx.local:8081"
os.environ["HTTP_PROXY"] = "proxy1xx.local:8081"

H = {"X-Authenticated-User": "bharani#dummy.com"}
url = r"https://google.co.in"
r = requests.get(url, headers=H, verify=False)
print r.status_code
print r.text
This is the response I'm getting from the above code:
Traceback (most recent call last):
File "pilot.py", line 356, in <module>
r = requests.get(url,headers=H,verify=False)
File "C:\Python27\lib\site-packages\requests\api.py", line 70, in get
return request('get', url, params=params, **kwargs)
File "C:\Python27\lib\site-packages\requests\api.py", line 56, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 488, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 609, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 497, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: ("bad handshake: SysCallError(-1, 'Unexpected EOF')",)
pyOpenSSL, ndg-httpsclient, and pyasn1 are also installed.
Not sure what I'm missing.
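A bad handshake with "Unexpected EOF" through a proxy usually means the connection was closed in the middle of the TLS handshake, often by the proxy itself. One thing worth trying is passing the proxies to requests explicitly instead of through os.environ; a minimal sketch, reusing the proxy address and headers from the question:
import requests

H = {"X-Authenticated-User": "bharani#dummy.com"}
proxies = {
    'http': 'http://proxy1.xx.local:8081',
    'https': 'http://proxy1.xx.local:8081',  # proxy URLs usually use http://, even for HTTPS targets
}
r = requests.get('https://google.co.in', headers=H, proxies=proxies, verify=False)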
I wrote a Python script to fetch definitions and URLs for a list of items (a long list of no fewer than 3000 items).
The script was working fine and I used it several times, but suddenly I started to get the following error:
('Connection aborted.', error(54, 'Connection reset by peer'))
Here is the full traceback:
Traceback (most recent call last):
File "Wiki.py", line 41, in <module>
page = wikipedia.page(item)
File "/Library/Python/2.7/site-packages/wikipedia/wikipedia.py", line 270, in page
results, suggestion = search(title, results=1, suggestion=True)
File "/Library/Python/2.7/site-packages/wikipedia/util.py", line 28, in __call__
ret = self._cache[key] = self.fn(*args, **kwargs)
File "/Library/Python/2.7/site-packages/wikipedia/wikipedia.py", line 103, in search
raw_results = _wiki_request(search_params)
File "/Library/Python/2.7/site-packages/wikipedia/wikipedia.py", line 737, in _wiki_request
r = requests.get(API_URL, params=params, headers=headers)
File "/Library/Python/2.7/site-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/Library/Python/2.7/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/Library/Python/2.7/site-packages/requests/sessions.py", line 502, in request
resp = self.send(prep, **send_kwargs)
File "/Library/Python/2.7/site-packages/requests/sessions.py", line 612, in send
r = adapter.send(request, **kwargs)
File "/Library/Python/2.7/site-packages/requests/adapters.py", line 490, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', error(54, 'Connection reset by peer'))
It seems that installing the regular requests Python library does not pull in the packages needed for robust HTTPS connections. Install them with:
pip install requests[security]
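Another possibility with a loop over 3000 items is server-side throttling: the peer resets connections that arrive too quickly. A minimal back-off-and-retry sketch (the helper name is mine, not from the original script):
import time
import requests
import wikipedia

def get_page(item, retries=3, wait=5):
    # Wait progressively longer each time the server resets the connection.
    for attempt in range(1, retries + 1):
        try:
            return wikipedia.page(item)
        except requests.exceptions.ConnectionError:
            time.sleep(wait * attempt)
    return None
The wikipedia package also offers wikipedia.set_rate_limiting(True) to throttle its own requests.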
I have a strange problem with a Python script.
import requests

argumente = {'NRBUS': 'LINIA_1', 'COORDONATE': '46.195323,21.306300'}
r = requests.get("http://www.roroid.ro/php/GPS_cloud/GPS_cloud.php", params=argumente)
print r.url
print r.text
On my PC it works without any problems, but on my Raspberry Pi, after some time, I end up with:
Traceback (most recent call last):
File "testt.py", line 4, in <module>
r=requests.get("http://www.roroid.ro/php/GPS_cloud/GPS_cloud.php",params=argumente)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 55, in get
return request('get', url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 456, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 559, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 375, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='www.roroid.ro', port=80): Max retries exceeded with url: /php/GPS_cloud/GPS_cloud.php?COORDONATE=46.195323%2C21.306300&NRBUS=LINIA_1 (Caused by <class 'socket.error'>: [Errno 110] Connection timed out)
Thanks for any help.
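Errno 110 is a TCP-level connect timeout, which on a Raspberry Pi often points to a flaky network link rather than a bug in the script. If the script has to keep running regardless, one option is to catch the error, back off, and retry; a sketch with poll intervals that are my own guess:
import time
import requests

argumente = {'NRBUS': 'LINIA_1', 'COORDONATE': '46.195323,21.306300'}
url = "http://www.roroid.ro/php/GPS_cloud/GPS_cloud.php"

while True:
    try:
        r = requests.get(url, params=argumente, timeout=10)
        print(r.text)
    except requests.exceptions.RequestException:
        time.sleep(30)  # back off before trying again
        continue
    time.sleep(5)  # normal poll interval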
I need to make a web request and I'm using the Python requests module.
I have a service running on a machine, for example 55.84.201.228. When I enter the address in the browser it works fine and I'm able to view the webpage.
But when I use requests.get, it does not work; it fails with a socket error:
>>> import requests
>>> r = requests.get('https://55.84.201.228')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 55, in get
return request('get', url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 383, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 486, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 378, in send
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='55.84.201.228', port=443): Max retries exceeded with url: / (Caused by <class 'socket.error'>: [Errno 110] Connection timed out)
How can I fix this issue?
>>> r = requests.get('https://www.cnn.com')
This works fine.
I'm making a wild guess here since I can't access your machine, but from experience: since you are using HTTPS on what I assume is a test server, try
r = requests.get('https://55.84.201.228', verify=False)
According to the documentation, certificate verification is on by default.
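With verify=False, requests will also emit an InsecureRequestWarning on every call. If the warning noise matters, it can be silenced; a small sketch:
import requests
import urllib3

# Silence the warning that verify=False triggers; only do this when you
# know why certificate verification is being skipped.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

r = requests.get('https://55.84.201.228', verify=False)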