Recovery from dns lookup failure in Python - python

I'm trying to check 22,800+ urls from a 2012 database to find out which ones are still valid. I'm using urllib in Python 3.8 in PyCharm. It makes it through the first 47 urls which are in a text file that I read in. Then it crashes when the host can't be found.
Here's the error output:
Traceback (most recent call last):
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1350, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 950, in send
self.connect()
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\socket.py", line 787, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\socket.py", line 918, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11002] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/rmcape/PycharmProjects/first/venv/validateURLs.py", line 19, in
resp=urllib.request.urlopen(req)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1379, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\rmcape\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1353, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11002] getaddrinfo failed>
How can I detect the DNS lookup failure and recover from it and continue on to the next URL in the file? Is there some other library that I should be using? I've googled about everything I can think of.
Thanks for any help.
Here's the code:
#!/bin/python
#
#validateURLs.py
import urllib
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
import responses
import socket
# Check every URL listed in updatedURLs.txt (one per line) and print its HTTP status.
f = open("updatedURLs.txt", "r")
site=f.readline()
siteCount=1
errorCount=0
# readline() returns an empty string at end of file, which ends the loop.
while site:
site=site.strip()
req = urllib.request.Request(site)
try:
resp=urllib.request.urlopen(req)
respo=str(resp.getcode())
result = "("+str(siteCount)+") "+respo+" ==> "+site
print(result)
#print(siteCount, site, resp.getcode())
# NOTE: only HTTPError (a response with an error status) is handled here.
# The URLError in the traceback above (DNS lookup failure) is not an
# HTTPError, so it is not caught by this clause and ends the script.
except urllib.error.HTTPError as e:
errorCount=errorCount+1
result="("+str(siteCount)+") "+str(e.code)+" ==> "+site
print(result)
print("errorCount = "+str(errorCount))
# Advance to the next URL in the file.
site=f.readline()
siteCount=siteCount+1
print(errorCount)
print("Done")

Will this work for you?:
#!/bin/python
#
#validateURLs.py
import urllib
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
import responses
import socket
import http.client

# Per-request limit so one unresponsive host cannot stall the whole run.
TIMEOUT_SECONDS = 30

# Failures that mean "this URL is not usable" rather than "the script is broken":
#   OSError       - URLError/HTTPError, DNS failures, refused connections, timeouts
#   ValueError    - malformed or scheme-less URLs (including blank lines)
#   HTTPException - protocol-level errors such as a bad status line
SKIPPABLE_ERRORS = (OSError, ValueError, http.client.HTTPException)

errorCount = 0
# The with-block closes the URL list even if the run is interrupted.
with open("updatedURLs.txt", "r") as f:
    # Iterating the file always moves on to the next line, so a failing URL
    # is reported once and never retried in an endless loop.
    for siteCount, line in enumerate(f, start=1):
        site = line.strip()
        try:
            # Request() itself can reject a URL, so it belongs inside the try.
            req = urllib.request.Request(site)
            with urllib.request.urlopen(req, timeout=TIMEOUT_SECONDS) as resp:
                respo = str(resp.getcode())
        except SKIPPABLE_ERRORS as e:
            errorCount = errorCount + 1
            print("(" + str(siteCount) + ") " + str(e) + " ==> " + site)
            print("errorCount = " + str(errorCount))
        else:
            print("(" + str(siteCount) + ") " + respo + " ==> " + site)
print(errorCount)
print("Done")

Related

Python script crashes when fail to connect to server

I'm testing my Python script, and when the server is turned off my script crashes. How can I change this so that it handles the failure and tries to connect again? Right now the script crashes when it fails to connect to the server. Here is my script:
import urllib.request
import json
# Request the local /user/connect endpoint and decode the response body as JSON.
def connectToServer():
# NOTE: there is no exception handling here; the traceback below shows this
# urlopen call raising URLError when the connection is refused.
with urllib.request.urlopen("http://localhost:5000/user/connect") as url:
# The parsed result is assigned to a local name and is not returned.
data = json.loads(url.read().decode())
Here is error:
Traceback (most recent call last):
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 950, in send
self.connect()
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\socket.py", line 843, in create_connection
raise err
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\socket.py", line 831, in create_connection
sock.connect(sa)
ConnectionRefusedError: [WinError 10061] Подключение не установлено, т.к. конечный компьютер отверг запрос на подключение
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "e:\Projects\Hacker_Pro Python\main.py", line 4, in <module>
server.connectToServer()
File "e:\Projects\Hacker_Pro Python\server.py", line 8, in connectToServer
with urllib.request.urlopen("http://localhost:5000/user/connect") as url:
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 1375, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 1349, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [WinError 10061] Подключение не установлено, т.к. конечный компьютер отверг запрос на подключение>
The python try statement may help you.
def connectToServer():
    """Request the local connect endpoint and parse its JSON reply.

    Connection problems are reported on stdout instead of being raised.
    """
    # Local import so the handler below does not depend on urllib.error
    # having been imported elsewhere in the file.
    import urllib.error

    try:
        with urllib.request.urlopen("http://localhost:5000/user/connect") as url:
            data = json.loads(url.read().decode())
    except urllib.error.URLError as error:
        # urlopen wraps socket-level failures in URLError; the underlying
        # exception (e.g. ConnectionRefusedError) is available as .reason,
        # so catching ConnectionRefusedError directly would never match.
        if isinstance(error.reason, ConnectionRefusedError):
            print('connection was refused:\n{}'.format(error))
        else:
            print('could not connect to server')
    except OSError:
        # Socket errors raised after the connection was set up, e.g. a
        # reset or timeout while reading the response body.
        print('could not connect to server')

Exception handling results in "unresolved reference" python

Currently I'm trying to read a webpage and then process the content but I also want to implement exception handling for when the client is offline. This is the code I have so far:
from urllib.request import Request, urlopen
from lxml import html
# Download *link* and parse the response body into an lxml HTML tree.
def get_webSite(link): # returns a HTML page which can be read by the xPath
# A browser-like User-Agent header is sent with the request.
req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
return html.fromstring(urlopen(req).read())
try:
# NOTE(review): this URL has no scheme; the traceback below was produced
# with "https://www.google.com" instead.
htmlPage = get_webSite("www.example.com")
print(htmlPage)
# NOTE: the imports above bind only Request and urlopen (and html), so the
# name urllib is unbound here. Evaluating this except clause raises the
# NameError shown at the end of the traceback below.
except urllib.error.URLError as e:
print("You're offline, try again later")
The problem is that Python does not seem to recognize "urllib.error.URLError", which gets thrown if there is no internet connection. IntelliJ always tells me "Unresolved reference urllib".
This is the output of the console without any internet connection:
"C:\Program Files\Python39\py
thon.exe" C:/test/test.py
Traceback (most recent call last):
File "C:\Program Files\Python39\lib\urllib\request.py", line 1342, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Program Files\Python39\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Program Files\Python39\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Program Files\Python39\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Program Files\Python39\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Program Files\Python39\lib\http\client.py", line 950, in send
self.connect()
File "C:\Program Files\Python39\lib\http\client.py", line 1417, in connect
super().connect()
File "C:\Program Files\Python39\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Program Files\Python39\lib\socket.py", line 822, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Program Files\Python39\lib\socket.py", line 953, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/test/test.py", line 17, in <module>
htmlPage = get_webSite("https://www.google.com")
File "C:/test/test.py", line 10, in get_webSite
return html.fromstring(urlopen(req).read())
File "C:\Program Files\Python39\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python39\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "C:\Program Files\Python39\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Program Files\Python39\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "C:\Program Files\Python39\lib\urllib\request.py", line 1385, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "C:\Program Files\Python39\lib\urllib\request.py", line 1345, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/test/test.py", line 19, in <module>
except urllib.error.URLError as e:
NameError: name 'urllib' is not defined
Process finished with exit code 1
I'm only able to catch this error with "except:" but not with "except urllib.error.URLError:". How can I solve that?
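You used "from urllib.request import Request, urlopen", which binds only the names Request and urlopen. The name urllib itself is never defined in your module, so evaluating "except urllib.error.URLError" raises a NameError.
Add "import urllib.error" (or "from urllib.error import URLError" and then catch URLError directly) and the except clause will work.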

TimeoutError: [WinError 10060] and URLError when using urllib in Python3

I am trying to read and parse website content. I am using Python 3.5.2, and I have done this for about 300,000 entries. At some point I got two errors:
1.TimeoutError: [WinError 10060]
2. AttributeError: type object 'URLError' has no attribute 'reason'
I have searched for these errors and tried some of the solutions, but I am still getting them. I don't know how to solve the first error. For the second error, it tells me there is no attribute "reason" in URLError, but I have also tried the "code" attribute. In addition, the webpage exists when I check it manually, so why am I getting these errors? How can I solve both of them?
Error in pdb no chain added. Obj is empty. 3dzd
<class 'urllib.error.URLError'>
Traceback (most recent call last):
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 1106, in request
self._send_request(method, url, body, headers)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 1151, in _send_request
self.endheaders(body)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 1102, in endheaders
self._send_output(message_body)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 934, in _send_output
self.send(msg)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 877, in send
self.connect()
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 1252, in connect
super().connect()
File "C:\...\AppData\Local\Programs\Python\Python35\lib\http\client.py", line 849, in connect
(self.host,self.port), self.timeout, self.source_address)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\socket.py", line 711, in create_connection
raise err
File "C:\...\AppData\Local\Programs\Python\Python35\lib\socket.py", line 702, in create_connection
sock.connect(sa)
TimeoutError: [WinError 10060] Bağlanılan uygun olarak belli bir süre içinde yanıt vermediğinden veya kurulan
bağlantı bağlanılan ana bilgisayar yanıt vermediğinden bir bağlantı kurulamadı
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\...\Google Drive\Scripts\4\script_4.py", line 146, in read_url_content
data = list(urlopen("https://files.rcsb.org/view/"+pdb+".pdb", context = context ,timeout=60)) # it's a file like object and works just like a file
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 466, in open
response = self._open(req, data)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 484, in _open
'_open', req)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 444, in _call_chain
result = func(*args)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 1297, in https_open
context=self._context, check_hostname=self._check_hostname)
File "C:\...\AppData\Local\Programs\Python\Python35\lib\urllib\request.py", line 1256, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [WinError 10060] Bağlanılan uygun olarak belli bir süre içinde yanıt vermediğinden veya kurulan
bağlantı bağlanılan ana bilgisayar yanıt vermediğinden bir bağlantı kurulamadı>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\...\Google Drive\Scripts\4\script_4.py", line 207, in <module>
obj = read_url_content(pdb)
File "C:\...\Google Drive\Scripts\4\script_4.py", line 156, in read_url_content
print(e.reason)
AttributeError: type object 'URLError' has no attribute 'reason'
My code is like following;
import requests
import time
from urllib.request import urlopen # the lib that handles the url stuff
import urllib.request, urllib.error, urllib.parse
import ssl
import sys
from socket import timeout
# Download the entry named *pdb* from files.rcsb.org and wrap it in a PDB object.
# Returns pdbObj, which stays None unless PDB(pdb) is reached.
def read_url_content(pdb):
pdbObj = None
try:
# NOTE: this context disables TLS certificate verification.
context = ssl._create_unverified_context()
data = list(urlopen("https://files.rcsb.org/view/"+pdb+".pdb", context = context ,timeout=60)) # it's a file like object and works just like a file
# NOTE(review): PDB is not defined in the shown snippet - presumably declared elsewhere.
pdbObj = PDB(pdb)
except urllib.error.URLError as e:
print("Error in pdb no chain added. Obj is empty.", pdb)
ResponseData = e.reason
# NOTE: e is rebound here from the exception instance to the exception
# class (sys.exc_info()[0]). print(e) therefore shows
# <class 'urllib.error.URLError'>, and e.reason on the following line
# raises the AttributeError seen in the traceback above.
e = sys.exc_info()[0]
print(e)
print(e.reason)
pass
except timeout:
# NOTE(review): logging is not imported in the shown snippet.
logging.error('socket timed out - URL %s', pdb)
return pdbObj

Python fetching URL error: urllib.error.URLError: <urlopen error [WinError 10060]

I am using python 3.4 and my task is to download link from a website. But while fetching the data I got an error that:
I have tried it from both httplib2 and urllib, but in both cases it is showing the same error.
OUTPUT:
>>> import urllib.request
>>> request = urllib.request.Request(url)
>>> response = urllib.request.urlopen(request)
Traceback (most recent call last):
File "C:\Python34\lib\urllib\request.py", line 1183, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "C:\Python34\lib\http\client.py", line 1137, in request
self._send_request(method, url, body, headers)
File "C:\Python34\lib\http\client.py", line 1182, in _send_request
self.endheaders(body)
File "C:\Python34\lib\http\client.py", line 1133, in endheaders
self._send_output(message_body)
File "C:\Python34\lib\http\client.py", line 963, in _send_output
self.send(msg)
File "C:\Python34\lib\http\client.py", line 898, in send
self.connect()`
File "C:\Python34\lib\http\client.py", line 871, in connect
self.timeout, self.source_address)`
File "C:\Python34\lib\socket.py", line 516, in create_connection
raise err
File "C:\Python34\lib\socket.py", line 507, in create_connection
sock.connect(sa)
TimeoutError: [WinError 10060] A connection attempt failed because the
connected party did not properly respond after a period of time, or
established connection failed because connected host has failed to respond

Python 3 get HTML content

I'm using this code to get the web site html content,
import urllib.request
import lxml.html as lh
# Build the request with a custom User-Agent header.
req = urllib.request.Request(
    "http://www.ip-adress.com/ip_tracer/157.123.22.11",
    headers={'User-Agent': "Magic Browser"},
)
html = urllib.request.urlopen(req).read()
doc = lh.fromstring(html)
# XPath attribute tests are written with "@". Take the last element whose
# class is "odd" and print its text with all whitespace removed.
print(''.join(doc.xpath('.//*[@class="odd"]')[-1].text_content().split()))
I want to get the Organization: Zenith Data Systems.
but it shows some errors
Traceback (most recent call last):
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 1135, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 967, in request
self._send_request(method, url, body, headers)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 1005, in _send_request
self.endheaders(body)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 963, in endheaders
self._send_output(message_body)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 808, in _send_output
self.send(msg)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 746, in send
self.connect()
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 724, in connect
self.timeout, self.source_address)
File "/usr/local/python3.2.3/lib/python3.2/socket.py", line 404, in create_connection
raise err
File "/usr/local/python3.2.3/lib/python3.2/socket.py", line 395, in create_connection
sock.connect(sa)
socket.error: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "ext.py", line 4, in <module>
html = urllib.request.urlopen(req).read()
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 138, in urlopen
return opener.open(url, data, timeout)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 369, in open
response = self._open(req, data)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 387, in _open
'_open', req)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 347, in _call_chain
result = func(*args)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 1155, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 1138, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>}
How to solve it. Thanks,
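Basically, "Connection refused" means the TCP connection was actively rejected: typically nothing is listening on that host and port, the server is down or under maintenance, or a firewall is blocking the request. The problem is on the server or network side, not in your parsing code.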
From your above code, if you want to handle errors you may try using try and except like below code:
import urllib.error

try:
    # Only the network call is guarded, so the handler cannot hide parsing bugs.
    req = urllib.request.Request(
        "http://www.ip-adress.com/ip_tracer/157.123.22.11",
        headers={'User-Agent': "Magic Browser"},
    )
    html = urllib.request.urlopen(req).read()
except urllib.error.URLError as e:
    # Network-level failure (e.g. connection refused): report the cause.
    print(e.reason)
else:
    doc = lh.fromstring(html)
    # XPath attribute tests are written with "@". Take the last element whose
    # class is "odd" and print its text with all whitespace removed.
    print(''.join(doc.xpath('.//*[@class="odd"]')[-1].text_content().split()))

Categories