I recently switched from urllib2 to requests and I'm not sure how to deal with exceptions. What is best practice? My current code looks like this, but it isn't doing any good:
try:
    response = requests.get(url)
except requests.ConnectionError , e:
    logging.error('ConnectionError = ' + str(e.code))
    return False
except requests.HTTPError , e:
    logging.error('HTTPError = ' + str(e.reason))
    return False
except requests.Timeout, e:
    logging.error('Timeout')
    return False
except requests.TooManyRedirects:
    logging.error('TooManyRedirects')
    return False
except Exception:
    import traceback
    logging.error('generic exception: ' + traceback.format_exc())
    return False
Since it looks bad as a comment, have you tried:
try:
    # some code
except Exception as e:
    print e
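More concretely, every exception that requests raises on purpose inherits from requests.exceptions.RequestException, so a single handler can cover connection errors, HTTP errors, timeouts and redirect loops at once. A minimal sketch of that pattern (the fetch helper is illustrative, not from the original post):

import logging
import requests

def fetch(url):
    try:
        response = requests.get(url, timeout=10)
        # raise_for_status() turns 4xx/5xx responses into an HTTPError.
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logging.error('HTTPError: %s', e)
        return False
    except requests.exceptions.RequestException as e:
        # Base class for ConnectionError, Timeout, TooManyRedirects, etc.
        logging.error('Request failed: %s', e)
        return False
    return response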
I want to get the "profile_pic_url" from the JSON response.
I can already get 'follower_count', 'following_count' and 'username'.
import requests
import json
import re

pk = input("")

def getEndpoint(idUser):
    info=[]
    idUser=idUser.replace('\"','')
    endPoint='https://i.instagram.com/api/v1/users/idUser/info/'
    res=requests.get(endPoint.replace('idUser',idUser))
    try:
        full_name=json.dumps(res.json()['user']['full_name']['profile_pic_url'])
        try:
            fullName=re.sub('[^a-zA-Z \n]', ' ',full_name).lower().replace(',', ' ').replace('\n', ' ').replace('\r', ' ')
            fullName=" ".join(fullName.split())
            info.append(fullName)
        except Exception as e:
            print(e)
            info.append('')
        followersCount=json.dumps(res.json()['user']['follower_count'])
        followingCount=json.dumps(res.json()['user']['following_count'])
        followingCount=json.dumps(res.json()['user']['profile_pic_url'])
        username=json.dumps(res.json()['user']['username']).replace('\"','')
        info.append(username)
        info.append(followersCount)
        info.append(followingCount)
        info.append(profile_pic_url)
        return info
    except Exception as e:
        print(e)
        return None

print(getEndpoint(pk))
I expect the output to include followers, following and profile_pic_url, but I actually get only followers and following.
Here is a corrected version:

import requests
import json
import re

# Use 'https://www.instagram.com/web/search/topsearch/?query={query}' to find the pk
print("Enter your PK:")
pk = input("")

def getEndpoint(idUser):
    info=[]
    idUser=idUser.replace('\"','')
    endPoint='https://i.instagram.com/api/v1/users/idUser/info/'
    res=requests.get(endPoint.replace('idUser',idUser))
    try:
        full_name=json.dumps(res.json()['user']['full_name'])
        try:
            fullName=re.sub('[^a-zA-Z \n]', ' ',full_name).lower().replace(',', ' ').replace('\n', ' ').replace('\r', ' ')
            fullName=" ".join(fullName.split())
            info.append(fullName)
        except Exception as e:
            print(e)
            info.append('')
        followersCount=json.dumps(res.json()['user']['follower_count'])
        followingCount=json.dumps(res.json()['user']['following_count'])
        profile_pic_url=json.dumps(res.json()['user']['profile_pic_url'])
        username=json.dumps(res.json()['user']['username']).replace('\"','')
        info.append(username)
        info.append(followersCount)
        info.append(followingCount)
        info.append(profile_pic_url)
        return info
    except Exception as e:
        print(e)
        return None

print(getEndpoint(pk))
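The two changes that matter: full_name is now built from res.json()['user']['full_name'] instead of indexing that string with ['profile_pic_url'], and the third json.dumps call assigns to profile_pic_url instead of overwriting followingCount, so info.append(profile_pic_url) no longer raises a NameError. As a side note, parsing the response once and reusing the dict avoids four separate res.json() calls; a small sketch of that variant (same field names as above):

user = res.json()['user']   # parse the body once
info.append(user['username'])
info.append(str(user['follower_count']))
info.append(str(user['following_count']))
info.append(user['profile_pic_url'])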
I am calling an API. While making requests I hit the maximum number of tries and get a connection error. I would like to edit the url programmatically by incrementing the number in it. I know how to change the arguments programmatically, but I am not sure how to change/increment an argument when I hit a connection error.
I am using Python with the requests library.
Code snippet
Importing the libraries:
from requests.auth import HTTPBasicAuth
import requests
from requests.exceptions import ConnectionError
def make_request(data, id=None):
    url = "http://server001.net:8080/?id="
    result = {}
    if id:
        response = requests.get(url + id, auth=HTTPBasicAuth('uname', 'pass'))
        return response
    else:
        for line in data:
            try:
                response = requests.get(url + line, auth=HTTPBasicAuth('uname', 'pass'))
                result = html_parser2(response)
                if result:
                    write_csv(result)
                else:
                    pass
            except ConnectionError as e:
                print(e)
Expected output:
url = "http://server001.net:8080/?id="
url_edited = "http://server002.net:8080/?id="
I want to switch to the edited url only when I hit the maximum number of tries, i.e. when I get an exception; otherwise I keep requesting the same url.
One option is to enclose the try..except block in a while loop.
Besides, you should probably put your first requests.get into a try..except block too.
Also try to avoid multiple unrelated operations in one try..except block, i.e. execute write_csv only after a successful connection:
def make_request(data, id=None):
    url = 'http://server001.net:8080/?id={}'
    connection_failed = False
    response = None
    if id:
        try:
            response = requests.get(url.format(id), auth=HTTPBasicAuth('uname', 'pass'))
        except ConnectionError as e:
            print('id = {}, e: {}'.format(id, e))
    else:
        for line in data:
            while not connection_failed:
                try:
                    response = requests.get(url.format(line), auth=HTTPBasicAuth('uname', 'pass'))
                except ConnectionError as e:
                    connection_failed = True
                    print('line = {}, e: {}'.format(line, e))
                else:
                    result = html_parser2(response)
                    if result:
                        write_csv(result)
    return response
Or a simpler variant without the flag, which tries each line in turn and stops after the first request that goes through:
def make_request(data, id=None):
    url = 'http://server001.net:8080/?id={}'
    response = None
    if id:
        try:
            response = requests.get(url.format(id), auth=HTTPBasicAuth('uname', 'pass'))
        except ConnectionError as e:
            print('id = {}, e: {}'.format(id, e))
    else:
        for line in data:
            try:
                response = requests.get(url.format(line), auth=HTTPBasicAuth('uname', 'pass'))
            except ConnectionError as e:
                print('line = {}, e: {}'.format(line, e))
            else:
                result = html_parser2(response)
                if result:
                    write_csv(result)
                break
    return response
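If the goal is specifically to move from server001 to server002 once the retries are exhausted, a minimal sketch of that failover (the next_server helper and the serverNNN naming are assumptions taken from the expected output above):

import re
import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import ConnectionError

def next_server(url):
    # server001 -> server002, keeping the zero padding intact.
    return re.sub(r'server(\d+)',
                  lambda m: 'server{:0{}d}'.format(int(m.group(1)) + 1, len(m.group(1))),
                  url)

def get_with_failover(line, url='http://server001.net:8080/?id={}', tries=3, max_servers=3):
    for _ in range(max_servers):
        for _ in range(tries):
            try:
                return requests.get(url.format(line), auth=HTTPBasicAuth('uname', 'pass'))
            except ConnectionError as e:
                print('{} failed: {}'.format(url.format(line), e))
        url = next_server(url)   # retries exhausted: move to the next server
    return None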
This question already has answers here: What is the fastest way to send 100,000 HTTP requests in Python? (21 answers). Closed 6 years ago.
I have a Python script with a lot of exception handlers. I'm trying to make around 50,000 requests, and it is very slow. I also want the script to keep running through errors, so I added handlers for almost all the exceptions the requests library can raise, mostly variations of ConnectionError.
Is there a way to make this script much faster and more modular?
for i in range(50450000,50500000):
    try:
        try:
            try:
                try:
                    try:
                        try:
                            try:
                                try:
                                    try:
                                        try:
                                            try:
                                                try:
                                                    check_response = 'http://www.barneys.com/product/adidas--22human-race-22-nmd-sneakers-'+str(i)+'.html'
                                                    make_requests = requests.get(check_response,headers=headers).text
                                                    soup = BeautifulSoup(make_requests)
                                                    try:
                                                        main_wrapper = soup.find('h1',attrs={'class':'title'}).text
                                                        print main_wrapper + ' ' + str(i)
                                                    except AttributeError:
                                                        arr.append(check_response)
                                                        with open('working_urls.json','wb') as outfile:
                                                            json.dump(arr,outfile,indent=4)
                                                except requests.exceptions.InvalidURL:
                                                    continue
                                            except requests.exceptions.InvalidSchema:
                                                continue
                                        except requests.exceptions.MissingSchema:
                                            continue
                                    except requests.exceptions.TooManyRedirects:
                                        continue
                                except requests.exceptions.URLRequired:
                                    continue
                            except requests.exceptions.ConnectTimeout:
                                continue
                        except requests.exceptions.Timeout:
                            continue
                    except requests.exceptions.SSLError:
                        continue
                except requests.exceptions.ProxyError:
                    continue
            except requests.exceptions.HTTPError:
                continue
        except requests.exceptions.ReadTimeout:
            continue
    except requests.exceptions.ConnectionError:
        continue
First, please replace all these ugly try/except blocks with a single one, like:
for i in range(50450000,50500000):
    try:
        check_response = 'http://www.barneys.com/product/adidas--22human-race-22-nmd-sneakers-'+str(i)+'.html'
        make_requests = requests.get(check_response,headers=headers).text
        soup = BeautifulSoup(make_requests)
        try:
            main_wrapper = soup.find('h1',attrs={'class':'title'}).text
            print main_wrapper + ' ' + str(i)
        except AttributeError:
            arr.append(check_response)
            with open('working_urls.json','wb') as outfile:
                json.dump(arr,outfile,indent=4)
    except requests.exceptions.InvalidURL:
        continue
    except requests.exceptions.InvalidSchema:
        continue
    except requests.exceptions.MissingSchema:
        continue
    ...
And if all you do is continue in every case, use the base class RequestException. It becomes:
try:
    check_response = 'http://www.barneys.com/product/adidas--22human-race-22-nmd-sneakers-'+str(i)+'.html'
    make_requests = requests.get(check_response,headers=headers).text
    soup = BeautifulSoup(make_requests)
    try:
        main_wrapper = soup.find('h1',attrs={'class':'title'}).text
        print main_wrapper + ' ' + str(i)
    except AttributeError:
        arr.append(check_response)
        with open('working_urls.json','wb') as outfile:
            json.dump(arr,outfile,indent=4)
except requests.exceptions.RequestException:
    pass
Maybe not faster, but certainly far easier to read!
As for the speed issue, you should consider using threads/processes. Take a look at the threading and multiprocessing modules.
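As a sketch of the threaded approach (Python 3 syntax; concurrent.futures drives a pool of threads for you; the URL pattern is the one from the question, the headers dict is a stand-in, and the "working" criterion here is just a successful status code, which is illustrative rather than a copy of the original filtering logic):

from concurrent.futures import ThreadPoolExecutor, as_completed
import requests

headers = {'User-Agent': 'Mozilla/5.0'}  # stand-in for the question's headers

def check_url(i):
    url = ('http://www.barneys.com/product/adidas--22human-race-22-nmd-sneakers-'
           + str(i) + '.html')
    try:
        response = requests.get(url, headers=headers, timeout=10)
        return url if response.ok else None
    except requests.exceptions.RequestException:
        return None

# 20 workers keep many requests in flight while others wait on the network.
with ThreadPoolExecutor(max_workers=20) as pool:
    futures = [pool.submit(check_url, i) for i in range(50450000, 50500000)]
    working_urls = [f.result() for f in as_completed(futures) if f.result() is not None]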
When I handle an exception in Python:
try:
    a = dict()
    a[1]
except Exception as e:
    print str(e)
It prints:
1
I expect it to print:
KeyError: 1
Is there a way to retrieve the default error message?
Instead of this:
print str(e)
do this:
print(type(e).__name__ + ": " + str(e))
or just this:
print(type(e).__name__, e)
If you replace str(e) with repr(e), Python 2 will produce KeyError(1,) and Python 3 will produce KeyError(1).
This doesn't quite produce your desired output, but it may be close enough?
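If you want exactly the KeyError: 1 form the interpreter prints for an uncaught exception, the traceback module can reproduce it (this works on both Python 2 and 3):

import traceback

try:
    a = dict()
    a[1]
except Exception as e:
    # format_exception_only returns the final line(s) of a traceback,
    # e.g. ['KeyError: 1\n'] here.
    print(''.join(traceback.format_exception_only(type(e), e)).strip())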
This is part of my code in Python. I want to check the error and, if it is an HTTPError, add the host to the file ok.txt. But it doesn't work. What is the problem here?
except urllib2.URLError, e:
    print '%-15s\t%15r' % (url.strip(), e)
    if e == 'HTTPError()':
        OK.write('%-15s' % (url.strip()) + '\n')
        OK.flush()
When I run the whole script, the output is something like this:
http://a.com HTTPError()
http://b.com URLError(timeout('timed out',),)
http://c.com URLError(timeout('timed out',),)
http://d.com URLError(error(111, 'Connection refused'),)
http://e.com 200
Use isinstance() to check whether or not your error is of type HTTPError:
except urllib2.URLError as e:  # use "as e" instead of the old-style comma syntax
    print '%-15s\t%15r' % (url.strip(), e)
    if isinstance(e, urllib2.HTTPError):  # qualify HTTPError unless you imported it directly
        OK.write('%-15s' % (url.strip()) + '\n')
        OK.flush()
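Alternatively, since urllib2.HTTPError is a subclass of urllib2.URLError, you can give it its own except branch; just make sure it is listed first, or the URLError branch will swallow it. A sketch reusing the url and OK handle from the question:

try:
    response = urllib2.urlopen(url)
except urllib2.HTTPError as e:
    # HTTPError must come first: it is a subclass of URLError and would
    # otherwise be caught by the URLError branch below.
    print '%-15s\t%15r' % (url.strip(), e)
    OK.write('%-15s' % (url.strip()) + '\n')
    OK.flush()
except urllib2.URLError as e:
    print '%-15s\t%15r' % (url.strip(), e)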