How to implement a retry mechanism in the Python requests library?

I would like to add a retry mechanism to the python requests library, so that scripts using it will retry on non-fatal errors.
At the moment I consider three kinds of errors to be recoverable:
HTTP return codes 502, 503, 504
host not found (less important for now)
request timeout
In the first stage I want to retry the specified 5xx requests every minute.
I want to be able to add this functionality transparently, without having to manually implement recovery for each HTTP call made from inside these scripts or libraries that use python-requests.

This snippet of code will make all HTTP requests from the same session retry for a total of 5 times, sleeping between retries with an increasing backoff of 0s, 2s, 4s, 8s, 16s (the first retry is done immediately). It will retry on basic connectivity issues (including DNS lookup failures), and HTTP status codes of 502, 503 and 504.
import logging
import requests
from requests.adapters import HTTPAdapter, Retry
logging.basicConfig(level=logging.DEBUG)
s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[ 502, 503, 504 ])
s.mount('http://', HTTPAdapter(max_retries=retries))
s.get("http://httpstat.us/503")
See the Retry class for details.
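If the same behaviour is wanted for HTTPS URLs as well, the adapter can be mounted for that scheme too; a minimal sketch of that variation (the URL and timeout are illustrative):
import requests
from requests.adapters import HTTPAdapter, Retry

s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
# Mount the same adapter for both schemes so every request made through
# this session gets the retry behaviour.
s.mount('http://', adapter)
s.mount('https://', adapter)
# Retry does not set a per-request timeout, so pass one explicitly.
s.get('https://httpstat.us/503', timeout=10)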

This is a snippet of code I used to retry the requests made with urllib2. Maybe you can use it for your purposes:
import sys
import time
import urllib2

retries = 1
success = False
while not success:
    try:
        # 'request' is the urllib2.Request object built by the caller
        response = urllib2.urlopen(request)
        success = True
    except Exception as e:
        wait = retries * 30
        print 'Error! Waiting %s secs and re-trying...' % wait
        sys.stdout.flush()
        time.sleep(wait)
        retries += 1
The waiting time grows incrementally to avoid getting banned by the server.
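For Python 3 the same idea can be expressed with urllib.request, and it is usually worth capping the number of attempts; a rough sketch along those lines (the URL and delays are illustrative):
import time
import urllib.request
import urllib.error

MAX_RETRIES = 5

for attempt in range(1, MAX_RETRIES + 1):
    try:
        response = urllib.request.urlopen('http://httpstat.us/503')
        break
    except urllib.error.URLError as e:
        wait = attempt * 30
        print('Error: %s. Waiting %s secs and re-trying...' % (e, wait))
        time.sleep(wait)
else:
    raise RuntimeError('Giving up after %d attempts' % MAX_RETRIES)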

Possible solution using the retrying package
from retrying import retry
import requests
from requests.exceptions import ConnectionError

def retry_if_connection_error(exception):
    """Return True for the exceptions you want to retry on (or just return True)."""
    return isinstance(exception, ConnectionError)

# if a matching exception is raised, retry with a 2 second wait
@retry(retry_on_exception=retry_if_connection_error, wait_fixed=2000)
def safe_request(url, **kwargs):
    return requests.get(url, **kwargs)

response = safe_request('http://test.com')
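The retrying package can also cap the number of attempts instead of retrying forever; a small variation of the above (parameter values are just examples):
from retrying import retry
import requests

# Stop after 5 attempts, waiting 2 seconds between them.
@retry(stop_max_attempt_number=5, wait_fixed=2000)
def safe_request(url, **kwargs):
    return requests.get(url, **kwargs)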

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

MAX_RETRY = 2
MAX_RETRY_FOR_SESSION = 2
BACK_OFF_FACTOR = 0.3
TIME_BETWEEN_RETRIES = 1000
ERROR_CODES = (500, 502, 504)

def requests_retry_session(retries=MAX_RETRY_FOR_SESSION,
                           back_off_factor=BACK_OFF_FACTOR,
                           status_force_list=ERROR_CODES,
                           session=None):
    session = session or requests.Session()
    # Note: method_whitelist was renamed to allowed_methods in newer urllib3 releases.
    retry = Retry(total=retries, read=retries, connect=retries,
                  backoff_factor=back_off_factor,
                  status_forcelist=status_force_list,
                  method_whitelist=frozenset(['GET', 'POST']))
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

class ConfigService:
    def __init__(self):
        self.session = requests_retry_session(session=requests.Session())

    def call_to_api(self):
        config_url = 'http://localhost:8080/predict/'
        headers = {
            "Content-Type": "application/json",
            "x-api-key": self.x_api_key  # assumed to be set elsewhere on the instance
        }
        response = self.session.get(config_url, headers=headers)
        return response
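A quick usage sketch of the class above; note that x_api_key is never set in __init__, so it has to be assigned before calling (the key value here is purely hypothetical):
service = ConfigService()
service.x_api_key = 'my-api-key'  # hypothetical value; the class expects this attribute
response = service.call_to_api()
print(response.status_code)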

I was able to obtain the desired level of reliability by extending the requests.Session class.
Here is the code https://bitbucket.org/bspeakmon/jira-python/src/a7fca855394402f58507ca4056de87ccdbd6a213/jira/resilientsession.py?at=master
EDIT: that code was:
from requests import Session
from requests.exceptions import ConnectionError
import logging
import time

class ResilientSession(Session):
    """
    This class is supposed to retry requests that do return temporary errors.
    At this moment it supports: 502, 503, 504
    """

    def __recoverable(self, error, url, request, counter=1):
        if hasattr(error, 'status_code'):
            if error.status_code in [502, 503, 504]:
                error = "HTTP %s" % error.status_code
            else:
                return False
        DELAY = 10 * counter
        logging.warn("Got recoverable error [%s] from %s %s, retry #%s in %ss" % (error, request, url, counter, DELAY))
        time.sleep(DELAY)
        return True

    def get(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).get(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'GET', counter):
                continue
            return r

    def post(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).post(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'POST', counter):
                continue
            return r

    def delete(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).delete(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'DELETE', counter):
                continue
            return r

    def put(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).put(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'PUT', counter):
                continue
            return r

    def head(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).head(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'HEAD', counter):
                continue
            return r

    def patch(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).patch(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'PATCH', counter):
                continue
            return r

    def options(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).options(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'OPTIONS', counter):
                continue
            return r
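A quick usage sketch of the class above (the URL is only for illustration); it behaves like a normal requests.Session but transparently retries 502/503/504 responses and connection errors with a growing delay:
session = ResilientSession()
# Note: with a permanently failing URL this retries indefinitely,
# since the class has no cap on the number of attempts.
response = session.get('http://httpstat.us/503')
print(response.status_code)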

A way to retry some logic if an exception occurs, at increasing time intervals t1=1 sec, t2=2 sec, t3=4 sec.
The time interval can be increased or decreased as needed.
import time

MAX_RETRY = 3
retries = 0
while True:
    try:
        call_to_api()  # some business logic goes here
        break
    except Exception as exception:
        retries += 1
        if retries <= MAX_RETRY:
            print("ERROR=Method failed. Retrying ... #%s" % retries)
            # retry after a delay that grows as a power of 2: 1s, 2s, 4s
            time.sleep(1 << (retries - 1))
            continue
        else:
            raise Exception(exception)

Related

Python: Periodically run http requests in thread

import requests
import json
import threading

data = {
    "amount": 2
}

def foo(data):
    try:
        r = requests.post(url="www.mysite.com", data=data)
        j = json.loads(r.text)
        print(j)
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

threading.Timer(1, foo, [data]).start()
I want to run this HTTP request every second using a thread in my program. However, the program only runs the HTTP request once and exits. How do I fix this?
You need to restart the timer after each request:
def foo(data):
    try:
        r = requests.post(url="www.mysite.com", data=data)
        j = json.loads(r.text)
        print(j)
        threading.Timer(1, foo, [data]).start()  # New line added: re-arm the timer
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)
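If spawning a new Timer thread for every call is not wanted, an alternative is a single background thread that loops and sleeps; a rough sketch of that approach (the URL is the asker's placeholder, with a scheme added so requests accepts it):
import threading
import time
import requests

def poll(data):
    while True:
        try:
            r = requests.post("http://www.mysite.com", data=data)
            print(r.text)
        except requests.exceptions.RequestException as e:
            print("request failed: %s" % e)
        time.sleep(1)

# daemon=True so this thread does not keep the process alive on exit
threading.Thread(target=poll, args=({"amount": 2},), daemon=True).start()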

Download and handle errors

I've been using a function that I took from the book Web Scraping with Python from O'Reilly by Ryan Mitchell:
import sys
import os.path
import socket
import random
import urllib2
import contextlib
import diskCache
import logging as logger
from bs4 import BeautifulSoup

DEFAULT_AGENT = 'Mozilla/5.0 Firefox/56.0'
DEFAULT_DELAY = 3
DEFAULT_RETRIES = 10
DEFAULT_TIMEOUT = 60

socket.setdefaulttimeout(DEFAULT_TIMEOUT)

def download(url, delay=DEFAULT_DELAY, user_agent=DEFAULT_AGENT, proxies=None,
             cache=None, num_retries=DEFAULT_RETRIES, timeout=DEFAULT_TIMEOUT, data=None):
    result = None
    if cache:
        try:
            result = cache[url]
        except KeyError:
            # url is not available in cache
            pass
    if result is not None and result['code'] is not None \
            and num_retries > 0 and 500 <= result['code'] < 600:
        # server error so ignore result from cache and re-download
        result = None
    if result is None:
        proxy = random.choice(proxies) if proxies else None
        headers = {'User-agent': user_agent}
        result = call(url, headers, proxy=proxy, num_retries=num_retries, cache=cache)
        if cache:
            # save result to cache
            cache[url] = result
    return result['html']

def call(url, headers, proxy, num_retries, cache=None, data=None):
    request = urllib2.Request(url, data, headers or {})
    with contextlib.closing(urllib2.urlopen(request)) as connection:
        try:
            logger.info('Downloading: %s', url)
            html = connection.read()
            code = connection.getcode()
        except Exception as e:
            logger.exception('Download error: %s', str(e))
            if cache:
                del cache['url']
            html = None
            if hasattr(e, 'code'):
                code = e.code
                if num_retries > 0 and 500 <= code < 600:
                    return download(url, headers, num_retries-1, data)  # retry server errors
            else:
                code = None
    return {'html': html, 'code': code}
I wanted to know if there is a simpler way of handling errors when downloading URLs. I've seen that the requests library is higher level and easier, and maybe it could simplify this. At the very least, how would this code look in Python 3?
It would be something like
"""Functions used by the fetch module"""
# Standard library imports
import time
import socket
import logging as logger
from typing import Dict, Optional
# Third party imports
import requests
from requests.exceptions import HTTPError, Timeout
from bs4 import BeautifulSoup
# Constants
DEFAULT_AGENT = 'Mozilla/5.0 Firefox/56.0'
DEFAULT_DELAY = 3
DEFAULT_RETRIES = 10
DEFAULT_TIMEOUT = 60
socket.setdefaulttimeout(DEFAULT_TIMEOUT)
def fetch(url: str, retries: Optional[int] = DEFAULT_RETRIES) -> Dict:
"""Download an url"""
code = None
try:
logger.info('Downloading: %s', url)
resp = requests.get(url)
resp.raise_for_status()
code = resp.status_code
except (HTTPError, Timeout) as ex:
logger.exception("Couldn't download %s", ex)
return None
if code is not None and retries > 0 and \
500 <= code < 600: # Server error
logger.info('Retrying download')
time.sleep(DEFAULT_DELAY)
return fetch(url, retries-1)
return {'html': resp, 'code': code}
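A quick usage sketch of the fetch function above (the URL is illustrative); the returned dict carries the full Response object under 'html':
result = fetch('http://httpstat.us/200')
if result is not None:
    print(result['code'])
    print(result['html'].text[:200])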
As you said this is a lot easier with requests
resp = requests.get(url, headers=headers, timeout=timeout)
print(resp.status_code)
print(resp.text)
# for an API use resp.json()
There is no exception raised by default. You can call resp.raise_for_status() if you do want to raise an exception.
See http://docs.python-requests.org/en/master/user/quickstart/ for details
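For example, a minimal sketch of turning bad status codes into exceptions (URL and timeout are illustrative):
import requests
from requests.exceptions import HTTPError, Timeout

try:
    resp = requests.get('http://httpstat.us/503', timeout=10)
    resp.raise_for_status()  # raises HTTPError for 4xx/5xx responses
except (HTTPError, Timeout) as exc:
    print('Request failed: %s' % exc)
else:
    print(resp.status_code, resp.text[:100])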

bitcoin json rpc with python requests module?

I've been trying for hours, and I just don't know what I'm doing wrong. It's just for planning/research (not performant) -- playing around with some code from github -- but I need to see it functional.
RPC_USER = username
RPC_PASS = pasword
rpc_id = ID HERE
jsonrpc = "2.0"
payload = {"jsonrpc": jsonrpc, "id": rpc_id, "method": method, "params": params}
authstr = base64.encodestring(bytes('%s:%s' % (RPC_USER, RPC_PASS), 'utf-8')).strip()
request_headers = {"Authorization": "Basic %s" % authstr, 'content-type': 'application/json'}

try:
    response = requests.get(RPC_URL, headers=request_headers, data=json.dumps(payload)).json()
    print(response['result'])
except Exception as e:
    print(str(e))

if response['id'] != rpc_id:
    raise ValueError("invalid response id!")
I get an error as follows:
Here's the whole traceback:
Expecting value: line 1 column 1 (char 0) # prints the Exception
Traceback (most recent call last):
  File "miner_2017.py", line 411, in <module>
    solo_miner(bin2hex("------coinbase message here -----"), "-----bitcoin address here-----")
  File "miner_2017.py", line 401, in solo_miner
    mined_block, hps = block_mine(rpc_getblocktemplate(), coinbase_message, 0, address, timeout=60)
  File "miner_2017.py", line 63, in rpc_getblocktemplate
    try: return rpc("getblocktemplate", [{}])
  File "miner_2017.py", line 52, in rpc
    if response['id'] != rpc_id:
UnboundLocalError: local variable 'response' referenced before assignment
After doing some looking, this seems to be a problem with decoding the JSON object from a bytes object rather than a string object. I don't know how to fix it. It seems the "response" variable assignment was unsuccessful because of the JSON problem. How can I get the JSON object in string form from the request?
Would somebody help me out? Thanks
#!/usr/bin/env python
import getpass
import json
import requests

def instruct_wallet(method, params):
    url = "http://127.0.0.1:8332/"
    payload = json.dumps({"method": method, "params": params})
    headers = {'content-type': "application/json", 'cache-control': "no-cache"}
    try:
        response = requests.request("POST", url, data=payload, headers=headers, auth=(rpc_user, rpc_password))
        return json.loads(response.text)
    except requests.exceptions.RequestException as e:
        print e
    except:
        print 'No response from Wallet, check Bitcoin is running on this machine'

rpc_user = 'foo'
rpc_password = 'bar'

passphrase = getpass.getpass('Enter your wallet passphrase: ')
timeout = raw_input('Unlock for how many seconds: ')

answer = instruct_wallet('walletpassphrase', [passphrase, timeout])
if answer['error'] != None:
    print answer['error']
else:
    print answer['result']
I'm using something similar for Altcoins
import decimal
import itertools
import json
import requests

id_counter = itertools.count()

class BTCJsonRPC(object):
    def __init__(self, url, user, passwd, log, method=None, timeout=30):
        self.url = url
        self._user = user
        self._passwd = passwd
        self._method_name = method
        self._timeout = timeout
        self._log = log

    def __getattr__(self, method_name):
        return BTCJsonRPC(self.url, self._user, self._passwd, self._log, method_name, timeout=self._timeout)

    def __call__(self, *args):
        # rpc json call
        payload = json.dumps({'jsonrpc': '2.0', 'id': next(id_counter), "method": self._method_name, "params": args})
        headers = {'Content-type': 'application/json'}
        resp = None
        try:
            resp = requests.post(self.url, headers=headers, data=payload, timeout=self._timeout,
                                 auth=(self._user, self._passwd))
            resp = resp.json(parse_float=decimal.Decimal)
        except Exception as e:
            error_msg = resp.text if resp is not None else e
            msg = u"{} {}:[{}] \n {}".format('post', self._method_name, args, error_msg)
            self._log.error(msg)
            return
        if resp.get('error') is not None:
            e = resp['error']
            self._log.error('{}:[{}]\n {}:{}'.format(self._method_name, args, e['code'], e['message']))
            return None
        elif 'result' not in resp:
            self._log.error('[{}]:[{}]\n MISSING JSON-RPC RESULT'.format(self._method_name, args))
            return None
        return resp['result']
I'm pretty sure you just need to change from using a GET to a POST, i.e.:
change
response = requests.get(RPC_URL, headers = request_headers, data = json.dumps(payload)).json()
to
response = requests.post(RPC_URL, headers=request_headers, data=json.dumps(payload)).json()
In fact, when I tried this with GET (without dumping the response to json), I got a 405 response. You should always take a look at your response object before doing further debugging with it.
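A small sketch of that kind of inspection, reusing the RPC_URL, request_headers and payload names from the question:
response = requests.post(RPC_URL, headers=request_headers, data=json.dumps(payload))

# Look at the raw response before trying to decode it as JSON.
print(response.status_code)                      # e.g. 405 when GET is used instead of POST
print(response.headers.get('Content-Type'))
print(response.text[:200])

if response.ok:
    result = response.json()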

python http client stuck on 100 continue

I have a simple HTTP server in Python that implements PUT using 100-continue:
from http.server import HTTPServer, SimpleHTTPRequestHandler

class TestHandler(SimpleHTTPRequestHandler):
    def do_PUT(self):
        length = int(self.headers.get('Content-Length'))
        self.send_response_only(100)
        self.end_headers()
        data = self.rfile.read(length)
        res = manipulate(data)  # 'manipulate' is the asker's own processing function
        new_length = len(res)
        self.send_response(200)
        self.send_header("Content-Length", new_length)
        self.end_headers()
        self.wfile.write(res)

server = HTTPServer(("localhost", 8080), TestHandler)
server.serve_forever()
I try to connect to the server using this client:
from http.client import HTTPConnection

def send_put(data):
    c = HTTPConnection('localhost', 8080)
    c.request('PUT', 'http://localhost:8080/', headers={'Content-Length': len(data), 'Expect': '100-continue'})
    r = c.getresponse()
    if 100 != r.status:
        return
    c.request('PUT', 'http://localhost:8080/', body=data)
    r = c.getresponse()
    print(r.read())
but the code always gets stuck on the first getresponse() even though I can see the 100-continue response in Wireshark. What am I doing wrong here? Does Python's http client even support 100-continue?
EDIT: after looking at some of the Python http code I found why getresponse gets stuck; python's http client just ignores the 100 continue and waits for the next response, which never comes (from python3.4/http/client.py):
# read until we get a non-100 response
while True:
    version, status, reason = self._read_status()
    if status != CONTINUE:
        break
    # skip the header from the 100 response
    while True:
        skip = self.fp.readline(_MAXLINE + 1)
        if len(skip) > _MAXLINE:
            raise LineTooLong("header line")
        skip = skip.strip()
        if not skip:
            break
        if self.debuglevel > 0:
            print("header:", skip)
I ran into this as well; it's a nine year old Python issue. I came up with the following rather gross "just get it to run" workaround, which seems to work in my case (Python 3.5, HTTPS only):
import http.client
import logging

class ContinueHTTPResponse(http.client.HTTPResponse):
    def _read_status(self, *args, **kwargs):
        version, status, reason = super()._read_status(*args, **kwargs)
        if status == 100:
            status = 199
        return version, status, reason

    def begin(self, *args, **kwargs):
        super().begin(*args, **kwargs)
        if self.status == 199:
            self.status = 100

    def _check_close(self, *args, **kwargs):
        return super()._check_close(*args, **kwargs) and self.status != 100

class ContinueHTTPSConnection(http.client.HTTPSConnection):
    response_class = ContinueHTTPResponse

    def getresponse(self, *args, **kwargs):
        logging.debug('running getresponse')
        response = super().getresponse(*args, **kwargs)
        if response.status == 100:
            setattr(self, '_HTTPConnection__state', http.client._CS_REQ_SENT)
            setattr(self, '_HTTPConnection__response', None)
        return response
I'm using it somewhat like this:
conn = ContinueHTTPSConnection(host)
conn.request(...)
resp = conn.getresponse()
if resp.status == http.client.CONTINUE:
    resp.read()
    conn.send(body)
    resp = conn.getresponse()
# do something with resp if you want...
Caveat: super hacky. Probably full of bugs. Use at your own risk.

Which is the right way of recovering from a requests.exceptions.ConnectionError?

I am scraping a web site, but sometimes the laptop loses the connection and I get (obviously) a requests.exceptions.ConnectionError. What is the right (or most elegant) way of recovering from this error? I mean: I don't want the program to stop, but to retry the connection, maybe some seconds later. This is my code, but I have the feeling it is not correct:
def make_soup(session, url):
    try:
        n = randint(1, MAX_NAPTIME)
        sleep(n)
        response = session.get(url)
    except requests.exceptions.ConnectionError as req_ce:
        error_msg = req_ce.args[0].reason.strerror
        print "Error: %s with url %s" % (error_msg, url)
        session = logout(session)
        n = randint(MIN_SLEEPTIME, MAX_SLEEPTIME)
        sleep(n)
        session = login(session)
        response = session.get(url)
    soup = BeautifulSoup(response.text)
    return soup
Any ideas?
Note that I need a session to scrape these pages, so I think that logging in again after a logout could cause trouble.
So why not something like
import requests
import time

def retry(cooloff=5, exc_type=None):
    if not exc_type:
        exc_type = [requests.exceptions.ConnectionError]

    def real_decorator(function):
        def wrapper(*args, **kwargs):
            while True:
                try:
                    return function(*args, **kwargs)
                except Exception as e:
                    if e.__class__ in exc_type:
                        print "failed (?)"
                        time.sleep(cooloff)
                    else:
                        raise e
        return wrapper

    return real_decorator
Which is a decorator that allows you to call any function until it succeeds, e.g.
@retry(exc_type=[ZeroDivisionError])
def test():
    return 1/0

print test()
Which will just print "failed (?)" every 5 seconds until the end of time (or until the laws of math change).
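Applied to the original problem, the decorator could wrap the page fetch itself; a rough sketch reusing names from the question's code (MAX_NAPTIME, randint, sleep):
@retry(cooloff=30)
def make_soup(session, url):
    # A ConnectionError raised here is caught by the decorator, which
    # sleeps and calls the function again.
    sleep(randint(1, MAX_NAPTIME))
    response = session.get(url)
    return BeautifulSoup(response.text)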
Is it really necessary to log out and log back in to your session? I'd just retry the connection in the same way:
def make_soup(session, url):
    success = False
    response = None
    for attempt in range(1, MAXTRIES):
        try:
            response = session.get(url)
            # If session.get succeeded, we break out of the
            # for loop after setting a success flag
            success = True
            break
        except requests.exceptions.ConnectionError as req_ce:
            error_msg = req_ce.args[0].reason.strerror
            print "Error: %s with url %s" % (error_msg, url)
            print " Attempt %s of %s" % (attempt, MAXTRIES)
            sleep(randint(MIN_SLEEPTIME, MAX_SLEEPTIME))

    # Figure out if we were successful.
    # Note it may not be needed to have a flag, you can maybe just
    # check the value of response here.
    if not success:
        print "Couldn't get it after retrying many times"
        return None

    # Once we get here, we know we got a good response
    soup = BeautifulSoup(response.text)
    return soup
