Python HTTP client stuck on 100-continue

I have a simple HTTP server in Python that implements PUT using 100 Continue:
from http.server import HTTPServer, SimpleHTTPRequestHandler

class TestHandler(SimpleHTTPRequestHandler):
    def do_PUT(self):
        length = int(self.headers.get('Content-Length'))
        self.send_response_only(100)
        self.end_headers()
        data = self.rfile.read(length)
        res = manipulate(data)
        new_length = len(res)
        self.send_response(200)
        self.send_header("Content-Length", new_length)
        self.end_headers()
        self.wfile.write(res)

server = HTTPServer(("localhost", 8080), TestHandler)
server.serve_forever()
I try to connect to the server using this client:
from http.client import HTTPConnection

def send_put(data):
    c = HTTPConnection('localhost', 8080)
    c.request('PUT', 'http://localhost:8080/',
              headers={'Content-Length': len(data), 'Expect': '100-continue'})
    r = c.getresponse()
    if 100 != r.status:
        return
    c.request('PUT', 'http://localhost:8080/', body=data)
    r = c.getresponse()
    print(r.read())
But the code always gets stuck on the first getresponse(), even though I can see the 100 Continue response in Wireshark. What am I doing wrong here? Does Python's http client even support 100-continue?
EDIT: After looking at some of the http.client code I found why getresponse() gets stuck: Python's client simply skips over the 100 Continue and waits for the next response, which never comes (from python3.4/http/client.py):
# read until we get a non-100 response
while True:
    version, status, reason = self._read_status()
    if status != CONTINUE:
        break
    # skip the header from the 100 response
    while True:
        skip = self.fp.readline(_MAXLINE + 1)
        if len(skip) > _MAXLINE:
            raise LineTooLong("header line")
        skip = skip.strip()
        if not skip:
            break
        if self.debuglevel > 0:
            print("header:", skip)

I ran into this as well; it's a nine-year-old Python issue. I came up with the following rather gross "just get it to run" workaround, which seems to work in my case (Python 3.5, HTTPS only):
import http.client
import logging

class ContinueHTTPResponse(http.client.HTTPResponse):
    def _read_status(self, *args, **kwargs):
        version, status, reason = super()._read_status(*args, **kwargs)
        if status == 100:
            status = 199  # smuggle the 100 past the skip-continue loop
        return version, status, reason

    def begin(self, *args, **kwargs):
        super().begin(*args, **kwargs)
        if self.status == 199:
            self.status = 100

    def _check_close(self, *args, **kwargs):
        return super()._check_close(*args, **kwargs) and self.status != 100

class ContinueHTTPSConnection(http.client.HTTPSConnection):
    response_class = ContinueHTTPResponse

    def getresponse(self, *args, **kwargs):
        logging.debug('running getresponse')
        response = super().getresponse(*args, **kwargs)
        if response.status == 100:
            # pretend the request is still in flight so a second
            # getresponse() is allowed after the 100 Continue
            setattr(self, '_HTTPConnection__state', http.client._CS_REQ_SENT)
            setattr(self, '_HTTPConnection__response', None)
        return response
I'm using it somewhat like this:
conn = ContinueHTTPSConnection(host)
conn.request(...)
resp = conn.getresponse()
if resp.status == http.client.CONTINUE:
    resp.read()
    conn.send(body)
    resp = conn.getresponse()
# do something with resp if you want...
Caveat: super hacky. Probably full of bugs. Use at your own risk.
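For plain HTTP, the same trick should in principle work by pairing the response class with HTTPConnection instead (untested sketch, same caveats as above):

class ContinueHTTPConnection(http.client.HTTPConnection):
    response_class = ContinueHTTPResponse

    def getresponse(self, *args, **kwargs):
        response = super().getresponse(*args, **kwargs)
        if response.status == 100:
            # same state reset as the HTTPS variant above
            setattr(self, '_HTTPConnection__state', http.client._CS_REQ_SENT)
            setattr(self, '_HTTPConnection__response', None)
        return response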

Related

Why h.getresponse() is needed in python logging HTTPHandler?

I overrode the emit method of Python's logging HTTPHandler to adapt it to my needs, and I noticed the line
h.getresponse() #can't do anything with the result
Why is this line necessary?
I noticed that removing this line has no effect when using an insecure connection, but makes the logs fail when using a secure connection.
def emit(self, record):
    """
    Emit a record.

    Send the record to the Web server as a percent-encoded dictionary
    """
    try:
        import http.client, urllib.parse
        host = self.host
        if self.secure:
            h = http.client.HTTPSConnection(host, context=self.context)
        else:
            h = http.client.HTTPConnection(host)
        url = self.url
        data = urllib.parse.urlencode(self.mapLogRecord(record))
        if self.method == "GET":
            if (url.find('?') >= 0):
                sep = '&'
            else:
                sep = '?'
            url = url + "%c%s" % (sep, data)
        h.putrequest(self.method, url)
        # support multiple hosts on one IP address...
        # need to strip optional :port from host, if present
        i = host.find(":")
        if i >= 0:
            host = host[:i]
        # See issue #30904: putrequest call above already adds this header
        # on Python 3.x.
        # h.putheader("Host", host)
        if self.method == "POST":
            h.putheader("Content-type",
                        "application/x-www-form-urlencoded")
            h.putheader("Content-length", str(len(data)))
        if self.credentials:
            import base64
            s = ('%s:%s' % self.credentials).encode('utf-8')
            s = 'Basic ' + base64.b64encode(s).strip().decode('ascii')
            h.putheader('Authorization', s)
        h.endheaders()
        if self.method == "POST":
            h.send(data.encode('utf-8'))
        h.getresponse() #can't do anything with the result
    except Exception:
        self.handleError(record)
Calling getresponse() blocks until the server has answered, which guarantees that the request was actually delivered and processed before the connection is discarded. Over plain HTTP the buffered request data usually gets flushed anyway, but over TLS, tearing the connection down early can abort the exchange before the server sees the record, which is why removing the line only breaks secure logging.
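If you do want to inspect the result (the stock handler throws it away), a minimal variant of that last call might look like this (hypothetical sketch):

resp = h.getresponse()  # blocks until the server has answered
if resp.status >= 400:  # optionally react to delivery failures
    print("log delivery failed:", resp.status, resp.reason)
resp.read()  # drain the body so the connection can be closed cleanly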

Using aiohttp's MultipartWriter with StreamResponse

I am trying to create a data endpoint that streams either the entirety of a file or responds appropriately to range requests. Streaming the whole file seems straightforward, but it's not clear to me how to deal with range requests. In particular, I can't see how aiohttp.MultipartWriter can write to a StreamResponse.
Here's an abstracted form of my code, so far:
from aiohttp.web import Request, StreamResponse
from aiohttp.multipart import MultipartWriter

async def data_handler(req: Request) -> StreamResponse:
    is_range_request = "Range" in req.headers

    with open("my_big_file", "rb") as f:
        if is_range_request:
            status_code = 202
            content_type = "multipart/bytes"
        else:
            status_code = 200
            content_type = "application/octet-stream"

        resp = StreamResponse(status=status_code, headers={"Content-Type": content_type})
        resp.enable_chunked_encoding()
        resp.enable_compression()
        await resp.prepare(req)

        if is_range_request:
            # _parse_range_header :: str -> List[ByteRange]
            # ByteRange = Tuple[int, int] i.e., "from" and "to", inclusive
            ranges = _parse_range_header(req.headers["Range"])

            mpwriter = MultipartWriter("bytes")
            for r in ranges:
                range_from, range_to = r
                range_size = (range_to - range_from) + 1
                range_header = {"Content-Type": "application/octet-stream"}

                # FIXME Won't this block?
                f.seek(range_from)
                mpwriter.append(f.read(range_size), range_header)

            # TODO Write to response. How?...

        else:
            while True:
                data = f.read(8192)
                if not data:
                    await resp.drain()
                    break
                resp.write(data)

    return resp
This also doesn't return the response until it reaches the end of the file. That doesn't seem right to me: how does an upstream caller know what's going on before the response is returned, or does the asyncio machinery handle this for me automagically?
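As an aside, here is a minimal sketch of the _parse_range_header helper assumed in the question, matching the type signature given in the comment (it only handles fully specified bytes=from-to ranges; suffix ranges like bytes=-500 would additionally need the file size):

from typing import List, Tuple

ByteRange = Tuple[int, int]  # "from" and "to", inclusive

def _parse_range_header(value: str) -> List[ByteRange]:
    # e.g. "bytes=0-499, 1000-1499" -> [(0, 499), (1000, 1499)]
    unit, _, range_spec = value.partition("=")
    if unit.strip() != "bytes":
        raise ValueError("unsupported range unit: %s" % unit)
    ranges = []
    for part in range_spec.split(","):
        start, _, end = part.strip().partition("-")
        ranges.append((int(start), int(end)))
    return ranges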

How to implement retry mechanism into python requests library?

I would like to add a retry mechanism to the Python requests library, so scripts that use it will retry on non-fatal errors.
At the moment I consider three kinds of errors to be recoverable:
HTTP return codes 502, 503, 504
host not found (less important now)
request timeout
As a first stage I want to retry the specified 5xx requests every minute.
I want to be able to add this functionality transparently, without having to manually implement recovery for each HTTP call made from inside these scripts or libraries that use python-requests.
This snippet of code will make all HTTP requests from the same session retry for a total of 5 times, sleeping between retries with an increasing backoff of 0s, 2s, 4s, 8s, 16s (the first retry is done immediately). It will retry on basic connectivity issues (including DNS lookup failures), and HTTP status codes of 502, 503 and 504.
import logging
import requests
from requests.adapters import HTTPAdapter, Retry
logging.basicConfig(level=logging.DEBUG)
s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[ 502, 503, 504 ])
s.mount('http://', HTTPAdapter(max_retries=retries))
s.get("http://httpstat.us/503")
See the urllib3 Retry class for details.
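For a quick sanity check of the schedule quoted above: in urllib3 versions of that era the sleep between retries was backoff_factor * (2 ** (retry_number - 1)), with no sleep before the first retry. A small sketch reproduces it:

def backoff_schedule(total, backoff_factor):
    # mirrors urllib3's Retry.get_backoff_time() of that era:
    # first retry is immediate, then the delay doubles each time
    return [0 if n == 1 else backoff_factor * (2 ** (n - 1))
            for n in range(1, total + 1)]

print(backoff_schedule(5, 1))  # -> [0, 2, 4, 8, 16]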
This is a snippet of code I used to retry requests made with urllib2. Maybe you can use it for your purposes:
import sys
import time
import urllib2

retries = 1
success = False
while not success:
    try:
        response = urllib2.urlopen(request)
        success = True
    except Exception as e:
        wait = retries * 30
        print 'Error! Waiting %s secs and re-trying...' % wait
        sys.stdout.flush()
        time.sleep(wait)
        retries += 1
The waiting time grows incrementally to avoid getting banned by the server.
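Note that as written the loop retries forever; capping it is a small change (sketch, keeping the snippet's Python 2 style):

MAX_RETRIES = 5
retries = 1
success = False
while not success and retries <= MAX_RETRIES:
    try:
        response = urllib2.urlopen(request)
        success = True
    except Exception as e:
        wait = retries * 30
        print 'Error! Waiting %s secs and re-trying...' % wait
        sys.stdout.flush()
        time.sleep(wait)
        retries += 1
if not success:
    raise RuntimeError('giving up after %d attempts' % MAX_RETRIES)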
A possible solution using the retrying package:
from retrying import retry
import requests
from requests.exceptions import ConnectionError

def retry_if_connection_error(exception):
    """Specify an exception you need, or just return True."""
    #return True
    return isinstance(exception, ConnectionError)

# if an exception is raised, retry with a 2 second wait
@retry(retry_on_exception=retry_if_connection_error, wait_fixed=2000)
def safe_request(url, **kwargs):
    return requests.get(url, **kwargs)

response = safe_request('http://test.com')
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

MAX_RETRY = 2
MAX_RETRY_FOR_SESSION = 2
BACK_OFF_FACTOR = 0.3
TIME_BETWEEN_RETRIES = 1000
ERROR_CODES = (500, 502, 504)

def requests_retry_session(retries=MAX_RETRY_FOR_SESSION,
                           back_off_factor=BACK_OFF_FACTOR,
                           status_force_list=ERROR_CODES,
                           session=None):
    session = session or requests.Session()
    # note: urllib3 >= 1.26 renames method_whitelist to allowed_methods
    retry = Retry(total=retries, read=retries, connect=retries,
                  backoff_factor=back_off_factor,
                  status_forcelist=status_force_list,
                  method_whitelist=frozenset(['GET', 'POST']))
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

class ConfigService:
    def __init__(self):
        self.session = requests_retry_session(session=requests.Session())

    def call_to_api(self):
        config_url = 'http://localhost:8080/predict/'
        headers = {
            "Content-Type": "application/json",
            "x-api-key": self.x_api_key
        }
        response = self.session.get(config_url, headers=headers)
        return response
I was able to obtain the desired level of reliability by extending the requests.Session class.
Here is the code: https://bitbucket.org/bspeakmon/jira-python/src/a7fca855394402f58507ca4056de87ccdbd6a213/jira/resilientsession.py?at=master
EDIT: That code was:
from requests import Session
from requests.exceptions import ConnectionError
import logging
import time

class ResilientSession(Session):
    """
    This class is supposed to retry requests that do return temporary errors.

    At this moment it supports: 502, 503, 504
    """

    def __recoverable(self, error, url, request, counter=1):
        if hasattr(error, 'status_code'):
            if error.status_code in [502, 503, 504]:
                error = "HTTP %s" % error.status_code
            else:
                return False
        DELAY = 10 * counter
        logging.warn("Got recoverable error [%s] from %s %s, retry #%s in %ss" % (error, request, url, counter, DELAY))
        time.sleep(DELAY)
        return True

    def get(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).get(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'GET', counter):
                continue
            return r

    def post(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).post(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'POST', counter):
                continue
            return r

    def delete(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).delete(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'DELETE', counter):
                continue
            return r

    def put(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).put(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'PUT', counter):
                continue
            return r

    def head(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).head(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'HEAD', counter):
                continue
            return r

    def patch(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).patch(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'PATCH', counter):
                continue
            return r

    def options(self, url, **kwargs):
        counter = 0
        while True:
            counter += 1
            try:
                r = super(ResilientSession, self).options(url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self.__recoverable(r, url, 'OPTIONS', counter):
                continue
            return r
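As an aside, all seven overrides above are identical except for the HTTP verb; since every helper method in requests funnels through Session.request(), a single override would cover them all (sketch, assuming the same behaviour is wanted):

class CompactResilientSession(Session):
    def _recoverable(self, error, url, request, counter=1):
        if hasattr(error, 'status_code'):
            if error.status_code in [502, 503, 504]:
                error = "HTTP %s" % error.status_code
            else:
                return False
        DELAY = 10 * counter
        logging.warn("Got recoverable error [%s] from %s %s, retry #%s in %ss" % (error, request, url, counter, DELAY))
        time.sleep(DELAY)
        return True

    def request(self, method, url, **kwargs):
        # GET, POST, PUT, ... all end up here in requests.Session
        counter = 0
        while True:
            counter += 1
            try:
                r = super(CompactResilientSession, self).request(method, url, **kwargs)
            except ConnectionError as e:
                r = e.message
            if self._recoverable(r, url, method, counter):
                continue
            return r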
A method to retry some logic if an exception has occurred, at time intervals t1 = 1s, t2 = 2s, t3 = 4s.
We can increase or decrease the time intervals as well.
import time

MAX_RETRY = 3
retries = 0
while True:
    try:
        call_to_api()  # some business logic goes here
        break
    except Exception as exception:
        retries += 1
        if retries <= MAX_RETRY:
            print("ERROR=Method failed. Retrying ... #%s" % retries)
            time.sleep(1 << (retries - 1))  # retry after 1s, 2s, 4s: powers of two
            continue
        else:
            raise Exception(exception)

Timeout not working using urllib2, socks5 proxy and socksipy

I'm using socksipy with urllib2 in Python 2.6. Everything works fine except the timeouts when I hit a hanging URL. None of the urllib2 function timeout arguments or global socket default timeouts are working. I've even tried setting the timeout a number of different ways in the subclassed handlers below, with no success. Any ideas?
Here is a test script (assuming that you have the socksipy project installed and are adding it to your system path):
import os, sys
import httplib
sys.path.append("/parent/path/to/socksipy/project")
import socks # import socksipy
import socket
socket.setdefaulttimeout(30.0)
import urllib2

class SocksiPyConnection(httplib.HTTPConnection):
    def __init__(self, proxytype, proxyaddr, proxyport=None, rdns=False, username=None, password=None, *args, **kwargs):
        self.proxyargs = (proxytype, proxyaddr, proxyport, rdns, username, password)
        httplib.HTTPConnection.__init__(self, *args, **kwargs)

    def connect(self):
        self.sock = socks.socksocket()
        self.sock.setproxy(*self.proxyargs)
        if isinstance(self.timeout, float):
            self.sock.settimeout(self.timeout)
        self.sock.connect((self.host, self.port))

class SocksiPyHandler(urllib2.HTTPHandler):
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kw = kwargs
        urllib2.HTTPHandler.__init__(self)

    def http_open(self, req):
        def build(host, port=None, strict=None, timeout=0):
            conn = SocksiPyConnection(*self.args, host=host, port=port, strict=strict, timeout=timeout, **self.kw)
            return conn
        return self.do_open(build, req)

if __name__ == '__main__':
    #
    # this one works for a non-hanging URL
    #
    proxyhost = "responder.w2"
    proxyport = 1050
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxyhost, int(proxyport))
    socket.socket = socks.socksocket
    resp = urllib2.urlopen("http://www.google.com", timeout=30.0)
    # hang here
    print "returned 1"
    #
    # one way to go about it for a hanging URL
    #
    proxyhost = "responder.w2"
    proxyport = 1050
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxyhost, int(proxyport))
    socket.socket = socks.socksocket
    resp = urllib2.urlopen("http://erma.orr.noaa.gov/cgi-bin/mapserver/charts?version=1.1.1&service=wms&request=GetCapabilities", timeout=30.0)
    # it hangs here
    print "returned 2"
    #
    # another way to go about it for a hanging URL
    #
    proxyhost = "responder.w2"
    proxyport = 1050
    opener = urllib2.build_opener(SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, proxyhost, int(proxyport)))
    resp = opener.open("http://erma.orr.noaa.gov/cgi-bin/mapserver/charts?version=1.1.1&service=wms&request=GetCapabilities", timeout=30.0)
    # it hangs here
    print "returned 3"
This worked for me:
socks.socket.setdefaulttimeout(7)
You should avoid editing the Python socks library directly.
It turns out the "hanging/timeout" issue I mentioned above was in fact a "blocking" issue in the socksipy socks.py code. If you are hitting an endpoint that still responds with 200 but sends no data (0 bytes), then socks.py will block, because that's how it's written. Here is the before and after for creating your own timeout:
socks.py BEFORE:
def __recvall(self, bytes):
    """__recvall(bytes) -> data
    Receive EXACTLY the number of bytes requested from the socket.
    Blocks until the required number of bytes have been received.
    """
    data = ""
    while len(data) < bytes:
        data = data + self.recv(bytes - len(data))
    return data
socks.py AFTER with timeout:
def __recvall(self, bytes):
    """__recvall(bytes) -> data
    Receive EXACTLY the number of bytes requested from the socket.
    Blocks until the required number of bytes have been received.
    """
    data = self.recv(bytes, socket.MSG_WAITALL)
    if type(data) not in (str, unicode) or len(data) != bytes:
        raise socket.timeout('timeout')
    return data
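If you'd rather not edit socks.py itself (see the caveat above), the same fix can be monkey-patched in from your own code. This relies on socksipy's name-mangled private method, so treat it as a fragile sketch:

import socket
import socks

def _recvall_with_timeout(self, count):
    # drop-in replacement for socks.socksocket.__recvall that honours timeouts
    data = self.recv(count, socket.MSG_WAITALL)
    if len(data) != count:
        raise socket.timeout('timeout')
    return data

# override the name-mangled private method on the class
socks.socksocket._socksocket__recvall = _recvall_with_timeout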

Problems using multipart_encode (poster library)

I am trying to upload a file using multipart_encode to handle the MIME encoding. However, I got the following error: AttributeError: multipart_yielder instance has no attribute '__len__'. Below is my approach; I would really appreciate it if anyone could give me some suggestions.
import urllib2
from poster.encode import multipart_encode

url = "https://pi-user-files.s3-external-1.amazonaws.com/"
post_data = {}
# data is a dict
post_data['AWSAccessKeyId'] = data['ticket']['AWSAccessKeyId']
post_data['success_action_redirect'] = data['ticket']['success_action_redirect']
post_data['acl'] = data['ticket']['acl']
post_data['key'] = data['ticket']['key']
post_data['signature'] = data['ticket']['signature']
post_data['policy'] = data['ticket']['policy']
post_data['Content-Type'] = data['ticket']['Content-Type']
# I would like to upload a text file "new 2"
post_data['file'] = open("new 2.txt", "rb")
datagen, headers = multipart_encode(post_data)
request2 = urllib2.Request(url, datagen, headers)
result = urllib2.urlopen(request2)
If you want to send a file, you should wrap the other parameters in MultipartParam objects. Example code for creating a file-upload request:
from poster.encode import multipart_encode, MultipartParam
import urllib2

def postFileRequest(url, paramName, fileObj, additionalHeaders={}, additionalParams={}):
    items = []
    # wrap post parameters
    for name, value in additionalParams.items():
        items.append(MultipartParam(name, value))
    # add file
    items.append(MultipartParam.from_file(paramName, fileObj))
    datagen, headers = multipart_encode(items)
    # add headers
    for item, value in additionalHeaders.iteritems():
        headers[item] = value
    return urllib2.Request(url, datagen, headers)
Also, I think you should execute register_openers() once at the beginning. You can find some details in the poster docs.
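For reference, the registration step is a one-liner; per poster's documentation it installs the streaming HTTP handlers into urllib2:

from poster.streaminghttp import register_openers
register_openers()  # do this once, before any urllib2.urlopen() calls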
The problem is that in httplib.py, the generator is not detected as such and is treated instead like a string that holds the full data to be sent (and therefore it tries to find its length):
if hasattr(data, 'read') and not isinstance(data, array): # generator
    if self.debuglevel > 0: print "sendIng a read()able"
    ....
A solution is to make the generator act like a read()able:
class GeneratorToReadable():
    def __init__(self, datagen):
        self.generator = datagen
        self._end = False
        self.data = ''

    def read(self, n_bytes):
        while not self._end and len(self.data) < n_bytes:
            try:
                next_chunk = self.generator.next()
                if next_chunk:
                    self.data += next_chunk
                else:
                    self._end = True
            except StopIteration:
                self._end = True
        result = self.data[0:n_bytes]
        self.data = self.data[n_bytes:]
        return result
and use it like so:
datagen, headers = multipart_encode(post_data)
readable = GeneratorToReadable(datagen)
req = urllib2.Request(url, readable, headers)
result = urllib2.urlopen(req)
