How do I stream a file using werkzeug?

How do I stream a file using werkzeug? - python

I want to stream a big file via werkzeug.
Currently my wsgi application looks like this:
from werkzeug.wrappers import Request, Response
from werkzeug.wsgi import ClosingIterator, wrap_file
import os
class Streamer(object):
def __init__(self):
pass
def __call__(self, environ, start_response):
request = Request(environ)
filename = os.getcwd() + "/bigfile.xml"
try:
response = wrap_file(environ, open(filename) )
return response
except HTTPException, e:
response = e
return ClosingIterator(response(environ, start_response))
I'm not sure what I should do with the object returned by the wrap_file function.

Haven't tried myself but I think following will work.
g = file(path_to_bigfile) # or any generator
return Response(g, direct_passthrough=True)

Just in case one would additionally like to:
1. preserve the file name
2. issue download without page redirect
# file_name assumed to be known
# file_path assumed to be known
file_size = os.path.getsize(file_path)
fh = file(file_path, 'rb')
return Response(fh,
mimetype='application/octet-stream',
headers=[
('Content-Length', str(file_size)),
('Content-Disposition', "attachment; filename=\"%s\"" % file_name),
],
direct_passthrough=True)

Related

python http server threading via cli

New in version 3.7 supports ThreadingHTTPServer as mentioned in doc
to run from command line we use
python -m http.server
but its still run normal HTTPServer, is there any way to enable via command line.
EDITED:
python 3.7 runs ThreadingHTTPServer by default, no argument necessary

Simple Python 2 HTTP Server with multi-threading and partial-content support
#!/usr/bin/env python2
# Standard library imports.
from SocketServer import ThreadingMixIn
import BaseHTTPServer
import SimpleHTTPServer
import sys
import json
import os
from os.path import (join, exists, dirname, abspath, isabs, sep, walk, splitext,
isdir, basename, expanduser, split, splitdrive)
from os import makedirs, unlink, getcwd, chdir, curdir, pardir, rename, fstat
from shutil import copyfileobj, copytree
import glob
from zipfile import ZipFile
from urlparse import urlparse, parse_qs
from urllib import urlopen, quote, unquote
from posixpath import normpath
from cStringIO import StringIO
import re
import ConfigParser
import cgi
import threading
import socket
import errno
DATA_DIR = getcwd() # join(expanduser('~'), APP_NAME)
class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
pass
class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
""" Handler to handle POST requests for actions.
"""
serve_path = DATA_DIR
def do_GET(self):
""" Overridden to handle HTTP Range requests. """
self.range_from, self.range_to = self._get_range_header()
if self.range_from is None:
# nothing to do here
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
print 'range request', self.range_from, self.range_to
f = self.send_range_head()
if f:
self.copy_file_range(f, self.wfile)
f.close()
def copy_file_range(self, in_file, out_file):
""" Copy only the range in self.range_from/to. """
in_file.seek(self.range_from)
# Add 1 because the range is inclusive
bytes_to_copy = 1 + self.range_to - self.range_from
buf_length = 64*1024
bytes_copied = 0
while bytes_copied < bytes_to_copy:
read_buf = in_file.read(min(buf_length, bytes_to_copy-bytes_copied))
if len(read_buf) == 0:
break
out_file.write(read_buf)
bytes_copied += len(read_buf)
return bytes_copied
def send_range_head(self):
"""Common code for GET and HEAD commands.
This sends the response code and MIME headers.
Return value is either a file object (which has to be copied
to the outputfile by the caller unless the command was HEAD,
and must be closed by the caller under all circumstances), or
None, in which case the caller has nothing further to do.
"""
path = self.translate_path(self.path)
f = None
if isdir(path):
if not self.path.endswith('/'):
# redirect browser - doing basically what apache does
self.send_response(301)
self.send_header("Location", self.path + "/")
self.end_headers()
return None
for index in "index.html", "index.htm":
index = join(path, index)
if exists(index):
path = index
break
else:
return self.list_directory(path)
if not exists(path) and path.endswith('/data'):
# FIXME: Handle grits-like query with /data appended to path
# stupid grits
if exists(path[:-5]):
path = path[:-5]
ctype = self.guess_type(path)
try:
# Always read in binary mode. Opening files in text mode may cause
# newline translations, making the actual size of the content
# transmitted *less* than the content-length!
f = open(path, 'rb')
except IOError:
self.send_error(404, "File not found")
return None
if self.range_from is None:
self.send_response(200)
else:
self.send_response(206)
self.send_header("Content-type", ctype)
fs = fstat(f.fileno())
file_size = fs.st_size
if self.range_from is not None:
if self.range_to is None or self.range_to >= file_size:
self.range_to = file_size-1
self.send_header("Content-Range",
"bytes %d-%d/%d" % (self.range_from,
self.range_to,
file_size))
# Add 1 because ranges are inclusive
self.send_header("Content-Length",
(1 + self.range_to - self.range_from))
else:
self.send_header("Content-Length", str(file_size))
self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
self.end_headers()
return f
def list_directory(self, path):
"""Helper to produce a directory listing (absent index.html).
Return value is either a file object, or None (indicating an
error). In either case, the headers are sent, making the
interface the same as for send_head().
"""
try:
list = os.listdir(path)
except os.error:
self.send_error(404, "No permission to list directory")
return None
list.sort(key=lambda a: a.lower())
f = StringIO()
displaypath = cgi.escape(unquote(self.path))
f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
f.write("<hr>\n<ul>\n")
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
# Append / for directories or # for symbolic links
if os.path.isdir(fullname):
displayname = name + "/"
linkname = name + "/"
if os.path.islink(fullname):
displayname = name + "#"
# Note: a link to a directory displays with # and links with /
f.write('<li>%s\n'
% (quote(linkname), cgi.escape(displayname)))
f.write("</ul>\n<hr>\n</body>\n</html>\n")
length = f.tell()
f.seek(0)
self.send_response(200)
encoding = sys.getfilesystemencoding()
self.send_header("Content-type", "text/html; charset=%s" % encoding)
self.send_header("Content-Length", str(length))
self.end_headers()
return f
def translate_path(self, path):
""" Override to handle redirects.
"""
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = normpath(unquote(path))
words = path.split('/')
words = filter(None, words)
path = self.serve_path
for word in words:
drive, word = splitdrive(word)
head, word = split(word)
if word in (curdir, pardir): continue
path = join(path, word)
return path
# Private interface ######################################################
def _get_range_header(self):
""" Returns request Range start and end if specified.
If Range header is not specified returns (None, None)
"""
range_header = self.headers.getheader("Range")
if range_header is None:
return (None, None)
if not range_header.startswith("bytes="):
print "Not implemented: parsing header Range: %s" % range_header
return (None, None)
regex = re.compile(r"^bytes=(\d+)\-(\d+)?")
rangething = regex.search(range_header)
if rangething:
from_val = int(rangething.group(1))
if rangething.group(2) is not None:
return (from_val, int(rangething.group(2)))
else:
return (from_val, None)
else:
print 'CANNOT PARSE RANGE HEADER:', range_header
return (None, None)
def get_server(port=8000, next_attempts=0, serve_path=None):
Handler = RequestHandler
if serve_path:
Handler.serve_path = serve_path
while next_attempts >= 0:
try:
httpd = ThreadingHTTPServer(("", port), Handler)
return httpd
except socket.error as e:
if e.errno == errno.EADDRINUSE:
next_attempts -= 1
port += 1
else:
raise
def main(args=None):
if args is None:
args = sys.argv[1:]
PORT = 8000
if len(args)>0:
PORT = int(args[-1])
serve_path = DATA_DIR
if len(args) > 1:
serve_path = abspath(args[-2])
httpd = get_server(port=PORT, serve_path=serve_path)
print "serving at port", PORT
httpd.serve_forever()
if __name__ == "__main__" :
main()

Python Flask chunk data upload not working

I am trying to upload a large file (say ~1GB) from client (using Python request.post) to the flask server.
When client sends the request to server in chunks of 1024, server do not read the whole file and save to server 0kb.
Can you please help me in debugging what exactly I am mistaking here.
Server - Flask Code:
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import os
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'
#app.route("/upload/<filename>", methods=["POST", "PUT"])
def upload_process(filename):
filename = secure_filename(filename)
fileFullPath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
with open(fileFullPath, "wb") as f:
chunk_size = 1024
chunk = request.stream.read(chunk_size)
f.write(chunk)
return jsonify({'filename': filename})
if __name__ == '__main__':
app.run(host="0.0.0.0", port=int("8080"),debug=True)
Client - Request Code
import os
import requests
def read_in_chunks(file_object, chunk_size=1024):
while True:
data = file_object.read(chunk_size)
if not data:
break
yield data
def main(fname, url):
content_path = os.path.abspath(fname)
with open(content_path, 'r') as f:
try:
r = requests.post(url, data=read_in_chunks(f))
print "r: {0}".format(r)
except Exception, e:
print e
if __name__ == '__main__':
filename = 'bigfile.zip' # ~1GB
url = 'http://localhost:8080/upload/{0}'.format(filename)
main(filename, url)

kindly use 'file.stream.read(chunk_size)' instead of request.stream.read(chunk_size). It works for me...!

Old thread but I was looking for something similar so I'll post here anyway.
The server reads the file in write mode which will overwrite at each chunk. Prefer append mode:
with open(fileFullPath, "ab") as f:
The client needs to read the file in byte mode:
with open(content_path, "rb") as f:
Finally, the generator read_in_chunks needs to be used in a loop before being passed to the request:
def main(fname, url):
content_path = os.path.abspath(fname)
with open(content_path, "rb") as f:
try:
for data in read_in_chunks(f):
r = requests.post(url, data=data)
print("r: {0}".format(r))
except Exception as e:
print(e)
Then you have your 2 files
Server
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import os
app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "uploads/"
#app.route("/upload/<filename>", methods=["POST", "PUT"])
def upload_process(filename):
filename = secure_filename(filename)
fileFullPath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
with open(fileFullPath, "ab") as f:
chunk_size = 1024
chunk = request.stream.read(chunk_size)
f.write(chunk)
return jsonify({"filename": filename})
if __name__ == "__main__":
app.run(host="0.0.0.0", port=int("8080"), debug=True)
Client
import os
import requests
def read_in_chunks(file_object, chunk_size=1024):
while True:
data = file_object.read(chunk_size)
if not data:
break
yield data
def main(fname, url):
content_path = os.path.abspath(fname)
with open(content_path, "rb") as f:
try:
for data in read_in_chunks(f):
r = requests.post(url, data=data)
print("r: {0}".format(r))
except Exception as e:
print(e)
if __name__ == "__main__":
filename = "bigfile.zip" # ~1GB
url = "http://localhost:8080/upload/{0}".format(filename)
main(filename, url)
Note that posting un chunks usually requires the total number of chunks and a hash of the file to validate the upload.

Flask depends on werkzeug to process streams, and werkzeug demands a content length for a stream. There's a thread on this here, but no real solution currently available, other than to take another framework approach.

This example below should work very well for you all. If you use Redis, you can also pub/sub the chunk being processed for progression bar in another API.
from flask import Flask, request, jsonify
#app.route("/submit_vdo", methods=['POST'])
def submit_vdo():
#copy_current_request_context
def receive_chunk(stream, full_file_path):
if full_file_path is None:
tmpfile = tempfile.NamedTemporaryFile('wb+', prefix=str(uuid.uuid4())+"_")
full_file_path = tmpfile.name
print ('Write temp to ', full_file_path)
with open(full_file_path, "wb") as f:
max_chunk_size = settings.VIDEO_MAX_SIZE_CHUNK # config.MAX_UPLOAD_BYTE_LENGHT
count_chunks = 0
total_uploaded = 0
try:
while True:
print ('Chunk ', count_chunks)
chunk = stream.read(max_chunk_size)
if chunk is not None and len(chunk)>0:
total_uploaded += len(chunk)
count_chunks += 1
f.write(chunk)
temp = {}
temp ['chunk_counts'] = count_chunks
temp ['total_bytes'] = total_uploaded
temp ['status'] = 'uploading...'
temp ['success'] = True
db_apn_logging.set(user_id+"#CHUNK_DOWNLOAD", json.dumps(temp), ex=5)
print (temp)
else:
f.close()
temp = {}
temp ['chunk_counts'] = count_chunks
temp ['total_bytes'] = total_uploaded
temp ['status'] = 'DONE'
temp ['success'] = True
db_apn_logging.set(user_id+"#CHUNK_DOWNLOAD", json.dumps(temp), ex=5)
break
except Exception as e:
temp = {}
temp ['chunk_counts'] = count_chunks
temp ['total_bytes'] = total_uploaded
temp ['status'] = e
temp ['success'] = False
db_apn_logging.set(user_id+"#CHUNK_DOWNLOAD", json.dumps(temp), ex=5)
return None
return full_file_path
stream = flask.request.files['file']
stream.seek(0)
full_file_path = receive_chunk(stream, full_file_path)
return "DONE !"

BaseHttpServer returned code 404 with the Cyrillic alphabet

I have the following problem.
I used BaseHttpServer.
class ReqHandler( BaseHTTPServer.BaseHTTPRequestHandler):
def __init__(self, request, client_address, server):
BaseHTTPServer.BaseHTTPRequestHandler.__init__( self, request, client_address, server )
def do_GET(self ):
self.performReq(self.path.decode('utf-8'))
def performReq (self, req ):
curDir = os.getcwd()
fname = curDir + '/' + self.path[1:]
try:
self.send_response(200,"Ok!")
ext = os.path.splitext(self.path)[1]
self.send_header('Content', 'text/xml; charset=UTF-8' )
self.end_headers()
f = open(fname, 'rb')
for l in f:
self.wfile.write(l)
f.close()
print 'file '+fname+" Ok"
except IOError:
print 'no file '+fname
self.send_error(404)
if __name__=='__main__':
server = BaseHTTPServer.HTTPServer( ('',8081), ReqHandler )
print('server ok!')
server.serve_forever()
If the path to the file contains Cyrillic.
http://localhost:8081/ТРА/Понедельник/Пн.doc)
I get code 404.
Thank you.

URLs are not only encoded as UTF-8; they are also URL encoded. Decode that too, using the urllib.urlunquote() function:
from urllib import urlunquote
self.performReq(unlunquote(self.path).decode('utf-8'))
Demo:
>>> from urllib import unquote
>>> path = '/%D0%A2%D0%A0%D0%90/%D0%9F%D0%BE%D0%BD%D0%B5%D0%B4%D0%B5%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA/%D0%9F%D0%BD.doc'
>>> unquote(path).decode('utf8')
u'/\u0422\u0420\u0410/\u041f\u043e\u043d\u0435\u0434\u0435\u043b\u044c\u043d\u0438\u043a/\u041f\u043d.doc'
>>> print unquote(path).decode('utf8')
/ТРА/Понедельник/Пн.doc

Progress of Python requests post

I am uploading a large file using the Python requests package, and I can't find any way to give data back about the progress of the upload. I have seen a number of progress meters for downloading a file, but these will not work for a file upload.
The ideal solution would be some sort of callback method such as:
def progress(percent):
print percent
r = requests.post(URL, files={'f':hugeFileHandle}, callback=progress)
Thanks in advance for your help :)

requests doesn't support upload streaming e.g.:
import os
import sys
import requests # pip install requests
class upload_in_chunks(object):
def __init__(self, filename, chunksize=1 << 13):
self.filename = filename
self.chunksize = chunksize
self.totalsize = os.path.getsize(filename)
self.readsofar = 0
def __iter__(self):
with open(self.filename, 'rb') as file:
while True:
data = file.read(self.chunksize)
if not data:
sys.stderr.write("\n")
break
self.readsofar += len(data)
percent = self.readsofar * 1e2 / self.totalsize
sys.stderr.write("\r{percent:3.0f}%".format(percent=percent))
yield data
def __len__(self):
return self.totalsize
# XXX fails
r = requests.post("http://httpbin.org/post",
data=upload_in_chunks(__file__, chunksize=10))
btw, if you don't need to report progress; you could use memory-mapped file to upload large file.
To workaround it, you could create a file adaptor similar to the one from
urllib2 POST progress monitoring:
class IterableToFileAdapter(object):
def __init__(self, iterable):
self.iterator = iter(iterable)
self.length = len(iterable)
def read(self, size=-1): # TBD: add buffer for `len(data) > size` case
return next(self.iterator, b'')
def __len__(self):
return self.length
Example
it = upload_in_chunks(__file__, 10)
r = requests.post("http://httpbin.org/post", data=IterableToFileAdapter(it))
# pretty print
import json
json.dump(r.json, sys.stdout, indent=4, ensure_ascii=False)

I recommend to use a tool package named requests-toolbelt, which make monitoring upload bytes very easy, like
from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
import requests
def my_callback(monitor):
# Your callback function
print monitor.bytes_read
e = MultipartEncoder(
fields={'field0': 'value', 'field1': 'value',
'field2': ('filename', open('file.py', 'rb'), 'text/plain')}
)
m = MultipartEncoderMonitor(e, my_callback)
r = requests.post('http://httpbin.org/post', data=m,
headers={'Content-Type': m.content_type})
And you may want to read this to show a progress bar.

I got it working with the code from here: Simple file upload progressbar in PyQt.
I changed it a bit, to use BytesIO instead of StringIO.
class CancelledError(Exception):
def __init__(self, msg):
self.msg = msg
Exception.__init__(self, msg)
def __str__(self):
return self.msg
__repr__ = __str__
class BufferReader(BytesIO):
def __init__(self, buf=b'',
callback=None,
cb_args=(),
cb_kwargs={}):
self._callback = callback
self._cb_args = cb_args
self._cb_kwargs = cb_kwargs
self._progress = 0
self._len = len(buf)
BytesIO.__init__(self, buf)
def __len__(self):
return self._len
def read(self, n=-1):
chunk = BytesIO.read(self, n)
self._progress += int(len(chunk))
self._cb_kwargs.update({
'size' : self._len,
'progress': self._progress
})
if self._callback:
try:
self._callback(*self._cb_args, **self._cb_kwargs)
except: # catches exception from the callback
raise CancelledError('The upload was cancelled.')
return chunk
def progress(size=None, progress=None):
print("{0} / {1}".format(size, progress))
files = {"upfile": ("file.bin", open("file.bin", 'rb').read())}
(data, ctype) = requests.packages.urllib3.filepost.encode_multipart_formdata(files)
headers = {
"Content-Type": ctype
}
body = BufferReader(data, progress)
requests.post(url, data=body, headers=headers)
The trick is, to generate data and header from the files list manually, using encode_multipart_formdata() from urllib3

I know this is an old question, but I couldn't find an easy answer anywhere else, so hopefully this will help somebody else:
import requests
import tqdm
with open(file_name, 'rb') as f:
r = requests.post(url, data=tqdm(f.readlines()))

This solution uses requests_toolbelt and tqdm both well maintained and popular libraries.
from pathlib import Path
from tqdm import tqdm
import requests
from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
def upload_file(upload_url, fields, filepath):
path = Path(filepath)
total_size = path.stat().st_size
filename = path.name
with tqdm(
desc=filename,
total=total_size,
unit="B",
unit_scale=True,
unit_divisor=1024,
) as bar:
with open(filepath, "rb") as f:
fields["file"] = ("filename", f)
e = MultipartEncoder(fields=fields)
m = MultipartEncoderMonitor(
e, lambda monitor: bar.update(monitor.bytes_read - bar.n)
)
headers = {"Content-Type": m.content_type}
requests.post(upload_url, data=m, headers=headers)
Example usage
upload_url = 'https://uploadurl'
fields = {
"field1": value1,
"field2": value2
}
filepath = '97a6fce8_owners_2018_Van Zandt.csv'
upload_file(upload_url, fields, filepath)

Usually you would build a streaming datasource (a generator) that reads the file chunked and reports its progress on the way (see kennethreitz/requests#663. This does not work with requests file-api, because requests doesn’t support streaming uploads (see kennethreitz/requests#295) – a file to upload needs to be complete in memory before it starts getting processed.
but requests can stream content from a generator as J.F. Sebastian has proven before, but this generator needs to generate the complete datastream including the multipart encoding and boundaries. This is where poster comes to play.
poster is originally written to be used with pythons urllib2 and supports streaming generation of multipart requests, providing progress indication as it goes along. Posters Homepage provides examples of using it together with urllib2 but you really don’t want to use urllib2. Check out this example-code on how to to HTTP Basic Authentication with urllib2. Horrrrrrrrible.
So we really want to use poster together with requests to do file uploads with tracked progress. And here is how:
# load requests-module, a streamlined http-client lib
import requests
# load posters encode-function
from poster.encode import multipart_encode
# an adapter which makes the multipart-generator issued by poster accessable to requests
# based upon code from http://stackoverflow.com/a/13911048/1659732
class IterableToFileAdapter(object):
def __init__(self, iterable):
self.iterator = iter(iterable)
self.length = iterable.total
def read(self, size=-1):
return next(self.iterator, b'')
def __len__(self):
return self.length
# define a helper function simulating the interface of posters multipart_encode()-function
# but wrapping its generator with the file-like adapter
def multipart_encode_for_requests(params, boundary=None, cb=None):
datagen, headers = multipart_encode(params, boundary, cb)
return IterableToFileAdapter(datagen), headers
# this is your progress callback
def progress(param, current, total):
if not param:
return
# check out http://tcd.netinf.eu/doc/classnilib_1_1encode_1_1MultipartParam.html
# for a complete list of the properties param provides to you
print "{0} ({1}) - {2:d}/{3:d} - {4:.2f}%".format(param.name, param.filename, current, total, float(current)/float(total)*100)
# generate headers and gata-generator an a requests-compatible format
# and provide our progress-callback
datagen, headers = multipart_encode_for_requests({
"input_file": open('recordings/really-large.mp4', "rb"),
"another_input_file": open('recordings/even-larger.mp4', "rb"),
"field": "value",
"another_field": "another_value",
}, cb=progress)
# use the requests-lib to issue a post-request with out data attached
r = requests.post(
'https://httpbin.org/post',
auth=('user', 'password'),
data=datagen,
headers=headers
)
# show response-code and -body
print r, r.text

My upload server doesn't support Chunk-Encoded so I came up with this solution. It basically just a wrapper around python IOBase and allow tqdm.wrapattr to work seamless.
import io
import requests
from typing import Union
from tqdm import tqdm
from tqdm.utils import CallbackIOWrapper
class UploadChunksIterator(Iterable):
"""
This is an interface between python requests and tqdm.
Make tqdm to be accessed just like IOBase for requests lib.
"""
def __init__(
self, file: Union[io.BufferedReader, CallbackIOWrapper], total_size: int, chunk_size: int = 16 * 1024
): # 16MiB
self.file = file
self.chunk_size = chunk_size
self.total_size = total_size
def __iter__(self):
return self
def __next__(self):
data = self.file.read(self.chunk_size)
if not data:
raise StopIteration
return data
# we dont retrive len from io.BufferedReader because CallbackIOWrapper only has read() method.
def __len__(self):
return self.total_size
fp = "data/mydata.mp4"
s3url = "example.com"
_quiet = False
with open(fp, "rb") as f:
total_size = os.fstat(f.fileno()).st_size
if not _quiet:
f = tqdm.wrapattr(f, "read", desc=hv, miniters=1, total=total_size, ascii=True)
with f as f_iter:
res = requests.put(
url=s3url,
data=UploadChunksIterator(f_iter, total_size=total_size),
)
res.raise_for_status()

Making #jfs' answer better in terms of an informative progress bar.
import math
import os
import requests
import sys
class ProgressUpload:
def __init__(self, filename, chunk_size=1250):
self.filename = filename
self.chunk_size = chunk_size
self.file_size = os.path.getsize(filename)
self.size_read = 0
self.divisor = min(math.floor(math.log(self.file_size, 1000)) * 3, 9) # cap unit at a GB
self.unit = {0: 'B', 3: 'KB', 6: 'MB', 9: 'GB'}[self.divisor]
self.divisor = 10 ** self.divisor
def __iter__(self):
progress_str = f'0 / {self.file_size / self.divisor:.2f} {self.unit} (0 %)'
sys.stderr.write(f'\rUploading {dist_file}: {progress_str}')
with open(self.filename, 'rb') as f:
for chunk in iter(lambda: f.read(self.chunk_size), b''):
self.size_read += len(chunk)
yield chunk
sys.stderr.write('\b' * len(progress_str))
percentage = self.size_read / self.file_size * 100
completed_str = f'{self.size_read / self.divisor:.2f}'
to_complete_str = f'{self.file_size / self.divisor:.2f} {self.unit}'
progress_str = f'{completed_str} / {to_complete_str} ({percentage:.2f} %)'
sys.stderr.write(progress_str)
sys.stderr.write('\n')
def __len__(self):
return self.file_size
# sample usage
requests.post(upload_url, data=ProgressUpload('file_path'))
The key is the __len__ method. Without it, I was getting connection closed errors. That's the only reason you can't just use tqdm + iter to get a simple progress bar.

My python code that works great. Credit : twine
import sys
import tqdm
import requests
import requests_toolbelt
class ProgressBar(tqdm.tqdm):
def update_to(self, n: int) -> None:
self.update(n - self.n)
with open("test.zip", "rb") as fp:
data_to_send = []
session = requests.session()
data_to_send.append(
("files", ("test.zip", fp))
)
encoder = requests_toolbelt.MultipartEncoder(data_to_send)
with ProgressBar(
total=encoder.len,
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
file=sys.stdout,
) as bar:
monitor = requests_toolbelt.MultipartEncoderMonitor(
encoder, lambda monitor: bar.update_to(monitor.bytes_read)
)
r = session.post(
'http://httpbin.org/post',
data=monitor,
headers={"Content-Type": monitor.content_type},
)
print(r.text)

Send file using POST from a Python script

Is there a way to send a file using POST from a Python script?

From: https://requests.readthedocs.io/en/latest/user/quickstart/#post-a-multipart-encoded-file
Requests makes it very simple to upload Multipart-encoded files:
with open('report.xls', 'rb') as f:
r = requests.post('http://httpbin.org/post', files={'report.xls': f})
That's it. I'm not joking - this is one line of code. The file was sent. Let's check:
>>> r.text
{
"origin": "179.13.100.4",
"files": {
"report.xls": "<censored...binary...data>"
},
"form": {},
"url": "http://httpbin.org/post",
"args": {},
"headers": {
"Content-Length": "3196",
"Accept-Encoding": "identity, deflate, compress, gzip",
"Accept": "*/*",
"User-Agent": "python-requests/0.8.0",
"Host": "httpbin.org:80",
"Content-Type": "multipart/form-data; boundary=127.0.0.1.502.21746.1321131593.786.1"
},
"data": ""
}

Yes. You'd use the urllib2 module, and encode using the multipart/form-data content type. Here is some sample code to get you started -- it's a bit more than just file uploading, but you should be able to read through it and see how it works:
user_agent = "image uploader"
default_message = "Image $current of $total"
import logging
import os
from os.path import abspath, isabs, isdir, isfile, join
import random
import string
import sys
import mimetypes
import urllib2
import httplib
import time
import re
def random_string (length):
return ''.join (random.choice (string.letters) for ii in range (length + 1))
def encode_multipart_data (data, files):
boundary = random_string (30)
def get_content_type (filename):
return mimetypes.guess_type (filename)[0] or 'application/octet-stream'
def encode_field (field_name):
return ('--' + boundary,
'Content-Disposition: form-data; name="%s"' % field_name,
'', str (data [field_name]))
def encode_file (field_name):
filename = files [field_name]
return ('--' + boundary,
'Content-Disposition: form-data; name="%s"; filename="%s"' % (field_name, filename),
'Content-Type: %s' % get_content_type(filename),
'', open (filename, 'rb').read ())
lines = []
for name in data:
lines.extend (encode_field (name))
for name in files:
lines.extend (encode_file (name))
lines.extend (('--%s--' % boundary, ''))
body = '\r\n'.join (lines)
headers = {'content-type': 'multipart/form-data; boundary=' + boundary,
'content-length': str (len (body))}
return body, headers
def send_post (url, data, files):
req = urllib2.Request (url)
connection = httplib.HTTPConnection (req.get_host ())
connection.request ('POST', req.get_selector (),
*encode_multipart_data (data, files))
response = connection.getresponse ()
logging.debug ('response = %s', response.read ())
logging.debug ('Code: %s %s', response.status, response.reason)
def make_upload_file (server, thread, delay = 15, message = None,
username = None, email = None, password = None):
delay = max (int (delay or '0'), 15)
def upload_file (path, current, total):
assert isabs (path)
assert isfile (path)
logging.debug ('Uploading %r to %r', path, server)
message_template = string.Template (message or default_message)
data = {'MAX_FILE_SIZE': '3145728',
'sub': '',
'mode': 'regist',
'com': message_template.safe_substitute (current = current, total = total),
'resto': thread,
'name': username or '',
'email': email or '',
'pwd': password or random_string (20),}
files = {'upfile': path}
send_post (server, data, files)
logging.info ('Uploaded %r', path)
rand_delay = random.randint (delay, delay + 5)
logging.debug ('Sleeping for %.2f seconds------------------------------\n\n', rand_delay)
time.sleep (rand_delay)
return upload_file
def upload_directory (path, upload_file):
assert isabs (path)
assert isdir (path)
matching_filenames = []
file_matcher = re.compile (r'\.(?:jpe?g|gif|png)$', re.IGNORECASE)
for dirpath, dirnames, filenames in os.walk (path):
for name in filenames:
file_path = join (dirpath, name)
logging.debug ('Testing file_path %r', file_path)
if file_matcher.search (file_path):
matching_filenames.append (file_path)
else:
logging.info ('Ignoring non-image file %r', path)
total_count = len (matching_filenames)
for index, file_path in enumerate (matching_filenames):
upload_file (file_path, index + 1, total_count)
def run_upload (options, paths):
upload_file = make_upload_file (**options)
for arg in paths:
path = abspath (arg)
if isdir (path):
upload_directory (path, upload_file)
elif isfile (path):
upload_file (path)
else:
logging.error ('No such path: %r' % path)
logging.info ('Done!')

Looks like python requests does not handle extremely large multi-part files.
The documentation recommends you look into requests-toolbelt.
Here's the pertinent page from their documentation.

The only thing that stops you from using urlopen directly on a file object is the fact that the builtin file object lacks a len definition. A simple way is to create a subclass, which provides urlopen with the correct file.
I have also modified the Content-Type header in the file below.
import os
import urllib2
class EnhancedFile(file):
def __init__(self, *args, **keyws):
file.__init__(self, *args, **keyws)
def __len__(self):
return int(os.fstat(self.fileno())[6])
theFile = EnhancedFile('a.xml', 'r')
theUrl = "http://example.com/abcde"
theHeaders= {'Content-Type': 'text/xml'}
theRequest = urllib2.Request(theUrl, theFile, theHeaders)
response = urllib2.urlopen(theRequest)
theFile.close()
for line in response:
print line

Chris Atlee's poster library works really well for this (particularly the convenience function poster.encode.multipart_encode()). As a bonus, it supports streaming of large files without loading an entire file into memory. See also Python issue 3244.

I am trying to test django rest api and its working for me:
def test_upload_file(self):
filename = "/Users/Ranvijay/tests/test_price_matrix.csv"
data = {'file': open(filename, 'rb')}
client = APIClient()
# client.credentials(HTTP_AUTHORIZATION='Token ' + token.key)
response = client.post(reverse('price-matrix-csv'), data, format='multipart')
print response
self.assertEqual(response.status_code, status.HTTP_200_OK)

pip install http_file
#импорт вспомогательных библиотек
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import requests
#импорт http_file
from http_file import download_file
#создание новой сессии
s = requests.Session()
#соеденение с сервером через созданную сессию
s.get('URL_MAIN', verify=False)
#загрузка файла в 'local_filename' из 'fileUrl' через созданную сессию
download_file('local_filename', 'fileUrl', s)

You may also want to have a look at httplib2, with examples. I find using httplib2 is more concise than using the built-in HTTP modules.

def visit_v2(device_code, camera_code):
image1 = MultipartParam.from_file("files", "/home/yuzx/1.txt")
image2 = MultipartParam.from_file("files", "/home/yuzx/2.txt")
datagen, headers = multipart_encode([('device_code', device_code), ('position', 3), ('person_data', person_data), image1, image2])
print "".join(datagen)
if server_port == 80:
port_str = ""
else:
port_str = ":%s" % (server_port,)
url_str = "http://" + server_ip + port_str + "/adopen/device/visit_v2"
headers['nothing'] = 'nothing'
request = urllib2.Request(url_str, datagen, headers)
try:
response = urllib2.urlopen(request)
resp = response.read()
print "http_status =", response.code
result = json.loads(resp)
print resp
return result
except urllib2.HTTPError, e:
print "http_status =", e.code
print e.read()

I tried some of the options here, but I had some issue with the headers ('files' field was empty).
A simple mock to explain how I did the post using requests and fixing the issues:
import requests
url = 'http://127.0.0.1:54321/upload'
file_to_send = '25893538.pdf'
files = {'file': (file_to_send,
open(file_to_send, 'rb'),
'application/pdf',
{'Expires': '0'})}
reply = requests.post(url=url, files=files)
print(reply.text)
More at https://requests.readthedocs.io/en/latest/user/quickstart/
To test this code, you could use a simple dummy server as this one (thought to run in a GNU/Linux or similar):
import os
from flask import Flask, request, render_template
rx_file_listener = Flask(__name__)
files_store = "/tmp"
#rx_file_listener.route("/upload", methods=['POST'])
def upload_file():
storage = os.path.join(files_store, "uploaded/")
print(storage)
if not os.path.isdir(storage):
os.mkdir(storage)
try:
for file_rx in request.files.getlist("file"):
name = file_rx.filename
destination = "/".join([storage, name])
file_rx.save(destination)
return "200"
except Exception:
return "500"
if __name__ == "__main__":
rx_file_listener.run(port=54321, debug=True)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How do I stream a file using werkzeug? - python

Haven't tried myself but I think following will work. g = file(path_to_bigfile) # or any generator return Response(g, direct_passthrough=True)

Related

python http server threading via cli

Python Flask chunk data upload not working

BaseHttpServer returned code 404 with the Cyrillic alphabet

Progress of Python requests post

Send file using POST from a Python script

Categories

Resources