I have a bottle "server" serving block device images in .raw format. Unfortunately, after the download finishes (successfully or not, it doesn't matter) I have to run one more method to unmap the exported block device.
Below is the code used to export the image; it only gives me errors (cannot unmap device as it's used by a different process) and isn't exporting images as intended:
class ExportResponse(HTTPResponse):
    def __init__(self, devpath, volume_name):
        self.devpath = devpath
        self.volume_name = volume_name
        output_filename = "%s.raw" % (volume_name)
        fp = open(devpath)
        content_length = os.lseek(fp.fileno(), 0, os.SEEK_END)
        os.lseek(fp.fileno(), 0, os.SEEK_SET)
        headers = {
            'Content-Length': str(content_length),
            'Content-Type': "application/octet-stream",
            'Content-Disposition': 'attachment; filename="%s"' % output_filename
        }
        log.info("%s, %s" % (os.SEEK_CUR, content_length))
        super(ExportResponse, self).__init__(fp, **headers)

@get('/retrieve/<volume_name>/')
def export_volume(volume_name):
    fmt = request.query['format']
    dev = find_mapping(volume_name)
    if dev:
        log.debug("export, name=%s, already mapped dev=%s" % (repr(volume_name), str(dev)))
    dev_owner = dev is None
    try:
        if not dev:
            map_volume(volume_name)
            dev = find_mapping(volume_name)
            log.debug("export, name=%s, mapped to dev=%s" % (repr(volume_name), str(dev)))
        return ExportResponse(dev, volume_name)
    except:
        log.exception("export, name=%s" % repr(volume_name))
        if dev and dev_owner:
            unmap_volume(volume_name)
        raise
    finally:
        unmap_volume(volume_name)
Is there any good way of doing this?
There are two ways of doing this.
1) Read the entire image into memory using a BytesIO object, and pass that as the stream handle to __init__ instead of the handle to the actual file (a sketch of this approach follows below).
2) Obtain from bottle the raw handle to the response stream and use the write() method on the stream to write chunks of the file yourself, closing it when you are done. I am not sure how to get that stream from bottle, but I know it exists because the WSGI protocol specifies the response is passed as a stream.
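A minimal sketch of option 1, adapted from the ExportResponse in the question (assuming the same bottle version and helpers): the whole device is read into memory up front, at the cost of holding the entire image in RAM, so export_volume can unmap the device right after constructing the response.

import io

class ExportResponse(HTTPResponse):
    def __init__(self, devpath, volume_name):
        # Read the whole image into memory; the device handle is closed
        # immediately, so nothing keeps the mapping busy while the
        # response is being streamed to the client.
        with open(devpath, 'rb') as fp:
            data = fp.read()
        headers = {
            'Content-Length': str(len(data)),
            'Content-Type': 'application/octet-stream',
            'Content-Disposition': 'attachment; filename="%s.raw"' % volume_name,
        }
        super(ExportResponse, self).__init__(io.BytesIO(data), **headers)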
I am trying to write a file to a specific mount location in Linux. The API returns a path which is required for further operations. The problem is that if the file size is huge, I face a request time-out error, because of which I am not able to get the path. The code is as follows:
@migration_blueprint.route("/migration/upload", methods=["POST"])
def upload_migration_file():
    file_abs_path = ""
    try:
        file = request.files['files']
        logger.debug("The file received is '{}'".format(file))
        file_name = str(datetime.now().strftime("%H%M%s")) + file.filename
        proxy_bin = db.find_one("bins", query={"bin_type": "proxy", "status": "active"})
        if not proxy_bin:
            raise Exception("Proxy Bin not found")
        base_proxy_path = "/mnt/share_{}/migration/".format(proxy_bin['_id'])
        if not os.path.exists(base_proxy_path):
            os.makedirs(base_proxy_path)
        file_abs_path = os.path.join(base_proxy_path, file_name)
        file.save(file_abs_path)
    except Exception as ex:
        logger.exception("Error : {}".format(str(ex)))
        abort(500, {"message": str(ex)})
    return {"path": file_abs_path}
Is there a workaround so that, regardless of the file size, the file gets written to the location and the path is returned in the response in time?
You could upload files via AJAX and poll the server at intervals until the filename is ready (you could also use server-sent events), or you could use websockets to upload the file and then update the client when the filename is available.
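Here is a minimal server-side sketch of the polling approach, assuming the same Flask blueprint and upload field as the question; the in-memory job dict, the status endpoint, and the destination directory are illustrative and not part of the original code.

import os
import shutil
import tempfile
import threading
import uuid

from flask import request

jobs = {}  # job_id -> final path once the copy to the mount has finished

@migration_blueprint.route("/migration/upload", methods=["POST"])
def upload_migration_file():
    file = request.files['files']
    # Spool the upload to fast local storage so the request returns quickly.
    tmp_fd, tmp_path = tempfile.mkstemp()
    os.close(tmp_fd)
    file.save(tmp_path)

    job_id = uuid.uuid4().hex
    jobs[job_id] = None
    dest = os.path.join("/mnt/share_example/migration/", file.filename)

    def copy_to_mount():
        shutil.move(tmp_path, dest)  # the slow copy to the mount happens off-request
        jobs[job_id] = dest

    threading.Thread(target=copy_to_mount).start()
    return {"job_id": job_id}

@migration_blueprint.route("/migration/status/<job_id>")
def migration_status(job_id):
    # The client polls this endpoint until "path" is non-null.
    return {"path": jobs.get(job_id)}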
I tried to implement the solution through multiprocessing in Python.
from multiprocessing import Process

def copy_file(file_data, abs_path):
    file_data.save(abs_path)
In the API I updated it:
p = Process(target=copy_file, args=(file, file_abs_path))
p.start()
# file.save(file_abs_path)
I have a function view that creates a report using xlsxwriter; it is built on the fly using a StringIO as buffer and finally sent through HttpResponse. It works well on the local server.
The problem is that on Heroku, after some seconds (the documentation mentions a 30-second timeout that cannot be modified) the server times out and restarts the web process, returning an error as the response.
What is the best way to:
create an xlsx file on the fly (dynamically) in memory,
serve the entire file to the client, and
prevent the server from timing out because of the long-running process?
This is a piece of the code I am using:
def reporte_usuarios(request):
    from xlsxwriter.workbook import Workbook
    try:
        import cStringIO as StringIO
    except ImportError:
        import StringIO
    # create a workbook in memory
    output = StringIO.StringIO()
    workbook = Workbook(output)
    bold = workbook.add_format({'bold': True})
    # get the data
    from django.db.models import Count
    usuarios = User.objects.filter(....... # all filter stuff
    for usr in usuarios:
        if usr.activos > 0:
            # create a workbook sheet for every User registered
            ws = workbook.add_worksheet(u'%s' % usr.username)
            # some relevant user data
            ws.write(1, 1, u'USUARIO: %s' % usr.username)
            ...
            # get rows for user
            log = LogActivos.objects.filter(usuario=usr).select_related('activo__unidad__id', 'activo__unidad__nombre', 'activo__nombre')
            # write headers
            ws.write(3, 0, u'FECHA', bold)
            ...
            sig_fila = 4  # starting row for data (after headers)
            for l in log:
                # write all data
                ws.write(sig_fila, 0, u'%s' % l.fecha)
                ...
                sig_fila += 1
    # close the workbook
    workbook.close()
    # go to the beginning of the buffer
    output.seek(0)
    # response using the buffer
    response = HttpResponse(output.read(), content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
    response['Content-Disposition'] = 'attachment; filename="ACTIVOS_USUARIOS__%s.xlsx"' % datetime.now().strftime("%Y%m%d_%H%M")
    return response
Notes: I am using Gunicorn on Heroku, django 1.9.13 and python 2.7.11
IMHO you should follow a totally different approach in this case.
As you are generating a rather big file, it's normal for the request to time out.
What you could do instead is deploy a background task queue, like Celery or DjangoRQ. With that, a background task creates the file from your user's data, and then you can let the user know it's ready by any means, like a notification or an email.
If you need more details regarding how you can do something like this, let me know and I can help :)
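For illustration, here is a hedged sketch of such a background task using Celery, assuming Celery is already configured for the Django project; the task name, the email notification, and the worksheet contents are placeholders, not part of the original view.

from io import BytesIO

from celery import shared_task
from django.core.mail import EmailMessage
from xlsxwriter.workbook import Workbook

@shared_task
def build_user_report(email_address):
    # Build the workbook in memory, exactly as the view does now,
    # but outside the request/response cycle.
    output = BytesIO()
    workbook = Workbook(output)
    ws = workbook.add_worksheet('reporte')
    ws.write(0, 0, u'USUARIO')
    # ... the rest of the existing report-building loop goes here ...
    workbook.close()
    output.seek(0)

    # Let the user know it is ready, e.g. by mailing the finished file.
    msg = EmailMessage('Your report is ready', 'The report is attached.', to=[email_address])
    msg.attach('ACTIVOS_USUARIOS.xlsx', output.read(),
               'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
    msg.send()

The view then only needs to call build_user_report.delay(request.user.email) and return immediately, well within Heroku's 30-second limit.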
Using requests_toolbelt to upload large files in a multipart form, I have constructed the method below, which succeeds in uploading the file; however, I cannot access the posted filename. How do I access the filename on the server?
# client-side
file = open('/Volumes/Extra/test/my_video.mpg', 'rb')
payload = MultipartEncoder({file.name: file})
r = requests.post(url, data=payload, headers={'Content-Type': 'application/octet-stream'})

# server-side
@view_config(route_name='remote.agent_upload', renderer='json')
def remote_agent_upload(request):
    r = request.response
    fs = request.body_file
    f = open('/Volumes/Extra/tests2/bar.mpg', 'wb')  # wish to use filename here
    f.write(fs.read())
    fs.close()
    f.close()
    return r
OK, it looks like you are using the name of the file as the field name. Also, the way you are doing it, it seems the entire post content is being written to the file... Is this the desired outcome? Have you tried to actually play your mpg files after you write them on the server side?
I don't have an HTTP server readily available to test at the moment which automagically gives me a request object, but I am assuming that the request object is a webob.Request object (at least it seems like that is the case, please correct me if I'm wrong)
OK, let me show you my test. (This works on python3.4, not sure what version of Python you are using, but I think it should also work on Python 2.7 - not tested though)
The code in this test is a bit long, but it is heavily commented to help you understand what I did every step of the way. Hopefully, it will give you a better understanding of how HTTP requests and responses work in python with the tools you are using
# My Imports
from requests_toolbelt import MultipartEncoder
from webob import Request
import io
# Create a buffer object that can be read by the MultipartEncoder class
# This works just like an open file object
file = io.BytesIO()
# The file content will be simple for my test.
# But you could just as easily have a multi-megabyte mpg file
# Write the contents to the file
file.write(b'test mpg content')
# Then seek to the beginning of the file so that the
# MultipartEncoder can read it from the beginning
file.seek(0)
# Create the payload
payload = MultipartEncoder(
    {
        # The name of the file upload field... Not the file name
        'uploadedFile': (
            # This would be the name of the file
            'This is my file.mpg',
            # The file handle that is ready to be read from
            file,
            # The content type of the file
            'application/octet-stream'
        )
    }
)
# To send the file, you would use the requests.post method
# But the content type is not application-octet-stream
# The content type is multipart/form-data; with a boundary string
# Without the proper header type, your server would not be able to
# figure out where the file begins and ends and would think the
# entire post content is the file, which it is not. The post content
# might even contain multiple files
# So, to send your file, you would use:
#
# response = requests.post(url, data=payload, headers={'Content-Type': payload.content_type})
# Instead of sending the payload to the server,
# I am just going to grab the output as it would be sent
# This is because I don't have a server, but I can easily
# re-create the object using this output
postData = payload.to_string()
# Create an input buffer object
# This will be read by our server (our webob.Request object)
inputBuffer = io.BytesIO()
# Write the post data to the input buffer so that the webob.Request object can read it
inputBuffer.write(postData)
# And, once again, seek to 0
inputBuffer.seek(0)
# Create an error buffer so that errors can be written to it if there are any
errorBuffer = io.BytesIO()
# Setup our wsgi environment just like the server would give us
environment = {
    'HTTP_HOST': 'localhost:80',
    'PATH_INFO': '/index.py',
    'QUERY_STRING': '',
    'REQUEST_METHOD': 'POST',
    'SCRIPT_NAME': '',
    'SERVER_NAME': 'localhost',
    'SERVER_PORT': '80',
    'SERVER_PROTOCOL': 'HTTP/1.0',
    'CONTENT_TYPE': payload.content_type,
    'wsgi.errors': errorBuffer,
    'wsgi.input': inputBuffer,
    'wsgi.multiprocess': False,
    'wsgi.multithread': False,
    'wsgi.run_once': False,
    'wsgi.url_scheme': 'http',
    'wsgi.version': (1, 0)
}
# Create our request object
# This is the same as your request object and should have all our info for reading
# the file content as well as the file name
request = Request(environment)
# At this point, the request object is the same as what you get on your server
# So, from this point on, you can use the following code to get
# your actual file content as well as your file name from the object
# Our uploaded file is in the POST. And the POST field name is 'uploadedFile'
# Grab our file so that it can be read
uploadedFile = request.POST['uploadedFile']
# To read our content, you can use uploadedFile.file.read()
print(uploadedFile.file.read())
# And to get the file name, you can use uploadedFile.filename
print(uploadedFile.filename)
So, I think this modified code will work for you. (Hopefully)
Again, not tested because I don't actually have a server to test with. And also, I don't know what kind of object your "request" object is on the server side.... OK, here goes:
# client-side
from requests_toolbelt import MultipartEncoder
import requests

file = open('/Volumes/Extra/test/my_video.mpg', 'rb')
payload = MultipartEncoder({'uploadedFile': (file.name, file, 'application/octet-stream')})
r = requests.post('http://somewhere/somefile.py', data=payload, headers={'Content-Type': payload.content_type})

# server-side
@view_config(route_name='remote.agent_upload', renderer='json')
def remote_agent_upload(request):
    # Write your actual file contents, not the post data which contains the multipart boundary
    uploadedFile = request.POST['uploadedFile']
    fs = uploadedFile.file
    # The file name is insecure. What if the file name comes through as '../../../etc/passwd'?
    # If you don't secure this, you've just wiped your /etc/passwd file and your server is toast
    # (assuming the web user has write permission to the /etc/passwd file,
    # which it shouldn't, but just giving you a worst case scenario)
    fileName = uploadedFile.filename
    # Secure the fileName here...
    # Make sure it doesn't have any slashes or double dots, or illegal characters, etc.
    # I'll leave that up to you
    # Write the file
    f = open('/Volumes/Extra/tests2/' + fileName, 'wb')
    f.write(fs.read())
Probably too late for the original OP, but this may help someone else. This is how I upload a file with accompanying JSON in a multipart/form-data upload using the MultipartEncoder, when I need the file to be uploaded as binary together with a single JSON string as part of a multipart request (so there are just two parts, the file and the JSON). Note that in creating my request header (it's a custom header as designated by the receiving server) I get the content_type from the encoded object (it usually comes through as multipart/form-data). I'm using simplejson.dumps, but you can just use json.dumps, I believe.
m = MultipartEncoder([
    ('json', (None, simplejson.dumps(datapayload), 'text/plain')),
    ('file', (os.path.basename(file_path), open(file_path, 'rb'), 'text/plain'))],
    None, encoding='utf-8')

headers = {'Authorization': 'JwToken' + ' ' + jwt_str, 'content-type': m.content_type}

response = requests.post(uri, headers=headers, data=m, timeout=45, verify=True)
In the file part, the field is called "file", but I use os.path.basename(file_path) to get just the filename from a full file path, e.g. c:\temp\mytestfile.txt. I could just as easily give the file a different name (not its original one) in this field if I wanted to.
I use the Python Requests library to download a big file, e.g.:
r = requests.get("http://bigfile.com/bigfile.bin")
content = r.content
The big file downloads at about 30 KB per second, which is a bit slow. Every connection to the bigfile server is throttled, so I would like to make multiple connections.
Is there a way to make multiple connections at the same time to download one file?
You can use the HTTP Range header to fetch just part of the file (already covered for Python here).
Just start several threads, fetch a different range with each, and you're done ;)
import threading
import urllib2

url = "http://bigfile.com/bigfile.bin"
chunk_size = 1 << 20  # bytes per ranged request (example value)

def download(url, start):
    req = urllib2.Request(url)
    req.headers['Range'] = 'bytes=%s-%s' % (start, start + chunk_size - 1)
    f = urllib2.urlopen(req)
    parts[start] = f.read()

threads = []
parts = {}

# Initialize threads
for i in range(0, 10):
    t = threading.Thread(target=download, args=(url, i * chunk_size))
    t.start()
    threads.append(t)

# Join threads back (order doesn't matter, you just want them all)
for i in threads:
    i.join()

# Sort parts and you're done
result = ''.join(parts[i] for i in sorted(parts.keys()))
Also note that not every server supports the Range header (and especially servers with PHP scripts responsible for data fetching often don't implement handling of it).
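As a small, hedged helper, you can check up front whether a server advertises Range support (urllib2, matching the Python 2 style used above); note that some servers honour Range without sending an Accept-Ranges header, so a negative result is only a hint.

import urllib2

def supports_byte_ranges(url):
    req = urllib2.Request(url)
    req.get_method = lambda: 'HEAD'  # ask for headers only, no body
    resp = urllib2.urlopen(req)
    return resp.info().getheader('Accept-Ranges', '').lower() == 'bytes'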
Here's a Python script that saves a given URL to a file and uses multiple threads to download it:
#!/usr/bin/env python
import sys
from functools import partial
from itertools import count, izip
from multiprocessing.dummy import Pool  # use threads
from urllib2 import HTTPError, Request, urlopen

def download_chunk(url, byterange):
    req = Request(url, headers=dict(Range='bytes=%d-%d' % byterange))
    try:
        return urlopen(req).read()
    except HTTPError as e:
        return b'' if e.code == 416 else None  # treat range error as EOF
    except EnvironmentError:
        return None

def main():
    url, filename = sys.argv[1:]
    pool = Pool(4)  # define number of concurrent connections
    chunksize = 1 << 16
    ranges = izip(count(0, chunksize), count(chunksize - 1, chunksize))
    with open(filename, 'wb') as file:
        for s in pool.imap(partial(download_chunk, url), ranges):
            if not s:
                break  # error or EOF
            file.write(s)
            if len(s) != chunksize:
                break  # EOF (servers with no Range support end up here)

if __name__ == "__main__":
    main()
The end of file is detected if a server returns an empty body, or a 416 HTTP code, or if the response size is not exactly chunksize.
It supports servers that don't understand the Range header (in that case, everything is downloaded in a single request; to support large files, change download_chunk() to save to a temporary file and return the filename to be read in the main thread instead of the file content itself).
It allows you to independently change the number of concurrent connections (pool size) and the number of bytes requested in a single HTTP request.
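A hedged sketch of the temporary-file variant mentioned above, reusing the script's Python 2 imports: each worker streams its response to a temp file and returns the file name, so even a whole-file response from a server without Range support never has to sit in memory.

import shutil
import tempfile

def download_chunk_to_file(url, byterange):
    req = Request(url, headers=dict(Range='bytes=%d-%d' % byterange))
    try:
        resp = urlopen(req)
    except HTTPError as e:
        return '' if e.code == 416 else None  # treat range error as EOF
    except EnvironmentError:
        return None
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        shutil.copyfileobj(resp, tmp)  # stream to disk instead of holding the body in memory
        return tmp.name

# In main(), the writing loop would then open each returned name, copy it into
# the output file with shutil.copyfileobj(), delete the temp file, and use
# os.path.getsize() instead of len(s) for the end-of-file check.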
To use multiple processes instead of threads, change the import:
from multiprocessing.pool import Pool # use processes (other code unchanged)
This solution requires the Linux utility named "aria2c", but it has the advantage of easily resuming downloads.
It also assumes that all the files you want to download are listed in the HTTP directory listing for location MY_HTTP_LOC. I tested this script on an instance of the lighttpd/1.4.26 HTTP server, but you can easily modify it to work with other setups.
#!/usr/bin/python
import os
import urllib
import re
import subprocess

MY_HTTP_LOC = "http://AAA.BBB.CCC.DDD/"

# retrieve webpage source code
f = urllib.urlopen(MY_HTTP_LOC)
page = f.read()
f.close()

# extract relevant URL segments from source code
rgxp = '(\<td\ class="n"\>\<a\ href=")([0-9a-zA-Z\(\)\-\_\.]+)(")'
results = re.findall(rgxp, str(page))
files = []
for match in results:
    files.append(match[1])

# download (using aria2c) files
for afile in files:
    if os.path.exists(afile) and not os.path.exists(afile + '.aria2'):
        print 'Skipping already-retrieved file: ' + afile
    else:
        print 'Downloading file: ' + afile
        subprocess.Popen(["aria2c", "-x", "16", "-s", "20", MY_HTTP_LOC + str(afile)]).wait()
You could use a module called pySmartDL for this; it uses multiple threads, can do a lot more, and also shows a download progress bar by default.
For more info check this answer.
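If the extra dependency is acceptable, a minimal usage sketch based on pySmartDL's documented API (the destination directory is just an example) looks like this:

from pySmartDL import SmartDL

url = "http://bigfile.com/bigfile.bin"
obj = SmartDL(url, "./downloads/")  # downloads with several connections by default
obj.start()                         # blocks until finished, showing a progress bar
print(obj.get_dest())               # path of the downloaded file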
I have seen the recipes for uploading files via multipart/form-data and pycurl. Both methods seem to require a file on disk. Can these recipes be modified to supply binary data from memory instead of from disk? I guess I could just use an XML-RPC server instead. I wanted to get around having to encode and decode the binary data and send it raw... Do pycurl and multipart/form-data work with raw data?
This (small) library will take a file descriptor, and will do the HTTP POST operation: https://github.com/seisen/urllib2_file/
You can pass it a StringIO object (containing your in-memory data) as the file descriptor.
Find something that can work with a file handle. Then simply pass a StringIO object instead of a real file descriptor.
I ran into a similar issue today. After trying both pycurl and multipart/form-data, I decided to read the Python httplib/urllib2 source code to find out, and came up with a reasonably good solution:
set the Content-Length header (of the file) before doing the POST
pass an opened file object when doing the POST
Here is the code:
import urllib2, os
image_path = "png\\01.png"
url = 'http://xx.oo.com/webserviceapi/postfile/'
length = os.path.getsize(image_path)
png_data = open(image_path, "rb")
request = urllib2.Request(url, data=png_data)
request.add_header('Cache-Control', 'no-cache')
request.add_header('Content-Length', '%d' % length)
request.add_header('Content-Type', 'image/png')
res = urllib2.urlopen(request).read().strip()
return res
see my blog post: http://www.2maomao.com/blog/python-http-post-a-binary-file-using-urllib2/
The following Python code works reliably on 2.6.x. The input data is of type str.
Note that the server receiving the data has to loop to read all of it, since large data sizes will be chunked. A Java code snippet for reading the chunked data is also attached.
def post(self, url, data):
    self.curl = pycurl.Curl()
    self.response_headers = StringIO.StringIO()
    self.response_body = io.BytesIO()
    self.curl.setopt(pycurl.WRITEFUNCTION, self.response_body.write)
    self.curl.setopt(pycurl.HEADERFUNCTION, self.response_headers.write)
    self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
    self.curl.setopt(pycurl.MAXREDIRS, 5)
    self.curl.setopt(pycurl.TIMEOUT, 60)
    self.curl.setopt(pycurl.ENCODING, "deflate, gzip")
    self.curl.setopt(pycurl.URL, url)
    self.curl.setopt(pycurl.VERBOSE, 1)
    self.curl.setopt(pycurl.POST, 1)
    self.curl.setopt(pycurl.POSTFIELDS, data)
    self.curl.setopt(pycurl.HTTPHEADER, ["Content-Type: application/octet-stream"])
    self.curl.setopt(pycurl.POSTFIELDSIZE, len(data))
    self.curl.perform()
    return url, self.curl.getinfo(pycurl.RESPONSE_CODE), self.response_headers.getvalue(), self.response_body.getvalue()
Java code for the servlet engine:
int postSize = Integer.parseInt(req.getHeader("Content-Length"));
results = new byte[postSize];
int read = 0;
while(read < postSize) {
int n = req.getInputStream().read(results);
if (n < 0) break;
read += n;
}
Found a solution that works with the CherryPy file upload example: urllib2-binary-upload.py
import io # Part of core Python
import requests # Install via: 'pip install requests'
# Get the data in bytes. I got it via:
# with open("smile.gif", "rb") as fp: data = fp.read()
data = b"GIF89a\x12\x00\x12\x00\x80\x01\x00\x00\x00\x00\xff\x00\x00!\xf9\x04\x01\n\x00\x01\x00,\x00\x00\x00\x00\x12\x00\x12\x00\x00\x024\x84\x8f\x10\xcba\x8b\xd8ko6\xa8\xa0\xb3Wo\xde9X\x18*\x15x\x99\xd9'\xad\x1b\xe5r(9\xd2\x9d\xe9\xdd\xde\xea\xe6<,\xa3\xd8r\xb5[\xaf\x05\x1b~\x8e\x81\x02\x00;"
# Hookbin is similar to requestbin - just a neat little service
# which helps you to understand which queries are sent
url = 'https://hookb.in/je2rEl733Yt9dlMMdodB'
in_memory_file = io.BytesIO(data)
response = requests.post(url, files=(("smile.gif", in_memory_file),))