Is there a way to send a file using POST from a Python script?
From: https://requests.readthedocs.io/en/latest/user/quickstart/#post-a-multipart-encoded-file
Requests makes it very simple to upload Multipart-encoded files:
# Stream report.xls as a multipart/form-data upload; the dict key is the
# form field name, and requests reads the open handle for the payload.
with open('report.xls', 'rb') as f:
    r = requests.post('http://httpbin.org/post', files={'report.xls': f})
That's it. I'm not joking - this is one line of code. The file was sent. Let's check:
>>> r.text
{
"origin": "179.13.100.4",
"files": {
"report.xls": "<censored...binary...data>"
},
"form": {},
"url": "http://httpbin.org/post",
"args": {},
"headers": {
"Content-Length": "3196",
"Accept-Encoding": "identity, deflate, compress, gzip",
"Accept": "*/*",
"User-Agent": "python-requests/0.8.0",
"Host": "httpbin.org:80",
"Content-Type": "multipart/form-data; boundary=127.0.0.1.502.21746.1321131593.786.1"
},
"data": ""
}
Yes. You'd use the urllib2 module, and encode using the multipart/form-data content type. Here is some sample code to get you started -- it's a bit more than just file uploading, but you should be able to read through it and see how it works:
# Module-level defaults for the uploader script below.
user_agent = "image uploader"
# string.Template text; $current and $total are substituted per upload
# (see make_upload_file).
default_message = "Image $current of $total"
import logging
import os
from os.path import abspath, isabs, isdir, isfile, join
import random
import string
import sys
import mimetypes
import urllib2
import httplib
import time
import re
def random_string (length):
    """Return a random alphabetic string of exactly *length* characters.

    Used to build multipart boundaries and throwaway passwords.
    Bug fixes: the original used ``range(length + 1)`` and therefore
    returned length + 1 characters (off-by-one); ``string.letters`` is
    Python-2-only and locale-dependent, so the stable ASCII-only
    ``string.ascii_letters`` is used instead.
    """
    return ''.join (random.choice (string.ascii_letters) for ii in range (length))
def encode_multipart_data (data, files):
    """Build a multipart/form-data request body (Python 2 only).

    data  -- dict of plain form fields (values coerced with str())
    files -- dict mapping form field name -> path of the file to attach

    Returns (body, headers) ready to hand to httplib's request().
    NOTE(review): every file is read fully into memory, and binary file
    contents are joined into a Python 2 ``str`` body — this does not work
    unchanged on Python 3 and is unsuitable for very large files.
    """
    boundary = random_string (30)

    def get_content_type (filename):
        # Fall back to a generic binary type when the extension is unknown.
        return mimetypes.guess_type (filename)[0] or 'application/octet-stream'

    def encode_field (field_name):
        # One plain form field: boundary, disposition header, blank line, value.
        return ('--' + boundary,
                'Content-Disposition: form-data; name="%s"' % field_name,
                '', str (data [field_name]))

    def encode_file (field_name):
        # One file part; the whole file is read into memory here.
        filename = files [field_name]
        return ('--' + boundary,
                'Content-Disposition: form-data; name="%s"; filename="%s"' % (field_name, filename),
                'Content-Type: %s' % get_content_type(filename),
                '', open (filename, 'rb').read ())

    lines = []
    for name in data:
        lines.extend (encode_field (name))
    for name in files:
        lines.extend (encode_file (name))
    # Closing boundary marker plus trailing CRLF.
    lines.extend (('--%s--' % boundary, ''))
    body = '\r\n'.join (lines)
    # content-length is len(body) — correct for Py2 byte strings only.
    headers = {'content-type': 'multipart/form-data; boundary=' + boundary,
               'content-length': str (len (body))}
    return body, headers
def send_post(url, data, files):
    """POST *data* and *files* to *url* as multipart/form-data and log the reply."""
    parsed = urllib2.Request(url)
    body, headers = encode_multipart_data(data, files)
    conn = httplib.HTTPConnection(parsed.get_host())
    conn.request('POST', parsed.get_selector(), body, headers)
    reply = conn.getresponse()
    logging.debug('response = %s', reply.read())
    logging.debug('Code: %s %s', reply.status, reply.reason)
def make_upload_file (server, thread, delay = 15, message = None,
                      username = None, email = None, password = None):
    """Return an upload_file(path, current, total) closure bound to one target.

    server   -- URL the multipart POST is sent to
    thread   -- thread id placed in the 'resto' form field
    delay    -- seconds to wait after each upload (clamped to >= 15)
    message  -- string.Template text; $current/$total are substituted
    username/email/password -- optional identity form fields
    """
    # Accept None/str/int for delay, but never go below 15 seconds.
    delay = max (int (delay or '0'), 15)

    def upload_file (path, current, total):
        assert isabs (path)
        assert isfile (path)
        logging.debug ('Uploading %r to %r', path, server)
        message_template = string.Template (message or default_message)
        # Imageboard-style form fields; pwd falls back to a random value.
        data = {'MAX_FILE_SIZE': '3145728',
                'sub': '',
                'mode': 'regist',
                'com': message_template.safe_substitute (current = current, total = total),
                'resto': thread,
                'name': username or '',
                'email': email or '',
                'pwd': password or random_string (20),}
        files = {'upfile': path}
        send_post (server, data, files)
        logging.info ('Uploaded %r', path)
        # Jittered sleep between uploads to stay under server flood limits.
        rand_delay = random.randint (delay, delay + 5)
        logging.debug ('Sleeping for %.2f seconds------------------------------\n\n', rand_delay)
        time.sleep (rand_delay)

    return upload_file
def upload_directory (path, upload_file):
    """Walk *path* recursively and upload every image file found.

    path        -- absolute path to an existing directory
    upload_file -- callable (file_path, current, total) performing one upload

    Files are matched by extension (.jpg/.jpeg/.gif/.png, case-insensitive);
    everything else is logged and skipped.
    """
    assert isabs (path)
    assert isdir (path)
    matching_filenames = []
    file_matcher = re.compile (r'\.(?:jpe?g|gif|png)$', re.IGNORECASE)
    for dirpath, dirnames, filenames in os.walk (path):
        for name in filenames:
            file_path = join (dirpath, name)
            logging.debug ('Testing file_path %r', file_path)
            if file_matcher.search (file_path):
                matching_filenames.append (file_path)
            else:
                # Bug fix: the original logged *path* (the root directory
                # being walked) instead of the file actually being skipped.
                logging.info ('Ignoring non-image file %r', file_path)
    total_count = len (matching_filenames)
    for index, file_path in enumerate (matching_filenames):
        upload_file (file_path, index + 1, total_count)
def run_upload (options, paths):
    """Upload each entry in *paths*; directories are walked recursively.

    options -- dict of keyword arguments forwarded to make_upload_file()
    paths   -- iterable of file or directory paths (relative allowed)
    """
    upload_file = make_upload_file (**options)
    for arg in paths:
        path = abspath (arg)
        if isdir (path):
            upload_directory (path, upload_file)
        elif isfile (path):
            # Bug fix: upload_file() requires (path, current, total); the
            # original one-argument call raised TypeError for single files.
            upload_file (path, 1, 1)
        else:
            # Lazy %-args instead of eager interpolation, matching the
            # logging style used elsewhere in this script.
            logging.error ('No such path: %r', path)
    logging.info ('Done!')
Looks like python requests does not handle extremely large multi-part files.
The documentation recommends you look into requests-toolbelt.
Here's the pertinent page from their documentation.
The only thing that stops you from using urlopen directly on a file object is the fact that the builtin file object lacks a `__len__` definition. A simple way is to create a subclass, which provides urlopen with the correct file length.
I have also modified the Content-Type header in the file below.
import os
import urllib2
class EnhancedFile(file):
    """Python 2 ``file`` subclass adding __len__, so urllib2 can compute
    Content-Length when the object is used directly as POST data."""

    def __init__(self, *args, **keyws):
        file.__init__(self, *args, **keyws)

    def __len__(self):
        # File size in bytes from the stat tuple (index 6 == st_size).
        return int(os.fstat(self.fileno())[6])
# Upload a.xml as the raw request body; EnhancedFile's __len__ lets
# urllib2 set the Content-Length header automatically. (Python 2 code:
# ``print line`` statement, text-mode open for a POST body.)
theFile = EnhancedFile('a.xml', 'r')
theUrl = "http://example.com/abcde"
theHeaders= {'Content-Type': 'text/xml'}
theRequest = urllib2.Request(theUrl, theFile, theHeaders)
response = urllib2.urlopen(theRequest)
theFile.close()
for line in response:
    print line
Chris Atlee's poster library works really well for this (particularly the convenience function poster.encode.multipart_encode()). As a bonus, it supports streaming of large files without loading an entire file into memory. See also Python issue 3244.
I am trying to test django rest api and its working for me:
def test_upload_file(self):
    """Django REST framework test: POST a CSV as multipart, expect HTTP 200.

    NOTE(review): the open() handle is never closed, and ``print response``
    is Python 2 syntax.
    """
    filename = "/Users/Ranvijay/tests/test_price_matrix.csv"
    data = {'file': open(filename, 'rb')}
    client = APIClient()
    # client.credentials(HTTP_AUTHORIZATION='Token ' + token.key)
    response = client.post(reverse('price-matrix-csv'), data, format='multipart')
    print response
    self.assertEqual(response.status_code, status.HTTP_200_OK)
pip install http_file
# import helper libraries
import urllib3
# Silence the warning triggered by verify=False below.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import requests
# import http_file (third-party download helper)
from http_file import download_file
# create a new session
s = requests.Session()
# connect to the server through the created session
# NOTE(review): verify=False disables TLS certificate checking.
s.get('URL_MAIN', verify=False)
# download the file into 'local_filename' from 'fileUrl' via the session
download_file('local_filename', 'fileUrl', s)
You may also want to have a look at httplib2, with examples. I find using httplib2 is more concise than using the built-in HTTP modules.
def visit_v2(device_code, camera_code):
    """POST two files plus form fields to the device /visit_v2 endpoint.

    Uses the third-party ``poster`` library (MultipartParam,
    multipart_encode) for streaming multipart encoding with urllib2.
    Python 2 only (print statements, ``except ..., e`` syntax).
    NOTE(review): person_data, server_port and server_ip are not defined
    in this snippet — presumably module globals; camera_code is unused.
    """
    image1 = MultipartParam.from_file("files", "/home/yuzx/1.txt")
    image2 = MultipartParam.from_file("files", "/home/yuzx/2.txt")
    datagen, headers = multipart_encode([('device_code', device_code), ('position', 3), ('person_data', person_data), image1, image2])
    # Consuming the generator here for printing exhausts it only if it is
    # a one-shot iterator — presumably poster's datagen is re-iterable.
    print "".join(datagen)
    if server_port == 80:
        port_str = ""
    else:
        port_str = ":%s" % (server_port,)
    url_str = "http://" + server_ip + port_str + "/adopen/device/visit_v2"
    headers['nothing'] = 'nothing'
    request = urllib2.Request(url_str, datagen, headers)
    try:
        response = urllib2.urlopen(request)
        resp = response.read()
        print "http_status =", response.code
        result = json.loads(resp)
        print resp
        return result
    except urllib2.HTTPError, e:
        # Non-2xx responses land here; body is printed for debugging.
        print "http_status =", e.code
        print e.read()
I tried some of the options here, but I had some issue with the headers ('files' field was empty).
A simple mock to explain how I did the post using requests and fixing the issues:
import requests
url = 'http://127.0.0.1:54321/upload'
file_to_send = '25893538.pdf'
# The 4-tuple form is (filename, stream, content-type, extra part headers).
# Bug fix: the original left the file handle open forever; a context
# manager keeps it open only for the duration of the POST.
with open(file_to_send, 'rb') as fh:
    files = {'file': (file_to_send,
                      fh,
                      'application/pdf',
                      {'Expires': '0'})}
    reply = requests.post(url=url, files=files)
print(reply.text)
More at https://requests.readthedocs.io/en/latest/user/quickstart/
To test this code, you could use a simple dummy server as this one (thought to run in a GNU/Linux or similar):
import os
from flask import Flask, request, render_template
rx_file_listener = Flask(__name__)
files_store = "/tmp"

# Bug fix: the decorator line was mangled to a '#' comment, so the
# /upload route was never registered; it must be '@'.
@rx_file_listener.route("/upload", methods=['POST'])
def upload_file():
    """Save every file in the multipart 'file' field under /tmp/uploaded/.

    Returns the string "200" on success, "500" on any failure.
    """
    storage = os.path.join(files_store, "uploaded/")
    print(storage)
    if not os.path.isdir(storage):
        os.mkdir(storage)
    try:
        for file_rx in request.files.getlist("file"):
            name = file_rx.filename
            destination = "/".join([storage, name])
            file_rx.save(destination)
        return "200"
    except Exception:
        return "500"

if __name__ == "__main__":
    rx_file_listener.run(port=54321, debug=True)
Related
I built a python client REST API wrapper for an Okta authenticated pricing API. My code runs but I am not getting any response back. I need to be able to get a JSON response. I believe a print statement would work but I hit a wall do not know what argument to pass the print statement in order to receive a response from
"conn = http.client.HTTPSConnection("sso.lukka.tech")"
import http.client
import urllib.request, urllib.parse, urllib.error
import json
import base64
def loadJson(response):
    """Parse an http.client.HTTPResponse body as JSON.

    Returns {} (after printing a notice) when the body is empty,
    otherwise the decoded JSON object.
    """
    body = response.read()
    # Bug fix: HTTPResponse.read() returns bytes, so the original
    # comparison ``body == ""`` was always False and empty bodies
    # crashed json.loads. A truthiness check covers b"", "" and None.
    if not body:
        print("Empty response found with status " + str(response.status))
        return {}
    return json.loads(body)
class DataPricingClient:
    """Thin client for the Lukka pricing REST API.

    On construction it exchanges OAuth2 client credentials (HTTP basic
    auth against sso.lukka.tech) for a bearer token, then issues GET
    requests against *host* carrying that token.

    Raises ApiErrorException if the token request does not return 200.
    """

    def __init__(self, host, clientId, clientSecret):
        conn = http.client.HTTPSConnection("sso.lukka.tech")
        path = "/oauth2/aus1imo2fqcx5Ik4Q0h8/v1/token"
        # Basic auth header: base64("clientId:clientSecret").
        encodedData = base64.b64encode(bytes(f"{clientId}:{clientSecret}", "ISO-8859-1")).decode("ascii")
        authHeader = "Basic " + encodedData
        headers = {
            "Authorization": authHeader,
            "Accept": "application/json",
            "Content-Type": "application/x-www-form-urlencoded"
        }
        params = urllib.parse.urlencode({
            "grant_type": "client_credentials",
            "scope": "pricing"
        })
        conn.request("POST", path, params, headers)
        response = conn.getresponse()
        if response.status != 200:
            raise ApiErrorException(response.status, "Failed to get access token")
        self.host = host
        self.accessToken = loadJson(response)["access_token"]

    def default_headers(self):
        """Headers (including the bearer token) sent with every request."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": "Bearer " + self.accessToken
        }

    def _send_request(self, path):
        """GET *path* on self.host and return the decoded JSON body.

        Bug fixes vs. the original: the body is now read *inside* the
        ``with`` block (the original returned the raw HTTPResponse after
        the connection was already closed, so it could never be read),
        and the public methods below call ``self._send_request`` — the
        original bare ``_send_request(path)`` calls raised NameError.
        """
        with http.client.HTTPSConnection(self.host) as conn:
            headers = self.default_headers()
            conn.request("GET", path, None, headers)
            response = conn.getresponse()
            return loadJson(response)

    def get_available_sources(self):
        """List all pricing sources."""
        path = "/v1/pricing/sources"
        return self._send_request(path)

    def get_source_details(self, sourceId):
        """Details for one pricing source."""
        path = f"/v1/pricing/sources/{sourceId}"
        return self._send_request(path)

    def get_latest_prices(self, asOf, sourceId, pairCodes):
        # NOTE(review): spaces inside the path look wrong for a URL, and
        # asOf is unused — confirm the real endpoint format.
        path = f"/v1/pricing/sources/{sourceId} {pairCodes}"
        return self._send_request(path)

    def historical_prices(self, sourceId, pairCode, begTs, endTs, fill, limit, variances):
        # NOTE(review): same concern — space-separated path segments.
        path = f"/v1/pricing/sources/{sourceId} {pairCode} {begTs} {endTs} {fill} {limit} {variances}"
        return self._send_request(path)
class ApiErrorException(Exception):
    """Raised when the pricing API returns a non-success HTTP status."""

    def __init__(self, status, msg):
        # The formatted text lives in .msg, mirroring the original contract.
        self.msg = f"Error {status}: {msg}"

    def __str__(self):
        return self.msg
if __name__ == '__main__':
    # pricing_api_creds is a project-local module holding a dict of
    # credentials (host / clientId / clientSecret) splatted into the
    # constructor keyword arguments.
    from pricing_api_creds import lukka_pricing
    c = DataPricingClient(**lukka_pricing)
The problem you have is because of a redirect. The API is basically telling you you have to go to a different page. You can do that manually (by yourself) or use a library to do that. One way to do it with http.client is like in this answer: https://stackoverflow.com/a/20475712/3352383
But I would suggest you to use requests because it does it automatically and usually it is also easier to use. Good luck!
I have a 300 mb file that I need to upload, and my current code just isn't cutting it.
#----------------------------------------------------------------------------------
def _post_multipart(self, host, selector,
                    fields, files,
                    ssl=False,port=80,
                    proxy_url=None,proxy_port=None):
    """ performs a multi-part POST to AGOL, Portal, or AGS (Python 2)
    Inputs:
       host - string - root url (no http:// or https://)
             ex: www.arcgis.com
       selector - string - everything after the host
             ex: /PWJUSsdoJDp7SgLj/arcgis/rest/services/GridIndexFeatures/FeatureServer/0/1/addAttachment
       fields - dictionary - additional parameters like token and format information
       files - tuple array - tuple with the file name type, filename, full path
       ssl - option to use SSL
       proxy_url - string - url to proxy server
       proxy_port - integer - port value if not on port 80
    Output:
       JSON response as dictionary
    Usage:
       import urlparse
       url = "http://sampleserver3.arcgisonline.com/ArcGIS/rest/services/SanFrancisco/311Incidents/FeatureServer/0/10261291"
       parsed_url = urlparse.urlparse(url)
       params = {"f":"json"}
       print _post_multipart(host=parsed_url.hostname,
                             selector=parsed_url.path,
                             files=files,
                             fields=params
                             )
    """
    # The entire multipart body is assembled in memory — see the memory
    # note on _encode_multipart_formdata; this is why very large files fail.
    content_type, body = self._encode_multipart_formdata(fields, files)
    headers = {
        'content-type': content_type,
        'content-length': str(len(body))
    }
    # Four connection variants: direct vs proxy, HTTP vs HTTPS. When
    # going through a proxy, the absolute URL is put on the request line.
    if proxy_url:
        if ssl:
            h = httplib.HTTPSConnection(proxy_url, proxy_port)
            h.request('POST', 'https://' + host + selector, body, headers)
        else:
            h = httplib.HTTPConnection(proxy_url, proxy_port)
            h.request('POST', 'http://' + host + selector, body, headers)
    else:
        if ssl:
            h = httplib.HTTPSConnection(host,port)
            h.request('POST', selector, body, headers)
        else:
            h = httplib.HTTPConnection(host,port)
            h.request('POST', selector, body, headers)
    resp_data = h.getresponse().read()
    try:
        result = json.loads(resp_data)
    except:
        # NOTE(review): bare except silently maps any parse failure to None.
        return None
    if 'error' in result:
        # Server demands SSL: retry the identical request once over HTTPS.
        if result['error']['message'] == 'Request not made over ssl':
            return self._post_multipart(host=host, selector=selector, fields=fields,
                                        files=files, ssl=True,port=port,
                                        proxy_url=proxy_url,proxy_port=proxy_port)
    return result
def _encode_multipart_formdata(self, fields, files):
    """Build (content_type, body) for a multipart/form-data POST (Python 2).

    fields -- dict of plain form fields (values coerced via self._tostr)
    files  -- iterable of (field_name, filepath, filename) tuples;
              entries whose filepath does not exist are silently skipped

    NOTE(review): each file is read fully into memory and concatenated
    into a Python 2 ``str`` — this is why 300 MB uploads exhaust memory.
    ``file`` also shadows the Python 2 builtin of the same name.
    """
    boundary = mimetools.choose_boundary()
    buf = StringIO()
    for (key, value) in fields.iteritems():
        buf.write('--%s\r\n' % boundary)
        buf.write('Content-Disposition: form-data; name="%s"' % key)
        buf.write('\r\n\r\n' + self._tostr(value) + '\r\n')
    for (key, filepath, filename) in files:
        if os.path.isfile(filepath):
            buf.write('--%s\r\n' % boundary)
            buf.write('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename))
            buf.write('Content-Type: %s\r\n' % (self._get_content_type3(filename)))
            file = open(filepath, "rb")
            try:
                # Whole-file read; the blank line separates part headers
                # from the part body.
                buf.write('\r\n' + file.read() + '\r\n')
            finally:
                file.close()
    # Closing boundary marker.
    buf.write('--' + boundary + '--\r\n\r\n')
    buf = buf.getvalue()
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return content_type, buf
I cannot use requests module, and must use the standard libraries like urllib2, urllib, etc.. for python 2.7.x.
Is there a way to load the 300 mb files to a site without pushing the whole thing to memory?
UPDATE:
So I switched to requests, and now I get: MissingSchema: Invalid URL u'www.arcgis.com/sharing/rest/content/users//addItem?': No schema supplied. Perhaps you meant http://www.arcgis.com/sharing/rest/content/users//addItem??
What does this mean?
I provide the fields with the request.post() as such:
#----------------------------------------------------------------------------------
def _post_big_files(self, host, selector,
                    fields, files,
                    ssl=False,port=80,
                    proxy_url=None,proxy_port=None):
    """Streamed multipart POST using requests-toolbelt's MultipartEncoder,
    avoiding loading the whole file into memory. Python 2 (print
    statements, iteritems).

    NOTE(review): *host* must include the scheme (http:// or https://)
    here — passing a bare hostname produces the MissingSchema error
    described in the question. proxyDict is built but never passed to
    requests.post, and ssl/port are unused.
    """
    import sys
    # Make the bundled requests_toolbelt importable from this file's dir.
    sys.path.insert(1,os.path.dirname(__file__))
    from requests_toolbelt import MultipartEncoder
    import requests
    if proxy_url is not None:
        proxyDict = {
            "http" : "%s:%s" % (proxy_url, proxy_port),
            "https" : "%s:%s" % (proxy_url, proxy_port)
        }
    else:
        proxyDict = {}
    # JSON-encode every plain field, then replace file entries with
    # (filename, stream, content-type) tuples understood by the encoder.
    for k,v in fields.iteritems():
        print k,v
        fields[k] = json.dumps(v)
    for key, filepath, filename in files:
        fields[key] = ('filename', open(filepath, 'rb'), self._get_content_type3(filepath))
    m = MultipartEncoder(
        fields=fields)
    print host + selector
    r = requests.post(host + selector , data=m,
                      headers={'Content-Type': m.content_type})
    print r
I followed the example in the help documentation from both request and the toolbelt. Any ideas why this is breaking?
Thank you,
On a django server, I process uploaded zip files sent from a python script. But I am getting "" (a blank string) for file.content_type. What am I doing wrong?
# Bug fix: the decorator was mangled to a '#' comment; without
# @csrf_exempt Django rejects these script-driven POSTs with a CSRF error.
@csrf_exempt
def Import( request ):
    """Accept a POSTed zip file, unpack it, and create a project from it.

    Returns JSON {"success": bool, "id": uid} on success, or an error
    response for wrong method / missing file / non-zip uploads.
    NOTE(review): content_type is client-supplied and not trustworthy —
    zipfile.is_zipfile() is the robust check (see the answer below the
    question text).
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed('Only POST here')
    if not request.FILES or not request.FILES.get( u'file' ):
        return HttpResponse('Must upload a file')
    file = request.FILES[u'file']
    if file.content_type == 'application/zip':
        unzipped_dir = unzip_file( file )
        uid = create_project( unzipped_dir )
        shutil.rmtree( unzipped_dir )
        py_ob = { }
        py_ob['success'] = uid is not None
        if uid is not None:
            py_ob['id'] = uid
        json_ob = simplejson.dumps(py_ob)
        return HttpResponse( json_ob, mimetype="application/json" )
    else:
        return HttpResponseNotAllowed( 'Only POST zip files here' )
This is the script which sends the zip file up:
import sys
import os
import requests
# CLI: python script.py <url> <username> <password> <file>
# Uploads the file as multipart field 'file' with HTTP basic auth and
# reports the JSON result. Python 2 (print statements).
if len (sys.argv) < 5:
    print "pass in url, username, password, file"
else:
    url = sys.argv[1]
    username = sys.argv[2]
    password = sys.argv[3]
    phile = sys.argv[4]
    if os.path.exists(phile):
        # NOTE(review): the handle is never closed; requests reads it
        # during post().
        files = {'file': open( phile, 'rb' )}
        r = requests.post( url, files=files, auth=( username, password ) )
        if r.status_code == 200:
            json_response = r.json()
            if json_response['success']:
                print "id: " + str( json_response['id'] )
            else:
                print "failure in processing bundle"
        else:
            print "server problem: " + str(r.status_code)
            print r.text
    else:
        print "cannot find file to upload"
The Content-Type header is completely arbitrary (and optional) and not a good way to detect whether or not you're dealing with a valid ZIP file. Have you made sure your browser is supplying it?
Django's documentation tells us the same:
UploadedFile.content_type
The content-type header uploaded with the file (e.g. text/plain or application/pdf). Like any data supplied by the user, you shouldn’t
trust that the uploaded file is actually this type. You’ll still need
to validate that the file contains the content that the content-type
header claims – “trust but verify.”
You should be using zipfile.is_zipfile instead.
I have the following problem.
I used BaseHttpServer.
class ReqHandler( BaseHTTPServer.BaseHTTPRequestHandler):
    """Minimal Python 2 file-serving handler: GET /x/y.doc streams
    ./x/y.doc from the current working directory."""

    def __init__(self, request, client_address, server):
        BaseHTTPServer.BaseHTTPRequestHandler.__init__( self, request, client_address, server )

    def do_GET(self ):
        # NOTE(review): self.path is still percent-encoded here, and
        # performReq ignores its argument and re-reads self.path — so
        # URL-encoded (e.g. Cyrillic) paths never match a file on disk
        # and fall through to the 404 below; urllib.unquote() is the fix.
        self.performReq(self.path.decode('utf-8'))

    def performReq (self, req ):
        curDir = os.getcwd()
        # Map the URL path (minus the leading '/') onto the working dir.
        fname = curDir + '/' + self.path[1:]
        try:
            # The 200 status is sent before the file is opened, so a
            # missing file gets a 200 header followed by send_error(404).
            self.send_response(200,"Ok!")
            ext = os.path.splitext(self.path)[1]
            self.send_header('Content', 'text/xml; charset=UTF-8' )
            self.end_headers()
            f = open(fname, 'rb')
            for l in f:
                self.wfile.write(l)
            f.close()
            print 'file '+fname+" Ok"
        except IOError:
            print 'no file '+fname
            self.send_error(404)
if __name__=='__main__':
    # Listen on all interfaces, port 8081, until interrupted.
    server = BaseHTTPServer.HTTPServer( ('',8081), ReqHandler )
    print('server ok!')
    server.serve_forever()
If the path to the file contains Cyrillic characters — e.g.
http://localhost:8081/ТРА/Понедельник/Пн.doc —
I get code 404.
Thank you.
URLs are not only encoded as UTF-8; they are also URL-encoded. Decode that too, using the urllib.unquote() function:
# Bug fix: Python 2's urllib has no ``urlunquote`` — the function is
# ``unquote`` (and the original call also misspelled it ``unlunquote``).
# Decode the percent-encoding first, then the UTF-8 bytes.
from urllib import unquote
self.performReq(unquote(self.path).decode('utf-8'))
Demo:
>>> from urllib import unquote
>>> path = '/%D0%A2%D0%A0%D0%90/%D0%9F%D0%BE%D0%BD%D0%B5%D0%B4%D0%B5%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA/%D0%9F%D0%BD.doc'
>>> unquote(path).decode('utf8')
u'/\u0422\u0420\u0410/\u041f\u043e\u043d\u0435\u0434\u0435\u043b\u044c\u043d\u0438\u043a/\u041f\u043d.doc'
>>> print unquote(path).decode('utf8')
/ТРА/Понедельник/Пн.doc
I want to stream a big file via werkzeug.
Currently my wsgi application looks like this:
from werkzeug.wrappers import Request, Response
from werkzeug.wsgi import ClosingIterator, wrap_file
import os
class Streamer(object):
    """WSGI app meant to serve ./bigfile.xml via werkzeug's wrap_file.

    NOTE(review): Python 2 code (``except HTTPException, e`` syntax) and
    HTTPException is not imported here. wrap_file returns a file-wrapper
    iterable, not a Response — returning it directly skips
    start_response entirely, and the file is opened in text mode. The
    answer that follows wraps it in Response(..., direct_passthrough=True).
    """

    def __init__(self, request, client_address, server):
        pass
I'm not sure what I should do with the object returned by the wrap_file function.
Haven't tried myself but I think following will work.
# Python 2: ``file`` is the builtin file type; any iterable works here.
# direct_passthrough tells werkzeug to stream the object unmodified.
g = file(path_to_bigfile) # or any generator
return Response(g, direct_passthrough=True)
Just in case one would additionally like to:
1. preserve the file name
2. issue download without page redirect
# file_name assumed to be known
# file_path assumed to be known
# Python 2 (``file`` builtin). Streams the file as a named download:
# Content-Length enables client progress, Content-Disposition sets the
# save-as filename, direct_passthrough avoids buffering the whole body.
# (Snippet: the bare ``return`` belongs inside a view function.)
file_size = os.path.getsize(file_path)
fh = file(file_path, 'rb')
return Response(fh,
                mimetype='application/octet-stream',
                headers=[
                    ('Content-Length', str(file_size)),
                    ('Content-Disposition', "attachment; filename=\"%s\"" % file_name),
                ],
                direct_passthrough=True)