python requests api not posting in-memory zipped file - python

Question: How can I get this to work?
I'm trying to use the python requests api to send a zipped file to a server. I saw this method in the docs:
r = requests.post(url, files=open('foo.png', 'rb'))
but the difference between what I'm doing, is that the zipped file that I have is in memory, there's just a python object, no physical zipped version of the file is created:
I'm using the zipfile api, and this is how I'm creating my zip file:
inMemoryOutputFile = StringIO()
outFile = zipfile.ZipFile(inMemoryOutputFile, "w",
compression=zipfile.ZIP_DEFLATED)
and trying the following (after writing to the zip file):
r = requests.post(url, outFile)
However, it's not working; it looks like the object is not being recognized as a valid file parameter. Here's the stack trace:
Traceback (most recent call last): File
"/Users/abdulahmad/Desktop/upload-script-ve/bin/cogs", line 11, in
<module>
sys.exit(main()) File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/cogs/run.py",
line 396, in main
return run(sys.argv) File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/cogs/run.py",
line 384, in run
return instance() File "/Users/abdulahmad/Desktop/upload-script-ve//src/ctl.py",
line 53, in __call__
handler = uploader(self.url, self.file) File "/Users/abdulahmad/Desktop/upload-script-ve//src/uploader.py",
line 24, in __call__
response = self.session.post(url, files=payload)
#this is where I'm adding the file (the payload)
File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/requests/sessions.py",
line 511, in post
return self.request('POST', url, data=data, json=json, **kwargs) File
"/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/requests/sessions.py",
line 454, in request
prep = self.prepare_request(req) File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/requests/sessions.py",
line 388, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks), File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/requests/models.py",
line 296, in prepare
self.prepare_body(data, files, json) File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/requests/models.py",
line 447, in prepare_body
(body, content_type) = self._encode_files(files, data) File "/Users/abdulahmad/Desktop/upload-script-ve/lib/python2.7/site-packages/requests/models.py",
line 150, in _encode_files
fdata = fp.read() TypeError: read() takes at least 2 arguments (1 given)
actual code:
inMemoryOutputFile = StringIO()
parentDir, dirToZip = os.path.split(dirPath)
def trimPath(path):
archivePath = path.replace(parentDir, "", 1)
if parentDir:
archivePath = archivePath.replace(os.path.sep, "", 1)
if not includeDirInZip:
archivePath = archivePath.replace(dirToZip + os.path.sep, "", 1)
return os.path.normcase(archivePath)
outFile = zipfile.ZipFile(inMemoryOutputFile, "w",
compression=zipfile.ZIP_DEFLATED)
for (archiveDirPath, dirNames, fileNames) in os.walk(dirPath):
for fileName in fileNames:
filePath = os.path.join(archiveDirPath, fileName)
outFile.write(filePath, trimPath(filePath))
if not fileNames and not dirNames:
zipInfo = zipfile.ZipInfo(trimPath(archiveDirPath) + "/")
outFile.writestr(zipInfo, "")
outFile.close()
return outFile

You need to pass the StringIO buffer to requests, not the ZipFile. ZipFile.read("somefile.txt") reads an uncompressed member file from inside the archive; it doesn't read the compressed binary stream. That read() requires a member-name argument (plus self — hence "read() takes at least 2 arguments (1 given)"), and that's why you got the strange error message. Also rewind the buffer with seek(0) before posting, or the POST data will be empty.
This example shows you the workflow.
import zipfile
from cStringIO import StringIO
import requests
import logging
logging.basicConfig(level=logging.DEBUG)
buf = StringIO()
with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zippy:
zippy.write('somefile.txt')
buf.seek(0)
requests.post('http://localhost:8080',
headers = {'content-type': 'application/octet-stream'},
data=buf)

Related

Streaming data from Amazon S3 using smart_open causing TypeError

I am trying to stream data from a large text file sitting in Amazon S3 to my AWS Lambda, I am using smart_open to accomplish that, here's my test code -
import smart_open
def stream_data():
my_bucket = 'monkey-business-dev'
my_key = 'incoming_monkey_data/banana/banana'
uri = 's3://{}/{}'.format(my_bucket, my_key)
total_lines = 0
total_records = 0
for line in smart_open.smart_open(uri):
total_records += 1
if __name__ == '__main__':
stream_data()
I am using Python 3.x and I am facing this exception:
/usr/local/lib/python3.6/site-packages/odo/backends/pandas.py:94: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.
You can access NaTType as type(pandas.NaT)
#convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))
Traceback (most recent call last):
File "/Users/xxxx/PycharmProjects/monkey_lambda/datastream_from_s3.py", line 16, in <module>
stream_data()
File "/Users/xxxx/PycharmProjects/monkey_lambda/datastream_from_s3.py", line 11, in stream_data
for line in smart_open.smart_open(uri):
File "/usr/local/lib/python3.6/site-packages/smart_open/smart_open_lib.py", line 163, in smart_open
bucket = s3_connection.get_bucket(parsed_uri.bucket_id)
File "/usr/local/lib/python3.6/site-packages/boto/s3/connection.py", line 509, in get_bucket
return self.head_bucket(bucket_name, headers=headers)
File "/usr/local/lib/python3.6/site-packages/boto/s3/connection.py", line 528, in head_bucket
response = self.make_request('HEAD', bucket_name, headers=headers)
File "/usr/local/lib/python3.6/site-packages/boto/s3/connection.py", line 671, in make_request
retry_handler=retry_handler
File "/usr/local/lib/python3.6/site-packages/boto/connection.py", line 1071, in make_request
retry_handler=retry_handler)
File "/usr/local/lib/python3.6/site-packages/boto/connection.py", line 913, in _mexe
self.is_secure)
File "/usr/local/lib/python3.6/site-packages/boto/connection.py", line 705, in get_http_connection
return self.new_http_connection(host, port, is_secure)
File "/usr/local/lib/python3.6/site-packages/boto/connection.py", line 747, in new_http_connection
connection = self.proxy_ssl(host, is_secure and 443 or 80)
File "/usr/local/lib/python3.6/site-packages/boto/connection.py", line 796, in proxy_ssl
sock.sendall("CONNECT %s HTTP/1.0\r\n" % host)
TypeError: a bytes-like object is required, not 'str'
I tried to convert the URI to UTF-8 bytes, but then I get the following error:
/usr/local/lib/python3.6/site-packages/odo/backends/pandas.py:94: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.
You can access NaTType as type(pandas.NaT)
#convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))
Traceback (most recent call last):
File "/Users/xxxxx/PycharmProjects/monkey_lambda/datastream_from_s3.py", line 16, in <module>
stream_data()
File "/Users/xxxxx/PycharmProjects/monkey_lambda/datastream_from_s3.py", line 11, in stream_data
for line in smart_open.smart_open(uri.encode()):
File "/usr/local/lib/python3.6/site-packages/smart_open/smart_open_lib.py", line 208, in smart_open
raise TypeError('don\'t know how to handle uri %s' % repr(uri))
TypeError: don't know how to handle uri b's3://monkey-business-dev/incoming_monkey_data/banana/banana'
Process finished with exit code 1
I know this is kinda late and you might already have this figured out, but here are the two problems with what you are doing:
You are trying to read a directory using smart_open. Here's what you should do:
For example, if your filename is "test.txt":
my_bucket = 'monkey-business-dev'
my_key = 'incoming_monkey_data/banana/banana'
my_file = 'test.txt'
uri = 's3://{}/{}/{}'.format(my_bucket, my_key, my_file)
file = smart_open.smart_open(uri, encoding="utf-8")
content = file.read()
content_list = content.split("\n")
total_lines = len(content_list)
return total_lines
Upvote this answer if it helps. Thanks.

Python ftplib.error_perm 550: No such file or directory?

I've written a Python script that is part of my attempt to automate daily ftp transfers from my server. I've tested the script with a number of files and file types (html, mp3, png, jpg, etc.) and everything seems to work out fine so far.
However, when I try to download a simple text file, 'file.txt' (9 kb), the download fails, although I account for text files and switch from binary to text mode for the transfer. The following exception is thrown by ftplib:
ftplib.error_perm: 550 file.txt: No such file or directory
Here's my script:
from ftplib import FTP_TLS, error_perm
import os
def open_connection(server, user, pwd, work_dir=None):
global ftps
try:
ftps = FTP_TLS(host=server)
ftps.login(user=user, passwd=pwd)
ftps.prot_p() # switch to secure data connection
if work_dir != None:
ftps.cwd(work_dir)
else:
pass
except:
pass
def download_file(remote_path, local_path):
remote_file = os.path.basename(remote_path)
local_file_path = os.path.join(local_path, remote_file)
# differentiate between text and binary files
file_type, encoding = guess_type_and_encoding(remote_file)
# possibly needs a permission exception catch
if file_type.split("/")[0] == "text" and encoding == None:
# use text mode for transfer
local_file = open(local_file_path, 'w')
def callback(line): local_file.write(line + "\n")
ftps.retrlines("RETR " + remote_file, callback)
local_file.close()
else:
# use binary mode for transfer
local_file = open(local_file_path, 'wb')
ftps.retrbinary("RETR " + remote_file, local_file.write)
local_file.close()
return
def guess_type_and_encoding(filename):
from mimetypes import guess_type, add_type
add_type('text/x-python-win', '.pyw') # not in tables
mimetype, encoding = guess_type(filename, False) # allow extras
mimetype = mimetype or "?/?" # type unknown
return mimetype, encoding
open_connection(server, user, pwd, work_dir)
download_file("/files/dir/file.txt", "/Users/username/Desktop")
ftps.close()
I don't get why the error is raised!? The arguments 'remote_path' and 'local_path' are correctly provided. Both paths exist! 'file.txt' exists on the server under /files/dir and /Users/username/Desktop points to my desktop on OS X.
Here's the detailed ftplib error:
Traceback (most recent call last):
File "ftp2.py", line 138, in <module>
download_file("/files/dir/file.txt", "/Users/username/Desktop")
File "ftp2.py", line 93, in download_file
ftps.retrlines("RETR " + remote_file, callback)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 735, in retrlines
conn = self.transfercmd(cmd)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 376, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 710, in ntransfercmd
conn, size = FTP.ntransfercmd(self, cmd, rest)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 339, in ntransfercmd
resp = self.sendcmd(cmd)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 249, in sendcmd
return self.getresp()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 224, in getresp
raise error_perm, resp
ftplib.error_perm: 550 file.txt: No such file or directory
Any help is greatly appreciated.
Thanks. :)
Try to
replace remote_file
in ftps.retrlines("RETR " + remote_file, callback)
with remote_path.

error 501 on ftp.retrbinary (ftplib)

I'm coding a script to check a FTP directory and download the new files. This is part of the code:
from ftplib import FTP
import os
ftp = FTP(ftp_lance)
ftp.login(login, password)
ftp.cwd('xxxxxx')
FTP_list = ftp.nlst()
lista_diferenca = [file for file in FTP_list if file not in local_list]
for file in lista_diferenca:
local_filename = os.path.join(cache, file)
ftp.retrbinary('REST ' + file, open(local_filename, 'wb').write)
When I run it, I get this error message:
Traceback (most recent call last):
File "D:\Scripts\Istari\Radagast\Radagast.py", line 44, in <module>
ftp.retrbinary('REST tabela14_pag5.pdf', open(local_filename, 'wb').write)
File "D:\Portable Python 2.7.6.1\App\lib\ftplib.py", line 414, in retrbinary
conn = self.transfercmd(cmd, rest)
File "D:\Portable Python 2.7.6.1\App\lib\ftplib.py", line 376, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "D:\Portable Python 2.7.6.1\App\lib\ftplib.py", line 339, in ntransfercmd
resp = self.sendcmd(cmd)
File "D:\Portable Python 2.7.6.1\App\lib\ftplib.py", line 249, in sendcmd
return self.getresp()
File "D:\Portable Python 2.7.6.1\App\lib\ftplib.py", line 224, in getresp
raise error_perm, resp
error_perm: 501 Bad parameter. Numeric value required
I checked several sites searching for this kind of error and found nothing. It seems that my retrbinary call is broken, but the arguments look right (first 'REST ' + file, and then the callback function).
Some idea about my error?
You need to specify the FTP command RETR, not REST:
ftp.retrbinary('RETR ' + file, open(local_filename, 'wb').write)

Python ftp download and archive

I have been trying to write a Python script to grab the main directory of an FTP server and archive it onto the local PC. I am an amateur coder and Python is fairly new to me.
What I am getting as an error right now is:
File "C:\Users\Ali\Desktop\ftp_archiving_script.py", line 24, in <module>
ftpDownload = ftp.retrbinary('RETR', filename)
Code:
from ftplib import FTP
import zipfile
import os
try:
import zlib
compression = zipfile.ZIP_DEFLATED
except:
compression = zipfile.ZIP_STORED
modes = { zipfile.ZIP_DEFLATED: "deflated",
zipfile.ZIP_STORED: "stored",
}
#print "Logging in..."
with FTP('xxx.xxx.xxx') as ftp: #hostname goes here
ftp.login('xxxx','xxxx') #user followed by pass
#print "changing to root directory"
ftp.mlsd('//')
#print "Accessing files"
filenames = []
#print filenames
ftp.retrlines('NLST', filenames.append)
try:
for filename in filenames:
ftpDownload = ftp.retrbinary('RETR', filename)
with ZipFile(os.path.join('C:\\','DC_archive.zip'), 'w') as myzip:
myzip.write(ftpDownload, compress_type=compression)
myzip.close()
finally:
#print "closing"
ftp.close()
ftp.quit()
Can anyone enlighten me on this problem?
Thank you,
Update
try:
for filename in filenames:
with io.StringIO() as fObject:
ftp.retrbinary('RETR %s' %filename, fObject.write)
with ZipFile(os.path.join('C:\\','DC_archive.zip'), 'w') as myzip:
myzip.write(fObject, compress_type=compression)
myzip.close()
updated Traceback for #fals... Also this is using your code below and not the one I have at the top.
Traceback (most recent call last):
File "C:\Users\Ali\Desktop\ftp_archive2.py", line 20, in <module>
ftpDownload = ftp.retrbinary('RETR ' + filename, f.write)
File "C:\Python33\lib\ftplib.py", line 424, in retrbinary
with self.transfercmd(cmd, rest) as conn:
File "C:\Python33\lib\ftplib.py", line 386, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "C:\Python33\lib\ftplib.py", line 352, in ntransfercmd
resp = self.sendcmd(cmd)
File "C:\Python33\lib\ftplib.py", line 259, in sendcmd
return self.getresp()
File "C:\Python33\lib\ftplib.py", line 233, in getresp
raise error_perm(resp)
ftplib.error_perm: 550 File not found
From the Python documentation for ftplib.retrbinary:
FTP.retrbinary(command, callback[, maxblocksize[, rest]])
Retrieve a file in binary transfer mode. command should be an
appropriate RETR command: 'RETR filename'. The callback function is
called for each block of data received, with a single string argument
giving the data block.
Nowhere does it indicate that it returns a file-like object or string.
Instead, you have to create your own callback to write to a file object.
with open('my-downloaded-file', 'wb') as f:
ftp.retrbinary('RETR %s' % filename, f.write)
Here, f.write is the callback which will receive data as it arrives from the socket. If you don't want to save the file to disk using open, you can use the StringIO module to simulate a file in memory.
Try following code:
import ftplib
from io import BytesIO
import os
import zipfile
REMOTE_HOST = 'xxx.xxx.xxx'
REMOTE_USER = '...'
REMOTE_PASS = '...'
REMOTE_DIR_PATH = '//'
LOCAL_ZIP_PATH = os.path.join(os.path.expanduser('~'), 'Desktop', 'DC_archive.zip')
ftp = ftplib.FTP(REMOTE_HOST)
try:
ftp.login(REMOTE_USER, REMOTE_PASS)
ftp.cwd(REMOTE_DIR_PATH)
filenames = ftp.nlst()
with zipfile.ZipFile(LOCAL_ZIP_PATH, 'w') as zf:
for filename in filenames:
with BytesIO() as f:
try:
ftpDownload = ftp.retrbinary('RETR ' + filename, f.write)
zf.writestr(filename, f.getvalue())
except ftplib.Error as e:
print('Skip {}: {}'.format(filename, e))
finally:
ftp.quit()

Python: saving large web page to file

Let me start off by saying, I'm not new to programming but am very new to python.
I've written a program using urllib2 that requests a web page that I would then like to save to a file. The web page is about 300KB, which doesn't strike me as particularly large but seems to be enough to give me trouble, so I'm calling it 'large'.
I'm using a simple call to copy directly from the object returned from urlopen into the file:
file.write(webpage.read())
but it will just sit for minutes, trying to write into the file and I eventually receive the following:
Traceback (most recent call last):
File "program.py", line 51, in <module>
main()
File "program.py", line 43, in main
f.write(webpage.read())
File "/usr/lib/python2.7/socket.py", line 351, in read
data = self._sock.recv(rbufsize)
File "/usr/lib/python2.7/httplib.py", line 541, in read
return self._read_chunked(amt)
File "/usr/lib/python2.7/httplib.py", line 592, in _read_chunked
value.append(self._safe_read(amt))
File "/usr/lib/python2.7/httplib.py", line 649, in _safe_read
raise IncompleteRead(''.join(s), amt)
httplib.IncompleteRead: IncompleteRead(6384 bytes read, 1808 more expected)
I don't know why this should give the program so much grief?
EDIT |
here is how I'm retrieving the page
jar = cookielib.CookieJar()
cookie_processor = urllib2.HTTPCookieProcessor(jar);
opener = urllib2.build_opener(cookie_processor)
urllib2.install_opener(opener)
requ_login = urllib2.Request(LOGIN_PAGE,
data = urllib.urlencode( { 'destination' : "", 'username' : USERNAME, 'password' : PASSWORD } ))
requ_page = urllib2.Request(WEBPAGE)
try:
#login
urllib2.urlopen(requ_login)
#get desired page
portfolio = urllib2.urlopen(requ_page)
except urllib2.URLError as e:
print e.code, ": ", e.reason
I'd use a handy fileobject copier function provided by shutil module. It worked on my machine :)
>>> import urllib2
>>> import shutil
>>> remote_fo = urllib2.urlopen('http://docs.python.org/library/shutil.html')
>>> with open('bigfile', 'wb') as local_fo:
... shutil.copyfileobj(remote_fo, local_fo)
...
>>>
UPDATE: You may want to pass the 3rd argument to copyfileobj that controls the size of internal buffer used to transfer bytes.
UPDATE2: There's nothing fancy about shutil.copyfileobj. It simply reads a chunk of bytes from source file object and writes it the destination file object repeatedly until there's nothing more to read. Here's the actual source code of it that I grabbed from inside Python standard library:
def copyfileobj(fsrc, fdst, length=16*1024):
"""copy data from file-like object fsrc to file-like object fdst"""
while 1:
buf = fsrc.read(length)
if not buf:
break
fdst.write(buf)

Categories