Python download zip files from a public FTP server - python

I need to download several (Digital Earth Model) zip files in a folder "C:\DEMDownload" on my PC (windows OS) from the public geodata base of Canada Government.
when i run my code at the line ftp.retrbinary('RETR %s' %file, open(local_file, 'wb').write) i get the following error message
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Python27\lib\ftplib.py", line 414, in retrbinary
conn = self.transfercmd(cmd, rest)
File "C:\Python27\lib\ftplib.py", line 376, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "C:\Python27\lib\ftplib.py", line 339, in ntransfercmd
resp = self.sendcmd(cmd)
File "C:\Python27\lib\ftplib.py", line 249, in sendcmd
return self.getresp()
File "C:\Python27\lib\ftplib.py", line 224, in getresp
raise error_perm, resp
error_perm: 550 Failed to open file.
Second. Is It possible to avoid to write available_days list and create a list of all zip files to download
import os, ftplib
destdir='C:\DEMDownload'
ftp = ftplib.FTP('ftp2.cits.rncan.gc.ca')
ftp.login('anonymous', '')
available_days= ['001k11.zip',
'001k12.zip',
'001k13.zip',
'001k14.zip',
'001k15.zip',
'001l13.zip',
'001l14.zip',
'001l16.zip',
'001m01.zip',
'001m02.zip',
'001m03.zip',
'001m04.zip',
'001m05.zip',
'001m06.zip',
'001m07.zip',
'001m08.zip',
'001m09.zip',
'001m10.zip',
'001m11.zip',
'001m12.zip',
'001m13.zip',
'001m14.zip',
'001m15.zip',
'001m16.zip',
'001n02.zip',
'001n03.zip',
'001n04.zip',
'001n05.zip',
'001n06.zip',
'001n07.zip',
'001n10.zip',
'001n11.zip',
'001n12.zip',
'001n13.zip',
'001n14.zip',
'001n15.zip']
hdfs = list()
for day in available_days :
file = available_days[available_days.index(day)]
print 'file=', file
local_file = os.path.join(destdir, file)
ftp.retrbinary('RETR %s' %file, open(local_file, 'wb').write)
hdfs.append(os.path.abspath(local_file))
ftp.cwd('..')
ftp.quit()

I was able to successfully download the zip files with your given url with this:
# connect to ftp
url = urlparse.urlparse("http://ftp2.cits.rncan.gc.ca/pub/geobase/official/cded/50k_dem/")
ftp = ftplib.FTP(url.netloc)
ftp.login()
ftp.cwd(ftp_dirname)
with open(filename, 'w') as fobj:
ftp.retrbinary('RETR %s' % basename, fobj.write)
You can avoid the hardcoded dir/filenames by walking through the ftp directories similar to how you might walk through a local directory with some creative usage of ftplib.FTP.dir()
Full code below:
url = 'http://ftp2.cits.rncan.gc.ca/pub/geobase/official/cded/50k_dem/'
url = urlparse.urlparse(url)
local_root = os.path.expanduser("~/ftp_download") # change this to wherever you want to download to
def download(ftp, ftp_path, filename, check_cwd=True):
"""
Using the given ftp connection, download from ftp_path to
filename.
If check_cwd is False, assume the ftp connection is already
in the correct current working directory (cwd)
"""
basename = posixpath.basename(ftp_path)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
if check_cwd:
ftp_dirname = posixpath.dirname(ftp_path)
if ftp_dirname != ftp.pwd():
ftp.cwd(ftp_dirname)
with open(filename, 'w') as fobj:
ftp.retrbinary('RETR %s' % basename, fobj.write)
def ftp_dir(ftp):
"""
Given a valid ftp connection, get a list of 2-tuples of the
files in the ftp current working directory, where the first
element is whether the file is a directory and the second
element is the filename.
"""
# use a callback to grab the ftp.dir() output in a list
dir_listing = []
ftp.dir(lambda x: dir_listing.append(x))
return [(line[0].upper() == 'D', line.rsplit()[-1]) for line in dir_listing]
# connect to ftp
ftp = ftplib.FTP(url.netloc)
ftp.login()
# recursively walk through the directory and download each file, depth first
stack = [url.path]
while stack:
path = stack.pop()
ftp.cwd(path)
# add all directories to the queue
children = ftp_dir(ftp)
dirs = [posixpath.join(path, child[1]) for child in children if child[0]]
files = [posixpath.join(path, child[1]) for child in children if not child[0]]
stack.extend(dirs[::-1]) # add dirs reversed so they are popped out in order
# download all files in the directory
for filepath in files:
download(ftp, filepath, os.path.join(local_root, filepath.split(url.path,1)[-1]),
check_cwd=False)
# logout
ftp.quit()
You can condense this further through usage of one of the python ftp wrapper libraries such as ftptool or ftputil

Related

Pysftp/Paramiko "No such file" when repeatedly doing cwd and listdir

I am writing a Python script (for testing purposes) that, downloads an xml file from a directory, converts it into json, converts it back to xml and uploads it again to a different directory, as long as there is an xml file left in the source directory.
def start():
now = str(datetime.now().strftime("%d%m%Y%H%M%S"))
try:
pysftp.Connection(HOST, username=AGENT, password=PW, private_key=".ppk", cnopts=CNOPTS)
except:
print('Connection error')
return
xml_data = []
new_xml = ''
with pysftp.Connection(HOST, username=AGENT, password=PW, private_key=".ppk", cnopts=CNOPTS) as sftp:
for filename in sftp.listdir(SOURCE_FOLDER):
if fnmatch.fnmatch(filename, WILDCARD) and 'cancel' not in filename:
doc_type = return_doc_type(filename)
sftp.cwd(SOURCE_FOLDER)
file_object = io.BytesIO()
sftp.getfo(filename, file_object)
xml_file = file_object.getvalue()
new_xml = xmltodict.parse(xml_file)
if new_xml == '':
return
xml_data.append(new_xml)
json_data = json.dumps(xml_data)
new_xml_file = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>' + dict2xml(json.loads(json_data))
new_xml_file = indent(new_xml_file, indentation = ' ',newline = '\r\n')
with pysftp.Connection(HOST, username=AGENT, password=PW, private_key=".ppk", cnopts=CNOPTS) as sftp2:
with sftp2.cd(DEST_FOLDER):
with sftp2.open(f'test-{AGENT}-{doc_type}-{now}.xml', mode='w+', bufsize=32768) as f:
f.write(new_xml_file)
print('xml file deployed on server: ', now, '\n')
file_count = len(sftp.listdir(SOURCE_FOLDER))
if file_count > 3:
start()
else:
print('no new files')
return
The SOURCE_FOLDER is like 'somefolder/out/'.
I have tested it with one file and it works, but when I try to make it recursive, I get this error after the 2nd iteration:
Exception in thread django-main-thread:
Traceback (most recent call last):
File ".../app/views.py", line 232, in start
file_count = len(sftp.listdir(SOURCE_FOLDER))
File ".../lib/python3.7/site-packages/pysftp/__init__.py", line 592, in listdir
return sorted(self._sftp.listdir(remotepath))
File ".../lib/python3.7/site-packages/paramiko/sftp_client.py", line 218, in listdir
return [f.filename for f in self.listdir_attr(path)]
File ".../lib/python3.7/site-packages/paramiko/sftp_client.py", line 239, in listdir_attr
t, msg = self._request(CMD_OPENDIR, path)
File ".../lib/python3.7/site-packages/paramiko/sftp_client.py", line 813, in _request
return self._read_response(num)
File ".../lib/python3.7/site-packages/paramiko/sftp_client.py", line 865, in _read_response
self._convert_status(msg)
File ".../lib/python3.7/site-packages/paramiko/sftp_client.py", line 894, in _convert_status
raise IOError(errno.ENOENT, text)
FileNotFoundError: [Errno 2] No such file
The original file is in the source directory, so I don't know what "No such file" is referring to.
Thank you for any suggestions
Your SOURCE_FOLDER is a relative path somefolder/out/. So say, you start in /home/path. Then when you cwd to somefolder/out/, you end up in /home/path/somefolder/out/. If you then ls somefolder/out/, you are actually referring to /home/path/somefolder/out/somefolder/out/, what most probably is not what you want.
And that actually means that even your for filename loop cannot work. As you cwd to somefolder/out/ in every iteration, in the second one, it must fail, as it will try to cwd to /home/path/somefolder/out/somefolder/out/.
You better use absolute paths to avoid this mess.
Your code has other issues. For example:
I do not understand, why you open the second connection to the same host for the upload. You can use the connection you already have.
xmltodict.parse can take a file-like object. There's no need to waste memory by copying BytesIO to a string. And actually you do not even need to waste the memory with the BytesIO, you can use:
with sftp.open(filename, bufsize=32768) as f:
new_xml = xmltodict.parse(f)

paramiko sftp script works on test server not on production

I'm a newb at python, so please excuse the hack job I created in order to transfer the contents of a folder into an ssh server.
The problem is that it works great in my test server, but as soon as run it against the actual server that I need to upload files for I receive the error below, and I'm not sure what it means.
I've googled it, but I can't figure it out, please help.
Thanks.
import paramiko
import glob
import os
from shutil import move
host = "192.168.1.87" #hard-coded
port = 22
password ="passwd" #hard-coded
username = "administator" #hard-coded
remotepath ='' #hard-coded
localpath = 'D:\\PH/PH_PROD\\PowerConnectInterf1_WINS\\bin\\data\\Sheex\\bc\\945\\'
#build filename array
os.chdir("D:/PH/PH_PROD/PowerConnectInterf1_WINS/bin/data/Sheex/bc/945")
filelist=[]
for files in glob.glob( "2016*" ):
f = open(files, 'r')
filelist.append(f.name)
f.close()
if (len(filelist)>0):
transport = paramiko.Transport((host, port))
transport.connect(username=username, password=password)
sftp = paramiko.SFTPClient.from_transport(transport)
for s in filelist:
#print remotepath+s
sftp.put(localpath+s,remotepath+s)
#os.rename(localpath+s,localpath+"945back/"+s)
sftp.close()
transport.close()
#print 'Upload done.'
Error:
D:\Scripts>python mysftp.py
Traceback (most recent call last):
File "mysftp.py", line 37, in <module>
sftp.put(localpath+s,remotepath+s)
File "C:\Python27\lib\site-packages\paramiko\sftp_client.py", line 676, in put
return self.putfo(fl, remotepath, file_size, callback, confirm)
File "C:\Python27\lib\site-packages\paramiko\sftp_client.py", line 634, in put
fo
with self.file(remotepath, 'wb') as fr:
File "C:\Python27\lib\site-packages\paramiko\sftp_client.py", line 327, in ope
n
t, msg = self._request(CMD_OPEN, filename, imode, attrblock)
File "C:\Python27\lib\site-packages\paramiko\sftp_client.py", line 730, in _re
quest
return self._read_response(num)
File "C:\Python27\lib\site-packages\paramiko\sftp_client.py", line 781, in _re
ad_response
self._convert_status(msg)
File "C:\Python27\lib\site-packages\paramiko\sftp_client.py", line 807, in _co
nvert_status
raise IOError(errno.ENOENT, text)
IOError: [Errno 2] Invalid file ID
It sounds like the path you are writing on the server doesn't exist. You should check and create if doesn't exist.

Python ftplib.error_perm 550: No such file or directory?

I've written a Python script that is part of my attempt to automate daily ftp transfers from my server. I've tested the script with a number of files and file types (html, mp3, png, jpg, etc.) and everything seems to work out fine so far.
However, when I try to download a simple text file, 'file.txt' (9 kb), the download fails, although I account for text files and switch from binary to text mode for the transfer. The following exception is thrown by ftplib:
ftplib.error_perm: 550 file.txt: No such file or directory
Here's my script:
from ftplib import FTP_TLS, error_perm
import os
def open_connection(server, user, pwd, work_dir=None):
global ftps
try:
ftps = FTP_TLS(host=server)
ftps.login(user=user, passwd=pwd)
ftps.prot_p() # switch to secure data connection
if work_dir != None:
ftps.cwd(work_dir)
else:
pass
except:
pass
def download_file(remote_path, local_path):
remote_file = os.path.basename(remote_path)
local_file_path = os.path.join(local_path, remote_file)
# differentiate between text and binary files
file_type, encoding = guess_type_and_encoding(remote_file)
# possibly needs a permission exception catch
if file_type.split("/")[0] == "text" and encoding == None:
# use text mode for transfer
local_file = open(local_file_path, 'w')
def callback(line): local_file.write(line + "\n")
ftps.retrlines("RETR " + remote_file, callback)
local_file.close()
else:
# use binary mode for transfer
local_file = open(local_file_path, 'wb')
ftps.retrbinary("RETR " + remote_file, local_file.write)
local_file.close()
return
def guess_type_and_encoding(filename):
from mimetypes import guess_type, add_type
add_type('text/x-python-win', '.pyw') # not in tables
mimetype, encoding = guess_type(filename, False) # allow extras
mimetype = mimetype or "?/?" # type unknown
return mimetype, encoding
open_connection(server, user, pwd, work_dir)
download_file("/files/dir/file.txt", "/Users/username/Desktop")
ftps.close()
I don't get why the error is raised!? The arguments 'remote_path' and 'local_path' are correctly provided. Both paths exist! 'file.txt' exists on the server under /files/dir and /Users/username/Desktop points to my desktop on OS X.
Here's the detailed ftplib error:
Traceback (most recent call last):
File "ftp2.py", line 138, in <module>
download_file("/files/dir/file.txt", "/Users/username/Desktop")
File "ftp2.py", line 93, in download_file
ftps.retrlines("RETR " + remote_file, callback)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 735, in retrlines
conn = self.transfercmd(cmd)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 376, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 710, in ntransfercmd
conn, size = FTP.ntransfercmd(self, cmd, rest)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 339, in ntransfercmd
resp = self.sendcmd(cmd)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 249, in sendcmd
return self.getresp()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ftplib.py", line 224, in getresp
raise error_perm, resp
ftplib.error_perm: 550 file.txt: No such file or directory
Any help is greatly appreciated.
Thanks. :)
Try to
replace remote_file
in ftps.retrlines("RETR " + remote_file, callback)
with remote_path.

Python: Uploading files FTP_TLS- "550 The parameter is incorrect"

I'm trying to connect to an FTP server using TLS and upload a text file. The below code connects to the site just fine, but it's not uploading the file. Instead I'm getting the following error:
Traceback (most recent call last):
File "X:/HR & IT/Ryan/Python Scripts/ftps_connection_test.py", line 16, in <module>
ftps.storlines("STOR " + filename, open(filename,"r"))
File "C:\Python33\lib\ftplib.py", line 816, in storlines
with self.transfercmd(cmd) as conn:
File "C:\Python33\lib\ftplib.py", line 391, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "C:\Python33\lib\ftplib.py", line 756, in ntransfercmd
conn, size = FTP.ntransfercmd(self, cmd, rest)
File "C:\Python33\lib\ftplib.py", line 357, in ntransfercmd
resp = self.sendcmd(cmd)
File "C:\Python33\lib\ftplib.py", line 264, in sendcmd
return self.getresp()
File "C:\Python33\lib\ftplib.py", line 238, in getresp
raise error_perm(resp)
ftplib.error_perm: 550 The parameter is incorrect.
There's probably something really basic I'm missing, my code is below and any help is much appreciated.
import os
from ftplib import FTP_TLS as f
# Open secure connection
ftps = f("ftp.foo.com")
ftps.login(username,password)
ftps.prot_p()
# Create the test txt file to upload
filename = r"c:\path\to\file"
testFile = open(filename,"w")
testFile.write("Test file with test text")
testFile.close()
# Transfer testFile
ftps.storlines("STOR " + filename, open(filename,"r"))
# Quit connection
ftps.quit()
I have got the same error when trying to write upload a file to FTP server. In my case, the destination file name is not the correct format. It was something like
data_20180411T12:00:12.3435Z.txt
I renamed something like
data_20180411T120012_3435Z.txt. Then it worked.
filename = r"c:\path\to\file"
is the absolute path to a local file. This same value is being passed in the STOR command, i.e.
ftps.storlines("STOR " + filename, open(filename,"r"))
attempts to perform a STOR c:\path\to\file operation, however, it is unlikely that the path exists on the remote server, and the ftplib.error_perm exception would suggest that you don't have permission to write there (even if it does exist).
You could try this instead:
ftps.storlines("STOR " + os.path.basename(filename), open(filename,"r"))
which would issue a STOR file operation and upload the file to the default directory on the remote server. If you need to upload to a different path on the remote server, just add that to STOR.

Python ftp download and archive

I have been trying to script a code with python to grade the main directory of that ftp and archive it into a the local pc. I am not an amateur coder and python is fairly new to me.
What I am getting as an error right now is.
File "C:\Users\Ali\Desktop\ftp_archiving_script.py", line 24, in <module>
ftpDownload = ftp.retrbinary('RETR', filename)
Code:
from ftplib import FTP
import zipfile
import os
try:
import zlib
compression = zipfile.ZIP_DEFLATED
except:
compression = zipfile.ZIP_STORED
modes = { zipfile.ZIP_DEFLATED: "deflated",
zipfile.ZIP_STORED: "stored",
}
#print "Logging in..."
with FTP('xxx.xxx.xxx') as ftp: #hostname goes here
ftp.login('xxxx','xxxx') #user followed by pass
#print "changing to root directory"
ftp.mlsd('//')
#print "Accessing files"
filenames = []
#print filenames
ftp.retrlines('NLST', filenames.append)
try:
for filename in filenames:
ftpDownload = ftp.retrbinary('RETR', filename)
with ZipFile(os.path.join('C:\\','DC_archive.zip'), 'w') as myzip:
myzip.write(ftpDownload, compress_type=compression)
myzip.close()
finally:
#print "closing"
ftp.close()
ftp.quit()
Can anyone enlighten me on this problem.
Thank you,
Update
try:
for filename in filenames:
with io.StringIO() as fObject:
ftp.retrbinary('RETR %s' %filename, fObject.write)
with ZipFile(os.path.join('C:\\','DC_archive.zip'), 'w') as myzip:
myzip.write(fObject, compress_type=compression)
myzip.close()
updated Traceback for #fals... Also this is using your code below and not the one I have at the top.
Traceback (most recent call last):
File "C:\Users\Ali\Desktop\ftp_archive2.py", line 20, in <module>
ftpDownload = ftp.retrbinary('RETR ' + filename, f.write)
File "C:\Python33\lib\ftplib.py", line 424, in retrbinary
with self.transfercmd(cmd, rest) as conn:
File "C:\Python33\lib\ftplib.py", line 386, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "C:\Python33\lib\ftplib.py", line 352, in ntransfercmd
resp = self.sendcmd(cmd)
File "C:\Python33\lib\ftplib.py", line 259, in sendcmd
return self.getresp()
File "C:\Python33\lib\ftplib.py", line 233, in getresp
raise error_perm(resp)
ftplib.error_perm: 550 File not found
From the Python documentation for ftplib.retrbinary:
FTP.retrbinary(command, callback[, maxblocksize[, rest]])
Retrieve a file in binary transfer mode. command should be an
appropriate RETR command: 'RETR filename'. The callback function is
called for each block of data received, with a single string argument
giving the data block.
Nowhere does it indicate that it returns a file-like object or string.
Instead, you have to create your own callback to write to a file object.
with open('my-downloaded-file', 'wb') as f:
ftp.retrbinary('RETR %s' % filename, f.write)
Here, f.write is the callback which will receive data as it arrives from the socket. If you don't want to save the file to disk using open, you can use the StringIO module to simulate a file in memory.
Try following code:
import ftplib
from io import BytesIO
import os
import zipfile
REMOTE_HOST = 'xxx.xxx.xxx'
REMOTE_USER = '...'
REMOTE_PASS = '...'
REMOTE_DIR_PATH = '//'
LOCAL_ZIP_PATH = os.path.join(os.path.expanduser('~'), 'Desktop', 'DC_archive.zip')
ftp = ftplib.FTP(REMOTE_HOST)
try:
ftp.login(REMOTE_USER, REMOTE_PASS)
ftp.cwd(REMOTE_DIR_PATH)
filenames = ftp.nlst()
with zipfile.ZipFile(LOCAL_ZIP_PATH, 'w') as zf:
for filename in filenames:
with BytesIO() as f:
try:
ftpDownload = ftp.retrbinary('RETR ' + filename, f.write)
zf.writestr(filename, f.getvalue())
except ftplib.Error as e:
print('Skip {}: {}'.format(filename, e))
finally:
ftp.quit()

Categories