I am trying to download files from my FTP server using a Python script, but every file I get is 0 KB, and I can't see where I'm going wrong. I search for files whose names contain a particular string, then download every matching file from a given directory on the FTP server.
Here is my code:
# Libraries
import re
import os
import ftplib
import ntpath

ftp = ftplib.FTP("192.168.1.786:22")
ftp.login("Marshmellow", "YourPasswordHere")
##ftp.dir("feed_1")
files = []
## F = open('Files.txt','a')
try:
    files = ftp.nlst("feed_1")
    for fname in files:
        res = re.findall("2018-07-25", fname)
        if res:
            # Open the file for writing in binary mode
            print 'Opening local file ' + ntpath.basename(fname)
            file = open(ntpath.basename(fname), 'wb')
            # Download the file a chunk at a time
            # Each chunk is sent to handleDownload
            # We append the chunk to the file and then print a '.' for progress
            # RETR is an FTP command
            print 'Getting ' + ntpath.basename(fname)
            try:
                ftp.retrbinary('RETR ' + ntpath.basename(fname), file.write)
            except:
                pass
            # Clean up time
            print 'Closing file ' + ntpath.basename(fname)
            file.close()
            print (fname)
            ## F.write(fname + '\n')
        if not res:
            continue
except ftplib.error_perm, resp:
    if str(resp) == "550 No files found":
        print "No files in this directory"
        pass
    else:
        raise
## F.close()
Can anyone help me spot what's wrong here?
try:
    ftp.cwd("feed_1")
    files = ftp.nlst()
    for fname in files:
        res = re.findall("2018-07-25", fname)
        if res:
            # Open the file for writing in binary mode
            print 'Opening local file ' + ntpath.basename(fname)
            file = open(ntpath.basename(fname), 'wb')
The fix was to set the current working directory with ftp.cwd("feed_1") before listing. Earlier I had done it the wrong way, passing the directory to the listing call instead: files = ftp.nlst("feed_1"). That returned paths like feed_1/filename, so the RETR on just the basename, issued while still in the root directory, retrieved nothing; the bare except: pass hid the error, and the already-opened local files were left at 0 KB.
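For anyone hitting the same thing, here is a minimal sketch of the corrected loop (the host and credentials are placeholders, just like in the question, and a plain substring test replaces re.findall):

import ftplib
import ntpath

ftp = ftplib.FTP("192.168.1.100")   # placeholder host; plain FTP defaults to port 21
ftp.login("Marshmellow", "YourPasswordHere")
ftp.cwd("feed_1")                   # change into the directory first
for fname in ftp.nlst():            # names are now relative to feed_1
    if "2018-07-25" in fname:
        local_name = ntpath.basename(fname)
        print 'Getting ' + local_name
        local_file = open(local_name, 'wb')
        ftp.retrbinary('RETR ' + fname, local_file.write)
        local_file.close()
ftp.quit()

Note there is no bare except around retrbinary here: swallowing the ftplib error is exactly what hid the empty-file problem in the first place.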
I have a function that lists all files in a directory and encrypts them with a public key.
The issue I'm having is that when there are several files inside the directory, some of them get corrupted: two or three of them end up as GPG files of exactly 858 bytes with no real content, and if I decrypt them I get 0-byte files.
Function
import os
from gnupg import GPG  # python-gnupg

def gpg_encrypt(source):
    gpg = GPG(gnupghome='/home/mohs3n/.gnupg', use_agent=True)
    try:
        if os.path.isfile(source):
            if source.endswith('.gpg'):
                print(source + ' is already encrypted')
            else:
                stream = open(source, "rb")
                status = gpg.encrypt_file(stream, 'C05819CE8A9DA638BD6B6E08688D1CE89FCE05B3', armor=False, always_trust=True, output=source+'.gpg', symmetric=False)
                stream.close()
                if status.ok:
                    os.remove(source)
                    print(source, ' successfully encrypted')
        elif os.path.isdir(source):
            for root, dirs, files in os.walk(source, topdown=True):
                for name in files:
                    current_file = os.path.join(root, name)
                    if current_file.endswith('.gpg'):
                        print(current_file + ' : is already encrypted')
                    else:
                        stream = open(current_file, "rb")
                        status = gpg.encrypt_file(stream, 'C05819CE8A9DA638BD6B6E08688D1CE89FCE05B3', armor=False, always_trust=True, output=source+'/'+name+'.gpg', symmetric=False)
                        stream.close()
                        if status.ok:
                            os.remove(current_file)
                            print(current_file + ' successfully encrypted')
    except Exception as e:
        print(e)
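One thing worth double-checking, though I can't be sure without seeing the directory layout: in the os.walk branch the output path is built from source rather than root, so two files with the same name in different subdirectories both get written to source/name.gpg, and the second encryption clobbers the first. A sketch of that one-line change, everything else as in the question:

status = gpg.encrypt_file(
    stream,
    'C05819CE8A9DA638BD6B6E08688D1CE89FCE05B3',
    armor=False,
    always_trust=True,
    output=os.path.join(root, name + '.gpg'),  # was: source + '/' + name + '.gpg'
    symmetric=False,
)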
I am trying to search for .txt files in a specified folder and encrypt each one of them using my encryption algorithms. However, I cannot figure out how to encrypt all the .txt files found within the folder and give each output its own name.
This is the code I am working with currently:
import time, os, sys, encrypt, decrypt, caesarCipher, reverseCipher, vigenereCipher, glob

def main():
    outputFilename = 'ABC.encrypted.txt'
    mKey = 5
    myMode = 'encrypt'
    for root, dirs, files in os.walk('/Ransom'):
        for file in files:
            if file.endswith(".txt"):
                inputFilename = os.path.join(root, file)
                if not os.path.exists(inputFilename):
                    print('The file %s does not exist. Exiting....' % (inputFilename))
                    sys.exit()
                fileObj = open(inputFilename)
                content = fileObj.read()
                fileObj.close()
                print('%sing...' % (myMode.title()))
                startTime = time.time()
                if myMode == 'encrypt':
                    translated = encrypt.encryptMess(mKey, content, myMode)
                elif myMode == 'decrypt':
                    translated = decrypt.decryptMess(mKey, content, myMode)
                outputFileObj = open(outputFilename, 'w')
                outputFileObj.write(translated)
                outputFileObj.close()
                print('Done %sing %s (%s characters).' % (myMode, inputFilename, len(content)))
                print('%sed file is %s.' % (myMode.title(), outputFilename))

if __name__ == '__main__':
    main()
I'd really appreciate any help in achieving this.
This code iterates over all the files in a given folder and calls a designated method whenever the file is '*.txt'.
import os

baseUrl = './'

def encryptFile(filename):
    # process one file here
    print baseUrl + filename

# next(os.walk(...))[2] is the list of plain files directly under baseUrl
alist = next(os.walk(baseUrl))[2]
for afile in alist:
    if afile.endswith('.txt'):
        encryptFile(afile)
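To also give each output its own name instead of the fixed 'ABC.encrypted.txt', one option is to derive the output name from the input name. A sketch under the question's setup (encrypt.encryptMess and the /Ransom folder come from the question; I haven't run it against those modules):

import os
import encrypt   # the question's own module

mKey = 5
myMode = 'encrypt'

for root, dirs, files in os.walk('/Ransom'):
    for name in files:
        if name.endswith('.txt') and not name.endswith('.encrypted.txt'):
            inputFilename = os.path.join(root, name)
            # e.g. notes.txt -> notes.encrypted.txt, one output per input
            outputFilename = os.path.join(root, name[:-4] + '.encrypted.txt')
            with open(inputFilename) as fileObj:
                content = fileObj.read()
            translated = encrypt.encryptMess(mKey, content, myMode)
            with open(outputFilename, 'w') as outputFileObj:
                outputFileObj.write(translated)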
My goal is to get at a .txt file that is within the second layer of zip files. The issue is that the .txt file has the same name in all the .zip archives, so each extraction overwrites the previous one and I end up with only one .txt.
from ftplib import *
import os, shutil, glob, zipfile, xlsxwriter

ftps = FTP_TLS()
ftps.connect(host='8.8.8.8', port=23)
ftps.login(user='xxxxxxx', passwd='xxxxxxx')
print ftps.getwelcome()
print 'Access was granted'
ftps.prot_p()
ftps.cwd('DirectoryINeed')

data = ftps.nlst()  # returns a list of .zip files
data.sort()         # sorts the list
theFile = data[-2]  # the .zip file I need to retrieve

fileSize = ftps.size(theFile)  # gets the size of the file
print fileSize, 'bytes'        # prints the size

def grabFile():
    filename = 'the.zip'
    localfile = open(filename, 'wb')
    ftps.retrbinary('RETR ' + theFile, localfile.write)
    ftps.quit()
    localfile.close()

def unzipping():
    zip_files = glob.glob('*.zip')
    for zip_file in zip_files:
        with zipfile.ZipFile(zip_file, 'r') as Z:
            Z.extractall('anotherdirectory')

grabFile()
unzipping()
lastUnzip()  # defined elsewhere in the full script
After this runs, it grabs the .zip that I need and extracts the contents to a folder named anotherdirectory, which holds the second tier of .zips. This is where I get into trouble: when I try to extract the files from each zip, they all share the same name, so I end up with a single .txt when I need one for each zip.
I think you're specifying the same output directory and filename each time. In the unzipping function,
change
Z.extractall('anotherdirectory')
to
Z.extractall(zip_file)
or
Z.extractall('anotherdirectory/' + zip_file)
If the zip_file names are all the same, give each output folder a unique numbered name.
Before the unzipping function:
count = 1
then replace the extractall call with this:
Z.extractall('anotherdirectory/' + str(count))
count += 1
Thanks to jeremydeanlakey's response, I was able to get this part of my script. Here is how I did it:
folderUnzip = 'DirectoryYouNeed'
zip_files = glob.glob('*.zip')
count = 1
for zip_file in zip_files:
    with zipfile.ZipFile(zip_file, 'r') as Z:
        Z.extractall(folderUnzip + '/' + str(count))
    count += 1
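Equivalent, and a bit tidier, is to let enumerate handle the counter and os.path.join build the path (a sketch with the same behavior as above):

import glob
import os
import zipfile

folderUnzip = 'DirectoryYouNeed'
for count, zip_file in enumerate(glob.glob('*.zip'), start=1):
    with zipfile.ZipFile(zip_file, 'r') as Z:
        # each archive gets its own numbered folder, e.g. DirectoryYouNeed/1
        Z.extractall(os.path.join(folderUnzip, str(count)))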
I wrote a script to read PDF metadata to ease a task at work. The current working version is not very usable in the long run:
from pyPdf import PdfFileReader

BASEDIR = ''
PDFFiles = []

def extractor():
    output = open('windoutput.txt', 'r+')
    for file in PDFFiles:
        try:
            pdf_toread = PdfFileReader(open(BASEDIR + file, 'r'))
            pdf_info = pdf_toread.getDocumentInfo()
            #print str(pdf_info) #print full metadata if you want
            x = file + "~" + pdf_info['/Title'] + " ~ " + pdf_info['/Subject']
            print x
            output.write(x + '\n')
        except:
            x = file + '~' + ' ERROR: Data missing or corrupt'
            print x
            output.write(x + '\n')
            pass
    output.close()

if __name__ == "__main__":
    extractor()
Currently, as you can see, I have to manually input the working directory and manually populate the list of PDF files. It also just prints out the data in the terminal in a format that I can copy/paste/separate into a spreadsheet.
I'd like the script to work automatically in whichever directory I throw it in and populate a CSV file for easier use. So far:
from pyPdf import PdfFileReader
import csv
import os

def extractor():
    basedir = os.getcwd()
    extension = '.pdf'
    pdffiles = [filter(lambda x: x.endswith('.pdf'), os.listdir(basedir))]
    with open('pdfmetadata.csv', 'wb') as csvfile:
        for f in pdffiles:
            try:
                pdf_to_read = PdfFileReader(open(f, 'r'))
                pdf_info = pdf_to_read.getDocumentInfo()
                title = pdf_info['/Title']
                subject = pdf_info['/Subject']
                csvfile.writerow([file, title, subject])
                print 'Metadata for %s written successfully.' % (f)
            except:
                print 'ERROR reading file %s.' % (f)
                #output.writerow(x + '\n')
                pass

if __name__ == "__main__":
    extractor()
In its current state it seems to just print a single error message (the one from my except block, not an error raised by Python) and then stop. I've been staring at it for a while and I'm not really sure where to go from here. Can anyone point me in the right direction?
writerow([file, title, subject]) should be writerow([f, title, subject])
You can use sys.exc_info() to print the details of your error
http://docs.python.org/2/library/sys.html#sys.exc_info
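For example, reusing the names from the question's loop (a minimal sketch):

import sys

try:
    pdf_info = pdf_to_read.getDocumentInfo()  # as in the question
except Exception:
    exc_type, exc_value, _ = sys.exc_info()
    print 'ERROR reading file %s: %s: %s' % (f, exc_type.__name__, exc_value)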
Did you check the pdffiles variable contains what you think it does? I was getting a list inside a list... so maybe try:
for files in pdffiles:
    for f in files:
        # do stuff with f
I personally like glob. Notice I add * before the .pdf in the extension variable:
import os
import glob
basedir = os.getcwd()
extension = '*.pdf'
pdffiles = glob.glob(os.path.join(basedir, extension))
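Putting the writerow fix and the glob listing together, a sketch of what the corrected loop could look like (untested, assuming pyPdf exactly as in the question):

import csv
import glob
import os
from pyPdf import PdfFileReader

pdffiles = glob.glob(os.path.join(os.getcwd(), '*.pdf'))
with open('pdfmetadata.csv', 'wb') as csvfile:
    writer = csv.writer(csvfile)   # wrap the file object in a csv writer
    for f in pdffiles:
        try:
            pdf_info = PdfFileReader(open(f, 'rb')).getDocumentInfo()
            writer.writerow([os.path.basename(f), pdf_info['/Title'], pdf_info['/Subject']])
            print 'Metadata for %s written successfully.' % f
        except Exception:
            print 'ERROR reading file %s.' % f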
Figured it out. The script I used to download the files was saving the files with '\r\n' trailing after the file name, which I didn't notice until I actually ls'd the directory to see what was up. Thanks for everyone's help.
My Python script downloads a gzip file from a remote server and reads it. Every time I run the script, the .gz files end up on my hard disk (in the Sites/htdocs folder, since it runs as a Python CGI). I either don't want the files written to disk at all, or I want them deleted once the script finishes.
Here is a snapshot of my script:
u = 'http://hapmap.ncbi.nlm.nih.gov/downloads/frequencies/2010-05_phaseIII/'
filename1 = 'allele_freqs_chr' + chromosomes[i] + '_' + populations[0] + '_phase3.3_nr.b36_fwd.txt.gz'
url = u + filename1
try:
    site = urllib.urlretrieve(url, filename1)
except IOError:
    print >> sys.stderr, 'Error opening URL.\n'
try:
    f1 = gzip.open(filename1, 'rb')
except IOError:
    print >> sys.stderr, 'Error opening file1.\n'
    sys.exit(1)
line = f1.readline()
# ...
I appreciate your suggestions.
os.unlink(filename1) should work. Also, use finally: in your try: block to close the file descriptor like so:
import os

u = 'http://hapmap.ncbi.nlm.nih.gov/downloads/frequencies/2010-05_phaseIII/'
filename1 = 'allele_freqs_chr' + chromosomes[i] + '_' + populations[0] + '_phase3.3_nr.b36_fwd.txt.gz'
url = u + filename1
try:
    site = urllib.urlretrieve(url, filename1)
except IOError:
    print >> sys.stderr, 'Error opening URL.\n'
try:
    f1 = gzip.open(filename1, 'rb')
except IOError:
    print >> sys.stderr, 'Error opening file1.\n'
    sys.exit(1)
else:
    line = f1.readline()
    # ....
finally:
    try:
        f1.close()
    except:
        pass
    os.unlink(filename1)
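If you're on Python 2.7 or later, where GzipFile works as a context manager, a with block gets you the same cleanup with less ceremony (a sketch, same caveats as above):

import gzip
import os

try:
    with gzip.open(filename1, 'rb') as f1:   # closes f1 even if reading fails
        line = f1.readline()
        # ....
finally:
    if os.path.exists(filename1):
        os.unlink(filename1)                 # remove the downloaded temp file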
You can use urllib.urlopen instead of urllib.urlretrieve
fd = urllib.urlopen(url)
s_data = fd.read() # These 2 lines are unfortunately
s_stream = StringIO.StringIO(s_data) # needed in Python < 3.2
f1 = gzip.GzipFile(fileobj=s_stream)
See also: http://www.enricozini.org/2011/cazzeggio/python-gzip/ (On why you have to use StringIO)
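And, as the comment in the snippet above says, the in-memory buffer is only needed before Python 3.2; from 3.2 on, gzip can read straight from the unseekable response object, so a Python 3 sketch reduces to:

from urllib.request import urlopen
import gzip

f1 = gzip.GzipFile(fileobj=urlopen(url))  # no temp file, no StringIO buffer
line = f1.readline()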