I am trying to search for .txt files in a specified folder and encrypt each one of the .txt files found using my encryption algorithms. However I cannot seem to be able to figure out how to encrypt all the .txt files found within the folder and rename them
this is the code I am working with currently
import time, os, sys, encrypt, decrypt, caesarCipher, reverseCipher, vigenereCipher, glob
def main():
outputFilename = 'ABC.encrypted.txt'
mKey = 5
myMode = 'encrypt'
for root, dirs, files in os.walk('/Ransom'):
for file in files:
if file.endswith((".txt")):
inputFilename = os.path.join(root, file)
if not os.path.exists(inputFilename):
print('The file %s does not exist. Exiting....' % (inputFilename))
sys.exit()
fileObj = open(inputFilename)
content = fileObj.read()
fileObj.close()
print ('%sing...' % (myMode.title()))
startTime = time.time()
if myMode == 'encrypt':
translated = encrypt.encryptMess(mKey, content, myMode)
elif myMode == 'decrypt':
translated = decrypt.decryptMess(mKey, content, myMode)
outputFileObj = open(outputFilename, 'w')
outputFileObj.write(translated)
outputFileObj.close()
print('Done %sing %s (%s characters).' % (myMode, inputFilename, len(content)))
print('%sed file is %s.' % (myMode.title(), outputFilename))
if __name__ == '__main__':
main()
I really appreciate any help to guide me into achieving this.
This code iterates over all the files in a given folder and calls a designated method whenever the file is '*.txt'
import os
baseUrl = './'
def encryptFile(filename):
# process one file here
print baseUrl + filename
alist = next(os.walk(baseUrl))[2]
for i in xrange(len(alist)):
afile = alist[i]
if afile[-4:] == '.txt':
encryptFile(afile)
Related
I am trying to download files using python script from my ftp server...However i am getting the files which are of size 0 kb...i can't understand exactly where i am wrong...I am actually searching the files by a particular string in filename and then downloading all the files having that string on my ftp in a given directory.
Here is my code:
# Libraries
import re
import os
import ftplib
import ntpath
ftp = ftplib.FTP("192.168.1.786:22")
ftp.login("Marshmellow", "YourPasswordHere")
##ftp.dir("feed_1")
files = []
## F = open('Files.txt','a')
try:
files = ftp.nlst("feed_1")
for fname in files:
res = re.findall("2018-07-25", fname)
if res:
# Open the file for writing in binary mode
print 'Opening local file ' + ntpath.basename(fname)
file = open(ntpath.basename(fname), 'wb')
# Download the file a chunk at a time
# Each chunk is sent to handleDownload
# We append the chunk to the file and then print a '.' for progress
# RETR is an FTP command
print 'Getting ' + ntpath.basename(fname)
try:
ftp.retrbinary('RETR ' + ntpath.basename(fname), file.write)
except:
pass
# Clean up time
print 'Closing file ' + ntpath.basename(fname)
file.close()
print (fname)
## F.write(fname + '\n')
if not res:
continue
except ftplib.error_perm , resp:
if str(resp) == "550 No files found":
print "No files in this directory"
pass
else:
raise
## F.close()
Help Me Out if anyone knows what's wrong in this.
try:
ftp.cwd("feed_1")
files = ftp.nlst() for fname in files:
res = re.findall("2018-07-25", fname) if res:
# Open the file for writing in binary mode
print 'Opening local file ' + ntpath.basename(fname)
file = open(ntpath.basename(fname), 'wb')
i've just set the current working directory using ftp.cwd("feed_1") which i did the wrong way earlier like: files = ftp.nlst("feed_1")
Here is what I try to do:
I would like to get a list of all files that are heavier than 35 MB in my C drive.
Here is my code:
def getAllFileFromDirectory(directory, temp):
files = os.listdir(directory)
for file in files:
if (os.path.isdir(file)):
getAllFileFromDirectory(file, temp)
elif (os.path.isfile(file) and os.path.getsize(file) > 35000000):
temp.write(os.path.abspath(file))
def getFilesOutOfTheLimit():
basePath = "C:/"
tempFile = open('temp.txt', 'w')
getAllFileFromDirectory(basePath, tempFile)
tempFile.close()
print("Get all files ... Done !")
For some reason, the interpreter doesn't go in the if-block inside 'getAllFileFromDirectory'.
Can someone tell me what I'm doing wrong and why (learning is my aim). How to fix it ?
Thanks a lot for your comments.
I fixed your code. Your problem was that os.path.isdir can only know if something is a directory if it receives the full path of it. So, I changed the code to the following and it works. Same thing for os.path.getsize and os.path.isfile.
import os
def getAllFileFromDirectory(directory, temp):
files = os.listdir(directory)
for file in files:
if (os.path.isdir(directory + file)):
if file[0] == '.': continue # i added this because i'm on a UNIX system
print(directory + file)
getAllFileFromDirectory(directory + file, temp)
elif (os.path.isfile(directory + file) and os.path.getsize(directory + file) > 35000000):
temp.write(os.path.abspath(file))
def getFilesOutOfTheLimit():
basePath = "/"
tempFile = open('temp.txt', 'w')
getAllFileFromDirectory(basePath, tempFile)
tempFile.close()
print("Get all files ... Done !")
getFilesOutOfTheLimit()
I want to find the md5sum of files starting with "10" ( could be exe, doc, pdf etc) hence not checking the file extension but only the start two digits. So far I've a script to traverse through the directory and print out all such files but couldn't get the checksum to be printed for each of them:
def print_files(file_directory, file_extensions=['10']):
''' Print files in file_directory with extensions in file_extensions, recursively. '''
# Get the absolute path of the file_directory parameter
file_directory = os.path.abspath(file_directory)
# Get a list of files in file_directory
file_directory_files = os.listdir(file_directory)
# Traverse through all files
for filename in file_directory_files:
filepath = os.path.join(file_directory, filename)
# Check if it's a normal file or directory
if os.path.isfile(filepath):
# Check if the file has an extension of typical video files
for file_extension in file_extensions:
# Not a reqd file, ignore
#if not filepath.endswith(file_extension):
if not filename.startswith(file_extension) or len(filename) != 19:
continue
# We have got a '10' file!
print_files.counter += 1
## TRYING TO READ AND PRINT MD5 USING HASHLIB/ DOESNT WORK###
hasher = hashlib.md5()
with open(filename, 'rb') as afile:
buf = afile.read(65536)
while len(buf) > 0:
hasher.update(buf)
buf = afile.read(65536)
# Print it's name
print('{0}'.format(filepath))
print hasher('{0}.format(filepath)').hexdigest()
print '\n'
elif os.path.isdir(filepath):
# We got a directory, enter into it for further processing
print_files(filepath)
if __name__ == '__main__':
# Directory argument supplied
if len(sys.argv) == 2:
if os.path.isdir(sys.argv[1]):
file_directory = sys.argv[1]
else:
print('ERROR: "{0}" is not a directory.'.format(sys.argv[1]))
exit(1)
else:
# Set file directory to CWD
file_directory = os.getcwd()
print('\n -- Looking for Required Files in "{0}" -- \n'.format(file_directory))
# Set the number of processed files equal to zero
print_files.counter = 0
# Start Processing
print_files(file_directory)
# We are done. Exit now.
'
I'd recommend that you do not solve this recursively, but instead make use of os.walk() to traverse the directory structure. The following code could be the body of your print_files function.
file_directory = os.path.abspath(file_directory)
paths_to_hash = []
for root, dirs, filenames in os.walk(file_directory, topdown=False):
for i, dir in enumerate(dirs):
for filename in filenames[i]:
if filenames[:2] == '10':
paths_to_hash += [os.path.abspath('{0}/{1}/{2}'.format(root, dir, filename)]
for path in paths_to_hash:
hash = hashlib.md5(open(path, 'rb').read()).digest())
print 'hash: {0} for path: {1}'.format(hash, path)
The line printing the hasher should be:
print('{0}'.format(hasher.hexdigest()))
Got it fixed with this line
print hashlib.md5(open('{0}'.format(filepath)).read()).hexdigest()
I wasnt reading the file but just passing hashlib.md5. Thanks Matt for the insight.
I am trying to write a script that tracks for changes made in directories/files set to multiple file paths created by an installer. I found Thomas Sileo's DirTools project on git, modified it, but am now running into some issues when writing/reading from JSON:
1) First, I believe that I am writing to JSON incorrectly and am finding that my create_state() function is only writing the last path I need.
2) If I get it working, I am unable to read/parse the file like I was before. I usually get ValueError: Extra data errors
Code below:
import os import json import getpass
files = [] subdirs = []
USER = getpass.getuser()
pathMac = ['/Applications/',
'/Users/' + USER + '/Documents/' ]
def create_dir_index(path):
files = []
subdirs = []
for root, dirs, filenames in os.walk(path):
for subdir in dirs:
subdirs.append(os.path.relpath(os.path.join(root, subdir), path))
for f in filenames:
files.append(os.path.relpath(os.path.join(root, f), path))
return dict(files=files, subdirs=subdirs)
def create_state(): for count in xrange(len(pathMac)):
dir_state = create_dir_index(pathMac[count])
out_file = open("Manifest.json", "w")
json.dump(dir_state, out_file)
out_file.close()
def compare_states(dir_base, dir_cmp):
'''
return a comparison two manifest json files
'''
data = {}
data['deleted'] = list(set(dir_cmp['files']) - set(dir_base['files']))
data['created'] = list(set(dir_base['files']) - set(dir_cmp['files']))
data['deleted_dirs'] = list(set(dir_cmp['subdirs']) - set(dir_base['subdirs']))
data['created_dirs'] = list(set(dir_base['subdirs']) - set(dir_cmp['subdirs']))
return data
if __name__ == '__main__':
response = raw_input("Would you like to Compare or Create? ")
if response == "Create":
# CREATE MANIFEST json file
create_state()
print "Manifest file created."
elif response == "Compare":
# create the CURRENT state of all indexes in pathMac and write to json file
for count in xrange(len(pathMac)):
dir_state = create_dir_index(pathMac[count])
out_file = open("CurrentState.json", "w")
json.dump(dir_state, out_file)
out_file.close()
# Open and Load the contents from the file into dictionaries
manifest = json.load(open("Manifest.json", "r"))
current = json.load(open("CurrentState.json", "r"))
print compare_states(current, manifest)
I wrote a script to read PDF metadata to ease a task at work. The current working version is not very usable in the long run:
from pyPdf import PdfFileReader
BASEDIR = ''
PDFFiles = []
def extractor():
output = open('windoutput.txt', 'r+')
for file in PDFFiles:
try:
pdf_toread = PdfFileReader(open(BASEDIR + file, 'r'))
pdf_info = pdf_toread.getDocumentInfo()
#print str(pdf_info) #print full metadata if you want
x = file + "~" + pdf_info['/Title'] + " ~ " + pdf_info['/Subject']
print x
output.write(x + '\n')
except:
x = file + '~' + ' ERROR: Data missing or corrupt'
print x
output.write(x + '\n')
pass
output.close()
if __name__ == "__main__":
extractor()
Currently, as you can see, I have to manually input the working directory and manually populate the list of PDF files. It also just prints out the data in the terminal in a format that I can copy/paste/separate into a spreadsheet.
I'd like the script to work automatically in whichever directory I throw it in and populate a CSV file for easier use. So far:
from pyPdf import PdfFileReader
import csv
import os
def extractor():
basedir = os.getcwd()
extension = '.pdf'
pdffiles = [filter(lambda x: x.endswith('.pdf'), os.listdir(basedir))]
with open('pdfmetadata.csv', 'wb') as csvfile:
for f in pdffiles:
try:
pdf_to_read = PdfFileReader(open(f, 'r'))
pdf_info = pdf_to_read.getDocumentInfo()
title = pdf_info['/Title']
subject = pdf_info['/Subject']
csvfile.writerow([file, title, subject])
print 'Metadata for %s written successfully.' % (f)
except:
print 'ERROR reading file %s.' % (f)
#output.writerow(x + '\n')
pass
if __name__ == "__main__":
extractor()
In its current state it seems to just prints a single error (as in, the error message in the exception, not an error returned by Python) message and then stop. I've been staring at it for a while and I'm not really sure where to go from here. Can anyone point me in the right direction?
writerow([file, title, subject]) should be writerow([f, title, subject])
You can use sys.exc_info() to print the details of your error
http://docs.python.org/2/library/sys.html#sys.exc_info
Did you check the pdffiles variable contains what you think it does? I was getting a list inside a list... so maybe try:
for files in pdffiles:
for f in files:
#do stuff with f
I personally like glob. Notice I add * before the .pdf in the extension variable:
import os
import glob
basedir = os.getcwd()
extension = '*.pdf'
pdffiles = glob.glob(os.path.join(basedir,extension)))
Figured it out. The script I used to download the files was saving the files with '\r\n' trailing after the file name, which I didn't notice until I actually ls'd the directory to see what was up. Thanks for everyone's help.