I have to run my code twice to get desired outcome, why? - python

I'm finding that I have to run my code twice for the desired output and I'm not sure why. It's also printing a long string of letters in the shell that aren't needed. I'd just like it to be a bit cleaner.
The code creates folders with subfolders, based on files names, then moves the files into specific subfolders.
Filename example is "A123456-20190101-A01.mp3"
import os
import shutil
path = "/Volumes/ADATA UFD/For script/Files"
file_names = [file for file in os.listdir(path) if
os.path.isfile(os.path.join(path, file))]
file_map = {'O':'1-Original','P':'2-PreservationMaster','M':'3-Mezzanine','T':'4-Presentation','A':'5-Access','R':'6-Reference'}
parent_folders = set(file_name.rsplit('-', 1)[0] for file_name in file_names)
sub_folders = ['1-Original','2-PreservationMaster','3-Mezzanine','4-Presentation','5-Access','6-Reference']
for folder in parent_folders:
folder_path = os.path.join(path, folder)
try:
os.mkdir(folder_path)
except:
print('folder already exist:', folder_path)
for folders in sub_folders:
try:
folders_path = os.path.join(folder_path, folders)
os.mkdir(folders_path)
except:
print('folder already exists:', folders_path)
for file_name in file_names:
parent_folder = file_name.rsplit('-', 1)[0]
ext = file_name[19]
print(ext)
dest = os.path.join(path, parent_folder, file_map[ext.upper()], file_name)
src = os.path.join(path, file_name)
try:
shutil.move(src, dest)
except Exception as e:
print(e)
I'm getting this error message:
Traceback (most recent call last):
File "/Volumes/ADATA UFD/For script/MoveFilesToPreservationBundleTest3.py", line 30, in <module>
dest = os.path.join(path, parent_folder, file_map[ext.upper()], file_name)
builtins.KeyError: '0'

Related

Check list if file has downloaded and skip if it has?

I am new to Python and sure the below can be optimised however I have ran in to an issue with my last step in my script.
The aim is not to download a file if it has been previously downloaded. At this time I log the download in a file called download_history.log
I need to therefore implement a check here to kind of do the following check the log - if it exists in log do nothing and move to next file if it does not exists download the file and log it in to the file.
Any help would be appreciated.
#!/usr/bin/env python3
import boto
import sys, os
import zipfile
import shutil
import glob
import re
from boto.s3.key import Key
from boto.exception import S3ResponseError
#Make the download files
DOWNLOAD_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Downloads/"
if not os.path.exists(DOWNLOAD_LOCATION_PATH):
print ("Making download directory")
os.mkdir(DOWNLOAD_LOCATION_PATH)
#Delete Output Folder if it exsists
OUTPUT_FOLDER = os.path.expanduser("~") + "/AWSSplunk/Output/"
shutil.rmtree(OUTPUT_FOLDER)
#Define the AWS Bucket
def backup_s3_folder():
BUCKET_NAME = "my-bucket-name"
AWS_ACCESS_KEY_ID= os.getenv("##################")
AWS_ACCESS_SECRET_KEY = os.getenv("#########################")
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_SECRET_KEY)
bucket = conn.get_bucket(BUCKET_NAME)
#goto through the list of files
bucket_list = bucket.list()
for l in bucket_list:
key_string = str(l.key)
s3_path = DOWNLOAD_LOCATION_PATH + key_string
try:
# Add files to the log file
print ("Downloading file ", key_string)
file_object = open('download_history.log', 'a')
file_object.write(key_string)
file_object.write("\n")
# Working code
file_object.close()
l.get_contents_to_filename(s3_path)
except (OSError,S3ResponseError) as e:
pass
# check if the file has been downloaded locally
if not os.path.exists(s3_path):
try:
os.makedirs(s3_path)
except OSError as exc:
# let guard againts race conditions
import errno
if exc.errno != errno.EEXIST:
raise
if __name__ == '__main__':
backup_s3_folder()
# Start the unzipping process
print("Unzipping Starting")
dir_path = os.path.expanduser("~") + "/AWSSplunk/Downloads/"
for path, dir_list, file_list in os.walk(dir_path):
for file_name in file_list:
if file_name.endswith(".zip"):
abs_file_path = os.path.join(path, file_name)
parent_path = os.path.split(abs_file_path)[0]
output_folder_name = os.path.splitext(abs_file_path)[0]
output_path = os.path.join(parent_path, output_folder_name)
zip_obj = zipfile.ZipFile(abs_file_path, 'r')
zip_obj.extractall(output_path)
zip_obj.close()
print("Unzipping Completed")
# Start moving files to output
print("Moving Files")
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
if not os.path.exists(FILE_LOCATION_PATH):
print ("Making download directory")
os.mkdir(FILE_LOCATION_PATH)
# .log files move
for root, dirs, files in os.walk(dir_path):
for file in files:
if file.endswith('.log'):
count = 1
destination_file = os.path.join(FILE_LOCATION_PATH, file)
while os.path.exists(destination_file):
destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
count += 1
shutil.move(os.path.join(root, file), destination_file)
# .txt files move
for root, dirs, files in os.walk(dir_path):
for file in files:
if file.endswith('.txt'):
count = 1
destination_file = os.path.join(FILE_LOCATION_PATH, file)
while os.path.exists(destination_file):
destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
count += 1
shutil.move(os.path.join(root, file), destination_file)
# .json files move
for root, dirs, files in os.walk(dir_path):
for file in files:
if file.endswith('.json'):
count = 1
destination_file = os.path.join(FILE_LOCATION_PATH, file)
while os.path.exists(destination_file):
destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
count += 1
shutil.move(os.path.join(root, file), destination_file)
print("Files Move Complete")
# Delete Directory
print("Cleaning up Downloads Directory")
shutil.rmtree(DOWNLOAD_LOCATION_PATH)
# Remove EFR Audit Logs stratinbg with 2020
print("Remove the encrypted Audit Logs")
pattern = "^(2020)"
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
for file in filter(lambda x: re.match(pattern, x), files):
os.remove(os.path.join(root, file))
# Remove EFR Audit Logs stratinbg with EFR
pattern = "^(EFR)"
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
for file in filter(lambda x: re.match(pattern, x), files):
os.remove(os.path.join(root, file))
# Remove EFR Audit Logs stratinbg with 2019
pattern = "^(2019)"
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
for file in filter(lambda x: re.match(pattern, x), files):
os.remove(os.path.join(root, file))
# Script clean up
print("Script Complete")
#with open("download_history.log", "a") as myfile:
# myfile.write('New Line\n')
With os you can check whether a file exist or not:
if not os.isfile(PATH_TO_EXPECTED_DOWNLOADED_FILE):
#do download
For your own security please seperate your steps into functions and build a pipeline of these.

How to move img files only to another folder with orginal folder name+ImageOnly?

I want to move img only files to another folder
if folder didn't exist I will create it with original name+ImageOnly
ex.
D:\Test #contain some folder
D:\Test\aaa\img1.jpg
D:\Test\bbb\ccc\img2.jpg
import os
import shutil
def moveImage(srcdirs):
for roots, dirs, files in os.walk(srcdirs):
grand_father = srcdirs #D:\Test
not_need =('.zip','.js','.html','.log','.lst','.txt','.ini')
imgExt = ('.jpg','.png','.gif','.jpeg')
father = os.path.split(roots)[1]+'-ImageOnly'
for file in files:
if file.endswith(imgExt) and not file.endswith(not_need):
path = os.path.join(roots,file)
des= os.path.join(grand_father,father)
if not os.path.exists(des):
createFolder(father)
print("folder created")
shutil.move(path,des)
elif file.endswith(not_need): #remove unnecessary file
n1 = os.path.join(roots,file)
os.remove(n1)
def createFolder(directory):
dirs = ('./%s/'%directory)
try:
if not os.path.exists(dirs):
os.makedirs(dirs)
except OSError:
print ('Error: Creating directory. ' + dirs)
src = r'D:\Test'
moveImage(src)
My code gives me
img1.jpg move to aaa-ImageOnly
but for img2.jpg it moved to ccc-ImageOnly
I want it to move to bbb-ImageOnly
to first subfolder name (I call it right?), not it last subfolder name.
Here you go:
import os
import shutil
FOLDER = r'D:\Test'
EXTENSIONS = ('.jpg', '.png', '.gif', '.jpeg')
def move_images(root):
levels = len(root.split(os.sep))
for path, dirs, files in os.walk(root):
for file in files:
if file.endswith(EXTENSIONS):
src_file = os.path.join(path, file)
dst_dir = os.path.join(root, '{}-ImageOnly'.format(path.split(os.sep)[levels]))
dst_file = os.path.join(dst_dir, file)
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
shutil.move(src_file, dst_file)
move_images(FOLDER)
It produces me:
D:\Test\aaa-ImageOnly\img1.jpg
D:\Test\bbb-ImageOnly\img2.jpg

Renaming file with os.rename causing NameError

I'm trying to rename 2 raster files: old_name.jpg and old_name.tiff to new_name.jpg and new_name.tiff:
new_name = 'new_name' # defining new name here
for root_dir, dirname, filenames in os.walk(TargetDir):
for file in filenames:
if re.match(r'.*.jpg$', file, re.IGNORECASE) is not None: # converting jpg
os.rename(os.path.join(root_dir, file), os.path.join(root_dir, new_name + ".jpg"))
if re.match(r'.*.tiff$', file, re.IGNORECASE) is not None: # converting tiff
os.rename(os.path.join(root_dir, file), os.path.join(root_dir, new_name + ".tiff"))
It works on jpg like charm, but then throws
Traceback (most recent call last):
File "C:/!Scripts/py2/meta_to_BKA.py", line 66, in <module>
os.rename(os.path.join(root_dir, file), os.path.join(root_dir, new_name + ".tiff"))
NameError: name 'new_name' is not defined
Note that it uses new_name to rename jpg, but then variable vanishes in the very next block. I tried using shutil.move(), but got the same error. What is the problem?
The stack trace suggests that your snippet isn't the whole story.
I can't reproduce:
from __future__ import division, print_function, unicode_literals
import os
TargetDir = '/tmp/test'
new_name = 'new_name'
def main():
for root_dir, _, filenames in os.walk(TargetDir):
for filename in filenames:
if '.' not in filename:
continue
endswith = filename.rsplit('.', 1)[-1].lower()
if endswith not in set(['jpg', 'tiff']):
continue
new_filename = '{}.{}'.format(new_name, endswith)
from_fn = os.path.join(root_dir, filename)
to_fn = os.path.join(root_dir, new_filename)
print ('Moving', from_fn, 'to', to_fn)
os.rename(from_fn, to_fn)
if __name__ == '__main__':
main()
but I took the liberty of rewriting a bit.
> python hest.py
Moving /tmp/test/narf.jpg to /tmp/test/new_name.jpg
Moving /tmp/test/bla.tiff to /tmp/test/new_name.tiff

Going into subfolders (python)

I've written something to remove special characters in Filenames. But it just includes the one folder and not it's subfolders. How can I do this also in subfolders and subsubfolders and so on?
import os
import re
def dir_list2(directory, *args):
fileList = []
content = os.listdir(directory)
for file in content :
dirfile = os.path.join(directory, file)
if os.path.isfile(dirfile):
if len(args) == 0:
fileList.append(dirfile)
else:
if os.path.splitext(dirfile)[1][1:] in args:
fileList.append(dirfile)
print "##################################################"
print "Old filename:", file
filename = file
remove = re.compile("[^.a-zA-z0-9_]")
output = remove.sub('_', filename)
newfile = directory + "/" + output
os.rename(dirfile, newfile)
print "Corrected filename:", output
#Removes Special Characters
return fileList
if __name__ == '__main__':
fileList = dir_list2('/path/')
Try using os.walk instead of os.listdir, it allows you to walk through a folder and its files and subfolders and so on.
Edit your code to be like:
content = os.walk(directory)
for dirpath, dirnames, filenames in content:
for file in filenames:
dirfile = os.path.join(dirpath, file)
# The rest of your code

python rename files not working as I expected

I am trying to rename all the files in a directory making multiple changes on each file in order to make files names internet friendly. It works correctly for a few replacements and then it says file not found. I though that if I slowed it down with time.sleep() it would work, but this seems to have no effect (other than being slow). In some cases I can run the script many times and accomplish the goal, but in some cases it completes without error but the changes are not made. Any suggestions would be appreciated.
import os, glob, time
path = os.getcwd()
dirlist = glob.glob('*.pdf')
for filename in dirlist:
os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(' ', '_')))
os.rename(os.path.join(path, filename), os.path.join(path, filename.lower().encode('utf8')))
os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(' (', '-')))
os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(')', '')))
os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(',', '')))
os.rename(os.path.join(path, filename), os.path.join(path, filename.replace('_-_', '-')))
filename.replace() returns a new string - it does not change filename in any way. So filename will become outdated after renaming a file and cause a file not found error next time it is used.
Try something like this:
import os, glob, time
def new_filename(filename):
return filename.replace(' ', '_').lower().encode('utf8').replace(' (', '-') \
.replace(')', '').replace(',', '').replace('_-_', '-')
path = os.getcwd()
dirlist = glob.glob('*.pdf')
for filename in dirlist:
os.rename(os.path.join(path, filename), os.path.join(path, new_filename(filename)))
usage = '''
$python slug_dir.py DIR
'''
from os import rename, walk
from os.path import join, isdir, splitext
from slugify import slugify
def rename_files(dir_path):
for path, subdirs, files in walk(dir_path):
for fname in files:
filename, extension = splitext(fname)
s = slugify(filename)
newname = s+extension
try:
oldpath = join(path, fname)
newpath = join(path, newname)
print rename(oldpath, newpath)
print 'Rename: ', oldpath, '->', newpath
except Exception as e:
print 'Error triying rename: ', fname, '->', newname
raise e
def main():
dirname = sys.argv[1]
if not isdir(dirname):
print usage
sys.exit(1)
rename_files(dirname)
if __name__ == '__main__':
main()

Categories