I am trying to rename all the files in a directory, making multiple changes to each file name in order to make the names internet friendly. It works correctly for a few replacements and then it says file not found. I thought that if I slowed it down with time.sleep() it would work, but this seems to have no effect (other than being slow). In some cases I can run the script many times and accomplish the goal, but in other cases it completes without error and the changes are not made. Any suggestions would be appreciated.
import os, glob, time
path = os.getcwd()
dirlist = glob.glob('*.pdf')
for filename in dirlist:
    os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(' ', '_')))
    os.rename(os.path.join(path, filename), os.path.join(path, filename.lower().encode('utf8')))
    os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(' (', '-')))
    os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(')', '')))
    os.rename(os.path.join(path, filename), os.path.join(path, filename.replace(',', '')))
    os.rename(os.path.join(path, filename), os.path.join(path, filename.replace('_-_', '-')))
filename.replace() returns a new string - it does not change filename in any way. So filename will become outdated after renaming a file and cause a file not found error next time it is used.
Try something like this:
import os, glob, time
def new_filename(filename):
    # handle ' (' before spaces become underscores, otherwise that pattern can
    # no longer match; the encode('utf8') step is dropped since it would turn
    # the name into bytes and break the later str replacements
    return filename.lower().replace(' (', '-').replace(')', '') \
                   .replace(',', '').replace(' ', '_').replace('_-_', '-')

path = os.getcwd()
dirlist = glob.glob('*.pdf')
for filename in dirlist:
    os.rename(os.path.join(path, filename), os.path.join(path, new_filename(filename)))
usage = '''
$python slug_dir.py DIR
'''
import sys
from os import rename, walk
from os.path import join, isdir, splitext

from slugify import slugify

def rename_files(dir_path):
    for path, subdirs, files in walk(dir_path):
        for fname in files:
            filename, extension = splitext(fname)
            s = slugify(filename)
            newname = s + extension
            try:
                oldpath = join(path, fname)
                newpath = join(path, newname)
                rename(oldpath, newpath)
                print 'Rename: ', oldpath, '->', newpath
            except Exception as e:
                print 'Error trying to rename: ', fname, '->', newname
                raise e

def main():
    dirname = sys.argv[1]
    if not isdir(dirname):
        print usage
        sys.exit(1)
    rename_files(dirname)

if __name__ == '__main__':
    main()
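For reference, a quick check of what slugify produces (assuming the third-party python-slugify package; exact output can vary slightly between versions):

>>> from slugify import slugify
>>> slugify('My Report (final), v2')
'my-report-final-v2'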
How can I identify the files in a directory that contain a desired text string, and then separate all of those files into another directory?
import os
user_input = input('What is the name of your directory')
directory = os.listdir(user_input)
searching = input('What word are you trying to find?')
for fname in directory:
    if os.path.isfile(user_input + os.sep + fname):
        # Full path
        f = open(user_input + os.sep + fname, 'r')
        if searching in f.read():
            print('found string in file %s' % fname)
        else:
            print('string not found')
        f.close()
shutil has many methods you can use. One of them is:
import shutil
shutil.copyfile(src, dst)
# 2nd option
shutil.copy(src, dst)
You can copy those files into another location by adding:

if searching in f.read():
    print('found string in file %s' % fname)
    shutil.copy(user_input + os.sep + fname, destination)
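Putting the two pieces together, a minimal self-contained sketch (the destination folder name here is just an illustrative placeholder):

import os
import shutil

user_input = input('What is the name of your directory? ')
searching = input('What word are you trying to find? ')
destination = 'found_files'  # hypothetical destination folder

os.makedirs(destination, exist_ok=True)

for fname in os.listdir(user_input):
    full_path = os.path.join(user_input, fname)
    if os.path.isfile(full_path):
        with open(full_path, 'r') as f:
            if searching in f.read():
                print('found string in file %s' % fname)
                shutil.copy(full_path, destination)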
I'm looking for a quick way to copy the entire directory structure (including sub folders and files), with the following conditions:
Copy file if it does not exist in the destination or source is newer
Allow excluding a list of sub folders i.e. ['temp', '.git']
Allow excluding files by type i.e. ['.txt', '.pyc', '*.zip']
I have seen some of the answers using shutil.copy and copytree but none is doing what I was looking for...
I am hoping this could be done by using one of the standard utilities by providing arguments etc. If not, I will write a script to do it...
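For the exclusion part alone, shutil.copytree does take an ignore callback (shutil.ignore_patterns), so a rough sketch is possible, though it does not handle the "copy only if newer" condition and dirs_exist_ok requires Python 3.8+:

import shutil

# copies everything except the listed folders/patterns; unlike the script
# below, it copies unconditionally rather than only when the source is newer
shutil.copytree(
    'c:/temp/a',
    'c:/temp/j',
    ignore=shutil.ignore_patterns('temp', '.git', '*.txt', '*.pyc', '*.zip'),
    dirs_exist_ok=True,  # Python 3.8+
)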
This is what I ended up writing. It does the job, but I was hoping this basic functionality would be provided by one of the standard libraries...
import os, sys, pathlib, shutil
def copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include):
    srcdir = str(pathlib.Path(srcdir)).replace('\\', '/')
    dstdir = str(pathlib.Path(dstdir)).replace('\\', '/')
    for dirpath, dirs, files in os.walk(pathlib.Path(srcdir)):
        this_dir = dirpath.replace('\\', '/')
        if os.path.basename(this_dir) in sub_folder_to_include:
            dest_dir = this_dir.replace(srcdir, dstdir)
            # create folder in the destination if it does not exist
            pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)
            for filename in files:
                dest_file = os.path.join(dest_dir, os.path.basename(filename))
                source_file = os.path.join(this_dir, filename)
                if os.path.isfile(source_file) and filename.endswith(extensions_to_include):
                    # copy file if destination is older by more than a second, or does not exist
                    if (not os.path.exists(dest_file)) or (os.stat(source_file).st_mtime - os.stat(dest_file).st_mtime > 1):
                        print(f'Copying {source_file} to {dest_dir}')
                        shutil.copy2(source_file, dest_dir)
                    else:
                        print(f'.....Skipping {source_file} to {dest_dir}')
srcdir = 'c:/temp/a'
dstdir = 'c:/temp/j'
sub_folder_to_include = ('a', 'aa','bb')
extensions_to_include = ('.py', '.png', '.gif', '.txt')
copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include)
I'm finding that I have to run my code twice for the desired output and I'm not sure why. It's also printing a long string of letters in the shell that aren't needed. I'd just like it to be a bit cleaner.
The code creates folders with subfolders, based on file names, then moves the files into specific subfolders.
Filename example is "A123456-20190101-A01.mp3"
import os
import shutil
path = "/Volumes/ADATA UFD/For script/Files"
file_names = [file for file in os.listdir(path) if
              os.path.isfile(os.path.join(path, file))]

file_map = {'O': '1-Original', 'P': '2-PreservationMaster', 'M': '3-Mezzanine',
            'T': '4-Presentation', 'A': '5-Access', 'R': '6-Reference'}

parent_folders = set(file_name.rsplit('-', 1)[0] for file_name in file_names)
sub_folders = ['1-Original', '2-PreservationMaster', '3-Mezzanine',
               '4-Presentation', '5-Access', '6-Reference']

for folder in parent_folders:
    folder_path = os.path.join(path, folder)
    try:
        os.mkdir(folder_path)
    except:
        print('folder already exists:', folder_path)
    for folders in sub_folders:
        try:
            folders_path = os.path.join(folder_path, folders)
            os.mkdir(folders_path)
        except:
            print('folder already exists:', folders_path)

for file_name in file_names:
    parent_folder = file_name.rsplit('-', 1)[0]
    ext = file_name[19]
    print(ext)
    dest = os.path.join(path, parent_folder, file_map[ext.upper()], file_name)
    src = os.path.join(path, file_name)
    try:
        shutil.move(src, dest)
    except Exception as e:
        print(e)
I'm getting this error message:
Traceback (most recent call last):
File "/Volumes/ADATA UFD/For script/MoveFilesToPreservationBundleTest3.py", line 30, in <module>
dest = os.path.join(path, parent_folder, file_map[ext.upper()], file_name)
builtins.KeyError: '0'
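For reference, the key being looked up comes from a fixed character index in the code above; a hedged sketch of pulling the type letter out of the name instead (assuming names like A123456-20190101-A01.mp3, where the letter that starts the last dash-separated part selects the subfolder):

import os

name = 'A123456-20190101-A01.mp3'       # example filename from the question
stem = os.path.splitext(name)[0]        # 'A123456-20190101-A01'
type_letter = stem.rsplit('-', 1)[1][0] # 'A', i.e. the file_map key
print(type_letter)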
I have been working on a script that checks through every subdirectory in a directory, matches files using regex, and then runs different commands based on what kind of file it is.
What I have finished is the use of different commands based on regex matching: right now it checks for either a .zip file, .rar file or .r00 file and uses a different command for each match. However, I need help iterating through every directory and first checking whether there is a .mkv file in it; if there is, the script should just skip that directory and jump to the next, but otherwise it should run the command and, when finished, continue to the next directory.
import os
import re
rx = '(.*zip$)|(.*rar$)|(.*r00$)'
path = "/mnt/externa/folder"
for root, dirs, files in os.walk(path):
    for file in files:
        res = re.match(rx, file)
        if res:
            if res.group(1):
                print("Unzipping ", file, "...")
                os.system("unzip " + root + "/" + file + " -d " + root)
            elif res.group(2):
                os.system("unrar e " + root + "/" + file + " " + root)
            if res.group(3):
                print("Unraring ", file, "...")
                os.system("unrar e " + root + "/" + file + " " + root)
EDIT:
Here is the code I have now:
import os
import re
from subprocess import check_call
from os.path import join
rx = '(.*zip$)|(.*rar$)|(.*r00$)'
path = "/mnt/externa/Torrents/completed/test"
for root, dirs, files in os.walk(path):
if not any(f.endswith(".mkv") for f in files):
found_r = False
for file in files:
pth = join(root, file)
try:
if file.endswith(".zip"):
print("Unzipping ",file, "...")
check_call(["unzip", pth, "-d", root])
found_zip = True
elif not found_r and file.endswith((".rar",".r00")):
check_call(["unrar","e","-o-", pth, root,])
found_r = True
break
except ValueError:
print ("Oops! That did not work")
This script works mostly fine, but sometimes I run into issues when there are Subs in the folder. Here is an error message I get when I run the script:
$ python unrarscript.py
UNRAR 5.30 beta 2 freeware Copyright (c) 1993-2015 Alexander Roshal
Extracting from /mnt/externa/Torrents/completed/test/The.Conjuring.2013.1080p.BluRay.x264-ALLiANCE/Subs/the.conjuring.2013.1080p.bluray.x264-alliance.subs.rar
No files to extract
Traceback (most recent call last):
File "unrarscript.py", line 19, in <module>
check_call(["unrar","e","-o-", pth, root])
File "/usr/lib/python2.7/subprocess.py", line 541, in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['unrar', 'e', '-o-', '/mnt/externa/Torrents/completed/test/The.Conjuring.2013.1080p.BluRay.x264-ALLiANCE/Subs/the.conjuring.2013.1080p.bluray.x264-alliance.subs.rar', '/mnt/externa/Torrents/completed/test/The.Conjuring.2013.1080p.BluRay.x264-ALLiANCE/Subs']' returned non-zero exit status 10
I cannot really understand what is wrong with the code, so I am hoping some of you are willing to help me.
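As a side note on that traceback: check_call raises subprocess.CalledProcessError when the command exits with a non-zero status, and an except ValueError clause will not catch it. A minimal sketch of handling that case (the paths here are placeholders):

from subprocess import check_call, CalledProcessError

pth = '/path/to/archive.rar'   # placeholder archive path
root = '/path/to/extract/to'   # placeholder extraction directory

try:
    check_call(['unrar', 'e', '-o-', pth, root])
except CalledProcessError as e:
    # unrar exited non-zero, e.g. "No files to extract"
    print('Command failed with exit status', e.returncode)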
Just use any to see if any files end in .mkv before going any further. You can also simplify to an if/else, as you do the same thing for the last two matches. Also, using subprocess.check_call would be a better approach:
import os
import re
from subprocess import check_call
from os.path import join
rx = '(.*zip$)|(.*rar$)|(.*r00$)'
path = "/mnt/externa/folder"
for root, dirs, files in os.walk(path):
if not any(f.endswith(".mkv") for f in files):
for file in files:
res = re.match(rx, file)
if res:
# use os.path.join
pth = join(root, file)
# it can only be res.group(1) or one of the other two so we only need if/else.
if res.group(1):
print("Unzipping ",file, "...")
check_call(["unzip" , pth, "-d", root])
else:
check_call(["unrar","e", pth, root])
You could also forget the regex and just use an if/elif and str.endswith:
for root, dirs, files in os.walk(path):
if not any(f.endswith(".mkv") for f in files):
for file in files:
pth = join(root, file)
if file.endswith("zip"):
print("Unzipping ",file, "...")
check_call(["unzip" , pth, "-d", root])
elif file.endswith((".rar",".r00")):
check_call(["unrar","e", pth, root])
If you really care about speed and not repeating steps, you can filter as you iterate: collect the paths by extension (slicing off the last four characters) while you do the check for .mkv, and use for/else logic:
good = {"rar", "zip", "r00"}
for root, dirs, files in os.walk(path):
if not any(f.endswith(".mkv") for f in files):
tmp = {"rar": [], "zip": []}
for file in files:
ext = file[-4:]
if ext == ".mkv":
break
elif ext in good:
tmp[ext].append(join(root, file))
else:
for p in tmp.get(".zip", []):
print("Unzipping ", p, "...")
check_call(["unzip", p, "-d", root])
for p in tmp.get(".rar", []):
check_call(["unrar", "e", p, root])
That will short-circuit on any match for a .mkv, or else only iterate over the collected .zip and .rar matches, but unless you really care about efficiency I would use the second approach.
To avoid overwriting you can unrar/unzip each to a new subdirectory using a counter to help create a new dir name:
from itertools import count
for root, dirs, files in os.walk(path):
if not any(f.endswith(".mkv") for f in files):
counter = count()
for file in files:
pth = join(root, file)
if file.endswith("zip"):
p = join(root, "sub_{}".format(next(counter)))
os.mkdir(p)
print("Unzipping ",file, "...")
check_call(["unzip" , pth, "-d", p])
elif file.endswith((".rar",".r00")):
p = join(root, "sub_{}".format(next(counter)))
os.mkdir(p)
check_call(["unrar","e", pth, p])
Each will be unpacked into a new directory under root, i.e. root_path/sub_0, root_path/sub_1, etc.
You probably would have been better off adding an example to your question, but if the real problem is that you only want one of .rar or .r00, then you can set a flag when you find any match for .rar or .r00 and only unpack if the flag is not set:
for root, dirs, files in os.walk(path):
if not any(f.endswith(".mkv") for f in files):
found_r = False
for file in files:
pth = join(root, file)
if file.endswith("zip"):
print("Unzipping ",file, "...")
check_call(["unzip", pth, "-d", root])
found_zip = True
elif not found_r and file.endswith((".rar",".r00"))
check_call(["unrar","e", pth, root])
found_r = True
If there is also only one zip, you can set two flags and leave the loop once both are set, as in the sketch below.
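A minimal sketch of that two-flag variant (assuming, as above, at most one zip and one rar/r00 per directory):

for root, dirs, files in os.walk(path):
    if not any(f.endswith(".mkv") for f in files):
        found_zip = found_r = False
        for file in files:
            pth = join(root, file)
            if not found_zip and file.endswith(".zip"):
                print("Unzipping ", file, "...")
                check_call(["unzip", pth, "-d", root])
                found_zip = True
            elif not found_r and file.endswith((".rar", ".r00")):
                check_call(["unrar", "e", pth, root])
                found_r = True
            if found_zip and found_r:
                break  # both archives handled, leave the loop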
The example below will work directly. As suggested by Padraic, I replaced os.system with the more suitable subprocess.
What about joining all the files into a single string and looking for .mkv within that string?
import os
import re
from subprocess import check_call
from os.path import join
rx = '(.*zip$)|(.*rar$)|(.*r00$)'
path = "/mnt/externa/folder"
regex_mkv = re.compile(r'.*\.mkv\,')

for root, dirs, files in os.walk(path):
    string_files = ','.join(files) + ', '
    if regex_mkv.match(string_files): continue
    for file in files:
        res = re.match(rx, file)
        if res:
            # use os.path.join
            pth = join(root, file)
            # it can only be res.group(1) or one of the other two, so we only need if/else
            if res.group(1):
                print("Unzipping ", file, "...")
                check_call(["unzip", pth, "-d", root])
            else:
                check_call(["unrar", "e", pth, root])
re is overkill for something like this. There's a library function for extracting file extensions, os.path.splitext. In the following example, we build an extension-to-filenames map and we use it both for checking the presence of .mkv files in constant time and for mapping each filename to the appropriate command.
Note that you can unzip files with zipfile (standard lib), and third-party packages are available for .rar files; a rough sketch of that approach follows the example below.
import os
for root, dirs, files in os.walk(path):
    ext_map = {}
    for fn in files:
        ext_map.setdefault(os.path.splitext(fn)[1], []).append(fn)
    if '.mkv' not in ext_map:
        for ext, fnames in ext_map.items():
            for fn in fnames:
                # build the full path, since os.walk yields bare file names
                pth = os.path.join(root, fn)
                if ext == ".zip":
                    os.system("unzip %s -d %s" % (pth, root))
                elif ext == ".rar" or ext == ".r00":
                    os.system("unrar e %s %s" % (pth, root))
import os
import re
regex = re.compile(r'(.*zip$)|(.*rar$)|(.*r00$)')
path = "/mnt/externa/folder"

for root, dirs, files in os.walk(path):
    for file in files:
        res = regex.match(file)
        if res:
            if res.group(1):
                print("Unzipping ", file, "...")
                os.system("unzip " + root + "/" + file + " -d " + root)
            elif res.group(2):
                os.system("unrar e " + root + "/" + file + " " + root)
            else:
                print("Unraring ", file, "...")
                os.system("unrar e " + root + "/" + file + " " + root)
In Python 2.7.4 on Windows, if I have a directory structure that follows:
test/foo/a.bak
test/foo/b.bak
test/foo/bar/c.bak
test/d.bak
And I use the following to add them to an existing archive such that 'd.bak' is at the root of the archive:
import zipfile
import os.path
import fnmatch
def find_files(directory, pattern):
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                yield filename

if __name__ == '__main__':
    z = zipfile.ZipFile("testarch.zip", "a", zipfile.ZIP_DEFLATED)
    for filename in find_files('test', '*.*'):
        print 'Found file:', filename
        z.write(filename, os.path.basename(filename), zipfile.ZIP_DEFLATED)
    z.close()
The directory structure of the zip file is flat. It creates the foo/ directory only if a sub-directory exists in it (if I exclude test/foo/bar/c.bak, it does not create the directory; if it is included, foo/ is created but not foo/bar/, if that makes sense), but no sub-directories or files:
foo/
a.bak
b.bak
c.bak
d.bak
Am I missing something?
The problem is that you're explicitly asking it to flatten all the paths:
z.write(filename, os.path.basename(filename), zipfile.ZIP_DEFLATED)
If you look at the docs, the default arcname is:
the same as filename, but without a drive letter and with leading path separators removed
But you're overriding that with os.path.basename(filename). (If you don't know what basename does, it returns "the last pathname component". If you don't want just the last pathname component, don't call basename.)
If you just do z.write('test/foo/bar/c.bak'), it will create a zip entry named test/foo/bar/c.bak, but if you do z.write('test/foo/bar/c.bak', 'c.bak'), it will create a zip entry named c.bak. Since you do that for all of the entries, the whole thing ends up flattened.
I figured it out. As abarnet pointed out, I had misread the docs on zipfiles. Using the following function, I can create the correct archive name for the zip file:
def createArchName(path):
    line = path
    if "\\" in line:
        ''' windows '''
        discard, val = line.split("\\", 1)
        return val
    else:
        ''' unix '''
        discard, val = line.split("/", 1)
        return val
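As a side note, os.path.relpath can produce the same kind of archive name more directly; a sketch, assuming the archive should be rooted at the 'test' folder:

import os.path

def createArchName(path, base='test'):
    # e.g. 'test/foo/bar/c.bak' -> 'foo/bar/c.bak'
    return os.path.relpath(path, base)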
For those interested, the full code is as follows:
import urllib2
import zipfile
import os.path
import fnmatch
def find_files(directory, pattern):
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                yield filename

def createArchName(path):
    line = path
    if "\\" in line:
        ''' windows '''
        discard, val = line.split("\\", 1)
        return val
    else:
        ''' unix '''
        discard, val = line.split("/", 1)
        return val

if __name__ == '__main__':
    if not os.path.exists("test"):
        os.mkdir("test")
    z = zipfile.ZipFile("testarch.zip", "a", zipfile.ZIP_DEFLATED)
    for filename in find_files('test', '*.*'):
        archname = createArchName(filename)
        print 'Found file:', archname
        z.write(filename, archname, zipfile.ZIP_DEFLATED)
    z.close()