Python: can't create subdirectory - python

I want to apply a test to a list of files. The files that pass the test should be moved to the directory "Pass"; the others should be moved to the directory "Fail".
Thus the output directory should contain subdirectories "Pass" and "Fail".
Here is my attempt:
if(<scan==pass>) # Working fine up to this point
dest_dir = outDir + '\\' + 'Pass' # The problem is here
print("Pass", xmlfile)
MoveFileToDirectory(inDir, xmlfile, dest_dir)
else:
dest_dir = os.path.dirname(outDir + '\\' + 'Fail')
print("Fail: ", xmlfile)
MoveFileToDirectory(inDir, xmlfile, dest_dir)
However, my code is moving the files to the output directory and not creating the "Pass" or "Fail" subdirectories. Any ideas why?

Use os.path.join(). Example:
os.path.join(outDir, 'Pass')
See this SO post
Also, we don't know what MoveFileToDirectory does. Use the standard os.rename:
os.rename("path/to/current/file.foo", "path/to/new/desination/for/file.foo")
See this SO post
So:
# Move one file into the "Pass" or "Fail" subdirectory of outDir.
# os.rename needs full file paths on both sides, not a directory as target.
source_file = os.path.join(inDir, xmlfile)
if conditionTrue:
    label = 'Pass'
    print("Pass: ", xmlfile)
else:
    label = 'Fail'  # failures belong in the "Fail" subdirectory
    print("Fail: ", xmlfile)
dest_dir = os.path.join(outDir, label)
# os.rename does not create missing directories, so make sure the
# subdirectory exists before moving the file into it.
if not os.path.isdir(dest_dir):
    os.makedirs(dest_dir)
dest_file = os.path.join(dest_dir, xmlfile)
os.rename(source_file, dest_file)

Create directories exactly once:
import errno
import os

# Index 0 is the "condition is False" label, index 1 the "True" label.
labels = 'Fail', 'Pass'
dirs = [os.path.join(out_dir, label) for label in labels]
for d in dirs:
    try:
        os.makedirs(d)
    except OSError as e:
        # An already-existing directory is fine. Anything else (permissions,
        # a plain file in the way, a bad path) would make the later move
        # fail in a confusing way, so let it surface here instead.
        if e.errno != errno.EEXIST or not os.path.isdir(d):
            raise
Then you could move files into the created directories:
import shutil
print("%s: %s" % (labels[condition_true], xmlfile))
shutil.move(os.path.join(out_dir, xmlfile), dirs[condition_true])
The code exploits that False == 0 and True == 1 in Python.

Related

Os.rename Files Overwrite

I am making a Python project that renames multiple files. However, sometimes the files overwrite.
suffixes = ['.pdf', '.epub', '.mobi']
file_list = []
def change_fname(dir_name, part=' (z-lib.org)', action='remove'):
fnames = os.listdir(dir_name)
for suffix in suffixes:
fnames_suffix = [f for f in fnames if f.endswith(suffix)]
for fname in fnames_suffix:
print(f'{action} "{part}" into/from "{fname}"')
if action == 'remove' and fname.endswith(part+suffix):
new_name = fname[:-len(suffix) - len(part)] + suffix
print(f'fname is {fname}')
elif action == 'insert':
new_name = fname[:-len(suffix)] + part + suffix
else:
raise Exception(f'Unknown Action: {action}')
print(new_name)
old_file = os.path.join(dir_name, fname)
new_file = os.path.join(dir_name, new_name)
os.rename(old_file, new_file)
file_to_show = '/Users/ChrisHart/Downloads/test i love you daddy/'
subprocess.call(["open", "-R", file_to_show])
if __name__ == '__main__':
dir_name = '/Users/ChrisHart/Downloads/test i love you daddy/'
try:
change_fname(dir_name, part=' (z-lib.org)', action='remove')
except Exception as ex:
print(ex)
This is my program ^
file (part).pdf
file.pdf
The program deletes " (part)" from the first file name, so we get this:
file.pdf
file.pdf
And they overwrite.
file.pdf
How can I fix this overwriting?
I also wrote a script that changes multiple files. Maybe my code helps you understand your problem:
import os

print(os.getcwd())  # Gives you your current directory
os.chdir('/PATH/TO/FILES')  # Change directory to the files
for i in os.listdir('/PATH/TO/FILES'):
    new_name = i.replace('(z-lib.org)', ' ')  # replaces z-lib with one whitespace
    if new_name != i:
        # On POSIX os.rename silently replaces an existing target, which is
        # how two source files end up as a single one. Refuse instead.
        if os.path.exists(new_name):
            print('skipping %s: %s already exists' % (i, new_name))
            continue
        os.rename(i, new_name)
    print(i)
I know what you're trying to replace :D ... I did the same thing

python unzip files below the root folder

I would like to unzip all the folders and files of an archive that sit below its root folder. I have an archive named abc.zip which contains abc/xyz/, abc/123.jpg and abc/xyz1/; I just want to extract xyz/, 123.jpg and xyz1/ into the CWD.
I use the code below to extract a file, but I need help with omitting the root folder from the extracted paths.
def unzip_artifact( local_directory, file_path ):
fileName, ext = os.path.splitext( file_path )
if ext == ".zip":
Downloadfile = basename(fileName) + ext
print 'unzipping file ' + Downloadfile
try:
zipfile.ZipFile(file_path).extractall(local_directory)
except zipfile.error, e:
print "Bad zipfile: %s" % (e)
return
You have to use a more complex (and therefore more customizable) way to unzip. Instead of using the 'extractall' method, you must extract each file separately (with the 'extract' method, or by reading each member and writing it out yourself, as below). Then you will be able to choose the destination path of every member, omitting the archive's sub-directories.
Here is your code with the modification you needed :
def unzip_artifact( local_directory, file_path ):
    """Extract every file of a .zip archive directly into local_directory.

    The directory structure stored in the archive is dropped: each member is
    written under its base name only, so members that share a base name
    overwrite each other. Paths that do not end in ".zip" are ignored.

    local_directory -- existing directory that receives the files
    file_path       -- path of the archive to unpack

    Returns None. A corrupt archive is reported on stdout, not raised.
    """
    fileName, ext = os.path.splitext(file_path)
    if ext != ".zip":
        return
    Downloadfile = fileName + ext
    print('unzipping file ' + Downloadfile)
    try:
        with zipfile.ZipFile(file_path) as zf:
            for member in zf.infolist():
                # Directory entries end with '/'; there is nothing to write.
                if member.filename.endswith('/'):
                    continue
                target = os.path.join(local_directory,
                                      os.path.basename(member.filename))
                # Archive members are raw bytes: binary mode keeps images and
                # other non-text files intact (text mode would translate
                # newlines on Windows and reject bytes on Python 3).
                with open(target, "wb") as outfile:
                    outfile.write(zf.read(member))
    except zipfile.error as e:
        print("Bad zipfile: %s" % (e))
    return

Python: Replace one list with another list?

I'm stuck. I'm moving folders around on our network, each of which has a unique ID, into a central location. A few folders have typos in their names and therefore do not match a unique ID in the central location. I have found the correct IDs, but I need to rename these folders before I move them. For example, I have created an Excel spreadsheet with the wrong unique ID in one column and the correct ID in a separate column. Now I want to rename the folders with the correct ID and then transfer those folders to the central location. My code is... rough, because I can't think of a good way to do it. I feel like using a list is the way to go, but since my code is iterating through a folder I'm not sure how to achieve this.
Edit: I think something like this may be what I'm looking for
Ex:
In Folder A: a file named 12334 SHOULD be renamed 1234, then moved into folder 1234 within the base directory.
Here's my code:
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
subfolder = "\Private Drain Connections"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir, ARN)
if not os.path.exists(link):
newname = re.sub(372911000002001,372911000003100,ARN)
newname =re.sub(372809000001400,372909000001400,ARN)
newname =re.sub(372809000001500,372909000001500,ARN)
newname =re.sub(372809000001700,372909000001700,ARN)
newname = re.sub(372812000006800,372912000006800,ARN)
newname =re.sub(372812000006900,372912000006900,ARN)
newname =re.sub(372812000007000,372912000007000,ARN)
newname =re.sub(372812000007100,372912000007100,ARN)
newname =re.sub(372812000007200,372912000007200,ARN)
newname =re.sub(372812000007300,372912000007300,ARN)
newname =re.sub(372812000007400,372912000007400,ARN)
newname =re.sub(372812000007500,372912000007500,ARN)
newname =re.sub(372812000007600,372912000007600,ARN)
newname =re.sub(372812000007700,372912000007700,ARN)
newname =re.sub(372812000011100,372912000011100,ARN)
os.rename(os.path.join(movdir, ARN, extension ),
os.path.join(movdir, newname, extension))
oldpath = os.path.join(root, newname)
print ARN, "to", newname
newpath = basedir + "\\" + newname + subfolder
shutil.copy(oldpath, newpath)
print "Copied"
except:
print ("Error occurred")
Thanks to the answers below here is my final code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
subfolder = "\Private Drain Connections"
import string
l = ['372911000002001',
'372809000001400',
'372809000001500',
'372809000001700',
'37292200000800'
]
l2 = ['372911000003100',
'372909000001400',
'372909000001500',
'372909000001700',
'372922000000800'
]
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
#file name and extension
ARN, extension = os.path.splitext(filename)
oldname = str(ARN)
#Location of the corresponding folder in the new directory
link = os.path.join(basedir, ARN)
if not os.path.exists(link):
for ii, jj in zip(l, l2):
newname = re.sub(ii,jj, ARN)
newname = str(newname)
print path
newpath = os.path.join(root, oldname) + extension
print "new name", newpath
os.rename(path, newpath)
print "Renaming"
newpath2 = basedir + "\\" + newname + subfolder
shutil.copy(newpath, newpath2)
print "Copied"
if newname != ARN:
break
else:
continue
except:
print ("Error occurred")
tb = sys.exc_info()[2]
tbinfo = traceback.format_tb(tb)[0]
pymsg = "PYTHON ERRORS:\nTraceback Info:\n" + tbinfo + "\nError Info:\n " + \
str(sys.exc_type)+ ": " + str(sys.exc_value) + "\n"
msgs = "GP ERRORS:\n" + arcpy.GetMessages(2 )+ "\n"
print (pymsg)
print (msgs)
For me the way to go is to read both lists into list objects:
list1 = ["372911000002001", "372809000001400", "372809000001500"]
list2 = ["372911000003100", "372909000001400", "372909000001500"]
for ii, jj in zip(list1, list2):
newname = re.sub(ii,jj,ARN) #re.sub returns ARN if no substitution done
if newname != ARN:
break
An idea: try to convert the id's to strings. I mean:
newname = re.sub('372911000002001','372911000003100',ARN)
Hope it helps!

Python copy files to a new directory and rename if file name already exists

I've already read this thread but when I implement it into my code it only works for a few iterations.
I'm using python to iterate through a directory (lets call it move directory) to copy mainly pdf files (matching a unique ID) to another directory (base directory) to the matching folder (with the corresponding unique ID). I started using shutil.copy but if there are duplicates it overwrites the existing file.
I'd like to be able to search the corresponding folder to see if the file already exists, and iteratively name it if more than one occurs.
e.g.
copy file 1234.pdf to folder in base directory 1234.
if 1234.pdf exists to name it 1234_1.pdf,
if another pdf is copied as 1234.pdf then it would be 1234_2.pdf.
Here is my code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
print path
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir,ARN)
# if the folder already exists in new directory
if os.path.exists(link):
#this is the file location in the new directory
file = os.path.join(basedir, ARN, ARN)
linkfn = os.path.join(basedir, ARN, filename)
if os.path.exists(linkfn):
i = 0
#if this file already exists in the folder
print "Path exists already"
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
else:
shutil.copy(path, link)
print ARN + " " + "Copied"
else:
print ARN + " " + "Not Found"
Sometimes it is just easier to start over... I apologize if there is any typo, I haven't had the time to test it thoroughly.
movdir = r"C:\Scans"
basedir = r"C:\Links"

# Copy every file found below movdir into basedir/<file stem>/, appending
# _1, _2, ... to the stem when a file of that name is already there.
for root, dirs, files in os.walk(movdir):
    # Absolute source directory, so the walk also works for relative movdir.
    source_root = os.path.abspath(root)
    for filename in files:
        old_name = os.path.join(source_root, filename)
        base, extension = os.path.splitext(filename)
        target_dir = os.path.join(basedir, base)

        # Target folders are never created here; files without one are skipped.
        if not os.path.exists(target_dir):
            print(" ".join((os.path.join(basedir, base), "not found")))
            continue

        new_name = os.path.join(basedir, base, filename)
        if not os.path.exists(new_name):
            # Folder exists and the plain name is free: copy under that name.
            shutil.copy(old_name, new_name)
            continue

        # Plain name is taken: probe base_1, base_2, ... for the first free one.
        ii = 1
        new_name = os.path.join(basedir, base, base + "_" + str(ii) + extension)
        while os.path.exists(new_name):
            ii += 1
            new_name = os.path.join(basedir, base, base + "_" + str(ii) + extension)
        shutil.copy(old_name, new_name)
        print(" ".join(("Copied", old_name, "as", new_name)))
I always use a timestamp, so it's not possible that the file already exists:
import os
import shutil
import datetime
now = str(datetime.datetime.now())[:19]
now = now.replace(":","_")
src_dir="C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand.xlsx"
dst_dir="C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand_"+str(now)+".xlsx"
shutil.copy(src_dir,dst_dir)
For me shutil.copy is the best:
import shutil
#make a copy of the invoice to work with
src="invoice.pdf"
dst="copied_invoice.pdf"
shutil.copy(src,dst)
You can change the path of the files as you want.
I would say you have an indentation problem, at least as you wrote it here:
while not os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
should be:
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
Check this out, please!
import os
import shutil
import glob

src = r"C:\Source"
dest = r"C:\Destination"
par = "*"

# Names already present in the destination. Collected once and kept current
# as files are copied, instead of re-globbing and appending duplicates on
# every iteration.
existing = set(os.path.basename(n) for n in glob.glob(os.path.join(dest, par)))

for file in glob.glob(os.path.join(src, par)):
    # "*" also matches sub-directories, which shutil.copy cannot handle.
    if not os.path.isfile(file):
        continue
    # os.path.basename works with either path separator.
    f = os.path.basename(file)
    if f not in existing:
        print("copied",f," to ",dest)
        shutil.copy(file,dest)
        existing.add(f)
        continue

    # splitext keeps names such as "a.tar.gz" or "README" intact, where
    # splitting on "." would drop parts of the name or raise IndexError.
    stem, ext = os.path.splitext(f)
    i = 1
    f1 = stem+"_"+str(i)+ext
    while f1 in existing:
        print("{} already exists in {}".format(f1,dest))
        i = i + 1
        f1 = stem+"_"+str(i)+ext
    shutil.copy(file,os.path.join(dest,f1))
    existing.add(f1)
    print("renamed and copied ",f1 ,"to",dest)

Fail to check files integrity

I'm writing a quick Python script to migrate files from one directory to another. Everything works out perfectly except for the part where I have to compare the filenames and checksum. I stored the file in both locations, that is, /root/src/file1 and /root/dst/file1.
So when I ran for filenames compare, it failed to match the file since it included the whole filepath. md5Srt is a dict that stores files and checksum.
Is there a way that I can compare the filenames without using the whole filepath?
for key in md5Srt.keys():
if key in md5Dst:
print "keys match " + key
print '\ncomparing the values of files\n'
if md5Srt[key] == md5Dst[key]:
print md5Srt[key]
print md5Dst[key]
print "files match\n"
print "checking the next pair"
else:
print "values of files don't match"
If you just have a bunch of files in a directory you can just use os.path.basename:
import os
>>> dst = os.path.basename('/root/dst/file1.file')
>>> src = os.path.basename('/root/src/file1.file')
>>> dst
'file1.file'
>>> src
'file1.file'
>>> dst == src
True
If you are dealing with subdirectories you would need to know the base src and dst directories and then remove them from the beginning of each path:
>>> src = '/root/src'
>>> dst = '/root/dst'
>>> src_file = '/root/src/dir1/file1.file'
>>> dst_file = '/root/dst/dir1/file1.file'
>>> os.path.relpath(src_file, src)
'dir1/file1.file'
>>> os.path.relpath(dst_file, dst)
'dir1/file1.file'
>>> os.path.relpath(src_file, src) == os.path.relpath(dst_file, dst)
True
If you combine this with your function you get:
import os

src = '/root/src'
dst = '/root/dst'

# NOTE(review): this loop treats md5Srt / md5Dst as mapping checksum -> file
# path; confirm against how the dictionaries are actually built.
# .items() and single-argument print(...) run on both Python 2 and 3.
for key, src_file in md5Srt.items():
    dst_file = md5Dst.get(key)
    if dst_file is None:
        print('The destination is missing %s' % src_file)
        continue
    print("keys match " + key)
    print('\ncomparing the values of files\n')
    # Compare the paths relative to their base directories, so that
    # /root/src/dir1/file1 and /root/dst/dir1/file1 count as the same file.
    if os.path.relpath(src_file, src) == os.path.relpath(dst_file, dst):
        print(src_file)
        print(dst_file)
        print("files match\n")
        print("checking the next pair")
    else:
        print("values of files don't match")
I think you should rethink trying to compare files by finding a file in dst that has the same md5sum as a file in src. If a file is renamed or there are two files with the same hash you may end up with directories that are not exactly the same. A better approach would be to compare filenames first, and then check md5sums if there is a file that is in both src and dst.
Here is what that might look like:
import os

src_dir = '/root/src'
dst_dir = '/root/dst'
# reverse the dictionaries, hopefully you would create these dictionaries
# to begin with. A single file can only have one md5sum, but the same md5Sum can
# match multiple files
# NOTE(review): assumes md5Srt / md5Dst map checksum -> file path; confirm.
# Iterating a dict directly yields only its keys, so .items() is needed to
# unpack (checksum, path) pairs.
src_file_hashes = dict((os.path.relpath(v, src_dir), k) for k, v in md5Srt.items())
dst_file_hashes = dict((os.path.relpath(v, dst_dir), k) for k, v in md5Dst.items())
for src_file, src_hash in src_file_hashes.items():
    dst_hash = dst_file_hashes.get(src_file)
    src_path = os.path.join(src_dir, src_file)
    # The key is the path relative to the base directory, so the same
    # relative path identifies the counterpart under dst_dir.
    dst_path = os.path.join(dst_dir, src_file)
    if dst_hash is None:
        print('The destination file %s is missing ' % dst_path)
        continue
    if src_hash == dst_hash:
        print('%s matches %s and %s' % (src_hash, src_path, dst_path))
    else:
        print('%s and %s have different hashes' % (src_path, dst_path))

Categories