Python: Replace one list with another list? - python

I'm stuck. I'm moving folders on our network, each of which has a unique ID, into a central location. A few folders have typos in their names and therefore do not match any unique ID in the central location. I have found the correct IDs, but I need to rename these folders before I move them. For example, I have created an Excel spreadsheet with the wrong unique ID in one column and the correct ID in a separate column. Now I want to rename the folders with the correct ID and then transfer them to the central location. My code is rough because I can't think of a good way to do it. I feel like using a list is the way to go, but since my code is iterating through a folder I'm not sure how to achieve this.
Edit: I think something like this may be what I'm looking for
Ex:
In folder A, a file named 12334 should be renamed to 1234 and then moved into the folder named 1234 inside the base directory.
Heres my code:
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
subfolder = "\Private Drain Connections"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir, ARN)
if not os.path.exists(link):
newname = re.sub(372911000002001,372911000003100,ARN)
newname =re.sub(372809000001400,372909000001400,ARN)
newname =re.sub(372809000001500,372909000001500,ARN)
newname =re.sub(372809000001700,372909000001700,ARN)
newname = re.sub(372812000006800,372912000006800,ARN)
newname =re.sub(372812000006900,372912000006900,ARN)
newname =re.sub(372812000007000,372912000007000,ARN)
newname =re.sub(372812000007100,372912000007100,ARN)
newname =re.sub(372812000007200,372912000007200,ARN)
newname =re.sub(372812000007300,372912000007300,ARN)
newname =re.sub(372812000007400,372912000007400,ARN)
newname =re.sub(372812000007500,372912000007500,ARN)
newname =re.sub(372812000007600,372912000007600,ARN)
newname =re.sub(372812000007700,372912000007700,ARN)
newname =re.sub(372812000011100,372912000011100,ARN)
os.rename(os.path.join(movdir, ARN, extension ),
os.path.join(movdir, newname, extension))
oldpath = os.path.join(root, newname)
print ARN, "to", newname
newpath = basedir + "\\" + newname + subfolder
shutil.copy(oldpath, newpath)
print "Copied"
except:
print ("Error occurred")
Thanks to the answers below here is my final code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
subfolder = "\Private Drain Connections"
import string
# Mistyped IDs as they appear in file names under movdir.
l = ['372911000002001',
     '372809000001400',
     '372809000001500',
     '372809000001700',
     '37292200000800',
     ]
# Corrected IDs, paired position-for-position with `l`.
l2 = ['372911000003100',
      '372909000001400',
      '372909000001500',
      '372909000001700',
      '372922000000800',
      ]

try:
    # Walk through all files in the directory that contains the files to copy
    for root, dirs, files in os.walk(movdir):
        for filename in files:
            path = os.path.join(root, filename)
            ARN, extension = os.path.splitext(filename)

            # Files whose ID already has a folder in basedir need no correction.
            if os.path.exists(os.path.join(basedir, ARN)):
                continue

            # Apply the first correction whose wrong ID occurs in the name.
            # Plain string replacement is enough: the IDs contain no regex syntax.
            newname = ARN
            for wrong_id, right_id in zip(l, l2):
                if wrong_id in ARN:
                    newname = ARN.replace(wrong_id, right_id)
                    break
            if newname == ARN:
                print("No correction known for " + path)
                continue

            # Check the destination first so a file is never renamed and then
            # left behind because the copy target is missing.
            newpath2 = basedir + "\\" + newname + subfolder
            if not os.path.isdir(newpath2):
                print("Destination folder not found: " + newpath2)
                continue

            # Rename to the corrected name (the original code rebuilt the old
            # name here, so the rename was a no-op), then copy exactly once.
            newpath = os.path.join(root, newname + extension)
            os.rename(path, newpath)
            print("Renamed " + path + " to " + newpath)
            shutil.copy(newpath, newpath2)
            print("Copied")
except Exception:
    # traceback.format_exc() works on Python 2 and 3; sys.exc_type does not.
    print("Error occurred")
    print("PYTHON ERRORS:\n" + traceback.format_exc())
    print("GP ERRORS:\n" + arcpy.GetMessages(2) + "\n")

For me the way to go is to read both lists into list objects:
# Wrong IDs and their corrections, paired by position.
list1 = ["372911000002001", "372809000001400", "372809000001500"]
list2 = ["372911000003100", "372909000001400", "372909000001500"]

# newname stays equal to ARN when none of the wrong IDs occurs in it,
# because re.sub returns its input unchanged when nothing matches.
newname = ARN
for wrong_id, right_id in zip(list1, list2):
    candidate = re.sub(wrong_id, right_id, ARN)
    if candidate != ARN:
        # The first substitution that actually changes the name wins.
        newname = candidate
        break

An idea: try to convert the id's to strings. I mean:
newname = re.sub('372911000002001','372911000003100',ARN)
Hope it helps!

Related

How to copy only non-duplicate files whilst maintaining folder structure?

I am trying to find duplicates between two folders and copy only unique image files to the 'dest' folder. I can copy all the non-dupes using the code below; however, it doesn't maintain the source directory structure. I think os.walk yields 3-tuples of (dirpath, dirnames, filenames), but I'm not sure how to reconstruct each file's subdirectory from them.
Example:
import shutil, os
from difPy import dif
source = input('Input source folder:')
dest = input('Input backup \ destination folder:')
ext = ('.jpg','.jpeg','.gif','.JPG','.JPEG','.GIF')
search = dif(source, dest)
result = search.result
result
dupes = []
srcfiles = []
filecount = []
failed = []
removed = []
for i in result.values():
dupes.append(i['location'])
for dirpath, subdirs, files in os.walk(source):
for x in files:
if x.endswith(ext):
srcfiles.append(os.path.join(dirpath, x))
for f in srcfiles:
if f not in dupes:
shutil.copy(f, dest)
print('File copied successfully - '+f)
filecount.append(f)
else:
print('File not copied successfully !!!! - '+f)
failed.append(f)
I have also tried using the shutil.copytree function with an ignore list, however it requires a new folder and can't get the ignore list function to work
shutil.copytree example:
for i in result.values():
df = []
df.append(i['filename'])
def ignorelist(source, df):
return [f for f in df if os.path.isfile(os.path.join(source, f))]
shutil.copytree(source, destnew, ignore=ignorelist)
This function ignorelist should do the trick:
import shutil, os
from difPy import dif
source = input('Input source folder:')
dest = input('Input backup \ destination folder:')
ext = ('.jpg','.jpeg','.gif')
search = dif(source, dest)
dupes = [value['location'] for value in search.result.values()]
def ignorelist(source, files):
    """Return the names in *files* that ``shutil.copytree`` should skip.

    A name is skipped when it is a regular file in *source* and either its
    full path is listed in the module-level ``dupes`` or its lower-cased
    name does not end with one of the module-level ``ext`` suffixes.
    Entries that are not regular files are never skipped.
    """
    skipped = []
    for entry in files:
        full_path = os.path.join(source, entry)
        if not os.path.isfile(full_path):
            continue
        if full_path in dupes or not entry.lower().endswith(ext):
            skipped.append(entry)
    return skipped
shutil.copytree(source, dest, ignore=ignorelist)
And the other "more manual" way would be
import shutil, os
from difPy import dif
# Strip trailing separators so paths built by os.walk compare cleanly
# against the source root.
source = input('Input source folder:').rstrip('/\\')
# The backslash is escaped; the prompt text shown to the user is unchanged.
dest = input('Input backup \\ destination folder:').rstrip('/\\')
ext = ('.jpg', '.jpeg', '.gif')
search = dif(source, dest)
dupes = [value['location'] for value in search.result.values()]
srcfiles = []   # every image file found under source
copied = []     # files copied successfully
failed = []     # files whose copy was attempted but raised an error
skipped = []    # files not copied because difPy reported them as duplicates
for dirpath, subdirs, files in os.walk(source):
    for file in files:
        if not file.lower().endswith(ext):
            continue
        srcfile = os.path.join(dirpath, file)
        srcfiles.append(srcfile)
        if srcfile in dupes:
            print('File not copied (duplicate) - '+srcfile)
            skipped.append(srcfile)
            continue
        try:
            # Rebuild the file's location relative to source under dest so
            # the directory structure is preserved.
            destfile = os.path.join(dest, os.path.relpath(srcfile, source))
            os.makedirs(os.path.dirname(destfile), exist_ok=True)
            shutil.copy(srcfile, destfile)
            print('File copied successfully - '+srcfile)
            copied.append(srcfile)
        except OSError as err:
            print('File not copied (error %s) - %s' % (str(err), srcfile))
            # Record the file that failed; the original appended an
            # undefined name `f`, which raised NameError here.
            failed.append(srcfile)
I have changed some variable names to make them more descriptive. And what you call failed is really just a list of files that are not copied because they are duplicates rather than files whose copying was attempted but failed.
import shutil, os
from difPy import dif
source = input('Input source folder: ')
dest = input('Input backup \ destination folder: ')
# Remove trailing path separators if they exist:
if source.endswith(('/', '\\')):
source = source[:-1]
if dest.endswith(('/', '\\')):
dest = dest[:-1]
# Use the correct path separator to
# ensure correct matching with dif results:
if os.sep == '/':
source = source.replace('\\', os.sep)
elif os.sep == '\\':
source = source.replace('/', os.sep)
source_directory_length = len(source) + 1
ext = ('.jpg','.jpeg','.gif','.JPG','.JPEG','.GIF')
search = dif(source, dest)
result = search.result
# Set comprehension:
dupes = {duplicate['location'] for duplicate in result.values()}
copied = []
not_copied = []
for dirpath, subdirs, files in os.walk(source):
for file in files:
if file.endswith(ext):
source_path = os.path.join(dirpath, file)
if source_path not in dupes:
# get subdirectory of source directory that this file is in:
file_length = len(file) + 1
# Get subdirectory relative to the source directory:
subdirectory = source_path[source_directory_length:-file_length]
if subdirectory:
dest_directory = os.path.join(dest, subdirectory)
# ensure directory exists:
os.makedirs(dest_directory, exist_ok=True)
else:
dest_directory = dest
dest_path = os.path.join(dest_directory, file)
shutil.copy(source_path, dest_path)
print('File copied successfully -', source_path)
copied.append(source_path)
else:
print('File not copied -', source_path)
not_copied.append(source_path)

Python - renaming files twice with os

I am currently working on a script to take raw text files, place them in correct year folders based on a version number system, and rename them so that our devs can integrate them into our product easily.
The file in question is called APR30CaseRVU-1Day.txt.
I need this to be in the form refAPRCaseRVU-1Day.txt.
Append ref, remove 30, keep 1Day.
The script works fine for the other files that do not have this extra "1Day". I'm a beginner so I'm sure my workflow is shit but my idea was to use os.rename twice. Once to remove the characters, and then split on the hyphen to add the "1" back in to the filename, but when I print the filename after the first os.rename it still has 30 and 1.
Anyone have any tips to do this more effectively?
folder = r"C:\Users\xx\Desktop\Python Final Project\Raw"
import os
import re
import shutil
for root, dirs, filenames in os.walk(folder):
for filename in filenames:
srcpath = os.path.join(root, filename)
#split the filename so that we can rename accordingly below, filename_split[0] = filename, filename_split[1] = file ext
filename_split = os.path.splitext(filename)
name = filename_split[0]
ext = filename_split[1]
newfolder = ''
destpath = os.path.join(newfolder, "ref" + re.sub(r'\d', '', filename_split[0]) + filename_split[1])
#first step: remove AP files entirely from directory
if filename.startswith("AP27"):
os.remove(os.path.join(folder, filename))
#the rest include a version number that must be routed to the correct year folder directory created above
elif filename.__contains__("30") and filename.__contains__("Day"):
newfolder = r"C:\Users\xx\Desktop\Python Final Project\Raw\2013"
os.rename(os.path.join(root, filename),
os.path.join(newfolder, "ref" + re.sub(r'\d', '', filename_split[0]) + filename_split[1]))
print(filename)
day_name = re.split(r'[\s-]+', filename)
print(day_name)
first_name = day_name[0]
last_name = day_name[1]
os.rename(os.path.join(newfolder, filename),
os.path.join(newfolder, "ref" + first_name + '1' + last_name))
I solved this myself:
os.rename(os.path.join(root, filename),
os.path.join(newfolder, "ref" + re.sub(r'\d{2}', '', name) + ext))
Adding the {2} made it look for 2 consecutive numbers, and left the 1 alone.
Thanks!

Python - Renaming all files in a directory using a loop

I have a folder with images that are currently named with timestamps. I want to rename all the images in the directory so they are named 'captured(x).jpg' where x is the image number in the directory.
I have been trying to implement different suggestions as advised on this website and other with no luck. Here is my code:
path = '/home/pi/images/'
i = 0
for filename in os.listdir(path):
os.rename(filename, 'captured'+str(i)+'.jpg'
i = i +1
I keep getting an error saying "No such file or directory" for the os.rename line.
The results returned from os.listdir() does not include the path.
path = '/home/pi/images/'
i = 0
for filename in os.listdir(path):
os.rename(os.path.join(path,filename), os.path.join(path,'captured'+str(i)+'.jpg'))
i = i +1
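os.rename() resolves relative paths against the current working directory, and os.listdir() returns bare file names without the folder. Unless the script is run from inside that folder, the files cannot be found.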
Add the folder's directory in front of the filename to get the absolute path
path = 'G:/ftest'
i = 0
for filename in os.listdir(path):
os.rename(path+'/'+filename, path+'/captured'+str(i)+'.jpg')
i = i +1
Two suggestions:
Use glob. This gives you more fine grained control over filenames and dirs to iterate over.
Use enumerate instead of manual counting the iterations
Example:
import glob
import os
path = '/home/pi/images/'
for i, filename in enumerate(glob.glob(path + '*.jpg')):
os.rename(filename, os.path.join(path, 'captured' + str(i) + '.jpg'))
This will work
import glob2
import os
def rename(f_path, new_name):
    """Rename every ``*.ma`` file in a folder to ``<new_name><n>.ma``.

    Files are processed in sorted name order and numbered from 1.

    Args:
        f_path: Folder containing the files; a trailing path separator is
            accepted but not required.
        new_name: Prefix for the new file names.

    Returns:
        The list of new file paths, in numbering order.
    """
    import glob  # stdlib glob is sufficient here; no need for glob2

    sources = sorted(glob.glob(os.path.join(f_path, "*.ma")))

    # Phase 1: move every file to a temporary name. Renaming directly could
    # overwrite a not-yet-processed file whose current name equals a target
    # (e.g. an existing "<new_name>1.ma").
    parked = []
    for index, source in enumerate(sources):
        temp = os.path.join(
            f_path, ".{}.{}.renaming".format(os.getpid(), index))
        os.rename(source, temp)
        parked.append((source, temp))

    # Phase 2: give each parked file its final numbered name.
    renamed = []
    for count, (source, temp) in enumerate(parked, start=1):
        new_filename = os.path.join(f_path, new_name + str(count) + ".ma")
        os.rename(temp, new_filename)
        print(source, "->", new_filename)
        renamed.append(new_filename)
    return renamed
The function takes two arguments: the path of the folder containing the files to rename, and the new base name to give the files.

Python - Choose directory that contains a specific string

The following code prints a list of directories that all happen to contain a 3 letter code, Example:
//Server/Jobs/2016\AAM - 'areallylongfilename'/
//Server/Jobs/2016\CLM - 'areallylongfilename'/
//Server/Jobs/2016\COO - 'areallylongfilename'/
import os
basepath = '//Server/Jobs/2016'
for fname in os.listdir(basepath):
path = os.path.join(basepath, fname)
if os.path.isdir(path):
print(path)
How can I get one directory from the list based on the 3 letter code?
import os
basepath = '//Server/Jobs/2016'
asked_name = 'COO'


def find_job_dirs(parent, code):
    """Return the directories directly under *parent* whose name starts with *code*.

    Folder names look like "COO - 'areallylongfilename'", so an exact
    comparison with the bare code never matches; the code is matched as a
    prefix instead.

    Args:
        parent: Directory whose immediate children are searched.
        code: Prefix to look for, e.g. a three-letter job code.

    Returns:
        Full paths of the matching directories, sorted by name.
    """
    matches = []
    for fname in sorted(os.listdir(parent)):
        path = os.path.join(parent, fname)
        if fname.startswith(code) and os.path.isdir(path):
            matches.append(path)
    return matches


if __name__ == '__main__':
    if len(asked_name) != 3:
        print("Expected 3 letter code, got: " + asked_name)
    else:
        for path in find_job_dirs(basepath, asked_name):
            print(path)
Suppose that you want to scan the "d:" disk, you can code as:
import os
dir="d:\\"
for root,dirs,files in os.walk(dir):
for a_dir in dirs:
if ("Server" in a_dir) and ("Jobs" in a_dir) and ("2016" in a_dir):
print os.path.join(root,a_dir)

Python copy files to a new directory and rename if file name already exists

I've already read this thread but when I implement it into my code it only works for a few iterations.
I'm using python to iterate through a directory (lets call it move directory) to copy mainly pdf files (matching a unique ID) to another directory (base directory) to the matching folder (with the corresponding unique ID). I started using shutil.copy but if there are duplicates it overwrites the existing file.
I'd like to be able to search the corresponding folder to see if the file already exists, and iteratively name it if more than one occurs.
e.g.
copy file 1234.pdf to folder in base directory 1234.
if 1234.pdf exists to name it 1234_1.pdf,
if another pdf is copied as 1234.pdf then it would be 1234_2.pdf.
Here is my code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
print path
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir,ARN)
# if the folder already exists in new directory
if os.path.exists(link):
#this is the file location in the new directory
file = os.path.join(basedir, ARN, ARN)
linkfn = os.path.join(basedir, ARN, filename)
if os.path.exists(linkfn):
i = 0
#if this file already exists in the folder
print "Path exists already"
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
else:
shutil.copy(path, link)
print ARN + " " + "Copied"
else:
print ARN + " " + "Not Found"
Sometimes it is just easier to start over... I apologize if there is any typo, I haven't had the time to test it thoroughly.
movdir = r"C:\Scans"
basedir = r"C:\Links"


def unique_destination(folder, filename):
    """Return a path for *filename* inside *folder* that does not exist yet.

    The plain name is used when it is free. Otherwise "_1", "_2", ... is
    inserted before the extension until an unused name is found.
    """
    base, extension = os.path.splitext(filename)
    candidate = os.path.join(folder, filename)
    ii = 1
    while os.path.exists(candidate):
        candidate = os.path.join(folder, base + "_" + str(ii) + extension)
        ii += 1
    return candidate


# Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
    for filename in files:
        # Absolute path, in case several directories are moved.
        old_name = os.path.join(os.path.abspath(root), filename)
        # The file's base name (its ID) selects the target folder.
        base, extension = os.path.splitext(filename)
        target_dir = os.path.join(basedir, base)
        # Target folders are not created here; files without one are skipped.
        if not os.path.isdir(target_dir):
            print(target_dir + " not found")
            continue
        new_name = unique_destination(target_dir, filename)
        shutil.copy(old_name, new_name)
        print("Copied " + old_name + " as " + new_name)
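I always add a timestamp to the file name, so a file with that name is very unlikely to exist already (the timestamp has one-second resolution, so two copies made within the same second would still collide):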
import os
import shutil
import datetime
now = str(datetime.datetime.now())[:19]
now = now.replace(":","_")
src_dir="C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand.xlsx"
dst_dir="C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand_"+str(now)+".xlsx"
shutil.copy(src_dir,dst_dir)
For me shutil.copy is the best:
import shutil
#make a copy of the invoice to work with
src="invoice.pdf"
dst="copied_invoice.pdf"
shutil.copy(src,dst)
You can change the path of the files as you want.
I would say you have an indentation problem, at least as you wrote it here:
while not os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
should be:
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
Check this out, please!
import os
import shutil
import glob
src = r"C:\Source"
dest = r"C:\Destination"
par = "*"


def copy_without_overwrite(src_dir, dest_dir, pattern="*"):
    """Copy files matching *pattern* from *src_dir* into *dest_dir* without overwriting.

    When a file of the same name already exists in *dest_dir*, "_1", "_2",
    ... is inserted before the extension until a free name is found.

    Args:
        src_dir: Directory to copy from (not searched recursively).
        dest_dir: Directory to copy into; created if it does not exist.
        pattern: Glob pattern selecting the files in *src_dir*.

    Returns:
        The list of destination paths written, in sorted source-name order.
    """
    os.makedirs(dest_dir, exist_ok=True)
    # List the destination once and keep the set current as files are added.
    taken = set(os.listdir(dest_dir))
    written = []
    for source_path in sorted(glob.glob(os.path.join(src_dir, pattern))):
        # "*" also matches directories, which shutil.copy cannot copy.
        if not os.path.isfile(source_path):
            continue
        name = os.path.basename(source_path)
        # splitext keeps multi-dot names intact and handles names with no dot.
        stem, extension = os.path.splitext(name)
        target = name
        counter = 1
        while target in taken:
            print("{} already exists in {}".format(target, dest_dir))
            target = "{}_{}{}".format(stem, counter, extension)
            counter += 1
        target_path = os.path.join(dest_dir, target)
        shutil.copy(source_path, target_path)
        taken.add(target)
        if target == name:
            print("copied", name, " to ", dest_dir)
        else:
            print("renamed and copied ", target, "to", dest_dir)
        written.append(target_path)
    return written


if __name__ == "__main__":
    copy_without_overwrite(src, dest, par)

Categories