file exists or not through matching filename in list - python

i have files in folders and subfolders. folder structure is like this
2020(folder)
-01(sub folder)
--14(sub-sub folder)
----abc1-2020-01-14.csv
----abc2-2020-01-14.csv
-02(subfolder in 2020)
--17(sub-sub folder in 02)
----abc1-2020-02-17.csv
----abc4-2020-02-17.csv
i have list of file names.
li = ['abc1','abc2','abc3','abc4']
i want to know if these file exists in directory or not. each subdirectory should have all 4 files. if not then code must return path where particular file doesnot exist.
import glob
BASE_PATH = r'2020/'
allin= BASE_PATH + '/*/*'
li = ['abc1','abc2','abc3','abc4']
print('Names of files:')
for name in glob.glob(allin):
print('\t', name)
for k in li:
try:
f = open(r"C:\\Users\\Karar\\ProjectFiles\\scripts\\"+ name + "\\" + k + "*.csv")
except IOError:
print(name+k+ ".csv""File not present")
print name is returning 2020\01\14 and 2020\02\17
iam having difficulty in giving path here in open method. please also note that my filename stored in folders has date in the end so need to tackle that as well in path so that for any date at the end of file name if folder carry files with name in list then okay do nothing but if files are missing in sub folders then print EXCEPT file not present with path.
note each folder has to carry all 4 files if not then return except.

One possible approach:
import glob, os.path
base = '2020'
li = ['abc1','abc2','abc3','abc4']
for dirname in glob.glob(base + '/*/*'):
year, month, day = dirname.split('/')
for prefix in li:
filename = "{}/{}.csv".format(dirname, '-'.join(prefix, year, month, day))
if not os.path.exists(filename):
print(filename)

Related

Python 3.7 - How to check if file exists and rename

I am trying to figure out how to check if a file within my source folder, exists within my destination folder, then copy the file over to the destination folder.
If the file within the source folder exists within the destination folder, rename file within source folder to "_1" or _i+1 then copy it to destination folder.
For Example (will not be a .txt, just using this as an example, files will be dynamic in nature):
I want to copy file.txt from folder a over to folder b.
file.txt already exists within within folder b a. If I attempted to copy file.txt over to folder b, I would receive a copy error.
Rename file.txt to file_1.txt a. Copy file_1.txt to folder b b. If file_1.txt exists then make it file_2.txt
What I have so far is this:
for filename in files:
filename_only = os.path.basename(filename)
src = path + "\\" + filename
failed_f = pathx + "\\Failed\\" + filename
# This is where I am lost, I am not sure how to declare the i and add _i + 1 into the code.
if path.exists(file_path):
numb = 1
while True:
new_path = "{0}_{2}{1}".format(*path.splitext(file_path) + (numb,))
if path.exists(new_path):
numb += 1
shutil.copy(src, new_path)
else:
shutil.copy(src, new_path)
shutil.copy(src, file_path)
Thanks much in advance.
import os
for filename in files:
src = os.path.join(path, filename)
i = 0
while True:
base = os.path.basename(src)
name = base if i == 0 else "_{}".format(i).join(os.path.splitext(base))
dst_path = os.path.join(dst, name)
if not os.path.exists(dst_path):
shutil.copy(src, dst_path)
break
i += 1

Create folders based on filenames

I have a folder with some 1500 excel files . The format of each file is something like this:
0d20170101abcd.xlsx
1d20170101ef.xlsx
0d20170104g.xlsx
0d20170109hijkl.xlsx
1d20170109mno.xlsx
0d20170110pqr.xlsx
The first character of the file name is either '0' or '1' followed by 'd' followed by the date when the file was created followed by customer id(abcd,ef,g,hijkl,mno,pqr).The customer id has no fixed length and it can vary.
I want to create folders for each unique date(folder name should be date) and move the files with the same date into a single folder .
So for the above example , 4 folders (20170101,20170104,20170109,20170110) has to be created with files with same dates copied into their respective folders.
I want to know if there is any way to do this in python ? Sorry for not posting any sample code because I have no idea as to how to start.
Try this out:
import os
import re
root_path = 'test'
def main():
# Keep track of directories already created
created_dirs = []
# Go through all stuff in the directory
file_names = os.listdir(root_path)
for file_name in file_names:
process_file(file_name, created_dirs)
def process_file(file_name, created_dirs):
file_path = os.path.join(root_path, file_name)
# Check if it's not itself a directory - safe guard
if os.path.isfile(file_path):
file_date, user_id, file_ext = get_file_info(file_name)
# Check we could parse the infos of the file
if file_date is not None \
and user_id is not None \
and file_ext is not None:
# Make sure we haven't already created the directory
if file_date not in created_dirs:
create_dir(file_date)
created_dirs.append(file_date)
# Move the file and rename it
os.rename(
file_path,
os.path.join(root_path, file_date, '{}.{}'.format(user_id, file_ext)))
print file_date, user_id
def create_dir(dir_name):
dir_path = os.path.join(root_path, dir_name)
if not os.path.exists(dir_path) or not os.path.isdir(dir_path):
os.mkdir(dir_path)
def get_file_info(file_name):
match = re.search(r'[01]d(\d{8})([\w+-]+)\.(\w+)', file_name)
if match:
return match.group(1), match.group(2), match.group(3)
return None, None, None
if __name__ == '__main__':
main()
Note that depending on the names of your files, you might want to change (in the future) the regex I use, i.e. [01]d(\d{8})([\w+-]+) (you can play with it and see details about how to read it here)...
Check this code.
import os
files = list(x for x in os.listdir('.') if x.is_file())
for i in files:
d = i[2:10] #get data from filename
n = i[10:] #get new filename
if os.path.isdir(i[2:10]):
os.rename(os.getcwd()+i,os.getcwd()+d+"/"+i)
else:
os.mkdir(os.getcwd()+i)
os.rename(os.getcwd()+i,os.getcwd()+d+"/"+i)
Here's is the repl link
Try this out :
import os, shutil
filepath = "your_file_path"
files = list(x for x in os.listdir(filepath) if x.endswith(".xlsx"))
dates = list(set(x[2:10] for x in files))
for j in dates:
os.makedirs(filepath + j)
for i in files:
cid = i[10:]
for j in dates:
if j in i:
os.rename(filepath+i,cid)
shutil.copy2(filepath+cid, filepath+j)

Python - Renaming all files in a directory using a loop

I have a folder with images that are currently named with timestamps. I want to rename all the images in the directory so they are named 'captured(x).jpg' where x is the image number in the directory.
I have been trying to implement different suggestions as advised on this website and other with no luck. Here is my code:
path = '/home/pi/images/'
i = 0
for filename in os.listdir(path):
os.rename(filename, 'captured'+str(i)+'.jpg'
i = i +1
I keep getting an error saying "No such file or directory" for the os.rename line.
The results returned from os.listdir() does not include the path.
path = '/home/pi/images/'
i = 0
for filename in os.listdir(path):
os.rename(os.path.join(path,filename), os.path.join(path,'captured'+str(i)+'.jpg'))
i = i +1
The method rename() takes absolute paths, You are giving it only the file names thus it can't locate the files.
Add the folder's directory in front of the filename to get the absolute path
path = 'G:/ftest'
i = 0
for filename in os.listdir(path):
os.rename(path+'/'+filename, path+'/captured'+str(i)+'.jpg')
i = i +1
Two suggestions:
Use glob. This gives you more fine grained control over filenames and dirs to iterate over.
Use enumerate instead of manual counting the iterations
Example:
import glob
import os
path = '/home/pi/images/'
for i, filename in enumerate(glob.glob(path + '*.jpg')):
os.rename(filename, os.path.join(path, 'captured' + str(i) + '.jpg'))
This will work
import glob2
import os
def rename(f_path, new_name):
filelist = glob2.glob(f_path + "*.ma")
count = 0
for file in filelist:
print("File Count : ", count)
filename = os.path.split(file)
print(filename)
new_filename = f_path + new_name + str(count + 1) + ".ma"
os.rename(f_path+filename[1], new_filename)
print(new_filename)
count = count + 1
the function takes two arguments your filepath to rename the file and your new name to the file

renaming the extracted file from zipfile

I have lots of zipped files on a Linux server and each file includes multiple text files.
what I want is to extract some of those text files, which have the same name across zipped files and save it a folder; I am creating one folder for each zipped file and extract the text file to it. I need to add the parent zipped folder name to the end of file names and save all text files in one directory. For example, if the zipped folder was March132017.zip and I extracted holding.txt, my filename would be holding_march13207.txt.
My problem is that I am not able to change the extracted file's name.
I would appreciate if you could advise.
import os
import sys
import zipfile
os.chdir("/feeds/lipper/emaxx")
pwkwd = "/feeds/lipper/emaxx"
for item in os.listdir(pwkwd): # loop through items in dir
if item.endswith(".zip"): # check for ".zip" extension
file_name = os.path.abspath(item) # get full path of files
fh = open(file_name, "rb")
zip_ref = zipfile.ZipFile(fh)
filelist = 'ISSUERS.TXT' , 'SECMAST.TXT' , 'FUND.TXT' , 'HOLDING.TXT'
for name in filelist :
try:
outpath = "/SCRATCH/emaxx" + "/" + os.path.splitext(item)[0]
zip_ref.extract(name, outpath)
except KeyError:
{}
fh.close()
import zipfile
zipdata = zipfile.ZipFile('somefile.zip')
zipinfos = zipdata.infolist()
# iterate through each file
for zipinfo in zipinfos:
# This will do the renaming
zipinfo.filename = do_something_to(zipinfo.filename)
zipdata.extract(zipinfo)
Reference:
https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
Why not just read the file in question and save it yourself instead of extracting? Something like:
import os
import zipfile
source_dir = "/feeds/lipper/emaxx" # folder with zip files
target_dir = "/SCRATCH/emaxx" # folder to save the extracted files
# Are you sure your files names are capitalized in your zip files?
filelist = ['ISSUERS.TXT', 'SECMAST.TXT', 'FUND.TXT', 'HOLDING.TXT']
for item in os.listdir(source_dir): # loop through items in dir
if item.endswith(".zip"): # check for ".zip" extension
file_path = os.path.join(source_dir, item) # get zip file path
with zipfile.ZipFile(file_path) as zf: # open the zip file
for target_file in filelist: # loop through the list of files to extract
if target_file in zf.namelist(): # check if the file exists in the archive
# generate the desired output name:
target_name = os.path.splitext(target_file)[0] + "_" + os.path.splitext(file_path)[0] + ".txt"
target_path = os.path.join(target_dir, target_name) # output path
with open(target_path, "w") as f: # open the output path for writing
f.write(zf.read(target_file)) # save the contents of the file in it
# next file from the list...
# next zip file...
You could simply run a rename after each file is extracted right? os.rename should do the trick.
zip_ref.extract(name, outpath)
parent_zip = os.path.basename(os.path.dirname(outpath)) + ".zip"
new_file_name = os.path.splitext(os.path.basename(name))[0] # just the filename
new_name_path = os.path.dirname(outpath) + os.sep + new_file_name + "_" + parent_zip
os.rename(outpath, new_namepath)
For the filename, if you want it to be incremental, simply start a count and for each file, go up by on.
count = 0
for file in files:
count += 1
# ... Do our file actions
new_file_name = original_file_name + "_" + str(count)
# ...
Or if you don't care about the end name you could always use something like a uuid.
import uuid
random_name = uuid.uuid4()
outpath = '/SCRATCH/emaxx'
suffix = os.path.splitext(item)[0]
for name in filelist :
index = zip_ref.namelist().find(name)
if index != -1: # check the file exists in the zipfile
filename, ext = os.path.splitext(name)
zip_ref.filelist[index].filename = f'{filename}_{suffix}.{ext}' # rename the extracting file to the suffix file name
zip_ref.extract(zip_ref.filelist[index], outpath) # use the renamed file descriptor to extract the file
I doubt this is possible to rename file during their extraction.
What about renaming files once they are extracted ?
Relying on linux bash, you can achieve it in a one line :
os.system("find "+outpath+" -name '*.txt' -exec echo mv {} `echo {} | sed s/.txt/"+zipName+".txt/` \;")
So, first we search all txt files in the specified folder, then exec the renaming command, with the new name computed by sed.
Code not tested, i'm on windows now ^^'

Python copy files to a new directory and rename if file name already exists

I've already read this thread but when I implement it into my code it only works for a few iterations.
I'm using python to iterate through a directory (lets call it move directory) to copy mainly pdf files (matching a unique ID) to another directory (base directory) to the matching folder (with the corresponding unique ID). I started using shutil.copy but if there are duplicates it overwrites the existing file.
I'd like to be able to search the corresponding folder to see if the file already exists, and iteratively name it if more than one occurs.
e.g.
copy file 1234.pdf to folder in base directory 1234.
if 1234.pdf exists to name it 1234_1.pdf,
if another pdf is copied as 1234.pdf then it would be 1234_2.pdf.
Here is my code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
print path
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir,ARN)
# if the folder already exists in new directory
if os.path.exists(link):
#this is the file location in the new directory
file = os.path.join(basedir, ARN, ARN)
linkfn = os.path.join(basedir, ARN, filename)
if os.path.exists(linkfn):
i = 0
#if this file already exists in the folder
print "Path exists already"
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
else:
shutil.copy(path, link)
print ARN + " " + "Copied"
else:
print ARN + " " + "Not Found"
Sometimes it is just easier to start over... I apologize if there is any typo, I haven't had the time to test it thoroughly.
movdir = r"C:\Scans"
basedir = r"C:\Links"
# Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
# I use absolute path, case you want to move several dirs.
old_name = os.path.join( os.path.abspath(root), filename )
# Separate base from extension
base, extension = os.path.splitext(filename)
# Initial new name
new_name = os.path.join(basedir, base, filename)
# If folder basedir/base does not exist... You don't want to create it?
if not os.path.exists(os.path.join(basedir, base)):
print os.path.join(basedir,base), "not found"
continue # Next filename
elif not os.path.exists(new_name): # folder exists, file does not
shutil.copy(old_name, new_name)
else: # folder exists, file exists as well
ii = 1
while True:
new_name = os.path.join(basedir,base, base + "_" + str(ii) + extension)
if not os.path.exists(new_name):
shutil.copy(old_name, new_name)
print "Copied", old_name, "as", new_name
break
ii += 1
I always use the time-stamp - so its not possible, that the file exists already:
import os
import shutil
import datetime
now = str(datetime.datetime.now())[:19]
now = now.replace(":","_")
src_dir="C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand.xlsx"
dst_dir="C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand_"+str(now)+".xlsx"
shutil.copy(src_dir,dst_dir)
For me shutil.copy is the best:
import shutil
#make a copy of the invoice to work with
src="invoice.pdf"
dst="copied_invoice.pdf"
shutil.copy(src,dst)
You can change the path of the files as you want.
I would say you have an indentation problem, at least as you wrote it here:
while not os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
should be:
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
Check this out, please!
import os
import shutil
import glob
src = r"C:\Source"
dest = r"C:\Destination"
par = "*"
i=1
d = []
for file in glob.glob(os.path.join(src,par)):
f = str(file).split('\\')[-1]
for n in glob.glob(os.path.join(dest,par)):
d.append(str(n).split('\\')[-1])
if f not in d:
print("copied",f," to ",dest)
shutil.copy(file,dest)
else:
f1 = str(f).split(".")
f1 = f1[0]+"_"+str(i)+"."+f1[1]
while f1 in d:
f1 = str(f).split(".")
f1 = f1[0]+"_"+str(i)+"."+f1[1]
print("{} already exists in {}".format(f1,dest))
i =i + 1
shutil.copy(file,os.path.join(dest,f1))
print("renamed and copied ",f1 ,"to",dest)
i = 1

Categories