Python setting the sequence from the beginning - python

I have some files as below:
As a result, I want to have:
49111809953_1.txt
78100705693_1.txt
78100705693_2.txt
78100705693_3.txt
but now I have:
49111809953_1.txt
78100705693_**2**.txt
78100705693_3.txt
78100705693_4.txt
Does anyone have an idea where I should add something so that the counter starts over whenever the first part of the filename changes?
import os
import re
# Rename every regular file in folderPath to
# "<text before 'ODS' in its name><counter>.txt".
folderPath = r'C:/Users/a/Desktop/file'
# One counter for all files: it is initialised once here and only ever
# incremented below, so the numbering continues across different prefixes.
fileSequence = 1
if os.path.exists(folderPath):
# Full paths of the directory entries that are regular files.
files = []
for name in os.listdir(folderPath):
if os.path.isfile(os.path.join(folderPath, name)):
files.append(os.path.join(folderPath, name))
print(files)
for ii in files:
# Keep the part of the basename before "ODS", then append the counter.
os.rename(ii, folderPath + '/' + str(os.path.basename(ii).split("ODS")[0]) + str(fileSequence) + '.txt')
fileSequence += 1

Suppose the following unordered list:
# After os.listdir()
files = ['C:/Users/a/Desktop/file/78100705693_ODS_2_231711.txt',
'C:/Users/a/Desktop/file/49111809953_ODS_2_231648.txt',
'C:/Users/a/Desktop/file/78100705693_ODS_2_231655.txt',
'C:/Users/a/Desktop/file/78100705693_ODS_2_231702.txt']
You can use groupby from itertools and pathlib:
from itertools import groupby
import pathlib


def _prefix(path):
    """Return the part of the file name that precedes the first underscore."""
    return pathlib.Path(path).stem.split('_', maxsplit=1)[0]


# groupby() only merges *adjacent* items whose key is equal, and without a
# key every distinct path is its own group (so the counter would never get
# past 1). Sort by the prefix first, then group by that same prefix.
ordered = sorted(files, key=lambda p: (_prefix(p), str(p)))
for prefix, grp in groupby(ordered, key=_prefix):
    # The sequence restarts at 1 for every new prefix.
    for seq, file in enumerate(grp, 1):
        file = pathlib.Path(file)
        # with_stem() keeps the directory and the extension untouched.
        file.rename(file.with_stem(f"{prefix}_{seq}"))

You need to reset your variable fileSequence each time the first part of the file name changes.
This is what I would do, based on your code:
import os
import re
folderPath = r'C:/Users/a/Desktop/file'


def _prefix_of(path):
    """Return the part of the file name that precedes "ODS"."""
    return os.path.basename(path).split("ODS")[0]


if os.path.exists(folderPath):
    files = [
        os.path.join(folderPath, name)
        for name in os.listdir(folderPath)
        if os.path.isfile(os.path.join(folderPath, name))
    ]
    # os.listdir() returns entries in arbitrary order. Sorting by prefix makes
    # files that share a prefix adjacent, so the counter is reset exactly once
    # per prefix and no two files are given the same new name.
    files.sort(key=lambda path: (_prefix_of(path), path))

    prefix = None
    fileSequence = 1
    for this_file in files:
        current = _prefix_of(this_file)
        if current != prefix:
            # First file of a new prefix: start counting over.
            prefix = current
            fileSequence = 1
        os.rename(this_file, folderPath + '/' + prefix + str(fileSequence) + '.txt')
        fileSequence += 1

Related

Filename Numbering in Python

I want to add a proper sequence number to my file names using Python. It works only partially. Suppose a particular folder contains 3 files. The names should become Num1_.doc, Num2_.pdf, Num3_.doc, and that part works fine.
But suppose a new file arrives in that folder: how do I keep the sequence going?
My code is -
import os
# Prefix every entry of the project directory with "Num<index>_".
os.chdir('C:\\Users\\Project\\')
print(os.getcwd())
# enumerate() starts counting at 0 here, and every entry returned by
# os.listdir() is renamed, including ones that already carry a "Num" prefix.
for count, f in enumerate(os.listdir()):
f_name, f_ext = os.path.splitext(f)
f_name = "Num" + str(count) + '_' + f_name
new_name = f'{f_name}{f_ext}'
os.rename(f, new_name)
It generates Num1_.doc, Num2_.pdf, Num3_.doc, etc.
Now a new file is added to that folder. Its name should become Num4_.doc. How can I do this in Python?
Find what the maximum number is out of the current files, then rename any new files with a number 1 higher
import os
import re

# Names produced by the numbering scheme look like "Num<digits>_<original name>".
NUMBERED = re.compile(r"Num(\d+)_")

names = os.listdir()

# First pass: find the highest number already in use (0 if nothing is
# numbered yet). Only names that match the scheme are parsed, so files that
# have not been numbered cannot break the int() conversion.
current_max = 0
for i in names:
    match = NUMBERED.match(i)
    if match:
        current_max = max(current_max, int(match.group(1)))

# Second pass: give every file that is not numbered yet the next free number,
# keeping its original name and extension after the prefix.
for i in names:
    if NUMBERED.match(i) is None:
        current_max += 1
        os.rename(i, f"Num{current_max}_{i}")
Try to first get the maximum number already assigned from previous renamings, and then rename the remaining files starting from that number.
import os
import re
os.chdir('C:\\Users\\Project\\')
print(os.getcwd())

# A name counts as already numbered only if "Num" is followed by at least one
# digit. A plain "starts with Num" test would also catch names such as
# "Number.doc", which carry no number to parse and would never be renamed.
numbered = re.compile(r'Num(\d+)')
entries = os.listdir()

# find max number in filenames, if already named
max_n_file = max(
    (int(m.group(1)) for m in map(numbered.match, entries) if m),
    default=0,
) + 1

# apply same algorithm but rename only the files that are not numbered yet
for f in entries:
    if numbered.match(f):
        continue
    # "Num<n>_" is put in front of the unchanged original name + extension.
    os.rename(f, f"Num{max_n_file}_{f}")
    max_n_file += 1

File Path Details | Using command prompt or any script

Can anybody help me with a command-prompt command, a script, or a Python program to get file details?
Scenario:
A folder contains many subfolders. How can I find out which file formats are present in those folders, and how can I get the paths of all those files?
That is, I need the distinct file names, formats, and paths of the files under a folder and its subfolders.
Is there any way to get that automatically, or does it require manual effort?
To recursively list all files in folders and sub-folders in Python:
Glob [docs]
from glob import glob
glob("**", recursive=True)
OS Walk [docs]
import os
list(os.walk("./"))
import os, csv
import glob
import pandas as pd
import ast
dir_path = r'<path of directory>'
extension_output_path = r"<path of output file. Path where u want to save output in csv format>"
output_filenames_path = r"<path of output file. Path where u want to save output in csv format>"
exts = set(f.split('.')[-1] for dir,dirs,files in os.walk(dir_path) for f in files if '.' in f)
exts = list(set(exts))
subdirs = [x[0] for x in os.walk(dir_path)]
print(exts)
big_list = []
bigg_list = []
def FindMaxLength(lst):
    """Return the length of the longest item in ``lst``.

    An empty ``lst`` yields 0 instead of raising ``ValueError``, so callers
    that pad rows to this width keep working when there is nothing to pad.
    """
    return max(map(len, lst), default=0)
# Split every directory path collected from os.walk() into its components.
# NOTE(review): the split is on '\\' only, so this presumably expects
# Windows-style paths — confirm before running on another OS.
for dirs in subdirs:
split_dirs = dirs.split('\\')
big_list.append(split_dirs)
# Length of the longest component list; used below to pad every row.
big_list_count = FindMaxLength(big_list)
for subdis in big_list:
count_val = big_list_count - len(subdis)
# Row = components, '' padding, then the '/'-joined path as last element.
bigg_list.append(subdis + ['']* count_val + ['/'.join(subdis)])
output_list = []
path_list = []
for subbs in bigg_list:
# Per-directory mapping of extension -> number of matching files.
big_dict = {}
for ext in exts:
# subbs[-1] is the joined directory path appended above; the same
# glob.glob1() lookup is done twice (once for the count, once for names).
# NOTE(review): glob1 is not part of the documented glob API — verify it
# is available on the target Python version.
tifCounter = len(glob.glob1(subbs[-1],"*."+ext))
filenames = glob.glob1(subbs[-1],"*."+ext)
if filenames != []:
# Prefix each matched name with its directory path.
val = list(map((subbs[-1]+'/').__add__,filenames))
if len(val) >1:
for li in val:
path_list.append([ext, li])
else:
path_list.append([ext]+val)
if tifCounter != 0:
big_dict[ext] = tifCounter
output_list.append(subbs+ [big_dict])
# Header: one 'col' per path component plus one for the joined path, then
# 'val' for the dict column, followed by one column per extension.
columns_row = ['col']* (big_list_count + 1)+ ['val'] + exts
with open(extension_output_path,'w', newline='') as csv_file:
csv_wr = csv.writer(csv_file)
csv_wr.writerow(columns_row)
csv_wr.writerows(output_list)
# Re-read the CSV with pandas and spread the dict literal stored in 'val'
# into the per-extension columns, then drop 'val' and overwrite the file.
cv = pd.read_csv(extension_output_path)
for index, row in cv.iterrows():
for ext in exts:
if row['val'] != '{}' and ext in ast.literal_eval(row['val']):
cv.loc[index,ext] = ast.literal_eval(row['val'])[ext]
del cv['val']
cv.to_csv(extension_output_path, index=False)
# Second output: one [extension, path] row per matched file.
with open(output_filenames_path,'w', newline='') as csv_file:
csv_wr = csv.writer(csv_file)
csv_wr.writerow(['extension', 'filename'])
csv_wr.writerows(path_list)
print("completed")
This output file will contain folder/subfolder path with extension's count.

How to prevent shutil.move from overwriting a file if it already exists?

I'm using this Python code in Windows:
shutil.move(documents_dir + "\\" + file_name, documents_dir + "\\backup\\"
+ subdir_name + "\\" + file_name)
When this code is called more times, it overwrites the destination file. I would like to move the file
and if the destination already exists, to rename it
e.g. file_name = foo.pdf
and in backup folder will be foo.pdf, foo(1).pdf, foo(2).pdf etc. or similarly e.g. with dashes
foo-1.pdf, foo-2.pdf etc.
You could just check with os.path.exists() as you're going.
import os
import shutil
file_name = 'test.csv'
documents_dir = r'C:\BR\Test'
subdir_name = 'test'

# os.path.join() keeps the path handling portable across operating systems
backup_dir = os.path.join(documents_dir, 'backup', subdir_name)
source = os.path.join(documents_dir, file_name)

# Split the name at its last period; a name without a period is all stem,
# which keeps extension-less files working.
cut = file_name.rfind('.')
if cut == -1:
    cut = len(file_name)
stem, ext = file_name[:cut], file_name[cut:]

# Try "name.ext", then "name(1).ext", "name(2).ext", ... until a free name
# is found. Counting up from 1 (rather than parsing an existing number)
# fills any gaps and copes with names such as "test(sometext).pdf".
dest = os.path.join(backup_dir, file_name)
num = 0
while os.path.exists(dest):
    num += 1
    dest = os.path.join(backup_dir, f'{stem}({num}){ext}')

shutil.move(source, dest)
Or since this is probably used in a loop you could just drop it into a function.
import os
import shutil
def get_next_file(file_name, dest_dir):
    """Return a path inside ``dest_dir`` that does not exist yet.

    The first candidate is ``dest_dir/file_name``. If that exists,
    ``name(1).ext``, ``name(2).ext``, ... are tried in order and the first
    free one is returned. Nothing is created or moved by this function.

    file_name: bare file name (no directory part).
    dest_dir:  directory the file is going to be placed in.
    """
    # Split once, outside the loop. os.path.splitext() handles names without
    # an extension and treats a leading dot (".env") as part of the name
    # rather than as an extension.
    stem, ext = os.path.splitext(file_name)
    dest = os.path.join(dest_dir, file_name)
    num = 0
    while os.path.exists(dest):
        num += 1
        dest = os.path.join(dest_dir, f'{stem}({num}){ext}')
    return dest
file_name = 'test.csv'
documents_dir = r'C:\BR\Test'
subdir_name = 'test'
source = os.path.join(documents_dir, file_name)
dest = get_next_file(file_name, os.path.join(documents_dir, 'backup', subdir_name))
shutil.move(source, dest)

Python copy files to a new directory and rename if file name already exists

I've already read this thread but when I implement it into my code it only works for a few iterations.
I'm using python to iterate through a directory (lets call it move directory) to copy mainly pdf files (matching a unique ID) to another directory (base directory) to the matching folder (with the corresponding unique ID). I started using shutil.copy but if there are duplicates it overwrites the existing file.
I'd like to be able to search the corresponding folder to see if the file already exists, and iteratively name it if more than one occurs.
e.g.
copy file 1234.pdf to folder in base directory 1234.
if 1234.pdf exists to name it 1234_1.pdf,
if another pdf is copied as 1234.pdf then it would be 1234_2.pdf.
Here is my code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
# Copy every file found under movdir into the folder of basedir that is named
# after the file (without extension), adding "_<i>" when the name is taken.
# NOTE: the print statements below use Python 2 syntax.
movdir = r"C:\Scans"
basedir = r"C:\Links"
# NOTE(review): no except/finally clause for this try is visible in this
# snippet.
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
print path
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir,ARN)
# if the folder already exists in new directory
if os.path.exists(link):
#this is the file location in the new directory
# (basedir/ARN/ARN, i.e. the target path without its extension)
file = os.path.join(basedir, ARN, ARN)
linkfn = os.path.join(basedir, ARN, filename)
if os.path.exists(linkfn):
# The counter starts at 0, so the first renamed copy is "<ARN>_0<ext>".
i = 0
#if this file already exists in the folder
print "Path exists already"
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
else:
# link is the destination folder, so the original file name is kept.
shutil.copy(path, link)
print ARN + " " + "Copied"
else:
print ARN + " " + "Not Found"
Sometimes it is just easier to start over... I apologize if there is any typo, I haven't had the time to test it thoroughly.
import os
import shutil

movdir = r"C:\Scans"
basedir = r"C:\Links"

# Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
    for filename in files:
        # I use absolute path, case you want to move several dirs.
        old_name = os.path.join(os.path.abspath(root), filename)

        # Separate base from extension; the base names the target folder.
        base, extension = os.path.splitext(filename)
        target_dir = os.path.join(basedir, base)

        # If folder basedir/base does not exist... You don't want to create it?
        if not os.path.exists(target_dir):
            print(target_dir, "not found")
            continue  # Next filename

        # Try the original name first, then base_1, base_2, ... until a name
        # is free, so an existing file is never overwritten.
        new_name = os.path.join(target_dir, filename)
        ii = 0
        while os.path.exists(new_name):
            ii += 1
            new_name = os.path.join(target_dir, base + "_" + str(ii) + extension)

        shutil.copy(old_name, new_name)
        if ii:
            # Only report copies that had to be renamed.
            print("Copied", old_name, "as", new_name)
I always add a timestamp to the file name, so it is not possible that the file already exists:
import os
import shutil
import datetime
# Timestamp with second precision; ':' is not allowed in Windows file names,
# so the time fields are separated with '_' instead.
stamp = datetime.datetime.now().strftime("%Y-%m-%d %H_%M_%S")
source_file = "C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand.xlsx"
stamped_copy = "C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand_" + stamp + ".xlsx"
shutil.copy(source_file, stamped_copy)
For me shutil.copy is the best:
import shutil
#make a copy of the invoice to work with
src="invoice.pdf"
dst="copied_invoice.pdf"
shutil.copy(src,dst)
You can change the path of the files as you want.
I would say you have an indentation problem, at least as you wrote it here:
while not os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
should be:
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
Check this out, please!
import os
import shutil
import glob
src = r"C:\Source"
dest = r"C:\Destination"
par = "*"

for file in glob.glob(os.path.join(src, par)):
    # glob("*") also returns sub-directories, which shutil.copy cannot copy.
    if not os.path.isfile(file):
        continue

    # basename/splitext work with any path separator and with names that
    # contain no dot or several dots.
    f = os.path.basename(file)
    stem, ext = os.path.splitext(f)

    # Try the original name first, then stem_1, stem_2, ... and check the
    # destination directly so files copied earlier in this run are seen too.
    target = os.path.join(dest, f)
    i = 0
    while os.path.exists(target):
        print("{} already exists in {}".format(os.path.basename(target), dest))
        i += 1
        target = os.path.join(dest, stem + "_" + str(i) + ext)

    shutil.copy(file, target)
    if i:
        print("renamed and copied ", os.path.basename(target), "to", dest)
    else:
        print("copied", f, " to ", dest)

Put all files with same name in a folder

I'm new to Python and I need a program that copies files from the same day into a new folder.
Example files:
20120807_first_day_pic.jpg
20120807_first_day_sheet.jpg
20120807_first_day_sheet2.jpg
20120907_second_day_pic.jpg
20120907_second_day_sheet.jpg
20120907_second_day_sheet2.jpg
This is what I have so far, but every file gets a folder and not the whole day.
import os, re, shutil
tfolder = 'D:/Testing/src/'
os.chdir(tfolder)
re_year19xxxxxx = re.compile('(19[0-9][0-9][0-9][0-9])')
re_year20xxxxxx = re.compile('(20[0-9][0-9][0-9][0-9])')
re_ed = re.compile('(ED[0-9])')
destPath = 'D:/Testing/Dest/'
# Copy fname into a folder below destPath whose name is derived from the
# file name (plus year/edition sub-folders when a year pattern is found).
def analyse_file_name(fname):
filePath, coords = os.path.split(fname) #the new folders will be named according to the first 4 characters of the original file name
# NOTE: the slices below take up to 53 characters, not 4; for any file name
# shorter than that, the folder name is the complete file name.
coordsFolder = coords[:53]
coordsFname = coords[:53]
coordsExt = os.path.splitext(fname)
year = 'year' #create variable year
ed = 'ed' #create variable ed to store the edition number if necessary
bname = fname #the original file name
for re_year in (re_year19xxxxxx, re_year20xxxxxx):
rx = re_year.search(fname) #search for regex in the file name and store it in rx
if rx:
# The year patterns capture six characters ("19"/"20" plus four digits).
year = rx.group(1) #if the regex is found, store the year
# NOTE: str.replace() returns a new string; the result is discarded here,
# so bname is left unchanged.
bname.replace(year, ' ')
res = re_ed.search(fname)
if res:
ed = res.group(1)
bname.replace(ed, ' ')
os.chdir(destPath)
# NOTE: `is` tests object identity rather than string equality.
if year is 'year':
fname2 = os.path.join(destPath, coordsFolder) + '\\' + coordsFname + coordsExt[1]
else:
fname2 = os.path.join(destPath, coordsFolder,year,ed) + '\\' + coordsFname + coordsExt[1]
print('%s -> %s' % (fname, fname2)) #debug print
# Create the destination folder on first use, then copy the file into it.
dirn, _ = os.path.split(fname2)
if not os.path.exists(dirn):
os.makedirs(dirn)
shutil.copy(fname, fname2)
for root, dirs, files in os.walk(tfolder):
for name in files:
fn = os.path.join(root, name)
analyse_file_name(fn)
If you just want to copy files that start with a known date string format, how about something like this?
import datetime
import os
import shutil


def copyfile(filepath, target_dir):
    """Copy ``filepath`` into ``target_dir/<year>/<month>/<day>/``.

    The date is taken from the part of the file name before the first
    underscore, which must be in ``YYYYMMDD`` form. Files whose name does not
    start with such a date are reported and skipped.

    filepath:   path of the file to copy.
    target_dir: root folder under which the dated sub-folders are created.
    """
    filename = os.path.basename(filepath)
    # get date component of name
    date_component = filename.split("_", 1)[0]
    # try to parse out the date
    try:
        d = datetime.datetime.strptime(date_component, "%Y%m%d")
    except ValueError:
        print("Could not place: ", filename)
        return
    target_date_dir = os.path.join(target_dir, str(d.year), str(d.month), str(d.day))
    # Several files share the same day, so the folder usually exists already
    # from the second file on; exist_ok avoids failing in that case.
    os.makedirs(target_date_dir, exist_ok=True)
    shutil.copy(filepath, target_date_dir)
First, create a dict (a defaultdict was even more convenient here) that will gather the files for a date (it's good to use re, but given the names of your files using split was easier):
>>> import os
>>> import re
>>> pat = r'(\d+)(?:_\d+)?_(\w+?)[\._].*'
>>> from collections import defaultdict
>>> dict_date = defaultdict(lambda : defaultdict(list))
>>> for fil in os.listdir(path):
if os.path.isfile(os.path.join(path, fil)):
date, animal = re.match(pat, fil).groups()
dict_date[date][animal].append(fil)
>>> dict_date['20120807']
defaultdict(<type 'list'>, {'first': ['20120807_first_day_pic.jpg', '20120807_first_day_sheet.jpg', '20120807_first_day_sheet2.jpg']})
Then for each date, create a subfolder and copy the corresponding files there:
>>> from shutil import copyfile
>>> for date in dict_date:
for animal in dict_date[date]:
try:
os.makedirs(os.path.join(path, date, animal))
except os.error:
pass
for fil in dict_date[date][animal]:
copyfile(os.path.join(path, fil), os.path.join(path, date, animal, fil))
EDIT: took into account OP's new requirements, and Khalid's remark.
Regex day :)
What about trying to match the filename with
pattern=r'(?P<filedate>(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))\_(?P<bloodyrestofname>.*)'
Complete date, year, etc. may be retrieved from the respective named groups in the match.
import os, shutil
src_path = "D:\\Testing\\Src\\"
dest_path = "D:\\Testing\\Dest\\"

for file in os.listdir(src_path):
    source = os.path.join(src_path, file)
    # Sub-folders of the source directory cannot be copied with shutil.copy.
    if not os.path.isfile(source):
        continue
    # The date is the part before the first underscore
    # ("20120807_first_day_pic.jpg" -> "20120807"), so all files of one day
    # end up in the same folder.
    day_dir = os.path.join(dest_path, file.split("_", 1)[0])
    # makedirs also creates dest_path itself if needed and does not fail
    # when the day folder already exists.
    os.makedirs(day_dir, exist_ok=True)
    shutil.copy(source, day_dir)

Categories