Create folders based on filenames - python

I have a folder with some 1500 excel files . The format of each file is something like this:
0d20170101abcd.xlsx
1d20170101ef.xlsx
0d20170104g.xlsx
0d20170109hijkl.xlsx
1d20170109mno.xlsx
0d20170110pqr.xlsx
The first character of the file name is either '0' or '1' followed by 'd' followed by the date when the file was created followed by customer id(abcd,ef,g,hijkl,mno,pqr).The customer id has no fixed length and it can vary.
I want to create folders for each unique date(folder name should be date) and move the files with the same date into a single folder .
So for the above example, 4 folders (20170101, 20170104, 20170109, 20170110) have to be created, with files that share the same date copied into their respective folder.
I want to know if there is any way to do this in python ? Sorry for not posting any sample code because I have no idea as to how to start.

Try this out:
import os
import re
root_path = 'test'
def main():
    """Run process_file() on every entry listed directly under ``root_path``."""
    # Dates whose folder has already been created during this run;
    # process_file() appends to this list.
    created_dirs = []
    for entry in os.listdir(root_path):
        process_file(entry, created_dirs)
def process_file(file_name, created_dirs):
    """Move one file from ``root_path`` into the sub-folder named after its date.

    Args:
        file_name: Name of an entry located directly under ``root_path``.
        created_dirs: List of dates whose folder was already created during
            this run; a date is appended when its folder is made here.

    Entries that are not regular files, or whose name get_file_info()
    cannot parse, are left untouched.
    """
    file_path = os.path.join(root_path, file_name)
    # Safeguard: skip anything that is not a regular file (e.g. the date
    # folders created below).
    if not os.path.isfile(file_path):
        return

    file_date, user_id, file_ext = get_file_info(file_name)
    # get_file_info() hands back three Nones for a name it cannot parse.
    if file_date is None or user_id is None or file_ext is None:
        return

    # Make sure we haven't already created the directory
    if file_date not in created_dirs:
        create_dir(file_date)
        created_dirs.append(file_date)

    # Keep the original file name. Renaming to "<customer id>.<ext>" would
    # drop the 0/1 prefix, so "0d20170101abcd.xlsx" and "1d20170101abcd.xlsx"
    # would land on the same target and one would overwrite the other.
    os.rename(file_path, os.path.join(root_path, file_date, file_name))
    # Single-argument call form: prints the same text on Python 2 and 3.
    print('{} {}'.format(file_date, user_id))
def create_dir(dir_name):
    """Create ``dir_name`` under ``root_path`` unless it is already a directory."""
    target = os.path.join(root_path, dir_name)
    # isdir() is False both for a missing path and for a path that exists
    # but is not a directory; in either case mkdir() is attempted.
    if os.path.isdir(target):
        return
    os.mkdir(target)
def get_file_info(file_name):
    """Split a name such as ``0d20170101abcd.xlsx`` into its parts.

    Args:
        file_name: Bare file name (no directory part).

    Returns:
        A ``(date, customer_id, extension)`` tuple of strings, or
        ``(None, None, None)`` when the whole name does not follow the
        ``<0|1>d<8 digits><id>.<ext>`` layout.
    """
    # re.match anchors at the start and \Z at the end, so names with extra
    # text before the prefix or after the extension are rejected instead of
    # being partially matched.
    match = re.match(r'[01]d(\d{8})([\w+-]+)\.(\w+)\Z', file_name)
    if match is None:
        return None, None, None
    return match.group(1), match.group(2), match.group(3)
if __name__ == '__main__':
main()
Note that depending on the names of your files, you might want to change (in the future) the regex I use, i.e. [01]d(\d{8})([\w+-]+) (you can play with it and see details about how to read it here)...

Check this code.
import os
# Work on the current directory. os.listdir() returns plain strings, so the
# "is it a file" test has to go through os.path rather than a method on the name.
base_dir = os.getcwd()
files = [x for x in os.listdir(base_dir)
         if os.path.isfile(os.path.join(base_dir, x))]
for i in files:
    d = i[2:10]  # get date from filename
    # Skip names that do not carry an 8-digit date at that position
    # (e.g. a script stored in the same folder).
    if len(d) != 8 or not d.isdigit():
        continue
    target_dir = os.path.join(base_dir, d)
    # Create the date folder on first use only.
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)
    # os.path.join supplies the separator that plain string concatenation omits.
    os.rename(os.path.join(base_dir, i), os.path.join(target_dir, i))
Here's the repl link

Try this out :
import os, shutil
filepath = "your_file_path"
files = [x for x in os.listdir(filepath) if x.endswith(".xlsx")]
for i in files:
    j = i[2:10]  # date part of the file name
    target_dir = os.path.join(filepath, j)
    # Create the date folder only when missing so the script can be re-run.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    # Copy straight from the source folder into the date folder. The full
    # original name is kept so that "0d..." and "1d..." files of the same
    # date and customer cannot overwrite each other.
    shutil.copy2(os.path.join(filepath, i), os.path.join(target_dir, i))

Related

I want to move a file based on part of the name to a folder with that name

I have a directory with a large number of files that I want to move into folders based on part of the file name. My list of files looks like this:
001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc
I want to move the files I have into separate folders based on the first part of the file name (001-096-016B, 001-023-022, 001-022-151). The first parts of the file name always have the same number of digits and are always in 3 parts separated by a hyphen '-'.
The folder names are named like this \oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200.
So for example, this file should be placed in the above folder, based on the folder name (the numbers):
001-020-012B-B.nc
File path divided into column to show where the above file has to be moved to:
(001)-Disc-PED\(020)-Rotor-parts-1200.
Therefore:
(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc
This is what I have tried from looking online but it does not work:
My thinking is I want to loop through the folders and look for matches.
import os
import glob
import itertools
import re
#Source file
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
#Seperation
dirs = glob.glob('*-*')
#Every file with file extension .nc
files = glob.glob('*.nc')
# Walk the source tree; os.walk() yields (dirpath, dirnames, filenames) tuples.
# NOTE(review): the loop variables reuse the names `dirs` and `files` assigned
# from glob.glob() earlier in this snippet, so those earlier values are discarded.
for root, dirs, files in os.walk(sourcefile):
for file in files:
if file.endswith(".nc"):
# Characters 0-2 and 4-6 of the file name (e.g. "001" and "020" in "001-020-012B-B.nc").
first3Char = str(file[0:3])
last3Char = str(file[4:7])
# NOTE(review): a single loop variable receives the whole 3-tuple from os.walk(),
# so `root` below is a tuple rather than a path string; the replace() calls
# strip tuple/list punctuation from its string form.
for root in os.walk(destinationPath):
first33CharsOfRoot = str(root[0:33])
cleanRoot1 = str(root).replace("[", "")
cleanRoot2 = str(cleanRoot1).replace("]", "")
cleanRoot3 = str(cleanRoot2).replace(")", "")
cleanRoot4 = str(cleanRoot3).replace("'", "")
cleanRoot5 = str(cleanRoot4).replace(",", "")
# Captures the last three characters before any trailing whitespace.
# NOTE(review): re.findall() returns a list, so the comparisons with the
# string first3Char below can never be equal.
firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))
print(firstCharOfRoot==first3Char)
if(firstCharOfRoot == first3Char):
print("Hello")
# NOTE(review): `root` is again the full os.walk() tuple here, which
# os.path.basename() does not accept.
for root in os.walk(destinationPath):
print(os.path.basename(root))
# if(os.path)
I realized that I should not look for the last 3 chars in the path, because it is the first (001) etc. Numbers that I need to look for in the beginning to find the first path that I need to go to.
EDIT:
import os
import glob
import itertools
import re
#Source file
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
#Seperation
dirs = glob.glob('*-*')
#Every file with file extension .nc
files = glob.glob('*.nc')
# Walk the source tree; os.walk() yields (dirpath, dirnames, filenames) tuples.
for root, dirs, files in os.walk(sourcefile):
for file in files:
if file.endswith(".nc"):
# Characters 0-2 and 4-6 of the file name (e.g. "001" and "020" in "001-020-012B-B.nc").
first3Char = str(file[0:3])
last3Char = str(file[4:7])
# NOTE(review): a single loop variable receives the whole 3-tuple from os.walk(),
# so `root` is a tuple; the replace() calls strip punctuation from its string form.
for root in os.walk(destinationPath):
cleanRoot1 = str(root).replace("[", "")
cleanRoot2 = str(cleanRoot1).replace("]", "")
cleanRoot3 = str(cleanRoot2).replace(")", "")
cleanRoot4 = str(cleanRoot3).replace("'", "")
cleanRoot5 = str(cleanRoot4).replace(",", "")
# Skip five (resp. six) backslash-terminated path components, then capture the
# digits that start the next component.
# NOTE(review): the pattern expects two backslashes per separator, presumably
# because str() of the tuple shows each backslash doubled - confirm.
firstCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){5}(\d+).*$', str(cleanRoot5))
secondCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){6}(\d+).*$', str(cleanRoot5))
# re.findall() returns lists; join them into plain strings for comparison.
firstCharOfRootCleaned = ''.join(firstCharOfRoot)
secondCharOfRoot = ''.join(secondCharOfRoot)
cleanRoot6 = str(cleanRoot5).replace("(", "")
# NOTE(review): `&` binds tighter than `==`, so this evaluates
# str(first3Char) & secondCharOfRoot (unsupported for two strings) instead of
# combining the two comparisons; `and` is the boolean operator.
if(firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):
print("BINGOf")
# for root1 in os.walk(cleanRoot6):
Solution
There is an improved solution in the next section. But let's decompose the straightforward solution before.
First, get the complete list of subfolders.
all_folders_splitted = [os.path.split(f)\
for f in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)\
if os.path.isdir(f)]
Then, use a function on each of your file to find its matching folder, or a new filepath if it doesn't exist. I include this function called find_folder() in the rest of the script:
import os
import glob
import shutil
sourcefile= r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
all_folders_splitted = [os.path.split(f)\
for f in glob.iglob(os.path.join(destinationPath , "**"), recursive=True)\
if os.path.isdir(f)]
# It will create and return a new directory if no directory matches
def find_folder(part1, part2):
    """Return the destination folder for a file named ``part1-part2-...``.

    Searches ``all_folders_splitted`` (``(parent, name)`` pairs) for a folder
    whose name starts with ``part2`` and whose parent's name starts with
    ``part1``. When there is none, the folder is created and returned.

    Args:
        part1: First number group of the file name (e.g. ``"001"``).
        part2: Second number group of the file name (e.g. ``"020"``).

    Returns:
        Path of the existing or newly created folder.
    """
    # Folders whose parent directory name starts with part1.
    candidates = [folder for folder in all_folders_splitted
                  if os.path.split(folder[0])[-1].startswith(part1)]
    for parent, name in candidates:
        if name.startswith(part2):
            return os.path.join(parent, name)

    if candidates:
        # Inside the already existing folder for part1 (parent of the last candidate).
        dest = os.path.join(candidates[-1][0], part2)
    else:
        # Whole new folder tree
        dest = os.path.join(destinationPath, part1, part2)

    # exist_ok: the folder list was built once at start-up, so a folder
    # created by an earlier call may already be on disk.
    os.makedirs(dest, exist_ok=True)
    # Register the new folder so later files with the same prefix match it
    # in the search above.
    all_folders_splitted.append(os.path.split(dest))
    return dest
# All the files you want to move
# All the files you want to move: every "*-*-*.nc" at any depth below the
# source folder (the variable defined above is `sourcefile`).
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    basename = os.path.basename(file)
    # Split on the first two "-" only; the remainder of the name stays in one piece.
    splitted = basename.split("-", 2)
    # Format the destination folder.
    # Creates it if necessary
    destination_folder = find_folder(splitted[0], splitted[1])
    # Copying the file
    shutil.copy2(file, os.path.join(destination_folder, basename))
Improved solution
In case you have a large number of files, it could be detrimental to "split and match" every folder at each iteration.
We can store the folder, found given a pattern, in a dictionary. The dictionary will be updated if a new pattern is given, else it will return the previously found folder.
import os
import glob
import shutil
sourcefile= r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
# Global dictionary to store folder paths, relative to a pattern
found_pattern = dict()
all_folders_splitted = [os.path.split(f)\
for f in glob.iglob(os.path.join(destinationPath , "**"), recursive=True)\
if os.path.isdir(f)]
def find_folder(part1, part2):
    """Return the destination folder for a file named ``part1-part2-...``.

    Results are remembered in the module-level ``found_pattern`` dict, keyed
    by ``(part1, part2)``, so the folder list is scanned once per pattern.

    Args:
        part1: First number group of the file name (e.g. ``"001"``).
        part2: Second number group of the file name (e.g. ``"020"``).

    Returns:
        Path of the matching folder; it is created when no folder matches.
    """
    current_key = (part1, part2)
    if current_key in found_pattern:
        # Already found previously.
        # We just return the folder path, stored as the value.
        return found_pattern[current_key]

    # Folders whose parent directory name starts with part1.
    candidates = [folder for folder in all_folders_splitted
                  if os.path.split(folder[0])[-1].startswith(part1)]
    for parent, name in candidates:
        if name.startswith(part2):
            dest = os.path.join(parent, name)
            # Update the dictionary
            found_pattern[current_key] = dest
            return dest

    if candidates:
        # Inside the already existing folder for part1 (parent of the last candidate).
        dest = os.path.join(candidates[-1][0], part2)
    else:
        # Whole new folder tree
        dest = os.path.join(destinationPath, part1, part2)
    # Update the dictionary
    found_pattern[current_key] = dest
    os.makedirs(dest, exist_ok=True)
    return dest
# All the files you want to move
# All the files you want to move: every "*-*-*.nc" at any depth below the
# source folder (the variable defined above is `sourcefile`).
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    basename = os.path.basename(file)
    # Split on the first two "-" only; the remainder of the name stays in one piece.
    splitted = basename.split("-", 2)
    # Format the destination folder.
    # Creates it if necessary
    destination_folder = find_folder(splitted[0], splitted[1])
    # Copying the file
    shutil.copy2(file, os.path.join(destination_folder, basename))
This updated solution makes it more efficient (especially when many files should share the same folder) and you could also make use of the dictionary later, if you save it.

delete older folder with similar name using python

I need to iterate over a folder tree. I have to check each subfolder, which looks like this:
moduleA-111-date
moduleA-112-date
moduleA-113-date
moduleB-111-date
moduleB-112-date
etc.
I figured out how to iterate over a folder tree. I can also use stat with mtime to get the date of the folder which seems easier than parsing the name of the date.
How do I single out modules with the same prefix (such as "moduleA") and compare their mtime's so I can delete the oldest?
Since you have no code, I assume that you're looking for design help. I'd lead my students to something like:
Make a list of the names
From each name, find the prefix, such as "moduleA". Put those in a set.
For each prefix in the set
Find all names with that prefix; put these in a temporary list
Sort this list.
For each file in this list *except* the last (newest)
delete the file
Does this get you moving?
I'm posting the code (answer) here. I suppose my question wasn't clear, since I'm getting downvotes, but anyway the solution wasn't as straightforward as I thought. I'm sure the code could use some fine-tuning, but it gets the job done.
#!/usr/bin/python
import os
import sys
import fnmatch
import glob
import re
import shutil
##########################################################################################################
#Remove the directory
def remove(path):
    """Delete the directory tree at ``path``, reporting the outcome on stdout.

    A failure (OSError from shutil.rmtree) is printed rather than raised, so
    one folder that cannot be deleted does not stop the run.
    """
    try:
        shutil.rmtree(path)
    except OSError as err:
        # Print the caught error itself (errno and message), not the exception class.
        print(err)
        print("Unable to remove folder: %s" % path)
    else:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Deleted : %s" % path)
##########################################################################################################
#This function will look for the .sh files in a given path and returns them as a list.
def searchTreeForSh(path):
    """Return the paths of the ``.sh`` files located directly inside ``path``.

    ``path`` may be given with or without a trailing separator.
    """
    # os.path.join inserts the separator only when it is missing; plain
    # concatenation would turn "dir" into the pattern "dir*.sh".
    return glob.glob(os.path.join(path, '*.sh'))
##########################################################################################################
#Gets the full path to files containig .sh and returns a list of folder names (prefix) to be acted upon.
#listOfScripts is a list of full paths to .sh file
#dirname is the value that holds the root directory where listOfScripts is operating in
def getFolderNames(listOfScripts):
    """Return the sorted base names, without extension, of the given script paths.

    Args:
        listOfScripts: Iterable of paths to ``.sh`` files.

    Returns:
        A sorted list such as ``['moduleA', 'moduleB']``; these names are the
        folder prefixes to act upon.
    """
    # os.path.basename copes with the platform's separators, unlike a
    # manual split on '/'.
    return sorted(os.path.splitext(os.path.basename(script))[0]
                  for script in listOfScripts)
##########################################################################################################
def minmax(items):
    """Return the largest element of ``items``."""
    largest = max(items)
    return largest
##########################################################################################################
#This function will check the latest entry in the tuple provided, and will then send "everything" to the remove function except that last entry
def sortBeforeDelete(statDir, t):
    """Remove every folder in ``t`` except those carrying the newest mtime.

    Args:
        statDir: The st_mtime values of the folders in ``t``.
        t: Sequence of ``(folder_path, st_mtime)`` pairs.
    """
    timeNotToDelete = minmax(statDir)
    for entry in t:
        # Entries whose mtime equals the newest one are kept.
        if entry[1] == timeNotToDelete:
            continue
        remove(entry[0])
##########################################################################################################
#A loop to run over the fullpath which is broken into items (see os.listdir above), elemenates the .sh and the .txt files, leaves only folder names, then matches it to one of the
#name in the "folders" variable
def coolFunction(folderNames, path):
    """Group the sub-directories of ``path`` by name and prune each group.

    For every name in ``folderNames``, collects the directories directly
    under ``path`` whose name contains it, together with their st_mtime, and
    passes them to sortBeforeDelete().

    Args:
        folderNames: Folder name prefixes to look for.
        path: Directory whose entries are examined.
    """
    entries = os.listdir(path)
    for folder in folderNames:
        statDir = []  # st_mtime of every matching folder
        pairs = []    # (folder path, st_mtime) for every matching folder
        for item in entries:
            item_path = os.path.join(path, item)
            # Plain substring test: the name is literal text, not a regular
            # expression, so characters such as "." or "+" match themselves.
            if os.path.isdir(item_path) and folder in item:
                mtime = os.stat(item_path).st_mtime
                statDir.append(mtime)
                pairs.append((item_path, mtime))
        if not pairs:
            continue
        sortBeforeDelete(statDir, tuple(pairs))
##########################################################################################################
def main(path):
    """Process every sub-directory found directly under ``path``.

    For each one, the ``.sh`` files inside it supply the folder name
    prefixes, which are then handed to coolFunction() for that directory.
    """
    for component in os.listdir(path):
        component_path = os.path.join(path, component)
        # Test the entry relative to `path`; a bare name would be resolved
        # against the current working directory instead.
        if not os.path.isdir(component_path):
            continue
        # Trailing slash kept: the helpers build paths by concatenation.
        newPath = component_path + '/'
        listOfFolders = searchTreeForSh(newPath)
        folderNames = getFolderNames(listOfFolders)
        coolFunction(folderNames, newPath)
##########################################################################################################
if __name__ == "__main__":
main(sys.argv[1])

Renaming of files for a given pattern

I need help.
there is a folder "C:\TEMP" in this folder are formatted files "IN_ + 7123456789.amr"
It is necessary to make renaming of files for a given pattern.
"IN_ NAME _ DATE-CREATE _ Phone number.amr"
Correspondingly, if a file called "OUT_ + 7123456789.amr" the result format "OUT_ NAME_DATE-CREATE_Phone number.amr"
The question is how to check the file name before calling os.rename, and how to choose the template depending on that name.
import os
path = "C:/TEMP"
# Renames every entry of `path` to "name<index>.txt", regardless of its original name.
for i, filename in enumerate(os.listdir(path)):
# NOTE(review): executed on every iteration although the directory never changes.
os.chdir(path)
os.rename(filename, 'name'+str(i) +'.txt')
# NOTE(review): no effect on the next iteration - enumerate() rebinds `i` each time.
i = i+1
Sorry but none of your examples are consistent in your question, I still don't understand what your C:\temp contains...
Well, assuming it would look like:
>>> os.listdir(path)
['IN_ + 7123456789.amr', 'OUT_ + 7123456789.amr']
The example:
import datetime
import re
import os
# The dot is escaped and the pattern anchored at the end, so only names of
# the exact form "<IN|OUT>_ + <digits>.amr" match.
name_pattern = re.compile(r'(IN|OUT)_ \+ (\d+)\.amr$')
for filename in os.listdir(path):
    match = name_pattern.match(filename)
    if not match:
        continue
    # Full paths are used instead of os.chdir(), so the working directory
    # of the process is left alone.
    source = os.path.join(path, filename)
    file_date = datetime.datetime.fromtimestamp(os.stat(source).st_mtime)
    destination = '%s_%s_%s_Phone number.amr' % (
        match.group(1),  # either IN or OUT
        match.group(2),
        file_date.strftime('%Y%m%d%H%M%S'),  # adjust the format at your convenience
    )
    os.rename(source, os.path.join(path, destination))
Will produce:
IN_7123456789_20150721094227_Phone number.amr
OUT_7123456789_20150721094227_Phone number.amr
Other files won't match the re.match pattern and be ignored.

Renaming multiple images with .rename and .endswith

I've been trying to get this to work, but I feel like I'm missing something. There is a large collection of images in a folder, and I need to rename just part of each filename. For example, I'm trying to rename "RJ_200", "RJ_600", and "RJ_601" all to the same "RJ_500", while keeping the rest of the filename intact.
Image01.Food.RJ_200.jpg
Image02.Food.RJ_200.jpg
Image03.Basket.RJ_600.jpg
Image04.Basket.RJ_600.jpg
Image05.Cup.RJ_601.jpg
Image06.Cup.RJ_602.jpg
This is what I have so far, but it keeps just giving me the "else" instead of actually renaming any of them:
import os
import fnmatch
import sys
user_profile = os.environ['USERPROFILE']
# NOTE(review): "\D" and "\W" are not recognised escape sequences, so the
# backslashes are kept as written; a raw string or os.path.join would be more robust.
dir = user_profile + "\Desktop" + "\Working"
print (os.listdir(dir))
# NOTE(review): `dir` is a string, so this loop visits its characters one by
# one, not the files listed by os.listdir(dir) above.
for images in dir:
# str.endswith() compares literally; "*" is not treated as a wildcard.
if images.endswith("RJ_***.jpg"):
os.rename("RJ_***.jpg", "RJ_500.jpg")
else:
print ("Arg!")
The Python string method endswith does not do pattern-matching with *, so you're looking for filenames which explicitly include the asterisk character and not finding any.
Try using regular expressions to match your filenames and then building your target filename explicitly:
import os
import re
# Whole-field match: "RJ_" followed by exactly three digits.
patt = re.compile(r'RJ_\d\d\d\Z')
user_profile = os.environ['USERPROFILE']
path = os.path.join(user_profile, "Desktop", "Working")
image_files = os.listdir(path)
for filename in image_files:
    flds = filename.split('.')
    # Expect at least "<name>.<category>.<code>"; skip anything shorter or
    # whose third field is not an RJ code.
    if len(flds) < 3 or not patt.match(flds[2]):
        continue
    # Replace only the code field so the extension (and any further fields)
    # of the original name are preserved.
    flds[2] = 'RJ_500'
    to_name = '.'.join(flds)
    if to_name == filename:
        continue  # already carries the target code
    os.rename(os.path.join(path, filename), os.path.join(path, to_name))
Note that you need to do your matching with the file's basename and join on the rest of the path later.
You don't need to use .endswith. You can split the image file name up using .split and check the results. Since there are several suffix strings involved, I've put them all into a set for fast membership testing.
import os
import re
import sys
suffixes = {"RJ_200", "RJ_600", "RJ_601"}
new_suffix = "RJ_500"
user_profile = os.environ["USERPROFILE"]
# Named working_dir rather than dir, which would shadow the builtin.
working_dir = os.path.join(user_profile, "Desktop", "Working")
for image_name in os.listdir(working_dir):
    pieces = image_name.split(".")
    # Names with fewer than three dot-separated fields have no code field;
    # skip them instead of failing with IndexError.
    if len(pieces) < 3 or pieces[2] not in suffixes:
        continue
    # Replace the code field in place so every other field, including the
    # extension, is carried over unchanged.
    pieces[2] = new_suffix
    from_path = os.path.join(working_dir, image_name)
    to_path = os.path.join(working_dir, ".".join(pieces))
    print("renaming {} to {}".format(from_path, to_path))
    os.rename(from_path, to_path)

Write file to directory based on variable in Python

The script will generate multiple files using the year and id variable. These files need to be placed into a folder matching year and id. How do I write them to the correct folders?
file_root_name = row["file_root_name"]
year = row["year"]
id = row["id"]
path = year+'-'+id
# Create the "<year>-<id>" folder.
# NOTE(review): the bare except hides every failure of makedirs(), not only
# "folder already exists".
try:
os.makedirs(path)
except:
pass
# NOTE(review): the file name is not joined with `path`, so the file is opened
# relative to the current working directory; it is also not closed in the
# lines shown here.
output = open(row['file_root_name']+'.smil', 'w')
output.write(prettify(doctype, root))
If I understand your question correctly, you want to do this:
import os.path
file_name = row['file_root_name']+'.smil'
full_path = os.path.join(path, file_name)
output = open(full_path, 'w')
Please note that it's not very common in Python to use the + operator for string concatenation. Although not in your case, with large strings the method is not very fast.
I'd prefer:
file_name = '%s.smil' % row['file_root_name']
and:
# %s accepts both strings and numbers; the question concatenates year and id
# with '+', so they are strings there and %i would raise TypeError.
path = '%s-%s' % (year, id)

Categories