Put all files with same name in a folder - python

I'm new to Python and I need a program that copies files from the same day into a new folder.
Example files:
20120807_first_day_pic.jpg
20120807_first_day_sheet.jpg
20120807_first_day_sheet2.jpg
20120907_second_day_pic.jpg
20120907_second_day_sheet.jpg
20120907_second_day_sheet2.jpg
This is what I have so far, but every file gets a folder and not the whole day.
import os, re, shutil
tfolder = 'D:/Testing/src/'
os.chdir(tfolder)
re_year19xxxxxx = re.compile('(19[0-9][0-9][0-9][0-9])')
re_year20xxxxxx = re.compile('(20[0-9][0-9][0-9][0-9])')
re_ed = re.compile('(ED[0-9])')
destPath = 'D:/Testing/Dest/'
def analyse_file_name(fname):
    """Copy *fname* into a folder below ``destPath`` derived from its name.

    The folder is named after the first 53 characters of the file's base
    name. When a six-character token starting with ``19`` or ``20`` is found
    in the path, two more levels are appended: that token, and the edition
    tag (``ED`` plus one digit) or the placeholder ``'ed'`` if there is none.

    NOTE(review): a base name shorter than 53 characters is used whole, so
    such a file ends up in a folder of its own.
    """
    _, coords = os.path.split(fname)
    # Folder name and target file stem are both the first 53 characters.
    coordsFolder = coords[:53]
    coordsFname = coords[:53]
    coordsExt = os.path.splitext(fname)
    year = 'year'  # placeholder meaning "no year token found"
    ed = 'ed'  # placeholder meaning "no edition tag found"
    for re_year in (re_year19xxxxxx, re_year20xxxxxx):
        rx = re_year.search(fname)
        if rx:
            year = rx.group(1)
    res = re_ed.search(fname)
    if res:
        ed = res.group(1)
    os.chdir(destPath)
    target_name = coordsFname + coordsExt[1]
    # Compare by value: identity of two equal strings is not guaranteed.
    if year == 'year':
        fname2 = os.path.join(destPath, coordsFolder, target_name)
    else:
        fname2 = os.path.join(destPath, coordsFolder, year, ed, target_name)
    print('%s -> %s' % (fname, fname2))  # debug print
    dirn, _ = os.path.split(fname2)
    if not os.path.exists(dirn):
        os.makedirs(dirn)
    shutil.copy(fname, fname2)
# Visit every file below tfolder and hand its full path to analyse_file_name.
for current_dir, _subdirs, file_names in os.walk(tfolder):
    for file_name in file_names:
        analyse_file_name(os.path.join(current_dir, file_name))

If you just want to copy files that start with a known date string format, how about something like this?
def copyfile(filepath, target_dir):
    """Copy *filepath* into ``<target_dir>/<year>/<month>/<day>``.

    The date is taken from the part of the file name before the first
    underscore and must be in ``YYYYMMDD`` form. Files whose prefix is not
    a valid date are reported and left untouched.
    """
    p, filename = os.path.split(filepath)
    # get date component of name
    date_component = filename.split("_", 1)[0]
    # try to parse out the date
    try:
        d = datetime.datetime.strptime(date_component, "%Y%m%d")
    except ValueError:
        print("Could not place:  %s" % filename)
        return
    target_date_dir = os.path.join(target_dir, str(d.year), str(d.month), str(d.day))
    try:
        os.makedirs(target_date_dir)
    except OSError:
        # The folder already exists for every file after the first one of a
        # given day; only re-raise when creation genuinely failed.
        if not os.path.isdir(target_date_dir):
            raise
    shutil.copy(filepath, target_date_dir)

First, create a dict (a defaultdict was even more convenient here) that will gather the files for a date (it's good to use re, but given the names of your files using split was easier):
>>> import os
>>> import re
>>> pat = r'(\d+)(?:_\d+)?_(\w+?)[\._].*'
>>> from collections import defaultdict
>>> dict_date = defaultdict(lambda : defaultdict(list))
>>> for fil in os.listdir(path):
if os.path.isfile(os.path.join(path, fil)):
date, animal = re.match(pat, fil).groups()
dict_date[date][animal].append(fil)
>>> dict_date['20120807']
defaultdict(<type 'list'>, {'first': ['20120807_first_day_pic.jpg', '20120807_first_day_sheet.jpg', '20120807_first_day_sheet2.jpg']})
Then for each date, create a subfolder and copy the corresponding files there:
>>> from shutil import copyfile
>>> for date in dict_date:
for animal in dict_date[date]:
try:
os.makedirs(os.path.join(path, date, animal))
except os.error:
pass
for fil in dict_date[date][animal]:
copyfile(os.path.join(path, fil), os.path.join(path, date, animal, fil))
EDIT: took into account OP's new requirements, and Khalid's remark.

Regex day :)
What about trying to match the filename with
pattern=r'(?P<filedate>(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))\_(?P<bloodyrestofname>.*)'
Complete date, year, etc. may be retrieved from the respective named groups in the match.

import os, shutil
src_path = "D:\\Testing\\Src\\"
dest_path = "D:\\Testing\\Dest\\"
# Copy each file into a destination folder named after its date prefix,
# i.e. the text before the first "_" (e.g. "20120807").
for file_name in os.listdir(src_path):
    source = os.path.join(src_path, file_name)
    if not os.path.isfile(source):
        continue  # shutil.copy cannot copy directories
    day_dir = os.path.join(dest_path, file_name.split("_")[0])
    if not os.path.isdir(day_dir):
        os.mkdir(day_dir)
    shutil.copy(source, day_dir)

Related

Python setting the sequence from the beginning

I have some files as below:
As a result, I want to have:
49111809953_1.txt
78100705693_1.txt
78100705693_2.txt
78100705693_3.txt
but now I have:
49111809953_1.txt
78100705693_**2**.txt
78100705693_3.txt
78100705693_4.txt
Anyone have any idea where I should add something to start counting over if the filename is different?
import os
import re
folderPath = r'C:/Users/a/Desktop/file'
fileSequence = 1
if os.path.exists(folderPath):
    # Full paths of the regular files directly inside folderPath.
    files = [
        os.path.join(folderPath, entry)
        for entry in os.listdir(folderPath)
        if os.path.isfile(os.path.join(folderPath, entry))
    ]
    print(files)
    # The counter runs across all files; it is never reset between prefixes.
    for old_path in files:
        stem = os.path.basename(old_path).split("ODS")[0]
        os.rename(old_path, folderPath + '/' + str(stem) + str(fileSequence) + '.txt')
        fileSequence += 1
Suppose the following unordered list:
# After os.listdir()
files = ['C:/Users/a/Desktop/file/78100705693_ODS_2_231711.txt',
'C:/Users/a/Desktop/file/49111809953_ODS_2_231648.txt',
'C:/Users/a/Desktop/file/78100705693_ODS_2_231655.txt',
'C:/Users/a/Desktop/file/78100705693_ODS_2_231702.txt']
You can use groupby from itertools and pathlib:
from itertools import groupby
import pathlib
def id_prefix(path):
    """Return the part of the file name before the first underscore."""
    return pathlib.Path(path).stem.split('_', maxsplit=1)[0]


# groupby only merges adjacent items with equal keys, so sort first and group
# on the prefix. Without a key every path is its own group and every file
# would be numbered 1.
for name, grp in groupby(sorted(files), key=id_prefix):
    for seq, file in enumerate(grp, 1):
        file = pathlib.Path(file)
        file.rename(file.with_stem(f"{name}_{seq}"))
You need to reset your variable fileSequence each time the first part of the file name changes.
This is what I would do, based on your code:
import os
import re
folderPath = r'C:/Users/a/Desktop/file'
if os.path.exists(folderPath):
    # os.listdir returns names in arbitrary order. Sorting keeps files that
    # share a prefix next to each other, so the counter restarts exactly once
    # per prefix.
    files = sorted(
        os.path.join(folderPath, name)
        for name in os.listdir(folderPath)
        if os.path.isfile(os.path.join(folderPath, name))
    )
    prefix = None
    for this_file in files:
        current = os.path.basename(this_file).split("ODS")[0]
        if current != prefix:
            # New prefix: start numbering from 1 again.
            prefix = current
            fileSequence = 1
        os.rename(this_file, folderPath + '/' + prefix + str(fileSequence) + '.txt')
        fileSequence += 1

How to rename a file to a substring within the filename?

I'm trying to rename the files in a directory that have the substring "Episode.26" by truncating the words before and after the substring
e.g. 'The.Vision.of.Escaflowne.Episode.26.Eternal.Love.1080p.Dual.Audio.Bluray [BC6DDF99].mkv'
The value to be found will always be Episode.## (## two digits)
Desired result: Episode.26.mkv
Current result: Episode.26.Eternal.Love.1080p.Dual.Audio.Bluray [BC6DDF99].mkv'
I removed the first n characters using python; but, I don't know how to isolate 'Episode.26' efficiently
import os
key = "Episode."
# Drop everything before the first occurrence of key from each matching name.
for filename in os.listdir("."):
    start = filename.find(key)
    if start != -1:
        os.rename(filename, filename[start:])
If your filename is always separated by periods then split() might be sufficient:
import os
ext = ".mkv"
ndl = "Episode"
# Rename "<anything>Episode.NN<anything>.mkv" to "Episode.NN.mkv".
for filename in os.listdir("."):
    # The extension must be the real suffix, not merely appear in the name.
    if not filename.endswith(ext) or ndl not in filename:
        continue
    # Text right after the needle, cut at the first space: ".26", ".26.Eternal", ...
    tail = filename.split(ndl, 1)[1].split(" ")[0]
    parts = tail.split(".")
    if len(parts) < 2 or not parts[1].isdigit():
        continue  # no episode number follows the needle; leave the file alone
    epi = "{}.{}{}".format(ndl, parts[1], ext)
    os.rename(filename, epi)
This should split the name after your needle ("ndl") grab the episode number and rename the file; it should also handle filenames that include spaces in addition to periods or if "Episode.26" is at the end of the string (eg. Some.Movie.Episode.26 [BC6DDF99].mkv).
Result:
Episode.26.mkv
If you're sure there are always two digits after "Episode.", you can code it like this. Otherwise, you should use re or split to extract what you want.
import os
key = 'Episode'
# Keep the key plus the three characters that follow it (".NN") and the
# original extension; names that do not contain the key are skipped.
for filename in os.listdir("."):
    _head, found, rest = filename.partition(key)
    if not found:
        continue
    file_extension = os.path.splitext(filename)[1]
    os.rename(filename, key + rest[:3] + file_extension)
You could use regular expressions, capture the episode number and file extension and create the new name using such data:
Code
import re
import os
key = "Episode"
# Raw string so "\." and "\d" reach the regex engine unchanged; the key is
# escaped in case it ever contains regex metacharacters.
regexp = re.compile(r'.*%s\.(\d\d).*\.(.+)' % re.escape(key))


def episode_name(filename):
    """Return ``"<key>.<NN>.<ext>"`` for *filename*, or None if it does not match."""
    match = regexp.match(filename)
    if not match:
        return None
    episode, file_ext = match.group(1), match.group(2)
    # The dot between key and number is part of the desired result.
    return key + '.' + episode + '.' + file_ext


for filename in os.listdir("."):
    new_name = episode_name(filename)
    if new_name:
        os.rename(filename, new_name)
This way is cleaner and more flexible. Regular expressions are very powerful. Let me know if this worked for you.
In your code, you can use search instead of match and remove the .* at the beginning of the regular expression:
import re
import os
key = "Episode"
# search() scans the whole name, so no leading ".*" is needed. Raw string
# keeps "\." and "\d" intact; the key is escaped for safety.
regexp = re.compile(r'%s\.(\d\d).*\.(.+)' % re.escape(key))
for filename in os.listdir("."):
    match = regexp.search(filename)
    if match:
        episode, file_ext = match.group(1), match.group(2)
        # The dot between key and number is part of the desired result.
        new_name = key + '.' + episode + '.' + file_ext
        os.rename(filename, new_name)

Create folders based on filenames

I have a folder with some 1500 excel files . The format of each file is something like this:
0d20170101abcd.xlsx
1d20170101ef.xlsx
0d20170104g.xlsx
0d20170109hijkl.xlsx
1d20170109mno.xlsx
0d20170110pqr.xlsx
The first character of the file name is either '0' or '1' followed by 'd' followed by the date when the file was created followed by customer id(abcd,ef,g,hijkl,mno,pqr).The customer id has no fixed length and it can vary.
I want to create folders for each unique date(folder name should be date) and move the files with the same date into a single folder .
So for the above example , 4 folders (20170101,20170104,20170109,20170110) has to be created with files with same dates copied into their respective folders.
I want to know if there is any way to do this in python ? Sorry for not posting any sample code because I have no idea as to how to start.
Try this out:
import os
import re
root_path = 'test'
def main():
    """Run process_file on every entry found directly in root_path."""
    # Shared across calls so each date directory is only created once.
    created_dirs = []
    for file_name in os.listdir(root_path):
        process_file(file_name, created_dirs)
def process_file(file_name, created_dirs):
    """Move one file from root_path into the sub-directory named after its date.

    The file is renamed to ``<user_id>.<ext>`` on the way. Entries that are
    not regular files, or whose name get_file_info cannot parse, are skipped.
    Dates for which a directory was created are appended to *created_dirs*.

    NOTE(review): two files with the same date and user id but a different
    leading 0/1 map to the same target name; confirm this cannot happen.
    """
    file_path = os.path.join(root_path, file_name)
    # Safeguard: ignore directories (including the ones created here).
    if not os.path.isfile(file_path):
        return
    file_date, user_id, file_ext = get_file_info(file_name)
    if file_date is None or user_id is None or file_ext is None:
        return
    # Make sure we haven't already created the directory
    if file_date not in created_dirs:
        create_dir(file_date)
        created_dirs.append(file_date)
    # Move the file and rename it
    os.rename(
        file_path,
        os.path.join(root_path, file_date, '{}.{}'.format(user_id, file_ext)))
    print('{} {}'.format(file_date, user_id))
def create_dir(dir_name):
    """Create the directory *dir_name* inside root_path unless it is already one."""
    target = os.path.join(root_path, dir_name)
    if os.path.isdir(target):
        return
    os.mkdir(target)
def get_file_info(file_name):
    """Split ``<0|1>d<YYYYMMDD><id>.<ext>`` into ``(date, id, ext)``.

    Returns ``(None, None, None)`` when *file_name* does not have exactly that
    shape. The pattern is anchored at both ends so that a name with several
    dots (e.g. ``...abcd.backup.xlsx``) is rejected instead of being parsed
    with the wrong extension.
    """
    match = re.match(r'[01]d(\d{8})([\w+-]+)\.(\w+)\Z', file_name)
    if match:
        return match.group(1), match.group(2), match.group(3)
    return None, None, None
# Run the organiser only when this file is executed as a script.
if __name__ == '__main__':
    main()
Note that depending on the names of your files, you might want to change (in the future) the regex I use, i.e. [01]d(\d{8})([\w+-]+) (you can play with it and see details about how to read it here)...
Check this code.
import os
# Move every regular file in the current directory into a folder named after
# the date found at positions 2-9 of its name (e.g. "0d20170101abcd.xlsx").
for i in os.listdir('.'):
    if not os.path.isfile(i):
        continue
    d = i[2:10]  # get date from filename
    if len(d) != 8 or not d.isdigit():
        continue  # not one of the dated files (e.g. this script itself)
    if not os.path.isdir(d):
        os.mkdir(d)
    os.rename(i, os.path.join(d, i))
Here is the repl link.
Try this out :
import os, shutil
filepath = "your_file_path"
# Copy every .xlsx file into "<filepath>/<date>/", where the date is taken
# from positions 2-9 of its name. The copy is named after the customer id,
# i.e. everything from position 10 on, extension included.
# NOTE(review): files sharing a date and customer id but a different leading
# 0/1 would map to the same target name; confirm this cannot happen.
files = [x for x in os.listdir(filepath) if x.endswith(".xlsx")]
for i in files:
    date_dir = os.path.join(filepath, i[2:10])
    cid = i[10:]
    if not os.path.isdir(date_dir):
        os.makedirs(date_dir)
    shutil.copy2(os.path.join(filepath, i), os.path.join(date_dir, cid))

How to remove numbers from strings in a list and retrieve them again

OK, here's the scenario:
I'm making a Windows 7 program that searches for all the music files in a directory and checks the user's input against that list so it can play the song that the user selects. Because the tracks' names are often numbered like this for example: '01 name.mp3', I want to remove the '01' part from a new list and have the program record the '01', so it can recognize it from the new list and launch the file. In other words, I want the program to make a new list of filenames without the numbers, and let the program recognize the file and launch it by also knowing the number. Is this possible? (Tell me if this doesn't make any sense.)
Also, Here is some of my code:
def GetMediaFName(source, name):
    """Return the first key of *source* whose value equals *name*.

    Returns None when no value matches.
    """
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for key, val in source.items():
        if val == name:
            return key
    return None
# Builds a dict from media path to display name, then looks up the path for a menu entry.
newList = {}
media_list = []
# NOTE(review): `a and b` evaluates to `b` when `a` is truthy, and a generator
# object is always truthy, so only alt_music_dir1 is walked here; confirm intent.
for dirpath, dirnames, filenames in os.walk(music_dir) and os.walk(alt_music_dir1):
for filename in [f for f in filenames if f.endswith(".m4a") or f.endswith(".mp3") or f.endswith(".mp4")]:
media_list.append(os.path.join(dirpath, filename))
# menu is defined outside this snippet; its first five characters are dropped.
media_from_menu = menu[5:]
print(media_from_menu)
# Candidate file names for the requested track, one per extension.
media_to_search1 = media_from_menu + ".mp3"
media_to_search2 = media_from_menu + ".wav"
media_to_search3 = media_from_menu + ".m4a"
media_to_search4 = media_from_menu + ".mp4"
for i in media_list:
# Last backslash-separated component of the path.
spl = i.split('\\')[-1]
if spl is not None:
try:
# NOTE(review): the trailing [0] indexes the string, so findall only sees the
# first character, and it returns a list; str.replace with a list raises
# TypeError, which the except below turns into the fallback.
tmp = re.findall(r'\d+',spl.split('.')[0][0])
newList.update({i:i.replace(tmp,"").strip()})
except Exception:
newList.update({i:spl})
itms = newList.keys()
# NOTE(review): `files` is not defined anywhere in this snippet; presumably media_list was meant.
for i in files:
tmp = re.findall(r'\d+',i.split('.')[0][0])
newList.update({i:i.replace(tmp,"").strip()})
print(newList)
print(GetMediaFName(newList, media_from_menu + ".mp3"))
I am not sure if i understand it correctly, but you want to keep track of original file names while showing a different name ( let say track name without index number ) to user.
I think this might give you some idea about it . Are you going to use some GUI libraries ?
import re
def getFileName(source,name):
    """Return the first key of *source* whose value equals *name*, else None."""
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for key,val in source.items():
        if val == name:
            return key
    return None
names = ['01 name_a.mp3','02 name_b.mp3','03 name_c.mp3','04 name_d.mp3']
newList = {}
for i in names:
    # Strip only the leading track number and the whitespace after it;
    # digits elsewhere in the title are kept.
    newList[i] = re.sub(r'^\d+\s*', '', i)
print(newList)
# If names are not the same, but you run in trouble if all of tracks are 01 name.mp3 , 02 name.mp3 and so on
print(getFileName(newList,'name_a.mp3'))
# Another possible way ! get the index of element user has clicked on and pass it to a list of original file names
user_selected = 2
# list() because dict views cannot be indexed on Python 3.
itms = list(newList.keys())
print(itms[user_selected])
EDIT:
To find Mp3s in a certain path including files in its subdirectories:
import os
import os.path
import re
def getFileName(source,name):
    """Return the first key of *source* whose value equals *name*, else None."""
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for key,val in source.items():
        if val == name:
            return key
    return None
names = []
# From : http://stackoverflow.com/a/954522/3815839
# Raw string: in a normal literal "\U" is an invalid escape on Python 3 and
# "\t" silently becomes a tab.
for dirpath, dirnames, filenames in os.walk(r"C:\Users\test\Desktop\mp3s"):
    for filename in filenames:
        if filename.endswith(".mp3"):
            names.append(os.path.join(dirpath, filename))
newList = {}
for i in names:
    spl = os.path.basename(i)
    # Map full path -> file name without its leading track number. The number
    # is stripped from the base name only, never from the directory part.
    newList[i] = re.sub(r'^\d+\s*', '', spl)
# list() because dict views cannot be indexed on Python 3.
itms = list(newList.keys())
print(newList)
print(getFileName(newList,'name_a.mp3'))

Write file to directory based on variable in Python

The script will generate multiple files using the year and id variable. These files need to be placed into a folder matching year and id. How do I write them to the correct folders?
file_root_name = row["file_root_name"]
year = row["year"]
id = row["id"]
# Target folder is named "<year>-<id>".
path = year+'-'+id
try:
    os.makedirs(path)
except OSError:
    # An already-existing folder is fine; anything else is a real failure.
    if not os.path.isdir(path):
        raise
# NOTE: the file is opened relative to the current directory, not inside path.
with open(row['file_root_name']+'.smil', 'w') as output:
    output.write(prettify(doctype, root))
If I understand your question correctly, you want to do this:
import os.path
# Build "<file_root_name>.smil" and place it inside the directory named by
# path (row and path are defined outside this fragment).
file_name = row['file_root_name']+'.smil'
full_path = os.path.join(path, file_name)
# NOTE(review): the handle is not closed within this fragment; presumably the
# surrounding code writes to it and closes it. Confirm.
output = open(full_path, 'w')
Please note that it's not very common in Python to use the + operator for string concatenation. Although not in your case, with large strings the method is not very fast.
I'd prefer:
file_name = '%s.smil' % row['file_root_name']
and:
# %s rather than %i: the question concatenates year and id with '+', so they
# are strings, and %i raises TypeError for a str argument.
path = '%s-%s' % (year, id)

Categories