Python unexpectedly moving files with os.rename - python

I have a script that:
Loops through all the files in a directory + its subdirectories
Creates folder for each unique year in the list of files
Moves files to their respective folder years
Renames them based on timestamp + unique number.
When I run parts 1-3 only, it moves the files to the folders correctly.
When I run parts 1-4 (including the os.rename part), it renames the files AFTER moving them back to the parent directory.
Start file structure:
parent_folder
--> file.txt modified 01-21-2012
--> file2.txt modified 09-30-2013
--> file3.txt modified 06-21-2017
Expected result:
parent_folder
--> '2012'
--> 2012-01-21-1.txt
--> '2013'
--> 2013-09-30-2.txt
--> '2017'
--> 2017-06-21-3.txt
Actual result:
parent_folder
--> '2012'
--> '2013'
--> '2017'
--> '2012-01-21-1.txt'
--> '2013-09-30-2.txt'
--> '2017-06-21-4.txt'
As you can see, it renamed the files but moved them out of their folders. Why is it doing this?
My code (I inserted print statements for logging):
import os, datetime, sys, shutil
#PART 1 : Change to the inputted directory
#===============================
# This is the directory I will work on.
p = 'ENTER_FOLDER_PATH_HERE'
print('This is the directory that will be organized:')
print(os.getcwd())
if os.path.isdir(p): # check if directory exists
print("Step 1: Changing directory")
os.chdir(p)
#PART 2 : Make a folder for each unique year
#===========================================
fileNames = next(os.walk(os.getcwd()))[2] # list files, excluding subdirectories
f = {}
filename = []
dates = []
# Loop through each file and grab the unique year.
# Store the file (key) and its modified year (value) into dictionary 'f'.
for name in fileNames:
f[name] = datetime.datetime.fromtimestamp(os.path.getmtime(name)).strftime("%Y")
dates = list(set(f.values()))
# Create the list of unique folders from the dictionary.
print("Step 2: Creating the following folders:\n", dates)
print('\n')
[os.mkdir(folder) for folder in dates]
#PART 3: Move all files to their respective folders based on modified year.
#==========================================================================
if sys.platform == 'Windows':
print("Step 3: moving files...")
[shutil.move(key, os.getcwd() + '\\' + value) for key, value in f.items()]
elif sys.platform == 'darwin':
print("Step 3: moving files...")
[shutil.move(key, os.getcwd() + '//' + value) for key, value in f.items()]
else:
print("Sorry, this script is not supported in your OS.")
else:
print("Oops, seems like that directory doesn't exist. Please try again.")
#PART 4: Rename the files
#==========================================================================
# Get each file in directory and renames it to its modified date, Y-M-D format
count=1
for root, dir, files in os.walk(p):
for file in files:
if not file.startswith('.'): # ignore hidden files
filePath = os.path.join(root,file)
ext = os.path.splitext(filePath)[1]
print("File number: ", count, file, ext)
print('\n')
os.rename(filePath, datetime.datetime.fromtimestamp(os.path.getmtime(filePath)).strftime("%Y-%m-%d") + '-' + str(count) + ext)
count += 1
print(filePath)
Logs:
This is the directory that will be organized:
TEST_PATH
Step 1: Changing directory
Step 2: Creating the following folders:
['2013', '2012', '2017']
Step 3: moving files...
File number: 1 2012-01-21-1.jpg TEST_PATH/2012/2012-01-21-1.jpg
TEST_PATH//2012/2012-01-21-1.jpg
File number: 2 2013-09-30-2.jpg TEST_PATH/2013/2013-09-30-2.jpg
TEST_PATH/2013/2013-09-30-2.jpg
TEST_PATH/2013/2013-09-30-2.jpg
File number: 4 June 21 2017.txt TEST_PATH/2017/June 21 2017.txt
TEST_PATH/2017/June 21 2017.txt

It moves the file because of the working directory you are currently in; I guess it works just like the mv command. After the rename, the file ends up at the path given by the second argument of os.rename, and a bare filename there is resolved relative to the current working directory. If you want the file to stay in its folder, the new name must include the directory as well, e.g. os.path.join(root, new_name).
Btw. you can do steps 3&4 at once this way.

Related

Python 3.7 - How to check if file exists and rename

I am trying to figure out how to check if a file within my source folder, exists within my destination folder, then copy the file over to the destination folder.
If the file within the source folder exists within the destination folder, rename file within source folder to "_1" or _i+1 then copy it to destination folder.
For Example (will not be a .txt, just using this as an example, files will be dynamic in nature):
I want to copy file.txt from folder a over to folder b.
file.txt already exists within folder b. (a) If I attempted to copy file.txt over to folder b, I would receive a copy error.
Rename file.txt to file_1.txt. (a) Copy file_1.txt to folder b. (b) If file_1.txt exists then make it file_2.txt.
What I have so far is this:
for filename in files:
filename_only = os.path.basename(filename)
src = path + "\\" + filename
failed_f = pathx + "\\Failed\\" + filename
# This is where I am lost, I am not sure how to declare the i and add _i + 1 into the code.
if path.exists(file_path):
numb = 1
while True:
new_path = "{0}_{2}{1}".format(*path.splitext(file_path) + (numb,))
if path.exists(new_path):
numb += 1
shutil.copy(src, new_path)
else:
shutil.copy(src, new_path)
shutil.copy(src, file_path)
Thanks much in advance.
import os
import shutil

# Copy every file into `dst`. If the name is already taken there, try
# <stem>_1<ext>, <stem>_2<ext>, ... until a free name is found.
for filename in files:
    src = os.path.join(path, filename)
    base = os.path.basename(src)
    stem, ext = os.path.splitext(base)
    i = 0
    while True:
        # i == 0 keeps the original name; later rounds insert "_<i>"
        # between the stem and the extension.
        name = base if i == 0 else "{}_{}{}".format(stem, i, ext)
        dst_path = os.path.join(dst, name)
        if not os.path.exists(dst_path):
            shutil.copy(src, dst_path)
            break
        i += 1

file exists or not through matching filename in list

i have files in folders and subfolders. folder structure is like this
2020(folder)
-01(sub folder)
--14(sub-sub folder)
----abc1-2020-01-14.csv
----abc2-2020-01-14.csv
-02(subfolder in 2020)
--17(sub-sub folder in 02)
----abc1-2020-02-17.csv
----abc4-2020-02-17.csv
i have list of file names.
li = ['abc1','abc2','abc3','abc4']
I want to know whether these files exist in the directory or not. Each sub-sub folder should have all 4 files; if not, the code must return the path where a particular file does not exist.
import glob
BASE_PATH = r'2020/'
allin= BASE_PATH + '/*/*'
li = ['abc1','abc2','abc3','abc4']
print('Names of files:')
for name in glob.glob(allin):
print('\t', name)
for k in li:
try:
f = open(r"C:\\Users\\Karar\\ProjectFiles\\scripts\\"+ name + "\\" + k + "*.csv")
except IOError:
print(name+k+ ".csv""File not present")
print name is returning 2020\01\14 and 2020\02\17
I am having difficulty building the path for the open call. Please also note that the file names stored in the folders have the date at the end, so the path needs to handle that as well: whatever the date at the end of the file name, if a folder contains files for every name in the list, do nothing, but if files are missing in a sub folder, print the "file not present" message together with the path.
note each folder has to carry all 4 files if not then return except.
One possible approach:
import glob
import os.path

base = '2020'
li = ['abc1', 'abc2', 'abc3', 'abc4']


def expected_filename(dirname, prefix):
    """Return the path of the CSV that `prefix` should have inside `dirname`.

    `dirname` is a <year>/<month>/<day> directory; the file is expected to be
    named <prefix>-<year>-<month>-<day>.csv.
    """
    # Normalise and split on the platform separator so the last three
    # components are year, month and day whatever separator glob returned.
    year, month, day = os.path.normpath(dirname).split(os.sep)[-3:]
    # str.join takes ONE iterable argument, hence the tuple.
    return os.path.join(dirname, '-'.join((prefix, year, month, day)) + '.csv')


# Print every expected file that is missing from a day directory.
for dirname in glob.glob(os.path.join(base, '*', '*')):
    for prefix in li:
        filename = expected_filename(dirname, prefix)
        if not os.path.exists(filename):
            print(filename)

How to move batches of files in Python?

I have a folder with 1092 files. I need to move those files to a new directory in batches of 10 (each new folder will have only 10 files each, so max. of 110 folders).
I tried this code, and now the folders have been created, but I can't find any of original files (???). They are neither in the original and newly created folders...
path = "/home/user/Documents/MSc/Imagens/Dataset"
paths = []
for root, dirs, file in os.walk(path):
for name in file:
paths.append(os.path.join(root,name))
start = 0
end = 10
while end <= 1100:
dest = str(os.mkdir("Dataset_" + str(start) + "_" + str(end)))
for i in paths[start:end]:
shutil.move(i, dest)
start += 10
end += 10
Any ideas?
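With your move command, every file is moved to one and the same destination path, with no filename appended, so each move overwrites the previous one instead of landing inside a folder. And dest is not the new folder at all: os.mkdir() doesn't return anything, so str(os.mkdir(...)) is just the string "None".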
You need to append the filename to dest:
dataset_dirname = "Dataset_" + str(start) + "_" + str(end)
# Create the batch folder inside `path` -- the same location the files are
# moved to below -- rather than relative to the current working directory.
dataset_fullpath = os.path.join(path, dataset_dirname)
os.mkdir(dataset_fullpath)
for i in paths[start:end]:
    # append filename to dataset_fullpath and move the file
    shutil.move(i, os.path.join(dataset_fullpath, os.path.basename(i)))

Move files to newly created folders more efficiently

I want to group a list of files into sub-folders based on some substring in their name
The files are of the form
pie_riverside_10.png
stack_oak_20.png
scatter_mountain_10.png
and I want to use the starting substring (e.g. pie, stack, scatter) and the integer substring (e.g. 10,20) as the sub-directory name for grouping the files..
The code below is only example- if I actually do that approach I have to create at least 75-80 folders manually with elif statements, which is inefficient.
I am just curious if there is a better way to do this?
EDIT: The current code assumes there is already a folder created, but in real scenario I do not have the folders created and I do not want to have to create 70-80 subfolders- I am trying to make script to create those folders for me.
import shutil
import os
source = 'C:/Users/Xx/Documents/plots/'
pie_charts_10= 'C:/Users/Xx/Documents/pie_charts_10/'
pie_charts_20= 'C:/Users/Xx/Documents/pie_charts_20/'
stack_charts_10 = 'C:/Users/Xx/Documents/stack_charts_10 /'
scatter_charts_10 = 'C:/Users/Xx/Documents/scatter_charts_10 /'
files = os.listdir(source)
for f in files:
if (f.startswith("pie") and f.endswith("10.png")):
shutil.move(os.path.join(source, f), pie_charts_10)
elif (f.startswith("pie") and f.endswith("20.png")):
shutil.move(os.path.join(source, f), pie_charts_20 )
elif (f.startswith("stack") and f.endswith("10.png")):
shutil.move(os.path.join(source, f), stack_charts_10 )
elif (f.startswith("scatter ") and f.endswith("10.png")):
shutil.move(os.path.join(source, f), scatter_charts_10 )
else:
print("No file")
When you are looking to move files of the format prefix_suffix.png into folders prefix_charts_suffix/:
base = "C:/Users/Xx/Documents"
moved_types = ['png']
for f in files:
    pf = f.rsplit('.', 1)  # [name, extension]
    sf = pf[0].split("_")  # [prefix, ..., suffix]
    # Require an extension (len(pf) == 2) and at least one "_" in the name.
    if len(sf) >= len(pf) > 1 and pf[1] in moved_types:
        new_dir = "%s_charts_%s" % (sf[0], sf[-1])
        target = os.path.join(base, new_dir)
        if not os.path.exists(target):
            os.makedirs(target)
        shutil.move(os.path.join(source, f), os.path.join(target, f))
Which will work for the general case, grabbing and moving only files which end in moved_types and contain a _ (which allows for splitting of a prefix and suffix).
See the relevant logic on repl.it:
>>>['prefix_garbage_suffix.png', 'bob.sh', 'bob.bill.png', "pie_23.png", "scatter_big_1.png"]
Move prefix_garbage_suffix.png to prefix_charts_suffix
Move pie_23.png to pie_charts_23
Move scatter_big_1.png to scatter_charts_1
EDIT: I've preserved the original answer in case others need a solution where not every file should be moved or you can't infer the folder name from the file names.
If you need that, I would do something like:
# (prefix, suffix, destination directory) for each kind of file to move
identity_tuples = [
    ('pie', '16.png', 'C:/Users/Xx/Documents/pie_charts/'),
    ('stack', '14.png', 'C:/Users/Xx/Documents/stack_charts/'),
    ('scatter', '12.png', 'C:/Users/Xx/Documents/scatter_charts/'),
]
files = os.listdir(source)
for f in files:
    for identity_tuple in identity_tuples:
        if f.startswith(identity_tuple[0]) and f.endswith(identity_tuple[1]):
            shutil.move(os.path.join(source, f), identity_tuple[2])
            break
    else:
        # for/else: reached only when no identity tuple matched (no break)
        print("No file")
Now you just have to add a new identity tuple: (prefix, suffix, destination) for each type. If the path is common for all the destinations, you can change it to:
# (prefix, suffix, destination sub-directory) for each kind of file to move
identity_tuples = [
    ('pie', '16.png', 'pie_charts/'),
    ('stack', '14.png', 'stack_charts/'),
    ('scatter', '12.png', 'scatter_charts/'),
]
files = os.listdir(source)
for f in files:
    for identity_tuple in identity_tuples:
        if f.startswith(identity_tuple[0]) and f.endswith(identity_tuple[1]):
            # the common parent path is prepended to the per-type sub-directory
            shutil.move(os.path.join(source, f), "C:/Users/Xx/Documents/" + identity_tuple[2])
            break
    else:
        # for/else: reached only when no identity tuple matched (no break)
        print("No file")
Note: This is using a for/else loop, in which else is only called if you don't hit a break.
If you need to make the directories, add this in before the shutil.move():
if not os.path.exists(identity_tuple[2]):
    os.makedirs(identity_tuple[2])  # Or "C:/Users/Xx/Documents/" + ...
How about this
# assume you have files in a folder
source = './files'  # some directory
files = os.listdir(source)
print(files)  # parenthesised form works on both Python 2 and Python 3
#['pie_river_1.png', 'pie_mountain_11.png', 'scatter_grass_12.png', 'stack_field_30.png']
Now you want to group them into subfolders based on what they start with and what number they have before the extension
subdir_root = './subfolders'
for f in files:
    # 'pie_river_1.png' -> parts ['pie', 'river', '1']
    parts = f.split('.png')[0].split('_')
    fig_type = parts[0]   # leading word, e.g. 'pie'
    fig_num = parts[-1]   # number just before the extension, e.g. '1'
    subdir_name = '%s_charts_%s' % (fig_type, fig_num)  # name of dir, e.g. pie_charts_10
    subdir = os.path.join(subdir_root, subdir_name)  # path to dir
    if not os.path.exists(subdir):  # if the dir does not exist, create it
        os.makedirs(subdir)
    f_src = os.path.join(source, f)  # full path to source file
    f_dest = os.path.join(subdir, f)  # full path to new destination file
    shutil.copy(f_src, f_dest)  # copy rather than move, so the original files stay untouched
On my computer:
$ ls ./files:
pie_mountain_11.png pie_river_1.png scatter_grass_12.png stack_field_30.png
$ ls -R ./subfolders
pie_charts_1 pie_charts_11 scatter_charts_12 stack_charts_30
subfolders//pie_charts_1:
pie_river_1.png
subfolders//pie_charts_11:
pie_mountain_11.png
subfolders//scatter_charts_12:
scatter_grass_12.png
subfolders//stack_charts_30:
stack_field_30.png
Obviously, you might have to change the code if edge cases arise.. but this should give you a good start...

delete older folder with similar name using python

I need to iterate over a folder tree. I have to check each subfolder, which looks like this:
moduleA-111-date
moduleA-112-date
moduleA-113-date
moduleB-111-date
moduleB-112-date
etc.
I figured out how to iterate over a folder tree. I can also use stat with mtime to get the date of the folder which seems easier than parsing the name of the date.
How do I single out modules with the same prefix (such as "moduleA") and compare their mtime's so I can delete the oldest?
Since you have no code, I assume that you're looking for design help. I'd lead my students to something like:
Make a list of the names
From each name, find the prefix, such as "moduleA". Put those in a set.
For each prefix in the set
Find all names with that prefix; put these in a temporary list
Sort this list.
For each file in this list *except* the last (newest)
delete the file
Does this get you moving?
I'm posting the code (answer) here. I suppose my question wasn't clear, since I'm getting downvotes, but anyway the solution wasn't as straightforward as I thought. I'm sure the code could use some fine tuning, but it gets the job done.
#!/usr/bin/python
import os
import sys
import fnmatch
import glob
import re
import shutil
##########################################################################################################
#Remove the directory
def remove(path):
    """Delete the directory tree at `path` and print the outcome.

    An OSError from shutil.rmtree is caught and printed rather than raised.
    """
    try:
        shutil.rmtree(path)
    except OSError as err:
        # Print the actual error (errno / message), not the OSError class.
        print(err)
        print("Unable to remove folder: %s" % path)
    else:
        print("Deleted : %s" % path)
##########################################################################################################
#This function will look for the .sh files in a given path and returns them as a list.
def searchTreeForSh(path):
    """Return the paths of the ``*.sh`` files located directly inside `path`.

    Only `path` itself is searched, not its sub-directories. `path` may be
    given with or without a trailing separator.
    """
    return glob.glob(os.path.join(path, '*.sh'))
##########################################################################################################
#Gets the full paths to the .sh files and returns a list of folder names (prefixes) to be acted upon.
#listOfScripts is a list of full paths to .sh file
#dirname is the value that holds the root directory where listOfScripts is operating in
def getFolderNames(listOfScripts):
    """Return the sorted script names with directory and extension removed.

    listOfScripts -- iterable of paths to .sh files

    Example: ['/x/moduleB.sh', '/x/moduleA.sh'] -> ['moduleA', 'moduleB']
    """
    return sorted(
        os.path.splitext(os.path.basename(script))[0]
        for script in listOfScripts
    )
##########################################################################################################
def minmax(items):
    """Return the largest element of `items` (despite the name, only the max)."""
    return max(items)
##########################################################################################################
#This function will check the latest entry in the tuple provided, and will then send "everything" to the remove function except that last entry
def sortBeforeDelete(statDir, t):
    """Remove every folder in `t` except the most recently modified one(s).

    statDir -- list of st_mtime values
    t       -- sequence of (folder_path, st_mtime) entries

    Entries whose st_mtime equals the largest value in `statDir` are kept;
    every other entry's folder_path is passed to remove().
    """
    if not statDir:
        return  # nothing to compare against, so nothing to delete
    timeNotToDelete = minmax(statDir)
    for entry in t:
        if entry[1] != timeNotToDelete:
            remove(entry[0])
##########################################################################################################
#A loop to run over the full path, which is broken into items (see os.listdir below); it eliminates the .sh and the .txt files, leaving only folder names, then matches each one to one of the
#names in the "folders" variable
def coolFunction(folderNames, path):
    """For each name in `folderNames`, keep only the newest matching folder.

    folderNames -- names to look for inside the sub-folder names
    path        -- directory whose entries are examined

    A sub-folder of `path` matches when its name contains the given name.
    For every name, the matching folders and their st_mtime values are
    collected and handed to sortBeforeDelete().
    """
    localPath = os.listdir(path)
    for folder in folderNames:
        entries = []  # (folder path, st_mtime) pairs for this name
        statDir = []  # the st_mtime values alone
        for item in localPath:
            fullPath = os.path.join(path, item)
            # Plain substring test: the name is literal text, not a regex.
            if os.path.isdir(fullPath) and folder in item:
                mtime = os.stat(fullPath).st_mtime
                statDir.append(mtime)
                entries.append((fullPath, mtime))
        if not entries:
            continue
        sortBeforeDelete(statDir, tuple(entries))
##########################################################################################################
def main(path):
    """Run the cleanup in every immediate sub-directory of `path`.

    For each sub-directory: collect its .sh scripts, derive the folder names
    from them, and pass both to coolFunction().
    """
    for component in os.listdir(path):
        newPath = os.path.join(path, component)
        # Test the full path; a bare name would be resolved against the
        # current working directory instead of `path`.
        if not os.path.isdir(newPath):
            continue
        newPath += '/'  # trailing slash kept, as in the original path string
        listOfFolders = searchTreeForSh(newPath)
        folderNames = getFolderNames(listOfFolders)
        coolFunction(folderNames, newPath)
##########################################################################################################
if __name__ == "__main__":
    # The root directory to clean up is the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <root-directory>" % sys.argv[0])
    main(sys.argv[1])

Categories