Rename duplicate files using Python

Rename duplicate files using Python - python

I'm trying to write a Python script that renames all duplicate file names recursively (i.e. inside all directories)
I already searched the web and Stack Overflow but I couldn't find any answer...
Here's my code:
#!/usr/bin/env python3.6
import os
import glob
# Scan every *.c file below the current working directory and rename each file
# whose base name has already been seen, prefixing the name with a counter.
path = os.getcwd()
# Base names encountered so far.
file_list = []
# Maps a repeated base name to the counter used for its most recent rename.
duplicates={}
# remove filename duplicates
for file_path in glob.glob(path + "/**/*.c", recursive=True):
# Everything after the last '/' is the bare file name.
file = file_path.rsplit('/', 1)[1]
if file not in file_list:
file_list.append(file)
else:
if file in duplicates.keys():
duplicates[file] += 1
# NOTE(review): lista is filled here but never read in this snippet.
lista = []
lista.append(file_path)
# NOTE(review): rsplit('/', 1)[:-1] is a list slice, not a string, so the
# `+ '/'` concatenation is a list + str operation (the reported TypeError).
os.rename(file_path, file_path.rsplit('/', 1)[:-1] + '/' + str(duplicates[file]) + file)
else:
duplicates[file] = 1
os.rename(file_path, file_path.rsplit('/', 1)[:-1] + '/' + str(duplicates[file]) + file)
And this is the error I'm getting:
Traceback (most recent call last):
File "/home/andre/Development/scripts/removeDuplicates.py", line 22, in <module>
os.rename(file_path, file_path.rsplit('/', 1)[:-1] + '/' + str(duplicates[file]) + file)
TypeError: can only concatenate list (not "str") to list
I know why I'm getting this error, but my question is: is there a cleverer way to do this? I'd also like to rename all duplicate directory names, but I haven't figured that out yet.

Related

NotADirectoryError when converting multiple xml files to .csv

CODE
import os
import pandas as pd
# Read files found below xml_file_path with pd.read_xml, append them to one
# DataFrame, and write that DataFrame to csv_file_path.
df = pd.DataFrame()
xml_file_path = "/Users/ruzi/Desktop/top1000_complete 2"
csv_file_path = "/Users/ruzi/Desktop/xml.csv"
if os.path.isdir(xml_file_path):
for e in os.listdir(xml_file_path):
new_path = xml_file_path + "/" + str(e)
# Only descend into sub-directories, skipping '.DS_Store'.
if str(e) != '.DS_Store' and os.path.isdir(new_path):
for e1 in os.listdir(new_path):
next_new_path = new_path + "/" + str(e1)
if str(e1) != '.DS_Store' and os.path.isfile(next_new_path):
# NOTE(review): next_new_path has just passed os.path.isfile(), yet it is
# handed to os.listdir(), which expects a directory.
for e2 in os.listdir(next_new_path):
# NOTE(review): built from e1, not e2, so this equals next_new_path.
third_new_path = new_path + "/" + str(e1)
if str(e2) != '.DS_Store' and os.path.isfile(third_new_path):
data_frame = pd.read_xml(third_new_path)
df=df.append(data_frame)
data_frame = pd.DataFrame()
# Convert Into CSV
df.to_csv(csv_file_path, index=None)
ERROR MESSAGE
/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 /Users/ruzi/Documents/pythonProject/main.py
Traceback (most recent call last):
File "/Users/ruzi/Documents/pythonProject/main.py", line 14, in <module>
for e2 in os.listdir(next_new_path):
NotADirectoryError: [Errno 20] Not a directory: '/Users/ruzi/Desktop/top1000_complete 2/P04-3022/citing_sentences_annotated.json'
Process finished with exit code 1
[FILE LOCATION][1]
[FOLDER 1][2]
[FOLDER 2][3]
##images
[1]: https://i.stack.imgur.com/UEeki.png
[2]: https://i.stack.imgur.com/2CmnR.png
[3]: https://i.stack.imgur.com/g5fVU.png

for e1 in os.listdir(new_path):
next_new_path = new_path + "/" + str(e1)
if str(e1) != '.DS_Store' and os.path.isfile(next_new_path):
# At this point, next_new_path is a file, not a dir
for e2 in os.listdir(next_new_path):
Consider this file tree:
dir1/
subdir1/
subdir2/
file1
file2
e1 is any entry in new_path, whether file or directory (e.g. subdir1 or file1).
next_new_path is the same entry including its parent (e.g. dir1/subdir1/ or dir1/file1).
Then you check that next_new_path is a file (not a dir), so you exclude dir1/subdir1/ and only keep dir1/file1.
And then you call listdir on it, which is wrong because it is a file; this is exactly what the error message says.
In 2021, I'd recommend using pathlib rather than os.path.

How do I rename multiple files in Python, using part of the existing name?

I have a few hundred .mp4 files in a directory. When originally created their names were set as "ExampleEventName - Day 1", "ExampleEventName - Day 2" etc. thus they are not in chronological order.
I need a script to modify each of their names by taking the last 5 characters in the corresponding string and add it to the front of the name so that File Explorer will arrange them properly.
I tried using the os module .listdir() and .rename() functions, inside a for loop. Depending on my input I get either a FileNotFoundError or a TypeError:List object is not callable.
import os
# Switch into the target folder so the bare names returned by listdir() resolve.
os.chdir("E:\\New folder(3)\\New folder\\New folder")
for i in os.listdir("E:\\New folder(3)\\New folder\\New folder"):
# NOTE(review): i[:5] is the FIRST five characters of the name; the last five
# characters would be i[-5:].
os.rename(i, i[:5] +i)
Traceback (most recent call last):
File "C:/Python Projects/Alex_I/venv/Alex_OS.py", line 15, in <module>
os.rename(path + i, path + i[:6] +i)
FileNotFoundError: [WinError 2] The system cannot find the file specified:
import os, shutil
# Names of the entries in the target folder (os.listdir returns a list).
file_list = os.listdir("E:\\New folder(3)\\New folder\\New folder")
# NOTE(review): file_list is a list but is called like a function here.
for file_name in file_list("E:\\New folder(3)\\New folder\\New folder"):
dst = "!#" + " " + str(file_name) #!# meant as an experiment
# NOTE(review): folder and name are concatenated without a path separator, and
# dst is reassigned below to the same value as src.
src = "E:\\New folder(3)\\New folder\\New folder" + file_name
dst = "E:\\New folder(3)\\New folder\\New folder" + file_name
os.rename(src, dst)
# NOTE(review): file_name is a str coming from the loop; += 1 adds an int to it.
file_name +=1
Traceback (most recent call last):
File "C:/Python Projects/Alex_I/venv/Alex_OS.py", line 14, in <module>
for file_name in file_list("E:\\New folder(3)\\New folder\\New folder"):
TypeError: 'list' object is not callable

Some other approach:
It does not rely on a fixed length (such as the last 5 characters) for the part of the name that is moved.
import glob
import os

# To create test fixtures, uncomment the lines below. They produce 98 files,
# which also shows why "take the last 5 characters" breaks once the day
# number has more than one digit.
# for i in range(1, 99):
#     with open("mymusic/ExampleEventName - Day {}.mp4".format(i), "w+") as f:
#         f.flush()


def day_first_name(path):
    """Return *path* with the day number moved to the front of the file name.

    "dir/ExampleEventName - Day 7.mp4" becomes "dir/7ExampleEventName.mp4".
    The number is the first whitespace-separated token of the base name that
    consists only of digits, so the result does not depend on a fixed-width
    suffix.

    Returns None when the base name contains no such token, so callers can
    skip files that do not follow the naming scheme.
    """
    # Split the path into directory (head) and file name (tail).
    head, tail = os.path.split(path)
    basename, ext = os.path.splitext(tail)
    numbers = [int(s) for s in basename.split() if s.isdigit()]
    if not numbers:
        return None
    # Drop the trailing "- Day X" part. Splitting only on the LAST hyphen
    # keeps hyphens that belong to the event name itself.
    event = basename.rsplit("-", 1)[0].rstrip()
    # os.path.join instead of a hard-coded "\\" keeps the result valid on
    # every platform and consistent with the "/" used in the glob pattern.
    return os.path.join(head, "{}{}{}".format(numbers[0], event, ext))


if __name__ == "__main__":
    for mp4 in sorted(glob.glob("mymusic/*")):
        newfile = day_first_name(mp4)
        if newfile is None:
            # Not an "<event> - Day <n>" file: leave it untouched.
            continue
        print(newfile)
        os.rename(mp4, newfile)

You're having multiple problems:
You're trying to increment a value that should not be incremented. Also you've created the list file_list, and thus it should not take any arguments anymore.
When using the syntax:
for x in y:
you do not have to increment the value. It will simply iterate through the list until there is no more left.
Therefore you simply have to leave out the incrementation and iterate through the list file_list.
import os
import shutil

# Folder whose entries are renamed.
folder = "E:\\New folder(3)\\New folder\\New folder"
file_list = os.listdir(folder)
for file_name in file_list:  # file_list is a list and thus not callable: iterate it directly
    # os.path.join puts the separator between folder and name; plain string
    # concatenation would yield "...New folderNAME", which does not exist.
    src = os.path.join(folder, file_name)
    # "!#" prefix meant as an experiment. The destination has to differ from
    # src, otherwise the rename has no effect.
    dst = os.path.join(folder, "!#" + " " + file_name)
    os.rename(src, dst)
    # No manual increment: the for loop advances to the next name by itself.
Now your solution should work.

Python: Print file names and their directory based on their file size

I want to print filenames and their directory if their filesize is more than a certain amount. I wrote one and set the bar 1KB, but it doesn't work even if there are plenty of files larger than 1KB.
import os, shutil
# Walks `folder` with os.walk and prints the name and containing directory of
# each file whose os.path.getsize() result exceeds 1000.
def deleteFiles(folder):
folder = os.path.abspath(folder)
for foldername, subfolders, filenames in os.walk(folder):
for filename in filenames:
# NOTE(review): filename is the bare name yielded by os.walk; it is not joined
# with foldername before being passed to getsize.
if os.path.getsize(filename) > 1000:
print(filename + ' is inside: ' + foldername)
deleteFiles('C:\\Cyber\\Downloads')
But it printed nothing!
Then I ran the code in the interactive shell and got the following error:
Traceback (most recent call last):
File "<pyshell#14>", line 3, in <module>
if os.path.getsize(filename) > 100:
File "C:\Users\Cyber\Downloads\lib\genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError:
I am wondering How I can fix my code.

os cannot find the file from its bare name alone: os.walk yields file names without their directory, so you have to rebuild the full path yourself. Replace
if os.path.getsize(filename) > 1000:
with
if os.path.getsize(os.path.abspath(foldername + "/" + filename)) > 1000:
And it should work.

Replace:
deleteFiles('C:\\Cyber\\Downloads')
with
import os

# Path components; none of them carries a separator of its own.
a, b, c = 'c:', 'Cyber', 'Downloads'
# Only the drive gets an explicit os.sep; os.path.join inserts the rest.
root = a + os.sep
path = os.path.join(root, b, c)
deleteFiles(path)

Can't traverse directory tree in python with os.walk() because it says a name is not defined

So as part of a homework, I have to traverse through a directory tree, and it seems that os.walk is the best choice for this. I am using cygwin to run my python scripts. The path of the tree that I am trying to traverse is:
/cygdrive/c/Users/Kamal/Documents/School/Spring2015/CS410/htmlfiles
So in my code, here is the snippet to the os.walk() call:
for dirName, subdirList, fileList in os.walk('/cygdrive/c/Users/Kamal/Documents/School/Spring2015/CS410/htmlfiles'):
However, when I execute the script, it gives me the following error:
$ python testparser.py
Traceback (most recent call last):
File "testparser.py", line 1, in <module>
Spring2015/CS410/htmlfiles/testparser.py
NameError: name 'Spring2015' is not defined
I'm confused as to why it thinks 'Spring2015' is undefined? The directory clearly exists at the given path on my computer
EDIT: here is the entire code since some have asked:
from bs4 import BeautifulSoup
import os
import shutil
# For every file reported by os.walk below the htmlfiles directory: extract the
# visible text with BeautifulSoup, write it to chaya2_<cnt>.txt, then rename the
# source file to chaya2_<cnt>.html and move both files to their target folders.
cnt = 0
print "starting..."
for dirName, subdirList, fileList in os.walk('/cygdrive/c/Users/Kamal/Documents/School/Spring2015/CS410/htmlfiles'):
for f in fileList:
#print the path
# NOTE(review): f is the bare file name from os.walk; dirName is never joined
# to it, here or in the open()/os.rename() calls below.
print "Processing " + os.path.abspath(f) + "...\n"
#open the HTML file
html = open(f)
soup = BeautifulSoup(html)
#Filter out unwanted stuff
[s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])]
visible_text = soup.getText()
visible_text_encoded = visible_text.encode('utf-8')
visible_text_split = visible_text_encoded.split('\n')
# Drop empty lines.
visible_text_filtered = filter(lambda l: l != '', visible_text_split)
#Generate the name of the output text file
outfile_name = 'chaya2_' + str(cnt) + '.txt'
#Open the output file to write in
outfile = open(outfile_name, "w")
#Get the URL of the html file using its Path, write it to the first line
outfile.write(os.path.relpath(f, '/cygdrive/c/Users/Kamal/Documents/School/') + ' \n')
#Write the visible text to the output file, one line per entry
for l in visible_text_filtered:
outfile.write(l+'\n')
#Done writing, move the output file to the appropriate directory
# NOTE(review): outfile is still open at this point; it is closed further down.
shutil.move(os.path.abspath(outfile_name), '/cygdrive/c/Users/Kamal/Documents/School/Spring2015/CS410/txtFiles')
#Rename the html file
html_name = 'chaya2_' + str(cnt) + '.html'
os.rename(f, html_name)
#Move the html file to the appropriate directory
shutil.move(os.path.abspath(html_name), '/cygdrive/c/Users/Kamal/Documents/School/Spring2015/CS410/htmlFilesAfter')
print html_name + " converted to " + outfile_name + "\n"
outfile.close()
html.close()
cnt+=1

Python Arcpy.describe trouble reading ENVI.dat

This may seem trivial, but I can't seem to track down the error, and I am very new to Python, though not to programming. From reading around on the internet for a bit, I think my problem is that the ENVI .dat image file isn't being read as a "describe object". How do I get it to be read as such? I probably need it to read the header info too. Any solutions?
Here is my code:
import arcpy #make sure you run the python associated with ArcGIS
import os
# Set up the input/output locations and obtain a Describe object for the raster.
filepath = 'filepath'
filename = 'filename'
# Values 1..224; used further down to build the output file names.
band_names = range(1,225)
# Path creation
in_folder = filepath + os.sep + 'ENVI'
out_folder = filepath + os.sep + 'GeoTIFF' # preferably an empty folder
# input multiband raster
# NOTE(review): there is no operator between filename and '.dat', and no
# separator between in_folder and filename.
in_ENVI = in_folder + filename '.dat'
# NOTE(review): `raster` is neither defined nor imported in this snippet.
in_raster = raster(in_ENVI)
index = 0
# get raster information specific to each band
desc = arcpy.Describe(in_raster)
################### THIS IS WHERE I GET THE ERROR ##################
Runtime error
Traceback (most recent call last):
File "<string>", line 23, in <module>
NameError: name 'raster' is not defined
################### SCRIPT TERMINATED ##############################
# For each child of desc, build a source path and a .tif destination path and
# pass both to arcpy.CopyRaster_management.
for band in desc.children:
print band
bandName = band.name
band_path = os.path.join(in_raster, bandName)
# NOTE(review): there is no operator between filename and '_'; band_names is
# called with parentheses rather than indexed, and the value is not passed
# through str() before being concatenated.
dest_path = os.path.join(out_folder, filename '_' + band_names(index) + '.tif')
arcpy.CopyRaster_management(band_path, dest_path, "", "", "", "NONE", "NONE", "")
index = index + 1

OK, so I actually figured it out myself. Here is the code I used. The error was not in arcpy.Describe() but in the destination path passed to arcpy.CopyRaster_management, because I didn't convert band_names[index] to a string.
dest_path = os.path.join(out_folder, filename + str(band_names[index]) + '.tif')

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Rename duplicate files using Python - python

Related

NotADirectoryError when converting multiple xml files to .csv

How do I rename multiple files in Python, using part of the existing name?

Python: Print file names and their directory based on their file size

Can't traverse directory tree in python with os.walk() because it says a name is not defined

Python Arcpy.describe trouble reading ENVI.dat

Categories

Resources