Take everything out of a file name except from numbers [closed] - python

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 years ago.
Improve this question
So I have the file 3D_492.png and I am trying to get rid of everything but the numbers after the last underscore. How would I go about this?
I want 3D_492.png to become 492.png
More examples:
Anniversary_1_Purple_710.png to become 710.png
They are all in the folder \Images
Edit: I'm dumb and forgot to say that I would like to rename the files with the new names.
Thanks

Using split:
filename = "3D_710.png"
# Break the file name into the pieces separated by "_".
filename_parts = filename.split("_")
# The piece after the last underscore is the new name.
new_file = filename_parts[-1]
# print() written as a call works on Python 3 (the bare statement form does not).
print(new_file)
# 710.png
Full example including rename, assuming Images is relative to the directory containing our Python script:
from os import listdir, rename
from os.path import dirname, exists, isfile, join, realpath

# Images is resolved relative to the directory that contains this script.
dir_path = dirname(realpath(__file__))
images_path = join(dir_path, "Images")
only_files = [f for f in listdir(images_path) if isfile(join(images_path, f))]
for file in only_files:
    filename_parts = file.split("_")
    # new_file is only the last part
    new_file = filename_parts[-1]
    target = join(images_path, new_file)
    # Skip names without an underscore (nothing to rename) and targets that
    # already exist, so one image is never silently replaced by another.
    if new_file == file or exists(target):
        continue
    rename(join(images_path, file), target)

Perfect job for str.rpartition:
>>> s = "3D_492.png"
>>> start, sep, end = s.rpartition('_')
>>> end
'492.png'
str.rpartition always returns a 3-tuple (head, separator, tail) whose parts concatenate back to the original string, even when the separator is not found. This means you can always take index 2 (the last element) to get the "tail":
>>> 'Anniversary_1_Purple_710.png'.rpartition('_')[2]
'710.png'
Putting the pieces together:
import os
# Raw string: '\I' is not a valid escape sequence, so spell the backslash
# literally instead of relying on Python leaving it alone.
os.chdir(r'\Images')
for old_name in os.listdir('.'):
    # Everything after the last underscore; the whole name if there is none.
    new_name = old_name.rpartition('_')[2]
    # Never overwrite an existing file. This also skips names without an
    # underscore, because new_name is then old_name itself.
    if not os.path.exists(new_name):
        os.rename(old_name, new_name)

Here is one way, using os.path.basename and then str.split to extract characters after the final underscore:
import os
lst = ['3D_492.png', 'Anniversary_1_Purple_710.png']
res = []
for path in lst:
    # Drop any directory part first, then keep what follows the last underscore.
    file_name = os.path.basename(path)
    res.append(file_name.split('_')[-1])
print(res)
['492.png', '710.png']

Sounds like you just want to split on _ and ignore everything but the last result.
*_, result = fname.split("_")
# or:
# result = fname.split("_")[-1]
Renames are done using os.rename
for fname in fnames: # where fnames is the list of the filenames
*_, new = fname.split("_")
os.rename(fname, new)
Note that if you want to do this with an absolute path (e.g. if fnames looks like ["C:/users/yourname/somefile_to_process_123.png", ...] this will require more processing with os.path.split)
for fpath in fnames:
    # os.path.split always returns exactly (directory, final component), so
    # plain two-name unpacking is enough and the builtin dir() stays unshadowed.
    dirpath, basename = os.path.split(fpath)
    *_, newname = basename.split("_")
    # Put the shortened name back into the directory the file came from.
    newpath = os.path.join(dirpath, newname)
    os.rename(fpath, newpath)

You can use a regular expression to search for the digits preceding the extension.
import re
def update_name(name):
    """Return the part of *name* starting at the first digits followed by a dot.

    For example ``'3D_492.png'`` becomes ``'492.png'``. If *name* contains no
    run of digits directly followed by a dot, it is returned unchanged instead
    of failing on a missing match.
    """
    match = re.search(r'\d+\..*', name)
    if match is None:
        # Nothing to strip: keep the original name.
        return name
    return match.group()
update_name('3D_492.png') # '492.png'

Related

How to remove multiple characters from file names under sub dir using python?

What's the best method to remove special characters (multiple characters) from file names?
Currently I am doing it this way, but it is not efficient for removing multiple different characters at the same time, e.g. %$^&* etc.
import os
"""
Remove () and empty space from file names
"""
path = '/var/www/POD/2021/09/12'
os.chdir(path)
files = os.listdir(path)
for root, dirs, files in os.walk(path):
for f in files:
cwd = os.path.join(root, f[:0])
cwd = os.chdir(cwd)
os.rename(f, f.replace(' ', ''))
This looks like a task for the .translate method; consider the following example:
name = "name (with )brackets"
remove = "( )"
t = str.maketrans("", "", remove)
new_name = name.translate(t)
print(new_name)
output
namewithbrackets
Explanation: when str.maketrans is called with two or three arguments, the first two must be strings of equal length, and the n-th character of the first is replaced with the n-th character of the second. I do not use this feature here, so I pass empty strings; the third argument is a string of characters to remove.
Thanks this is how I manage to implement
path = 'C:\\Users\\User\\Desktop\\Test'
os.chdir(path)
remove = "( )"
# The translation table is the same for every file, so build it once.
t = str.maketrans("", "", remove)
for filename in os.listdir(path):
    newname = filename.translate(t)
    # Only touch files whose name actually contains a character to remove.
    if newname != filename:
        os.rename(filename, newname)

Python : How to call csv files from the directory based on the list of items?

I have a list of items and I want to read only those csv files from the directory whose names match the items in the list. I am doing something like the code below, but it's reading all the files from the directory. Please suggest a fix.
x_list = [ a,b,c,d]
files in directory = [a.csv, b.csv, c.csv, d.csv, e.csv, f.csv]
for files in os.listdir(dir + 'folder\\'):
file_name = files[:files.find('.')]
if file_name in x_list:
print(file_name)
From official doc : "The glob module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order."
import glob
dirPath = "/home/.../"
pattern = dirPath + '*.csv'
for file_path in glob.glob(pattern):
print(file_path)
You can use regular expressions for which you'll need the re module.
import re
print(*(file for file in os.listdir() if re.match(r".*\.csv", file) and file[:-4] in x), sep='\n')
You can also use the filter function:
# filter() takes (function, iterable); sep='\n' is an argument of print().
print(*filter(lambda file: re.match(r".*\.csv", file) and file[:-4] in x, os.listdir()), sep='\n')
You can just open the CSVs on your list: This is what you could do:
# a b c d are names of CSVs without .csv extension
my_csvs_names = 'a b c d'.split()
my_csvs_folder = 'c/path/to/my_folder'
for csv_name in my_csvs_names:
file_name = f'{my_csvs_folder}/{csv_name}.csv'
try:
with open(file_name, 'r') as f_in:
for line in f_in:
# do something with it
print(line)
except FileNotFoundError:
print(f'{file_name} does not exist')
In doing something, you can append values to a variable or whatever you are trying to achieve.

Is there a way to filter files via variable string in a filename?

I am trying to filter all files with a common pattern. Why cant I put a variable pattern to filter.
listOfFiles = os.listdir('.')
casenr = str(nr)
pattern = "*"+str(nr)+"_*.state"
for entry in listOfFiles:
if fnmatch.fnmatch(entry, pattern):
statefile = entry
print(statefile)
I expect to find a file named
init_8_abc.txt
Why don't you just use glob? Following your example you could do:
import glob
pattern = f'/path/to/directory/with/files/*{str(nr)}_*.state'
files = glob.glob(pattern)
for file in files:
doSomething(file)
glob allows for searching files with specific pattern using * wildcards (you can use wildcards in any part of the pattern, e.g. to search in multiple subdirectories). See the documentation.
Also, note that f-strings are only supported by Python 3.6 and later.
This should work. Your code will only print the last match, however, since each time it finds a match it overwrites statefile with that match. Make statefile be a list and do statefile.append(entry) and you should get a list of all matches.
This works for me:
import os, fnmatch
listOfFiles = os.listdir('.')
pattern = "*"+"1"+"*"
statefile = []
for entry in listOfFiles:
if fnmatch.fnmatch(entry, pattern):
statefile.append(entry)
print(statefile)
You can use regular expressions to build non-trivial filters for a list of filenames:
import re
filenames = [
    "_filename_example1",
    "_filename_examples",
    "filename_example2",
    "_filename_example_",
]
# Raw string so that \d reaches the regex engine untouched. match() only
# anchors the start, so the trailing $ is what enforces "ends with a digit".
regex = re.compile(r"_.*\d$")
selected = filter(regex.match, filenames)
for s in selected:
    print(s)
Given example displays only _filename_example1 because it starts with _ and ends with number.
import os
import re
listOfFiles = os.listdir('.')
nr = '<wherever this is coming from>'
for filename in listOfFiles:
foundFiles = re.findall('^[\\w]+_' + str(nr) + '_[\\w.]+', filename)
print(foundFiles)

How to match either one of the condition using glob

I need to have a list of files inside a path that are filtered or matched either one of these condition. I am not sure if I'm doing it correctly since I am pretty new in python.
import glob
fileName = ('*.a.gz','*.b.gz','*.c.gz',)
filtered_list = []
for /tmp/myFolder in fileName:
filtered_list.extend(glob.glob(/tmp/myFolder))
return filtered_list
I guess you want to search for those file name patterns in /tmp/myFolder
import glob
fileName = ['*.a.gz','*.b.gz','*.c.gz']
filtered_list = []
for name in fileName:
filtered_list.extend(glob.glob('/tmp/myFolder/' + name))
print(filtered_list)

Rename files in a directory with incremental index

INPUT: I want to add increasing numbers to file names in a directory sorted by date. For example, add "01_", "02_", "03_"...to these files below.
test1.txt (oldest text file)
test2.txt
test3.txt
test4.txt (newest text file)
Here's the code so far. I can get the file names, but each character in the file name seems to be its own item in a list.
import os
for file in os.listdir("/Users/Admin/Documents/Test"):
if file.endswith(".txt"):
print(file)
The EXPECTED results are:
01_test1.txt
02_test2.txt
03_test3.txt
04_test4.txt
with test1 being the oldest and test 4 being the newest.
How do I add a 01_, 02_, 03_, 04_ to each file name?
I've tried something like this. But it adds a '01_' to every single character in the file name.
new_test_names = ['01_'.format(i) for i in file]
print (new_test_names)
If you want to number your files by age, you'll need to sort them first. Call sorted and pass a key parameter: using os.path.getmtime (the last-modification time) as the key sorts the files from oldest to newest.
Use glob.glob to get all text files in a given directory. It is not recursive as of now, but a recursive extension is a minimal addition if you are using python3.
Use str.zfill to build zero-padded prefixes of the form 0x_
Use os.rename to rename your files
import glob
import os
sorted_files = sorted(
glob.glob('path/to/your/directory/*.txt'), key=os.path.getmtime)
for i, f in enumerate(sorted_files, 1):
try:
head, tail = os.path.split(f)
os.rename(f, os.path.join(head, str(i).zfill(2) + '_' + tail))
except OSError:
print('Invalid operation')
It always helps to make a check using try-except, to catch any errors that shouldn't be occurring.
This should work:
import glob
import os

# glob returns full paths in arbitrary order: sort them oldest-first by
# modification time, and prefix only the file name rather than the whole path.
new_test_names = [
    "{:02d}_{}".format(i, os.path.basename(filename))
    for i, filename in enumerate(
        sorted(glob.glob("/Users/Admin/Documents/Test/*.txt"), key=os.path.getmtime),
        start=1,
    )
]
Or without list comprehension:
import os

# Sort oldest-first by modification time; glob itself gives no ordering.
paths = sorted(glob.glob("/Users/Admin/Documents/Test/*.txt"), key=os.path.getmtime)
for i, filename in enumerate(paths, start=1):
    # Prefix the bare file name: glob yields full paths.
    print("{:02d}_{}".format(i, os.path.basename(filename)))
Three things to learn about here:
glob, which makes this sort of file matching easier.
enumerate, which lets you write a loop with an index variable.
format, specifically the 02d modifier, which prints two-digit numbers (zero-padded).
two methods to format integer with leading zero.
1.use .format
import os
i = 1
for file in os.listdir("/Users/Admin/Documents/Test"):
if file.endswith(".txt"):
print('{0:02d}'.format(i) + '_' + file)
i+=1
2.use .zfill
import os
i = 1
for file in os.listdir("/Users/Admin/Documents/Test"):
if file.endswith(".txt"):
print(str(i).zfill(2) + '_' + file)
i+=1
The easiest way is to simply have a variable, such as i, which will hold the number and prepend it to the string using some kind of formatting that guarantees it will have at least 2 digits:
import os
i = 1
for file in os.listdir("/Users/Admin/Documents/Test"):
if file.endswith(".txt"):
print('%02d_%s' % (i, file)) # %02d means your number will have at least 2 digits
i += 1
You can also take a look at enumerate and glob to make your code even shorter (but make sure you understand the fundamentals before using it).
test_dir = '/Users/Admin/Documents/Test'
txt_files = [file
for file in os.listdir(test_dir)
if file.endswith('.txt')]
numbered_files = ['%02d_%s' % (i + 1, file)
for i, file in enumerate(txt_files)]

Categories