I'm new to Python. I have multiple files in a folder and I need to rename them according to the given pattern.
Example:
Folder : /Users/Usr1/Documents/FilesFolder and
File's :
0. a101.employee.txt
1. a101.department.txt
2. a101.salary.txt
I want to remove the prefix of each file name up to and including a101 and rename the files to employee.txt/salary.txt.
Expected Output:
My try:
import os

path = '/Users/User1/Documents/FilesFolder'
files = os.listdir(path)
for index, file in enumerate(files):
    # NOTE(review): str.removeprefix (Python 3.9+) only strips when the name
    # starts with exactly 'a101'; the '.' that follows is kept, so
    # 'a101.employee.txt' becomes '.employee.txt'. Names that start with
    # anything else are returned unchanged.
    os.rename(os.path.join(path, file), os.path.join(path,file.removeprefix('a101')))
But unable to get expected result.
You may use regular expression:
import os
import re

path = '/Users/User1/Documents/FilesFolder'
files = os.listdir(path)

# Everything up to and including "a101." is the prefix; group 1 is the part
# to keep. The dot is escaped so it only matches a literal ".", and the
# pattern is compiled once instead of on every iteration.
p = re.compile(r".*a101\.(.+)")

for file in files:
    m = p.match(file)
    if m is not None:
        # Only previews the new name; nothing is renamed on disk here.
        file_new = m.group(1)
        print(file_new)
I think this can solve your problem
import os
import glob

# directory Path
path = "/path/to/dir"

# move to directory
os.chdir(path)

# Getting all files in the directory which contains a101
files = glob.glob("*a101*")
for file in files:
    splitted = file.split('.')
    if len(splitted) < 2:
        # no "." in the name, so there is no "<name>.<ext>" tail to keep
        continue
    # the last two dot-separated parts form the new "<name>.<ext>"
    filename, ext = splitted[-2:]
    new_name = f"{filename}.{ext}"
    os.rename(file, new_name)
If your file names always follow the same pattern with three dots, then you can use this for renaming. Note that removeprefix was only introduced in Python 3.9.
files = ["0. a101.employee.txt", "1. a101.department.txt" ,"2. a101.salary.txt"]
for file in files:
    # keep only the last two dot-separated parts: "<name>.<ext>"
    print(".".join(file.split(".")[-2:]))
output:
employee.txt
department.txt
salary.txt
i can suggest you:
files = ["0. a101.employee.txt", "1. a101.department.txt" ,"2. a101.salary.txt"]
for file in files:
    # "0. a101.employee.txt".split(".") -> ['0', ' a101', 'employee', 'txt'],
    # so items 2 and 3 are the name and the extension.
    filename = file.split(".")
    print(filename[2]+"."+filename[3])
I got the following output:
employee.txt
department.txt
salary.txt
Related
I have files with filenames as "lsud-ifgufib-1234568789.png" I want to rename this file as digits only which are followed by last "-" and then save them back in the folder.
Basically I want the final filename to be the digits that are followed by "-".
path = 'C:/Users/abc/downloads'
r = re.compile(r"(\d+)")
for filename in os.listdir(path):
    # NOTE(review): filter() walks the characters of filename one at a time,
    # and print() shows the lazy filter object rather than the matched digits.
    newlist = filter(r.match, filename)
    print(newlist)
How do I proceed further?
Assumptions:
You want to rename files if the file has a hyphen before the number.
The file may or may not have an extension.
If the file has an extension, preserve it.
Then would you please try the following:
import os
import re

path = 'C:/Users/abc/downloads'

# The greedy ".*-" runs up to the last hyphen that is directly followed by a
# digit; group 1 is those digits plus whatever follows (e.g. the extension).
pattern = re.compile(r'.*-(\d+.*)')

for filename in os.listdir(path):
    m = pattern.search(filename)
    if m:
        os.rename(os.path.join(path, filename), os.path.join(path, m.group(1)))
You could try a regex search followed by a path join:
import re
import os

path = 'C:/Users/abc/downloads'

# A run of digits that is followed only by non-digits up to the end of the
# name, i.e. the last number in the name. Raw string so "\d" is not treated
# as a string escape.
last_digits = re.compile(r"\d+(?=\D+?$)")

for filename in os.listdir(path):
    match = last_digits.search(filename)
    if match is None:
        # no such digit run in this name: leave the file untouched
        continue
    # Both ends of the rename are anchored in `path`; os.listdir() returns bare
    # names, which would otherwise be resolved against the current directory.
    # Note: the new name is the digits only, so the extension is not kept.
    os.rename(os.path.join(path, filename), os.path.join(path, match.group()))
import re
import pathlib

fileName = "lsud-ifgufib-1234568789.png"

# Every run of digits found in the name, glued together in order of appearance.
_digObj = re.compile(r'\d+')
digFileName = ''.join(found.group() for found in _digObj.finditer(fileName))

# Put the original extension back onto the digits-only stem.
_extn = pathlib.Path(fileName).suffix
replFileName = f"{digFileName}{_extn}"
I have some xml files in a folder as example 'assests/2020/2010.xml', 'assests/2020/20005.xml', 'assests/2020/20999.xml' etc. I want to get the filename with max value in the '2020' folder. For above three files output should be 20999.xml
I am trying as following:
import glob
import os
# Collect every entry directly under assets/2020/ (the pattern is not limited to .xml).
list_of_files = glob.glob('assets/2020/*')
# Disabled attempt: this would pick the entry with the largest os.path.getctime
# value, not the one with the largest number in its name.
# latest_file = max(list_of_files, key=os.path.getctime)
# print (latest_file)
I wasn't able to work out the logic to get the required file.
Here is the resource that have best answer to my query but I couldn't build my logic.
You can use pathlib to glob for the xml files and access the Path object attributes like .name and .stem:
from pathlib import Path

# Only stems made of decimal digits can be ranked numerically; anything else
# (e.g. "readme.xml") would make int() raise ValueError.
list_of_files = [p for p in Path('assets/2020/').glob('*.xml') if p.stem.isdecimal()]

# Compare by integer value so "20999" beats "2010" regardless of string order.
# default=None avoids the ValueError max() raises on an empty sequence.
latest = max(list_of_files, key=lambda p: int(p.stem), default=None)
if latest is not None:
    print(latest.name)
Output:
20999.xml
I can't test it out right now, but you may try this:
import os

files = []
for filename in list_of_files:
    # Drop any directory part and the extension so only the number is left,
    # e.g. 'assets/2020/2010.xml' -> '2010'. This works whether or not the
    # entries carry their directory path.
    stem = os.path.splitext(os.path.basename(str(filename)))[0]
    files.append(int(stem))
print(files)
This should get you your filenames in integer format and now you should be able to sort them in descending order and get the first item of the sorted list.
Use re to search for the appropriate endings in your file paths. If found use re again to extract the nr.
import re

list_of_files = [
    'assests/2020/2010.xml',
    'assests/2020/20005.xml',
    'assests/2020/20999.xml',
]

# The number that sits directly before the ".xml" ending; one pattern with a
# capture group replaces the two separate findall passes.
number_before_xml = re.compile(r'(\d+)\.xml$')

highest_nr = -1
highest_nr_file = ''
for f in list_of_files:
    match = number_before_xml.search(f)
    if match is None:
        # path does not end in "<digits>.xml"
        continue
    nr = int(match.group(1))
    if nr > highest_nr:
        highest_nr = nr
        highest_nr_file = f
print(highest_nr_file)
Result
assests/2020/20999.xml
You can also try this way.
import os
import re

path = "assests/2020/"
files = [
    "assests/2020/2010.xml",
    "assests/2020/20005.xml",
    "assests/2020/20999.xml",
]

# For each path take the trailing "<digits>.xml" piece and keep the digits as an int.
n = []
for file in files:
    tail = re.findall(r'\d+\.xml$', file)[0]
    digits = tail.split('.')[0]
    n.append(int(digits))

# Largest number, turned back into a file name.
output = f"{max(n)}.xml"
print("Biggest max file name of .xml file is ", os.path.join(path, output))
Output:
Biggest max file name of .xml file is assests/2020/20999.xml
import glob
import os

# this will store all the xml files in your directory whose name (without the
# extension) is a number; the full file name is kept instead of its first
# four characters, so "20999.xml" is not cut down to "2099"
xmlFiles = [file for file in glob.glob("*.xml") if os.path.splitext(file)[0].isdecimal()]

# this will print the maximum one, compared as integers (string comparison
# would rank "2010.xml" and "20999.xml" character by character)
if xmlFiles:
    print(max(xmlFiles, key=lambda file: int(os.path.splitext(file)[0])))
I have a folder which has files with names:
"fileX.JPG" where X = 1....N
and I want to name the files as :
"000000000X.JPG" where X=1...N
The new name of the file should have the number from the old name of the file plus the zeros. so example file names I want is:
0000000000001.jpg
0000000000011.jpg
0000000000111.jpg
etc
The file name is 13 characters long. so should have zeros accordingly.
I have not started my code. Don't know where should I start.
You can use os.rename() from the os module
import pathlib

for path in pathlib.Path("a_directory").iterdir():
    if path.is_file():
        # original filename (without the extension)
        old_name = path.stem
        # original file extension (including the leading dot)
        old_extension = path.suffix
Also try this:
import os

path = '/Users/myName/Desktop/directory'
files = os.listdir(path)
for index, file in enumerate(files):
    # The new name is the entry's position in the listing plus ".jpg"
    # (0.jpg, 1.jpg, ...); the old name is not used.
    os.rename(os.path.join(path, file), os.path.join(path, f'{index}.jpg'))
# NOTE(review): these lines use path, old_name and old_extension, so they look
# like the rest of the body of the pathlib loop shown before the os.listdir
# example — confirm the intended placement and indentation before running.
directory = path.parent
#current file location
# new name = literal "text" + original stem + original extension
new_name = "text" + old_name + old_extension
# rename in place: same directory, new file name
path.rename(pathlib.Path(directory, new_name))
You can use os.rename.
For example:
import os
import re

# "file<digits>.JPG" (extension matched case-insensitively); group 1 is the
# whole number. str.strip() removes a *set of characters* from both ends, so
# it cannot be used to cut off the "file" prefix reliably.
numbered = re.compile(r"file(\d+)\.jpg", re.IGNORECASE)

for file in os.listdir():
    match = numbered.fullmatch(file)
    if match is None:
        # not one of the fileX.JPG files
        continue
    # Get the number, e.g.: "file12.JPG" -> "12"
    old_number = match.group(1)
    # zero-pad to the 13 digits asked for, e.g. "0000000000012.JPG"
    os.rename(file, f"{old_number.zfill(13)}.JPG")
You might have to adapt this based on how your files are actually named.
import os


# Function to rename multiple files
def main(directory="path-to-files", width=12):
    """Rename every entry of *directory* to a zero-padded running number.

    Entries are taken in sorted name order and renamed to ``<count>.jpg``,
    where ``<count>`` starts at 0 and is left-padded with zeros to *width*
    digits. Sorting is by name, so ``file10`` comes before ``file2``.

    Args:
        directory: Folder whose entries are renamed.
        width: Number of digits in the new name (without the extension).
    """
    for count, filename in enumerate(sorted(os.listdir(directory))):
        dst = str(count).zfill(width) + ".jpg"
        # Join with the directory that was listed, so both paths contain a
        # separator and point into the same folder.
        os.rename(os.path.join(directory, filename), os.path.join(directory, dst))


# Driver Code
if __name__ == '__main__':
    # Calling main() function
    main()
I have lots of zipped files on a Linux server and each file includes multiple text files.
What I want is to extract some of those text files, which have the same name across the zipped files, and save them to a folder; I am creating one folder for each zipped file and extracting the text file into it. I need to add the parent zipped folder name to the end of the file names and save all text files in one directory. For example, if the zipped folder was March132017.zip and I extracted holding.txt, my filename would be holding_march132017.txt.
My problem is that I am not able to change the extracted file's name.
I would appreciate if you could advise.
import os
import sys
import zipfile

os.chdir("/feeds/lipper/emaxx")
pwkwd = "/feeds/lipper/emaxx"
filelist = 'ISSUERS.TXT' , 'SECMAST.TXT' , 'FUND.TXT' , 'HOLDING.TXT'

for item in os.listdir(pwkwd):  # loop through items in dir
    if not item.endswith(".zip"):  # check for ".zip" extension
        continue
    file_name = os.path.abspath(item)  # get full path of files
    # one output folder per archive, named after the zip without its extension
    outpath = "/SCRATCH/emaxx" + "/" + os.path.splitext(item)[0]
    # the with-block closes the archive even if an extraction fails
    with zipfile.ZipFile(file_name) as zip_ref:
        for name in filelist:
            try:
                zip_ref.extract(name, outpath)
            except KeyError:
                # this archive has no member called `name`; try the next one
                continue
import zipfile

# the with-block closes the archive once every member has been extracted
with zipfile.ZipFile('somefile.zip') as zipdata:
    # iterate through each file
    for zipinfo in zipdata.infolist():
        # This will do the renaming: the member is extracted under whatever
        # name zipinfo.filename holds when extract() is called.
        zipinfo.filename = do_something_to(zipinfo.filename)
        zipdata.extract(zipinfo)
Reference:
https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
Why not just read the file in question and save it yourself instead of extracting? Something like:
import os
import zipfile

source_dir = "/feeds/lipper/emaxx"  # folder with zip files
target_dir = "/SCRATCH/emaxx"  # folder to save the extracted files

# Are you sure your files names are capitalized in your zip files?
filelist = ['ISSUERS.TXT', 'SECMAST.TXT', 'FUND.TXT', 'HOLDING.TXT']

for item in os.listdir(source_dir):  # loop through items in dir
    if not item.endswith(".zip"):  # check for ".zip" extension
        continue
    file_path = os.path.join(source_dir, item)  # get zip file path
    # archive name without directory or ".zip"; using file_path here would
    # put the whole source directory path into the output file name
    zip_stem = os.path.splitext(item)[0]
    with zipfile.ZipFile(file_path) as zf:  # open the zip file
        members = set(zf.namelist())  # built once per archive
        for target_file in filelist:  # loop through the list of files to extract
            if target_file not in members:  # check if the file exists in the archive
                continue
            # generate the desired output name, e.g. HOLDING_March132017.txt
            target_name = os.path.splitext(target_file)[0] + "_" + zip_stem + ".txt"
            target_path = os.path.join(target_dir, target_name)  # output path
            # zf.read() returns bytes, so the output file is opened in binary mode
            with open(target_path, "wb") as f:
                f.write(zf.read(target_file))  # save the contents of the file in it
            # next file from the list...
    # next zip file...
You could simply run a rename after each file is extracted right? os.rename should do the trick.
# extract() returns the path of the file it wrote; that file is renamed below
extracted_path = zip_ref.extract(name, outpath)
# NOTE(review): assumes outpath is the per-zip folder (".../<zip name>"), so its
# last component names the parent zip — confirm against the extraction loop.
parent_zip = os.path.basename(os.path.normpath(outpath))
new_file_name, extension = os.path.splitext(os.path.basename(name)) # just the filename
# same folder as the extracted file, "<name>_<zip name><original extension>"
new_name_path = os.path.join(os.path.dirname(extracted_path), new_file_name + "_" + parent_zip + extension)
os.rename(extracted_path, new_name_path)
For the filename, if you want it to be incremental, simply start a count and, for each file, go up by one.
count = 0
# enumerate(..., start=1) gives the running number 1, 2, 3, ... per file
for count, file in enumerate(files, start=1):
    # ... Do our file actions
    new_file_name = f"{original_file_name}_{count}"
    # ...
Or if you don't care about the end name you could always use something like a uuid.
import uuid
# uuid4() returns a uuid.UUID object, not a str; wrap it in str() before
# concatenating it into a file name.
random_name = uuid.uuid4()
outpath = '/SCRATCH/emaxx'
suffix = os.path.splitext(item)[0]  # zip file name without ".zip"
for name in filelist:
    try:
        # getinfo() raises KeyError when the archive has no such member
        member = zip_ref.getinfo(name)
    except KeyError:
        continue
    filename, ext = os.path.splitext(name)
    # ext already starts with ".", so no extra dot is inserted before it
    member.filename = f'{filename}_{suffix}{ext}'  # rename the extracting file to the suffix file name
    zip_ref.extract(member, outpath)  # use the renamed file descriptor to extract the file
I doubt it is possible to rename files during their extraction.
What about renaming the files once they are extracted?
Relying on linux bash, you can achieve it in a one line :
import subprocess

# find hands each matching path to a small sh script as $1 (and zipName as $2);
# sed computes the new name by inserting zipName before the trailing ".txt".
# An argument list is used instead of one shell string, so spaces or quotes in
# outpath / zipName are not interpreted by a shell.
# NOTE(review): zipName still ends up inside the sed replacement text, so it
# must not contain "/", "&" or backslashes.
# mv is executed for real; put "echo" in front of it for a dry run.
rename_script = r'''mv -- "$1" "$(printf '%s' "$1" | sed "s/\.txt\$/$2.txt/")"'''
subprocess.run(
    ["find", outpath, "-name", "*.txt", "-exec", "sh", "-c", rename_script, "sh", "{}", zipName, ";"],
    check=True,
)
So, first we search all txt files in the specified folder, then exec the renaming command, with the new name computed by sed.
Code not tested, i'm on windows now ^^'
This question is how to get list of files from a directory into text file using python.
Result in the text file should exactly be like this:
E:\AA\a.jpg
E:\AA\b.jpg
...
How to correct the code below:
WD = "E:\\AA"
import glob
files = glob.glob ('*.jpg')
with open ('infiles.txt', 'w') as in_files:
    # NOTE(review): files is a list, so files + '\n' raises TypeError
    # (a list cannot be concatenated with a str).
    in_files.write(files +'\n')
glob.glob() returns a list. You have to iterate through it.
import glob
import os

WD = "E:\\AA"
# Glob inside WD rather than the current directory, so every result carries
# the directory part asked for in the expected output.
files = glob.glob(os.path.join(WD, '*.jpg'))
with open('infiles.txt', 'w') as in_files:
    # glob.glob() returns a list: write one path per line
    for eachfile in files:
        in_files.write(eachfile + '\n')
Input directory path : WD = "E://AA"
You can restrict the listing to a specific file extension, e.g. path = WD+'/*.jpg';
if you need the list of all files, use '*' instead, e.g. path = WD+'/*'
import glob

w_dir = WD + "/*.jpg"
# Text mode: the paths are str, and writing str to a file opened with "wb"
# raises TypeError on Python 3.
with open("infiles.txt", "w") as fp:
    for path in glob.glob(w_dir):
        fp.write(path + "\n")
Without path, glob.glob returns list of filename (No directory part). To get full path you need to call os.path.abspath(filename) / os.path.realpath(filename) / os.path.join(WD, filename)
>>> glob.glob('*.png')
['gnome-html.png', 'gnome-windows.png', 'gnome-set-time.png', ...]
>>> os.path.abspath('gnome-html.png')
'/usr/share/pixmaps/gnome-html.png'
With path, glob.glob return list of filename with directory part.
>>> glob.glob('/usr/share/pixmaps/*.png')
['/usr/share/pixmaps/gnome-html.png', '/usr/share/pixmaps/gnome-windows.png', '/usr/share/pixmaps/gnome-set-time.png', ...]
import glob
import os

WD = r'E:\AA'
# The pattern includes the directory, so each result already has its directory part.
files = glob.glob(os.path.join(WD, '*.jpg'))
with open('infiles.txt', 'w') as in_files:
    # writelines() adds no line breaks itself, hence the explicit '\n'
    in_files.writelines(fn + '\n' for fn in files)
or
import glob
import os

WD = r'E:\AA'
# After chdir the relative pattern and 'infiles.txt' both resolve inside WD.
os.chdir(WD)
files = glob.glob('*.jpg')
with open('infiles.txt', 'w') as in_files:
    # glob returned bare names here, so the directory is joined back on
    in_files.writelines(os.path.join(WD, fn) + '\n' for fn in files)
Here is a two line simple solution:
import os

given_dir = 'the_dierctory'
# Resolve the directory once, so the listing and the written paths refer to
# the same place (both relative to the current working directory).
base_dir = os.path.abspath(given_dir)
names = os.listdir(base_dir)
# the with-block closes all_names.txt once the paths are written
with open('all_names.txt', 'w') as filee:
    filee.writelines(os.path.join(base_dir, i) + '\n' for i in names)
where given_dir is the directory name. The output is a text file (all_names.txt) where each line in the file is the full path to all files and directories in the given_dir.