Rename multiple files with multiple extensions - python

I have this Python script which takes these three arguments:
a path to a directory with the files to rename
a CSV file with two columns mapping the original file names to the new ones:
original,new
barcode01,sample01
barcode02,sample02
the extension of the files (e.g. .txt, .bam, .png, .txt.readdb.log), which can be long.
The script:
import os
import csv
import sys

def rename_files(path, name_map, ext):
    with open(name_map, 'r') as csv_map:
        filereader = csv.DictReader(csv_map)
        for row in filereader:
            original_name = row["original"]
            new_name = row["new"]
            old_filename = '%s/%s.%s' % (path, original_name, ext)
            new_filename = '%s/%s_%s.%s' % (path, new_name, original_name, ext)
            try:
                os.rename(old_filename, new_filename)
            except Exception as e:
                print('Rename for file %s failed. Details: ' % old_filename)
                print(e)

if __name__ == '__main__':
    filename, path, name_map, ext = sys.argv
    rename_files(path, name_map, ext)
For example:
python rename.py /test/directory filestorename.csv txt
will only rename barcode01.txt to sample01.txt.
However, there are multiple barcode01 files with different extensions (e.g. barcode01.png). Instead of passing these extensions as arguments to the script, how can I modify it to rename all of these files at once, keeping the extension the same?

Assuming all files exist, you can extract the base directory, basename and file extension as follows:
from csv import DictReader
from os import path, rename
from sys import exit
import argparse

def rename_file(row):
    origin = row['original']
    directory = path.dirname(origin)
    _, extension = path.splitext(path.basename(origin))
    target = path.join(directory, '{}{}'.format(row['new'], extension))
    return rename(origin, target)
call it inside a loop:
def rename_files(spreadsheet):
    csv = DictReader(open(spreadsheet))
    valid_rows = filter(lambda row: path.isfile(row['original']), csv)
    for row in valid_rows:
        rename_file(row)
You may also improve your main function:
def main():
    parser = argparse.ArgumentParser('rename files from *.csv')
    parser.add_argument(
        '-f', '--file',
        metavar='file',
        type=str,
        help='csv (comma-separated values) file'
    )
    args = parser.parse_args()
    if not path.isfile(args.file):
        print('No such file: {}'.format(args.file))
        return exit(1)
    return rename_files(args.file)

if __name__ == '__main__':
    main()
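For example, assuming the script is saved as rename.py as in the question, it would be called as:
python rename.py -f filestorename.csv
Note that with this approach the CSV's original column must contain the (relative or absolute) path to each file, since the directory and extension are derived from it.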

I would use the pathlib library, which makes dealing with file names easier. Note that in pathlib, a file name without its extension is called a stem.
#!/usr/bin/env python3
import csv
import pathlib
import sys

def rename_files(directory, name_map):
    directory = pathlib.Path(directory)
    with open(name_map) as stream:
        reader = csv.reader(stream)
        next(reader)  # Skip the header
        for old_name, new_name in reader:
            for old_path in directory.glob(old_name + ".*"):
                new_path = old_path.with_stem(new_name)
                old_path.rename(new_path)

if __name__ == '__main__':
    rename_files(sys.argv[1], sys.argv[2])
Example:
python rename.py /test/directory filestorename.csv
Notes
The key to renaming files regardless of extension is to use the .glob() method to find all files with the same name but different extensions.
The .with_stem() method takes the path and returns another path with a different stem (the filename minus its extension).
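Two caveats worth noting (additions, not part of the original answer): Path.with_stem() only exists in Python 3.9+, and the stem strips just the final suffix, so a multi-part extension such as .txt.readdb.log would collapse to .log. A minimal sketch of possible workarounds inside the same loop:

# Pre-3.9 equivalent of old_path.with_stem(new_name):
new_path = old_path.with_name(new_name + old_path.suffix)

# Keeping a multi-part extension such as .txt.readdb.log intact
# (assumes the original base name itself contains no dots):
new_path = old_path.with_name(new_name + "".join(old_path.suffixes))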

Related

Moving files with python using a list .txt

I want to move files from one directory to another, based on a .txt file containing the names of the files to be moved. The script should first browse the directory and, if it finds a listed file, move it to the new directory. Where do I start? I've managed to do this for a hardcoded file list, but I'd like to do it directly from the .txt file without rewriting the names of the files to be moved:
import shutil, os

files = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt']
for file in files:
    shutil.move(file, 'destination_directory')
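To take the names directly from the .txt file instead of hardcoding the list, a minimal sketch could look like this (the directory and file names below are placeholders; it assumes the .txt lists one filename per line):

import os
import shutil

source_dir = 'source_directory'            # where the files currently live (assumed)
destination_dir = 'destination_directory'
list_file = 'files_to_move.txt'            # hypothetical list file, one name per line

with open(list_file) as fh:
    names = [line.strip() for line in fh if line.strip()]

for name in names:
    src = os.path.join(source_dir, name)
    if os.path.isfile(src):                # only move files that actually exist
        shutil.move(src, destination_dir)
    else:
        print(f'{src} not found, skipping')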
As far as I know, you can't move the files using only the names in the .txt; you need to move them by their file paths. You can use my code below; I have double-checked it and it works on my side.
import os
import shutil
from pathlib import Path

def create_directory(dir_name: str):
    """Create a directory before creating files (txt, csv, ...)."""
    system_path = os.getcwd()
    dir_path = os.path.join(system_path, dir_name)
    try:
        os.makedirs(dir_path, exist_ok=True)
    except OSError:
        print("Directory '%s' can not be created" % dir_name)
    return dir_path

def create_files(dir_path: str, file_name: str):
    """Function for creating files."""
    file_path = os.path.join(dir_path, file_name)
    with open(file_path, "w"):
        if Path(file_path).is_file():
            print(f'File: {file_name} created successfully')
        else:
            print(f'File: {file_name} does not exist')
        # No explicit close() needed: the "with" block closes the file automatically.
    return file_path

def main():
    # Step 1: Creating file1.txt, file2.txt, file3.txt, file4.txt
    file_one = create_files(create_directory("file1_dir"), 'file1.txt')
    file_two = create_files(create_directory("file2_dir"), 'file2.txt')
    file_three = create_files(create_directory("file3_dir"), 'file3.txt')
    file_four = create_files(create_directory("file4_dir"), 'file4.txt')
    # Step 2: Creating destination_directory
    destination_dir = create_directory('destination_directory')
    files = [file_one, file_two, file_three, file_four]
    # Step 3: Moving the files
    for file in files:
        shutil.move(file, destination_dir)

if __name__ == "__main__":
    main()

Extract zip file and nested zip files into target directory using Python

I have a file structure something like this:
/a.zip
    /not_a_zip/
        contents
    /b.zip
        contents
and I want to create a directory a, extract a.zip into it, and extract all the nested zipped files in place, so that I get something like this:
/a/
    /not_a_zip/
        contents
    /b/
        contents
I tried this solution, but I was getting errors because inside my main directory I have subdirectories, as well as zip files.
I want to be able to extract the main zip file into a directory of the same name, then be able to extract all nested files within, no matter how deeply nested they are.
EDIT: my current code is this:
archive = zipfile.ZipFile(zipped, 'r')
for file in archive.namelist():
    archive.extract(file, resultDirectory)

for f in [filename for filename in archive.NameToInfo if filename.endswith(".zip")]:
    # get file name and path to extract
    fileToExtract = resultDirectory + '/' + f
    # get directory to extract the new file to
    directoryToExtractTo = fileToExtract.rsplit('/', 1)
    directoryToExtractTo = directoryToExtractTo[0] + '/'
    # extract nested file
    nestedArchive = zipfile.ZipFile(fileToExtract, 'r')
    for file in nestedArchive.namelist():
        nestedArchive.extract(fileToExtract, directoryToExtractTo)
but I'm getting this error:
KeyError: "There is no item named 'nestedFileToExtract.zip' in the archive"
Even though it exists in the file system.
Based on these other solutions: this and this.
import os
import io
import sys
import zipfile

def extract_with_structure(input_file, output):
    with zipfile.ZipFile(input_file) as zip_file:
        print(f"namelist: {zip_file.namelist()}")
        for obj in zip_file.namelist():
            filename = os.path.basename(obj)
            if not filename:
                # Skip folders
                continue
            if 'zip' == filename.split('.')[-1]:
                # extract a nested zip into a folder named after it
                # (read by the full member name, not just its basename)
                content = io.BytesIO(zip_file.read(obj))
                f = zipfile.ZipFile(content)
                dirname = os.path.splitext(os.path.join(output, obj))[0]
                for i in f.namelist():
                    f.extract(i, dirname)
            else:
                # extract a regular file
                zip_file.extract(obj, os.path.join(output))

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("No zipfile specified or output folder.")
        exit(1)
    extract_with_structure(sys.argv[1], sys.argv[2])
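The code above only goes one level deep into nested archives. If the zips can be nested arbitrarily deep, as the question asks, a hedged sketch of a recursive variant (an addition, not part of the answer above) could look like this:

import os
import zipfile

def extract_nested(zip_path, output_dir):
    """Extract zip_path into output_dir, then recurse into any .zip members."""
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(output_dir)
    for root, _dirs, files in os.walk(output_dir):
        for name in files:
            if name.lower().endswith('.zip'):
                inner = os.path.join(root, name)
                target = os.path.splitext(inner)[0]   # e.g. b.zip -> b/
                extract_nested(inner, target)
                os.remove(inner)                      # optionally drop the inner archive

Calling extract_nested('a.zip', 'a') would then produce the layout shown in the question.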

Is there a way to remove the extension from all files in a folder? [duplicate]

I would like to change the extension of the files in a specific folder. I read about this topic in the forum and, using those ideas, I have written the following code. I expected it to work, but it does not. I would be thankful for any guidance to find my mistake.
import os, sys

folder = 'E:/.../1936342-G/test'
for filename in os.listdir(folder):
    infilename = os.path.join(folder, filename)
    if not os.path.isfile(infilename): continue
    oldbase = os.path.splitext(filename)
    infile = open(infilename, 'r')
    newname = infilename.replace('.grf', '.las')
    output = os.rename(infilename, newname)
    outfile = open(output, 'w')
The open on the source file is unnecessary, since os.rename only needs the source and destination paths to get the job done. Moreover, os.rename always returns None, so it doesn't make sense to call open on its return value.
import os
import sys

folder = 'E:/.../1936342-G/test'
for filename in os.listdir(folder):
    infilename = os.path.join(folder, filename)
    if not os.path.isfile(infilename): continue
    oldbase = os.path.splitext(filename)
    newname = infilename.replace('.grf', '.las')
    output = os.rename(infilename, newname)
I simply removed the two open calls. Check if this works for you.
You don't need to open the files to rename them, os.rename only needs their paths. Also consider using the glob module:
import glob, os

for filename in glob.iglob(os.path.join(folder, '*.grf')):
    os.rename(filename, filename[:-4] + '.las')
Something like this will rename all files in the executing directory that end in .txt to .text
import os, sys

for filename in os.listdir(os.path.dirname(os.path.abspath(__file__))):
    base_file, ext = os.path.splitext(filename)
    if ext == ".txt":
        os.rename(filename, base_file + ".text")
import os

dir = "C:\\Users\\jmathpal\\Desktop\\Jupyter\\Arista"
for i in os.listdir(dir):
    files = os.path.join(dir, i)
    split = os.path.splitext(files)
    if split[1] == '.txt':
        os.rename(files, split[0] + '.csv')
#!/usr/bin/env python
'''
Batch-renames file extensions in a given directory.
'''
import os
import sys
from os.path import join
from os.path import splitext

def main():
    try:
        work_dir, old_ext, new_ext = sys.argv[1:]
    except ValueError:
        sys.exit("Usage: {} directory old-ext new-ext".format(__file__))
    for filename in os.listdir(work_dir):
        if old_ext == splitext(filename)[1]:
            newfile = filename.replace(old_ext, new_ext)
            os.rename(join(work_dir, filename), join(work_dir, newfile))

if __name__ == '__main__':
    main()
If you have Python 3.4 or later, you can use pathlib. This example changes .txt to .md:
from pathlib import Path

path = Path('./dir')
for f in path.iterdir():
    if f.is_file() and f.suffix in ['.txt']:
        f.rename(f.with_suffix('.md'))
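Since the question title asks about removing the extension entirely, note that the same pathlib approach works with an empty suffix:

f.rename(f.with_suffix(''))   # 'report.txt' -> 'report'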
With a printout and a confirmation prompt:
import os
from os import walk

mypath = r"C:\Users\you\Desktop\test"
suffix = ".png"
replace_suffix = ".jpg"

filenames = next(walk(mypath), (None, None, []))[2]
for filename in filenames:
    if suffix in filename:
        print(filename)

rep = input('Press y to confirm the rename: ')
if rep == "y":
    for filename in filenames:
        if suffix in filename:
            os.rename(mypath + "\\" + filename, mypath + "\\" + filename.replace(suffix, replace_suffix))

Create Comma-separated values file from file path

I am using Python to create a CSV (comma-separated values) file. It should contain the path of every file in a folder, each followed by a separator and the number taken from the name of the file's folder (without the leading character). For example, if the image 1.pgm is stored in the folder s14, the CSV line looks like
at/s14/1.pgm;14
where at/s14/1.pgm is the full path to the file 1.pgm and 14 is the number (called the label) taken from its folder s14. Could you help me implement this in Python? I tried the code from that post, but it does not work for my task.
Try this:
import os
import re
import sys

SEPARATOR = ';'
CSV_FILE_PATH = 'output.csv'

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("usage: create_csv <base_path>")
        sys.exit(1)
    path = sys.argv[1]
    # only folders in the specified folder
    folders = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    with open(CSV_FILE_PATH, 'w') as csv_file:
        for folder in folders:
            # folder path
            folder_path = os.path.join(path, folder)
            # extracting the number; s\d+ is the pattern of your folder names, for example s20
            match = re.match(r's(\d+)', folder)
            if match is not None:
                number = match.group(1)
                # loop over the files within the folder
                for filename in os.listdir(folder_path):
                    # full file path
                    filepath = os.path.join(folder_path, filename)
                    # writing the result
                    line = SEPARATOR.join((filepath, number))
                    csv_file.write(line + '\n')
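For reference, assuming the script is saved as create_csv.py and the base folder is called at as in the question, running
python create_csv.py at
should write lines of the form at/s14/1.pgm;14 into output.csv.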

How to read and write multiple files?

I want to write a program for this: in a folder I have n files; first read one file, perform some operation, and store the result in a separate file. Then read the second file, perform the operation again, and save the result in a new second file. Do the same for all n files, so that the program reads the files one by one and stores the results of each file separately. Please give examples of how I can do it.
I think what you're missing is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all the files with extension *.txt to files with extension *.out
import glob

list_of_files = glob.glob('./*.txt')  # create the list of files
for file_name in list_of_files:
    FI = open(file_name, 'r')
    FO = open(file_name.replace('txt', 'out'), 'w')
    for line in FI:
        FO.write(line)
    FI.close()
    FO.close()
import sys

# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
    print(n)  # print out the filename we are currently processing
    input = open(n, "r")
    output = open(n + ".out", "w")
    # do some processing
    input.close()
    output.close()
Then call it like:
./foo.py bar.txt baz.txt
You may find the fileinput module useful. It is designed for exactly this problem.
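For instance, a minimal fileinput sketch (an addition, not from the original answer) that processes every file named on the command line and writes one .out file per input:

import fileinput

out = None
for line in fileinput.input():        # iterates over all files given in sys.argv[1:]
    if fileinput.isfirstline():       # a new input file has started
        if out:
            out.close()
        out = open(fileinput.filename() + ".out", "w")
    out.write(line)                   # do some processing here
if out:
    out.close()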
I've just learned of the os.walk() command recently, and it may help you here.
It allows you to walk down a directory tree structure.
import os

OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
    for file in files:
        read_f = open(os.path.join(path, file), 'r')
        write_f = open(os.path.join(OUTPUT_DIR, file), 'w')
        # Do stuff
Combined answer incorporating either a directory or a specific list of filenames as arguments:
import sys
import os.path
import glob

def processFile(filename):
    fileHandle = open(filename, "r")
    for line in fileHandle:
        # do some processing
        pass
    fileHandle.close()

def outputResults(filename):
    output_filemask = "out"
    fileHandle = open("%s.%s" % (filename, output_filemask), "w")
    # do some processing
    fileHandle.write('processed\n')
    fileHandle.close()

def processFiles(args):
    input_filemask = "log"
    directory = args[1]
    if os.path.isdir(directory):
        print("processing a directory")
        list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
    else:
        print("processing a list of files")
        list_of_files = sys.argv[1:]
    for file_name in list_of_files:
        print(file_name)
        processFile(file_name)
        outputResults(file_name)

if __name__ == '__main__':
    if len(sys.argv) > 1:
        processFiles(sys.argv)
    else:
        print('usage message')
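Assuming the script above is saved as, say, process.py, it can be called either with a directory (all *.log files in it are processed) or with an explicit list of files:
python process.py /path/to/logs
python process.py a.log b.log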
from pylab import *
import csv
import os
import glob
import re

x = []
y = []
f = open("one.txt", 'w')
for infile in glob.glob('*.csv'):
    # print(infile)
    csv23 = csv2rec(infile, delimiter=',')
    for line in csv23:
        x.append(line[1])
    # print(len(x))
    for i in range(3000, 8000):
        y.append(x[i])
    print(infile, "\t", mean(y))
    print(infile, "\t\t", mean(y), file=f)
    del y[:len(y)]
    del x[:len(x)]
I know I saw this double with open() somewhere but couldn't remember where, so I built a small example in case someone needs it.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML():
""" This function will search a directory text files to be edited. """
## define some regex for our search and replace. We are looking for <, > and &
## To replaced with &ls;, > and &. We might want to replace proper whitespace
## chars to as well? (r'\t', ' ') and (f'\n', '<br>')
search_ = ((r'(<)', '<'), (r'(>)', '>'), (r'(&)', '&'))
## Read and loop our file location. Our location is the same one that our python file is in.
for loc in listdir(abspath(dirname(__file__))):
## Here we split our filename into it's parts ('fileName', '.txt')
name = splitext(loc)
if name[1] == '.txt':
## we found our .txt file so we can start file operations.
with open(loc, 'r') as file_1, open(f'{name[0]}(fixed){name[1]}', 'w') as file_2:
## read our first file
retFile = file_1.read()
## find and replace some text.
for find_ in search_:
retFile = sub(find_[0], find_[1], retFile, 0, MULTILINE)
## finally we can write to our newly created text file.
file_2.write(retFile)
This also works for reading multiple files. My file names are federalist_1.txt, federalist_2.txt, and so on, up to federalist_84.txt (84 files in total), and I'm reading each file as f:
for number in range(1, 85):          # federalist_1.txt .. federalist_84.txt
    with open(f'federalist_{number}.txt', 'r') as f:
        f.read()
