Batch Renaming of Files in sub-directories based on a .xlsx

Batch Renaming of Files in sub-directories based on a .xlsx - python

There is a directory in C:\Users\abcd\video
Inside the directory, there are a lot of .mp4 files.
How do I rename all .mp4 files based on an Excel sheet that contains following information using Python:
For example, the current filename is A.mp4. I would like to rename it to 1.mp4.

Note:
If you want to use an Excel file, with the extension .xlsx
First install xlrd, a dependency for reading Excel files, using the following command from the command line:
pip install xlrd
Code:
import os
import pandas as pd
# Folder that holds both the videos and the mapping workbook
video_dir = "C:\\Users\\abcd\\video"
# Read your renaming data; header=None because the first row is data, not column titles
mapping = pd.read_excel(os.path.join(video_dir, "map.xlsx"), header=None)
# Create a dictionary for easier access: first column -> second column
dictionary = dict(zip(mapping[mapping.columns[0]], mapping[mapping.columns[1]]))
# Renaming if filename ends with .mp4
for filename in os.listdir(video_dir):
    if not filename.endswith(".mp4"):
        continue
    stem = filename[:-4]  # name without the ".mp4" suffix
    if stem not in dictionary:
        # No row for this file in the workbook: leave it untouched
        print("No mapping for " + filename + ", skipped")
        continue
    src = os.path.join(video_dir, filename)
    # Build the full target path; passing only the bare new name to
    # os.rename would move the file into the current working directory
    dst = os.path.join(video_dir, str(dictionary[stem]) + ".mp4")
    os.rename(src, dst)
Edit2:
You can use Python 3.4+ Path/pathlib to iterate recursively through any folder:
import os
import pandas as pd
from pathlib import Path
root = "C:\\Users\\abcd\\video\\"
# Read your renaming data; header=None because the first row is data, not column titles
dst = pd.read_excel('C:\\Users\\abcd\\video\\map.xlsx', header=None)
# Create a dictionary for easier access: first column -> second column
dictionary = dict(zip(dst[dst.columns[0]], dst[dst.columns[1]]))
# Recursively reading all .mp4 files; the list is built before any rename happens
files = list(Path(root).glob('**/*.mp4'))
for filename in files:
    src = str(filename)
    # Path.stem is the file name without its suffix, and with_name() replaces
    # only the last path component, so the file stays in its own folder no
    # matter which path separator or trailing slash `root` uses
    dest = str(filename.with_name(str(dictionary[filename.stem]) + ".mp4"))
    os.rename(src, dest)

To solve your problem you can use the following approach (string formatting for python 3.6+):
import glob
import os
import pandas as pd
def rename_files(parent_dir, files_extension, filenames_map):
    """
    Rename files with specified extension located in parent directory or any of its subfolders

    Every matching file is looked up in the mapping before anything is
    renamed, so a missing key leaves all files untouched.

    :param parent_dir: Path to subfolder's parent directory
    :param files_extension: File name suffix to match, e.g. '.mp4'
    :param filenames_map: Mapping from initial filenames (without extension) to target filenames
    :raises KeyError: If a matching file's name has no entry in filenames_map
    :return: None
    """
    files_template = os.path.join(parent_dir, f'**/*{files_extension}')
    planned_renames = []
    for file_path in glob.glob(pathname=files_template, recursive=True):
        base_dir, filename_with_ext = os.path.split(file_path)
        filename, extension = os.path.splitext(filename_with_ext)
        try:
            new_filename = filenames_map[filename]
        except KeyError as error:
            # KeyError is still an Exception, so existing broad handlers keep working
            raise KeyError(f"There's no {filename} key in filenames mapping") from error
        new_file_path = os.path.join(base_dir, f'{new_filename}{extension}')
        planned_renames.append((file_path, new_file_path))
    # Only touch the file system once every lookup has succeeded
    for file_path, new_file_path in planned_renames:
        os.rename(file_path, new_file_path)
# Workbook with the name mapping, folder to process and suffix to match
filenames_map_file_path = r'C:\Users\abcd\video\filenames_map.xlsx'
parent_files_dir_path = r'C:\Users\abcd\video'
extension = '.mp4'
# Load the sheet without a header row, then turn column 0 -> column 1 into a dict
mapping_frame = pd.read_excel(io=filenames_map_file_path, header=None)
mapping_series = mapping_frame.set_index(0)[1]
filenames_map = mapping_series.to_dict()
rename_files(
    parent_dir=parent_files_dir_path,
    files_extension=extension,
    filenames_map=filenames_map,
)

Related

Python/Pandas Walk through directory and save all foldernames subfolder and file to excel

I want to save all directory info. (path, folder, subfolder, and files) to an excel spreadsheet using Pandas.
Here is my code so far:
import os
import pandas as pd
# setup the paths
root_path = os.path.join(os.path.expanduser("~"), 'Desktop/')
test_path = os.path.join(root_path, 'Test Dir')
# test_path is already built from root_path, so it is the directory to walk
directory_path = test_path
if not os.path.exists(directory_path):
    raise IOError("Failed to find directory '%s'." % directory_path)
files_list = []
rows = []
for path, dirs, files in os.walk(test_path):
    for file in files:
        files_list.append(os.path.join(path, file))
        # NOTE(review): 'directory' holds the name of the folder containing
        # the file; the original assigned os.walk's `dirs` list here,
        # confirm the intended content of this column
        rows.append({'path': path,
                     'directory': os.path.basename(path),
                     'file_name': file})
# Build an actual DataFrame instance: one row per file found
dfFiles = pd.DataFrame(rows, columns=['path', 'directory', 'file_name'])
# Write the directory walk out to excel; the with-block saves and closes the workbook
with pd.ExcelWriter(os.path.join(test_path, 'read_directory_to_excel.xlsx'),
                    engine='xlsxwriter') as xlWriterOutput:
    dfFiles.to_excel(xlWriterOutput, header=True, sheet_name='Directory Output', index=False)
I started out with a list but then began moving my solution to Pandas and ExcelWriter. I get the error "TypeError: 'type' object does not support item assignment" on the line where I attempt to set dfFiles['path'] = path. I need some help at this point.

you can use pathlib module:
from pathlib import Path
import pandas as pd
inp_path = Path('.') # specify the path here
# One row per entry found recursively under inp_path; paths are stored as
# plain strings rather than Path objects so they can be written as cell values
df = pd.DataFrame([
    {
        'parent': str(f.absolute().parent),
        'full_path': str(f.absolute()),
        'relative_path': str(f),
        'file_name_without_extension': f.stem,
        'file_name_with_extension': f.name,
    }
    for f in inp_path.glob('**/*')
])
# The target needs a real Excel suffix (.xlsx) for pandas to pick a writer engine
df.to_excel('specify the excel sheet path here.xlsx', index=False)
Here:
parent will give the parent directory info.
absolute will give the absolute path
stem will give the file name without extension
name will give the name of the file.
NOTE: If you want only file information you can add an if condition in list comprehension : if f.is_file().

How to copy a bunch of files with same name to a folder?

I have some files with different extension.
These files are located in different folders and has same names.
I'd like to copy these files and rename them at the same time.
i have:
ti.txt
ti.xlsx
ti.pdf
and I would like to rename it to:
Archive_1.txt
Archive_2.xlsx
Archive_3.pdf
PS: I would have several files of the same type in this new folder
I researched and found the Shutil library to make the code, but I'm having trouble. Would anyone have any ideas to start with?
This is what i've tried
# importing os module
import os
# importing shutil module
import shutil
# Folder that holds the files to copy
path = r'D:\Usuarios\0025429\Desktop\old_folder'
# List files and directories in the source folder
print("Before copying file:")
print(os.listdir(path))
# Source paths, kept in a list so that every file is copied
# (re-assigning a single `source` variable keeps only the last path)
sources = [
    r"D:\Usuarios\0025429\Desktop\old_folder\IQ.txt",
    r"D:\Usuarios\0025429\Desktop\old_folder\IQ.xlsx",
    r"D:\Usuarios\0025429\Desktop\old_folder\IQ.docx",
]
# Destination path
destination = r"D:\Usuarios\0025429\Desktop\new_folder"
for source in sources:
    # Print file permission of the source
    perm = os.stat(source).st_mode
    print("File Permission mode:", perm, "\n")
    # Copy the content of source to destination; shutil.copy returns the new file's path
    dest = shutil.copy(source, destination)
    # Print file permission of the copied file (not of the destination folder)
    perm = os.stat(dest).st_mode
    print("File Permission mode:", perm)
    # Print path of newly created file
    print("Destination path:", dest)
# List files and directories in the destination folder
print("After copying file:")
print(os.listdir(destination))

You can use pathlib to work with filesystem and copy() from shutil to copy each file.
from shutil import copy
from pathlib import Path
# Source and target folders, relative to the current working directory
src = Path(r".\dir1")
dst = Path(r".\dir2")
idx = 0
# Only regular files take part; sub-directories are ignored
regular_files = (entry for entry in src.iterdir() if entry.is_file())
for idx, file in enumerate(regular_files, start=1):
    # Archive_<n> followed by the original file's extension
    archive_stem = dst / f"Archive_{idx}"
    copy(file, archive_stem.with_suffix(file.suffix))
To rename only files with given name use next code:
from shutil import copy
from pathlib import Path
# Source folder, the file name (without extension) to pick, and target folder
src = Path(r".\dir1")
filename = "123"
dst = Path(r".\dir2")
idx = 0
# Only regular files whose stem equals `filename` take part
selected = (
    entry for entry in src.iterdir()
    if entry.is_file() and entry.stem == filename
)
for idx, file in enumerate(selected, start=1):
    # Archive_<n> followed by the original file's extension
    archive_stem = dst / f"Archive_{idx}"
    copy(file, archive_stem.with_suffix(file.suffix))

Search and copy files listed in a dataframe

Hi I'm working on a simple script that copy files from a directory to another based on a dataframe that contains a list of invoices.
Is there any way to do this as a partial match? like i want all the files that contains "F11000", "G13000" and go on continue this loop until no more data in DF.
I tried to figure it out by myself and I'm pretty sure changing the "X" on the copy function will do the trick, but can't see it.
import pandas as pd
import os
import glob
import shutil
data = {'Invoice':['F11000','G13000','H14000']}
# The column list must name a key of `data`; asking for 'Doc' yields a frame
# without an 'Invoice' column
df = pd.DataFrame(data, columns=['Invoice'])
path = 'D:/Pyfilesearch'
dest = 'D:/Dest'


def find_all(name, path):
    """Yield the full path of every file under `path` whose file name contains `name`."""
    for root, dirs, files in os.walk(path):
        for file_name in files:
            if name in file_name:
                yield os.path.join(root, file_name)


def find(name, path):
    """Return the first file under `path` whose file name contains `name`, or None if there is none."""
    return next(find_all(name, path), None)


def copy():
    """Copy every file whose name contains a value of df['Invoice'] into `dest`."""
    for x in df['Invoice']:
        # Iterating the matches means a value without any file copies nothing
        # instead of handing None to shutil.copy
        for match in find_all(x, path):
            shutil.copy(match, dest)


copy()

Using pathlib
This is part of the standard library
Treats paths and objects with methods instead of strings
Python 3's pathlib Module: Taming the File System
Script assumes dest is an existing directory.
.rglob searches subdirectories for files
from pathlib import Path
import pandas as pd
import shutil
# Search root and target folder as pathlib objects
path = Path('D:/Pyfilesearch')
dest = Path('D:/Dest')
# For each distinct invoice value, copy every file whose name contains it
for v in df.Invoice.unique():
    # rglob also descends into subdirectories; matches that are not files are dropped
    files = [f for f in path.rglob(f'*{v}*') if f.is_file()]
    for f in files:
        print(f)
        shutil.copy(f, dest)
Copy to subdirectories for each value
path = Path('D:/Pyfilesearch')
for v in df.Invoice.unique():
    # Files anywhere under `path` whose name contains the value
    files = [f for f in path.rglob(f'*{v}*') if f.is_file()]
    # Each value gets its own folder below D:/Dest
    dest = Path('D:/Dest') / v
    if not dest.exists():
        dest.mkdir(parents=True)
    for f in files:
        shutil.copy(f, dest)

Zip Multiple files with multiple result in Python

Good Day!.
I would like to ask how can you convert a list of ".xlsx(excel)" file from specific folder location to ".zip" files.
Example:
Path:= C:\My_Program\zip_files
Inside my zip_file folder i have multiple ".xlsx" files.
Test1.xlsx
Test2.xlsx
Test3.xlsx
and i want the output to be in same folder but zip individually.
Output:
Test1.zip
Test2.zip
Test3.zip
I hope somebody can help me; I am new to Python 2 and Python 3.

You have standard module zipfile to create ZIP, and glob.glob() or os.listdir() or os.walk() to get filenames in folder.
EDIT: this should work (it works for me on Linux)
import os
import zipfile
folder = 'C:\\My_Program\\zip_files'
for filename in os.listdir(folder):
    # splitext separates the extension without relying on its length
    name_without_extension, extension = os.path.splitext(filename)
    if extension != '.xlsx':
        continue
    xlsx_path = os.path.join(folder, filename)
    zip_path = os.path.join(folder, name_without_extension + '.zip')
    # The with-block closes the archive even if writing fails
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        # use `filename` (without folder name) as name inside archive
        # and it will not create folders inside archive
        zip_file.write(xlsx_path, filename)
EDIT: the same with glob
import os
import glob
import zipfile
folder = 'C:\\My_Program\\zip_files'
for file_path in glob.glob(folder+'\\*.xlsx'):
    filename = os.path.basename(file_path)
    print(filename)
    # splitext separates the extension without relying on its length
    name_without_extension = os.path.splitext(filename)[0]
    print(name_without_extension)
    xlsx_path = os.path.join(folder, filename)
    zip_path = os.path.join(folder, name_without_extension + '.zip')
    # The with-block closes the archive even if writing fails
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        # use `filename` (without folder name) as name inside archive
        # and it will not create folders inside archive
        zip_file.write(xlsx_path, filename)

Extract files from zip without keeping the structure using python ZipFile?

I am trying to extract all files from a .zip containing subfolders into one folder. I want all the files from the subfolders extracted into a single folder without keeping the original structure. At the moment, I extract everything, move the files to a folder, then remove the leftover subfolders. Files with the same name are overwritten.
Is it possible to do it before writing files?
Here is a structure for example:
my_zip/file1.txt
my_zip/dir1/file2.txt
my_zip/dir1/dir2/file3.txt
my_zip/dir3/file4.txt
In the end I wish to have this:
my_dir/file1.txt
my_dir/file2.txt
my_dir/file3.txt
my_dir/file4.txt
What can I add to this code ?
import zipfile
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
# The with-block closes the archive even if an extraction fails
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        # extract() recreates the member's stored folder structure below my_dir
        zip_file.extract(files, my_dir)
if I rename files path from zip_file.namelist(), I have this error:
KeyError: "There is no item named 'file2.txt' in the archive"

This opens file handles of members of the zip archive, extracts the filename and copies it to a target file (that's how ZipFile.extract works, without taking care of subdirectories).
import os
import shutil
import zipfile
my_dir = r"D:\Download"
my_zip = r"D:\Download\my_file.zip"
with zipfile.ZipFile(my_zip) as zip_file:
    for member in zip_file.namelist():
        # Keep only the last path component; it is empty for directory entries
        filename = os.path.basename(member)
        if filename:
            destination_path = os.path.join(my_dir, filename)
            # Stream the member's bytes straight into the flat target file
            source = zip_file.open(member)
            target = open(destination_path, "wb")
            with source, target:
                shutil.copyfileobj(source, target)

It is possible to iterate over the ZipFile.infolist(). On the returned ZipInfo objects you can then manipulate the filename to remove the directory part and finally extract it to a specified directory.
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
with zipfile.ZipFile(my_zip) as archive:
    # Entries whose stored name ends in '/' are skipped
    file_entries = [
        entry for entry in archive.infolist()
        if not entry.filename.endswith('/')
    ]
    for entry in file_entries:
        # Strip the folder part so extract() writes directly into my_dir
        entry.filename = os.path.basename(entry.filename)
        archive.extract(entry, my_dir)

Just extract to bytes in memory,compute the filename, and write it there yourself,
instead of letting the library do it - -mostly, just use the "read()" instead of "extract()" method:
Python 3.6+ update (2020): the same code as in the original answer, but using pathlib.Path, which eases file-path manipulation and other operations (like "write_bytes")
from pathlib import Path
import zipfile
import os
my_dir = Path("D:\\Download\\")
my_zip = my_dir / "my_file.zip"
# The with-block closes the archive even if reading or writing fails
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    # namelist() yields plain strings, one per archive member
    for files in zip_file.namelist():
        # Directory entries end with "/" and have no content to write
        if files.endswith('/'):
            continue
        # read() takes the member name; its optional second argument is a
        # password, not a target directory
        data = zip_file.read(files)
        # Keep only the final path component so every file lands in my_dir
        myfile_path = my_dir / Path(files).name
        myfile_path.write_bytes(data)
Original code in answer without pathlib:
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
# The with-block closes the archive even if reading or writing fails
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        # Directory entries end with "/" and have no content to write
        if files.endswith("/"):
            continue
        # read() takes the member name; its optional second argument is a
        # password, not a target directory
        data = zip_file.read(files)
        # ZIP member names use "/" as the directory separator regardless of OS,
        # so the last "/"-separated part is the bare file name
        myfile_path = os.path.join(my_dir, files.split("/")[-1])
        with open(myfile_path, "wb") as myfile:
            myfile.write(data)

A similar concept to the solution of Gerhard Götz, but adapted for extracting single files instead of the entire zip:
with ZipFile(zipPath, 'r') as zipObj:
    # Look up the archive member to extract
    zipInfo = zipObj.getinfo(path_in_zip)
    # Replace the stored name with the destination's file name so that
    # extract() writes the member without its archive folders
    zipInfo.filename = os.path.basename(destination)
    zipObj.extract(zipInfo, os.path.dirname(os.path.realpath(destination)))

In case you are getting a BadZipFile error, you can unzip the archive using a 7-Zip subprocess. Assuming you have installed 7-Zip, use the following code.
import subprocess
my_dir = destFolder #destination folder
# NOTE(review): assumes `filename` is the archive's base name without ".zip"; confirm
my_zip = destFolder + "/" + filename + ".zip" #file you want to extract
ziploc = "C:/Program Files/7-Zip/7z.exe" #location where 7zip is installed
cmd = [ziploc, 'e',my_zip ,'-o'+ my_dir ,'*.txt' ,'-r' ]
#extracting only txt files and from all subdirectories
sp = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
# Read the piped output and wait for the process to end; without this the
# pipe is never drained and the exit status is never collected
output, _ = sp.communicate()
if sp.returncode != 0:
    print(output.decode(errors="replace"))

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Batch Renaming of Files in sub-directories based on a .xlsx - python

There is a directory in C:\Users\abcd\video Inside the directory, there are a lot of .mp4 files. How do I rename all .mp4 files based on an Excel sheet that contains following information using Python: For example, the current filename is A.mp4. I would like to rename it to 1.mp4.

Related

Python/Pandas Walk through directory and save all foldernames subfolder and file to excel

How to copy a bunch of files with same name to a folder?

Search and copy files listed in a dataframe

Zip Multiple files with multiple result in Python

Extract files from zip without keeping the structure using python ZipFile?

Categories

Resources