How to rename unzipped files in Python? - python

I have the following structure:
Folder1
ZZ-20201201-XX.zip
Folder2
XX-20201201-XX.zip
XX-20201202-XX.zip
Folder3
YY-20201201-XX.zip
YY-20201202-XX.zip
With the code below I'm creating a counterpart of Folder1, Folder2 and Folder3 and directly unzipping the zipped files inside those 3 folders. This is what I get:
Folder1
ZZ-.txt
Folder2
XX-.txt
Folder3
YY.txt
As you can see, the files lose the date once they get unzipped, so if a folder contains 2 zipped files, the unzipped files get the same name and are overwritten. Now I want to add the date of each zipped file to its files once they are unzipped. How can I do this?
import fnmatch
pattern = '*.zip'
for root, dirs, files in os.walk(my_files):
    zip_names = fnmatch.filter(files, pattern)
    for filename in zip_names:
        path = os.path.join(root, filename)
        # <-- date of the zipped file; this is what should end up in the
        # names of the unzipped files once they get unzipped.
        date_zipped_file = re.search('-(.\d+)-', filename).group(1)
        # Folder of the archive relative to my_files, so it can be recreated
        relative_zip = os.path.relpath(path, start=my_files)
        new_dir = os.path.normpath(os.path.join(relative_zip, ".."))
        # Same folder, but below the target directory
        new = os.path.join(counter_part, new_dir)
        # makedirs also builds missing intermediate folders
        if not os.path.exists(new):
            os.makedirs(new)
        zipfile.ZipFile(path).extractall(new)
my desired outcome:
Folder1
ZZ-20201201.txt
Folder2
XX-20201201.txt
XX-20201202.txt
Folder3
YY-20201201.txt
YY-20201202.txt

You could just rename the files after you have unzipped each folder. Something like this:
# everything that currently sits in the unzipped folder
files = os.listdir(path)
# put the archive date between the base name and the extension of each entry
for file in files:
    stem, extension = os.path.splitext(os.path.basename(file))
    dated_name = stem + '_' + date_zipped_file + extension
    os.rename(os.path.join(path, file), os.path.join(path, dated_name))
However, that also renames files that already have a date in their name. So you would also need to add a check for whether a file was already renamed, either by maintaining a list of file names or with a simple regex that looks for an 8-digit string between a '_' and a '.', e.g. text_20201207.txt.
# get all files in that unzipped folder
files = os.listdir(path)
# rename only the files that do not carry a date yet
for file in files:
    filesplit = os.path.splitext(os.path.basename(file))
    # The dot is escaped so that only "_<8 digits>." counts as already dated;
    # an unescaped "." would accept any character after the digits.
    if not re.search(r'_\d{8}\.', file):
        os.rename(os.path.join(path, file), os.path.join(path, filesplit[0]+'_'+date_zipped_file+filesplit[1]))
your final solution would then look something like this:
import fnmatch
import os
import re
import zipfile

pattern = '*.zip'
# Date embedded in the archive name, e.g. "XX-20201201-XX.zip" -> "20201201".
zip_date_re = re.compile(r'-(\d{8})-')
# A "_<8 digits>." part means the file was already tagged by an earlier archive.
already_dated_re = re.compile(r'_\d{8}\.')

for root, dirs, files in os.walk(my_files):
    for filename in fnmatch.filter(files, pattern):
        path = os.path.join(root, filename)
        # <-- this is the date of the zipped file; it is added to the names of
        # the unzipped files. None when the archive name carries no date.
        date_match = zip_date_re.search(filename)
        date_zipped_file = date_match.group(1) if date_match else None
        # Folder of the archive relative to my_files, so that it can be recreated
        new_dir = os.path.relpath(root, start=my_files)
        # Join your target directory with the newly created directory
        new = os.path.join(counter_part, new_dir)
        # Create those folders, works even with nested folders
        os.makedirs(new, exist_ok=True)
        # The context manager closes the archive again after extraction
        with zipfile.ZipFile(path) as archive:
            archive.extractall(new)
        if date_zipped_file is None:
            # Nothing to tag the extracted files with
            continue
        # rename all not-yet-dated files in that folder
        for file in os.listdir(new):
            extracted = os.path.join(new, file)
            # Leave sub-folders alone and skip files that already carry a date
            if not os.path.isfile(extracted) or already_dated_re.search(file):
                continue
            filesplit = os.path.splitext(file)
            os.rename(extracted, os.path.join(new, filesplit[0]+'_'+date_zipped_file+filesplit[1]))

Related

Find directories missing .csv file in Python

I have ~1000 directories, containing various .csv files within them. I am trying to check if a specific type of csv file, containing a filename that begins with PTSD_OCOTBER, exists in each directory.
If this file does not exist in the directory, I want to print out that directory into a .txt file.
Here is what I have so far.
import os,sys,time,shutil
import subprocess
# Walks rootdir and looks at every directory whose path contains "GeneSet".
# The per-file check itself is not written yet (see the open questions below).
#determine filetype to look for.
file_type = ".csv"
# NOTE(review): there is no space after "for", so this prints "...counter for'.csv'".
print("Running file counter for" + repr(file_type))
#for each folder in the root directory
for subdir, dirs, files in os.walk(rootdir):
if("GeneSet" in subdir):
# Text after the last '/' of the directory path (assumes '/' as separator).
folder_name = subdir.rsplit('/', 1)[-1] #get the folder name.
for f in files:
#unclear how to write this part.
#how to tell if no files exist in directory?
This successfully finds the .csv files of interest, but how do I achieve the above?
So files is the list of files in that directory that you are currently walking. You want to know if there are no files that start with PTSD_OCOTBER (PTSD_OCTOBER ?):
# For every directory below rootdir whose path contains "GeneSet", decide
# whether it lacks a file whose name starts with 'PTSD_OCOTBER'.
for subdir, dirs, files in os.walk(rootdir):
if("GeneSet" in subdir):
# Text after the last '/' of the directory path (assumes '/' as separator).
folder_name = subdir.rsplit('/', 1)[-1] #get the folder name.
# True when no file name in this directory starts with the prefix.
# NOTE(review): only the prefix is tested, not the .csv extension.
dir_of_interest = not any(f.startswith('PTSD_OCOTBER') for f in files)
if dir_of_interest:
# do stuff with folder_name
Now you want to save the results into a text file? If you have a Unix-style computer, then you can use output redirection on your terminal, such as
python3 fileanalysis.py > result.txt
after writing print(folder_name) instead of # do stuff with folder_name.
Or you can use Python itself to write the file, such as:
# Names of all directories that were flagged while walking the tree
found_dirs = []
for subdir, dirs, files in os.walk(rootdir):
    ...
    if not dir_of_interest:
        continue
    found_dirs.append(folder_name)

# One directory name per line
report = '\n'.join(found_dirs)
with open('result.txt', 'w') as result_file:
    result_file.write(report)

python unzip all files to parent directory

How can I extract all the .zip files in a certain directory to its parent directory?
I tried:
import zipfile
# Folder holding the archives, and the folder one level up that receives their content
directory = '../input/zip'
parent_directory = '../input'

for entry in os.listdir(directory):
    archive_path = os.path.join(directory, entry)
    # Opened read-only; closed again when the with-block ends
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(parent_directory)
However the unzipped files are not saved in '..input/zip', they are saved in nested folders
This might be a bit exaggerated.
After files are unzipped, I run this to:
move the original .zip file up one directory level (to avoid the "'/src_filename' already exists" error).
move all files from all subdirectories into the zip parent directory.
move the original .zip file back into the parent directory.
import os
import shutil
# Folder the archive was unzipped into, and the folder that should receive its files
src = r'C:\Users\Owner\Desktop\PythonZip\PyUnzip01\child_dir\unzip_test2'
dest = r'C:\Users\Owner\Desktop\PythonZip\PyUnzip01\child_dir'

# The archive sits next to src; it is parked one level above dest while files are moved
zip_name = os.path.basename(src) + '.zip'
zip_in_dest = os.path.join(dest, zip_name)
zip_parked = os.path.join(os.path.dirname(dest), zip_name)

os.replace(zip_in_dest, zip_parked)
try:
    # Pull every file out of src and its sub-folders straight into dest
    for root, subdirs, files in os.walk(src):
        for file in files:
            shutil.move(os.path.join(root, file), dest)
finally:
    # Put the archive back even if one of the moves failed
    os.replace(zip_parked, zip_in_dest)

how to combine all the files of one directory of one extension into one folder

How can I combine all PDF files of one directory (these PDFs can be at different depths of the directory tree) into one new folder?
I have tried this:
# Walks root_with_files and builds a path below new_root for every file found.
new_root = r'C:\Users\me\new_root'
# NOTE(review): a raw string literal cannot end with a single backslash, so
# this line does not parse as written.
root_with_files = r'C:\Users\me\all_of_my_pdf_files\'
for root, dirs, files in os.walk(root_with_files):
for file in files:
# os.path.join only builds a path string; the result is discarded here.
os.path.join(new_root, file)
but it doesn't add anything to my folder
You may try this:
import os
import shutil

new_root = r'C:\Users\me\new_root'
root_with_files = r'C:\Users\me\all_of_my_pdf_files'

# If new_root did not exist, shutil.copy would treat it as a file name and
# write every PDF over the same file, so make sure the folder is there first.
os.makedirs(new_root, exist_ok=True)

# Visit every folder below root_with_files, however deeply nested
for root, dirs, files in os.walk(root_with_files):
    for file in files:
        if file.lower().endswith('.pdf'):  # .pdf files only
            shutil.copy(os.path.join(root, file), new_root)
Your code doesn't move any files to the new folder. You can move your files using os.replace(src, dst).
try this:
import os

new_root = r'C:\Users\me\new_root'
# No trailing backslash: a raw string literal cannot end with a single backslash
root_with_files = r'C:\Users\me\all_of_my_pdf_files'

# os.replace needs the target folder to exist
os.makedirs(new_root, exist_ok=True)

for root, dirs, files in os.walk(root_with_files):
    for file in files:
        # Only PDF files are wanted; everything else stays where it is
        if file.lower().endswith('.pdf'):
            os.replace(os.path.join(root, file), os.path.join(new_root, file))

How to rename multiple files in a directory using os walk and split in PYTHON?

I need to create a copy of a directory tree called "caritemscopy" where all files, instead of being in directories named after years, rather have the year as part of the filename, and the year directories are entirely absent
My directory currently looks like this
After coding my directory should looks like this
It lists all files under path, then renames each one and saves it directly in path, and removes the directories that end up empty.
import os

path = 'path_of_folder/F26/'


def flatten_year_dirs(path):
    """Move .txt files out of their sub-folders of *path*, prefixing the folder name.

    A file ``<path>/<folder>/<name>.txt`` becomes ``<path>/<folder>-<name>.txt``.
    Folders that are empty afterwards are removed. Files that already sit
    directly in *path* have no folder to take a prefix from and are left alone.
    """
    # Collect first and rename afterwards, so the tree is not changed while walking it.
    txt_files = []
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            if name.endswith('.txt'):
                txt_files.append(os.path.join(dirpath, name))

    top = os.path.normpath(path)
    for src in txt_files:
        parent_dir, name = os.path.split(src)
        if os.path.normpath(parent_dir) == top:
            continue
        folder_name = os.path.basename(parent_dir)
        os.rename(src, os.path.join(path, folder_name + '-' + name))
        # Drop the folder once its last file has been moved out
        if not os.listdir(parent_dir):
            os.rmdir(parent_dir)


flatten_year_dirs(path)

How to Insert String to add in subfolders name in Python

Please help, I am new to Python. I have multiple files with the .wmv extension in different subfolders of a root directory. The extension is replaced with .mp4, and a folder is created whose name is based on the filename, with everything from the '#' onward removed. How can I insert the string 'DATA_' at the start of the folder name? All newly created folders should start with "DATA_". The MP4 files will be moved into the newly created folder.
example:
Directory: D:\GE-J1N-2991
Files found located at:
D:\GE-J1N-2991\1-ZWC4\20160705051835547#DXV_Ch1.wmv
D:\GE-J1N-2991\2-QWD4\20160705051836647#DXV_Ch1.wmv
D:\GE-J1N-2991\2-QWD4\34-QWERD\20160705084433078#DXV_Ch1.wmv
New created folder:
D:\GE-J1N-2991\1-ZWC4\20160705051835547\
D:\GE-J1N-2991\2-QWD4\20160705051836647\
D:\GE-J1N-2991\2-QWD4\34-QWERD\20160705084433078\
It should be renamed like this:
D:\GE-J1N-2991\1-ZWC4\DATA_20160705051835547\20160705051835547#DXV_Ch1.mp4
D:\GE-J1N-2991\2-QWD4\DATA_20160705051836647\20160705051836647#DXV_Ch1.mp4
D:\GE-J1N-2991\2-QWD4\34-QWERD\DATA_20160705084433078\20160705084433078#DXV_Ch1.mp4
Code:
import os, glob, shutil, fnmatch
# Root folder to process. It becomes the current working directory, which is
# what locate_WMV searches when it is called without an explicit root.
folder = 'D:\\GE-J1N-2991'
os.chdir(folder)
def locate_WMV(pattern, root=os.curdir):
    """Yield the absolute path of every file below *root* whose name matches *pattern*.

    *pattern* is an fnmatch-style wildcard applied to the bare file name;
    *root* defaults to the current directory.
    """
    start = os.path.abspath(root)
    for folder_path, _subfolders, names in os.walk(start):
        matching = fnmatch.filter(names, pattern)
        yield from (os.path.join(folder_path, name) for name in matching)
# First pass: give every .wmv file an .mp4 extension instead.
for asf in locate_WMV("*.wmv"):
    os.rename(asf, asf[:-4] + ".mp4")

# Second pass: move each renamed file into a folder named after the part of
# its path before the '#'. The pattern uses "DXV" to match the listed file names.
for f in locate_WMV("*#DXV_Ch1.mp4"):
    new_dir = f.rsplit('#', 1)[0]
    # exist_ok keeps a second run from failing on folders that are already there
    os.makedirs(new_dir, exist_ok=True)
    shutil.move(f, os.path.join(new_dir, os.path.basename(f)))
If I use this code, the folder is renamed but it is created in the directory D:\GE-J1N-2991, which is wrong; it should be created where the file was found.
# NOTE(review): f.rsplit('#', 1)[0] is the path up to the '#'; [20:] then cuts
# off its first 20 characters, so leading directory components are lost and the
# name is no longer inside the file's own folder - presumably why the folder
# ends up in the working directory.
new_dir2 = 'DATA_'+ f.rsplit('#', 1)[0].strip()[20:]

Categories