python unzip all files to parent directory - python

How can I extract all the .zip files in a certain directory to its parent directory?
I tried:
import os
import zipfile
parent_directory = '../input'
directory = '../input/zip'
# Extract every .zip archive found in `directory` into `parent_directory`.
for f in os.listdir(directory):
    # Skip entries that are not .zip files; ZipFile would raise on them.
    if not f.lower().endswith('.zip'):
        continue
    with zipfile.ZipFile(os.path.join(directory, f), "r") as z:
        # extractall recreates the folder structure stored inside the archive.
        z.extractall(parent_directory)
However, the unzipped files are not saved in '../input/zip'; they are saved in nested folders.

This might be a bit exaggerated.
After files are unzipped, I run this to:
move the original .zip file up one directory level (to avoid the "'/src_filename' already exists" error).
move all files from all subdirectories into the zip parent directory.
move the original .zip file back into the parent directory.
import os
import shutil
src = r'C:\Users\Owner\Desktop\PythonZip\PyUnzip01\child_dir\unzip_test2'
dest = r'C:\Users\Owner\Desktop\PythonZip\PyUnzip01\child_dir'
pdir = '../PyUnzip01'  # not referenced by the code below
zip_in_child = r"C:\Users\Owner\Desktop\PythonZip\PyUnzip01\child_dir\unzip_test2.zip"
zip_in_parent = r"C:\Users\Owner\Desktop\PythonZip\PyUnzip01\unzip_test2.zip"
# Park the archive one level up while the extracted files are moved into dest.
os.replace(zip_in_child, zip_in_parent)
try:
    # Move every file found anywhere below src directly into dest.
    for root, subdirs, files in os.walk(src):
        for file in files:
            shutil.move(os.path.join(root, file), dest)
finally:
    # Put the archive back even if one of the moves failed.
    os.replace(zip_in_parent, zip_in_child)

Related

Python zipfile and os

I have some code to export all files within a zipfile to a path but what I want to do is create a new folder with the same name as the zipfile minus the ".zip" just like the windows explorer option does. I have commented out the code that doesn't work. It seems to be the os.makedirs that doesn't work.
File "C:/Users/brentond/Documents/Python/Unzip all zip files in path.py", line 12
Output = os.path.join(path, filename.replace(".zip", "")) # get new folder path name
^
SyntaxError: invalid syntax
the code:
import os
import zipfile
# Define path of zip files to variable
path = r'C:\Users\brentond\Documents\TA2\HA GDMS'
for foldername, subfolders, filenames in os.walk(path):  # walk directory
    for filename in filenames:  # loop through files
        if filename.endswith(".zip"):  # find zip files
            filepath = os.path.join(foldername, filename)  # get zip file path
            #os.makedirs(os.path.join(path, filename.replace(".zip", "")) # create new folder same name as zip file
            #Output = os.path.join(path, filename.replace(".zip", "")) # get new folder path name
            # The with-block closes the archive even if extraction raises.
            with zipfile.ZipFile(filepath) as ZipRef:  # create zip file object
                ZipRef.extractall(path)  # extract all. This to put everything in the path folder
                #ZipRef.extractall(Output) # This to put the zip file contents into a folder with same name
I have resolved this now and simplified the code a little
import os
import zipfile
def extract_zips_to_named_folders(path):
    """Extract every .zip found under *path* into a folder named after the archive.
    The folder is created directly inside *path* and carries the archive's
    file name without its extension. Existing folders are reused, so the
    function can be run repeatedly.
    """
    for foldername, subfolders, filenames in os.walk(path):  # walk directory
        for filename in filenames:  # loop through files
            if not filename.endswith(".zip"):  # only zip files
                continue
            filepath = os.path.join(foldername, filename)  # get zip file path
            # splitext removes only the trailing extension; str.replace would
            # also strip a ".zip" appearing in the middle of the name.
            filefolder = os.path.splitext(filename)[0]
            Output = os.path.join(path, filefolder)  # get new folder path name
            os.makedirs(Output, exist_ok=True)  # create new folder same name as zip file
            with zipfile.ZipFile(filepath) as ZipRef:  # closed automatically
                ZipRef.extractall(Output)  # put the zip file contents into that folder
# Define path of zip files to variable
path = r'C:\Users\brentond\Documents\TA2\HA GDMS'
extract_zips_to_named_folders(path)

How to rename unzipped files in Python?

I have the following structure:
Folder1
ZZ-20201201-XX.zip
Folder2
XX-20201201-XX.zip
XX-20201202-XX.zip
Folder3
YY-20201201-XX.zip
YY-20201202-XX.zip
With the code below I'm creating a counterpart of Folder1, Folder2 and Folder3 and directly unzipping the zipped files inside those 3 folders. So I receive this:
Folder1
ZZ-.txt
Folder2
XX-.txt
Folder3
YY.txt
As you can see the files lose the date once they get unzipped so if a folder contains 2 zipped files they will get the same name and thus the files will be rewritten. Now I want to add the date of the zipped files to the files once they are unzipped. How can I do this?
import fnmatch
import os
import re
import zipfile
pattern = '*.zip'
for root, dirs, files in os.walk(my_files):
    for filename in fnmatch.filter(files, pattern):
        path = os.path.join(root, filename)
        # Raw string so the \d escape reaches the regex engine unchanged.
        date_zipped_file = re.search(r'-(.\d+)-', filename).group(1) #<-- this is the date of the zipped files and I want this to be included in the name of the unzipped files once they get unzipped.
        # Store the new directory so that it can be recreated
        new_dir = os.path.normpath(os.path.join(os.path.relpath(path, start=my_files), ".."))
        # Join your target directory with newly created directory
        new = os.path.join(counter_part, new_dir)
        # Create those folders, works even with nested folders
        os.makedirs(new, exist_ok=True)
        # The with-block closes the archive once its contents are extracted.
        with zipfile.ZipFile(path) as archive:
            archive.extractall(new)
my desired outcome:
Folder1
ZZ-20201201.txt
Folder2
XX-20201201.txt
XX-20201202.txt
Folder3
YY-20201201.txt
YY-20201202.txt
You could just rename the files after you have unzipped each folder. Something like this:
#get all files in that unzipped folder
files = os.listdir(path)
#rename all files in that dir
for file in files:
    # listdir returns bare names, so splitext can be applied to them directly
    stem, ext = os.path.splitext(file)
    os.rename(os.path.join(path, file), os.path.join(path, stem + '_' + date_zipped_file + ext))
but that also renames files which might actually have already a date in the name. So you would also need to integrate a check if the file was already renamed. Either by maintaining a list with file names or a simple regex which looks for 8 digit string between a '_' and a '.', e.g. text_20201207.txt.
#get all files in that unzipped folder
files = os.listdir(path)
#rename all files in that dir
for file in files:
    # listdir returns bare names, so splitext can be applied to them directly
    stem, ext = os.path.splitext(file)
    # Skip names that already carry "_<8 digits>."; the dot is escaped so it
    # matches a literal '.' rather than any character.
    if not re.search(r'_\d{8}\.', file):
        os.rename(os.path.join(path, file), os.path.join(path, stem + '_' + date_zipped_file + ext))
your final solution would then look something like this:
import fnmatch
import os
import re
import zipfile
pattern = '*.zip'
for root, dirs, files in os.walk(my_files):
    for filename in fnmatch.filter(files, pattern):
        path = os.path.join(root, filename)
        # Raw string so the \d escape reaches the regex engine unchanged.
        date_zipped_file = re.search(r'-(.\d+)-', filename).group(1) #<-- this is the date of the zipped files and I want this to be included in the name of the unzipped files once they get unzipped.
        # Directory of the archive relative to my_files, so that it can be recreated
        new_dir = os.path.relpath(root, start=my_files)
        # Join your target directory with newly created directory
        new = os.path.join(counter_part, new_dir)
        # Create those folders, works even with nested folders
        os.makedirs(new, exist_ok=True)
        # The with-block closes the archive once its contents are extracted.
        with zipfile.ZipFile(path) as archive:
            archive.extractall(new)
        #get all files in that unzipped folder
        files = os.listdir(new)
        #rename all files in that dir
        for file in files:
            stem, ext = os.path.splitext(file)
            # Only rename names that do not already carry "_<8 digits>."; the
            # dot is escaped so it matches a literal '.'.
            if not re.search(r'_\d{8}\.', file):
                os.rename(os.path.join(new, file), os.path.join(new, stem + '_' + date_zipped_file + ext))

how to combine all the files of one directory of one extension into one folder

How can I combine all the PDF files of one directory (the PDFs can be at different depths of the directory tree) into one new folder?
I have tried this:
import os
new_root = r'C:\Users\me\new_root'
# A raw string cannot end in a single backslash, so the trailing one is dropped.
root_with_files = r'C:\Users\me\all_of_my_pdf_files'
for root, dirs, files in os.walk(root_with_files):
    for file in files:
        # NOTE: this only builds a path string and discards it; no file is copied or moved.
        os.path.join(new_root, file)
but it doesn't add anything to my folder.
You may try this:
import os
import shutil
new_root = r'C:\Users\me\new_root'
root_with_files = r'C:\Users\me\all_of_my_pdf_files'
# shutil.copy treats a non-existent destination as a file name, so make sure
# the target folder exists before copying into it.
os.makedirs(new_root, exist_ok=True)
for root, dirs, files in os.walk(root_with_files):
    for file in files:
        if file.lower().endswith('.pdf'):  # .pdf files only
            shutil.copy(os.path.join(root, file), new_root)
Your code doesn't move any files to the new folder. You can move your files using os.replace(src, dst).
try this:
import os
new_root = r'C:\Users\me\new_root'
# A raw string cannot end in a single backslash, so the trailing one is dropped.
root_with_files = r'C:\Users\me\all_of_my_pdf_files'
for root, dirs, files in os.walk(root_with_files):
    for file in files:
        # Move only the PDF files; everything else stays where it is.
        if file.lower().endswith('.pdf'):
            os.replace(os.path.join(root, file), os.path.join(new_root, file))

How to Insert String to add in subfolders name in Python

Please help, I am new to Python. I have multiple files with the .wmv extension in different subfolders of a root directory. The extension is replaced with .mp4, and a folder is created whose name is based on the filename, with everything from '#' onward removed. How can I insert the string 'DATA_' at the start of the folder name? All newly created folders should start with "DATA_". The MP4 files will then be moved into the newly created folder.
example:
Directory: D:\GE-J1N-2991
Files found located at:
D:\GE-J1N-2991\1-ZWC4\20160705051835547#DXV_Ch1.wmv
D:\GE-J1N-2991\2-QWD4\20160705051836647#DXV_Ch1.wmv
D:\GE-J1N-2991\2-QWD4\34-QWERD\20160705084433078#DXV_Ch1.wmv
New created folder:
D:\GE-J1N-2991\1-ZWC4\20160705051835547\
D:\GE-J1N-2991\2-QWD4\20160705051836647\
D:\GE-J1N-2991\2-QWD4\34-QWERD\20160705084433078\
It should be renamed like this:
D:\GE-J1N-2991\1-ZWC4\DATA_20160705051835547\20160705051835547#DXV_Ch1.mp4
D:\GE-J1N-2991\2-QWD4\DATA_20160705051836647\20160705051836647#DXV_Ch1.mp4
D:\GE-J1N-2991\2-QWD4\34-QWERD\DATA_20160705084433078\20160705084433078#DXV_Ch1.mp4
Code:
import os, glob, shutil, fnmatch
# Root directory to scan; it becomes the working directory for the code below.
folder = 'D:\\GE-J1N-2991'
os.chdir(folder)
def locate_WMV(pattern, root=os.curdir):
    """Yield the path of every file below *root* whose name matches *pattern*.
    *pattern* is an fnmatch-style glob applied to bare file names; *root* is
    made absolute before walking, so the yielded paths are absolute too.
    """
    for path, _dirs, files in os.walk(os.path.abspath(root)):
        for filename in fnmatch.filter(files, pattern):
            yield os.path.join(path, filename)
# Step 1: give every .wmv file an .mp4 extension.
for asf in locate_WMV("*.wmv"):
    os.rename(asf, asf[:-4] + ".mp4")
# Step 2: move each renamed file into a folder named after the part of its
# path before '#'. The pattern uses "DXV", matching the file names listed above.
for f in locate_WMV("*#DXV_Ch1.mp4"):
    new_dir = f.rsplit('#', 1)[0]
    os.mkdir(new_dir)
    shutil.move(f, os.path.join(new_dir, os.path.basename(f)))
If I use this code, the folder is renamed but it is created in Directory: D:\GE-J1N-2991, which is wrong; it should be created where the file was found.
# NOTE(review): f is the full path yielded by locate_WMV, so [20:] cuts a fixed
# 20 characters off the front of that path string; presumably this is why the
# new folder loses its parent directory - confirm.
new_dir2 = 'DATA_'+ f.rsplit('#', 1)[0].strip()[20:]

Zip a directory so that it returns the actual directory when unpacked

I have a script that zips a directory using the following code:
def zipdir(path, ziph):
    """Add every file found below *path* to the open zip handle *ziph*.
    Each file is stored under its bare file name (arcname=file), so the
    archive holds no directory entries and unpacks as a flat list of files.
    """
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file), arcname=file)
# The with-block closes the archive when done; without close() the essential
# zip records are not written. The name avoids shadowing the builtin zip().
with zipfile.ZipFile('/path/to/directory.zip', 'w') as zip_handle:
    zipdir('/path/to/directory/', zip_handle)
When I unzip this I end up with the contents of the directory as opposed to the original directory containing the files. How should I change this so the unzipping returns the original directory with the files still inside?
ie.) unzip --> /unzip/location/directory/file_from_directory
as opposed to
unzip --> /unzip/location/file_from_directory
You're using file as arcname, but you want the file plus the deepest directory in the path:
for file in files:
    # normpath drops a trailing separator first; otherwise a root such as
    # '/path/to/directory/' would give an empty last component.
    deepest_dir = os.path.basename(os.path.normpath(root))
    fname = os.path.join(deepest_dir, file)
    ziph.write(os.path.join(root, file), arcname=fname)

Categories