I have a large number of zipped files in a single directory. I would like to decompress each one into the same directory, into a folder with the same name as the zipped file.
Start with something like:
import glob
import os
import zipfile
# Extract every ZIP archive found in the current directory into a sibling
# directory named after the archive (the name without its ".zip" extension).
zip_files = glob.glob('*.zip')
for zip_filename in zip_files:
    dir_name = os.path.splitext(zip_filename)[0]
    # A previous run may already have created the directory; os.mkdir would
    # raise in that case, so only create it when it is missing.
    if not os.path.isdir(dir_name):
        os.mkdir(dir_name)
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zip_filename, "r") as zip_handler:
        zip_handler.extractall(dir_name)
Sorry, I don't have time to test this code; any bug is left as an exercise for you.
[Updated with eumiro's suggestion]
Related
I have a folder called my_folder at the following Path /Users/my_user_name/Desktop/my_folder. The folder my_folder contains more folders like 323456, 987654 etc. Those folders contain some content. I want to create a zip of all those folders called myzip.zip such that when someone unzips it they see all those folders like 323456, 987654 at the root.
My Code
import os
from pathlib import Path
from zipfile import ZipFile
DOWNLOAD_DIR = Path("/Users/my_user_name/Desktop/my_folder")
ZIPPED_FILE_DIR = Path("/Users/my_user_name/Desktop/my_zip")
def get_list_of_all_folders(download_dir: Path):
    """Return the immediate sub-directories of *download_dir*.

    Args:
        download_dir: directory whose direct children are inspected.

    Returns:
        A list of ``Path`` objects, one per child that is a directory.
        Regular files are left out.
    """
    # The test must be made on each entry; checking download_dir itself is
    # always true and would let plain files through.
    return [entry for entry in download_dir.iterdir() if entry.is_dir()]
def zip_files():
    """Write every entry returned by get_list_of_all_folders into my_zip.zip.

    Each entry is handed to ``ZipFile.write`` as-is, one call per entry.
    """
    entries = get_list_of_all_folders(DOWNLOAD_DIR)
    archive_path = ZIPPED_FILE_DIR / "my_zip.zip"
    with ZipFile(archive_path, "w") as archive:
        # add the listed entries one at a time
        for entry in entries:
            archive.write(entry)


zip_files()
I have a function called get_list_of_all_folders where it goes to my_folder and gets a list of all the folders inside it that we want to zip. Then I use that folder_list to zip up each folder as part of my final zip called my_zip.zip. However there is something really wrong with my code and I am not sure what? The my_zip.zip is only 35 kb small when I know for a fact I am zipping up content over 2 gigabytes.
I looked at the zipfile document but did not find much help here as there are not many examples.
ZipFile.write expects to be supplied with the name of a file to write to the zip, not a folder.
You will need to iterate over the files in each folder and call write for each one. For example:
from pathlib import Path
from zipfile import ZipFile
DOWNLOAD_DIR = Path("/Users/my_user_name/Desktop/my_folder")
ZIPPED_FILE_DIR = Path("/Users/my_user_name/Desktop/my_zip")
def scan_dir(zip, dir, base_dir):
    """Recursively add every file below *dir* to the open archive *zip*.

    Args:
        zip: a ``ZipFile`` opened for writing.
        dir: directory (``Path``) to walk.
        base_dir: ancestor of *dir*; archive names are stored relative to it,
            so its direct children end up at the root of the archive.
    """
    for f in dir.iterdir():
        if f.is_dir():
            scan_dir(zip, f, base_dir)
        else:
            # First param is the path to the file on disk, second param is
            # the name used inside the zip (and when extracted).
            # relative_to() is used instead of slicing the string form,
            # which breaks when base_dir is not a literal prefix of f
            # (for example base_dir == Path(".")).
            zip.write(f, f.relative_to(base_dir).as_posix())
def zip_files():
    """Archive the whole content of DOWNLOAD_DIR into ZIPPED_FILE_DIR/my_zip.zip.

    The children of DOWNLOAD_DIR appear at the root of the archive.
    """
    # ZipFile does not create missing parent directories.
    ZIPPED_FILE_DIR.mkdir(parents=True, exist_ok=True)
    with ZipFile(ZIPPED_FILE_DIR / "my_zip.zip", "w") as zip:
        # Start the walk at DOWNLOAD_DIR itself: scan_dir calls iterdir() on
        # its argument, so passing each child directly would fail on any
        # plain file sitting at the top level.
        scan_dir(zip, DOWNLOAD_DIR, DOWNLOAD_DIR)


zip_files()
There's probably a neater way to trim off the base directory, but this was done quickly :)
You can use: shutil.make_archive
Below example taken from: https://docs.python.org/3/library/shutil.html#archiving-example
>>> import os
>>> from shutil import make_archive
>>> archive_name = os.path.expanduser(os.path.join('~', 'myarchive'))
>>> root_dir = os.path.expanduser(os.path.join('~', '.ssh'))
>>> make_archive(archive_name, 'zip', root_dir)
'/Users/tarek/myarchive.zip'
EDIT:
Code using ZipFile library
import zipfile
import os
class ZipUtilities:
    """Helpers for writing a file or a directory tree into a ZIP archive."""

    def toZip(self, file, filename):
        """Create the archive *filename* containing *file*.

        Args:
            file: path of a regular file, or of a directory that is added
                recursively.
            filename: path of the ZIP archive to create.
        """
        # The context manager closes the archive even when adding a member
        # raises, so no half-open handle is left behind.
        with zipfile.ZipFile(filename, 'w') as zip_file:
            if os.path.isfile(file):
                zip_file.write(file)
            else:
                self.addFolderToZip(zip_file, file)

    def addFolderToZip(self, zip_file, folder):
        """Recursively write every file below *folder* into *zip_file*.

        Args:
            zip_file: a ``zipfile.ZipFile`` opened for writing.
            folder: directory to walk; members are stored under the same
                path that is used to reach them on disk.
        """
        for file in os.listdir(folder):
            full_path = os.path.join(folder, file)
            if os.path.isfile(full_path):
                print('File added: ' + str(full_path))
                zip_file.write(full_path)
            elif os.path.isdir(full_path):
                print('Entering folder: ' + str(full_path))
                self.addFolderToZip(zip_file, full_path)
if __name__ == '__main__':
    # Demo run: pack the "foldername" directory into "newfile.zip".
    directory = 'foldername'
    filename = 'newfile.zip'
    utilities = ZipUtilities()
    utilities.toZip(file=directory, filename=filename)
I have this password protected zip folder:
folder_1\1.zip
When I extract this it gives me
1\image.png
How can I extract this to another folder without its folder name? Just the contents of it: image.png
So far I have tried every Stack Overflow solution I could find, and I have spent 11 hours straight trying to solve this.
import zipfile
# Extract every member of the password-protected archive into the target
# directory. extractall() keeps the folder structure stored in the archive.
with zipfile.ZipFile('C:\\Users\\Desktop\\folder_1\\1.zip', 'r') as archive:
    archive.setpassword(b"virus")
    archive.extractall('C:\\Users\\Desktop')  # target dir to extract all contents
EDIT:
This code worked for me. (Now I want to extract archives from many paths at once; any ideas?)
import os
import shutil
import zipfile
# Raw strings take single backslashes; doubling them inside r"..." puts two
# backslashes into the path.
my_dir = r"C:\Users\Desktop"
my_zip = r"C:\Users\Desktop\test\folder_1\1.zip"

# Copy every file of the archive straight into my_dir, dropping the
# directory part of each member name.
with zipfile.ZipFile(my_zip) as zip_file:
    zip_file.setpassword(b"virus")
    for member in zip_file.namelist():
        filename = os.path.basename(member)
        # skip directories
        if not filename:
            continue
        # copy file (taken from zipfile's extract); open() replaces the
        # Python-2-only file() builtin, and opening both streams in the
        # with statement closes the source if the target cannot be opened.
        with zip_file.open(member) as source, \
                open(os.path.join(my_dir, filename), "wb") as target:
            shutil.copyfileobj(source, target)
You can use the ZipFile.read() method to read the specific file in the archive, open your target file for writing by joining the target directory with the base name of the source file, and then write what you read to it:
import zipfile
import os
# Write every file of the password-protected archive directly into the
# target directory, ignoring the folders stored inside the archive.
with zipfile.ZipFile('C:\\Users\\Desktop\\folder_1\\1.zip', 'r') as archive:
    archive.setpassword(b"virus")
    for name in archive.namelist():
        # names ending in a separator are directory entries, not files
        if not name.endswith(('/', '\\')):
            with open(os.path.join('C:\\Users\\Desktop', os.path.basename(name)), 'wb') as f:
                f.write(archive.read(name))
And if you have several paths containing 1.zip for extraction:
import zipfile
import os
# Same flat extraction, repeated for the 1.zip found in each listed folder.
# Files from different archives that share a base name overwrite each other.
for path in 'C:\\Users\\Desktop\\folder_1', 'C:\\Users\\Desktop\\folder_2', 'C:\\Users\\Desktop\\folder_3':
    # the context manager closes each archive before the next one is opened
    with zipfile.ZipFile(os.path.join(path, '1.zip'), 'r') as archive:
        archive.setpassword(b"virus")
        for name in archive.namelist():
            # names ending in a separator are directory entries, not files
            if not name.endswith(('/', '\\')):
                with open(os.path.join('C:\\Users\\Desktop', os.path.basename(name)), 'wb') as f:
                    f.write(archive.read(name))
I have a folder (not zipped) containing multiple zip files (no other file type within folder). Each zip has the same type of text files containing different data saved within.
I know how to read in each separately, but I am looking to loop the process without having to type in each zip name. The zipfile archive does not seem to allow wild cards, so I cannot loop using this method. Is it possible to loop the process using glob?
The goal is to get the agency names without extracting all the zipfiles.
Single file read
import os
os.listdir('C:\\NTM\\Test\\')
['00003_32_332.zip', '00011_273_569.zip', '00012_258_276.zip']
import glob
glob.glob('C:\\NTM\\Test\\*.zip')
['C:\\NTM\\Test\\00003_32_332.zip', 'C:\\NTM\\Test\\00011_273_569.zip', 'C:\\NTM\\Test\\00012_258_276.zip']
import zipfile
archive=zipfile.ZipFile('C:\\NTM\\Test\\00011_273_569.zip')
testagency=archive.open('agency.txt')
testagency.read()
'agency_id,agency_name,nVRT,ValleyRide'
Update:
Now, that I can loop through the zip files and loop through to get the text file - I cannot print the agency_name from all of the zip files in the folder. My current code only prints the name of the last agency from the text file of the last zip file in the folder. Am I missing some compound statement structure?
def csv_dict_reader(file_obj):
    """Print the ``agency_name`` field of every CSV row read from *file_obj*.

    The first line of *file_obj* is taken as the header row; fields are
    comma-separated.
    """
    for record in csv.DictReader(file_obj, delimiter=','):
        agency = record['agency_name']
        print(agency)
# The dunder underscores had been lost: the guard must compare __name__
# with '__main__' (a bare `name` is undefined).
if __name__ == '__main__':
    # presumably `archive` is the zipfile.ZipFile opened earlier in the
    # session; it is not defined in this snippet
    with archive.open('agency.txt') as f_obj:
        csv_dict_reader(f_obj)
Whatcom Transportation Authority
Sample Code
import glob
import zipfile
dirName = '/backup/'
# was `diName`, an undefined name that raised NameError
zipList = glob.glob(dirName + '*.zip')
# Extract the .txt members of every archive into the current directory.
for zipname in zipList:
    # the context manager closes the archive even if an extraction fails
    with zipfile.ZipFile(zipname) as archive:
        for fileName in archive.namelist():
            if fileName.endswith('.txt'):
                archive.extract(fileName)
Thanks Jean-Francois!
# Read agency.txt from every archive in the folder without extracting it.
for archive_name in glob.glob('C:\\NTM\\Test\\*.zip'):
    with zipfile.ZipFile(archive_name) as archive:
        with archive.open('agency.txt') as testagency:
            # Outside an interactive session a bare read() result is
            # discarded, so print it.
            print(testagency.read())
I could not comment on Fuji Komalan's answer, so I am posting this as a separate answer.
Here is the fixed code.
import glob
import zipfile
dirName = 'C:/test/'
zipList = glob.glob(dirName + '*.zip')
print(zipList)
# Extract the .txt members of every archive into the current directory and
# print the name of each extracted member.
for zipname in zipList:
    # the context manager closes the archive even if an extraction fails
    with zipfile.ZipFile(zipname) as archive:
        for fileName in archive.namelist():
            if fileName.endswith('.txt'):
                archive.extract(fileName)
                print(fileName)
I have 700 files in a single folder. I need to find the files that have "h10v03" as part of their name and copy them to a different folder using Python.
Here's an example of one of the file names: MOD10A1.A2000121.h10v03.005.2007172062725.hdf
I appreciate any help.
Something like this would do the trick.
import os
import shutil
source_dir = "/some/directory/path"
target_dir = "/some/other/directory/path"
part = "h10v03"
# os.listdir() returns bare names, so each one is joined with source_dir
# before the isfile() test; testing the bare name would look it up in the
# current working directory instead.
files = [name for name in os.listdir(source_dir)
         if part in name and os.path.isfile(os.path.join(source_dir, name))]
for name in files:
    shutil.copy2(os.path.join(source_dir, name), target_dir)
Does it need to be python?
A unix shell does that for you quite fine:
cp ./*h10v03* /other/directory/
In python I would suggest you take a look at os.listdir() and shutil.copy()
EDIT:
some untested code:
import os
import shutil
src_dir = "/some/path/"
target_dir = "/some/other/path/"
searchstring = "h10v03"
# Copy every regular file whose name contains searchstring.
for f in os.listdir(src_dir):
    src_path = os.path.join(src_dir, f)
    if searchstring in f and os.path.isfile(src_path):
        shutil.copy2(src_path, target_dir)
        # function-call form: the print statement is a syntax error on
        # Python 3; this form prints the same text on Python 2 as well
        print("COPY %s" % f)
with the glob module (untested):
import glob
import os
import shutil
# glob returns full paths, so only the base name is re-joined with the
# target directory.
for f in glob.glob("/some/path/*2000*h10v03*"):
    # function-call form: the print statement is a syntax error on Python 3
    print(f)
    shutil.copy2(f, os.path.join("/some/target/dir/", os.path.basename(f)))
Firstly, find all the items in that folder with os.listdir. Then you can use the count() method of string to determine if it has your string. Then you can use shutil to copy the file.
I am trying to extract all the files from a .zip that contains subfolders into a single folder. I want every file from the subfolders extracted into that one folder, without keeping the original structure. At the moment I extract everything, move the files to a folder, and then remove the leftover subfolders. Files with the same name are overwritten.
Is it possible to do it before writing files?
Here is a structure for example:
my_zip/file1.txt
my_zip/dir1/file2.txt
my_zip/dir1/dir2/file3.txt
my_zip/dir3/file4.txt
In the end I want this:
my_dir/file1.txt
my_dir/file2.txt
my_dir/file3.txt
my_dir/file4.txt
What can I add to this code ?
import zipfile
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
# Extract every member below my_dir; extract() recreates the directory
# structure stored in the archive.
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        zip_file.extract(files, my_dir)
if I rename files path from zip_file.namelist(), I have this error:
KeyError: "There is no item named 'file2.txt' in the archive"
This opens file handles of members of the zip archive, extracts the filename and copies it to a target file (that's how ZipFile.extract works, without taking care of subdirectories).
import os
import shutil
import zipfile
my_dir = r"D:\Download"
my_zip = r"D:\Download\my_file.zip"

# Copy every file of the archive straight into my_dir, dropping the
# directory part of each member name. Members that share a base name
# overwrite each other.
with zipfile.ZipFile(my_zip) as zip_file:
    for member in zip_file.namelist():
        filename = os.path.basename(member)
        # skip directories
        if not filename:
            continue
        # copy file (taken from zipfile's extract); opening both streams in
        # the with statement closes the source if the target cannot be opened
        with zip_file.open(member) as source, \
                open(os.path.join(my_dir, filename), "wb") as target:
            shutil.copyfileobj(source, target)
It is possible to iterate over the ZipFile.infolist(). On the returned ZipInfo objects you can then manipulate the filename to remove the directory part and finally extract it to a specified directory.
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"

with zipfile.ZipFile(my_zip) as archive:
    for zip_info in archive.infolist():
        # directory entries have nothing to extract
        if zip_info.is_dir():
            continue
        # Rewriting the member's filename to its base name makes extract()
        # place the file directly in my_dir.
        zip_info.filename = os.path.basename(zip_info.filename)
        archive.extract(zip_info, my_dir)
Just extract to bytes in memory, compute the filename, and write the data there yourself,
instead of letting the library do it. Mostly, this means using the "read()" method instead of "extract()":
Python 3.6+ update (2020): the same code as in the original answer, but using pathlib.Path, which eases file-path manipulation and other operations (like "write_bytes").
from pathlib import Path
import zipfile
import os
my_dir = Path("D:\\Download\\")
my_zip = my_dir / "my_file.zip"

# Read each member into memory and write it directly into my_dir under its
# base name, so the archive's directory structure is discarded.
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for member in zip_file.infolist():
        # directory entries carry no data to write
        if member.is_dir():
            continue
        # read() takes the member (name or ZipInfo); its optional second
        # argument is a password, not a destination directory.
        data = zip_file.read(member)
        myfile_path = my_dir / Path(member.filename).name
        myfile_path.write_bytes(data)
Original code in answer without pathlib:
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"

with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        # Member names returned by zipfile use "/" as the directory
        # separator regardless of the OS.
        base_name = files.split("/")[-1]
        # a directory entry ends in "/", which leaves an empty base name
        if not base_name:
            continue
        # read() takes only the member name here; its optional second
        # argument is a password, not a destination directory.
        data = zip_file.read(files)
        myfile_path = os.path.join(my_dir, base_name)
        with open(myfile_path, "wb") as myfile:
            myfile.write(data)
A similar concept to the solution of Gerhard Götz, but adapted for extracting single files instead of the entire zip:
# Extract the single member path_in_zip and store it at `destination`.
# zipPath, path_in_zip and destination are supplied by the caller.
with ZipFile(zipPath, 'r') as zipObj:
    zipInfo = zipObj.getinfo(path_in_zip)  # stray closing parenthesis removed
    # Renaming the member to the destination's base name makes extract()
    # write it under that name, without the archive's directories.
    zipInfo.filename = os.path.basename(destination)
    zipObj.extract(zipInfo, os.path.dirname(os.path.realpath(destination)))
If you are getting a BadZipFile error, you can unzip the archive by running 7-Zip as a subprocess. Assuming 7-Zip is installed, use the following code.
import subprocess
my_dir = destFolder  # destination folder
# File you want to extract. `filename.zip` was attribute access on a string;
# assumes `filename` holds the archive's file name including ".zip" — TODO confirm
my_zip = destFolder + "/" + filename
ziploc = "C:/Program Files/7-Zip/7z.exe"  # location where 7zip is installed
# extracting only txt files and from all subdirectories
cmd = [ziploc, 'e', my_zip, '-o' + my_dir, '*.txt', '-r']
sp = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
# Drain the pipe and wait for 7-Zip to finish; an unread PIPE can fill up
# and block the child process.
output, _ = sp.communicate()