Create a zip of folders using zipfile - python

I have a folder called my_folder at the following Path /Users/my_user_name/Desktop/my_folder. The folder my_folder contains more folders like 323456, 987654 etc. Those folders contain some content. I want to create a zip of all those folders called myzip.zip such that when someone unzips it they see all those folders like 323456, 987654 at the root.
My Code
import os
from pathlib import Path
from zipfile import ZipFile
DOWNLOAD_DIR = Path("/Users/my_user_name/Desktop/my_folder")
ZIPPED_FILE_DIR = Path("/Users/my_user_name/Desktop/my_zip")
def get_list_of_all_folders(download_dir: Path):
    """Return the immediate sub-directories of ``download_dir``.

    Args:
        download_dir: Directory whose direct children are inspected.

    Returns:
        A list of ``Path`` objects, one per child that is itself a
        directory. Plain files are left out.
    """
    # Test each child, not the parent: ``download_dir.is_dir()`` is the same
    # for every child and would let regular files through.
    return [child for child in download_dir.iterdir() if child.is_dir()]
def zip_files():
folder_list = get_list_of_all_folders(DOWNLOAD_DIR)
with ZipFile(ZIPPED_FILE_DIR / "my_zip.zip", "w") as zip:
# writing each file one by one
for folder in folder_list:
zip.write(folder)
zip_files()
I have a function called get_list_of_all_folders where it goes to my_folder and gets a list of all the folders inside it that we want to zip. Then I use that folder_list to zip up each folder as part of my final zip called my_zip.zip. However there is something really wrong with my code and I am not sure what? The my_zip.zip is only 35 kb small when I know for a fact I am zipping up content over 2 gigabytes.
I looked at the zipfile documentation but did not find much help there, as it does not contain many examples.

ZipFile.write expects to be supplied with the name of a file to write to the zip, not a folder.
You will need to iterate over the files in each folder and call write for each one. For example:
from pathlib import Path
from zipfile import ZipFile
DOWNLOAD_DIR = Path("/Users/my_user_name/Desktop/my_folder")
ZIPPED_FILE_DIR = Path("/Users/my_user_name/Desktop/my_zip")
def scan_dir(zip, dir, base_dir):
    """Recursively add the files under ``dir`` to an open zip archive.

    Args:
        zip: A ``ZipFile`` opened for writing.
        dir: ``Path`` to add. A directory is walked recursively; a plain
            file is written directly, so callers may pass every child of a
            folder without filtering first.
        base_dir: ``Path`` that is stripped from the front of each file's
            path to produce its name inside the archive.

    Only files are written; a directory that contains no files leaves no
    entry in the archive.
    """
    if not dir.is_dir():
        # First param is the path to the file, second param is the path to
        # use in the zip and when extracted. relative_to() trims base_dir
        # off the front without string slicing, and as_posix() yields the
        # forward-slash form used inside zip archives.
        zip.write(dir, dir.relative_to(base_dir).as_posix())
        return
    for child in dir.iterdir():
        scan_dir(zip, child, base_dir)
def zip_files():
    """Zip everything under ``DOWNLOAD_DIR`` into ``ZIPPED_FILE_DIR / "my_zip.zip"``.

    ``scan_dir`` names each entry relative to ``DOWNLOAD_DIR``, so the
    sub-folders of ``DOWNLOAD_DIR`` end up at the root of the archive.
    """
    with ZipFile(ZIPPED_FILE_DIR / "my_zip.zip", "w") as archive:
        # Start the scan at DOWNLOAD_DIR itself. scan_dir() calls iterdir()
        # on whatever it is given, so handing it each child separately fails
        # as soon as one of the children is a plain file.
        scan_dir(archive, DOWNLOAD_DIR, DOWNLOAD_DIR)
zip_files()
There's probably a neater way to trim off the base directory, but this was done quickly :)

You can use: shutil.make_archive
Below example taken from: https://docs.python.org/3/library/shutil.html#archiving-example
>>> import os
>>> from shutil import make_archive
>>> archive_name = os.path.expanduser(os.path.join('~', 'myarchive'))
>>> root_dir = os.path.expanduser(os.path.join('~', '.ssh'))
>>> make_archive(archive_name, 'zip', root_dir)
'/Users/tarek/myarchive.zip'
EDIT:
Code using ZipFile library
import zipfile
import os
class ZipUtilities:
    """Helpers for writing a single file or a whole folder tree to a zip."""

    def toZip(self, file, filename):
        """Create the archive ``filename`` from ``file``.

        Args:
            file: Path of a file or a folder to archive.
            filename: Path of the zip archive to create (opened in ``'w'``
                mode).
        """
        # The with-block closes the archive even when a write fails, so the
        # file handle is never left open on error.
        with zipfile.ZipFile(filename, 'w') as zip_file:
            if os.path.isfile(file):
                zip_file.write(file)
            else:
                self.addFolderToZip(zip_file, file)

    def addFolderToZip(self, zip_file, folder):
        """Recursively write every file below ``folder`` to ``zip_file``.

        Each file is written under the path it was reached by (``folder``
        joined with the names below it); progress is printed as it goes.

        Args:
            zip_file: ``ZipFile`` opened for writing.
            folder: Folder whose contents are added.
        """
        for file in os.listdir(folder):
            full_path = os.path.join(folder, file)
            if os.path.isfile(full_path):
                print('File added: ' + str(full_path))
                zip_file.write(full_path)
            elif os.path.isdir(full_path):
                print('Entering folder: ' + str(full_path))
                self.addFolderToZip(zip_file, full_path)
if __name__ == '__main__':
utilities = ZipUtilities()
filename = 'newfile.zip'
directory = 'foldername'
utilities.toZip(directory, filename)

Related

zip folder's and take the name of folder

I'm trying to write code that takes the contents of each folder in a directory and adds them to a zip one by one, under the name of the folder.
I wrote the code below, but I'm stuck: it only adds files to the zip by extension.
import zipfile, os
handle = zipfile.ZipFile('ALL-PY.zip', 'w')
for x in os.listdir():
if x.endswith(directory):
handle.write(x,compress_type = zipfile.ZIP_DEFLATED)
handle.close()
I would follow this approach:
import os
import zipfile

path = "C:/Users/User_Name/my_directory" # This is YOUR INPUT - set with the directory you want to zip

# The archive name is relative, so it is resolved against the directory the
# script was started from. Remember its absolute location before chdir() so
# the archive can be recognised (and skipped) if that directory is `path`.
archive_path = os.path.abspath('ALL-PY.zip')

# The with-block closes the archive even if one of the writes raises.
with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as handle:
    os.chdir(path)
    for directory, subs, files in os.walk("."):
        # "." is the root being zipped; writing it would only add a
        # meaningless "./" entry to the archive.
        if directory != ".":
            handle.write(directory)
        for this_file in files:
            member = os.path.join(directory, this_file)
            if os.path.abspath(member) == archive_path:
                # never add the archive to itself
                continue
            handle.write(member)

Copying files in python using shutil

I have the following directory structure:
-mailDir
-folderA
-sub1
-sub2
-inbox
-1.txt
-2.txt
-89.txt
-subInbox
-subInbox2
-folderB
-sub1
-sub2
-inbox
-1.txt
-2.txt
-200.txt
-577.txt
The aim is to copy all the txt files under inbox folder into another folder.
For this I tried the below code
import os
from os import path
import shutil
rootDir = "mailDir"
destDir = "destFolder"
eachInboxFolderPath = []
for root, dirs, files in os.walk(rootDir):
for dirName in dirs:
if(dirName=="inbox"):
eachInboxFolderPath.append(root+"\\"+dirName)
for ii in eachInboxFolderPath:
for i in os.listdir(ii):
shutil.copy(path.join(ii,i),destDir)
If the inbox directory only has .txt files then the above code works fine. Since the inbox folder under folderA directory has other sub directory along with .txt files, the code returns permission denied error. What I understood is shutil.copy won't allow to copy the folders.
The aim is to copy only the txt files in every inbox folder to some other location. If the file names are same in different inbox folder I have to keep both file names. How we can improve the code in this case ? Please note other than .txt all others are folders only.
One simple solution is to filter for any i that does not have the .txt extension by using the string endswith() method.
import os
from os import path
import shutil
rootDir = "mailDir"
destDir = "destFolder"
eachInboxFolderPath = []
for root, dirs, files in os.walk(rootDir):
for dirName in dirs:
if(dirName=="inbox"):
eachInboxFolderPath.append(root+"\\"+dirName)
for ii in eachInboxFolderPath:
for i in os.listdir(ii):
if i.endswith('.txt'):
shutil.copy(path.join(ii,i),destDir)
This should ignore any folders and non-txt files that are found with os.listdir(ii). I believe that is what you are looking for.
Just remembered that I once wrote several files to solve this exact problem before. You can find the source code here on my Github.
In short, there are two functions of interest here:
list_files(loc, return_dirs=False, return_files=True, recursive=False, valid_exts=None)
copy_files(loc, dest, rename=False)
For your case, you could copy and paste these functions into your project and modify copy_files like this:
def copy_files(loc, dest, rename=False):
    """Copy the ``.txt`` files that ``list_files`` finds for ``loc`` into ``dest``.

    Args:
        loc: Location passed to ``list_files`` (called with
            ``recursive=True`` and ``valid_exts=('.txt',)``).
        dest: Destination directory.
        rename: When true, each copy is named after its source path with the
            path separators turned into hyphens, so files that share a base
            name in different folders do not overwrite each other.

    Returns:
        The list returned by ``list_files``; with ``rename`` set, each entry
        is replaced by the path of the copy that was written.
    """
    # get files with full path
    files = list_files(loc, return_dirs=False, return_files=True, recursive=True, valid_exts=('.txt',))
    # copy files in list to dest
    for i, this_file in enumerate(files):
        # change name if renaming
        if rename:
            # Strip a leading "./" only. The dot has to be escaped: a bare
            # "." matches any character, so "^./" would also swallow a
            # one-letter top-level folder such as "a/".
            out_file = sub(r'^\./', '', this_file)
            # replace slashes with hyphens to preserve unique name
            out_file = sub(r'\\|/', '-', out_file)
            out_file = join(dest, out_file)
            copy(this_file, out_file)
            files[i] = out_file
        else:
            copy(this_file, dest)
    return files
Then just call it like so:
copy_files('mailDir', 'destFolder', rename=True)
The renaming scheme might not be exactly what you want, but it will at least not override your files. I believe this should solve all your problems.
Here you go:
import os
from os import path
import shutil

destDir = '<absolute-path>'
for root, dirs, files in os.walk(os.getcwd()):
    # Only act on directories named 'inbox'. The walk itself must not be
    # pruned to 'inbox' entries: doing so stops it at the first level and it
    # never descends through parents such as folderA to reach an inbox.
    if path.basename(root) != 'inbox':
        continue
    for f in files:
        # Keep only '.txt' files.
        if f.endswith('.txt'):
            shutil.copy(path.join(root, f), destDir)
Quick and simple.
Sorry, I forgot that you also need unique file names. The above solution only works for distinct file names in a single inbox folder.
For copying files from multiple inboxes and having a unique name in the destination folder, you can try this:
import os
from os import path
import shutil

sourceDir = os.getcwd()
fixedLength = len(sourceDir)
destDir = '<absolute-path>'

filteredFiles = []
for root, dirs, files in os.walk(sourceDir):
    # Collect '.txt' files from every directory named exactly 'inbox'.
    # Comparing the last path component avoids also matching names that
    # merely end in 'inbox' (e.g. 'old_inbox'), as root.endswith() would.
    if path.basename(root) == 'inbox':
        # join the file name to the full path while filtering txt files
        filteredFiles.extend(path.join(root, f) for f in files if f.endswith('.txt'))

# Pair each source path with a unique destination name: its path relative to
# sourceDir with the separators turned into hyphens. os.sep keeps this
# correct on Windows, where the separator is a backslash rather than '/'.
filteredFiles = [(f, f[fixedLength+1:].replace(os.sep, '-')) for f in filteredFiles]

for (f, n) in filteredFiles:
    # Single-string print() calls produce the same output on Python 2 and 3.
    print('copying file... ' + f)
    # copying from the path to the dest directory with specific name
    shutil.copy(f, path.join(destDir, n))

print('copied ' + str(len(filteredFiles)) + ' files to ' + destDir)
If you need to copy all files instead of just txt files, then just change the condition f.endswith('.txt') to os.path.isfile(f) while filtering out the files.

Python: Zip all files individually in a folder

I have been trying to find out answer but nothing specific to my situation. I am very new to programming.
I have 20-30 .csv files in a folder.
I would like to loop through all files, and zip each csv file into .zip file (different folder)
C:\users\xyz\Source\NumberOne.csv
C:\users\xyz\Source\NumberTwo.csv
C:\users\xyz\Source\NumberThree.csv
C:\users\xyz\Dest\NumberOne.zip
C:\users\xyz\Dest\NumberTwo.zip
C:\users\xyz\Dest\NumberThree.zip
I have tried various code snippets from the internet, but none of them zip the files individually. The working code I have zips all the files into one zip archive.
please help
This should work. You need to pass in the source and destination directories when running it (or modify the code).
import os
import sys
import zipfile
def csv_files(source_dir):
    """Yield the names of the CSV files directly inside ``source_dir``.

    Args:
        source_dir: Directory to list; sub-directories are not searched.

    Yields:
        Bare file names (no directory part) whose extension is ``.csv``.
        The extension is matched case-insensitively and names are produced
        in sorted order so repeated runs behave the same.
    """
    for filename in sorted(os.listdir(source_dir)):
        # lower() also catches upper-case extensions such as 'REPORT.CSV'
        if filename.lower().endswith('.csv'):
            yield filename
source_dir = sys.argv[1] # r'C:\users\xyz\Source\'
dest_dir = sys.argv[2] # r'C:\users\xyz\Dest\'
os.chdir(source_dir) # To work around zipfile limitations
for csv_filename in csv_files(source_dir):
file_root = os.path.splitext(csv_filename)[0]
zip_file_name = file_root + '.zip'
zip_file_path = os.path.join(dest_dir, zip_file_name)
with zipfile.ZipFile(zip_file_path, mode='w') as zf:
zf.write(csv_filename)
To zip a file you can use this:
import subprocess
# Pass the arguments as a list: no shell is involved, so paths that contain
# spaces or quotes are handled correctly. (The `commands` module used before
# exists only in Python 2.) check_call raises CalledProcessError if `zip`
# exits with a non-zero status.
subprocess.check_call(['zip', '-j', zipfile, file])
To save all .csv files from a source folder as individual .zip archives in a destination folder:
#!/usr/bin/env python3
import sys
from pathlib import Path
from zipfile import ZipFile
src_dir, dest_dir = map(Path, sys.argv[1:])
for filename in src_dir.glob('*.csv'): # enumerate all csv-files in the src folder
# zip each file individually
with ZipFile(str(dest_dir / (filename.stem + '.zip')), 'w') as archive:
archive.write(str(filename), arcname=filename.name)
Example:
T:\> py zip-csv-files.py C:\users\xyz\Source C:\users\xyz\Dest

Extract files from zip without keeping the structure using python ZipFile?

I am trying to extract all files from a .zip containing subfolders into a single folder. I want all the files from the subfolders extracted into only one folder, without keeping the original structure. At the moment, I extract everything, move the files to a folder, then remove the leftover subfolders. Files with the same name are overwritten.
Is it possible to do it before writing files?
Here is a structure for example:
my_zip/file1.txt
my_zip/dir1/file2.txt
my_zip/dir1/dir2/file3.txt
my_zip/dir3/file4.txt
At the end I want this:
my_dir/file1.txt
my_dir/file2.txt
my_dir/file3.txt
my_dir/file4.txt
What can I add to this code ?
import zipfile
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
zip_file = zipfile.ZipFile(my_zip, 'r')
for files in zip_file.namelist():
zip_file.extract(files, my_dir)
zip_file.close()
if I rename files path from zip_file.namelist(), I have this error:
KeyError: "There is no item named 'file2.txt' in the archive"
This opens file handles of members of the zip archive, extracts the filename and copies it to a target file (that's how ZipFile.extract works, without taking care of subdirectories).
import os
import shutil
import zipfile
my_dir = r"D:\Download"
my_zip = r"D:\Download\my_file.zip"
with zipfile.ZipFile(my_zip) as zip_file:
for member in zip_file.namelist():
filename = os.path.basename(member)
# skip directories
if not filename:
continue
# copy file (taken from zipfile's extract)
source = zip_file.open(member)
target = open(os.path.join(my_dir, filename), "wb")
with source, target:
shutil.copyfileobj(source, target)
It is possible to iterate over the ZipFile.infolist(). On the returned ZipInfo objects you can then manipulate the filename to remove the directory part and finally extract it to a specified directory.
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
with zipfile.ZipFile(my_zip) as zip:
for zip_info in zip.infolist():
if zip_info.filename[-1] == '/':
continue
zip_info.filename = os.path.basename(zip_info.filename)
zip.extract(zip_info, my_dir)
Just extract to bytes in memory, compute the filename, and write it there yourself,
instead of letting the library do it - mostly, just use the "read()" method instead of "extract()":
Python 3.6+ update (2020) - the same code as the original answer, but using pathlib.Path, which eases file-path manipulation and other operations (like "write_bytes")
from pathlib import Path
import zipfile
import os

my_dir = Path("D:\\Download\\")
my_zip = my_dir / "my_file.zip"

# The with-block closes the archive even if a read or write fails.
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for member in zip_file.infolist():
        # Directory entries carry no data; only files are extracted.
        if member.is_dir():
            continue
        # read() takes the member only. Its optional second argument is a
        # password, not a target directory.
        data = zip_file.read(member)
        # Keep just the final path component so every file lands directly
        # in my_dir, without the folder structure stored in the archive.
        myfile_path = my_dir / Path(member.filename).name
        myfile_path.write_bytes(data)
Original code in answer without pathlib:
import zipfile
import os

my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"

# The with-blocks close the archive and each output file even on error.
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        # Zip member names use "/" as the directory separator regardless of
        # the OS, so the part after the last "/" is the bare file name.
        name = files.split("/")[-1]
        if not name:
            # Directory entries end in "/" and have nothing to extract.
            continue
        # read() takes the member name only. Its optional second argument
        # is a password, not an output directory.
        data = zip_file.read(files)
        myfile_path = os.path.join(my_dir, name)
        with open(myfile_path, "wb") as myfile:
            myfile.write(data)
A similar concept to the solution of Gerhard Götz, but adapted for extracting single files instead of the entire zip:
with ZipFile(zipPath, 'r') as zipObj:
    zipInfo = zipObj.getinfo(path_in_zip)
    # Renaming the entry in memory makes extract() write it under the
    # destination's own file name instead of the path stored in the archive.
    zipInfo.filename = os.path.basename(destination)
    zipObj.extract(zipInfo, os.path.dirname(os.path.realpath(destination)))
If you are getting a BadZipFile error, you can unzip the archive by running 7-Zip in a subprocess. Assuming you have 7-Zip installed, use the following code.
import subprocess
my_dir = destFolder #destination folder
my_zip = destFolder + "/" + filename.zip #file you want to extract
ziploc = "C:/Program Files/7-Zip/7z.exe" #location where 7zip is installed
cmd = [ziploc, 'e',my_zip ,'-o'+ my_dir ,'*.txt' ,'-r' ]
#extracting only txt files and from all subdirectories
sp = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)

How do I zip the contents of a folder using python (version 2.5)?

Once I have all the files I require in a particular folder, I would like my python script to zip the folder contents.
Is this possible?
And how could I go about doing it?
On python 2.7 you might use: shutil.make_archive(base_name, format[, root_dir[, base_dir[, verbose[, dry_run[, owner[, group[, logger]]]]]]]).
base_name archive name minus extension
format format of the archive
root_dir directory to compress.
For example
shutil.make_archive(target_file, format="bztar", root_dir=compress_me)
Adapted version of the script is:
#!/usr/bin/env python
from __future__ import with_statement
from contextlib import closing
from zipfile import ZipFile, ZIP_DEFLATED
import os
def zipdir(basedir, archivename):
    """Zip the files under ``basedir`` into the archive ``archivename``.

    Entry names are relative to ``basedir``, so its contents sit at the root
    of the archive. Directories that contain no files are not stored.

    Args:
        basedir: Existing directory to archive, with or without a trailing
            path separator.
        archivename: Path of the zip file to create.

    Raises:
        AssertionError: if ``basedir`` is not a directory.
    """
    assert os.path.isdir(basedir)
    # Joining with "" appends a separator only when basedir lacks one, so the
    # prefix length is right either way. Slicing by len(basedir) + 1 cut the
    # first character off every name when basedir already ended in one.
    prefix = os.path.join(basedir, "")
    with closing(ZipFile(archivename, "w", ZIP_DEFLATED)) as z:
        for root, dirs, files in os.walk(basedir):
            #NOTE: ignore empty directories
            for fn in files:
                absfn = os.path.join(root, fn)
                zfn = absfn[len(prefix):]  # path relative to basedir
                z.write(absfn, zfn)
if __name__ == '__main__':
import sys
basedir = sys.argv[1]
archivename = sys.argv[2]
zipdir(basedir, archivename)
Example:
C:\zipdir> python -mzipdir c:\tmp\test test.zip
It creates 'C:\zipdir\test.zip' archive with the contents of the 'c:\tmp\test' directory.
Here is a recursive version
def zipfolder(path, relname, archive):
    """Recursively add the contents of the folder ``path`` to ``archive``.

    Args:
        path: Folder on disk whose entries are added.
        relname: Name the folder takes inside the archive; each entry is
            stored as ``relname`` joined with the entry's own name.
        archive: ``ZipFile`` opened for writing.
    """
    for entry in os.listdir(path):
        disk_path = os.path.join(path, entry)
        arc_path = os.path.join(relname, entry)
        if not os.path.isdir(disk_path):
            archive.write(disk_path, arc_path)
            continue
        # Sub-folder: descend, carrying the archive-side name along.
        zipfolder(disk_path, arc_path, archive)
def create_zip(path, relname, archname):
    """Create the zip archive ``archname`` from a file or a folder.

    Args:
        path: File or folder to archive. A folder is added recursively via
            ``zipfolder``.
        relname: Name under which ``path`` is stored inside the archive.
        archname: Path of the zip file to create.
    """
    archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED)
    # try/finally closes the archive even if adding an entry raises, and
    # (unlike a with-block on ZipFile) also works on very old Pythons.
    try:
        if os.path.isdir(path):
            zipfolder(path, relname, archive)
        else:
            archive.write(path, relname)
    finally:
        archive.close()
Both jfs's solution and Kozyarchuk's solution could work for the OP's use case, however:
jfs's solution zips all of the files in a source folder and stores them in the zip at the root level (not preserving the original source folder within the structure of the zip).
Kozyarchuk's solution inadvertently puts the newly-created zip file into itself since it is a recursive solution (e.g. creating new zip file "myzip.zip" with this code will result in the archive "myzip.zip" itself containing an empty file "myzip.zip")
Thus, here is a solution that will simply add a source folder (and any subfolders to any depth) to a zip archive. This is motivated by the fact that you cannot pass a folder name to the built-in method ZipFile.write() -- the function below, add_folder_to_zip(), offers a simple method to add a folder and all of its contents to a zip archive. Below code works for Python2 and Python3.
import zipfile
import os
def add_folder_to_zip(src_folder_name, dst_zip_archive):
    """ Adds a folder and its contents to a zip archive

    Every file below the folder is written under the path os.walk reports
    for it, so the source folder name is kept inside the archive. If the
    archive file itself lies inside the folder, it is skipped rather than
    being added to itself.

    Args:
        src_folder_name (str): Source folder name to add to the archive
        dst_zip_archive (ZipFile): Destination zip archive

    Returns:
        None
    """
    # Absolute path of the archive on disk, or None when it is not known as
    # a path string (e.g. the ZipFile wraps a file object).
    archive_file = getattr(dst_zip_archive, "filename", None)
    if isinstance(archive_file, str):
        archive_file = os.path.abspath(archive_file)
    else:
        archive_file = None

    for folder, _subfolders, file_names in os.walk(src_folder_name):
        for file_name in file_names:
            fn_to_add = os.path.join(folder, file_name)
            if os.path.abspath(fn_to_add) == archive_file:
                continue
            dst_zip_archive.write(fn_to_add)
if __name__ == '__main__':
zf = zipfile.ZipFile('myzip.zip', mode='w')
add_folder_to_zip('zip_this_folder', zf)
zf.close()

Categories