Prevent zipfile writing directory to archive

Prevent zipfile writing directory to archive - python

I have the below to zip a file. the file zips correctly, but contains the folders within the zip. How can I just zip the file, whilst still being able to show where the file is located?
create_zip_path = "folder1\\folder2\\my_zip.zip"
file_to_add_to_zip = "folder1\\folder2\\my_file.txt"
zip_file(create_zip_path, file_to_add_to_zip)
def zip_file(create_zip_path, file_to_add_to_zip):
import zipfile
try:
import zlib
compression = zipfile.ZIP_DEFLATED
except:
compression = zipfile.ZIP_STORED
modes = { zipfile.ZIP_DEFLATED: 'deflated',
zipfile.ZIP_STORED: 'stored',
}
zf = zipfile.ZipFile(create_zip_path, mode='w')
zf.write(file_to_add_to_zip, compress_type=compression)

You can use the os module to change your working directory. This should work:
import os
print os.getcwd() #Your current working directory
os.chdir(os.getcwd() + '/folder1/folder2/')
print os.getcwd() #Your new wordking dir
create_zip_path = "my_zip.zip"
file_to_add_to_zip = "my_file.txt"
zip_file(create_zip_path, file_to_add_to_zip)

Related

Create a zip of folders using zipfile

I have a folder called my_folder at the following Path /Users/my_user_name/Desktop/my_folder. The folder my_folder contains more folders like 323456, 987654 etc. Those folders contain some content. I want to create a zip of all those folders called myzip.zip such that when someone unzips it they see all those folders like 323456, 987654 at the root.
My Code
import os
from pathlib import Path
from zipfile import ZipFile
DOWNLOAD_DIR = Path("/Users/my_user_name/Desktop/my_folder")
ZIPPED_FILE_DIR = Path("/Users/my_user_name/Desktop/my_zip")
def get_list_of_all_folders(download_dir: Path):
return [f for f in download_dir.iterdir() if download_dir.is_dir()]
def zip_files():
folder_list = get_list_of_all_folders(DOWNLOAD_DIR)
with ZipFile(ZIPPED_FILE_DIR / "my_zip.zip", "w") as zip:
# writing each file one by one
for folder in folder_list:
zip.write(folder)
zip_files()
I have a function called get_list_of_all_folders where it goes to my_folder and gets a list of all the folders inside it that we want to zip. Then I use that folder_list to zip up each folder as part of my final zip called my_zip.zip. However there is something really wrong with my code and I am not sure what? The my_zip.zip is only 35 kb small when I know for a fact I am zipping up content over 2 gigabytes.
I looked at the zipfile document but did not find much help here as there are not many examples.

ZipFile.write expects to be supplied with the name of a file to write to the zip, not a folder.
You will need to iterate over the files in each folder and call write for each one. For example:
from pathlib import Path
from zipfile import ZipFile
DOWNLOAD_DIR = Path("/Users/my_user_name/Desktop/my_folder")
ZIPPED_FILE_DIR = Path("/Users/my_user_name/Desktop/my_zip")
def scan_dir(zip, dir, base_dir):
for f in dir.iterdir():
if f.is_dir():
scan_dir(zip, f, base_dir)
else:
# First param is the path to the file, second param is
# the path to use in the zip and when extracted. I just
# trim base_dir off the front.
zip.write(f, str(f)[len(str(base_dir)):])
def zip_files():
with ZipFile(ZIPPED_FILE_DIR / "my_zip.zip", "w") as zip:
for f in DOWNLOAD_DIR.iterdir():
scan_dir(zip, f, DOWNLOAD_DIR)
zip_files()
There's probably a neater way to trim off the base directory, but this was done quickly :)

You can use: shutil.make_archive
Below example taken from: https://docs.python.org/3/library/shutil.html#archiving-example
>>> import os
>>> from shutil import make_archive
>>> archive_name = os.path.expanduser(os.path.join('~', 'myarchive'))
>>> root_dir = os.path.expanduser(os.path.join('~', '.ssh'))
>>> make_archive(archive_name, 'zip', root_dir)
'/Users/tarek/myarchive.zip'
EDIT:
Code using ZipFile library
import zipfile
import os
class ZipUtilities:
def toZip(self, file, filename):
zip_file = zipfile.ZipFile(filename, 'w')
if os.path.isfile(file):
zip_file.write(file)
else:
self.addFolderToZip(zip_file, file)
zip_file.close()
def addFolderToZip(self, zip_file, folder):
for file in os.listdir(folder):
full_path = os.path.join(folder, file)
if os.path.isfile(full_path):
print('File added: ' + str(full_path))
zip_file.write(full_path)
elif os.path.isdir(full_path):
print('Entering folder: ' + str(full_path))
self.addFolderToZip(zip_file, full_path)
if __name__ == '__main__':
utilities = ZipUtilities()
filename = 'newfile.zip'
directory = 'foldername'
utilities.toZip(directory, filename)

no such file of directory 'file.xml'

Having a small issues when trying to create a zip file using the zipfile module in python 3.
I have a directory which contains xml files, I am looking to create a zip archive from all these files in the same directory but keep encountering the error of FileNotFoundError: [Errno 2] no such file or directory: 'file.xml'
script:
import datetime
import os
import zipfile
path = '/Users/xxxx/reports/xxxx/monthly'
month = datetime.datetime.now().strftime('%G'+'-'+'%B')
zf = os.path.join(path, '{}.zip'.format(month))
z = zipfile.ZipFile(zf, 'w')
for i in os.listdir(path):
if i.endswith('.xml'):
z.write(i)
z.close()
it seems like when z.write(i) is called it is looking in the working directory for the xml files however the working directory is /Users/xxxx/scripts where the python script is.
How would I get the z.write(i) to look at the path variable without changing the current working directory if possible.

What actually happens is that as you loop through os.listdir(path), the i itself is simply the FileName which does not include the real Path to the File. There are a couple of ways to get around this; the simplest (but crudest) of which is shown below:
import datetime
import os
import zipfile
path = '/Users/xxxx/reports/xxxx/monthly'
month = datetime.datetime.now().strftime('%G'+'-'+'%B')
zf = os.path.join(path, '{}.zip'.format(month))
z = zipfile.ZipFile(zf, 'w')
for i in os.listdir(path):
# DECLARE A VARIABLE TO HOLD THE FULL PATH TO THE FILE:
xmlFile = "{}/{}".format(path, i) # <== PATH TO CURRENT FILE UNDER CURSOR
if xmlFile.endswith('.xml'):
z.write(xmlFile)
z.write(filename=xmlFile, arcname="ARCHIVE_NAME_HERE", ) # <== CHANGE
z.close()
Hope this Helps.
Cheers and Good-Luck...

use os.chdir to move to the file path and try to write the files to zip.
import datetime
import os
import zipfile
path = '/Users/xxxx/reports/xxxx/monthly'
month = datetime.datetime.now().strftime('%G'+'-'+'%B')
zf = os.path.join(path, '{}.zip'.format(month))
z = zipfile.ZipFile(zf, 'w')
os.chdir(path) #Change DIR
for i in os.listdir(path):
if i.endswith('.xml'):
z.write(i)
z.close()
Without changing DIR:
z = zipfile.ZipFile(zf, 'w')
for i in os.listdir(path):
if i.endswith('.xml'):
z.write(os.path.join(path, i))
z.close()

Unable to remove zipped file after unzipping

I'm attempting to remove a zipped file after unzipping the contents on windows. The contents can be stored in a folder structure in the zip. I'm using the with statement and thought this would close the file-like object (source var) and zip file. I've removed lines of code relating to saving the source file.
import zipfile
import os
zipped_file = r'D:\test.zip'
with zipfile.ZipFile(zipped_file) as zip_file:
for member in zip_file.namelist():
filename = os.path.basename(member)
if not filename:
continue
source = zip_file.open(member)
os.remove(zipped_file)
The error returned is:
WindowsError: [Error 32] The process cannot access the file because it is being used by another process: 'D:\\test.zip'
I've tried:
looping over the os.remove line in case it's a slight timing issue
Using close explicitly instead of the with statment
Attempted on local C drive and mapped D Drive

instead of passing in a string to the ZipFile constructor, you can pass it a file like object:
import zipfile
import os
zipped_file = r'D:\test.zip'
with open(zipped_file, mode="r") as file:
zip_file = zipfile.ZipFile(file)
for member in zip_file.namelist():
filename = os.path.basename(member)
if not filename:
continue
source = zip_file.open(member)
os.remove(zipped_file)

You are opening files inside the zip... which create a file lock on the whole zip file. close the inner file open first... via source.close() at the end of your loop
import zipfile
import os
zipped_file = r'D:\test.zip'
with zipfile.ZipFile(zipped_file) as zip_file:
for member in zip_file.namelist():
filename = os.path.basename(member)
if not filename:
continue
source = zip_file.open(member)
source.close()
os.remove(zipped_file)

Try to close the zipfile before removing.

you can do also like this, which works pretty good:
import os, shutil, zipfile
fpath= 'C:/Users/dest_folder'
path = os.getcwd()
for file in os.listdir(path):
if file.endswith(".zip"):
dirs = os.path.join(path, file)
if os.path.exists(fpath):
shutil.rmtree(fpath)
_ = os.mkdir(fpath)
with open(dirs, 'rb') as fileobj:
z = zipfile.ZipFile(fileobj)
z.extractall(fpath)
z.close()
os.remove(dirs)

How do I zip a file in Python without adding folders?

Currently I use the following code:
import zipfile
root_path = 'C:/data/'
def zipping_sql_database():
zf = zipfile.ZipFile(root_path + 'file.zip', mode='w')
try:
zf.write(root_path + "file.db")
finally:
zf.close()
At the moment it creates a zip-file but the zip files contains the whole folder named 'data' that then contains the 'file.db'. How can I create a zip-file only contains 'file.db' and not the file in a folder?

I found that I get the right behavior via:
import zipfile
root_path = 'C:/data/'
def zipping_sql_database():
zf = zipfile.ZipFile(root_path + 'file.zip', mode='w')
try:
zf.write(root_path + "file.db", "file.db")
finally:
zf.close()

Extract files from zip without keeping the structure using python ZipFile?

I try to extract all files from .zip containing subfolders in one folder. I want all the files from subfolders extract in only one folder without keeping the original structure. At the moment, I extract all, move the files to a folder, then remove previous subfolders. The files with same names are overwrited.
Is it possible to do it before writing files?
Here is a structure for example:
my_zip/file1.txt
my_zip/dir1/file2.txt
my_zip/dir1/dir2/file3.txt
my_zip/dir3/file4.txt
At the end I whish this:
my_dir/file1.txt
my_dir/file2.txt
my_dir/file3.txt
my_dir/file4.txt
What can I add to this code ?
import zipfile
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
zip_file = zipfile.ZipFile(my_zip, 'r')
for files in zip_file.namelist():
zip_file.extract(files, my_dir)
zip_file.close()
if I rename files path from zip_file.namelist(), I have this error:
KeyError: "There is no item named 'file2.txt' in the archive"

This opens file handles of members of the zip archive, extracts the filename and copies it to a target file (that's how ZipFile.extract works, without taking care of subdirectories).
import os
import shutil
import zipfile
my_dir = r"D:\Download"
my_zip = r"D:\Download\my_file.zip"
with zipfile.ZipFile(my_zip) as zip_file:
for member in zip_file.namelist():
filename = os.path.basename(member)
# skip directories
if not filename:
continue
# copy file (taken from zipfile's extract)
source = zip_file.open(member)
target = open(os.path.join(my_dir, filename), "wb")
with source, target:
shutil.copyfileobj(source, target)

It is possible to iterate over the ZipFile.infolist(). On the returned ZipInfo objects you can then manipulate the filename to remove the directory part and finally extract it to a specified directory.
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
with zipfile.ZipFile(my_zip) as zip:
for zip_info in zip.infolist():
if zip_info.filename[-1] == '/':
continue
zip_info.filename = os.path.basename(zip_info.filename)
zip.extract(zip_info, my_dir)

Just extract to bytes in memory,compute the filename, and write it there yourself,
instead of letting the library do it - -mostly, just use the "read()" instead of "extract()" method:
Python 3.6+ update(2020) - the same code from the original answer, but using pathlib.Path, which ease file-path manipulation and other operations (like "write_bytes")
from pathlib import Path
import zipfile
import os
my_dir = Path("D:\\Download\\")
my_zip = my_dir / "my_file.zip"
zip_file = zipfile.ZipFile(my_zip, 'r')
for files in zip_file.namelist():
data = zip_file.read(files, my_dir)
myfile_path = my_dir / Path(files.filename).name
myfile_path.write_bytes(data)
zip_file.close()
Original code in answer without pathlib:
import zipfile
import os
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
zip_file = zipfile.ZipFile(my_zip, 'r')
for files in zip_file.namelist():
data = zip_file.read(files, my_dir)
# I am almost shure zip represents directory separator
# char as "/" regardless of OS, but I don't have DOS or Windos here to test it
myfile_path = os.path.join(my_dir, files.split("/")[-1])
myfile = open(myfile_path, "wb")
myfile.write(data)
myfile.close()
zip_file.close()

A similar concept to the solution of Gerhard Götz, but adapted for extracting single files instead of the entire zip:
with ZipFile(zipPath, 'r') as zipObj:
zipInfo = zipObj.getinfo(path_in_zip))
zipInfo.filename = os.path.basename(destination)
zipObj.extract(zipInfo, os.path.dirname(os.path.realpath(destination)))

In case you are getting badZipFile error. you can unzip the archive using 7zip sub process. assuming you have installed the 7zip then use the following code.
import subprocess
my_dir = destFolder #destination folder
my_zip = destFolder + "/" + filename.zip #file you want to extract
ziploc = "C:/Program Files/7-Zip/7z.exe" #location where 7zip is installed
cmd = [ziploc, 'e',my_zip ,'-o'+ my_dir ,'*.txt' ,'-r' ]
#extracting only txt files and from all subdirectories
sp = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Prevent zipfile writing directory to archive - python

Related

Create a zip of folders using zipfile

no such file of directory 'file.xml'

Unable to remove zipped file after unzipping

How do I zip a file in Python without adding folders?

Extract files from zip without keeping the structure using python ZipFile?

Categories

Resources