Python create zip file

Python create zip file - python

I use the following script to create ZIP files:
import zipfile
import os
def zip_file_generator(filenames, size):
# filenames = path to the files
zip_subdir = "SubDirName"
zip_filename = "SomeName.zip"
# Open BytesIO to grab in-memory ZIP contents
s = io.BytesIO()
# The zip compressor
zf = zipfile.ZipFile(s, "w")
for fpath in filenames:
# Calculate path for file in zip
fdir, fname = os.path.split(fpath)
zip_path = os.path.join(zip_subdir, fname)
# Add file, at correct path
zf.write(fpath, zip_path)
# Must close zip for all contents to be written
zf.close()
# Grab ZIP file from in-memory, make response with correct MIME-type
resp = HttpResponse(s.getvalue(), content_type = "application/x-zip-compressed")
# ..and correct content-disposition
resp['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
resp['Content-length'] = size
return resp
I got it from here.
But I changed s = StringIO.StringIO() to s = io.BytesIO() because I am using Python 3.x.
The zip file does get created with the right size etc. But I can't open it. It is invalid. If I write the zip file to disk the zip file is valid.

I got it working. Just change size in resp['Content-length'] = size to s.tell()

I use shutil to zip like so:
import shutil
shutil.make_archive(archive_name, '.zip', folder_name)

Related

How do I convert a ZipFile object to an object supporting the buffer API?

I have a ZipFile object that I need to convert to an object that will work with the buffer api. Context is that I am trying to use an API that says it takes a file with the type string($binary). How do I do this? I know this is completely wrong, but here is my code:
def create_extension_zip_file(self, path_to_extension_directory, directory_name):
zipObj = ZipFile("static_extension.zip", "w")
with zipObj:
# Iterate over all the files in directory
for folderName, subfolders, filenames in os.walk(path_to_extension_directory):
for filename in filenames:
# create complete filepath of file in directory
filePath = os.path.join(folderName, filename)
with open(filename, 'rb') as file_data:
bytes_content = file_data.read()
# Add file to zip
zipObj.write(bytes_content, basename(filePath))
return zipObj

Or if the API expects a file-like object, you could pass a BytesIO instance when creating the zipfile and pass that to the API
import io
def create_extension_zip_file(self, path_to_extension_directory, directory_name):
buf = io.BytesIO()
zipObj = ZipFile(buf, "w")
with zipObj:
# Iterate over all the files in directory
for folderName, subfolders, filenames in os.walk(path_to_extension_directory):
for filename in filenames:
# create complete filepath of file in directory
filePath = os.path.join(folderName, filename)
with open(filename, 'rb') as file_data:
bytes_content = file_data.read()
# Add file to zip
zipObj.write(bytes_content, basename(filePath))
# Rewind the buffer's file pointer (may not be necessary)
buf.seek(0)
return buf
If the API expects a bytes instance, you could just open the zip file in binary mode after it has been written, and pass the bytes .
with open('static_extension.zip', 'rb') as f:
bytes_ = f.read()

How to avoid saving subfolders when saving a file?

The function creates a folder and saves a file into it. Then the folder is packed into a rar archive and sent to the user, and the newly created folder and archive are deleted from the server after.
code.py
new_file_name = self.generate_file_name(rfi, vendor, current_scoring_round)
path_to_temp_folder = os.path.dirname(BASE_DIR)
if not os.path.exists(f'{path_to_temp_folder}/temp_folder'):
pathlib.Path(f'{path_to_temp_folder}/temp_folder').mkdir(parents=True, exist_ok=True)
wb.save(f'{path_to_temp_folder}/temp_folder/{new_file_name}') #save xlsx file from openpyxl library
archive = self.generate_zip_name(rfi) # generate name for archive
to_rar = f'{path_to_temp_folder}/temp_folder'
patoolib.create_archive(archive, (to_rar,)) # patoolib - to .rar converter
to_download = f'{path_to_temp_folder}/{archive}'
if os.path.exists(to_download):
try:
with open(to_download, 'rb') as fh:
response = HttpResponse(fh.read(),
content_type="content_type='application/vnd.rar'")
response['Content-Disposition'] = 'attachment; filename= "{}"'.format(archive)
return response
finally:
shutil.rmtree(to_rar, ignore_errors=True)
default_storage.delete(to_download)
Everything work, but the problem is that the downloaded archive contains subfolders - paths to the saved file.
Expected result:
folder.rar
file.xlsx
Actual result:
folder.rar
/home
/y700
/projects
file.xlsx

The documentation to patool is minimal. It certainly seems to suggest that this should be possible by passing the path to the file in the create-archive command. I've tried this though, and it appears not.
So the only option, probably, is to change the working directory to the location of the test.xlsx file:
import patoolib
import os
new_file_name = self.generate_file_name(rfi, vendor, current_scoring_round)
path_to_temp_folder = os.path.dirname(BASE_DIR)
if not os.path.exists(f'{path_to_temp_folder}/temp_folder'):
pathlib.Path(f'{path_to_temp_folder}/temp_folder').mkdir(parents=True, exist_ok=True)
wb.save(f'{path_to_temp_folder}/temp_folder/{new_file_name}') #save xlsx file from openpyxl library
archive = self.generate_zip_name(rfi) # generate name for archive
to_rar = f'{path_to_temp_folder}/temp_folder'
cwd=os.getcwd()
os.chdir('to_rar')
patoolib.create_archive(cwd+archive, ({new_file_name},)) # patoolib - to .rar converter
os.chdir('cwd')
to_download = f'{path_to_temp_folder}/{archive}'
if os.path.exists(to_download):
try:
with open(to_download, 'rb') as fh:
response = HttpResponse(fh.read(),
content_type="content_type='application/vnd.rar'")
response['Content-Disposition'] = 'attachment; filename= "{}"'.format(archive)
return response
finally:
shutil.rmtree(to_rar, ignore_errors=True)
default_storage.delete(to_download)
This works on my system, for example, and I get a single file in the archive (using tar, because I don't have rar installed):
import patoolib
import os
cwd=os.getcwd()
os.chdir('foo/bar/baz/qux/')
patoolib.create_archive(cwd+'/foo.tar.gz',('test.txt',))
os.chdir(cwd)
Note that you should really use os.path.join rather than concatenating strings, but this was just a quick & dirty test.

Python unzip multiple .gz files

I have compressed a file into several chunks using 7zip:
HAVE:
foo.txt.gz.001
foo.txt.gz.002
foo.txt.gz.003
foo.txt.gz.004
foo.txt.gz.005
WANT:
foo.txt
How do I unzip and combine these chunks to get a single file using python?

First, get the list of all files.
files = ['/path/to/foo.txt.gz.001', '/path/to/foo.txt.gz.002', '/path/to/foo.txt.gz.003']
Then iterate over each file and append to a result file.
with open('./result.gz', 'ab') as result: # append in binary mode
for f in files:
with open(f, 'rb') as tmpf: # open in binary mode also
result.write(tmpf.read())
Then extract is using zipfile lib. You could use tempfile to avoid handle with temporary zip file.

First you must extract all the zip files sequentially:
import zipfile
paths = ["path_to_1", "path_to_2" ]
extract_paths = ["path_to_extract1", "path_to_extrac2"]
for i in range(0, paths):
zip_ref = zipfile.ZipFile(paths[i], 'r')
zip_ref.extractall(extract_paths[i])
zip_ref.close()
Next you can go to the extracted location and read() individual files with open into a string. Concatenate those strings and save to foo.txt.

import os, gzip, shutil
dir_name = '/Users/username/Desktop/data'
def gz_extract(directory):
extension = ".gz"
os.chdir(directory)
for item in os.listdir(directory): # loop through items in dir
if item.endswith(extension): # check for ".gz" extension
gz_name = os.path.abspath(item) # get full path of files
file_name = (os.path.basename(gz_name)).rsplit('.',1)[0] #get file name for file within
with gzip.open(gz_name,"rb") as f_in, open(file_name,"wb") as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(gz_name) # delete zipped file
gz_extract(dir_name)

how to zip all pdf files under a static folder? django

I have a folder named pdfs under static folder.
I am trying to have a returned zip which contains all the pdf files in the pdfs folder.
I have tried a few threads and used their codes, but I tried to workout things but then couldn't solve the last part that I get a message saying no file / directory
I know static folders are a bit different than usual folders.
can someone please give me a hand and see what I have missed?
Thanks in advance
from StringIO import StringIO
import zipfile
pdf_list = os.listdir(pdf_path)
print('###pdf list################################')
print(pdf_path) # this does show me the whole path up to the pdfs folder
print(pdf_list) # returns ['abc.pdf', 'efd.pdf']
zip_subdir = "somefiles"
zip_filename = "%s.zip" % zip_subdir
# Open StringIO to grab in-memory ZIP contents
s = StringIO()
# Grab ZIP file from in-memory, make response with correct MIME-type
resp = HttpResponse(content_type='application/zip')
# ..and correct content-disposition
resp['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
# The zip compressor
zf = zipfile.ZipFile(s, "w")
for pdf_file in pdf_list:
print(pdf_file)
zf.write(pdf_file, pdf_path + pdf_file)
zf.writestr('file_name.zip', pdf_file.getvalue())
zf.close()
return resp
here I am getting errors for not able to find file / directory for 'abc.pdf'
P.S. I don't really need any sub folders zipped into the zip file. As long as all files are inside the zip, it'll be all good. (There won't be any sub folders in the pdfs folder)

I solved it myself and made it into a function with comments.
complicated things myself earlier
# two params
# 1. the directory where files want to be zipped
# e.g. of file directory is /et/ubuntu/vanfruits/vanfruits/static/pdfs/
# 2. filename of the zip file
def render_respond_zip(self, file_directory, zip_file_name):
response = HttpResponse(content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=' + zip_file_name
# open a file, writable
zip = ZipFile(response, 'w')
# loop through the directory provided
for single_file in os.listdir(file_directory):
# open the file, full path to the file including file name and extension is needed as first param
f = open(file_directory + single_file, 'r')
# write the file into the zip with
# first param is the name of the file inside the zip
# second param is read the file
zip.writestr(single_file, f.read())
zip.close()
return response

Unable to remove zipped file after unzipping

I'm attempting to remove a zipped file after unzipping the contents on windows. The contents can be stored in a folder structure in the zip. I'm using the with statement and thought this would close the file-like object (source var) and zip file. I've removed lines of code relating to saving the source file.
import zipfile
import os
zipped_file = r'D:\test.zip'
with zipfile.ZipFile(zipped_file) as zip_file:
for member in zip_file.namelist():
filename = os.path.basename(member)
if not filename:
continue
source = zip_file.open(member)
os.remove(zipped_file)
The error returned is:
WindowsError: [Error 32] The process cannot access the file because it is being used by another process: 'D:\\test.zip'
I've tried:
looping over the os.remove line in case it's a slight timing issue
Using close explicitly instead of the with statment
Attempted on local C drive and mapped D Drive

instead of passing in a string to the ZipFile constructor, you can pass it a file like object:
import zipfile
import os
zipped_file = r'D:\test.zip'
with open(zipped_file, mode="r") as file:
zip_file = zipfile.ZipFile(file)
for member in zip_file.namelist():
filename = os.path.basename(member)
if not filename:
continue
source = zip_file.open(member)
os.remove(zipped_file)

You are opening files inside the zip... which create a file lock on the whole zip file. close the inner file open first... via source.close() at the end of your loop
import zipfile
import os
zipped_file = r'D:\test.zip'
with zipfile.ZipFile(zipped_file) as zip_file:
for member in zip_file.namelist():
filename = os.path.basename(member)
if not filename:
continue
source = zip_file.open(member)
source.close()
os.remove(zipped_file)

Try to close the zipfile before removing.

you can do also like this, which works pretty good:
import os, shutil, zipfile
fpath= 'C:/Users/dest_folder'
path = os.getcwd()
for file in os.listdir(path):
if file.endswith(".zip"):
dirs = os.path.join(path, file)
if os.path.exists(fpath):
shutil.rmtree(fpath)
_ = os.mkdir(fpath)
with open(dirs, 'rb') as fileobj:
z = zipfile.ZipFile(fileobj)
z.extractall(fpath)
z.close()
os.remove(dirs)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python create zip file - python

I got it working. Just change size in resp['Content-length'] = size to s.tell()

I use shutil to zip like so: import shutil shutil.make_archive(archive_name, '.zip', folder_name)

Related

How do I convert a ZipFile object to an object supporting the buffer API?

How to avoid saving subfolders when saving a file?

Python unzip multiple .gz files

how to zip all pdf files under a static folder? django

Unable to remove zipped file after unzipping

Categories

Resources