python zip file hit userwarning: Duplicate name - python

I have a Python script that excludes one folder and zips everything else into a zip file. However, my script places all the files at the same directory level inside the archive, and
it gives the following error:
C:\Program Files\Python39\lib\zipfile.py:1505: UserWarning: Duplicate name: '1.txt'
return self._open_to_write(zinfo, force_zip64=force_zip64)
C:\Program Files\Python39\lib\zipfile.py:1505: UserWarning: Duplicate name: '2.txt'
return self._open_to_write(zinfo, force_zip64=force_zip64)
the folder structure to be zip
-test
-file1
-1.txt
-2.txt
-file2
-1.txt
-2.txt
-dump
-file3
My python script
import os
from zipfile import ZipFile
from datetime import date
from os.path import basename
# Build the archive name as "<dd-mm-yy> <folder name>.zip".
today = date.today()
todayDate = today.strftime("%d-%m-%y")
# path to zip
pathZip = "C:\\PY\\Project\\Self\\python-zip\\test"
# get the final folder name
forlderToZip = basename(pathZip)
# format the zip file name
zipFileName = todayDate + " " + forlderToZip + ".zip"
# exclude the folder to be zip
exclude_folder = "dump"
print("Zip start...")
# The context manager closes the archive even if adding a file fails.
with ZipFile(zipFileName, "w") as zipObj:
    for dirname, subdirs, files in os.walk(pathZip):
        # Removing the entry in place stops os.walk from descending into it.
        if exclude_folder in subdirs:
            subdirs.remove(exclude_folder)
        # Add empty folder to zip
        zipObj.write(dirname)
        for filename in files:
            # create complete filepath of file in directory
            filePath = os.path.join(dirname, filename)
            # Add file to zip.
            # NOTE: basename() drops the sub-folder, so same-named files from
            # different folders get the same archive name; this is what
            # triggers the "Duplicate name" UserWarning.
            zipObj.write(filePath, basename(filePath))
            print("Zip " + filePath)
print("Zip done !")

The duplicate name warning comes from the input directory files, of the same name, all being written to the same place of the output zip. The below get overwritten and you are left with two text files in the zip.
-Output Zip
-1.txt
-1.txt
-2.txt
-2.txt
The arcname argument is expanded below to retain the nesting structure of your original directory. The arcname arg modifies the name and placement of a file when writing to a zip. The 'if continue' is used to skip child files with 'dump' in their path.
import os
from zipfile import ZipFile
path_to_dir = ''
path_to_zip = ''


def zip_directory(path_to_dir, path_to_zip, exclude=('dump',)):
    """Zip the tree under *path_to_dir* into *path_to_zip*, keeping nesting.

    Each file is stored under its path relative to *path_to_dir*, so files
    with the same name in different sub-folders do not collide.

    Args:
        path_to_dir: Root directory whose contents are archived.
        path_to_zip: Path of the zip file to create (overwritten if present).
        exclude: Directory names that are skipped together with everything
            below them. Names are matched exactly, so excluding 'dump' does
            not exclude a folder called 'dumpling'.
    """
    excluded = set(exclude)
    zip_abspath = os.path.abspath(path_to_zip)
    with ZipFile(path_to_zip, "w") as output_zip:
        for dirpath, dirnames, filenames in os.walk(path_to_dir):
            # Prune in place so os.walk never descends into excluded folders.
            dirnames[:] = [d for d in dirnames if d not in excluded]
            for f in filenames:
                outpath = os.path.join(dirpath, f)
                # Do not add the archive to itself when it lives in the tree.
                if os.path.abspath(outpath) == zip_abspath:
                    continue
                arcname = os.path.relpath(outpath, path_to_dir)
                output_zip.write(outpath, arcname)


if __name__ == "__main__":
    zip_directory(path_to_dir, path_to_zip)
-Output Zip
-folder1
-1.txt
-2.txt
-folder2
-1.txt
-2.txt
Alternatively, rename the files from the original directory such that they are unique. One way to do this is by replacing backslashes in the arcname path.
# Flatten the relative path into a single unique file name; os.sep is used so
# the replacement works with the path separator of the current platform.
arcname = outpath[len(path_to_dir):].lstrip(os.sep).replace(os.sep, '_')
-Output Zip
-folder1_1.txt
-folder1_2.txt
-folder2_1.txt
-folder2_2.txt
For more info see nested directory appear while creating zip file

Related

open a folder to then use the files in python correctly

Usually I navigate to the folder I am extracting data from and copy the file name directly:
df2=pd.read_csv('10_90_bnOH-MEA.csv',usecols=[1])
If I have multiple files and want to do the same for all the files, how do I specify the folder to open and get all the files inside?
I want to run the above code without specifying the file's full path
(C:\Users\X\Desktop\Y\Z\10_90_bnOH-MEA.csv)
You want listdir from the os module.
import os
# Folder that holds the CSV files.
path = "C:\\Users\\X\\Desktop\\Y\\Z\\"
files = os.listdir(path)
print(files)
# Read only the CSV files; os.listdir also returns sub-folders and other file
# types, which pd.read_csv cannot load.
dataframe_list = [
    pd.read_csv(os.path.join(path, filename))
    for filename in files
    if filename.lower().endswith(".csv")
]
You should open the desired directory and loop through all the files then do something to them.
# import required module
import os
# assign directory
directory = 'files'


# iterate over files in a single directory (sub-folders are not entered)
def goThroughDirectory(directory, handler=None):
    """Call *handler* with the path of every regular file in *directory*.

    Args:
        directory: Folder whose direct entries are inspected.
        handler: Optional callable taking one file path. When omitted the
            function only walks the entries and does nothing with them.
    """
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # checking if it is a file
        if os.path.isfile(f):
            # do something
            if handler is not None:
                handler(f)
If you also want to loop through the files in its subdirectories, you should add a check with os.path.isdir(f) and recurse, like this:
...
def goThroughDirectory(directory, handler=None):
    """Recursively call *handler* with every regular file under *directory*.

    Args:
        directory: Root folder to walk.
        handler: Optional callable taking one file path. When omitted the
            function only walks the tree and does nothing with the files.
    """
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # checking if it is a file
        if os.path.isfile(f):
            # do something
            if handler is not None:
                handler(f)
        elif os.path.isdir(f):
            # Not a file but a directory: walk it as well. `f` already
            # contains the parent directory, so it is passed as-is.
            goThroughDirectory(f, handler)
for more information you should check geeksforgeeks

Python tarfile creates subfolders in a tar

I need to make a tar from files in the directory
Here is my code
import tarfile
import os
# Archive to create and the directory whose files go into it.
path = '/home/atom/Desktop/test_arch/sample.tar'
source = '/home/atom/Desktop/test_arch'
files = os.listdir(source)
files.sort()
# The context manager closes the archive even if adding a file fails.
with tarfile.open(path, "w") as tar:
    for file in files:
        print(file)
        file_path = source + '/' + file
        # NOTE: adding by full path stores the whole directory chain inside
        # the archive; this is the nesting the question asks about.
        tar.add(file_path)
Everything works fine and the archive is created, but instead of a flat list of files in it I get several nested subfolders:
/home/atom/Desktop/test_arch
And only in the last subfolder are my files
If I try:
tar.add(file)
It gives an Error:
FileNotFoundError: [Errno 2] No such file or directory: '1.jpg'
You should change Current Working Directory to work directly in directory source
and then you can use filename without source/ and archive will keep it without source/.
import tarfile
import os
source = '/home/atom/Desktop/test_arch'
path = os.path.join(source, 'sample.tar')

# --- keep current directory and go to source directory
old_dir = os.getcwd()
os.chdir(source)  # change directory
try:
    # --- compress using only names (or relative paths)
    files = sorted(os.listdir())
    with tarfile.open(path, "w") as tar:
        for filename in files:
            print(filename)
            # Skip an archive left over from a previous run.
            if filename != 'sample.tar':
                tar.add(filename)
finally:
    # --- always go back to previous directory, even after an error
    os.chdir(old_dir)

Moving All Contents Within Files to New Folder?

I have several folders that have a naming convention of "monthly_vendor_report_####", where #### is just a random combination of numbers. Each folder has a CSV file and I'd like to move the CSVs files out of the folder to a new destination source. So far this is what I have, which only unzips the files:
import os, zipfile
# Folder that contains the zip files; archives are extracted in place.
dir_name = r"C:\Users\...."
extension = ".zip"
os.chdir(dir_name)  # change directory from working dir to dir with files
for item in os.listdir(dir_name):  # loop through items in dir
    if item.endswith(extension):  # check for ".zip" extension
        file_name = os.path.abspath(item)  # get full path of files
        # The archive is closed before removal, even if extraction fails.
        with zipfile.ZipFile(file_name) as zip_ref:
            zip_ref.extractall(dir_name)  # extract file to dir
        os.remove(file_name)  # delete zipped file
Screenshot of folders--each folder contains a CSV
Content of one of the folders
You can use shutil to copy or move the files.
#Note that all the CSV files are taking the same names as their folders
import os
import shutil
# Source folder holding the report folders, and the folder to copy into.
# Raw strings are required: in a normal string '\f' is a form-feed character.
path = r'C:\foo'
destination = r'C:\Output'
# Kept so that relative paths used afterwards resolve against `path`.
os.chdir(path)
# This would copy all listed CSV files
for folder_name in os.listdir(path):
    folder_path = os.path.join(path, folder_name)
    if not os.path.isdir(folder_path):
        continue
    # Each CSV is named after the folder that contains it.
    src = os.path.join(folder_path, folder_name + ".csv")
    shutil.copy(src, destination)

Python zipfile and os

I have some code to export all files within a zipfile to a path but what I want to do is create a new folder with the same name as the zipfile minus the ".zip" just like the windows explorer option does. I have commented out the code that doesn't work. It seems to be the os.makedirs that doesn't work.
File "C:/Users/brentond/Documents/Python/Unzip all zip files in path.py", line 12
Output = os.path.join(path, filename.replace(".zip", "")) # get new folder path name
^
SyntaxError: invalid syntax
the code:
import os, zipfile
# Define path of zip files to variable
path = r'C:\Users\brentond\Documents\TA2\HA GDMS'
for foldername, subfolders, filenames in os.walk(path):  # walk directory
    for filename in filenames:  # loop through files
        if filename.endswith(".zip"):  # find zip files
            filepath = os.path.join(foldername, filename)  # get zip file abs path
            # NOTE: the commented-out os.makedirs call below is missing one
            # closing parenthesis; when uncommented, Python reports the
            # SyntaxError on the following line.
            #os.makedirs(os.path.join(path, filename.replace(".zip", "")) # create new folder same name as zip file
            #Output = os.path.join(path, filename.replace(".zip", "")) # get new folder path name
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(filepath) as ZipRef:  # create zip file object
                ZipRef.extractall(path)  # extract all. This to put everything in the path folder
                #ZipRef.extractall(Output) # This to put the zip file contents into a folder with same name
I have resolved this now and simplified the code a little
import os, zipfile
# Define path of zip files to variable
path = r'C:\Users\brentond\Documents\TA2\HA GDMS'
for foldername, subfolders, filenames in os.walk(path):  # walk directory
    for filename in filenames:  # loop through files
        if not filename.endswith(".zip"):  # only handle zip files
            continue
        filepath = os.path.join(foldername, filename)  # get zip file abs path
        # Strip only the trailing extension; replace() would also remove a
        # ".zip" that appears in the middle of the name.
        filefolder = os.path.splitext(filename)[0]
        Output = os.path.join(path, filefolder)  # new folder named after the zip
        # exist_ok lets the script be rerun without failing on existing folders.
        os.makedirs(Output, exist_ok=True)
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(filepath) as ZipRef:
            # Use ZipRef.extractall(path) instead to put everything directly
            # in the path folder.
            ZipRef.extractall(Output)

Zip Multiple files with multiple result in Python

Good Day!.
I would like to ask how can you convert a list of ".xlsx(excel)" file from specific folder location to ".zip" files.
Example:
Path:= C:\My_Program\zip_files
Inside my zip_file folder i have multiple ".xlsx" files.
Test1.xlsx
Test2.xlsx
Test3.xlsx
and i want the output to be in same folder but zip individually.
Output:
Test1.zip
Test2.zip
Test3.zip
I hope somebody can help me; I am new to Python (2 or 3).
You have standard module zipfile to create ZIP, and glob.glob() or os.listdir() or os.walk() to get filenames in folder.
EDIT: this should work (it works for me on Linux)
import os
import zipfile
# Folder that holds the .xlsx files; each zip is written next to its source.
folder = 'C:\\My_Program\\zip_files'
for filename in os.listdir(folder):
    if not filename.endswith('.xlsx'):
        continue
    # splitext drops the extension without a hard-coded length
    name_without_extension = os.path.splitext(filename)[0]
    xlsx_path = os.path.join(folder, filename)
    zip_path = os.path.join(folder, name_without_extension + '.zip')
    # The context manager closes the archive even if writing fails.
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        # use `filename` (without folder name) as name inside archive
        # and it will not create folders inside archive
        zip_file.write(xlsx_path, filename)
EDIT: the same with glob
import os
import glob
import zipfile
# Folder that holds the .xlsx files; each zip is written next to its source.
folder = 'C:\\My_Program\\zip_files'
# os.path.join builds the pattern with the separator of the current platform.
for file_path in glob.glob(os.path.join(folder, '*.xlsx')):
    filename = os.path.basename(file_path)
    print(filename)
    # splitext drops the extension without a hard-coded length
    name_without_extension = os.path.splitext(filename)[0]
    print(name_without_extension)
    zip_path = os.path.join(folder, name_without_extension + '.zip')
    # The context manager closes the archive even if writing fails.
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        # use `filename` (without folder name) as name inside archive
        # and it will not create folders inside archive
        zip_file.write(file_path, filename)

Categories