Good day!
I would like to ask how to convert each ".xlsx" (Excel) file in a specific folder into its own ".zip" file.
Example:
Path:= C:\My_Program\zip_files
Inside my zip_files folder I have multiple ".xlsx" files:
Test1.xlsx
Test2.xlsx
Test3.xlsx
and I want the output to be in the same folder, but with each file zipped individually.
Output:
Test1.zip
Test2.zip
Test3.zip
I hope somebody can help me; I am new to Python (2 or 3).
You have standard module zipfile to create ZIP, and glob.glob() or os.listdir() or os.walk() to get filenames in folder.
EDIT: this should work (it works for me on Linux)
import os
import zipfile
# Folder that holds the .xlsx files; each archive is written next to its source.
folder = 'C:\\My_Program\\zip_files'

for filename in os.listdir(folder):
    if not filename.endswith('.xlsx'):
        continue

    # splitext() drops the extension without depending on its length.
    name_without_extension = os.path.splitext(filename)[0]

    xlsx_path = os.path.join(folder, filename)
    zip_path = os.path.join(folder, name_without_extension + '.zip')

    # The context manager closes the archive even if write() raises.
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        # use `filename` (without folder name) as name inside archive
        # and it will not create folders inside archive
        zip_file.write(xlsx_path, filename)
EDIT: the same with glob
import os
import glob
import zipfile
# Folder that holds the .xlsx files; each archive is written next to its source.
folder = 'C:\\My_Program\\zip_files'

# glob.glob() already returns the full path of every match, so it is passed
# straight to ZipFile.write() instead of being rebuilt from `folder`.
for xlsx_path in glob.glob(os.path.join(folder, '*.xlsx')):
    filename = os.path.basename(xlsx_path)
    print(filename)

    # splitext() drops the extension without depending on its length.
    name_without_extension = os.path.splitext(filename)[0]
    print(name_without_extension)

    zip_path = os.path.join(folder, name_without_extension + '.zip')

    # The context manager closes the archive even if write() raises.
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        # use `filename` (without folder name) as name inside archive
        # and it will not create folders inside archive
        zip_file.write(xlsx_path, filename)
Related
I'm trying to write code that takes the content of each folder in a directory and adds it to a zip, one by one, using the name of the folder.
I wrote the code below, but I'm stuck: it only adds files to the zip by extension.
import zipfile, os
# Create (or overwrite) the archive 'ALL-PY.zip'. The name is relative, so it
# is resolved against the current working directory.
handle = zipfile.ZipFile('ALL-PY.zip', 'w')
# os.listdir() with no argument lists the current working directory.
for x in os.listdir():
# NOTE(review): `directory` is not defined anywhere in this snippet, so this
# test would raise NameError as written -- presumably a suffix string was
# intended; confirm with the author.
if x.endswith(directory):
# Add the matching entry to the archive using DEFLATE compression.
handle.write(x,compress_type = zipfile.ZIP_DEFLATED)
handle.close()
I would follow this approach:
import zipfile, os
# Zip the whole tree below `path` into 'ALL-PY.zip'.
# The archive name is relative and is opened before chdir(), so the archive is
# created in the directory the script was started from.
archive_path = os.path.abspath('ALL-PY.zip')
# The context manager closes the archive even if a write() raises.
with zipfile.ZipFile(archive_path, 'w') as handle:
    path = "C:/Users/User_Name/my_directory" # This is YOUR INPUT - set with the directory you want to zip
    # Walking "." after chdir() keeps the names inside the archive relative to `path`.
    os.chdir(path)
    for directory, subs, files in os.walk("."):
        # Add the directory entry itself so that empty folders are kept.
        handle.write(directory)
        for this_file in files:
            file_path = os.path.join(directory, this_file)
            # If `path` is also the start directory, the archive being
            # written shows up in the walk; never add it to itself.
            if os.path.abspath(file_path) == archive_path:
                continue
            handle.write(file_path, compress_type=zipfile.ZIP_DEFLATED)
I have a Python script that excludes one folder and zips the rest into a zip file. However, my script puts all the files at the same directory level in the zip,
and it gives the following warnings:
C:\Program Files\Python39\lib\zipfile.py:1505: UserWarning: Duplicate name: '1.txt'
return self._open_to_write(zinfo, force_zip64=force_zip64)
C:\Program Files\Python39\lib\zipfile.py:1505: UserWarning: Duplicate name: '2.txt'
return self._open_to_write(zinfo, force_zip64=force_zip64)
The folder structure to be zipped:
-test
-file1
-1.txt
-2.txt
-file2
-1.txt
-2.txt
-dump
-file3
My python script
import os
from zipfile import ZipFile
from datetime import date
from os.path import basename
# format today's date as dd-mm-yy
today = date.today()
todayDate = today.strftime("%d-%m-%y")
# path of the folder to zip
pathZip = "C:\\PY\\Project\\Self\\python-zip\\test"
# get the final folder name
forlderToZip = basename(pathZip)
# build the zip file name: "<date> <folder name>.zip"
zipFileName = todayDate + " " + forlderToZip + ".zip"
# create a ZipFile object (relative name, so it is created in the current working directory)
zipObj = ZipFile(zipFileName, "w")
# name of the folder to leave out of the zip
exclude_folder = "dump"
print("Zip start...")
for dirname, subdirs, files in os.walk(pathZip):
# Removing the name from `subdirs` in place keeps os.walk() from descending into it.
if exclude_folder in subdirs:
subdirs.remove(exclude_folder)
# Add the folder itself to the zip (this is what keeps empty folders)
zipObj.write(dirname)
for filename in files:
# create complete filepath of file in directory
filePath = os.path.join(dirname, filename)
# Add file to zip
# NOTE(review): the name used inside the archive is basename(filePath), i.e.
# the bare file name, so files with the same name from different folders are
# all stored under one name -- this matches the "Duplicate name" warnings.
zipObj.write(filePath,basename(filePath))
print("Zip " + filePath)
# close the Zip File
zipObj.close()
print("Zip done !")
The duplicate name warning comes from files of the same name in the input directory all being written to the same place in the output zip. The entries below collide with each other, so you are effectively left with only two text files in the zip.
-Output Zip
-1.txt
-1.txt
-2.txt
-2.txt
The arcname argument is expanded below to retain the nesting structure of your original directory. The arcname arg modifies the name and placement of a file when writing to a zip. The 'if continue' is used to skip child files with 'dump' in their path.
import os
from zipfile import ZipFile
# Placeholders: set these to the folder to archive and the archive to create.
path_to_dir = ''
path_to_zip = ''

# The context manager closes the archive even if a write() raises.
with ZipFile(path_to_zip, "w") as output_zip:
    for dirpath, dirnames, filenames in os.walk(path_to_dir):
        # Prune in place so os.walk() never descends into a folder named
        # exactly 'dump'. A substring test on `dirpath` would also skip
        # unrelated folders whose path merely contains "dump".
        dirnames[:] = [name for name in dirnames if name != 'dump']

        for f in filenames:
            outpath = os.path.join(dirpath, f)
            # Name inside the archive: the path relative to the archived
            # folder, which preserves the original nesting.
            arcname = os.path.relpath(outpath, path_to_dir)
            output_zip.write(outpath, arcname)
-Output Zip
-folder1
-1.txt
-2.txt
-folder2
-1.txt
-2.txt
Alternatively, rename the files from the original directory such that they are unique. One way to do this is by replacing backslashes in the arcname path.
# Flatten the relative path into a single unique file name. os.sep is used
# instead of a hard-coded backslash so this also works where the separator is "/".
arcname = outpath[len(path_to_dir):].lstrip(os.sep).replace(os.sep, '_')
-Output Zip
-folder1_1.txt
-folder1_2.txt
-folder2_1.txt
-folder2_2.txt
For more info see nested directory appear while creating zip file
I need to read the contents of a file from the list of files from a directory with os.listdir. My working scriptlet is as follows:
import os
# Folder whose entries are read.
path = "/Users/Desktop/test/"
# os.listdir() yields bare entry names, not full paths.
for filename in os.listdir(path):
# NOTE(review): `filename` is opened without being joined to `path`, so it is
# resolved against the current working directory -- confirm that is intended.
# NOTE(review): the 'U' in mode 'rU' looks like the legacy universal-newlines
# flag -- verify it is still accepted by the target Python version.
with open(filename, 'rU') as f:
t = f.read()
# split() with no argument breaks the text on runs of whitespace, giving a list of words.
t = t.split()
print(t)
print(t) gives me the contents of all the files in the directory (path) at once.
But I would like to print the contents of the first file, then the contents of the second, and so on, until all the files in the directory have been read.
Please guide me! Thanks.
You can print the file name.
Print the content after the file name.
import os
# Print every entry's name followed by its content.
path = "/home/vpraveen/uni_tmp/temp"
for filename in os.listdir(path):
    # os.listdir() returns bare names; join with `path` so the file is found
    # regardless of the current working directory.
    with open(os.path.join(path, filename), 'r') as f:
        t = f.read()
    # print() is called as a function so the snippet also runs on Python 3.
    print(filename + " Content : ")
    print(t)
First, you should build the path of each file using os.path.join(path, filename). Otherwise you'll open the wrong files (or none at all) whenever path is not the current working directory. Second, your script already provides the contents of all files starting with the first one. I added a few lines to the script to print the file path and an empty line to see where the contents end and begin:
import os
# Print every file's full path, its content, and a blank separator line.
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
    filepath = os.path.join(path, filename)
    # os.listdir() also returns sub-directories, which cannot be opened as files.
    if not os.path.isfile(filepath):
        continue
    # Plain 'r' replaces 'rU': the 'U' flag is no longer accepted by current Python 3.
    with open(filepath, 'r') as f:
        content = f.read()
    print(filepath)
    print(content)
    print()
os.listdir returns the name of the files only. you need to os.path.join that name with the path the files live in - otherwise python will look for them in your current working directory (os.getcwd()) and if that happens not to be the same as path python will not find the files:
import os
# Show the difference between the bare name from os.listdir() and the full path.
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
    print(filename)
    # Joining with `path` makes the name usable from any working directory.
    file_path = os.path.join(path, filename)
    print(file_path)
    ...  # placeholder: continue working with file_path here
if you have pathlib at your disposal you can also:
from pathlib import Path
# Print each regular file directly inside `path`, followed by its text content.
path = "/Users/Desktop/test/"
p = Path(path)
# The generator filters lazily, so entries are still checked one at a time
# as the directory is iterated.
for file in (entry for entry in p.iterdir() if entry.is_file()):
    print(file)
    print(file.read_text())
I am having a difficult time creating a python script that will rename file extensions in a folder and continue to do so in sub directories. Here is the script I have thus far; it can only rename files in the top directory:
#!/usr/bin/python
# Usage: python rename_file_extensions.py
import os
import sys
# os.listdir() returns only the entries directly inside the folder; nothing
# here descends into sub-folders.
for filename in os.listdir ("C:\\Users\\username\\Desktop\\test\\"): # parse through file list in the folder "test"
# find() returns the index of ".jpg" in the name; "> 0" therefore skips names
# that do not contain it (-1) and names that start with it (0).
if filename.find(".jpg") > 0: # if a .jpg is found
# NOTE(review): the replacement text is "jpeg" without a leading dot, so
# "a.jpg" becomes "ajpeg" -- confirm whether ".jpeg" was intended.
newfilename = filename.replace(".jpg","jpeg") # convert .jpg to jpeg
# NOTE(review): both arguments are bare file names, so they are resolved
# against the current working directory rather than the listed folder.
os.rename(filename, newfilename) # rename the file
import os
import sys
# Rename every "*.jpg" file below the script's own directory to "*.jpeg".
directory = os.path.dirname(os.path.realpath(sys.argv[0]))  # directory of this script
for subdir, dirs, files in os.walk(directory):
    for filename in files:
        # splitext() looks only at the real extension, so ".jpg" in the middle
        # of a name (or in a folder name) is never touched.
        stem, extension = os.path.splitext(filename)
        if extension != '.jpg':
            continue
        # `subdir` is already a full path below `directory`, so the rename
        # works from any current working directory.
        filePath = os.path.join(subdir, filename)
        newFilePath = os.path.join(subdir, stem + '.jpeg')
        os.rename(filePath, newFilePath)
I modified Jaron's answer to include the path to the file and a complete example of renaming the file.
I modified the answer of Hector Rodriguez Jr. a little bit because it would replace ANY occurrence of ".jpg" in the path, e.g. /path/to/my.jpg.files/001.jpg would become /path/to/my.jpeg.files/001.jpeg, which is not what you wanted, right?
Although it is generally not a good idea to use dots "." in a folder name, it can happen...
import os
import sys
# Rename every "*.jpg" file below the script's own directory to "*.jpeg",
# changing only the file name and never the folder names.
directory = os.path.dirname(os.path.realpath(sys.argv[0]))  # directory of your script
for subdir, dirs, files in os.walk(directory):
    for filename in files:
        # Only a trailing ".jpg" extension counts; ".jpg" elsewhere in the
        # name is left alone.
        stem, extension = os.path.splitext(filename)
        if extension != '.jpg':
            continue
        newFilename = stem + '.jpeg'  # replace only the extension
        # `subdir` is a full path below `directory`, so the rename does not
        # depend on the current working directory.
        filePath = os.path.join(subdir, filename)  # path to file
        newFilePath = os.path.join(subdir, newFilename)  # new path
        os.rename(filePath, newFilePath)  # rename
You can process the directory like this:
import os
def process_directory(root):
    """Print every entry below *root*, descending into sub-directories.

    Directories are reported as ``is directory <name>`` and then processed
    recursively; every other entry is printed by its bare name.

    Args:
        root: Path of the directory to traverse.

    Raises:
        OSError: If *root* (or a sub-directory) cannot be listed.
    """
    for item in os.listdir(root):
        # os.listdir() returns bare names; join with `root` so the directory
        # test and the recursion do not depend on the working directory.
        item_path = os.path.join(root, item)
        if os.path.isdir(item_path):
            print("is directory", item)
            process_directory(item_path)
        else:
            print(item)
            #Do stuff
# Start the traversal at the current working directory.
process_directory(os.getcwd())
Although, this isn't really necessary. Simply use os.walk which will iterate through all toplevel and further directories / files
Do it like this:
# Skeleton: visit every file below `root` and act on the ".jpg" ones.
for subdir, dirs, files in os.walk(root):
    for f in files:
        # Compare the real extension so ".jpg" in the middle of a name does not match.
        if os.path.splitext(f)[1] == '.jpg':
            pass  # The rest of your stuff
That should do exactly what you want.
I am trying to extract all files from a .zip containing subfolders into one folder. I want all the files from the subfolders extracted into a single folder, without keeping the original structure. At the moment, I extract everything, move the files to one folder, then remove the leftover subfolders. Files with the same name are overwritten.
Is it possible to do this before the files are written?
Here is a structure for example:
my_zip/file1.txt
my_zip/dir1/file2.txt
my_zip/dir1/dir2/file3.txt
my_zip/dir3/file4.txt
At the end I wish to have this:
my_dir/file1.txt
my_dir/file2.txt
my_dir/file3.txt
my_dir/file4.txt
What can I add to this code ?
import zipfile
# Folder the members are extracted into, and the archive to read.
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"
zip_file = zipfile.ZipFile(my_zip, 'r')
# namelist() returns the names of the archive members; despite the plural
# name, `files` holds one member name per iteration.
for files in zip_file.namelist():
# NOTE(review): extract() is given the stored member name unchanged; per the
# question this keeps the member's sub-folders beneath my_dir.
zip_file.extract(files, my_dir)
zip_file.close()
if I rename files path from zip_file.namelist(), I have this error:
KeyError: "There is no item named 'file2.txt' in the archive"
This opens file handles of members of the zip archive, extracts the filename and copies it to a target file (that's how ZipFile.extract works, without taking care of subdirectories).
import os
import shutil
import zipfile
# Copy every file member of the archive straight into my_dir, dropping the
# folder part of its stored name.
my_dir = r"D:\Download"
my_zip = r"D:\Download\my_file.zip"

with zipfile.ZipFile(my_zip) as zip_file:
    for member in zip_file.namelist():
        filename = os.path.basename(member)
        # Directory entries end with a separator, so their base name is empty.
        if filename:
            destination = os.path.join(my_dir, filename)
            # Stream the member to disk (the same approach zipfile's own
            # extract uses) instead of loading it into memory.
            with zip_file.open(member) as source, open(destination, "wb") as target:
                shutil.copyfileobj(source, target)
It is possible to iterate over the ZipFile.infolist(). On the returned ZipInfo objects you can then manipulate the filename to remove the directory part and finally extract it to a specified directory.
import zipfile
import os
# Extract every file member directly into my_dir, without its folder structure.
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"

# Named `archive` rather than `zip` so the built-in zip() is not shadowed.
with zipfile.ZipFile(my_zip) as archive:
    for zip_info in archive.infolist():
        # is_dir() recognises directory entries and, unlike indexing
        # filename[-1], cannot fail on an empty member name.
        if zip_info.is_dir():
            continue
        # Rewriting the name on the ZipInfo makes extract() write the member
        # under its base name only.
        zip_info.filename = os.path.basename(zip_info.filename)
        archive.extract(zip_info, my_dir)
Just extract to bytes in memory, compute the filename, and write the file yourself,
instead of letting the library do it - mostly, just use the "read()" method instead of "extract()":
Python 3.6+ update (2020) - the same code as the original answer, but using pathlib.Path, which eases file-path manipulation and other operations (like "write_bytes")
from pathlib import Path
import zipfile
import os
# Read every file member into memory and write it directly into my_dir.
my_dir = Path("D:\\Download\\")
my_zip = my_dir / "my_file.zip"

# The context manager closes the archive even if a read or write fails.
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        # Directory entries end with "/" and carry no data to write.
        if files.endswith("/"):
            continue
        # read() takes the member name only; its optional second argument is
        # a password, not a destination folder.
        data = zip_file.read(files)
        # namelist() yields plain strings, so the base name comes from
        # Path(files).name rather than a `.filename` attribute.
        myfile_path = my_dir / Path(files).name
        myfile_path.write_bytes(data)
Original code in answer without pathlib:
import zipfile
import os
# Read every file member into memory and write it directly into my_dir.
my_dir = "D:\\Download\\"
my_zip = "D:\\Download\\my_file.zip"

# The context managers close the archive and each output file even on error.
with zipfile.ZipFile(my_zip, 'r') as zip_file:
    for files in zip_file.namelist():
        # I am almost sure zip represents the directory separator
        # char as "/" regardless of OS, but I don't have DOS or Windows here to test it
        base_name = files.split("/")[-1]
        # Directory entries end with "/", which leaves an empty base name.
        if not base_name:
            continue
        # read() takes the member name only; its optional second argument is
        # a password, not a destination folder.
        data = zip_file.read(files)
        myfile_path = os.path.join(my_dir, base_name)
        with open(myfile_path, "wb") as myfile:
            myfile.write(data)
A similar concept to the solution of Gerhard Götz, but adapted for extracting single files instead of the entire zip:
# Extract the single member `path_in_zip` from `zipPath` to the file `destination`.
with ZipFile(zipPath, 'r') as zipObj:
    zipInfo = zipObj.getinfo(path_in_zip)
    # Rewriting the name on the ZipInfo makes extract() write the member under
    # the destination's file name instead of its stored path.
    zipInfo.filename = os.path.basename(destination)
    zipObj.extract(zipInfo, os.path.dirname(os.path.realpath(destination)))
In case you are getting a BadZipFile error, you can unzip the archive using a 7-Zip subprocess. Assuming you have 7-Zip installed, use the following code.
import subprocess
# Extract all .txt files from an archive into one flat folder using 7-Zip.
my_dir = destFolder  # destination folder
# NOTE(review): the original read `filename.zip`, which is an attribute access
# rather than a file name. `filename` is assumed here to hold the archive's
# file name including ".zip" -- TODO confirm.
my_zip = destFolder + "/" + filename  # file you want to extract
ziploc = "C:/Program Files/7-Zip/7z.exe"  # location where 7zip is installed
# extracting only txt files and from all subdirectories
cmd = [ziploc, 'e', my_zip, '-o' + my_dir, '*.txt', '-r']
sp = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
# communicate() reads the piped output and waits for 7-Zip to exit, so the
# extraction has finished (and the pipe cannot fill up) before continuing.
output, _ = sp.communicate()