extracting a .ppm.bz2 from a custom path to a custom path - python

As the title says, I have several folders containing several .ppm.bz2 files, and I want to extract them exactly where they are using Python.
Directory structure image
I am traversing in the folders as this:
# Question code as posted (indentation was lost in this copy).
# Lists the folders under `path`, lists the entries of each folder, and tries
# to open every entry as a bz2-compressed tar archive, extracting it into the
# folder it came from.
import tarfile
import os
path = '/Users/ankitkumar/Downloads/colorferet/dvd1/data/images/'
folders = os.listdir(path)
for folder in folders: #the folders starting like 00001
if not folder.startswith("0"):
# NOTE: `pass` is a no-op, so names that do not start with "0" are still
# processed by the statements that follow.
pass
path2 = path + folder
zips = os.listdir(path2)
for zip in zips:
if not zip.startswith("0"):
# NOTE: same no-op filter as above.
pass
path3 = path2+"/"+zip
# Mode 'r:bz2' asks tarfile to read a tar archive through bz2 decompression.
fh = tarfile.open(path3, 'r:bz2')
outpath = path2+"/"
fh.extractall(outpath)
# NOTE: `fh.close` without parentheses only references the method; it is
# never called.
fh.close
`
then I get this error
`
Traceback (most recent call last):
File "ZIP.py", line 16, in <module>
fh = tarfile.open(path3, 'r:bz2')
File "/anaconda2/lib/python2.7/tarfile.py", line 1693, in open
return func(name, filemode, fileobj, **kwargs)
File "/anaconda2/lib/python2.7/tarfile.py", line 1778, in bz2open
t = cls.taropen(name, mode, fileobj, **kwargs)
File "/anaconda2/lib/python2.7/tarfile.py", line 1723, in taropen
return cls(name, mode, fileobj, **kwargs)
File "/anaconda2/lib/python2.7/tarfile.py", line 1587, in __init__
self.firstmember = self.next()
File "/anaconda2/lib/python2.7/tarfile.py", line 2370, in next
raise ReadError(str(e))
tarfile.ReadError: invalid header
`

The tarfile module is for tar files, including .tar.bz2. If your file is not a tar archive (a .ppm.bz2 is just a single bz2-compressed file), you should use the bz2 module directly.
Also, try using os.walk instead of multiple listdir as it can traverse the tree
import os
import bz2
import shutil

# `path` is the root directory to search (the images folder defined in the
# question's code). The loop variable is called `dirpath` so that it does not
# overwrite `path` while walking.
for dirpath, dirs, files in os.walk(path):
    for filename in files:
        basename, ext = os.path.splitext(filename)
        if ext.lower() != '.bz2':
            continue  # leave every file that is not a .bz2 untouched
        fullname = os.path.join(dirpath, filename)
        # Dropping the .bz2 suffix gives the output name: x.ppm.bz2 -> x.ppm
        newname = os.path.join(dirpath, basename)
        # bz2.BZ2File works on Python 2.7 (the version shown in the question's
        # traceback) as well as on Python 3; bz2.open only exists on 3.3+.
        with bz2.BZ2File(fullname) as fh, open(newname, 'wb') as fw:
            # Stream in chunks so large images are not held in memory at once.
            shutil.copyfileobj(fh, fw)
This will uncompress all .bz2 files in all subfolders, in the same place they are. All other files will stay the same. If the uncompressed file already exists it will be overwritten.
Please backup your data before running destructive code

Related

File not found error when copying images from one folder to another

I have a text file containing the names of images to be copied from a source folder to a destination folder. The source folder contains several sub-folders as shown below. The images may come from any of these sub-folders.
animals (source folder)
|-cats_1
|-cats_2
|-tigers_1
|-lions_1
|-lions_2
Shown below is the Python code:
# Question code as posted (indentation was lost in this copy).
# For each name read from cat_fam.txt, joins it onto a directory yielded by
# os.walk(src) and copies the resulting path to dest.
import os
import shutil
src = r'X:\animals' #source with multiple sub-folders
dest = r'X:\images\cat_family' #destination folder
with open('cat_fam.txt') as file: #text file containing the image names
for path, subdirs, files in os.walk(src):
# NOTE: `file` is the open file object, so this inner loop reads it to the end
# for the first directory os.walk yields; later directories get no names.
for name in file:
file_name = name.strip()
# NOTE: the path is built without checking that file_name is one of `files`.
filename = os.path.join(path, file_name)
shutil.copy2(filename, dest)
I encounter a file not found error as shown below:
File "C:\Users\AppData\Local\Temp\2/ipykernel_30556/2100413787.py", line 6, in <module>
shutil.copy2(filename, dest)
File "C:\Users\AppData\Local\Continuum\anaconda3\envs\tf2.7\lib\shutil.py", line 266, in copy2
copyfile(src, dst, follow_symlinks=follow_symlinks)
File "C:\Users\AppData\Local\Continuum\anaconda3\envs\tf2.7\lib\shutil.py", line 120, in copyfile
with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: 'X:\\animals\\lion_2345.jpg'
One approach would be to build a dictionary out of the data returned from os.walk()
The dictionary will be keyed on filename (not the full path) and its value will be the directory where the file resides.
Then you can read the cat_fam.txt file and lookup in the dictionary to see where it exists (if it exists!)
from os.path import join
from os import walk
from shutil import copy2

src = r'X:\animals'
# Destination folder from the question. It must already exist. Keeping it
# outside `src` means walk() never indexes files copied by an earlier run.
dest = r'X:\images\cat_family'

# Map each file name to the directory that contains it. If the same name
# occurs in several sub-folders, the last one visited wins.
db = dict()
for root, _, files in walk(src):
    for file in files:
        db[file] = root

with open('cat_fam.txt') as fam:
    # Strip the newline (and surrounding whitespace) from every listed name.
    for cat in map(str.strip, fam):
        fp = db.get(cat)
        # Names that were not found anywhere under src are skipped.
        if fp is not None:
            copy2(join(fp, cat), join(dest, cat))

Decompressing .bz2 files in a directory in python

I would like to decompress a bunch of .bz2 files contained in a folder (where there are also .zst files). What I am doing is the following:
# Question code as posted (indentation was lost in this copy).
# Reads every entry of compressed_files_path whose name contains ".bz2" and
# writes the decompressed bytes out.
destination_folder = "/destination_folder_path/"
compressed_files_path="/compressedfiles_folder_path/"
dirListing = os.listdir(compressed_files_path)
for file in dirListing:
if ".bz2" in file:
# NOTE: `file` is a bare name from os.listdir; it is not joined with
# compressed_files_path before being opened.
unpackedfile = bz2.BZ2File(file)
data = unpackedfile.read()
# NOTE: this opens destination_folder itself for writing, not a file inside it.
open(destination_folder, 'wb').write(data)
But I keep on getting the following error message:
Traceback (most recent call last):
File "mycode.py", line 34, in <module>
unpackedfile = bz2.BZ2File(file)
File ".../miniconda3/lib/python3.9/bz2.py", line 85, in __init__
self._fp = _builtin_open(filename, mode)
FileNotFoundError: [Errno 2] No such file or directory: 'filename.bz2'
Why do I receive this error?
You must be sure that all the file paths you are using exist.
It is better to use the full path to the file being opened.
import os
import bz2
import shutil

# Both directories must already exist.
destination_folder = "/full_path_to/folder/"
compressed_files_path = "/full_path_to_other/folder/"

# os.listdir returns bare file names (strings), not full paths.
dirListing = os.listdir(compressed_files_path)
for file in dirListing:
    # Test the extension rather than a substring, so a name such as
    # "data.bz2.zst" is not picked up by mistake.
    if not file.endswith(".bz2"):
        continue
    # Join the directory and the file name so the file is found whatever the
    # current working directory is.
    existing_file_path = os.path.join(compressed_files_path, file)
    # Drop the ".bz2" suffix for the name of the decompressed output.
    new_file_path = os.path.join(destination_folder, file[:-len(".bz2")])
    # Read through bz2 but write with a plain open(): writing through
    # bz2.open would compress the data again. copyfileobj streams in chunks,
    # and the with-statement closes both files even if an error occurs.
    with bz2.open(existing_file_path, "rb") as src, open(new_file_path, "wb") as dst:
        shutil.copyfileobj(src, dst)
You can also use the shutil module to copy or move files.
os.path.exists
os.path.join
shutil
bz2 examples

Iterating over a multiple files in a folder

I'm trying to develop a program that can iterate over different files in the same folder. The files are all in the same format but have different names. Right now, if there is only one file in the folder, the code executes with no problems, but with several files I get this error:
Traceback (most recent call last):
File "D:/Downloads/FYP/Feedback draft.py", line 24, in <module>
wb = openpyxl.load_workbook(filename)
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 315, in load_workbook
reader = ExcelReader(filename, read_only, keep_vba,
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 124, in __init__
self.archive = _validate_archive(fn)
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 96, in _validate_archive
archive = ZipFile(filename, 'r')
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38-32\lib\zipfile.py", line 1251, in __init__
self.fp = io.open(file, filemode)
FileNotFoundError: [Errno 2] No such file or directory: 'tester2.xlsx'
The code im using is :
# Question code as posted (incomplete excerpt; indentation was lost in this copy).
# Loads each .xlsx listed in `directory` and saves a rendered .docx into the
# same folder.
directory = r'D:\Downloads\FYP\TEST'
for filename in os.listdir(directory):
if filename.endswith(".xlsx"):
# NOTE: filename is a bare name from os.listdir, so it is resolved against the
# current working directory rather than `directory`.
wb = openpyxl.load_workbook(filename)
sh1=wb['test']
doc = DocxTemplate('Assignment1feedback.docx')
# `acceleration` is not defined in this excerpt.
context = {
'acc': acceleration
}
doc.render(context)
# NOTE: filename still ends in ".xlsx", so the output is "<name>.xlsx.docx".
doc.save('D:\\Downloads\\FYP\\TEST\\' + filename + '.docx')
This is incomplete code as the full thing would be quite long but overall i want to access these excel files and then create a corresponding docx
So os.listdir only provides the basename of the directory files, which will cause problems if your working directory does not match the value of directory. If your working directory is D:\Downloads, ./file.xlsx does not exist but D:\Downloads\FYP\TEST/file.xlsx does.
You will want to use the absolute path to the file, and you have two options here. You could follow @IronMan's suggestion in their comment and build the file path from the directory path and the file's basename:
import os

directory = r'D:\Downloads\FYP\TEST'
# Pass the directory explicitly: os.listdir() with no argument lists the
# current working directory instead.
for filename in os.listdir(directory):
    if filename.endswith(".xlsx"):  # same filter as the question's loop
        # Joining the directory and the bare file name gives a path that
        # works from any working directory.
        wb = openpyxl.load_workbook(os.path.join(directory, filename))
This is a simple and useful approach; however, its functionality is somewhat limited and may make changes harder in the future. The alternative is to use Python's os.scandir (pathlib's Path.iterdir works similarly) and access the path directly from each entry:
import os

directory = r'D:\Downloads\FYP\TEST'
# scandir is provided by the os module (pathlib has no scandir function).
# Each entry it yields already carries its full path in entry.path.
with os.scandir(directory) as entries:
    for entry in entries:
        # Only regular .xlsx files are workbooks; skip sub-folders and others.
        if entry.is_file() and entry.name.endswith(".xlsx"):
            wb = openpyxl.load_workbook(entry.path)

python convert compressed zip to uncompressed tar

I have a compressed .zip file that i want to convert to an uncompressed .tar file.
I made this function to do it:
# Question code as posted (indentation was lost in this copy).
# Extracts the zip at `filepath` into a working folder, packs that folder's
# entries into a tar opened in plain 'w' (uncompressed) mode, deletes the
# folder and the original zip, and returns the tar's path.
# Uses start_dir and onError, which are not defined inside this function.
def decompress(filepath):
# Capture the text between the first "_" and the next "_" in filepath.
# NOTE: "\." sits in a non-raw string literal; a raw string (r"...") is the
# usual form for regex patterns.
devname = re.search("_(.+?)_.+?\.zip", filepath).group(1)
with zipfile.ZipFile(filepath, 'r') as zip:
# [:-4] drops the last four characters of the base name (".zip").
path = os.path.join(start_dir, "Downloads", devname, str(os.path.basename(filepath))[:-4])
zip.extractall(path)
with tarfile.open(path + ".tar", 'w') as tar:
for object in os.listdir(path):
# arcname=object stores each entry relative to the archive root.
tar.add(os.path.join(path, object), arcname=object)
# Fixed pause before deleting the extracted folder.
time.sleep(2)
shutil.rmtree(path, ignore_errors=False, onerror=onError)
time.sleep(0.5)
os.remove(filepath)
return path + ".tar"
I am getting this error when running it:
Traceback (most recent call last):
File "File.py", line 195, in <module>
main()
File "File.py", line 184, in main
dld = download()
File "File.py", line 132, in download
filename = decompress(os.path.join(start_dir, "Downloads", devname, filename
))
File "File.py", line 103, in decompress
shutil.rmtree(path, ignore_errors=False, onerror=onError)
File "C:\Program Files (x86)\python27\lib\shutil.py", line 247, in rmtree
rmtree(fullname, ignore_errors, onerror)
File "C:\Program Files (x86)\python27\lib\shutil.py", line 247, in rmtree
rmtree(fullname, ignore_errors, onerror)
File "C:\Program Files (x86)\python27\lib\shutil.py", line 256, in rmtree
onerror(os.rmdir, path, sys.exc_info())
File "C:\Program Files (x86)\python27\lib\shutil.py", line 254, in rmtree
os.rmdir(path)
WindowsError: [Error 145] The directory is not empty: 'C:\\Users\\Vaibhav\\Deskt
op\\Folder\\Downloads\\toro\\_toro_nightly_test\\system\\app'
Here is my onError that I got from https://stackoverflow.com/a/2656405/2518263:
def onError(func, path, exc_info):
"""
Error handler for ``shutil.rmtree``.
If the error is due to an access error (read only file)
it attempts to add write permission and then retries.
If the error is for another reason it re-raises the error.
Usage : ``shutil.rmtree(path, onerror=onerror)``
"""
# NOTE: the usage line in the docstring spells the handler ``onerror``; this
# function is named ``onError``. The exc_info argument is not used.
if not os.access(path, os.W_OK):
# Is the error an access error ?
# chmod sets the mode to exactly S_IWUSR (owner-write only), then the
# operation that failed is retried once.
os.chmod(path, stat.S_IWUSR)
func(path)
else:
# Bare raise: re-raises the exception currently being handled. Presumably
# rmtree calls this handler from inside its except block - TODO confirm.
raise
I only get the error on random occasions. The decompress function works 75% of the time.
I don't know why I'm getting this error. Can someone suggest a better way to do this or a way to solve this error.
I just replaced:
shutil.rmtree(path, ignore_errors=False, onerror=onError)
with:
# Manual replacement for shutil.rmtree, as posted (indentation lost in this copy).
# topdown=False makes os.walk yield the deepest directories first, so each
# directory's contents are removed before the directory itself.
for root, dirs, files in os.walk(path, topdown=False):
for name in files:
filename = os.path.join(root, name)
# Set owner-write permission before removing the file.
os.chmod(filename, stat.S_IWUSR)
os.remove(filename)
for name in dirs:
os.rmdir(os.path.join(root, name))
# Fixed pause before removing the top-level directory.
time.sleep(0.5)
os.rmdir(path)
Now it works like a charm.
EDIT:
Never mind! I still get the error but just less often, now its about 20% of the time!! Please help!
EDIT 2:
OK, so I don't get the error if i extract to a temporary file so I am using this code:
import tempfile
# Second version from the question, as posted (indentation lost in this copy).
# Extracts the zip into a fresh temporary directory created under the current
# working directory, packs that directory's entries into "<filepath minus its
# last four characters>.tar", removes the temporary directory and the
# original zip, and returns the tar's path.
def decompress(filepath):
with zipfile.ZipFile(filepath) as zip:
# mkdtemp creates a uniquely named directory; dir= places it in the cwd.
tmp = tempfile.mkdtemp(dir=os.getcwd())
zip.extractall(tmp)
# Mode 'w' writes the tar without compression.
with tarfile.open(filepath[:-4] + ".tar", 'w') as tar:
for object in os.listdir(tmp):
tar.add(os.path.join(tmp, object), arcname=object)
# Fixed pause before deleting the temporary directory.
time.sleep(1)
shutil.rmtree(tmp)
os.remove(filepath)
return filepath[:-4] + ".tar"
It works now! (hopefully the error won't occur again)
EDIT 3:
I got the error again!!!!!!!!!! this is really getting on my nerves. Please help someone.
It looks like some process in your OS manages to create another file in the directory while you are in the middle of deleting it.
I suggest avoiding temporary files altogether and feeding the decompressed members of the original ZIP directly into the new TAR file.
This requires mapping ZipInfo fields to TarInfo fields, but it should be straightforward.
Here's my take on it:
def zip2tar(zipname, tarname):
    """Convert a zip archive to an uncompressed tar archive in one pass.

    Members are streamed straight from the zip into the tar, so nothing is
    extracted to disk and no temporary directory has to be cleaned up.

    Args:
        zipname: Path of the existing zip archive to read.
        tarname: Path of the tar archive to create (overwritten if present).

    Returns:
        None. The tar archive is written to ``tarname``.
    """
    import time  # local import so the snippet does not depend on a file-level one

    # The with-statement closes both archives even if a member fails to copy.
    with zipfile.ZipFile(zipname, 'r') as zipf, \
            tarfile.TarFile(tarname, 'w') as tarf:
        for zipinfo in zipf.infolist():
            tarinfo = tarfile.TarInfo(zipinfo.filename)
            # Zip timestamps are naive local times; mktime with isdst=-1 turns
            # them into epoch seconds using the offset valid on that date.
            try:
                tarinfo.mtime = int(
                    time.mktime(tuple(zipinfo.date_time) + (0, 0, -1)))
            except (OverflowError, ValueError):
                tarinfo.mtime = 0  # unrepresentable timestamp in the zip
            # Zip marks directories with a trailing slash in the member name.
            # (internal_attr bit 0 only flags "text file" and cannot be used
            # to tell files from directories.)
            if zipinfo.filename.endswith('/'):
                tarinfo.type = tarfile.DIRTYPE
                tarinfo.mode = 0o777
                tarf.addfile(tarinfo)  # directories carry no data
            else:
                tarinfo.type = tarfile.REGTYPE
                tarinfo.mode = 0o666
                tarinfo.size = zipinfo.file_size
                infile = zipf.open(zipinfo)
                try:
                    tarf.addfile(tarinfo, infile)
                finally:
                    infile.close()

Create folders based on file names and copy files to them in python

I have many files in this folder structure:
test[dir]
-test1 - 123.avi
-video[dir]
-test2 - 123.avi
I want to create folders based on file names (e.g. test1, test2) in the target dir and move the files to their respective folders.
I tried this based on code from another thread:
#!/usr/bin/env python
# Question code as posted (indentation was lost in this copy).
# Walks src and, for every file not ending in '.part', creates a folder in
# dest named after the text before the first "-" in the file name, then tries
# to move the file there.
import os, shutil
src = "/home/koogee/Code/test"
dest = "/home/koogee/Downloads"
for dirpath, dirs, files in os.walk(src):
for file in files:
if not file.endswith('.part'):
# Everything before the first "-" becomes the folder name.
Dir = file.split("-")[0]
newDir = os.path.join(dest, Dir)
if (not os.path.exists(newDir)):
os.mkdir(newDir)
# NOTE: `file` is a bare name from os.walk; it is not joined with dirpath, so
# it is resolved against the current working directory.
shutil.move(file, newDir)
I get this error:
Traceback (most recent call last):
File "<stdin>", line 8, in <module>
File "/usr/lib/python2.7/shutil.py", line 299, in move
copy2(src, real_dst)
File "/usr/lib/python2.7/shutil.py", line 128, in copy2
copyfile(src, dst)
File "/usr/lib/python2.7/shutil.py", line 82, in copyfile
with open(src, 'rb') as fsrc:
IOError: [Errno 2] No such file or directory: 'test1'
What is weird is that there is a folder created in /home/koogee/Downloads named 'test1'
When you try to do shutil.move(), your file variable is just the file name with no context of the directory, so it is looking for a file of that name in the script's current directory.
To get an absolute path, use os.path.join(dirpath, file) as the source:
shutil.move(os.path.join(dirpath, file), newDir)

Categories