Read a file within an archived archive (without extracting)

Read a file within an archived archive (without extracting) - python

What I am trying to do is to read a file located in an archived archive as shown below:
I want to access a "document.txt" file
Code 1:
import zipfile
with zipfile.ZipFile("archive.zip", mode="r") as archive:
with zipfile.ZipFile("archive2.zip", mode="r") as archive2:
text = archive2.read("document.txt")
print(text) #FileNotFountError: [Errno 2] No such file or directory: 'archive2.zip'
Code 2:
import zipfile
with zipfile.ZipFile("archive.zip/archive2.zip", mode="r") as archive:
text = archive.read("document.txt")
print(text) #FileNotFountError: [Errno 2] No such file or directory: 'archive.zip/archive2.zip'
None of the above works. How can I read a "document.txt" file that is located in an "archive2.zip", which in turn is located in an "archive.zip" file without extracting anything? Thank you very much.
archive.zip file for your reference

You need to open first then read it.
Ex:-
import zipfile
with zipfile.ZipFile("archive.zip/archive2.zip", mode="r") as archive:
text = archive.open("document.txt").read().decode()
print(text)

You first need to open archive.zip, then read the raw archive2.zip file and finally read the document within that zip file.
import zipfile
with zipfile.ZipFile(r"archive.zip", "r") as archive:
with zipfile.ZipFile(archive.open(r"archive/archive2.zip", "r")) as archive2:
doc = archive2.open("archive2/document.txt")
content = doc.readlines()
print(content)

Related

Filter Directory using Regex and output filtered files to another directory

I am simply trying to create a Python 3 program that runs through all .sql files in a specific directory, applies my regex (which adds a ; after a certain match), and writes the modified content to a separate directory, keeping the same file names.
So, if I had file1.sql and file2.sql in the "/home/files" directory, then after I run the program it should write those two files to "/home/new_files" without changing the content of the original files.
Here is my code:
import glob
import re
folder_path = "/home/files/d_d"
file_pattern = "/*sql"
folder_contents = glob.glob(folder_path + file_pattern)
for file in folder_contents:
print("Checking", file)
for file in folder_contents:
read_file = open(file, 'rt',encoding='latin-1').read()
#words=read_file.split()
with open(read_file,"w") as output:
output.write(re.sub(r'(TBLPROPERTIES \(.*?\))', r'\1;', f, flags=re.DOTALL))
I receive a "File name too long" error: "CREATE EXTERNAL TABLe", and I am also not sure where I would put my output path (/home/files/new_dd) in my code.
Any ideas or suggestions?

With read_file = open(file, 'rt',encoding='latin-1').read() the whole content of the file was stored in read_file and then passed to open() as the file name, which is why the error says the file name is too long. The code provided here iterates over the file names found with the glob.glob pattern, opens each file to read, processes the data, and opens a new file to write (assuming that a folder newfile_sqls already exists;
if not, a FileNotFoundError: [Errno 2] No such file or directory would be raised).
import glob
import os
import re
folder_path = "original_sqls"
#original_sqls\file1.sql, original_sqls\file2.sql, original_sqls\file3.sql
file_pattern = "*sql"
# new/modified files folder
output_path = "newfile_sqls"
folder_contents = glob.glob(os.path.join(folder_path,file_pattern))
# iterate over file names
for file_ in [os.path.basename(f) for f in folder_contents]:
# open to read
with open(os.path.join(folder_path,file_), "r") as inputf:
read_file = inputf.read()
# use variable 'read_file' here
tmp = re.sub(r'(TBLPROPERTIES \(.*?\))', r'\1;', read_file, flags=re.DOTALL)
# open to write to (previouly created) new folder
with open(os.path.join(output_path,file_), "w") as output:
output.writelines(tmp)

Extracting zip with password to another dir without foldername

I have this password protected zip folder:
folder_1\1.zip
When I extract this it gives me
1\image.png
How can I extract this to another folder without its folder name? Just the contents of it: image.png
So far I have tried all the Stack Overflow solutions I could find, and I have spent 11 hours straight trying to solve this.
import zipfile
zip = zipfile.ZipFile('C:\\Users\\Desktop\\folder_1\\1.zip', 'r')
zip.setpassword(b"virus")
zip.extractall('C:\\Users\\Desktop') <--target dir to extract all contents
zip.close()
EDIT:
This code worked for me. (Now I want many paths to be extracted at once; any ideas?)
import os
import shutil
import zipfile
my_dir = r"C:\\Users\\Desktop"
my_zip = r"C:\\Users\\Desktop\\test\\folder_1\\1.zip"
with zipfile.ZipFile(my_zip) as zip_file:
zip_file.setpassword(b"virus")
for member in zip_file.namelist():
filename = os.path.basename(member)
# skip directories
if not filename:
continue
# copy file (taken from zipfile's extract)
source = zip_file.open(member)
target = file(os.path.join(my_dir, filename), "wb")
with source, target:
shutil.copyfileobj(source, target)

You can use the ZipFile.read() method to read the specific file in the archive, open your target file for writing by joining the target directory with the base name of the source file, and then write what you read to it:
import zipfile
import os
zip = zipfile.ZipFile('C:\\Users\\Desktop\\folder_1\\1.zip', 'r')
zip.setpassword(b"virus")
for name in zip.namelist():
if not name.endswith(('/', '\\')):
with open(os.path.join('C:\\Users\\Desktop', os.path.basename(name)), 'wb') as f:
f.write(zip.read(name))
zip.close()
And if you have several paths containing 1.zip for extraction:
import zipfile
import os
for path in 'C:\\Users\\Desktop\\folder_1', 'C:\\Users\\Desktop\\folder_2', 'C:\\Users\\Desktop\\folder_3':
zip = zipfile.ZipFile(os.path.join(path, '1.zip'), 'r')
zip.setpassword(b"virus")
for name in zip.namelist():
if not name.endswith(('/', '\\')):
with open(os.path.join('C:\\Users\\Desktop', os.path.basename(name)), 'wb') as f:
f.write(zip.read(name))
zip.close()

Python ZipFile return extracted file path and name

I have this current code to unzip the contents of archive to extract_dir. However, I cannot figure out how to get the extracted file path & name of the extracted file.
if archive.endswith((".zip")):
zip_ref = zipfile.ZipFile(archive, 'r')
zip_ref.extract(extract_dir)
zip_ref.close()
For example, if the archive is called test.zip and ZipFile extracts the contents test.exe, I want to get C:/windows/users/admin/downloads/test.exe into a variable.
EDIT: Sorry I wasn't clear. In the source code for zipfile, targetpath is returned; I am wondering how I can get this.

Here is the solution, I can't accept the answer for 2 days though.
if archive.endswith((".zip")):
print "example.jpg"
zip_ref = zipfile.ZipFile(archive, 'r')
extracted = zip_ref.namelist()
zip_ref.extractall(extract_dir)
zip_ref.close()
extracted_file = os.path.join(extract_dir, extracted[0])

How to sequentially read all the files in a directory and export the contents in Python?

I have a directory /directory/some_directory/ and in that directory I have a set of files. Those files are named in the following format: <letter>-<number>_<date>-<time>_<dataidentifier>.log, for example:
ABC1-123_20162005-171738_somestring.log
DE-456_20162005-171738_somestring.log
ABC1-123_20162005-153416_somestring.log
FG-1098_20162005-171738_somestring.log
ABC1-123_20162005-031738_somestring.log
DE-456_20162005-171738_somestring.log
I would like to read a subset of those files (for example, read only files named as ABC1-123*.log) and export all their contents to a single csv file (for example, output.csv), that is, a CSV file that will have all the data from the individual files collectively.
The code that I have written so far:
#!/usr/bin/env python
import os
file_directory=os.getcwd()
m_class="ABC1"
m_id="123"
device=m_class+"-"+m_id
for data_file in sorted(os.listdir(file_dir)):
if str(device)+"*" in os.listdir(file_dir):
print data_file
I don't know how to read only a subset of filtered files, or how to export them to a common csv file.
How can I achieve this?

just use re lib to match file name pattern, and use csv lib to export.

Only a few adjustments are needed; you were close.
filesFromDir = os.listdir(os.getcwd())
fileList = [file for file in filesFromDir if file.startswith(device)]
f = open("LogOutput.csv", "ab")
for file in fileList:
#print "Processing", file
with open(file, "rb") as log_file:
txt = log_file.read()
f.write(txt)
f.write("\n")
f.close()

Your question could be better stated. Based on your current code snippet, I'll assume that you want to:
Filter files in a directory based on glob pattern.
Concatenate their contents to a file named output.csv.
In python you can achieve (1.) by using glob to list filenames.
import glob
for filename in glob.glob('foo*bar'):
print filename
That would print all files starting with foo and ending with bar in
the current directory.
For (2.) you just read the file and write its content to your desired
output, using python's open() builtin function:
open('filename', 'r')
(Using 'r' as the mode you are asking python to open the file for
"reading", using 'w' you are asking python to open the file for
"writing".)
The final code would look like the following:
import glob
import sys
device = 'ABC1-123'
with open('output.csv', 'w') as output:
for filename in glob.glob(device+'*'):
with open(filename, 'r') as input:
output.write(input.read())

You can use the os module to list the files.
import os
files = os.listdir(os.getcwd())
m_class = "ABC1"
m_id = "123"
device = m_class + "-" + m_id
file_extension = ".log"
# filter the files by their extension and the starting name
files = [x for x in files if x.startswith(device) and x.endswith(file_extension)]
f = open("output.csv", "a")
for file in files:
with open(file, "r") as data_file:
f.write(data_file.read())
f.write(",\n")
f.close()

How to read text files in a zipped folder in Python

I have a compressed data file (all in a folder, then zipped). I want to read each file without unzipping. I tried several methods but nothing works for entering the folder in the zip file. How should I achieve that?
Without folder in the zip file:
with zipfile.ZipFile('data.zip') as z:
for filename in z.namelist():
data = filename.readlines()
With one folder:
with zipfile.ZipFile('data.zip') as z:
for filename in z.namelist():
if filename.endswith('/'):
# Here is what I was stucked

namelist() returns a list of all items in an archive recursively.
You can check whether an item is a directory by calling os.path.isdir():
import os
import zipfile
with zipfile.ZipFile('archive.zip') as z:
for filename in z.namelist():
if not os.path.isdir(filename):
# read the file
with z.open(filename) as f:
for line in f:
print line
Hope that helps.

I got Alec's code to work. I made some minor edits: (note, this won't work with password-protected zipfiles)
import os
import sys
import zipfile
z = zipfile.ZipFile(sys.argv[1]) # Flexibility with regard to zipfile
for filename in z.namelist():
if not os.path.isdir(filename):
# read the file
for line in z.open(filename):
print line
z.close() # Close the file after opening it
del z # Cleanup (in case there's further work after this)

I got RichS' code to work. I made some minor edits:
import os
import sys
import zipfile
archive = sys.argv[1] # assuming launched with `python my_script.py archive.zip`
with zipfile.ZipFile(archive) as z:
for filename in z.namelist():
if not os.path.isdir(filename):
# read the file
for line in z.open(filename):
print(line.decode('utf-8'))
As you can see the edits are minor. I've switched to Python 3, the ZipFile class has a capital F, and the output is converted from b-strings to unicode strings. Only decode if you are trying to unzip a text file.
PS I'm not dissing RichS at all. I just thought it would be hilarious. Both useful and a mild shitpost.
PPS You can get a file from an archive with a password: ZipFile.open(name, mode='r', pwd=None, *, force_zip64=False) or ZipFile.read(name, pwd=None). If you use .read then there's no context manager, so you would simply do
# read the file
print(z.read(filename).decode('utf-8'))

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Read a file within an archived archive (without extracting) - python

You need to open first then read it. Ex:- import zipfile with zipfile.ZipFile("archive.zip/archive2.zip", mode="r") as archive: text = archive.open("document.txt").read().decode() print(text)

Related

Filter Directory using Regex and output filtered files to another directory

Extracting zip with password to another dir without foldername

Python ZipFile return extracted file path and name

How to sequentially read all the files in a directory and export the contents in Python?

How to read text files in a zipped folder in Python

Categories

Resources