Get a list of the files contained in a zip file - Python

I have a zip archive: my_zip.zip. Inside it is one txt file, the name of which I do not know. I was taking a look at Python's zipfile module ( http://docs.python.org/library/zipfile.html ), but couldn't work out how to use it for what I'm trying to do.
How would I do the equivalent of 'double-clicking' the zip file to get the txt file and then use the txt file so I can do:
>>> f = open('my_txt_file.txt','r')
>>> contents = f.read()

What you need is ZipFile.namelist(), which gives you a list of all the contents of the archive; you can then call open('filename_you_discover') on the ZipFile object to get the contents of that file.

import zipfile

# Open the archive; the context manager closes it when the block ends.
# (Named `archive` rather than `zip` so the builtin zip() is not shadowed.)
with zipfile.ZipFile('filename.zip') as archive:
    # list available files in the container
    print(archive.namelist())
    # read a specific file from the zip container
    with archive.open("file_inside_zip.txt") as member:
        content = member.read()

# save the extracted file (binary mode: ZipFile.open() yields bytes)
with open('file_inside_zip.extracted.txt', 'wb') as out:
    out.write(content)

import zipfile

# Context managers close both the archive and the member, even on error.
# (Named `archive` rather than `zip` so the builtin zip() is not shadowed.)
with zipfile.ZipFile('my_zip.zip') as archive:
    with archive.open('my_txt_file.txt') as f:
        contents = f.read()
You can see the documentation here. In particular, the namelist() method will give you the names of the zip file members.

Related

Unzipping with specific name

Based on this answer, is there an option to rename the file when extracting it? Or what is the best solution to do so?
I didn't find anything in the documentation.
I found two methods:
Using ZipFile.read()
You can get data from zip file using ZipFile.read() and write it with new name using standard open(), write()
import zipfile

# The context manager closes the archive once all members are processed.
with zipfile.ZipFile('image.zip') as z:
    for f in z.infolist():
        # ZipFile.read() accepts a ZipInfo and returns the member's bytes
        data = z.read(f)
        # NOTE: every member is written to the same target name, so this is
        # only meaningful for an archive that holds a single file.
        with open('new_name.png', 'wb') as fh:
            fh.write(data)
Using zipfile.extract() with ZipInfo
You can change name before using extract()
import zipfile

# The context manager closes the archive once all members are extracted.
with zipfile.ZipFile('image.zip') as z:
    for f in z.infolist():
        # Uncomment to inspect the names before renaming:
        #print(f.filename)
        #print(f.orig_filename)
        # extract() writes the member under whatever f.filename says
        f.filename = 'new_name.png'
        z.extract(f)
This version can automatically create subfolders if you use
f.filename = 'folder/subfolder/new_name.png'
z.extract(f)
f.filename = 'new_name.png'
z.extract(f, 'folder/subfolder')

Read a file from a folder and extract a specific key from the file and save as in CSV file

I'm new to Python, and the task I am performing is to extract specific key values from a set of .iris files (each of which contains data in a nested-dictionary format) in a specific directory.
I wanted to extract the specific value and save it as a new .csv file and repeat it for all other files.
Below is a sample of my .iris file, from which I should extract only these keys ('uid','enabled','login','name').
{"streamType":"user",
"uid":17182,
"enabled":true,
"login":"xyz",
"name":"abcdef",
"comment":"",
"authSms":"",
"email":"",
"phone":"",
"location":"",
"extraLdapOu":"",
"mand":997,
"global":{
"userAccount":"View",
"uid":"",
"retention":"No",
"enabled":"",
"messages":"Change"},
"grants":[{"mand":997,"role":1051,"passOnToSubMand":true}],
I am trying to convert the .iris files to .json and read the files one by one, but unfortunately I am not getting the desired output.
Please, could anyone help me?
My code (added from comments):
import os
import csv
path = ''
os.chdir(path)
# Read iris File
def read_iris_file(file_path):
with open(file_path, 'r') as f:
print(f.read())
# iterate through all files
for file in os.listdir():
# Check whether file is in iris format or not
if file.endswith(".iris"):
file_path = f"{path}\{file}"
# call read iris file function
print(read_iris_file(file_path))
Your files contain data in JSON format, so we can use the built-in json module to parse it. To iterate over files with a certain extension you can use Path.glob() from pathlib with the pattern "*.iris". Then we can use csv.DictWriter() and pass "ignore" as the extrasaction argument, which makes DictWriter ignore the keys we don't need and write only those passed in the fieldnames argument.
Code:
import csv
import json
from pathlib import Path

# Folder that holds the *.iris inputs and receives result.csv
path = Path(r"path/to/folder")
# Only these top-level keys end up as CSV columns
keys = "uid", "enabled", "login", "name"

with open(path / "result.csv", "w", newline="") as out_f:
    # extrasaction='ignore' makes the writer drop every key not in fieldnames
    writer = csv.DictWriter(out_f, fieldnames=keys, extrasaction='ignore')
    writer.writeheader()
    for file in path.glob("*.iris"):
        # each .iris file is parsed as one JSON document -> one CSV row
        with open(file) as inp_f:
            writer.writerow(json.load(inp_f))
Try the below (the key point here is loading the iris file using ast)
import json

# NOTE: the .iris content is JSON. ast.literal_eval() only accepts Python
# literals and raises ValueError on JSON's true/false/null, so the file is
# parsed with the json module instead.
fields = ('uid','enabled','login','name')

with open('my.iris') as f1:
    data = json.load(f1)

with open('my.csv','w') as f2:
    f2.write(','.join(fields) + '\n')
    # str.join() needs strings: text is written as-is, every other value is
    # rendered with json.dumps() so it keeps its JSON spelling (17182, true).
    row = [value if isinstance(value, str) else json.dumps(value)
           for value in (data[f] for f in fields)]
    f2.write(','.join(row) + '\n')
my.csv
uid,enabled,login,name
17182,true,xyz,abcdef

How to append particular list in other python file and save it

I need to change the items in the list saved in other python file
file A.py
items = ['A','B','C']
file B.py
import A
A.items.append('D')
It works, but when I restart the script it switches back to the previous version.
You need to store the data in a file to save it.
In this script I have the items list in file A, file B adds 'D' to the list, and then adds
it to a txt file.
You can print the txt file in file A after you run file B to see the new items list. The new list gets saved as new_items
If you run file B multiple times, it will add the list more than once.
File A:
items = ['A','B','C']

# Print the list saved in the txt file, if there is one yet.
# File B imports this module *before* it creates items.txt, so a missing
# file must not raise here; an empty list is shown instead.
try:
    with open("items.txt","r") as f:
        new_items = f.read().split()
except FileNotFoundError:
    new_items = []
print(new_items)
File B:
import A
#Adds 'D' to the items list and stores it as a new variable
A.items.append('D')
items = A.items
#Writes the data to a txt file
with open('items.txt', 'a') as f:
f.write(str(items))
When an application exits, the memory used by that application is freed. This is why we need files: we can write our data into them. Also, actions that your code performs while running do not affect the source code itself.
If your items will be a list of strings you can use a lightweight solution like this:
#a.py
import os, ast

items = ['A','B','C']
file = "data.txt"  # where the list is persisted between runs

if not os.path.exists(file):
    # first run: nothing saved yet, so store the default list
    with open(file, "w") as f:
        f.write(str(items))
else:
    # later runs: restore the list written by a previous run
    with open(file, "r") as f:
        items = ast.literal_eval(f.read())
#b.py
import a
a.items.append("D")
with open("data.txt", "w") as f: # open our file in 'w'rite mode
f.write(str(a.items)) # save a.items
As a general solution, you can also use pickle or json module for saving lists or other objects.
Docs:
ast.literal_eval, open,
os.path

How to extract a specific file from an archive downloaded from the internet using only memory

I'm looking for a way to extract a specific file (whose name I know) from an archive containing multiple files, without writing any file to the hard drive.
I tried to use both StringIO and zipfile, but I only get the entire archive, or the same error from zipfile (open requires a different argument than a StringIO object).
Needed behaviour:
archive.zip #containing ex_file1.ext, ex_file2.ext, target.ext
extracted_file #the targeted unzipped file
archive.zip = getFileFromUrl("file_url")
extracted_file = extractFromArchive(archive.zip, target.ext)
What I've tried so far:
import zipfile, requests
data = requests.get("file_url")
zfile = StringIO.StringIO(zipfile.ZipFile(data.content))
needed_file = zfile.open("Needed file name", "r").read()
There is a builtin library, zipfile, made for working with zip archives.
https://docs.python.org/2/library/zipfile.html
You can list the files in an archive:
ZipFile.namelist()
and extract a subset:
ZipFile.extract(member[, path[, pwd]])
EDIT:
This question has in-memory zip info. TLDR, Zipfile does work with in-memory file-like objects.
Python in-memory zip library
After a few hours of testing, I finally found why I didn't succeed:
I was buffering the ZipFile object instead of buffering the file itself and then opening it as a ZipFile object, which raised a type error.
Here is the way to do :
import io
import zipfile

import requests

data = requests.get(url) # Getting the archive from the url
# Wrap the downloaded bytes in an in-memory file and open *that* as a zip.
# io.BytesIO is used because the response content is bytes.
zfile = zipfile.ZipFile(io.BytesIO(data.content))
filenames = zfile.namelist() # Listing all files
needed_file = None # stays None when the archive does not contain the file
for name in filenames:
    if name == "Needed file name": # Verify the file is present
        needed_file = zfile.open(name, "r").read() # Getting the needed file content
        break

How to read multiple CSV files in a zip folder using Python

I need to read multiple csv files in a zip folder and extract the data from those csv's into a container in Python.
I am new to Python and have only basic knowledge, so a detailed explanation would be appreciated.
Thanks in advance
Sampath
The first thing to do is to open the zip file using module zipfile. Then read the CSV data from each archived file and store it in a container such as a dictionary.
The following will read the data from each file in the zip archive into a dictionary keyed by the file name.
import zipfile

# Map each member name to the raw contents of that member
with zipfile.ZipFile('/path/to/your/zipfile') as zf:
    container = {name: zf.read(name) for name in zf.namelist()}

# Dump every member, separated by a ruler line
for name, payload in container.items():
    print("Contents of file {}:".format(name))
    print(payload)
    print("============================\n")
Optionally you could process the csv data using module csv. Something like this should get you started:
import csv
import io
import zipfile

container = {}
with zipfile.ZipFile('/path/to/your/zipfile') as zf:
    for name in zf.namelist():
        # zf.read() returns bytes, but csv.reader needs text.
        # NOTE(review): assumes the CSV members are UTF-8 encoded - adjust
        # the codec if your files use another encoding.
        text = zf.read(name).decode('utf-8')
        # newline='' lets the csv module handle line endings itself
        container[name] = csv.reader(io.StringIO(text, newline=''))
Now container is a dictionary keyed by file name with csv.reader objects as values.
Here is how you can read all the text inside zip:
import zipfile
archive = 'c:\\test\\archive.zip'
def readZip(archive):
    """Return the lines of every file stored in the zip *archive*.

    archive: path to a zip file, or a file-like object containing one.

    Returns a list of lines (bytes, line endings kept) from all members in
    archive order; an empty archive yields an empty list.
    """
    lines = []
    # Context managers close the archive and each member once they are read.
    with zipfile.ZipFile(archive) as zfile:
        for finfo in zfile.infolist():
            with zfile.open(finfo) as ifile:
                # accumulate instead of overwriting, so no member is dropped
                lines.extend(ifile.readlines())
    return lines
print(readZip(archive))
Thanks for the help.
Apart from the code provided above, I have come up with code that answers the question:
import os
import csv
from zipfile import ZipFile

ARCHIVE = "Src_AdventureWorks_Files.zip"
DEST = "/home/sreddi/workspace/DQAS_Main/Src_AdventureWorks_Files/"

# Extract every file in the zip to the destination and record the member
# names. The archive is opened once, by name (ZipFile reads it in binary
# mode itself), and closed by the context manager.
with ZipFile(ARCHIVE, 'r') as zf:
    zf.extractall(DEST)
    zc = zf.namelist()
print("Extraction successful")

# Metadata of the zip file: the member names
print(zc)

# Print the rows of each extracted csv file, one file after another
if __name__ == "__main__":
    for csv_name in zc:
        print("###########")
        print(csv_name)
        print("###########")
        # Build the path explicitly instead of changing the working directory
        with open(os.path.join(DEST, csv_name)) as f:
            for row in csv.reader(f):
                print(row)

Categories