Can mmap and gzip collaborate? - python

I'm trying to figure out how to use mmap with a gzip-compressed file. Is that even possible?
# Asker's attempt: open a .gz file with the gzip module, then memory-map it.
import mmap
import os
import gzip
filename = r'C:\temp\data.gz'
# Open the archive through the gzip module.
file = gzip.open(filename, "rb+")
# Size in bytes of the file as stored on disk.
size = os.path.getsize(filename)
# Map `size` bytes of the descriptor returned by fileno(); this rebinds
# `file` from the gzip object to the mmap object.
file = mmap.mmap(file.fileno(), size)
# Print the first 8 bytes read from the mapping.
print file.read(8)
The output data is compressed.

You can do this easily: the gzip module accepts a file-like object as an optional argument.
# Decompress a gzip file whose compressed bytes are accessed through mmap.
import contextlib
import gzip
import mmap

filename = "a.gz"
with open(filename, "rb") as handle:
    # Length 0 maps the whole file. contextlib.closing is used so the mapping
    # and the GzipFile are released even on interpreters where these objects
    # are not context managers themselves.
    with contextlib.closing(
        mmap.mmap(handle.fileno(), 0, access=mmap.ACCESS_READ)
    ) as mapped:
        # GzipFile reads the compressed bytes from the mapping and returns
        # the decompressed data.
        with contextlib.closing(gzip.GzipFile(mode="r", fileobj=mapped)) as gzfile:
            print(gzfile.read())
The same applies to tarfile module:
# List the members of a .tar.gz archive (path given as the first command-line
# argument) whose compressed bytes are accessed through mmap.
import contextlib
import mmap
import sys
import tarfile

with open(sys.argv[1], 'rb') as f:
    # Length 0 maps the whole file; closing() guarantees the mapping and the
    # archive are released once the names have been printed.
    with contextlib.closing(
        mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    ) as fo:
        with contextlib.closing(tarfile.open(mode='r:gz', fileobj=fo)) as tf:
            print(tf.getnames())

Well, not the way you want.
mmap() can be used to access the gzipped file if the compressed data is what you want.
mmap() is a system call for mapping disk blocks into RAM almost as if you were adding swap.
You can't map the uncompressed data into RAM with mmap() as it is not on the disk.

Related

Get tar file buffer without write to file with Python

I know how to tar file using Python
import os
import tarfile

# Asker's starting point: write Pic.jpeg into an archive file on disk.
# NOTE(review): the mode string is 'w:xz' although the file name ends in
# .gz -- kept exactly as posted.
with tarfile.open('res.tar.gz', 'w:xz') as tar:
    tar.add('Pic.jpeg')
But I want to do that without creating any tar.gz file on disk; I only want the resulting buffer.
How can I do that, please?
You could use this code to access result buffer
from io import BytesIO
import os
import tarfile

# The archive is written into this in-memory buffer because it is passed
# as fileobj.
buf = BytesIO()
with tarfile.open('/tmp/res.tar.gz', 'w:gz', fileobj=buf) as archive:
    archive.add("Pic.jpeg")

# Fetch the bytes only after the with-block has closed the archive, so the
# buffer holds the complete compressed stream.
data = buf.getvalue()

writing data into fifo file in python

I have recorded an audio file and I am converting it to base64. Now I want to write the encoded audio into a FIFO file.
Code is written below:
import os
import base64
import select

# Asker's attempt, kept as posted apart from layout and local names.
os.system("mkfifo audio1.fifo")

# The FIFO is opened with the default mode here.
with open("audio1.fifo") as fifo:
    select.select([fifo], [], [fifo])
    with open("out1.wav", "rb") as wav:
        encoded = base64.b64encode(wav.read())
        fifo.write(encoded)
But the above code only creates the FIFO file and does not write anything to it. Any suggestions would be appreciated.
A mode containing `+` supports reading and writing at the same time; to write to the FIFO, it must be opened in a writable mode:
# Base64-encode out1.wav and write the result into the FIFO audio1.fifo.
import base64
import os
import select

FIFO_PATH = "audio1.fifo"

# Create the FIFO directly instead of going through a shell. os.mkfifo raises
# if the path already exists, so only create it when it is missing.
if not os.path.exists(FIFO_PATH):
    os.mkfifo(FIFO_PATH)

with open(FIFO_PATH, "wb") as fifo:
    # Wait until the FIFO is ready for writing (second list) or reports an
    # exceptional condition (third list). The handle is write-only, so it
    # belongs in the write set rather than the read set.
    select.select([], [fifo], [fifo])
    with open("out1.wav", "rb") as audio_file:
        encoded = base64.b64encode(audio_file.read())
    fifo.write(encoded)
Reading from one file while writing to a different file at the same time:
# Make sure the file to be read contains some data.
with open("a.file", "w") as seed:
    seed.write("some data")

# a.file now has data; create b.file and copy a.file's text into it.
with open("b.file", "w") as dst, open("a.file", "r") as src:
    content = src.read()
    dst.write(content)

# The same copy for bytes data.
with open("b.file", "wb") as dst, open("a.file", "rb") as src:
    content = src.read()
    dst.write(content)

Python tarfile compress an object in memory

I am trying to use tarfile to add a file from memory and then write the archive back to disk, but when I extract the newly created tar.gz file, I get an empty file. What am I doing wrong in my code?
import tarfile
import io

# Asker's attempt, kept as posted apart from layout.
# Load the image bytes.
with open('logo.png', 'rb') as f:
    data = f.read()

# Build the gzip-compressed archive in an in-memory buffer.
fh = io.BytesIO()
with tarfile.open(fileobj=fh, mode='w:gz') as tar:
    info = tarfile.TarInfo('some.png')
    # The raw bytes are passed as the second argument and info.size is
    # never set.
    tar.addfile(info, data)

# Dump the buffer to disk.
with open('/tmp/test/test.tar.gz', 'wb') as f:
    f.write(fh.getvalue())
I also tried doing tar.addfile(info, fh.write(data)), but that just creates a corrupted tar file.
TarFile.addfile() takes a file-like object.
When the documentation says:
tarinfo.size bytes are read from it and added to the archive.
It means that tarinfo.size is used to determine how many bytes to read. Therefore, you need to set tarinfo.size appropriately.
The only thing you need to do is read the data from the source, count the length, then load that data into a BytesIO object:
E.g.
import tarfile
import io

# Read the whole image so that its length is known.
with open('logo.png', 'rb') as image:
    png_bytes = image.read()

# Wrap the bytes in a file-like object, since addfile() reads from one.
member_stream = io.BytesIO(png_bytes)

archive_buf = io.BytesIO()
with tarfile.open(fileobj=archive_buf, mode='w:gz') as tar:
    member = tarfile.TarInfo('logo.png')
    # addfile() reads exactly member.size bytes from the stream.
    member.size = len(png_bytes)
    tar.addfile(member, member_stream)

# Persist the finished archive.
with open('test.tar.gz', 'wb') as out:
    out.write(archive_buf.getvalue())
Or, for a more memory-efficient approach, seek within the source file to find its length instead of reading it all:
# Add logo.png to an in-memory .tar.gz without first reading it into a
# separate bytes object, then write the archive to test.tar.gz.
import io
import tarfile

fh = io.BytesIO()
# The source handle has its own name and its own with-block, so it is always
# closed and is never rebound by the output file opened further down.
with open('logo.png', 'rb') as source:
    source.seek(0, io.SEEK_END)  # go to the end
    source_len = source.tell()   # offset at the end == length in bytes
    source.seek(0)               # rewind so addfile() reads from the start
    with tarfile.open(fileobj=fh, mode='w:gz') as tar:
        info = tarfile.TarInfo('logo.png')
        info.size = source_len
        tar.addfile(info, source)

with open('test.tar.gz', 'wb') as out:
    out.write(fh.getvalue())

Creating an MD5 Hash of A ZipFile

I want to create an MD5 hash of a ZipFile, not of one of the files inside it. However, ZipFile objects aren't easily convertible to streams.
# Asker's attempt: feed a ZipFile object straight into an MD5 hasher.
from hashlib import md5
from zipfile import ZipFile
zipped = ZipFile(r'/Foo/Bar/Filename.zip')
hasher = md5()
# update() is given the ZipFile object itself rather than bytes; this is the
# call that produces the TypeError quoted after this snippet.
hasher.update(zipped)
return hasher.hexdigest()
The above code generates the error :TypeError: must be convertible to a buffer, not ZipFile.
Is there a straightforward way to turn a ZipFile into a stream?
There are no security concerns here; I just need a quick and easy way to determine whether I've seen a file before. hash(zipped) works fine, but I'd like something a little more robust if possible.
Just open the ZipFile as a regular file. Following code works on my machine.
# Print the hex MD5 digest of the archive, treating it as an ordinary file.
from hashlib import md5

m = md5()
with open("/Foo/Bar/Filename.zip", "rb") as f:
    # Feed the hasher fixed-size chunks so that a large file is never held
    # in memory all at once; read() returns b"" at end of file, which stops
    # the iteration.
    for chunk in iter(lambda: f.read(8192), b""):
        m.update(chunk)
print(m.hexdigest())
This function should return the MD5 hash of any file, given its path (requires the pycrypto module):
from Crypto.Hash import MD5
def get_MD5(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 8192-byte chunks, each
    of which is passed to the hash object's ``update``.
    """
    chunk_size = 8192
    digest = MD5.new()
    with open(file_path, 'rb') as stream:
        # read() yields an empty bytes object at end of file, which ends
        # the iteration.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
print get_MD5('pics.zip') # example
output:
6a690fa3e5b34e30be0e7f4216544365
Info on pycrypto

decompress a bz2 file from BytesIO

I want to read a bz2 file from a server, decompress it, and parse it with the csv module, but I keep getting this error:
myfile = bz2.BZ2File(bio.read(), "rb")
TypeError: file() argument 1 must be encoded string without NULL bytes, not str
import paramiko
from config import config
import bz2
import csv
import StringIO
from io import BytesIO

# Asker's attempt, kept as posted apart from layout.
# Connect to the host named in the configuration and open an SFTP session.
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(
    config.get('mrc_ssh', 'host'),
    username=config.get('mrc_ssh', 'user'),
)
sftp_client = ssh.open_sftp()

# Pull the remote file's contents into an in-memory buffer.
_file = sftp_client.open('/home/myfile.bz2')
bio = BytesIO(_file.read())
print(bio)

# The buffer's contents are passed as the first argument of BZ2File here.
myfile = bz2.BZ2File(bio.read(), "rb")
reader = csv.DictReader(myfile)
for row in reader:
    print(row)
bz2.BZ2File takes a filename as its first argument, not the actual data.
Either pass it a path (if you can store the file locally):
myfile = bz2.BZ2File('/home/myfile.bz2', "rb")
Or use the one-shot decompression function bz2.decompress
uncompressed_data = bz2.decompress(bio.read())

Categories