AES encryption and padding across multiple blocks - python

I am encrypting a large (100GB+) file with Python using PyCryptodome using AES-256 in CBC mode.
Rather than read the entire file into memory and encrypt it in one fell swoop, I would like to read the input file a 'chunk' at a time and append to the output file with the results of encrypting each 'chunk.'
Regrettably, the documentation for PyCryptodome is lacking in that I can't find any examples of how to encrypt a long plaintext with multiple calls to encrypt(). All the examples use a short plaintext and encrypt the entire plaintext in a single call to encrypt().
I had assumed that if my input 'chunk' is a multiple of 16 bytes (the block size of AES in CBC mode) I wouldn't need to add padding to any 'chunk' but the last one. However, I wasn't able to get that to work. (I got padding errors while decrypting.)
I'm finding that in order to successfully decrypt the file, I need to add padding to every 'chunk' when encrypting, and decrypt in units of the input chunk size plus 16 bytes. This means the decrypting process needs to know the 'chunk size' used for encryption, which makes me believe that this is probably an incorrect implementation.
While I do have my encryption/decryption working as described, I wonder if this is the 'correct' way to do it. (I suspect it is not.) I've read inconsistent claims on whether or not every such 'chunk' needs padding. If not, I'd like some handholding to get Pycryptodome to encrypt and then decrypt a large plaintext across multiple calls to encrypt() and decrypt().
EDIT: This code throws a ValueError, "Padding is incorrect," when decrpyting the first 'chunk'.
def encrypt_file(infile, outfile, aeskey, iv):
cipher = AES.new(aeskey, AES.MODE_CBC, iv)
with open(infile, "rb") as fin:
with open(outfile, "wb") as fout:
while True:
data = fin.read(16 * 32)
if len(data) ==0:
break
insize = len(data)
if insize == (16 * 32):
padded_data = data
else:
padded_data = pad(data, AES.block_size)
fout.write(cipher.encrypt(padded_data))
def decrypt_file(infile, outfile, aeskey, iv):
cipher = AES.new(aeskey, AES.MODE_CBC, iv)
with open (infile, "rb") as fin:
with open(outfile, "wb") as fout:
while True:
data = fin.read(16 * 32)
if len(data) == 0:
break
fout.write(unpad(cipher.decrypt(data), AES.block_size))

My problem was related to the PAD of the last block. It is necessary to detect which is the last fragment read in bytes in order to add the PAD.
def decrypt_file(
self, filename: str, output_file: str, save_path: str, key, iv
):
cipher_aes = AES.new(key, AES.MODE_CBC, iv)
log.info(f'Decrypting file: {filename} output: {output_file}')
count = 0
previous_data = None
with open(filename, "rb") as f, open(
f"{save_path}/{output_file}", "wb"
) as f2:
while True:
count+=1
data = f.read(self.block_size)
if data == b"":
decrypted = cipher_aes.decrypt(previous_data)
log.info(f'Last block UnPadding Count: {count} BlockSize: {self.block_size}')
decrypted = unpad(decrypted, AES.block_size, style="pkcs7")
f2.write(decrypted)
break
if previous_data:
decrypted = cipher_aes.decrypt(previous_data)
f2.write(decrypted)
previous_data = data
And apply the decrypt:
def decrypt_file(
self, filename: str, output_file: str, save_path: str, key, iv
):
cipher_aes = AES.new(key, AES.MODE_CBC, iv)
log.info(f'Decrypting file: {filename} output: {output_file}')
count = 0
previous_data = None
with open(filename, "rb") as f, open(
f"{save_path}/{output_file}", "wb"
) as f2:
while True:
count+=1
data = f.read(self.block_size)
if data == b"":
decrypted = cipher_aes.decrypt(previous_data)
log.info(f'Last block UnPadding Count: {count} BlockSize: {self.block_size}')
decrypted = unpad(decrypted, AES.block_size, style="pkcs7")
f2.write(decrypted)
break
if previous_data:
decrypted = cipher_aes.decrypt(previous_data)
f2.write(decrypted)
previous_data = data

It looks like the fix is to do similar chunksize/padding comparison in the decrypt function as I used in the encrypt function:
def decrypt_file(infile, outfile, aeskey, iv):
cipher = AES.new(aeskey, AES.MODE_CBC, iv)
with open (infile, "rb") as fin:
with open(outfile, "wb") as fout:
while True:
data = fin.read(16 * 32)
if len(data) == 0:
break
if len(data) == (16 * 32):
decrypted_data = cipher.decrypt(data)
else:
decrypted_data = unpad(cipher.decrypt(data), AES.block_size)
fout.write(decrypted_data)

Related

Encrypt data in PostgreSQL and decrypt the data in Python

I have data in my database that I need to encrypt. I will then download the database to csv files. I have a python program that can decrypt the specific columns in a csv file. The problem is that I don't get my data out from the python program.
sql function:
CREATE OR REPLACE FUNCTION AESEncrypt (data TEXT,pass TEXT)
RETURNS TEXT AS $crypted$
declare
crypted TEXT;
key BYTEA;
iv BYTEA;
BEGIN
key := digest(convert_to(pass, 'utf-8'), 'sha256');
iv := digest(convert_to(CONCAT(data , 'salt'), 'utf-8'), 'md5');
crypted := encode(encrypt_iv(convert_to(data, 'utf-8'), key, iv, 'aes'), 'base64');
RETURN crypted;
END;
$crypted$ LANGUAGE plpgsql;
python program:
import csv
import time
import base64
from hashlib import sha256, md5
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
password = 'Password'
inputFile = 'test.txt'
outputFile = 'out.txt'
delimiter = ';'
columns = [0]
backend = default_backend()
key = sha256(password.encode('utf-8')).digest()
iv = md5((password + 'salt').encode('utf-8')).digest()
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
def encrypt(input):
input = bytes(input, 'utf-8')
#Padding
length = 16 - (len(input) % 16)
input += bytes([length])*length
#Encrypt
encryptor = cipher.encryptor()
return base64.b64encode(encryptor.update(input) + encryptor.finalize()).decode("utf-8")
def decrypt(input):
input = base64.b64decode(input)
decryptor = cipher.decryptor()
data = decryptor.update(input) + decryptor.finalize()
data = data[:-data[-1]] #Remove padding
print(data)
return data.decode('utf-8')
def main():
start_time = time.time()
with open(inputFile, 'r') as csvfileIn:
with open(outputFile, 'w', newline='') as csvfileOut:
spamreader = csv.reader(csvfileIn, delimiter=delimiter)
spamwriter = csv.writer(csvfileOut, delimiter=delimiter)
firstRow = True
for row in spamreader:
if not firstRow:
for pos in columns:
row[pos] = decrypt(row[pos])
firstRow = False
spamwriter.writerow(row)
print("--- %s seconds ---" % (time.time() - start_time))
main()
If I encrypt the file with the encrypt function written in the python program then I get the correct result if i would decrypt it.
If I would call the sql funcion as AESEncrypt('data', 'Password') then it returns the base64 string Ojq6RKg7NgDx8YFdLzfVhQ==
But after decryption I get the empty string as result and not the string data. If I look at the print statment before the utf-8 decode step in the decryption function it prints out the following on the console b'', so it looks like it could be something wrong with the padding. If I would print before I remove the padding I get b'\x85\x90sz\x0cQS\x9bs\xeefvA\xc63-'. If I will encrypt a long sentence then I will actually see parts of the text in the byte outputs above.
Do anyone know what I have done wrong?

AES: Input strings must be a multiple of 16 in length

I want to make a script to decrypt my filess, but when I try to run my script then show me this message , how can I fix it?
Traceback (most recent call last): File "F:\bug_bounty\decrypt.py",
line 46, in File "F:\bug_bounty\decrypt.py", line 24, in
decrypt File
"C:\Python27\lib\site-packages\Crypto\Cipher\blockalgo.py", line 295,
in decrypt
return self._cipher.decrypt(ciphertext) ValueError: Input strings must be a multiple of 16 in length
from Crypto.Hash import SHA256
from Crypto.Cipher import AES
import os
import random
import sys
def decrypt(key, filename):
outFile = os.path.join(os.path.dirname(filename),
os.path.basename(filename[11:]))
chunksize = 64 * 1024
with open(filename, 'rb') as infile:
filesize = infile.read(16)
IV = infile.read(16)
decryptor = AES.new(key, AES.MODE_CBC, IV)
with open(outFile, 'wb') as outfile:
while True:
chunk = infile.read(chunksize)
if len(chunk) == 0:
break
outfile.write(decryptor.decrypt(chunk))
outfile.truncate(int(filesize))
def allfiles():
allFiles = []
for (root, subfiles, files) in os.walk(os.getcwd()):
for names in files:
allFiles.append(os.path.join(root, names))
return allFiles
password = 'M4st3rRul3zs'
files = allfiles();
for filename in files:
if os.path.basename(filename).startswith("(encrypted)"):
print "%s is already encrypted" %filename
pass
else:
decrypt(SHA256.new(password).digest(), filename)
print "Done decrypting %s" %filename
"""os.remove(filename)"""
Here is the small trick you could use while encrypting the data if your data size is not large.
plaintext = "some text"
encryptor = AES.new(key, AES.MODE_CBC, iv)
ciphertext = encryptor.encrypt(plaintext*16)
This will ensure that your input data is a multiple of 16. And of course, you would like to get the original data back when decrypting.
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypttext = cipher.decrypt(ciphertext)
decrypttext = decrypttext[0:len(plaintext)]
Now, decrpyttext has your original plaintext.
From Crypto++ wiki.
The block size is determined by AES::BLOCKSIZE. For AES, this is
always 16 bytes
AES is a block cipher, it works on 16-byte (128-bit) blocks. It can't work with data smaller or bigger than 16 bytes. Smaller data needs to be padded until they're 16 bytes, and larger data needs to be split into 16-byte blocks.
Also there are algorithms that help you achieve just that (work on data larger than the cipher's block size), they're called block cipher modes of operation.
Have a look at this How to encrypt more than 16 bytes using AES?
ValueError: Input strings must be a multiple of 16 in length
That is because AES works with blocks of 128 bits (16 chars). You can consider adding padding to fix this.
AES works with blocks of 16 chars. This how you can add extra padding
import random
import string
plaintext = "Encrypt me"
encryptor = AES.new(key, AES.MODE_CBC, iv)
while len(bytes(plaintext, encoding='utf-8')) % 16 != 0:
plaintext = plaintext + random.choice(string.ascii_letters)
ciphertext = encryptor.encrypt(plaintext)

ValueError: AES key must be either 16, 24, or 32 bytes long PyCrypto 2.7a1

I'm making programm for my school project and have one problem above.
Here's my code:
def aes():
#aes
os.system('cls')
print('1. Encrypt')
print('2. Decrypt')
c = input('Your choice:')
if int(c) == 1:
#cipher
os.system('cls')
print("Let's encrypt, alright")
print('Input a text to be encrypted')
text = input()
f = open('plaintext.txt', 'w')
f.write(text)
f.close()
BLOCK_SIZE = 32
PADDING = '{'
pad = lambda s: s + (BLOCK_SIZE - len(s) % BLOCK_SIZE) * PADDING
EncodeAES = lambda c, s: base64.b64encode(c.encrypt(pad(s)))
secret = os.urandom(BLOCK_SIZE)
f = open('aeskey.txt', 'w')
f.write(str(secret))
f.close()
f = open('plaintext.txt', 'r')
privateInfo = f.read()
f.close()
cipher = AES.new(secret)
encoded = EncodeAES(cipher, privateInfo)
f = open('plaintext.txt', 'w')
f.write(str(encoded))
f.close()
print(str(encoded))
if int(c) == 2:
os.system('cls')
print("Let's decrypt, alright")
f = open('plaintext.txt','r')
encryptedString = f.read()
f.close()
PADDING = '{'
DecodeAES = lambda c, e: c.decrypt(base64.b64decode(e)).rstrip(PADDING)
encryption = encryptedString
f = open('aeskey.txt', 'r')
key = f.read()
f.close()
cipher = AES.new(key)
decoded = DecodeAES(cipher, encryption)
f = open('plaintext.txt', 'w')
f.write(decoded)
f.close()
print(decoded)
Full error text:
Traceback (most recent call last): File "C:/Users/vital/Desktop/Prog/Python/Enc_dec/Enc_dec.py", line 341, in aes()
File "C:/Users/vital/Desktop/Prog/Python/Enc_dec/Enc_dec.py", line 180, in aes cipher = AES.new(key)
File "C:\Users\vital\AppData\Local\Programs\Python\Python35-32\lib\site-packages\Crypto\Cipher\AES.py", line 179, in new return AESCipher(key, *args, **kwargs)
File "C:\Users\vital\AppData\Local\Programs\Python\Python35-32\lib\site-packages\Crypto\Cipher\AES.py", line 114, in init blockalgo.BlockAlgo.init(self, _AES, key, *args, **kwargs)
File "C:\Users\vital\AppData\Local\Programs\Python\Python35-32\lib\site-packages\Crypto\Cipher\blockalgo.py", line 401, in init self._cipher = factory.new(key, *args, **kwargs)
ValueError: AES key must be either 16, 24, or 32 bytes long
Process finished with exit code 1
What am I doing wrong?
The error is very clear. The key must be exactly of that size. os.urandom will return you the correct key. However this key is a bytes (binary string value). Furthermore, by using str(secret), the value of repr(secret) is written into the file instead of secret.
What is more confusing is that AES.new allows you to pass the key as Unicode! However, suppose the key was the ASCII bytes 1234123412341234. Now,
f.write(str(secret))
will write b'1234123412341234' to the text file! Instead of 16 bytes, it now contains those 16 bytes + the b, and two ' quote characters; 19 bytes in total.
Or if you take a random binary string from os.urandom,
>>> os.urandom(16)
b'\xd7\x82K^\x7fe[\x9e\x96\xcb9\xbf\xa0\xd9s\xcb'
now, instead of writing 16 bytes D7, 82,.. and so forth, it now writes that string into the file. And the error occurs because the decryption tries to use
"b'\\xd7\\x82K^\\x7fe[\\x9e\\x96\\xcb9\\xbf\\xa0\\xd9s\\xcb'"
as the decryption key, which, when encoded as UTF-8 results in
b"b'\\xd7\\x82K^\\x7fe[\\x9e\\x96\\xcb9\\xbf\\xa0\\xd9s\\xcb'"
which is a 49-bytes long bytes value.
You have 2 good choices. Either you continue to write your key to a text file, but convert it to hex, or write the key into a binary file; then the file should be exactly the key length in bytes. I am going for the latter here:
Thus for storing the key, use
with open('aeskey.bin', 'wb') as keyfile:
keyfile.write(secret)
and
with open('aeskey.bin', 'rb') as keyfile:
key = keyfile.read()
Same naturally applies to the cipher text (that is the encrypted binary), you must write and read it to and from a binary file:
with open('ciphertext.bin', 'wb') as f:
f.write(encoded)
and
with open('ciphertext.bin', 'rb') as f:
encryptedString = f.read()
If you want to base64-encode it, do note that base64.b64encode/decode are bytes-in/bytes-out.
By the way, plaintext is the original, unencrypted text; the encrypted text is called ciphertext. AES is a cipher that can encrypt plaintext to ciphertext and decrypt ciphertext to plaintext using a key.
Despite these being called "-text" neither of them is textual data per se, as understood by Python, but they're binary data, and should be represented as bytes.

Pycrypto - Encrypt on Linux / decrypt on Windows

I've got a encryption/decryption class that I'm using cross platform. I'm using the same class on both server and client. I encrypt a file on a Linux server, then decrypt on either a Linux or Windows client. I have no problems when decrypting on Linux, but when I transfer the file to Windows and try to decrypt, I get the following exception:
ValueError: Input strings must be a multiple of 16 in length
My first thought is that it is caused by the different filesystems, and any characters that are used to create the padding. Here is my class code:
class FileSec:
def __init__(self):
# File chunk size
self.chunk_size = 64*1024
# Encrypt file with OpenSSL
def encrypt(self, infile, outfile, key):
if not infile or not os.path.isfile(infile):
return False
if not outfile or os.path.isfile(outfile):
return False
if not key:
return False
# Encrypt the file
iv = ''.join(chr(random.randint(0, 0xFF)) for i in range(16))
encryptor = AES.new(key, AES.MODE_CBC, iv)
filesize = os.path.getsize(infile)
with open(infile, 'rb') as ifh:
with open(outfile, 'wb') as ofh:
ofh.write(struct.pack('<Q', filesize))
ofh.write(iv)
while True:
chunk = ifh.read(self.chunk_size)
if len(chunk) == 0:
break
elif len(chunk) % 16 != 0:
chunk += ' ' * (16 - len(chunk) % 16)
ofh.write(encryptor.encrypt(chunk))
return True
# Decrypt file with OpenSSL
def decrypt(self, infile, outfile, key):
if not infile or not os.path.isfile(infile):
return False
if not outfile or os.path.isfile(outfile):
return False
if not key:
return False
# Decrypt the file
with open(infile, 'rb') as ifh:
origsize = struct.unpack('<Q', ifh.read(struct.calcsize('Q')))[0]
iv = ifh.read(16)
decryptor = AES.new(key, AES.MODE_CBC, iv)
with open(outfile, 'wb') as ofh:
while True:
chunk = ifh.read(self.chunk_size)
if len(chunk) == 0:
break
ofh.write(decryptor.decrypt(chunk))
ofh.truncate(origsize)
return True
http://pastebin.com/Dvf6nUxH
I'm using code adapted from here: http://eli.thegreenplace.net/2010/06/25/aes-encryption-of-files-in-python-with-pycrypto/
Anyone have any suggestions on how I can modify this class to work cross-platform?
myfile.read(x) reads any amount up to x bytes; it is not guaranteed to return all x.
Note that it will always return at least one until the file is empty, so it is possible to wrap this in a loop, and then join the returned strings.
Closing this one. Turns out the problem has nothing to do with the encryption/decryption function, but with an extra byte being tacked on to the encrypted file when I transfer it to the Windows machine, causing the exception.

How can I encrypt .docx files with AES & pycrypto without corrupting the files

I've got this bit of python code that I want to use to encrypt various kinds of files with AES 256. I am using the pycrypto module. It works fine for most files (exe, deb, jpg, pdf, txt) but when it comes to office files (docx, xlsx, ppt etc) the file is corrupted upon decryption and will no open (nor can it be repaired) in LibreOffice. I am using Linux mint, python 2.7.6, pycrypto 2.6.1. I'm still a bit of a noob so I'd appreciate it if you could give me code examples of the corrections you'd recommend.
Thanks
from Crypto import Random
from Crypto.Cipher import AES
import os
def pad(s):
return s + b"\0" * (AES.block_size - len(s) % AES.block_size)
def encrypt(message, key, key_size=256):
message = pad(message)
iv = Random.new().read(AES.block_size)
cipher = AES.new(key, AES.MODE_CBC, iv)
return iv + cipher.encrypt(message)
def decrypt(ciphertext, key):
iv = ciphertext[:AES.block_size]
cipher = AES.new(key, AES.MODE_CBC, iv)
plaintext = cipher.decrypt(ciphertext[AES.block_size:])
return plaintext.rstrip(b"\0")
def encrypt_file(file_name, key):
with open(file_name, 'rb') as fo:
plaintext = fo.read()
enc = encrypt(plaintext, key)
with open(file_name + ".enc", 'wb') as fo:
fo.write(enc)
def decrypt_file(file_name, key):
with open(file_name, 'rb') as fo:
ciphertext = fo.read()
dec = decrypt(ciphertext, key)
with open(file_name[:-4], 'wb') as fo:
fo.write(dec)
key = b'\xbf\xc0\x85)\x10nc\x94\x02)j\xdf\xcb\xc4\x94\x9d(\x9e[EX\xc8\xd5\xbfI{\xa2$\x05(\xd5\x18'
encrypt_file('file.docx', key)
The problem is here
plaintext.rstrip(b"\0")
I have run the program and see the reason is:
There was a bug in here that caused the last bytes of the original file to be discarded if they happened to have the same value as the padding bytes!
To fix this issue, we have to store how many padding bytes were used during encryption, then remove them during decryption. Here is my code, it works for me (tested with word and excel 2013 files, pdf, jpg). Let me know if still some bugs.
from Crypto import Random
from Crypto.Cipher import AES
import hashlib
def pad(s):
padding_size = AES.block_size - len(s) % AES.block_size
return s + b"\0" * padding_size, padding_size
def encrypt(message, key, key_size=256):
message, padding_size = pad(message)
iv = Random.new().read(AES.block_size)
cipher = AES.new(key, AES.MODE_CFB, iv)
enc_bytes = iv + cipher.encrypt(message) + bytes([padding_size])
return enc_bytes
def decrypt(ciphertext, key):
iv = ciphertext[:AES.block_size]
cipher = AES.new(key, AES.MODE_CFB, iv)
plaintext = cipher.decrypt(ciphertext[AES.block_size:-1])
padding_size = ciphertext[-1] * (-1)
return plaintext[:padding_size]
def encrypt_file(file_name, key):
with open(file_name, 'rb') as fo:
plaintext = fo.read()
enc = encrypt(plaintext, key)
with open(file_name + ".enc", 'wb') as fo:
fo.write(enc)
def decrypt_file(file_name, key):
with open(file_name, 'rb') as fo:
ciphertext = fo.read()
dec = decrypt(ciphertext, key)
with open('processed_' + file_name[:-4], 'wb') as fo:
fo.write(dec)
key = 'Quan'
hash_object = hashlib.md5(key.encode())
while True:
filename = input('File: ')
en_de = input('En or De?')
if en_de.upper() == 'EN':
encrypt_file(filename, hash_object.hexdigest())
elif en_de.upper() == 'DE':
decrypt_file(filename, hash_object.hexdigest())
else:
print('Did not pick either en or de!')
cont = input('Continue?')
if cont.upper() == 'N':
break
If you need to add padding to make the plaintext a multiple of 16 bytes, the extra bytes need to be stripped before you write the decrypted data. This means you will need to somehow include the number of pad bytes added with the padding before you encrypt it. See PKCS#7 for one possible technique. There are a number of other schemes as well.

Categories