Here are some encryption and decryption functions I created for a Python key management library I am writing.
def generate_RSA():
    bits = 2048
    new_key = RSA.generate(bits)
    public_key = new_key.publickey()
    private_key = new_key
    return private_key, public_key

def encrypt_data(in_fd, chunk_size, pub_key):
    encryptor = PKCS1_OAEP.new(pub_key)
    A = list()
    with open(in_fd, 'rb') as in_file:
        while True:
            chunk = in_file.read(chunk_size)
            if len(chunk) == 0:
                break
            elif len(chunk) % 16 != 0:
                chunk += b' ' * (16 - len(chunk) % 16)
            encrypted_file = encryptor.encrypt(chunk)
    return encrypted_file

def decrypt_data(in_fd, chunk_size, priv_key):
    decryptor = PKCS1_OAEP.new(priv_key)
    with open(in_fd, 'rb') as in_file:
        while True:
            chunk = in_file.read(chunk_size)
            if len(chunk) == 0:
                break
            decrypted_file = decryptor.decrypt(eval(str(chunk)))
    return decrypted_file
I wanted to be able to nest encrypt_data and decrypt_data inside each other as the first argument if need be. However, I am running into a problem.
priv_key, pub_key = generate_RSA()
print(decrypt_data(encrypt_data('C:\\Users\cowbo\OneDrive\Documents\EWC\Haiku.txt', 8192, pub_key), 8192, priv_key))
Whenever I try to run the last line of code, I get the following traceback...
Traceback (most recent call last):
  File "C:\Users\cowbo\source\repos\Python Practice\PythonPractice\FileCounter.py", line 57, in <module>
    print(decrypt_data(encrypt_data('C:\\Users\cowbo\OneDrive\Documents\EWC\Haiku.txt', 8192, pub_key), 8192, priv_key))
  File "C:\Users\cowbo\source\repos\Python Practice\Python Practice\FileCounter.py", line 31, in decrypt_data
    with open(in_fd, 'rb') as in_file:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfd in position 1: invalid start byte
I have looked at other posts on here concerning this same issue, and I seem to be encrypting and decrypting correctly, so I'm not sure what the issue is.
You are passing the result from encrypt_data() directly to decrypt_data():
print(decrypt_data(encrypt_data(...), ...))
encrypt_data() returns the encrypted data, not a filename:
encrypted_file = encryptor.encrypt(chunk)
return encrypted_file
(You are only producing the last chunk of encrypted data there, not all of it, but that’s not the cause of this error).
decrypt_data() doesn’t accept encrypted data however. It accepts a filename:
def decrypt_data(in_fd, chunk_size, priv_key):
    # ...
    with open(in_fd, 'rb') as in_file:
What threw me at first was that on anything but Windows that’ll give you a “file not found” error, but on Windows, a binary value for a file path will first be decoded as UTF-8, and that fails for the encrypted data.
To fix this, you have three options:
Have the encryption function open a new file, write the encrypted data to that file and return the filename instead of the encrypted data. Then you at least pass on the correct information to the decryption function.
Create the file for the encrypted data (the data the encryption function returns) at the point where you call the encryption function. Don't pass the result directly to the decryption function; pass the filename.
Change the decryption function to accept the data directly and not read it from a file (see the sketch below).
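For illustration, here is a minimal sketch of that third option, assuming PyCryptodome's PKCS1_OAEP. This is my own sketch, not your original function, the file path is shortened, and it only round-trips when encrypt_data() produced a single encrypted chunk (RSA-OAEP works on one small message per call):

from Crypto.Cipher import PKCS1_OAEP

def decrypt_data(ciphertext, priv_key):
    # ciphertext is the bytes returned by encrypt_data(), not a filename
    decryptor = PKCS1_OAEP.new(priv_key)
    return decryptor.decrypt(ciphertext)

print(decrypt_data(encrypt_data('Haiku.txt', 8192, pub_key), priv_key))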
As a side note, in the decryption function you use:
decryptor.decrypt(eval(str(chunk)))
That's... a rather odd way of passing chunk directly to the decryption function. This is enough:
decryptor.decrypt(chunk)
chunk is a bytes object, str(bytesvalue) gives you "b'...'" (where b' at the start and ' at the end are now part of the string) and eval() gives you the original bytesvalue again. Just pass the original in, no need to waste cycles there.
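A quick demonstration of that round trip (throwaway values, just to show the point):

chunk = b'\x00\x01data'
print(str(chunk))                  # "b'\x00\x01data'" - a str wrapping the repr
print(eval(str(chunk)) == chunk)   # True - you just get the same bytes object back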
I am encrypting a large (100 GB+) file in Python with PyCryptodome, using AES-256 in CBC mode.
Rather than read the entire file into memory and encrypt it in one fell swoop, I would like to read the input file a 'chunk' at a time and append to the output file with the results of encrypting each 'chunk.'
Regrettably, the documentation for PyCryptodome is lacking in that I can't find any examples of how to encrypt a long plaintext with multiple calls to encrypt(). All the examples use a short plaintext and encrypt the entire plaintext in a single call to encrypt().
I had assumed that if my input 'chunk' is a multiple of 16 bytes (the block size of AES in CBC mode) I wouldn't need to add padding to any 'chunk' but the last one. However, I wasn't able to get that to work. (I got padding errors while decrypting.)
I'm finding that in order to successfully decrypt the file, I need to add padding to every 'chunk' when encrypting, and decrypt in units of the input chunk size plus 16 bytes. This means the decrypting process needs to know the 'chunk size' used for encryption, which makes me believe that this is probably an incorrect implementation.
While I do have my encryption/decryption working as described, I wonder if this is the 'correct' way to do it. (I suspect it is not.) I've read inconsistent claims on whether or not every such 'chunk' needs padding. If not, I'd like some handholding to get Pycryptodome to encrypt and then decrypt a large plaintext across multiple calls to encrypt() and decrypt().
EDIT: This code throws a ValueError, "Padding is incorrect," when decrypting the first 'chunk'.
def encrypt_file(infile, outfile, aeskey, iv):
    cipher = AES.new(aeskey, AES.MODE_CBC, iv)
    with open(infile, "rb") as fin:
        with open(outfile, "wb") as fout:
            while True:
                data = fin.read(16 * 32)
                if len(data) == 0:
                    break
                insize = len(data)
                if insize == (16 * 32):
                    padded_data = data
                else:
                    padded_data = pad(data, AES.block_size)
                fout.write(cipher.encrypt(padded_data))

def decrypt_file(infile, outfile, aeskey, iv):
    cipher = AES.new(aeskey, AES.MODE_CBC, iv)
    with open(infile, "rb") as fin:
        with open(outfile, "wb") as fout:
            while True:
                data = fin.read(16 * 32)
                if len(data) == 0:
                    break
                fout.write(unpad(cipher.decrypt(data), AES.block_size))
My problem was related to the padding of the last block. It is necessary to detect which fragment is the last one read so that the padding is only handled there.
def decrypt_file(
    self, filename: str, output_file: str, save_path: str, key, iv
):
    cipher_aes = AES.new(key, AES.MODE_CBC, iv)
    log.info(f'Decrypting file: {filename} output: {output_file}')
    count = 0
    previous_data = None
    with open(filename, "rb") as f, open(
        f"{save_path}/{output_file}", "wb"
    ) as f2:
        while True:
            count += 1
            data = f.read(self.block_size)
            if data == b"":
                decrypted = cipher_aes.decrypt(previous_data)
                log.info(f'Last block UnPadding Count: {count} BlockSize: {self.block_size}')
                decrypted = unpad(decrypted, AES.block_size, style="pkcs7")
                f2.write(decrypted)
                break
            if previous_data:
                decrypted = cipher_aes.decrypt(previous_data)
                f2.write(decrypted)
            previous_data = data
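The matching encrypt side can use the same read-ahead idea so that only the final chunk gets padded. The following is my own sketch (it assumes PyCryptodome's AES and pad, and uses a standalone function rather than the class method above):

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

def encrypt_file(infile, outfile, key, iv, block_size=16 * 32):
    cipher = AES.new(key, AES.MODE_CBC, iv)
    with open(infile, "rb") as fin, open(outfile, "wb") as fout:
        previous_data = b""
        while True:
            data = fin.read(block_size)
            if data == b"":
                # previous_data is the final chunk: pad it, then encrypt
                fout.write(cipher.encrypt(pad(previous_data, AES.block_size)))
                break
            if previous_data:
                fout.write(cipher.encrypt(previous_data))
            previous_data = data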
It looks like the fix is to do a similar chunk-size/padding comparison in the decrypt function, as I did in the encrypt function:
def decrypt_file(infile, outfile, aeskey, iv):
    cipher = AES.new(aeskey, AES.MODE_CBC, iv)
    with open(infile, "rb") as fin:
        with open(outfile, "wb") as fout:
            while True:
                data = fin.read(16 * 32)
                if len(data) == 0:
                    break
                if len(data) == (16 * 32):
                    decrypted_data = cipher.decrypt(data)
                else:
                    decrypted_data = unpad(cipher.decrypt(data), AES.block_size)
                fout.write(decrypted_data)
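For reference, here is a small self-contained check (my own sketch, using in-memory data rather than files) showing that PyCryptodome's CBC cipher object keeps its chaining state across multiple encrypt() calls, so only the final piece needs padding, exactly as the question assumed:

import os
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad

key, iv = os.urandom(32), os.urandom(16)
data = os.urandom(100)                      # deliberately not a multiple of 16

enc = AES.new(key, AES.MODE_CBC, iv)
ciphertext = enc.encrypt(data[:96]) + enc.encrypt(pad(data[96:], AES.block_size))

dec = AES.new(key, AES.MODE_CBC, iv)
assert unpad(dec.decrypt(ciphertext), AES.block_size) == data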
I make a requests.post() call to the server, which replies with JSON; in this JSON there are some keys and also the base64-encoded file.
This is an example of a response from the server:
The server responds like this:
'success' is the key for understanding whether access with the private data is correct.
'message' is the key used in case success is False (in this case, since success == True, the message is not shown).
'data' is the dictionary key that contains the fileName and the base64-format file.
So:
{'success': True,
'message': '',
'data': {'fileName': 'Python_logo_and_wordmark.svg.png',
'file': 'iVBORw0KGgoAAAANSUhEUgAABLAAAA....'}} #To limit the space, I cut the very long bytes example
So the response JSON also contains the file, which I need to decode with base64.b64decode(r.json()['data']['file']).
Everything is OK: I can get my file and decode it correctly.
The problem is that with large files I would like to use the stream method like this:
file = "G:\Python_logo_and_wordmark.svg.png"
if os.path.isfile(file):
os.remove(file)
def get_chunk(chunk):
# Try to decode the base64 file (Chunked)
# is this a wrong approach?
chunk = chunk.decode("ascii")
chunk = chunk.replace('"', '')
if "file" in chunk:
chunk = chunk.split('file:')[1]
elif "}}" in chunk:
chunk = chunk.split('}}')[0]
else:
chunk = chunk
chunk += "=" * ((4 - len(chunk) % 4) % 4)
chunk_decoded = base64.b64decode(chunk)
return chunk_decoded
r = requests.post(url=my_url, json=my_data, stream=True)
iter_content = r.iter_content(chunk_size=64)
while True:
chunk = next(iter_content, None)
if not chunk:
break
chunk_decoded = get_chunk(chunk)
with open(file, "ab") as file_object:
file_object.write(chunk_decoded)
iter_content chunks return this:
b'{"success":true,"message":"","data":{"fileName":"Python_logo_and'
b'_wordmark.svg.png","file":"iVBORw0KGgoAAAANSUhEUgAABLAAAAFkCAYAA'
b'AAwtsJRAAAABGdBTUEAALGPC\\/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAA'
b'dTAAAOpgAAA6mAAAF3CculE8AAAABmJLR0QA\\/wD\\/AP+gvaeTAACAAElEQVR42u'
b'zdeZwbdf0\\/8Nf7k2Ovdttyt7QIggoth1qUW1AQ5PLeAiK13UwWiqLiBZ4Eb+T6+'
Sometimes there are padding errors when decoding, but after a week of trying I preferred to ask this question here, as I am afraid I am taking the wrong approach to this situation.
I would like to know how to handle this situation the right way.
According to your requirement mentioned in the comment, I'm pointing out the current issues and probable future problems below:
In your get_chunk function, you're doing this:

chunk = chunk.decode("ascii")
chunk = chunk.replace('"', '')
if "file" in chunk:
    chunk = chunk.split('file:')[1]
elif "}}" in chunk:
    chunk = chunk.split('}}')[0]
else:
    chunk = chunk
Now look at the first chunk given by iter_content:
b'{"success":true,"message":"","data":{"fileName":"Python_logo_and'
So it will fall under the condition if "file" in chunk:, because the string file appears in fileName. When it then tries to split on file:, it will return a list with only one element, because file was part of fileName, not file:. Hence the program will throw the following error:
Traceback (most recent call last):
  File "main.py", line 7, in <module>
    chunk = chunk.split('file:')[1]
IndexError: list index out of range
try if "file:" in chunk: instead.
Your program may also fail if the fileName contains something like "prod_file:someName". You have to check for that too.
A chunk that doesn't contain file can contain }}, so it can break what you're trying too achieve too.
You can modify the server response and wrap the start and end of the base64-encoded file string with unique identifiers, so that you receive the response as below and can therefore reliably identify the start and end of the file in this streaming approach. For example:
{'success': True,
'message': '',
'data': {'fileName': 'Python_logo_and_wordmark.svg.png',
'file': '0000101100iVBORw0KGgoAAAANSUhEUgAABLAAAA....0000101101'}}
I've added 0000101100 as the starting identifier and 0000101101 as the ending one. You can trim them off while writing the chunks to the file. You can use any other unique identifier format of your own that doesn't conflict with the base64 encoding.
Feel free to ask if there's any further confusion.
I tried to analyze your problem and can't find a better solution than the one #devReddir provided.
The reason is that it is impossible (or very difficult) to parse the data before it is completely downloaded.
A workaround may be to save the data as-is into one big file and parse it with a separate worker. That allows you to decrease memory usage while downloading the file and to avoid losing data.
Save the file as is:

...
while True:
    chunk = next(iter_content, None)
    if not chunk:
        break
    with open(file, "ab") as file_object:
        file_object.write(chunk)
...
Read the file in a separate worker:

import json
import base64

with open("saved_as_is.json") as json_file:
    json_object = json.load(json_file)
    encoded_base64 = json_object['data']['file']
    decoded = base64.b64decode(encoded_base64)
...
Why is parsing the data on the fly so difficult?

The "file" separator may be split across two chunks:

b'... ... ... .., "fi'
b'le": "AAAB... ... .'

\\ is actually an escape symbol and you must handle it manually (and don't forget that \\ itself may be split across chunks → b'...\', b'\...'):

b'dTAAAOpgAAA6mAAAF3CculE8AAAABmJLR0QA\\/wD\\/AP+gvaeTAACAAElEQVR42u'

If the file is super tiny, the chunk line may look like this:

b'"file":"SUPERTINY_BASE64_DECODED", "fileName":"Python_lo'

and chunk.split('file:')[1] won't work.

A base64 chunk must be a multiple of 4 characters, so if your first chunk (the characters after "file":) is 3 characters long, you will need to read the next chunk and move its first character to the end of the previous chunk, and so on for all following iterations.

So there are tons of nuances if you try to parse the data manually.
However, if you want to choose this hard way, here is how to decode base64 chunks.
And here is the list of allowed base64 characters.
If you want to use #devReddir's solution and store the whole data in memory, I'm not sure there is any benefit to using streaming at all.
Okay, here is a complete working solution:
Server side (main.py):
I added this code to be able to run a test server that responds with JSON data containing a base64-encoded file.
I also added some randomness to the response, to be able to check that the string parsing is independent of character position.
import base64 as b
import json as j
from fastapi import FastAPI as f
import requests as r
import random as rr
import string as s
import uvicorn as u

banana_url = 'https://upload.wikimedia.org/wikipedia/commons/c/ce/PNG_demo_Banana.png'
banana_b64 = b.encodebytes(
    r.get(banana_url, stream=True).raw.read())
banana_b64 = banana_b64.decode('ascii').replace('\n', '').encode('ascii')

def get_response(banana_file, banana_file_name):
    random_status = ''
    for i in range(rr.randint(3, 30)): random_status += rr.choice(s.ascii_letters)
    banana_response = {
        'status': random_status,
        'data': {
            'fileName': banana_file_name.split('/')[-1],
            'file': banana_file,
        }
    }
    if len(random_status) % 2 == 0:
        banana_response['data']['random_payload'] = 'hello_world'
        banana_response['random_payload'] = '%hello_world_again%'
    return banana_response

app = f()

@app.get("/")
async def read_root():
    resp = get_response(banana_b64, banana_url.split('/')[-1])
    print('file length:', len(resp['data']['file']))
    return resp

if __name__ == "__main__":
    u.run('main:app', host="0.0.0.0", port=8000, reload=True, workers=1)
Client side (file downloader decoder.py):
import requests
import base64

# must be larger than len('"file":')
CHUNK_SIZE = 64

# iterable response
r = requests.get('http://127.0.0.1:8000', stream=True).iter_content(chunk_size=CHUNK_SIZE)

class ChunkParser:
    file = None
    total_length = 0

    def close(self):
        if self.file:
            self.file.close()

    def __init__(self, file_name) -> None:
        self.file = open(file_name, 'ab')

    def add_chunk(self, chunk):
        # remove all escape symbols if existing
        chunk = chunk.decode('ascii').replace('\\', '').encode('ascii')
        # if chunk size is not multiple of 4, return modulo to be able add it in next chunk
        modulo = b''
        if not (l := len(chunk)) % 4 == 0:
            modulo = chunk[l-(l%4):]
            chunk = chunk[:l-(l%4)]
        self.file.write(base64.b64decode(chunk))
        self.total_length += len(chunk)
        return modulo

prev_chunk = None
cur_chunk = None
writing_started = False
last_chunk = False
parser = ChunkParser('temp_file.png')
file_found = False

while True:
    # set previous chunk on first iterations before modulo may be returned
    if cur_chunk is not None and not writing_started:
        prev_chunk = cur_chunk

    # get current chunk
    cur_chunk = next(r, None)

    # skip first iteration
    if prev_chunk is None:
        continue

    # break loop if no data
    if not cur_chunk:
        break

    # concatenate two chunks to avoid b' ... "fil', b'e": ... ' patern
    two_chunks = prev_chunk + cur_chunk

    # if file key found get real base64 encoded data
    if not file_found and '"file":' in two_chunks.decode('ascii'):
        file_found = True
        # get part after "file" key
        two_chunks = two_chunks.decode('ascii').split('"file":')[1].encode('ascii')

    if file_found and not writing_started:
        # data should be started after first "-quote
        # so cut all data before "
        if '"' in (t := two_chunks.decode('ascii')):
            two_chunks = t[t.find('"')+1:].encode('ascii')
            writing_started = True
        # handle b' ... "file":', b'"... ' patern
        else:
            cur_chunk = b''
            continue

    # check for last data chunk
    # "-quote means end of value
    if writing_started and '"' in (t := two_chunks.decode('ascii')):
        two_chunks = t[:t.find('"')].encode('ascii')
        last_chunk = True

    if writing_started:
        # decode and write data in file
        prev_chunk = parser.add_chunk(two_chunks)

    # end operation
    if last_chunk:
        if (l := len(prev_chunk)) > 0:
            # if last modulo length is larget than 0, that meaning the data total length is not multiple of 4
            # probably data loss appear?
            raise ValueError(f'Bad end of data. length is {str(l)} and last characters are {prev_chunk.decode("ascii")}')
        break

parser.close()
print(parser.total_length)
Don't forget to compare files after download when testing this script:
# get md5 of downloaded by chunks file
$ md5 temp_file.png
MD5 (temp_file.png) = 806165d96d5f9a25cebd2778ae4a3da2
# get md5 of downloaded file using browser
$ md5 PNG_demo_Banana.png
MD5 (PNG_demo_Banana.png) = 806165d96d5f9a25cebd2778ae4a3da2
You could stream it down to a file like this (pip install base64io):
class decoder():
    def __init__(self, fh):
        self.fileh = open(fh, 'rb')
        self.closed = False
        search = ''
        start_tag = '"file": "'
        for i in range(1024):
            search += self.fileh.read(1).decode('UTF8')
            if len(start_tag) > len(search)+1:
                continue
            if search[-len(start_tag):] == start_tag:
                break

    def read(self, chunk=1200):
        data = self.fileh.read(chunk)
        if not data:
            self.close()
            return b''
        return data if not data.decode('UTF8').endswith('"}}') else data[:-3]

    def close(self):
        self.fileh.close()
        self.closed = True

    def closed(self):
        return self.closed

    def flush(self):
        pass

    def write(self):
        pass

    def readable(self):
        return True
And then use the class like this:
from base64io import Base64IO

encoded_source = decoder(fh)
with open("target_file.jpg", "wb") as target, Base64IO(encoded_source) as source:
    for line in source:
        target.write(line)
But of course you need to change from streaming from local file to streaming from the requests.raw object.
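For example, the adaptation could look roughly like this (my sketch; it assumes the decoder's __init__ is changed to accept an already-open file-like object, and my_url/my_data are the request details from the question):

import requests
from base64io import Base64IO

r = requests.post(url=my_url, json=my_data, stream=True)
# r.raw is a file-like object, so the decoder would read from it directly
encoded_source = decoder(r.raw)
with open("target_file.png", "wb") as target, Base64IO(encoded_source) as source:
    for line in source:
        target.write(line)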
I'm making a program for my school project and have run into the problem shown below.
Here's my code:
def aes():
    #aes
    os.system('cls')
    print('1. Encrypt')
    print('2. Decrypt')
    c = input('Your choice:')
    if int(c) == 1:
        #cipher
        os.system('cls')
        print("Let's encrypt, alright")
        print('Input a text to be encrypted')
        text = input()
        f = open('plaintext.txt', 'w')
        f.write(text)
        f.close()
        BLOCK_SIZE = 32
        PADDING = '{'
        pad = lambda s: s + (BLOCK_SIZE - len(s) % BLOCK_SIZE) * PADDING
        EncodeAES = lambda c, s: base64.b64encode(c.encrypt(pad(s)))
        secret = os.urandom(BLOCK_SIZE)
        f = open('aeskey.txt', 'w')
        f.write(str(secret))
        f.close()
        f = open('plaintext.txt', 'r')
        privateInfo = f.read()
        f.close()
        cipher = AES.new(secret)
        encoded = EncodeAES(cipher, privateInfo)
        f = open('plaintext.txt', 'w')
        f.write(str(encoded))
        f.close()
        print(str(encoded))
    if int(c) == 2:
        os.system('cls')
        print("Let's decrypt, alright")
        f = open('plaintext.txt', 'r')
        encryptedString = f.read()
        f.close()
        PADDING = '{'
        DecodeAES = lambda c, e: c.decrypt(base64.b64decode(e)).rstrip(PADDING)
        encryption = encryptedString
        f = open('aeskey.txt', 'r')
        key = f.read()
        f.close()
        cipher = AES.new(key)
        decoded = DecodeAES(cipher, encryption)
        f = open('plaintext.txt', 'w')
        f.write(decoded)
        f.close()
        print(decoded)
Full error text:
Traceback (most recent call last):
  File "C:/Users/vital/Desktop/Prog/Python/Enc_dec/Enc_dec.py", line 341, in <module>
    aes()
  File "C:/Users/vital/Desktop/Prog/Python/Enc_dec/Enc_dec.py", line 180, in aes
    cipher = AES.new(key)
  File "C:\Users\vital\AppData\Local\Programs\Python\Python35-32\lib\site-packages\Crypto\Cipher\AES.py", line 179, in new
    return AESCipher(key, *args, **kwargs)
  File "C:\Users\vital\AppData\Local\Programs\Python\Python35-32\lib\site-packages\Crypto\Cipher\AES.py", line 114, in __init__
    blockalgo.BlockAlgo.__init__(self, _AES, key, *args, **kwargs)
  File "C:\Users\vital\AppData\Local\Programs\Python\Python35-32\lib\site-packages\Crypto\Cipher\blockalgo.py", line 401, in __init__
    self._cipher = factory.new(key, *args, **kwargs)
ValueError: AES key must be either 16, 24, or 32 bytes long

Process finished with exit code 1
What am I doing wrong?
The error is very clear: the key must be exactly one of those sizes. os.urandom will return you a correct key. However, this key is a bytes value (a binary string). Furthermore, by using str(secret), the value of repr(secret) is written into the file instead of secret itself.
What is more confusing is that AES.new allows you to pass the key as Unicode! However, suppose the key was the ASCII bytes 1234123412341234. Now,
f.write(str(secret))
will write b'1234123412341234' to the text file! Instead of 16 bytes, it now contains those 16 bytes + the b, and two ' quote characters; 19 bytes in total.
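You can see this for yourself in an interactive session:

>>> secret = b'1234123412341234'
>>> len(secret)
16
>>> str(secret)
"b'1234123412341234'"
>>> len(str(secret))
19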
Or if you take a random binary string from os.urandom,
>>> os.urandom(16)
b'\xd7\x82K^\x7fe[\x9e\x96\xcb9\xbf\xa0\xd9s\xcb'
now, instead of writing the 16 bytes D7, 82, ... and so forth, it writes that string representation into the file. The error occurs because the decryption tries to use
"b'\\xd7\\x82K^\\x7fe[\\x9e\\x96\\xcb9\\xbf\\xa0\\xd9s\\xcb'"
as the decryption key, which, when encoded as UTF-8 results in
b"b'\\xd7\\x82K^\\x7fe[\\x9e\\x96\\xcb9\\xbf\\xa0\\xd9s\\xcb'"
which is a 49-bytes long bytes value.
You have 2 good choices. Either you continue to write your key to a text file, but convert it to hex, or write the key into a binary file; then the file should be exactly the key length in bytes. I am going for the latter here:
Thus for storing the key, use
with open('aeskey.bin', 'wb') as keyfile:
    keyfile.write(secret)
and
with open('aeskey.bin', 'rb') as keyfile:
    key = keyfile.read()
Same naturally applies to the cipher text (that is the encrypted binary), you must write and read it to and from a binary file:
with open('ciphertext.bin', 'wb') as f:
    f.write(encoded)
and
with open('ciphertext.bin', 'rb') as f:
    encryptedString = f.read()
If you want to base64-encode it, do note that base64.b64encode/decode are bytes-in/bytes-out.
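For completeness, the other option (a hex-encoded key in a text file) would look roughly like this; the file name is just an example:

secret_hex = secret.hex()                  # bytes -> hex string, e.g. 'd7824b5e7f...'
with open('aeskey.txt', 'w') as keyfile:
    keyfile.write(secret_hex)

with open('aeskey.txt') as keyfile:
    key = bytes.fromhex(keyfile.read().strip())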
By the way, plaintext is the original, unencrypted text; the encrypted text is called ciphertext. AES is a cipher that can encrypt plaintext to ciphertext and decrypt ciphertext to plaintext using a key.
Despite these being called "-text" neither of them is textual data per se, as understood by Python, but they're binary data, and should be represented as bytes.
I'm trying to write an encryption/decryption program called P-Cypher in Python (python-Cypher, rhymes with decypher). It uses the PyCrypto libraries to encode a file (using AES). Although I know Python, I do not know cryptography - I'm doing this because I thought it would be fun, so don't critique me on security.
This is how the program is supposed to work.
Asks for input file.
Asks whether you want it to encrypt or decrypt. (sets mode)
Asks for the output file. Verifies it exists; if it does not, asks if you want it to create one.
Encrypts input file and tells you the key/Prompts you for the key, and decrypts the file with the key (Depending on mode)
Writes to output file.
Everything works except for number 4. (I know step 5 works, as step 5 remains pretty much unchanged from the last stable version, v0.03d.) On step 4 encoding, one of two things happens depending on which way I code it:
The program successfully (YAY!) encodes the file. However, the key it prints out is in the form b'U\xxx\xxx\xxx\xxx\xxx'. When I enter it in step 4 decoding mode, with or without the b and the quotes, it doesn't work. So the program cannot decrypt the file, rendering half of my program useless.
I can use .decode(encoding) to turn it into a string. This is the method you see in the code below. However, here is this way's problem: no matter what encoding I use (ascii, ISO, windows-125x, EUR, Big5, utf-8, 16, and 32, etc...) there is always one or more bytes that the encoding cannot handle. And without encoding, there's no decoding, rendering the WHOLE program useless.
So I ask you for help. If you could figure out how to fix problem #1 or #2 (or maybe even both), I would be grateful.
CODE -- Updated
# P-Cypher-Dev
# Made in 2015 by Mateo Guynn
# v0.04d
# Using AES 16/32/64-bit encryption (Google standard)
# DEV VERSION: Possibly unstable, contains better code.

# Changelog:
"""
v0.02d
- Improved Caesar Cipher
- Added binary Cipher converter (fail)
-------------FILE BROKEN------------
"""
"""
v0.03d
- Added ability to create new output files
- Fixed code not quitting on abort
- DEL : binary Cipher converter
---------------STABLE---------------
"""
"""
v0.04d
- DEL : Caesar Cypher
- Added 16/32/64-byte AES encryption
- Binary and text now handled in same manner
-------------FILE BROKEN------------
(encryption works, decryption does not)
"""
"""
v0.05d
- Changed AES encryption to Google's way
- Fixed Key entry
"""

import os
import sys
from Crypto.Cipher import AES
from Crypto import Random
from Crypto.Util import randpool
import base64
import codecs

MAX_KEY_SIZE = 26  # Shows the number of available characters (26 in the alphabet)

#NOTES: Binary mode only works if the file is named binary.dat.

def getMode():
    while True:
        eOrD = input('\nDo you wish to encrypt or decrypt a message? ')
        mode = eOrD.lower()
        if mode in 'encrypt e decrypt d'.split():
            return mode
        else:
            sys.exit('\nEnter either "encrypt" or "e" or "decrypt" or "d". Capital letters are allowed.\n')

def getMessage():
    inputFile = open(input('\nPlease enter the name of the file you want to encrypt/decrypt. You may use relative or full paths. \nPlease, remember the file extension(s)! ')).read()
    try:
        print ('\nThe contents of the file are: \n%s\n' % inputFile)
        return inputFile
    except IOError as e:
        sys.exit('Unable to open file (the file does not exist or P-Cypher does not have permission to view it).\n Aborting.')
    except FileNotFoundError as e:
        sys.exit('Unable to open file (the file does not exist or P-Cypher does not have permission to view it).\n Aborting.')

def getCipher(mode, message):
    block_size = 16  # For AES, this is the only working value
    key_size = 32  # Size of crypto key (possibly changes in getKey())
    aesmode = AES.MODE_CBC  # More secure mode
    if mode[0] == 'e':
        key_bytes = randpool.RandomPool(512).get_bytes(key_size)
        open('decryption.key', 'wb+').write(key_bytes)
        print('\nYour keyfile is: decryption.key\n')
        pad = block_size - len(message) % block_size
        data = message + pad * chr(pad)
        iv_bytes = randpool.RandomPool(512).get_bytes(block_size)
        encrypted_bytes = iv_bytes + AES.new(key_bytes, aesmode, iv_bytes).encrypt(data)
        encrypted = base64.urlsafe_b64encode(encrypted_bytes)
        return encrypted
    else:
        decryptb = base64.urlsafe_b64decode(message)
        decrypted_ivbytes = decryptb[:block_size]
        decrypt = decryptb[block_size:]
        print('\nAuto-searching for decryption.key...')
        try:
            key_bytes = base64.urlsafe_b64decode(open('decryption.key', 'rb').read())
        except IOError as io:
            key_bytes = base64.urlsafe_b64decode(open(input('decryption.key not found. If you have an alternate keyfile, please enter its name now. ')), 'rb').read
        except FileNotFoundError as fnf:
            key_bytes = base64.urlsafe_b64decode(open(input('decryption.key not found. If you have an alternate keyfile, please enter its name now. '), 'rb').read())
        decrypted = AES.new(key_bytes, aesmode, decrypted_ivbytes).decrypt(decryptb)
        pad = ord(decrypted[-1])
        decrypted = decrypted[:-pad]
        return decrypted

def getOutput():
    outputFile = input('\nPlease specify an output file. \nDon\'t forget the file extension! ')
    outputCheck = input('\nYour message will be encrypted/decrypted into the following output file: %s\n\nIs this okay? (y/n) ' % outputFile).lower()
    if outputCheck in 'y yes yeah ok'.split():
        try:
            return outputFile
        except IOError as ioerror:
            createNewFile = input('The file you specified does not exist. Shall I create one? (y/n) ')
            if createNewFile in 'y_yes_yeah_yes please_ok'.split('_'):
                oF = open(outputFile, 'w+')
                oF.close()
                return outputFile
            else:
                sys.exit('Aborting...')
    elif outputCheck in 'n no'.split():
        sys.exit('\nAborting...\n')
    else:
        sys.exit('\nAborting.\n')

print("\nP-Cypher Alpha starting up...\n\nv0.05 dev\nMateo Guynn\n2015\n")
mode = getMode()
message = getMessage()
try:
    open(getOutput(), 'wb+').write(getCipher(mode, message))
except IOError:
    sys.exit('Oh noes! Something has gone terribly wrong!')
except FileNotFoundError:
    sys.exit('Your input file was not found.')
print('\nDone.')
One solution is to encode it as hex... this is guaranteed to be ASCII characters:

import codecs

my_key = b"U\x22\x54\x33"   # note: the key must be a bytes value for the hex codec
print("Your Key:", codecs.encode(my_key, "hex"))
...
my_decode_key = codecs.decode(input("enter key:"), "hex")
print(repr(my_decode_key))
print(my_decode_key == my_key)
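Since the script already imports base64, an equivalent approach (my own sketch, not part of the answer above) is to print the key in URL-safe base64 and decode what the user types back in:

import base64, os

key_bytes = os.urandom(32)
printable = base64.urlsafe_b64encode(key_bytes).decode('ascii')
print("Your key:", printable)

entered = input("enter key: ")
print(base64.urlsafe_b64decode(entered) == key_bytes)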
I've got an encryption/decryption class that I'm using cross-platform. I'm using the same class on both server and client. I encrypt a file on a Linux server, then decrypt it on either a Linux or Windows client. I have no problems when decrypting on Linux, but when I transfer the file to Windows and try to decrypt, I get the following exception:
ValueError: Input strings must be a multiple of 16 in length
My first thought is that it is caused by the different filesystems, and any characters that are used to create the padding. Here is my class code:
class FileSec:
    def __init__(self):
        # File chunk size
        self.chunk_size = 64*1024

    # Encrypt file with OpenSSL
    def encrypt(self, infile, outfile, key):
        if not infile or not os.path.isfile(infile):
            return False
        if not outfile or os.path.isfile(outfile):
            return False
        if not key:
            return False
        # Encrypt the file
        iv = ''.join(chr(random.randint(0, 0xFF)) for i in range(16))
        encryptor = AES.new(key, AES.MODE_CBC, iv)
        filesize = os.path.getsize(infile)
        with open(infile, 'rb') as ifh:
            with open(outfile, 'wb') as ofh:
                ofh.write(struct.pack('<Q', filesize))
                ofh.write(iv)
                while True:
                    chunk = ifh.read(self.chunk_size)
                    if len(chunk) == 0:
                        break
                    elif len(chunk) % 16 != 0:
                        chunk += ' ' * (16 - len(chunk) % 16)
                    ofh.write(encryptor.encrypt(chunk))
        return True

    # Decrypt file with OpenSSL
    def decrypt(self, infile, outfile, key):
        if not infile or not os.path.isfile(infile):
            return False
        if not outfile or os.path.isfile(outfile):
            return False
        if not key:
            return False
        # Decrypt the file
        with open(infile, 'rb') as ifh:
            origsize = struct.unpack('<Q', ifh.read(struct.calcsize('Q')))[0]
            iv = ifh.read(16)
            decryptor = AES.new(key, AES.MODE_CBC, iv)
            with open(outfile, 'wb') as ofh:
                while True:
                    chunk = ifh.read(self.chunk_size)
                    if len(chunk) == 0:
                        break
                    ofh.write(decryptor.decrypt(chunk))
                ofh.truncate(origsize)
        return True
http://pastebin.com/Dvf6nUxH
I'm using code adapted from here: http://eli.thegreenplace.net/2010/06/25/aes-encryption-of-files-in-python-with-pycrypto/
Anyone have any suggestions on how I can modify this class to work cross-platform?
myfile.read(x) reads any amount up to x bytes; it is not guaranteed to return all x.
Note that it will always return at least one byte until the file is empty, so it is possible to wrap this in a loop and then join the returned strings.
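As a sketch of that suggestion (my own helper, not part of the original class):

def read_exact(fh, n):
    """Read exactly n bytes from fh, returning fewer only at end of file."""
    parts = []
    remaining = n
    while remaining > 0:
        piece = fh.read(remaining)
        if not piece:
            break
        parts.append(piece)
        remaining -= len(piece)
    return b''.join(parts)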
Closing this one. Turns out the problem has nothing to do with the encryption/decryption function, but with an extra byte being tacked on to the encrypted file when I transfer it to the Windows machine, causing the exception.