I use the following code to extract the filename of an attachment:
import email.utils
# Parse the raw request body into a message object.
# http://docs.python.org/2/library/email.parser.html
msg = email.message_from_string(self.request.body)
for part in msg.walk():
    ctype = part.get_content_type()
    # Only JPEG and PNG parts are treated as image attachments.
    if ctype not in ('image/jpeg', 'image/png'):
        continue
    image_file = part.get_payload(decode=True)
    # May still be an RFC 2047 encoded-word such as "=?UTF-8?B?...?=".
    image_file_name = part.get_filename()
It works well in many cases, but sometimes image_file_name contains values like =?KOI8-R?B?xsHTLTk2Mi5qcGc=?= or =?UTF-8?B?REkyeTFXMFNMNzAuanBn?=.
How should I handle such cases?
You can use decode_header function like this:
from email.header import decode_header
filename = part.get_filename()
# get_filename() yields None for parts without a filename; decode_header()
# cannot handle None, so only decode when there is something to decode.
if filename:
    # decode_header() returns a list of (value, charset) pairs; parse once
    # and look at the first pair (only the first chunk is decoded here).
    value, charset = decode_header(filename)[0]
    if charset is not None:
        # A charset means `value` holds raw bytes in that encoding.
        filename = value.decode(charset)
With Python 3:
from email.message import EmailMessage
from email.header import decode_header
def get_part_filename(msg: EmailMessage):
    """Return the attachment filename of *msg* with encoded-words decoded.

    Filenames such as ``=?UTF-8?B?REkyeTFXMFNMNzAuanBn?=`` are converted to
    readable text. Every chunk of the header value is decoded, not only the
    first one.

    Args:
        msg: Message part whose ``get_filename()`` value should be decoded.

    Returns:
        The decoded filename as ``str``. When ``get_filename()`` returns
        ``None`` or an empty string, that value is returned unchanged.
    """
    # Local import: the surrounding snippet only imports decode_header.
    from email.header import make_header

    filename = msg.get_filename()
    if not filename:
        return filename
    # make_header() reassembles all (value, charset) pairs into one Header,
    # whose str() is the fully decoded text.
    return str(make_header(decode_header(filename)))
You should look at the three parts separated by '?', and use the first two as instructions for how to treat the third:
The first bit is the character encoding (KOI8-R and UTF-8 in your examples), and the second bit is a 'B' to indicate Base64 encoding; a 'Q' in its place would indicate quoted-printable, so you should prepare your code for that as well.
Elaborating on #Nikon's response:
from email.header import decode_header
filename = part.get_filename()
# Parts without a filename yield None, which decode_header() cannot parse.
if filename:
    # decode_header() returns a *list* of (value, charset) pairs, so unpack
    # its first element rather than the list itself.
    fname, charset = decode_header(filename)[0]
    if charset:
        # A charset means `fname` holds raw bytes in that encoding.
        filename = fname.decode(charset)
Related
I am trying to convert a Base64 string to bytes, but I think I'm doing it wrong because I decode the result as ASCII. The real string is much longer; I have shortened it here. I hope you can help me.
def convert():
base64_message = 'JVBERi0xLjUKJeLjz9MKMSAwIG9iago8PC9DcmVhdG9yKFdyaXRlcikvc2l6ZV9oZWlnaHQoMTI1LjApL01vZERhdGUoRDoyMDIyMDIxNjE5MzQzOS0wNicwMCcpL2xhc3RQYWdlKDEpL0NyZWF0aW9uRGF0ZShEOjIwMjIwMjE2MTkzNDM3LTA2JzAwJykvc3BhY2VfYm94KDIwLjApL2Nvb3JkaW5hdGVzUGFnZSg1NC4wLTU5Ny4wLTIsKS9Qcm9kdWNlcihMaWJyZU9mZmljZSA2LjQ7IG1vZGlmaWVkIHVzaW5nIGlUZXh0riA1LjUuOCCpMjAwMC0yMDE1IGlUZXh0IEdyb3VwIE5WIFwoQUdQTC12ZXJzaW9uXCkpL0F1dGhvcihGR0RSKS9zaXplX3dpZHRoKDIyNS4wKS9UaXRsZShQQUdBUkUpL3BkZkFQSU1hbmlwdWxhdGVkKDEpPj4KZW5kb2JqCjIgMCBvYmoKPDwvR3JvdXA8PC9TL1RyYW5zcGFyZW5jeS9JIHRydWUvQ1MvRGV2aWNlUkdCPj4vQ29udGVudHMgMyAwIFIvVHlwZS9QYWdlL1Jlc291cmNlcyA0IDAgUi9QYXJlbnQgNSAwIFIvTWVkaWFCb3hbMCAwIDYxMiA3OTJdPj4KZW5kb2JqCjMgMCBvYmoKPDwvRmlsdGVyL0ZsYXRlRGVjb2RlL0xlbmd0aCAzMTUwPj5zdHJlYW0KeJzVW0uLJLkRvvevyPNC1SpCj0xBk1Bd3W28t7EbfDA+2V7D4rGZvezft0KhR0gpZfWAfTADXZVZUigUjy8e0qgrLL89fVvUoq4Kt8UBXP0Ky+r589e/P/3ph+VfT+rq/KZ0GLVtDtfwuYZHv/z6j6cwY1s2uCq72DBL20WvV7Bh6vLzD09flm/LRYVV4h8XB68er1hIf4tvnSUe6CfmxS5//fr04++/muX138uXSOZ/xmMkHf6FgS8fT5u9umW129UvH39bfnyHBdTVLh8///lZwX7BZ4V7+KOV2SF9t/sF1LNy+18+fnp6+3j60pEE9GHzgia2NDXR0UzzEoja9Oni33Vfn9XW0L5qtMaHT6vstoVPVOtm0mqo42qo81ra8Uo+scwfN/Uy43eFq1mcN1d9FMFKXAVet0DuFvl7Uffwj9++7vZZvan3KWUkK3A+qOMgCE8iuBNrr4EuBLm+kVzD83t4CSpIGoB+Ho55pTEYlgdNSgLaI1hiir8yfzQV1jSFpkKeBfRSF3qwxal+N/lrJMvLRII30vw9vFQrzZpsmJUhdixlSdIK+wpCjMsHTsIqtvIOyrIRqI2EsObBLkiqmcd/zYwJrcmmx2I/SOMF7izpVh6v0ej5t7eodw3vUSS3+BuqhlL4RCgij9pSVVtB5CRSFuxtZyXwNpvJUemI5WsUP+p2aOCYR8C51Vl7XY/bj/vy+8la+jltcL+sLq66hiezb88Y7QujOqIs+IXSaOJLNOG1y2+jT+MaFIxh48GuSJbpNxseLiuL2+nkC7dEtI4O48Ir59txDBZYvwa/dGvmlAzF6fIE7EviVdZQNCybdIM+/AZ5HfaeIuYLaDyxeaN1K29ozI2dC7MFe95QWMFFr8r+5eOYl8QuOQQb3xS1LKxhkZGaadWPX4ZzMESLiWlE+9JeeMSpeRmYmhdr3rFOI607e9PRgZJajKmaCt/plY0O46JgzDPeEgZYkyckfysIhRG8KhoKlyIdWltWKto1yXuNP4O0NURWuV9o9kuk4h4jlMVvrMq3nXVuWNsWotMYnZ8wcoY8mh+C4IwtvwNtGQkYzVpIxJ1l10XxHEb2i0e7IrbIQ+zWEhZGFx1TZ/EXdvnV/dwMcA1Q24uFGWKrZ6xOOH8Bz2xgwNpk4WiEzdMvioOAN0KcBKb5pc0bi4='
base64_bytes = base64_message.encode('ascii')
message_bytes = base64.b64decode(base64_bytes)
message = message_bytes.decode('ascii')
print(message)
you can find the following here:
import base64
# Base64 text consists of ASCII characters only, so it converts to bytes
# with the 'ascii' codec.
base64_message = 'UHl0aG9uIGlzIGZ1bg=='
base64_bytes = bytes(base64_message, 'ascii')
# Undo the Base64 encoding to recover the original payload bytes.
message_bytes = base64.b64decode(base64_bytes)
# The payload in this example is ASCII text, so it can be shown as str.
message = str(message_bytes, 'ascii')
print(message)
Source: https://stackabuse.com/encoding-and-decoding-base64-strings-in-python/
Just a couple of lines can solve this problem:
import base64
base64_code = "VER30QHI30JFOIAIO3020085723F"  # this is just an example
# Turn the text into bytes, then undo the Base64 encoding.
img_data = bytes(base64_code, 'utf-8')
content = base64.b64decode(img_data)
# Binary mode: the decoded payload is written as raw bytes.
with open('/path/to/your/image.jpg', mode='wb') as fw:
    fw.write(content)
print(content)
then you could check the image bytes.
I am working with an encrypted file, but I can't manage to write a for loop that reads it before it gets closed and removed.
My intention is to read the data given in the encrypted file and loop it to assign each line to a variable.
Whenever I execute my code, Python goes straight to the end without processing the decrypted data; I believe this is because the with statement closes the file before the loop starts.
This is what I want, not working, no errors either:
# Load the encrypted list and decrypt it in memory.
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)  # holds the result of decrypt() despite its name
with tempfile.TemporaryFile() as fp:
    fp.write(encrypted)
    # NOTE(review): after the write the file position is at the end of the
    # temp file, so this loop finds nothing to read and its body never runs.
    for url in fp:
        segment = url.strip()
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        filename = segment + '.html'
        filePath = pjoin('Data/' + filename)
        # The page is requested twice: once for saving, once for searching.
        response = urlopen(url)
        webContent = response.read()
        html_content = urlopen(url).read()
        matches = re.findall(b'string', html_content)
        if matches:
            # At least one occurrence found: store the downloaded page.
            with open(filePath, 'wb') as w:
                w.write(webContent)
        else:
            print(segment + ' unchanged.')
This is the working code (Sorry, tried to make it shorter but couldn't):
# Load the encrypted list and decrypt it in memory.
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)  # holds the result of decrypt() despite its name
# Persist the decrypted list, then re-open it so reading starts at the top.
with open(output_file, 'wb') as fp:
    fp.write(encrypted)
with open(output_file) as fp:
    for line in fp:
        segment = line.strip()
        if not segment:
            # A blank line would otherwise request the bare tutorial URL
            # and write a file named '.html'.
            continue
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        filename = segment + '.html'
        filePath = pjoin('Data', filename)
        # Download once and reuse the bytes for both the search and the
        # save; the context manager closes the connection.
        with urlopen(url) as response:
            webContent = response.read()
        if re.search(b'string', webContent) is None:
            print(segment + ' unchanged.')
        else:
            with open(filePath, 'wb') as w:
                w.write(webContent)
Header for both examples (apart to make it shorter):
#python 3.6.6
from urllib.request import urlopen
import urllib.request
from os.path import join as pjoin
import re, os, sys, tempfile, six, ctypes, time, fileinput
from cryptography.fernet import Fernet
# Progress banner printed before the list is processed.
print("[*] Checking list.dat for consistency . . .")
# NOTE(review): the Fernet key is hard-coded in the source, so anyone who can
# read this file can decrypt the list - consider loading it from outside the code.
key = b'wTmVBRLytAmlfkctCuEf59K0LDCXa3sGas3kPg3r4fs=' #Decrypt list.dat
# Encrypted input file (read in binary mode by the examples).
input_file = 'List.dat'
# Plain-text file the "working" example writes the decrypted list to.
output_file = 'List.txt'
List.txt content:
errors
classes
stdlib
Any hints?
The problem is that once you have written to the file, the "file pointer" is at the end of the file. There's nothing to read.
You can use the seek method to reposition the file pointer at the beginning. Alternatively, closing and re-opening the file (as in your working code) will position the pointer at the beginning of the file.
#LarryLustig pretty much answered why your code wasn't working, but IMO if you eliminate the temp file altogether (which shouldn't be necessary) you don't even need to worry about the cursor. See below commented changes on your desired code.
# The decrypted text is processed in memory, so no temporary file is needed.
import os
...
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
# Decode the decrypted bytes into a string. Change 'utf-8' to whichever
# encoding the list was written in, if necessary.
decrypted = fernet.decrypt(data).decode('utf-8')
# splitlines() recognises \n, \r\n and \r alike, so the list is split
# correctly even if it was created on another platform; os.linesep only
# matches the terminator of the machine running this script.
for line in decrypted.splitlines():
    segment = line.strip()
    if not segment:
        # Skip blank lines instead of requesting the bare tutorial URL.
        continue
    url = 'https://docs.python.org/3.3/tutorial/' + segment
    filename = segment + '.html'
    filePath = pjoin('Data', filename)
    # Download once and reuse the bytes for both the search and the save;
    # the context manager closes the connection.
    with urlopen(url) as response:
        webContent = response.read()
    if re.search(b'string', webContent) is None:
        print(segment + ' unchanged.')
    else:
        with open(filePath, 'wb') as w:
            w.write(webContent)
Alternatively, if you know for sure which line terminator the file uses (e.g. \r\n or \n), you can avoid os.linesep altogether.
I'm having a difficult time trying to make these two work together. It's frustrating me a little, so I hope to find ideas/solutions.
The complete works (what I'm planning on) should grab a random image from an online directory, encode it to base64 then print the base64. I've had total curl madness going all day and now I'm turning to python. Onwards!
These are kinda just notes at the minute but should explain the process.
import random, os
import base64
def search(): #get file
    """Print the name of a randomly chosen regular file in the pictures folder."""
    path = r"/Users/Impshum/Pictures" #should be able to http
    # Collect only regular files; sub-directories are ignored.
    candidates = []
    for entry in os.listdir(path):
        if os.path.isfile(os.path.join(path, entry)):
            candidates.append(entry)
    random_filename = random.choice(candidates)
    print(random_filename) #not printing full location
def encode(path='heaven.jpg'): #encode to base64
    """Print the Base64 encoding of the image stored at *path*.

    Args:
        path: Image file to encode; defaults to 'heaven.jpg', the file the
            original notes hard-coded.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'rb') as image:
        image_read = image.read()
    # base64.encodestring was removed in Python 3.9; encodebytes is its
    # replacement. Fall back to the old name where encodebytes is missing.
    encoder = getattr(base64, 'encodebytes', None) or base64.encodestring
    image_64_encode = encoder(image_read)
    # Parenthesised form works as a statement (Py2) and a function (Py3).
    print(image_64_encode)
search() #notes
encode() #notes
Many thanks in advance.
You have most of the code you need
import random, os
import base64
def search(path): #get file
    """Return the full path of a randomly chosen regular file directly inside *path*."""
    # Keep plain files only; sub-directories are not candidates.
    files = []
    for name in os.listdir(path):
        if os.path.isfile(os.path.join(path, name)):
            files.append(name)
    chosen = random.choice(files)
    return os.path.join(path, chosen)
def encode(path):
    """Return the Base64 encoding of the file at *path*.

    Args:
        path: Path of the file to read (opened in binary mode).

    Returns:
        The Base64-encoded content, with the line breaks that
        ``encodebytes`` inserts.
    """
    # The context manager closes the file even if reading raises.
    with open(path, 'rb') as image:
        image_read = image.read()
    # base64.encodestring was removed in Python 3.9; encodebytes is its
    # replacement. Fall back to the old name where encodebytes is missing.
    encoder = getattr(base64, 'encodebytes', None) or base64.encodestring
    image_64_encode = encoder(image_read)
    return image_64_encode
print(encode(search(r"/Users/Impshum/Pictures")))
There are things you can do to make this "nicer", but this should get you started.
For instance, you might want to use glob instead of os.listdir / os.path.join, and a context manager to open the file:
import glob
import base64
import random
def search(path): #get file
    """Return a randomly chosen regular file matching the glob pattern *path*.

    Args:
        path: Glob pattern, e.g. ``"/some/folder/*"``.

    Returns:
        The path of one matching file, picked at random.

    Raises:
        IndexError: If the pattern matches no regular file.
    """
    # Local import: this snippet's header imports only glob, base64 and random.
    import os

    # glob also returns directories, which cannot be opened and encoded as
    # a file, so keep regular files only.
    candidates = [match for match in glob.glob(path) if os.path.isfile(match)]
    random_filename = random.choice(candidates)
    return random_filename
def encode(path):
    """Return the Base64 encoding of the file at *path*.

    Args:
        path: Path of the file to read (opened in binary mode).

    Returns:
        The Base64-encoded content, with the line breaks that
        ``encodebytes`` inserts.
    """
    with open(path, 'rb') as image:
        image_read = image.read()
    # base64.encodestring was removed in Python 3.9; encodebytes is its
    # replacement. Fall back to the old name where encodebytes is missing.
    encoder = getattr(base64, 'encodebytes', None) or base64.encodestring
    image_64_encode = encoder(image_read)
    return image_64_encode
print(encode(search(r"/Users/Impshum/Pictures/*")))
Error handling is left as an exercise to the OP
Here is an answer which gives some information on how to base64 encode a file. However, I also want to include the file type and MIME type in the information carried by the base64-encoded string.
So far I have for my base64 string:
# Read in binary mode (an image is not text) and close the file afterwards.
with open('/Users/user/Desktop/img.PNG', 'rb') as image:
    x = base64.b64encode(image.read())
What is the correct information to prepend, and how would I do this?
It seems like the following is how I would get the base64 file information to pass to the server:
import base64
import mimetypes

file = '/Users/user/Desktop/img.PNG'
# guess_type() returns (None, None) for unknown extensions; fall back to a
# generic type instead of emitting the literal text "None".
mime_type = mimetypes.guess_type(file)[0] or 'application/octet-stream'
prepend_info = 'data:%s;base64' % mime_type
# Read in binary mode and close the file. b64encode() emits a single line,
# whereas the Python 2 'base64' codec inserts newlines, which do not belong
# in a data URI, and does not exist as a str codec on Python 3.
with open(file, 'rb') as image:
    base_64_data = base64.b64encode(image.read()).decode('ascii')
image_data_base64 = '%s,%s' % (prepend_info, base_64_data)
This then gives me:
data:image/png;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wB...
Perhaps something along these lines:
from __future__ import print_function
import base64
import binascii
import os
def base64_encode_file(filename):
    """Base64-encode a file together with its name and extension.

    The encoded payload is ``<filename>,<extension>,<file bytes>``, so the
    three parts can be recovered by splitting the decoded bytes on the first
    two commas.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(encoded, data)`` tuple: the Base64-encoded payload and the raw
        file content.
    """
    filetype = os.path.splitext(filename)[1][1:]  # remove leading '.' from ext
    # Binary mode: the content is arbitrary bytes, not text.
    with open(filename, 'rb') as fh:
        data = fh.read()
    # Join as bytes so the same code runs on Python 2 and 3; text fields are
    # encoded as UTF-8 when they are not already bytes.
    fields = [
        field if isinstance(field, bytes) else field.encode('utf-8')
        for field in (filename, filetype)
    ]
    return base64.b64encode(b','.join(fields + [data])), data
filename = 'C:/Users/martin/Desktop/img.PNG'
#filename = '/Users/user/Desktop/img.PNG'
encoded, data = base64_encode_file(filename)
# Decode the bytes for display so the output looks the same on Python 2 and 3.
print('encoded: {} (hex file data: {})'.format(
    encoded.decode('ascii'), binascii.hexlify(data).decode('ascii')))
# Split on a bytes separator: the decoded payload is bytes. The limit of 2
# keeps any commas inside the file data intact.
decoded = base64.b64decode(encoded).split(b',', 2)
print('decoded:', decoded[0].decode('utf-8'), decoded[1].decode('utf-8'),
      binascii.hexlify(decoded[2]).decode('ascii'))
Output:
encoded: QzovVXNlcnMvbWFydGluL0Rlc2t0b3AvaW1nLlBORyxQTkcsiVBORwo=
(hex file data: 89504e470a)
decoded: C:/Users/martin/Desktop/img.PNG PNG 89504e470a
I want to extract the subject from an email which is stored in an .eml file using Python 3:
The subject line from the file is the following:
Subject: math340-05 =?UTF-8?B?4oCTIEF1ZmdhYmUgNS4x?=
I try to parse the name using the email module, but it returns the string as is:
# Parse the .eml file into a message object.
with open(filename, mode="r") as eml_file:
    m = email.message_from_file(eml_file)
# Prints the header value as returned by the message object.
print(m["Subject"])
How can I decode that?
One way of doing it would be to use the email.header module:
import email, email.header
# Build a minimal message that contains only the encoded Subject header.
m = email.message_from_string("Subject: math340-05 =?UTF-8?B?4oCTIEF1ZmdhYmUgNS4x?=")
# Split the header into (value, charset) pairs ...
raw_header = email.header.decode_header(m.get("Subject"))
# ... and reassemble them into a Header whose text form is fully decoded.
header = email.header.make_header(raw_header)
print(header.__str__())
# or for py2: print(unicode(header))
will give you:
math340-05 – Aufgabe 5.1