Why does HTML convert Python output '<' to &lt; and '>' to &gt;? - python

I created a script that sends an email containing specific output taken from a server.
I split this output and put each element into an HTML table cell.
I also created a header for the table, which looks like this:
def get_html_table_header(*column_names):
    """Build the HTML header row (``<tr>``) of the report table.

    Args:
        *column_names: Cell captions in display order. ``None`` entries are
            skipped.

    Returns:
        One ``<tr>...</tr>`` string containing a ``<td>`` per caption.
    """
    row_open = '<tr width=79 style="background:#3366FF;height:23.25pt;font-size:8.0pt;font-family:Arial,sans-serif;color:white;font-weight:bold;" >'
    # Join the cells once instead of growing the string with += per column.
    cells = ''.join(
        '<td>' + column + '</td>'
        for column in column_names
        if column is not None
    )
    return row_open + cells + '</tr>'
def get_concrete_html_table_header():
    """Return the header row for the fixed twelve-column report table."""
    captions = (
        'Num. Row',
        'Cell1',
        'Cell2',
        'Cell3',
        'Comment (enter your feedback below)',
        'Cell4',
        'Cell5',
        'Cell6',
        'Cell7',
        'Cell8',
        'Cell9',
        'Cell10',
    )
    return get_html_table_header(*captions)
When I print the result of this function in a Linux console, it looks like this:
<tr width=79 style="background:#3366FF;height:23.25pt;font-size:8.0pt;font-family:Arial,sans-serif;color:white;font-weight:bold;" ><td>Num. Row</td><td>Cell1</td><td>Cell2</td><td>Cell3</td><td>Comment (enter your feedback below)</td><td>Cell4</td><td>Cell5</td><td>Cell6</td><td>Cell7</td><td>Cell8</td><td>Cell9</td><td>Cell10</td></tr>
When I receive the email, the source looks like this:
<tr width="79" style="background:#3366FF;height:23.25pt;font-size:8.0pt;font-family:Arial,sans-serif;color:white;font-weight:bold;"><td>Num. Row</td><td>Cell1</td><td>Cell2</td><td>Cell3</td><td>Comment (enter your feedback below)</td><td>Cell4</td><td>Cell5</td><td>Cell6</td><td>Cell7</td><td>Cell8</td><td>Cell9</td>< td>Cell10</td></tr>
To build the email body I'm using this function:
def build_email_body(CRs_list):
    """Assemble the HTML document used as the report email body.

    Args:
        CRs_list: Sequence of retrieved entries; one table row is emitted
            per entry.

    Returns:
        The complete HTML document as a string, or ``None`` when
        ``CRs_list`` is empty (or ``None``).
    """
    # Kept from the original snippet; nothing in this function assigns to it.
    global criterial_number
    if not CRs_list:
        return None
    parts = [
        '<html><head><title>My Title</title></head><body>',
        '<p align="center"><font color="#176b54" size="+2"><b>Some info</b></font></p>',
        '<p align="center"><font color="#176b54" size="+1">Another info</font></p>',
        '<table align="center" BORDER=1 CELLSPACING=2 CELLPADDING=2 COLS=3 WIDTH="100%">',
        get_concrete_html_table_header(),
    ]
    for CR in CRs_list:
        # One row per received output (11 cells, matching the header).
        # NOTE(review): the row builder is called without the entry, exactly
        # as in the original snippet -- confirm how it obtains its data.
        parts.append(get_html_table_row())
    # Close the table exactly once; a second '</table>' would be stray markup.
    parts.append('</table>')
    parts.append('<br><p align="left"><font color="#176b54" size="+1"><b>=> This is an automatic generated email via script<br>')
    parts.append('<br><br>Have a nice day!</b></font></p><br></body></html>')
    return ''.join(parts)
To send the email I'm using this function:
def send_email(body, recipients, subject, file):
    """Send the report email, or a warning to the sender when there is no body.

    Args:
        body: HTML body, or ``None`` when no entries were retrieved.
        recipients: Addresses the report is sent to.
        subject: Subject line.
        file: Attachments handed on to ``Email``.
    """
    if body is None:
        # Nothing was retrieved: inform just the sender.
        body = "WARNING -> NO entries retrieved after 5 retries<br>CRAU output:<br>" + dct_newCRs_output + "<br>" + duration
        # Override the recipients so nobody else receives the junk info.
        # NOTE(review): lowercase `sender` here versus `SENDER` below --
        # confirm that both names are defined at module level.
        recipients = sender
    message = Email(SMTP_SERVER, SENDER, recipients, _CC, subject, body, 'html', file)
    message.send()
send() is a method of the class Email:
import os, smtplib
from email import encoders
from email.mime.audio import MIMEAudio
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import mimetypes
class Email:
    """Compose an email (plain or HTML body, optional attachments) and send it via SMTP.

    All settings live in a private configuration dict. Every setter returns
    ``self`` so that calls can be chained.
    """

    # Class-level fallback kept for backward compatibility; __init__ always
    # replaces it with a per-instance dict.
    __config = {}

    def __init__(self, smtp_server, sender, recipients, cc, subject, body, body_type, attachments=None):
        """Store the message settings.

        Args:
            smtp_server: Host passed to ``smtplib.SMTP``.
            sender: Address used for the ``From`` header and as envelope sender.
            recipients: Comma-separated ``To`` addresses.
            cc: Comma-separated ``CC`` addresses.
            subject: Subject line.
            body: Body text.
            body_type: MIME subtype of the body, ``'plain'`` or ``'html'``.
            attachments: Optional list of file paths to attach.
        """
        self.__config = {
            'smtp_server': smtp_server,
            'sender': sender,
            'recipients': recipients,
            'cc': cc,
            'subject': subject,
            'body': body,
            'body_type': body_type,      # plain|html
            'attachments': attachments,  # list of files
        }

    def getSmtpServer(self):
        """Return the SMTP host."""
        return self.__config.get('smtp_server')

    def getSender(self):
        """Return the sender address."""
        return self.__config.get('sender')

    def getRecipients(self):
        """Return the comma-separated ``To`` addresses."""
        return self.__config.get('recipients')

    def getCc(self):
        """Return the comma-separated ``CC`` addresses."""
        return self.__config.get('cc')

    def getSubject(self):
        """Return the subject line."""
        return self.__config.get('subject')

    def getBody(self):
        """Return the body text."""
        return self.__config.get('body')

    def getBodyType(self):
        """Return the MIME subtype of the body."""
        return self.__config.get('body_type')

    def getAttachments(self):
        """Return the list of attachment paths (or ``None``)."""
        return self.__config.get('attachments')

    def setSmtpServer(self, host):
        """Set the SMTP host and return ``self``."""
        # Store the parameter that was actually passed in.
        self.__config['smtp_server'] = host
        return self

    def setSender(self, sender):
        """Set the sender address and return ``self``."""
        self.__config['sender'] = sender
        return self

    def setRecipients(self, recipients):
        """Set the ``To`` addresses and return ``self``."""
        self.__config['recipients'] = recipients
        return self

    def setCc(self, cc):
        """Set the ``CC`` addresses and return ``self``."""
        self.__config['cc'] = cc
        return self

    def setSubject(self, subject):
        """Set the subject line and return ``self``."""
        self.__config['subject'] = subject
        return self

    def setBody(self, body):
        """Set the body text and return ``self``."""
        self.__config['body'] = body
        return self

    def setBodyType(self, body_type):
        """Set the MIME subtype of the body and return ``self``."""
        self.__config['body_type'] = body_type
        return self

    def setAttachments(self, attachments):
        """Set the list of attachment paths and return ``self``."""
        self.__config['attachments'] = attachments
        return self

    def attachFilesToEmail(self, attachments, msg):
        """Attach every existing regular file in *attachments* to *msg*.

        Args:
            attachments: List of file paths, or ``None`` for no attachments.
            msg: Multipart message the parts are attached to (modified in place).

        Paths that do not exist or are not regular files are reported on
        stdout and skipped.
        """
        if attachments is None:
            return
        for fname in attachments:
            if not os.path.exists(fname):
                print("File '%s' does not exist.  Not attaching to email." % fname)
                continue
            if not os.path.isfile(fname):
                print("Attachment '%s' is not a file.  Not attaching to email." % fname)
                continue
            # Guess at encoding type
            ctype, encoding = mimetypes.guess_type(fname)
            if ctype is None or encoding is not None:
                # No guess could be made so use a binary type.
                ctype = 'application/octet-stream'
            maintype, subtype = ctype.split('/', 1)
            if maintype == 'text':
                with open(fname) as fp:
                    attach = MIMEText(fp.read(), _subtype=subtype)
            elif maintype == 'image':
                with open(fname, 'rb') as fp:
                    attach = MIMEImage(fp.read(), _subtype=subtype)
            elif maintype == 'audio':
                with open(fname, 'rb') as fp:
                    attach = MIMEAudio(fp.read(), _subtype=subtype)
            else:
                with open(fname, 'rb') as fp:
                    attach = MIMEBase(maintype, subtype)
                    attach.set_payload(fp.read())
                # Encode the payload using Base64
                encoders.encode_base64(attach)
            # Set the filename parameter
            filename = os.path.basename(fname)
            attach.add_header('Content-Disposition', 'attachment', filename=filename)
            msg.attach(attach)

    def send(self):
        """Build the MIME message from the stored settings and send it."""
        # Create message container.
        msg = MIMEMultipart('alternative')
        msg['Subject'] = self.getSubject()
        msg['From'] = self.getSender()
        msg['To'] = self.getRecipients()
        if self.getCc():
            msg['CC'] = self.getCc()
        # NOTE(review): the body is attached as given. A body that is one very
        # long line can exceed SMTP's 998-character line limit and be
        # re-wrapped in transit; consider adding line breaks to the HTML or
        # passing a charset such as 'utf-8' so the part is base64-encoded.
        part = MIMEText(self.getBody(), self.getBodyType())
        msg.attach(part)
        # Add attachments, if any
        self.attachFilesToEmail(self.getAttachments(), msg)
        # Envelope recipients: To and CC joined with a comma so that the last
        # To address and the first CC address are not fused into one string.
        combined = ','.join(
            value for value in (self.getRecipients(), self.getCc()) if value
        )
        envelope = [addr.strip() for addr in combined.split(',') if addr.strip()]
        # Send the message via the configured SMTP server.
        s = smtplib.SMTP(self.getSmtpServer())
        try:
            # sendmail takes the sender's address, the recipient addresses
            # and the message serialised as one string.
            s.sendmail(self.getSender(), envelope, msg.as_string())
        finally:
            s.quit()
I hope this is enough information.
Can someone explain to me why this is happening and how I can fix it?

Your code looks correct, the problem is elsewhere.
&lt; is what you get when you add < as text to an HTML document (since < means "start new element", this character has to be escaped in plain text).
The interesting part here is why does it happen only once in the whole string. If all the < had been replaced, my guess would be that you accidentally added the table as text to the HTML body of the mail.
Maybe the space in < td> is a clue: Mails shouldn't have more than 72 characters per line. So maybe some mail server wraps the HTML? Outlook is known to mess a lot with the mails it receives.
Try to send the HTML code as multipart attachment. See Sending HTML email using Python

Related

How do I implement the imaplib search function?

import imaplib
import email
from email.header import decode_header
import webbrowser
import os
# account credentials
# NOTE(review): these are literal placeholder values hard-coded in the script;
# real credentials should not be committed alongside the code.
username = "example#stack.com"
password = "exapleforstack"
# host name handed to imaplib.IMAP4_SSL further down
imap_server = "imap.one.com"
def clean(text):
    """Return *text* with every non-alphanumeric character replaced by ``_``.

    The result is used as a folder name derived from an email subject.
    """
    return "".join(c if c.isalnum() else "_" for c in text)
# create an IMAP4 class with SSL
imap = imaplib.IMAP4_SSL(imap_server)
# authenticate
imap.login(username, password)
# select the mailbox; the returned data is converted to the message count below
status, messages = imap.select("INBOX")
# NOTE: the (status, data) pair returned by search() is discarded here, so
# this call has no influence on which messages the loop below fetches.
imap.search(None, 'SUBJECT', '"exampleforstack"')
# number of top emails to fetch
N = 3
# total number of emails
messages = int(messages[0])
# walk from the highest message number downwards, N messages in total
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
# if the email message is multipart
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
# NOTE(review): the bare except hides every decoding failure; body then
# keeps whatever value it had before (or stays unset).
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
print(body)
elif "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
# download attachment and save it
# (the file object is never closed explicitly)
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# extract content type of email
content_type = msg.get_content_type()
# get the email body
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
# print only text email parts
print(body)
if content_type == "text/html":
# if it's HTML, create a new HTML file and open it in browser
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
# write the file
open(filepath, "w").write(body)
# open in the default browser
webbrowser.open(filepath)
print("="*100)
# close the connection and logout
imap.close()
imap.logout()
I tried using the search method a couple different ways, like:
res, msg = imap.search(None, 'SUBJECT', "example")
and
res, msg = imap.search(None, 'SUBJECT, "example"')
but my code just gives an error and automatically fetches the most recent 3.
I've tried replacing the line:
res, msg = imap.fetch(str(i), "(RFC822)")
with
res, msg = imap.search(None, 'SUBJECT', '"example"')
but the program returns nothing at all.
How would I go about implementing the search? I got this code from pythoncode and only altered its credentials,
but I'm not sure why I can't get the search function to work.

Fetch a particular unseen email and print only those emails

To elaborate the question I have a code that searches for UNSEEN emails and stores the ID to a variable.
# select the mailbox and convert the reported message count to an int
status, messages = mail.select('Inbox')
messages = int(messages[0])
# search for messages flagged UNSEEN; the ids come back as one
# space-separated bytes string inside a list
_, new_mails = mail.search(None, '(UNSEEN)')
# number of ids contained in the search result
recent_mails = len(new_mails[0].split())
print("Total Messages that is New:" , recent_mails)
print(new_mails)
and it prints this:
Total Messages that is New: 2
[b'389 393']
What I want to do is use these numbers to fetch each email's contents, such as the subject, the sender and the body. Is this possible?
I have earlier code that fetches the most recent emails; how many it goes through is controlled by the variable
N. The code follows:
# NOTE: with N = 0 the range below is empty, so the loop body never runs
N = 0
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = mail.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
pre_subject, encoding = decode_header(msg["Subject"])[0]
# upper() is applied before the bytes check below, so subject may still be bytes here
subject = pre_subject.upper()
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
# prints the undecoded pre_subject, not the upper-cased subject
print("Subject:", pre_subject)
print("From:", From)
# if the email message is multipart
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
# NOTE(review): the bare except hides every decoding failure
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
print(body)
# keep the plain-text body for later use
plain = body
elif "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# extract content type of email
content_type = msg.get_content_type()
# get the email body
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
# print only text email parts
print(body)
plain = body
print("="*100)
Finally, after hours of experimenting, I successfully implemented it.
Basically, the IDs returned by the UNSEEN search are converted from bytes to a string, split into a list, and that list is then passed to the loop:
# connect and log in
gmail_host = 'imap.gmail.com'
mail = imaplib.IMAP4_SSL(gmail_host)
mail.login(EMAIL_ADDRESS,EMAIL_PASSWORD)
# the result of list() is not used
mail.list()
status, messages = mail.select('Inbox')
messages = int(messages[0])
# ids of UNSEEN messages, returned as one bytes string inside a list
_, raw_mails = mail.search(None, '(UNSEEN)')
recent_mails = len(raw_mails[0].split())
# decode the bytes to str, then split into a list of id strings
splited = str(raw_mails[0], 'utf-8')
new_emails = splited.split()
print("Total Messages that is New:" , recent_mails)
print(new_emails)
Output:
['378', '390']
And changed my for loop to this
for i in new_emails:

Python Sending E-Mail with Umlauts

I want to create a program in python that receives every new unread email from one of my email accounts to another email account.
So far I have everything set up. Unfortunately I have really big issues with umlauts (ä, ö, ü). For some reason I cannot make them work properly.
Here is my code:
# -*- coding: utf-8 -*-
import os, sys
import imaplib
import email
import smtplib
from email.mime.multipart import MIMEMultipart
# connection settings (placeholders)
servername = 'SERVERNAME'
username='USERNAME'
password='PASSWORD'
# open the IMAP connection and select the inbox
mail = imaplib.IMAP4_SSL(servername)
(retcode, capabilities) = mail.login(username,password)
mail.list()
mail.select('inbox')
# SMTP connection used to forward the messages
server_smtp = smtplib.SMTP_SSL('SMTP')
# counter of processed messages
n=0
(retcode, messages) = mail.search(None, '(UNSEEN)')
if retcode == 'OK':
for num in messages[0].split() :
n=n+1
typ, data = mail.fetch(num,'(RFC822)')
for response_part in data:
if isinstance(response_part, tuple):
original = email.message_from_string(response_part[1])
# Python 2 print statement
print original['From']
# mark the message as seen
typ, data = mail.store(num,'+FLAGS','\\Seen')
body = ""
if original.is_multipart():
for part in original.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
else:
body = original.get_payload(decode=True)
# NOTE(review): get_payload(decode=True) presumably already yields a byte
# string; the repeated encode/decode calls below are the likely source of the
# mangled umlauts -- confirm the part's charset and decode with that instead.
body = body.encode('UTF-8')
body = str(body)
print(body)
body = "Betreff: " + str(original['Subject']) + "\n\n\n" + body.encode('UTF-8')
SUBJECT = original['From']
server_smtp.login(username, password)
# the message is assembled by hand: only a Subject header, no MIME type or charset header
msg = 'Subject: {}\n\n{}'.format(SUBJECT, body.decode('UTF-8'))
server_smtp.sendmail(username, 'TARGET', msg)
For example: if I want to send this message: "ÄäÖöÜü&ß" I will receive this: "������������&��
���"
Do you know what I am doing wrong?
I would really appreciate your help!

Replace body message of an email using python

I created a class in python that will send emails via one of my private servers. It works but I'm wondering if there is a method to replace an existing email body message with a new one?
Emailer Class
class Emailer:
    """Build a multipart email (text, embedded images, attachments) and send it over SMTP/SSL."""

    def __init__(self, subj=None, message=None, toAddr=None, attachment=None, image=None):
        """Initialise the message and apply any inputs that were supplied.

        Args:
            subj: Optional subject line.
            message: Optional first piece of body text.
            toAddr: Optional recipient address or list of addresses.
            attachment: Optional file path or list of file paths to attach.
            image: Optional image path or list of image paths to embed.
        """
        # initialize email inputs
        self.msg = email.MIMEMultipart.MIMEMultipart()
        self.cidNum = 0
        self.message = []
        if message is not None:
            self.addToMessage(message, image)
        # set the subject of the email if there is one specified
        self.subj = []
        if subj is not None:
            self.setSubject(subj)
        # add any attachments specified
        self.attachment = []
        if attachment is not None:
            self.addAttachment(attachment)
        # set the recipient list
        self.toAddr = []
        if toAddr is not None:
            self.addRecipient(toAddr)

    def addAttachment(self, attachment):
        """Attach one file, or each file in a list, as a base64 ``application/octet-stream`` part."""
        logger.debug("Adding attachement to email")
        if attachment is None:
            return
        if type(attachment) is not list:
            attachment = [attachment]
        for f in attachment:
            part = email.MIMEBase.MIMEBase('application', "octet-stream")
            # Close the file deterministically instead of leaking the handle.
            with open(f, "rb") as fp:
                part.set_payload(fp.read())
            encoders.encode_base64(part)
            part.add_header('Content-Disposition', 'attachment; filename="{0}"'.format(os.path.basename(f)))
            self.msg.attach(part)

    def addToMessage(self, message, image=None):
        """Append *message* to the body text and embed any *image* files inline."""
        logger.debug("Adding to email message. Content: [%s]" % message)
        # add the plain text message
        self.message.append(message)
        # add embedded images to message
        if image is None:
            return
        if type(image) is not list:
            image = [image]
        for i in image:
            # HTML fragment that references the image by its Content-ID
            msgText = email.MIMEText.MIMEText('<br><img src="cid:image%s"><br>' % self.cidNum, 'html')
            self.msg.attach(msgText)
            with open(i, 'rb') as fp:
                img = email.MIMEImage.MIMEImage(fp.read())
            img.add_header('Content-ID', '<image%s>' % self.cidNum)
            self.msg.attach(img)
            self.cidNum += 1

    # method to set the subject of the email
    def setSubject(self, subj):
        """Set the ``Subject`` header."""
        self.msg['Subject'] = subj

    # method to add recipients to the email
    def addRecipient(self, toAddr):
        """Add one address, or each address in a list/tuple, to the ``To`` header."""
        # A bare string would otherwise be iterated character by character.
        if not isinstance(toAddr, (list, tuple)):
            toAddr = [toAddr]
        # Keep a single To header: assigning msg['To'] repeatedly appends a
        # new header each time instead of extending the existing one.
        addresses = self.msg.get_all('To', []) + list(toAddr)
        del self.msg['To']
        if addresses:
            self.msg['To'] = ', '.join(addresses)

    # method to configure server settings: the server host/port and the senders login info
    def configure(self, serverLogin, serverPassword, fromAddr, toAddr, serverHost='myserver', serverPort=465):
        """Open the SMTP/SSL connection, log in and remember the envelope addresses."""
        self.server = smtplib.SMTP_SSL(serverHost, serverPort)
        self.server.set_debuglevel(True)
        self.server.login(serverLogin, serverPassword)  # login to senders email
        self.fromAddr = fromAddr
        self.toAddr = toAddr

    # method to send the email
    def send(self):
        """Attach the collected body text and send the message; failures are logged, not raised."""
        logger.debug("Sending email!")
        msgText = email.MIMEText.MIMEText("\n".join(self.message))
        self.msg.attach(msgText)
        print("Sending email to %s " % self.toAddr)
        text = self.msg.as_string()  # convert the message contents to string format
        try:
            self.server.sendmail(self.fromAddr, self.toAddr, text)  # send the email
        except Exception as e:
            logger.error(e)
Currently, the addToMessage() method is what adds text to the body of the email. If addToMessage() had already been called but I wanted to replace that body text with new text, is there a way?
If addToMessage() had already been called but I wanted to replace that body text with new text, is there a way?
Yes. If you are always replacing the last entry added to self.message, you can reference this element with self.message[-1] since it is a list. If you want to replace a specific element, you can search for it with the index() method.
Example #1: Replace Last Written Text in Body
def replace_last_written_body_text(self, new_text):
    """Replace the most recently added body entry with *new_text*.

    Does nothing when no body text has been added yet.
    """
    if self.message:
        self.message[-1] = new_text
Example #2: Replace Specified Text in Body
def replace_specified_body_text(self, text_to_replace, new_text):
    """Replace the first body entry equal to *text_to_replace* with *new_text*.

    Logs a warning and leaves the body untouched when no entry matches.
    """
    try:
        # list.index raises ValueError for a missing value; it never returns None.
        position = self.message.index(text_to_replace)
    except ValueError:
        logger.warning("Cannot replace non-existent body text")
    else:
        self.message[position] = new_text
If addToMessage has been called just once, then:
message is a list, and its first element is the body text, so you just need to replace that element with the new text:
def replace_body(self, new_text):
    """Replace the first body entry with *new_text*, or create it when the body is empty."""
    if self.message:
        self.message[0] = new_text
    else:
        self.message = [new_text]
I haven't tested that, but it should work. Make sure you write some unit tests for this project!
EDIT:
If addToMessage has been called multiple times, then the new replace function could replace the entire text, or just part of it. If you want to replace all of it, then just replace message, like the part after else above: self.message = [new_text]. Otherwise, you're going to have to find the element you need to replace, like @BobDylan is doing in his answer.

Downloading multiple attachments using imaplib

How can I download multiple attachments from a single mail using imaplib?
Let's say I have an e-mail and that e-mail contains 4 attachments. How can I download all of those attachments? The code below only downloads a single attachment from an e-mail.
# directory the attachments are written to
detach_dir = 'c:/downloads'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('hello#gmail.com','3323434')
m.select("[Gmail]/All Mail")
# ids of all UNSEEN messages
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
# fetch and parse the complete message
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
# mark the message as seen
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
# only multipart messages can carry attachments
if mail.get_content_maintype() != 'multipart':
continue
# Python 2 print statement
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
# skip container parts and parts without a Content-Disposition header
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
# do not overwrite a file that already exists
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
# NOTE(review): the original indentation is lost; if this return sits inside
# the loops, the function ends after the first attachment has been saved.
return HttpResponse('check folder')
For any future python travellers.
Here is a class that downloads any attachment found for an email and saves it to a specific location.
import email
import imaplib
import os
class FetchEmail():
    """Read unread messages from an IMAP mailbox and save their attachments."""

    connection = None
    error = None

    def __init__(self, mail_server, username, password):
        """Connect to *mail_server* over SSL, log in and select the default mailbox."""
        self.connection = imaplib.IMAP4_SSL(mail_server)
        self.connection.login(username, password)
        self.connection.select(readonly=False)  # so we can mark mails as read

    def close_connection(self):
        """
        Close the connection to the IMAP server
        """
        self.connection.close()

    def save_attachment(self, msg, download_folder="/tmp"):
        """
        Given a message, save its attachments to the specified
        download folder (default is /tmp)

        Existing files are not overwritten. Parts without a filename are
        skipped.

        return: file path of the last attachment found, or the string
        "No attachment found." when the message has none
        """
        att_path = "No attachment found."
        for part in msg.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue
            filename = part.get_filename()
            if not filename:
                # e.g. an inline part without a name: nothing to save
                continue
            # The filename comes from the email itself; keep only its last
            # component so it cannot point outside download_folder.
            att_path = os.path.join(download_folder, os.path.basename(filename))
            if not os.path.isfile(att_path):
                with open(att_path, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
        return att_path

    def fetch_unread_messages(self):
        """
        Retrieve unread messages and flag each one as seen

        return: list of parsed messages (empty when there are none); on a
        failed search ``self.error`` is set and the list is empty
        """
        emails = []
        (result, messages) = self.connection.search(None, 'UnSeen')
        if result != "OK":
            self.error = "Failed to retreive emails."
            return emails
        # split() without an argument works for bytes and str alike and
        # yields no entries for an empty search result.
        for message in messages[0].split():
            try:
                ret, data = self.connection.fetch(message, '(RFC822)')
            except imaplib.IMAP4.error:
                print("No new emails to read.")
                self.close_connection()
                raise SystemExit
            if not data or not isinstance(data[0], tuple):
                # the server returned no message body for this id
                continue
            msg = email.message_from_bytes(data[0][1])
            if not isinstance(msg, str):
                emails.append(msg)
            self.connection.store(message, '+FLAGS', '\\Seen')
        return emails

    def parse_email_address(self, email_address):
        """
        Helper function to parse out the email address from the message

        return: tuple (name, address). Eg. ('John Doe', 'jdoe@example.com')
        """
        # `import email` alone does not guarantee that the submodule is loaded
        import email.utils
        return email.utils.parseaddr(email_address)
I reworked the code, breaking it up into functions. I use PEEK so that I don't change the UNREAD status of the email messages.
I'm posting my take on the problem, similar to @John's, but I use only functions instead of classes:
import imaplib
import email
# Connect to an IMAP server
def connect(server, user, password):
    """Open an SSL IMAP connection, log in, select the default mailbox and return it."""
    connection = imaplib.IMAP4_SSL(server)
    connection.login(user, password)
    connection.select()
    return connection
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir):
    """Save every named attachment of one email into *outputdir*.

    Args:
        m: Logged-in IMAP connection with a mailbox selected.
        emailid: Id of the message to fetch.
        outputdir: Existing directory the files are written to.

    The message is fetched with ``BODY.PEEK[]``. Messages that are not
    multipart, and parts without a filename, are skipped.
    """
    import os  # local import: the snippet's own import lines only bring in imaplib and email

    resp, data = m.fetch(emailid, "(BODY.PEEK[])")
    email_body = data[0][1]
    mail = email.message_from_string(email_body)
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # a Content-Disposition header without a filename: nothing to save
            continue
        # The filename comes from the email itself; keep only its last
        # component so it cannot point outside outputdir.
        target = os.path.join(outputdir, os.path.basename(filename))
        with open(target, 'wb') as out:
            out.write(part.get_payload(decode=True))
# Download all the attachment files for all emails in the inbox.
def downloadAllAttachmentsInInbox(server, user, password, outputdir):
    """Connect to the server and save the attachments of every message in the selected mailbox."""
    connection = connect(server, user, password)
    status, found = connection.search(None, "(ALL)")
    for message_id in found[0].split():
        downloaAttachmentsInEmail(connection, message_id, outputdir)
Your code appears okay except for the return (perhaps a typo?) right after the fp.close():
...
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
After saving the first attachment it returns from the function. Comment out that line and see if it fixes your issue.
You may use imap_tools package:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox
# log in and select the INBOX folder; the connection is closed when the with block ends
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
# iterate over the messages of the selected folder
for message in mailbox.fetch():
# each message exposes its attachments as a list
for att in message.attachments: # list: [Attachment objects]
# the three lines below only show which attributes exist; their values are not used
att.filename # str: 'cat.jpg'
att.content_type # str: 'image/jpeg'
att.payload # bytes: b'\xff\xd8\xff\xe0\'
You can try the following function to get the mail attachments:
def create_message_attachment(self, msg_str):
    """Walk the parts of a parsed message and base64-encode each attachment payload.

    Args:
        msg_str: Parsed message object; only its ``walk()`` method is used.

    Multipart containers and text parts without a filename are skipped.
    Image parts without a filename get a generated ``image_<n>.<subtype>``
    name.

    NOTE(review): the encoded payload is only kept in a local variable and
    nothing is returned -- the snippet looks truncated; confirm what the
    caller is meant to receive.
    """
    import base64

    # encodestring was removed in Python 3.9; encodebytes is its replacement.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    count = 1
    body = ''
    content_id = ''
    for part in msg_str.walk():
        mptype = part.get_content_maintype()
        file_name_gl = part.get_filename()
        if mptype == "multipart":
            continue
        elif mptype == "text":
            if not file_name_gl:
                continue
        elif mptype == "image":
            content_id = part.get('Content-ID')
            if not file_name_gl:
                file_name_gl = 'image_' + str(count) + '.' + part.get_content_subtype()
            count = count + 1
        body = part.get_payload(decode=True)
        if body is not None:
            body = body.strip()
            if body:
                body = encode(body)
@sashoalm's code worked for me with a minor change:
change mail = email.message_from_string(email_body) in downloaAttachmentsInEmail to mail = email.message_from_bytes(email_body)
I was getting an error when trying to read bytes (the attachment) as a string. Now it works perfectly for me.
Here's a full example of the code:
# connection settings and download target (placeholders to fill in)
server = 'outlook.office365.com'
user = 'YOUR USERNAME'
password = 'YOUR PASSWORD'
outputdir = 'DIRECTORY THAT YOU WANT FILES DOWNLOADED TO'
subject = 'Data Exports' #subject line of the emails you want to download attachments from
def connect(server, user, password):
    """Return a logged-in SSL IMAP connection with the default mailbox selected."""
    session = imaplib.IMAP4_SSL(server)
    session.login(user, password)
    session.select()
    return session
def downloaAttachmentsInEmail(m, emailid, outputdir):
    """Save every named attachment of one email into *outputdir*.

    Args:
        m: Logged-in IMAP connection with a mailbox selected.
        emailid: Id of the message to fetch.
        outputdir: Existing directory the files are written to.

    The message is fetched with ``BODY.PEEK[]`` and parsed from bytes.
    Messages that are not multipart, and parts without a filename, are
    skipped.
    """
    import os  # local import: no import lines are visible for this example

    resp, data = m.fetch(emailid, "(BODY.PEEK[])")
    email_body = data[0][1]
    mail = email.message_from_bytes(email_body)
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # a Content-Disposition header without a filename: nothing to save
            continue
        # The filename comes from the email itself; keep only its last
        # component so it cannot point outside outputdir.
        target = os.path.join(outputdir, os.path.basename(filename))
        with open(target, 'wb') as out:
            out.write(part.get_payload(decode=True))
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
    """Save the attachments of every Inbox message whose subject matches *subject*."""
    session = connect(server, user, password)
    session.select("Inbox")
    criterion = '(SUBJECT "' + subject + '")'
    typ, found = session.search(None, criterion)
    for message_id in found[0].split():
        downloaAttachmentsInEmail(session, message_id, outputdir)


downloadAttachments(subject)
import re
def get_valid_filename(s):
    """Return *s* as a string that is safe to use as a file name.

    Leading and trailing whitespace is stripped, spaces become underscores,
    and every character that is not a word character, dash or dot is
    removed.
    """
    s = str(s).strip().replace(' ', '_')
    return re.sub(r'(?u)[^-\w.]', '', s)
fileName = get_valid_filename(part.get_filename())
Clean up the file name if it contains invalid characters, e.g. ':' on Windows.

Categories