Python Download Attachment from Outlook w/ Imaplib4 Never downloads the last Attachment

Python Download Attachment from Outlook w/ Imaplib4 Never downloads the last Attachment - python

The script I wrote:1) connects to my work Outlook email. The script reads my username and password from a text file which is found in the variable TextFilePath. 2) Looks for attachments based upon a searchterm I choose that would be in the Subject of the email (here, it's "special_search_term_in_email"). 3) Downloads the attachments to a specific folder titled 'DownloadFolderPath'.
The goal for this script is to run everyday and connect to my email and download 4 attachments that will be sent to me everyday. The issue is that the script will sometimes download all 4 attachments, but then sometimes will only download 3 of 4 attachments and then won't even terminate. Appreciate the help.
import email
import imaplib
import os
import datetime
import csv
# these 3 variables you need to set to run the code
mail_server = "imap-mail.outlook.com"
TextFilePath = "C:/Users/FakeName/PycharmProjects/imaplib_script/unpw.txt"
LogFilePath = 'C:/Users/FakeName/PycharmProjects/imaplib_script/downloaded_attachments/logfile.csv'
DownloadFolderPath = 'C:/Users/FakeName/PycharmProjects/imaplib_script/downloaded_attachments/'
# below read_input_return_list function reads input from a text file and returns a list
def read_input_return_list():
textunpw = open(TextFilePath, "r")
lines = textunpw.readlines()
username = lines[0].strip('\n')
password = lines[1]
textunpw.close()
return [username, password]
read_input_variable = read_input_return_list()
username = read_input_variable[0]
password = read_input_variable[1]
script_ran_time=datetime.datetime.today().strftime('%c')
mail = imaplib.IMAP4_SSL(mail_server)
mail.login(username, password)
print("{0} Connecting to mailbox via IMAP...".format(datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")))
mail.select()
type, data = mail.search(None, '(SUBJECT "special_search_term_in_email")')
total_count = 0
with open(LogFilePath,newline='', encoding='utf-8', mode='a') as csv_file:
writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for num in data[0].split():
type, data = mail.fetch(num, '(RFC822)')
raw_email = data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
for part1 in part.walk():
c_type = part.get_content_type()
c_disp0 = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if c_type == 'text/plain' and 'attachment' not in c_disp0:
body = part1.get_payload(decode=True)
break
attachment = part.get_filename()
if bool(attachment):
filePath = os.path.join(DownloadFolderPath, attachment)
if os.path.isfile(filePath):
filename, file_extension = os.path.splitext(filePath)
FileDownloadAndWriteTime = '__' + datetime.datetime.today().strftime('%m_%d_%Y %H_%M_%S')
new_fname = "{}_{}{}".format(filename, FileDownloadAndWriteTime, file_extension)
while os.path.exists(new_fname):
new_fname = "{}_{}{}".format(filename, FileDownloadAndWriteTime, file_extension)
filePath = new_fname
filepathopen = open(filePath, 'wb')
filepathopen.write(part.get_payload(decode=True))
FileDownloadAndWriteTime = datetime.datetime.today().strftime('%m_%d_%Y %H_%M_%S')
total_count += 1
writer.writerow([filePath,FileDownloadAndWriteTime, script_ran_time])
filepathopen.close()
print('Download file attachment name: ', attachment)
print("Total count of downloaded documents: ", total_count)
mail.close()

I can't pinpoint what's wrong but try adopting this code here: https://gist.github.com/cdunklau/9001357
It worked for me.
I updated the find_attachments method like this:
def find_attachments(message):
"""
Return a tuple of parsed content-disposition dict, message object
for each attachment found.
"""
found = []
for part in message.walk():
if 'content-disposition' not in part:
continue
cdisp = part['content-disposition'].split(';')
cdisp = [x.strip() for x in cdisp]
if cdisp[0].lower() != 'attachment':
continue
parsed = {}
for kv in cdisp[1:]:
try:
key, val = kv.split('=')
if val.startswith('"'):
val = val.strip('"')
elif val.startswith("'"):
val = val.strip("'")
parsed[key] = val
except Exception as e:
parsed['filename']=kv.replace('filename=','')
found+=list(parsed.values())
return found

Related

Facing issues in downloading a particular attachment from gmail through Python

I have the below piece of code which works fine for downloading CSV files. But I'm trying to download a file without any extension name where this is failing. The part.get_filename() is not fetching anything and hence the code is failing with error NameError: name 'fileName' is not defined. The search is working correctly and identifying the particular email.
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import os
import imaplib
import email
#from email.header import decode_header
#import webbrowser
import os
import datetime
import time
import glob
import shutil
today = datetime.date.today()
yday = today - datetime.timedelta(days=5)
# account credentials
username = "xyz#gmail.com"
with open(r'C:\Users\xyz\Downloads\Google sheet key\gmail_app_pwd.txt','r') as pwd:
password=pwd.read()
# create an IMAP4 class with SSL
mailBox = imaplib.IMAP4_SSL("imap.gmail.com")
# authenticate
mailBox.login(username, password)
svdir = r'C:\Users\xyz\Downloads\Work'
boxList = mailBox.list()
# print(boxList)
mailBox.select()
searchQuery = '(SUBJECT "Mailer as on ' + str(yday) +'")'
result, data = mailBox.uid('search', None, searchQuery)
ids = data[0]
# list of uids
id_list = ids.split()
i = len(id_list)
#x=0
for x in range(i):
latest_email_uid = id_list[x]
# fetch the email body (RFC822) for the given ID
result, email_data = mailBox.uid('fetch', latest_email_uid, '(RFC822)')
raw_email = email_data[0][1]
# converts byte literal to string removing b''
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
# downloading attachments
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(svdir, fileName)
if not os.path.isfile(filePath) :
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
subject = str(email_message).split("Subject: ", 1)[1].split("\nTo:", 1)[0]
print('Downloaded "{file}" from email titled "{subject}" with UID {uid}.'.format(file=fileName, subject=subject, uid=latest_email_uid.decode('utf-8')))
mailBox.close()
mailBox.logout()

It worked after I removed the checks. Updated code:
for x in range(i):
latest_email_uid = id_list[x]
# fetch the email body (RFC822) for the given ID
result, email_data = mailBox.uid('fetch', latest_email_uid, '(RFC822)')
raw_email = email_data[0][1]
# converts byte literal to string removing b''
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
# downloading attachments
for part in email_message.walk():
**fileName = "file.csv"**
if bool(fileName):
filePath = os.path.join(svdir, fileName)
if not os.path.isfile(filePath) :
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()

Read email in python 3.7 using imaplib with HTML body and attachments in the email

I would really appreciate if someone can help me with this issue.
I have implemented the below code to read "unread emails from gmail inbox". I need to print "To", "From", "Subject", "Body" and "save attachments in a specified location"
I have 2 issues here.
If there is any email with attachments, it gives the error Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]. It will print all the required things and saves attachments but DOESN'T print the body.
This works fine if no attachment is included.
If there is an email body with any styling in it like "bold/italic/underline/colour...etc", it doesn't print as it is.
Example : Python is printed as Python=C2=A0i= and sometimes different styling is seperated by "*".
def get_body(email_message):
for payload in email_message.get_payload():
# print('Body:\t', payload.get_payload())
break
return(payload.get_payload())
def read_email(server,uname,pwd):
username = uname
password = pwd
mail = imaplib.IMAP4_SSL(server)
mail.login(username, password)
mail.select("inbox")
try:
result, data = mail.uid('search', None, '(UNSEEN)')
inbox_item_list = data[0].split()
most_recent = inbox_item_list[-1]
result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode("UTF-8")
email_message = email.message_from_string(raw_email)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(location, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('Downloaded file:', filename)
if email_message.is_multipart():
for payload in email_message.get_payload():
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t',email_message['Date'])
print('Body:\t', get_body(email_message))
break
else:
print('Nothing'])
except IndexError:
print("No new email")
while True:
read_email("imap.gmail.com", "s#gmail.com", "spassword")
time.sleep(10)
Many thanks

I new to python and this is the complete working code I have done to read unseen emails. You can print the elements according to your requirements. It works for gmail and office 365. This script runs for every 10 seconds. This might also work for other email providers by passing the credentials. Hope this helps.
import email
import imaplib
import os
import html2text
import time
detach_dir = 'locationWhereYouWantToSaveYourAttachments'
def get_body(email_message):
for payload in email_message.get_payload():
break
return payload.get_payload()
def two_way_email(server,uname,pwd):
username = uname
password = pwd
mail = imaplib.IMAP4_SSL(server)
mail.login(username, password)
mail.select("inbox")
try:
result, data = mail.uid('search', None, '(UNSEEN)')
inbox_item_list = data[0].split()
most_recent = inbox_item_list[-1]
result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode("UTF-8")
email_message = email.message_from_string(raw_email)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('Downloaded file:', filename)
if email_message.is_multipart():
for payload in email_message.get_payload():
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t',email_message['Date'])
for part in email_message.walk():
if (part.get_content_type() == 'text/plain') and (part.get('Content-Disposition') is None):
print('Body:\t',part.get_payload())
break
else:
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t', email_message['Date'])
print('Thread-Index:\t', email_message['Thread-Index'])
text = f"{email_message.get_payload(decode=True)}"
html = text.replace("b'", "")
h = html2text.HTML2Text()
h.ignore_links = True
output = (h.handle(f'''{html}''').replace("\\r\\n", ""))
output = output.replace("'", "")
print(output)
except IndexError:
print("No new email")
while True:
two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
time.sleep(10)

from imap_tools import MailBox, A
with MailBox('imap.mail.com').login('test#mail.com', 'pwd') as mailbox:
for msg in mailbox.fetch(A(seen=False)):
body = msg.text or msg.html
print(msg.subject, msg.from_, msg.to, len(body))
for att in msg.attachments:
print(att.filename, len(att.payload))
https://github.com/ikvk/imap_tools
I am lib author.
More examples: https://github.com/ikvk/imap_tools/blob/master/examples/idle.py#L19

Python - Get datetime of mails - Gmail

I was trying to download the attachments from the Gmail using python for a specific keyword and the code is below,
def read_email_from_gmail():
mail = imaplib.IMAP4_SSL(imap_url)
mail.login(user, password)
mail.select('"[Gmail]/All Mail"')
print("Login into Mailbox")
result, data = mail.search(None, '(SUBJECT "Contract note")')
count = 0
for num in data[0].split():
result, data = mail.fetch(num, "(RFC822)")
raw_email_string = data[0][1].decode('utf-8')
msg = email.message_from_string(raw_email_string)
for part in msg.walk():
if part.get_content_type() == "text/plain":
print(part.get_payload(decode=True))
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
print(part.get_)
fileName = fileName+str(count)+str('.pdf')
count = count + 1
if bool(fileName):
filePath = os.path.join(attachment_dir, fileName)
with open(filePath, 'wb') as e:
e.write(part.get_payload(decode=True))
The code works fine and it was downloading the attachment. The issue is all the attachment in the mails are off in the same name so in the above code I have added a count and appended it. But in future it will be so tough to find the right file.
Note:- I used to recieve the mail daily
Can someone please guide me how to get the date of the email so that I will append it to the filename rather than the count.

We can get the date of the email using
msg['Date']

Retrieve Email Body Text Using imaplib

I'm trying to ensure that I retrieve all of the body text (no matter what format the email is in) from a certain email address. The connection details are omitted in this example (imaplib_connect) since all seems to work, but I don't think the below is sufficient for all email bodies. Any improvement recommendations? I'm writing each email's body to a file.
import imaplib
import imaplib_connect
import uuid
import ConfigParser
import os
import email
c = imaplib_connect.open_connection()
try:
config = ConfigParser.ConfigParser()
config.read([os.path.expanduser('~/reader.config')])
fromAddress = config.get('account', 'fromAddress')
typ, data = c.select('INBOX')
typ, data = c.search(None, '(FROM "' + fromAddress + '")')
print "Processing..."
for num in data[0].split():
typ, data = c.fetch(num, '(RFC822)')
rawMessage = data[0][1]
emailMessage = email.message_from_string(rawMessage)
maintype = emailMessage.get_content_maintype()
fileName = uuid.uuid4().hex
if maintype == 'multipart':
for part in emailMessage.get_payload():
if part.get_content_maintype() == 'text':
with open(fileName + ".txt", "wb") as fo:
fo.write(part.get_payload())
elif maintype == 'text':
with open(fileName + ".txt", "wb") as fo:
fo.write(part.get_payload())
finally:
try:
c.close()
except:
pass
c.logout()

Downloading multiple attachments using imaplib

How can I download multiple attachments from a single mail using imaplib?
Let's say I have an e-mail and that e-mail contains 4 attachments. How can I download all of those attachments? The code below only downloads a single attachment from an e-mail.
detach_dir = 'c:/downloads'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('hello#gmail.com','3323434')
m.select("[Gmail]/All Mail")
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')

For any future python travellers.
Here is a class that downloads any attachment found for an email and saves it to a specific location.
import email
import imaplib
import os
class FetchEmail():
connection = None
error = None
def __init__(self, mail_server, username, password):
self.connection = imaplib.IMAP4_SSL(mail_server)
self.connection.login(username, password)
self.connection.select(readonly=False) # so we can mark mails as read
def close_connection(self):
"""
Close the connection to the IMAP server
"""
self.connection.close()
def save_attachment(self, msg, download_folder="/tmp"):
"""
Given a message, save its attachments to the specified
download folder (default is /tmp)
return: file path to attachment
"""
att_path = "No attachment found."
for part in msg.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(download_folder, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return att_path
def fetch_unread_messages(self):
"""
Retrieve unread messages
"""
emails = []
(result, messages) = self.connection.search(None, 'UnSeen')
if result == "OK":
for message in messages[0].split(' '):
try:
ret, data = self.connection.fetch(message,'(RFC822)')
except:
print "No new emails to read."
self.close_connection()
exit()
msg = email.message_from_bytes(data[0][1])
if isinstance(msg, str) == False:
emails.append(msg)
response, data = self.connection.store(message, '+FLAGS','\\Seen')
return emails
self.error = "Failed to retreive emails."
return emails
def parse_email_address(self, email_address):
"""
Helper function to parse out the email address from the message
return: tuple (name, address). Eg. ('John Doe', 'jdoe#example.com')
"""
return email.utils.parseaddr(email_address)

I reworked the code, breaking it up into functions. I use PEEK so I don't change the UNREAD status of the email messages.
I'm posting my take on the problem, similar to #John, but I use only functions instead of classes:
import imaplib
import email
# Connect to an IMAP server
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_string(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
# Download all the attachment files for all emails in the inbox.
def downloadAllAttachmentsInInbox(server, user, password, outputdir):
m = connect(server, user, password)
resp, items = m.search(None, "(ALL)")
items = items[0].split()
for emailid in items:
downloaAttachmentsInEmail(m, emailid, outputdir)

You code appears okay except for the return (perhaps a typo?) right after the fp.close():
...
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
After saving the first attachment it returns from the function. Comment out that line and see if it fixes your issue.

You may use imap_tools package:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
for message in mailbox.fetch():
for att in message.attachments: # list: [Attachment objects]
att.filename # str: 'cat.jpg'
att.content_type # str: 'image/jpeg'
att.payload # bytes: b'\xff\xd8\xff\xe0\'

* You can try following function to get mail attachment
def create_message_attachment(self,msg_str):
count = 1
body = ''
content_id = ''
for part in msg_str.walk():
file_name_gl = None
mptype = part.get_content_maintype()
file_name_gl = part.get_filename()
if mptype == "multipart":
continue
elif mptype == "text":
if not file_name_gl: continue
elif mptype == "image":
content_id = part.get('Content-ID')
if not file_name_gl:
file_name_gl = 'image_' + str(count) + '.' + part.get_content_subtype()
count = count + 1
body = part.get_payload(decode = True)
if type(body) <> type(None) :
body = body.strip()
if body <> "":
body = base64.encodestring(body)

#sashoalm 's code worked for me with a minor change:
change mail = email.message_from_string(email_body) in downloaAttachmentsInEmail to mail = email.message_from_bytes(email_body)
I was getting an error when trying to read bytes (the attachment) as a string. Now it works perfectly for me.
Heres a full example of the code:
server = 'outlook.office365.com'
user = 'YOUR USERNAME'
password = 'YOUR PASSWORD'
outputdir = 'DIRECTORY THAT YOU WANT FILES DOWNLOADED TO'
subject = 'Data Exports' #subject line of the emails you want to download attachments from
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_bytes(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
m = connect(server, user, password)
m.select("Inbox")
typ, msgs = m.search(None, '(SUBJECT "' + subject + '")')
msgs = msgs[0].split()
for emailid in msgs:
downloaAttachmentsInEmail(m, emailid, outputdir)
downloadAttachments(subject)

import re
def get_valid_filename(s):
s = str(s).strip().replace(' ', '_')
return re.sub(r'(?u)[^-\w.]', '', s)
fileName = get_valid_filename(part.get_filename())
Clean up the file name if it contains invalid characters. e.g: : on Windows.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python Download Attachment from Outlook w/ Imaplib4 Never downloads the last Attachment - python

Related

Facing issues in downloading a particular attachment from gmail through Python

Read email in python 3.7 using imaplib with HTML body and attachments in the email

Python - Get datetime of mails - Gmail

Retrieve Email Body Text Using imaplib

Downloading multiple attachments using imaplib

Categories

Resources