Downloading Attachment and Marking Mail as Unseen - python

I want to download the attachments from unread messages, but I do not want those messages to be flagged as Seen.
The code below works, but it currently marks each mail as Seen.
I tried '(BODY.PEEK[HEADER])', but then even the attachment download stopped.
import upload,checkFileAtServer,sha1sum,email, getpass, imaplib, os
detach_dir = '.'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('myaccount#gmail.com','password')
m.select("inbox")
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
#resp, data = m.fetch(emailid, '(BODY.PEEK[HEADER])')
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
sha1sum = sha1sum.calculateSHA1(att_path)
print type(sha1sum)
responseFromServer = checkFileAtServer.responseFromServer(sha1sum)
if(responseFromServer == "NOT_CHECKED"):
upload.uploadToSecureServer('root','root',att_path,att_path)
Can anybody tell me what I am missing?
Thanks.

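If you do not want to mark a message as \Seen, do not call the STORE IMAP command, and do not use FETCH items that are documented to mark the message implicitly (yes, RFC822 is an alias for BODY[], which causes the message to be marked as read). Fetch the whole message with BODY.PEEK[] instead; BODY.PEEK[HEADER] returns only the headers, which is why the attachment download stopped.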

Related

Read email in python 3.7 using imaplib with HTML body and attachments in the email

I would really appreciate if someone can help me with this issue.
I have implemented the below code to read "unread emails from gmail inbox". I need to print "To", "From", "Subject", "Body" and "save attachments in a specified location"
I have 2 issues here.
If there is any email with attachments, it gives the error Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]. It will print all the required things and saves attachments but DOESN'T print the body.
This works fine if no attachment is included.
If there is an email body with any styling in it like "bold/italic/underline/colour...etc", it doesn't print as it is.
Example: "Python" is printed as "Python=C2=A0i=", and sometimes differently styled text is separated by "*".
def get_body(email_message):
    """Return the payload of the first sub-part of *email_message*.

    The message payload is iterated and only its first element is used;
    whatever that element's own ``get_payload()`` returns is handed back
    unchanged.
    """
    sub_parts = email_message.get_payload()
    for payload in sub_parts:
        # Only the first sub-part is of interest, so stop right away.
        break
    return payload.get_payload()
def read_email(server,uname,pwd):
    """Print the newest unread message on *server* and save its attachments.

    Args:
        server: Host name of the IMAP server (connected to over SSL).
        uname: Account name used to log in.
        pwd: Password used to log in.

    The "inbox" mailbox is searched for UNSEEN messages; only the last
    UID in the result is fetched.  Attachments are written below
    ``location`` unless a file with the same name already exists.  When
    nothing is unread, "No new email" is printed.  The IMAP session is
    always logged out before returning.
    """
    mail = imaplib.IMAP4_SSL(server)
    try:
        mail.login(uname, pwd)
        mail.select("inbox")
        result, data = mail.uid('search', None, '(UNSEEN)')
        inbox_item_list = data[0].split()
        # Raises IndexError when nothing is unread; handled below.
        most_recent = inbox_item_list[-1]
        result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
        raw_email = email_data[0][1].decode("UTF-8")
        email_message = email.message_from_string(raw_email)

        for part in email_message.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue
            filename = part.get_filename()
            if not filename:
                # Inline parts may carry a disposition but no file name.
                continue
            # NOTE(review): `location` is not defined in the visible code;
            # it must be provided as a module-level name.
            att_path = os.path.join(location, filename)
            if not os.path.isfile(att_path):
                with open(att_path, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
                print('Downloaded file:', filename)

        if email_message.is_multipart():
            # Headers and body are printed once, and only when the message
            # has at least one sub-part.
            if email_message.get_payload():
                print('To:\t\t', email_message['To'])
                print('From:\t', email_message['From'])
                print('Subject:', email_message['Subject'])
                print('Date:\t',email_message['Date'])
                print('Body:\t', get_body(email_message))
        else:
            print('Nothing')
    except IndexError:
        print("No new email")
    finally:
        # Without this, every call would leave one more session open.
        mail.logout()
# Check the mailbox forever, pausing between two checks.
POLL_SECONDS = 10
while True:
    read_email("imap.gmail.com", "s#gmail.com", "spassword")
    time.sleep(POLL_SECONDS)
Many thanks
I am new to Python, and this is the complete working code I wrote to read unseen emails. You can print the elements according to your requirements. It works for Gmail and Office 365. The script runs every 10 seconds. It might also work for other email providers if you pass their server and credentials. Hope this helps.
import email
import imaplib
import os
import html2text
import time
detach_dir = 'locationWhereYouWantToSaveYourAttachments'
def get_body(email_message):
    """Return the body payload of *email_message*.

    Args:
        email_message: An ``email.message.Message`` instance.

    Returns:
        For a multipart message, the payload of its first sub-part
        (unchanged from the previous behaviour), or ``None`` when the
        multipart container is empty.  For a non-multipart message, the
        message's own payload; previously this case raised, because the
        payload string was iterated character by character.
    """
    payload = email_message.get_payload()
    if not email_message.is_multipart():
        return payload
    if not payload:
        return None
    return payload[0].get_payload()
def _decode_text_part(part):
    """Return the text of *part* with transfer encoding and charset undone."""
    raw = part.get_payload(decode=True)
    if raw is None:
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The message names a charset Python does not know; fall back.
        return raw.decode('utf-8', errors='replace')


def _save_attachments(email_message, target_dir):
    """Write every named attachment of *email_message* into *target_dir*."""
    for part in email_message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        content = part.get_payload(decode=True)
        if not filename or content is None:
            # Inline parts may carry a disposition but no file name.
            continue
        # Drop any directory components supplied by the sender.
        att_path = os.path.join(target_dir, os.path.basename(filename))
        if not os.path.isfile(att_path):
            with open(att_path, 'wb') as fp:
                fp.write(content)
            print('Downloaded file:', filename)


def _print_headers(email_message):
    """Print the To, From, Subject and Date headers of *email_message*."""
    print('To:\t\t', email_message['To'])
    print('From:\t', email_message['From'])
    print('Subject:', email_message['Subject'])
    print('Date:\t', email_message['Date'])


def two_way_email(server,uname,pwd):
    """Print the newest unread message on *server* and save its attachments.

    Args:
        server: Host name of the IMAP server (connected to over SSL).
        uname: Account name used to log in.
        pwd: Password used to log in.

    The "inbox" mailbox is searched for UNSEEN messages and only the last
    UID in the result is fetched.  Attachments go to ``detach_dir``.  For
    multipart messages every text/plain part without a Content-Disposition
    header is printed as the body; other messages are passed through
    html2text.  Bodies are decoded (base64 / quoted-printable and charset)
    before printing.  "No new email" is printed when nothing is unread.
    The IMAP session is always logged out before returning, so repeated
    polling does not accumulate open connections.
    """
    mail = imaplib.IMAP4_SSL(server)
    try:
        mail.login(uname, pwd)
        mail.select("inbox")
        result, data = mail.uid('search', None, '(UNSEEN)')
        inbox_item_list = data[0].split()
        if not inbox_item_list:
            print("No new email")
            return
        most_recent = inbox_item_list[-1]
        result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
        # Parse the raw bytes directly; decoding them as UTF-8 first fails
        # on messages that use another encoding.
        email_message = email.message_from_bytes(email_data[0][1])

        _save_attachments(email_message, detach_dir)

        if email_message.is_multipart():
            _print_headers(email_message)
            for part in email_message.walk():
                is_plain = part.get_content_type() == 'text/plain'
                if is_plain and part.get('Content-Disposition') is None:
                    print('Body:\t', _decode_text_part(part))
        else:
            _print_headers(email_message)
            print('Thread-Index:\t', email_message['Thread-Index'])
            h = html2text.HTML2Text()
            h.ignore_links = True
            print(h.handle(_decode_text_part(email_message)))
    except IndexError:
        print("No new email")
    finally:
        mail.logout()
# Check both accounts forever, pausing between two rounds.
POLL_SECONDS = 10
while True:
    two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
    two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
    time.sleep(POLL_SECONDS)
from imap_tools import MailBox, A
# Print a one-line summary of every message matching A(seen=False),
# followed by the name and size of each of its attachments.
with MailBox('imap.mail.com').login('test#mail.com', 'pwd') as mailbox:
    for message in mailbox.fetch(A(seen=False)):
        # Use the plain-text body when there is one, else the HTML body.
        body = message.text or message.html
        print(message.subject, message.from_, message.to, len(body))
        for attachment in message.attachments:
            print(attachment.filename, len(attachment.payload))
https://github.com/ikvk/imap_tools
I am lib author.
More examples: https://github.com/ikvk/imap_tools/blob/master/examples/idle.py#L19

Trouble with downloading gmail attachments with Python

I have this code, and it seems to be able to log into the Gmail and view the emails (Unread email become marked as read). However, there is no downloading occurring, and I'm not sure why. Any ideas on how to make this happen?
import email
import getpass, imaplib
import os
import sys
# Attachments are stored in an "attachments" folder.
detach_dir = '.'
if 'attachments' not in os.listdir(detach_dir):
    os.mkdir('attachments')

# Ask for the credentials interactively.
userName = input('Enter your GMail username:')
passwd = getpass.getpass('Enter your password: ')

imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
typ, accountDetails = imapSession.login(userName, passwd)
if typ != 'OK':
    print ('Not able to sign in!')
    # A bare `raise` with no active exception raises RuntimeError.
    raise

imapSession.select('Inbox')
typ, data = imapSession.search(None, 'ALL')
if typ != 'OK':
    print ('Error searching Inbox.')
    raise

# Iterating over all emails
for msgId in data[0].split():
    typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
    if typ != 'OK':
        print ('Error fetching mail.')
        raise

    emailBody = messageParts[0][1]
    mail = email.message_from_string(emailBody)
    for part in mail.walk():
        # Skip container parts and parts without a Content-Disposition header.
        is_container = part.get_content_maintype() == 'multipart'
        if is_container or part.get('Content-Disposition') is None:
            continue

        fileName = part.get_filename()
        if not fileName:
            continue

        filePath = os.path.join(detach_dir, 'attachments', fileName)
        if os.path.isfile(filePath):
            # Already downloaded under this name.
            continue

        print (fileName)
        with open(filePath, 'wb') as fp:
            fp.write(part.get_payload(decode=True))

imapSession.close()
imapSession.logout()
I was working with similar code and I had the same problem. In order to solve it, I changed
mail = email.message_from_string(emailBody)
to
mail = email.message_from_bytes(emailBody)
which worked for me.
Hope it helps!

Python - Get datetime of mails - Gmail

I was trying to download the attachments from the Gmail using python for a specific keyword and the code is below,
def read_email_from_gmail():
    """Save the attachments of every message whose subject matches "Contract note".

    Reads the module-level names ``imap_url``, ``user``, ``password`` and
    ``attachment_dir``.  Every text/plain part is printed.  Each named
    attachment is written to ``attachment_dir`` as
    ``<original name><running count>.pdf``.  The IMAP session is logged
    out when the function finishes or fails.
    """
    mail = imaplib.IMAP4_SSL(imap_url)
    try:
        mail.login(user, password)
        mail.select('"[Gmail]/All Mail"')
        print("Login into Mailbox")
        result, data = mail.search(None, '(SUBJECT "Contract note")')
        count = 0
        for num in data[0].split():
            result, msg_data = mail.fetch(num, "(RFC822)")
            # Parse the raw bytes; decoding them as UTF-8 first fails on
            # messages that use another encoding.
            msg = email.message_from_bytes(msg_data[0][1])
            for part in msg.walk():
                if part.get_content_type() == "text/plain":
                    print(part.get_payload(decode=True))
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue
                fileName = part.get_filename()
                if not fileName:
                    # Parts with a disposition but no file name cannot be
                    # saved; concatenating None would raise TypeError.
                    continue
                fileName = fileName + str(count) + '.pdf'
                count += 1
                filePath = os.path.join(attachment_dir, fileName)
                with open(filePath, 'wb') as e:
                    e.write(part.get_payload(decode=True))
    finally:
        mail.logout()
The code works fine and downloads the attachments. The issue is that all the attachments in these mails have the same name, so in the code above I added a counter and appended it to the file name. In future, though, it will be hard to find the right file.
Note: I receive this mail daily.
Can someone please guide me on how to get the date of the email, so that I can append it to the file name instead of the counter?
We can get the date of the email using
msg['Date']

Python: 'NoneType' object is unsubscriptable : imaplib

This is covered extensively in SO, so I apologize in advance ...however, I've gone through the posts and can't get this to work.
GOALS
Want to get email from gmail that match certain criteria, save the attachments, then delete them.
ISSUE
So, I can get everything to work except deleting the emails. It deletes a few then I get this error:
Traceback (most recent call last): File "get_overdues.py", line 22,
in
email_body = data[0][1] TypeError: 'NoneType' object is unsubscriptable
Every time I run it it deletes more emails then exits with the same error. This has to run on a cronjob and can't be babysat.
What am I doing wrong?
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login(user,word)
m.select("INBOX")
searchString = "(SUBJECT \"Daily Mail Notices\")"
resp, items = m.search(None,searchString)
items = items[0].split()
for emailid in items:
print emailid
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"] + mail["Date"]
sub_dir = re.sub('[,:\- ]','', mail["Date"])
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
message_dir = os.path.join(dump_dir, sub_dir)
if not os.path.exists(message_dir):
os.makedirs(message_dir)
filename = part.get_filename()
counter = 1
if not filename:
filename = 'overdues-%s' % counter
counter += 1
att_path = os.path.join(dump_dir, message_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
m.store(emailid, '+FLAGS', r'(\Deleted)')
m.expunge()
m.close()
m.logout()
Your problem is clearly with fetch:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
It's returning None for either data or, less likely, for data[0], and None obviously isn't subscriptable. You may want to double-check the results of m.fetch and see whether they come in the form you expect.
This is probably because this email was deleted (and not expunged).

Downloading multiple attachments using imaplib

How can I download multiple attachments from a single mail using imaplib?
Let's say I have an e-mail and that e-mail contains 4 attachments. How can I download all of those attachments? The code below only downloads a single attachment from an e-mail.
detach_dir = 'c:/downloads'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('hello#gmail.com','3323434')
m.select("[Gmail]/All Mail")
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
For any future python travellers.
Here is a class that downloads any attachment found for an email and saves it to a specific location.
import email
import imaplib
import os
class FetchEmail():
    """Fetch unread messages over IMAP (SSL) and save their attachments."""

    # Live imaplib.IMAP4_SSL session; assigned by __init__.
    connection = None
    # Description of the most recent failure, or None.
    error = None

    def __init__(self, mail_server, username, password):
        """Log in to *mail_server* and select the default mailbox read-write."""
        self.connection = imaplib.IMAP4_SSL(mail_server)
        self.connection.login(username, password)
        self.connection.select(readonly=False) # so we can mark mails as read

    def close_connection(self):
        """
        Close the connection to the IMAP server
        """
        # close() only leaves the selected mailbox; logout() ends the session.
        self.connection.close()
        self.connection.logout()

    def save_attachment(self, msg, download_folder="/tmp"):
        """
        Given a message, save its attachments to the specified
        download folder (default is /tmp)

        Parts without a file name are skipped, and directory components
        in the sender-supplied name are dropped so files cannot be
        written outside the download folder.  Existing files are not
        overwritten.

        return: file path to the last attachment found, or the string
        "No attachment found." when the message has none
        """
        att_path = "No attachment found."
        for part in msg.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue

            filename = part.get_filename()
            if not filename:
                continue
            safe_name = os.path.basename(filename.replace('\\', '/'))
            if not safe_name:
                continue
            content = part.get_payload(decode=True)
            if content is None:
                continue

            att_path = os.path.join(download_folder, safe_name)
            if not os.path.isfile(att_path):
                with open(att_path, 'wb') as fp:
                    fp.write(content)
        return att_path

    def fetch_unread_messages(self):
        """
        Retrieve unread messages

        return: list of email.message.Message objects (possibly empty).
        On failure ``self.error`` is set and the messages collected so
        far are returned; the process is no longer terminated.
        """
        emails = []
        (result, messages) = self.connection.search(None, 'UnSeen')
        if result != "OK":
            self.error = "Failed to retreive emails."
            return emails

        # split() without an argument works on bytes and yields an empty
        # list when nothing is unread.
        for message in messages[0].split():
            try:
                ret, data = self.connection.fetch(message, '(RFC822)')
            except imaplib.IMAP4.error as exc:
                self.error = "Failed to fetch message: %s" % exc
                break
            if ret != 'OK' or not data or not isinstance(data[0], tuple):
                # No message literal in the reply (e.g. message vanished).
                continue

            emails.append(email.message_from_bytes(data[0][1]))
            self.connection.store(message, '+FLAGS', '\\Seen')

        return emails

    def parse_email_address(self, email_address):
        """
        Helper function to parse out the email address from the message

        return: tuple (name, address). Eg. ('John Doe', 'jdoe#example.com')
        """
        # Imported here because `import email` alone does not guarantee
        # that the email.utils submodule is loaded.
        from email.utils import parseaddr
        return parseaddr(email_address)
I reworked the code, breaking it up into functions. I use PEEK so I don't change the UNREAD status of the email messages.
I'm posting my take on the problem, similar to @John's, but I use only functions instead of classes:
import imaplib
import email
# Connect to an IMAP server
def connect(server, user, password):
    """Open an SSL IMAP session on *server*, log in and select the default mailbox.

    Returns the logged-in ``imaplib.IMAP4_SSL`` object.
    """
    session = imaplib.IMAP4_SSL(server)
    session.login(user, password)
    session.select()
    return session
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir):
    """Save every attachment of message *emailid* into *outputdir*.

    The message is fetched with BODY.PEEK[].  Nothing is written unless
    the message is multipart; each non-container part that carries a
    Content-Disposition header is written to
    ``outputdir + '/' + <file name>``.
    """
    resp, data = m.fetch(emailid, "(BODY.PEEK[])")
    mail = email.message_from_string(data[0][1])
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        target = outputdir + '/' + part.get_filename()
        with open(target, 'wb') as out:
            out.write(part.get_payload(decode=True))
# Download all the attachment files for all emails in the inbox.
def downloadAllAttachmentsInInbox(server, user, password, outputdir):
    """Save the attachments of every message in the selected mailbox to *outputdir*."""
    session = connect(server, user, password)
    resp, found = session.search(None, "(ALL)")
    for emailid in found[0].split():
        downloaAttachmentsInEmail(session, emailid, outputdir)
Your code appears okay except for the return (perhaps a typo?) right after the fp.close():
...
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
After saving the first attachment it returns from the function. Comment out that line and see if it fixes your issue.
You may use imap_tools package:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox
# Walk every message of the INBOX folder and touch the three attachment
# attributes shown in the trailing comments.
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
    for msg in mailbox.fetch():
        for attachment in msg.attachments:  # list: [Attachment objects]
            attachment.filename  # str: 'cat.jpg'
            attachment.content_type  # str: 'image/jpeg'
            attachment.payload  # bytes: b'\xff\xd8\xff\xe0\'
You can try the following function to get the mail attachments:
def create_message_attachment(self,msg_str):
    """Walk *msg_str* and base64-encode the payload of each attachment-like part.

    Args:
        msg_str: A parsed message object (its ``walk()`` method is used).

    Multipart containers and text parts without a file name are skipped.
    Image parts without a file name get a generated ``image_<n>.<subtype>``
    name.  The decoded payload of every remaining part is stripped and,
    when non-empty, base64-encoded.

    NOTE(review): the visible code computes ``body``, ``content_id`` and
    ``file_name_gl`` but neither returns nor stores them; the snippet
    looks truncated, so the function still returns None.
    """
    # base64.encodestring was removed in Python 3.9; encodebytes replaces it.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    count = 1
    body = ''
    content_id = ''
    for part in msg_str.walk():
        mptype = part.get_content_maintype()
        file_name_gl = part.get_filename()
        if mptype == "multipart":
            continue
        elif mptype == "text":
            if not file_name_gl:
                continue
        elif mptype == "image":
            content_id = part.get('Content-ID')
            if not file_name_gl:
                file_name_gl = 'image_' + str(count) + '.' + part.get_content_subtype()
                count = count + 1
        body = part.get_payload(decode=True)
        # `is not None` and truthiness replace the `<>` comparisons, which
        # are a syntax error on Python 3 and compared bytes against str.
        if body is not None:
            body = body.strip()
            if body:
                body = encode(body)
@sashoalm's code worked for me with a minor change:
change mail = email.message_from_string(email_body) in downloaAttachmentsInEmail to mail = email.message_from_bytes(email_body)
I was getting an error when trying to read bytes (the attachment) as a string. Now it works perfectly for me.
Here's a full example of the code:
server = 'outlook.office365.com'
user = 'YOUR USERNAME'
password = 'YOUR PASSWORD'
outputdir = 'DIRECTORY THAT YOU WANT FILES DOWNLOADED TO'
subject = 'Data Exports' #subject line of the emails you want to download attachments from
def connect(server, user, password):
    """Return a logged-in SSL IMAP session on *server* with the default mailbox selected."""
    session = imaplib.IMAP4_SSL(server)
    session.login(user, password)
    session.select()
    return session
def downloaAttachmentsInEmail(m, emailid, outputdir):
    """Save every named attachment of message *emailid* into *outputdir*.

    Args:
        m: IMAP session object; only its ``fetch`` method is used.
        emailid: Message identifier passed to ``m.fetch``.
        outputdir: Existing directory that receives the files.

    The message is fetched with BODY.PEEK[], so fetching does not set the
    \\Seen flag.  Nothing is written when the fetch returns no message
    data or the message is not multipart.  Parts without a file name or
    without decodable content are skipped, and directory components in
    the sender-supplied name are dropped so files stay inside
    *outputdir*.
    """
    import os  # local import: the example's own import lines are not shown

    resp, data = m.fetch(emailid, "(BODY.PEEK[])")
    if resp != 'OK' or not data or not isinstance(data[0], tuple):
        # No message literal in the reply (e.g. the message no longer exists).
        return
    mail = email.message_from_bytes(data[0][1])
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            continue
        safe_name = os.path.basename(filename.replace('\\', '/'))
        content = part.get_payload(decode=True)
        if not safe_name or content is None:
            continue
        # The context manager closes the file even if the write fails.
        with open(os.path.join(outputdir, safe_name), 'wb') as out:
            out.write(content)
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
    """Save the attachments of every Inbox message whose subject contains *subject*.

    Uses the module-level ``server``, ``user``, ``password`` and
    ``outputdir`` settings.  The IMAP session is logged out when the
    function finishes or fails.
    """
    m = connect(server, user, password)
    try:
        m.select("Inbox")
        # Escape backslashes and double quotes so the subject remains a
        # single quoted string inside the SEARCH command.
        quoted = subject.replace('\\', '\\\\').replace('"', '\\"')
        typ, msgs = m.search(None, '(SUBJECT "' + quoted + '")')
        if typ != 'OK' or not msgs or msgs[0] is None:
            return
        for emailid in msgs[0].split():
            downloaAttachmentsInEmail(m, emailid, outputdir)
    finally:
        m.logout()
downloadAttachments(subject)
import re
def get_valid_filename(s):
    """Return *s* as text reduced to characters that are safe in a file name.

    Leading and trailing whitespace is removed, inner spaces become
    underscores, and every character that is not a word character, a
    dash or a dot is dropped.
    """
    trimmed = str(s).strip()
    underscored = trimmed.replace(' ', '_')
    return re.sub(r'(?u)[^-\w.]', '', underscored)
fileName = get_valid_filename(part.get_filename())
Clean up the file name if it contains invalid characters, e.g. ":" on Windows.

Categories