Python script only downloading 3 emails - python

I am trying to write a script that will scan through all emails from the past year. I can only get it to download the top 3. The tutorial I am following doesn't explain anywhere I can change the number of emails it downloads. Here's the code so far.
# user credentials
email_user = input('Email: ')
email_pass = input('Password: ')
# connect to imap
mail = imaplib.IMAP4_SSL("imap.gmail.com",993)
#login
mail.login(email_user, email_pass)
#select folder
mail.select("INBOX","SPAM")
#filter emails by header with receipt or invoice
type, data = mail.search(None, '(SUBJECT "Invoice")')
mail_ids = data[0]
id_list = mail_ids.split()
for num in data[0].split():
typ, data = mail.fetch(num, '(RFC822)' )
raw_email = data[0][1]
# converts byte literal to string removing b''
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
# downloading attachments
for part in email_message.walk():
# this part comes from the snipped I don't understand yet...
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join('/Users/benbuechler/Desktop/Keepr Receipt Storage', fileName)
if not os.path.isfile(filePath) :
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
subject = str(email_message).split("Subject: ", 10)[1].split("\nTo:", 10)[1]
print('Downloaded "{file}" from email titled "{subject}"'.format(file=fileName, subject=subject))
for response_part in data:
if isinstance(response_part, tuple):
msg = email.message_from_string(response_part[1].decode('utf-8'))
email_subject = msg['subject']
email_from = msg['from']
print ('From : ' + email_from + '\n')
print ('Subject : ' + email_subject + '\n')
print(msg.get_payload(decode=True))

Related

How take payload from email and send they not modified, without main headers

I need resend the email from inbox a#example.com email to b#example.org. Is it possible to send the payload without decoding and encoding it when creating a new email? Whether text, HTML, or attachments. You just need to send the original letter from another mailbox without forwarding notifications.
mail = imaplib.IMAP4_SSL(server)
mail.login(login, password)
mail.list()
mail.select("inbox")
result, data = mail.search(None, "UNSEEN")
if result == 'OK':
for num in data[0].split():
result, data = mail.fetch(num, "(RFC822)")
raw_email = data[0][1]
try:
oldmsg = email.message_from_string(raw_email)
except TypeError:
oldmsg = email.message_from_bytes(raw_email)
newmsg = email.mime.multipart.MIMEMultipart(boundary="----")
newmsg['Subject'] = decode_mime_words(oldmsg['Subject'])
newmsg['From'] = login
newmsg['Date'] = strftime("%a, %d %b %Y %H:%M:%S %z")
newmsg['User-Agent'] = "Roundcube Webmail"
newmsg['To'] = to
#part = MIMEBase('application', 'octet-stream')
if oldmsg.is_multipart():
for oldpayload in oldmsg.get_payload():
print(oldpayload.get_payload())
print("-_________________-")
else:
print(oldmsg.get_payload())
# get attachments
for part in oldmsg.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if (fileName[:11] == '=?koi8-r?B?') or (fileName[:11] == '=?KOI8-R?B?'):
fileName = base64.b64decode(fileName[11:]).decode(
'KOI8-R')
elif (fileName[:10] == '=?utf-8?B?') or (fileName[:10] == '=?UTF-8?B?'):
fileName = base64.b64decode(fileName[10:])
fileName = fileName.decode('utf-8')
# name attachemnts
if bool(fileName):
filePath = os.path.join('attach', fileName)
if not os.path.isfile(filePath):
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
part = MIMEApplication(open(filePath, 'rb').read())
part.add_header('Content-Disposition', 'attachment', filename=fileName)
newmsg.attach(part)
fp.close()

Reading unread emails using python script

I am trying to read all the unread emails from the gmail account.
The above code is able to make connection but is unable to fetch the emails.
I want to print the content of each email.
I am getting the error as can't concat int to bytes.
code:
import smtplib
import time
import imaplib
import email
def read_email_from_gmail():
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('my_mail','my_pwd')
mail.select('inbox')
result, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
first_email_id = int(id_list[0])
latest_email_id = int(id_list[-1])
for i in range(latest_email_id,first_email_id, -1):
result, data = mail.fetch(i, '(RFC822)' )
for response_part in data:
if isinstance(response_part, tuple):
msg = email.message_from_string(response_part[1])
email_subject = msg['subject']
email_from = msg['from']
print ('From : ' + email_from + '\n')
print ('Subject : ' + email_subject + '\n')
print(read_email_from_gmail())
error:
Traceback (most recent call last):
File "C:/Users/devda/Desktop/Internship/access_email.py", line 32, in <module>
print(read_email_from_gmail())
File "C:/Users/devda/Desktop/Internship/access_email.py", line 20, in read_email_from_gmail
result, data = mail.fetch(i, '(RFC822)' )
File "C:\Users\devda\AppData\Local\Programs\Python\Python36\lib\imaplib.py", line 529, in fetch
typ, dat = self._simple_command(name, message_set, message_parts)
File "C:\Users\devda\AppData\Local\Programs\Python\Python36\lib\imaplib.py", line 1191, in _simple_command
return self._command_complete(name, self._command(name, *args))
File "C:\Users\devda\AppData\Local\Programs\Python\Python36\lib\imaplib.py", line 956, in _command
data = data + b' ' + arg
TypeError: can't concat int to bytes
>>>
I followed the tutorial from here
What I want to do is to extract content from email which is shown in image
I had to make a few changes to your code in order to get it to work on Python 3.5.1. I have inlined comments below.
# no need to import smtplib for this code
# no need to import time for this code
import imaplib
import email
def read_email_from_gmail():
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('my_mail','my_pwd')
mail.select('inbox')
result, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
first_email_id = int(id_list[0])
latest_email_id = int(id_list[-1])
for i in range(latest_email_id,first_email_id, -1):
# need str(i)
result, data = mail.fetch(str(i), '(RFC822)' )
for response_part in data:
if isinstance(response_part, tuple):
# from_bytes, not from_string
msg = email.message_from_bytes(response_part[1])
email_subject = msg['subject']
email_from = msg['from']
print ('From : ' + email_from + '\n')
print ('Subject : ' + email_subject + '\n')
# nothing to print here
read_email_from_gmail()
Maybe submit a bug report to the author of that blog.
This is what worked for me:
It stores the from address, subject, content(text) into a file of all unread emails.
code:
import email
import imaplib
mail = imaplib.IMAP4_SSL('imap.gmail.com')
(retcode, capabilities) = mail.login('mymail','mypassword')
mail.list()
mail.select('inbox')
n=0
(retcode, messages) = mail.search(None, '(UNSEEN)')
if retcode == 'OK':
for num in messages[0].split() :
print ('Processing ')
n=n+1
typ, data = mail.fetch(num,'(RFC822)')
for response_part in data:
if isinstance(response_part, tuple):
original = email.message_from_bytes(response_part[1])
# print (original['From'])
# print (original['Subject'])
raw_email = data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
for part in email_message.walk():
if (part.get_content_type() == "text/plain"): # ignore attachments/html
body = part.get_payload(decode=True)
save_string = str(r"C:\Users\devda\Desktop\Internship\Dumpemail_" + str('richboy') + ".txt" )
myfile = open(save_string, 'a')
myfile.write(original['From']+'\n')
myfile.write(original['Subject']+'\n')
myfile.write(body.decode('utf-8'))
myfile.write('**********\n')
myfile.close()
else:
continue
typ, data = mail.store(num,'+FLAGS','\\Seen')
print (n)

Read email in python 3.7 using imaplib with HTML body and attachments in the email

I would really appreciate if someone can help me with this issue.
I have implemented the below code to read "unread emails from gmail inbox". I need to print "To", "From", "Subject", "Body" and "save attachments in a specified location"
I have 2 issues here.
If there is any email with attachments, it gives the error Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]. It will print all the required things and saves attachments but DOESN'T print the body.
This works fine if no attachment is included.
If there is an email body with any styling in it like "bold/italic/underline/colour...etc", it doesn't print as it is.
Example : Python is printed as Python=C2=A0i= and sometimes different styling is seperated by "*".
def get_body(email_message):
for payload in email_message.get_payload():
# print('Body:\t', payload.get_payload())
break
return(payload.get_payload())
def read_email(server,uname,pwd):
username = uname
password = pwd
mail = imaplib.IMAP4_SSL(server)
mail.login(username, password)
mail.select("inbox")
try:
result, data = mail.uid('search', None, '(UNSEEN)')
inbox_item_list = data[0].split()
most_recent = inbox_item_list[-1]
result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode("UTF-8")
email_message = email.message_from_string(raw_email)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(location, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('Downloaded file:', filename)
if email_message.is_multipart():
for payload in email_message.get_payload():
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t',email_message['Date'])
print('Body:\t', get_body(email_message))
break
else:
print('Nothing'])
except IndexError:
print("No new email")
while True:
read_email("imap.gmail.com", "s#gmail.com", "spassword")
time.sleep(10)
Many thanks
I new to python and this is the complete working code I have done to read unseen emails. You can print the elements according to your requirements. It works for gmail and office 365. This script runs for every 10 seconds. This might also work for other email providers by passing the credentials. Hope this helps.
import email
import imaplib
import os
import html2text
import time
detach_dir = 'locationWhereYouWantToSaveYourAttachments'
def get_body(email_message):
for payload in email_message.get_payload():
break
return payload.get_payload()
def two_way_email(server,uname,pwd):
username = uname
password = pwd
mail = imaplib.IMAP4_SSL(server)
mail.login(username, password)
mail.select("inbox")
try:
result, data = mail.uid('search', None, '(UNSEEN)')
inbox_item_list = data[0].split()
most_recent = inbox_item_list[-1]
result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode("UTF-8")
email_message = email.message_from_string(raw_email)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('Downloaded file:', filename)
if email_message.is_multipart():
for payload in email_message.get_payload():
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t',email_message['Date'])
for part in email_message.walk():
if (part.get_content_type() == 'text/plain') and (part.get('Content-Disposition') is None):
print('Body:\t',part.get_payload())
break
else:
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t', email_message['Date'])
print('Thread-Index:\t', email_message['Thread-Index'])
text = f"{email_message.get_payload(decode=True)}"
html = text.replace("b'", "")
h = html2text.HTML2Text()
h.ignore_links = True
output = (h.handle(f'''{html}''').replace("\\r\\n", ""))
output = output.replace("'", "")
print(output)
except IndexError:
print("No new email")
while True:
two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
time.sleep(10)
from imap_tools import MailBox, A
with MailBox('imap.mail.com').login('test#mail.com', 'pwd') as mailbox:
for msg in mailbox.fetch(A(seen=False)):
body = msg.text or msg.html
print(msg.subject, msg.from_, msg.to, len(body))
for att in msg.attachments:
print(att.filename, len(att.payload))
https://github.com/ikvk/imap_tools
I am lib author.
More examples: https://github.com/ikvk/imap_tools/blob/master/examples/idle.py#L19

How to extract attachments from msg files

How do I extract attachments from msg files using python...?
Sample code snippet will be helpful...!
Look at the email examples, specifically unpacking the message based on mimetype.
imap_host = 'imap.gmail.com'
mail = imaplib.IMAP4_SSL(imap_host)
mail.login(user,passw)
mail.select("Inbox")
mail = get_unseen(mail)
result, data = mail.uid('search', None, 'UNSEEN')
uid_list = data[0].split()
print len(uid_list), 'Unseen emails.'
for i in range(len(uid_list)):
email_uid = uid_list[i]
res, dat = mail.uid('fetch', email_uid, '(RFC822)')
raw_email = dat[0][1]
msg = email.message_from_string(raw_email)
print ''
print 'New email:\n'
print i,'UID:', email_uid, 'Sender:', email.utils.parseaddr(msg['From'])[0],email.utils.parseaddr(msg['From'])[1]
print 'Subjct:',msg['Subject']
print 'Message: '
print get_body(msg)
attach_list = get_attach_list(msg)
print len(attach_list),'Attachments:',attach_list
get_attach(msg)
def get_body(msg):
for part in msg.walk():
content_type = part.get_content_type()
if content_type == 'text/plain' or content_type =='text/html':
payload = part.get_payload(decode=True)
if payload:
print payload
return
def get_attach_list(msg):
attach_list=[]
for part in msg.walk():
filename = part.get_filename()
if filename:
attach_list.append(filename)
return attach_list
def get_attach(msg):
for part in msg.walk():
filename = part.get_filename()
if filename:
fp = open(filename,'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return
So, basically what you do is walk through the complete mail, and look for attachments, and then using get_payload(), you download the attachments.

Downloading multiple attachments using imaplib

How can I download multiple attachments from a single mail using imaplib?
Let's say I have an e-mail and that e-mail contains 4 attachments. How can I download all of those attachments? The code below only downloads a single attachment from an e-mail.
detach_dir = 'c:/downloads'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('hello#gmail.com','3323434')
m.select("[Gmail]/All Mail")
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
For any future python travellers.
Here is a class that downloads any attachment found for an email and saves it to a specific location.
import email
import imaplib
import os
class FetchEmail():
connection = None
error = None
def __init__(self, mail_server, username, password):
self.connection = imaplib.IMAP4_SSL(mail_server)
self.connection.login(username, password)
self.connection.select(readonly=False) # so we can mark mails as read
def close_connection(self):
"""
Close the connection to the IMAP server
"""
self.connection.close()
def save_attachment(self, msg, download_folder="/tmp"):
"""
Given a message, save its attachments to the specified
download folder (default is /tmp)
return: file path to attachment
"""
att_path = "No attachment found."
for part in msg.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(download_folder, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return att_path
def fetch_unread_messages(self):
"""
Retrieve unread messages
"""
emails = []
(result, messages) = self.connection.search(None, 'UnSeen')
if result == "OK":
for message in messages[0].split(' '):
try:
ret, data = self.connection.fetch(message,'(RFC822)')
except:
print "No new emails to read."
self.close_connection()
exit()
msg = email.message_from_bytes(data[0][1])
if isinstance(msg, str) == False:
emails.append(msg)
response, data = self.connection.store(message, '+FLAGS','\\Seen')
return emails
self.error = "Failed to retreive emails."
return emails
def parse_email_address(self, email_address):
"""
Helper function to parse out the email address from the message
return: tuple (name, address). Eg. ('John Doe', 'jdoe#example.com')
"""
return email.utils.parseaddr(email_address)
I reworked the code, breaking it up into functions. I use PEEK so I don't change the UNREAD status of the email messages.
I'm posting my take on the problem, similar to #John, but I use only functions instead of classes:
import imaplib
import email
# Connect to an IMAP server
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_string(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
# Download all the attachment files for all emails in the inbox.
def downloadAllAttachmentsInInbox(server, user, password, outputdir):
m = connect(server, user, password)
resp, items = m.search(None, "(ALL)")
items = items[0].split()
for emailid in items:
downloaAttachmentsInEmail(m, emailid, outputdir)
You code appears okay except for the return (perhaps a typo?) right after the fp.close():
...
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
After saving the first attachment it returns from the function. Comment out that line and see if it fixes your issue.
You may use imap_tools package:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
for message in mailbox.fetch():
for att in message.attachments: # list: [Attachment objects]
att.filename # str: 'cat.jpg'
att.content_type # str: 'image/jpeg'
att.payload # bytes: b'\xff\xd8\xff\xe0\'
* You can try following function to get mail attachment
def create_message_attachment(self,msg_str):
count = 1
body = ''
content_id = ''
for part in msg_str.walk():
file_name_gl = None
mptype = part.get_content_maintype()
file_name_gl = part.get_filename()
if mptype == "multipart":
continue
elif mptype == "text":
if not file_name_gl: continue
elif mptype == "image":
content_id = part.get('Content-ID')
if not file_name_gl:
file_name_gl = 'image_' + str(count) + '.' + part.get_content_subtype()
count = count + 1
body = part.get_payload(decode = True)
if type(body) <> type(None) :
body = body.strip()
if body <> "":
body = base64.encodestring(body)
#sashoalm 's code worked for me with a minor change:
change mail = email.message_from_string(email_body) in downloaAttachmentsInEmail to mail = email.message_from_bytes(email_body)
I was getting an error when trying to read bytes (the attachment) as a string. Now it works perfectly for me.
Heres a full example of the code:
server = 'outlook.office365.com'
user = 'YOUR USERNAME'
password = 'YOUR PASSWORD'
outputdir = 'DIRECTORY THAT YOU WANT FILES DOWNLOADED TO'
subject = 'Data Exports' #subject line of the emails you want to download attachments from
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_bytes(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
m = connect(server, user, password)
m.select("Inbox")
typ, msgs = m.search(None, '(SUBJECT "' + subject + '")')
msgs = msgs[0].split()
for emailid in msgs:
downloaAttachmentsInEmail(m, emailid, outputdir)
downloadAttachments(subject)
import re
def get_valid_filename(s):
s = str(s).strip().replace(' ', '_')
return re.sub(r'(?u)[^-\w.]', '', s)
fileName = get_valid_filename(part.get_filename())
Clean up the file name if it contains invalid characters. e.g: : on Windows.

Categories