I'm trying to save emails from my Gmail account in EML format. I want to preserve as much information as possible (To, From, CC, attachments, etc.) because I want to convert the files to PST format and recreate my Outlook mailbox if needed in the future. So far I've only figured out how to save the text content of each email into an .eml file; I haven't been able to figure out how to save the other email information using this email library.
import imaplib
import email
# Log in and open the inbox.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('email', 'pswd')
mail.list()  # Lists all labels in GMail
mail.select('inbox')  # Connected to inbox

# The search reply is one bytes value holding the matching message numbers.
result, data = mail.search(None, "ALL")
id_list = data[0].split()

for x, email_id in enumerate(id_list):
    # Fetch the complete message (RFC822) for the given ID.
    result, data = mail.fetch(email_id, "(RFC822)")
    if result != 'OK' or not isinstance(data[0], tuple):
        # Nothing usable came back for this ID; move on to the next one.
        continue
    raw_email = data[0][1]
    # Parse straight from bytes: a raw message is not guaranteed to be valid
    # UTF-8, so decoding it to str first can raise UnicodeDecodeError.
    # NOTE(review): writing `raw_email` unchanged to the file in 'wb' mode
    # would presumably keep headers and attachments too - confirm before
    # switching, since this script only stores the text/plain parts.
    email_message = email.message_from_bytes(raw_email)
    save_string = r"C:\Users\Millar\Desktop\SavedEmailsTest\Dumpgmailemail_" + str(x) + ".eml"
    for part in email_message.walk():
        if part.get_content_type() != "text/plain":  # ignore attachments/html
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue
        # Decode with the charset the part declares, falling back to UTF-8.
        charset = part.get_content_charset() or 'utf-8'
        try:
            text = body.decode(charset, errors='replace')
        except LookupError:
            # The declared charset is unknown to Python.
            text = body.decode('utf-8', errors='replace')
        # Append so that several text/plain parts of one message accumulate
        # in the same file, as before.
        with open(save_string, 'a', encoding='utf-8') as myfile:
            myfile.write(text)

# Release the mailbox and the connection.
mail.close()
mail.logout()
Related
To elaborate on the question: I have code that searches for UNSEEN emails and stores their IDs in a variable.
# Open the mailbox; the first item of the response data is kept as an int.
status, messages = mail.select('Inbox')
messages = int(messages[0])

# Ask the server for unseen messages and count the IDs in the reply.
_, new_mails = mail.search(None, '(UNSEEN)')
unseen_ids = new_mails[0].split()
recent_mails = len(unseen_ids)

print("Total Messages that is New:", recent_mails)
print(new_mails)
and it prints this:
Total Messages that is New: 2
[b'389 393']
What I want to do is use these numbers to fetch each message's contents, such as the subject, the sender and the body. Is that possible with this implementation?
I have earlier code that fetches the most recent emails, where the variable N sets how many emails it goes through.
The code follows:
def _to_text(value, charset):
    """Return *value* as ``str``; bytes are decoded with *charset* (UTF-8 fallback)."""
    if not isinstance(value, bytes):
        return value
    try:
        # decode_header() reports None as the charset for unencoded chunks.
        return value.decode(charset or "utf-8", errors="replace")
    except LookupError:
        # The message names a charset Python does not know.
        return value.decode("utf-8", errors="replace")


N = 0  # how many of the newest messages to walk; the range below is empty while this is 0
for i in range(messages, messages - N, -1):
    # fetch the email message by ID
    res, fetched = mail.fetch(str(i), "(RFC822)")
    for response in fetched:
        if not isinstance(response, tuple):
            continue
        # parse a bytes email into a message object
        msg = email.message_from_bytes(response[1])
        # decode the email subject first, then upper-case the decoded text
        pre_subject, encoding = decode_header(msg.get("Subject", ""))[0]
        pre_subject = _to_text(pre_subject, encoding)
        subject = pre_subject.upper()
        # decode email sender
        From, encoding = decode_header(msg.get("From", ""))[0]
        From = _to_text(From, encoding)
        print("Subject:", pre_subject)
        print("From:", From)
        if msg.is_multipart():
            # iterate over email parts
            for part in msg.walk():
                content_type = part.get_content_type()
                content_disposition = str(part.get("Content-Disposition"))
                # None for container parts, bytes otherwise
                payload = part.get_payload(decode=True)
                if "attachment" in content_disposition:
                    # download attachment
                    filename = part.get_filename()
                    if filename and payload is not None:
                        folder_name = clean(subject)
                        if not os.path.isdir(folder_name):
                            # make a folder for this email (named after the subject)
                            os.mkdir(folder_name)
                        # The name comes from the message itself; keep only its
                        # last component so it cannot point outside the folder.
                        filepath = os.path.join(folder_name, os.path.basename(filename))
                        with open(filepath, "wb") as attachment:
                            attachment.write(payload)
                elif content_type == "text/plain" and payload is not None:
                    # print text/plain emails and skip attachments
                    body = _to_text(payload, part.get_content_charset())
                    print(body)
                    plain = body
        else:
            # extract content type of email
            content_type = msg.get_content_type()
            # get the email body
            payload = msg.get_payload(decode=True)
            body = _to_text(payload, msg.get_content_charset()) if payload is not None else ""
            if content_type == "text/plain":
                # print only text email parts
                print(body)
                plain = body
        print("="*100)
After hours of experimenting, I finally implemented it successfully.
Basically, the IDs I fetched for the UNSEEN emails are converted from bytes to a string, and the resulting list of numbers is then passed to the loop.
# Connect to Gmail over SSL and authenticate.
gmail_host = 'imap.gmail.com'
mail = imaplib.IMAP4_SSL(gmail_host)
mail.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
mail.list()

# Select the inbox; the first item of the response data is kept as an int.
status, messages = mail.select('Inbox')
messages = int(messages[0])

# The search reply is a single bytes value of message numbers; it is counted
# as bytes and then turned into a list of str for the fetch loop.
_, raw_mails = mail.search(None, '(UNSEEN)')
unseen_blob = raw_mails[0]
recent_mails = len(unseen_blob.split())
splited = unseen_blob.decode('utf-8')
new_emails = splited.split()

print("Total Messages that is New:", recent_mails)
print(new_emails)
Output:
['378', '390']
And changed my for loop to this
for i in new_emails:
Logging in and reading the subject works, but an error occurs when reading the body. What is the error? On the internet the error was always attributed to this part: `email.message_from_bytes(data[0][1].decode())`, but I think this part is correct.
# Searches the INBOX for unseen messages by UID, fetches at most five of them
# and extracts sender, subject and the first text/plain body part.
# Connection settings
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "UNSEEN")
if result == 'OK':
for num in data[0].split()[:5]:
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
# data[0][1] holds the raw message bytes; this already yields a Message object
email_message_raw = email.message_from_bytes(data[0][1])
# assumes make_header/decode_header were imported from email.header elsewhere - TODO confirm
email_from = str(make_header(decode_header(email_message_raw['From'])))
# from Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
subject = str(email.header.make_header(email.header.decode_header(email_message_raw['Subject'])))
# content = email_message_raw.get_payload(decode=True)
# from Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
# NOTE(review): email_message_raw is a Message object, not a str, so this second
# parse is presumably the call that raises
# "TypeError: initial_value must be str or None, not Message" - confirm.
b = email.message_from_string(email_message_raw)
body = ""
if b.is_multipart():
for part in b.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
else:
body = b.get_payload(decode=True)
m.close()
m.logout()
# body is whatever get_payload(decode=True) returned, or "" if no text/plain part was found
txt = body
regarding = subject
print("###########################################################")
print(regarding)
print("###########################################################")
print(txt)
print("###########################################################")
Error message:
TypeError: initial_value must be str or None, not Message
Thanks for the comments and reply
You have everything in place. Just have to understand a few concepts.
"email" library allows you to convert typical email bytes into an easily usable object called Message using its parser APIs, such as message_from_bytes(), message_from_string(), etc.
The typical error is due to an input error.
email.message_from_bytes(data[0][1].decode())
The function above, message_from_bytes, takes bytes as input, not str. So decoding data[0][1] before passing it to the parser API is unnecessary and hands the function the wrong type.
In short, you are trying to parse the original email message twice using message_from_bytes(data[0][1]) and message_from_string(email_message_raw). Get rid of one of them and you will be all set!
Try this approach:
import email
import imaplib
from email.header import decode_header, make_header


def _payload_text(part):
    """Return the decoded payload of *part* as str, or "" when it has none."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    try:
        return payload.decode(part.get_content_charset() or 'utf-8', errors='replace')
    except LookupError:
        # The declared charset is unknown to Python.
        return payload.decode('utf-8', errors='replace')


# Connection settings
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'

m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
try:
    m.select('INBOX')
    result, data = m.uid('search', None, "UNSEEN")
    if result == 'OK':
        # Look at no more than the first five unseen messages.
        for num in data[0].split()[:5]:
            result, data = m.uid('fetch', num, '(RFC822)')
            if result != 'OK' or not isinstance(data[0], tuple):
                continue
            # Parse the raw bytes exactly once; the result is a Message object.
            email_message = email.message_from_bytes(data[0][1])
            email_from = str(make_header(decode_header(email_message.get('From', ''))))
            # from Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
            subject = str(make_header(decode_header(email_message.get('Subject', ''))))
            # from Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
            # email_message is already a Message object with is_multipart(),
            # walk(), etc., so it must not be parsed a second time.
            b = email_message
            body = ""
            if b.is_multipart():
                for part in b.walk():
                    ctype = part.get_content_type()
                    cdispo = str(part.get('Content-Disposition'))
                    # skip any text/plain (txt) attachments
                    if ctype == 'text/plain' and 'attachment' not in cdispo:
                        body = _payload_text(part)
                        break
            else:
                # not multipart - i.e. plain text, no attachments
                body = _payload_text(b)

            txt = body
            regarding = subject
            print("###########################################################")
            print(regarding)
            print("###########################################################")
            print(txt)
            print("###########################################################")
    # Leave the selected mailbox only after every message has been read.
    m.close()
finally:
    m.logout()
from imap_tools import MailBox, AND
# Walk every message in INBOX and keep its body: msg.text is used unless it is
# falsy, in which case msg.html is taken instead.
mailbox_session = MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX')
with mailbox_session as mailbox:
    for msg in mailbox.fetch():
        body = msg.text or msg.html
https://github.com/ikvk/imap_tools
I'm writing a program that checks which email addresses have bounced (not delivered) and generating a list for that in Python. But so far, I only get the bounced emails in the first page of Inbox (from what I understood from my code). Here's what I have so far:
from email.utils import getaddresses

# Maps each recipient address to 1 (no bounce seen) or 0 (a bounce mentions it).
inbox = {}
# NOTE(review): `M` and `items` are used here before this snippet assigns
# them; presumably the sent-mail folder was selected and searched earlier -
# confirm against the full script.
for item in items:
    # getting email content
    resp, data = M.fetch(item, "(RFC822)")
    email_content = data[0][1]
    msg = email.message_from_bytes(email_content)
    # Read the recipients from the parsed To header(s) rather than scanning raw
    # lines: this copes with folded headers and several recipients, and stores
    # the bare address without any display name.
    for _display_name, address in getaddresses(msg.get_all('To', [])):
        if address:
            inbox[address] = 1

fail_content = ['Delivery Status Notification (Failure)','Undeliverable:','DELIVERY FAILURE:','Returned mail:','Undelivered Mail Returned to Sender']

# check Inbox
M.select('INBOX')
resp, items = M.search(None, "All")
items = items[0].split()
for item in items:
    resp, data = M.fetch(item, "(RFC822)")
    email_content = data[0][1]
    msg = email.message_from_bytes(email_content)
    # A message without a Subject header yields None; treat that as empty.
    subject = str(msg['Subject'] or '')
    # check for all possible fail content; one matching phrase is enough
    if not any(fc in subject for fc in fail_content):
        continue
    # get email content; undecodable bytes are replaced rather than raising
    content_list = msg.as_bytes().decode(encoding='UTF-8', errors='replace')
    # find its sender
    for eo in inbox:
        if eo in content_list:
            inbox[eo] = 0

print(inbox)
How can I read the body of a mail? It is not coming out properly this way.
I tried this:
from email.header import decode_header, make_header

mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(email_user, email_pass)
mail.select('Inbox')
# `status` instead of `type`, so the builtin is not shadowed.
status, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
for num in id_list:
    typ, data = mail.fetch(num, '(RFC822)')
    raw_email = data[0][1]
    # Parse the bytes directly; a raw message is not guaranteed to be UTF-8.
    email_message = email.message_from_bytes(raw_email)
    # Take the subject from the parsed header instead of splitting the
    # serialized message text, which breaks when "Subject: " or "\nTo:" is
    # missing or the headers come in a different order.
    subject = str(make_header(decode_header(email_message.get('Subject', ''))))
    # The body is the first text/plain part that is not an attachment; walk()
    # also yields the message itself when it is not multipart.
    body = ''
    for part in email_message.walk():
        if part.get_content_type() != 'text/plain':
            continue
        if 'attachment' in str(part.get('Content-Disposition')):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        try:
            body = payload.decode(part.get_content_charset() or 'utf-8', errors='replace')
        except LookupError:
            # The declared charset is unknown to Python.
            body = payload.decode('utf-8', errors='replace')
        break
    print(email_message)
its showing
If you simply want to parse the email and access the body, then consider using mail-parser. It's a simple mail-parser that takes as input a raw email and generates a parsed object.
import mailparser
# Four alternative entry points; every line rebinds `mail`, so in practice only
# the one matching how the raw message is available would be used.
# NOTE(review): f, fp, raw_mail and byte_mail are not defined in this snippet -
# presumably a file path, an open file object, a str and bytes respectively;
# confirm against the mail-parser documentation.
mail = mailparser.parse_from_file(f)
mail = mailparser.parse_from_file_obj(fp)
mail = mailparser.parse_from_string(raw_mail)
mail = mailparser.parse_from_bytes(byte_mail)
How to Use:
mail.body #use this to access the body contents
# read in the same way, as an attribute of the parsed object
mail.to
I have emails that Gmail settings flag and move to a certain label called "Test". The script I am writing, when run, downloads any attachments in that label and then moves all those emails to another label called "Checked" (to keep the "Test" label clear).
I have the download and parsing part done but I can't seem to manage moving the emails.
Here is the completed part of the program:
import imaplib
import email
import os
import base64
#import Const
# Account name, password and IMAP host that are handed to auth() below.
user = 'email#gmail.com'
password = 'imnottellingyou'
imap_url = 'imap.gmail.com'
def auth(user, password, imap_url):
    """Open an SSL IMAP connection to *imap_url*, log in, and return it."""
    connection = imaplib.IMAP4_SSL(imap_url)
    connection.login(user, password)
    return connection
con = auth(user, password, imap_url)
con.select('Test')
# `status` instead of `type`, so the builtin is not shadowed.
status, data = con.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
print(id_list)
print(mail_ids)

for num in id_list:
    typ, data = con.fetch(num, '(RFC822)')
    raw_email = data[0][1]
    # Parse the bytes directly; a raw message is not guaranteed to be UTF-8.
    email_message = email.message_from_bytes(raw_email)
    for part in email_message.walk():
        # Containers and parts without a Content-Disposition are skipped.
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        fileName = part.get_filename()
        if not fileName:
            continue
        # The name comes from the message itself; keep only its last component
        # so it cannot point outside the download folder.
        filePath = os.path.join(
            'C:/Users/User/Desktop/test', os.path.basename(fileName))
        if os.path.isfile(filePath):
            # Already downloaded; existing files are never overwritten.
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        with open(filePath, 'wb') as fp:
            fp.write(payload)
# Tries to add the "Checked" label to, and drop the "Test" label from, every
# message in id_list.
# NOTE(review): id_list was filled by con.search(), whose results are message
# sequence numbers, while con.uid() addresses messages by UID - presumably the
# reason these calls have no visible effect; confirm.
for uid in id_list:
con.uid('STORE', uid, '+X-GM-LABELS', 'Checked')
con.uid('STORE', uid, '-X-GM-LABELS', 'Test')
Here is the trouble area. This is what I have tried:
#after emails in label have been checked for attachments and downloaded
#emails will be transferred to a "checked" label
# NOTE(review): id_list was filled by con.search(), whose results are message
# sequence numbers, while con.uid() addresses messages by UID - presumably the
# reason these calls have no visible effect; confirm.
for uid in id_list:
con.uid('STORE', uid, '+X-GM-LABELS', 'Checked')
con.uid('STORE', uid, '-X-GM-LABELS', 'Test')
The program executes fine, and no error messages appear but nothing changes in my gmail inbox.
Finally was able to come up with a solution.
# Two STORE commands are issued per message, in this order:
#   1. add the new "Checked" label to the message;
#   2. flag the message as deleted, which takes it out of the original label
#      instead of "removing" that label (labels act like folders in gmail).
label_moves = (
    ('+X-GM-LABELS', '(Checked)'),
    ('+FLAGS', '\\Deleted'),
)
for msg_num in id_list:
    for store_command, store_value in label_moves:
        con.store(msg_num, store_command, store_value)