How can I read the body of an email? It is not coming out properly
when I do it this way.
I tried this:
# Connect to Gmail over IMAP/SSL, log in and open the inbox.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(email_user, email_pass)
mail.select('Inbox')
# Search for every message in the selected mailbox.
# NOTE(review): `type` shadows the builtin of the same name.
type, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
# NOTE(review): indentation was lost in this paste; the statements below are
# presumably the body of this loop.
for num in data[0].split():
# Fetch the complete RFC822 message for this id.
typ, data = mail.fetch(num, '(RFC822)' )
raw_email = data[0][1] # raw message bytes taken from the fetch response
# Decode the bytes to text before handing them to the string parser.
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
# The subject is cut out of the stringified message by splitting on
# "Subject: " and "\nTo:" rather than by reading the parsed header.
subject = str(email_message).split("Subject: ", 1)[1].split("\nTo:", 1)[0]
#body = str(email_message).split("body: ", 1)[1].split("\nTo:", 1)[0]
# Prints the whole parsed message, not only its body.
print(email_message);
its showing
If you simply want to parse the email and access the body, then consider using mail-parser. It's a simple mail-parser that takes as input a raw email and generates a parsed object.
import mailparser
# The four calls below are alternatives: pick the one that matches the form
# of your input. `f`, `fp`, `raw_mail` and `byte_mail` are placeholders that
# this snippet does not define.
# From a file (argument named `f` here).
mail = mailparser.parse_from_file(f)
# From a file object.
mail = mailparser.parse_from_file_obj(fp)
# From the raw message held in a str.
mail = mailparser.parse_from_string(raw_mail)
# From the raw message held in bytes.
mail = mailparser.parse_from_bytes(byte_mail)
How to Use:
mail.body #use this to access the body contents
# presumably the recipient(s) of the message; confirm against the mail-parser docs
mail.to
Related
Logging in and reading the subject works, but an error occurs when reading the body. What causes the error? In the examples I found online, the error was always in this part: "email.message_from_bytes(data[0][1].decode())", but I think that part of my code is correct.
# Connection settings
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'
# Open an IMAP/SSL session on port 993, log in and select the inbox.
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
# UID search for messages flagged UNSEEN.
result, data = m.uid('search', None, "UNSEEN")
if result == 'OK':
# NOTE(review): indentation was lost in this paste; nesting below is presumed.
# Only the first five UIDs returned by the search are processed.
for num in data[0].split()[:5]:
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
# Parse the fetched bytes into a Message object.
email_message_raw = email.message_from_bytes(data[0][1])
# NOTE(review): make_header/decode_header are used unqualified here but as
# email.header.* two lines below; this snippet shows no import for either.
email_from = str(make_header(decode_header(email_message_raw['From'])))
# by Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
subject = str(email.header.make_header(email.header.decode_header(email_message_raw['Subject'])))
# content = email_message_raw.get_payload(decode=True)
# by Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
# NOTE(review): email_message_raw is already a Message (see message_from_bytes
# above); passing it to message_from_string parses it a second time and is
# the call that raises the TypeError quoted after this snippet.
b = email.message_from_string(email_message_raw)
body = ""
if b.is_multipart():
# Walk every part and keep the first text/plain part that is not an attachment.
for part in b.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
else:
body = b.get_payload(decode=True)
# Close the selected mailbox and end the session.
m.close()
m.logout()
# Print the subject and body between separator lines.
txt = body
regarding = subject
print("###########################################################")
print(regarding)
print("###########################################################")
print(txt)
print("###########################################################")
Error message:
TypeError: initial_value must be str or None, not Message
Thanks for the comments and reply
You have everything in place. Just have to understand a few concepts.
"email" library allows you to convert typical email bytes into an easily usable object called Message using its parser APIs, such as message_from_bytes(), message_from_string(), etc.
The typical error is due to an input error.
email.message_from_bytes(data[0][1].decode())
The function above, message_from_bytes, takes bytes as input, not str, so data[0][1] must not be decoded before it is passed to the parser API.
In short, you are trying to parse the original email message twice using message_from_bytes(data[0][1]) and message_from_string(email_message_raw). Get rid of one of them and you will be all set!
Try this approach:
import email
import email.header
import imaplib

# Connection settings
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'

# Defaults so the report at the bottom still runs when the search finds
# nothing or a fetch fails.
subject = ""
body = ""

m = imaplib.IMAP4_SSL(HOST, 993)
try:
    m.login(USERNAME, PASSWORD)
    m.select('INBOX')

    # UID search for messages flagged UNSEEN.
    result, data = m.uid('search', None, "UNSEEN")
    if result == 'OK':
        # Only the first five UIDs returned by the search are processed.
        for num in data[0].split()[:5]:
            fetch_result, fetch_data = m.uid('fetch', num, '(RFC822)')
            if fetch_result != 'OK':
                continue

            # message_from_bytes() already returns a Message object; it must
            # not be passed through message_from_string() a second time.
            email_message = email.message_from_bytes(fetch_data[0][1])

            # Headers may be RFC 2047 encoded, so decode them for display.
            email_from = str(email.header.make_header(
                email.header.decode_header(email_message['From'])))
            # by Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
            subject = str(email.header.make_header(
                email.header.decode_header(email_message['Subject'])))

            # by Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
            # this is already set as Message object which have many methods
            # (i.e. is_multipart(), walk(), etc.)
            b = email_message
            body = ""

            if b.is_multipart():
                # Keep the first text/plain part that is not an attachment.
                for part in b.walk():
                    ctype = part.get_content_type()
                    cdispo = str(part.get('Content-Disposition'))
                    # skip any text/plain (txt) attachments
                    if ctype == 'text/plain' and 'attachment' not in cdispo:
                        body = part.get_payload(decode=True)  # decode
                        break
            # not multipart - i.e. plain text, no attachments
            else:
                body = b.get_payload(decode=True)

    m.close()
finally:
    # Always end the session, even when login, select or a fetch raised.
    m.logout()

# As in the original snippet, only the last processed message is reported.
txt = body
regarding = subject

print("###########################################################")
print(regarding)
print("###########################################################")
print(txt)
print("###########################################################")
# NOTE(review): AND is imported but not used in this snippet.
from imap_tools import MailBox, AND
# get email bodies from INBOX
# Log in inside a `with` block; 'INBOX' is passed as the third login argument.
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
# NOTE(review): indentation was lost in this paste; nesting below is presumed.
for msg in mailbox.fetch():
# Use msg.text, falling back to msg.html when msg.text is falsy.
body = msg.text or msg.html
https://github.com/ikvk/imap_tools
I am trying to read an email in Gmail that has a specific subject and get the OTP value within the email. I am using imaplib
import imaplib
# Start of the method: connect to Gmail and search the inbox by subject.
# NOTE(review): indentation was lost in this paste; the lines below are the method body.
def get_CreateAccount_OTP(self, email_type):
gmail = imaplib.IMAP4_SSL("imap.gmail.com", 993)
gmail.login(self.gmail_username, self.gmail_password)
# Select the inbox in read-only mode.
gmail.select('Inbox', readonly=True)
# Search by subject; the search string contains an apostrophe ("Here's").
# NOTE(review): `type` shadows the builtin of the same name.
type, data = gmail.search(None, '(SUBJECT "Here\'s your Texas by Texas email verification.")')
I got the type returned as Ok, but the data as below
data = {list: 1} [b'']
0 = {bytes: 0} b''
__len__ = {int} 1
After that line, it's not going into the below "for loop"
# Continuation of get_CreateAccount_OTP. When the search returns [b''],
# data[0].split() is an empty list, so this loop body never runs.
for num in data[0].split():
# Fetch the complete RFC822 message for this id.
typ, data = gmail.fetch(num, '(RFC822)')
raw_email = data[0][1]
raw_email_string = raw_email.decode('utf-8')
# NOTE(review): `email` and `re` are used below, but the snippet imports only imaplib.
# The parsed message is turned back into one string and scanned line by line.
email_message = str(email.message_from_string(raw_email_string))
email_message_list = email_message.split('\n')
# Pattern for a run of six digits.
RE_TIME_STAMP_PATTERN = re.compile((r'\d{6}'))
for line in email_message_list:
print(line)
if 'Your sign-in verification code is ' in line:
# Take the first six-digit run on the matching line.
self.OTP = re.findall(RE_TIME_STAMP_PATTERN, line)[0]
break
# NOTE(review): the message "OTP:" has no %s placeholder for the extra
# argument; if self.log is a logging.Logger this is reported as a formatting error.
self.log.info("OTP:",self.OTP)
return self.OTP
Note: I am new to Python and learning it slowly. Please bear with my silly questions.
Thanks in advance
I found the issue: the search string contains a special character, and imaplib does not convert that character to Unicode. So I had to remove the word containing the special character from my search string.
import imaplib
def get_CreateAccount_OTP(self, email_type):
    """Read the six-digit sign-in code from unseen Gmail verification mails.

    Logs in with ``self.gmail_username`` / ``self.gmail_password``, searches
    the inbox (read-only) for unseen messages whose subject contains the
    verification text, and scans each one for the line that carries the code.
    A code that is found is stored on ``self.OTP``.

    Args:
        email_type: Not used by this method; kept for its callers.

    Returns:
        The value of ``self.OTP`` after the scan, or ``None`` when no code
        was found and the attribute had not been set before.
    """
    import email
    import re

    # The leading word "Here's" is left out on purpose: its apostrophe made
    # the IMAP search return nothing.
    subject = "your Texas by Texas email verification."
    otp_pattern = re.compile(r'\d{6}')  # compiled once, outside the loops

    gmail = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    try:
        gmail.login(self.gmail_username, self.gmail_password)
        gmail.select('Inbox', readonly=True)
        status, found = gmail.search(None, '(UNSEEN SUBJECT "%s")' % subject)

        for num in found[0].split():
            typ, msg_data = gmail.fetch(num, '(RFC822)')
            raw_email = msg_data[0][1]
            # Undecodable bytes are replaced instead of aborting the scan.
            raw_email_string = raw_email.decode('utf-8', errors='replace')
            email_message = str(email.message_from_string(raw_email_string))

            for line in email_message.split('\n'):
                print(line)
                if 'Your sign-in verification code is ' not in line:
                    continue
                match = otp_pattern.search(line)
                if match:
                    # First six-digit run on the line, as findall(...)[0] gave.
                    self.OTP = match.group(0)
                    break
    finally:
        # End the IMAP session whether or not the scan succeeded.
        gmail.logout()

    # Avoid AttributeError when nothing matched and OTP was never assigned.
    otp = getattr(self, "OTP", None)
    # Logger calls take %-style placeholders for their extra arguments.
    self.log.info("OTP: %s", otp)
    return otp
While I am trying to parse the body of an email, the body comes out as
VCBJTkZPUk1BVElPTjwvdGQ+Cgk8L3RyPgoJPHRyPjx0ZD4mbmJzcDs8L3RkPjwvdHI+Cgk8dHI+
And when I try to decode it separately, it works successfully
import base64
# One base64-encoded line taken from the mail body.
data = "VCBJTkZPUk1BVElPTjwvdGQ+Cgk8L3RyPgoJPHRyPjx0ZD4mbmJzcDs8L3RkPjwvdHI+Cgk8dHI+"
# Decode it to bytes first, then print the bytes object.
decoded = base64.b64decode(data)
print(decoded)
Output:
b'T INFORMATION</td>\n\t</tr>\n\t<tr><td>&nbsp;</td></tr>\n\t<tr>'
But when I try the same thing in my mail parsing script, it doesn't work
# NOTE(review): indentation was lost in this paste; nesting below is presumed.
try:
# Connect to Gmail over IMAP/SSL, log in and open the inbox.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(FROM_EMAIL,FROM_PWD)
mail.select('inbox')
# Search for every message; NOTE(review): `type` shadows the builtin.
type, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
# First and last ids from the search result, as integers.
first_email_id = int(id_list[0])
latest_email_id = int(id_list[-1])
print(id_list)
print(first_email_id)
print(latest_email_id)
# Iterate over the ids as text, split on single spaces.
for i in data[0].decode().split(' '):
print(i)
# Fetch the complete RFC822 message for this id.
typ, data = mail.fetch(i, '(RFC822)' )
data=(data[0][1])
# NOTE(review): this base64-decodes the entire fetched message rather than
# only an encoded body part, which presumably explains the unreadable output
# shown after this snippet.
print(base64.b64decode(data))
# Any failure is caught here and only its message is printed.
except Exception as e:
print(str(e))
The output is as follows:
b"\r\xe9b\xbd\xea\xdeu:-\xa2|\xa9\xae\x8b^rH&j)\\\"
Is there any way to decode this ?
I'm trying to use python3 to parse through an email and output the contents of the body in a list of strings. The contents of the body always follows this pattern:
string \n string \n string \n etc.
The error I get currently is initial_value must be str or None, not bytes
import imaplib
import email
import time
import smtplib
# Account settings passed to readmail() below.
from_email = "someemail#gmail.com"
from_pwd = "somepass"
# NOTE(review): despite the smtp_ prefix these hold the IMAP host and port.
smtp_server = "imap.gmail.com"
smtp_port= 993
# Log in over IMAP/SSL, fetch the most recent inbox message and return its
# payload. NOTE(review): smtp_port is accepted but not used in the body;
# indentation was lost in this paste, so nesting below is presumed.
def readmail(from_email,from_pwd,smtp_server,smtp_port ):
try:
mail = imaplib.IMAP4_SSL(smtp_server)
mail.login(from_email,from_pwd)
mail.select('inbox')
# Search for every message in the inbox.
result, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
first_email_id =id_list[0]
latest_email_id = id_list[-1] #most recent email
# Fetch the complete RFC822 message for the last id.
result,data = mail.fetch(latest_email_id, "(RFC822)")
raw_email = data[0][1]
#read the email
# NOTE(review): raw_email comes straight from the fetch response without
# decoding; this call is the source of the "must be str or None, not bytes"
# error described above the snippet.
email_message = email.message_from_string(raw_email)
# NOTE(review): email_message_instance is not defined anywhere in this
# snippet; the variable assigned above is email_message.
return email_message_instance.get_payload()
# Any exception is printed, after which the function falls through without a return value.
except Exception as e:
print(e)
print(readmail(from_email,from_pwd,smtp_server,smtp_port))
I know the error lies after the "#read the email" comments because I was able to print out the raw_email
email.message_from_string() expects string argument. You can use email.message_from_bytes() instead to fix this issue.
Alternatively, you can convert raw_email to a string as follows
mail_content = raw_email.decode('utf-8')
and pass mail_content in email.message_from_string()
I'm trying to save emails from my Gmail account in EML format. I'm trying to preserve as much information as possible (To, From, CC, attachments, etc.) because I want to convert this into PST format and recreate my Outlook mailbox if needed in the future. I've only figured out how to save the contents of the email into an EML file, but I haven't been able to figure out how to save the other email information using this email library.
import imaplib
import email
# Login
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('email', 'pswd')
mail.list()  # Lists all labels in GMail
mail.select('inbox')  # Connected to inbox

result, data = mail.search(None, "ALL")
id_list = data[0].split()

# x is the position of the message in the search result; it is also used
# to number the output file.
for x, email_id in enumerate(id_list):
    # fetch the email body (RFC822) for the given ID
    result, msg_data = mail.fetch(email_id, "(RFC822)")
    # NOTE: raw_email holds the complete fetched message; as before, only
    # its text/plain parts are written to the file below.
    raw_email = msg_data[0][1]
    # Parse the bytes directly so a non-UTF-8 message cannot abort the run.
    email_message = email.message_from_bytes(raw_email)

    save_string = r"C:\Users\Millar\Desktop\SavedEmailsTest\Dumpgmailemail_" + str(x) + ".eml"

    for part in email_message.walk():
        if part.get_content_type() != "text/plain":  # ignore attachments/html
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue

        # Decode with the part's declared charset; fall back to UTF-8 when
        # the charset is missing or unknown to Python.
        charset = part.get_content_charset() or 'utf-8'
        try:
            text = body.decode(charset, errors='replace')
        except LookupError:
            text = body.decode('utf-8', errors='replace')

        # Append so that several text/plain parts of one message end up in
        # the same file; the context manager closes it even on error.
        with open(save_string, 'a', encoding='utf-8') as myfile:
            myfile.write(text)