Fetch a particular unseen email and print only those emails - python

To elaborate the question I have a code that searches for UNSEEN emails and stores the ID to a variable.
status, messages = mail.select('Inbox')
messages = int(messages[0])
_, new_mails = mail.search(None, '(UNSEEN)')
recent_mails = len(new_mails[0].split())
print("Total Messages that is New:" , recent_mails)
print(new_mails)
and it prints this:
Total Messages that is New: 2
[b'389 393']
What I want to do is the use these numbers to fetch it's contents like subject, who sent it and the body of the email. Is it possible to this implementation?
I have a previous code that fetches the first and beyond emails by indicating on how emails it will go to using the variable
N code follows:
N = 0
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = mail.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
pre_subject, encoding = decode_header(msg["Subject"])[0]
subject = pre_subject.upper()
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", pre_subject)
print("From:", From)
# if the email message is multipart
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
print(body)
plain = body
elif "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# extract content type of email
content_type = msg.get_content_type()
# get the email body
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
# print only text email parts
print(body)
plain = body
print("="*100)

Finally for hours of experimenting I successfully implemented it.
Basically the ID I fetched on UNSEEN emails is converted to string from byte and then pass those number lists to the loop
gmail_host = 'imap.gmail.com'
mail = imaplib.IMAP4_SSL(gmail_host)
mail.login(EMAIL_ADDRESS,EMAIL_PASSWORD)
mail.list()
status, messages = mail.select('Inbox')
messages = int(messages[0])
_, raw_mails = mail.search(None, '(UNSEEN)')
recent_mails = len(raw_mails[0].split())
splited = str(raw_mails[0], 'utf-8')
new_emails = splited.split()
print("Total Messages that is New:" , recent_mails)
print(new_emails)
Output:
['378', '390']
And changed my for loop to this
for i in new_emails:

Related

How do I implement the imaplib search function?

import imaplib
import email
from email.header import decode_header
import webbrowser
import os
# account credentials
username = "example#stack.com"
password = "exapleforstack"
imap_server = "imap.one.com"
def clean(text):
# clean text for creating a folder
return "".join(c if c.isalnum() else "_" for c in text)
# create an IMAP4 class with SSL
imap = imaplib.IMAP4_SSL(imap_server)
# authenticate
imap.login(username, password)
status, messages = imap.select("INBOX")
imap.search(None, 'SUBJECT', '"exampleforstack"')
# number of top emails to fetch
N = 3
# total number of emails
messages = int(messages[0])
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
# if the email message is multipart
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
print(body)
elif "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# extract content type of email
content_type = msg.get_content_type()
# get the email body
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
# print only text email parts
print(body)
if content_type == "text/html":
# if it's HTML, create a new HTML file and open it in browser
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
# write the file
open(filepath, "w").write(body)
# open in the default browser
webbrowser.open(filepath)
print("="*100)
# close the connection and logout
imap.close()
imap.logout()
I tried using the search method a couple different ways, like:
res, msg = imap.search(None, 'SUBJECT', "example")
and
res, msg = imap.search(None, 'SUBJECT, "example"')
but my code just gives an error and automatically fetches the most recent 3.
I've tried replacing the line:
res, msg = imap.fetch(str(i), "(RFC822)")
with
res, msg = imap.search(None, 'SUBJECT', '"example"')
but the program returns nothing at all.
how would I go about implementing the search I got this code from pythoncode and altered its credentials.
But I'm not sure why I can't implement the search function.

How to scan for a table in the body of an email using Python?

Currently, my code goes through my emails and looks for a certain pattern. However, instead of looking for a pattern, I wanted to make the code look for a specific table and export that table into a csv. Here's the table I would like my code to scan for:
Here is my current code:
with open("data.csv", "w") as f_out:
writer = csv.writer(f_out)
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
payload = part.get_payload(decode=True)
if payload is None:
continue
body = payload.decode()
pattern = re.compile(
r"([a-zA-Z]+[0-9]+) Line ([0-9]+) Seq ([0-9]) ([0-9]+/[0-9]+/[0-9]+)")
#r"([a-zA-Z]+[0-9]+) Line ([0-9]+) Seq ([0-9]) ([0-9]+/[0-9]+/[0-9]+)")
#r"([a-zA-Z]+[0-9]+)( Line ([0-9]+))?( Seq ([0-9])? ([0-9]+/[0-9]+/[0-9]+)")
#r"([a-zA-Z]+[0-9]+)( Line ([0-9]+ )| )(Seq ([0-9]) |)([0-9]+\/[0-9]+\/[0-9]+)")
matches = pattern.finditer(body)
writer.writerows(map(lambda m: m.groups(), matches))
break
imap.close()
imap.logout()

How can I read the mail body of a mail with Python?

Log in and read subject works. An error occurs when reading the body. What is the error? In the internet the error was always in this part : " email.message_from_bytes(data[0][1].decode())"but I think this part is correct.
# Connection settings
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "UNSEEN")
if result == 'OK':
for num in data[0].split()[:5]:
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
email_message_raw = email.message_from_bytes(data[0][1])
email_from = str(make_header(decode_header(email_message_raw['From'])))
# von Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
subject = str(email.header.make_header(email.header.decode_header(email_message_raw['Subject'])))
# content = email_message_raw.get_payload(decode=True)
# von Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
b = email.message_from_string(email_message_raw)
body = ""
if b.is_multipart():
for part in b.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
else:
body = b.get_payload(decode=True)
m.close()
m.logout()
txt = body
regarding = subject
print("###########################################################")
print(regarding)
print("###########################################################")
print(txt)
print("###########################################################")
Error message:
TypeError: initial_value must be str or None, not Message
Thanks for the comments and reply
You have everything in place. Just have to understand a few concepts.
"email" library allows you to convert typical email bytes into an easily usable object called Message using its parser APIs, such as message_from_bytes(), message_from_string(), etc.
The typical error is due to an input error.
email.message_from_bytes(data[0][1].decode())
The function above, message_from_bytes, takes bytes as an input not str. So, it is redundant to decode data[0][1] and also inputting through the parser API.
In short, you are trying to parse the original email message twice using message_from_bytes(data[0][1]) and message_from_string(email_message_raw). Get rid of one of them and you will be all set!
Try this approach:
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "UNSEEN")
if result == 'OK':
for num in data[0].split()[:5]:
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
email_message = email.message_from_bytes(data[0][1])
email_from = str(make_header(decode_header(email_message_raw['From'])))
# von Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
subject = str(email.header.make_header(email.header.decode_header(email_message_raw['Subject'])))
# content = email_message_raw.get_payload(decode=True)
# von Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
# b = email.message_from_string(email_message_raw)
# this is already set as Message object which have many methods (i.e. is_multipart(), walk(), etc.)
b = email_message
body = ""
if b.is_multipart():
for part in b.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
else:
body = b.get_payload(decode=True)
m.close()
m.logout()
txt = body
regarding = subject
print("###########################################################")
print(regarding)
print("###########################################################")
print(txt)
print("###########################################################")
from imap_tools import MailBox, AND
# get email bodies from INBOX
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
for msg in mailbox.fetch():
body = msg.text or msg.html
https://github.com/ikvk/imap_tools

How to get emails received from gmail python?

I want to get the last 10 received gmails with python.
Currently I have this code but it only returns a limited number of emails and it manipulates pop3 directly, which makes it unnecessary long.
Source of the code: https://www.code-learner.com/python-use-pop3-to-read-email-example/
import poplib
import smtplib, ssl
def guess_charset(msg):
# get charset from message object.
charset = msg.get_charset()
# if can not get charset
if charset is None:
# get message header content-type value and retrieve the charset from the value.
content_type = msg.get('Content-Type', '').lower()
pos = content_type.find('charset=')
if pos >= 0:
charset = content_type[pos + 8:].strip()
return charset
def decode_str(s):
value, charset = decode_header(s)[0]
if charset:
value = value.decode(charset)
return value
# variable indent_number is used to decide number of indent of each level in the mail multiple bory part.
def print_info(msg, indent_number=0):
if indent_number == 0:
# loop to retrieve from, to, subject from email header.
for header in ['From', 'To', 'Subject']:
# get header value
value = msg.get(header, '')
if value:
# for subject header.
if header=='Subject':
# decode the subject value
value = decode_str(value)
# for from and to header.
else:
# parse email address
hdr, addr = parseaddr(value)
# decode the name value.
name = decode_str(hdr)
value = u'%s <%s>' % (name, addr)
print('%s%s: %s' % (' ' * indent_number, header, value))
# if message has multiple part.
if (msg.is_multipart()):
# get multiple parts from message body.
parts = msg.get_payload()
# loop for each part
for n, part in enumerate(parts):
print('%spart %s' % (' ' * indent_number, n))
print('%s--------------------' % (' ' * indent_number))
# print multiple part information by invoke print_info function recursively.
print_info(part, indent_number + 1)
# if not multiple part.
else:
# get message content mime type
content_type = msg.get_content_type()
# if plain text or html content type.
if content_type=='text/plain' or content_type=='text/html':
# get email content
content = msg.get_payload(decode=True)
# get content string charset
charset = guess_charset(msg)
# decode the content with charset if provided.
if charset:
content = content.decode(charset)
print('%sText: %s' % (' ' * indent_number, content + '...'))
else:
print('%sAttachment: %s' % (' ' * indent_number, content_type))
# input email address, password and pop3 server domain or ip address
email = 'yourgmail#gmail.com'
password = 'yourpassword'
# connect to pop3 server:
server = poplib.POP3_SSL('pop.gmail.com')
# open debug switch to print debug information between client and pop3 server.
server.set_debuglevel(1)
# get pop3 server welcome message.
pop3_server_welcome_msg = server.getwelcome().decode('utf-8')
# print out the pop3 server welcome message.
print(server.getwelcome().decode('utf-8'))
# user account authentication
server.user(email)
server.pass_(password)
# stat() function return email count and occupied disk size
print('Messages: %s. Size: %s' % server.stat())
# list() function return all email list
resp, mails, octets = server.list()
print(mails)
# retrieve the newest email index number
#index = len(mails)
index = 3
# server.retr function can get the contents of the email with index variable value index number.
resp, lines, octets = server.retr(index)
# lines stores each line of the original text of the message
# so that you can get the original text of the entire message use the join function and lines variable.
msg_content = b'\r\n'.join(lines).decode('utf-8')
# now parse out the email object.
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
import poplib
# parse the email content to a message object.
msg = Parser().parsestr(msg_content)
print(len(msg_content))
# get email from, to, subject attribute value.
email_from = msg.get('From')
email_to = msg.get('To')
email_subject = msg.get('Subject')
print('From ' + email_from)
print('To ' + email_to)
print('Subject ' + email_subject)
for part in msg.walk():
if part.get_content_type():
body = part.get_payload(decode=True)
print_info(msg, len(msg))
# delete the email from pop3 server directly by email index.
# server.dele(index)
# close pop3 server connection.
server.quit()
I also tried this code but it didn't work:
import imaplib, email, base64
def fetch_messages(username, password):
messages = []
conn = imaplib.IMAP4_SSL("imap.gmail.com", 993)
conn.login(username, password)
conn.select()
typ, data = conn.uid('search', None, 'ALL')
for num in data[0].split():
typ, msg_data = conn.uid('fetch', num, '(RFC822)')
for response_part in msg_data:
if isinstance(response_part, tuple):
messages.append(email.message_from_string(response_part[1]))
typ, response = conn.store(num, '+FLAGS', r'(\Seen)')
return messages
and this also didn't work for me...
import poplib
from email import parser
pop_conn = poplib.POP3_SSL('pop.gmail.com')
pop_conn.user('#gmail.com')
pop_conn.pass_('password')
messages = [pop_conn.retr(i) for i in range(1, len(pop_conn.list()[1]) + 1)]
# Concat message pieces:
messages = ["\n".join(mssg[1]) for mssg in messages]
#Parse message intom an email object:
messages = [parser.Parser().parsestr(mssg) for mssg in messages]
for message in messages:
print(message['subject'])
print(message['body'])
I managed to solve it, the only issue is that it marks as read every unread email, here is the code I used:
import imaplib
mail = imaplib.IMAP4_SSL('imap.gmail.com')
email = input('Email: ')
password = input('Password: ')
mail.login(email+'#gmail.com', password)
mail.list()
# Out: list of "folders" aka labels in gmail.
mail.select("inbox") # connect to inbox.
result, data = mail.search(None, "ALL")
ids = data[0] # data is a list.
id_list = ids.split() # ids is a space separated string
latest_email_id = id_list[-1] # get the latest
# fetch the email body (RFC822) for the given ID
result, data = mail.fetch(latest_email_id, "(RFC822)")
raw_email = data[0][1] # here's the body, which is raw text of the whole email
# including headers and alternate payloads
import email
email_message = email.message_from_string(str(raw_email))
print (email_message['To'])
print (email.utils.parseaddr(email_message['From'])) # for parsing "Yuji Tomita" <yuji#grovemade.com>
print (email_message.items()) # print all headers
# note that if you want to get text content (body) and the email contains
# multiple payloads (plaintext/ html), you must parse each message separately.
# use something like the following: (taken from a stackoverflow post)
def get_first_text_block(self, email_message_instance):
maintype = email_message_instance.get_content_maintype()
if maintype == 'multipart':
for part in email_message_instance.get_payload():
if part.get_content_maintype() == 'text':
return part.get_payload()
elif maintype == 'text':
return email_message_instance.get_payload()
https://developers.google.com/gmail/api/quickstart/python is the preferred way:
from gmail.gmail import gmail_auth, ListThreadsMatchingQuery
service = gmail_auth()
threads = ListThreadsMatchingQuery(service, query=query)
where:
def ListThreadsMatchingQuery(service, user_id='me', query=''):
"""List all Threads of the user's mailbox matching the query.
Args:
service: Authorized Gmail API service instance.
user_id: User's email address. The special value "me"
can be used to indicate the authenticated user.
query: String used to filter messages returned.
Eg.- 'label:UNREAD' for unread messages only.
Returns:
List of threads that match the criteria of the query. Note that the returned
list contains Thread IDs, you must use get with the appropriate
ID to get the details for a Thread.
"""
try:
response = service.users().threads().list(userId=user_id, q=query).execute()
threads = []
if 'threads' in response:
threads.extend(response['threads'])
while 'nextPageToken' in response:
page_token = response['nextPageToken']
response = service.users().threads().list(userId=user_id, q=query,
pageToken=page_token).execute()
threads.extend(response['threads'])
return threads
except errors.HttpError as error:
raise error
You should try easyimap lib to get a list of e-mails, I'm not sure if works with pop3.
Code example:
import easyimap
host = 'imap.gmail.com'
user = 'you#example.com'
password = 'secret'
mailbox = 'INBOX.subfolder'
imapper = easyimap.connect(host, user, password, mailbox)
email_quantity = 10
emails_from_your_mailbox = imapper.listids(limit=email_quantity)
imapper.quit()

Saving an email in python

I'm trying to save emails from my Gmail-account in ELM format. I'm trying to preserve as much information as possible (To, From, CC, attachments, etc.) because I want to convert this into PST format and recreate my outlook if needed in the future. I've only figured out how to save the contents of the email into an ELM-file, but I haven't been able to figure out how to save the other email information using this email library.
import imaplib
import email
#Login
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('email', 'pswd')
mail.list() # Lists all labels in GMail
mail.select('inbox') # Connected to inbox
result, data = mail.search(None, "ALL")
ids = data[0]
id_list = ids.split()
i = len(data[0].split())
for x in range(i):
latest_email_id = id_list[x] #get the latest
result, data = result, data = mail.fetch(latest_email_id, "(RFC822)") # fetch the email body (RFC822) for the given ID
raw_email = data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
for part in email_message.walk():
if part.get_content_type() == "text/plain": # ignore attachments/html
body = part.get_payload(decode=True)
save_string = str(r"C:\Users\Millar\Desktop\SavedEmailsTest\Dumpgmailemail_" + str(x) + ".eml")
myfile = open(save_string, 'a')
myfile.write(body.decode('utf-8'))
myfile.close()
else:
continue

Categories