python poplib get headers from emails - python

This is my actual code:
import poplib, sys, re, threading, time, Queue
from email import parser
Mailbox = poplib.POP3(pop3, '110')
Mailbox.user(username)
Mailbox.pass_(password)
numMessages = len(Mailbox.list()[1])
for i in range(numMessages):
fullemail = ''
fullemail = '\n'.join([msg for msg in Mailbox.retr(i+1)[1]])
msg = parser.Parser().parsestr(fullemail)
for part in msg.walk():
print part.get_payload(decode=True)
`print part.get_payload(decode=True)` prints the body of the email; how can I print the headers of the email?

The items() method of an email.message.Message / email.message.EmailMessage object may be useful:
for t in msg.items():
print(t)
Each t is a tuple of the form (key, value), i.e. a header name and its value.

Related

IMAP message gets UnicodeDecodeError 'utf-8' codec can't decode

After 5 hours of trying, time to get some help. Sifted through all the stackoverflow questions related to this but couldn't find the answer.
The code is a gmail parser - works for most emails but some emails cause the UnicodeDecodeError. The problem is "raw_email.decode('utf-8')" but changing it (see comments) causes a different problem down below.
# Source: https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
import datetime
import time
import email
import imaplib
import mailbox
from vars import *
import re # to remove links from str
import string
EMAIL_ACCOUNT = 'gmail_login'
PASSWORD = 'gmail_psswd'
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')
result, data = mail.uid('search', None, "ALL") # (ALL/UNSEEN)
id_list = data[0].split()
email_rev = reversed(id_list) # Returns a type list.reverseiterator, which is not list
email_list = list(email_rev)
i = len(email_list)
todays_date = time.strftime("%m/%d/%Y")
for x in range(i):
latest_email_uid = email_list[x]
result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
raw_email = email_data[0][1] # Returns a byte
raw_email_str = raw_email.decode('utf-8') # Returns a str
#raw_email_str = base64.b64decode(raw_email_str1) # Tried this but didn't work.
#raw_email_str = raw_email.decode('utf-8', errors='ignore') # Tried this but caused a TypeError down where var subject is created because something there is expecting a str or byte-like
email_message = email.message_from_string(raw_email_str)
date_tuple = email.utils.parsedate_tz(email_message['Date'])
date_short = f'{date_tuple[1]}/{date_tuple[2]}/{date_tuple[0]}'
# Header Details
if date_short == '12/23/2019':
#if date_tuple:
# local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
# local_message_date = "%s" %(str(local_date.strftime("%a, %d %b %Y %H:%M:%S")))
email_from = str(email.header.make_header(email.header.decode_header(email_message['From'])))
subject = str(email.header.make_header(email.header.decode_header(email_message['Subject'])))
#print(subject)
if email_from.find('restaurants#uber.com') != -1:
print('yay')
# Body details
if email_from.find('restaurants#uber.com') != -1 and subject.find('Payment Summary') != -1:
for part in email_message.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True)
body = body.decode("utf-8") # Convert byte to str
body = body.replace("\r\n", " ")
text = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', body) # removes url links
text2 = text.translate(str.maketrans('', '', string.punctuation))
body_list = re.sub("[^\w]", " ", text2).split()
print(body_list)
print(date_short)
else:
continue
Here is an example of how to retrieve and read mail parts with imapclient and the email.* modules from the Python standard library:
from imapclient import IMAPClient
import email
from email import policy
def walk_parts(part, level=0):
    """Print the MIME structure of a message as an indented tree.

    Prints the content type of ``part`` indented by four spaces per
    nesting ``level``, then recurses into every sub-part of a multipart
    container.

    Args:
        part: An ``email.message.Message`` / ``EmailMessage`` (or sub-part).
        level: Current nesting depth; callers normally leave it at 0.
    """
    print(' ' * 4 * level + part.get_content_type())
    # do something with part content (applies encoding by default)
    # part.get_content()
    if part.is_multipart():
        # For a multipart container get_payload() returns the list of
        # sub-parts. Use a separate loop name so the parameter is not
        # shadowed, and recurse into this function itself.
        for subpart in part.get_payload():
            walk_parts(subpart, level + 1)
# context manager ensures the session is cleaned up
with IMAPClient(host="your_mail_host") as client:
client.login('user', 'password')
# select some folder
client.select_folder('INBOX')
# do something with folder, e.g. search & grab unseen mails
messages = client.search('UNSEEN')
for uid, message_data in client.fetch(messages, 'RFC822').items():
email_message = email.message_from_bytes(
message_data[b'RFC822'], policy=policy.default)
print(uid, email_message.get('From'), email_message.get('Subject'))
# alternatively search for specific mails
msgs = client.search(['SUBJECT', 'some subject'])
#
# do something with a specific mail:
#
# fetch a single mail with UID 12345
raw_mails = client.fetch([12345], 'RFC822')
# parse the mail (very expensive for big mails with attachments!)
mail = email.message_from_bytes(
raw_mails[12345][b'RFC822'], policy=policy.default)
# Now you have a python object representation of the mail and can dig
# into it. Since a mail can be composed of several subparts we have
# to walk the subparts.
# walk all parts at once
for part in mail.walk():
# do something with that part
print(part.get_content_type())
# or recurse yourself into sub parts until you find the interesting part
walk_parts(mail)
See the docs for email.message.EmailMessage. There you find all needed bits to read into a mail message.
use 'ISO 8859-1' instead of 'utf-8'
I had the same issue, and after a lot of research I realized that I simply needed to use the message_from_bytes function from email rather than message_from_string.
so for your code simply replace:
raw_email_str = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_str)
with
email_message = email.message_from_bytes(raw_email)
should work like a charm :)

Get only the text/plain section of an email obtained with imaplib in Python

I'm trying to get only the text/plain section of an email i received with imaplib in Python:
#!/usr/bin/env python
import imaplib
import sys
from pprint import pprint
from email.parser import HeaderParser
from email.header import decode_header
reload(sys)
sys.setdefaultencoding("utf-8")
conn = imaplib.IMAP4_SSL('host')
conn.login('username', 'password')
#conn.select('Inbox', readonly=True)
conn.select('Inbox')
a, b = conn.search(None, '(UNSEEN)')
if b[0]:
c = b[0]
d = c.split()
e = d[-1]
#(BODY[HEADER.FIELDS (SUBJECT FROM)])
data = conn.fetch(e,'(BODY[1] BODY[HEADER.FIELDS (SUBJECT FROM)])')
#pprint(data)
body_data = data[1][0][1]
header_data = data[1][1][1]
#print(body_data)
# parser = HeaderParser()
# msg = parser.parsestr(header_data)
#
# print (decode_header(msg['From'])[0][0].decode('utf-8') + ": " + decode_header(msg['Subject'])[0][0].decode('utf-8'))
In the case of a multipart message, though, what I end up with in "body_data" is the body of the message with all of its sections.
I tried with the example here:
raw_message = data[0][1]
msg = email.message_from_string(raw_message)
for part in msg.walk():
# each part is a either non-multipart, or another multipart message
# that contains further parts... Message is organized like a tree
if part.get_content_type() == 'text/plain':
print part.get_payload() # prints the raw text
However, for some reason, part.get_payload() returns only a single character from the text.
I'm forced to use Python 2.7.13.
I was able to get what I wanted by changing BODY[1] to BODY[1.1] in the fetch command:
#!/usr/bin/env python
import imaplib
import sys
from email.parser import HeaderParser
from email.header import decode_header
reload(sys)
sys.setdefaultencoding("utf-8")
conn = imaplib.IMAP4_SSL('host')
conn.login('username', 'password')
#conn.select('Inbox', readonly=True)
conn.select('Inbox')
a, b = conn.search(None, '(UNSEEN)')
if b[0]:
c = b[0]
d = c.split()
e = d[-1]
data = conn.fetch(e,'(BODY[1.1] BODY[HEADER.FIELDS (SUBJECT FROM)])')
body_data = data[1][0][1]
header_data = data[1][1][1]
print(body_data) #Now "body_data" always contains the body, i.e. only the "text/plain" section
...

Python Sending E-Mail with Umlauts

I want to create a program in Python that forwards every new unread email from one of my email accounts to another email account.
So far I have got everything set up. Unfortunately, I have really big issues with umlauts (ä, ö, ü). For some reason I cannot get it to work properly.
Here is my code:
# -*- coding: utf-8 -*-
import os, sys
import imaplib
import email
import smtplib
from email.mime.multipart import MIMEMultipart
servername = 'SERVERNAME'
username='USERNAME'
password='PASSWORD'
mail = imaplib.IMAP4_SSL(servername)
(retcode, capabilities) = mail.login(username,password)
mail.list()
mail.select('inbox')
server_smtp = smtplib.SMTP_SSL('SMTP')
n=0
(retcode, messages) = mail.search(None, '(UNSEEN)')
if retcode == 'OK':
for num in messages[0].split() :
n=n+1
typ, data = mail.fetch(num,'(RFC822)')
for response_part in data:
if isinstance(response_part, tuple):
original = email.message_from_string(response_part[1])
print original['From']
typ, data = mail.store(num,'+FLAGS','\\Seen')
body = ""
if original.is_multipart():
for part in original.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
else:
body = original.get_payload(decode=True)
body = body.encode('UTF-8')
body = str(body)
print(body)
body = "Betreff: " + str(original['Subject']) + "\n\n\n" + body.encode('UTF-8')
SUBJECT = original['From']
server_smtp.login(username, password)
msg = 'Subject: {}\n\n{}'.format(SUBJECT, body.decode('UTF-8'))
server_smtp.sendmail(username, 'TARGET', msg)
For example: if I want to send this message: "ÄäÖöÜü&ß" I will receive this: "������������&��
���"
Do you know what I am doing wrong?
I would really appreciate your help!

Reliable way of sending email of a file containing HTML code

I have HTML code in a text file, emailtext.txt (http://pastie.org/8276028). Currently I am using the following code to send an email (via Outlook) in HTML format, but the formatting sometimes seems messed up. Can someone suggest how to send this email in a reliable way?
from email.mime.text import MIMEText
from subprocess import check_call,Popen,PIPE
def email (body,subject,to=None):
msg = MIMEText("%s" % body)
msg['Content-Type'] = "text/html; charset=UTF8"
msg["From"] = "userid#qualcomm.com"
if to!=None:
to=to.strip()
msg["To"] = to
else:
msg["To"] = "userid2#company.com"
msg["Subject"] = '%s' % subject
p = Popen(["/usr/sbin/sendmail", "-t", "-f" + msg["From"]], stdin=PIPE)
p.communicate(msg.as_string())
print "Done"
def main ():
with open('emailtext.txt', 'r') as f:
body = f.read()
Subject ="test email"
email(body, Subject, "userid3#company.com")
if __name__ == '__main__':
main()
Try using smtplib:
from smtplib import SMTP
s = SMTP('localhost',25)
s.sendmail('Me <me#example.com>', ['You <you#example.com>'],msg=msg.as_string())
If that doesn't work then your HTML is probably bad.

Get message header (subject) with poplib python

I want to get only the subject from a mail message using poplib:
import poplib
server ='pop3.live.com'
port = 995
login="xxx#outlook.com"
pw="xxx"
print "Connecting..."
M = poplib.POP3_SSL(server,port)
print "Connected to "+server
M.user(login)
M.pass_(pw)
print login+" is authenticated."
numMessages = len(M.list()[1])
for i in range(numMessages):
for j in M.retr(i+1)[1]:
print j
M.quit()
Any suggestions? Thanks in advance
You need to use email.message_from_bytes to parse the e-mail from bytes and decode_header to decode the headers. This is a Python 3.4 example; in Python 2 the code will be different.
#!/usr/bin/env python3
"""Print the decoded Subject header of every message in a POP3 mailbox."""
import email
import poplib
from email.header import decode_header, make_header

login = 'login'
password = 'password'
pop_server = 'gmail.com'
pop_port = 995

mail_box = poplib.POP3_SSL(pop_server, pop_port)
try:
    mail_box.user(login)
    mail_box.pass_(password)
    num_messages = len(mail_box.list()[1])
    for i in range(num_messages):
        print(i, "message:")
        # POP3 message numbers are 1-based; retr() returns
        # (response, lines, octets) and the lines are bytes.
        raw_email = b"\n".join(mail_box.retr(i + 1)[1])
        parsed_email = email.message_from_bytes(raw_email)
        # decode_header() yields (str, None) for plain headers and
        # (bytes, charset) for RFC 2047 encoded words, so calling
        # .decode(charset) on the first chunk breaks on unencoded subjects.
        # make_header() handles both forms and joins every chunk.
        # A missing Subject header (None) is treated as an empty string.
        subject = str(make_header(decode_header(parsed_email['Subject'] or '')))
        print(subject)
finally:
    # Always end the session, even if a message fails to parse.
    mail_box.quit()

Categories