Parsing raw email with python email library adding unwanted characters - python

I am using python standard email parsing library to parse the raw email that I am getting from amazon ses mail service.
Below is my code for the same.
import json
import email
from email.Utils import parseaddr
def parse(raw_email):
    """Parse a raw RFC 822 email string and return selected fields as JSON.

    The first ``text/plain`` and the first ``text/html`` parts found while
    walking the message are decoded (Content-Transfer-Encoding undone, then
    bytes converted to text with the part's charset) so that quoted-printable
    artifacts such as ``=C2=A0`` or ``=`` soft line breaks do not leak into
    the result.

    Args:
        raw_email: The complete raw message (headers and body) as a string.

    Returns:
        A JSON string with the keys ``to``, ``from``, ``delivered to``,
        ``subject``, ``text_plain`` and ``text_html``.  Address fields are
        ``''`` when the header is missing; body fields are ``None`` when no
        matching part exists.
    """
    # Local import keeps the block self-contained; the lowercase module name
    # exists on both Python 2.5+ and Python 3 (``email.Utils`` is Python 2 only).
    from email.utils import parseaddr

    message = email.message_from_string(raw_email)

    def _decoded_text(part):
        """Return the part's body as text with transfer encoding removed."""
        payload = part.get_payload(decode=True)
        if payload is None:
            return None
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, 'replace')
        except LookupError:
            # The message declared a charset Python does not know.
            return payload.decode('utf-8', 'replace')

    def _address(header_name):
        """Return the bare address of a header, or '' if it is absent."""
        return parseaddr(message.get(header_name) or '')[1]

    text_plain = None
    text_html = None
    for part in message.walk():
        content_type = part.get_content_type()
        if content_type == 'text/plain' and text_plain is None:
            text_plain = _decoded_text(part)
        elif content_type == 'text/html' and text_html is None:
            text_html = _decoded_text(part)

    parsed_email_object = {
        'to': _address('To'),
        'from': _address('From'),
        'delivered to': _address('Delivered-To'),
        'subject': message.get('Subject'),
        'text_plain': text_plain,
        'text_html': text_html,
    }
    return json.dumps(parsed_email_object)
When I parse my raw email, the body is not fully decoded; I get unwanted characters like this:
this is a replyo from the gmail indbo asdf asdf asdfa sdfa=
sd sdfa sdfa fasd
=C2=A0dfa sf asdf
a sdfas
<= div>f asdf=C2=A0
Is there some decoding option I need to use to parse it correctly?

Making my comment as an answer so that it gets noticed.
part.get_payload(decode=True).decode(part.get_content_charset())
This will solve the issue of encoding

Related

Python, email, quoted-printable partially decoded on other end

I don't understand why the body of my email I send via Python and the email library isn't being decoded by the receiving mail server. I know I've done something wrong, but don't know what. CTE is Quoted-Printable, and when viewing the raw email, it's still encoded.
I'm burning the net up, but most of my results deal with the legacy API of the email library. I use the current version, because it's recommended and more robust.
Code to package EmailMessage:
"""
#type sndr: str
#type rcvr: str
#type subj: str
#type body: tuple
#type extra: tuple
#return: bytes
"""
msg = message.EmailMessage()
if '<' in sndr:
name, sep, addr = sndr.rpartition(' ') # Partition at space between display name & address
name = name.strip() # Remove leading and trailing spaces
addr = addr.strip('<>') # Remove leading and trailing angle brackets
msg['From'] = headerregistry.Address(display_name=name, addr_spec=addr)
else:
msg['From'] = headerregistry.Address(display_name='', addr_spec=sndr)
if '<' in rcvr:
name, sep, addr = rcvr.rpartition(' ')
name = name.strip()
addr = addr.strip('<>')
msg['To'] = headerregistry.Address(display_name=name, addr_spec=addr)
else:
msg['To'] = headerregistry.Address(display_name='', addr_spec=rcvr)
msg['Subject'] = subj
msg.set_content(body[0], charset='utf-8')
msg.add_alternative(body[1], cte='quoted-printable', subtype='html')
return msg
HTML body:
"""
#type body: str
#return: str
"""
first = \
'<html><head>\r' \
'<meta http–equiv="Content-Type" content="text/html; charset=utf-8">\r' \
'</head>'
second = f'<body><div>{body}</div></body></html>'
payload = first + second
return payload
By using the argument subtype='html', Python creates the required meta tags. These tags display correctly in the raw email, yet my tags (old/troubleshooting code, no need to focus on it—the problem remains) do not. The en dash between "http" and "equiv" displays encoded. What's wrong. I want to send email using the full utf-8 range.
Also, I use the following from smtplib to send the message: send_message(msg). So, the default call of send_message().

No national characters in mail subject using imaplib, email in Python to read gmail inbox

Windows. Python 3.9.
As a value of mail subject I get other characters instead of Polish characters - I get:
Odpowied�� automatyczna: "Re: Program licz��cy ceny i sprzeda�� w allegro dla EAN��w"
instead of:
Odpowiedź automatyczna: "Re: Program liczący ceny i sprzedaż w allegro dla EANów"
How to make it correct? Should I apply some codepage information somewhere?
I notice all out dictionary values are string except for the subject which is of type Header.
import imaplib, email
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('user', 'pwd')
mail.select('inbox')
data = mail.search(None, 'ALL')
_, data = mail.fetch(str(7), '(RFC822)')
message = email.message_from_bytes(data[0][1])
out = {
'from': message['from'],
'subject': message['subject'],
'to': message['Delivered-To'],
'datetime': message['Date'],
'cc': message['Cc']
}
If I understand correctly, you need to decode the bytes.
Try something like:
from email.header import decode_header

# decode_header() returns one (chunk, charset) pair per piece of the header;
# a subject can mix several encoded words and plain text, so every pair is
# decoded and the pieces are joined instead of keeping only the first one.
raw_subject = message["subject"]
if raw_subject is None:
    subject = None
else:
    pieces = []
    for chunk, encoding in decode_header(raw_subject):
        if isinstance(chunk, bytes):
            # No charset (or the "unknown-8bit" marker used for raw 8-bit
            # header bytes) means the sender did not declare one; UTF-8 is
            # the most likely candidate.
            if encoding is None or encoding.lower() == "unknown-8bit":
                encoding = "utf-8"
            try:
                chunk = chunk.decode(encoding, "replace")
            except LookupError:
                # Charset label Python does not recognise.
                chunk = chunk.decode("utf-8", "replace")
        pieces.append(chunk)
    subject = "".join(pieces)

Convert email body to a string

so I'm working on something that uses regex to search something from an email, which is fetched via imaplib module. Right now I can't get it to work, even after using str() function.
result, data = mail.fetch(x, '(RFC822)')
eemail = email.message_from_bytes(data[0][1])
print(str(eemail))
trying to regex it:
print(re.search("button", eemail))
Regex gives me no matches even after making the email a string object.
This is what I use:
import imaplib
import email
import re
# Search every unread message in the inbox for the word "button".
mail = imaplib.IMAP4_SSL(SMTP_SERVER, SMTP_PORT)
mail.login(FROM_EMAIL, FROM_PWD)
mail.select('inbox')
status, response = mail.search(None, '(UNSEEN)')
unread_msg_nums = response[0].split()
for e_id in unread_msg_nums:
    # Fetch the complete message: BODY[TEXT] omits the headers, and without
    # them the parser cannot see the MIME structure or transfer encoding.
    _, response = mail.fetch(e_id, '(RFC822)')
    # imaplib hands back bytes, so the bytes parser is required.
    b = email.message_from_bytes(response[0][1])
    # walk() yields the message itself when it is not multipart and every
    # nested part otherwise, so both cases share one code path.
    for part in b.walk():
        if part.is_multipart():
            # Containers carry no payload of their own.
            continue
        raw_body = part.get_payload(decode=True)
        if raw_body is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            text = raw_body.decode(charset, 'replace')
        except LookupError:
            # Charset label Python does not recognise.
            text = raw_body.decode('utf-8', 'replace')
        # The pattern is a str, so it must be matched against str, not bytes.
        print(re.search("button", text))

Python Sendgrid add CC to email

I am using SendGrid for Python. I want to CC some people in an email. It seems like they may no longer support CC'ing on emails, though I'm not positive if that's true? But surely there is a work around to it somehow, but I am surprised I can't find much support on this.
Here is my basic code:
sg = sendgrid.SendGridAPIClient(apikey='*****')
from_email = Email(sender_address, sender_name)
to_email = Email(email_address)
subject = subject
content = Content("text/plain", email_message)
mail = Mail(from_email, subject, to_email, content)
response = sg.client.mail.send.post(request_body=mail.get())
How can I modify this so it will CC someone on an email?
Using the SendGrid's Personalization() or Email() class did not work for me. This is how I got it to work:
import os

from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail, Cc

# Recipients are given as a list of (email, name) tuples,
# e.g. [('email1@example.com', 'email1@example.com'), ('email2@example.com', 'email2@example.com')]
to_emails = [(r, r) for r in recipients]
# Note the Cc class: each CC address is wrapped in it.
cc_emails = [Cc(c, c) for c in cc]
message = Mail(
    from_email=from_email,
    to_emails=to_emails,
    subject='My Subject',
    html_content='<div>My HTML Email...</div>',
)
if cc_emails:
    message.add_cc(cc_emails)
try:
    # os must be imported, otherwise the NameError raised here is swallowed
    # by the handler below and the mail is silently never sent.
    sg = SendGridAPIClient(os.getenv('SENDGRID_API_KEY'))
    sg.send(message)
except Exception as e:
    print(f'{e}')
Hopefully this helps someone.
I resolved it. Santiago's answer got me mostly there, but here is what I needed to do:
sg = sendgrid.SendGridAPIClient(apikey='****')
from_email = Email(sender_address, sender_name)
to_email = Email(to_email)
cc_email = Email(cc_email)
p = Personalization()
p.add_to(to_email)
p.add_cc(cc_email)
subject = subject
content = Content("text/plain", email_message)
mail = Mail(from_email, subject, to_email, content)
mail.add_personalization(p)
response = sg.client.mail.send.post(request_body=mail.get())
If you don't include the p.add_to(to_email) it rejects it because there is no "to email" in the personalization object. Also, if you don't include the "to_email" inside the mail object it rejects it because it is looking for that argument, so you have to be a bit redundant and define it twice.
I've been looking at the code: https://github.com/sendgrid/sendgrid-python/blob/master/examples/mail/mail.py
And it looks like you can do that by adding a personalization to the mail, for example:
cc_email = Email(cc_address)
p = Personalization()
p.add_cc(cc_email)
mail.add_personalization(p)
Based on the answers here you can CC to email if you add another email to 'to_email'.
If you want to CC multiple users in Django using SendGrid, you need to import the line below,
then define the function that will be used to send the mail,
and finally pass the parameters to that function as shown here so that it can CC the recipients:
email = send_sandgridmail(sender=sender,receiver=receivers,subject=subject,content=message,reply_to=sender,cc=[admin_mail_account_mail,"rawatanup918#gmail.com"],attachment=None)
I hope this helps. Simplified from @anurag's image script:
import os
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import To,Mail,ReplyTo,Email,Cc
def send_sandgridmail(sender, receiver, subject, content, reply_to=None, cc=None, attachment=None):
    """Send an HTML email through SendGrid, optionally with CC recipients.

    Args:
        sender: From address; converted with ``str()``.
        receiver: Value passed straight through as ``to_emails``.
        subject: Subject line; converted with ``str()``.
        content: HTML body of the message.
        reply_to: Optional reply-to address.
        cc: Optional iterable of addresses to CC.  Defaults to no CC
            recipients (``None`` instead of a shared mutable ``[]``).
        attachment: Optional attachment handed to ``Mail.add_attachment``.

    Returns:
        The response object returned by ``SendGridAPIClient.send``, or
        ``None`` when sending raised an exception (the error is printed).
    """
    message = Mail(
        from_email=str(sender),
        to_emails=receiver,
        subject=str(subject),
        html_content=content,
    )
    if reply_to:
        message.reply_to = ReplyTo(reply_to)
    if attachment:
        message.add_attachment(attachment)
    if cc:
        message.add_cc([Cc(cc_person, cc_person) for cc_person in cc])

    # Pre-bind so the final return cannot hit an unbound name when send() fails.
    response = None
    try:
        SENDGRID_API_KEY = 'your sendgrid api key'
        sg = SendGridAPIClient(SENDGRID_API_KEY)
        response = sg.send(message)
        print(response.status_code)
    except Exception as e:
        print(e)
    return response

How to deal with flooded unseen messages

I have written an email parsing mechanism in python.
It finds a new email and passes the data correctly. I am 99.999% certain that my code is functioning correctly, so there should be no issue there. The problem is that occasionally, the Gmail inbox will get flooded with messages that are considered "unseen". At this point, there is nothing that my code can do.
It fails with:
imaplib.error: FETCH command error: BAD ['Could not parse command']
This is distressing, and I would love to have either
a way to check whether the unseen messages have overflown to this state, or
a way to manually (via imaplib) mark all messages as read, including a way to detect this particular error.
Any thoughts on how to accomplish this?
Here is my code:
#!/usr/bin/env python
import imaplib, re, sys, time, OSC, threading, os
iparg = 'localhost'
oportarg = 9000
iportarg = 9002
usern = 'myusrname#gmail.com'
gpass = 'mypass'
kill_program = False
server = imaplib.IMAP4_SSL('imap.googlemail.com', 993)
oclient = OSC.OSCClient()
email_interval = 2.0
def login():
    """Log in to the IMAP server, then connect the OSC client to its target."""
    server.login(usern, gpass)
    osc_target = (iparg, oportarg)
    oclient.connect(osc_target)
def logout_handle(addr, tags, stuff, source):
    """Announce the kill request and set the module-level ``kill_program`` flag.

    The four parameters are accepted but not used.
    NOTE(review): the signature looks like an OSC message-handler callback;
    confirm against wherever it is registered.
    """
    global kill_program
    # Single parenthesised argument prints identically on Python 2 and 3.
    print('received kill call')
    kill_program = True
def filter_signature(s):  # so annoying; wish i didn't have to do this
    """Remove SMS-to-email signature boilerplate and line noise from ``s``.

    The signature fragments are removed one after another, then every
    newline, carriage return and ``=`` character is dropped.

    Args:
        s: The message text to clean.

    Returns:
        The cleaned text.  If ``s`` is not something ``re.sub`` can process
        (e.g. ``None``), ``s`` is returned unchanged.
    """
    signature_patterns = (
        r'Sent|--Sent',
        r'using SMS-to-email. Reply to this email to text the sender back and',
        r'save on SMS fees.',
        r'https://www.google.com/voice',
    )
    try:
        cleaned = s
        for pattern in signature_patterns:
            cleaned = re.sub(pattern, '', cleaned)
        # add weird characters to this class as needed
        return re.sub(r'[\n=\r]', '', cleaned)
    except TypeError:
        # Non-text input: fall back to the original value.  The previous
        # fallback assigned a misspelled name and raised NameError instead.
        return s
def parse_email(interval):
    """Poll the inbox forever and print the body of the newest unseen message.

    Each cycle selects ``INBOX``, searches for unseen messages, fetches the
    text of the most recent one (if any), then sleeps.

    Args:
        interval: Seconds to sleep between polling cycles.

    This function never returns.
    """
    while True:
        server.select('INBOX')
        status, ids = server.search(None, 'UnSeen')
        print('status is:  %s' % (status,))
        # search() returns ONE space-separated string of ids.  Passing that
        # whole string to fetch() is what makes the server answer
        # "Could not parse command" once several messages are unseen, so it
        # is split into individual ids first.
        message_ids = ids[0].split() if ids and ids[0] else []
        if not message_ids:
            print('no new messages')
        else:
            try:
                print('found a message; attempting to parse...')
                latest_id = message_ids[-1]
                status, msg_data = server.fetch(latest_id, '(UID BODY[TEXT])')
                raw_data = msg_data[0][1]
                raw_filter = raw_data
                print('message result:  %s' % (raw_filter,))
            except imaplib.error as exc:
                # Report the failure and keep polling rather than crashing.
                print('fetch failed: %s' % (exc,))
        time.sleep(interval)
#execute main block
while not kill_program:
login()
parse_email(email_interval)
st.kill()
sys.exit()
Based upon the error, I would very carefully check the parameters that you're passing to fetch. Gmail is telling you that it could not parse the command that you sent to it.
Also, you can do a STORE +FLAGS \SEEN to mark the messages as read.

Categories