I have a piece of code that gets the senders of messages in my Gmail inbox. It works correctly when I just print email_from, but I would like to process the data further (split it into name and email address, etc.). When I do that, the code breaks after printing the first loop iteration and I get this error:
Traceback (most recent call last):
File "C:\Users\loc\Desktop\extract_gmail.py", line 24, in <module>
email_message_raw = email.message_from_bytes(data[0][1])
AttributeError: 'str' object has no attribute 'message_from_bytes'
Can you give me some advice how to solve this problem?
Code:
import imaplib
import email
from email.header import Header, decode_header, make_header
# Script: log in to Gmail over IMAP, fetch the first five UIDs returned by the
# search and print the sender of each message as "name|address".
# Connection settings
HOST = 'imap.gmail.com'
USERNAME = '***'
PASSWORD = '***'
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "ALL")
if result == 'OK':
for num in data[0].split()[:5]:
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
# Get raw message
email_message_raw = email.message_from_bytes(data[0][1])
# Decode headers
email_from = str(make_header(decode_header(email_message_raw['From'])))
# Print each name and email
# NOTE: this assignment rebinds the name `email` from the imported module to a
# string, so the next loop iteration's email.message_from_bytes(...) call raises
# the AttributeError shown in the traceback above.
name, email = email_from.split('<')
# NOTE: str.replace returns a new string; the result is discarded here, so the
# trailing ">" is still printed.
email.replace(">", "")
print(name + "|" + email)
# When i swap to just print email_from then works
# print(email_from)
# Close server connection
m.close()
m.logout()
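In your code you rebound the name `email` (the imported module) to a string inside the loop, so the second iteration's call to email.message_from_bytes fails.
Use a different variable name for the address, for example: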
import imaplib
import email
from email.header import Header, decode_header, make_header
from email.utils import parseaddr

# Connection settings
HOST = 'imap.gmail.com'
USERNAME = '***'
PASSWORD = '***'

# Script: print "name|address" for the sender of the first five messages
# returned by a UID search of the inbox.
m = imaplib.IMAP4_SSL(HOST, 993)
try:
    m.login(USERNAME, PASSWORD)
    m.select('INBOX')
    result, data = m.uid('search', None, "ALL")
    if result == 'OK':
        for num in data[0].split()[:5]:
            # Use a separate name for the fetch response so the search
            # result is not overwritten inside the loop.
            result, msg_data = m.uid('fetch', num, '(RFC822)')
            if result != 'OK' or not msg_data or not isinstance(msg_data[0], tuple):
                continue
            # Get raw message
            email_message_raw = email.message_from_bytes(msg_data[0][1])
            raw_from = email_message_raw['From']
            if raw_from is None:
                continue
            # parseaddr copes with addresses that have no display name or no
            # angle brackets, where a plain split('<') would raise ValueError.
            name, email_addr = parseaddr(str(raw_from))
            # Decode RFC 2047 encoded words in the display name.
            name = str(make_header(decode_header(name)))
            # Print each name and email. The address must not be stored in a
            # variable called `email`: that would shadow the imported module.
            print(name + "|" + email_addr)
    # Close the selected mailbox
    m.close()
finally:
    # Close server connection even if something above failed
    m.logout()
I had the same error [happily solved], my mistake was in the shebang. That should point to python3.
#! /usr/bin/env python3
Related
I want to get the last 10 received gmails with python.
Currently I have this code, but it only returns a limited number of emails and it manipulates POP3 directly, which makes it unnecessarily long.
Source of the code: https://www.code-learner.com/python-use-pop3-to-read-email-example/
import poplib
import smtplib, ssl
def guess_charset(msg):
    """Return the name of the character set declared for *msg*, or None.

    The charset attached to the message object is preferred; otherwise the
    ``charset`` parameter of the Content-Type header is used.

    Args:
        msg: an ``email.message.Message`` instance.

    Returns:
        The lower-cased charset name as a string usable with
        ``bytes.decode``, or ``None`` when no charset is declared.
    """
    charset = msg.get_charset()
    if charset is not None:
        # get_charset() yields a Charset object; callers need the codec name.
        return str(charset)
    # get_content_charset() parses the header properly, so quotes and any
    # parameters following the charset (e.g. "; format=flowed") are not
    # included in the returned name.
    return msg.get_content_charset()
def decode_str(s):
    """Decode an RFC 2047 encoded header value into a text string.

    Every chunk of the header is decoded (not just the first), so values
    that mix plain text with encoded words, or that consist of several
    encoded words, are returned in full.

    Args:
        s: the raw header value.

    Returns:
        The decoded header as ``str``.
    """
    # Imported locally so the function does not depend on import order
    # elsewhere in the file.
    from email.header import decode_header, make_header

    return str(make_header(decode_header(s)))
# indent_number decides how far each nesting level of a multipart mail
# body is indented in the output.
def print_info(msg, indent_number=0):
    """Print the headers and body structure of *msg*.

    The From, To and Subject headers are printed only at the top level
    (``indent_number == 0``). Multipart messages are walked recursively,
    one indentation level per nesting level. text/plain and text/html parts
    are printed as text; every other part is reported as an attachment.

    Args:
        msg: an ``email.message.Message`` instance.
        indent_number: current nesting depth, used as the indent width.
    """
    indent = ' ' * indent_number
    if indent_number == 0:
        # loop to retrieve from, to, subject from email header.
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    # decode the subject value
                    value = decode_str(value)
                else:
                    # parse the address and decode its display name
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (indent, header, value))
    if msg.is_multipart():
        # print each part by invoking print_info recursively.
        for n, part in enumerate(msg.get_payload()):
            print('%spart %s' % (indent, n))
            print('%s--------------------' % indent)
            print_info(part, indent_number + 1)
    else:
        content_type = msg.get_content_type()
        if content_type == 'text/plain' or content_type == 'text/html':
            # get_payload(decode=True) returns bytes, or None for an empty part
            content = msg.get_payload(decode=True) or b''
            charset = guess_charset(msg)
            # Always decode to str: concatenating '...' to undecoded bytes
            # raises TypeError. Fall back to UTF-8 when no charset is
            # declared or the declared one is unknown to Python.
            try:
                text = content.decode(str(charset) if charset else 'utf-8',
                                      errors='replace')
            except LookupError:
                text = content.decode('utf-8', errors='replace')
            print('%sText: %s' % (indent, text + '...'))
        else:
            print('%sAttachment: %s' % (indent, content_type))
# imports needed to parse the retrieved message
from email.parser import BytesParser
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
import poplib

# input email address, password and pop3 server domain or ip address
email = 'yourgmail@gmail.com'
password = 'yourpassword'

# connect to pop3 server:
server = poplib.POP3_SSL('pop.gmail.com')
try:
    # open debug switch to print debug information between client and pop3 server.
    server.set_debuglevel(1)
    # get the pop3 server welcome message once and print it.
    pop3_server_welcome_msg = server.getwelcome().decode('utf-8')
    print(pop3_server_welcome_msg)
    # user account authentication
    server.user(email)
    server.pass_(password)
    # stat() function return email count and occupied disk size
    print('Messages: %s. Size: %s' % server.stat())
    # list() function return all email list
    resp, mails, octets = server.list()
    print(mails)
    # retrieve the newest email index number
    #index = len(mails)
    index = 3
    # server.retr function can get the contents of the email with index variable value index number.
    resp, lines, octets = server.retr(index)
    # lines stores each line of the original message as bytes; join them and
    # parse the bytes directly, so a message that is not valid UTF-8 does not
    # raise UnicodeDecodeError before it can be parsed.
    msg_content = b'\r\n'.join(lines)
    msg = BytesParser().parsebytes(msg_content)
    print(len(msg_content))
    # get email from, to, subject attribute value; a missing header is
    # printed as an empty string instead of failing on None concatenation.
    email_from = msg.get('From', '')
    email_to = msg.get('To', '')
    email_subject = msg.get('Subject', '')
    print('From %s' % email_from)
    print('To %s' % email_to)
    print('Subject %s' % email_subject)
    # NOTE(review): len(msg) is the number of headers, so print_info is entered
    # with a non-zero indent and skips its own header output — presumably
    # intentional because the headers are printed just above; confirm.
    print_info(msg, len(msg))
    # delete the email from pop3 server directly by email index.
    # server.dele(index)
finally:
    # close pop3 server connection, also when something above failed.
    server.quit()
I also tried this code but it didn't work:
import imaplib, email, base64
def fetch_messages(username, password, limit=None):
    """Fetch messages from the Gmail inbox and mark them as seen.

    Args:
        username: account name used for the IMAP login.
        password: password used for the IMAP login.
        limit: when given, only the last ``limit`` UIDs returned by the
            search are fetched; ``None`` (the default) fetches all of them.

    Returns:
        A list of ``email.message.Message`` objects.
    """
    messages = []
    conn = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    try:
        conn.login(username, password)
        conn.select()
        typ, data = conn.uid('search', None, 'ALL')
        if typ != 'OK' or not data or not data[0]:
            return messages
        uids = data[0].split()
        if limit is not None:
            uids = uids[-limit:] if limit > 0 else []
        for num in uids:
            typ, msg_data = conn.uid('fetch', num, '(RFC822)')
            if typ != 'OK':
                continue
            for response_part in msg_data:
                if isinstance(response_part, tuple):
                    # The payload is bytes on Python 3, so it must be parsed
                    # with message_from_bytes, not message_from_string.
                    messages.append(email.message_from_bytes(response_part[1]))
            # `num` is a UID, so the flag update must be a UID STORE as well;
            # a plain store() would treat it as a sequence number.
            typ, response = conn.uid('store', num, '+FLAGS', r'(\Seen)')
    finally:
        conn.logout()
    return messages
and this also didn't work for me...
import poplib
from email import parser
# Script: download every message over POP3 and print its subject and
# plain-text body.
pop_conn = poplib.POP3_SSL('pop.gmail.com')
try:
    pop_conn.user('username@gmail.com')
    pop_conn.pass_('password')
    message_count = len(pop_conn.list()[1])
    messages = [pop_conn.retr(i) for i in range(1, message_count + 1)]
finally:
    pop_conn.quit()
# Concat message pieces. retr() returns (response, lines, octets) and the
# lines are bytes, so they must be joined with a bytes separator.
messages = [b"\n".join(mssg[1]) for mssg in messages]
# Parse each message into an email object:
messages = [parser.BytesParser().parsebytes(mssg) for mssg in messages]
for message in messages:
    print(message['subject'])
    # There is no "body" header; the body is the payload of the message
    # (or of its first text/plain part when the message is multipart).
    for part in message.walk():
        if part.get_content_type() == 'text/plain':
            payload = part.get_payload(decode=True) or b''
            print(payload.decode(part.get_content_charset() or 'utf-8',
                                 errors='replace'))
            break
I managed to solve it, the only issue is that it marks as read every unread email, here is the code I used:
import email
import email.utils
import imaplib

# Script: log in to Gmail over IMAP and print the To header, the parsed From
# address and all headers of the latest message in the inbox.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
# Kept in `username` rather than `email` so the imported module is not shadowed.
username = input('Email: ')
password = input('Password: ')
try:
    mail.login(username + '@gmail.com', password)
    mail.list()
    # Out: list of "folders" aka labels in gmail.
    mail.select("inbox")  # connect to inbox.
    result, data = mail.search(None, "ALL")
    ids = data[0]  # data is a list.
    id_list = ids.split()  # ids is a space separated string
    if id_list:
        latest_email_id = id_list[-1]  # get the latest
        # Fetch the whole message with BODY.PEEK[] instead of RFC822:
        # PEEK does not set the \Seen flag, so unread mail stays unread.
        result, data = mail.fetch(latest_email_id, "(BODY.PEEK[])")
        # here's the body, which is the raw bytes of the whole email
        # including headers and alternate payloads
        raw_email = data[0][1]
        # Parse the bytes directly; str(raw_email) would produce the
        # "b'...'" repr rather than the message text.
        email_message = email.message_from_bytes(raw_email)
        print(email_message['To'])
        # for parsing "Yuji Tomita" <yuji@grovemade.com>
        print(email.utils.parseaddr(email_message.get('From', '')))
        print(email_message.items())  # print all headers
    else:
        print('No messages found')
finally:
    mail.logout()
# note that if you want to get text content (body) and the email contains
# multiple payloads (plaintext/ html), you must parse each message separately.
# use something like the following: (taken from a stackoverflow post)
def get_first_text_block(self, email_message_instance):
    """Return the payload of the first text part of a message, or None.

    Multipart containers are searched depth-first, so a text part nested
    inside e.g. multipart/mixed -> multipart/alternative is found as well.

    Args:
        self: unused; kept because the function was written as a method.
        email_message_instance: an ``email.message.Message`` instance.

    Returns:
        The result of ``get_payload()`` on the first part whose main type
        is ``text``, or ``None`` when the message has no such part.
    """
    maintype = email_message_instance.get_content_maintype()
    if maintype == 'text':
        return email_message_instance.get_payload()
    # is_multipart() guards against a defective multipart message whose
    # payload is a plain string rather than a list of parts.
    if maintype == 'multipart' and email_message_instance.is_multipart():
        for part in email_message_instance.get_payload():
            found = get_first_text_block(self, part)
            if found is not None:
                return found
    return None
https://developers.google.com/gmail/api/quickstart/python is the preferred way:
from gmail.gmail import gmail_auth, ListThreadsMatchingQuery
service = gmail_auth()
# NOTE(review): `query` is not defined in this snippet; presumably the caller
# sets it to a Gmail search string such as 'label:UNREAD' — confirm.
threads = ListThreadsMatchingQuery(service, query=query)
where:
def ListThreadsMatchingQuery(service, user_id='me', query=''):
    """List all Threads of the user's mailbox matching the query.

    Args:
        service: Authorized Gmail API service instance.
        user_id: User's email address. The special value "me"
            can be used to indicate the authenticated user.
        query: String used to filter messages returned.
            Eg.- 'label:UNREAD' for unread messages only.

    Returns:
        List of threads that match the criteria of the query. Note that the
        returned list contains Thread IDs, you must use get with the
        appropriate ID to get the details for a Thread.

    Raises:
        Any error raised by the API client while executing a request
        propagates to the caller unchanged.
    """
    # The original wrapped this in "except errors.HttpError: raise", which only
    # re-raised the same exception, so it is dropped.
    response = service.users().threads().list(userId=user_id, q=query).execute()
    threads = list(response.get('threads', []))
    while 'nextPageToken' in response:
        page_token = response['nextPageToken']
        response = service.users().threads().list(
            userId=user_id, q=query, pageToken=page_token).execute()
        # A page may carry no 'threads' key; treat it as an empty page
        # instead of raising KeyError.
        threads.extend(response.get('threads', []))
    return threads
You could try the easyimap library to get a list of e-mails. I'm not sure whether it works with POP3.
Code example:
import easyimap
# Script: connect with easyimap and list the ids of the latest messages.
host = 'imap.gmail.com'
user = 'you@example.com'
password = 'secret'
mailbox = 'INBOX.subfolder'
imapper = easyimap.connect(host, user, password, mailbox)
email_quantity = 10
try:
    emails_from_your_mailbox = imapper.listids(limit=email_quantity)
finally:
    # Always close the connection, even if listing the ids fails.
    imapper.quit()
I am trying to read all the unread emails from a Gmail account.
The code below is able to make the connection but is unable to fetch the emails.
I want to print the content of each email.
I am getting the error "can't concat int to bytes".
code:
import smtplib
import time
import imaplib
import email
# Log in to Gmail over IMAP and print the sender and subject of the messages
# found in the inbox, newest first.
def read_email_from_gmail():
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('my_mail','my_pwd')
mail.select('inbox')
result, data = mail.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
first_email_id = int(id_list[0])
latest_email_id = int(id_list[-1])
# NOTE: the stop value of range() is exclusive, so first_email_id itself is
# never fetched.
for i in range(latest_email_id,first_email_id, -1):
# NOTE: `i` is an int here; this is the call that raises
# "TypeError: can't concat int to bytes" in the traceback below.
result, data = mail.fetch(i, '(RFC822)' )
for response_part in data:
if isinstance(response_part, tuple):
msg = email.message_from_string(response_part[1])
email_subject = msg['subject']
email_from = msg['from']
print ('From : ' + email_from + '\n')
print ('Subject : ' + email_subject + '\n')
# NOTE: the function has no return statement, so this prints None once it
# finishes.
print(read_email_from_gmail())
error:
Traceback (most recent call last):
File "C:/Users/devda/Desktop/Internship/access_email.py", line 32, in <module>
print(read_email_from_gmail())
File "C:/Users/devda/Desktop/Internship/access_email.py", line 20, in read_email_from_gmail
result, data = mail.fetch(i, '(RFC822)' )
File "C:\Users\devda\AppData\Local\Programs\Python\Python36\lib\imaplib.py", line 529, in fetch
typ, dat = self._simple_command(name, message_set, message_parts)
File "C:\Users\devda\AppData\Local\Programs\Python\Python36\lib\imaplib.py", line 1191, in _simple_command
return self._command_complete(name, self._command(name, *args))
File "C:\Users\devda\AppData\Local\Programs\Python\Python36\lib\imaplib.py", line 956, in _command
data = data + b' ' + arg
TypeError: can't concat int to bytes
>>>
I followed the tutorial from here
What I want to do is to extract content from email which is shown in image
I had to make a few changes to your code in order to get it to work on Python 3.5.1. I have inlined comments below.
# no need to import smtplib for this code
# no need to import time for this code
import imaplib
import email
def read_email_from_gmail():
    """Print the sender and subject of every message in the inbox.

    Messages are processed newest first. The connection is logged out
    again even when fetching or parsing fails.
    """
    mail = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        mail.login('my_mail','my_pwd')
        mail.select('inbox')
        result, data = mail.search(None, 'ALL')
        if result != 'OK' or not data or data[0] is None:
            return
        id_list = data[0].split()
        # Iterate over the ids the server actually returned instead of
        # range(latest, first, -1): the range skipped the oldest message and
        # crashed on an empty inbox. The ids are already bytes, which
        # fetch() accepts (an int is what caused the original TypeError).
        for msg_id in reversed(id_list):
            result, data = mail.fetch(msg_id, '(RFC822)')
            for response_part in data:
                if isinstance(response_part, tuple):
                    # from_bytes, not from_string
                    msg = email.message_from_bytes(response_part[1])
                    email_subject = msg['subject']
                    email_from = msg['from']
                    # %-formatting also copes with a missing header (None).
                    print('From : %s\n' % email_from)
                    print('Subject : %s\n' % email_subject)
    finally:
        mail.logout()


# nothing to print here
read_email_from_gmail()
Maybe submit a bug report to the author of that blog.
This is what worked for me:
It stores the from address, subject, content(text) into a file of all unread emails.
code:
import email
import imaplib
# Script: append the sender, subject and plain-text body of every unread
# message to a text file, then mark the message as seen.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
try:
    (retcode, capabilities) = mail.login('mymail','mypassword')
    mail.list()
    mail.select('inbox')

    save_string = str(r"C:\Users\devda\Desktop\Internship\Dumpemail_" + str('richboy') + ".txt" )
    n = 0
    (retcode, messages) = mail.search(None, '(UNSEEN)')
    if retcode == 'OK':
        for num in messages[0].split():
            print ('Processing ')
            n = n + 1
            typ, data = mail.fetch(num,'(RFC822)')
            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue
                # Parse the message once and reuse it for headers and body.
                original = email.message_from_bytes(response_part[1])
                for part in original.walk():
                    if part.get_content_type() != "text/plain":  # ignore attachments/html
                        continue
                    body = part.get_payload(decode=True) or b''
                    # Decode with the part's declared charset instead of
                    # assuming UTF-8; fall back when it is missing or unknown.
                    try:
                        text = body.decode(part.get_content_charset() or 'utf-8',
                                           errors='replace')
                    except LookupError:
                        text = body.decode('utf-8', errors='replace')
                    # `with` closes the file even if a write fails; an explicit
                    # encoding keeps non-ASCII text independent of the locale.
                    with open(save_string, 'a', encoding='utf-8') as myfile:
                        # str() covers a missing header and Header objects.
                        myfile.write(str(original.get('From', '')) + '\n')
                        myfile.write(str(original.get('Subject', '')) + '\n')
                        myfile.write(text)
                        myfile.write('**********\n')
            typ, data = mail.store(num,'+FLAGS','\\Seen')
    print (n)
finally:
    mail.logout()
I want to create a program in python that receives every new unread email from one of my email accounts to another email account.
So far I have everything set up. Unfortunately, I have really big issues with umlauts (ä, ö, ü). For some reason I cannot get it to work properly.
Here is my code:
# -*- coding: utf-8 -*-
import os, sys
import imaplib
import email
import smtplib
from email.mime.multipart import MIMEMultipart
# Script (Python 2 — it uses the print statement): read every unread message
# from one account, mark it as seen and send its text to another address.
servername = 'SERVERNAME'
username='USERNAME'
password='PASSWORD'
mail = imaplib.IMAP4_SSL(servername)
(retcode, capabilities) = mail.login(username,password)
mail.list()
mail.select('inbox')
server_smtp = smtplib.SMTP_SSL('SMTP')
n=0
(retcode, messages) = mail.search(None, '(UNSEEN)')
if retcode == 'OK':
for num in messages[0].split() :
n=n+1
typ, data = mail.fetch(num,'(RFC822)')
for response_part in data:
if isinstance(response_part, tuple):
original = email.message_from_string(response_part[1])
print original['From']
typ, data = mail.store(num,'+FLAGS','\\Seen')
body = ""
if original.is_multipart():
for part in original.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
else:
body = original.get_payload(decode=True)
# NOTE(review): the payload is encoded/decoded as UTF-8 below without looking
# at the charset the part declares — presumably related to the garbled
# umlauts described in the question; confirm.
body = body.encode('UTF-8')
body = str(body)
print(body)
body = "Betreff: " + str(original['Subject']) + "\n\n\n" + body.encode('UTF-8')
# The forwarded mail uses the original sender as its subject line.
SUBJECT = original['From']
server_smtp.login(username, password)
# NOTE(review): the outgoing message consists of a Subject line and the body
# only; no Content-Type/charset header is added, so the receiver presumably
# has to guess the encoding — confirm.
msg = 'Subject: {}\n\n{}'.format(SUBJECT, body.decode('UTF-8'))
server_smtp.sendmail(username, 'TARGET', msg)
For example: if I want to send this message: "ÄäÖöÜü&ß" I will receive this: "������������&��
���"
Do you know what I am doing wrong?
I would really appreciate your help!
I have this code (below) that shows me all emails in my email account. It also shows the whole email, including all the metadata (which I don't want). Is there a way to print just the To, From, Subject and Message? This is in Python. Thanks.
Code:
import getpass, imaplib
import os
# Script (Python 2 — it uses raw_input): log in to Gmail over IMAP and print
# every message in the default mailbox.
email = raw_input('Email: ')
password = getpass.getpass()
M = imaplib.IMAP4_SSL("imap.gmail.com", 993)
print('Logging in as ' + email + '...')
M.login(email, password)
M.select()
typ, data = M.search(None, 'ALL')
for num in data[0].split():
typ, data = M.fetch(num, '(RFC822)')
# data[0][1] is the complete raw message, which is why all headers
# ("metadata") show up in the output.
print ('Message %s\n%s\n' % (num, data[0][1]))
M.close()
M.logout()
You can use email.parser.Parser() from standard module to parse mail and get headers
from __future__ import print_function
import imaplib
import getpass
import os
from email.parser import Parser
# Script (Python 2 — it uses raw_input): print the From, To and Subject
# headers and the start of the body of every message in the default mailbox.
email = raw_input('Email: ')
password = getpass.getpass()
print('Logging in as', email, '...')
M = imaplib.IMAP4_SSL("imap.gmail.com", 993)
try:
    M.login(email, password)
    M.select()
    typ, data = M.search(None, 'ALL')
    for num in data[0].split():
        # A separate name keeps the search result from being overwritten.
        typ, msg_data = M.fetch(num, '(RFC822)')
        #print ('Message %s\n%s\n' % (num, msg_data[0][1]))
        header = Parser().parsestr(msg_data[0][1])
        print('From:', header['From'])
        print('To:', header['To'])
        print('Subject:', header['Subject'])
        print('Body:')
        # get_payload() is a list of sub-messages only for multipart mail.
        # For a single-part message it is a plain string, and iterating it
        # would yield characters that have no as_string() method.
        if header.is_multipart():
            for part in header.get_payload():
                print(part.as_string()[:150], '.....')
        else:
            print(header.get_payload()[:150], '.....')
        #break # to test only first message
    M.close()
finally:
    M.logout()
For anyone else who wants to know, this is the working code:
from __future__ import print_function
import imaplib
import getpass
import os
from email.parser import Parser
# Script (Python 2 — it uses raw_input): print the Delivered-To, From and
# Subject headers and the payload of every message in the default mailbox.
email = raw_input('Email: ')
password = getpass.getpass()
print('Logging in as', email, '...\n')

M = imaplib.IMAP4_SSL("imap.gmail.com", 993)
M.login(email, password)
M.select()

typ, data = M.search(None, 'ALL')
message_numbers = data[0].split()
for num in message_numbers:
    typ, data = M.fetch(num, '(RFC822)')
    ## To view whole email, uncomment next line
    ## print ('Message %s\n%s\n' % (num, data[0][1]))
    raw_message = data[0][1]
    header = Parser().parsestr(raw_message)
    # Label / header-name pairs, printed in this order.
    for label, field in (('To:', 'Delivered-To'),
                         ('From:', 'From'),
                         ('Subject:', 'Subject')):
        print(label, header[field])
    print('Body:', header.get_payload(), '\n')

M.close()
M.logout()
Hope this helps :) Big thanks to @furas!
Update: my code works under python 2.6.5 but not python 3 (I'm using 3.4.1).
I'm unable to search for messages in the "All Mail" or "Sent Mail" folders - I get an exception:
imaplib.error: SELECT command error: BAD [b'Could not parse command']
my code:
import imaplib
m = imaplib.IMAP4_SSL("imap.gmail.com", 993)
m.login("myemail#gmail.com","mypassword")
# This is the call that fails with "SELECT command error: BAD"; the mailbox
# name contains a space and is passed without surrounding double quotes.
m.select("[Gmail]/All Mail")
using m.select("[Gmail]/Sent Mail") doesn't work either.
But reading from the inbox works:
import imaplib
m = imaplib.IMAP4_SSL("imap.gmail.com", 993)
m.login("myemail#gmail.com","mypassword")
# Selecting the inbox works; this mailbox name contains no space.
m.select("inbox")
...
I used the mail.list() command to verify the folder names are correct:
b'(\\HasNoChildren) "/" "INBOX"',
b'(\\Noselect \\HasChildren) "/" "[Gmail]"',
b'(\\HasNoChildren \\All) "/" "[Gmail]/All Mail"',
b'(\\HasNoChildren \\Drafts) "/" "[Gmail]/Drafts"',
b'(\\HasNoChildren \\Important) "/" "[Gmail]/Important"',
b'(\\HasNoChildren \\Sent) "/" "[Gmail]/Sent Mail"',
b'(\\HasNoChildren \\Junk) "/" "[Gmail]/Spam"',
b'(\\HasNoChildren \\Flagged) "/" "[Gmail]/Starred"',
b'(\\HasNoChildren \\Trash) "/" "[Gmail]/Trash"'
I'm following the solutions from these questions, but they don't work for me:
imaplib - What is the correct folder name for Archive/All Mail in Gmail?
I cannot search sent emails in Gmail with Python
Here is a complete sample program that doesn't work on Python 3:
import imaplib
import email
# Script: print the From header of every message in "[Gmail]/All Mail".
m = imaplib.IMAP4_SSL("imap.gmail.com", 993)
m.login("myemail#gmail.com","mypassword")
# This select() is the call that raises the imaplib.error in the traceback
# below; the mailbox name contains a space and is sent without double quotes.
m.select("[Gmail]/All Mail")
result, data = m.uid('search', None, "ALL") # search all email and return uids
if result == 'OK':
for num in data[0].split():
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
email_message = email.message_from_bytes(data[0][1]) # raw email text including headers
print('From:' + email_message['From'])
m.close()
m.logout()
The following exception is thrown:
Traceback (most recent call last):
File "./eport3.py", line 9, in <module>
m.select("[Gmail]/All Mail")
File "/RVM/lib/python3/lib/python3.4/imaplib.py", line 682, in select
typ, dat = self._simple_command(name, mailbox)
File "/RVM/lib/python3/lib/python3.4/imaplib.py", line 1134, in _simple_command
return self._command_complete(name, self._command(name, *args))
File "/RVM/lib/python3/lib/python3.4/imaplib.py", line 965, in _command_complete
raise self.error('%s command error: %s %s' % (name, typ, data))
imaplib.error: SELECT command error: BAD [b'Could not parse command']
Here's the corresponding Python 2 version that works:
import imaplib
import email
m = imaplib.IMAP4_SSL("imap.gmail.com", 993)
m.login("myemail#gmail.com","mypassword")
m.select("[Gmail]/All Mail")
result, data = m.uid('search', None, "ALL") # search all email and return uids
if result == 'OK':
for num in data[0].split():
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
email_message = email.message_from_string(data[0][1]) # raw email text including headers
print 'From:' + email_message['From']
m.close()
m.logout()
As it's mentioned in this answer:
Try using m.select('"[Gmail]/All Mail"'), so that the double quotes get transmitted.
I suspect imaplib is not properly quoting the string, so the server gets what looks like two arguments: [Gmail]/All, and Mail.
And it works in python v3.4.1
import imaplib
import email
# Script: print the From header of every message in "[Gmail]/All Mail".
m = imaplib.IMAP4_SSL("imap.gmail.com", 993)
try:
    m.login("myemail@gmail.com","mypassword")
    # The mailbox name contains a space, so it is wrapped in double quotes
    # inside the Python string; otherwise the server sees two arguments.
    m.select('"[Gmail]/All Mail"')
    result, data = m.uid('search', None, "ALL") # search all email and return uids
    if result == 'OK':
        for num in data[0].split():
            # A separate name keeps the search result from being overwritten.
            result, msg_data = m.uid('fetch', num, '(RFC822)')
            if result != 'OK' or not msg_data or not isinstance(msg_data[0], tuple):
                continue
            email_message = email.message_from_bytes(msg_data[0][1]) # raw email text including headers
            # %-formatting avoids a TypeError when the From header is missing
            # or is returned as a Header object.
            print('From:%s' % email_message.get('From', ''))
    m.close()
finally:
    # Log out even when login, select or a fetch failed.
    m.logout()