Alright so I load the email in from gmail with imaplib and then when I'm trying to parse the email it does not separate anything out in a usable format. I suspect this is because somewhere in the process '<' or '>' are being added to the raw email.
Here is what the debugger is showing me after I have called the method:
As you can see it hasn't really parsed anything into a usable format.
Here is the code I'm using: (NOTE: the .replace('>', '') seems to have no effect on the end result.)
# Log in to Gmail over IMAP, fetch a few inbox messages and read their
# Subject and From headers.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('myEmail#gmail.com', 'password')
mail.list()
mail.select('inbox')
typ, data = mail.search(None, 'ALL')
# data[0] holds the matching message numbers as one whitespace-separated value
ids = data[0]
id_list = ids.split()
# get the most recent email id
latest_email_id = int( id_list[-1] )
# iterate in descending order over the 5 ids latest_email_id-10 .. latest_email_id-14
# (the stop value latest_email_id-15 is exclusive)
# the '-1' dictates reverse looping order
for i in range( latest_email_id -10, latest_email_id-15, -1 ):
typ, data = mail.fetch( str(i), '(RFC822)' )
for response_part in data:
# only tuple items are processed; element [1] is treated as the raw message
if isinstance(response_part, tuple):
# NOTE(review): if response_part[1] is bytes (as imaplib returns on Python 3),
# str() produces its "b'...'" repr instead of decoded text, which
# message_from_string cannot split into headers -- confirm the interpreter version.
msg = str(response_part[1]).replace('<', '')
msg = msg.replace('>', '')
msg = email.message_from_string(msg)
#msg = feedparser.parse(response_part[1])
varSubject = msg['subject']
varFrom = msg['from']
python email.message_from_string() parse problems and Parsing email with Python both had very similar and identical problems to me (I think) and they solved it by altering their email, however I'm reading my email straight from Google's servers so I'm not sure exactly what to do to the email to fix it up since removing all '<' and '>' obviously won't work.
So, how do I fix the email that is read from imaplib so that it can be easily read with email.message_from_string()? (Or any other improvements/possible solutions as I'm not 100% certain the '<' and '>' are actually the problem, I'm only guessing based off of those other questions asked.)
Cheers
You shouldn't parse <, > and data between them - it is like parsing HTML, but much more complicated. There are existing solutions to do it.
Here is my code that can read mail with attachments, extract data that can be used for further use and process it to human and code readable format. As you can see, all tasks are being made by third-party modules.
from datetime import datetime
import imaplib
import email
import html2text
from os import path
class MailClient(object):
    """Small IMAP client that logs in on construction and reads the newest inbox message."""

    def __init__(self):
        self.m = imaplib.IMAP4_SSL('your.server.com')
        self.Login()

    def Login(self):
        """Authenticate against the server.

        Raises:
            Exception: if the server answers with anything other than 'OK'.
        """
        result, data = self.m.login('login#domain.com', 'p#s$w0rd')
        if result != 'OK':
            raise Exception("Error connecting to mailbox: {}".format(data))

    def ReadLatest(self, delete = True):
        """Fetch and parse the newest message in the inbox.

        Args:
            delete: when true, flag the message as deleted on the server
                (it is not expunged here).

        Returns:
            None when the inbox is empty, otherwise a dict with the keys
            'From', 'From name', 'Time', 'To', 'Subject', 'Text' and 'File'.
            'Time' is None when the message has no parseable Date header;
            'File' is None or a list of paths of the saved attachments.

        Raises:
            Exception: if SELECT or FETCH does not answer 'OK'.
        """
        from email.header import decode_header
        from email.utils import mktime_tz, parseaddr, parsedate_tz

        result, data = self.m.select("inbox")
        if result != 'OK':
            raise Exception("Error reading inbox: {}".format(data))
        # SELECT answers with the message count as str or bytes depending on
        # the Python version, so test it numerically instead of against '0'.
        if not data or not data[0] or int(data[0]) == 0:
            return None
        latest = data[0].split()[-1]
        result, data = self.m.fetch(latest, "(RFC822)")
        if result != 'OK':
            raise Exception("Error reading email: {}".format(data))
        if delete:
            self.m.store(latest, '+FLAGS', '\\Deleted')

        # Python 3 delivers the message as bytes; Python 2 has no
        # message_from_bytes and delivers str.
        raw = data[0][1]
        parse = getattr(email, 'message_from_bytes', None)
        if parse is None or not isinstance(raw, bytes):
            parse = email.message_from_string
        message = parse(raw)

        sender_name, sender_address = parseaddr(message['From'] or '')
        raw_date = message['Date']
        date_tuple = parsedate_tz(raw_date) if raw_date else None
        subject, subject_charset = decode_header(message["Subject"] or '')[0]
        if isinstance(subject, bytes) and subject_charset:
            try:
                subject = subject.decode(subject_charset, 'replace')
            except LookupError:
                # unknown charset name: keep the undecoded value
                pass

        res = {
            'From' : sender_address,
            'From name' : sender_name,
            'Time' : datetime.fromtimestamp(mktime_tz(date_tuple)) if date_tuple else None,
            'To' : message['To'],
            'Subject' : subject,
            'Text' : '',
            'File' : None
        }
        for part in message.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                continue
            if maintype == 'text':
                # reading as HTML (not plain text)
                payload = part.get_payload(decode = True) or b''
                if isinstance(payload, bytes):
                    try:
                        payload = payload.decode(part.get_content_charset() or 'utf-8', 'replace')
                    except LookupError:
                        payload = payload.decode('utf-8', 'replace')
                res['Text'] = html2text.html2text(payload)
            elif maintype == 'application' and part.get_filename():
                # basename() drops directory components chosen by the sender,
                # so the attachment cannot be written outside the target folder
                safe_name = path.basename(part.get_filename())
                if not safe_name:
                    continue
                fname = path.join("your/folder", safe_name)
                with open(fname, 'wb') as attachment:
                    attachment.write(part.get_payload(decode = True))
                if res['File'] is None:
                    res['File'] = []
                res['File'].append(fname)
        return res

    def __del__(self):
        # __init__ may have failed before the connection attribute was set
        conn = getattr(self, 'm', None)
        if conn is None:
            return
        try:
            conn.close()
        except (imaplib.IMAP4.error, OSError):
            # close() is only legal while a mailbox is selected
            pass
        try:
            conn.logout()
        except (imaplib.IMAP4.error, OSError):
            pass
Related
Logging in and reading the subject works, but an error occurs when reading the body. What is the error? In the examples I found on the internet the error was always in this part: "email.message_from_bytes(data[0][1].decode())", but I think this part is correct in my code.
# Fetch up to five unseen messages by UID and print the subject and
# plain-text body of the last one processed.
# Connection settings
HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "UNSEEN")
if result == 'OK':
# only the first five UIDs of the search result are processed
for num in data[0].split()[:5]:
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
# data[0][1] is parsed here into a Message object
email_message_raw = email.message_from_bytes(data[0][1])
email_from = str(make_header(decode_header(email_message_raw['From'])))
# von Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
subject = str(email.header.make_header(email.header.decode_header(email_message_raw['Subject'])))
# content = email_message_raw.get_payload(decode=True)
# von Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
# email_message_raw is already a Message, not a str; passing it to
# message_from_string is what produces the reported
# "TypeError: initial_value must be str or None, not Message"
b = email.message_from_string(email_message_raw)
body = ""
if b.is_multipart():
for part in b.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True) # decode
break
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
else:
body = b.get_payload(decode=True)
m.close()
m.logout()
txt = body
regarding = subject
print("###########################################################")
print(regarding)
print("###########################################################")
print(txt)
print("###########################################################")
Error message:
TypeError: initial_value must be str or None, not Message
Thanks for the comments and reply
You have everything in place. Just have to understand a few concepts.
"email" library allows you to convert typical email bytes into an easily usable object called Message using its parser APIs, such as message_from_bytes(), message_from_string(), etc.
The typical error is due to an input error.
email.message_from_bytes(data[0][1].decode())
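The function above, message_from_bytes, takes bytes as input, not str. Calling .decode() on data[0][1] first turns the bytes into a str, so the decoded value must not be passed to message_from_bytes: pass data[0][1] directly (or pass the decoded str to message_from_string instead).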
In short, you are trying to parse the original email message twice using message_from_bytes(data[0][1]) and message_from_string(email_message_raw). Get rid of one of them and you will be all set!
Try this approach:
# Fetch up to five unseen messages by UID and print each one's subject and
# plain-text body.
import email
import email.header
import imaplib

HOST = 'imap.host'
USERNAME = 'name#domain.com'
PASSWORD = 'password'


def _header_text(value):
    """Decode an RFC 2047 header value to str; a missing header becomes ''."""
    return str(email.header.make_header(email.header.decode_header(value or '')))


def _payload_text(part):
    """Return the transfer-decoded payload of *part* as str ('' when it has none)."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    try:
        return payload.decode(part.get_content_charset() or 'utf-8', errors='replace')
    except LookupError:
        # the part declares a charset Python does not know
        return payload.decode('utf-8', errors='replace')


m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
try:
    m.select('INBOX')
    result, data = m.uid('search', None, "UNSEEN")
    if result == 'OK':
        for num in data[0].split()[:5]:
            result, data = m.uid('fetch', num, '(RFC822)')
            if result != 'OK':
                continue
            # Parse the raw bytes exactly once: the result is already a Message
            # object with is_multipart(), walk(), get_payload(), ...
            email_message = email.message_from_bytes(data[0][1])
            email_from = _header_text(email_message['From'])
            # von Edward Chapman -> https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
            subject = _header_text(email_message['Subject'])
            # von Todor Minakov -> https://stackoverflow.com/questions/17874360/python-how-to-parse-the-body-from-a-raw-email-given-that-raw-email-does-not
            body = ""
            if email_message.is_multipart():
                for part in email_message.walk():
                    ctype = part.get_content_type()
                    cdispo = str(part.get('Content-Disposition'))
                    # skip any text/plain (txt) attachments
                    if ctype == 'text/plain' and 'attachment' not in cdispo:
                        body = _payload_text(part)
                        break
            else:
                # not multipart - i.e. plain text, no attachments
                body = _payload_text(email_message)
            print("###########################################################")
            print(subject)
            print("###########################################################")
            print(body)
            print("###########################################################")
    m.close()
finally:
    # always end the session, even when a command above raised
    m.logout()
from imap_tools import MailBox, AND
# get email bodies from INBOX
# (uses the third-party imap_tools package; the login arguments are
# placeholders to be replaced with real credentials)
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
for msg in mailbox.fetch():
# use the text attribute, falling back to the html attribute when text is empty
body = msg.text or msg.html
https://github.com/ikvk/imap_tools
After 5 hours of trying, time to get some help. Sifted through all the stackoverflow questions related to this but couldn't find the answer.
The code is a gmail parser - works for most emails but some emails cause the UnicodeDecodeError. The problem is "raw_email.decode('utf-8')" but changing it (see comments) causes a different problem down below.
# Source: https://stackoverflow.com/questions/7314942/python-imaplib-to-get-gmail-inbox-subjects-titles-and-sender-name
import datetime
import time
import email
import imaplib
import mailbox
from vars import *
import re # to remove links from str
import string
# Walk every message in the Gmail inbox, newest UID first, and print the
# word list of the plain-text body for matching messages on one fixed date.
EMAIL_ACCOUNT = 'gmail_login'
PASSWORD = 'gmail_psswd'
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')
result, data = mail.uid('search', None, "ALL") # (ALL/UNSEEN)
id_list = data[0].split()
email_rev = reversed(id_list) # Returns a type list.reverseiterator, which is not list
email_list = list(email_rev)
i = len(email_list)
todays_date = time.strftime("%m/%d/%Y")
for x in range(i):
latest_email_uid = email_list[x]
result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
raw_email = email_data[0][1] # Returns a byte
# decoding the whole raw message as UTF-8 is the step the question reports as
# raising UnicodeDecodeError for some messages
raw_email_str = raw_email.decode('utf-8') # Returns a str
#raw_email_str = base64.b64decode(raw_email_str1) # Tried this but didn't work.
#raw_email_str = raw_email.decode('utf-8', errors='ignore') # Tried this but caused a TypeError down where var subject is created because something there is expecting a str or byte-like
email_message = email.message_from_string(raw_email_str)
date_tuple = email.utils.parsedate_tz(email_message['Date'])
# build month/day/year from tuple fields 1, 2 and 0 (no zero padding)
date_short = f'{date_tuple[1]}/{date_tuple[2]}/{date_tuple[0]}'
# Header Details
if date_short == '12/23/2019':
#if date_tuple:
# local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
# local_message_date = "%s" %(str(local_date.strftime("%a, %d %b %Y %H:%M:%S")))
email_from = str(email.header.make_header(email.header.decode_header(email_message['From'])))
subject = str(email.header.make_header(email.header.decode_header(email_message['Subject'])))
#print(subject)
if email_from.find('restaurants#uber.com') != -1:
print('yay')
# Body details
if email_from.find('restaurants#uber.com') != -1 and subject.find('Payment Summary') != -1:
for part in email_message.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True)
body = body.decode("utf-8") # Convert byte to str
body = body.replace("\r\n", " ")
text = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', body) # removes url links
# strip all ASCII punctuation, then split on anything that is not a word character
text2 = text.translate(str.maketrans('', '', string.punctuation))
body_list = re.sub("[^\w]", " ", text2).split()
print(body_list)
print(date_short)
else:
continue
Here is an example how to retrieve and read mail parts with imapclient and the email.* modules from the python standard libs:
from imapclient import IMAPClient
import email
from email import policy
def walk_parts(part, level=0):
    """Print the MIME tree rooted at *part*, one content type per line.

    Args:
        part: an email message or message part.
        level: nesting depth of *part*; each level indents by four spaces.
    """
    print(' ' * 4 * level + part.get_content_type())
    # do something with part content (applies encoding by default)
    # part.get_content()
    if part.is_multipart():
        # recurse into this function itself, with a loop variable that does
        # not shadow the parameter
        for subpart in part.get_payload():
            walk_parts(subpart, level + 1)
# Example session with the third-party imapclient package: list unseen
# mails, then fetch one mail by UID and inspect its MIME parts.
# context manager ensures the session is cleaned up
with IMAPClient(host="your_mail_host") as client:
client.login('user', 'password')
# select some folder
client.select_folder('INBOX')
# do something with folder, e.g. search & grab unseen mails
messages = client.search('UNSEEN')
# NOTE(review): fetching 'RFC822' presumably marks the messages as seen on the
# server -- confirm, and fetch 'BODY.PEEK[]' instead if that is unwanted
for uid, message_data in client.fetch(messages, 'RFC822').items():
email_message = email.message_from_bytes(
message_data[b'RFC822'], policy=policy.default)
print(uid, email_message.get('From'), email_message.get('Subject'))
# alternatively search for specific mails
msgs = client.search(['SUBJECT', 'some subject'])
#
# do something with a specific mail:
#
# fetch a single mail with UID 12345
raw_mails = client.fetch([12345], 'RFC822')
# parse the mail (very expensive for big mails with attachments!)
mail = email.message_from_bytes(
raw_mails[12345][b'RFC822'], policy=policy.default)
# Now you have a python object representation of the mail and can dig
# into it. Since a mail can be composed of several subparts we have
# to walk the subparts.
# walk all parts at once
for part in mail.walk():
# do something with that part
print(part.get_content_type())
# or recurse yourself into sub parts until you find the interesting part
walk_parts(mail)
See the docs for email.message.EmailMessage. There you find all needed bits to read into a mail message.
use 'ISO 8859-1' instead of 'utf-8'
I had the same issue And after a lot of research I realized that I simply need to use, message_from_bytes function from email rather than using message_from_string
so for your code simply replace:
raw_email_str = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_str)
to
email_message = email.message_from_bytes(raw_email)
should work like a charm :)
I want to get the last 10 received gmails with python.
Currently I have this code, but it only returns a limited number of emails and it manipulates POP3 directly, which makes it unnecessarily long.
Source of the code: https://www.code-learner.com/python-use-pop3-to-read-email-example/
import poplib
import smtplib, ssl
def guess_charset(msg):
    """Return the name of the charset declared for *msg*, or None.

    The charset set explicitly on the message object wins; otherwise the
    ``charset`` parameter of the Content-Type header is used. That parameter
    is read with the email package's own parser, so surrounding quotes and
    any following parameters (e.g. ``; format=flowed``) are not part of the
    returned name.
    """
    charset = msg.get_charset()
    if charset is not None:
        # str() of a Charset object is its charset name, which is what
        # bytes.decode() needs
        return str(charset)
    return msg.get_content_charset()
def decode_str(s):
    """Decode an RFC 2047 encoded header value *s* to a plain str.

    Every fragment of the header is decoded and joined, so a value that
    mixes encoded words with plain text is returned in full.
    """
    from email.header import decode_header, make_header

    return str(make_header(decode_header(s)))
# The variable indent_number sets the indentation depth for each nesting level of a multipart message body.
def print_info(msg, indent_number=0):
    """Print the headers and the MIME structure of *msg*.

    Args:
        msg: a parsed email message (or one of its parts when recursing).
        indent_number: nesting depth; 0 means the top-level message, whose
            From, To and Subject headers are printed first.
    """
    from email.utils import parseaddr

    indent = ' ' * indent_number
    if indent_number == 0:
        # loop to retrieve from, to, subject from email header.
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    # decode the subject value
                    value = decode_str(value)
                else:
                    # split into display name and address, decode the name
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (indent, header, value))
    if msg.is_multipart():
        # print every sub part by invoking print_info recursively.
        for n, part in enumerate(msg.get_payload()):
            print('%spart %s' % (indent, n))
            print('%s--------------------' % indent)
            print_info(part, indent_number + 1)
    else:
        content_type = msg.get_content_type()
        if content_type == 'text/plain' or content_type == 'text/html':
            raw = msg.get_payload(decode=True) or b''
            # Always end up with str: without a declared charset the payload
            # used to stay bytes and "bytes + '...'" raised TypeError.
            charset = guess_charset(msg) or 'utf-8'
            try:
                content = raw.decode(charset, errors='replace')
            except (LookupError, TypeError):
                # unknown or unusable charset name
                content = raw.decode('utf-8', errors='replace')
            print('%sText: %s' % (indent, content + '...'))
        else:
            print('%sAttachment: %s' % (indent, content_type))
# Connect to Gmail over POP3, download one message and print its headers
# and structure.
# input email address, password and pop3 server domain or ip address
email = 'yourgmail#gmail.com'
password = 'yourpassword'
# connect to pop3 server:
server = poplib.POP3_SSL('pop.gmail.com')
# open debug switch to print debug information between client and pop3 server.
server.set_debuglevel(1)
# get pop3 server welcome message.
pop3_server_welcome_msg = server.getwelcome().decode('utf-8')
# print out the pop3 server welcome message.
print(server.getwelcome().decode('utf-8'))
# user account authentication
server.user(email)
server.pass_(password)
# stat() function return email count and occupied disk size
print('Messages: %s. Size: %s' % server.stat())
# list() function return all email list
resp, mails, octets = server.list()
print(mails)
# retrieve the newest email index number
#index = len(mails)
# the index is fixed to 3 here, so exactly one message (number 3) is retrieved
index = 3
# server.retr function can get the contents of the email with index variable value index number.
resp, lines, octets = server.retr(index)
# lines stores each line of the original text of the message
# so that you can get the original text of the entire message use the join function and lines variable.
msg_content = b'\r\n'.join(lines).decode('utf-8')
# now parse out the email object.
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
import poplib
# parse the email content to a message object.
msg = Parser().parsestr(msg_content)
print(len(msg_content))
# get email from, to, subject attribute value.
email_from = msg.get('From')
email_to = msg.get('To')
email_subject = msg.get('Subject')
print('From ' + email_from)
print('To ' + email_to)
print('Subject ' + email_subject)
for part in msg.walk():
if part.get_content_type():
body = part.get_payload(decode=True)
# NOTE(review): len(msg) is passed as indent_number; it is presumably the
# number of headers, so print_info would skip its header section whenever it
# is non-zero -- confirm this is intended
print_info(msg, len(msg))
# delete the email from pop3 server directly by email index.
# server.dele(index)
# close pop3 server connection.
server.quit()
I also tried this code but it didn't work:
import imaplib, email, base64
def fetch_messages(username, password):
    """Download every message of the default Gmail mailbox and mark each as seen.

    Args:
        username: Gmail login name.
        password: password for *username*.

    Returns:
        A list of parsed email Message objects, in UID search order.
    """
    messages = []
    # imaplib delivers bytes on Python 3; Python 2 has no message_from_bytes
    parse = getattr(email, 'message_from_bytes', email.message_from_string)
    conn = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    try:
        conn.login(username, password)
        conn.select()
        typ, data = conn.uid('search', None, 'ALL')
        for num in data[0].split():
            typ, msg_data = conn.uid('fetch', num, '(RFC822)')
            for response_part in msg_data:
                if isinstance(response_part, tuple):
                    messages.append(parse(response_part[1]))
            # num is a UID, so the flag must be stored with the UID command;
            # plain store() would treat it as a message sequence number
            typ, response = conn.uid('store', num, '+FLAGS', r'(\Seen)')
    finally:
        # end the session even when a command above raised
        conn.logout()
    return messages
and this also didn't work for me...
import poplib
from email import parser
# Download every message from a Gmail POP3 mailbox and print the subject
# of each one.
pop_conn = poplib.POP3_SSL('pop.gmail.com')
pop_conn.user('#gmail.com')
pop_conn.pass_('password')
# message numbers run from 1 up to the number of entries returned by list()
messages = [pop_conn.retr(i) for i in range(1, len(pop_conn.list()[1]) + 1)]
# Concat message pieces:
# NOTE(review): element [1] of a retr() result is the list of message lines;
# if those are bytes (Python 3), joining them with the str "\n" raises
# TypeError -- confirm the interpreter version
messages = ["\n".join(mssg[1]) for mssg in messages]
# Parse message into an email object:
messages = [parser.Parser().parsestr(mssg) for mssg in messages]
for message in messages:
print(message['subject'])
# NOTE(review): message[...] looks up a header by name, so this looks for a
# header literally called "body" rather than the message body -- confirm
print(message['body'])
I managed to solve it, the only issue is that it marks as read every unread email, here is the code I used:
import imaplib
# Print the To header, the parsed From address and all headers of the
# newest inbox message.
import email
import email.utils
import getpass

mail = imaplib.IMAP4_SSL('imap.gmail.com')
# keep the address in its own name so it does not shadow the email module
address = input('Email: ')
# getpass does not echo the password to the terminal
password = getpass.getpass('Password: ')
mail.login(address+'#gmail.com', password)
try:
    # readonly=True opens the mailbox read-only, so fetching a message does
    # not mark it as read
    mail.select("inbox", readonly=True) # connect to inbox.
    result, data = mail.search(None, "ALL")
    ids = data[0] # data is a list.
    id_list = ids.split() # ids is a space separated string
    if id_list:
        latest_email_id = id_list[-1] # get the latest
        # fetch the email body (RFC822) for the given ID
        result, data = mail.fetch(latest_email_id, "(RFC822)")
        raw_email = data[0][1] # here's the body, which is the raw bytes of the whole email
        # including headers and alternate payloads
        email_message = email.message_from_bytes(raw_email)
        print (email_message['To'])
        print (email.utils.parseaddr(email_message['From'])) # for parsing "Yuji Tomita" <yuji#grovemade.com>
        print (email_message.items()) # print all headers
    else:
        print ('Mailbox is empty')
finally:
    # always end the session, even when a command above raised
    mail.logout()
# note that if you want to get text content (body) and the email contains
# multiple payloads (plaintext/ html), you must parse each message separately.
# use something like the following: (taken from a stackoverflow post)
def get_first_text_block(self, email_message_instance):
    """Return the payload of the first text part of a message.

    A message whose main type is text yields its own payload. A multipart
    message yields the payload of its first direct child whose main type is
    text. Anything else, including a multipart without a text child,
    yields None.
    """
    kind = email_message_instance.get_content_maintype()
    if kind == 'text':
        return email_message_instance.get_payload()
    if kind != 'multipart':
        return None
    text_children = (
        child for child in email_message_instance.get_payload()
        if child.get_content_maintype() == 'text'
    )
    first_text = next(text_children, None)
    return None if first_text is None else first_text.get_payload()
https://developers.google.com/gmail/api/quickstart/python is the preferred way:
from gmail.gmail import gmail_auth, ListThreadsMatchingQuery
# Obtain a Gmail API service object and list the threads matching a query.
# NOTE(review): `query` is not defined in this snippet; it must be set to a
# search string before this line runs.
service = gmail_auth()
threads = ListThreadsMatchingQuery(service, query=query)
where:
def ListThreadsMatchingQuery(service, user_id='me', query=''):
    """List all Threads of the user's mailbox matching the query.

    Args:
        service: Authorized Gmail API service instance.
        user_id: User's email address. The special value "me"
            can be used to indicate the authenticated user.
        query: String used to filter messages returned.
            Eg.- 'label:UNREAD' for unread messages only.

    Returns:
        List of threads that match the criteria of the query. Note that the returned
        list contains Thread IDs, you must use get with the appropriate
        ID to get the details for a Thread.

    Raises:
        Whatever the API client raises for a failed request; errors are
        propagated unchanged.
    """
    threads = []
    page_token = None
    while True:
        params = {'userId': user_id, 'q': query}
        if page_token:
            params['pageToken'] = page_token
        response = service.users().threads().list(**params).execute()
        # a page may legitimately carry no 'threads' key
        threads.extend(response.get('threads', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return threads
You could try the easyimap library to get a list of e-mails over IMAP; I'm not sure whether it works with POP3.
Code example:
import easyimap
# Connect with the third-party easyimap package and request message ids
# from one mailbox folder. All values below are placeholders.
host = 'imap.gmail.com'
user = 'you#example.com'
password = 'secret'
mailbox = 'INBOX.subfolder'
imapper = easyimap.connect(host, user, password, mailbox)
# upper bound passed to listids() as its limit argument
email_quantity = 10
emails_from_your_mailbox = imapper.listids(limit=email_quantity)
imapper.quit()
I'm using the following code (got it from StackOverflow :)) to get all unread e-mail from specific email adresses. It works perfect!
However, I would like to get the actual received (or sent) date of each e-mail that I am saving an attachment from, and I don't know how to do that.
import email
import imaplib
import os
import sys
import random
import string
import glob
import unicodedata
def remove_accents(s):
    """Return *s* with accents removed.

    The text is decomposed with NFKD normalization and every combining
    character is dropped from the result.
    """
    decomposed = unicodedata.normalize('NFKD', s)
    kept = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return u''.join(kept)
def remove_non_ascii(text):
    """Decode *text* from cp865 and pass the result through unidecode.

    Relies on the Python 2 ``unicode`` builtin and on an ``unidecode``
    callable that must be available in the module namespace.
    """
    decoded = unicode(text, encoding = "cp865")
    return unidecode(decoded)
def replace_non_ascii(x):
    """Return *x* with every character whose code point is 128 or above replaced by '_'."""
    cleaned = []
    for ch in x:
        cleaned.append('_' if ord(ch) >= 128 else ch)
    return ''.join(cleaned)
# Download the attachments of all unseen mails from one sender domain into
# detach_dir, skipping files that already exist there.
detach_dir = r'\\1.1.1.1\xxx\xxx\drop_folder'
try:
imapSession = imaplib.IMAP4_SSL('outlook.office365.com')
typ, accountDetails = imapSession.login('xxxx', 'xxxx')
if typ != 'OK':
print ('Not able to sign in!')
# a bare raise with no exception being handled is itself an error; it ends
# up in the bare except at the bottom of the script
raise
imapSession.select('Inbox')
typ, data = imapSession.search(None, '(UNSEEN FROM "#xxxx.xxx")')
if typ != 'OK':
print ('Error searching Inbox.')
raise
# Iterating over all emails
for msgId in data[0].split():
typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
if typ != 'OK':
print ('Error fetching mail.')
raise
emailBody = messageParts[0][1]
mail = email.message_from_string(emailBody)
for part in mail.walk():
# container parts hold no attachment data themselves
if part.get_content_maintype() == 'multipart':
# print part.as_string()
continue
# parts without a Content-Disposition header are not treated as attachments
if part.get('Content-Disposition') is None:
# print part.as_string()
continue
fileName = part.get_filename().encode('utf-8')
if bool(fileName):
# the saved name is always 'EXP-' + attachment name + '.xls'
filePath = os.path.join(detach_dir, 'EXP-' + fileName + '.xls')
if not os.path.isfile(filePath) :
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
imapSession.close()
imapSession.logout()
# the bare except catches every error raised above and prints one generic message
except :
print ('Not able to download all attachments.')
You could fetch the INTERNALDATE instead of, or in addition to the RFC822 item; It is (generally) time the server received the message.
You will have to do some parsing of the return item, since imaplib does no parsing of FETCH results. It will be easier to parse if it's the only thing you fetch.
The response will look something like
* 5 FETCH (INTERNALDATE "17-Jul-2018 02:44:25 -0700")
I want to be able to move an email in GMail from the inbox to another folder using Python. I am using imaplib and can't figure out how to do it.
There is no explicit move command for IMAP. You will have to execute a COPY followed by a STORE (with suitable flag to indicate deletion) and finally expunge. The example given below worked for moving messages from one label to the other. You'll probably want to add more error checking though.
import imaplib, getpass, re
pattern_uid = re.compile(r'\d+ \(UID (?P<uid>\d+)\)')
def connect(email):
    """Open an SSL IMAP session to Gmail, prompt for the password and log in as *email*.

    Returns the logged-in imaplib session.
    """
    session = imaplib.IMAP4_SSL("imap.gmail.com")
    session.login(email, getpass.getpass("Enter your password: "))
    return session
def disconnect(imap):
    """End the IMAP session *imap* by logging out."""
    imap.logout()
def parse_uid(data):
    """Extract the UID from a ``FETCH (UID)`` response line.

    Args:
        data: one response item such as ``'1 (UID 42)'``; str or bytes
            (imaplib returns bytes on Python 3).

    Returns:
        The UID as a string of digits.

    Raises:
        ValueError: if *data* does not match the expected response layout.
    """
    if isinstance(data, bytes):
        # pattern_uid is a str pattern, so bytes must be decoded first
        data = data.decode('ascii', errors='replace')
    match = pattern_uid.match(data)
    if match is None:
        raise ValueError("No UID found in FETCH response: {!r}".format(data))
    return match.group('uid')
if __name__ == '__main__':
    # Move the newest message of the source folder to the destination
    # folder: COPY it, flag the original as deleted, then expunge.
    imap = connect('<your mail id>')
    try:
        imap.select(mailbox = '<source folder>', readonly = False)
        resp, items = imap.search(None, 'All')
        email_ids = items[0].split()
        if email_ids:
            latest_email_id = email_ids[-1] # Assuming that you are moving the latest email.
            resp, data = imap.fetch(latest_email_id, "(UID)")
            msg_uid = parse_uid(data[0])

            result = imap.uid('COPY', msg_uid, '<destination folder>')

            # only delete the original once the copy is confirmed
            if result[0] == 'OK':
                # raw string: '\D' is not a valid escape sequence
                mov, data = imap.uid('STORE', msg_uid , '+FLAGS', r'(\Deleted)')
                imap.expunge()
    finally:
        # log out even when a command above raised
        disconnect(imap)
As for Gmail, based on its api working with labels, the only thing for you to do is adding dest label and deleting src label:
import imaplib
# Move a message between Gmail labels by adding the destination label and
# removing the source label.
# src_folder_name, desti_folder_name and msg_uid are not defined in this
# snippet and must be set by the caller.
obj = imaplib.IMAP4_SSL('imap.gmail.com', 993)
obj.login('username', 'password')
obj.select(src_folder_name)
typ, data = obj.uid('STORE', msg_uid, '+X-GM-LABELS', desti_folder_name)
typ, data = obj.uid('STORE', msg_uid, '-X-GM-LABELS', src_folder_name)
I suppose one has a uid of the email which is going to be moved.
import imaplib
# Move one message by UID: COPY it to the destination folder, then flag the
# original as deleted and expunge.
# src_folder_name, desti_folder_name and msg_uid must be set by the caller.
obj = imaplib.IMAP4_SSL('imap.gmail.com', 993)
obj.login('username', 'password')
obj.select(src_folder_name)
apply_lbl_msg = obj.uid('COPY', msg_uid, desti_folder_name)
# only delete the original once the copy is confirmed
if apply_lbl_msg[0] == 'OK':
    # raw string: '\D' is not a valid escape sequence
    mov, data = obj.uid('STORE', msg_uid , '+FLAGS', r'(\Deleted)')
    obj.expunge()
None of the previous solutions worked for me. I was unable to delete a message from the selected folder, and unable to remove the label for the folder when the label was the selected folder. Here's what ended up working for me:
import email, getpass, imaplib, os, sys, re
# Move every message from one Gmail label to another in two passes:
# first add the destination label, then remove the source label from
# within the destination folder.
user = "user#example.com"
pwd = "password" #getpass.getpass("Enter your password: ")

m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login(user,pwd)

from_folder = "Notes"
to_folder = "food"

m.select(from_folder, readonly = False)

# search on the logged-in connection `m`
response, emailids = m.search(None, 'All')
if response != 'OK':
    raise RuntimeError("search in %s failed: %r" % (from_folder, emailids))

emailids = emailids[0].split()

errors = []
labeled = []
for emailid in emailids:
    result = m.fetch(emailid, '(X-GM-MSGID)')
    if result[0] != 'OK':
        errors.append(emailid)
        continue

    fetched = result[1][0]
    if isinstance(fetched, bytes):
        # the str pattern below cannot be applied to bytes
        fetched = fetched.decode('ascii', 'replace')
    gm_msgid = re.findall(r"X-GM-MSGID (\d+)", fetched)[0]

    result = m.store(emailid, '+X-GM-LABELS', to_folder)

    if result[0] != 'OK':
        errors.append(emailid)
        continue

    labeled.append(gm_msgid)

m.close()
m.select(to_folder, readonly = False)

errors2 = []

for gm_msgid in labeled:
    result = m.search(None, '(X-GM-MSGID "%s")' % gm_msgid)

    if result[0] != 'OK':
        errors2.append(gm_msgid)
        continue

    emailid = result[1][0]
    if not emailid:
        # the message was not found in the destination folder
        errors2.append(gm_msgid)
        continue
    result = m.store(emailid, '-X-GM-LABELS', from_folder)

    if result[0] != 'OK':
        errors2.append(gm_msgid)
        continue

m.close()
m.logout()

# sys.stderr.write produces the same text on Python 2 and 3
if errors: sys.stderr.write("%d failed to add label %s\n" % (len(errors), to_folder))
if errors2: sys.stderr.write("%d failed to remove label %s\n" % (len(errors2), from_folder))
I know that this is a very old question, but anyway: the solution proposed by Manoj Govindan probably works perfectly (I have not tested it, but it looks like it does). The problem that I encountered and had to solve is how to copy/move more than one email.
So I came up with solution, maybe someone else in the future might have the same problem.
The steps are simple: I connect to my email (Gmail) account, choose the folder to process (e.g. INBOX) and fetch all UIDs instead of the emails' list (sequence) numbers. This is the crucial point. If we fetched the sequence numbers and then processed that list, we would run into a problem. Moving an email is simple (copy it to the destination folder and delete it from its current location), but sequence numbers change when a message is removed. For example, with 4 emails in the inbox, once we move the 2nd email the former numbers 3 and 4 become 2 and 3: they no longer refer to the emails we expected, and accessing number 4 results in an error because that position no longer exists.
So the only reliable solution to this problem is to use UIDs, which are unique numbers for each email. No matter how the mailbox changes, a UID stays bound to its email.
So in the example below, I fetch the UIDs on the first step,check if folder is empty no point of processing the folder else iterate for all emails found in the folder. Next fetch each email Header. The headers will help us to fetch the Subject and compare the subject of the email with the one that we are searching. If the subject matches, then continue to copy and delete the email. Then you are done. Simple as that.
#!/usr/bin/env python
import email
import pprint
import imaplib
__author__ = 'author'
def initialization_process(user_name, user_password, folder):
    """Open an SSL IMAP session to Gmail, log in, list the folders and select *folder*.

    Returns the imaplib session with *folder* selected.
    """
    session = imaplib.IMAP4_SSL('imap.gmail.com')  # SSL encrypted socket
    session.login(user_name, user_password)
    session.list()  # the "folders" are labels in Gmail
    session.select(folder)
    return session
def logout_process(imap4):
    """Close the selected mailbox of *imap4* and log out of the session."""
    imap4.close()
    imap4.logout()
def main(user_email, user_pass, scan_folder, subject_match, destination_folder):
    """Move every message whose subject contains *subject_match*.

    Each message of *scan_folder* is looked up by UID; matching messages are
    copied to *destination_folder*, flagged as deleted and expunged.

    Args:
        user_email: login name of the account.
        user_pass: password of the account.
        scan_folder: folder to scan.
        subject_match: text that must occur in the Subject header.
        destination_folder: folder the matching messages are copied to.

    Returns:
        ``{scan_folder: {uid: info}}`` where info has the keys
        'MESSAGE BODY', 'BOOKING', 'SUBJECT' and 'RESULT'; for an empty
        folder the inner dict is ``{scan_folder: 'Is Empty'}``. If an error
        interrupts the scan, the partial, unwrapped dict is returned.
    """
    # Bind these before the try block so that the finally clause and the
    # return statement work even when the connection attempt fails.
    imap4 = None
    dictionary = {}
    # imaplib delivers bytes on Python 3; Python 2 has no message_from_bytes
    parse_header = getattr(email, 'message_from_bytes', email.message_from_string)
    try:
        imap4 = initialization_process(user_email, user_pass, scan_folder)
        result, items = imap4.uid('search', None, "ALL")  # search and return uids
        # an empty folder yields an empty str or bytes value
        uids = items[0].split() if items and items[0] else []
        if not uids:
            dictionary[scan_folder] = 'Is Empty'
        for uid in uids:
            entry = {'MESSAGE BODY': None, 'BOOKING': None, 'SUBJECT': None, 'RESULT': None}
            dictionary[uid] = entry
            result, header = imap4.uid('fetch', uid, '(UID BODY[HEADER])')
            if result != 'OK':
                raise Exception('Can not retrieve "Header" from EMAIL: {}'.format(uid))
            subject = parse_header(header[0][1])['Subject']
            entry['SUBJECT'] = '(no subject)' if subject is None else subject
            if subject_match not in str(entry['SUBJECT']):
                print("Do something with not matching emails")
                # do something else instead of copy
                continue
            result, body = imap4.uid('fetch', uid, '(UID BODY[TEXT])')
            if result != 'OK':
                raise Exception('Can not retrieve "Body" from EMAIL: {}'.format(uid))
            entry['MESSAGE BODY'] = body[0][1]
            result, copy = imap4.uid('COPY', uid, destination_folder)
            if result != 'OK':
                entry['RESULT'] = 'ERROR'
                continue
            entry['RESULT'] = 'COPIED'
            # raw string: '\D' is not a valid escape sequence
            result, delete = imap4.uid('STORE', uid, '+FLAGS', r'(\Deleted)')
            imap4.expunge()
            entry['RESULT'] = 'COPIED/DELETED' if result == 'OK' else 'ERROR'
        dictionary = {scan_folder: dictionary}
    except Exception as e:
        # imaplib.IMAP4.error is an Exception subclass and is reported the same way
        print("Error, {}".format(e))
    finally:
        if imap4 is not None:
            try:
                logout_process(imap4)
            except Exception as e:
                # a failing close/logout must not discard the collected result
                print("Error, {}".format(e))
    return dictionary
if __name__ == "__main__":
    # Example run: scan INBOX for subjects containing 'BOKNING' and move
    # the matches to TMP_FOLDER, then pretty-print the per-message report.
    username, password = 'example.email#gmail.com', 'examplePassword'
    main_dictionary = main(username, password, 'INBOX', 'BOKNING', 'TMP_FOLDER')
    pprint.pprint(main_dictionary)
    exit(0)
Useful information regarding imaplib Python — imaplib IMAP example with Gmail and the imaplib documentation.
This is a solution for moving multiple messages from one folder to another.
# Fragment of a view function (its def line is not part of this snippet):
# fetch every message in INBOX by UID and move it to ProcessedEmails.
# NOTE(review): 'imap.gamil.com' looks like a misspelling of 'imap.gmail.com' -- confirm
mail_server = 'imap.gamil.com'
account_id = 'yourimap#gmail.com'
password = 'testpasword'
TLS_port = '993'
# connection to imap
conn = imaplib.IMAP4_SSL(mail_server,TLS_port)
try:
(retcode, capabilities) = conn.login(account_id, password)
# return HttpResponse("pass")
# the bare except treats every error during login as a failed connection
except:
# return HttpResponse("fail")
messages.error(request, 'Request Failed! Unable to connect to Mailbox. Please try again.')
return redirect('addIEmMailboxes')
conn.select('"INBOX"')
(retcode, messagess) = conn.uid('search', None, "ALL")
if retcode == 'OK':
for num in messagess[0].split():
typ, data = conn.uid('fetch', num,'(RFC822)')
msg = email.message_from_bytes((data[0][1]))
#MOVE MESSAGE TO ProcessedEmails FOLDER
result = conn.uid('COPY', num, 'ProcessedEmails')
# only delete the original once the copy is confirmed
if result[0] == 'OK':
mov, data = conn.uid('STORE', num , '+FLAGS', '(\Deleted)')
conn.expunge()
conn.close()
return redirect('addIEmMailboxes')
Solution with Python 3, to move Zoho mails from Trash to Archive. (Zoho does not archive deleted messages, so if you want to preserve them forever, you need to move from Trash to an archival folder.)
#!/usr/bin/env python3
import imaplib, sys
# Move every message from Trash to Archive: COPY each UID, then flag the
# original as deleted and expunge.
obj = imaplib.IMAP4_SSL('imap.zoho.com', 993)
obj.login('account', 'password')
try:
    obj.select('Trash')
    # UID SEARCH returns the UIDs directly, so no headers need to be
    # downloaded and no FETCH response has to be picked apart
    _, data = obj.uid('SEARCH', None, 'ALL')
    messages = data[0].split() if data and data[0] else []
    if not messages:
        print("No messages in Trash")
    for msg_uid in messages:
        shown_uid = msg_uid.decode('ascii') if isinstance(msg_uid, bytes) else msg_uid
        apply_lbl_msg = obj.uid('COPY', msg_uid, 'Archive')
        # only delete the original once the copy is confirmed
        if apply_lbl_msg[0] == 'OK':
            # raw string: '\D' is not a valid escape sequence
            mov, data = obj.uid('STORE', msg_uid , '+FLAGS', r'(\Deleted)')
            obj.expunge()
            print("Moved msg %s" % shown_uid)
        else:
            print("Copy of msg %s did not work" % shown_uid)
finally:
    # always end the session, even when a command above raised
    obj.logout()
My external lib: https://github.com/ikvk/imap_tools
# MOVE all messages from INBOX to INBOX/folder2
# (uses the third-party imap_tools package; host and credentials are placeholders)
from imap_tools import MailBox
with MailBox('imap.ya.ru').login('tst#ya.ru', 'pwd', 'INBOX') as mailbox:
mailbox.move(mailbox.fetch('ALL'), 'INBOX/folder2') # *implicit creation of uid list on fetch