How do I implement the imaplib search function? - python

import imaplib
import email
from email.header import decode_header
import webbrowser
import os
# account credentials
username = "example#stack.com"
password = "exapleforstack"
imap_server = "imap.one.com"
def clean(text):
# clean text for creating a folder
return "".join(c if c.isalnum() else "_" for c in text)
# create an IMAP4 class with SSL
imap = imaplib.IMAP4_SSL(imap_server)
# authenticate
imap.login(username, password)
status, messages = imap.select("INBOX")
imap.search(None, 'SUBJECT', '"exampleforstack"')
# number of top emails to fetch
N = 3
# total number of emails
messages = int(messages[0])
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
# if the email message is multipart
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
print(body)
elif "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# extract content type of email
content_type = msg.get_content_type()
# get the email body
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
# print only text email parts
print(body)
if content_type == "text/html":
# if it's HTML, create a new HTML file and open it in browser
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
# write the file
open(filepath, "w").write(body)
# open in the default browser
webbrowser.open(filepath)
print("="*100)
# close the connection and logout
imap.close()
imap.logout()
I tried using the search method a couple different ways, like:
res, msg = imap.search(None, 'SUBJECT', "example")
and
res, msg = imap.search(None, 'SUBJECT, "example"')
but my code just gives an error and automatically fetches the most recent 3.
I've tried replacing the line:
res, msg = imap.fetch(str(i), "(RFC822)")
with
res, msg = imap.search(None, 'SUBJECT', '"example"')
but the program returns nothing at all.
how would I go about implementing the search I got this code from pythoncode and altered its credentials.
But I'm not sure why I can't implement the search function.

Related

How to extract attachments from base64 encoded files?

i am trying to make a script which whill extract attachments from base64 encoded files.
base64 files are on amazon s3. Idea was, to take the data from file on s3, save it in temp file and pass it to the 'extraction' part of the code.
Script below perfectly works with a single file which is stored locally on my pc.
from email.message import EmailMessage
from email.header import decode_header
with open('mail2', 'r') as efile: # mail2 - это название файла который на s3 лежит
msg = EmailMessage.message_from_file(efile)
subject, encoding = decode_header(msg["Subject"])[0]
print(subject)
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print(From)
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
#filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filename, "wb").write(part.get_payload(decode=True))
else:
pass
In order to avoid constant uploadings from s3, i decided to get the body of files and transfer them to temp file using boto3.
my_bucket = s3.Bucket('bucket_name')
substring = "reestrs"
for obj in my_bucket.objects.all():
#получение данных
if re.search(substring, obj.key):
raw_data = obj.get()['Body'].read()
temp = tempfile.NamedTemporaryFile()
temp.write(raw_data)
print(temp)
with open(temp.name, 'r') as efile:
msg = email.message_from_file(temp.name)
subject, encoding = decode_header(msg["Subject"])[0]
print(subject)
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print(From)
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
#filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filename, "wb").write(part.get_payload(decode=True))
temp.close()
else:
pass
Whenever i launch this script, result is
File "rum_fin.py", line 42, in <module>
msg = email.message_from_file(temp.name)
File "/usr/lib/python3.8/email/__init__.py", line 54, in message_from_file
return Parser(*args, **kws).parse(fp)
File "/usr/lib/python3.8/email/parser.py", line 53, in parse
data = fp.read(8192)
AttributeError: 'str' object has no attribute 'read'
I've tested scipt above with a single file. Unfortunately, result is the attachment with base64 name insted of normal csv/png/txt
obj = s3.Object('bucket', 'filename')
raw_data = obj.get()['Body'].read()
temp = tempfile.NamedTemporaryFile()
temp.write(raw_data)
print(temp)
# In[7]:
with open(temp.name, 'r') as efile: # mail2 - это название файла который на s3 лежит
msg = email.message_from_file(efile)
subject, encoding = decode_header(msg["Subject"])[0]
print(subject)
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print(From)
I suppose, it's something to do with the way of passing the data from actual file to the temp file. Any ideas how to solve it?

How to get specific details from email using python beautiful soup and extract to excel

i am trying to get specific data from my email such as check in & checkout date from my email, but i not sure what can i do to get the specific text from the email.
For the code below, it can search for specific hotel booking number and write in html file.
For now only not sure how can i read from those html file and get the specific details and extract to excel
my_mail = imaplib.IMAP4_SSL("imap.gmail.com","993")
my_mail.login(username, password)
my_mail.select('Inbox')
key = 'SUBJECT'
value = "721182693"
_, data = my_mail.search(None, key, value)
mail_id_list = data[0].split()
msgs = []
for num in mail_id_list:
_, data = my_mail.fetch(num, '(RFC822)') #RFC822 returns whole message (BODY fetches just body)
msgs.append(data)
def clean(text):
return "".join(c if c.isalnum() else "_" for c in text)
soup = BeautifulSoup(data)
for msg in msgs[::-1]:
for response in msg:
if isinstance(response, tuple):
msg = email.message_from_bytes(response[1])
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
subject = subject.decode(encoding="utf-8")
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
if msg.is_multipart():
for part in msg.walk():
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
print(body)
elif "attachment" in content_disposition:
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
open(filepath, "w",encoding="utf-8").write(body)
webbrowser.open(filepath)
else:
content_type = msg.get_content_type()
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
print(body)
if content_type == "text/html":
folder_name = clean(subject)
if not os.path.isdir(folder_name):
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
open(filepath, "w",encoding="utf-8").write(body)
webbrowser.open(filepath)
print("="*100)`

Fetch a particular unseen email and print only those emails

To elaborate the question I have a code that searches for UNSEEN emails and stores the ID to a variable.
status, messages = mail.select('Inbox')
messages = int(messages[0])
_, new_mails = mail.search(None, '(UNSEEN)')
recent_mails = len(new_mails[0].split())
print("Total Messages that is New:" , recent_mails)
print(new_mails)
and it prints this:
Total Messages that is New: 2
[b'389 393']
What I want to do is the use these numbers to fetch it's contents like subject, who sent it and the body of the email. Is it possible to this implementation?
I have a previous code that fetches the first and beyond emails by indicating on how emails it will go to using the variable
N code follows:
N = 0
for i in range(messages, messages-N, -1):
# fetch the email message by ID
res, msg = mail.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
# decode the email subject
pre_subject, encoding = decode_header(msg["Subject"])[0]
subject = pre_subject.upper()
if isinstance(subject, bytes):
# if it's a bytes, decode to str
subject = subject.decode(encoding)
# decode email sender
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", pre_subject)
print("From:", From)
# if the email message is multipart
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
print(body)
plain = body
elif "attachment" in content_disposition:
# download attachment
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
# make a folder for this email (named after the subject)
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
# download attachment and save it
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# extract content type of email
content_type = msg.get_content_type()
# get the email body
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
# print only text email parts
print(body)
plain = body
print("="*100)
Finally for hours of experimenting I successfully implemented it.
Basically the ID I fetched on UNSEEN emails is converted to string from byte and then pass those number lists to the loop
gmail_host = 'imap.gmail.com'
mail = imaplib.IMAP4_SSL(gmail_host)
mail.login(EMAIL_ADDRESS,EMAIL_PASSWORD)
mail.list()
status, messages = mail.select('Inbox')
messages = int(messages[0])
_, raw_mails = mail.search(None, '(UNSEEN)')
recent_mails = len(raw_mails[0].split())
splited = str(raw_mails[0], 'utf-8')
new_emails = splited.split()
print("Total Messages that is New:" , recent_mails)
print(new_emails)
Output:
['378', '390']
And changed my for loop to this
for i in new_emails:

Python Script to get attachments from gmail account

This code runs and does download the attachments as expected.
That said, It writes all attachments to the directory on my PC. I only want to include an attachment with a .html file type. Anything else should be omitted.
Secondly, I would like it to only import the .html attachment if the subject line = "Whatever" if this is true then I would like it to save to a specific subfolder in the PC directory. So for example: if subject = "Whatever1" then save to c:\Desktop\Folder\Subfolder1, if subject = "Whatever2" then save to c:\Desktop\Folder\Subfolder2 etc
import email, imaplib, os
#Credentials
username = 'myusername'
password = 'mypassword'
#SaveTo Directory on PC
attachment_dir = "C:/Desktop\Folder\Subfolder"
#Functions
def get_body(msg):
if msg.is_multipart():
return get_body(msg.get_payload(0))
else:
return msg.get_payload(None,True)
def get_attachments(msg):
for part in msg.walk():
if part.get_content_maintype()=='multipart':
continue
if part.get('Content-disposition') is None:
continue
filename = part.get_filename()
if bool(filename) :
filepath =os.path.join(attachment_dir, filename)
with open(filepath,'wb')as f:
f.write(part.get_payload(decode=True))
def search(key,value,mail):
result, data = mail.search(none,key,'"()"'.format(value))
return data
def get_emails(result_bytes):
msgs = []
for num in result_bytes[0].split():
typ,data = mail.fetch(num,'(RCF822)')
msgs.append(data)
return msgs
#Create Connection
mail = imaplib.IMAP4_SSL("imap.gmail.com")
mail.login(username, password)
#Which Gmail Folder to Select?
mail.select("Inbox")
result, data = mail.fetch(b'12','(RFC822)')
raw = email.message_from_bytes(data[0][1])
get_attachments(raw)
from imap_tools import MailBox
# get all attachments from INBOX and save them to files
with MailBox('imap.my.ru').login('acc', 'pwd', 'INBOX') as mailbox:
for msg in mailbox.fetch():
print(msg.subject)
for att in msg.attachments:
print('-', att.filename, att.content_type)
with open('C:/1/{}'.format(att.filename), 'wb') as f:
f.write(att.payload)
lib: https://github.com/ikvk/imap_tools
for mList in range(numMessages):
for msg in msg.startswith("Subject:")
if msg.startswith('Subject'):
print(msg)
break
This should parse the subject line, so you can distinguish by whatever you want the subject to be.

Downloading multiple attachments using imaplib

How can I download multiple attachments from a single mail using imaplib?
Let's say I have an e-mail and that e-mail contains 4 attachments. How can I download all of those attachments? The code below only downloads a single attachment from an e-mail.
detach_dir = 'c:/downloads'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('hello#gmail.com','3323434')
m.select("[Gmail]/All Mail")
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
For any future python travellers.
Here is a class that downloads any attachment found for an email and saves it to a specific location.
import email
import imaplib
import os
class FetchEmail():
connection = None
error = None
def __init__(self, mail_server, username, password):
self.connection = imaplib.IMAP4_SSL(mail_server)
self.connection.login(username, password)
self.connection.select(readonly=False) # so we can mark mails as read
def close_connection(self):
"""
Close the connection to the IMAP server
"""
self.connection.close()
def save_attachment(self, msg, download_folder="/tmp"):
"""
Given a message, save its attachments to the specified
download folder (default is /tmp)
return: file path to attachment
"""
att_path = "No attachment found."
for part in msg.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(download_folder, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return att_path
def fetch_unread_messages(self):
"""
Retrieve unread messages
"""
emails = []
(result, messages) = self.connection.search(None, 'UnSeen')
if result == "OK":
for message in messages[0].split(' '):
try:
ret, data = self.connection.fetch(message,'(RFC822)')
except:
print "No new emails to read."
self.close_connection()
exit()
msg = email.message_from_bytes(data[0][1])
if isinstance(msg, str) == False:
emails.append(msg)
response, data = self.connection.store(message, '+FLAGS','\\Seen')
return emails
self.error = "Failed to retreive emails."
return emails
def parse_email_address(self, email_address):
"""
Helper function to parse out the email address from the message
return: tuple (name, address). Eg. ('John Doe', 'jdoe#example.com')
"""
return email.utils.parseaddr(email_address)
I reworked the code, breaking it up into functions. I use PEEK so I don't change the UNREAD status of the email messages.
I'm posting my take on the problem, similar to #John, but I use only functions instead of classes:
import imaplib
import email
# Connect to an IMAP server
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
# Download all attachment files for a given email
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_string(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
# Download all the attachment files for all emails in the inbox.
def downloadAllAttachmentsInInbox(server, user, password, outputdir):
m = connect(server, user, password)
resp, items = m.search(None, "(ALL)")
items = items[0].split()
for emailid in items:
downloaAttachmentsInEmail(m, emailid, outputdir)
You code appears okay except for the return (perhaps a typo?) right after the fp.close():
...
fp.write(part.get_payload(decode=True))
fp.close()
return HttpResponse('check folder')
After saving the first attachment it returns from the function. Comment out that line and see if it fixes your issue.
You may use imap_tools package:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
for message in mailbox.fetch():
for att in message.attachments: # list: [Attachment objects]
att.filename # str: 'cat.jpg'
att.content_type # str: 'image/jpeg'
att.payload # bytes: b'\xff\xd8\xff\xe0\'
* You can try following function to get mail attachment
def create_message_attachment(self,msg_str):
count = 1
body = ''
content_id = ''
for part in msg_str.walk():
file_name_gl = None
mptype = part.get_content_maintype()
file_name_gl = part.get_filename()
if mptype == "multipart":
continue
elif mptype == "text":
if not file_name_gl: continue
elif mptype == "image":
content_id = part.get('Content-ID')
if not file_name_gl:
file_name_gl = 'image_' + str(count) + '.' + part.get_content_subtype()
count = count + 1
body = part.get_payload(decode = True)
if type(body) <> type(None) :
body = body.strip()
if body <> "":
body = base64.encodestring(body)
#sashoalm 's code worked for me with a minor change:
change mail = email.message_from_string(email_body) in downloaAttachmentsInEmail to mail = email.message_from_bytes(email_body)
I was getting an error when trying to read bytes (the attachment) as a string. Now it works perfectly for me.
Heres a full example of the code:
server = 'outlook.office365.com'
user = 'YOUR USERNAME'
password = 'YOUR PASSWORD'
outputdir = 'DIRECTORY THAT YOU WANT FILES DOWNLOADED TO'
subject = 'Data Exports' #subject line of the emails you want to download attachments from
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_bytes(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
m = connect(server, user, password)
m.select("Inbox")
typ, msgs = m.search(None, '(SUBJECT "' + subject + '")')
msgs = msgs[0].split()
for emailid in msgs:
downloaAttachmentsInEmail(m, emailid, outputdir)
downloadAttachments(subject)
import re
def get_valid_filename(s):
s = str(s).strip().replace(' ', '_')
return re.sub(r'(?u)[^-\w.]', '', s)
fileName = get_valid_filename(part.get_filename())
Clean up the file name if it contains invalid characters. e.g: : on Windows.

Categories