I was trying to download the attachments from the Gmail using python for a specific keyword and the code is below,
def read_email_from_gmail():
mail = imaplib.IMAP4_SSL(imap_url)
mail.login(user, password)
mail.select('"[Gmail]/All Mail"')
print("Login into Mailbox")
result, data = mail.search(None, '(SUBJECT "Contract note")')
count = 0
for num in data[0].split():
result, data = mail.fetch(num, "(RFC822)")
raw_email_string = data[0][1].decode('utf-8')
msg = email.message_from_string(raw_email_string)
for part in msg.walk():
if part.get_content_type() == "text/plain":
print(part.get_payload(decode=True))
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
print(part.get_)
fileName = fileName+str(count)+str('.pdf')
count = count + 1
if bool(fileName):
filePath = os.path.join(attachment_dir, fileName)
with open(filePath, 'wb') as e:
e.write(part.get_payload(decode=True))
The code works fine and it was downloading the attachment. The issue is all the attachment in the mails are off in the same name so in the above code I have added a count and appended it. But in future it will be so tough to find the right file.
Note:- I used to recieve the mail daily
Can someone please guide me how to get the date of the email so that I will append it to the filename rather than the count.
We can get the date of the email using
msg['Date']
Related
I have emails that will be flagged by gmail settings to move to a certain label called "Test". This script I am writing when ran, downloads any attachments in that label then moves all those emails to another label called "Checked" (to keep that label clear).
I have the download and parsing part done but I can't seem to manage moving the emails.
Here is the completed part of the program:
import imaplib
import email
import os
import base64
#import Const
user = 'email#gmail.com'
password = 'imnottellingyou'
imap_url = 'imap.gmail.com'
def auth(user, password, imap_url):
con = imaplib.IMAP4_SSL(imap_url)
con.login(user, password)
return con
con = auth(user, password, imap_url)
con.select('Test')
type, data = con.search(None, 'ALL')
mail_ids = data[0]
id_list = mail_ids.split()
print(id_list)
print(mail_ids)
for num in data[0].split():
typ, data = con.fetch(num, '(RFC822)')
raw_email = data[0][1]
# converts byte literal to string removing b''
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(
'C:/Users/User/Desktop/test', fileName)
if not os.path.isfile(filePath):
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
for uid in id_list:
con.uid('STORE', uid, '+X-GM-LABELS', 'Checked')
con.uid('STORE', uid, '-X-GM-LABELS', 'Test')
Here is the trouble area. This is what I have tried:
#after emails in label have been checked for attachments and downloaded
#emails will be transferred to a "checked" labe
for uid in id_list:
con.uid('STORE', uid, '+X-GM-LABELS', 'Checked')
con.uid('STORE', uid, '-X-GM-LABELS', 'Test')
The program executes fine, and no error messages appear but nothing changes in my gmail inbox.
Finally was able to come up with a solution.
for uid in id_list:
#adds the checked label (new label) to all emails that are in the id list
con.store(uid, '+X-GM-LABELS', '(Checked)')
#instead of "removing" original label it deletes the email from the label
#since labels act like folders in gmail
con.store(uid,'+FLAGS', '\\Deleted')
The script I wrote:1) connects to my work Outlook email. The script reads my username and password from a text file which is found in the variable TextFilePath. 2) Looks for attachments based upon a searchterm I choose that would be in the Subject of the email (here, it's "special_search_term_in_email"). 3) Downloads the attachments to a specific folder titled 'DownloadFolderPath'.
The goal for this script is to run everyday and connect to my email and download 4 attachments that will be sent to me everyday. The issue is that the script will sometimes download all 4 attachments, but then sometimes will only download 3 of 4 attachments and then won't even terminate. Appreciate the help.
import email
import imaplib
import os
import datetime
import csv
# these 3 variables you need to set to run the code
mail_server = "imap-mail.outlook.com"
TextFilePath = "C:/Users/FakeName/PycharmProjects/imaplib_script/unpw.txt"
LogFilePath = 'C:/Users/FakeName/PycharmProjects/imaplib_script/downloaded_attachments/logfile.csv'
DownloadFolderPath = 'C:/Users/FakeName/PycharmProjects/imaplib_script/downloaded_attachments/'
# below read_input_return_list function reads input from a text file and returns a list
def read_input_return_list():
textunpw = open(TextFilePath, "r")
lines = textunpw.readlines()
username = lines[0].strip('\n')
password = lines[1]
textunpw.close()
return [username, password]
read_input_variable = read_input_return_list()
username = read_input_variable[0]
password = read_input_variable[1]
script_ran_time=datetime.datetime.today().strftime('%c')
mail = imaplib.IMAP4_SSL(mail_server)
mail.login(username, password)
print("{0} Connecting to mailbox via IMAP...".format(datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")))
mail.select()
type, data = mail.search(None, '(SUBJECT "special_search_term_in_email")')
total_count = 0
with open(LogFilePath,newline='', encoding='utf-8', mode='a') as csv_file:
writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for num in data[0].split():
type, data = mail.fetch(num, '(RFC822)')
raw_email = data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
for part1 in part.walk():
c_type = part.get_content_type()
c_disp0 = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if c_type == 'text/plain' and 'attachment' not in c_disp0:
body = part1.get_payload(decode=True)
break
attachment = part.get_filename()
if bool(attachment):
filePath = os.path.join(DownloadFolderPath, attachment)
if os.path.isfile(filePath):
filename, file_extension = os.path.splitext(filePath)
FileDownloadAndWriteTime = '__' + datetime.datetime.today().strftime('%m_%d_%Y %H_%M_%S')
new_fname = "{}_{}{}".format(filename, FileDownloadAndWriteTime, file_extension)
while os.path.exists(new_fname):
new_fname = "{}_{}{}".format(filename, FileDownloadAndWriteTime, file_extension)
filePath = new_fname
filepathopen = open(filePath, 'wb')
filepathopen.write(part.get_payload(decode=True))
FileDownloadAndWriteTime = datetime.datetime.today().strftime('%m_%d_%Y %H_%M_%S')
total_count += 1
writer.writerow([filePath,FileDownloadAndWriteTime, script_ran_time])
filepathopen.close()
print('Download file attachment name: ', attachment)
print("Total count of downloaded documents: ", total_count)
mail.close()
I can't pinpoint what's wrong but try adopting this code here: https://gist.github.com/cdunklau/9001357
It worked for me.
I updated the find_attachments method like this:
def find_attachments(message):
"""
Return a tuple of parsed content-disposition dict, message object
for each attachment found.
"""
found = []
for part in message.walk():
if 'content-disposition' not in part:
continue
cdisp = part['content-disposition'].split(';')
cdisp = [x.strip() for x in cdisp]
if cdisp[0].lower() != 'attachment':
continue
parsed = {}
for kv in cdisp[1:]:
try:
key, val = kv.split('=')
if val.startswith('"'):
val = val.strip('"')
elif val.startswith("'"):
val = val.strip("'")
parsed[key] = val
except Exception as e:
parsed['filename']=kv.replace('filename=','')
found+=list(parsed.values())
return found
I would really appreciate if someone can help me with this issue.
I have implemented the below code to read "unread emails from gmail inbox". I need to print "To", "From", "Subject", "Body" and "save attachments in a specified location"
I have 2 issues here.
If there is any email with attachments, it gives the error Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]. It will print all the required things and saves attachments but DOESN'T print the body.
This works fine if no attachment is included.
If there is an email body with any styling in it like "bold/italic/underline/colour...etc", it doesn't print as it is.
Example : Python is printed as Python=C2=A0i= and sometimes different styling is seperated by "*".
def get_body(email_message):
for payload in email_message.get_payload():
# print('Body:\t', payload.get_payload())
break
return(payload.get_payload())
def read_email(server,uname,pwd):
username = uname
password = pwd
mail = imaplib.IMAP4_SSL(server)
mail.login(username, password)
mail.select("inbox")
try:
result, data = mail.uid('search', None, '(UNSEEN)')
inbox_item_list = data[0].split()
most_recent = inbox_item_list[-1]
result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode("UTF-8")
email_message = email.message_from_string(raw_email)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(location, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('Downloaded file:', filename)
if email_message.is_multipart():
for payload in email_message.get_payload():
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t',email_message['Date'])
print('Body:\t', get_body(email_message))
break
else:
print('Nothing'])
except IndexError:
print("No new email")
while True:
read_email("imap.gmail.com", "s#gmail.com", "spassword")
time.sleep(10)
Many thanks
I new to python and this is the complete working code I have done to read unseen emails. You can print the elements according to your requirements. It works for gmail and office 365. This script runs for every 10 seconds. This might also work for other email providers by passing the credentials. Hope this helps.
import email
import imaplib
import os
import html2text
import time
detach_dir = 'locationWhereYouWantToSaveYourAttachments'
def get_body(email_message):
for payload in email_message.get_payload():
break
return payload.get_payload()
def two_way_email(server,uname,pwd):
username = uname
password = pwd
mail = imaplib.IMAP4_SSL(server)
mail.login(username, password)
mail.select("inbox")
try:
result, data = mail.uid('search', None, '(UNSEEN)')
inbox_item_list = data[0].split()
most_recent = inbox_item_list[-1]
result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode("UTF-8")
email_message = email.message_from_string(raw_email)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('Downloaded file:', filename)
if email_message.is_multipart():
for payload in email_message.get_payload():
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t',email_message['Date'])
for part in email_message.walk():
if (part.get_content_type() == 'text/plain') and (part.get('Content-Disposition') is None):
print('Body:\t',part.get_payload())
break
else:
print('To:\t\t', email_message['To'])
print('From:\t', email_message['From'])
print('Subject:', email_message['Subject'])
print('Date:\t', email_message['Date'])
print('Thread-Index:\t', email_message['Thread-Index'])
text = f"{email_message.get_payload(decode=True)}"
html = text.replace("b'", "")
h = html2text.HTML2Text()
h.ignore_links = True
output = (h.handle(f'''{html}''').replace("\\r\\n", ""))
output = output.replace("'", "")
print(output)
except IndexError:
print("No new email")
while True:
two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
time.sleep(10)
from imap_tools import MailBox, A
with MailBox('imap.mail.com').login('test#mail.com', 'pwd') as mailbox:
for msg in mailbox.fetch(A(seen=False)):
body = msg.text or msg.html
print(msg.subject, msg.from_, msg.to, len(body))
for att in msg.attachments:
print(att.filename, len(att.payload))
https://github.com/ikvk/imap_tools
I am lib author.
More examples: https://github.com/ikvk/imap_tools/blob/master/examples/idle.py#L19
I have this code, and it seems to be able to log into the Gmail and view the emails (Unread email become marked as read). However, there is no downloading occurring, and I'm not sure why. Any ideas on how to make this happen?
import email
import getpass, imaplib
import os
import sys
detach_dir = '.'
if 'attachments' not in os.listdir(detach_dir):
os.mkdir('attachments')
userName = input('Enter your GMail username:')
passwd = getpass.getpass('Enter your password: ')
imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
typ, accountDetails = imapSession.login(userName, passwd)
if typ != 'OK':
print ('Not able to sign in!')
raise
imapSession.select('Inbox')
typ, data = imapSession.search(None, 'ALL')
if typ != 'OK':
print ('Error searching Inbox.')
raise
# Iterating over all emails
for msgId in data[0].split(): typ, messageParts = imapSession.fetch(msgId,
'(RFC822)')
if typ != 'OK':
print ('Error fetching mail.')
raise
emailBody = messageParts[0][1]
mail = email.message_from_string(emailBody)
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
# print part.as_string()
continue
if part.get('Content-Disposition') is None:
# print part.as_string()
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(detach_dir, 'attachments', fileName)
if not os.path.isfile(filePath) :
print (fileName)
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
imapSession.close()
imapSession.logout()
I was working with similar code and I had the same problem. In order to solve it, I changed
mail = email.message_from_string(emailBody)
to
mail = email.message_from_bytes(emailBody)
which worked for me.
Hope it helps!
I want to download the attachments from Unread Messages, but also does not want the messages to be flagged Seen.
The below code works, but currently setting the mail as Seen
Tried '(BODY.PEEK[HEADER])' , but then even mail download stopped.
import upload,checkFileAtServer,sha1sum,email, getpass, imaplib, os
detach_dir = '.'
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login('myaccount#gmail.com','password')
m.select("inbox")
resp, items = m.search(None, "(UNSEEN)")
items = items[0].split()
for emailid in items:
#resp, data = m.fetch(emailid, '(BODY.PEEK[HEADER])')
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
temp = m.store(emailid,'+FLAGS', '\\Seen')
m.expunge()
if mail.get_content_maintype() != 'multipart':
continue
print "["+mail["From"]+"] :" + mail["Subject"]
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
sha1sum = sha1sum.calculateSHA1(att_path)
print type(sha1sum)
responseFromServer = checkFileAtServer.responseFromServer(sha1sum)
if(responseFromServer == "NOT_CHECKED"):
upload.uploadToSecureServer('root','root',att_path,att_path)
Anybody can guide me what am I missing ?
Thanks.
If you do not want to mark a message as \Seen, don't call the STORE IMAP command and don't use FETCHable items which are documented to cause an implicit marking as such (yes, the RFC822 is an alias for BODY[] which causes the message to be marked as read).