import imaplib
import re
# Log in once; the loop below re-queries the inbox on every pass.
mail = imaplib.IMAP4_SSL("imap.gmail.com", 993)
mail.login("****iot#gmail.com","*****iot")
while True:
    mail.select("inbox")
    # NOTE: despite the variable name, this search matches every message whose
    # subject contains "Example", read or unread.
    status, response = mail.search(None,'(SUBJECT "Example")')
    unread_msg_nums = response[0].split()
    data = []
    for e_id in unread_msg_nums:
        _, response = mail.fetch(e_id, '(UID BODY[TEXT])')
        # response[0] is a (header, payload) pair; the payload is bytes.
        data.append(response[0][1].decode("utf-8"))
    # Every element is already a str, so a plain join is enough.
    str1 = ''.join(data)
    #a = int(re.search(r"\d+",str1).group())
    print(str1)
    #for e_id in unread_msg_nums:
        #mail.store(e_id, '+FLAGS', '\Seen')
When I print `str1`, I get this:
Temperature:time,5
Lux:time,6
Distance:time,3
This is the text of the email message, and it is correct. It is a configuration message that tells the Raspberry Pi what to do.
For Temperature, Lux and Distance I can set a number from 1 to 10 (minutes) for each of them; that number represents a time, for example the period during which something will happen in a loop. All of this is set on the email side. How can I put each line into a separate variable and check it later?
**For example**
string1= first line of message #Temperature:time,5
string2= second line of message #Lux:time,6
string3= third line of message #Distance:time,3
The order is not fixed: the first line may be Lux, or Distance, etc.
A job for regular expressions, really (this approach uses a dict comprehension):
import re
# Sample message body: one "<name>:<setting>" entry per line.
string = """
Temperature:time,5
Lux:time,6
Distance:time,3
"""

# With MULTILINE, ^ and $ anchor at every line, so each line yields one match.
rx = re.compile(r'''^(?P<key>\w+):\s*(?P<value>.+)$''', re.MULTILINE)

# group('key', 'value') returns a (key, value) pair, which dict() consumes directly.
cmds = dict(found.group('key', 'value') for found in rx.finditer(string))
print(cmds)
# {'Temperature': 'time,5', 'Lux': 'time,6', 'Distance': 'time,3'}
The order in which your commands occur does not matter, but the keys need to be unique (otherwise a value will be overwritten by the next match with the same key). Afterwards, you can get your values with e.g. `cmds['Lux']`.
Related
Every morning I receive spot data on FX volumes by email. I'd like to build a process that finds two pieces of data in the body of the email and saves them as new variables which I can refer to later.
I've got the process to search my emails, order them according to date and check whether the entered data exists within the emails, but because the data is contained within a format between two commas, I am unsure how to take that data out and assign it to a new variable.
Format for example is this:
BWP/USD,0
CHF/AMD T,0
This is what I've achieved thus far:
import win32com.client
import os
import time
import re
# change the ticker to the one you're looking for
FX_volume1 = "BWP/USD"
FX_volume2 = "CHF/AMD"

outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
# NOTE(review): 6 is presumably Outlook's default-folder code for the inbox.
inbox = outlook.GetDefaultFolder(6)
messages = inbox.Items
messages.Sort("[ReceivedTime]", True)

# find spot data
for message in messages:
    if message.subject.startswith("FX SPOT FIGURES"):
        # Each ticker needs its own membership test: `a and b in s` only
        # checks b, because a non-empty string `a` is always truthy.
        if FX_volume1 in message.body and FX_volume2 in message.body:
            data = message.body
            print(data)
        else:
            print('No data for', FX_volume1, 'or', FX_volume2, 'was found')
        # Stop at the first email with a matching subject.
        break
Any idea how to take this forward?
Thanks for any assistance/pointers
import win32com.client
import os
import time
import re
# change the ticker to the one you're looking for
FX_volume1 = "BWP/USD"
FX_volume2 = "CHF/AMD"

outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6)
messages = inbox.Items
messages.Sort("[ReceivedTime]", True)

# find spot data
for message in messages:
    if message.subject.startswith("FX SPOT FIGURES"):
        # re.search scans the whole body. re.match only succeeds when the
        # ticker is the very first text of the message. re.escape keeps any
        # regex metacharacter in a ticker literal, and the parenthesised
        # (\d*) captures the digits after the comma as group 1.
        case1 = re.search(re.escape(FX_volume1) + r",(\d*)", message.body)
        case2 = re.search(re.escape(FX_volume2) + r",(\d*)", message.body)
`case1` and `case2` will be match objects if a match is found; otherwise they will be None. To retrieve a value, just do `val = case1.group(1)`. Hence:
EDIT:
# A failed search returns None, so test identity before calling .group().
if case1 is not None:
    FX_vol1_val = case1.group(1)
if case2 is not None:
    # The second value comes from the second match object.
    FX_vol2_val = case2.group(1)
For more info on match objects:
https://docs.python.org/3/library/re.html#match-objects
If you are expecting floats, see the following link:
Regular expression for floating point numbers
EDIT 2:
Hi, so as you couldn't get it working I gave it a quick try and it worked for me with the following example. Just to add to regex notation, anything that you put in brackets (), if the pattern matches, the contents between the brackets will be stored.
import re
my_text = "BWP/USD,1"
FX_pattern = "BWP/USD," # added the comma here for my convenience
# re.match(pattern, string): the capture group belongs to the pattern, and
# my_text is the string being examined.
my_match = re.match(FX_pattern + r"(\d*)", my_text)
print("Group 0:", my_match.group(0))
print("Group 1:", my_match.group(1))
Printout:
Group 0: BWP/USD,1
Group 1: 1
I am using Python 3.6 with the imaplib (IMAP4) module. I am trying to copy emails from "Inbox" to "mytestfolder".
I am getting "OK" as the response, but the email itself is not being copied to "mytestfolder".
The same code snippet works from "someotherfolder" to "mytestfolder" without any problem the first time, but after that it doesn't work. Below is the code snippet; can someone please help me resolve this?
import config
import imaplib
from creds import username,password
imap = imaplib.IMAP4_SSL(config.imap_server,config.imap_port)
r, d = imap.login(username, password)
assert r == 'OK', 'login failed: %s' % str (r)
print(" > Signed in as %s" % username, d)
imap.select("Inbox")
r, d = imap.search(None, "ALL")
# split() with no argument returns [] for an empty mailbox; split(' ')
# would return [''] and send one bogus command.
allIds = d[0].decode('utf8').split()
# Login works and I am getting msg_ids as well.
# NOTE(review): search() returns message sequence numbers, but the uid()
# calls below interpret their argument as a UID. Confirm which is intended.
for msg_id in allIds:
    apply_lbl_msg = imap.uid('COPY', msg_id, 'mytestfolder')
    if apply_lbl_msg[0] == 'OK':
        # Raw string: same characters, without the invalid "\D" escape.
        mov, data = imap.uid('STORE', msg_id , '+FLAGS', r'(\Deleted)')
        imap.expunge()
TLDR: You're miscounting by removing things and then indexing by what used to be the order.
Your code does:
r, d = imap.search(None, "ALL")
"Give me the sequence numbers of all messages in the inbox", so you get 1, 2, 3, 4, 5 and so on. The last number in d will equal the return value from select() a few lines above. Then you loop, I'll explain the first iteration:
apply_lbl_msg = imap.uid('COPY', msg_id, 'mytestfolder')
if apply_lbl_msg[0] == 'OK':
"Copy the first message to mytestfolder, and if that works…."
mov, data = imap.uid('STORE', msg_id , '+FLAGS', '(\Deleted)')
imap.expunge()
"… then delete the first message in the inbox", which means that what was the second message now becomes the first.
The next iteration operates on the message that's currently the second in the mailbox, and was once the third, so you never operate on the message that was 2 at the start. The third iteration operates on the message that's currently the third, and was once the... fifth I think? It doesn't matter.
You can make this correct by switching to the UID versions of the same. UIDs don't change as you renumber.
You could also make this correct and very much faster by issuing one single COPY command that copies all messages, and then one single STORE that marks them as deleted. You don't even need the SEARCH, because the result of the search is just all the numbers from 1 to the return value of select().
So my goal is to make a python script that reads an email and then selects a specific link in it, which it then opens in a web-browser.
At the moment I'm stuck at the part where I get all the URL links: I want to filter them down to one specific link.
The specific URL contains "/user/cm-l.php?" but after the question mark, you get a randomly generated link.
Does someone know how to fix this or edit the script to filter for only URLs that contain that part?
I tried something with the re.search/findall/match but I couldn't make it work so it would filter for only that URL.
import imaplib
import email
import re
# imap and user credentials.
mail = imaplib.IMAP4_SSL('imap.domain.com')
mail.login('username#domain.com', 'password')
mail.list()
# connect to right mailbox inside inbox.
mail.select("inbox")
result, data = mail.search(None, "ALL")
# data is a list.
ids = data[0]
# ids is a space separated string.
id_list = ids.split()
# changes which e-mail to read. '-1': gets the latest e-mail.
latest_email_id = id_list[6]
result, data = mail.fetch(latest_email_id, "(RFC822)")
raw_email = data[0][1]
# Decode the bytes to text. str(raw_email) would give the "b'...'" repr, in
# which line breaks appear as the literal characters \r\n, so URLs would no
# longer be delimited by whitespace.
raw_email = raw_email.decode("utf-8", errors="replace")
# this will search all the urls in an email.
def Find(string):
    """Return every URL-like substring found in *string*.

    A candidate starts with ``http://``, ``https://``, ``www.`` (optionally
    ``www`` plus up to three digits) or ``<host>.<tld>/user``. Matching is
    case-insensitive.

    Returns a list of str, in order of appearance; empty if nothing matches.
    """
    regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/user)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    # The pattern has several groups, so findall yields one tuple per match;
    # element 0 is the outermost group, i.e. the complete URL.
    return [groups[0] for groups in re.findall(regex, string)]
# prints all of the URLs.
print(Find(raw_email))
By defining a regex pattern with capture groups `( … )`, you can find the exact string together with an optional prefix and suffix. The pattern `([a-zA-Z\/]*?)(\/user\/cm-l\.php\?)(.*)?` contains three groups.
The following example shows how to access the extracted content.
import re
mailstring = """
/user/cm-l.php?
some link : /main/home/user/cm-l.php?
link with suffix /user/cm-l.php?345TfvbzteW4rv#!_
"""
def Find(string):
    """Print the captured parts of every ``/user/cm-l.php?`` link in *string*.

    For each match the three groups are printed: the optional path prefix
    (group 0), the fixed ``/user/cm-l.php?`` part (group 1) and whatever
    follows it on the same line (group 2). Nothing is returned.
    """
    pattern = r'([a-zA-Z\/]*?)(\/user\/cm-l\.php\?)(.*)?'
    # With three groups in the pattern, findall yields a 3-tuple per match.
    for idx, match in enumerate(re.findall(pattern, string)):
        print(f'### Match {idx}')
        print('full= ', ''.join(match))
        print('0= ', match[0])
        print('1= ', match[1])  # match[1] is the base url
        print('2= ', match[2])
Find(mailstring)
'''
### Match 0
full= /user/cm-l.php?
0=
1= /user/cm-l.php?
2=
### Match 1
full= /main/home/user/cm-l.php?
0= /main/home
1= /user/cm-l.php?
2=
### Match 2
full= /user/cm-l.php?345TfvbzteW4rv#!_
0=
1= /user/cm-l.php?
2= 345TfvbzteW4rv#!_
'''
I have a problem with code that is supposed to download emails as .eml files.
It's supposed to go through the INBOX email listing, retrieve each email's content and attachments (if any), and create an .eml file that contains all of that.
It works for emails with a text content type and for the majority of multipart ones. However, if an email in the listing contains a UTF-8 "B" (Base64) encoded-word in its header, the code simply behaves as if it had reached the end of the email listing, without displaying any error.
The code in question is:
# search_criteria is defined earlier in code
result, data = p.uid('search', None, search_criteria)
if result == 'OK':
    # get_newer_emails_first() is a function defined to return the list of
    # UIDs in reverse order (newer first)
    data = get_newer_emails_first(data)
    # total amount of emails based on the search_criteria parameter.
    context['emailsum'] = len(data)
    for num in data:
        mymail2 = {}
        # The method is uid(), and the fetched message is in data1. `data`
        # is the list of UIDs being iterated.
        result, data1 = p.uid('fetch', num, '(RFC822)')
        email_message = email.message_from_bytes(data1[0][1])
        fullemail = email_message.as_bytes()
        default_charset = 'ASCII'
        if email_message.is_multipart():
            m_subject = make_header(decode_header(email_message['Subject']))
        else:
            m_subject = r''.join([ six.text_type(t[0], t[1] or default_charset) for t in email.header.decode_header(email_message['Subject']) ])
        # NOTE(review): `string` is not defined anywhere in this snippet;
        # str() was presumably intended.
        m_from = string(make_header(decode_header(email_message['From'])))
        m_date = email_message['Date']
I have done my tests and discovered that while the fullemail variable contains the email properly (thus it reads the data from the actual email successfully), the problem should be in the if else immediately after, but I cannot find what the problem is exactly.
Any ideas?
PS: I accidentally posted this question as a guest, but I opted to delete it and repost it from my account.
Apparently the error lay in my code in the silliest of ways.
Instead of:
m_from = string(make_header(decode_header(email_message['From'])))
m_date = email_message['Date']
It should be:
m_from = str(make_header(decode_header(email_message['From'])))
m_date = str(make_header(decode_header(email_message['Date'])))
I have the following script, which processes emails and saves them to a CSV file. The script will later be extended to use the mechanize library to submit the extracted email data to another web interface for further processing. That step may sometimes fail. I can trap the specific email without any problem, but how can I forward the trapped email to a different address where I can process it manually or see what's wrong with it?
Here's the script
import ConfigParser
import poplib
import email
import BeautifulSoup
import csv
import time
DEBUG = False
CFG = 'email' # 'email' or 'test_email'
#def get_config():
def get_config(fnames=('cron/orderP/get_orders.ini',), section=CFG):
    """
    Read settings from one or more .ini files

    fnames  -- a path, or an iterable of paths, handed to the parser's read()
    section -- name of the .ini section holding the mail settings

    Returns a dict with the keys 'host', 'use_ssl' (bool), 'user' and 'pwd'.
    """
    cfg = ConfigParser.SafeConfigParser()
    # read() takes the whole collection of paths as ONE argument. Unpacking
    # it with * only worked while the collection held a single name.
    cfg.read(fnames)
    return {
        'host': cfg.get(section, 'host'),
        'use_ssl': cfg.getboolean(section, 'use_ssl'),
        'user': cfg.get(section, 'user'),
        'pwd': cfg.get(section, 'pwd')
    }
def get_emails(cfg, debuglevel=0):
    """
    Returns a list of emails

    cfg        -- dict with at least the keys 'host', 'user' and 'pwd'
    debuglevel -- passed to poplib: 0 (none), 1 (summary), 2 (verbose)

    A POP3 protocol error is reported on stdout and whatever was collected
    before it (normally an empty list) is returned.
    """
    # pick the appropriate POP3 class (uses SSL or not)
    #pop = [poplib.POP3, poplib.POP3_SSL][cfg['use_ssl']]
    emails = []
    mail = None  # stays None when the connection itself cannot be opened
    try:
        # connect!
        print('Connecting...')
        mail = poplib.POP3(cfg['host'])
        mail.set_debuglevel(debuglevel)  # 0 (none), 1 (summary), 2 (verbose)
        mail.user(cfg['user'])
        mail.pass_(cfg['pwd'])

        # how many messages?
        num_messages = mail.stat()[0]
        print('{0} new messages'.format(num_messages))

        # get text of messages
        if num_messages:
            print('Getting emails...')
            # retr(i)[1] is the list of lines of message i; join them into
            # one string and parse that string as an email.
            emails = [
                email.message_from_string('\n'.join(mail.retr(i)[1]))
                for i in range(1, num_messages + 1)
            ]
            print('Done!')
    except poplib.error_proto as e:
        print('Email error: {0}'.format(e))

    # close connection -- only if one was actually opened
    if mail is not None:
        mail.quit()
    return emails
def parse_order_page(html):
    """
    Accept an HTML order form
    Returns (sku, shipto, [items])

    The page is expected to contain at least two <p> elements (sku, then the
    shipping address) and a table whose cells come in groups of three.
    """
    bs = BeautifulSoup.BeautifulSoup(html)  # parse html

    # sku is in first <p>, shipto is in second <p>...
    ps = bs.findAll('p')                    # find all paragraphs in data
    sku = ps[0].contents[1].strip()         # sku as unicode string
    # every second node from index 2 onwards is kept as an address line
    shipto_lines = [line.strip() for line in ps[1].contents[2::2]]
    shipto = '\n'.join(shipto_lines)        # shipping address as unicode string

    # items are in three-column table
    cells = bs.findAll('td')                    # find all table cells
    txt = [cell.contents[0] for cell in cells]  # get cell contents
    # group by threes - code, description, and quantity for each item
    items = zip(txt[0::3], txt[1::3], txt[2::3])
    return sku, shipto, items
def get_orders(emails):
    """
    Accepts a list of order emails
    Returns order details as list of [sku, shipto, [items]]

    Emails that are not multipart, or that have fewer than two parts, are
    reported on stdout and skipped.
    """
    orders = []
    for i, eml in enumerate(emails, 1):
        pl = eml.get_payload()
        # The order form is read from the SECOND part, so a multipart message
        # with a single part cannot be parsed and must not be indexed.
        if isinstance(pl, list) and len(pl) > 1:
            sku, shipto, items = parse_order_page(pl[1].get_payload())
            orders.append([sku, shipto, items])
        else:
            print("Email #{0}: unrecognized format".format(i))
    return orders
def write_to_csv(orders, fname):
    """
    Accepts a list of orders
    Write to csv file, one line per item ordered

    orders -- iterable of (poNumber, shipto, items), where items is an
              iterable of (code, description, qty)
    fname  -- path of the csv file to create (an existing file is overwritten)
    """
    # The with-block closes the file even if a row fails to serialise.
    # Binary mode is kept because the script targets the Python 2 csv module.
    with open(fname, 'wb') as outf:
        outcsv = csv.writer(outf)
        for poNumber, shipto, items in orders:
            outcsv.writerow([])     # leave blank row between orders
            for code, description, qty in items:
                outcsv.writerow([poNumber, shipto, code, description, qty])
# The point where mechanize will come to play
def main():
    """
    Fetch the emails, extract the orders and write them to a csv file
    named after the current Unix timestamp
    """
    cfg = get_config()
    emails = get_emails(cfg)
    orders = get_orders(emails)
    write_to_csv(orders, 'cron/orderP/{0}.csv'.format(int(time.time())))


if __name__=="__main__":
    main()
As we all know (at least those who know how email works), POP3 is used solely for retrieval, so there is no point in trying to use POP3 to send a message. That is why I asked the question "How to forward an email message captured with poplib to a different email address?".
The complete answer was:
smtplib can be used to forward an email message captured with poplib: all you need to do is capture the message body and send it with smtplib to the desired email address. Furthermore, as Aleksandr Dezhin noted, and I agree with him, some SMTP servers impose different restrictions on the messages they process.
Besides that, you can use sendmail to achieve this if you are on a Unix machine.