Fetching Email in Twisted using IMAP - python

I am trying to fetch emails from a Gmail account using Twisted, and to say the least it has been a pain. Is there a clear explanation of how an email is structured? (It seems hacked together at best.) I am trying to grab attachments, yet the attachment isn't anywhere in sight.
I am using the example IMAP client from Twisted and have modified it. I am using fetchAll('1:*') to get the emails and then getting the first email, but I can't find the attachment that is on that email (I checked that it is there in Google). Also, what is with the '1:*'? I can't seem to find anything that actually explains email (as though no one understands it).
So Stackoverflow, what am I missing?
Code
#!/usr/bin/env python
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Simple IMAP4 client which displays the subjects of all messages in a
particular mailbox.
"""
import sys
from twisted.internet import protocol
from twisted.internet import ssl
from twisted.internet import defer
from twisted.internet import stdio
from twisted.mail import imap4
from twisted.protocols import basic
from twisted.python import util
from twisted.python import log
class TrivialPrompter(basic.LineReceiver):
    """
    Line-oriented stdio helper that asks one question at a time.

    ``prompt`` writes a message and returns a Deferred; the next line
    received fires that Deferred with the line.
    """
    # The platform line separator doubles as the LineReceiver delimiter.
    from os import linesep as delimiter

    # Deferred of the prompt currently awaiting an answer, or None.
    promptDeferred = None

    def prompt(self, msg):
        """Display ``msg`` and return a Deferred fired with the next line."""
        # Only a single outstanding prompt is supported.
        assert self.promptDeferred is None
        self.display(msg)
        pending = defer.Deferred()
        self.promptDeferred = pending
        return pending

    def display(self, msg):
        """Write ``msg`` to the transport as-is."""
        self.transport.write(msg)

    def lineReceived(self, line):
        """Hand ``line`` to the pending prompt; ignore it if none is pending."""
        pending = self.promptDeferred
        if pending is not None:
            # Clear the slot before firing so a callback may prompt again.
            self.promptDeferred = None
            pending.callback(line)
class SimpleIMAP4Client(imap4.IMAP4Client):
    """
    IMAP4 client that fires a one-shot Deferred when the server greets us.
    """
    # Set by the factory; fired with this protocol on the server greeting.
    greetDeferred = None

    def serverGreeting(self, caps):
        """Record the advertised capabilities and fire ``greetDeferred``."""
        self.serverCapabilities = caps
        waiting = self.greetDeferred
        if waiting is None:
            return
        # Clear the attribute first so the Deferred is fired only once.
        self.greetDeferred = None
        waiting.callback(self)
class SimpleIMAP4ClientFactory(protocol.ClientFactory):
    """
    Single-use factory that builds one ``SimpleIMAP4Client`` and wires the
    ``onConn`` Deferred to its greeting (or to a connection failure).
    """
    # Flipped to True once the one allowed protocol has been built.
    usedUp = False

    protocol = SimpleIMAP4Client

    def __init__(self, username, onConn):
        """
        Store the username used for the authenticators and the Deferred
        that reports the outcome of the connection attempt.
        """
        self.ctx = ssl.ClientContextFactory()
        self.username = username
        self.onConn = onConn

    def buildProtocol(self, addr):
        """
        Build the protocol instance and register every authenticator that
        twisted.mail provides, without checking the server's capabilities.

        Note carried over from the original example: Gmail no longer uses
        any of these methods; it has been using XOAUTH since 2010.
        """
        assert not self.usedUp
        self.usedUp = True

        client = self.protocol(self.ctx)
        client.factory = self
        client.greetDeferred = self.onConn

        authenticators = (
            imap4.PLAINAuthenticator,
            imap4.LOGINAuthenticator,
            imap4.CramMD5ClientAuthenticator,
        )
        for make_authenticator in authenticators:
            client.registerAuthenticator(make_authenticator(self.username))
        return client

    def clientConnectionFailed(self, connector, reason):
        """Report a failed connection attempt through ``onConn``."""
        waiting = self.onConn
        self.onConn = None
        waiting.errback(reason)
def cbServerGreeting(proto, username, password):
    """
    First callback, run once the server has sent its greeting.

    Attaches a stdio prompter to ``proto`` and starts authentication.
    Returns the Deferred of the authentication chain.
    """
    # Hook a prompter up to stdin/stdout.
    prompter = TrivialPrompter()
    stdio.StandardIO(prompter)

    # Expose the prompter's helpers on the protocol for later callbacks.
    proto.prompt = prompter.prompt
    proto.display = prompter.display

    # Try the registered (secure) authenticators first.
    d = proto.authenticate(password)
    d.addCallback(cbAuthentication, proto)
    d.addErrback(ebAuthentication, proto, username, password)
    return d
def ebConnection(reason):
    """
    Last-resort errback: start logging to stdout, log ``reason`` and pass
    the failure on unchanged.
    """
    log.startLogging(sys.stdout)
    log.err(reason)
    return reason
def cbAuthentication(result, proto):
    """
    Run after a successful login: request the list of all mailboxes and
    pass it on to ``cbMailboxList``.
    """
    d = proto.list("", "*")
    d.addCallback(cbMailboxList, proto)
    return d
def ebAuthentication(failure, proto, username, password):
    """
    Errback for a failed ``authenticate`` call.

    Only ``imap4.NoSupportedAuthentication`` is handled; any other failure
    is re-raised by ``trap``. In the handled case the user is asked whether
    to fall back to a plain ``login``. (The original example notes that
    Gmail accounts end up here.)
    """
    failure.trap(imap4.NoSupportedAuthentication)
    question = "No secure authentication available. Login insecurely? (y/N) "
    d = proto.prompt(question)
    d.addCallback(cbInsecureLogin, proto, username, password)
    return d
def cbInsecureLogin(result, proto, username, password):
    """
    Handle the answer to the "login insecurely?" prompt.

    Anything other than "y" (case-insensitive) aborts with a failed
    Deferred; "y" performs a plain ``login`` and continues the chain.
    """
    if result.lower() != "y":
        return defer.fail(Exception("Login failed for security reasons."))

    # The user agreed: log in with username and password.
    d = proto.login(username, password)
    d.addCallback(cbAuthentication, proto)
    return d
def cbMailboxList(result, proto):
    """
    Present the retrieved mailboxes as a numbered menu and ask which one
    to open. Fails the chain if the server reported no mailboxes.
    """
    # The third element of every list entry is used as the mailbox name.
    names = [entry[2] for entry in result]
    menu = '\n'.join('%d. %s' % (number, name)
                     for number, name in enumerate(names, 1))
    if not menu:
        return defer.fail(Exception("No mailboxes exist on server!"))

    d = proto.prompt(menu + "\nWhich mailbox? [1] ")
    d.addCallback(cbPickMailbox, proto, names)
    return d
def cbPickMailbox(result, proto, mboxes):
    """
    Open (read-only, via ``examine``) the mailbox the user picked.

    ``result`` is the 1-based menu number typed by the user; an empty
    answer selects the first mailbox.
    """
    choice = int(result or '1')
    d = proto.examine(mboxes[choice - 1])
    d.addCallback(cbExamineMbox, proto)
    return d
def cbExamineMbox(result, proto):
    """
    Run once ``examine`` has completed: issue ``fetchAll`` for every
    message in the mailbox and pass the result to ``cbFetch``.
    """
    # FETCH ALL HEADERS? WHERE IS A ONE FOR AN ATTACHMENT
    every_message = '1:*'
    d = proto.fetchAll(every_message)
    d.addCallback(cbFetch, proto)
    return d
def cbFetch(result, proto):
    """
    Display the fetched data of the highest-keyed message (or report an
    empty mailbox), then log out.
    """
    if not result:
        print("Hey, an empty mailbox!")
    else:
        # The largest key is the last one in sorted order.
        newest = max(result)
        proto.display('%s %s' % (newest, result[newest]))
    return proto.logout()
def cbClose(result):
    """
    Final step of the chain: stop the reactor, whatever ``result`` is.
    """
    # Imported here, as in the original, rather than at module level.
    from twisted.internet import reactor
    reactor.stop()
def main():
    """
    Ask for the connection details, build the callback chain and run the
    reactor until the chain finishes.
    """
    hostname = raw_input('IMAP4 Server Hostname: ')
    port = raw_input('IMAP4 Server Port (the default is 143, 993 uses SSL): ')
    username = raw_input('IMAP4 Username: ')
    password = util.getPassword('IMAP4 Password: ')

    # greeting -> authentication -> ...; errors are logged, and the
    # reactor is always stopped at the end.
    onConn = defer.Deferred()
    onConn.addCallback(cbServerGreeting, username, password)
    onConn.addErrback(ebConnection)
    onConn.addBoth(cbClose)

    factory = SimpleIMAP4ClientFactory(username, onConn)

    from twisted.internet import reactor
    if port == '993':
        reactor.connectSSL(hostname, int(port), factory,
                           ssl.ClientContextFactory())
    else:
        # An empty answer falls back to the default port 143.
        reactor.connectTCP(hostname, int(port or 143), factory)
    reactor.run()


if __name__ == '__main__':
    main()

First off, IMAP4 is a (perhaps needlessly) complex protocol for mail handling, and Twisted's client implementation is (necessarily) complex in order to fully support that protocol. To get anywhere, you should consider spending some time reading the standard that explains the protocol, RFC 3501, and the relevant parts of Twisted's API.
That said, it looks like you're using IMAP4Client.fetchAll(), which, paradoxically, fetches only "envelope" data: the headers and metadata about the message. The comparable call that also returns the body of the email is actually fetchFull().

Thanks to the pointers to RFC3501 and RFC822 by SingleNegationElimination and Jean-Paul, I think I have a better understanding of how to achieve this.
The solution I found is to use the fetchSpecific API of imap4 (see its source code). You can specify which "part" of the message (i.e. a mail in your inbox) to retrieve. For attachments, the content of the attached file(s) is embedded in the TEXT part and encoded with base64. In my particular example, there is a PDF attachment in the mail, and
# Fetch the TEXT part of the message with UID 247 and hand it to
# cbViewAttachment.
d = proto.fetchSpecific(imap4.MessageSet(247), uid=True, headerType='TEXT')
d.addCallback(cbViewAttachment, proto)


def cbViewAttachment(result, proto):
    """Print the first 400 characters of each fetched TEXT part."""
    for value in result.values():
        # Element [0][4] of each entry holds the fetched TEXT content.
        text_part = value[0][4]
        print(text_part[:400])
...
gives something like
------_CANON_2007111239350128_
Content-Type: Application/pdf;
name="0128_20200711123935_001.pdf"
Content-Disposition: attachment;
filename="0128_20200711123935_001.pdf"
Content-Transfer-Encoding: base64
JVBERi0xLjYKJeLjz9MNCjEgMCBvYmoKPDwgCi9DcmVhdGlvbkRhdGUgKEQ6MjAyMDA3MTEyMDM5
MThaMDAnMDAnKQovQ3JlYXRvciAoXDM3NlwzNzdcMDAwQ1wwMDBhXDAwMG5cMDAwb1wwMDBuXDAw
MCBcMDAwTVwwMDBGXDAwMDJcMDAwNFw
A quick explanation: the above code fetches the TEXT part of the message with UID = 247. When the call to fetchSpecific succeeds, cbViewAttachment is called, and its first argument is a dictionary of TEXT parts, with one entry for each of the specified messages. In this case only one message is specified, so the result is a dictionary with a single entry corresponding to the UID = 247 message.
The actual attachment content is embedded in the [0][4] of the value of the entry. It looks like above. It is almost obvious that the JVB... part is the base64-encoded content. I tried to decode the first small portion of it with base64 and it looks like
%PDF-1.6\n%\xe2\xe3
which means that this is actually the beginning of a PDF file.
You can then write the content to a local file so as to "download" the attachment. In the above example it is basically
# MIME attachments use the *standard* base64 alphabet, so decode with
# b64decode. urlsafe_b64decode would additionally turn any '-' or '_'
# (e.g. from a trailing MIME boundary line) into data characters.
# NOTE(review): 213 is the hard-coded offset of the payload in this one
# message; real code should parse the MIME part to locate it.
import base64

with open('your/file/name', 'wb') as f:
    f.write(base64.b64decode(value[0][4][213:]))
Note that 213 is hard-coded as the start of the content. In the real-world case you should definitely parse the TEXT a bit to find it out.
Hope this helps someone in the similar situation as me.

Related

Send raw email from python and retrieve sendmail queueid

I have a need to take an incoming email that matches a specific ruleset in postfix, send it to an external python process, rewrite the email to multiple delivery recipients based upon the postfix postmap table, and re-inject back into the postfix chain. The included python meets that basic requirement except for one, tracking the queueid of the new re-injected email.
The typical method of re-injecting with /usr/sbin/sendmail does not return a usable queueid for the correct process. This causes a loss of visibility of the newly created emails (logs have to be manually parsed to generate a delivery confirmation report for third parties).
Since the re-injection process is in python, ideally I would like to use the smtplib.Sendmail for that, but it also does not return a queueid.
What I have tried, and kind of works, is using netcat as a subprocess in python (netcat_msg = "helo myserver\nmail from: " + sender + "\nrcpt to: " + recipient + "\ndata\n" + msg.as_string() + "\n\n.\nquit\n") but I feel that's a hack and I get warnings about command sequences in smtp (which is expected since I'm not waiting on the response before issuing the next command).
Is there a way to expose the queueid returned from the remote SMTP server when the process completes? Any recommended approaches to this?
My goal is to log these queueids to a file/api endpoint/whatever so we can determine that incoming email to a#domain.tld was sent to bob#example.com, chris#example.com and track the return status of the destination server.
(Excuse my crude python)
#!/bin/python
#v 2.7.15
import email
import sys
from email.mime.text import MIMEText
import argparse
from subprocess import Popen
from subprocess import PIPE
#ignore this, it was just for debugging
def dump(obj):
    """Debug helper: print every attribute of ``obj`` as ``obj.name = value``."""
    for name in dir(obj):
        if not hasattr(obj, name):
            continue
        print("obj.%s = %s" % (name, getattr(obj, name)))
def process_message(data, sender, recipient):
    """
    Rewrite the ``To`` header of a message and re-inject it via sendmail.

    :param data: the complete original message as a string.
    :param sender: envelope sender passed to sendmail with ``-f``.
    :param recipient: new recipient; becomes both the ``To`` header and
        the envelope recipient.

    The previous ``To`` value (or the new address when there was none) is
    stored in ``X-Original-To``. The process exits with status 69 if
    sendmail cannot be run or reports a failure.
    """
    msg = email.message_from_string(data)
    newaddress = '"{recipient}" <{recipient}>'.format(recipient=recipient)

    # ``in`` works on both Python 2 and 3; Message.has_key is Python-2-only.
    if 'To' in msg:
        oldaddress = msg['To']
        msg.replace_header('To', newaddress)
    else:
        msg.add_header('To', newaddress)
        oldaddress = newaddress

    if 'X-Original-To' in msg:
        msg.replace_header('X-Original-To', oldaddress)
    else:
        msg.add_header('X-Original-To', oldaddress)

    try:
        # replace this with a re-inject that can return the queueid
        # "-f" and the address are separate argv entries; gluing them into
        # one element hands sendmail a sender with a leading space.
        p = Popen(["/usr/sbin/sendmail", "-G", "-i", "-f", sender,
                   "--", recipient], stdin=PIPE)
        p.communicate(msg.as_string())
        # end replacement
        # log original queueid, returned queueid and destination email here
    except Exception as ex:
        sys.stderr.write("re-injection failed: %s\n" % (ex,))
        sys.exit(69)

    # A non-zero exit status means sendmail did not accept the message;
    # report it the same way as a failure to launch sendmail.
    if p.returncode != 0:
        sys.stderr.write("sendmail exited with status %s\n" % (p.returncode,))
        sys.exit(69)
def main():
    """
    Parse the command line, read one message from stdin and re-inject it
    once per requested recipient, then exit with status 0.
    """
    parser = argparse.ArgumentParser(description='To field replacement for Email MIME.')
    parser.add_argument('--from', dest="sender", required=True,
                        help="From email address")
    parser.add_argument('--recipient', dest="recipients", nargs='+', required=True,
                        help="Recipient address to replace in To field (N+1)")
    args = parser.parse_args()

    # The whole message arrives on stdin.
    data = sys.stdin.read()

    for recipient in args.recipients:
        process_message(data, args.sender, recipient)
    exit(0)


main()

how to sign request tokens?

I am currently trying to write a script to send off a request token. I have the header and the claim set, but I don't understand the signature! OAuth requires the request to be signed with my private key using SHA256withRSA (also known as RSASSA-PKCS1-V1_5-SIGN with the SHA-256 hash function), but the closest I could find was RSAES-PKCS1-v1_5 (which has RSA and the SHA-256 hash). I followed the example and tweaked it so I could get it set up, but here's my dilemma:
# Sign `message` with RSASSA-PKCS1-v1_5 / SHA-256 (a.k.a. SHA256withRSA).
# Signing lives in Crypto.Signature; Crypto.Cipher.PKCS1_v1_5 is the
# RSAES *encryption* scheme and does not produce a signature.
import json

from Crypto.Hash import SHA256
from Crypto.PublicKey import RSA
from Crypto.Signature import PKCS1_v1_5 as PKCS1_v1_5_Signature

# Raw string: in a normal literal the "\U" of "C:\Users" starts a unicode
# escape, which Python 3 rejects as a syntax error.
key_path = r'C:\Users\Documents\Library\KEY\My Project 905320c6324f.json'

# The downloaded file is JSON, not a key; the PEM text has to be pulled
# out of it first.
# NOTE(review): assumes the PEM is stored under "private_key" -- confirm
# against the actual file.
with open(key_path) as key_file:
    private_key_pem = json.load(key_file)['private_key']
key = RSA.importKey(private_key_pem)

# NOTE(review): assumes `message` is the bytes to be signed (the encoded
# header and claim set) and is defined earlier in the script.
h = SHA256.new(message)
signer = PKCS1_v1_5_Signature.new(key)
signature = signer.sign(h)
print(signature)
I'm a bit lost. The JSON file I was given has both the public key and the private key; do I copy and paste the private key into the signature variable (that gave me an invalid syntax error)? Or do I paste the directory again? I am so lost, and way in over my head, haha. I am currently running Python 3.4, with PyCrypto for the signature.
Based on what you've said below about wanting to write a command system using gmail, I wrote a simple script to do this using IMAP. I think this is probably simpler than trying to use Google APIs for a single user, unless you were wanting to do that simply for the exercise.
import imaplib, logging
from time import sleep
# Account credentials, stored in plain text in this script.
USERNAME = 'YOUR_USERNAME_HERE' # For gmail, this is your full email address.
PASSWORD = 'YOUR_PASSWORD_HERE'
# Passed to sleep() between two mailbox checks.
CHECK_DELAY = 60 # In seconds
LOGGING_FORMAT = '%(asctime)s %(message)s'
# Log to imapTest.log at INFO level.
logging.basicConfig(filename='imapTest.log', format=LOGGING_FORMAT, level=logging.INFO)
logging.info("Connecting to IMAP server...")
# Module-level connection shared by the helper functions below; the
# connect and login happen as a side effect of running the script.
imap = imaplib.IMAP4_SSL('imap.gmail.com')
imap.login(USERNAME, PASSWORD)
logging.info("Connected to IMAP server.")
def get_command_messages():
    """
    Return the server-side search result for command messages: messages
    from USERNAME whose subject contains 'COMMAND'.

    The value is the first element of the search data, a space-separated
    list of message numbers (empty when nothing matches).
    """
    logging.info("Checking for new commands.")
    imap.check()
    # Substitute USERNAME below for the sending email address if it differs.
    criteria = '(FROM "%s" SUBJECT "COMMAND")' %(USERNAME)
    typ, data = imap.search(None, criteria)
    matching_ids = data[0]
    return matching_ids
def delete_messages(message_nums):
    """
    Flag every message in ``message_nums`` (whitespace-separated message
    numbers) as deleted, then expunge the mailbox.
    """
    logging.info("Deleting old commands.")
    for number in message_nums.split():
        imap.store(number, '+FLAGS', '\\DELETED')
    # Expunge once, after all messages have been flagged.
    imap.expunge()
# Select the inbox
imap.select()

# Delete any messages left over that match commands, so we are starting 'clean'.
# This probably isn't the nicest way to do this, but saves checking the DATE header.
message_nums = get_command_messages()
delete_messages(message_nums)

try:
    while True:
        sleep(CHECK_DELAY)

        # Get the message body. Use BODY.PEEK instead of BODY if you don't
        # want to mark the message as read, but we're deleting it anyway below.
        message_nums = get_command_messages()
        if message_nums:
            # search returns space-separated message IDs, but we need them
            # comma-separated for fetch. Bytes literals are used so this
            # works both on Python 2 (bytes is str) and on Python 3, where
            # imaplib returns bytes.
            message_set = message_nums.replace(b' ', b',')
            typ, messages = imap.fetch(message_set, '(BODY[TEXT])')

            # fetch yields one (envelope, body) tuple per message,
            # interleaved with plain closing-parenthesis items; keep only
            # the bodies. (Iterating messages[0] would walk the two halves
            # of the first message instead of the messages themselves.)
            bodies = [part[1] for part in messages if isinstance(part, tuple)]
            logging.info("Found %d commands" % len(bodies))

            for message in bodies:
                # You now have the message body in the message variable.
                # From here, you can check against it to perform commands, e.g:
                if b'shutdown' in message:
                    print("I got a shutdown command!")
                    # Do stuff

            delete_messages(message_nums)
finally:
    try:
        imap.close()
    except Exception:
        # Closing is best-effort (it fails if no mailbox is selected);
        # record the problem instead of hiding it, then still log out.
        logging.warning("Could not close the mailbox cleanly.", exc_info=True)
    imap.logout()
If you're set on using the Gmail API, though, Google strongly encourage you to use their existing Python library rather than attempt to do full authentication etc. yourself as you appear to be. With that, it should - more or less - be a case of replacing the imap calls above with the relevant Gmail API ones.

How to set IMAP flags using Twisted

How do you delete messages using imap4.IMAP4Client? I cannot get the "deleted" tag correctly applied for using the "expunge" method.
I keep getting the following error:
Failure: twisted.mail.imap4.IMAP4Exception: Invalid system flag \
Sample code would be appreciated. This is what I have so far:
from twisted.internet import protocol, reactor
from twisted.mail import imap4
# Variables for connection: module-level settings read by IMAP4LocalClient
# (username, password) and by the connectTCP call (host, port).
username = 'user#host.com'
password = 'mypassword'
host = 'imap.host.com'
port = 143
class IMAP4LocalClient(imap4.IMAP4Client):
    """
    IMAP4 client that logs in on connect, selects ``INBOX.Trash``, fetches
    its messages, flags messages 1-5, expunges the mailbox and logs out.
    """

    def connectionMade(self):
        """Log in as soon as the connection is up."""
        d = self.login(username, password)
        d.addCallbacks(self._getMessages, self._ebLogin)

    def connectionLost(self, reason):
        """Stop the reactor once the connection is gone."""
        reactor.stop()

    def _ebLogin(self, result):
        """Print the login failure and drop the connection."""
        print(result)
        self.transport.loseConnection()

    def _programUtility(self, result):
        """Print the result of the expunge and log out."""
        print(result)
        return self.logout()

    def _cbExpungeMessage(self, result):
        """Expunge the selected mailbox."""
        d = self.expunge()
        d.addCallback(self._programUtility)
        return d

    def _cbDeleteMessage(self, result):
        """Set the flag on messages 1 through 5, then expunge."""
        # NOTE: this is the call the question is about; it produces the
        # reported "Invalid system flag \" error.
        d = self.setFlags("1:5", flags=r"\\Deleted", uid=False)
        d.addCallback(self._cbExpungeMessage)
        return d

    def _getMessages(self, result):
        """Request the mailbox list after a successful login."""
        d = self.list("", "*")
        d.addCallback(self._cbPickMailbox)
        return d

    def _cbPickMailbox(self, result):
        """Select the hard-coded mailbox (the list result is not used)."""
        mbox = 'INBOX.Trash'
        d = self.select(mbox)
        d.addCallback(self._cbExamineMbox)
        return d

    def _cbExamineMbox(self, result):
        """Fetch every message by sequence number before flagging."""
        d = self.fetchMessage("1:*", uid=False)
        d.addCallback(self._cbDeleteMessage)
        return d
class IMAP4ClientFactory(protocol.ClientFactory):
    """Factory producing ``IMAP4LocalClient`` protocol instances."""

    def buildProtocol(self, addr):
        """Return a fresh client for the new connection."""
        client = IMAP4LocalClient()
        return client

    def clientConnectionFailed(self, connector, reason):
        """Print why the connection failed and stop the reactor."""
        print(reason)
        reactor.stop()
# Start a plain TCP connection to the configured server and run the
# reactor; IMAP4LocalClient.connectionLost and
# IMAP4ClientFactory.clientConnectionFailed call reactor.stop().
reactor.connectTCP(host,port,IMAP4ClientFactory())
reactor.run()
Changed to:
def _cbDeleteMessage(self, result):
    """Flag messages 1 through 5 as deleted, then expunge."""
    # setFlags takes an iterable of flag strings, hence the list.
    deleted_flags = ['\\Deleted']
    d = self.setFlags("1:5", flags=deleted_flags, uid=False)
    d.addCallback(self._cbExpungeMessage)
    return d
thanks to Jean-Paul Calderone and it worked, setFlags requires a list, not just a string.
I think there are two problems here.
First, you're passing a string as the flags parameter to setFlags. Notice the documentation for that parameter: The flags to set (type: Any iterable of str). Try a list containing one string, instead.
Second, \\Deleted is probably not a flag the server you're interacting with supports. The standard deleted flag in IMAP4 is \Deleted.

Python twisted irc: Wait for a whois reply inside privmsg method

I'm trying to make an IRC bot using the twisted.words.protocols.irc module.
The bot will parse messages from a channel and parse them for command strings.
Everything works fine except when I need the bot to identify a nick by sending a whois command. The whois reply will not be handled until the privmsg method (the method from which I'm doing the parsing) returns.
example:
from twisted.words.protocols import irc
# The Twisted base class is ``irc.IRCClient``; ``irc.IRClient`` does not exist.
class MyBot(irc.IRCClient):
    # .......... (rest of the bot elided in the original post)

    def privmsg(self, user, channel, msg):
        """This method is called when the client receives a message."""
        if msg.startswith(':whois '):
            nick = msg.split()[1]
            self.whois(nick)
            # NOTE: the reply to the whois request is only handled after
            # this method returns, so whoislist does not contain it yet.
            print(self.whoislist)

    def irc_RPL_WHOISCHANNELS(self, prefix, params):
        """This method is called when the client receives a reply for whois."""
        self.whoislist[prefix] = params
Is there a way to somehow make the bot wait for a reply after self.whois(nick)?
Perhaps use a thread (I don't have any experience with those).
Deferred is a core concept in Twisted, you must be familiar with it to use Twisted.
Basically, your whois checking function should return a Deferred that will be fired when you receive whois-reply.
I managed to fix this by running all handler methods as threads, and then, following
kirelagin's suggestion, setting a field before running a whois query and modifying the method that receives the data
so that it changes the field when it receives a reply. It's not the most elegant solution, but it works.
Modified code:
# The Twisted base class is ``irc.IRCClient``; ``irc.IRClient`` does not exist.
class MyBot(irc.IRCClient):
    # .......... (rest of the bot elided in the original post)
    #
    # NOTE(review): every handler runs in its own thread here. Twisted APIs
    # are generally meant to be called from the reactor thread only, so the
    # Deferred-based approach suggested in the other answer is the safer
    # design -- confirm before using this in production.

    def privmsg(self, user, channel, msg):
        """This method is called when the client receives a message."""
        if msg.startswith(':whois '):
            words = msg.split()
            if len(words) < 2:
                # ":whois " without a nick: nothing to look up.
                return
            nick = words[1]
            # Mark the request as pending; irc_RPL_ENDOFWHOIS flips the
            # field to 'ACK' once the whole reply has arrived.
            self.whois_status = 'REQUEST'
            self.whois(nick)
            # Block this handler thread until the reply is complete.
            while self.whois_status != 'ACK':
                sleep(1)
            print(self.whoislist)

    def handleCommand(self, command, prefix, params):
        """Determine the function to call for the given command and call
        it with the given arguments.
        """
        method = getattr(self, "irc_%s" % command, None)
        try:
            # all handler methods are now threaded.
            if method is not None:
                thread.start_new_thread(method, (prefix, params))
            else:
                thread.start_new_thread(self.irc_unknown, (prefix, command, params))
        except Exception:
            # Log the failure; do not swallow KeyboardInterrupt/SystemExit.
            irc.log.deferr()

    def irc_RPL_WHOISCHANNELS(self, prefix, params):
        """This method is called when the client receives a reply for whois."""
        # Defined once; the original post defined this method twice with
        # identical bodies.
        self.whoislist[prefix] = params

    def irc_RPL_ENDOFWHOIS(self, prefix, params):
        """Mark the pending whois request as answered."""
        self.whois_status = 'ACK'

Twisted IMAP4 Client QUOTA family of commands

Update: The problem seems to be the way untagged responses are handled by Twisted. The only examples I have found seem to iterate through the data received and somehow collect the response to their command, though I am not sure how...
I am trying to implement the IMAP4 quota commands as defined in RFC 2087 ( https://www.rfc-editor.org/rfc/rfc2087 ).
Code - ImapClient
class SimpleIMAP4Client(imap4.IMAP4Client):
    """
    IMAP4 client that fires a one-shot Deferred on the server greeting and
    echoes every line received and sent to stdout.
    """
    # Fired with this protocol on the server greeting, if set.
    greetDeferred = None

    def serverGreeting(self, caps):
        """Record the advertised capabilities and fire ``greetDeferred``."""
        self.serverCapabilities = caps
        waiting = self.greetDeferred
        if waiting is None:
            return
        # Clear the attribute first so the Deferred is fired only once.
        self.greetDeferred = None
        waiting.callback(self)

    def lineReceived(self, line):
        """Print the incoming line prefixed with '<', then process it."""
        print("<" + str(line))
        return imap4.IMAP4Client.lineReceived(self, line)

    def sendLine(self, line):
        """Print the outgoing line prefixed with '>', then send it."""
        print(">" + str(line))
        return imap4.IMAP4Client.sendLine(self, line)
Code - QUOTAROOT Implementation
def cbExamineMbox(result, proto):
    """
    Callback invoked when the examine command completes.

    Sends a GETQUOTAROOT command for INBOX, asking for the QUOTAROOT and
    QUOTA responses, and passes the result to ``cbFetch``.
    """
    print("Fetching storage space")
    wanted = ("QUOTAROOT", "QUOTA")
    quota_command = Command("GETQUOTAROOT", _prepareMailboxName("INBOX"),
                            wantResponse=wanted)
    d = proto.sendCommand(quota_command)
    d.addCallback(cbFetch, proto)
    return d
def cbFetch(result, proto):
    """
    Print the result of the GETQUOTAROOT command.
    """
    for output in ("Got Quota", result):
        print(output)
Output
Fetching storage space
>0005 GETQUOTAROOT INBOX
<* QUOTAROOT "INBOX" ""
<* QUOTA "" (STORAGE 171609 10584342)
<0005 OK Success
Got Quota
([], 'OK Success')
So I am getting the data but the result doesn't contain it, I am thinking it is because they are untagged responses?
Since the IMAP4 protocol mixes together lots of different kinds of information as "untagged responses", you probably also need to update some other parts of the parsing code in the IMAP4 client implementation.
Specifically, take a look at twisted.mail.imap4.Command and its finish method. Also look at twisted.mail.imap4.IMAP4Client._extraInfo, which is what is passed as the unusedCallback to Command.finish.
To start, you can check to see if the untagged responses to the QUOTA command are being sent to _extraInfo (and then dropped (well, logged)).
If so, I suspect you want to teach Command to recognize QUOTA and QUOTAROOT untagged responses to the QUOTA command, so that it collects them and sends them as part of the result it fires its Deferred with.
If not, you may need to dig a bit deeper into the logic of Command.finish to see where the data does end up.
You may also want to actually implement the Command.wantResponse feature, which appears to be only partially formed currently (ie, lots of client code tries to send interesting values into Command to initialize that attribute, but as far as I can tell nothing actually uses the value of that attribute).

Categories