import email
import imaplib
import os
class FetchEmail():
connection = None
error = None
mail_server="outlook.office365.com"
username="me#domain.com"
password="'Password'"
self.save_attachment(self,msg,download_folder)
def __init__(self, mail_server, username, password):
self.connection = imaplib.IMAP4_SSL(mail_server)
self.connection.login(username, password)
self.connection.select(readonly=False) # so we can mark mails as read
def close_connection(self):
"""
Close the connection to the IMAP server
"""
self.connection.close()
def save_attachment(self, msg, download_folder="/tmp"):
"""
Given a message, save its attachments to the specified
download folder (default is /tmp)
return: file path to attachment
"""
att_path = "No attachment found."
for part in msg.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
att_path = os.path.join(download_folder, filename)
if not os.path.isfile(att_path):
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
return att_path
def fetch_unread_messages(self):
"""
Retrieve unread messages
"""
emails = []
(result, messages) = self.connection.search(None, 'UnSeen')
if result == "OK":
for message in messages[0].split(' '):
try:
ret, data = self.connection.fetch(message,'(RFC822)')
except:
print ("No new emails to read.")
self.close_connection()
exit()
msg = email.message_from_string(data[0][1])
if isinstance(msg, str) == False:
emails.append(msg)
response, data = self.connection.store(message, '+FLAGS','\\Seen')
return emails
self.error = "Failed to retreive emails."
return emails
I have the above code currently, and at line 12 it says self is not defined. What could be the reason for this error. I think self is defined below that line in the init function.
Well, just by looking at the code, I can see inconsistent indentation right from the start - this is probably your main problem. Try defining your functions within the FetchEmail class.
Secondly, change the init function to:
def __init__(self, mail_server=mail_server,
username=username,
password=password):
this is effectively just applying default values to the init function.
Lastly, to us the save_attachment function /save_attachment(self, msg, download_folder) within the class, you will either need to call it within the init function or within the top-level of the scipt (outside the class definition)
Within class definition (within init): self.save_attatchment(msg,download_folder)
Within top level: after creating an FetchEmail object using fe = FetchEmail() then you could call the save_attachment function like this: attatchment_path = fe.save_attachment()
This is how I would implement the init:
class FetchEmail():
def __init__(self,
mail_server="outlook.office365.com",
username="rnandipati#jmawireless.com",
password="'RNjma17!'"):
self.error = None
self.connection = None
self.mail_server = mail_server
self.username = username
self.password = password
self.connection = imaplib.IMAP4_SSL(mail_server)
self.connection.login(username, password)
self.connection.select(readonly=False) # so we can mark mails as readread
def close_connection(self): ...
just take note, if you do this, just remember to change the reference of all your functions to self.password, self.error etc.
I do not know whether this would work.
Maybe take a look at this. I think it is your best bet.
All the best!
Related
I'm trying to customise the SFTOperator take download multiple file from a server. I know that the original SFTPOperator only allow one file at a time.
I copied the same code from source and I twerk by adding a new function called get_xml_from_source(). Please refer the code below:
def get_xml_from_source(sftp_client, remote_filepath, local_filepath, prev_execution_date, execution_date):
"""
Copy from Source to local path
"""
files_attr = sftp_client.listdir_attr(remote_filepath) # eg: /source/ HITTING ERROR HERE
files_name = sftp_client.listdir(remote_filepath) # eg: /source/
today_midnight = datetime.combine(datetime.today(), time.min)
yesterday_midnight = today_midnight - timedelta(days=1)
for file_attr, file_name in zip(files_attr, files_name):
modified_time = datetime.fromtimestamp(file_attr.st_mtime)
if yesterday_midnight <= modified_time < today_midnight:
# if prev_execution_date <= modified_time < execution_date:
try:
# Download to local path
sftp_client.get(remote_filepath, local_filepath)
print(file_name)
except: # pylint: disable=bare-except
print("File not found")
else:
print("Not the file!")
Where this function will only download files from yesterday up to today.
I added the function at this line:
with self.ssh_hook.get_conn() as ssh_client:
sftp_client = ssh_client.open_sftp()
if self.operation.lower() == SFTPOperation.GET:
local_folder = os.path.dirname(self.local_filepath)
if self.create_intermediate_dirs:
# Create Intermediate Directories if it doesn't exist
try:
os.makedirs(local_folder)
except OSError:
if not os.path.isdir(local_folder):
raise
file_msg = "from {0} to {1}".format(self.remote_filepath,
self.local_filepath)
self.log.info("Starting to transfer %s", file_msg)
# This is where it starts to copy, customization begins here
# sftp_client.get(self.remote_filepath, self.local_filepath) <--- Original code that I commented out and replace with mine below
get_xml_from_source(sftp_client, self.remote_filepath,
self.local_filepath, self.prev_execution_date, self.execution_date)
Note that, rest of the codes did not change. It is how it looks like in the source.
I keep hitting error at files_attr = sftp_client.listdir_attr(remote_filepath) with this error:
Error while transferring from /source/ to
/path/to/destination, error: [Errno 2] No such file.
Which obviously meant, it can't find the sftp directory. I tried running the whole function locally, it works fine.
Is there any part of the code that tied the paramiko connection to only get one file? I checked the paramiko connection for SFTPOperator, it should be just fine. In this case, how should I fix it?
This is how I established my connection when running locally :
def connect_to_source():
"""
Get source credentials
:param: None
:return: username & password
"""
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
username, password = get_eet_credentials()
# key = paramiko.RSAKey.from_private_key_file(openssh_key, password=password)
ssh.connect(hostname=SFTP_SERVER, port=SFTP_PORT_NUMBER,
username=username, password=password)
client = ssh.open_sftp()
print("Connection to source success!")
return client
Lastly, below is my airflow task:
def copy_from_source():
"""
Copy XML file from source to local path
"""
return SFTPOperator(
task_id="copy_from_source",
ssh_conn_id="source_conn",
local_filepath=f"{current_dir}/destination",
remote_filepath= "/source/",
prev_execution_date='{{ prev_execution_date }}',
execution_date='{{ execution_date }}', # strftime("%Y-%m-%d %H:%M:%S")
create_intermediate_dirs=True,
operation="get",
dag=dag
)
I'm trying to do something similar to you. I'm not sure what is causing the issues you are facing but this is the updated SFTP Operator I have written that gets multiple files from a server
sftp_get_multiple_files_operator.py
import os
from pathlib import Path
from typing import Any
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.contrib.hooks import SSHHook
class SFTPGetMultipleFilesOperator(BaseOperator):
template_fields = ('local_directory', 'remote_filename_pattern', 'remote_host')
def __init__(
self,
*,
ssh_hook=None,
ssh_conn_id=None,
remote_host=None,
local_directory=None,
remote_filename_pattern=None,
filetype=None,
confirm=True,
create_intermediate_dirs=False,
**kwargs,
) -> None:
super().__init__(**kwargs)
self.ssh_hook = ssh_hook
self.ssh_conn_id = ssh_conn_id
self.remote_host = remote_host
self.local_directory = local_directory
self.filetype = filetype
self.remote_filename_pattern = remote_filename_pattern
self.confirm = confirm
self.create_intermediate_dirs = create_intermediate_dirs
def execute(self, context: Any) -> str:
file_msg = None
try:
if self.ssh_conn_id:
if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
else:
self.log.info(
"ssh_hook is not provided or invalid. Trying ssh_conn_id to create SSHHook."
)
self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)
if not self.ssh_hook:
raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")
if self.remote_host is not None:
self.log.info(
"remote_host is provided explicitly. "
"It will replace the remote_host which was defined "
"in ssh_hook or predefined in connection of ssh_conn_id."
)
self.ssh_hook.remote_host = self.remote_host
with self.ssh_hook.get_conn() as ssh_client:
sftp_client = ssh_client.open_sftp()
all_files = sftp_client.listdir()
self.log.info(f'Found {len(all_files)} files on server')
timestamp = context['ds_nodash']
filename_pattern = self.remote_filename_pattern + timestamp
# fetch all CSV files for the run date that match the filename pattern
matching_files = [f for f in all_files
if f.find(filename_pattern) != -1]
# if file type is specified filter matching files for the file type
if self.filetype is not None:
matching_files = [filename for filename in matching_files
if filename[-len(self.filetype):] == self.filetype]
self.log.info(f'Found {len(matching_files)} files with name including {filename_pattern}')
local_folder = os.path.dirname(self.local_directory)
if self.create_intermediate_dirs:
Path(local_folder).mkdir(parents=True, exist_ok=True)
for f in matching_files:
self.log.info(f"Starting to transfer from /{f} to {self.local_directory}/{f}")
sftp_client.get(f'/{f}', f'{self.local_directory}/{f}')
except Exception as e:
raise AirflowException(f"Error while transferring {file_msg}, error: {str(e)}")
return self.local_directory
def _make_intermediate_dirs(sftp_client, remote_directory) -> None:
"""
Create all the intermediate directories in a remote host
:param sftp_client: A Paramiko SFTP client.
:param remote_directory: Absolute Path of the directory containing the file
:return:
"""
if remote_directory == '/':
sftp_client.chdir('/')
return
if remote_directory == '':
return
try:
sftp_client.chdir(remote_directory)
except OSError:
dirname, basename = os.path.split(remote_directory.rstrip('/'))
_make_intermediate_dirs(sftp_client, dirname)
sftp_client.mkdir(basename)
sftp_client.chdir(basename)
return
dag.py
sftp_report = SFTPGetMultipleFilesOperator(
task_id=f"sftp_reports_to_gcs",
ssh_conn_id="sftp_connection",
local_directory=f'/opt/airflow/dags/reports',
remote_filename_pattern=f'reportname_', # ds_nodash is added in the operator by accessing Airflow context
create_intermediate_dirs=True,
filetype='.csv'
)
I am moving from C# to Python, and I am guessing I am stepping on some namesapce issue, but I can't find the problem. Here is the current class that is giving me error (on line 41)
import imaplib
import os
import email
class EmailWrapper:
hostname = None # herp
username = None # derp
password = None # might encrypt this if there is time
def __init__(self, host, user, passwd):
self.hostname = host
self.username = user
self.password = passwd
# Create connection and return it
def connect(self, verbose=True):
if verbose: print 'Connecting to ', self.hostname
connection = imaplib.IMAP4_SSL(self.hostname)
if verbose: print 'Logging in as ', self.username
connection.login(self.username, self.password)
if verbose: print 'Selecting Inbox.'
connection.select('Inbox')
return connection
# Grab last email in inbox and return it from connection
def get_last_email(self, c):
# Get list of emails
result, data = c.search(None, "ALL")
# get last ID
ids = data[0]
idList = ids.split()
lastEmailID = idList[-1]
# Fetch email
result, data = c.fetch(lastEmailID, "(RFC822)")
# Seclect body and return it
rawEmail = data[0][1]
emailMessage = email.message_from_string(rawEmail)
return emailMessage
def close_connection(self, c):
c.close()
c.logout()
Any time I call get_last_email I get the error "AttributeError: 'module' object has no attribute 'message_from_string'". Any ideas would be appreciated.
Thanks
The problem, as pointed out by #jDo, was that i still had an empty email.py file sitting around in the project directory. Thanks!
I created a class in python that will send emails via one of my private servers. It works but I'm wondering if there is a method to replace an existing email body message with a new one?
Emailer Class
class Emailer:
def __init__(self, subj=None, message=None, toAddr=None, attachment=None, image=None):
# initialize email inputs
self.msg = email.MIMEMultipart.MIMEMultipart()
self.cidNum = 0
self.message = []
if message is not None:
self.addToMessage(message,image)
# set the subject of the email if there is one specified
self.subj = []
if subj is not None:
self.setSubject(subj)
# set the body of the email and any attachements specified
self.attachment = []
if attachment is not None:
self.addAtachment(attachment)
# set the recipient list
self.toAddr = []
if toAddr is not None:
self.addRecipient(toAddr)
def addAttachment(self,attachment):
logger.debug("Adding attachement to email")
# loop through list of attachments and add them to the email
if attachment is not None:
if type(attachment) is not list:
attachment = [attachment]
for f in attachment:
part = email.MIMEBase.MIMEBase('application',"octet-stream")
part.set_payload( open(f,"rb").read() )
encoders.encode_base64(part)
part.add_header('Content-Disposition', 'attachment; filename="{0}"'.format(os.path.basename(f)))
self.msg.attach(part)
def addToMessage(self,message,image=None):
logger.debug("Adding to email message. Content: [%s]" % message)
# add the plain text message
self.message.append(message)
# add embedded images to message
if image is not None:
if type(image) is not list:
image = [image]
for i in image:
msgText = email.MIMEText.MIMEText('<br><img src="cid:image%s"><br>' % self.cidNum, 'html')
self.msg.attach(msgText)
fp = open(i, 'rb')
img = email.MIMEImage.MIMEImage(fp.read())
fp.close()
img.add_header('Content-ID','<image%s>' % self.cidNum)
self.msg.attach(img)
self.cidNum += 1
# method to set the subject of the email
def setSubject(self,subj):
self.msg['Subject'] = subj
# method to add recipients to the email
def addRecipient(self, toAddr):
# loop through recipient list
for x in toAddr:
self.msg['To'] = x
# method to configure server settings: the server host/port and the senders login info
def configure(self, serverLogin, serverPassword, fromAddr, toAddr, serverHost='myserver', serverPort=465):
self.server=smtplib.SMTP_SSL(serverHost,serverPort)
self.server.set_debuglevel(True)
# self.server.ehlo()
# self.server.ehlo()
self.server.login(serverLogin, serverPassword) #login to senders email
self.fromAddr = fromAddr
self.toAddr = toAddr
# method to send the email
def send(self):
logger.debug("Sending email!")
msgText = email.MIMEText.MIMEText("\n".join(self.message))
self.msg.attach(msgText)
print "Sending email to %s " % self.toAddr
text = self.msg.as_string() #conver the message contents to string format
try:
self.server.sendmail(self.fromAddr, self.toAddr, text) #send the email
except Exception as e:
logger.error(e)
Currently, the addToMessage() method is what adds text to the body of the email. If addToMessage() had already been called but I wanted to replace that body text with new text, is there a way?
If addToMessage() had already been called but I wanted to replace that body text with new text, is there a way?
Yes. If you are always replacing the last entry added to self.message, you can reference this element with self.message[-1] since it is a list. If you want to replace a specific element, you can search for it with the index() method.
Example #1: Replace Last Written Text in Body
def replace_last_written_body_text(new_text):
if len(self.message) > 0:
self.message[-1] = new_text
Example #2: Replace Specified Text in Body
def replace_specified_body_text(text_to_replace, new_text):
index_of_text_to_replace = self.message.index(text_to_replace)
if index_of_text_to_replace is not None:
self.message[index_of_text_to_replace] = new_text
else:
logger.warning("Cannot replace non-existent body text")
If addToMessage has been called just once, then:
message is a list, and its first element is the body text, so you just need to replace that element with the new text:
def replace_body(self, new_text):
if len(self.message) > 0:
self.message[0] = new_text
else:
self.message = [new_text]
I haven't tested that, but it should work. Make sure you write some unit tests for this project!
EDIT:
if addToMessage has been called multiple times, then the new replace function could replace the entire text, or just part of it. If you want to replace all of it, then just replace message, like the part after else above: self.message = [new_text]. Otherwise, you're going to have to find the element you need to replace, like #BobDylan is doing in his answer.
I very new in Python, but i need to build a IMAP handler, i have now build so i can get the mails out, and i can save the attached files right now.
the next step i need to move the message after i have readed it and downloaded the attached items to my computer, i have build a class to handle it and what happen now you can see in my code here.
class IMAPHandler
mailFolder = "INBOX"
mailFolderCopyTo = "INOBX/Parsed"
localPath = "./tmp"
""" Defined server, username and password """
def __init__(self, server, username, password): ...
""" Connect to your IMAP mailbox """
def __login(self): ...
""" Close connection to your IMAP mailbox """
def __close(self): ...
""" Get all mailbox messegt """
def getAll(self):
self.__login()
rv, data = self.mailbox.search(None, 'UNSEEN', '(HEADER FROM "{mail-from}")')
if rv != 'OK':
print("No messages found!")
for num in data[0].split():
rv, data = self.mailbox.fetch(num, '(RFC822)')
print(rv)
email_body = data[0][1]
mail = email.message_from_string(email_body)
print "["+mail["From"]+"] :" + mail["Subject"]
print "-"
self.__saveAttachedFiles(mail)
self.__close()
def __saveAttachedFiles(self,mail): ...
after self.__saveAttachedFiles(mail) i need to move the mail-messegt, but i don't know how i do it.
I have trying the most of this day and the resolved are
""" Move mail messegt to parsed folder after its handle """
def __moveMailToParsedFolder(self,num):
mail_uid = num
apply_lbl_msg = self.mailbox.copy(mail_uid, self.mailFolderCopyTo)
if apply_lbl_msg[0] == 'OK':
self.mailbox.store(mail_uid, '+FLAGS', '\Deleted')
self.mailbox.expunge()
Now i only need to know why its only take 50% each time....
I built a class that watches for changes in a directory and upload them to a server, it is working fine for one dir. However, i had the idea to use the threading module from python to actually watch more than one directory. But, i am getting confused, since when i change a file in one location, it uploads just fine, but then the OTHER location starts uploading all it's files. I think it's because somehow the threads are sharing the same variable or something, but still it's impossible because each directory has it's own instance of the class working specifically for it.
Here's some code:
import os, ftplib, time
from threading import Thread
class FTPSync(Thread):
local_root = ''
remote_root = ''
host = ''
user = ''
password = ''
content = {
'previous': [],
'current': []
}
ignore = []
rest = 0.5
files = []
cwd = ''
watching = True
def __init__(self, local_root='', remote_root='', config={}):
Thread.__init__(self)
self.local_root = local_root if local_root != '' else os.path.join(os.path.dirname(__file__), os.pardir)
self.remote_root = remote_root
self.ignore = config['ignore'] if 'ignore' in config else []
self.rest = config['rest'] if 'rest' in config else 0.5
self.host, self.user, self.password = config['host'], config['user'], config['password']
self.content['previous'] = self.read_dir(self.local_root)
# Connect and reconnect to the server
def connect(self, reconnect=False):
print "Connecting..."
self.ftp = ftplib.FTP(self.host)
self.ftp.login(self.user, self.password)
print "Welcome message from server:\n"
print self.ftp.getwelcome()
if not reconnect:
self.cwd = self.remote_root
self.ftp.cwd(self.cwd)
# Start watching for local changes
def watch(self):
self.connect()
while self.watching:
self.files = []
self.content['current'] = self.read_dir(self.local_root)
diff = [f for f in self.content['current'] if f not in self.content['previous']]
if len(diff) > 0:
self.stor(diff)
self.content['previous'] = self.content['current']
diff = []
time.sleep(self.rest)
# Read a directory and its contents recursively
def read_dir(self, dir_name, return_value=True):
reading = os.listdir(dir_name)
file_content = None
for i in range(len(reading)):
d = self._local_abspath(dir_name, reading[i])
is_dir = os.path.isdir(d)
file_content = open(d).read() if not is_dir else None
offset = d.replace(self.local_root, '').replace(reading[i], '')
if is_dir and reading[i] not in self.ignore:
self.read_dir(d, return_value=False)
elif not is_dir:
info = {"name": reading[i], "content": file_content, "local_path": d, "offset": offset}
self.files.append(info)
if (return_value):
return self.files
pass
# Here we go
def run(self):
self.watch()
# Store (STOR) the files in the server
def stor(self, files):
nav = ''
try:
for f in files:
if self._server_abspath(f['offset']) != self.cwd:
nav = self._server_abspath(f['offset'])
self.ftp.cwd(nav)
mode = ''
if f['name'].split('.')[-1:][0] in ['jpg', 'png', 'gif'] or os.path.getsize(f['local_path']) > 8190:
mode = 'binary'
self.ftp.storbinary('STOR {!s}'.format(f['name']), open(f['local_path']))
else:
mode = 'ascii'
self.ftp.storlines('STOR {!s}'.format(f['name']), open(f['local_path']))
self.cwd = self._server_abspath(f['offset'])
print "Stored %s in %s mode" % (f['name'], mode)
# The connection has timed out
except ftplib.error_temp:
self.connect(reconnect=True)
self.stor(files)
# A new file has been created inside a folder that does not exist in the server
except ftplib.error_perm:
self.ftp.mkd(nav)
self.stor(files)
# A new folder has been created locally, but we'll wait to update this on the server
# when there's some content inside of it and throw us a ftplib.error_perm error, so here it'll just pass
except IOError:
pass
# Return the absolute path in the server
def _server_abspath(self, path):
return self.remote_root + '/' + path.replace('\\', '/')
# Return the absolute path locally
def _local_abspath(self, dn, fn):
return (dn +'\\'+ fn) if not dn[-1:]=='\\' else dn + fn
def start(local_root='', remote_root='', config={}):
instance = FTPSync(local_root, remote_root, config)
instance.start()
return instance
And this is how i use the class:
import ftpsync
config = {
'host': 'ftp.myhost.com',
'user': '****',
'password': '****',
'ignore': ['.git']
}
ftpsync.start(remote_root='/www/tst', config=config)
ftpsync.start(local_root='C:\\pygames', remote_root='/www/tst', config=config)
I would like to remember that it works fine for ONE directory.
After some time, I realized I had to use processes. I came back here in case someone finds it useful.
So basically, with threads you're just running two or more concurrent things at once, but they all share the same address space and memory, and can cause some unwanted things by having the same context and interacting with each other.
Now with processes, every process is independent from one another, so they all have resources reserved for each one of them. This won't let them share variables and stuff.