Multiprocessing Python Bottle Multiple Commands independent of a running pool.apply_async - python

The problem: I made a REST API with Bottle to start Elasticsearch bulk loads. The bulk load process runs inside a multiprocessing pool; the problem is that while it is running, the API won't accept any other commands.
I've even tried running another instance of the API on a different port, but I have a separate Stack Overflow question about that too. The second one doesn't accept commands either.
I want to be able to call a different API command to get the status of the load and return it. Currently it just includes ES data, but eventually it's going to include each node's stats. This is designed to run from Jenkins and initiate parallel loads.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright [current year] the Melange authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import RequestError
from subprocess import Popen, PIPE
from multiprocessing import Pool, Process, pool
from datetime import datetime
import boto3
import sys
import os
import argparse
import logging
import logging.config
from bottle import route, run
from boto.cloudformation.stack import Output
import json
#this is what is called to set up the loading process from the api.
def start_load(secret, access, protocol, host, ports, index, type, mapping, data,threads):
# decompress a gzip string
def decompress_gzip(data):
return Popen(['zcat'], stdout=PIPE, stdin=PIPE).communicate(input=data)[0]
# parse an s3 path into a bucket and key 's3://my-bucket/path/to/data' -> ('my-bucket', 'path/to/data')
def parse_s3_path(str):
_, _, bucket, key = str.split('/', 3)
return (bucket, key)
def shell_command_execute(command):
p = Popen(command, stdout=PIPE, shell=True)
(output, err) = p.communicate()
return output
# load an S3 file to elasticsearch
def load_s3_file(s3_bucket, s3_key, es_host, es_port, es_index, es_type, access, secret):
try:
logging.info('loading s3://%s/%s', s3_bucket, s3_key)
s3 = boto3.client('s3', aws_access_key_id=access, aws_secret_access_key=secret)
file_handle = s3.get_object(Bucket=s3_bucket, Key=s3_key)
file_contents = file_handle['Body'].read()
logging.info('%s'%s3_key)
if file_contents:
if s3_key.endswith('.gz'):
file_contents = decompress_gzip(file_contents)
es = Elasticsearch(host=es_host, port=es_port, timeout=180)
es.bulk(body=file_contents, index=es_index, doc_type=es_type, timeout=120)
except Exception as e:
logging.error("There has been a major error %s" % e)
# load an S3 file to elasticsearch
def load_single_s3_file(s3_bucket, s3_key, es_host, es_port, es_index, es_type, access, secret):
try:
logging.info('loading s3://%s/%s', s3_bucket, s3_key)
s3 = boto3.client('s3', aws_access_key_id=access, aws_secret_access_key=secret)
file_handle = s3.get_object(Bucket=s3_bucket, Key=s3_key)
file_contents = file_handle['Body'].read()
logging.info('%s'%s3_key)
if file_contents:
if s3_key.endswith('.gz'):
file_contents = decompress_gzip(file_contents)
es = Elasticsearch(host=es_host, port=es_port, timeout=180)
res = es.get(index="test-index", doc_type='tweet', id=1)
es.insert(body = file_contents, index = es_index, doc_type=es_type, timeout=120)
except Exception as e:
logging.error("There has been a major error %s" % e)
start = datetime.now()
es_url = protocol + '://' + host + ':' + str(ports) + '/' + index + '/' + type
es = Elasticsearch(host=host, port=ports, timeout=180)
# S3 file - https://boto3.readthedocs.org/en/latest/reference/services/s3.html#object
s3 = boto3.client('s3', aws_access_key_id=access, aws_secret_access_key=secret)
s3_bucket, s3_key = parse_s3_path(mapping)
file_handle = s3.get_object(Bucket=s3_bucket, Key=s3_key)
mapping = file_handle['Body'].read()
try:
es.indices.create(index=index, body=mapping)
except:
logging.error('index exist')
logging.info('starting to load %s to %s', data, es_url)
es.indices.put_settings({'index': {'refresh_interval': '-1'}}, index=index)
pool = Pool(processes=int(threads))
s3 = boto3.resource('s3', aws_access_key_id=access, aws_secret_access_key=secret)
s3_bucket, s3_key = parse_s3_path(data)
for file_summary in s3.Bucket(s3_bucket).objects.all():
if file_summary.key.startswith(s3_key):
pool.apply_async(load_s3_file, args=(s3_bucket, file_summary.key, host, ports, index, type, access, secret))
pool.close()
pool.join()
es.indices.put_settings({'index': {'refresh_interval': '1s'}}, index=index)
logging.info('finished loading %s to %s in %s', data, es_url, str(datetime.now() - start))
sys.exit(0)
#reset_es_settings(host, ports)
#This is what is called when no arguments are given
@route('/load_data/')
def no_comands():
return """Please include all nessecary values: example:
Start Load
http://127.0.0.1:8001/load_data/load&host=ip or DNS&thread=5&mappinglocation=tr-ips-ses-data|mappings|version_1_2|wos.mapping&datalocation=tr-ips-ses-data|json-data|wos|20150724|wos-1&port=9200&index=wos4&protocol=http&type=wos&access=access_key&secret=secret_key
Delete Index
http://127.0.0.1:8001/delete/wos4&host=ip or DNS&port=9200
with loading you must specify the load command as shown above
use & to seperate values
use = to seperate key value pairs
use | to insert \
"""
#route('/load_data/<name>', method='GET')
def commands( name="Execute Load" ):
values = name.split('&')
#split apart the url syntax items are split by & key values by = and any plcae that needs \ gets |
try:
command = values[0]
host = values[1] + ".us-west-2.elb.amazonaws.com"
threads = values[2]
mapping_location = values[3].replace('|', '/')
data_location = values[4].replace('|', '/')
#mapping_location = values[3]
#data_location = values[4]
ports = values[5]
index = values[6]
protocol = values[7]
type = values[8]
access = values[9]
secret = values[10]
host = host.split('=')[1]
threads = threads.split('=')[1]
mapping_location = "s3://" + mapping_location.split('=')[1]
data_location = "s3://" + data_location.split('=')[1]
ports = ports.split('=')[1]
index = index.split('=')[1]
protocol = protocol.split('=')[1]
types = type.split('=')[1]
access = access.split('=')[1]
secret = secret.split('=')[1]
yield ("Starting Load of data use /get_status/es_url&es_port&index to get the status of your load.")
start_load(secret, access, protocol, host, ports, index, types, mapping_location, data_location,threads)
except Exception as e:
logging.error(e)
yield """Please include all nessecary values: example:
Start Load
http://127.0.0.1:8001/load_data/load&host=ip or DNS&thread=5&mappinglocation=tr-ips-ses-data|mappings|version_1_2|wos.mapping&datalocation=tr-ips-ses-data|json-data|wos|20150724|wos-1&port=9200&index=wos4&protocol=http&type=wos&access=access_key&secret=secret_key
Delete Index
http://127.0.0.1:8001/delete/wos4&host=ip or DNS&port=9200
with loading you must specify the load command as shown above
use & to seperate values
use = to seperate key value pairs
use | to insert \
"""
#This is what is called when /delete/ is used.
#route('/delete/<name>', method='GET' )
def recipe_delete( name="Delete Index" ):
def shell_command_execute(command):
p = Popen(command, stdout=PIPE, shell=True)
(output, err) = p.communicate()
return output
values = name.split('&')
try:
#split apart the url syntax items are split by & key values by |
index = values[0]
host = values[1] + ".us-west-2.elb.amazonaws.com"
host = host.split('=')[1]
port = values[2]
port = port.split('=')[1]
except Exception as e:
logging.error(e)
return """Please include all nessecary values: example:
Start Load
http://127.0.0.1:8001/load_data/load&host=ip or DNS&thread=5&mappinglocation=tr-ips-ses-data|mappings|version_1_2|wos.mapping&datalocation=tr-ips-ses-data|json-data|wos|20150724|wos-1&port=9200&index=wos4&protocol=http&type=wos&access=access_key&secret=secret_key
Delete Index
http://127.0.0.1:8001/delete/wos4&host=ip or DNS&port=9200
with loading you must specify the load command as shown above
use & to seperate values
use = to seperate key value pairs
use | to insert \
"""
try:
#This is the command that deletes the index.
curl_command = 'curl -XDELETE http://' + host + ':9200/' + index
shell_command_execute(curl_command)
return "Successfully Deleted Index"
except Exception as e:
logging.error(e)
return "Failed to Deleted Index %s" % e
if __name__ == '__main__':
reload(sys)
sys.setdefaultencoding('utf8')
url = os.path.dirname(os.path.realpath(__file__)) + '/logging.ini'
print url
logging.config.fileConfig(url)
run(host='172.31.28.189', port=8001, debug=True)
#run(host='127.0.0.1', port=8001, debug=True)
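A note on the blocking itself: the request handler above does not return until start_load() has finished pool.join(), and Bottle's built-in WSGI server handles one request at a time, so every other call waits for the whole load. One rough sketch of a workaround (my own assumptions, not the original code: the stub load_s3_file, the route names and the status fields are placeholders) is to launch the load in a separate Process and publish progress through a Manager dict that a /get_status/ route can read:
#!/usr/bin/env python
# Minimal sketch: run the bulk load in a separate Process and share progress
# through a Manager dict so /get_status/<name> stays reachable while it runs.
import time
from multiprocessing import Process, Manager, Pool
from bottle import route, run

def load_s3_file(key):
    time.sleep(2)                      # placeholder for the real S3/Elasticsearch bulk load
    return key

def run_load(job_id, keys, threads, status):
    status[job_id] = {'state': 'running', 'done': 0, 'total': len(keys)}
    pool = Pool(processes=threads)
    for key in keys:
        def done(_result, job_id=job_id, total=len(keys)):
            cur = status[job_id]
            status[job_id] = {'state': 'running', 'done': cur['done'] + 1, 'total': total}
        pool.apply_async(load_s3_file, args=(key,), callback=done)
    pool.close()
    pool.join()
    status[job_id] = {'state': 'finished', 'done': len(keys), 'total': len(keys)}

manager = Manager()
load_status = manager.dict()           # shared between the API process and the load process

@route('/load_data/<name>')
def start(name):
    keys = ['file-1.json.gz', 'file-2.json.gz']   # placeholder for the real S3 listing
    Process(target=run_load, args=(name, keys, 2, load_status)).start()
    return 'Load %s started; poll /get_status/%s' % (name, name)

@route('/get_status/<name>')
def get_status(name):
    return dict(load_status.get(name, {'state': 'unknown'}))

if __name__ == '__main__':
    run(host='127.0.0.1', port=8001)
Because the request now returns immediately, the status route remains reachable even with Bottle's single-threaded default server.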

Related

AIRFLOW: Customise SFTPOperator to download multiple files

I'm trying to customise the SFTPOperator to download multiple files from a server. I know that the original SFTPOperator only allows one file at a time.
I copied the same code from the source and tweaked it by adding a new function called get_xml_from_source(). Please refer to the code below:
def get_xml_from_source(sftp_client, remote_filepath, local_filepath, prev_execution_date, execution_date):
"""
Copy from Source to local path
"""
files_attr = sftp_client.listdir_attr(remote_filepath) # eg: /source/ HITTING ERROR HERE
files_name = sftp_client.listdir(remote_filepath) # eg: /source/
today_midnight = datetime.combine(datetime.today(), time.min)
yesterday_midnight = today_midnight - timedelta(days=1)
for file_attr, file_name in zip(files_attr, files_name):
modified_time = datetime.fromtimestamp(file_attr.st_mtime)
if yesterday_midnight <= modified_time < today_midnight:
# if prev_execution_date <= modified_time < execution_date:
try:
# Download to local path
sftp_client.get(remote_filepath, local_filepath)
print(file_name)
except: # pylint: disable=bare-except
print("File not found")
else:
print("Not the file!")
With this, the function should only download files modified from yesterday up to today.
I added the function at this line:
with self.ssh_hook.get_conn() as ssh_client:
sftp_client = ssh_client.open_sftp()
if self.operation.lower() == SFTPOperation.GET:
local_folder = os.path.dirname(self.local_filepath)
if self.create_intermediate_dirs:
# Create Intermediate Directories if it doesn't exist
try:
os.makedirs(local_folder)
except OSError:
if not os.path.isdir(local_folder):
raise
file_msg = "from {0} to {1}".format(self.remote_filepath,
self.local_filepath)
self.log.info("Starting to transfer %s", file_msg)
# This is where it starts to copy, customization begins here
# sftp_client.get(self.remote_filepath, self.local_filepath) <--- Original code that I commented out and replace with mine below
get_xml_from_source(sftp_client, self.remote_filepath,
self.local_filepath, self.prev_execution_date, self.execution_date)
Note that the rest of the code did not change; it is exactly how it looks in the source.
I keep hitting an error at files_attr = sftp_client.listdir_attr(remote_filepath) with this error:
Error while transferring from /source/ to
/path/to/destination, error: [Errno 2] No such file.
Which obviously means it can't find the SFTP directory. I tried running the whole function locally and it works fine.
Is there any part of the code that ties the paramiko connection to getting only one file? I checked the paramiko connection for SFTPOperator and it should be fine. In this case, how should I fix it?
This is how I established my connection when running locally:
def connect_to_source():
"""
Get source credentials
:param: None
:return: username & password
"""
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
username, password = get_eet_credentials()
# key = paramiko.RSAKey.from_private_key_file(openssh_key, password=password)
ssh.connect(hostname=SFTP_SERVER, port=SFTP_PORT_NUMBER,
username=username, password=password)
client = ssh.open_sftp()
print("Connection to source success!")
return client
Lastly, below is my airflow task:
def copy_from_source():
"""
Copy XML file from source to local path
"""
return SFTPOperator(
task_id="copy_from_source",
ssh_conn_id="source_conn",
local_filepath=f"{current_dir}/destination",
remote_filepath= "/source/",
prev_execution_date='{{ prev_execution_date }}',
execution_date='{{ execution_date }}', # strftime("%Y-%m-%d %H:%M:%S")
create_intermediate_dirs=True,
operation="get",
dag=dag
)
I'm trying to do something similar to you. I'm not sure what is causing the issues you are facing, but this is an updated SFTP operator I have written that gets multiple files from a server:
sftp_get_multiple_files_operator.py
import os
from pathlib import Path
from typing import Any
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.contrib.hooks import SSHHook
class SFTPGetMultipleFilesOperator(BaseOperator):
template_fields = ('local_directory', 'remote_filename_pattern', 'remote_host')
def __init__(
self,
*,
ssh_hook=None,
ssh_conn_id=None,
remote_host=None,
local_directory=None,
remote_filename_pattern=None,
filetype=None,
confirm=True,
create_intermediate_dirs=False,
**kwargs,
) -> None:
super().__init__(**kwargs)
self.ssh_hook = ssh_hook
self.ssh_conn_id = ssh_conn_id
self.remote_host = remote_host
self.local_directory = local_directory
self.filetype = filetype
self.remote_filename_pattern = remote_filename_pattern
self.confirm = confirm
self.create_intermediate_dirs = create_intermediate_dirs
def execute(self, context: Any) -> str:
file_msg = None
try:
if self.ssh_conn_id:
if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
else:
self.log.info(
"ssh_hook is not provided or invalid. Trying ssh_conn_id to create SSHHook."
)
self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)
if not self.ssh_hook:
raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")
if self.remote_host is not None:
self.log.info(
"remote_host is provided explicitly. "
"It will replace the remote_host which was defined "
"in ssh_hook or predefined in connection of ssh_conn_id."
)
self.ssh_hook.remote_host = self.remote_host
with self.ssh_hook.get_conn() as ssh_client:
sftp_client = ssh_client.open_sftp()
all_files = sftp_client.listdir()
self.log.info(f'Found {len(all_files)} files on server')
timestamp = context['ds_nodash']
filename_pattern = self.remote_filename_pattern + timestamp
# fetch all CSV files for the run date that match the filename pattern
matching_files = [f for f in all_files
if f.find(filename_pattern) != -1]
# if file type is specified filter matching files for the file type
if self.filetype is not None:
matching_files = [filename for filename in matching_files
if filename[-len(self.filetype):] == self.filetype]
self.log.info(f'Found {len(matching_files)} files with name including {filename_pattern}')
local_folder = os.path.dirname(self.local_directory)
if self.create_intermediate_dirs:
Path(local_folder).mkdir(parents=True, exist_ok=True)
for f in matching_files:
self.log.info(f"Starting to transfer from /{f} to {self.local_directory}/{f}")
sftp_client.get(f'/{f}', f'{self.local_directory}/{f}')
except Exception as e:
raise AirflowException(f"Error while transferring {file_msg}, error: {str(e)}")
return self.local_directory
def _make_intermediate_dirs(sftp_client, remote_directory) -> None:
"""
Create all the intermediate directories in a remote host
:param sftp_client: A Paramiko SFTP client.
:param remote_directory: Absolute Path of the directory containing the file
:return:
"""
if remote_directory == '/':
sftp_client.chdir('/')
return
if remote_directory == '':
return
try:
sftp_client.chdir(remote_directory)
except OSError:
dirname, basename = os.path.split(remote_directory.rstrip('/'))
_make_intermediate_dirs(sftp_client, dirname)
sftp_client.mkdir(basename)
sftp_client.chdir(basename)
return
dag.py
sftp_report = SFTPGetMultipleFilesOperator(
task_id=f"sftp_reports_to_gcs",
ssh_conn_id="sftp_connection",
local_directory=f'/opt/airflow/dags/reports',
remote_filename_pattern=f'reportname_', # ds_nodash is added in the operator by accessing Airflow context
create_intermediate_dirs=True,
filetype='.csv'
)
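As a side note (my own assumption, not part of the operator above): if you want to select files by modification time, the way the original get_xml_from_source() intended, rather than by a filename pattern, the same loop in execute() could filter with listdir_attr(). A minimal sketch, assuming a paramiko SFTP client and a remote directory that actually exists:
# Hedged variant: pick files whose modification time falls between yesterday's
# and today's midnight, mirroring the question's intent.
from datetime import datetime, time, timedelta

def files_modified_yesterday(sftp_client, remote_dir='.'):
    """Return names of files in remote_dir modified between yesterday's and today's midnight."""
    today_midnight = datetime.combine(datetime.today(), time.min)
    yesterday_midnight = today_midnight - timedelta(days=1)
    selected = []
    for attr in sftp_client.listdir_attr(remote_dir):
        modified = datetime.fromtimestamp(attr.st_mtime)
        if yesterday_midnight <= modified < today_midnight:
            selected.append(attr.filename)
    return selected
Inside execute() you would then build matching_files from files_modified_yesterday(sftp_client, '/source/') instead of the filename_pattern match.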

Get List of latest stable files from FTP server

I'm trying to get the list of files that are fully uploaded on the FTP server.
I have access to this FTP server where a 3rd party writes data and marker files every 15 minutes. Once a data file is completely uploaded, a marker file gets created; once this marker file is there, we know the data file is ready and we can download it. I'm looking for a way to approach this problem efficiently. I want to check every minute whether there are any new stable files on the FTP server, and if there are, download them. One preferred way is to check whether the marker file is 2 minutes old; then we are good to download the marker file and the corresponding data file.
I'm new to Python and looking for help.
I have some code that goes as far as listing the files:
import paramiko
from datetime import datetime, timedelta
FTP_HOST = 'host_address'
FTP_PORT = 21
FTP_USERNAME = 'username'
FTP_PASSWORD = 'password'
FTP_ROOT_PATH = 'path_to_dir'
def today():
return datetime.strftime(datetime.now(), '%Y%m%d')
def open_ftp_connection(ftp_host, ftp_port, ftp_username, ftp_password):
"""
Opens ftp connection and returns connection object
"""
client = paramiko.SSHClient()
client.load_system_host_keys()
try:
transport = paramiko.Transport(ftp_host, ftp_port)
except Exception as e:
return 'conn_error'
try:
transport.connect(username=ftp_username, password=ftp_password)
except Exception as identifier:
return 'auth_error'
ftp_connection = paramiko.SFTPClient.from_transport(transport)
return ftp_connection
def show_ftp_files_stat():
ftp_connection = open_ftp_connection(FTP_HOST, int(FTP_PORT), FTP_USERNAME, FTP_PASSWORD)
full_ftp_path = FTP_ROOT_PATH + "/" + today()
file_attr_list = ftp_connection.listdir_attr(full_ftp_path)
print(file_attr_list)
for file_attr in file_attr_list:
print(file_attr.filename, file_attr.st_size, file_attr.st_mtime)
if __name__ == '__main__':
show_ftp_files_stat()
Sample file name
org-reference-delta-quotes.REF.48C2.20200402.92.1.1.txt.gz
Sample corresponding marker file name
org-reference-delta-quotes.REF.48C2.20200402.92.note.txt.gz
I solved my use case with a 2-minute stability rule: if the modified time is more than 2 minutes before the current time, I consider the file stable.
import logging
import time
from datetime import datetime, timezone
from ftplib import FTP
FTP_HOST = 'host_address'
FTP_PORT = 21
FTP_USERNAME = 'username'
FTP_PASSWORD = 'password'
FTP_ROOT_PATH = 'path_to_dir'
logger = logging.getLogger()
logger.setLevel(logging.ERROR)
def today():
return datetime.strftime(datetime.now(tz=timezone.utc), '%Y%m%d')
def current_utc_ts():
return datetime.utcnow().timestamp()
def current_utc_ts_minus_120():
return int(datetime.utcnow().timestamp()) - 120
def yyyymmddhhmmss_string_epoch_ts(dt_string):
return time.mktime(time.strptime(dt_string, '%Y%m%d%H%M%S'))
def get_ftp_connection(ftp_host, ftp_username, ftp_password):
try:
ftp = FTP(ftp_host, ftp_username, ftp_password)
except Exception as e:
print(e)
logger.error(e)
return 'conn_error'
return ftp
def get_list_of_files(ftp_connection, date_to_process):
full_ftp_path = FTP_ROOT_PATH + "/" + date_to_process + "/"
ftp_connection.cwd(full_ftp_path)
entries = list(ftp_connection.mlsd())
entry_list = [line for line in entries if line[0].endswith('.gz') | line[0].endswith('.zip')]
ftp_connection.quit()
print('Total file count', len(entry_list))
return entry_list
def parse_file_list_to_dict(entries):
try:
file_dict_list = []
for line in entries:
file_dict = dict({"file_name": line[0],
"server_timestamp": int(yyyymmddhhmmss_string_epoch_ts(line[1]['modify'])),
"server_date": line[0].split(".")[3])
file_dict_list.append(file_dict)
except IndexError as e:
# Output expected IndexErrors.
logging.exception(e)
except Exception as exception:
# Output unexpected Exceptions.
logging.exception(exception, False)
return file_dict_list
def get_stable_files_dict_list(dict_list):
stable_list = list(filter(lambda d: d['server_timestamp'] < current_utc_ts_minus_120(), dict_list))
print('stable file count: {}'.format(len(stable_list)))
return stable_list
if __name__ == '__main__':
ftp_connection = get_ftp_connection(FTP_HOST, FTP_USERNAME, FTP_PASSWORD)
if ftp_connection == 'conn_error':
logger.error('Failed to connect FTP Server!')
else:
file_list = get_list_of_files(ftp_connection, today())
parse_file_list = parse_file_list_to_dict(file_list)
stable_file_list = get_stable_files_dict_list(parse_file_list)
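To then pair each stable marker file with its data file(s), based on the two sample names above, something like the following could work (the prefix rule is my assumption, inferred from those samples, and may need adjusting):
# Hedged sketch: group data files under the marker that shares their name prefix.
def pair_markers_with_data(stable_files):
    """stable_files: list of dicts with a 'file_name' key, as built above."""
    names = [d['file_name'] for d in stable_files]
    markers = [n for n in names if '.note.' in n]
    pairs = {}
    for marker in markers:
        prefix = marker.split('.note.')[0]          # e.g. org-reference-delta-quotes.REF.48C2.20200402.92
        pairs[marker] = [n for n in names
                         if n.startswith(prefix + '.') and '.note.' not in n]
    return pairs

# usage with the lists built in __main__ above:
# ready = pair_markers_with_data(stable_file_list)
# for marker, data_files in ready.items():
#     ...download marker and data_files here...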

Xively: how to activate a device with the python api?

Since COSM has become Xively, a nice device API has been added (or was always there, not sure). The flow is:
create product batch with serial numbers
activate devices using some product batch identifiers (?)
start using the device with the obtained feed/api keys
I can't figure out how to do this via the Python API. Are there any pointers?
This should be added to the library, but for now you can use this code to implement device activation. I have used environment variables to store the product secret and device serial, but change that to whatever suits your use case. The only tricky part is that you need to call a2b_hex().
import xively
from os import environ
from hashlib import sha1
from binascii import a2b_hex
import hmac
secret = environ['XIVELY_PRODUCT_SECRET']
serial = environ['XIVELY_DEVICE_SERIAL_NUMBER']
activation = hmac.new(a2b_hex(secret), serial, sha1).hexdigest()
creds = xively.Client(key=None).get('/v2/devices/'+activation+'/activate').json()
xi_feed = xively.XivelyAPIClient(creds['apikey']).feeds.get(creds['feed_id'])
You will also need to take care to store the credentials in a file, as a device can be activated only once. You will notice 403 errors if you try to run this code again and again, so use the Xively developer workbench to deactivate the device under test (you may need to refresh the page).
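For instance, continuing the snippet above, a minimal sketch of persisting the returned credentials (the filename is my assumption; the fuller example below does the same thing with ConfigParser):
# Reuse stored credentials if they exist, otherwise activate once and save them.
import json, os

CREDS_FILE = 'xively_creds.json'

if os.path.exists(CREDS_FILE):
    with open(CREDS_FILE) as f:
        creds = json.load(f)             # reuse the stored feed_id / apikey
else:
    creds = xively.Client(key=None).get('/v2/devices/'+activation+'/activate').json()
    with open(CREDS_FILE, 'w') as f:
        json.dump(creds, f)

xi_feed = xively.XivelyAPIClient(creds['apikey']).feeds.get(creds['feed_id'])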
Here is a fully working example using config files or environment variables:
#!/usr/bin/python
from os import environ
from hashlib import sha1
from binascii import a2b_hex
import hmac
import sys, subprocess
import ConfigParser
import xively
CONFIG_FILE = 'xively.conf'
PROVISIONING = 'PROVISIONING'
PROVISIONING_PRODUCT_SECRET = 'PRODUCT_SECRET'
PROVISIONING_DEVICE_SERIAL = 'DEVICE_SERIAL'
PROVISIONING_FEED_ID = 'FEED_ID'
PROVISIONING_API_KEY = 'API_KEY'
def get_setting(config, section, key):
try:
value = config.get(section, key)
except:
print key + " not found in config file. Using environment variable " + key + " instead."
try:
value = environ[key]
except:
print key + " not found in environment."
raise
# value defined?
if not value:
raise
return value
def xively_activate_product(secret, serial):
activation = hmac.new(a2b_hex(secret), serial, sha1).hexdigest()
creds = xively.Client(key=None).get('/v2/devices/'+activation+'/activate').json()
return creds
# main
config = ConfigParser.RawConfigParser()
config.read(CONFIG_FILE)
try:
# see if we already have an api key and feed id
feed_id = config.get(PROVISIONING, PROVISIONING_FEED_ID)
api_key = config.get(PROVISIONING, PROVISIONING_API_KEY)
print "Provisioned product details:"
print "FEED_ID: " + str(feed_id)
print "API_KEY: " + api_key
# continue working with your activated product here
except:
print "FEED_ID and API_KEY not found. Activating product now."
# no error handling for secret- it _is_ needed
try:
secret = get_setting(config, PROVISIONING, PROVISIONING_PRODUCT_SECRET)
except:
print "Finding " + PROVISIONING_PRODUCT_SECRET + " failed. Giving up."
sys.exit(1)
try:
serial = get_setting(config, PROVISIONING, PROVISIONING_DEVICE_SERIAL)
except:
serial = subprocess.check_output("hostname", shell=True)
if not serial:
print "Fallback to hostname for " + PROVISIONING_DEVICE_SERIAL + " failed. Giving up."
sys.exit(1)
try:
creds = xively_activate_product(secret, serial)
# check if there were errors
try:
creds["errors"]
except:
pass
else:
print "Product activation failed (" + creds["title"] +": "+ creds["errors"] + ")."
sys.exit(1)
feed_id = creds['feed_id']
api_key = creds['apikey']
print "Product activation successful."
print "FEED_ID: " + str(feed_id)
print "API_KEY: " + api_key
if not config.has_section(PROVISIONING):
config.add_section(PROVISIONING)
config.set(PROVISIONING, PROVISIONING_FEED_ID, feed_id)
config.set(PROVISIONING, PROVISIONING_API_KEY, api_key)
# Writing our configuration file to 'example.cfg'
with open(CONFIG_FILE, 'wb') as configfile:
config.write(configfile)
except Exception as e:
print "Product activation failed (" + str(e) +")."
sys.exit(1)
This is another helpful class I wrote:
## Logging for debugging purposes
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
import os
from os import environ
from hashlib import sha1
from binascii import a2b_hex
import hmac
import sys, subprocess
import ConfigParser
import xively
PROVISIONING = 'PROVISIONING'
PROVISIONING_PRODUCT_SECRET = 'PRODUCT_SECRET'
PROVISIONING_FEED_ID = 'FEED_ID'
PROVISIONING_API_KEY = 'API_KEY'
class XivelyManager:
def __init__(self, settings="xively.conf"):
# main
self.settings=settings
self.config = ConfigParser.RawConfigParser()
self.config.read(settings)
try:
# see if we already have an api key and feed id
self.api_key = self.get_setting(PROVISIONING, PROVISIONING_API_KEY)
self.secret = self.get_setting(PROVISIONING, PROVISIONING_PRODUCT_SECRET)
# continue working with your activated product here
except:
logger.exception( "API KEY and SECRET NOT FOUND" )
def activate_sensor(self,serial):
try:
creds = self.xively_activate_product(str(serial))
# check if there were errors
try:
creds["errors"]
except:
pass
else:
logger.exception("Product activation failed (" + creds["title"] +": "+ creds["errors"] + ").")
return False
feed_id = creds['feed_id']
api_key = creds['apikey']
if not self.config.has_section(PROVISIONING):
self.config.add_section(PROVISIONING)
if not self.config.has_section(str(serial)):
self.config.add_section(str(serial))
self.config.set(PROVISIONING, PROVISIONING_API_KEY, api_key)
self.config.set(str(serial), PROVISIONING_FEED_ID , feed_id)
# Writing our configuration file to 'xively.cfg'
with open(self.settings, 'wb') as configfile:
self.config.write(configfile)
return True
except Exception as e:
logger.exception("Product activation failed (" + str(e) +").")
return False
def get_setting(self, section, key):
try:
value = self.config.get(section, key)
except:
logger.exception( key + " not found in config file. Using environment variable " + key + " instead.")
## try:
## value = environ[key]
## except:
## logger.exception( key + " not found in environment.")
## finally:
## pass
finally:
# value defined?
if not value:
raise
return value
def get_feed(self,serial):
try:
if self.config.has_section(str(serial)):
feed_id = self.get_setting(str(serial), PROVISIONING_FEED_ID)
else:
feed_id=False
except Exception, e:
feed_id=False
finally:
return feed_id
def xively_activate_product(self, serial):
activation = hmac.new(a2b_hex(self.secret), serial, sha1).hexdigest()
creds = xively.Client(key=None).get('/v2/devices/'+activation+'/activate').json()
return creds
if __name__ == "__main__":
print "Testing Xively Manager "
settings = os.path.join(os.path.dirname(sys.argv[0]), "config", "xively.conf")
print settings
testxive=XivelyManager(settings)
#print testxive.activate_sensor(10)
print testxive.get_feed(10)
This is helpful when your internet gateway is connected to several other devices.
Your config file will be updated with stuff like this:
[PROVISIONING]
product_secret = xxxxxxxxxxxxxxxxxxxxxxxxxxxx
api_key = xxxxxxxxxxxxxxxxxxxxxxxx
[productserial1]
feed_id = xxxxxxxx
[productserial2]
feed_id = xxxxxxxx

Python fast static file serving

What's the fastest way to serve static files in Python? I'm looking for something equal or close enough to Nginx's static file serving.
I know of SimpleHTTPServer, but I'm not sure whether it can handle serving multiple files efficiently and reliably.
Also, I don't mind it being part of a lib/framework of some sort, as long as that lib/framework is lightweight.
EDIT: This project appears to be dead.
What about FAPWS3? One of the selling points:
Static file server
FAPWS can be used to serve a huge amount of static file requests. With the help of an async database in the backend, you can use FAPWS as your own Amazon S3.
If you are looking for a one-liner, you can do the following:
$> python -m SimpleHTTPServer
This will not fulfil all the requirements, but it is worth mentioning because it is the simplest way :-)
I would highly recommend using a 3rd party HTTP server to serve static files.
Servers like nginx are heavily optimized for the task at hand, parallelized and written in fast languages.
Python is tied to one processor and interpreted.
The original SimpleHTTPServer from the Python standard library does NOT "handle serving multiple files efficiently and reliably". For instance, if you are downloading one file from it, another HTTP request to it will hang, since SimpleHTTPServer.py is a simple single-threaded HTTP server that can only support one connection at a time.
Fortunately, SimpleHTTPServer.py uses BaseHTTPServer.HTTPServer as its server class, which can be wrapped with SocketServer.ForkingMixIn or SocketServer.ThreadingMixIn (also from the Python standard library) to support multi-process or multi-thread mode, which greatly improves the simple HTTP server's "efficiency and reliability".
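The core of that idea fits in a few lines; here is a minimal sketch under Python 2 (the version this answer targets), before the full modified server below:
# Minimal sketch of the MixIn idea: same handler, but each request gets its own thread.
import BaseHTTPServer, SimpleHTTPServer, SocketServer

class ThreadingHTTPServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
    pass                                   # requests are handled concurrently in threads

if __name__ == '__main__':
    server = ThreadingHTTPServer(('', 8000), SimpleHTTPServer.SimpleHTTPRequestHandler)
    server.serve_forever()                 # serves the current directory
On Python 3.7+ the standard library already ships http.server.ThreadingHTTPServer, and the python3 -m http.server command line uses it.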
Building on this idea, a full SimpleHTTPServer with multi-thread/multi-process support, modified from the original, is given below:
$ python2.7 ModifiedSimpleHTTPServer.py
usage: ModifiedSimpleHTTPServer.py [-h] [--pydoc] [--port PORT]
[--type {process,thread}] [--root ROOT]
[--run]
Modified SimpleHTTPServer with MultiThread/MultiProcess and IP bind support.
Original: https://docs.python.org/2.7/library/simplehttpserver.html
Modified by: vbem@163.com
optional arguments:
-h, --help show this help message and exit
--pydoc show this module's pydoc
run arguments:
--port PORT specify server port (default: 8000)
--type {process,thread}
specify server type (default: 'thread')
--root ROOT specify root directory (default: cwd '/home/vbem')
--run run http server foreground
NOTE: stdin for input, stdout for result, stderr for logging
For example, ModifiedSimpleHTTPServer.py --run --root /var/log --type process will run a multi-process HTTP static file server with '/var/log' as its root directory.
The modified code is:
#! /usr/bin/env python2.7
# -*- coding: utf-8 -*-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
r"""Modified SimpleHTTPServer with MultiThread/MultiProcess and IP bind support.
Original: https://docs.python.org/2.7/library/simplehttpserver.html
Modified by: vbem@163.com
"""
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import os, sys, pwd, posixpath, BaseHTTPServer, urllib, cgi, shutil, mimetypes, socket, SocketServer
from cStringIO import StringIO
USERNAME = pwd.getpwuid(os.getuid()).pw_name
HOSTNAME = socket.gethostname()
PORT_DFT = 8000
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
server_version = "SimpleHTTP/0.6"
def do_GET(self):
f = self.send_head()
if f:
self.copyfile(f, self.wfile)
f.close()
def do_HEAD(self):
f = self.send_head()
if f:
f.close()
def send_head(self):
path = self.translate_path(self.path)
f = None
if os.path.isdir(path):
if not self.path.endswith('/'):
self.send_response(301)
self.send_header("Location", self.path + "/")
self.end_headers()
return None
for index in "index.html", "index.htm":
index = os.path.join(path, index)
if os.path.exists(index):
path = index
break
else:
return self.list_directory(path)
ctype = self.guess_type(path)
try:
f = open(path, 'rb')
except IOError:
self.send_error(404, "File not found")
return None
self.send_response(200)
self.send_header("Content-type", ctype)
fs = os.fstat(f.fileno())
self.send_header("Content-Length", str(fs[6]))
self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
self.end_headers()
return f
def list_directory(self, path):
try:
list = ['..'] + os.listdir(path) #
except os.error:
self.send_error(404, "No permission to list directory")
return None
list.sort(key=lambda a: a.lower())
f = StringIO()
displaypath = cgi.escape(urllib.unquote(self.path))
f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
f.write("<html>\n<title>%s %s</title>\n<body>" % (HOSTNAME, displaypath))
f.write("%s#%s:<strong>%s</strong>\n" % (USERNAME, HOSTNAME, path.rstrip('/')+'/'))
f.write("<hr>\n<ul>\n")
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
if os.path.isdir(fullname):
displayname = name + "/"
linkname = name + "/"
if os.path.islink(fullname):
displayname = name + "#"
f.write('<li>%s\n'
% (urllib.quote(linkname), cgi.escape(displayname)))
f.write("</ul>\n<hr>\n<pre>%s</pre>\n</body>\n</html>\n" % __doc__)
length = f.tell()
f.seek(0)
self.send_response(200)
encoding = sys.getfilesystemencoding()
self.send_header("Content-type", "text/html; charset=%s" % encoding)
self.send_header("Content-Length", str(length))
self.end_headers()
return f
def translate_path(self, path):
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = posixpath.normpath(urllib.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
for word in words:
drive, word = os.path.splitdrive(word)
head, word = os.path.split(word)
if word in (os.curdir, os.pardir): continue
path = os.path.join(path, word)
return path
def copyfile(self, source, outputfile):
shutil.copyfileobj(source, outputfile)
def guess_type(self, path):
base, ext = posixpath.splitext(path)
if ext in self.extensions_map:
return self.extensions_map[ext]
ext = ext.lower()
if ext in self.extensions_map:
return self.extensions_map[ext]
else:
return self.extensions_map['']
if not mimetypes.inited:
mimetypes.init()
extensions_map = mimetypes.types_map.copy()
extensions_map.update({'': 'text/plain'})
class ProcessedHTTPServer(SocketServer.ForkingMixIn, BaseHTTPServer.HTTPServer):
r"""Handle requests in multi process."""
class ThreadedHTTPServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
r"""Handle requests in a separate thread."""
SERVER_DICT = {
'thread' : ThreadedHTTPServer,
'process' : ProcessedHTTPServer,
}
SERVER_DFT = 'thread'
def run(sCwd=None, sServer=SERVER_DFT, nPort=PORT_DFT, *lArgs, **dArgs):
r"""
"""
sys.stderr.write('start with %r\n' % sys._getframe().f_locals)
if sCwd is not None:
os.chdir(sCwd)
cServer = SERVER_DICT[sServer]
oHttpd = cServer(("", nPort), SimpleHTTPRequestHandler)
sys.stderr.write('http://%s:%s/\n' % (HOSTNAME, nPort))
oHttpd.serve_forever()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# main
def _main():
r"""Main.
"""
import argparse
oParser = argparse.ArgumentParser(
description = __doc__,
formatter_class = argparse.RawTextHelpFormatter,
epilog = 'NOTE: stdin for input, stdout for result, stderr for logging',
)
oParser.add_argument('--pydoc', action='store_true',
help = "show this module's pydoc",
)
oGroupR = oParser.add_argument_group(title='run arguments', description='')
oGroupR.add_argument('--port', action='store', type=int, default=PORT_DFT,
help = 'specify server port (default: %(default)r)',
)
oGroupR.add_argument('--type', action='store', default=SERVER_DFT, choices=SERVER_DICT.keys(),
help = 'specify server type (default: %(default)r)',
)
oGroupR.add_argument('--root', action='store', default=os.getcwd(),
help = 'specify root directory (default: cwd %(default)r)',
)
oGroupR.add_argument('--run', action='store_true',
help = '\n'.join((
'run http server foreground',
)))
oArgs = oParser.parse_args()
if oArgs.pydoc:
help(os.path.splitext(os.path.basename(__file__))[0])
elif oArgs.run:
return run(sCwd=oArgs.root, sServer=oArgs.type, nPort=oArgs.port)
else:
oParser.print_help()
return 1
return 0
if __name__ == "__main__":
exit(_main())
Meanwhile, this single Python file of only around 200 lines may satisfy your "in Python" and "lightweight" demands.
Last but not least, ModifiedSimpleHTTPServer.py may be handy for temporary use; however, Nginx is advised for long-term use.

Can Django apps be called from a locally running daemon / python script

I am new to the Python language, so please bear with me. Also, English isn't my native language, so sorry for any misspelled words.
I have a question about updating a Django app from a daemon that runs locally on my server. I have a server setup with 8 hot-swappable bays. Users can plug their hard disk(s) into the server and, after the server has detected that a new hard disk is plugged in, it starts copying the contents of the hard disk to a location on the network. The current setup displays information about the process on an LCD screen.
The current setup works fine, but I need to change it so that the whole process is displayed on a website (since this is more user friendly). So I need to show the user when a disk is inserted into the server, the progress of the copy task, etc.
My idea is to create a Django app that gets updated when a task in progress is completed, but I can't seem to find any information about updating a Django app from a locally running daemon. Is this even possible? Or is Django not the right way to go? Any ideas are welcome.
Below is the script I use to copy the contents of a disk to a location on the network. Hopefully it gives some more information about what I am doing/trying to do.
Many thanks in advance!
Script:
#!/usr/bin/env python
import os
import sys
import glob
import re
import time
import datetime
import pyudev
import thread
import Queue
import gobject
import getopt
from pyudev import Context
from subprocess import Popen, PIPE
from subprocess import check_call
from lcdproc.server import Server
from pyudev.glib import GUDevMonitorObserver
from gobject import MainLoop
from threading import Thread
#used to show progress info
from progressbar import ProgressBar, Percentage, Bar, RotatingMarker, ETA, FileTransferSpeed
# used to set up screens
lcd = Server("localhost", 13666, debug=False)
screens = []
widgets = []
#Used for threading
disk_work_queue = Queue.Queue()
# used to store remote nfs folders
remote_dirs = ['/mnt/nfs/', '/mnt/nfs1/', '/mnt/nfs2/']
#Foldername on remote server (NFS Share name)
REMOTE_NFS_SHARE = ''
# a thread that runs forever; it starts the disk processing
# functions.
class ProcessThread(Thread):
def __init__(self):
Thread.__init__(self)
def run(self):
while 1:
try:
disk_to_be_processed = disk_work_queue.get(block=False)
set_widget_text(disk_to_be_processed[1], "Removed from queue..", "info", "on")
process_disk(disk_to_be_processed[0], disk_to_be_processed[1])
except Queue.Empty:
time.sleep(10)
set_main_widget_text("Please insert disks ")
# used to set a message on the LCD screen; messages are set per disk
def set_widget_text(host, message, priority, blacklight):
if host == "host4":
screen_disk1 = screens[1]
screen_disk1.clear()
screen_disk1.set_priority(priority)
screen_disk1.set_backlight(blacklight)
widgets[1].set_text(str(message))
elif host == "host5":
screen_disk2 = screens[2]
screen_disk2.clear()
screen_disk2.set_priority(priority)
screen_disk2.set_backlight(blacklight)
widgets[2].set_text(str(message))
elif host == "host6":
screen_disk3 = screens[3]
screen_disk3.clear()
screen_disk3.set_priority(priority)
screen_disk3.set_backlight(blacklight)
widgets[3].set_text(str(message))
elif host == "host7":
screen_disk4 = screens[4]
screen_disk4.clear()
screen_disk4.set_priority(priority)
screen_disk4.set_backlight(blacklight)
widgets[4].set_text(str(message))
# used to set a message for all hosts
def set_widget_text_all(hosts, message, priority, blacklight):
for host in hosts:
set_widget_text(host, message, priority, blacklight)
def set_main_widget_text(message):
screen_disk1 = screens[0]
screen_disk1.clear()
screen_disk1.set_priority("info")
screen_disk1.set_backlight("on")
widgets[0].set_text(str(message))
# mounts the disk, finds log files and copies image files to the destination
def process_disk(disk, host):
datadisk = mount_disk(disk, host)
source = datadisk + "/images"
set_widget_text(host, "Processing, hold on ", "info", "on")
cases = find_log(source)
upload(source, cases, host)
time.sleep(5)
umount_disk(host)
set_widget_text(host, "Disk can be removed", "info", "blink")
time.sleep(10)
# search the datadisk for logfiles containing information
# about cases and images
def find_log(src):
inf = ""
case = []
for root,dirs,files in os.walk(src):
for f in files:
if f.endswith(".log"):
log = open(os.path.join(root,f), 'r')
lines = log.readlines()[2:5]
for l in lines:
inf += re.sub("\n","",l[11:]) + ":"
log.close()
print inf
case.append(inf)
inf = ""
return case
def get_directory_size(dir):
dir_size = 0
for(path, dirs, files) in os.walk(dir):
for file in files:
filename = os.path.join(path, file)
dir_size+=os.path.getsize(filename)
return dir_size
# copies the image files to the destination location, dc3dd is used
# to copy the files in a forensically correct way.
def upload(src, cases, host):
remotedir = ''
while len(cases) > 0:
count = 0
nfs_share_found = False
case = cases.pop()
onderzoek = case.split(':')[0];
#remove the _ from the object name
object = case.split(':')[1];
#image = case.split(':')[2];
localdir = src + '/' + onderzoek + '/' + object +'/'
total_files = len(os.listdir(localdir))
folder_size = get_directory_size(localdir)
for d in remote_dirs:
if os.path.exists(d + onderzoek + '/B/' + object.replace('_',' ') + '/Images/'):
nfs_share_found = True
remotedir = d + onderzoek + '/B/' + object.replace('_', ' ') + '/Images/'
break
if nfs_share_found == False:
set_widget_text(host, " Onderzoek onbekend ", "info", "flash")
time.sleep(30)
return
for root,dirs,files in os.walk(localdir):
for uploadfile in files:
currentfile = os.path.join(root, uploadfile)
file_size = os.stat(currentfile).st_size
copy_imagefile(currentfile, onderzoek, object, remotedir)
count += 1
percentage = int(count*file_size*100/folder_size)
message = onderzoek + " Obj: " + object + "..%d%%" % percentage
set_widget_text(host, message, "info", "on")
set_widget_text(host, " Copy Succesfull! ", "info", "flash")
# the actual function that copies the files, using dc3dd
def copy_imagefile(currentfile, onderzoek, object, remotedir):
currentfilename = os.path.basename(currentfile)
dc3dd = Popen(["dc3dd", "if=" + currentfile, "hash=md5", "log=/tmp/"+ onderzoek + "_" + object + ".log", "hof=" + remotedir + currentfilename,"verb=on", "nwspc=on"],stdin=PIPE,stdout=PIPE, stderr=PIPE)
dc3dd_stdout = dc3dd.communicate()[1]
awk = Popen([r"awk", "NR==13 { print $1 }"],stdin=PIPE, stdout=PIPE)
awk_stdin = awk.communicate(dc3dd_stdout)[0]
output = awk_stdin.rstrip('\n')
if output == "[ok]":
return False
else:
return True
# when a disk gets inserted into the machine this function is called to prepare the disk
# for later use.
def device_added_callback(self, device):
position = device.sys_path.find('host')
host = device.sys_path[(position):(position+5)]
set_widget_text(host, " New disk inserted! ", "info", "on")
time.sleep(2)
disk = "/dev/" + device.sys_path[-3:] + "1"
disk_work_queue.put((disk, host))
set_widget_text(host, " Placed in queue... ", "info", "on")
# gets called when the disk is removed from the machine
def device_removed_callback(self, device):
position = device.sys_path.find('host')
host = device.sys_path[(position):(position+5)]
#message = 'Slot %s : Please remove drive' % host[4:]
set_widget_text(host, " Replace disk ", "info", "on")
# mounts the partition on the datadisk
def mount_disk(disk, host):
#device = "/dev/" + disk + "1"
mount_point = "/mnt/" + host
if not os.path.exists(mount_point):
os.mkdir(mount_point)
cmd = ['mount', '-o', 'ro,noexec,noatime,nosuid', str(disk), str(mount_point)]
check_call(cmd)
set_widget_text(host, " Disk mounted ", "info", "on")
return mount_point
# umounts the partition datadisk
def umount_disk(host):
mount_point = "/mnt/" + host
cmd = ['umount', str(mount_point)]
check_call(cmd)
os.removedirs(mount_point)
def build_screens():
screen_main = lcd.add_screen("MAIN")
screen_main.set_heartbeat("off")
screen_main.set_duration(3)
screen_main.set_priority("background")
widget0_1 = screen_main.add_string_widget("screen0Widget1", " Welcome to AFFC ", x=1, y=1)
widget0_2 = screen_main.add_string_widget("screen0Widget2", "Please insert disks ", x=1, y=2)
widgets.append(widget0_2)
screens.append(screen_main)
screen_disk1 = lcd.add_screen("DISK1")
screen_disk1.set_heartbeat("off")
screen_disk1.set_duration(3)
screen_disk1.clear()
widget_disk1_1 = screen_disk1.add_string_widget("disk1Widget1", " Slot 1 ", x=1, y=1)
widget_disk1_2 = screen_disk1.add_string_widget("disk1Widget2", " Please insert disk ", x=1, y=2)
widgets.append(widget_disk1_2)
screens.append(screen_disk1)
screen_disk2 = lcd.add_screen("DISK2")
screen_disk2.set_heartbeat("off")
screen_disk2.set_duration(3)
widget_disk2_1 = screen_disk2.add_string_widget("disk2Widget1", " Slot 2 ", x=1, y=1)
widget_disk2_2 = screen_disk2.add_string_widget("disk2Widget2", " Please insert disk ", x=1, y=2)
widgets.append(widget_disk2_2)
screens.append(screen_disk2)
screen_disk3 = lcd.add_screen("DISK3")
screen_disk3.set_heartbeat("off")
screen_disk3.set_duration(3)
widget_disk3_1 = screen_disk3.add_string_widget("disk3Widget1", " Slot 3 ", x=1, y=1)
widget_disk3_2 = screen_disk3.add_string_widget("disk3Widget2", " Please insert disk ", x=1, y=2)
widgets.append(widget_disk3_2)
screens.append(screen_disk3)
screen_disk4 = lcd.add_screen("DISK4")
screen_disk4.set_heartbeat("off")
screen_disk4.set_duration(3)
widget_disk4_1 = screen_disk4.add_string_widget("disk4Widget1", " Slot 4 ", x=1, y=1)
widget_disk4_2 = screen_disk4.add_string_widget("disk4Widget2", " Please insert disk ", x=1, y=2)
widgets.append(widget_disk4_2)
screens.append(screen_disk4)
def restart_program():
"""Restarts the current program.
Note: this function does not return. Any cleanup action (like
saving data) must be done before calling this function."""
python = sys.executable
os.execl(python, python, * sys.argv)
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], "hd:v", ["help", "destination="])
except getopt.GetoptError, err:
# print help information and exit:
print str(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
verbose = False
for o, a in opts:
if o == "-v":
verbose = True
elif o in ("-h", "--help"):
usage()
sys.exit()
elif o in ("-d", "--destination"):
REMOTE_NFS_SHARE = a
else:
assert False, "unhandled option"
lcd.start_session()
build_screens()
#t = Thread(target=loop_disks_process())
#t.start();
context = pyudev.Context()
monitor = pyudev.Monitor.from_netlink(context)
observer = GUDevMonitorObserver(monitor)
observer.connect('device-added', device_added_callback)
observer.connect('device-removed', device_removed_callback)
monitor.filter_by(subsystem='block', device_type='disk')
monitor.enable_receiving()
mainloop = MainLoop()
gobject.threads_init()
t = ProcessThread()
t.start()
mainloop.run()
raw_input("Hit <enter>")
t.running = False
t.join()
if __name__ == "__main__":
try:
main()
except Exception, e:
restart_program()
Sorry, much too much code to read there.
I'm not sure what you mean by "updating" a Django app. Do you mean adding some data into the database? This is easy to do, either by getting your script to write directly into the DB, or by using something like a custom Django management command which can use the ORM.
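To make the management-command route concrete, here is a minimal sketch; the app name "status" and the DriveStatus model are hypothetical, not something from the question:
# Saved as status/management/commands/set_drive_status.py, the daemon can call it with:
#   python manage.py set_drive_status host4 copying 42
from django.core.management.base import BaseCommand
from status.models import DriveStatus   # hypothetical model with slot, state, progress fields

class Command(BaseCommand):
    args = '<slot> <state> <progress>'   # Django 1.8+ would use add_arguments() instead
    help = 'Update the status of one drive bay from the copy daemon'

    def handle(self, *args, **options):
        slot, state, progress = args
        drive, _created = DriveStatus.objects.get_or_create(slot=slot)
        drive.state = state
        drive.progress = int(progress)
        drive.save()
The daemon could then report progress with something like subprocess.call(['python', 'manage.py', 'set_drive_status', host, 'copying', str(percentage)]).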
Take a look at Django Piston. You can implement a RESTful API on your Django app and call those APIs from your daemon. I use it on one of my projects, in which some worker processes need to communicate with frontend Django apps periodically.
It could be done like this:
The daemon shares its disk information/copy progress using some inter-process communication method, like a simple text file or some in-memory object;
A Django view could then read this info and display it to the user;
Or the daemon could call a Django management command (as @Daniel Roseman suggests) and that command would then update the app DB to represent the current state.
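A minimal sketch of the shared-file option (the path and field names are my assumptions): the daemon dumps its state to a JSON file atomically and the Django view just reads it.
# Daemon side: call write_status() wherever set_widget_text() is called today.
import json, os, tempfile

STATUS_FILE = '/var/run/copydaemon/status.json'        # assumed path, writable by the daemon

def write_status(status_dict):
    # write to a temp file and rename, so the web app never sees a half-written file
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(STATUS_FILE))
    with os.fdopen(fd, 'w') as f:
        json.dump(status_dict, f)
    os.rename(tmp, STATUS_FILE)

# Django side (views.py):
# def drive_status(request):
#     try:
#         with open(STATUS_FILE) as f:
#             status = json.load(f)
#     except (IOError, ValueError):
#         status = {}
#     return render(request, 'status.html', {'status': status})
The daemon would call, for example, write_status({'host4': {'state': 'copying', 'progress': 42}}).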
Consider using something like Memcached as a shared area to store the state of the drives.
As the drives are added or removed, the daemon should write those changes to Memcached, and on each page load the Django web app should read the state from Memcached. You could use a management command and a SQL database, but that seems like too many moving parts for such a simple problem: you're only storing a handful of boolean flags.
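A minimal sketch of the Memcached variant (the key names are my assumptions); note that both sides use the python-memcached client directly, because Django's cache framework prefixes keys by default and would not see keys set by the raw client:
# Daemon side:
import memcache

mc = memcache.Client(['127.0.0.1:11211'])

def publish(host, state, percentage=None):
    mc.set('drive_state_%s' % host, state)              # e.g. 'copying', 'done', 'empty'
    if percentage is not None:
        mc.set('drive_progress_%s' % host, percentage)

# Django view side, reading with the same client:
# def drive_status(request):
#     slots = ['host4', 'host5', 'host6', 'host7']
#     status = dict((s, {'state': mc.get('drive_state_%s' % s) or 'empty',
#                        'progress': mc.get('drive_progress_%s' % s)})
#                   for s in slots)
#     return render(request, 'status.html', {'status': status})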
You might even try a micro-framework like Flask instead of Django, to reduce the complexity even more.
