My requirement:
About 300 (field sensor-like) clients report their status (a JSON string) once every X (say 10) minutes. They can only use "curl" to do the reporting. The API/handler on the server needs to parse and dump all those values into the master table.
A few users (usually less than 10) connect to the app from their browsers to check the status of the sensor-reports and may linger around checking a few pages (main status page, detailed report page etc.). There are less than 10 pages (think reports) that the users want to see.
My setup:
Web Server: nginx
App Server: uwsgi
Framework: Bottle
Database: PostgreSQL
Python DB Driver: psycopg2
Frontend: Bootstrap
My code:
Please note that I did not include a lot of error checking and other security measures that we have in code, simply because they do not contribute to this discussion.
import os
import bottle
from bottle import route, post, run, request, template, install, static_file
import psycopg2
import customemailservice

@route('/static/<filepath:path>')
def server_static(filepath):
    return static_file(filepath, root='/webapp/ss/static')

# The URL that the sensors will hit to pass on their status report
@post('/logger')
def dolog():
    sensor_id = request.json['id']
    sensor_ts = request.json['ts']
    sensor_ut = request.json['main']['ut']
    sensor_ploss = request.json['main']['ploss']
    sensor_s1 = request.json['main']['s1']
    sensor_s2 = request.json['main']['s2']
    sensor2_status = request.json['aux']['status']
    sensor2_rts = request.json['aux']['rts']

    try:
        conn = psycopg2.connect('dbname=<dbname> user=<username> password=<password> host=<dbhost> port=<dbport>')
    except psycopg2.Error as pe:
        conn = None
        print(pe.pgerror)

    curr = conn.cursor() if conn is not None else None
    if conn is not None and curr is not None:
        curr.execute('''INSERT INTO tbllog (id, ts, ut, ploss, s1, s2, status, rts)
                        VALUES (%s,%s,%s,%s,%s,%s,%s,%s)''',
                     (sensor_id, sensor_ts, sensor_ut, sensor_ploss, sensor_s1, sensor_s2, sensor2_status, sensor2_rts))
        conn.commit()
        curr.close()
        conn.close()
    else:
        pass
        # The code here is irrelevant
    return template('Ok from {{sid}} at {{ts}}', sid=sensor_id, ts=sensor_ts)
@route('/')
def index():
    try:
        conn = psycopg2.connect('dbname=<dbname> user=<username> password=<password> host=<dbhost> port=<dbport>')
    except psycopg2.Error as pe:
        conn = None
        print(pe.pgerror)

    curr = conn.cursor() if conn is not None else None
    html = ""
    if conn is not None and curr is not None:
        sql = 'select t1.* from tbllog t1 where t1.ts = (select max(t2.ts) from tbllog t2 where t2.id=t1.id) order by id;'
        curr.execute(sql)
        rs = curr.fetchall()
        for row in rs:
            html = html + '<tr><td class="warning">' + row[0] + '</td><td class="warning">' + str(row[1]) + '</td><td class="success">' + str(row[2]) + '</td><td class="success">' + str(row[3]) + '</td><td class="success">' + str(row[4]) + '</td><td class="info">' + str(row[5]) + '</td><td class="info">' + str(row[6]) + '</td></tr>'
        curr.close()
        conn.close()
    # Pass the raw html table that will be inserted into the index template.
    return template('index', tdata=html)
@route('/status/<sensor_id>')
def getsensorid(sensor_id):
    try:
        conn = psycopg2.connect('dbname=<dbname> user=<username> password=<password> host=<dbhost> port=<dbport>')
    except psycopg2.Error as pe:
        conn = None
        print(pe.pgerror)

    curr = conn.cursor() if conn is not None else None
    html = ""
    if conn is not None and curr is not None:
        # Parameterised query instead of concatenating sensor_id into the SQL string.
        curr.execute('select * from tbllog where id=%s order by ts;', (sensor_id,))
        rs = curr.fetchall()
        for row in rs:
            html = html + '<tr class="info"><td>' + row[0] + '</td><td>' + str(row[1]) + '</td><td>' + str(row[2]) + '</td><td>' + str(row[3]) + '</td><td>' + str(row[4]) + '</td><td>' + str(row[5]) + '</td><td>' + str(row[6]) + '</td></tr>'
        curr.close()
        conn.close()
if __name__ == '__main__':
    run(host="0.0.0.0", port=8080, debug=True)
else:
    app = application = bottle.default_app()
My Question:
Given the requirements, is this a reasonable approach? Or do you recommend I use DB connection pooling? I am a little confused about pooling since I am not sure at what level (nginx, uwsgi or bottle) my app code gets duplicated (to serve concurrent clients), and how I should go about creating a pool that I can use across the different threads/processes (each of which contains a copy of this app code).
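For reference, here is a minimal sketch of what a per-process pool could look like with psycopg2's built-in psycopg2.pool module (the connection string placeholders mirror the ones above; this is an illustration of the idea, not code from the app). Each uwsgi worker process would build its own pool at import time, and the handlers would borrow and return connections instead of connecting on every request:

from psycopg2.pool import ThreadedConnectionPool

# One pool per worker process, created when the module is imported.
# minconn/maxconn are illustrative; size them to the worker's thread count.
db_pool = ThreadedConnectionPool(1, 5,
    'dbname=<dbname> user=<username> password=<password> host=<dbhost> port=<dbport>')

def run_insert(sql, params):
    conn = db_pool.getconn()          # borrow a connection from the pool
    try:
        with conn.cursor() as curr:
            curr.execute(sql, params)
        conn.commit()
    finally:
        db_pool.putconn(conn)         # always hand it back, even on error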
Obviously, this is my initial foray into web apps (and even serious Python, for that matter), and I would like to hear back from you if you think there are better (I'm assuming there are many) ways to skin this cat.
Related
I want to collect and check what errors are occurring, so I am trying to upload the logs to a database. I wrote the code to upload the log to MySQL by referring to this page: python logging to database.
However, I get the following error. Which part is wrong? Also, if there is another way to easily upload logs to MySQL, please let me know.
import logging
import time
import pymysql

user = 'test'
passw = '******'
host = 'db'
port = ****
database = '****'
db_tbl_log = 'log'
log_file_path = 'C:\\Users\\Desktop\\test_log.txt'
log_error_level = 'DEBUG'   # LOG error level (file)
log_to_db = True            # LOG to database?
class LogDBHandler(logging.Handler):
    '''
    Customized logging handler that puts logs to the database.
    pymssql required
    '''
    def __init__(self, sql_conn, sql_cursor, db_tbl_log):
        logging.Handler.__init__(self)
        self.sql_cursor = sql_cursor
        self.sql_conn = sql_conn
        self.db_tbl_log = db_tbl_log

    def emit(self, record):
        # Set current time
        tm = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
        # Clean the log message so it can be put to db via sql (escape quotes)
        self.log_msg = record.msg
        self.log_msg = self.log_msg.strip()
        self.log_msg = self.log_msg.replace('\'', '\'\'')
        # Make the SQL insert
        sql = 'INSERT INTO ' + self.db_tbl_log + ' (log_level, ' + \
              'log_levelname, log, created_at, created_by) ' + \
              'VALUES (' + \
              '' + str(record.levelno) + ', ' + \
              '\'' + str(record.levelname) + '\', ' + \
              '\'' + str(self.log_msg) + '\', ' + \
              '(convert(datetime2(7), \'' + tm + '\')), ' + \
              '\'' + str(record.name) + '\')'
        try:
            self.sql_cursor.execute(sql)
            self.sql_conn.commit()
        # If error - print it out on screen. Since DB is not working - there's
        # no point making a log about it to the database :)
        except pymysql.Error as e:
            print("error: ", e)
            # print(sql)
            # print('CRITICAL DB ERROR! Logging to database not possible!')
# Main settings for the database logging use
if (log_to_db):
    # Make the connection to database for the logger
    log_conn = pymysql.connect(host=host,
                               port=port,
                               user=user,
                               password=passw,
                               database=database,
                               charset='utf8')
    log_cursor = log_conn.cursor()
    logdb = LogDBHandler(log_conn, log_cursor, db_tbl_log)

# Set logger
logging.basicConfig(filename=log_file_path)

# Set db handler for root logger
if (log_to_db):
    logging.getLogger('').addHandler(logdb)

# Register MY_LOGGER
log = logging.getLogger('MY_LOGGER')
log.setLevel(log_error_level)

# Example variable
test_var = 'This is test message'

# Log the variable contents as an error
log.error('This error occurred: %s' % test_var)
error: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''2021-02-22 16:52:06')), 'MY_LOGGER')' at line 1")
Don't format SQL statements yourself; you will miss a lot of cases. Just pass the values as the second parameter:
sql = f'INSERT INTO {self.db_tbl_log} (log_level, log_levelname, log, created_at, created_by) VALUES (%s, %s, %s, %s, %s)'
self.sql_cursor.execute(sql, (record.levelno, record.levelname, self.log_msg, tm, record.name))
%s is a placeholder; pymysql will convert the given params to valid formats one by one.
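Applied to the handler above, the emit method could look roughly like this (a sketch; it keeps the question's table and column names and passes the timestamp as a plain string, which MySQL accepts for a DATETIME column; note that the convert(datetime2(7), ...) call in the original is SQL Server syntax, which is the part MySQL trips over):

    def emit(self, record):
        # MySQL will parse the formatted string into a DATETIME value.
        tm = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
        log_msg = record.getMessage().strip()
        sql = ('INSERT INTO ' + self.db_tbl_log +
               ' (log_level, log_levelname, log, created_at, created_by)'
               ' VALUES (%s, %s, %s, %s, %s)')
        try:
            # The driver escapes the values, so no manual quoting is needed.
            self.sql_cursor.execute(sql, (record.levelno, record.levelname,
                                          log_msg, tm, record.name))
            self.sql_conn.commit()
        except pymysql.Error as e:
            print("error: ", e)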
I've been trying to make my code work but can't figure out why it doesn't do what I expect.
I'm trying to put data into my database, but it only seems to execute one insert...
The code is:
def setCubesValues(value):
    mysql.connection.autocommit(on=True)
    tabVal = value.split(';;')
    del tabVal[0]
    for i in range(0, len(tabVal)):
        tabi = tabVal[i]
        cur = mysql.connection.cursor()
        responseCur = cur.execute('SELECT * FROM idcudetoaction where id_cube = "' + tabi + '"')
        if responseCur == 1:
            curResultInsert = cur.execute('update idcudetoaction set action = "' + tabi + '" where id_cube = ' + str(i))
        else:
            curResultInsert = cur.execute('insert into idcudetoaction (id_cube, action) values (' + str(i) + ', "' + tabi + '")')
    return jsonify(curResultInsert)
The thing is, I have 7 values, but only one of them gets put in the database...
Any help? :)
thx!
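For comparison, here is a sketch of the same loop using parameterized queries and a single cursor, on the assumption that the intent is to upsert one row per value and that id_cube is a unique key. It also keeps the return clearly outside the loop; if the original return sits inside the for loop (the pasted indentation doesn't show it), that alone would stop processing after the first value:

def setCubesValues(value):
    tabVal = value.split(';;')
    del tabVal[0]
    cur = mysql.connection.cursor()
    result = 0
    for i, tabi in enumerate(tabVal):
        # Insert, or update the action if the id_cube row already exists.
        result = cur.execute(
            'INSERT INTO idcudetoaction (id_cube, action) VALUES (%s, %s) '
            'ON DUPLICATE KEY UPDATE action = VALUES(action)',
            (i, tabi)
        )
    mysql.connection.commit()
    return jsonify(result)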
So I have a quick function that's supposed to upload data (stored in a Python dictionary) to a MySQL database.
# 'connector' is assumed to be mysql.connector (the import is not shown in the question).
def uploadData(of_item):
    global DB_HOST
    global DB_USER
    global DB_PASSWORD
    global DB_DATABASE

    my_db = connector.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASSWORD, database=DB_DATABASE, port=3306)
    my_db.autocommit = True
    my_cursor = my_db.cursor()
    print("\rThe DB pipeline is now connected.")

    slots_text = ", ".join([a[0] for a in of_item.items()])
    values_text = ", ".join(["'" + a[1].replace("'", "\\'") + "'" for a in of_item.items()])
    set_portion_text = ", ".join([a[0] + " = " + "'" + a[1].replace("'", "\\'") + "'" for a in of_item.items()])
    sql = 'INSERT INTO UsersData ({0}) VALUES ({1})'.format(slots_text, values_text)

    try:
        my_cursor.execute(sql)
        row_cnt = my_cursor.rowcount
        my_db.commit()
        my_cursor.close()
        my_db.close()
        print("\r" + str(row_cnt) + " is now in UsersData.")
        return [True, str(row_cnt)]
    except Exception as exception:
        print("\n".join(["The update failed for profileID: " + of_item['UniqueId'],
                         str(exception),
                         str(sql),
                         "*",
                         'Item:',
                         str(of_item),
                         "*"]))
        my_cursor.close()
        my_db.close()
        return [False, 0]
Currently, the row_cnt sits at -1, so the table should be entirely empty. However, when I execute the function, I'm constantly getting this error thrown:
1062 (23000): Duplicate entry 'ABCDEFGHIJKLMNOPQRSTUVWXYZ-123' for key 'profileId_2'
Now, profileId_2 is just this:
...
UNIQUE KEY `profileId_2` (`profileId`,`companyId`),
...
profileId is whatever the user's unique ID is, and companyId is just a preset (in this case, 123). It's odd that there would be a claimed duplicate, since there's nothing in the database yet.
First, what might be causing this error? Second, how can I get through it and successfully append new entries to UsersData?
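For the second part, one option (a sketch, under the assumption that overwriting the existing row for a given (profileId, companyId) pair is acceptable) is to let MySQL resolve the collision with INSERT ... ON DUPLICATE KEY UPDATE, and to pass the values as parameters instead of escaping them by hand:

    # Sketch only: assumes of_item's keys are trusted column names in UsersData.
    columns = list(of_item.keys())
    placeholders = ", ".join(["%s"] * len(columns))
    updates = ", ".join("{0} = VALUES({0})".format(c) for c in columns)
    sql = ("INSERT INTO UsersData ({0}) VALUES ({1}) "
           "ON DUPLICATE KEY UPDATE {2}").format(", ".join(columns), placeholders, updates)
    my_cursor.execute(sql, list(of_item.values()))
    my_db.commit()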
I am trying to query a dataset present in an S3 bucket, using an Athena query run from a Python script with the help of boto3 functions.
I am using start_query_execution() to run my query. This executes perfectly. Next, to get access to the result of the query in my Python script, I am using the function get_query_results().
Now if I run these two functions separately (one script after another), I get the data which is the output of the Athena query. I want them written in a single script, something like: fetch data from S3 and start manipulating the output of the query using Python code.
Since the query is async in nature, I am using the pool technique, where it waits until the Athena query is executed. But if I run the code below, the status shown for the query is still RUNNING.
I think I am making some silly mistake, because if I run them separately I get the desired output. In short, I want to query the data present in S3 using Athena, then do some processing on the fetched data in a Python script, hence this approach. Please help.
Here is the sample code:
#!/usr/bin/env python3
import boto3
import time
from functools import partial
from multiprocessing.dummy import Pool

pool = Pool(processes=1)

# def async_function(name):
#     time.sleep(1)
#     return name
#
# def callback_function(name, age):
#     print(name, age)

def run_query(query, database, s3_output):
    client = boto3.client('athena')
    response = client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={
            'Database': database
        },
        ResultConfiguration={
            'OutputLocation': s3_output,
        }
    )
    print('Execution ID: ' + response['QueryExecutionId'])
    return response
def show_res(res, q):
    client = boto3.client('athena')
    print("Executing query: %s" % (q))
    print('Execution ID: ' + res['QueryExecutionId'])
    # response = client.stop_query_execution(
    #     QueryExecutionId=res['QueryExecutionId']
    # )
    response = client.get_query_results(
        # QueryExecutionId='f3642735-d9d9-4246-ade4-7453eaed0717'
        QueryExecutionId=res['QueryExecutionId']
    )
    print("Executing query: %s" % (q))
    print('Execution ID: ' + res['QueryExecutionId'])
    print('rRespone:'.join(str(x) for x in response['ResultSet']['Rows']))
    return response
# for age, name in enumerate(['jack', 'jill', 'james']):
#     new_callback_function = partial(callback_function, age=age)
#     pool.apply_async(
#         async_function,
#         args=[name],
#         callback=new_callback_function
#     )

# Athena configuration
s3_input = 's3://dummy/'
s3_ouput = 's3://dummy/results/'
database = 'dummy'
table = 'dummy'

# Query definitions
query_1 = "SELECT * FROM %s.%s where sex = 'F';" % (database, table)
query_2 = "SELECT * FROM %s.%s where age > 30;" % (database, table)

# Execute all queries
queries = [ query_1 ]
for q in queries:
    print("Executing query: %s" % (q))
    new_callback_function = partial(show_res, q=q)
    pool.apply_async(
        run_query,
        args=[q, database, s3_ouput],
        callback=new_callback_function
    )
pool.close()
pool.join()
Instead of apply_async, try the following; pool.map blocks until all of the results are ready:
pool = Pool(cores)
df = pd.concat(pool.map(func, [value_1,...,value_n]))
pool.close()
pool.join()
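Whichever way you parallelize, note that start_query_execution returns immediately, so the callback (or mapped function) still has to wait for the query to finish before get_query_results will return anything useful. A minimal sketch of that wait, using the same boto3 Athena calls as the code above and below (the helper name is mine):

import time

def wait_and_fetch(client, execution_id, poll_seconds=5):
    # Poll the execution state until Athena reports a terminal status.
    while True:
        state = client.get_query_execution(
            QueryExecutionId=execution_id
        )['QueryExecution']['Status']['State']
        if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
            break
        time.sleep(poll_seconds)
    if state != 'SUCCEEDED':
        raise RuntimeError('Athena query ended in state ' + state)
    return client.get_query_results(QueryExecutionId=execution_id)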
Here is my code; it works great and I expect you can reuse some lines. Basically, I run multiple queries in Athena at the "same" time (I parallelized the array named endpoints), and I store each result in a row of a Pandas dataframe. Also, you can fetch the data for each query, and I added a status print so you can see the status of each query. Remember that Athena has a limit on the number of queries you can run concurrently.
import time
import boto3
import pandas as pd
from multiprocessing import Pool

class QueryAthena:

    def __init__(self, endpoint, init_date, end_date):
        self.s3_input = 's3://my_bucket/input'
        self.s3_output = 's3://my_bucket/output'
        self.database = 'datalake'
        self.table = 'my_table'
        self.endpoint = "'" + endpoint + "'"
        self.init_date = "'" + init_date + "'"
        self.end_date = "'" + end_date + "'"
        self.year = self.init_date[1:5]
        self.month = self.init_date[6:8]
        self.day = self.init_date[9:11]
        self.region_name = 'us-east-1'
        self.aws_access_key_id = "my_id"
        self.aws_secret_access_key = "my_key"
    def load_conf(self, q):
        self.client = boto3.client('athena',
                                   region_name=self.region_name,
                                   aws_access_key_id=self.aws_access_key_id,
                                   aws_secret_access_key=self.aws_secret_access_key)
        try:
            response = self.client.start_query_execution(
                QueryString=q,
                QueryExecutionContext={
                    'Database': self.database
                },
                ResultConfiguration={
                    'OutputLocation': self.s3_output,
                }
            )
            print('Execution ID: ' + response['QueryExecutionId'])
        except Exception as e:
            print(e)
        return response
    def query(self):
        self.query = "SELECT count(*) as total_requests, SUM(CASE WHEN count_endpoints > 1 THEN 1 ELSE 0 END) as total_repeated, AVG(CASE WHEN count_endpoints > 1 THEN count_endpoints END) as TRAFFIC_QUALITY FROM (SELECT * from (SELECT domain, size, device_id, ip, array_join(array_agg(distinct endpoint), ',') as endpoints_all, count(distinct endpoint) as count_endpoints FROM %s.%s WHERE year=%s and month=%s and day=%s and ts between timestamp %s and timestamp %s and status = '2' GROUP BY domain, size, device_id, ip) l1 where endpoints_all LIKE '%%' || %s || '%%') l2;" % (self.database, self.table, self.year, self.month, self.day, self.init_date, self.end_date, self.endpoint)
    def run_query(self):
        self.query()
        queries = [self.query]
        for q in queries:
            # print("Executing query: %s" % (q))
            res = self.load_conf(q)
            try:
                query_status = None
                while query_status == 'QUEUED' or query_status == 'RUNNING' or query_status is None:
                    query_status = self.client.get_query_execution(QueryExecutionId=res["QueryExecutionId"])['QueryExecution']['Status']['State']
                    print(query_status + " " + self.endpoint)
                    if query_status == 'FAILED' or query_status == 'CANCELLED':
                        raise Exception('Athena query with the string "{}" failed or was cancelled'.format(q))
                    time.sleep(20)
                print("Query %s finished." % (self.endpoint))

                response = self.client.get_query_results(QueryExecutionId=res['QueryExecutionId'])
                df = self.results_to_df(response)
                df = pd.DataFrame(df)
                df["endpoint"] = str(self.endpoint)
                try:
                    df["percentaje_repeated"] = str(int(df["total_repeated"].iloc[0]) * 100 / int(df["total_requests"].iloc[0]))
                except Exception as e:
                    print(self.endpoint + " here")
                df["date"] = str(self.init_date + "-" + self.end_date)
                return df
            except Exception as e:
                print(str(e) + " " + self.endpoint)
                print(df["total_repeated"].iloc[0])
                print(df["total_requests"].iloc[0])
    def results_to_df(self, results):
        columns = [
            col['Label']
            for col in results['ResultSet']['ResultSetMetadata']['ColumnInfo']
        ]
        listed_results = []
        for res in results['ResultSet']['Rows'][1:]:
            values = []
            for field in res['Data']:
                try:
                    values.append(list(field.values())[0])
                except:
                    values.append(list(' '))
            listed_results.append(
                dict(zip(columns, values))
            )
        return listed_results

def func(end):
    qa = QueryAthena(end, "2018-10-09 00:00:00", "2018-10-09 05:59:59")
    result = qa.run_query()
    return result

endpoints = ["677SRI149821","V14509674","1426R"]

if __name__ == '__main__':
    pool = Pool(15)
    df = pd.concat(pool.map(func, endpoints))
    pool.close()
    pool.join()
I've got a Flask app that queries a sqlite database and returns a list to HTML via jsonify. It works fine on localhost, but I've uploaded it to DreamHost and am getting 404s for the jsonified lists. I can't tell whether it is an issue with Flask communicating with my sqlite database, with Flask itself, or with the JSON.
Here is the Flask app:
#!/usr/bin/python
import sqlite3 as sqlite
import json
from flask import Flask, g, jsonify, make_response, render_template

DEBUG = True
DATABASE = './whdt_combined.db'

# setup flask application
app = Flask(__name__)
app.config.from_object(__name__)

##########################
### DATABASE STUFF #######
##########################

# connect to the database
def connect_db():
    rv = sqlite.connect(app.config['DATABASE'])
    rv.row_factory = dict_factory
    return rv

# function for making rows nice
def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

# open new db connection if one hasn't been opened
def get_db():
    if not hasattr(g, 'sqlite_db'):
        g.sqlite_db = connect_db()
    return g.sqlite_db

# when application stops, close db connection
@app.teardown_appcontext
def close_db(error):
    if hasattr(g, 'sqlite_db'):
        g.sqlite_db.close()

# easy function to nicely query the db
def query_db(query, args=(), one=False):
    cur = get_db().execute(query, args)
    rv = cur.fetchall()
    cur.close()
    return (rv[0] if rv else None) if one else rv

##########################
### APPLICATION ##########
##########################
#app.route('/')
def index():
return render_template('index.html')
#app.route('/<data1>/<data2>/<region>/<year>/1/')
def datafunction1(data1, data2, region, year):
data = []
for row in query_db('SELECT ccode, country, year, region, ' + data1 + ' FROM whdt WHERE year = ' + year + ' AND region = "' + region + '" AND ' + data1 + ' != "" AND ' + data1 + ' > 0 AND ' + data1 + ' < 100 ORDER BY ' + data1 + ' DESC'):
countrywhole = {
'country' :row['country'],
'ccode':row['ccode'],
'year':row['year'],
'region':row['region'],
data1:row[data1]
}
data.append(countrywhole)
return jsonify( { 'data': data } )
if __name__ == '__main__':
app.run()
And here is my folder setup:
/home/user/mydomain.com/myapp/whdt_combined.db
/home/user/mydomain.com/myapp/myapp.py
/home/user/mydomain.com/myapp/__init__.py
/home/user/mydomain.com/myapp/templates/index.html
/home/user/mydomain.com/myapp/static/a couple of css and js files
Flask is not having trouble rendering index.html. Part of the issue is I don't know how to debug through dreamhost. I'm fairly new to this stuff so I'm sorry if I am missing something obvious or otherwise clearly don't know what I'm talking about. Please let me know if I should be providing other important information here. And thanks in advance for your help!
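Two things that might help narrow this down, both assumptions about the deployment rather than verified fixes: since DreamHost doesn't show you the traceback, logging to a file makes the real exception visible, and a relative DATABASE path resolves against the process's working directory, which under a hosted WSGI setup is often not the app folder. A sketch of both, placed near the top of myapp.py:

import logging
import os

APP_DIR = os.path.dirname(os.path.abspath(__file__))

# Write tracebacks and debug output somewhere you can read them on the server
# (the filename is an assumption; pick any writable location).
logging.basicConfig(filename=os.path.join(APP_DIR, 'myapp_debug.log'),
                    level=logging.DEBUG)

# Resolve the sqlite file relative to this module, not the working directory.
DATABASE = os.path.join(APP_DIR, 'whdt_combined.db')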