How to structure python / Flask project - python

As a Python beginner and newbie to Flask, I made some structural mistakes in my project.
I have a html page with some dropdown menus and buttons where I set parameters and then my python code (let’s call it apple.py) does some analysis work and sends it back to the html page to display.
Now, I would like to add some modified copies of my apple.py code and have a start page from where I can choose which py code / page I’d like to load. I’ve read about Blueprints in Flask, but I can’t manage to implement them.
This is the structure of my project now:
apple.py
/static
/css
/template
Index.html
apple.py is organised like this:
csv / Dataframe crunching
Tons of variables
Class xxx()
#app.route('/')
#app.route('/main/', methods=['GET', 'POST'])
Some Functions / main code()…
if __name__ == "__main__":
app.run(debug=True)
I can’t change anything in the main code, also moving the routes above the class causes many errors.
Any suggestions how to structure this to get a html start page from where I could navigate to apple.py, banana.py, etc..?
UPDATE:
Upon request I post here the truncated main code (apple.py). I deleted all repetitive lines from the functions as well as the variables because they don't matter here
from flask import Flask, render_template, request, make_response
import pandas as pd
import numpy as np
import sys
import pygal
#****** csv import and dataframe setup ****************************************************
# Load intraday bars; the CSV is ';'-separated with separate Date and Time text columns.
df = pd.read_csv('ES_M5_7h00.csv', sep=';', engine='python')
# Combine Date + Time into one datetime (day-first format) and index on it.
df['Date'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)
df.set_index('Date', inplace=True)
# Drop holidays / half-days that would skew the per-day statistics.
to_delete = ['2019-12-25', '2019-12-26', '2020-01-01', '2020-07-03']
df = df[~(df.index.strftime('%Y-%m-%d').isin(to_delete))]
# Convert 'HH:MM:SS' strings to integers like 70000 for fast range filtering.
df.Time = df.Time.str.replace(':', '').astype(float).astype(int)
df.reset_index(inplace=True)
# Keep only the 07:00:00-22:10:00 session window.
df = df[df['Time'].between(70000, 221000)]
SessionStart = 70000
# Attach the previous business day's last yVAH/yVAL/yPOC value to every row
# of the current day (join key: previous business day's date).
df = df.join(df.resample('B', on='Date')['yVAH_init'].last().rename('yVAH'), on=pd.to_datetime((df['Date'] - pd.tseries.offsets.BusinessDay()).dt.date))
df = df.join(df.resample('B', on='Date')['yVAL_init'].last().rename('yVAL'), on=pd.to_datetime((df['Date'] - pd.tseries.offsets.BusinessDay()).dt.date))
df = df.join(df.resample('B', on='Date')['yPOC_init'].last().rename('yPOC'), on=pd.to_datetime((df['Date'] - pd.tseries.offsets.BusinessDay()).dt.date))
# Map each row to its day's first Open price.
df['Opening'] = df.Date.dt.date.map(df.resample('B', on='Date').Open.first())
df.drop(['yVAH_init', 'yVAL_init','yPOC_init'], axis=1, inplace=True)
# Number of distinct trading days in the sample.
sample_length = df['Date'].dt.date.nunique()
#****** variables start ****************************************************
#****** variables end ****************************************************
#****** main code ****************************************************
class Opening(object):
    """Mutable holder for the current zone-analysis selections.

    Note: the surrounding code mostly assigns to Opening.<attr> at class
    level, effectively using this class as a shared namespace.
    """
    def __init__(self, zone=None, zonegroup=None, yClosing=None, zonelist=None, zonetextlist=None, chart_legend=None, zone_names=None, chart_titles=None, yCondition=None):
        # Copy every constructor argument onto the instance unchanged.
        for attr_name, attr_value in (
            ('zone', zone),
            ('zonegroup', zonegroup),
            ('yClosing', yClosing),
            ('zonelist', zonelist),
            ('zonetextlist', zonetextlist),
            ('chart_legend', chart_legend),
            ('zone_names', zone_names),
            ('chart_titles', chart_titles),
            ('yCondition', yCondition),
        ):
            setattr(self, attr_name, attr_value)
# NOTE(review): the '#app.route(...)' lines below are presumably '@app.route(...)'
# decorators that lost their '@' when pasted; as comments they register nothing.
#app.route('/')
#app.route('/main/', methods=['GET', 'POST'])
def select_zone():
# Read the zone chosen in the HTML dropdown.
selected_zone = request.form.get("select_zone")
if selected_zone is None:
return open_empty()
if selected_zone == "-":
return open_empty()
if selected_zone == "Z1":
# Stash the Z1 configuration on the Opening class (used as a shared namespace).
# NOTE(review): Z1, Z1group, zonelist, zonetextlist, chart_legend, zone_names
# and chart_titles are module-level variables omitted from this truncated post.
Opening.zone = Z1
Opening.zonelist = zonelist[0]
Opening.zonetextlist = zonetextlist[0]
Opening.chart_legend = chart_legend[0]
Opening.zone_names = zone_names[0]
Opening.chart_titles = chart_titles[0]
return module_opening(Z1group, Z1)
# (The remaining zone branches were elided by the poster.)
etc etc...
def select_yClosing():
# Map the "select_yClosing" dropdown value to a per-day filtered dataframe,
# recording the chosen column/mask on the Opening class as a side effect.
yClosing = request.form.get("select_yClosing")
if yClosing == "irrelevant":
# No yesterday-close condition: first bar per day for the selected zone.
Opening.yClosing = df1 = Opening.zone
df1 = df[Opening.zone].groupby(df['Date'].dt.date).first()
return df1
if yClosing == "above_value":
# NOTE(review): yCLOSEOUTOFVALUEabove is a module-level variable omitted
# from this truncated post.
Opening.yClosing = df1 = df[yCLOSEOUTOFVALUEabove].eq(df[Opening.zone])
df1 = df[df1].groupby(df['Date'].dt.date).first()
return df1
# (The remaining branches were elided by the poster.)
etc etc....
def select_yCondition():
# Map the "select_yCondition" dropdown value to a per-day filtered dataframe,
# recording the chosen mask on the Opening class as a side effect.
yCondition = request.form.get("select_yCondition")
if yCondition == "irrelevant":
# No extra condition: first bar per day for the selected zone.
Opening.yCondition = df1 = Opening.zone
df1 = df[Opening.zone].groupby(df['Date'].dt.date).first()
return df1
if yCondition == "close_above_open":
# NOTE(review): yClose_above_yOpen is a module-level variable omitted
# from this truncated post.
Opening.yCondition = df1 = df[Opening.yClosing].eq(df[yClose_above_yOpen])
df1 = df[df1].groupby(df['Date'].dt.date).first()
return df1
# (The remaining branches were elided by the poster.)
etc etc...
def open_empty():
    """Render the empty placeholder page (shown when no zone is selected)."""
    template_name = 'emptyRTH.html'
    return render_template(template_name)
def module_opening(zonegroup, zone):
# Core analysis: for the selected zone group, compute per-zone hit statistics
# within the user-chosen time window and render them with a pygal bar chart.
time_receive1 = 0
time_receive2 = 0
time1 = request.form
time2 = request.form
if request.method == "POST":
# Start/end of the window chosen in the form (HHMMSS-style integers as text).
time_receive1 = time1["select_time1"]
time_receive2 = time2["select_time2"]
TimeSpanStart = int(time_receive1)
TimeSpanEnd = int(time_receive2)
output = []
output2 = []
chart_values = []
df1 = zonegroup
# Apply the yClosing / yCondition dropdown filters (each reads request.form itself).
df1 = select_yClosing()
df1 = select_yCondition()
# NOTE(review): between(SessionStart, SessionStart) keeps only the bar at
# exactly SessionStart (the session-opening bar) — confirm this is intended.
df1 = df1[df1['Time'].between(SessionStart, SessionStart)]
SESSIONS = len(df1)
output.append(
str(len(df1))
+ " " +str(Opening.zone_names) + "sessions out of "
+ str(sample_length)
+" days sample size. "
+ "\n" + "\n" )
#+ str(df1.Date))
#)
# One result slot per zone column listed in Opening.zonelist.
z = [None] * 5
for j in range(5):
df1 = Opening.yClosing
df1 = Opening.yCondition
z[j] = df[Opening.zonelist[j]].eq(df[df1])
z[j] = df[z[j]].groupby(df['Date'].dt.date).first().reset_index(drop=True).set_index('Date')
z[j] = z[j][z[j]['Time'].between(TimeSpanStart, TimeSpanEnd)]
# Build an HTML fragment with the hit count / percentage for this zone.
output2.append(
str(len(z[j]))
+ " hits in "
+ str(SESSIONS)
+ " " +str(Opening.zone_names) + " sessions sample size. "
+"<br><b>"
+ "{:.2%}".format(len(z[j]) / SESSIONS)
+"</b>"
+ str(Opening.zonetextlist[j])
+str(TimeSpanStart)
+ ' and '
+ str(TimeSpanEnd)
+ "\n" + "\n"
+'<div class="toggleDIV">'
+'"""<xmp>'
+ str(z[j].index.strftime("%Y-%m-%d %H:%M:%S"))
+' </xmp>"""'
+'</div>'
)
chart_values.append(round((len(z[j]) / SESSIONS)*100))
# Chart styling; imported here so pygal.style is only needed on this path.
from pygal.style import Style
custom_style = Style(
background='transparent',
plot_background='transparent',
foreground='#403C44',
foreground_strong='#003366',
foreground_subtle='#630C0D',
opacity='.6',
opacity_hover='.9',
legend_font_size=9,
title_font_size=12,
#transition='400ms ease-in',
colors=('#CD6155', '#3498DB', '#16A085', '#95A5A6', '#5D6D7E'))
line_chart = pygal.HorizontalBar(print_values=True, style=custom_style, width=650, height=450, explicit_size=True)
line_chart.title = Opening.chart_titles
line_chart.x_labels = [chart_values[0]]#, chart_values[1], chart_values[2], chart_values[3], chart_values[4], chart_values[5]]
#line_chart.add('Depth %', [chart_values[0], chart_values[1], chart_values[2], chart_values[3], chart_values[4], chart_values[5]])
line_chart.add(Opening.chart_legend[0], [chart_values[0]])
line_chart.add(Opening.chart_legend[1], [chart_values[1]])
line_chart.add(Opening.chart_legend[2], [chart_values[2]])
line_chart.add(Opening.chart_legend[3], [chart_values[3]])
line_chart.add(Opening.chart_legend[4], [chart_values[4]])
# Embed the chart as a data URI so the template can inline it in an <img>.
bar_data = line_chart.render_data_uri()
return render_template('indexRTH.html', output = output, output2 = output2, bar_data = bar_data)
# NOTE(review): `app` is never created in this truncated snippet — the full
# file must define it (e.g. app = Flask(__name__)) before these routes can run.
if __name__ == "__main__":
app.run(debug=True)

If i got your question, you should try this;
in apple.py;
from flask import Flask, render_template

# "template" is the name of your templates folder.
app = Flask(__name__, template_folder="template")

# NOTE(review): posted as '#app.route("/")' — a markdown artifact; without
# the '@' the route is never registered. Restored to a real decorator.
@app.route("/")
def index():
    """Serve the start page from which the analysis pages are chosen."""
    return render_template("index.html")

Related

Run a background thread in apache deployed flask script

I have a flask script which runs a background loop constantly and at a given time runs a process. When I ran this on my local machine for testing purposes it ran fine and the background thread worked, however, once I deployed it on ubuntu using apache the background process seems to not be running
from flask import Flask, render_template, redirect, url_for
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, DateField
from wtforms.validators import Optional
from flask_bootstrap import Bootstrap
from threading import Thread
from MetocSC import scraper
import pandas as pd
from datetime import date
import time
import os
import re
# Flask app setup; SECRET_KEY is required by Flask-WTF for CSRF protection.
app = Flask(__name__)
app.config['SECRET_KEY'] = 'key'
Bootstrap(app)
# Create an empty database CSV on first run so later reads never fail.
if not os.path.exists('Database.csv'):
df = pd.DataFrame(columns=['Type', 'Origin', 'Issue date', 'Subject', 'Date', 'Points', 'Upload date'])
df.to_csv('Database.csv', index=False)
# Search form: free-text criteria plus an optional date filter.
class SearchForm(FlaskForm):
criteria = StringField('search')
# Optional() lets the form validate when no date is supplied.
date = DateField('Date', validators=(Optional(),))
submit = SubmitField('Search', render_kw={'class': 'btn btn-success'})
# NOTE(review): posted as '#app.route' — markdown mangling of '@app.route';
# restored so the route is actually registered.
@app.route('/', methods=['GET', 'POST'])
def home():
    """Home page: show the search form; on submit redirect to /results."""
    form = SearchForm()
    if form.validate_on_submit():
        return redirect(url_for('results', x=form.criteria.data, y=form.date.data))
    return render_template('home.html', form=form)
# NOTE(review): posted as '#app.route' — markdown mangling of '@app.route'.
@app.route('/results', defaults={'x': None, 'y': None})
@app.route('/results/<x>', defaults={'y': None})
@app.route('/results/<x>/<y>')
def results(x, y):
    """Filter Database.csv by a search criteria and/or an upload date.

    The two URL segments are positionally interchangeable: whichever one
    matches YYYY-MM-DD is treated as the date, the other as the criteria.

    Original bug: `date`/`criteria` stayed unbound when an argument was
    supplied but did not match the date pattern, raising UnboundLocalError.
    Both are now always initialised.
    """
    date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}')
    # Note: `date` deliberately shadows datetime.date inside this function,
    # matching the original code's naming.
    date = ''
    criteria = ''
    if x and date_pattern.search(x):
        date = x
        criteria = y or ''
    elif y and date_pattern.search(y):
        date = y
        criteria = x or ''
    else:
        # Neither segment is a date: anything supplied is a plain criteria.
        criteria = x or y or ''
    print('date: ', date)
    print('criteria', criteria)
    df = pd.read_csv('Database.csv', index_col=[0])
    if criteria != '' and date == '':
        # Case-insensitive substring match across every column.
        df = df[df.apply(lambda row: row.astype(str).str.contains(criteria, case=False).any(), axis=1)]
    elif date != '' and criteria == '':
        df = df.loc[df['Upload date'] == date]
    elif date != '' and criteria != '':
        df = df[(df['Upload date'] == date)]
        df = df[df.apply(lambda row: row.astype(str).str.contains(criteria, case=False).any(), axis=1)]
    else:
        return 'Error: Either a date or search criteria must be provided'
    # 'Upload date' is internal bookkeeping; hide it from the rendered table.
    df.drop('Upload date', inplace=True, axis=1)
    return render_template('results.html', tables=[df.to_html()], titles=[''])
# NOTE(review): posted as '#app.route' — markdown mangling of '@app.route'.
@app.route('/logs')
def logs():
    """Render every stored record (minus the internal 'Upload date' column)."""
    df = pd.read_csv('Database.csv', index_col=[0])
    df.drop('Upload date', inplace=True, axis=1)
    return render_template('logs.html', tables=[df.to_html()], titles=[''])
def threaded_scraper():
    """Background loop: at 06:30 GMT, scrape once per day and append to Database.csv.

    check.txt stores the day-of-month of the last successful scrape so the
    06:30 window is idempotent.

    Fixes vs. original: the guard file is read with a context manager (the
    original leaked the handle on the scrape path), and a missing or garbled
    check.txt no longer crashes the thread on first run.
    """
    while True:  # Permanently run this loop as a thread
        gmt = time.gmtime(time.time())  # Get the current GMT time
        if gmt.tm_hour == 6 and gmt.tm_min == 30:  # If half 6, scrape for the day
            # Treat a missing/unparseable guard file as "not scraped yet".
            try:
                with open('check.txt', 'r') as f:
                    already_scraped = int(f.read()) == gmt.tm_mday
            except (FileNotFoundError, ValueError):
                already_scraped = False
            if already_scraped:
                time.sleep(1)
            else:
                # Scrape and append one row to the CSV database.
                ABPW10_data = scraper()
                df = pd.read_csv('Database.csv', index_col=[0])
                new_row = pd.DataFrame(data=
                    {
                        'Type': ABPW10_data['Type'],
                        'Origin': ABPW10_data['Origin'],
                        'Issue date': ABPW10_data['Issue date'],
                        'Subject': ABPW10_data['Subject'],
                        'Date': ABPW10_data['Date'],
                        'Points': str(ABPW10_data['Points']),
                        'Upload date': date.today()
                    },
                    index=[0]
                )
                df = pd.concat([df, new_row], ignore_index=True)
                df.to_csv('Database.csv', index=False)
                # Record today's day-of-month so we don't scrape again this minute.
                with open('check.txt', 'w') as f:
                    f.write(str(gmt.tm_mday))
        else:
            time.sleep(1)
# Start the scraper loop as a daemon thread so it dies with the main process.
# NOTE(review): under Apache/mod_wsgi this module-level thread may never start
# (or may be killed with short-lived worker processes) — that matches the
# reported symptom; the thread must be started per-worker by the WSGI config.
thread = Thread(target=threaded_scraper)
thread.daemon = True
thread.start()
if __name__ == '__main__':
app.run()
I am aware there are other potentially better ways to thread tasks in flask, however this method worked for me so I stuck with it.
I followed this tutorial to get it set up on apache https://python.plainenglish.io/how-to-securely-deploy-flask-with-apache-in-a-linux-server-environment-7eacd4c69a73
Thanks for your help in advance

How to pass two or more dataframes from a module to main script

Edit with @RJ Adriaansen's update:
I'm trying to pull two or more dataframes from a module so that I can use the data in the main script.
I only get 4 empty dataframes returned from the df_make module.
The main and df_make codes are below.
Any advice would be great thanks.
import pandas as pd
import df_make
# Module-level placeholders (kept for backward compatibility; see note in main).
df_trn = pd.DataFrame()
df_trn_trk = pd.DataFrame()
df_jky = pd.DataFrame()
df_jky_code = pd.DataFrame()

def main():
    """Fetch the four dataframes from df_make and print them.

    Original bug: jky_trn rebinds its parameters internally, so passing these
    module-level frames in can never update them — the returned tuple
    (df_jky, df_jky_code, df_trn, df_trn_trk) must be captured instead.
    """
    jky, jky_code, trn, trn_trk = df_make.jky_trn(df_trn, df_trn_trk, df_jky, df_jky_code)
    print(trn)
    print(trn_trk)
    print(jky)
    print(jky_code)

if __name__ == '__main__':
    main()
import pandas as pd
#def jky_trn(df_trn):
def jky_trn(df_trn, df_trn_trk, df_jky, df_jky_code):
    """Build jockey and trainer dataframes from Racecards.xlsx.

    The four dataframe parameters are kept for backward compatibility but are
    never read: rebinding them inside this function cannot affect the caller,
    which is why the results are returned as a tuple instead.

    Returns:
        tuple: (df_jky, df_jky_code, df_trn, df_trn_trk)
    """
    path = (r"C:\Users\chris\Documents\UKHR\PythonSand\PY_Scripts\StackOF")
    # Raw string: "\R" in a plain literal is an invalid/deprecated escape.
    xls_tbl = r"\Racecards.xlsx"
    xls_link = path + xls_tbl
    df1 = pd.read_excel(xls_link, usecols=["Jockey", "Course", "RaceDesc"])
    df2 = pd.read_excel(xls_link, usecols=["Trainer", "Course", "RaceDesc"])
    df1 = df1.drop_duplicates(subset=["Jockey", "Course", "RaceDesc"])
    df1 = df1.dropna()  # Remove rows with NaN
    # Strip the ' (AW)' suffix literally; regex=False makes this immune to the
    # str.replace default-regex change across pandas versions.
    df1['Course'] = df1['Course'].str.replace(' (AW)', '', regex=False)
    df2['Course'] = df2['Course'].str.replace(' (AW)', '', regex=False)
    df_jky = df1[['Jockey']].copy().drop_duplicates()
    df_jky_code = df1[['Jockey', 'Course']].copy().drop_duplicates()
    df_trn = df2[['Trainer']].copy().drop_duplicates()
    df_trn_trk = df2[['Trainer', 'Course']].copy().drop_duplicates()
    return df_jky, df_jky_code, df_trn, df_trn_trk
So, it turns out that I needed to refer to the dataframes as a tuple item in the main script e.g. df_jt = df_make.jky_trn()
The new main script code is:
import pandas as pd
import df_make
def main():
    """Print the four dataframes returned by df_make.jky_trn().

    jky_trn returns the tuple (df_jky, df_jky_code, df_trn, df_trn_trk),
    so the frames are accessed by index on the returned tuple.
    """
    df_jt = df_make.jky_trn()
    print(df_jt[0])
    print(df_jt[1])
    print(df_jt[2])
    print(df_jt[3])

# The guard was posted as "if name == 'main':" — markdown stripped the
# double underscores; restored to the working form.
if __name__ == '__main__':
    main()

Generalize Getting Data From SQL Server to Python

I'm studying on a task that I have to get data from SQL Server, and because I'm running time series analysis, I need to specify a date field that can change every table or query. Also I can read a simple query or a stored procedure. I want to generalize my below code which is a field and database specific. I thought that I can define an empty dictionary in class and then I can call it in below dataread method. But I am conflicted.
# Reads SQL Server data and derives calendar feature columns for time-series analysis.
class DataPrep:
def __init__(self,conn):
self.df = pd.DataFrame()
self.mega_projects = set()
self.mega_project_to_df = {}
self.mega_project_to_df_pvt = {}
# NOTE(review): the `conn` argument is ignored and shadowed by an empty dict;
# read_data() later replaces it with a real pyodbc connection.
self.conn={}
def read_data(self):
# NOTE(review): all connection fields are None placeholders — the driver/
# server/database/credentials must be filled in before this can connect.
self.conn=pyodbc.connect({'driver':None, 'server':None, 'database':None, 'uid':None, 'pwd':None})
# Run the stored procedure; CreatedDate is parsed with an explicit format.
self.df = pd.read_sql_query('''exec [dbo].[ML_WorkLoad]''', self.conn, parse_dates={'CreatedDate': '%d/%m/%Y %H.%M.%S'})
#self.df = self.df[['EstimateManDay', 'CreatedDate', 'MegaProject', 'ProjectName']]
# Derive calendar columns from CreatedDate.
self.df['month'] = pd.DatetimeIndex(self.df['CreatedDate']).month
self.df['year'] = pd.DatetimeIndex(self.df['CreatedDate']).year
self.df['quarter'] = pd.DatetimeIndex(self.df['CreatedDate']).quarter
# NOTE(review): DatetimeIndex.week is deprecated/removed in newer pandas —
# use .isocalendar().week there.
self.df['week'] = pd.DatetimeIndex(self.df['CreatedDate']).week
self.df['dayorg'] = pd.DatetimeIndex(self.df['CreatedDate']).day
# Fixed day=1 so year/month/day below form a month-start date.
self.df['day'] = 1
self.df['year_quarter'] = self.df['year'].astype(str) + "_" + self.df[
'quarter'].astype(str)
self.df['year_month'] = self.df['year'].astype(str) + "_" + self.df[
'month'].astype(str)
self.df['year_week'] = self.df['year'].astype(str) + "_" + self.df['week'].astype(
str)
self.df['date'] = pd.to_datetime(self.df[['year', 'month', 'day']])
# Keep only records created on or before 2020-01-01.
self.df = self.df[self.df['CreatedDate'] <= datetime.strptime("2020-01-01", "%Y-%m-%d")]

Overwriting one data with another data in pandas(dataframe)

Periodically (every 120 seconds) get data but recent data overwrites previous data in SQL DB. I want all data to be saved.In addition, is the timer correct?
import sqlalchemy as sa
import psycopg2
import requests as rq
import pandas as pd
import json
import time
start_time = time.time()
# Create the engine once; re-creating it on every cycle wasted connections.
# NOTE(review): the posted URL had 'postgres:3434#127.0.0.1' — '#' is almost
# certainly a markdown-mangled '@' (password@host); confirm credentials/host.
engine = sa.create_engine('postgresql+psycopg2://postgres:3434@127.0.0.1/postgres')
while True:
    df = pd.DataFrame()
    vehicleList = {"SN63NBK", "YY67UTP"}
    for ids in vehicleList:
        # Fetch live arrival predictions for this vehicle.
        r = rq.get('https://api.tfl.gov.uk/Vehicle/' + ids + '/Arrivals')
        temp = pd.read_json(r.text)
        # Tag rows with the actual vehicle id (the original stored the
        # literal string 'ids' for every row).
        temp['Type'] = ids
        df = pd.concat([df, temp], sort=False).reset_index(drop=True)
    # Serialise the 'timing' dicts so they fit a text column.
    df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
    # 'append' keeps earlier polls; 'replace' was overwriting them each cycle.
    df.to_sql('tfl_bus_pg6', engine, if_exists='append', index=False)
    # Sleep until the next 120-second boundary relative to start_time.
    time.sleep(120.0 - ((time.time() - start_time) % 120.0))
I changed your code slightly, but I think the main problem is in the if_exists parameter, which you should set to append, as @K753 has mentioned in the comments.
Also, YY67UTP id returns nothing, so I replaced it with another random id from the site to illustrate how code works.
def _data_gen(vehicles):
    """ Yields a dataframe for each request """
    base_url = 'https://api.tfl.gov.uk/Vehicle/'
    for vehicle_id in vehicles:
        # Throttle so consecutive API calls are at least a second apart.
        time.sleep(1)
        response = rq.get(base_url + vehicle_id + '/Arrivals')
        frame = pd.read_json(response.text)
        frame['Type'] = vehicle_id
        yield frame
while True:
# how do you break from while loop if you need to?
# Gather one dataframe per vehicle and stack them into a single frame.
vehicleList = {"SN63NBK", "YY67UTP"}
df = pd.concat(_data_gen(vehicleList), sort=False, ignore_index=True)
# NOTE(review): '3434#127.0.0.1' looks like a markdown-mangled
# 'password@host' — confirm the '#' should be '@'.
engine = sa.create_engine('postgresql+psycopg2://postgres:3434#127.0.0.1/postgres')
# Serialise the 'timing' dicts so they fit a text column.
df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
# 'append' preserves rows from previous cycles ('replace' was the bug).
df.to_sql('tfl_bus_pg6', engine, if_exists='append', index=False)
time.sleep(120)

Instantiating a class for text analytics

I’ve found this code, a Python Class which takes a WhatsApp conversation text file processes and generates a Chat class which I can interact with. Things like generate charts, the response matrix etc.:
import re
import time
import pandas as pd
import dateutil
import matplotlib.pyplot as plt
class WppAnalyser:
    """Parse an exported WhatsApp chat log into a pandas DataFrame."""

    def open_file(self):
        """Read self.filename and return its content as a list of lines."""
        # Context manager guarantees the handle is closed (original leaked it).
        with open(self.filename, 'r') as handle:
            content = handle.read().splitlines()
        return content

    def ismessage(self, str):
        """Classify one chat line.

        Returns [name, message, timestamp] when the line starts a new
        message, otherwise ["", "", line] for continuation lines.
        (The parameter is named `str`, shadowing the builtin, to keep the
        original signature intact.)
        """
        # NOTE(review): the posted patterns had lost their backslashes to
        # markdown mangling (e.g. 'w{3}s{1}' for '\w{3}\s{1}'); restored here.
        patterns = {
            "hor1": r'\w{3}\s{1}[0-9]{1,2},\s{1}\d{4},\s{1}\d{2}:\d{2}',
            "hor2": r'\w{3}\s{1}[0-9]{1,2},\s{1}\d{2}:\d{2}',
            "imp2": r'\d{1,2}\s\w{3}\s\d{2}:\d{2}',
            "imp1": r'\d{1,2}\s\w{3}\s\d{4}\s\d{2}:\d{2}',
        }
        for key in patterns:
            result = re.search(patterns[key], str)
            # A real message header has a timestamp colon plus a 'Name:' colon.
            if result and str.count(':') >= 2:
                name_start = str.find("-") + 2
                first_colon = str.find(":")
                name_end = str.find(":", first_colon + 1)
                name = str[name_start:name_end]
                message = str[name_end + 1:]
                return [name, message, result.group()]
        return ["", "", str]

    def process(self, content):
        """Build a DataFrame of (Name, Message, date_string) plus derived time columns."""
        j = 1
        df = pd.DataFrame(index=range(1, len(content) + 1),
                          columns=['Name', 'Message', 'date_string'])
        for i in content:
            results = self.ismessage(i)
            if results[0] != "":
                # .ix was removed from pandas; .loc is the replacement.
                df.loc[j] = results
            else:
                # Continuation line: inherit name/timestamp from the previous row.
                df.loc[j, 'Name'] = df.loc[j - 1, 'Name']
                df.loc[j, 'date_string'] = df.loc[j - 1, 'date_string']
                df.loc[j, 'Message'] = results[2]
            j = j + 1
        df['Time'] = df['date_string'].map(lambda x: dateutil.parser.parse(x))
        df['Day'] = df['date_string'].map(lambda x: dateutil.parser.parse(x).strftime("%a"))
        df['Date'] = df['date_string'].map(lambda x: dateutil.parser.parse(x).strftime("%x"))
        df['Hour'] = df['date_string'].map(lambda x: dateutil.parser.parse(x).strftime("%H"))
        # NOTE(review): the posted snippet ends here without returning df —
        # presumably truncated; a `return df` (or self.df assignment) is needed.
How would I run these functions together? Passing self to each function is confusing me. What would a main function look like here?
I have to instantiate WppAnalyser class, right? So far, I tried this for the first method:
class Chat:
    """Minimal wrapper holding an open chat-log file handle and a second value."""

    def __init__(self, x, y):
        # Open the file named by `x` — the original ignored `x` and
        # hard-coded "chatPPL.txt", making the parameter useless.
        self.x = open(x, "r")
        self.y = y

Categories