Run a background thread in an Apache-deployed Flask script - python

I have a Flask script which constantly runs a background loop and, at a given time, runs a process. When I ran this on my local machine for testing purposes it worked fine and the background thread ran. However, once I deployed it on Ubuntu using Apache, the background process does not seem to run.
from flask import Flask, render_template, redirect, url_for
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, DateField
from wtforms.validators import Optional
from flask_bootstrap import Bootstrap
from threading import Thread
from MetocSC import scraper
import pandas as pd
from datetime import date
import time
import os
import re

app = Flask(__name__)
app.config['SECRET_KEY'] = 'key'
Bootstrap(app)

if not os.path.exists('Database.csv'):
    df = pd.DataFrame(columns=['Type', 'Origin', 'Issue date', 'Subject', 'Date', 'Points', 'Upload date'])
    df.to_csv('Database.csv', index=False)

class SearchForm(FlaskForm):
    criteria = StringField('search')
    date = DateField('Date', validators=(Optional(),))
    submit = SubmitField('Search', render_kw={'class': 'btn btn-success'})

@app.route('/', methods=['GET', 'POST'])
def home():
    # Display data with a search bar and date search
    form = SearchForm()
    if form.validate_on_submit():
        return redirect(url_for('results', x=form.criteria.data, y=form.date.data))
    return render_template('home.html', form=form)

@app.route('/results', defaults={'x': None, 'y': None})
@app.route('/results/<x>', defaults={'y': None})
@app.route('/results/<x>/<y>')
def results(x, y):
    # Defaults so both names are always bound, even when neither segment matches
    date = ''
    criteria = ''
    if x:
        if re.search(r"^\d{4}-\d{2}-\d{2}", x):
            date = x
            if y:
                criteria = y
    if y:
        if re.search(r"^\d{4}-\d{2}-\d{2}", y):
            date = y
            if x:
                criteria = x
    print('date: ', date)
    print('criteria', criteria)
    df = pd.read_csv('Database.csv', index_col=[0])
    if criteria != '' and date == '':
        df = df[df.apply(lambda row: row.astype(str).str.contains(criteria, case=False).any(), axis=1)]
        df.drop('Upload date', inplace=True, axis=1)
        return render_template('results.html', tables=[df.to_html()], titles=[''])
    elif date != '' and criteria == '':
        df = df.loc[df['Upload date'] == date]
        df.drop('Upload date', inplace=True, axis=1)
        return render_template('results.html', tables=[df.to_html()], titles=[''])
    elif date != '' and criteria != '':
        df = df[(df['Upload date'] == date)]
        df = df[df.apply(lambda row: row.astype(str).str.contains(criteria, case=False).any(), axis=1)]
        df.drop('Upload date', inplace=True, axis=1)
        return render_template('results.html', tables=[df.to_html()], titles=[''])
    else:
        return 'Error: Either a date or search criteria must be provided'

@app.route('/logs')
def logs():
    df = pd.read_csv('Database.csv', index_col=[0])
    df.drop('Upload date', inplace=True, axis=1)
    return render_template('logs.html', tables=[df.to_html()], titles=[''])

def threaded_scraper():
    while True:  # Permanently run this loop as a thread
        gmt = time.gmtime(time.time())  # Get the current GMT time
        if gmt.tm_hour == 6 and gmt.tm_min == 30:  # If half 6, scrape for the day
            # region Check that it hasn't already been scraped today
            with open('check.txt', 'r') as f:
                scraped_today = int(f.read()) == gmt.tm_mday
            if scraped_today:
                time.sleep(1)
            # endregion
            # region If not already scraped today then scrape
            else:
                ABPW10_data = scraper()
                df = pd.read_csv('Database.csv', index_col=[0])
                new_row = pd.DataFrame(
                    data={
                        'Type': ABPW10_data['Type'],
                        'Origin': ABPW10_data['Origin'],
                        'Issue date': ABPW10_data['Issue date'],
                        'Subject': ABPW10_data['Subject'],
                        'Date': ABPW10_data['Date'],
                        'Points': str(ABPW10_data['Points']),
                        'Upload date': date.today()
                    },
                    index=[0]
                )
                df = pd.concat([df, new_row], ignore_index=True)
                df.to_csv('Database.csv', index=False)
            # endregion
            # region Update text file to show that scraping has been done today
            with open('check.txt', 'w') as f:
                f.write(str(gmt.tm_mday))
            # endregion
        else:
            time.sleep(1)

thread = Thread(target=threaded_scraper)
thread.daemon = True
thread.start()

if __name__ == '__main__':
    app.run()
I am aware there are other potentially better ways to thread tasks in flask, however this method worked for me so I stuck with it.
I followed this tutorial to get it set up on apache https://python.plainenglish.io/how-to-securely-deploy-flask-with-apache-in-a-linux-server-environment-7eacd4c69a73
Thanks for your help in advance
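For what it's worth, a likely explanation: under Apache/mod_wsgi the module is typically only imported when the first request arrives, and Apache may run several worker processes, so a thread started at import time may never start (or may start once per process) rather than behaving as it does under the development server. One of those "potentially better ways" is a scheduler such as APScheduler; a minimal hedged sketch (assuming APScheduler is installed, with scrape_once() as a hypothetical wrapper around the scraping body of threaded_scraper):

from apscheduler.schedulers.background import BackgroundScheduler

def scrape_once():
    # hypothetical wrapper: run scraper() and append the new row to Database.csv,
    # i.e. the body of the 06:30 branch in threaded_scraper above
    ...

scheduler = BackgroundScheduler(timezone='UTC')
scheduler.add_job(scrape_once, 'cron', hour=6, minute=30)  # 06:30 GMT daily
scheduler.start()

Note this still runs once per worker process, so under Apache you would also want a single daemon process, or move the job out of the web app entirely (e.g. into cron).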

Related

How to create a library of self-written static functions in python

I have a python script which has multiple static functions, and I want to convert that complete python script into a python library.
import pandas as pd
import numpy as np
import EA_Upload_config as cfg
import datetime

#%%
def clockPrint(sentence):
    now = datetime.datetime.now()
    date_time = now.strftime("%H:%M:%S")
    print(date_time + " : " + sentence)

def uploadToEA(df_, ds_api_name, operation_, instance, xmd_=None):  # Upsert / Overwrite
    import SalesforceEinsteinAnalytics as EA
    clockPrint("Upload Process Initiated for " + instance + " instance...")
    if instance.lower() == 'commercial':
        EAS = EA.salesforceEinsteinAnalytics(env_url='https://spglobalratings.my.salesforce.com', browser='chrome')
    if instance.lower() == 'analytical':
        EAS = EA.salesforceEinsteinAnalytics(env_url='https://spglobalratingsae.my.salesforce.com', browser='chrome')
    EAS.load_df_to_EA(df_, dataset_api_name=ds_api_name, operation=operation_, xmd=xmd_, fillna=False)  # Error because of fillna=False
    clockPrint("Upload Process Completed successfully for " + instance + " instance. Navigate to (Einstein Analytics --> Data Manager --> Monitor) to check progress.")

def processDate(date):
    if pd.isnull(date):
        return np.nan
    else:
        date = pd.to_datetime(date)
        date = datetime.datetime.strftime(date, "%m/%d/%Y")
        return date

if __name__ == '__main__':
    df = pd.read_csv(cfg.FILE_PATH)
    if len(cfg.DATE_COLUMNS) != 0:
        for c in cfg.DATE_COLUMNS:
            df[c] = df[c].apply(lambda x: processDate(x))
    for c in df.columns:
        if df[c].dtype == "O":
            df[c].fillna('', inplace=True)
        elif np.issubdtype(df[c].dtype, np.number):
            df[c].fillna(0, inplace=True)
        elif df[c].dtype == "datetime64[ns]":
            df[c] = df[c].apply(lambda x: processDate(x))
            df[c].fillna("", inplace=True)
    df.fillna("", inplace=True)
    for instance in cfg.INSTANCES:
        if instance.lower() == 'commercial':
            uploadToEA(df, cfg.COM_DATASET_API_NAME, cfg.COM_OPERATION, instance, cfg.COM_XMD)
        elif instance.lower() == 'analytical':
            uploadToEA(df, cfg.ANA_DATASET_API_NAME, cfg.ANA_OPERATION, instance, cfg.ANA_XMD)
        else:
            clockPrint("Update INSTANCES variable as ['Commercial'] or ['Analytical'] or ['Commercial','Analytical'].")
This is my complete python script, which I want to convert into a library. How should I do it?
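A minimal sketch of one common layout (the package name ea_upload is illustrative, not from the question): move the functions into a module inside a package directory, and push everything under if __name__ == '__main__': into a main() function so the script behaviour is preserved.

ea_upload/
    __init__.py    # from .core import clockPrint, processDate, uploadToEA
    core.py        # the three functions above, moved here unchanged
    cli.py         # def main(): ...the former __main__ block...

After pip install -e . (with a minimal setup.py/pyproject.toml for the ea_upload package), other scripts can simply do from ea_upload import uploadToEA. Handling the __main__ block itself is covered in the next question below.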

Converting a python script (which has __name__ == '__main__') to a Package/Library

I have a python script
import pandas as pd
import numpy as np
import EA_Upload_config as cfg
import datetime

#%%
def clockPrint(sentence):
    now = datetime.datetime.now()
    date_time = now.strftime("%H:%M:%S")
    print(date_time + " : " + sentence)

def uploadToEA(df_, ds_api_name, operation_, instance, xmd_=None):  # Upsert / Overwrite
    import SalesforceEinsteinAnalytics as EA
    clockPrint("Upload Process Initiated for " + instance + " instance...")
    if instance.lower() == 'commercial':
        EAS = EA.salesforceEinsteinAnalytics(env_url='https://spglobalratings.my.salesforce.com', browser='chrome')
    if instance.lower() == 'analytical':
        EAS = EA.salesforceEinsteinAnalytics(env_url='https://spglobalratingsae.my.salesforce.com', browser='chrome')
    EAS.load_df_to_EA(df_, dataset_api_name=ds_api_name, operation=operation_, xmd=xmd_, fillna=False)  # Error because of fillna=False
    clockPrint("Upload Process Completed successfully for " + instance + " instance. Navigate to (Einstein Analytics --> Data Manager --> Monitor) to check progress.")

def processDate(date):
    if pd.isnull(date):
        return np.nan
    else:
        date = pd.to_datetime(date)
        date = datetime.datetime.strftime(date, "%m/%d/%Y")
        return date

if __name__ == '__main__':
    df = pd.read_csv(cfg.FILE_PATH, dtype={"As of Date": str})
    if len(cfg.DATE_COLUMNS) != 0:
        for c in cfg.DATE_COLUMNS:
            df[c] = df[c].apply(lambda x: processDate(x))
    for c in df.columns:
        if df[c].dtype == "O":
            df[c].fillna('', inplace=True)
        elif np.issubdtype(df[c].dtype, np.number):
            df[c].fillna(0, inplace=True)
        elif df[c].dtype == "datetime64[ns]":
            df[c] = df[c].apply(lambda x: processDate(x))
            df[c].fillna("", inplace=True)
    df.fillna("", inplace=True)
    for instance in cfg.INSTANCES:
        if instance.lower() == 'commercial':
            uploadToEA(df, cfg.COM_DATASET_API_NAME, cfg.COM_OPERATION, instance, cfg.COM_XMD)
        elif instance.lower() == 'analytical':
            uploadToEA(df, cfg.ANA_DATASET_API_NAME, cfg.ANA_OPERATION, instance, cfg.ANA_XMD)
        else:
            clockPrint("Update INSTANCES variable as ['Commercial'] or ['Analytical'] or ['Commercial','Analytical'].")
It has three functions within it:
clockPrint
processDate
uploadToEA
Note: this script is linked to another python script named EA_Upload_config, which is imported here as cfg.
Now I want to convert this entire python script into a package, and I am confused about how I should handle __name__ == '__main__'. The aim is to convert this entire python script into a package.
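A hedged sketch of the usual pattern (module and package names are illustrative): nothing inside a package ever needs the __main__ block itself, so you move its body into a named function and keep a thin guard for direct execution:

# ea_upload/cli.py (illustrative name)
import EA_Upload_config as cfg
import pandas as pd

from .core import clockPrint, processDate, uploadToEA  # assuming the three functions moved to core.py

def main():
    # the body of the former `if __name__ == '__main__':` block, unchanged
    df = pd.read_csv(cfg.FILE_PATH, dtype={"As of Date": str})
    # ... the date handling, fillna logic, and INSTANCES loop from the script ...

if __name__ == '__main__':
    main()  # the module still runs as a script; importers just call main()

If you package it with setuptools, a console_scripts entry point (e.g. ea-upload = ea_upload.cli:main) additionally gives you a command-line command without any __main__ block at all.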

Python: If printed results are the same as before, print "no change"; otherwise print the new results. Run again every 10 mins

I want to run this script every 10 minutes, and if the results are the same as last time I don't want to print them again unless they change. Is this even possible? No, I'm not a programmer by any means; this is just a hobby.
I'm using Twilio to send me an SMS message for campsites that are available, but I don't want to receive the same SMS message every 10 minutes. I removed the Twilio code because it has my account info. Thank you in advance for any help. Here is my code below.
from datetime import datetime
import pandas as pd
import requests
from tabulate import tabulate

result = []
for unit_id in range(5095, 5099):
    resp = requests.get(
        f"https://calirdr.usedirect.com/rdr/rdr/fd/"
        f"availability/getbyunit/{unit_id}/startdate/2020-10-30/nights/30/true?").json()
    result.extend(resp)

filter_by = ['UnitId', 'StartTime', 'IsFree', 'IsWalkin']
df = pd.DataFrame(result)
df = df.filter(items=filter_by)
df['StartTime'] = df['StartTime'].apply(lambda d: datetime.fromisoformat(d).strftime("%Y-%m-%d"))
df = df[df['IsFree']]
df = df[~df['IsWalkin']]
df['UnitId'] = df['UnitId'].replace([5095], 'Site 81')
df['UnitId'] = df['UnitId'].replace([5096], 'Site 82')
df['UnitId'] = df['UnitId'].replace([5097], 'Site 83')
df['UnitId'] = df['UnitId'].replace([5098], 'Site 84')
df['UnitId'] = df['UnitId'].replace([5099], 'Site 85')
print(tabulate(df, headers=filter_by))
Below are the results if you run the code.
      UnitId    StartTime    IsFree    IsWalkin
--  --------  -----------  --------  ----------
62  Site 83   2020-11-01   True      False
80  Site 83   2020-11-19   True      False
89  Site 83   2020-11-28   True      False

Process finished with exit code 0
This will run the program, wait ten minutes, check whether the previous result is the same as the current one and, if so, skip printing it again. The part for you to figure out now is how to suppress it only until the next day :)
//EDIT: I edited the code according to your comment
from datetime import datetime
import pandas as pd
import requests
from tabulate import tabulate
import time

def main():
    result = []
    for unit_id in range(5095, 5099):
        resp = requests.get(
            f"https://calirdr.usedirect.com/rdr/rdr/fd/"
            f"availability/getbyunit/{unit_id}/startdate/2020-10-30/nights/30/true?").json()
        result.extend(resp)
    filter_by = ['UnitId', 'StartTime', 'IsFree', 'IsWalkin']
    df = pd.DataFrame(result)
    df = df.filter(items=filter_by)
    df['StartTime'] = df['StartTime'].apply(lambda d: datetime.fromisoformat(d).strftime("%Y-%m-%d"))
    df = df[df['IsFree']]
    df = df[~df['IsWalkin']]
    df['UnitId'] = df['UnitId'].replace([5095], 'Site 81')
    df['UnitId'] = df['UnitId'].replace([5096], 'Site 82')
    return tabulate(df, headers=filter_by)

res_before = ""
while True:
    res = main()
    if res != res_before:
        print(res)
        res_before = res
    else:
        print("nothing changed")
    time.sleep(600)
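To extend that to the "only once per day" requirement, one hedged sketch (an illustration, not part of the original answer) is to reset the suppression when the date changes, so an unchanged result is re-sent at most once per day:

from datetime import date

res_before = ""
last_day = None

while True:
    res = main()
    if date.today() != last_day:
        res_before = ""   # new day: allow the same result to be sent again
        last_day = date.today()
    if res != res_before:
        print(res)        # or send the Twilio SMS here
        res_before = res
    else:
        print("nothing changed")
    time.sleep(600)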

How to structure python / Flask project

As a python beginner and newbie to Flask, I made some structural mistakes in my project.
I have an html page with some dropdown menus and buttons where I set parameters; my python code (let's call it apple.py) then does some analysis work and sends the result back to the html page to display.
Now I would like to add some modified copies of my apple.py code and have a start page from which I can choose which py code / page I'd like to load. I've read about Blueprints in Flask but I haven't managed to implement them.
This is the structure of my project now:
apple.py
/static
    /css
/template
    Index.html
apple.py is organised like this:
csv / Dataframe crunching
Tons of variables
Class xxx()
@app.route('/')
@app.route('/main/', methods=['GET', 'POST'])
Some Functions / main code()…
if __name__ == "__main__":
    app.run(debug=True)
I can't change anything in the main code; moving the routes above the class, for example, causes many errors.
Any suggestions on how to structure this to get an html start page from which I could navigate to apple.py, banana.py, etc.?
UPDATE:
Upon request, I am posting the truncated main code (apple.py) here. I deleted all repetitive lines from the functions, as well as the variables, because they don't matter here.
from flask import Flask, render_template, request, make_response
import pandas as pd
import numpy as np
import sys
import pygal

app = Flask(__name__)  # restored: the truncated snippet uses `app` below

#****** csv import and dataframe setup ****************************************************
df = pd.read_csv('ES_M5_7h00.csv', sep=';', engine='python')
df['Date'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)
df.set_index('Date', inplace=True)
to_delete = ['2019-12-25', '2019-12-26', '2020-01-01', '2020-07-03']
df = df[~(df.index.strftime('%Y-%m-%d').isin(to_delete))]
df.Time = df.Time.str.replace(':', '').astype(float).astype(int)
df.reset_index(inplace=True)
df = df[df['Time'].between(70000, 221000)]
SessionStart = 70000
df = df.join(df.resample('B', on='Date')['yVAH_init'].last().rename('yVAH'), on=pd.to_datetime((df['Date'] - pd.tseries.offsets.BusinessDay()).dt.date))
df = df.join(df.resample('B', on='Date')['yVAL_init'].last().rename('yVAL'), on=pd.to_datetime((df['Date'] - pd.tseries.offsets.BusinessDay()).dt.date))
df = df.join(df.resample('B', on='Date')['yPOC_init'].last().rename('yPOC'), on=pd.to_datetime((df['Date'] - pd.tseries.offsets.BusinessDay()).dt.date))
df['Opening'] = df.Date.dt.date.map(df.resample('B', on='Date').Open.first())
df.drop(['yVAH_init', 'yVAL_init', 'yPOC_init'], axis=1, inplace=True)
sample_length = df['Date'].dt.date.nunique()

#****** variables start ****************************************************
#****** variables end ****************************************************

#****** main code ****************************************************
class Opening(object):
    def __init__(self, zone=None, zonegroup=None, yClosing=None, zonelist=None, zonetextlist=None, chart_legend=None, zone_names=None, chart_titles=None, yCondition=None):
        self.zone = zone
        self.zonegroup = zonegroup
        self.yClosing = yClosing
        self.zonelist = zonelist
        self.zonetextlist = zonetextlist
        self.chart_legend = chart_legend
        self.zone_names = zone_names
        self.chart_titles = chart_titles
        self.yCondition = yCondition

@app.route('/')
@app.route('/main/', methods=['GET', 'POST'])
def select_zone():
    selected_zone = request.form.get("select_zone")
    if selected_zone is None:
        return open_empty()
    if selected_zone == "-":
        return open_empty()
    if selected_zone == "Z1":
        Opening.zone = Z1
        Opening.zonelist = zonelist[0]
        Opening.zonetextlist = zonetextlist[0]
        Opening.chart_legend = chart_legend[0]
        Opening.zone_names = zone_names[0]
        Opening.chart_titles = chart_titles[0]
        return module_opening(Z1group, Z1)
    # etc etc...

def select_yClosing():
    yClosing = request.form.get("select_yClosing")
    if yClosing == "irrelevant":
        Opening.yClosing = df1 = Opening.zone
        df1 = df[Opening.zone].groupby(df['Date'].dt.date).first()
        return df1
    if yClosing == "above_value":
        Opening.yClosing = df1 = df[yCLOSEOUTOFVALUEabove].eq(df[Opening.zone])
        df1 = df[df1].groupby(df['Date'].dt.date).first()
        return df1
    # etc etc....

def select_yCondition():
    yCondition = request.form.get("select_yCondition")
    if yCondition == "irrelevant":
        Opening.yCondition = df1 = Opening.zone
        df1 = df[Opening.zone].groupby(df['Date'].dt.date).first()
        return df1
    if yCondition == "close_above_open":
        Opening.yCondition = df1 = df[Opening.yClosing].eq(df[yClose_above_yOpen])
        df1 = df[df1].groupby(df['Date'].dt.date).first()
        return df1
    # etc etc...

def open_empty():
    return render_template('emptyRTH.html')

def module_opening(zonegroup, zone):
    time_receive1 = 0
    time_receive2 = 0
    time1 = request.form
    time2 = request.form
    if request.method == "POST":
        time_receive1 = time1["select_time1"]
        time_receive2 = time2["select_time2"]
    TimeSpanStart = int(time_receive1)
    TimeSpanEnd = int(time_receive2)
    output = []
    output2 = []
    chart_values = []
    df1 = zonegroup
    df1 = select_yClosing()
    df1 = select_yCondition()
    df1 = df1[df1['Time'].between(SessionStart, SessionStart)]
    SESSIONS = len(df1)
    output.append(
        str(len(df1))
        + " " + str(Opening.zone_names) + "sessions out of "
        + str(sample_length)
        + " days sample size. "
        + "\n" + "\n")
    #+ str(df1.Date))
    #)
    z = [None] * 5
    for j in range(5):
        df1 = Opening.yClosing
        df1 = Opening.yCondition
        z[j] = df[Opening.zonelist[j]].eq(df[df1])
        z[j] = df[z[j]].groupby(df['Date'].dt.date).first().reset_index(drop=True).set_index('Date')
        z[j] = z[j][z[j]['Time'].between(TimeSpanStart, TimeSpanEnd)]
        output2.append(
            str(len(z[j]))
            + " hits in "
            + str(SESSIONS)
            + " " + str(Opening.zone_names) + " sessions sample size. "
            + "<br><b>"
            + "{:.2%}".format(len(z[j]) / SESSIONS)
            + "</b>"
            + str(Opening.zonetextlist[j])
            + str(TimeSpanStart)
            + ' and '
            + str(TimeSpanEnd)
            + "\n" + "\n"
            + '<div class="toggleDIV">'
            + '"""<xmp>'
            + str(z[j].index.strftime("%Y-%m-%d %H:%M:%S"))
            + ' </xmp>"""'
            + '</div>'
        )
        chart_values.append(round((len(z[j]) / SESSIONS) * 100))
    from pygal.style import Style
    custom_style = Style(
        background='transparent',
        plot_background='transparent',
        foreground='#403C44',
        foreground_strong='#003366',
        foreground_subtle='#630C0D',
        opacity='.6',
        opacity_hover='.9',
        legend_font_size=9,
        title_font_size=12,
        #transition='400ms ease-in',
        colors=('#CD6155', '#3498DB', '#16A085', '#95A5A6', '#5D6D7E'))
    line_chart = pygal.HorizontalBar(print_values=True, style=custom_style, width=650, height=450, explicit_size=True)
    line_chart.title = Opening.chart_titles
    line_chart.x_labels = [chart_values[0]]  #, chart_values[1], chart_values[2], chart_values[3], chart_values[4], chart_values[5]]
    #line_chart.add('Depth %', [chart_values[0], chart_values[1], chart_values[2], chart_values[3], chart_values[4], chart_values[5]])
    line_chart.add(Opening.chart_legend[0], [chart_values[0]])
    line_chart.add(Opening.chart_legend[1], [chart_values[1]])
    line_chart.add(Opening.chart_legend[2], [chart_values[2]])
    line_chart.add(Opening.chart_legend[3], [chart_values[3]])
    line_chart.add(Opening.chart_legend[4], [chart_values[4]])
    bar_data = line_chart.render_data_uri()
    return render_template('indexRTH.html', output=output, output2=output2, bar_data=bar_data)

if __name__ == "__main__":
    app.run(debug=True)
If I got your question right, you should try this
in apple.py:
from flask import Flask, render_template

app = Flask(__name__, template_folder="template")  # template is the name of your templates folder

@app.route("/")
def index():
    return render_template("index.html")
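Since you mentioned Blueprints: a minimal hedged sketch of that approach (file and endpoint names here are illustrative, not from your project) would be to turn each analysis script into a blueprint and register them all on one app:

# apple.py (and banana.py, structured the same way)
from flask import Blueprint, render_template, request

apple_bp = Blueprint('apple', __name__)

@apple_bp.route('/apple/', methods=['GET', 'POST'])
def apple_main():
    # the csv crunching / analysis code for this variant goes here
    return render_template('indexRTH.html')

# main.py
from flask import Flask, render_template
from apple import apple_bp
# from banana import banana_bp  # further analysis pages follow the same pattern

app = Flask(__name__, template_folder='template')
app.register_blueprint(apple_bp)
# app.register_blueprint(banana_bp)

@app.route('/')
def start_page():
    return render_template('Index.html')  # start page linking to /apple/, /banana/, ...

if __name__ == '__main__':
    app.run(debug=True)

The module-level dataframe crunching can stay at the top of each blueprint module; the routes just move from @app.route to @apple_bp.route.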

Overwriting one set of data with another in pandas (dataframe)

I periodically (every 120 seconds) get data, but the most recent data overwrites the previous data in the SQL DB. I want all the data to be saved. In addition, is the timer correct?
import sqlalchemy as sa
import psycopg2
import requests as rq
import pandas as pd
import json
import time

start_time = time.time()
while True:
    temp = pd.DataFrame()
    df = pd.DataFrame()
    vehicleList = {"SN63NBK", "YY67UTP"}
    for ids in vehicleList:
        r = rq.get('https://api.tfl.gov.uk/Vehicle/' + ids + '/Arrivals')
        r = r.text
        temp = pd.read_json(r)
        temp['Type'] = 'ids'
        df = pd.concat([df, temp], sort=False).reset_index(drop=True)
    engine = sa.create_engine('postgresql+psycopg2://postgres:3434@127.0.0.1/postgres')
    df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
    df.to_sql('tfl_bus_pg6', engine, if_exists='replace', index=False)
    time.sleep(120.0 - ((time.time() - start_time) % 120.0))
I changed your code slightly, but I think the main problem is in the if_exists parameter, which you should set to 'append', as @K753 mentioned in the comments.
Also, the YY67UTP id returns nothing, so I replaced it with another random id from the site to illustrate how the code works.
def _data_gen(vehicles):
    """Yields a dataframe for each request"""
    for ids in vehicles:
        time.sleep(1)
        r = rq.get('https://api.tfl.gov.uk/Vehicle/' + ids + '/Arrivals')
        temp = pd.read_json(r.text)
        temp['Type'] = ids
        yield temp

while True:
    # how do you break from while loop if you need to?
    vehicleList = {"SN63NBK", "YY67UTP"}
    df = pd.concat(_data_gen(vehicleList), sort=False, ignore_index=True)
    engine = sa.create_engine('postgresql+psycopg2://postgres:3434@127.0.0.1/postgres')
    df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
    df.to_sql('tfl_bus_pg6', engine, if_exists='append', index=False)
    time.sleep(120)
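One small design note on the snippet above: sa.create_engine sets up a connection pool, so it is usually created once rather than on every iteration. A hedged variant of the loop:

engine = sa.create_engine('postgresql+psycopg2://postgres:3434@127.0.0.1/postgres')  # create once

while True:
    vehicleList = {"SN63NBK", "YY67UTP"}
    df = pd.concat(_data_gen(vehicleList), sort=False, ignore_index=True)
    df['timing'] = df['timing'].apply(json.dumps)
    df.to_sql('tfl_bus_pg6', engine, if_exists='append', index=False)
    time.sleep(120)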
