How to use data frame in python web application code - python

I am trying to create a Python web application that shows a time-series graph of the gold price. I have added the app layout, the app callback, and an update_graph function. After that, I tried to copy my data frame into a new data frame called 'dff', but it throws an error. At the end of the code I have put 'return fig', and that throws an error as well. I am fairly new to Python and need help figuring out what's wrong with my code. Below is the entire code.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input,Output
from urllib.request import urlopen, Request
url = "http://goldpricez.com/gold/history/lkr/years-3"

# pd.read_html downloads and parses the page itself when given a URL, so no
# separate urlopen() call is made here.  The previous version stored the raw
# page in a variable named `html`, which replaced the dash_html_components
# module imported under that same name and broke later `html.Div(...)` calls.
df = pd.read_html(url)  # list of DataFrames, one per table found on the page
df1 = df[3]  # the fourth table is the one used below
first_val = df1.iloc[0, 0]

# Keep the first two columns and label them.
date = df1[0]
price = df1[1]
data = [date, price]
headers = ["Date", "Price"]
df3 = pd.concat(data, axis=1, keys=headers)

# Parse the day-month-year strings into timestamps, then derive the calendar
# parts from the parsed column.
df3['Date'] = pd.to_datetime(df3['Date'], format='%d-%m-%Y')
df3['Year'] = df3['Date'].dt.year
df3['Month'] = df3['Date'].dt.month
df3['Day'] = df3['Date'].dt.day
df3['WDay'] = df3['Date'].dt.dayofweek
df3['WeekDayName'] = df3['Date'].dt.day_name()
print(df3['WDay'])

# The context manager writes and closes the workbook on exit;
# ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter') as writer:
    df3.to_excel(writer, sheet_name='Sheet1')
# Re-import so that `html` is the Dash component module even if the name was
# reused for something else earlier in the script.
import dash_html_components as html

app = dash.Dash(__name__)


def update_graph():
    """Build the gold-price line chart from the module-level ``df3`` frame.

    Returns:
        A Plotly Express line figure with ``Date`` on the x-axis and
        ``Price`` on the y-axis.
    """
    # Work on a copy of the two plotted columns so df3 itself is untouched.
    # The previous version read from an undefined name (df4) and replaced the
    # frame with a plain list, which cannot be indexed by column name.
    dff = df3[['Date', 'Price']].copy()
    fig = px.line(dff, x='Date', y='Price')
    return fig


# There is no input component that could trigger a callback, so the figure is
# built once and placed directly in the layout instead of registering
# update_graph with app.callback.
app.layout = html.Div([
    html.H1("Gold Price Analyst", style={'text-align': 'center'}),
    dcc.Graph(id='this_year_graph', figure=update_graph()),
])

if __name__ == '__main__':
    app.run_server(debug=True)

def update_graph():
    """Build the gold-price line chart from the module-level ``df3`` frame.

    Returns:
        A Plotly Express line figure with ``Date`` on the x-axis and
        ``Price`` on the y-axis.
    """
    # Select the plotted columns from df3 (the only frame defined by the
    # script) and keep the result a DataFrame so it can be indexed by name.
    dff = df3[['Date', 'Price']].copy()
    fig = px.line(dff, x='Date', y='Price')
    return fig

Related

python: AttributeError: 'list' object has no attribute 'groupby'

I am following a Youtube tutorial on a streamlit application, however the error
"AttributeError: 'list' object has no attribute 'groupby'"
occurred when I tried to group the list that I scraped from Wikipedia. The instructor had exactly the same code as me but didn't run into the problem. What am I missing?
import streamlit as st
import pandas as pd
# NOTE(review): the next line was presumably the decorator `@st.cache`; as
# written it is an ordinary comment and has no effect.
#st.cache
# Fetch the tables on the S&P 500 Wikipedia page and return the first one.
# NOTE(review): the function body has lost its indentation in this listing.
def load_data():
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
# pd.read_html returns a list of DataFrames, one per table it finds.
html = pd.read_html(url, header = 0)
# Index 0 selects the first table from that list.
df = html[0]
return df
df = load_data()
# Group the rows by the 'GICS Sector' column.
df = df.groupby('GICS Sector')
I fixed it: I just had to reassign the df variable to its first element (index 0).
import streamlit as st
import pandas as pd
@st.cache
def load_data():
    """Download every table found on the S&P 500 Wikipedia page.

    Returns:
        The list of DataFrames produced by ``pd.read_html`` (one per table).
    """
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    return pd.read_html(url, header=0)


df = load_data()
# read_html gives back a list, and a list has no groupby(); pick the first
# table before grouping.  (Indexing once here, rather than both inside
# load_data and again here, avoids a KeyError on the second lookup.)
df = df[0]
df = df.groupby("GICS Sector")

Columns not displaying correctly using python dash and mongo db

I am working on a project and after finally getting around some Type errors, I am finally getting my chart to display but it is listing each character in each mongo db json document instead of in proper json form. I am a total noob at this and I cannot seem to get it right so any help that can be offered in getting my interactive chart displayed as well as my pie chart and location chart will be greatly appreciated. I think the problem is derived from how the data is being entered into the dataframe but I can not figure it out.
Here is the read functions that are being used to take the info from the mongo db and send it to the Python script:
def read(self, data):
    """Find up to 35 animal documents matching ``data``.

    Args:
        data: Query passed unchanged to ``self.database.animals.find``.

    Returns:
        Whatever ``find(data).limit(35)`` returns.

    Raises:
        Exception: If ``data`` is ``None``.
    """
    # Guard clause: reject a missing query before touching the database.
    if data is None:
        raise Exception("Data entered is null!")
    return self.database.animals.find(data).limit(35)
def readAll(self, data):
    """Return up to 35 documents from the animals collection.

    Args:
        data: Not used; the query is always the empty filter ``{}``.

    Returns:
        Whatever ``find({}).limit(35)`` returns.
    """
    return self.database.animals.find({}).limit(35)
Here is the code for the python script that has imported dash:
from jupyter_plotly_dash import JupyterDash
import dash
import dash_leaflet as dl
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import dash_table
from dash.dependencies import Input, Output
from bson.json_util import dumps,loads
import bson
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pymongo import MongoClient
from AAC import AnimalShelter
###########################
# Data Manipulation / Model
###########################
# FIX ME update with your username and password and CRUD Python module name
# NOTE(review): the credentials are hard-coded in the script.
username = "aacuser"
password = "Melvin1234!"
shelter = AnimalShelter()
# NOTE(review): this calls a method literally named `_init_` (single
# underscores), not the `__init__` constructor -- confirm against AnimalShelter.
shelter._init_(username,password)
# class read method must support return of cursor object and accept projection json input
# NOTE(review): dumps() presumably returns one JSON string here, so list(...)
# would split it into single characters and each character would become a
# record -- confirm; this matches the "listing each character" symptom.
df =pd.DataFrame.from_records(list(dumps(shelter.readAll({}))))
# Second frame built the same way as df.
dff = pd.DataFrame.from_records(list(dumps(shelter.readAll({}))))
#########################
# Dashboard Layout / View
#########################
# Create the JupyterDash application object.
app = JupyterDash('SimpleExample')
import base64
# Page layout: hidden div, title, header (logo + name), two filter buttons,
# the data table, a pie-chart graph and a container for the location map.
# NOTE(review): indentation of the nested calls has been lost in this listing.
app.layout = html.Div([
html.Div(id='hidden-div', style={'display':'none'}),
html.Center(html.B(html.H1('SNHU CS-340 Dashboard'))),
html.Div(className='header',
children=[html.Center(html.Img(src='/home/16514911_snhu/Downloads/Grazioso Salvare Logo.jpg',width="500", height="600")), #image logo
html.Center(html.B(html.H6('Jacqueline Woods')))]), #Unique Handle
html.Br(),
# Buttons whose click counts drive the table filter callback.
html.Div(className ='row',
children=[
html.Button(id='submit-button-one',n_clicks=0, children='Cats'),
html.Button(id='submit-button-two',n_clicks=0,children='Dogs') ]),
html.Div(dash_table.DataTable(
id='datatable-interactivity',
# One column definition per column of df.
# NOTE(review): besides "name"/"id"/"deletable"/"selectable", every definition
# also carries the field names below as extra keys, all set to i -- presumably
# unintended; verify against the DataTable `columns` documentation.
columns=[
{"name": i, "id": i,"age_upon_outcome":i, 'animal_id':i,'animal_type':i,'breed':i,"color":i,
'date_of_birth':i,'datetime':i,'monthyear':i,'outcome_subtype':i,'outcome_type':i,'sex_upon_outcome':i,
'location_lat':i,'location_long':i,'age_upon_outcome_in_weeks':i,
"deletable": False, "selectable": True} for i in df.columns]
,data=df.to_dict('records'),editable = False,filter_action='native')),
html.Br(),
html.Div(
dcc.Graph( #piechart
id ='graph_id',
# NOTE(review): `figure` receives a list built from the dumps() output rather
# than a figure object or dict -- presumably not what dcc.Graph expects.
figure=list(dumps(shelter.readAll({})))
),
title="Outcome_Type"),
html.Br(),
html.Div( #location map
id='map-id',
className='col s12 m6',
title="Location"
)])
##Interaction Between Components / Controller
#This callback will highlight a row on the data table when the user selects it
@app.callback(
    Output('datatable-interactivity', "data"),
    [Input('submit-button-one', 'n_clicks'),
     Input('submit-button-two', 'n_clicks')]
)
def on_click(bt1, bt2):
    """Choose the rows shown in the data table from the two button counters.

    Args:
        bt1: Click count of ``submit-button-one``.
        bt2: Click count of ``submit-button-two``.

    Returns:
        The selected documents as a list of record dicts.
    """
    first_clicks, second_clicks = int(bt1), int(bt2)
    if first_clicks == 0 and second_clicks == 0:
        # Nothing clicked yet: show everything.
        cursor = shelter.readAll({})
    elif first_clicks > second_clicks:
        cursor = shelter.read({"animal_type": "Cat"})
    else:
        # Previously this query ran unconditionally and overwrote the result
        # of the branches above.
        cursor = shelter.read({"animal_type": "Dog"})

    # Build the frame from the cursor itself; passing the JSON string made by
    # dumps() does not produce one row per document.
    frame = pd.DataFrame.from_records(cursor)
    # presumably each document carries Mongo's `_id`; drop it if present
    # before the records are sent to the table.
    frame = frame.drop(columns=['_id'], errors='ignore')
    return frame.to_dict('records')
@app.callback(
    Output('datatable-interactivity', 'style_data_conditional'),
    [Input('datatable-interactivity', 'selected_columns')]
)
def update_styles(selected_columns):
    """Return one highlight rule per selected column of the data table.

    Args:
        selected_columns: Column ids currently selected, or ``None``.

    Returns:
        A list of ``style_data_conditional`` rules that give each selected
        column the background colour ``#D2F3FF``.
    """
    # NOTE(review): Dash may pass None when the property was never set in the
    # layout -- treat that as "no columns selected" instead of raising.
    return [
        {'if': {'column_id': column}, 'background_color': '#D2F3FF'}
        for column in (selected_columns or [])
    ]
@app.callback(
    Output('graph_id', 'figure'),
    [Input('datatable-interactivity', "derived_virtual_data")])
def update_Graph(allData):
    """Draw a donut chart of ``outcome_type`` for the rows in the table.

    Args:
        allData: ``derived_virtual_data`` of the table (list of row dicts),
            or ``None``.

    Returns:
        A Plotly Express pie figure, or an empty figure dict when there is
        no ``outcome_type`` column to plot.
    """
    dff = pd.DataFrame(allData)
    if 'outcome_type' not in dff.columns:
        # No data yet (or an unexpected schema): show an empty graph.
        return {}
    # `hole` is the fraction of the radius to cut out and must lie between
    # 0 and 1; the previous value 3 is rejected by Plotly.
    # NOTE(review): 0.3 is assumed to be the intended value -- confirm.
    return px.pie(data_frame=dff, names='outcome_type', hole=0.3)
@app.callback(
    Output('map-id', "children"),
    [Input('datatable-interactivity', 'derived_viewport_data'),
     Input('datatable-interactivity', 'derived_virtual_selected_rows')])
def update_map(viewData, derived_virtual_selected_rows=None):
    """Render a map with one marker for the selected (or first) table row.

    Args:
        viewData: ``derived_viewport_data`` of the table; when ``None`` the
            module-level ``df`` is used instead.
        derived_virtual_selected_rows: Row indices selected in the table.
            The first one is used; with no selection row 0 is used.

    Returns:
        A one-element list holding the ``dl.Map``, or an empty list when
        there is no row to show.
    """
    dff = df if viewData is None else pd.DataFrame(viewData)
    if dff.empty:
        return []

    row = derived_virtual_selected_rows[0] if derived_virtual_selected_rows else 0
    # NOTE(review): the selected index may refer to a row outside the current
    # viewport; fall back to the first row rather than raising.
    if row >= len(dff):
        row = 0
    selected_animal = dff.iloc[row]

    # Fields are read by position, as before.
    # assumes columns 12/13 hold latitude/longitude, 3 the breed and 0 the
    # name -- TODO confirm against the actual column order.
    latitude = selected_animal.iloc[12]
    longitude = selected_animal.iloc[13]
    breed = selected_animal.iloc[3]
    name = selected_animal.iloc[0]

    # Austin TX is at [30.75,-97.48]
    return [
        dl.Map(style={'width': '1000px', 'height': '500px'}, center=[30.75, -97.48], zoom=10, children=[
            dl.TileLayer(id="base-layer-id"),
            # Marker with tool tip and popup
            dl.Marker(position=[latitude, longitude], children=[
                dl.Tooltip(breed),
                dl.Popup([
                    html.H1("Animal Name"),
                    html.P(name)
                ])
            ])
        ])
    ]
app
I was in the same boat as you, searching for answers online, and I happened to find the answer. Hopefully this can help others who are experiencing this problem.
#Convert the pymongo cursor into a pandas DataFrame
df = pd.DataFrame(list(shelter.readAll({})))
#Drop the _id column generated by Mongo
# Dropping by name does not depend on where _id sits among the columns, and
# errors='ignore' keeps this a no-op when the column is absent.
df = df.drop(columns=['_id'], errors='ignore')
After that, you can access the data of the DataFrame by using
df.to_dict('records')

Python Dash refresh page not updating source data

I have written a basic plotly dash app that pulls in data from a csv and displays it on a chart.
You can then toggle values on the app and the graph updates.
However, when I add new data to the csv (done once each day) the app doesn't update the data on refreshing the page.
The fix is normally that you define your app.layout as a function, as outlined here (scroll down to updates on page load). You'll see in my code below that I've done that.
Here's my code:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import numpy as np
import pandas as pd
# Stylesheet URL handed to Dash when the app is created.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Location of the CSV file the app reads.
path = 'https://raw.githubusercontent.com/tbuckworth/Public/master/CSVTest.csv'
# Module-level load: these two statements run once, when the script starts.
df = pd.read_csv(path)
# NaN never compares equal to itself, so this keeps only the rows whose Map
# value is not missing.
df2 = df[(df.Map==df.Map)]
# Build the page layout: three dropdowns (Strategy, Class1, Metric) whose
# options come from a freshly read copy of the CSV, followed by the graph.
# NOTE(review): the body has lost its indentation in this listing.
def layout_function():
# These assignments create names local to this function; the module-level
# df and df2 are not changed by them.
df = pd.read_csv(path)
df2 = df[(df.Map==df.Map)]
# 'ALL' is prepended to the sorted unique Map values.
available_strats = np.append('ALL',pd.unique(df2.Map.sort_values()))
classes1 = pd.unique(df2["class"].sort_values())
metrics1 = pd.unique(df2.metric.sort_values())
return html.Div([
html.Div([
dcc.Dropdown(
id="Strategy",
options=[{"label":i,"value":i} for i in available_strats],
# Default selection: a one-element list holding the first option.
value=list(available_strats[0:1]),
multi=True
),
dcc.Dropdown(
id="Class1",
options=[{"label":i,"value":i} for i in classes1],
value=classes1[0]
),
dcc.Dropdown(
id="Metric",
options=[{"label":i,"value":i} for i in metrics1],
value=metrics1[0]
)],
style={"width":"20%","display":"block"}),
html.Hr(),
dcc.Graph(id='Risk-Report')
])
# The function object itself (not its result) is assigned as the layout.
app.layout = layout_function
@app.callback(
    Output("Risk-Report", "figure"),
    [Input("Strategy", "value"),
     Input("Class1", "value"),
     Input("Metric", "value")])
def update_graph(selected_strat, selected_class, selected_metric):
    """Build the Risk-Report figure for the current dropdown selection.

    Reads the module-level ``df2`` frame.

    Args:
        selected_strat: Selected ``Map`` values; the entry ``'ALL'``
            disables the ``Map`` filter.
        selected_class: Value the ``class`` column must equal.
        selected_metric: Value the ``metric`` column must equal.

    Returns:
        A figure dict with one line trace per ``Fund``.
    """
    # A cleared multi-select may arrive as None; treat it as "nothing selected".
    selected_strat = selected_strat or []

    mask = (df2["class"] == selected_class) & (df2.metric == selected_metric)
    if 'ALL' not in selected_strat:
        mask = mask & df2.Map.isin(selected_strat)
    df3 = df2[mask]

    # Sum `value` per fund and date so each fund yields a single series.
    df4 = df3.pivot_table(
        index=["Fund", "Date", "metric", "class"],
        values="value",
        aggfunc="sum",
    ).reset_index()

    traces = []
    for fund in df4.Fund.unique():
        df_by_fund = df4[df4["Fund"] == fund]
        traces.append(dict(
            x=df_by_fund["Date"],
            y=df_by_fund["value"],
            mode="lines",
            name=fund
        ))

    # The 'USD' class uses the default tick format; everything else is shown
    # as a percentage.
    tick_format = None if selected_class == 'USD' else '.2%'

    return {
        'data': traces,
        'layout': dict(
            xaxis={'type': 'date', 'title': 'Date'},
            yaxis={'title': 'Values', 'tickformat': tick_format},
            margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
            legend={'x': 0, 'y': 1},
            hovermode='closest'
        )
    }


if __name__ == '__main__':
    app.run_server(debug=True)
Things I've tried
Removing the initial df = pd.read_csv(path) before the def layout_function():. This results in an error.
Creating a callback button to refresh the data using this code:
# NOTE(review): the leading '#' was presumably '@'; as written the decorator
# is a comment and the three lines after it are left without a call.
#app.callback(
Output('Output-1','children'),
[Input('reload_button','n_clicks')]
)
# Re-read the CSV when the reload button has been clicked at least once.
# NOTE(review): the body has lost its indentation in this listing.
def update_data(nclicks):
if nclicks == 0:
# NOTE(review): PreventUpdate is not imported anywhere in the code shown.
raise PreventUpdate
else:
# These assignments create names local to update_data; the module-level
# df and df2 are not changed by them.
df = pd.read_csv(path)
df2 = df[(df.Map==df.Map)]
return('Data refreshed. Click to refresh again')
This doesn't produce an error, but the button doesn't refresh the data either.
Defining df within the update_graph callback. This updates the data every time you toggle something, which is not practicable (my real data is > 10^6 rows, so I don't want to read it in every time the user changes a toggle value).
In short, I think that defining app.layout = layout_function should make this work, but it doesn't. What am I missing/not seeing?
Appreciate any help.
TLDR; I would suggest that you simply load the data from within the callback. If load time is too long, you could change the format (e.g. to feather) and/or reduce the data size via pre processing. If this is still not fast enough, the next step would be to store the data in a server-side in-memory cache such as Redis.
Since you are reassigning df and df2 in the layout_function, these variables are considered local in Python, and you are thus not modifying the df and df2 variables from the global scope. While you could achieve this behavior using the global keyword, the use of global variables is discouraged in Dash.
The standard approach in Dash would be to load the data in a callback (or in the layout_function) and store it in a Store object (or equivalently, a hidden Div). The structure would be something like
import pandas as pd
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Output, Input, State
from dash.exceptions import PreventUpdate

app.layout = html.Div([
    ...,  # placeholder: the existing components go here
    dcc.Store(id="store"), html.Div(id="trigger")
])


@app.callback(Output('store', 'data'), [Input('trigger', 'children')], prevent_initial_call=False)
def update_data(children):
    """Read the CSV and return it as JSON for the ``store`` component."""
    df = pd.read_csv(path)
    return df.to_json()


@app.callback(Output("Risk-Report", "figure"), [Input(...)], [State('store', 'data')])
def update_graph(*values):
    """Rebuild the figure from the data held in the ``store`` component.

    Args:
        *values: The Input values followed by the stored JSON string.
    """
    # Dash passes the Input values first and the State value last.
    *inputs, data = values
    if data is None:
        # Nothing has been stored yet, so leave the graph as it is.
        raise PreventUpdate
    df = pd.read_json(data)
    ...  # placeholder: build and return the figure from df and inputs
However, this approach will typically be much slower than just reading the data from disk inside the callback (which seems to be what you are trying to avoid) as it results in the data being transferred between the server and client.

Reload Graph When Input in Textbox Changes

I am stuck trying to make my graph update whenever the input in the textbox changes. I also want any change to the textbox to be reflected in the data fetched from the DB (the graph), and this should happen continuously, i.e. the graph should keep updating.
However after some tries in using a button to kickstart the n-intervals, I still failed in doing so.
It would be great if anyone could have a look at my code. Thank you so much.
import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly
import random
import plotly.graph_objs as go
from collections import deque
import sqlite3
import pandas as pd
import time
# Stylesheet URL handed to Dash when the app is created.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Layout: a heading, the search-term text box, the graph, and an Interval
# component whose n_intervals counter is used as a callback input.
app.layout = html.Div(
[ html.H2('Live Twitter Sentiment Trend'),
dcc.Input(id='sentiment_term', value='trump', type='text'),
dcc.Graph(id='live-graph', animate=False),
dcc.Interval(
id='graph-update',
# 1*1000 = 1000; presumably milliseconds, i.e. one tick per second --
# confirm against the dcc.Interval documentation.
interval=1*1000,
n_intervals = 0
),
]
)
# NOTE(review): the leading '#' was presumably '@'; as written the decorator
# is a comment and the argument lines after it are left without a call.
#app.callback(
Output('live-graph', 'figure'),
[Input(component_id='sentiment_term', component_property='value'),
# NOTE(review): the extra '[' on the next line is never closed.
[Input(component_id='graph-update', component_property='n_intervals')])
# Query the most recent tweets containing the search term, smooth and
# resample their sentiment, and return a line+marker figure.
# NOTE(review): two Inputs are declared above but the function accepts only
# one argument.  The body has also lost its indentation in this listing.
def update_graph_scatter(sentiment_term):
try:
# NOTE(review): the connection is never closed, and the cursor created on the
# next line is discarded.
conn = sqlite3.connect('twitter.db')
conn.cursor()
# The search term is bound as a query parameter (the '?' placeholder), not
# concatenated into the SQL text.
df = pd.read_sql("SELECT * FROM sentiment WHERE tweet LIKE ? ORDER BY unix DESC LIMIT 1000", conn ,params=('%' + sentiment_term + '%',))
df.sort_values('unix', inplace=True)
# NOTE(review): the assignment below is split across two lines without a
# continuation, presumably a line-wrap artifact.
df['sentiment_smoothed'] =
df['sentiment'].rolling(int(len(df)/2)).mean()
# 'unix' is interpreted as milliseconds since the epoch.
df['date'] = pd.to_datetime(df['unix'],unit='ms')
df.set_index('date', inplace=True)
df = df.resample('0.15min').mean()
df.dropna(inplace=True)
X = df.index
Y = df.sentiment_smoothed
data = plotly.graph_objs.Scatter(
x=X,
y=Y,
name='Scatter',
mode= 'lines+markers'
)
# Axis ranges are pinned to the min/max of the plotted data.
return {'data': [data],'layout' : go.Layout(xaxis=dict(range=[min(X),max(X)]),
yaxis=dict(range=[min(Y),max(Y)]),
title='Term: {}'.format(sentiment_term))}
# Any exception is appended to errors.txt; nothing is returned in that case.
except Exception as e:
with open('errors.txt','a') as f:
f.write(str(e))
f.write('\n')
if __name__ == '__main__':
app.run_server(debug=True)
Your callback function update_graph_scatter is missing a second parameter. You can change it to:
def update_graph_scatter(sentiment_term, n_intervals):
Also your callback decorator should be:
@app.callback(
Output('live-graph', 'figure'),
[Input(component_id='sentiment_term', component_property='value'),
Input(component_id='graph-update', component_property='n_intervals')])
Because it had an extra "[" breaking it.
I'm not sure if this is causing your problem, but it might. I wish I had 50 reputation to post this as a mere comment because of its minor contribution.

Python Dash application with drop down menu that selects excel file sheets and displays in a table

I am in the process of learning Dash with the final goal being the development of an app that allows quick analysis of data sets in an excel file. I'd like a drop down menu that will allow for switching between the sheets in the excel file. I'm having trouble getting this to run properly. I can make it output to a graph but not a table. My code is as follows:
import dash
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html
import dash_table_experiments as dt
import pandas as pd
app = dash.Dash()
# With sheet_name=None, read_excel returns a dict that maps each sheet name
# to its DataFrame, so `df` here is a dict rather than a single frame.
df = pd.read_excel('output.xlsx', sheet_name=None)
# Layout: a dropdown listing the sheet names and a table.
# NOTE(review): the outer html.Div receives the two inner Divs as two separate
# positional arguments rather than as one list -- presumably the cause of the
# TypeError quoted below; confirm.
app.layout = html.Div(
html.Div([dcc.Dropdown(id='drop_value',
# Iterating the dict yields its keys, i.e. the sheet names.
options=[{'label': i, 'value': i} for i in df],
value='Sheet1')]),
html.Div([dt.DataTable(rows=[{}],
id='table')])
)
# NOTE(review): the leading '#' was presumably '@'; as written the decorator
# is a comment and the function is never registered as a callback.
#app.callback(Output('table', 'rows'), [Input('drop_value', 'value')])
# Return the contents of the sheet chosen in the dropdown; any value other
# than 'Sheet1' or 'Sheet2' falls through to 'Sheet3'.
# NOTE(review): the body has lost its indentation in this listing.
def update_info_table(drop_value):
if drop_value == 'Sheet1':
# NOTE(review): to_dict() is called without an orient argument here and in
# the branches below, whereas the corrected code further down the page uses
# to_dict('records').
new_data = df['Sheet1'].to_dict()
return new_data
elif drop_value == 'Sheet2':
new_data = df['Sheet2'].to_dict()
return new_data
else:
new_data = df['Sheet3'].to_dict()
return new_data
if __name__ == '__main__':
app.run_server()
When I run this code I get the following error:
TypeError: init() takes from 1 to 2 positional arguments but 3 were given
I'm assuming this has something to do with the format of the dataset I'm trying to feed to the datatable. Right now I'm just using a dummy excel file with only two columns labeled 'x1' and 'y1'.
Adjusted code and seems to work now. Correct code is below for anyone who needs.
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import dash_table_experiments as dt
# Load in data set
# Open the workbook once; individual sheets are parsed on demand.
sheet_to_df_map = pd.ExcelFile('output.xlsx')
# Only the sheet names are needed for the dropdown, so take them from the
# already opened workbook instead of reading every sheet a second time.
dropdown_options = sheet_to_df_map.sheet_names
# Create the dash app
app = dash.Dash()
# Define the layout for the drop down menu
app.layout = html.Div([
    html.H2("Select Sheet Number"),
    html.Div(
        [dcc.Dropdown(
            id="field_dropdown",
            options=[{'label': name, 'value': name} for name in dropdown_options],
            value='Sheet3',
        )],
        style={'width': '25%', 'display': 'inline-block'},
    ),
    dt.DataTable(
        rows=[{}],
        row_selectable=True,
        filterable=True,
        sortable=True,
        selected_row_indices=[],
        id='datatable',
    ),
])
@app.callback(
    dash.dependencies.Output('datatable', 'rows'),
    [dash.dependencies.Input('field_dropdown', 'value')])
def update_datatable(user_selection):
    """Return the rows of the sheet chosen in the dropdown.

    Args:
        user_selection: Sheet name selected in ``field_dropdown``.

    Returns:
        The sheet as a list of record dicts, or ``[{}]`` (the table's
        initial value) when the selection is not a sheet in the workbook.
    """
    # Look the sheet up by name so the callback works for any number of
    # sheets and any sheet names, not only 'Sheet1'..'Sheet3'.
    if user_selection not in sheet_to_df_map.sheet_names:
        return [{}]
    return sheet_to_df_map.parse(user_selection).to_dict('records')


if __name__ == '__main__':
    app.run_server()

Categories