I am pretty new to Dash and have tried to read as much as I can to understand what the issue might be. In a nutshell, I have a single date picker which is the input to both the DataTable callback and the Graph callback. The graph callback works fine, so it is just the DataTable that is causing problems. I also tried a single-input, multiple-output callback, but that didn't work either. My code is below:
app = JupyterDash()

# Load the per-portfolio return series and the portfolio-id lookup table from
# the current working directory.
folder = os.getcwd()
# parse_dates is a read_csv option, so it belongs here and not on Path().
portfolio_returns_table = pd.read_csv(
    Path(folder, 'portfolioreturns_maria.csv'), parse_dates=[0]
)
portfolio_returns_table = portfolio_returns_table.set_index('Unnamed: 0')
name_portfolioID_table = pd.read_csv(Path(folder, 'name_portfolioID.csv'))

# Calculate portfolio cumulative returns
df_cumret = (portfolio_returns_table + 1).cumprod().round(5)
df_cumret.index = pd.to_datetime(df_cumret.index)

# Layout: date picker on top, then the leaderboard table, then the graph.
app.layout = html.Div(html.Div([
    dcc.DatePickerSingle(
        id='my-date-picker-single',
        min_date_allowed=dt.date(df_cumret.index.min()),
        max_date_allowed=dt.date(df_cumret.index.max()),
        initial_visible_month=dt.date(df_cumret.index.max()),
        date=dt.date(df_cumret.index.max()),
        display_format='Y-MM-DD',
        clearable=True,
    ),
    html.Div(id='output-container-date-picker-single'),
    html.Div(dash_table.DataTable(
        id='data_table',
        # DataTable rows are a list of dicts; start with no rows rather than {}.
        data=[],
        fixed_rows={'headers': True},
        style_cell={'textAlign': 'left'},
        style_table={'height': 400},
    )),
    html.Div(dcc.Graph('my_graph')),
]))
@app.callback([Output('data_table', 'data'), Output('data_table', 'columns')],
              [Input('my-date-picker-single', 'date')])
def update_leader_table(date):
    """Build the top-10 leaderboard for the selected date.

    Args:
        date: Date string supplied by the date picker, or None when cleared.

    Returns:
        A ``(data, columns)`` pair for the DataTable. Both are empty lists
        when no date is selected or the date has no row in ``df_cumret``.
    """
    # The picker is clearable, so this callback can fire with date=None.
    if not date:
        return [], []
    selected = pd.Timestamp(date)
    # Dates without a row in the returns table have nothing to rank.
    if selected not in df_cumret.index:
        return [], []
    # Get data for the selected date and transpose
    df_T = df_cumret.loc[[selected]].T
    # Sort the table to reveal the top leaders
    df_Top = df_T.sort_values(df_T.columns[0], ascending=False)[:10]
    # Convert the index to an integer so it can be joined on the lookup table
    df_Top.index = df_Top.index.astype(int)
    # Generate the leaderboard for the given date
    df_leader = pd.merge(df_Top, name_portfolioID_table,
                         left_index=True, right_index=True, how='left')
    # Create the rank column (rows are already sorted best-first)
    df_leader['Rank'] = range(1, len(df_leader) + 1)
    df_leader.columns = ['Cum Return', 'Investor', 'Rank']
    df_leader.reset_index(drop=True, inplace=True)
    data = df_leader.to_dict('records')
    columns = [{'id': c, 'name': c, "selectable": True}
               for c in df_leader.columns]
    return data, columns
# Callback to link calendar to graph
@app.callback(Output('my_graph', 'figure'), [Input('my-date-picker-single', 'date')])
def update_graph(date):
    """Plot cumulative returns up to ``date`` for the ten leaders on that date.

    Args:
        date: Date string supplied by the date picker, or None when cleared.

    Returns:
        A plotly figure, or an empty figure dict when no date is selected.
    """
    # The picker is clearable; without a date there is no title or cut-off.
    if not date:
        return {}
    # Date filter
    df_T = df_cumret.loc[:date].T
    # Sort on the last (selected) date to reveal the top leaders
    df_Top = df_T.sort_values(df_T.columns[-1], ascending=False)[:10]
    # Transpose to have date as index
    df_top_graph = df_Top.T
    # Set the columns as int so they match the lookup table's index
    df_top_graph.columns = df_top_graph.columns.astype(int)
    # Rename columns from portfolio id to investor name
    df_top_graph.rename(
        columns=dict(zip(name_portfolioID_table.index,
                         name_portfolioID_table.name)),
        inplace=True)
    # Generate graph
    fig = px.line(df_top_graph, x=df_top_graph.index, y=df_top_graph.columns,
                  title='ETF LEADERBOARD PERFORMANCE: ' + date,
                  labels={'Unnamed: 0': 'Date', 'value': 'Cumulative Returns'})
    fig.update_layout(hovermode='x unified')
    fig.update_traces(hovertemplate='Return: %{y} <br>Date: %{x}')
    fig.update_layout(legend_title_text='Investor')
    return fig
# Serve the app inline in the notebook when this file is run as a script.
if __name__ == '__main__':
    app.run_server(
        mode='inline',
        debug=True,
        port=65398,
    )
Related
I have a webapp that displays 11 checkboxes. When checked, the application returns a graph of the data that corresponds to the selected box.
Because the end user can select any variation of these boxes and should expect to see only the graph of those selected, every possible combination must be declared.
Currently, my implementation is as follows. It is quite unsophisticated and will likely take hours, so you can see why I'm looking for something a bit more.. supple?
# NOTE(review): `Communications` is read below but its checkbox was missing
# from the snippet; the label here is assumed from the data column name.
Communications = st.sidebar.checkbox(label='Communications', value=False)
Discretionary = st.sidebar.checkbox(label='Consumer Discretionary', value=False)
Consumer_Staples = st.sidebar.checkbox(label='Consumer Staples', value=False)
Energy = st.sidebar.checkbox(label='Energy', value=False)
Financials = st.sidebar.checkbox(label='Financials', value=False)
Healthcare = st.sidebar.checkbox(label='Healthcare', value=False)
Industrials = st.sidebar.checkbox(label='Industrials', value=False)
Materials = st.sidebar.checkbox(label='Materials', value=False)
Real_Estate = st.sidebar.checkbox(label='Real Estate', value=False)
Technology = st.sidebar.checkbox(label='Technology', value=False)
Utilities = st.sidebar.checkbox(label='Utilities', value=False)
Placeholder = st.empty()
for a in range(0, 500):
    try:
        Data = pd.read_csv('Mydata')
        Data = Data.set_index('Time')
    except (OSError, KeyError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Best-effort refresh: skip this pass if the file is missing,
        # incomplete, or lacks the 'Time' column, and keep the chart as is.
        continue
    if Communications and Discretionary:
        with Placeholder.container():
            st.line_chart(data=Data[['Communications', 'CD Consumer Discretionary']] * 100)
    elif Communications and not Discretionary:
        with Placeholder.container():
            st.line_chart(data=Data['Communications'] * 100)
    elif Discretionary and not Communications:
        with Placeholder.container():
            st.line_chart(data=Data['CD Consumer Discretionary'] * 100)
This problem isn't about permutations / combinations (no need for that), but about filtering.
I'll take the following simplified dataframe as an example for the rest of this post.
import pandas as pd
import streamlit as st
# Small three-column frame used as the running example.
df = pd.DataFrame(
    {
        "a": [1, 2, 3],
        "b": [4, 5, 6],
        "c": [7, 8, 9],
    }
)
First thing you can do to simplify things, is regroup your checkboxes in a dictionary. It saves you from writing 20 lines of code if you have 20 columns, and makes it easier to adapt to new columns later on
# One unchecked sidebar checkbox per column, keyed by the column name.
checkboxes = {}
for col_name in df.columns:
    checkboxes[col_name] = st.sidebar.checkbox(label=col_name, value=False)
You can then define a function that filters the data, keeping only the columns whose checkbox is checked:
def filter_columns(df: pd.DataFrame, checkboxes: dict[str, bool]) -> pd.DataFrame:
    """Return only the columns of *df* whose checkbox is ticked.

    Args:
        df: Frame to filter.
        checkboxes: Mapping of column name to checkbox state.

    Returns:
        A frame holding the selected columns, in the order they appear in
        *checkboxes*. With nothing ticked, the result has no columns.
    """
    selected_cols = [col for col, sel in checkboxes.items() if sel]
    return df[selected_cols]
Then filter your data and display it:
# Keep only the ticked columns, then render them in the app.
df_filtered = filter_columns(df=df, checkboxes=checkboxes)
st.dataframe(data=df_filtered)
If you copy all the codeblocks of this example, you should have a functioning streamlit app. You now just have to adapt the dataset to your particular needs.
Once you've filtered your data, create your graphs.
I have created an "input form" with several ipywidget boxes. I want to be able to reference all the values to create a new dataframe.
I'm currently doing this in a horrible way.
# The first row seeds the frame and fixes the column names.
portfolio_df = pd.DataFrame(
    [[VBox1.children[0].value, VBox2.children[0].value,
      VBox3.children[0].value, VBox4.children[0].value]],
    columns=['Product Name', 'Units', 'Price', 'Invested Amount'])
# Each further row reads the same child position from all four VBoxes.
row_2 = [VBox1.children[1].value, VBox2.children[1].value, VBox3.children[1].value, VBox4.children[1].value]
portfolio_df.loc[len(portfolio_df)] = row_2
row_3 = [VBox1.children[2].value, VBox2.children[2].value, VBox3.children[2].value, VBox4.children[2].value]
portfolio_df.loc[len(portfolio_df)] = row_3
row_4 = [VBox1.children[3].value, VBox2.children[3].value, VBox3.children[3].value, VBox4.children[3].value]
portfolio_df.loc[len(portfolio_df)] = row_4
and so on up to row 23 in this instance! (The length will vary with the number of children within a VBox.)
I suspect I can do this more pythonically using a for loop, but I can't figure it out.
Full code as per requests (I've edited columns so my live data is different but this is exact replica of the set up)
import pandas as pd
import numpy as np
import datetime as dt
import ipywidgets as ipw
from ipywidgets import *
# 24 random rows standing in for the live product list; cast to str afterwards.
barrier_list = pd.DataFrame(
    np.random.randn(24, 4),
    columns=('Product Name', 'ISIN', 'A', 'B'),
)
barrier_list = barrier_list.astype(str)

# First column: one box per row, labelled with the row's ISIN value and
# pre-filled with its product-name value.
dd_list = []
for isin, product_name in zip(barrier_list['ISIN'], barrier_list['Product Name']):
    dropdown = ipw.FloatText(description=isin,
                             value=product_name,
                             disabled=False,
                             layout={'width': '350px'})
    dropdown.style.description_width = 'initial'
    dd_list.append(dropdown)

# Remaining columns: one zero-initialised numeric input per row.
n_rows = len(barrier_list)
dd_list1 = [ipw.FloatText(description='Units', value=0, layout={'width': '200px'})
            for _ in range(n_rows)]
dd_list2 = [ipw.FloatText(description='Price', value=0, layout={'width': '200px'})
            for _ in range(n_rows)]
dd_list3 = [ipw.FloatText(description='Value', value=0, layout={'width': '200px'})
            for _ in range(n_rows)]

# Stack each column vertically, then place the four columns side by side.
VBox1 = ipw.VBox(dd_list)
VBox2 = ipw.VBox(dd_list1)
VBox3 = ipw.VBox(dd_list2)
VBox4 = ipw.VBox(dd_list3)
HBox = ipw.HBox([VBox1, VBox2, VBox3, VBox4])
Solved this one by looping through the VBoxes one by one and then concatenating the resulting DataFrames into one main one.
def _children_to_frame(box, column):
    """Collect the current value of every child of *box* into a one-column frame."""
    return pd.DataFrame({column: [child.value for child in box.children]})


# One single-column frame per VBox. Each is built in one pass, which avoids
# DataFrame.append (removed in pandas 2.0) and already has a 0..n-1 index.
product_df = _children_to_frame(VBox1, 'Product Name')
unit_df = _children_to_frame(VBox2, 'Units')
price_df = _children_to_frame(VBox3, 'Price')
value_df = _children_to_frame(VBox4, 'Value')

# Glue the columns together side by side.
df_list = [product_df, unit_df, price_df, value_df]
portfolio_df = pd.concat(df_list, axis=1)
portfolio_df
I'm working on a personal project and I'm trying to retrieve air quality data from the https://aqicn.org website using their API.
I've used this code, which I've copied and adapted for the city of Bucharest as follows:
import pandas as pd
import folium
import requests
# GET data from AQI website through the API
base_url = "https://api.waqi.info"
path_to_file = "~/path"
# Got token from:- https://aqicn.org/data-platform/token/#/
with open(path_to_file) as f:
    contents = f.readlines()
# readlines() keeps the line ending; strip it so the newline is not sent as
# part of the token in the request URL.
key = contents[0].strip()
# (lat, long)-> bottom left, (lat, lon)-> top right
latlngbox = "44.300264,25.920181,44.566991,26.297836"  # For Bucharest
trail_url = f"/map/bounds/?token={key}&latlng={latlngbox}"
my_data = pd.read_json(base_url + trail_url)  # Joined parts of URL
print('columns->', my_data.columns)  # 2 cols 'status' and 'data' JSON

### Built a dataframe from the json file
all_rows = [
    [each_row['station']['name'], each_row['lat'], each_row['lon'], each_row['aqi']]
    for each_row in my_data['data']
]
df = pd.DataFrame(all_rows, columns=['station_name', 'lat', 'lon', 'aqi'])

# Cleaned the DataFrame
df['aqi'] = pd.to_numeric(df.aqi, errors='coerce')  # Invalid parsing to NaN
# Remove NaN entries in col
df1 = df.dropna(subset=['aqi'])
Unfortunately, it only retrieves 4 stations, whereas there are many more available on the actual site. The only limitation I saw in the API documentation was "1,000 (one thousand) requests per second", so why can't I get more of them?
Also, I've tried to modify the lat-long values and managed to get more stations, but they were outside the city I was interested in.
Here is a view of the actual perimeter I've used in the embedded code.
If you have any suggestions as of how I can solve this issue, I'd be very happy to read your thoughts. Thank you!
Try using waqi through aqicn... not exactly a clean API but I found it to work quite well
import pandas as pd
url1 = 'https://api.waqi.info'
# Get token from:- https://aqicn.org/data-platform/token/#/
token = 'XXX'
box = '113.805332,22.148942,114.434299,22.561716'  # polygon around HongKong via bboxfinder.com
url2 = f'/map/bounds/?latlng={box}&token={token}'
my_data = pd.read_json(url1 + url2)

# Flatten each station record into [name, lat, lon, aqi].
all_rows = [
    [each_row['station']['name'], each_row['lat'], each_row['lon'], each_row['aqi']]
    for each_row in my_data['data']
]
df = pd.DataFrame(all_rows, columns=['station_name', 'lat', 'lon', 'aqi'])
From there it's easy to plot:
# This snippet uses folium and its HeatMap plugin, so import them here.
import folium
from folium.plugins import HeatMap

# Non-numeric AQI readings become NaN and are dropped before plotting.
df['aqi'] = pd.to_numeric(df.aqi, errors='coerce')
print('with NaN->', df.shape)
df1 = df.dropna(subset=['aqi'])
df2 = df1[['lat', 'lon', 'aqi']]
init_loc = [22.396428, 114.109497]
max_aqi = int(df1['aqi'].max())
print('max_aqi->', max_aqi)
m = folium.Map(location=init_loc, zoom_start=5)
# NOTE(review): newer folium releases may deprecate `max_val`; confirm
# against the installed version.
heat_aqi = HeatMap(df2, min_opacity=0.1, max_val=max_aqi,
                   radius=60, blur=20, max_zoom=2)
m.add_child(heat_aqi)
m
Or as such
centre_point = [22.396428, 114.109497]
m2 = folium.Map(location=centre_point, tiles='Stamen Terrain', zoom_start=6)

# One marker per station, coloured by AQI: above 300 red, above 200 orange,
# otherwise green.
for idx, row in df1.iterrows():
    station_aqi = row['aqi']
    if station_aqi > 300:
        pop_color = 'red'
    elif station_aqi > 200:
        pop_color = 'orange'
    else:
        pop_color = 'green'
    marker = folium.Marker(
        location=[row['lat'], row['lon']],
        popup=row['station_name'] + ' AQI=' + str(station_aqi),
        icon=folium.Icon(color=pop_color),
    )
    marker.add_to(m2)
m2
checking for stations within HK, returns 19
# Rows whose station name contains 'HongKong'.
df.loc[df['station_name'].str.contains('HongKong')]
How can I merge the two functions given below to achieve something like the histogram example. Any button or drop down would do fine.
If you run the function, you get a nice Candlesticks chart with the functionality of removing non trading day gaps.
def plot_candlesticks(df, names=('DATE', 'OPEN', 'CLOSE', 'LOW', 'HIGH'), mv=(200,),
                      slider: bool = False, fig_size: tuple = (1400, 700), plot: bool = True):
    '''
    Plot a candlestick chart for a given dataframe, hiding non-trading gaps

    args:
        df: DataFrame; must also contain a 'SYMBOL' column, used in the title
        names: Tuple of column names in the order ('DATE','OPEN','CLOSE','LOW','HIGH')
        mv: Moving-average window lengths; empty or None draws none
        slider: Whether to have the zoom slider below the chart or not
        fig_size: Size of Figure as (Width, Height)
        plot: Whether to show the figure or just return it for further modifications

    returns:
        The plotly Figure
    '''
    freq = 5  # 5 min candle
    candle_text = f"{freq} Min"

    stocks = df.copy()
    # Without reverse, recent rolling mean will be either NaN or equal to the exact value
    stocks.sort_index(ascending=False, inplace=True)

    Date, Open, Close, Low, High = names
    mv = list(mv) if mv else []  # just in case you don't want to have any moving averages

    # Random colour order; cycle through the palette if there are more
    # moving averages than colours.
    palette = ['black', 'magenta', 'teal', 'brown', 'violet']
    shuffled = sample(palette, len(palette))
    colors = [shuffled[i % len(shuffled)] for i in range(len(mv))]

    # To remove non-trading days, build a continuous range padded by one day
    # on each side of the observed dates.
    start = stocks[Date].min() - timedelta(days=1)
    end = stocks[Date].max() + timedelta(days=1)
    dt_all = pd.date_range(start=start, end=end, freq=f'{freq}min')

    # Timestamps in the continuous range that have no candle are the breaks.
    # A set keeps each membership test constant-time.
    dt_obs = {d.strftime("%Y-%m-%d %H:%M:%S") for d in stocks[Date]}
    dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d %H:%M:%S").tolist() if d not in dt_obs]

    # dvalue is the width of one candle in milliseconds.
    rangebreaks = [dict(dvalue=freq * 60 * 1000, values=dt_breaks)]
    range_selector = dict(buttons=[dict(step='all', label='All')])

    candle = go.Figure(data=[go.Candlestick(opacity=0.9, x=stocks[Date], name='X',
                                            open=stocks[Open], high=stocks[High],
                                            low=stocks[Low], close=stocks[Close])])

    for window, color in zip(mv, colors):
        sma_col = f'{window}-SMA'
        stocks[sma_col] = stocks[Close].rolling(window, min_periods=1).mean()
        candle.add_trace(go.Scatter(name=f'{window} MA', x=stocks[Date], y=stocks[sma_col],
                                    line=dict(color=color, width=1.7)))

    candle.update_xaxes(title_text='Date', rangeslider_visible=slider,
                        rangeselector=range_selector, rangebreaks=rangebreaks)

    # .iloc[0] reads the first row by position; the index labels are not
    # guaranteed to include 0.
    symbol = stocks['SYMBOL'].iloc[0]
    candle.update_layout(autosize=False, width=fig_size[0], height=fig_size[1],
                         title={'text': f"{symbol} : {candle_text} Candles", 'y': 0.97, 'x': 0.5,
                                'xanchor': 'center', 'yanchor': 'top'},
                         margin=dict(l=30, r=30, b=30, t=30, pad=2),
                         paper_bgcolor="lightsteelblue")

    candle.update_yaxes(title_text='Price in Rupees', tickprefix=u"\u20B9")  # Rupee symbol

    if plot:
        candle.show()

    return candle
and running the below code resamples your data.
def resample_data(data, to: str = '15min', names: tuple = ('OPEN', 'CLOSE', 'LOW', 'HIGH', 'DATE')):
    '''
    Resample OHLC candles to a coarser interval

    args:
        data: DataFrame of candles containing the columns listed in `names`
        to: pandas offset alias of the target interval, e.g. '15min' or '75min'
        names: Tuple of column names in the order ('OPEN','CLOSE','LOW','HIGH','DATE')

    returns:
        New DataFrame with one row per interval, newest interval first, and
        columns (DATE, OPEN, HIGH, LOW, CLOSE). `data` is not modified.
    '''
    Open, Close, Low, High, Date = names
    # Each candle field has its own rule: first open, highest high,
    # lowest low, last close.
    ohlc_rules = {Open: 'first', High: 'max', Low: 'min', Close: 'last'}
    resampled = data.resample(to, on=Date).agg(ohlc_rules)
    return resampled.sort_index(ascending=False).reset_index()
Is there a way that, when I click a 15M / 75M button in my chart, it shows me exactly the same data but resampled? Just like the functionality in online trading software.
no sample data so I have used https://plotly.com/python/candlestick-charts/ sample
at core use https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html and change trace contents with resampled data
plus using https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html for events from widgets
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
# Sample OHLC data; the Date column is parsed as timestamps.
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv",
    parse_dates=["Date"],
)

# A FigureWidget (rather than a plain Figure) so the trace can be updated
# in place from widget callbacks.
fig = go.FigureWidget(
    data=[
        go.Candlestick(
            x=df["Date"],
            open=df["AAPL.Open"],
            high=df["AAPL.High"],
            low=df["AAPL.Low"],
            close=df["AAPL.Close"],
        )
    ]
)
fig.update_layout(margin={"t": 30, "b": 0, "l": 0, "r": 0})

# Bordered output area for messages printed by the callbacks.
out = widgets.Output(layout={"border": "1px solid black"})
out.append_stdout("Output appended with append_stdout\n")

# Controls: a reset button and a 1-10 "Days" slider that only reports its
# value once the drag is released.
reset = widgets.Button(description="Reset")
slider = widgets.IntSlider(
    description='Days:',
    value=1, min=1, max=10, step=1,
    orientation='horizontal',
    readout=True, readout_format='d',
    continuous_update=False,
    disabled=False,
)
@out.capture()
def on_slider_change(v):
    """Re-bin the candles to ``v['new']`` business days and redraw the trace.

    Args:
        v: Change notification from the slider; ``v['new']`` is the new value.
    """
    print(f"slider: {v['new']}")
    # Aggregate each OHLC field with its own rule. A plain mean() would
    # average highs and lows, and pandas >= 2.0 raises on non-numeric columns.
    ohlc_rules = {
        "AAPL.Open": "first",
        "AAPL.High": "max",
        "AAPL.Low": "min",
        "AAPL.Close": "last",
    }
    dfr = df.resample(f"{v['new']}B", on="Date").agg(ohlc_rules).reset_index()
    t = fig.data[0]
    t.update(
        x=dfr["Date"],
        open=dfr["AAPL.Open"],
        high=dfr["AAPL.High"],
        low=dfr["AAPL.Low"],
        close=dfr["AAPL.Close"],
    )
@out.capture()
def on_reset_clicked(b):
    """Restore the original candles and clear the output area.

    Args:
        b: The button instance passed by ``on_click``; unused.
    """
    print("reset")
    t = fig.data[0]
    t.update(
        x=df["Date"],
        open=df["AAPL.Open"],
        high=df["AAPL.High"],
        low=df["AAPL.Low"],
        close=df["AAPL.Close"],
    )
    out.clear_output()
# Hook the handlers up, then show the controls above the chart and output.
slider.observe(on_slider_change, names='value')
reset.on_click(on_reset_clicked)
widgets.VBox(
    [
        widgets.HBox([reset, slider]),
        widgets.VBox([fig, out]),
    ]
)
I am new to Python and have limited coding experience, so any input and advice is deeply appreciated.
I have created a dynamic choropleth map which includes a scatter_geo plot that overlays the relevant areas.
I am trying create a hover callback so that when I hover over one of these points, a dataframe appears that is indexed according to the point id (the first column in the defined dataframe). Essentially, it is a choropleth map equivalent of this example: https://plotly.com/python/v3/cars-exploration/ but without using FigureWidget.
I keep getting stuck on the hover callback function; no dataframe displays when I hover. Below is the code I have so far.
# Both license sheets are read as strings, with blanks in place of NaN.
license_df1 = pd.read_excel(lic, "Primary Holdings by License", dtype="str").fillna('')
license_df2 = pd.read_excel(lic, "Secondary Holdings by License", dtype="str").fillna('')

### CREATE PLOTTING FEATURES ###
app = dash.Dash(__name__, suppress_callback_exceptions=True)
app.css.config.serve_locally = True
app.scripts.config.serve_locally = True

# Layout: band selector, then the map, then the detail table.
app.layout = html.Div(
    [
        html.P("Spectrum Band:"),  # Create Toggle Items between spectrum bands
        dcc.RadioItems(
            id="Band",
            options=[{'value': x, 'label': x} for x in df1_band],
            value=df1_band[0],
        ),
        dcc.Graph(id="choropleth"),
        dash_table.DataTable(id="table"),
    ]
)
@app.callback(
    Output("choropleth", "figure"),
    [Input("Band", "value")])
def build_graph(value):
    """Build the choropleth for the selected band with a scatter overlay.

    Args:
        value: Band label chosen in the radio items, e.g. '600 MHz'.

    Returns:
        The choropleth figure with the scatter_geo trace added on top.
    """
    # Filtering on the selected value covers every band, not only the two
    # that used to be hard-coded.
    band_df = df1[df1["Band"] == value]
    fig = px.choropleth(band_df, geojson=PEAs, featureidkey="properties.PEA_Num",
                        locations='PEA # ', hover_data={'PEA # ': False}, scope="usa")
    # Overlay Geographic Scatter Plot for interactive functionality
    overlay = px.scatter_geo(band_df, geojson=PEAs, featureidkey="properties.PEA_Num",
                             locations='PEA # ', hover_name='Market', scope="usa")
    fig.add_trace(overlay.data[0])
    fig.update_traces(showlegend=False)
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    return fig
@app.callback(
    [Output("table", "data"), Output("table", "columns")],
    [Input("choropleth", "hoverData")])
def disp_license1(hover_data):
    """Show the primary-license rows for the map point under the cursor.

    Args:
        hover_data: The graph's ``hoverData`` payload, or None before any hover.

    Returns:
        A ``(data, columns)`` pair for the DataTable; ``data`` is empty until
        a point is hovered.
    """
    # The table is declared without columns, so supply them on every call.
    columns = [{"name": col, "id": col} for col in license_df1.columns]
    if not hover_data or not hover_data.get("points"):
        return [], columns
    # Geo traces report the hovered feature's `locations` value as "location".
    location = hover_data["points"][0].get("location")
    # NOTE(review): assumes the first column of license_df1 holds the same id
    # as 'PEA # ' (the sheet is read as strings) - confirm against the workbook.
    matches = license_df1[license_df1.iloc[:, 0] == str(location)]
    return matches.to_dict("records"), columns
# Start the development server with debugging enabled.
app.run_server(
    debug=True,
)