Faster serializations (pickle, parquet, feather, ...) than json in plotly dash Store? - python

Context
In a Plotly Dash dashboard I need to perform an expensive download from the DB only when one component (a DatePickerRange selecting the period to download) is updated, and then reuse the resulting DataFrame with other components (e.g. Dropdowns that filter the DataFrame) without repeating the expensive download.
The docs suggest using dash_core_components.Store as the Output of a callback that returns the DataFrame serialized to JSON, and then using the Store as an Input of the other callbacks, which deserialize the JSON back into a DataFrame.
Serialization from/to JSON is slow, and each time I update a component it takes 30 seconds to update the plot just for that.
I tried faster serializations such as pickle, parquet and feather, but in the deserialization step I get an error stating that the object is empty (no such error appears when using JSON).
Question
Is it possible to serialize data in a Dash Store with methods faster than JSON, such as pickle, feather or parquet (they take approximately half the time for my dataset)? If so, how?
Code
import io
import traceback
import pandas as pd
from datetime import datetime, date, timedelta
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from plotly.subplots import make_subplots
# Dash application using the Bootstrap stylesheet from dash_bootstrap_components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
today = date.today()

# Page layout: headings, a date-range picker, a multi-select dropdown for the
# y-columns, the graph, and a Store that carries the dataset between callbacks.
# NOTE(review): `options` and `default_options` are not defined in this
# snippet; presumably they are defined elsewhere.
app.layout = html.Div([
    dbc.Row(dbc.Col(html.H1('PMC'))),
    dbc.Row(dbc.Col(html.H5('analysis'))),
    html.Hr(),
    html.Br(),
    dbc.Container([
        dbc.Row([
            dbc.Col(
                dcc.DatePickerRange(
                    id='date_ranges',
                    start_date=today - timedelta(days=20),
                    end_date=today,
                    max_date_allowed=today,
                    display_format='MMM Do, YY',
                ),
                width=4,
            ),
        ]),
        dbc.Row(
            dbc.Col(
                dcc.Dropdown(
                    id='dd_ycolnames',
                    options=options,
                    value=default_options,
                    multi=True,
                ),
            ),
        ),
    ]),
    dbc.Row([
        dbc.Col(
            dcc.Graph(
                id='graph_subplots',
                figure={},
            ),
            width=12,
        ),
    ]),
    dcc.Store(id='store'),
])
@app.callback(
    Output('store', 'data'),
    [
        Input(component_id='date_ranges', component_property='start_date'),
        Input(component_id='date_ranges', component_property='end_date'),
    ],
)
def load_dataset(date_ranges_start, date_ranges_end):
    """Load the dataset for the selected period and write it to the Store.

    Args:
        date_ranges_start: Start date string in ``%Y-%m-%d`` format.
        date_ranges_end: End date string in ``%Y-%m-%d`` format.

    Returns:
        The result of ``df.to_parquet()`` for the loaded DataFrame.

    NOTE(review): Store data is exchanged with the browser as JSON, so the
    raw parquet bytes returned here may not round-trip. That is the problem
    this snippet demonstrates, so the return value is kept as posted.
    """
    # some expensive clean data step
    logger.info('loading dataset...')
    date_ranges1_start = datetime.strptime(date_ranges_start, '%Y-%m-%d')
    date_ranges1_end = datetime.strptime(date_ranges_end, '%Y-%m-%d')
    df = expensive_load_from_db(date_ranges1_start, date_ranges1_end)
    logger.info('dataset to json...')
    # JSON variant recommended by the docs:
    # return df.to_json(date_format='iso', orient='split')
    return df.to_parquet()  # <----------------------
@app.callback(
    Output(component_id='graph_subplots', component_property='figure'),
    [
        Input(component_id='store', component_property='data'),
        Input(component_id='dd_ycolnames', component_property='value'),
    ],
)
def update_plot(df_bin, y_colnames):
    """Rebuild the stacked subplots from the stored dataset.

    Args:
        df_bin: The Store's data, read here as a parquet byte buffer.
        y_colnames: Column names selected in the dropdown; one subplot row
            is created per name.

    Returns:
        A figure with ``len(y_colnames)`` rows sharing the x axis.
    """
    logger.info('dataset from json')
    # JSON variant recommended by the docs:
    # df = pd.read_json(df_bin, orient='split')
    df = pd.read_parquet(io.BytesIO(df_bin))  # <----------------------
    logger.info('building plot...')
    traces = []
    for y_colname in y_colnames:
        # Boolean columns are converted to 0/1 integers before plotting.
        if df[y_colname].dtype == 'bool':
            df[y_colname] = df[y_colname].astype('int')
        traces.append(
            {'x': df['date'], 'y': df[y_colname].values, 'name': y_colname},
        )
    fig = make_subplots(
        rows=len(y_colnames), cols=1, shared_xaxes=True, vertical_spacing=0.1
    )
    fig.layout.height = 1000
    # Subplot rows are 1-based.
    for row, trace in enumerate(traces, start=1):
        fig.append_trace(trace, row, 1)
    logger.info('plotted')
    return fig


if __name__ == '__main__':
    app.run_server(host='localhost', debug=True)
Error text
OSError: Could not open parquet input source '<Buffer>': Invalid: Parquet file size is 0 bytes

Due to the exchange of data between client and server, you are currently limited to JSON serialization. One way to circumvent this limitation is via the ServersideOutput component from dash-extensions, which stores the data on the server. It uses file storage and pickle serialization by default, but you can use other storage (e.g. Redis) and/or serialization protocols (e.g. arrow) as well. Here is a small example,
import time
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
from dash_extensions.enrich import Dash, Output, Input, State, ServersideOutput
# Callbacks are not fired on initial page load.
app = Dash(prevent_initial_callbacks=True)

# One child per line: trigger button, dropdown, graph, and the Store wrapped
# in a Loading component.
app.layout = html.Div([
    html.Button("Query data", id="btn"),
    dcc.Dropdown(id="dd"),
    dcc.Graph(id="graph"),
    dcc.Loading(dcc.Store(id='store'), fullscreen=True, type="dot"),
])
@app.callback(ServersideOutput("store", "data"), Input("btn", "n_clicks"))
def query_data(n_clicks):
    """Return the gapminder DataFrame as the Store's data when the button is clicked.

    The DataFrame is returned as-is; the callback does no JSON conversion itself.
    """
    time.sleep(1)  # presumably stands in for a slow query
    return px.data.gapminder()  # no JSON serialization here
@app.callback(Input("store", "data"), Output("dd", "options"))
def update_dd(df):
    """Build the dropdown options from the ``year`` column of the stored DataFrame.

    Returns one ``{"label", "value"}`` dict per entry of ``df["year"]``.
    """
    return [{"label": column, "value": column} for column in df["year"]]  # no JSON de-serialization here
@app.callback(Output("graph", "figure"), [Input("dd", "value"), State("store", "data")])
def update_graph(value, df):
    """Draw a sunburst chart for the year selected in the dropdown.

    Args:
        value: Selected dropdown value, used as the year to filter on.
        df: The stored DataFrame, read as State so that changes to it do
            not trigger this callback.
    """
    df = df.query(f"year == {value}")  # no JSON de-serialization here
    return px.sunburst(df, path=['continent', 'country'], values='pop', color='lifeExp', hover_data=['iso_alpha'])


if __name__ == '__main__':
    app.run_server()

Related

Unable to run dash_interactive_graphviz

I created a huge Graphviz network, which I now want to spice up with some interactivity. For this I discovered the package dash_interactive_graphviz. As I understand it, I can simply provide my existing graph, but I am already failing to run the provided sample (see below):
import dash_interactive_graphviz
dot_source = """
digraph {
node[style="filled"]
a ->b->d
a->c->d
}
"""
dash_interactive_graphviz.DashInteractiveGraphviz(
id="graph",
dot_source=dot_source
)
The package itself and all of its requirements are installed. I run the sample code above in Visual Studio Code, but nothing happens (no output, no message, no error).
Anybody who can point me in the right direction? Thanks.
You are on the right track; you just need a few more steps. You can use this example as a template for running your file:
https://github.com/BusinessOptics/dash_interactive_graphviz/blob/master/usage.py
import dash_interactive_graphviz
import dash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc
app = dash.Dash(__name__)
initial_dot_source = """
digraph {
node[style="filled"]
a ->b->d
a->c->d
}
"""
# Two flex children side by side: the interactive graph on the left and a
# control column (selected element, dot-source editor, engine picker) on the
# right.
app.layout = html.Div(
    [
        html.Div(
            dash_interactive_graphviz.DashInteractiveGraphviz(id="gv"),
            style=dict(flexGrow=1, position="relative"),
        ),
        html.Div(
            [
                html.H3("Selected element"),
                html.Div(id="selected"),
                html.H3("Dot Source"),
                dcc.Textarea(
                    id="input",
                    value=initial_dot_source,
                    style=dict(flexGrow=1, position="relative"),
                ),
                html.H3("Engine"),
                dcc.Dropdown(
                    id="engine",
                    value="dot",
                    options=[
                        dict(label=engine, value=engine)
                        for engine in [
                            "dot", "fdp", "neato", "circo",
                            "osage", "patchwork", "twopi",
                        ]
                    ],
                ),
            ],
            style=dict(display="flex", flexDirection="column"),
        ),
    ],
    style=dict(position="absolute", height="100%", width="100%", display="flex"),
)
@app.callback(
    [Output("gv", "dot_source"), Output("gv", "engine")],
    [Input("input", "value"), Input("engine", "value")],
)
def display_output(value, engine):
    """Pass the textarea's dot source and the selected engine through to the graph."""
    return value, engine
@app.callback(Output("selected", "children"), [Input("gv", "selected")])
def show_selected(value):
    """Show the graph's current ``selected`` value inside the "selected" div."""
    return html.Div(value)


if __name__ == "__main__":
    app.run_server(debug=True)

Dash plotly app with Heroku: works well with heroku local but doesn't work properly in production

I have a Dash Plotly Python app (a music quiz, developed in a Jupyter notebook) that takes 5 rows as a sample from a bigger DataFrame using df_sample = df.sample(5). It has a quiz whose inputs compare the users' responses with df_sample and award points depending on the responses and the df_sample values.
When I test it locally the code works perfectly, and it also works when I run it locally through heroku local.
But when I deploy to Heroku in production the code fails, because df_sample takes two different sets of values while the app is updating during the callbacks, and then the comparison between the users' responses and the df_sample DataFrame fails.
example:
df_sample('name')=('c','f', 'g', 'b', 'e')
When I run it locally, this stays constant during the callbacks and everything works fine.
df_sample('name')=('c','f', 'g', 'b', 'e')
df_sample('name')=('b','i', 'l', 'm', 'o')
When I deploy to Heroku, df_sample gets 2 different sets of values during the callbacks and the app fails.
here an extract of the code:
import requests
import pandas as pd
import dash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc
from dash import Dash
from dash import dcc
from dash import html
url1='url'
download = github_session.get(url1).content
df_link = pd.read_csv(io.StringIO(download.decode('latin-1')), sep=";",
error_bad_lines=False, warn_bad_lines=False, encoding='latin-1')
server = app.server
df_sample=df_link.sample(5)
df_sample.reset_index(inplace=True)
compositors= dbc.Col(
dcc.Dropdown(
id="compositor",
options=[
{"label": str(i), "value": i} for i in autors
],
value="",
clearable=False,
)
dbc.Row(
[
dbc.Label("5-look The correct answer is:",
width="auto"),
dbc.Col(
dbc.Input(id="name", value=compositor_result, type="text",size="lg")
)
]
)
dbc.Col(dbc.Button("4-Submmit & play next record", color="danger",
id="example-button",
n_clicks=0)
),
dbc.Col(
dbc.Input(id="result", value=result, type="text",size="lg")
)
app.layout = ....
@app.callback(
    Output("result", "value"),
    Output("compositor_result", "value"),
    Input('example-button', 'n_clicks'),
    State("compositor", "value"),
)
def update_line_chart(n_clicks, compositor):
    """Score the submitted answer against the current quiz row.

    The n-th click is compared with the n-th row of the module-level
    ``df_sample``; a correct answer yields ``value1 + 15``, a wrong one
    yields ``value1`` unchanged.

    Args:
        n_clicks: Click count of the submit button (1-based row selector).
        compositor: Value currently selected in the "compositor" dropdown.

    Returns:
        ``(result, compositor_result)``: the score and the correct name.

    Raises:
        PreventUpdate: before the first click, or once the clicks exceed
            the number of sampled rows.

    NOTE(review): ``df_sample`` and ``value1`` are module-level state that
    this callback only reads. ``df_sample`` is drawn with ``.sample(5)`` at
    import time, so presumably each server process draws its own sample;
    confirm against the deployment setup. ``State`` must also be imported
    from ``dash.dependencies`` for the decorator to resolve.
    """
    from dash.exceptions import PreventUpdate

    # n_clicks starts at 0 (or None); position -1 would not be a quiz row.
    if not n_clicks or n_clicks > len(df_sample):
        raise PreventUpdate

    compositor_result = df_sample["name"].iloc[n_clicks - 1]
    # Define `result` on both paths so the return below is never unbound.
    result = value1 + 15 if compositor_result == compositor else value1
    return result, compositor_result


if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False)
Thanks

Plotly: is there a way to save the data of a clicked point in a list?

I have a 2D plotly graph with a hover feature. When you hover over each point, the label (e.g. 'image 2, cluster 1') associated with that point appears. I'd like for label to be appended onto an existing list if I were to click on the point (rather than just hover over it). The reason why is that I'd later like to use the data of this point to perform another task. Is there an example online that demonstrates how to do this-- have looked through the documentation but haven't found something for this yet. Thanks!
The hoverData that is available to you by default, with some sample data, is this:
{
"points": [
{
"curveNumber": 1,
"pointNumber": 7,
"pointIndex": 7,
"x": 1987,
"y": 74.32,
"bbox": {
"x0": 420.25,
"x1": 426.25,
"y0": 256,
"y1": 262
}
}
]
}
I'm not quite sure what you mean by 'label', so I can only assume that it would be the name of a trace or something similar, like in this example from the Plotly docs:
But as you can see, that's not readily available in the hoverData dict. This means that you'll have to use this information to reference your figure structure as well, so that you end up with something like this:
[['New Zealand', 2002, 79.11]]
And that's not a problem as long as you're willing to use Plotly Dash. I've made a complete setup for you that should meet your requirements. In the app in the image below you'll find a figure along with two output fields for strings. The first field shows the info from the last point you've clicked in the figure. On every click, a new element is added to a list named store. The last field shows the complete information from the same click.
The answer to your question is, yes, there is a way to save the data of a clicked point in a list. And one way to do so is through the following callback that uses clickdata to reference your figure object, store those references in a list, and append new elements every time you click a new element.
App
Complete code:
import json
from textwrap import dedent as d
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import dash
from dash import dcc
import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# app info
app = JupyterDash(__name__)
styles = {
'pre': {
'border': 'thin lightgrey solid',
'overflowX': 'scroll'
}
}
# data
df = px.data.gapminder().query("continent=='Oceania'")
# plotly figure
fig = px.line(df, x="year", y="lifeExp", color="country", title="No label selected")
fig.update_traces(mode="markers+lines")
# Layout: the figure on top, then a row with two <pre> panels that the
# callback fills ("hoverdata2" and "hoverdata1").
app.layout = html.Div([
    dcc.Graph(
        id='figure1',
        figure=fig,
    ),
    html.Div(className='row', children=[
        html.Div(
            [
                dcc.Markdown(d("""Hoverdata using figure references""")),
                html.Pre(id='hoverdata2', style=styles['pre']),
            ],
            className='three columns',
        ),
        html.Div(
            [
                dcc.Markdown(d("\nFull hoverdata\n")),
                html.Pre(id='hoverdata1', style=styles['pre']),
            ],
            className='three columns',
        ),
    ]),
])
# container for clicked points in callbacks
store = []
@app.callback(
    Output('figure1', 'figure'),
    Output('hoverdata1', 'children'),
    Output('hoverdata2', 'children'),
    [Input('figure1', 'clickData')])
def display_hover_data(hoverData):
    """Record the clicked point and refresh the figure title and text panels.

    Args:
        hoverData: The figure's ``clickData`` (``None`` until a point is
            clicked); only its first entry in ``points`` is used.

    Returns:
        ``(figure, full_click_json, store_as_text)``. When nothing has been
        clicked, both text outputs are ``'None selected'``.

    Side effects:
        Appends ``[trace name, x, y]`` to the module-level ``store`` list and
        updates the title of the module-level ``fig``.
    """
    if hoverData is None:
        return fig, 'None selected', 'None selected'

    # clickData only carries indices; look up the actual values in the figure.
    traceref = hoverData['points'][0]['curveNumber']
    pointref = hoverData['points'][0]['pointNumber']
    store.append([fig.data[traceref]['name'],
                  fig.data[traceref]['x'][pointref],
                  fig.data[traceref]['y'][pointref]])
    fig.update_layout(title = 'Last label was ' + fig.data[traceref]['name'])
    return fig, json.dumps(hoverData, indent=2), str(store)
app.run_server(mode='external', port = 7077, dev_tools_ui=True,
dev_tools_hot_reload =True, threaded=True)
You need to use callbacks to perform this type of action (register on_click()). I have defined clicked as a list of clicked points, and demonstrate below how this can be achieved with ipywidgets or Dash.
ipwidgets
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from pathlib import Path
import json
x = np.random.uniform(-10, 10, size=50)
y = np.sin(x)
clicked = []
# construct figure that has holders for points, interpolated line and final lines
fig = go.FigureWidget(
[
go.Scatter(x=x, y=y, mode="markers", name="base_points"),
]
)
fig.update_layout(template="simple_white")
out = widgets.Output(layout={"border": "1px solid black"})
out.append_stdout("Output appended with append_stdout\n")
# create our callback function
@out.capture()
def base_click(trace, points, selector):
    """Click handler: append the clicked points' attribute dict to ``clicked``.

    ``clicked`` is mutated in place, so no ``global`` declaration is needed.
    """
    clicked.append(points.__dict__)
fig.data[0].on_click(base_click)
widgets.HBox([fig, out])
dash
from jupyter_dash import JupyterDash
import dash
from dash.dependencies import Input, Output, State
import numpy as np
import json
clicked = []
# Build App
app = JupyterDash(__name__)
# A scatter figure plus a div that the click callback writes into.
app.layout = dash.html.Div(
    [
        dash.dcc.Graph(
            id="fig",
            figure=go.Figure(
                go.Scatter(x=x, y=y, mode="markers", name="base_points")
            ),
        ),
        dash.html.Div(id="debug"),
    ]
)
@app.callback(
    Output("debug", "children"),
    Input("fig", "clickData"),
)
def point_clicked(clickData):
    """Append the click payload to ``clicked`` and echo it as JSON text.

    ``clicked`` is mutated in place, so no ``global`` declaration is needed.
    """
    clicked.append(clickData)
    return json.dumps(clickData)
# Run app and display result inline in the notebook
app.run_server(mode="inline")

Change the language in dash/plotly

I want to change the language of Dash's core components and the toolbar in plots (to German). I thought that defining external_scripts would be sufficient, but it's still showing everything in English. Here is a minimal example of my code:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
from datetime import datetime as dt
external_scripts = ["https://cdn.plot.ly/plotly-locale-de-latest.js"]
app = dash.Dash(__name__, external_scripts=external_scripts)
data_canada = px.data.gapminder().query("country == 'Canada'")
fig = px.bar(data_canada, x='year', y='pop')
# Layout: heading, a date-range picker limited to 2018-2020, and the bar chart.
app.layout = html.Div(children=[
    html.H1(children='Dashboard'),
    dcc.DatePickerRange(
        id="date_range_picker",
        min_date_allowed=dt(2018, 1, 1),
        max_date_allowed=dt(2020, 12, 31),
        display_format="MMM, YYYY",
    ),
    dcc.Graph(
        id='example-graph',
        figure=fig,
    ),
])

if __name__ == '__main__':
    app.run_server(debug=True)
What else do I have to do to change the language?
You must add:
config_plots = dict(locale='de')
to:
dcc.Graph(
id='example-graph',
figure=fig,
config=config_plots
)

Null is not an object (evaluating ‘n.layout’) in Dash Plotly

I am developing dashboard using Dash Plotly and I am getting an error when I click tabs.
The error says null is not an object (evaluating ‘n.layout’)
(This error originated from the built-in JavaScript code that runs Dash apps. Click to see the full stack trace or open your browser’s console.)"
Can anyone help me solve this problem?
My code is found below.
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_daq as daq
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import numpy as np
from copy import copy
import dash_table
import json
import base64
import plotly.express as px
#Data
errors = pd.read_csv(r'/Users/kapital/Documents/ABCD/PM/errors.csv')
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Graph shown on the first tab; its figure is supplied by a callback.
first_graph = dcc.Graph(
    id='graph1',
    style={'borderBottom': 'thin lightgrey solid', 'padding': '10px 5px'},
)
#, animate = True
content_tab_1 = html.Div(
    children=[
        html.Div(first_graph, style={'vertical-align': 'center', 'horizontal-align': 'center'}),
    ],
    style={'width': '87%'},
)

# Two tabs; only the first one has content.
app.layout = html.Div([
    dcc.Tabs(id='tabs-example', value='tab-1', children=[
        dcc.Tab(label='Tab one', value='tab-1',
                children=[content_tab_1]),
        dcc.Tab(label='Tab two', value='tab-2'),
    ]),
    html.Div(id='tabs-example-content'),
])
@app.callback(Output('graph1', 'figure'),
              Input('tabs-example', 'value'))
def render_content(tab):
    """Return a bar chart of error counts when the first tab is selected.

    NOTE: there is no branch for any other tab value, so the function
    falls through and returns ``None`` in that case. This is the behaviour
    the question reports, so it is kept as posted.
    """
    if tab == 'tab-1':
        err_count = pd.DataFrame(errors['errorID'].value_counts().reset_index().values, columns=["Error", "Count"])
        err_count = err_count.sort_index(axis = 0, ascending=True)
        fig = px.bar(err_count, x = 'Error', y = 'Count')
        return fig
        # return html.Div([
        #     html.H3('Tab content 1')
        # ])


if __name__ == '__main__':
    app.run_server(debug=True)
The problem is that your callback function does not have an else to its conditional statement. When the user selects the second tab, that callback has no alternate path, and returns None, which gives you the error you see. I did this to fix it, but you'll probably want something more:
@app.callback(Output('graph1', 'figure'),
              Input('tabs-example', 'value'))
def render_content(tab):
    """Return a bar chart for the first tab and an empty figure otherwise.

    The explicit ``else`` means the callback always returns a value for the
    graph's ``figure`` property instead of falling through to ``None``.
    """
    if tab == 'tab-1':
        # Small inline sample data in place of the CSV-derived counts.
        err_count = pd.DataFrame.from_dict(dict(
            Error=[1, 2, 3, ],
            Count=[5, 6, 7])
        )
        err_count = err_count.sort_index(axis=0, ascending=True)
        fig = px.bar(err_count, x='Error', y='Count')
        return fig
    else:
        return {}

Categories