Hover data on bar graph without plotting text - plotly - python

I am struggling with something that is probably quite simple.
consider this: I have unit ids with data for each unit id.
I want to construct a bar graph and to be able to select whatever unit id I want.
To do this I chose plotly px. I am constructing the bars according to some range split method (using pd.cut)
The result I have is very close to what I want.
when I’m hovering on the different units on the graph I see the unit id, range, and exact value
but the text is also plotted on the graph and makes the plot very messy.
I tried several px.bar attributes as a replacement for %{text}, but without success.
Small code that reproduces the plot I have
import dash_core_components as dcc
import numpy as np
import pandas as pd
import dash
import dash_html_components as html
import plotly.express as px
# 300 random values, each tagged with a unique random unit id.
data = np.random.rand(300)
unit_id = np.random.choice(range(1000), 300, replace=False)

# Pair every id (as a string) with its value and order the pairs by value.
data_tuple = sorted(
    ((str(uid), value) for uid, value in zip(unit_id, data)),
    key=lambda pair: pair[1],
)
sorted_unit_id = [pair[0] for pair in data_tuple]
y_data = [pair[1] for pair in data_tuple]
x_data = list(range(1, len(y_data) + 1))

# Cut the values into 10 bins; the bin label of each value becomes its x.
range_bins = pd.cut(np.asarray(y_data, dtype=float), 10)
data_range_lbl = [str(interval) for interval in range_bins]

# Per-unit label: the unit id followed by its exact value.
unit_id_text = [
    uid + '<br><b>Value</b>: ' + str(value)
    for uid, value in zip(sorted_unit_id, y_data)
]

# Every unit contributes a bar segment of height 1 to its bin. The labels are
# passed as `text`, which the hover template reads through %{text}.
fig = px.bar(x=data_range_lbl, y=[1] * len(data_range_lbl), text=unit_id_text)
fig.update_layout(dragmode='select')
fig.update_traces(
    hovertemplate='<b>Die Id</b>: %{text}' + '<br><b>Range</b>: %{x}<br>'
)

app = dash.Dash(__name__)
app.layout = html.Div([
    html.Div(id='container-button-timestamp'),
    dcc.Graph(id='sample-graph', figure=fig),
])

if __name__ == '__main__':
    app.run_server(debug=True, port=8050)
how it looks:

using hovertext did the trick.
removed text from px.bar attributes:
# Build the bars without a `text` argument: one segment of height 1 per unit.
fig = px.bar(x=data_range_lbl, y=[1 for i in range(0, len(data_range_lbl))])
and changed fig.update_traces:
# Supply the per-unit strings as `hovertext` and reference them in the
# template through %{hovertext}.
fig.update_traces(
hovertext=unit_id_text,
hovertemplate=
'<b>Die Id</b>: %{hovertext}' +
'<br><b>Range</b>: %{x}<br>')

Related

How to return Box-Plot Parameters from Plotly

I've been at this for the past 5 hours in Plotly and cannot get it to work.
I need to get the following figure to return the different quartile points (Q3, Q1) etc as it has a strange algorithm which is not normal!
# Horizontal box plot of ndf.Price on the dark template, drawing every point.
fig = px.box(ndf.Price, orientation='h',template="plotly_dark",points='all')
I need all the quartile information it is using to create the box plot, any help welcomed!
# ******************* IMPORT ESSENTIAL LIBRARIES *******************
from pathlib import Path
import pandas as pd
import plotly.express as px
from dash import Dash, html, dcc
import dash_bootstrap_components as dbc
...
# Load the prepared data set into a DataFrame.
filename = "data/data_set_prepared.csv"
df = pd.read_csv(filename)
...
# Horizontal box plot of the Price column with every observation drawn.
# NOTE(review): `ndf` is not defined in the visible code; presumably it is
# derived from `df` in the elided part - confirm.
fig = px.box(ndf.Price, orientation='h',template="plotly_dark",points='all')
...
# Dash application using the Bootstrap stylesheet and a viewport meta tag.
app = Dash(__name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    meta_tags=[
        {"name": "viewport", "content": "width=device-width, initial-scale=1"},
    ],
)
# Page layout: two headings followed by the box plot.
# NOTE(review): SEARCH_FULL_POSTCODE is not defined in the visible code.
app.layout = dbc.Container(
    [
        html.H1("First Time Buyer Page: Is it a fair price?"),
        html.H2("Houses in " + SEARCH_FULL_POSTCODE.upper() + " over the last year: "),
        dcc.Graph(id="box-graph", figure=fig),
        # dcc.Graph(id="primary-graph", figure=fig),
        # dcc.Graph(id="secondary-graph", figure=fig1),
    ],
    fluid=True,
)
# Start the development server only when the file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)
The short answer is that you cannot extract that information from a plotly figure object, but I have provided an answer to a related question that requires extracting the quantiles manually – you can use the function that I wrote that replicates how plotly calculates quantiles (method #10 as outlined in this paper):
## calculate quartiles as outlined in the plotly documentation
def get_percentile(data, p):
    """Return the ``p`` quantile of ``data`` by linear interpolation.

    The sorted sample is interpolated at the 1-based position
    ``n * p + 0.5``. Positions that fall on or before the first observation,
    or on or after the last one, yield that observation.

    Args:
        data: Non-empty iterable of numbers. It is left unmodified.
        p: Quantile as a fraction, e.g. ``0.25`` for the first quartile.

    Returns:
        The interpolated quantile, rounded to two decimal places.

    Raises:
        IndexError: If ``data`` is empty.
    """
    import math  # local import: no module-level import is visible here

    ordered = sorted(data)  # sort a copy so the caller's sequence keeps its order
    n = len(ordered)
    position = n * p + 0.5

    # Clamp the tails instead of indexing outside the sample.
    if position <= 1:
        return round(ordered[0], 2)
    if position >= n:
        return round(ordered[-1], 2)

    lower = math.floor(position)  # 1-based rank of the observation below
    fraction = position - lower   # 0.0 when the position is a whole rank
    below, above = ordered[lower - 1], ordered[lower]
    return round(below + fraction * (above - below), 2)

`update` function doesn't work correctly for bokeh interactors in python

I have a source code that plots the alphashape of a stock price. There's a slider to update the plot dynamically. But the update function doesn't work as expected.
Here's the source code.
x=[76.84,76.85,76.86,76.87,76.88,76.9,76.91,76.92,76.93,76.94,76.97,76.97,76.98,76.99,77.0,77.03,77.03,77.04,77.05,77.06,77.09,77.09,77.1,77.11,77.12,77.15,77.16,77.16,77.17,77.18,77.21,77.22,77.22,77.23,77.24,77.27,77.28,77.28,77.29,77.3,77.33,77.34,77.35,77.35,77.36,77.39,77.4,77.41,77.41,77.42,77.45,77.46,77.47,77.47,77.48,77.51,77.52,77.53,77.54,77.54,77.57,77.58,77.59,77.6,77.6,77.63,77.64,77.65,77.66,77.66,77.69,77.7,77.71,77.72,77.73,77.75,77.76,77.77,77.78,77.79,77.81,77.82,77.83,77.84,77.85,77.87,77.88,77.89,77.9,77.91,77.93,77.94,77.95,77.96,77.97,77.99,78.0,78.01,78.02,78.03,78.05,78.06,78.07,78.08,78.09,78.13,78.14,78.15,78.17,78.18,78.19,78.2,78.21,78.24,78.24,78.25,78.26,78.27,78.3,78.3,78.31,78.32,78.33,78.36,78.36,78.37,78.38,78.39,78.42,78.43,78.43,78.44,78.45,78.48,78.49,78.49,78.5,78.51,78.54,78.55,78.55,78.56,78.57,78.6,78.61,78.62,78.62,78.63,78.66,78.67,78.68,78.68,78.69,78.72,78.73,78.74,78.74,78.75,78.78,78.79,78.8,78.81,78.81,78.84,78.85,78.86,78.87,78.87,78.91,78.92,78.93,78.94,78.96,78.97,78.98,78.99,79.0,79.02,79.03,79.04,79.05,79.06,79.08,79.09,79.1,79.11,79.12,79.2,79.21,79.22,79.23,79.24,79.26,79.27,79.28,79.29,79.3,79.32,79.33,79.34,79.35,79.36,79.38,79.39,79.4,79.41,79.42,79.44,79.45,79.46,79.47,79.48,79.51,79.51,79.52,79.53,79.54,79.57,79.57,79.58,79.59,79.6,79.63,79.63,79.64,79.65,79.66,79.69,79.7,79.7,79.71,79.72,79.75,79.76,79.76,79.77,79.78,79.81,79.82,79.82,79.83,79.84,79.87,79.88,79.89,79.89,79.9,79.94,79.95,79.95,79.96,79.99,80.0,80.01,80.02,80.02,80.05,80.06,80.07,80.08,80.08,80.11,80.12,80.13,80.14,80.14,80.17,80.18,80.19,80.2,80.21,80.23,80.24,80.25,80.26,80.27,80.29,80.3,80.31,80.32,80.33,80.35,80.36,80.37,80.38,80.39,80.41,80.42,80.43,80.44,80.45,80.47,80.48,80.49,80.5,80.51,80.53,80.54,80.55,80.56,80.57,80.59,80.6,80.61,80.62,80.63,80.65,80.66,80.67,80.68,80.69,80.71,80.72,80.73,80.74,80.75,80.78,80.78,80.79,80.8,80.81,80.84,80.84,80.85,80.86,80.87,80.9,80.9,80.91,80.92,80.93,80.96,80.97,80.97,80.98,80.99,81.02,81.03,81
.03,81.04,81.05,81.08,81.09,81.1,81.1,81.11,81.14,81.15,81.16,81.16,81.17,81.2,81.21,81.22,81.22,81.23,81.28,81.29,81.29,81.32,81.33,81.34,81.35,81.35,81.38,81.39,81.4,81.41,81.41,81.44,81.45,81.46,81.47,81.48,81.5,81.51,81.52,81.53,81.54,81.56,81.57,81.58,81.59,81.6,81.62,81.63,81.64,81.65,81.66,81.68,81.69,81.7,81.71,81.72,81.74,81.75,81.76,81.77,81.78,81.8,81.81,81.82,81.83,81.84,81.86,81.87,81.88,81.89,81.9,81.92,81.93,81.94,81.95,81.96,81.98,81.99,82.0,82.01,82.02,82.05,82.06,82.07,82.08,82.11,82.11,82.12,82.13,82.14,82.17,82.18,82.18,82.19,82.2,82.23,82.24,82.24,82.25,82.26,82.29,82.3,82.3,82.31,82.32,82.35,82.36,82.37,82.37,82.38,82.41,82.42,82.43,82.43,82.44,82.59,82.6,82.61,82.62,82.62,82.65,82.66,82.67,82.68,82.68,82.71,82.72,82.73,82.74,82.75,82.77,82.78,82.79,82.8,82.81,82.83,82.84,82.85,82.86,82.87,82.89,82.9,82.91,82.92,82.93,82.95,82.96,82.97,82.98,82.99,83.01,83.02,83.03,83.04,83.05,83.07,83.08,83.1,83.11,83.13,83.14,83.15,83.16,83.17,83.19,83.2,83.21,83.22,83.23,83.26,83.26,83.27,83.28,83.29,83.32,83.32,83.33,83.34,83.35,83.38,83.38,83.39,83.4,83.41,83.44,83.45,83.45,83.46,83.47,83.5,83.51,83.51,83.52,83.53,83.56,83.57,83.57,83.58,83.59,83.62,83.63,83.64,83.64,83.65,83.68,83.69,83.7,83.7,83.71,83.74,83.75,83.76,83.76,83.77,83.8,83.81,83.82,83.83,83.83,83.86,83.87,83.88,83.89,83.89,83.92,83.93,83.94,83.95,83.95,83.98,83.99,84.0,84.01,84.02,84.04,84.05,84.06,84.07,84.08,84.1,84.11,84.12,84.13,84.14,84.16,84.17,84.18,84.19,84.2,84.22,84.23,84.24,84.25,84.26,84.28,84.29,84.3,84.31,84.32,84.34,84.35,84.36,84.37,84.38,84.43,84.44,84.46,84.47,84.48,84.49,84.5,84.53,84.53,84.54,84.55,84.56,84.59,84.59,84.6,84.61,84.62,84.65,84.65,84.66,84.67,84.68,84.71,84.72,84.72,84.73,84.74,84.77,84.78,84.78,84.79,84.8,84.83,84.84,84.84,84.85,84.86,84.89,84.9,84.91,84.91,84.92,84.95,84.96,84.97,84.97,84.98,85.01,85.02,85.03,85.03,85.04,85.07,85.08,85.09,85.1,85.1,85.13,85.14,85.15,85.16,85.16,85.19,85.2,85.22,85.22,85.25,85.26,85.27,85.28,85.29,85.31,85.32,85.33,85.34,85
.35,85.37,85.38,85.39,85.4,85.41,85.43,85.44,85.45,85.46,85.47,85.61,85.62,85.63,85.64,85.65,85.67,85.68,85.69,85.7,85.71,85.73,85.74,85.75,85.76,85.77,85.8,85.8,85.81,85.82,85.83,85.86,85.86,85.87,85.88,85.89,85.92,85.92,85.93,85.94,85.95,85.98,85.99,85.99,86.0,86.01,86.04,86.05,86.05,86.06,86.07,86.1,86.11,86.11,86.12,86.13,86.16,86.17,86.18,86.18,86.19,86.22,86.23,86.24,86.28,86.29,86.3,86.3,86.31,86.34,86.35,86.36,86.37,86.37,86.4,86.41,86.42,86.43,86.43,86.46,86.47,86.48,86.49,86.5,86.52,86.53,86.54,86.55,86.56,86.58,86.59,86.6,86.61,86.62,86.64,86.65,86.66,86.67,86.68,86.7,86.71,86.72,86.73,86.74,86.78,86.79,86.8,86.82,86.83,86.84,86.85,86.86,86.88,86.89,86.9,86.91,86.92,86.94,86.95,86.96,86.97,86.98,87.0,87.01,87.02,87.03,87.04,87.07,87.07,87.08,87.09,87.1,87.13,87.13,87.14,87.15,87.16,87.19,87.19,87.2,87.21,87.22,87.25,87.26,87.26,87.27,87.28,87.31,87.32,87.32,87.33,87.34,87.37,87.38,87.38,87.39,87.4,87.43,87.44,87.45,87.45,87.46,87.49,87.5,87.51,87.51,87.52,87.55,87.56,87.61,87.62,87.63,87.64,87.64,87.67,87.68,87.69,87.7,87.7,87.73,87.74,87.75,87.76,87.77,87.79,87.8,87.81,87.82,87.83,87.85,87.86,87.87,87.88,87.89,87.91,87.92,87.93,87.94,87.95,87.97,87.98,87.99,88.0,88.01,88.03,88.04,88.05,88.06,88.07,88.09,88.1,88.11,88.12,88.13,88.15,88.16,88.17,88.18,88.19,88.21,88.22,88.23,88.24,88.25,88.27,88.28,88.29,88.3,88.31,88.34,88.34,88.35,88.4,88.4,88.41,88.42,88.43,88.46,88.46,88.47,88.48,88.49,88.52,88.53,88.53,88.54,88.55,88.7,88.71,88.72,88.72,88.73,88.76,88.77,88.78,88.78,88.79,88.82,88.83,88.84,88.85,88.85,88.88,88.89,88.9,88.91,88.91,88.94,88.95,88.96,88.97,88.97,89.0,89.01,89.02,89.03,89.04,89.06,89.07,89.08,89.09,89.1,89.12,89.13,89.14,89.15,89.16,89.18,89.19,89.2,89.21,89.22,89.24,89.25,89.26,89.27,89.28,89.3,89.31,89.32,89.33,89.34,89.36,89.37,89.38,89.39,89.42,89.43,89.44,89.45,89.46,89.48,89.49,89.5,89.51,89.52,89.54,89.55,89.56,89.57,89.58,89.61]
y=[2.29,2.41,2.4,2.38,2.43,2.42,2.38,2.36,2.4,2.37,2.36,2.37,2.34,2.32,2.31,2.25,2.25,2.21,2.2,2.21,2.21,2.21,2.21,2.19,2.17,2.1,2.08,2.08,2.12,2.15,2.1,2.09,2.1,2.08,2.08,2.01,2.0,1.98,1.98,1.95,1.92,1.92,1.92,1.92,1.92,1.88,1.88,1.91,1.91,1.88,1.89,1.87,1.85,1.84,1.83,1.88,1.93,1.88,1.82,1.82,2.08,2.13,2.35,2.32,2.37,2.34,2.25,2.35,2.33,2.34,2.32,2.34,2.39,2.53,2.49,2.53,2.54,2.55,2.53,2.52,2.52,2.54,2.66,2.71,2.81,2.92,3.09,2.99,3.03,2.98,3.01,2.98,2.93,2.91,2.93,2.91,2.89,2.92,2.9,2.87,2.9,2.9,2.93,2.83,2.78,2.67,2.6,2.66,2.61,2.61,2.61,2.54,2.56,2.51,2.52,2.55,2.6,2.6,2.67,2.63,2.62,2.63,2.61,2.58,2.59,2.59,2.62,2.59,2.58,2.61,2.63,2.6,2.63,2.63,2.61,2.6,2.58,2.58,2.57,2.58,2.58,2.58,2.58,2.57,2.58,2.58,2.58,2.58,2.55,2.52,2.53,2.53,2.51,2.46,2.48,2.45,2.54,2.53,2.49,2.51,2.49,2.48,2.49,2.47,2.48,2.49,2.48,2.5,2.5,2.55,2.53,2.52,2.51,2.49,2.5,2.49,2.49,2.47,2.46,2.48,2.45,2.45,2.43,2.43,2.45,2.45,2.45,2.45,2.45,2.45,2.45,2.45,2.46,2.45,2.44,2.44,2.45,2.45,2.47,2.56,2.52,2.48,2.47,2.5,2.54,2.54,2.58,2.61,2.63,2.63,2.63,2.61,2.59,2.59,2.56,2.57,2.58,2.56,2.57,2.61,2.59,2.6,2.6,2.58,2.6,2.59,2.6,2.61,2.61,2.59,2.6,2.62,2.62,2.6,2.61,2.59,2.59,2.59,2.59,2.61,2.67,2.65,2.63,2.63,2.6,2.56,2.59,2.59,2.59,2.58,2.58,2.57,2.58,2.55,2.55,2.58,2.58,2.57,2.58,2.83,2.88,2.93,2.79,2.82,2.81,2.86,2.86,2.85,2.82,2.82,2.82,2.78,2.78,2.82,2.79,2.8,2.79,2.79,2.78,2.72,2.73,2.71,2.72,2.73,2.73,2.74,2.74,2.72,2.73,2.73,2.71,2.68,2.71,2.75,2.84,2.91,2.89,2.92,2.97,2.96,2.94,2.99,3.04,2.97,2.99,2.97,2.99,2.98,2.99,3.0,3.01,2.99,2.98,2.99,2.99,2.99,3.01,2.96,2.97,3.0,2.98,2.97,2.96,2.96,3.0,3.0,2.99,2.98,2.99,2.99,2.99,2.99,2.99,2.99,2.98,2.98,2.98,2.98,3.02,3.03,3.03,3.05,3.09,3.08,3.1,3.12,3.14,3.13,3.12,3.14,3.15,3.13,3.15,3.14,3.14,3.14,3.14,3.13,3.11,3.08,3.08,3.08,3.08,3.1,3.11,3.11,3.11,3.09,3.13,3.17,3.28,3.43,3.52,3.47,3.45,3.45,3.45,3.44,3.46,3.46,3.45,3.44,3.45,3.45,3.45,3.45,3.45,3.47,3.5,3.54,3.52,3.5,3.5,3.5,3.44,3.45,3.45,3.45,3.43,3.45,3.48,3.48,3.45,3.46,3.43,3.46,3.45
,3.43,3.43,3.42,3.42,3.43,3.42,3.41,3.39,3.38,3.38,3.38,3.4,3.39,3.38,3.39,3.37,3.37,3.38,3.38,3.38,3.38,3.38,3.38,3.37,3.36,3.37,3.36,3.36,3.37,3.36,3.41,3.41,3.4,3.39,3.39,3.37,3.37,3.36,3.36,3.36,3.36,3.36,3.37,3.36,3.37,3.39,3.45,3.42,3.39,3.4,3.4,3.39,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.42,3.42,3.41,3.39,3.39,3.39,3.37,3.38,3.4,3.41,3.44,3.43,3.43,3.43,3.43,3.42,3.42,3.42,3.47,3.46,3.47,3.53,3.65,3.59,3.76,3.85,3.77,3.9,3.76,3.75,3.8,3.73,3.7,3.66,3.68,3.66,3.69,3.68,3.69,3.69,3.61,3.61,3.61,3.59,3.59,3.59,3.63,3.61,3.62,3.63,3.62,3.61,3.61,3.62,3.69,3.66,3.69,3.68,3.66,3.65,3.66,3.68,3.78,3.76,3.77,3.74,3.75,3.77,3.75,3.7,3.7,3.73,3.74,3.79,3.83,3.87,3.86,3.8,3.81,3.78,3.8,3.78,3.78,3.84,3.81,3.81,3.82,3.78,3.75,3.76,3.74,3.72,3.71,3.72,3.78,3.78,3.77,3.76,3.74,3.74,3.75,3.75,3.73,3.72,3.71,3.68,3.7,3.67,3.64,3.56,3.57,3.56,3.61,3.62,3.59,3.57,3.59,3.55,3.54,3.53,3.52,3.53,3.53,3.58,3.6,3.57,3.53,3.53,3.54,3.55,3.57,3.57,3.58,3.64,3.63,3.6,3.6,3.6,3.59,3.6,3.6,3.61,3.61,3.62,3.64,3.64,3.64,3.69,3.73,3.71,3.69,3.69,3.69,3.65,3.66,3.66,3.72,3.73,3.7,3.7,3.72,3.74,3.74,3.74,3.79,3.85,3.9,3.88,3.93,3.86,3.94,4.0,4.0,3.97,3.94,3.93,3.91,3.92,3.94,3.94,3.94,3.99,3.98,4.01,3.99,3.92,3.82,3.71,3.81,3.77,3.76,3.81,3.79,3.83,3.83,3.88,3.89,3.84,3.84,3.83,3.79,3.81,3.8,3.81,3.82,3.83,3.8,3.81,3.81,3.83,3.83,3.86,3.92,3.93,3.97,3.97,3.96,3.95,3.94,3.96,3.98,3.88,3.98,4.0,4.02,4.04,4.08,4.09,4.09,4.16,4.22,4.21,4.19,4.19,4.18,4.19,4.2,4.19,4.2,4.21,4.27,4.3,4.29,4.26,4.29,4.29,4.34,4.36,4.35,4.33,4.33,4.36,4.34,4.33,4.34,4.37,4.35,4.36,4.39,4.38,4.41,4.4,4.4,4.39,4.39,4.41,4.42,4.46,4.48,4.53,4.63,4.65,4.71,4.81,4.91,5.0,4.95,5.04,5.01,4.98,4.9,4.95,4.91,4.8,4.9,4.86,4.76,4.77,4.77,4.79,4.8,4.79,4.81,4.89,4.87,4.87,4.87,4.8,4.79,4.75,4.69,4.69,4.71,4.78,4.76,4.74,4.73,4.8,4.81,4.84,4.83,4.83,4.83,4.79,4.75,4.75,4.66,4.69,4.7,4.68,4.7,4.73,4.72,4.75,4.75,4.75,4.71,4.72,4.71,4.69,4.68,4.64,4.65,4.65,4.66,4.66,4.64,4.65,4.64,4.62,4.63,4.6,4.52,4.45,4.53,4.49,4.5,4.
48,4.37,4.39,4.4,4.41,4.43,4.47,4.46,4.45,4.42,4.44,4.45,4.45,4.44,4.43,4.41,4.41,4.44,4.41,4.38,4.38,4.37,4.37,4.38,4.32,4.24,4.29,4.31,4.29,4.27,4.28,4.28,4.28,4.32,4.32,4.33,4.33,4.32,4.33,4.39,4.47,4.47,4.53,4.53,4.53,4.52,4.54,4.51,4.53,4.53,4.53,4.54,4.54,4.58,4.56,4.58,4.56,4.55,4.53,4.54,4.54,4.55,4.54,4.53,4.52,4.49,4.45,4.45,4.46,4.46,4.48,4.46,4.47,4.47,4.49,4.47,4.47,4.48,4.51,4.57,4.57,4.59,4.61,4.57,4.57,4.6,4.64,4.64,4.63,4.65,4.65,4.64,4.64,4.66,4.72,4.73,4.76,4.74,4.8,4.78,4.72,4.76,4.86,4.86,4.88,4.86,4.83,4.85,4.85,4.84,4.81,4.82,4.82,4.82,4.81,4.82,4.85,4.85,4.84,4.82,4.81,4.78,4.81,4.79,4.75,4.78,4.8,4.79,4.78,4.76,4.77,4.77,4.77,4.78,4.79,4.79,4.76,4.75,4.74,4.73,4.74,4.75,4.8,4.81,4.84,4.82,4.8,4.81,4.8,4.77,4.81,4.8,4.81,4.84,4.86,4.83,4.82,4.81,4.8,4.78,4.81,4.81,4.82,4.88,4.84,4.84,4.83,4.83,4.85,4.85,4.83,4.81,4.82,4.79,4.8,4.79,4.78,4.8,4.79,4.78,4.77,4.78,4.77,4.76,]
from alphashape import alphashape
from shapely.geometry import mapping
from bokeh.plotting import figure
from ipywidgets import interact
from bokeh.io import output_notebook, show, push_notebook
def alphashape_func(x, y, alpha):
    """Compute the alpha shape of the points and return its outlines.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, paired element-wise with ``x``.
        alpha: Alpha parameter forwarded to ``alphashape``.

    Returns:
        A dict ``{'x': [...], 'y': [...]}``. Each value is a list with one
        coordinate list per exterior ring, so a single polygon and several
        polygons share the same list-of-lists layout. Both lists are empty
        when the result is neither a Polygon nor a MultiPolygon.
    """
    points = [[xi, yi] for xi, yi in zip(x, y)]
    # mapping() turns the shapely geometry into a GeoJSON-like dict with
    # 'type' and (for polygons) 'coordinates' entries.
    geometry = mapping(alphashape(points, alpha=alpha))
    coordinates = geometry.get('coordinates', ())
    shape_type = geometry.get('type')
    if shape_type == 'Polygon':
        # coordinates = (exterior_ring, hole_ring, ...)
        polygons = [coordinates]
    elif shape_type == 'MultiPolygon':
        # coordinates = one (exterior_ring, hole_ring, ...) tuple per polygon
        polygons = coordinates
    else:
        polygons = []
    # Keep only the exterior ring of every non-empty polygon.
    rings = [polygon[0] for polygon in polygons if polygon]
    return {
        'x': [[point[0] for point in ring] for ring in rings],
        'y': [[point[1] for point in ring] for ring in rings],
    }
# Initial alpha value and the outlines computed for it.
alpha = 5
alpha_high_pnt = alphashape_func(x,y,alpha)
# Figure that stretches to the available width and renders with WebGL.
plot = figure(sizing_mode='stretch_width', output_backend="webgl")
# line_pnt(plot, max_processed_xy['x'], max_processed_xy['y'],legend_label ='processed_xy',line_color='yellow', line_width=2)
# Draw the outlines; the returned renderer is kept so update() can reach its data source.
alpha_shape_plt = plot.multi_line(xs=alpha_high_pnt['x'],ys=alpha_high_pnt['y'], line_color='cyan',legend_label = 'alpha_high_pnt')
# create an update function
def update(alpha=5):
    """Recompute the alpha shape for ``alpha`` and push it to the notebook.

    NOTE: 'xs' and 'ys' are replaced one after the other, so between the two
    assignments the data source holds columns of different lengths whenever
    the new shape has a different number of outlines than the old one.
    """
    alpha_high_pnt = alphashape_func(x,y,alpha)
    alpha_shape_plt.data_source.data['xs'] = alpha_high_pnt['x']
    alpha_shape_plt.data_source.data['ys'] = alpha_high_pnt['y']
    # push new values to the notebook
    push_notebook()
# Direct Bokeh output to the notebook, display the figure, then create a
# widget for `alpha` (range 0..25, step 1) that calls update() on change.
output_notebook()
show(plot)
interact(update, alpha=(0,25,1))
(the dynamic slider only works when you run it in jupyter in a web browser)
When I drag the slider, it shows an error message:
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('xs', 54), ('ys', 99)
I don't see the reason for this error, since the lengths of xs and ys are equal when I adjust the alpha value manually.
Can anyone help?
===================== update ======================
Based on @bigreddot's suggestion, I updated the code as shown below. The length-mismatch problem is resolved, but the plot still doesn't refresh.
from alphashape import alphashape
from shapely.geometry import mapping
from bokeh.plotting import figure
from bokeh.io import output_notebook, show, push_notebook
from bokeh.models import ColumnDataSource
from ipywidgets import interact
output_notebook()
def alphashape_func(x, y, alpha):
    """Compute the alpha shape of the points and return its outlines.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, paired element-wise with ``x``.
        alpha: Alpha parameter forwarded to ``alphashape``.

    Returns:
        A dict ``{'x': [...], 'y': [...]}``. Each value is a list with one
        coordinate list per exterior ring, so a single polygon and several
        polygons share the same list-of-lists layout. Both lists are empty
        when the result is neither a Polygon nor a MultiPolygon.
    """
    points = [[xi, yi] for xi, yi in zip(x, y)]
    # mapping() turns the shapely geometry into a GeoJSON-like dict with
    # 'type' and (for polygons) 'coordinates' entries.
    geometry = mapping(alphashape(points, alpha=alpha))
    coordinates = geometry.get('coordinates', ())
    shape_type = geometry.get('type')
    if shape_type == 'Polygon':
        # coordinates = (exterior_ring, hole_ring, ...)
        polygons = [coordinates]
    elif shape_type == 'MultiPolygon':
        # coordinates = one (exterior_ring, hole_ring, ...) tuple per polygon
        polygons = coordinates
    else:
        polygons = []
    # Keep only the exterior ring of every non-empty polygon.
    rings = [polygon[0] for polygon in polygons if polygon]
    return {
        'x': [[point[0] for point in ring] for ring in rings],
        'y': [[point[1] for point in ring] for ring in rings],
    }
# Initial alpha value.
alpha = 5
plot = figure(sizing_mode='stretch_width', output_backend="webgl")
# Data source with the columns 'x' and 'y' returned by alphashape_func.
source = ColumnDataSource(data=alphashape_func(x,y,alpha))
# Draw the outlines from the shared source so later updates reach the glyph.
alpha_shape_plt = plot.multi_line(source=source, xs='x',ys='y', line_color='cyan',legend_label = 'alpha_high_pnt')
# NOTE: a bare `print` only names the function; it prints nothing.
print
# create an update function
def update(alpha=5):
    """Replace the whole data dict of ``source`` with the outlines for ``alpha``."""
    source.data = alphashape_func(x,y,alpha)
    # push new values to the notebook
    push_notebook()
# Create the alpha widget (range 0..25, step 1), then display the figure.
# NOTE(review): show() runs after interact() and without notebook_handle=True,
# while push_notebook() is called without a handle - possibly why the plot
# does not refresh; confirm against the Bokeh notebook documentation.
interact(update, alpha=(0,25,1))
show(plot)
In between this line:
alpha_shape_plt.data_source.data['xs'] = alpha_high_pnt['x']
and this line:
alpha_shape_plt.data_source.data['ys'] = alpha_high_pnt['y']
the CDS columns are not all the same length. If you need to update with data that has a new length you should collect all the updates up front in a new_data dict and then set
source.data = new_data
to update the CDS "all at once". This is more efficient in any case, as well, since it results in fewer property update change events being sent out.

Plotly - Highlight data point and nearest three points on hover

I have made a scatter plot of the word2vec model using plotly.
I want functionality of highlighting the specific data point on hover along with the top 3 nearest vectors to that.
It would be of great help if anyone can guide me with this or suggest any other option
model
csv
Code:
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import plotly.express as px
def get_2d_coordinates(model, words):
    """Project the word vectors of ``words`` onto two dimensions with t-SNE.

    Args:
        model: Object exposing ``model.wv.get_vector(word)``.
        words: Iterable of words to look up.

    Returns:
        Tuple ``(x_coords, y_coords)`` holding the two columns of the t-SNE
        embedding, one entry per word that has a vector, in input order.
        Words without a vector are skipped, so the result can be shorter
        than ``words``.
    """
    vectors = []
    for word in words:
        try:
            vectors.append(model.wv.get_vector(word))
        except KeyError:
            # The word is not in the model's vocabulary: leave it out.
            continue
    # Stack once instead of growing an array with np.append on every word.
    # Fall back to the empty (0, 100) float32 array when nothing matched.
    arr = np.array(vectors, dtype='f') if vectors else np.empty((0, 100), dtype='f')
    tsne = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(arr)
    return Y[:, 0], Y[:, 1]
# Load the trained Word2Vec model and the product table.
ic_model = gensim.models.Word2Vec.load("w2v_IceCream.model")
ic = pd.read_csv('ic_prods.csv')
# 2-D coordinates for the item descriptions.
icx, icy = get_2d_coordinates(ic_model, ic['ITEM_DESC'])
# NOTE(review): get_2d_coordinates skips words it cannot look up, so icx/icy
# may be shorter than the columns of `ic` - confirm the lengths match.
ic_data = {'Category': ic['SUB_CATEGORY'],
'Words':ic['ITEM_DESC'],
'X':icx,
'Y':icy}
ic_df = pd.DataFrame(ic_data)
ic_df.head()
# Scatter plot coloured by category, with the item description as hover title.
ic_fig = px.scatter(ic_df, x=icx, y=icy, color=ic_df['Category'], hover_name=ic_df['Words'], title='IceCream Data')
ic_fig.show()
In plotly-python, I don't think there's an easy way of retrieving the location of the cursor. You can attempt to use go.FigureWidget to highlight a trace as described in this answer, but I think you're going to be limited with plotly-python, and I'm not sure if highlighting the closest n points will be possible.
However, I believe that you can accomplish what you want in plotly-dash since callbacks are supported - meaning you would be able to retrieve location of your cursor and then calculate the n closest data points to your cursor and highlight the data points as needed.
Below is an example of such a solution. If you haven't seen it before, it looks complicated, but what is happening is that I am taking the point where you clicked as an input. Plotly is plotly.js under the hood, so it comes to us in the form of a dictionary (and not some kind of plotly-python object). Then I calculate the closest three data points to the clicked input point by comparing the coordinates of every other point in the dataframe, add the information from the three closest points as traces to the input with the color teal (or any color of your choosing), and send this modified input back as the output, and update the figure.
I am using click instead of hover because hover would cause the highlighted points to flicker too much as you drag your mouse through the points.
Also the dash app doesn't work perfectly as I believe there is some issue when you double click on points (you can see me click once in the gif below before getting it to start working), but this basic framework is hopefully close enough to what you want. Cheers!
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go
import json
import dash
from dash import dcc, html, Input, Output
# Dash application styled with an external CSS file.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
def get_2d_coordinates(model, words):
    """Project the word vectors of ``words`` onto two dimensions with t-SNE.

    Args:
        model: Object exposing ``model.wv.get_vector(word)``.
        words: Iterable of words to look up.

    Returns:
        Tuple ``(x_coords, y_coords)`` holding the two columns of the t-SNE
        embedding, one entry per word that has a vector, in input order.
        Words without a vector are skipped, so the result can be shorter
        than ``words``.
    """
    vectors = []
    for word in words:
        try:
            vectors.append(model.wv.get_vector(word))
        except KeyError:
            # The word is not in the model's vocabulary: leave it out.
            continue
    # Stack once instead of growing an array with np.append on every word.
    # Fall back to the empty (0, 100) float32 array when nothing matched.
    arr = np.array(vectors, dtype='f') if vectors else np.empty((0, 100), dtype='f')
    tsne = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(arr)
    return Y[:, 0], Y[:, 1]
# Load the trained Word2Vec model and the product table.
ic_model = gensim.models.Word2Vec.load("w2v_IceCream.model")
ic = pd.read_csv('ic_prods.csv')
# 2-D coordinates for the item descriptions.
icx, icy = get_2d_coordinates(ic_model, ic['ITEM_DESC'])
ic_data = {'Category': ic['SUB_CATEGORY'],
'Words':ic['ITEM_DESC'],
'X':icx,
'Y':icy}
ic_df = pd.DataFrame(ic_data)
# Scatter plot coloured by category, with the item description as hover title.
ic_fig = px.scatter(ic_df, x=icx, y=icy, color=ic_df['Category'], hover_name=ic_df['Words'], title='IceCream Data')
# Number of distinct categories; the callback treats this as the number of
# traces in the untouched figure (presumably one trace per category - confirm).
NUMBER_OF_TRACES = len(ic_df['Category'].unique())
ic_fig.update_layout(clickmode='event+select')
# Page layout: a single graph with the id the callback refers to.
app.layout = html.Div([
dcc.Graph(
id='ic_figure',
figure=ic_fig)
])
## we take the 4 closest points because the 1st closest point will be the point itself
def get_n_closest_points(x0, y0, df=ic_df[['X','Y']].copy(), n=4):
    """Return the coordinates of the rows of ``df`` nearest to (x0, y0).

    Rows are ranked by squared distance, which orders them exactly like the
    distance itself while saving the square root:
        distance   = sqrt[(x1-x0)^2 + (y1-y0)^2]
        distance^2 =      (x1-x0)^2 + (y1-y0)^2

    Args:
        x0: X coordinate of the query point.
        y0: Y coordinate of the query point.
        df: DataFrame with 'X' and 'Y' columns. The default is a copy of
            those two columns of ``ic_df`` taken when the function is
            defined. The frame is not modified.
        n: Number of ranked rows to consider, counting the closest row,
            which is dropped from the result.

    Returns:
        2-D array with the 'X' and 'Y' values of the rows ranked 2nd to n-th
        by distance (at most n-1 rows).
    """
    # assign() returns a new frame, so neither the shared default nor a
    # caller-supplied frame picks up a 'dist' column.
    ranked = df.assign(
        dist=(df["X"] - x0) ** 2 + (df["Y"] - y0) ** 2
    ).sort_values(by="dist")
    ## we don't return the point itself which will always be closest to itself
    return ranked[1:n][["X", "Y"]].values
# NOTE(review): 'figure' is both an Input and the Output of this callback;
# a State may be the better fit - confirm against the Dash callback docs.
@app.callback(
    Output('ic_figure', 'figure'),
    [Input('ic_figure', 'clickData'),
     Input('ic_figure', 'figure')]
)
def display_hover_data(clickData, figure):
    """Highlight the points closest to the clicked point.

    Args:
        clickData: Click payload of the 'ic_figure' graph, or None when no
            point has been clicked yet.
        figure: Current figure of the graph as a dict with a 'data' list.

    Returns:
        The figure dict. After a click, its 'data' list is the first
        NUMBER_OF_TRACES traces followed by one teal marker trace per point
        returned by get_n_closest_points.
    """
    print(clickData)
    if clickData is None:
        return figure

    clicked = clickData['points'][0]
    closest_points = get_n_closest_points(clicked['x'], clicked['y'])

    ## extra traces mean an earlier click ALREADY added highlights, so cut
    ## the list back to the original number of traces first
    if len(figure['data']) > NUMBER_OF_TRACES:
        figure['data'] = figure['data'][:NUMBER_OF_TRACES]

    new_traces = [{
        'marker': {'color': 'teal', 'symbol': 'circle'},
        'mode': 'markers',
        'orientation': 'v',
        'showlegend': False,
        'x': [x],
        'xaxis': 'x',
        'y': [y],
        'yaxis': 'y',
        'type': 'scatter',
        'selectedpoints': [0]
    } for x, y in closest_points]
    figure['data'].extend(new_traces)
    return figure
# Start the Dash development server only when the file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)

Weird Time-Series Graph Using Pycaret and plotly

I am trying to visualize Air Quality Data as time-series charts using pycaret and plotly dash python libraries , but i am getting very weird graphs, below is my code:
import pandas as pd
import plotly.express as px
data = pd.read_csv('E:/Self Learning/Djang_Dash/2019-2020_5.csv')
# parse the Date column as day/month/year
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')
#data.set_index('Date', inplace=True)
# build the series key from OBJECTID (only this one column is joined)
data['OBJECTID'] = ['Location_' + str(i) for i in data['OBJECTID']]
#data['AQI_Bins_AI'] = ['Bin_' + str(i) for i in data['AQI_Bins_AI']]
data['time_series'] = data[['OBJECTID']].apply(lambda x: '_'.join(x), axis=1)
data.drop(['OBJECTID'], axis=1, inplace=True)
# extract features from date
data['month'] = [i.month for i in data['Date']]
data['year'] = [i.year for i in data['Date']]
data['day_of_week'] = [i.dayofweek for i in data['Date']]
data['day_of_year'] = [i.dayofyear for i in data['Date']]
data.head(4000)
data['time_series'].nunique()
# one figure per series: CO together with its 30-row rolling mean
# NOTE(review): rows are plotted in the order they appear in the file; if Date
# is not sorted, px.line will connect the points out of order - confirm.
for i in data['time_series'].unique():
    subset = data[data['time_series'] == i]
    subset['moving_average'] = subset['CO'].rolling(window = 30).mean()
    fig = px.line(subset, x="Date", y=["CO","moving_average"], title = i, template = 'plotly_dark')
    fig.show()
I would appreciate any help with this.
Here is my sample data: Google Drive Link
data has not been provided in a usable way. Sought out publicly available similar data. found: https://www.kaggle.com/rohanrao/air-quality-data-in-india?select=station_hour.csv
using this data, with a couple of cleanups of your code, no issues with plots. I suspect your data has one of these issues
date is not datetime64[ns] in your data frame
date is not sorted, leading to lines being drawn in way you have noted
by refactoring way moving average is calculated, you can use animation instead of lots of separate figures
get some data
import kaggle.cli
import sys, math
import pandas as pd
from pathlib import Path
from zipfile import ZipFile
import plotly.express as px
# download data set
# https://www.kaggle.com/rohanrao/air-quality-data-in-india?select=station_hour.csv
# Replace the process arguments with a "datasets download ..." command line,
# presumably so that kaggle.cli.main() picks it up from sys.argv.
sys.argv = [
sys.argv[0]
] + "datasets download rohanrao/air-quality-data-in-india".split(
" "
)
kaggle.cli.main()
# Open the downloaded archive and print the names of the files it contains.
zfile = ZipFile("air-quality-data-in-india.zip")
print([f.filename for f in zfile.infolist()])
plot using code from question
import pandas as pd
import plotly.express as px
from pathlib import Path
from distutils.version import StrictVersion
# data = pd.read_csv('E:/Self Learning/Djang_Dash/2019-2020_5.csv')
# use kaggle data
# dfs = {f.filename:pd.read_csv(zfile.open(f)) for f in zfile.infolist() if f.filename in ['station_day.csv',"stations.csv"]}
# data = pd.merge(dfs['station_day.csv'],dfs["stations.csv"], on="StationId")
# data['Date'] = pd.to_datetime(data['Date'])
# # kaggle data is different from question, make it compatible with questions data
# data = data.assign(OBJECTID=lambda d: d["StationId"])
# sample data from google drive link
data2 = pd.read_csv(Path.home().joinpath("Downloads").joinpath("AQI.csv"))
data2["Date"] = pd.to_datetime(data2["Date"])
data = data2
# as per very first comment - it's important data is ordered !
data = data.sort_values(["Date","OBJECTID"])
data['time_series'] = "Location_" + data["OBJECTID"].astype(str)
# clean up data, remove rows where there is no CO value
data = data.dropna(subset=["CO"])
# can do moving average in one step (can also be used by animation)
# the expression that extracts the per-group rolling mean depends on the
# installed pandas version, hence the two branches
# NOTE(review): distutils is deprecated (PEP 632) and absent from recent
# Python releases - confirm this import still works on the target version.
if StrictVersion(pd.__version__) < StrictVersion("1.3.0"):
    data["moving_average"] = data.groupby("time_series",as_index=False)["CO"].rolling(window=30).mean().to_frame()["CO"].values
else:
    data["moving_average"] = data.groupby("time_series",as_index=False)["CO"].rolling(window=30).mean()["CO"]
# only the first three series, for the purpose of demonstration
for i in data['time_series'].unique()[0:3]:
    subset = data.loc[data['time_series'] == i]
    fig = px.line(subset, x="Date", y=["CO","moving_average"], title = i, template = 'plotly_dark')
    fig.show()
can use animation
# One animated figure with a frame per time series; the y axis is fixed to
# the range from the minimum CO value to its 97th percentile.
px.line(
data,
x="Date",
y=["CO", "moving_average"],
animation_frame="time_series",
template="plotly_dark",
).update_layout(yaxis={"range":[data["CO"].min(), data["CO"].quantile(.97)]})

Plotly for python, only first data point is being graphed

I am new to plotly and working on a script to generate a graph based on some results pulled from a database. However when I send the data over to plotly, only the first data point for each of the three traces is being graphed. I've verified that the lists contain the right data, I've even simply pasted the lists in instead of dynamically creating the variables. Unfortunately each time only the first data point is being graphed. Does anyone know what I am missing here? I am also open to another library if needed.
Is it also possible to have the x axis show as a string?
import plotly.plotly as py
import plotly.graph_objs as go
# Custom database class, works fine.
from classes.database import DatabaseConnection
# Database Connections and instances
# Module-level connection and cursor; main() runs its query through db_cur.
db_instance = DatabaseConnection()
db_conn = db_instance.conn
db_cur = db_instance.cur
def main():
    """Query per-version response statistics and plot them as three line traces.

    Runs the aggregate query through the module-level cursor, collects the
    version numbers and the averaged average/minimum/maximum values, and
    sends a figure named 'response-times' to plotly without opening it.
    """
    # Get a list of versions and their stats.
    db_cur.execute(
"""
select row_to_json(x) from
(SELECT
versions.version_number,
cast(AVG(results.average) as double precision) as average,
cast(AVG(results.minimum) as double precision) as minimum,
cast(AVG(results.maximum) as double precision) as maximum
FROM versions,results
WHERE
versions.version_number = results.version_number
GROUP BY
versions.version_number) x;
"""
    )
    versions = []
    average = []
    minimum = []
    maximum = []
    unclean = db_cur.fetchall()
    # Create lists for x and y coordinates.
    # Each row's first element is indexed by column name; the statistics are
    # truncated to int, the version number is passed through unchanged.
    for row in unclean:
        versions.append(row[0]['version_number'])
        average.append(int(row[0]['average']))
        minimum.append(int(row[0]['minimum']))
        maximum.append(int(row[0]['maximum']))
    # One line trace per statistic, all sharing the versions as x values.
    grph_average = go.Scatter(
        x=versions,
        y=average,
        name = 'Average',
        mode='lines',
    )
    grph_minimum = go.Scatter(
        x=versions,
        y=minimum,
        name = 'Minimum',
        mode='lines',
    )
    grph_maximum = go.Scatter(
        x=versions,
        y=maximum,
        name = 'Maximum',
        mode='lines',
    )
    data = go.Data([grph_average, grph_minimum, grph_maximum])
    # Edit the layout
    layout = dict(title = 'Responses',
        xaxis = dict(title = 'Versions'),
        yaxis = dict(title = 'Ms'),
    )
    fig = dict(data=data, layout=layout)
    py.plot(fig, filename='response-times', auto_open=False)
# Run main() only when the file is executed as a script.
if __name__ == '__main__':
    main()
The data that query returns is as follows, if you want to plug in the values :
versions = ['6.1', '5.0', '5.2']
average = [11232, 29391, 10429]
minimum = [3641, 7729, 3483]
maximum = [57440, 62535, 45201]
Here is some matplotlib that might get you started on this:
import matplotlib.pyplot as plt
# Sample values for three versions.
versions = ['6.1', '5.0', '5.2']
average = [11232, 29391, 10429]
minimum = [3641, 7729, 3483]
maximum = [57440, 62535, 45201]
# Plot each statistic against its position 0..n-1 ...
plt.plot(minimum)
plt.plot(average)
plt.plot(maximum)
# ... and label those positions with the version strings.
plt.xticks(range(len(versions)), versions)
It looks like it was an issue with my x axis. By adding some text before the version number and specifically type casting to a string I was able to get the graphs to generate properly.
# Create lists for x and y coordinates.
# The version number is cast to str and prefixed with "Version: ".
for row in unclean:
    versions.append("Version: " + str(row[0]['version_number']))
    average.append(int(row[0]['average']))
    minimum.append(int(row[0]['minimum']))
    maximum.append(int(row[0]['maximum']))

Categories