Plotly - Highlight data point and nearest three points on hover - python
I have made a scatter plot of the word2vec model using plotly.
I want functionality of highlighting the specific data point on hover along with the top 3 nearest vectors to that.
It would be of great help if anyone can guide me with this or suggest any other option
model
csv
Code:
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import plotly.express as px
def get_2d_coordinates(model, words):
    """Project the word2vec vectors of ``words`` onto two t-SNE dimensions.

    Args:
        model: Object exposing ``model.wv.get_vector(word)`` (here a gensim
            Word2Vec model).
        words: Iterable of vocabulary keys to look up.

    Returns:
        ``(x_coords, y_coords)``: the two columns of the t-SNE embedding, one
        entry per word that was found in the model. Words that are not in
        the vocabulary are skipped, so the result can be shorter than
        ``words``.

    Raises:
        ValueError: If none of ``words`` is present in the model.
    """
    vectors = []
    for word in words:
        try:
            vectors.append(model.wv.get_vector(word))
        except KeyError:
            # Out-of-vocabulary word: skip it, but let any other error surface
            # instead of silently producing an empty embedding.
            continue
    if not vectors:
        raise ValueError("none of the given words are in the model vocabulary")
    # Stack once at the end; the width follows the model's own vector size
    # rather than a hard-coded 100.
    arr = np.asarray(vectors, dtype='f')
    tsne = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(arr)
    return Y[:, 0], Y[:, 1]
# Load the trained word2vec model and the product table.
ic_model = gensim.models.Word2Vec.load("w2v_IceCream.model")
ic = pd.read_csv('ic_prods.csv')

# 2-D t-SNE coordinates for every item description.
icx, icy = get_2d_coordinates(ic_model, ic['ITEM_DESC'])

# One row per product: its category, its description and its 2-D position.
ic_data = dict(
    Category=ic['SUB_CATEGORY'],
    Words=ic['ITEM_DESC'],
    X=icx,
    Y=icy,
)
ic_df = pd.DataFrame(ic_data)
ic_df.head()

# Scatter plot coloured by category; hovering shows the item description.
ic_fig = px.scatter(
    ic_df,
    x=icx,
    y=icy,
    color=ic_df['Category'],
    hover_name=ic_df['Words'],
    title='IceCream Data',
)
ic_fig.show()
In plotly-python, I don't think there's an easy way of retrieving the location of the cursor. You can attempt to use go.FigureWidget to highlight a trace as described in this answer, but I think you're going to be limited with plotly-python, and I'm not sure whether highlighting the closest n points will be possible.
However, I believe that you can accomplish what you want in plotly-dash since callbacks are supported - meaning you would be able to retrieve location of your cursor and then calculate the n closest data points to your cursor and highlight the data points as needed.
Below is an example of such a solution. If you haven't seen it before, it looks complicated, but what is happening is that I am taking the point where you clicked as an input. Plotly is plotly.js under the hood, so the figure comes to us in the form of a dictionary (and not some kind of plotly-python object). Then I calculate the three data points closest to the clicked point by comparing its coordinates with those of every other point in the dataframe, add those three points to the input figure as teal traces (or any color of your choosing), and send this modified figure back as the output, which updates the figure.
I am using click instead of hover because hover would cause the highlighted points to flicker too much as you drag your mouse through the points.
Also the dash app doesn't work perfectly as I believe there is some issue when you double click on points (you can see me click once in the gif below before getting it to start working), but this basic framework is hopefully close enough to what you want. Cheers!
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go
import json
import dash
from dash import dcc, html, Input, Output
# Stylesheet URLs handed to Dash via its external_stylesheets argument.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# The Dash application object; the layout and the callback below attach to it.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
def get_2d_coordinates(model, words):
    """Return 2-D t-SNE coordinates for the word vectors of ``words``.

    Args:
        model: Object exposing ``model.wv.get_vector(word)`` (here a gensim
            Word2Vec model).
        words: Iterable of vocabulary keys to embed.

    Returns:
        A tuple ``(x_coords, y_coords)`` holding the first and second t-SNE
        component for each word found in the model. Words absent from the
        vocabulary are skipped, so the arrays can be shorter than ``words``.

    Raises:
        ValueError: If no word in ``words`` is known to the model.
    """
    found_vectors = []
    for word in words:
        try:
            found_vectors.append(model.wv.get_vector(word))
        except KeyError:
            # Unknown word: ignore it. Other exceptions are real errors and
            # are no longer swallowed.
            continue
    if not found_vectors:
        raise ValueError("none of the given words are in the model vocabulary")
    # Build the matrix in one step; its width is whatever the model produces,
    # not a fixed 100 columns.
    arr = np.asarray(found_vectors, dtype='f')
    tsne = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(arr)
    return Y[:, 0], Y[:, 1]
# Load the trained word2vec model and the product table.
ic_model = gensim.models.Word2Vec.load("w2v_IceCream.model")
ic = pd.read_csv('ic_prods.csv')

# 2-D t-SNE coordinates for every item description.
icx, icy = get_2d_coordinates(ic_model, ic['ITEM_DESC'])

# One row per product: its category, its description and its 2-D position.
ic_data = dict(
    Category=ic['SUB_CATEGORY'],
    Words=ic['ITEM_DESC'],
    X=icx,
    Y=icy,
)
ic_df = pd.DataFrame(ic_data)

ic_fig = px.scatter(
    ic_df,
    x=icx,
    y=icy,
    color=ic_df['Category'],
    hover_name=ic_df['Words'],
    title='IceCream Data',
)

# The callback compares the figure's trace count against this value to
# detect highlight traces it added on an earlier click.
NUMBER_OF_TRACES = len(ic_df['Category'].unique())

ic_fig.update_layout(clickmode='event+select')

# The page consists of a single graph component showing the scatter plot.
app.layout = html.Div(children=[
    dcc.Graph(id='ic_figure', figure=ic_fig),
])
## we take the 4 closest points because the 1st closest point will be the point itself
def get_n_closest_points(x0, y0, df=ic_df[['X','Y']].copy(), n=4):
    """Return the coordinates of the ``n - 1`` points of ``df`` nearest to (x0, y0).

    Squared distances ``(x1-x0)^2 + (y1-y0)^2`` are compared instead of true
    distances: the ordering is the same and the square root is avoided.

    Args:
        x0: X coordinate of the reference point.
        y0: Y coordinate of the reference point.
        df: Frame with ``X`` and ``Y`` columns to search. It is only read;
            no ``dist`` column is written into it.
        n: Number of nearest rows to consider, counting the reference point
            itself.

    Returns:
        Array of shape ``(n - 1, 2)`` with the ``[X, Y]`` pairs ordered from
        nearest to farthest. The single nearest row is dropped on the
        assumption that it is the reference point itself.
    """
    coords = df[["X", "Y"]].to_numpy()
    sq_dist = (coords[:, 0] - x0) ** 2 + (coords[:, 1] - y0) ** 2
    # Position 0 of the ordering is the reference point (distance 0): skip it.
    nearest = np.argsort(sq_dist, kind="stable")[1:n]
    return coords[nearest]
@app.callback(
    Output('ic_figure', 'figure'),
    [Input('ic_figure', 'clickData'),
     Input('ic_figure', 'figure')]
)
def display_hover_data(clickData, figure):
    """Highlight the points nearest to the clicked point.

    Args:
        clickData: Click payload of the graph, or ``None`` when nothing has
            been clicked. The coordinates are read from
            ``clickData['points'][0]``.
        figure: The current figure as a dictionary; its ``'data'`` list holds
            the traces.

    Returns:
        The figure dictionary. After a click it holds the first
        ``NUMBER_OF_TRACES`` traces plus one teal marker trace per
        neighbouring point.
    """
    print(clickData)
    if clickData is None:
        return figure

    clicked = clickData['points'][0]
    closest_points = get_n_closest_points(clicked['x'], clicked['y'])

    # More traces than the original plot means an earlier click already added
    # highlight traces: drop them before adding the new ones.
    if len(figure['data']) > NUMBER_OF_TRACES:
        figure['data'] = figure['data'][:NUMBER_OF_TRACES]

    figure['data'].extend(
        {
            'marker': {'color': 'teal', 'symbol': 'circle'},
            'mode': 'markers',
            'orientation': 'v',
            'showlegend': False,
            'x': [x],
            'xaxis': 'x',
            'y': [y],
            'yaxis': 'y',
            'type': 'scatter',
            'selectedpoints': [0],
        }
        for x, y in closest_points
    )
    return figure
# Start the Dash server (debug mode on) only when this file is run as a
# script, not when it is imported.
if __name__ == '__main__':
    app.run_server(debug=True)
Related
`update` function doesn't work correctly for bokeh interactors in python
I have a source code that plots the alphashape of a stock price. There's a slider to update the plot dynamically. But the update function doesn't work as expected. Here's the source code. x=[76.84,76.85,76.86,76.87,76.88,76.9,76.91,76.92,76.93,76.94,76.97,76.97,76.98,76.99,77.0,77.03,77.03,77.04,77.05,77.06,77.09,77.09,77.1,77.11,77.12,77.15,77.16,77.16,77.17,77.18,77.21,77.22,77.22,77.23,77.24,77.27,77.28,77.28,77.29,77.3,77.33,77.34,77.35,77.35,77.36,77.39,77.4,77.41,77.41,77.42,77.45,77.46,77.47,77.47,77.48,77.51,77.52,77.53,77.54,77.54,77.57,77.58,77.59,77.6,77.6,77.63,77.64,77.65,77.66,77.66,77.69,77.7,77.71,77.72,77.73,77.75,77.76,77.77,77.78,77.79,77.81,77.82,77.83,77.84,77.85,77.87,77.88,77.89,77.9,77.91,77.93,77.94,77.95,77.96,77.97,77.99,78.0,78.01,78.02,78.03,78.05,78.06,78.07,78.08,78.09,78.13,78.14,78.15,78.17,78.18,78.19,78.2,78.21,78.24,78.24,78.25,78.26,78.27,78.3,78.3,78.31,78.32,78.33,78.36,78.36,78.37,78.38,78.39,78.42,78.43,78.43,78.44,78.45,78.48,78.49,78.49,78.5,78.51,78.54,78.55,78.55,78.56,78.57,78.6,78.61,78.62,78.62,78.63,78.66,78.67,78.68,78.68,78.69,78.72,78.73,78.74,78.74,78.75,78.78,78.79,78.8,78.81,78.81,78.84,78.85,78.86,78.87,78.87,78.91,78.92,78.93,78.94,78.96,78.97,78.98,78.99,79.0,79.02,79.03,79.04,79.05,79.06,79.08,79.09,79.1,79.11,79.12,79.2,79.21,79.22,79.23,79.24,79.26,79.27,79.28,79.29,79.3,79.32,79.33,79.34,79.35,79.36,79.38,79.39,79.4,79.41,79.42,79.44,79.45,79.46,79.47,79.48,79.51,79.51,79.52,79.53,79.54,79.57,79.57,79.58,79.59,79.6,79.63,79.63,79.64,79.65,79.66,79.69,79.7,79.7,79.71,79.72,79.75,79.76,79.76,79.77,79.78,79.81,79.82,79.82,79.83,79.84,79.87,79.88,79.89,79.89,79.9,79.94,79.95,79.95,79.96,79.99,80.0,80.01,80.02,80.02,80.05,80.06,80.07,80.08,80.08,80.11,80.12,80.13,80.14,80.14,80.17,80.18,80.19,80.2,80.21,80.23,80.24,80.25,80.26,80.27,80.29,80.3,80.31,80.32,80.33,80.35,80.36,80.37,80.38,80.39,80.41,80.42,80.43,80.44,80.45,80.47,80.48,80.49,80.5,80.51,80.53,80.54,80.55,80.56,80.57,80.59,80.6,80.61,80.62,80.63,80.
65,80.66,80.67,80.68,80.69,80.71,80.72,80.73,80.74,80.75,80.78,80.78,80.79,80.8,80.81,80.84,80.84,80.85,80.86,80.87,80.9,80.9,80.91,80.92,80.93,80.96,80.97,80.97,80.98,80.99,81.02,81.03,81.03,81.04,81.05,81.08,81.09,81.1,81.1,81.11,81.14,81.15,81.16,81.16,81.17,81.2,81.21,81.22,81.22,81.23,81.28,81.29,81.29,81.32,81.33,81.34,81.35,81.35,81.38,81.39,81.4,81.41,81.41,81.44,81.45,81.46,81.47,81.48,81.5,81.51,81.52,81.53,81.54,81.56,81.57,81.58,81.59,81.6,81.62,81.63,81.64,81.65,81.66,81.68,81.69,81.7,81.71,81.72,81.74,81.75,81.76,81.77,81.78,81.8,81.81,81.82,81.83,81.84,81.86,81.87,81.88,81.89,81.9,81.92,81.93,81.94,81.95,81.96,81.98,81.99,82.0,82.01,82.02,82.05,82.06,82.07,82.08,82.11,82.11,82.12,82.13,82.14,82.17,82.18,82.18,82.19,82.2,82.23,82.24,82.24,82.25,82.26,82.29,82.3,82.3,82.31,82.32,82.35,82.36,82.37,82.37,82.38,82.41,82.42,82.43,82.43,82.44,82.59,82.6,82.61,82.62,82.62,82.65,82.66,82.67,82.68,82.68,82.71,82.72,82.73,82.74,82.75,82.77,82.78,82.79,82.8,82.81,82.83,82.84,82.85,82.86,82.87,82.89,82.9,82.91,82.92,82.93,82.95,82.96,82.97,82.98,82.99,83.01,83.02,83.03,83.04,83.05,83.07,83.08,83.1,83.11,83.13,83.14,83.15,83.16,83.17,83.19,83.2,83.21,83.22,83.23,83.26,83.26,83.27,83.28,83.29,83.32,83.32,83.33,83.34,83.35,83.38,83.38,83.39,83.4,83.41,83.44,83.45,83.45,83.46,83.47,83.5,83.51,83.51,83.52,83.53,83.56,83.57,83.57,83.58,83.59,83.62,83.63,83.64,83.64,83.65,83.68,83.69,83.7,83.7,83.71,83.74,83.75,83.76,83.76,83.77,83.8,83.81,83.82,83.83,83.83,83.86,83.87,83.88,83.89,83.89,83.92,83.93,83.94,83.95,83.95,83.98,83.99,84.0,84.01,84.02,84.04,84.05,84.06,84.07,84.08,84.1,84.11,84.12,84.13,84.14,84.16,84.17,84.18,84.19,84.2,84.22,84.23,84.24,84.25,84.26,84.28,84.29,84.3,84.31,84.32,84.34,84.35,84.36,84.37,84.38,84.43,84.44,84.46,84.47,84.48,84.49,84.5,84.53,84.53,84.54,84.55,84.56,84.59,84.59,84.6,84.61,84.62,84.65,84.65,84.66,84.67,84.68,84.71,84.72,84.72,84.73,84.74,84.77,84.78,84.78,84.79,84.8,84.83,84.84,84.84,84.85,84.86,84.89,84.9,84.91,84.91,84.92,84.95,84.
96,84.97,84.97,84.98,85.01,85.02,85.03,85.03,85.04,85.07,85.08,85.09,85.1,85.1,85.13,85.14,85.15,85.16,85.16,85.19,85.2,85.22,85.22,85.25,85.26,85.27,85.28,85.29,85.31,85.32,85.33,85.34,85.35,85.37,85.38,85.39,85.4,85.41,85.43,85.44,85.45,85.46,85.47,85.61,85.62,85.63,85.64,85.65,85.67,85.68,85.69,85.7,85.71,85.73,85.74,85.75,85.76,85.77,85.8,85.8,85.81,85.82,85.83,85.86,85.86,85.87,85.88,85.89,85.92,85.92,85.93,85.94,85.95,85.98,85.99,85.99,86.0,86.01,86.04,86.05,86.05,86.06,86.07,86.1,86.11,86.11,86.12,86.13,86.16,86.17,86.18,86.18,86.19,86.22,86.23,86.24,86.28,86.29,86.3,86.3,86.31,86.34,86.35,86.36,86.37,86.37,86.4,86.41,86.42,86.43,86.43,86.46,86.47,86.48,86.49,86.5,86.52,86.53,86.54,86.55,86.56,86.58,86.59,86.6,86.61,86.62,86.64,86.65,86.66,86.67,86.68,86.7,86.71,86.72,86.73,86.74,86.78,86.79,86.8,86.82,86.83,86.84,86.85,86.86,86.88,86.89,86.9,86.91,86.92,86.94,86.95,86.96,86.97,86.98,87.0,87.01,87.02,87.03,87.04,87.07,87.07,87.08,87.09,87.1,87.13,87.13,87.14,87.15,87.16,87.19,87.19,87.2,87.21,87.22,87.25,87.26,87.26,87.27,87.28,87.31,87.32,87.32,87.33,87.34,87.37,87.38,87.38,87.39,87.4,87.43,87.44,87.45,87.45,87.46,87.49,87.5,87.51,87.51,87.52,87.55,87.56,87.61,87.62,87.63,87.64,87.64,87.67,87.68,87.69,87.7,87.7,87.73,87.74,87.75,87.76,87.77,87.79,87.8,87.81,87.82,87.83,87.85,87.86,87.87,87.88,87.89,87.91,87.92,87.93,87.94,87.95,87.97,87.98,87.99,88.0,88.01,88.03,88.04,88.05,88.06,88.07,88.09,88.1,88.11,88.12,88.13,88.15,88.16,88.17,88.18,88.19,88.21,88.22,88.23,88.24,88.25,88.27,88.28,88.29,88.3,88.31,88.34,88.34,88.35,88.4,88.4,88.41,88.42,88.43,88.46,88.46,88.47,88.48,88.49,88.52,88.53,88.53,88.54,88.55,88.7,88.71,88.72,88.72,88.73,88.76,88.77,88.78,88.78,88.79,88.82,88.83,88.84,88.85,88.85,88.88,88.89,88.9,88.91,88.91,88.94,88.95,88.96,88.97,88.97,89.0,89.01,89.02,89.03,89.04,89.06,89.07,89.08,89.09,89.1,89.12,89.13,89.14,89.15,89.16,89.18,89.19,89.2,89.21,89.22,89.24,89.25,89.26,89.27,89.28,89.3,89.31,89.32,89.33,89.34,89.36,89.37,89.38,89.39,89.42,89.43
,89.44,89.45,89.46,89.48,89.49,89.5,89.51,89.52,89.54,89.55,89.56,89.57,89.58,89.61] y=[2.29,2.41,2.4,2.38,2.43,2.42,2.38,2.36,2.4,2.37,2.36,2.37,2.34,2.32,2.31,2.25,2.25,2.21,2.2,2.21,2.21,2.21,2.21,2.19,2.17,2.1,2.08,2.08,2.12,2.15,2.1,2.09,2.1,2.08,2.08,2.01,2.0,1.98,1.98,1.95,1.92,1.92,1.92,1.92,1.92,1.88,1.88,1.91,1.91,1.88,1.89,1.87,1.85,1.84,1.83,1.88,1.93,1.88,1.82,1.82,2.08,2.13,2.35,2.32,2.37,2.34,2.25,2.35,2.33,2.34,2.32,2.34,2.39,2.53,2.49,2.53,2.54,2.55,2.53,2.52,2.52,2.54,2.66,2.71,2.81,2.92,3.09,2.99,3.03,2.98,3.01,2.98,2.93,2.91,2.93,2.91,2.89,2.92,2.9,2.87,2.9,2.9,2.93,2.83,2.78,2.67,2.6,2.66,2.61,2.61,2.61,2.54,2.56,2.51,2.52,2.55,2.6,2.6,2.67,2.63,2.62,2.63,2.61,2.58,2.59,2.59,2.62,2.59,2.58,2.61,2.63,2.6,2.63,2.63,2.61,2.6,2.58,2.58,2.57,2.58,2.58,2.58,2.58,2.57,2.58,2.58,2.58,2.58,2.55,2.52,2.53,2.53,2.51,2.46,2.48,2.45,2.54,2.53,2.49,2.51,2.49,2.48,2.49,2.47,2.48,2.49,2.48,2.5,2.5,2.55,2.53,2.52,2.51,2.49,2.5,2.49,2.49,2.47,2.46,2.48,2.45,2.45,2.43,2.43,2.45,2.45,2.45,2.45,2.45,2.45,2.45,2.45,2.46,2.45,2.44,2.44,2.45,2.45,2.47,2.56,2.52,2.48,2.47,2.5,2.54,2.54,2.58,2.61,2.63,2.63,2.63,2.61,2.59,2.59,2.56,2.57,2.58,2.56,2.57,2.61,2.59,2.6,2.6,2.58,2.6,2.59,2.6,2.61,2.61,2.59,2.6,2.62,2.62,2.6,2.61,2.59,2.59,2.59,2.59,2.61,2.67,2.65,2.63,2.63,2.6,2.56,2.59,2.59,2.59,2.58,2.58,2.57,2.58,2.55,2.55,2.58,2.58,2.57,2.58,2.83,2.88,2.93,2.79,2.82,2.81,2.86,2.86,2.85,2.82,2.82,2.82,2.78,2.78,2.82,2.79,2.8,2.79,2.79,2.78,2.72,2.73,2.71,2.72,2.73,2.73,2.74,2.74,2.72,2.73,2.73,2.71,2.68,2.71,2.75,2.84,2.91,2.89,2.92,2.97,2.96,2.94,2.99,3.04,2.97,2.99,2.97,2.99,2.98,2.99,3.0,3.01,2.99,2.98,2.99,2.99,2.99,3.01,2.96,2.97,3.0,2.98,2.97,2.96,2.96,3.0,3.0,2.99,2.98,2.99,2.99,2.99,2.99,2.99,2.99,2.98,2.98,2.98,2.98,3.02,3.03,3.03,3.05,3.09,3.08,3.1,3.12,3.14,3.13,3.12,3.14,3.15,3.13,3.15,3.14,3.14,3.14,3.14,3.13,3.11,3.08,3.08,3.08,3.08,3.1,3.11,3.11,3.11,3.09,3.13,3.17,3.28,3.43,3.52,3.47,3.45,3.45,3.45,3.44,3.46,3.46,3.45,3.44,3.45,3.45,3.45,3.45,3.45,3.47,3.5,3
.54,3.52,3.5,3.5,3.5,3.44,3.45,3.45,3.45,3.43,3.45,3.48,3.48,3.45,3.46,3.43,3.46,3.45,3.43,3.43,3.42,3.42,3.43,3.42,3.41,3.39,3.38,3.38,3.38,3.4,3.39,3.38,3.39,3.37,3.37,3.38,3.38,3.38,3.38,3.38,3.38,3.37,3.36,3.37,3.36,3.36,3.37,3.36,3.41,3.41,3.4,3.39,3.39,3.37,3.37,3.36,3.36,3.36,3.36,3.36,3.37,3.36,3.37,3.39,3.45,3.42,3.39,3.4,3.4,3.39,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.42,3.42,3.41,3.39,3.39,3.39,3.37,3.38,3.4,3.41,3.44,3.43,3.43,3.43,3.43,3.42,3.42,3.42,3.47,3.46,3.47,3.53,3.65,3.59,3.76,3.85,3.77,3.9,3.76,3.75,3.8,3.73,3.7,3.66,3.68,3.66,3.69,3.68,3.69,3.69,3.61,3.61,3.61,3.59,3.59,3.59,3.63,3.61,3.62,3.63,3.62,3.61,3.61,3.62,3.69,3.66,3.69,3.68,3.66,3.65,3.66,3.68,3.78,3.76,3.77,3.74,3.75,3.77,3.75,3.7,3.7,3.73,3.74,3.79,3.83,3.87,3.86,3.8,3.81,3.78,3.8,3.78,3.78,3.84,3.81,3.81,3.82,3.78,3.75,3.76,3.74,3.72,3.71,3.72,3.78,3.78,3.77,3.76,3.74,3.74,3.75,3.75,3.73,3.72,3.71,3.68,3.7,3.67,3.64,3.56,3.57,3.56,3.61,3.62,3.59,3.57,3.59,3.55,3.54,3.53,3.52,3.53,3.53,3.58,3.6,3.57,3.53,3.53,3.54,3.55,3.57,3.57,3.58,3.64,3.63,3.6,3.6,3.6,3.59,3.6,3.6,3.61,3.61,3.62,3.64,3.64,3.64,3.69,3.73,3.71,3.69,3.69,3.69,3.65,3.66,3.66,3.72,3.73,3.7,3.7,3.72,3.74,3.74,3.74,3.79,3.85,3.9,3.88,3.93,3.86,3.94,4.0,4.0,3.97,3.94,3.93,3.91,3.92,3.94,3.94,3.94,3.99,3.98,4.01,3.99,3.92,3.82,3.71,3.81,3.77,3.76,3.81,3.79,3.83,3.83,3.88,3.89,3.84,3.84,3.83,3.79,3.81,3.8,3.81,3.82,3.83,3.8,3.81,3.81,3.83,3.83,3.86,3.92,3.93,3.97,3.97,3.96,3.95,3.94,3.96,3.98,3.88,3.98,4.0,4.02,4.04,4.08,4.09,4.09,4.16,4.22,4.21,4.19,4.19,4.18,4.19,4.2,4.19,4.2,4.21,4.27,4.3,4.29,4.26,4.29,4.29,4.34,4.36,4.35,4.33,4.33,4.36,4.34,4.33,4.34,4.37,4.35,4.36,4.39,4.38,4.41,4.4,4.4,4.39,4.39,4.41,4.42,4.46,4.48,4.53,4.63,4.65,4.71,4.81,4.91,5.0,4.95,5.04,5.01,4.98,4.9,4.95,4.91,4.8,4.9,4.86,4.76,4.77,4.77,4.79,4.8,4.79,4.81,4.89,4.87,4.87,4.87,4.8,4.79,4.75,4.69,4.69,4.71,4.78,4.76,4.74,4.73,4.8,4.81,4.84,4.83,4.83,4.83,4.79,4.75,4.75,4.66,4.69,4.7,4.68,4.7,4.73,4.72,4.75,4.75,4.75,4.71,4.72,4.71,4.69,
4.68,4.64,4.65,4.65,4.66,4.66,4.64,4.65,4.64,4.62,4.63,4.6,4.52,4.45,4.53,4.49,4.5,4.48,4.37,4.39,4.4,4.41,4.43,4.47,4.46,4.45,4.42,4.44,4.45,4.45,4.44,4.43,4.41,4.41,4.44,4.41,4.38,4.38,4.37,4.37,4.38,4.32,4.24,4.29,4.31,4.29,4.27,4.28,4.28,4.28,4.32,4.32,4.33,4.33,4.32,4.33,4.39,4.47,4.47,4.53,4.53,4.53,4.52,4.54,4.51,4.53,4.53,4.53,4.54,4.54,4.58,4.56,4.58,4.56,4.55,4.53,4.54,4.54,4.55,4.54,4.53,4.52,4.49,4.45,4.45,4.46,4.46,4.48,4.46,4.47,4.47,4.49,4.47,4.47,4.48,4.51,4.57,4.57,4.59,4.61,4.57,4.57,4.6,4.64,4.64,4.63,4.65,4.65,4.64,4.64,4.66,4.72,4.73,4.76,4.74,4.8,4.78,4.72,4.76,4.86,4.86,4.88,4.86,4.83,4.85,4.85,4.84,4.81,4.82,4.82,4.82,4.81,4.82,4.85,4.85,4.84,4.82,4.81,4.78,4.81,4.79,4.75,4.78,4.8,4.79,4.78,4.76,4.77,4.77,4.77,4.78,4.79,4.79,4.76,4.75,4.74,4.73,4.74,4.75,4.8,4.81,4.84,4.82,4.8,4.81,4.8,4.77,4.81,4.8,4.81,4.84,4.86,4.83,4.82,4.81,4.8,4.78,4.81,4.81,4.82,4.88,4.84,4.84,4.83,4.83,4.85,4.85,4.83,4.81,4.82,4.79,4.8,4.79,4.78,4.8,4.79,4.78,4.77,4.78,4.77,4.76,] from alphashape import alphashape from shapely.geometry import mapping from bokeh.plotting import figure from ipywidgets import interact from bokeh.io import output_notebook, show, push_notebook def alphashape_func(x, y, alpha): length = range(len(x)) # date count pnt = [[x[i],y[i]] for i in length] # return a shapely.polygon/multipolygon alpha_shape = alphashape(pnt, alpha=alpha) # convert shapely.polygon/multipolygon to list map = mapping(alpha_shape)['coordinates'] poly_shp = [i[0] for i in map] bound_len = len(poly_shp) # single alpha shape case if bound_len == 1: bound_x = [i[0] for i in poly_shp] bound_y = [i[1] for i in poly_shp] # multiple alpha shape case else: bound_x = [[i[0] for i in poly_shp[j]] for j in range(bound_len)] bound_y = [[i[1] for i in poly_shp[j]] for j in range(bound_len)] # return a dict containing 2 lists: x & y. 
return {'x':bound_x, 'y':bound_y} alpha = 5 alpha_high_pnt = alphashape_func(x,y,alpha) plot = figure(sizing_mode='stretch_width', output_backend="webgl") # line_pnt(plot, max_processed_xy['x'], max_processed_xy['y'],legend_label ='processed_xy',line_color='yellow', line_width=2) alpha_shape_plt = plot.multi_line(xs=alpha_high_pnt['x'],ys=alpha_high_pnt['y'], line_color='cyan',legend_label = 'alpha_high_pnt') # create an update function def update(alpha=5): alpha_high_pnt = alphashape_func(x,y,alpha) alpha_shape_plt.data_source.data['xs'] = alpha_high_pnt['x'] alpha_shape_plt.data_source.data['ys'] = alpha_high_pnt['y'] # push new values to the notebook push_notebook() output_notebook() show(plot) interact(update, alpha=(0,25,1)) (the dynamic slider only works when you run it in jupyter in a web browser) When I drag the slider, it shows an error message: BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('xs', 54), ('ys', 99) I don't see the reason of this error, since when I manually adjust the alpha value, the lengths of xs and ys equal. Can anyone help? ===================== update ====================== Based on #bigreddot suggestion, I update the code to this, the doesn't match problem is resolved, but the plot doesn't refresh yet. 
from alphashape import alphashape from shapely.geometry import mapping from bokeh.plotting import figure from bokeh.io import output_notebook, show, push_notebook from bokeh.models import ColumnDataSource from ipywidgets import interact output_notebook() def alphashape_func(x, y, alpha): length = range(len(x)) # date count pnt = [[x[i],y[i]] for i in length] # return a shapely.polygon/multipolygon alpha_shape = alphashape(pnt, alpha=alpha) # convert shapely.polygon/multipolygon to list map = mapping(alpha_shape)['coordinates'] poly_shp = [i[0] for i in map] bound_len = len(poly_shp) # single alpha shape case if bound_len == 1: bound_x = [i[0] for i in poly_shp] bound_y = [i[1] for i in poly_shp] # multiple alpha shape case else: bound_x = [[i[0] for i in poly_shp[j]] for j in range(bound_len)] bound_y = [[i[1] for i in poly_shp[j]] for j in range(bound_len)] # return a dict containing 2 lists: x & y. return {'x':bound_x, 'y':bound_y} alpha = 5 plot = figure(sizing_mode='stretch_width', output_backend="webgl") source = ColumnDataSource(data=alphashape_func(x,y,alpha)) alpha_shape_plt = plot.multi_line(source=source, xs='x',ys='y', line_color='cyan',legend_label = 'alpha_high_pnt') print # create an update function def update(alpha=5): source.data = alphashape_func(x,y,alpha) # push new values to the notebook push_notebook() interact(update, alpha=(0,25,1)) show(plot)
In between this line: alpha_shape_plt.data_source.data['xs'] = alpha_high_pnt['x'] and this line: alpha_shape_plt.data_source.data['ys'] = alpha_high_pnt['y'] the CDS columns are not all the same length. If you need to update with data that has a new length you should collect all the updates up front in a new_data dict and then set source.data = new_data to update the CDS "all at once". This is more efficient in any case, as well, since it results in fewer property update change events being sent out.
Integrating animated process map from bupaR into Bokeh dashboard in Python; is there a way?
I am trying to develop a Bokeh dashboard in Python and I want to add in a process mining feature to it. I have experience with the bupaR package in R and it has really nice animated process maps within it, which is what I would like to implement into the dashboard I am making. I have seen some documentation of how to implement R code within Python i.e. r2py, some ggplot in the bokeh dashboard etc, but what I am wanting seems a bit niche and I am not sure if it is possible. Python does have a pm4py extension to bupaR but so far I have not been able to see a way to implement animated process map similar to the one in R within a bokeh dashboard. Just to provide some sort of example (albeit, unrelated but just for demonstration purposes), here is some Python code for a bokeh dashboard for a clustering app:- #https://raw.githubusercontent.com/bokeh/bokeh/master/examples/app/clustering/main.py import numpy as np from sklearn import cluster, datasets from sklearn.neighbors import kneighbors_graph from sklearn.preprocessing import StandardScaler from bokeh.io import curdoc from bokeh.layouts import column, row from bokeh.models import ColumnDataSource, Select, Slider from bokeh.palettes import Spectral6 from bokeh.plotting import figure np.random.seed(0) # define some helper functions def clustering(X, algorithm, n_clusters): # normalize dataset for easier parameter selection X = StandardScaler().fit_transform(X) # estimate bandwidth for mean shift bandwidth = cluster.estimate_bandwidth(X, quantile=0.3) # connectivity matrix for structured Ward connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False) # make connectivity symmetric connectivity = 0.5 * (connectivity + connectivity.T) # Generate the new colors: if algorithm=='MiniBatchKMeans': model = cluster.MiniBatchKMeans(n_clusters=n_clusters) elif algorithm=='Birch': model = cluster.Birch(n_clusters=n_clusters) elif algorithm=='DBSCAN': model = cluster.DBSCAN(eps=.2) elif algorithm=='AffinityPropagation': 
model = cluster.AffinityPropagation(damping=.9, preference=-200) elif algorithm=='MeanShift': model = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True) elif algorithm=='SpectralClustering': model = cluster.SpectralClustering(n_clusters=n_clusters, eigen_solver='arpack', affinity="nearest_neighbors") elif algorithm=='Ward': model = cluster.AgglomerativeClustering(n_clusters=n_clusters, linkage='ward', connectivity=connectivity) elif algorithm=='AgglomerativeClustering': model = cluster.AgglomerativeClustering(linkage="average", affinity="cityblock", n_clusters=n_clusters, connectivity=connectivity) elif algorithm=='KMeans': model = cluster.KMeans(n_clusters= n_clusters) model.fit(X) if hasattr(model, 'labels_'): y_pred = model.labels_.astype(int) else: y_pred = model.predict(X) return X, y_pred def get_dataset(dataset, n_samples): if dataset == 'Noisy Circles': return datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05) elif dataset == 'Noisy Moons': return datasets.make_moons(n_samples=n_samples, noise=0.05) elif dataset == 'Blobs': return datasets.make_blobs(n_samples=n_samples, random_state=8) elif dataset == "No Structure": return np.random.rand(n_samples, 2), None # set up initial data n_samples = 1500 n_clusters = 2 algorithm = 'MiniBatchKMeans' dataset = 'Noisy Circles' X, y = get_dataset(dataset, n_samples) X, y_pred = clustering(X, algorithm, n_clusters) spectral = np.hstack([Spectral6] * 20) colors = [spectral[i] for i in y] # set up plot (styling in theme.yaml) plot = figure(toolbar_location=None, title=algorithm) source = ColumnDataSource(data=dict(x=X[:, 0], y=X[:, 1], colors=colors)) plot.circle('x', 'y', fill_color='colors', line_color=None, source=source) # set up widgets clustering_algorithms= [ 'MiniBatchKMeans', 'AffinityPropagation', 'MeanShift', 'SpectralClustering', 'Ward', 'AgglomerativeClustering', 'DBSCAN', 'Birch', 'KMeans' ] datasets_names = [ 'Noisy Circles', 'Noisy Moons', 'Blobs', 'No Structure' ] algorithm_select = 
Select(value='MiniBatchKMeans', title='Select algorithm:', width=200, options=clustering_algorithms) dataset_select = Select(value='Noisy Circles', title='Select dataset:', width=200, options=datasets_names) samples_slider = Slider(title="Number of samples", value=1500.0, start=1000.0, end=3000.0, step=100, width=400) clusters_slider = Slider(title="Number of clusters", value=2.0, start=2.0, end=10.0, step=1, width=400) # set up callbacks def update_algorithm_or_clusters(attrname, old, new): global X algorithm = algorithm_select.value n_clusters = int(clusters_slider.value) X, y_pred = clustering(X, algorithm, n_clusters) colors = [spectral[i] for i in y_pred] source.data = dict(colors=colors, x=X[:, 0], y=X[:, 1]) plot.title.text = algorithm def update_samples_or_dataset(attrname, old, new): global X, y dataset = dataset_select.value algorithm = algorithm_select.value n_clusters = int(clusters_slider.value) n_samples = int(samples_slider.value) X, y = get_dataset(dataset, n_samples) X, y_pred = clustering(X, algorithm, n_clusters) colors = [spectral[i] for i in y_pred] source.data = dict(colors=colors, x=X[:, 0], y=X[:, 1]) algorithm_select.on_change('value', update_algorithm_or_clusters) clusters_slider.on_change('value_throttled', update_algorithm_or_clusters) dataset_select.on_change('value', update_samples_or_dataset) samples_slider.on_change('value_throttled', update_samples_or_dataset) # set up layout selects = row(dataset_select, algorithm_select, width=420) inputs = column(selects, samples_slider, clusters_slider) # add to document curdoc().add_root(row(inputs, plot)) curdoc().title = "Clustering" Which will give you something like this:- And below it, I would like to put the process mining animation from bupaR:- library(bupaR) library(processanimateR) library(eventdataR) animate_process(patients) Which gives you something like this:- Is there a way to deploy both of these features in a bokeh dashboard? 
Or is there a more straightforward alternative to creating an animated process map within a bokeh dashboard that is more native to Python?
Yes, you can create animations in Bokeh. It works much like interactions with widgets. It is possible with the add_periodic_callback feature, which changes values automatically at a fixed interval. For your example, if you want to change a slider: def animate_update(): # animation only change slider over and over here. value= slider.value + 1 if value> valuesdict[-1]: value= valuesdict[0] slider.value = value def animate(): global callback_id callback_id = curdoc().add_periodic_callback(animate_update, 500) callback_id = None
Hover data on bar graph without plotting text - plotly
I am struggling with something that is probably quite simple. consider this: I have unit ids with data for each unit id. I want to construct a bar graph and to be able to select whatever unit id I want. To do this I chose plotly px. I am constructing the bars according to some range split method (using pd.cut) The result I have is very close to what I want. when I’m hovering on the different units on the graph I see the unit id, range, and exact value but the text is also plotted on the graph and makes the plot very messy. I tried to use different px.bar attributed to replacing %{text}% but without success. Small code that reproduces the plot I have import dash_core_components as dcc import numpy as np import pandas as pd import dash import dash_html_components as html import plotly.express as px data = np.random.rand(300) unit_id = np.random.choice(range(1000), 300, replace=False) data_tuple = sorted([(str(uid), data) for uid, data in zip(unit_id, data)], key=lambda x: x[1]) y_data = [y[1] for y in data_tuple] x_data = [i for i in range(1, len(y_data) + 1)] range_bins = pd.cut(np.array(np.array(y_data).astype(float)), 10) data_range_lbl = [str(v) for v in range_bins] sorted_unit_id = [x[0] for x in data_tuple] unit_id_text = [sorted_unit_id[i] + '<br><b>Value</b>: ' + str(y_data[i]) for i in range(0, len(sorted_unit_id))] fig = px.bar(x=data_range_lbl, y=[1 for i in range(0, len(data_range_lbl))], text=unit_id_text) fig.update_layout(dragmode='select') fig.update_traces(hovertemplate= '<b>Die Id</b>: %{text}' + '<br><b>Range</b>: %{x}<br>') app = dash.Dash(__name__) app.layout = html.Div([ html.Div(id='container-button-timestamp'), dcc.Graph( id='sample-graph', figure=fig ), ]) if __name__ == '__main__': app.run_server(debug=True, port=8050) how it looks:
Using hovertext did the trick. I removed text from the px.bar arguments: fig = px.bar(x=data_range_lbl, y=[1 for i in range(0, len(data_range_lbl))]) and changed fig.update_traces to: fig.update_traces( hovertext=unit_id_text, hovertemplate= '<b>Die Id</b>: %{hovertext}' + '<br><b>Range</b>: %{x}<br>')
Multiple route mapping to different matplotlib graphs in flask app
I have this "flask app" with two links, each mapping to different matplotlib visualizations, for example: localhost:5000/line_chart and localhost:5000/bar_chart. When I start the server and click a route (any of them), I see what I expect. localhost:5000/bar_chart When I go back and view the other link, both graphs break. localhost:5000/line_chart localhost:5000/bar_chart I can reproduce this every time by closing the server then running the "run.py" script again. Seems to be an overwriting conflict with the in-memory buffer. Has anyone had this issue before? app/views.py import matplotlib matplotlib.use('Agg') # this allows PNG plotting import matplotlib.pyplot as plt import base64 from flask import render_template from app import app from io import BytesIO @app.route('/') @app.route('/index') def index(): res = '' navigation = [['Line Chart','line_chart'],['Bar Chart','bar_chart']] res = res + '<h1>Matplotlib Chart Examples</h1>' res = res + '<ul>' for item in navigation: name = item[0] link = item[1] res = res + '<li>'+ name +'</li>' res = res +'</ul>' return res @app.route('/bar_chart') def bar_chart(): movies = ["Annie Hall", "Ben-Hur", "Casablanca", "Gandhi", "West Side Story"] num_oscars = [5, 11, 3, 8, 10] # bars are by default width 0.8, so we'll add 0.1 to the left coordinates # so that each bar is centered xs = [i + 0.1 for i, _ in enumerate(movies)] # plot bars with left x-coordinates [xs], heights [num_oscars] plt.bar(xs, num_oscars) plt.ylabel("# of Academy Awards") plt.title("My Favorite Movies") # label x-axis with movie names at bar centers plt.xticks([i + 0.5 for i, _ in enumerate(movies)], movies) return compute(plt) @app.route('/line_chart') def line_chart(): years = [1950, 1960, 1970, 1980, 1990, 2000, 2010] gdp = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3] # create a line chart, years on x-axis, gdp on y-axis plt.plot(years, gdp, color='green', marker='o', linestyle='solid') # add a title plt.title("Nominal GDP") # add a
label to the y-axis plt.ylabel("Billions of $") return compute(plt) def compute(plt): # run plt.plot, plt.title, etc. figfile = BytesIO() plt.savefig(figfile, format='png') figfile.seek(0) # rewind to beginning of file #figfile.getvalue() extracts string (stream of bytes) figdata_png = base64.b64encode(figfile.getvalue()) return render_template('index.html', title='matplotlib chart', results=figdata_png) Thank you for your time.
I guess you need two separate figures. Test this code and tell me what happens: @app.route('/bar_chart') def bar_chart(): movies = ["Annie Hall", "Ben-Hur", "Casablanca", "Gandhi", "West Side Story"] num_oscars = [5, 11, 3, 8, 10] # bars are by default width 0.8, so we'll add 0.1 to the left coordinates # so that each bar is centered xs = [i + 0.1 for i, _ in enumerate(movies)] # plot bars with left x-coordinates [xs], heights [num_oscars] plt.figure(1) plt.bar(xs, num_oscars) plt.ylabel("# of Academy Awards") plt.title("My Favorite Movies") # label x-axis with movie names at bar centers plt.xticks([i + 0.5 for i, _ in enumerate(movies)], movies) return compute(plt, 1) @app.route('/line_chart') def line_chart(): years = [1950, 1960, 1970, 1980, 1990, 2000, 2010] gdp = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3] # create a line chart, years on x-axis, gdp on y-axis plt.figure(2) plt.plot(years, gdp, color='green', marker='o', linestyle='solid') # add a title plt.title("Nominal GDP") # add a label to the y-axis plt.ylabel("Billions of $") return compute(plt,2) def compute(plt, fignum): # run plt.plot, plt.title, etc. plt.figure(fignum) figfile = BytesIO() plt.savefig(figfile, format='png') figfile.seek(0) # rewind to beginning of file #figfile.getvalue() extracts string (stream of bytes) figdata_png = base64.b64encode(figfile.getvalue()) return render_template('index.html', title='matplotlib chart', results=figdata_png)
In my case, that solution didn't work. It seems that there is a race condition when trying to access plot. I first tried to use a lock from a library, but that didn't work, so instead I sort of engineered out a lock. In my case, I wanted to create n images using the same function on the same view, so I started by creating a list in the following way: queue = [False for i in range(n)] Then, my flask app looked something like this: @app.route('/vis/<j>') def vis(j): global queue # We check that it's image's #j turn, as if it was single threaded j = int(j) if j == 0: for i in range(len(queue)): queue[i] = False else: while not queue[j-1]: # If it's not, we sleep for a short time (from time import sleep) sleep(0.5) # This is not important, it's how I was plotting some random figures # (from random import seed) (from datetime import datetime) seed(datetime.now()) n = 10 p1 = [randint(0, 10) for _ in range(n)] p2 = [randint(0, 10) for _ in range(n)] t = [i for i in range(n)] fig = plt.figure(j) plt.clf() plt.plot(t, p1, color='blue') plt.plot(t, p2, color='orange') plt.xlabel('Time') plt.ylabel('Value') # Save the plot img = BytesIO() fig.savefig(img, dpi=128) img.seek(0) # We finished using everything related to plot, so we free the "lock" queue[j] = True # Return the object as a file that can be accessed return send_file(img, mimetype='image/png') Finally, when wanting to display this in my flask app, all I had to do was use this <img src="/vis/1"> in my html file. Edit: I forgot one of the most important parts! For some reason, this would still create some unrelated thread issue. I looked it up and that's when I came up with the full solution. The threading issue was solved by adding at the beginning of the file: import matplotlib import matplotlib.pyplot as plt matplotlib.use('Agg') For some reason, using that Agg backend solved the second threading issue I was having. I don't really have a good explanation for that, but it does work, so it's enough for me.
Alternatively, what also worked was running the app with threading disabled by adding: if __name__ == '__main__': app.run(threaded=False, debug=True) I don't know, however, whether this works in production at the moment, so I preferred the other solution. :) I hope this helps if you had the same issue!
Plotly for python, only first data point is being graphed
I am new to plotly and working on a script to generate a graph based on some results pulled from a database. However when I send the data over to plotly, only the first data point for each of the three traces is being graphed. I've verified that the lists contain the right data, I've even simply pasted the lists in instead of dynamically creating the variables. Unfortunately each time only the first data point is being graphed. Does anyone know what I am missing here? I am also open to another library if needed. Is it also possible to have the x axis show as a string? import plotly.plotly as py import plotly.graph_objs as go # Custom database class, works fine. from classes.database import DatabaseConnection # Database Connections and instances db_instance = DatabaseConnection() db_conn = db_instance.conn db_cur = db_instance.cur def main(): # Get a list of versions and their stats. db_cur.execute( """ select row_to_json(x) from (SELECT versions.version_number, cast(AVG(results.average) as double precision) as average, cast(AVG(results.minimum) as double precision) as minimum, cast(AVG(results.maximum) as double precision) as maximum FROM versions,results WHERE versions.version_number = results.version_number GROUP BY versions.version_number) x; """ ) versions = [] average = [] minimum = [] maximum = [] unclean = db_cur.fetchall() # Create lists for x and y coordinates. 
for row in unclean: versions.append(row[0]['version_number']) average.append(int(row[0]['average'])) minimum.append(int(row[0]['minimum'])) maximum.append(int(row[0]['maximum'])) grph_average = go.Scatter( x=versions, y=average, name = 'Average', mode='lines', ) grph_minimum = go.Scatter( x=versions, y=minimum, name = 'Minimum', mode='lines', ) grph_maximum = go.Scatter( x=versions, y=maximum, name = 'Maximum', mode='lines', ) data = go.Data([grph_average, grph_minimum, grph_maximum]) # Edit the layout layout = dict(title = 'Responses', xaxis = dict(title = 'Versions'), yaxis = dict(title = 'Ms'), ) fig = dict(data=data, layout=layout) py.plot(fig, filename='response-times', auto_open=False) if __name__ == '__main__': main() The data that query returns is as follows, if you want to plug in the values : versions = ['6.1', '5.0', '5.2'] average = [11232, 29391, 10429] minimum = [3641, 7729, 3483] maximum = [57440, 62535, 45201]
Here is some matplotlib code that might get you started on this: import matplotlib.pyplot as plt versions = ['6.1', '5.0', '5.2'] average = [11232, 29391, 10429] minimum = [3641, 7729, 3483] maximum = [57440, 62535, 45201] plt.plot(minimum) plt.plot(average) plt.plot(maximum) plt.xticks(range(len(versions)), versions)
It looks like it was an issue with my x axis. By adding some text before the version number and explicitly casting it to a string, I was able to get the graphs to generate properly. # Create lists for x and y coordinates. for row in unclean: versions.append("Version: " + str(row[0]['version_number'])) average.append(int(row[0]['average'])) minimum.append(int(row[0]['minimum'])) maximum.append(int(row[0]['maximum']))