Histogram with slider filter - python

I would like to create a histogram combined with a density plot in Bokeh, with a slider filter. At the moment, I have the building blocks to create a Bokeh histogram with a density plot, taken from another thread. I don't know how to create the callback function to update the data and re-render the plot.
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from numpy import histogram, linspace
# gaussian_kde is public API of scipy.stats; the scipy.stats.kde module path
# is a deprecated private namespace.
from scipy.stats import gaussian_kde

# Fit a Gaussian kernel density estimate to the horsepower column and draw it
# as a line, evaluated on 50 evenly spaced points between 0 and 250.
pdf = gaussian_kde(df.hp)
x = linspace(0, 250, 50)
p = figure(plot_height=300)
p.line(x, pdf(x))
# plot actual hist for comparison (density=True puts it on the same scale as the KDE)
hist, edges = histogram(df.hp, density=True, bins=20)
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], alpha=0.4)
show(p)

There are two ways to implement callbacks in Bokeh:
with JS code. In that case, the plot remains a standalone object, the constraint being you need to do any data manipulation within Javascript (there is a small caveat to that statement but not relevant here: scipy can't be called from such a callback)
by having the callback executed in Bokeh server, in which case you have the full arsenal of python available to you. The cost being, there's a bit more to plotting and distributing the graph than in the first case (but it's not difficult, see example).
Considering you need to refit the kde each time you change the filter condition, the second way is the only option (unless you want to do that in javascript...).
That's how you would do it (example with a filter on cyl):
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Select
from bokeh.sampledata.autompg import autompg as df
from numpy import histogram, linspace
# gaussian_kde is public API of scipy.stats; the scipy.stats.kde module path
# is a deprecated private namespace.
from scipy.stats import gaussian_kde

# Send Bokeh output to the Jupyter notebook.
output_notebook()
def modify_doc(doc):
    """Add a horsepower histogram + KDE plot with a cylinder filter to ``doc``.

    The plot reads from two initially empty data sources that are filled by
    ``update``; ``update`` runs once up front with ``'All'`` and then again
    every time the Select widget's value changes.

    Args:
        doc: Bokeh document that receives the ``column(select, plot)`` layout.
    """
    grid = linspace(0, 250, 50)
    hist_source = ColumnDataSource({'top': [], 'left': [], 'right': []})
    kde_source = ColumnDataSource({'x': [], 'y': []})

    plot = figure(plot_height=300)
    plot.line(x='x', y='y', source=kde_source)
    plot.quad(top='top', bottom=0, left='left', right='right', alpha=0.4, source=hist_source)

    def update(attr, old, new):
        """Recompute histogram and KDE for the cylinder count given by ``new``."""
        # The Select value is a string: 'All' disables the filter, anything
        # else is converted back to an int before comparing with df.cyl.
        subset = df if new == 'All' else df[df.cyl == int(new)]
        horsepower = subset.hp
        counts, bin_edges = histogram(horsepower, density=True, bins=20)
        density = gaussian_kde(horsepower)
        hist_source.data = {'top': counts, 'left': bin_edges[:-1], 'right': bin_edges[1:]}
        kde_source.data = {'x': grid, 'y': density(grid)}

    # Fill both sources once so the plot is not empty on first render.
    update(None, None, 'All')

    cyl_options = ['All'] + [str(value) for value in df.cyl.unique()]
    select = Select(title='# cyl', value='All', options=cyl_options)
    select.on_change('value', update)
    doc.add_root(column(select, plot))
# To run it in the notebook: wrap modify_doc in a Bokeh Application and show it.
plot = Application(FunctionHandler(modify_doc))
show(plot)
# Or, to run it stand-alone with `bokeh serve --show myapp.py`:
# remove the `output_notebook()` call and comment out the two notebook lines
# above, then uncomment the following two lines.
# from bokeh.io import curdoc
# modify_doc(curdoc())
A few notes:
this is made to be run in jupyter notebook (see the output_notebook() and the last uncommented two lines).
to run it outside, comment the notebook lines (see above) and uncomment the last two lines. Then you can run it from the command line.
Select will only handle str values so you need to convert in (when creating it) and out (when using the values: old and new)
for multiple filters, you need to access the state of each Select at the same time. You do that by instantiating the Selects before defining the update function (but without any callbacks, yet!) and keeping a reference to them, access their value with your_ref.value and build your condition with that. After the update definition, you can then attach the callback on each Select.
Finally, an example with multiple selects:
def modify_doc(doc):
    """Add a horsepower histogram + KDE plot with two Select filters to ``doc``.

    The layout added to ``doc`` is ``column(select_ori, select_cyl, plot)``.
    Both selects share one callback, which reads the current value of *both*
    widgets, so the plot always reflects the combined filter.

    Args:
        doc: Bokeh document that receives the layout.
    """
    x = linspace(0, 250, 50)
    source_hist = ColumnDataSource({'top': [], 'left': [], 'right': []})
    source_kde = ColumnDataSource({'x': [], 'y': []})
    p = figure(plot_height=300)
    p.line(x='x', y='y', source=source_kde)
    p.quad(top='top', bottom=0, left='left', right='right', alpha=0.4, source=source_hist)
    # The selects are created before `update` so the callback can read their state.
    select_cyl = Select(title='# cyl', value='All', options=['All'] + [str(i) for i in df.cyl.unique()])
    select_ori = Select(title='origin', value='All', options=['All'] + [str(i) for i in df.origin.unique()])

    def update(attr, old, new):
        """Refresh both data sources from the current select values.

        The ``attr``/``old``/``new`` arguments are ignored; the state is read
        from the widgets because two selects trigger this callback.
        """
        # Narrow the frame one filter at a time; 'All' leaves it untouched.
        filtered_df = df
        if select_cyl.value != 'All':
            filtered_df = filtered_df[filtered_df.cyl == int(select_cyl.value)]
        if select_ori.value != 'All':
            filtered_df = filtered_df[filtered_df.origin == int(select_ori.value)]
        hp = filtered_df.hp

        if hp.empty:
            # No row matches this combination: clear the bars instead of
            # computing a histogram of nothing.
            source_hist.data = {'top': [], 'left': [], 'right': []}
        else:
            hist, edges = histogram(hp, density=True, bins=20)
            source_hist.data = {'top': hist, 'left': edges[:-1], 'right': edges[1:]}

        if hp.nunique() < 2:
            # gaussian_kde cannot be fitted to fewer than two distinct values.
            source_kde.data = {'x': [], 'y': []}
        else:
            pdf = gaussian_kde(hp)
            source_kde.data = {'x': x, 'y': pdf(x)}

    update(None, None, 'All')
    # Callbacks are attached only after `update` is defined.
    select_ori.on_change('value', update)
    select_cyl.on_change('value', update)
    doc.add_root(column(select_ori, select_cyl, p))

Related

How to define Python Bokeh RangeSlider.on_change callback function to alter IndexFilter for plots?

I'm trying to implement a Python callback function for a RangeSlider. The slider value should determine which indices an IndexFilter uses for display.
For example: If rangeslider.value is (3, 25) my plots should only contain/view data with the Index from 3 to 25.
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions, CustomJS, CDSView, IndexFilter
from bokeh.plotting import gmap, ColumnDataSource, figure
from bokeh.layouts import column, row
from bokeh.models.widgets import RangeSlider
import numpy as np
# RangeSlider callback as posted in the question: on every change it assigns a
# brand-new CDSView, filtered to np.arange(new.value[0], new.value[1]), to
# `p.view` and to `v.view`.
# NOTE(review): `p` and `v` are the figures created further down and `new` is
# read through `.value` here — both points are discussed in the answer.
def slider_callback(attr, old, new):
p.view = CDSView(source=source, filters=[IndexFilter(np.arange(new.value[0], new.value[1]))])
v.view = CDSView(source=source, filters=[IndexFilter(np.arange(new.value[0], new.value[1]))])
# data set: five trajectories of five points each
lon = [[48.7886, 48.7887, 48.7888, 48.7889, 48.789],
[48.7876, 48.7877, 48.78878, 48.7879, 48.787],
[48.7866, 48.7867, 48.7868, 48.7869, 48.786],
[48.7856, 48.7857, 48.7858, 48.7859, 48.785],
[48.7846, 48.7847, 48.7848, 48.7849, 48.784]]
lat = [[8.92, 8.921, 8.922, 8.923, 8.924],
[8.91, 8.911, 8.912, 8.913, 8.914],
[8.90, 8.901, 8.902, 8.903, 8.904],
[8.89, 8.891, 8.892, 8.893, 8.894],
[8.88, 8.881, 8.882, 8.883, 8.884]]
# NOTE(review): `time` has 6 entries while `velocity` has 5 and `lon`/`lat`
# have 5 rows, so the columns of the data source below differ in length.
time = [0, 1, 2, 3, 4, 5]
velocity = [23, 24, 25, 24, 20]
# number of trajectories (rows of `lon`)
lenght_dataset = len(lon)
# define source and map
source = ColumnDataSource(data = {'x': lon, 'y': lat, 't': time, 'v': velocity})
# initial view: every index from 0 up to (but excluding) lenght_dataset
view = CDSView(source=source, filters=[IndexFilter(np.arange(0, lenght_dataset))])
map_options = GMapOptions(lat=48.7886, lng=8.92, map_type="satellite", zoom=13)
p = gmap("MY_API_KEY", map_options, title="Trajectory Map")
v = figure(plot_width=400, plot_height=400, title="Velocity")
# plot lines on map; both glyphs share the same source and the same view
p.multi_line('y', 'x', view=view, source=source, line_width=1)
v.line('t', 'v', view=view, source=source, line_width=3)
# slider to limit plotted data
range_slider = RangeSlider(title="Data Range Slider: ", start=0, end=lenght_dataset, value=(0, lenght_dataset), step=1)
# register the Python callback for changes of the slider's `value`
range_slider.on_change('value', slider_callback)
# Layout to plot and output: map with slider on the left, velocity plot on the right
layout = row(column(p, range_slider),
column(v)
)
output_file("diag_plot_bike_data.html")
show(layout)
Some notes:
time is longer than the rest of the columns - you will receive a warning about it. In my code below, I just removed its last element
view with filters in general should not be used for continuous glyphs like lines (v.line in particular - multi_line is fine). You will receive a warning about it. But if the indices in IndexFilter are always continuous, then you should be fine. Either way, you can use the segment glyph to avoid the warning
In your callback, you're trying to set view on the figures - views only exist on glyph renderers
In general, you don't want to recreate views, you want to recreate as few Bokeh models as possible. Ideally, you would have to just change the indices field of the filter. But there's some missing wiring in Bokeh, so you will have to set the filters field of the view, as below
new argument of Python callbacks receives the new value for the attribute passed as the first parameter to the corresponding on_change call. In this case, it will be a tuple, so instead of new.value[0] you should use new[0]
Since you've decided to use Python callbacks, you can no longer use show and have a static HTML file - you will have to use curdoc().add_root and bokeh serve. The UI needs that Python code to run somewhere in runtime
When changing the slider values, you will notice that the separate segments of multi_line will be joined together - it's a bug and I just created https://github.com/bokeh/bokeh/issues/10589 for it
Here's a working example:
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import GMapOptions, CDSView, IndexFilter
from bokeh.models.widgets import RangeSlider
from bokeh.plotting import gmap, ColumnDataSource, figure
# Data set: five trajectories of five points each.
# NOTE(review): the values stored in `lon` (48.78…) match the `lat` passed to
# GMapOptions below, and those in `lat` (8.9…) match its `lng`; the multi_line
# call passes 'y' as xs and 'x' as ys, which appears to compensate — confirm.
lon = [[48.7886, 48.7887, 48.7888, 48.7889, 48.789],
[48.7876, 48.7877, 48.78878, 48.7879, 48.787],
[48.7866, 48.7867, 48.7868, 48.7869, 48.786],
[48.7856, 48.7857, 48.7858, 48.7859, 48.785],
[48.7846, 48.7847, 48.7848, 48.7849, 48.784]]
lat = [[8.92, 8.921, 8.922, 8.923, 8.924],
[8.91, 8.911, 8.912, 8.913, 8.914],
[8.90, 8.901, 8.902, 8.903, 8.904],
[8.89, 8.891, 8.892, 8.893, 8.894],
[8.88, 8.881, 8.882, 8.883, 8.884]]
# `time` and `velocity` both have five entries, one per trajectory
time = [0, 1, 2, 3, 4]
velocity = [23, 24, 25, 24, 20]
# number of trajectories (rows of `lon`)
lenght_dataset = len(lon)
# define source and map
source = ColumnDataSource(data={'x': lon, 'y': lat, 't': time, 'v': velocity})
# One view shared by both glyphs; it starts out showing every index.
view = CDSView(source=source, filters=[IndexFilter(list(range(lenght_dataset)))])
map_options = GMapOptions(lat=48.7886, lng=8.92, map_type="satellite", zoom=13)
p = gmap("API_KEY", map_options, title="Trajectory Map")
v = figure(plot_width=400, plot_height=400, title="Velocity")
# The view is passed to the glyph calls, not set on the figures.
p.multi_line('y', 'x', view=view, source=source, line_width=1)
v.line('t', 'v', view=view, source=source, line_width=3)
# Slider covering index 0 up to lenght_dataset in steps of 1, initially the full range.
range_slider = RangeSlider(title="Data Range Slider: ", start=0, end=lenght_dataset, value=(0, lenght_dataset), step=1)
def slider_callback(attr, old, new):
    """Restrict the shared view to the index range selected on the slider.

    Args:
        attr: name of the changed property (``'value'``); unused.
        old: previous ``(start, end)`` tuple; unused.
        new: new ``(start, end)`` tuple of the slider.
    """
    # The slider's value is a pair of numbers that may arrive as floats, while
    # range() only accepts integers, so coerce both bounds first.
    start, stop = (int(bound) for bound in new)
    # Replace the filters on the existing view instead of building a new view.
    view.filters = [IndexFilter(list(range(start, stop)))]


range_slider.on_change('value', slider_callback)
layout = row(column(p, range_slider), column(v))
# Python callbacks need a running Bokeh server, hence curdoc() instead of show().
curdoc().add_root(layout)

Can't get CrossHairTool in Bokeh to be linked over several plots

I've studied the post:
"How do I link the CrossHairTool in bokeh over several plots?" (see: How do I link the CrossHairTool in bokeh over several plots?).
I used the function written by Hamid Fadishei on June 2020 within this post but cannot manage to get the CrossHairTool to correctly display over several plots.
In my implementation, the crosshair displays only within the plot hovered over. I am currently using Bokeh version 2.1.1 with Python Anaconda version 3.7.6 using the Python extension in VSCode version 1.48. I am not familiar with Javascript, so any help to debug my code to correctly display the crosshair across the two plots will be welcomed.
My code:
# Importing libraries:
import pandas as pd
import random
from datetime import datetime, timedelta
from bokeh.models import CustomJS, CrosshairTool, ColumnDataSource, DatetimeTickFormatter, HoverTool
from bokeh.layouts import gridplot
from bokeh.plotting import figure, output_file, show
# Function written by Hamid Fadishei to enable a linked crosshair within gridplot:
def add_vlinked_crosshairs(figs):
    """Give every figure a CrosshairTool and JS callbacks that drive the others.

    For each figure, a 'mousemove' callback sets the vertical span location of
    the crosshairs of all *other* figures to the mouse's screen x while the
    cursor is inside the figure's ranges (and to null otherwise); a
    'mouseleave' callback resets them to null.

    Args:
        figs: sequence of Bokeh figures to link.
    """
    n_others = len(figs) - 1
    # JS statements addressing the crosshairs passed in as other0, other1, ...
    follow_lines = ''.join(
        '\t\t\tother%d.spans.height.computed_location = cb_obj.sx\n' % idx
        for idx in range(n_others)
    )
    hide_lines = ''.join(
        '\t\t\tother%d.spans.height.computed_location = null\n' % idx
        for idx in range(n_others)
    )
    js_move = (
        'if(cb_obj.x >= fig.x_range.start && cb_obj.x <= fig.x_range.end &&\n'
        'cb_obj.y >= fig.y_range.start && cb_obj.y <= fig.y_range.end){\n'
        + follow_lines
        + '}else{\n'
        + hide_lines
        + '}'
    )
    js_leave = hide_lines

    crosses = [CrosshairTool() for _ in figs]
    for pos, fig in enumerate(figs):
        fig.add_tools(crosses[pos])
        # Every crosshair except this figure's own, numbered from 0.
        peers = crosses[:pos] + crosses[pos + 1:]
        args = {'fig': fig}
        args.update(('other%d' % k, peer) for k, peer in enumerate(peers))
        fig.js_on_event('mousemove', CustomJS(args=args, code=js_move))
        fig.js_on_event('mouseleave', CustomJS(args=args, code=js_leave))
# Create a dataframe with 5 rows: timestamps 5 minutes apart starting at
# 2020-05-01, and random integers from randrange(1, 50, 1) in columns A and B:
startDate = datetime(2020,5,1)
timeStep = timedelta(minutes = 5)
df = pd.DataFrame({
"Date": [startDate + (i * timeStep) for i in range(5)],
"A": [random.randrange(1, 50, 1) for i in range(5)],
"B": [random.randrange(1, 50, 1) for i in range(5)]})
# Generate output file as html file:
output_file("test_linked_crosshair.html", title='Results')
# Define selection tools within gridplot:
select_tools = ["xpan", "xwheel_zoom", "box_zoom", "reset", "save"]
# Both figures below read from this single data source.
sample = ColumnDataSource(df)
# Define figures:
fig_1 = figure(plot_height=250,
plot_width=800,
x_axis_type="datetime",
x_axis_label='Time',
y_axis_label='A',
toolbar_location='right',
tools=select_tools)
fig_1.line(x='Date', y='A',
source=sample,
color='blue',
line_width=1)
fig_2 = figure(plot_height=250,
plot_width=800,
x_range=fig_1.x_range,  # same range object as fig_1
x_axis_type="datetime",
x_axis_label='Time',
y_axis_label='B',
toolbar_location='right',
tools=select_tools)
fig_2.line(x='Date', y='B',
source=sample,
color='red',
line_width=1)
# Define hover tool for showing timestep and value of crosshair on graph.
# Tooltip fields and formatter keys refer to data-source columns with the
# "@" prefix ("@Date", "@A", "@B"); any other prefix is not looked up as a column.
fig_1.add_tools(HoverTool(tooltips=[('', '@Date{%F,%H:%M}'),
                                    ('', '@A{0.00 a}')],
                          formatters={'@Date': 'datetime'}, mode='vline'))
fig_2.add_tools(HoverTool(tooltips=[('', '@Date{%F,%H:%M}'),
                                    ('', '@B{0.00 a}')],
                          formatters={'@Date': 'datetime'}, mode='vline'))
# Calling function to enable linked crosshairs within gridplot:
add_vlinked_crosshairs([fig_1, fig_2])
# Generate gridplot (one figure per row):
p = gridplot([[fig_1], [fig_2]])
show(p)
myGraphenter code here
Here's a solution that works as of Bokeh 2.2.1: Just use the same crosshair tool object for all the plots that need it linked. Like so:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
from bokeh.models import CrosshairTool
# Six figures, each with a line through 10 random points.
plots = [figure() for _ in range(6)]
for plot in plots:
    plot.line(np.arange(10), np.random.random(10))

# One CrosshairTool instance added to every plot: sharing the same tool
# object is what links the crosshair across the plots.
linked_crosshair = CrosshairTool(dimensions="both")
for plot in plots:
    plot.add_tools(linked_crosshair)

show(gridplot(children=plots, ncols=3))

Bokeh how to have an overlay histogram

I made a histogram in bokeh and now I want to plot two histograms in the same graph (overlay histogram). How can I do that?
this is my code for one histogram:
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
import numpy as np
import pandas as pd
def generate_time_differences(n=1000, skew_p=0.1, mean=0, std=1, skew_mean=1, skew_std=6):
    """Return ``n`` samples drawn from a mixture of two normal distributions.

    A share ``skew_p`` of the samples comes from ``N(skew_mean, skew_std)``,
    the remainder from ``N(mean, std)``. The main samples come first in the
    returned array, followed by the skewed ones.

    Args:
        n: total number of samples.
        skew_p: fraction of samples taken from the skewed distribution.
        mean, std: location and scale of the main distribution.
        skew_mean, skew_std: location and scale of the skewed distribution.

    Returns:
        1-D numpy array of length ``n``.
    """
    total = int(n)
    # Round the skewed share and give the rest to the main distribution, so
    # the two sizes always add up to n. Truncating both products independently
    # can lose a sample to float error (e.g. int(100 * 0.29) == 28).
    skewed_count = int(round(total * skew_p))
    normal_count = total - skewed_count
    normal_dist = np.random.normal(loc=mean, scale=std, size=normal_count)
    skewed_dist = np.random.normal(loc=skew_mean, scale=skew_std, size=skewed_count)
    return np.append(normal_dist, skewed_dist)
def generate_plot_data(data, density=True, bins=50):
    """Bin ``data`` with ``np.histogram`` and return the bars as a DataFrame.

    Args:
        data: values to bin.
        density: forwarded to ``np.histogram``.
        bins: forwarded to ``np.histogram``.

    Returns:
        DataFrame with one row per bin and the columns ``top`` (histogram
        value), ``left`` and ``right`` (bin edges).
    """
    counts, bin_edges = np.histogram(data, bins=bins, density=density)
    columns = {
        'top': counts,
        'left': bin_edges[:-1],
        'right': bin_edges[1:],
    }
    return pd.DataFrame(columns)
# 1000 samples: 90% around 1000 (std 100), 10% around 2000 (std 500)
data = generate_time_differences(n=1000, skew_p=0.1,mean=1000,std=100,skew_mean=2000,skew_std=500)
# Histogram bars (top/left/right) for 50 bins; density=True is forwarded to np.histogram
plot_data = generate_plot_data(data, density=True, bins=50)
# Create the blank plot
p = figure(plot_height = 300, plot_width = 600,
title = 'Test Histogram',
x_axis_label = 'Milliseconds',
y_axis_label = 'Frequency')
# Add a quad glyph: one semi-transparent rectangle per bin, rising from 0 to 'top'
p.quad(bottom=0, top=plot_data['top'],
left=plot_data['left'], right=plot_data['right'],
fill_color='blue', line_color='blue', fill_alpha=0.5,line_alpha=0.5 )
# Show the plot
show(p)
Bokeh just plots the glyphs you ask for, in the order you ask for. If you want to add a second histogram, make more calls to quad with the new data.

python bokeh: update scatter plot colors on callback

I only started to use Bokeh recently. I have a scatter plot in which I would like to color each marker according to a certain third property (say a quantity, while the x-axis is a date and the y-axis is a given value at that point in time).
Assuming my data is in a data frame, I managed to do this using a linear color map as follows:
# Range of the `quantity` column, used as the bounds of the color map.
min_q = df.quantity.min()
max_q = df.quantity.max()
# Linear color map over the 'quantity' field of the data source.
mapper = linear_cmap(field_name='quantity', palette=palettes.Spectral6, low=min_q, high=max_q)
# NOTE(review): get_data is not shown here; presumably it returns the column
# dict ('date_column', 'value', 'quantity', ...) for the source — confirm.
source = ColumnDataSource(data=get_data(df))
p = figure(x_axis_type="datetime")
# The marker fill color is driven by the mapper created above.
p.scatter(x="date_column", y="value", marker="triangle", fill_color=mapper, line_color=None, source=source)
# The color bar uses the same transform object as the glyph.
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
p.add_layout(color_bar, 'right')
This seems to work as expected. Below is the plot I get upon starting the bokeh server.
Then I have a callback function update() triggered upon changing value in some widget (a select or a time picker).
# Widget callback as posted in the question (fragment; it continues after "# etc").
# It builds a new linear_cmap from the new data's min/max, assigns that new
# transform to the color bar only, and then replaces the source data.
def update():
# get new df (according to new date/select)
df = get_df()
# update min/max for colormap
min_q = df.quantity.min()
max_q = df.quantity.max()
# I think I should not create a new mapper but doing so I get closer
# (this `mapper` is a new local object; the scatter glyph is not touched here)
mapper = linear_cmap(field_name='quantity', palette=palettes.Spectral6 ,low=min_q, high=max_q)
color_bar.color_mapper=mapper['transform']
source.data = get_data(df)
# etc
This is the closest I could get. The color map is updated with the new values, but it seems that the colors of the markers still follow the original pattern. See the picture below (given that quantity I would expect green, but it is blue, as it is still seen as < 4000, as in the color map of the first plot before the callback).
Should I just add a "color" column to the data frame? I feel there is an easier/more convenient way to do that.
EDIT: Here is a minimal working example using the answer by bigreddot:
from bokeh.io import curdoc
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import Button, ColumnDataSource, ColorBar, HoverTool
from bokeh.palettes import Spectral6
from bokeh.transform import linear_cmap
import numpy as np
x = [1,2,3,4,5,7,8,9,10]
y = [1,2,3,4,5,7,8,9,10]
z = [1,2,3,4,5,7,8,9,10]
source = ColumnDataSource(dict(x=x, y=y, z=z))
# Use the field name of the column source
mapper = linear_cmap(field_name='z', palette=Spectral6, low=min(y), high=max(y))
p = figure(plot_width=300, plot_height=300, title="Linear Color Map Based on Y")
p.circle(x='x', y='y', line_color=mapper, color=mapper, fill_alpha=1, size=12, source=source)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
# Hover tooltips reference data-source columns with the "@" prefix.
p.add_tools(HoverTool(tooltips="@z", show_arrow=False, point_policy='follow_mouse'))
p.add_layout(color_bar, 'right')
b = Button()


def update():
    """Button callback of the question's minimal example (kept as posted).

    Replaces column ``z`` with ``2**z`` and builds a *new* color transform for
    the new value range.
    """
    new_z = np.exp2(z)
    # This `mapper` is a new local object: it is attached to the color bar
    # only, while the circle glyph keeps the transform it was created with.
    mapper = linear_cmap(field_name='z', palette=Spectral6, low=min(new_z), high=max(new_z))
    color_bar.color_mapper = mapper['transform']
    source.data = dict(x=x, y=y, z=new_z)


b.on_click(update)
curdoc().add_root(column(b, p))
Upon update, the circles will be colored according to the original scale: everything bigger than 10 will be red. Instead, I would expect everything to be blue except the last 3 circles at the top, which should be colored green, yellow and red respectively.
It's possible that is a bug, feel free to open a GitHub issue.
That said, the above code does not represent best practices for Bokeh usage, which is: always make the smallest update possible. In this case, this means setting new property values on the existing color transform, rather than replacing the existing color transform.
Here is a complete working example (made with Bokeh 1.0.2) that demonstrates the glyph's colormapped colors updating in response to the data column changing:
from bokeh.io import curdoc
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import Button, ColumnDataSource, ColorBar
from bokeh.palettes import Spectral6
from bokeh.transform import linear_cmap
x = [1,2,3,4,5,7,8,9,10]
y = [1,2,3,4,5,7,8,9,10]
z = [1,2,3,4,5,7,8,9,10]
# Use the field name of the column source
mapper = linear_cmap(field_name='z', palette=Spectral6, low=min(y), high=max(y))
source = ColumnDataSource(dict(x=x, y=y, z=z))
p = figure(plot_width=300, plot_height=300, title="Linear Color Map Based on Y")
p.circle(x='x', y='y', line_color=mapper, color=mapper, fill_alpha=1, size=12, source=source)
# The color bar shares the glyph's transform object.
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
p.add_layout(color_bar, 'right')
b = Button()


def update():
    """Button callback: replace ``z`` with ``2**z`` and rescale the color map.

    Only ``low``/``high`` of the existing transform are changed, so the glyph
    and the color bar (which share that transform) are updated together.
    """
    # 2**z computed in plain Python: this example's imports do not include
    # numpy, so the original `np.exp2(z)` would raise NameError on click.
    new_z = [2.0 ** value for value in z]
    # update the existing transform
    mapper['transform'].low = min(new_z)
    mapper['transform'].high = max(new_z)
    source.data = dict(x=x, y=y, z=new_z)


b.on_click(update)
curdoc().add_root(column(b, p))
Here is the original plot:
And here is the update plot after clicking the button

Bokeh streaming axes

When I'm using Bokeh Stream on Bokeh Server I start with an empty ColumnDataSource - however, this presents a problem as the figure is then generated with no axes labels and despite the data in the plot being updated the axes remain unchanged when it's plotted. It appears the solution to this is to have a fixed x_range and y_range - however, since it's constantly streaming I don't want it to be fixed...
I guess the solution is to update the ranges too but I'm not sure how to do this?
My code currently is as follows:
# Both sources start empty; data arrives later through stream().
source_ios = ColumnDataSource({'Date': [], 'Vol': []})
source_gp = ColumnDataSource({'Date': [], 'Vol': []})
ios = figure(toolbar_location=None, x_axis_type='datetime', plot_width=800, plot_height=250)
ios.circle(x='Date', y='Vol', fill_color="pink", line_color=None, fill_alpha=0.05, size=20, source=source_ios)


def update():
    """Read the CSV file and stream its Date/Vol columns into ``source_ios``."""
    # NOTE(review): the whole file is re-read and streamed on every call;
    # confirm whether already-streamed rows are meant to be sent again.
    MAU_ios = pd.read_csv('myapp/data/pplus_ios_data.csv')
    MAU_ios['Date'] = pd.to_datetime(MAU_ios['Date'])
    MAU_ios['Vol'] = MAU_ios.Vol.astype(int)
    new_MAU_ios = {'Date': MAU_ios['Date'], 'Vol': MAU_ios['Vol']}
    source_ios.stream(new_MAU_ios)


# Run update() every 8000 ms.
curdoc().add_periodic_callback(update, 8000)
# The closing parenthesis was missing from the original call.
curdoc().add_root(ios)
The graph looks like this, as can be seen the axes aren't updated automatically
If you don't create the axis + label beforehand you need to add some padding with the min_border properties of figure()
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from random import random
# Empty source: points are added later through stream().
source_ios = ColumnDataSource({'Date': [], 'Vol': []})

ios = figure(toolbar_location=None, plot_width=800, plot_height=250)
ios.circle(x='Date', y='Vol', color="pink", size=20, source=source_ios)
# Minimum border padding on the left and bottom sides, where the axes and
# their labels are drawn.
ios.min_border_bottom = 50
ios.min_border_left = 50
ios.yaxis.axis_label = 'Vol'
ios.xaxis.axis_label = 'Date'

# Next 'Date' value to stream; advanced by update().
i = 0


def update():
    """Stream ten new points: consecutive 'Date' values and random 'Vol' values."""
    global i
    batch_start = i
    source_ios.stream({
        'Date': range(batch_start, batch_start + 10),
        'Vol': [random() for _ in range(10)],
    })
    i = batch_start + 10


# Run update() every 8000 ms.
curdoc().add_periodic_callback(update, 8000)
curdoc().add_root(ios)

Categories