Select corresponding/multiple bars on bokeh vbar with taptool - python

I'm currently working with data from a sports related test. I want to visualize the (multidimensional) test-data of one athlete from several tests (different dates), so I made grouped vbar with the dates at the lowest level of grouping. Now I want to tap on one bar to select it and the corresponding ones from the same date should be selected (and highlighted), too.
Till now I was searching on stackoverflow with "[python][bokeh]taptool" query, I looked up the whole issues section on git/bokeh with the tag "taptool" and did a google with similar queries, but I can't find a matching thread.
To clarify my needs, I modified the grouped_bars_example from the bokeh repository. My goal is to select all bars of one manufacturer by clicking on one bar. (I know it's possible to hold shift-key for multiselections, but it is quiet annoying to select, for example, 6 corresponding bars out of 120 bars. Thatswhy I'm looking for an efficient way to do so with one click)
#### basic example code from ~/latest/docs/user_guide/categorical.html#grouped
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool, TapTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
# output_file('bars.html')
# preparing data for figure
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)
group = df.groupby(('cyl', 'mfr'))
source = ColumnDataSource(group)
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=sorted(df.cyl.unique()), end=1)
# setting up figure
p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
x_range=group, toolbar_location=None, tools="")
# adding grouped vbar
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
line_color="white", fill_color=index_cmap, )
# figurestyling
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
# adding Tools
p.add_tools(HoverTool(tooltips=[("MPG", "#mpg_mean"), ("Cyl, Mfr", "#cyl_mfr")]))
p.add_tools(TapTool())
#### my additional code
import pandas as pd
import sys
from bokeh.plotting import curdoc, figure
# redirect output in files (just for debugging)
save_stderr = sys.stderr
f_err = open('error.log', 'w')
sys.stderr = f_err
save_stdout = sys.stdout
f_info = open('info.log', 'w')
sys.stdout = f_info
# callbackfunction to obtain selected bars
def callback_tap(attr, old, new):
# write selected indices to file
output = source.selected['1d']['indices'] # indices of selected bars
print(output, type(output))
# make calculations only, if one bar is selected
if len(output) == 1:
# find all corresponding indices to manufacturer based on selected bar
# get manufacturer corresponding to retrieved index
man = source.data['cyl_mfr'][output][1]
# temporary DataFrame
tmp = pd.DataFrame(source.data['cyl_mfr'].tolist(), columns=['cyl', 'mfr'])
# look up all corresponding indices for manufacturer "man"
indices = tmp.index[tmp.mfr == man].values.tolist()
# assing list of indices
source.selected['1d']['indices'] = indices
# assigning callbackfunction
source.on_change('selected', callback_tap)
curdoc().add_root(p)
Please note, that I change the output to run a bokeh server in order to have custom python callback. For debugging reasons, I redirected the outputs to files.
In my callback function, the first part is working fine, and I retrieve the indicees of the selected bar. Additionally, I find the corresponding indicees with an DataFrame, but at the end I'm struggle to assign the new indicees in a way, that the vbar figure is updated.
I'll be very happy, if someone can help me.

Related

How to put interactive bokeh HTML image in Jupyterlab page

I'm using Jupyterlab (v 3.2.1) and bokeh to create a webpage that allows a user to load a .csv file containing a matrix, and a slider to optionally set a threshold on displayed results. The matrix contains simply some numerical values. The result would be an interactive heatmap displayed below the confirmation button. Whit my code the webpage is displayed correctly but the final plot is displayed in a new tab:
import warnings
warnings.filterwarnings('ignore')
import jupyter_bokeh
import ipywidgets as widgets
import pandas as pd
import io
from bokeh.io import show
from bokeh.models import ColorBar, ColumnDataSource, CategoricalColorMapper
from bokeh.plotting import figure
from bokeh.transform import transform
import bokeh.palettes
from IPython.display import display, clear_output, display_html
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.layouts import layout
#Display the webpage
file = widgets.FileUpload(accept=".txt, .csv, .dat", multiple=False)
threshold=widgets.IntSlider(value=0, min=0, max=20, step=1, description="Threshold:", disabled=False, continuous_update=False, orintation='horizontal', readout=True, readout_format="d")
button = widgets.Button(description='Run code')
text_0 = widgets.HTML(value="<header><h1>Phenotype Major Categories vs Genes Heatmap</h1></header>")
text_1 = widgets.HTML(value="<h3>Welcome to the heatmap plotter. By loading a csv file containing the counts of phenoypes for a gene into an IMPC major phenotype category, it will display an interactive heatmap.</h3>")
text_2 = widgets.HTML(value="Please load yor file (accepted formats: csv, txt, dat):")
text_3 = widgets.HTML(value="If desired, set a threshold for counts to be displayed:")
text_4 = widgets.HTML(value="<h2>Heatmap:</h2>")
vbox_head = widgets.VBox([text_0, text_1])
page_layout_plot = [text_2, file, text_3, threshold, button]
vbox_text = widgets.VBox(page_layout_plot)
page = widgets.VBox([vbox_head,vbox_text])
display(page)
#Set the endpage button to run the code
def on_button_clicked(result):
#Load the file and set the threshold
inp = list(file.value.values())[0] #if multiple setted to true, will not work!
content = inp['content']
content = io.StringIO(content.decode('utf-8'))
mat = pd.read_csv(content, sep="\t", index_col=0)
mat.index.name = 'MGI_id'
mat.columns.name = 'phen_sys'
#filtering phase
rem=[]
x = int(threshold.value)
if x != 0:
for i in mat.index:
if mat.loc[i].max() < x:
rem.append(i)
mat.drop(rem,inplace=True,axis=0)
#Create a custom palette and add a specific mapper to map color with values, we are converting them to strings to create a categorical color mapper to include only the
#values that we have in the matrix and retrieve a better representation
df = mat.stack(dropna=False).rename("value").reset_index()
fact= df.value.unique()
fact.sort()
fact = fact.astype(str)
df.value = df.value.astype(str)
mapper = CategoricalColorMapper(palette=bokeh.palettes.inferno(len(df.value.unique())), factors= fact, nan_color = 'gray')
#Define a figure
p = figure(
plot_width=1280,
plot_height=800,
x_range=list(df.phen_sys.drop_duplicates()[::-1]),
y_range=list(df.MGI_id.drop_duplicates()),
tooltips=[('Phenotype system','#phen_sys'),('Gene','#MGI_id'),('Phenotypes','#value')],
x_axis_location="above",
output_backend="webgl")
#Create rectangles for heatmap
p.rect(
x="phen_sys",
y="MGI_id",
width=1,
height=1,
source=ColumnDataSource(df),
fill_color=transform('value', mapper))
p.xaxis.major_label_orientation = 45
#Add legend
color_bar = ColorBar(
color_mapper=mapper,
label_standoff=6,
border_line_color=None)
p.add_layout(color_bar, 'right')
show(p)
button.on_click(on_button_clicked)
I already tried to use output_notebook() at the beginning but in that case nothing is displayed.
How can I fix it? It would be useful to display in real time the plot by changing the threshold without the need to click the confirmation button every time.
Thank you for all the help.
You might need to observe the value attribute of your treshold object to refresh your plot. So add something like this at the end of your code:
def on_value_change(change):
on_button_clicked(None)
threshold.observe(on_value_change, names='value')
More from the doc: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html#Signatures

Dynamically adding and removing Bokeh legends

I'm trying to develop a relatively complex plotting application, which has a huge selection of data to plot. Using dropdowns, the user can select which lines they would like to plot. I've developed a largely simplified version of the code (shown below) to illustrate what my application is like.
import bokeh.plotting.figure as bk_figure
import random
import numpy as np
from bokeh.io import show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook # enables plot interface in J notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
global x, ys
output_notebook()
plot = bk_figure(plot_width=950, plot_height=800, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines
for i in range(len(lines)):
ys.append(x*i)
#add line 0 to plot initially
source = ColumnDataSource(data={'x':x,'y':ys[0]})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
def change_line(attr,old,new):
#remove old lines
render_copy = list(plot.renderers)
for line in render_copy:
plot.renderers.remove(line)
legend_items = []
#add selected lines to plot
for i,line in enumerate(line_select.value):
y = ys[int(line)]
source = ColumnDataSource(data={'x':x,'y':y})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
doc.add_root(row(layout,width=800))
doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)
I've decided to dynamically add and remove line glyphs from the plot as they are selected in the MultiSelect. This is because if I simply hide the lines, the performance of the program suffers, given that there are so many line options in the real dataset.
Problem:
I want to add a legend to the plot which only contains entries for the Line glyphs that are currently in the plot (there are far too many line options in the real dataset to have all of them visible in the legend at all times.) I've been having issues finding any resources to help with this: for most applications, something like this is sufficient, but this doesn't work with the way I've defined the lines I'm plotting.
I've been adding legends manually, for example:
#add line 0 to plot initially
source = ColumnDataSource(data={'x':x,'y':ys[0]})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
#create first legend
legend_item = [LegendItem(label=lines[0],\
renderers=[glyph])]
legend = Legend(items=legend_item)
plot.add_layout(legend,place='right')
but I can't figure out how to effectively remove the legend layouts from the plot once I've added them. After reading the source code for add_layout, I realized that you could get a list of layouts in a given location by using something like getattr(plot,'right'). Trying to use this, I replaced the change_line function with the following:
def change_line(attr,old,new):
#remove old lines
render_copy = list(plot.renderers)
for line in render_copy:
plot.renderers.remove(line)
#remove old legend
right_attrs_copy = list(getattr(plot,'right'))
for legend in right_attrs_copy:
getattr(plot,'right').remove(legend)
legend_items = []
#add selected lines to plot
for i,line in enumerate(line_select.value):
y = ys[int(line)]
source = ColumnDataSource(data={'x':x,'y':y})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
legend_items.append(LegendItem(label='line '+str(line),\
renderers=[glyph]))
#create legend
legend = Legend(items=legend_items)
plot.add_layout(legend,place='right')
Checking the attributes of the plot, this appears to add and remove legends and lines correctly, but it causes the plot to completely stop visually updating.
Does anyone know how to accomplish this behavior? It's possible that I'm not even adding the legend in the correct way, but I couldn't figure out how else to add them when lines are defined as Glyph objects.
Basic glyphs provide much flexibility compared to chart/model classes. A basic line (not Line) glyph can be used here.
In the code below, I am adding basic glyphs to the chart. I am saving the glyphs in a dictionary which can be actioned later (as OP said its a complex application, I am sure this will be used later). I have commented the ColumnDataSource creation, as it will accessible through data_source.data of respective glyphs (now saved in dictionary).
Also, since now we are creating lines one by one, color needs to be provided for different lines. I have used a bokeh.palette function to generate a number of colors. More on this can be read here
import bokeh.plotting.figure as bk_figure
import random
import numpy as np
from bokeh.io import show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook # enables plot interface in J notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
import bokeh.palettes
#change the number as per the max number of glyphs in system
palette = bokeh.palettes.inferno(5)
global x, ys
output_notebook()
plot = bk_figure(plot_width=950, plot_height=800, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines
for i in range(len(lines)):
ys.append(x*i)
linedict = {}
#add line 0 to plot initially
#source = ColumnDataSource(data={'x':x,'y':ys[0]})
#glyph = Line(x='x',y='y')
#glyph = plot.add_glyph(source,glyph)
l1 = plot.line(x = x, y= ys[0], legend=str(0), color = palette[0])
linedict[str(0)] = l1
def change_line(attr,old,new):
#remove old lines
render_copy = list(plot.renderers)
for line in render_copy:
plot.renderers.remove(line)
legend_items = []
#add selected lines to plot
for i,line in enumerate(line_select.value):
y = ys[int(line)]
#source = ColumnDataSource(data={'x':x,'y':y})
l1 = plot.line(x = x, y= y, legend=line, color = palette[i])
#linedict[line] = l1
glyph = Line(x='x',y='y', legend=line, color = palette[i])
glyph = plot.add_glyph(source,glyph)
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
doc.add_root(row(layout,width=800))
doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)
After much anguish, I finally figured it out (this link was helpful). #Eugene Pakhomov was correct in that the fact that I removed lines and legends in my initial code was a problem. Instead, the key was to initialize a new line only when the user requested to plot a new maximum number of lines. In all other cases, you can simply edit the data_source of existing lines. This allows the program to avoid having all the lines plotted and hidden when the user only wants to plot one or two of the total options.
Instead of deleting and remaking the legend, you can set it to be empty on every update, then add entries as needed.
The following code worked for me in a Jupyter Notebook running bokeh 1.4.0:
from bokeh.io import show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.palettes import Category10 as palette
output_notebook()
plot = bk_figure(plot_width=750, plot_height=600, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines with 0,1, and 2 slope
for i in range(len(lines)):
ys.append(x*i)
#add line 0 to plot initially
source = ColumnDataSource(data={'x':x,'y':ys[0]})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
#intialize Legend
legend = Legend(items=[LegendItem(label=lines[0],renderers=[glyph])])
plot.add_layout(legend)
def change_line(attr,old,new):
plot.legend.items = [] #reset the legend
#add selected lines to plot
for i,line in enumerate(line_select.value):
line_num = int(line)
color = palette[10][i]
#if i lines have already been plotted in the past, just edit an existing line
if i < len(plot.renderers):
#edit the existing line's data source
plot.renderers[i]._property_values['data_source'].data = {'x':x, 'y':ys[line_num]}
#Add a new legend entry
plot.legend.items.append(LegendItem(label=line,renderers=[plot.renderers[i]]))
#otherwise, initialize an entirely new line
else:
#create a new glyph with a new data source
source = ColumnDataSource(data={'x':x,'y':ys[line_num]})
glyph = Line(x='x',y='y',line_color=color)
glyph = plot.add_glyph(source,glyph)
#Add a new legend entry
plot.legend.items.append(LegendItem(label=line,renderers=[plot.renderers[i]]))
#'Remove' all extra lines by making them contain no data
#instead of outright deleting them, which Bokeh dislikes
for extra_line_num in range(i+1,len(plot.renderers)):
plot.renderers[extra_line_num]._property_values['data_source'].data = {'x':[],'y':[]}
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
doc.add_root(row(layout,width=800))
doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

Bokeh not interpreting correct scale with SQL data

Explored lots of various solutions here but not finding one that works. I'm using sqlite and pandas to read data from a SQL database, but Bokeh doesn't like the date. I've tried conversions to datetime, unixepoch, etc. and they all seem to yield the same result.
EDIT: Here's the full code:
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN ds ELSE df END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# proj['projdate'] = [datetime.datetime.strptime(x, "%Y-%m-%d").date() for x in proj['projdate']]
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
selected = proj[
(proj.year >= min_year.value) &
(proj.year >= max_year.value)
]
return selected
def update():
proj = select_rep()
source.data = dict(
year=proj["year"]
)
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"
The SQLite output in Terminal.app looks like this:
SQL
The result is, that the x-axis displays in milliseconds. Also, the y-axis is showing up as exponential notation:
Bokeh Plot
The issue seems somehow related to pandas use of indexing, and thus I can't reference "x" here. I rename the columns and force list sets which, by themselves, will print correctly... and should therefore plot into the line properly but as you'll see below, they don't:
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
To get the line to render in Bokeh, I have to pass it the index because passing it anything else doesn't seem to get the glyph to render. So currently I have this:
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
Tried converting to unixepoch in the SQL, same result.
Tried converting to unixepoch in the data, same result.
Tried using DateTimeTickFormatter, just shows all 5-6 years as one year (thinking it's just displaying the milliseconds as years rather than changing them from milliseconds to days.
I've looked here and in github, up and down, and tried different things but ultimately I can't find one working example where the source is a sql query not a csv.
None of these things have anything to do with SQL, Bokeh only cares about the data that you give it, not where it came from. You have specified that you want a datetime axis on the x-axis:
x_axis_type='datetime'
So, Bokeh will set up the plot with a ticker that picks "nice" values on a datetime scale, and with a tick formatter that displays tick locations as formatted dates. What is important, however, is that the data coordinates are in the appropriate units, which are floating point milliseconds since epoch.
You can provide x values directly in these units, but Bokeh will also automatically convert common datetime types (e.g. python stdlib, numpy, or pandas) to the right units automatically. So the easiest thing for you to do is pass a column of datetime values as the x values to line.
To be clear, this statement:
To render the line in Bokeh, it has to use the index
is incorrect. You can pass any dataframe column you like as the x-values, and I am suggesting you pass a column of datetimes.
I changed a line of the SQL to:
CASE WHEN df is null THEN strftime('%Y',ds) ELSE strftime('%Y',df) END AS 'projdate',
However, when I try expanding that specifier to %Y-%m-%d %H-%m-%s it just reads it as a string all over again.
And also by re-importing the data I was able to pass the date through here without using Index:
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
But then I get this weird output: link.
So it's clear that it can read the year, but I need to pass through the full date to display the time series forecast. And it's still displaying the dates and y-values in the incorrect scale, regardless.
Going to noodle on this some more but if anyone has other suggestions, I'm thankful.
SOLVED the datetime problem. Added this after the SQL query:
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
Which in turn yields this:
Bokeh Plot
Still got a problem with the x-axis but since that's a straight numerical value, x_axis_type should fix it.
So far the working code looks like this (again, still iterating to add other controls but everything about the Bokeh plot itself works as intended):
# main.py
# created by: <>
# version: 0.1.2
# created date: 07-Aug-2018
# modified date: 09-Aug-2018
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN strftime('%Y-%m-%d',ds) ELSE strftime('%Y-%m-%d',df) END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
selected = proj[
(proj.year >= min_year.value) &
(proj.year >= max_year.value)
]
return selected
def update():
proj = select_rep()
source.data = dict(
year=proj["year"]
)
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"

Python - Bokeh vbar hover tool

I have a simple multiple data bar graph (non stacked) and wish to be able to be shown the (max) value of the bar chart upon a hover over with the mouse.
I'm having trouble linking the hover location to the data though. I'm not sure how of the syntax/coding for calling an index from the bar chart.
Here is my code:
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure
output_file("bars.html")
LOCATIONS = ['CPC','OG2','HS82-83','IG6','IG4','IG10']
CHECKS = ['AID CHECKS', 'ITEMS SCREENED', 'PERSONS SCREENED']
data = {'LOCATIONS' : LOCATIONS,
'AID CHECKS' : [208,622,140,1842,127,1304],
'PERSONS SCREENED' : [201,484,126,1073,81,676],
'ITEMS SCREENED' : [28,71,31,394,32,207]}
x = [ (location, check) for location in LOCATIONS for check in CHECKS ]
counts = sum(zip(data['AID CHECKS'], data['PERSONS SCREENED'], data['ITEMS SCREENED']), ()) # like an hstack
source = ColumnDataSource(data=dict(x=x, counts=counts))
p = figure(x_range=FactorRange(*x), plot_height=600, plot_width=990, title="NPS Locations by Security Checks",
tools="pan,wheel_zoom,box_zoom,reset, save")
p.xaxis.axis_label_text_font_size = "5pt"
p.xaxis.axis_label_text_font_style='bold'
p.vbar(x='x', top='counts', width=0.9, source=source)
p.add_tools(HoverTool(tooltips=[("LOCATION", "#location"), ("TOTAL", "#check")]))
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
show(p)
Adjust the following line:
p.add_tools(HoverTool(tooltips=[("LOCATION", "#x"), ("TOTAL", "#counts")]))
See the documentation:
Field names that begin with # are associated with columns in a
ColumnDataSource. For instance the field name "#price" will display
values from the "price" column whenever a hover is triggered. If the
hover is for the 17th glyph, then the hover tooltip will
correspondingly display the 17th price value.

Bokeh (0.12.1) Update DataColumnSource Selection programmatically with bokeh serve (Python only)

using Bokeh, I am trying to update the .selected dictionary of a ColumnDataSource programmatically, via the callback of a Slider, but cannot manage to get the selection reflected in the plot.
In the following snippet, the idea is that I want to be able to make a y-axis selection both via the ybox_select tool and/or by adjusting the sliders that control the position of a pair of min/max lines (NOTE: for brevity, in this example I only included the 'max' slider and line). If possible, I want to achieve this without using CustomJS callbacks.
I got as far as adjusting the horizontal line and the slider value (and of course the selection, which happens implicitly) when I operate the ybox_select tool (which triggers the selection_change function). Instead, when I operate the slider (triggering the slider_selection function), I manage to control the horizontal line but, apparently, not the source selection. In other words, the modification of source.data that occurs in slider_selection is reflected in the plot (i.e. the modified position of the horizontal line) but the modification of source.selected is NOT reflected in the plot (nor in a DataTable, as I verified separately).
Following the suggestion in this thread (where I've asked a shorter version of this question but didn't get any answers so far), I've worked on a copy of source.selected and then copied back to .selected (same for .data), but this didn't have any effects.
I must be missing something rather fundamental, but cannot figure out what. Any idea? Please avoid suggestions based on CustomJS, unless you're sure that there is no pure-Python alternative.
Thanks a lot for any feedback!
(Note: run this code as a script with bokeh serve --show script.py)
from bokeh.io import curdoc
from bokeh.models import BoxSelectTool, Slider
from bokeh.plotting import figure, ColumnDataSource
from bokeh.sampledata.glucose import data
from bokeh.layouts import column
import numpy as np
#===============================================================================
# Data and source
y = data.ix['2010-10-06']['glucose']
x = np.arange(len(y))
maxval=[max(y)]*len(x)
source = ColumnDataSource(dict(x=x, y=y, maxval=maxval))
#===============================================================================
# Basic plot setup
tools = 'wheel_zoom,ybox_select,reset'
p = figure(plot_width=800, plot_height=400, tools=tools, title='Min/max selection')
# Plot data
cr = p.circle('x', 'y', color="blue", source = source,
selection_color="blue", nonselection_color="gray",
size=6, alpha=0.8)
# Plot max horizontal line
p.line('x', 'maxval', line_color='blue', line_width=0.5, source=source,
nonselection_alpha=1.0, nonselection_color='blue')
#===============================================================================
# Callbacks
def selection_change(attrname, old, new):
ixs = new['1d']['indices']
if ixs:
arr = np.asarray(source.data['y'])[ixs]
max_slider.value = np.max(arr)
source.data['maxval'] = [np.max(arr)]*len(source.data['x'])
def slider_selection(attrname, old, new):
selected = source.selected.copy()
data = source.data.copy()
data['maxval'] = [max_slider.value]*len(data['x'])
yy = np.asarray(data['y'])
maxi = np.asarray(data['maxval'])
# Below is the new selection I would to visualize
selected['1d']['indices'] = np.where(yy <= maxi)[0].tolist()
# Updated data is reflected in the plot (horizontal line at 'maxval' moves)
source.data = data.copy()
# Updated selection is NOT reflected in the plot
# (nor in a DataTable, as tested separately)
source.selected = selected.copy()
#===============================================================================
# Slider
max_slider = Slider(start=min(y), end=max(y),
value=max(y), step=0.1, title="Maximum")
#===============================================================================
# Trigger callbacks
source.on_change('selected', selection_change)
max_slider.on_change('value', slider_selection)
#===============================================================================
# Layout
plot_layout = column(p, max_slider)
curdoc().add_root(plot_layout)
curdoc().title = "Demo"
Adding the following line to slider_selection seems to do what you want:
source.trigger("selected", old, selected)
the new function definition:
def slider_selection(attrname, old, new):
selected = source.selected.copy()
data = source.data.copy()
data['maxval'] = [max_slider.value]*len(data['x'])
yy = np.asarray(data['y'])
maxi = np.asarray(data['maxval'])
# Below is the new selection I would to visualize
selected['1d']['indices'] = np.where(yy <= maxi)[0].tolist()
# Updated data is reflected in the plot (horizontal line at 'maxval' moves)
source.data = data.copy()
# Updated selection is NOT reflected in the plot
# (nor in a DataTable, as tested separately)
source.selected = selected.copy()
source.trigger("selected", old, selected)
(Though it's a bit late, I found your question trying to find a similar answer, I figured this might be useful to others).

Categories