Python - Bokeh vbar hover tool - python

I have a simple multiple data bar graph (non stacked) and wish to be able to be shown the (max) value of the bar chart upon a hover over with the mouse.
I'm having trouble linking the hover location to the data though. I'm not sure how of the syntax/coding for calling an index from the bar chart.
Here is my code:
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure
output_file("bars.html")
LOCATIONS = ['CPC','OG2','HS82-83','IG6','IG4','IG10']
CHECKS = ['AID CHECKS', 'ITEMS SCREENED', 'PERSONS SCREENED']
data = {'LOCATIONS' : LOCATIONS,
'AID CHECKS' : [208,622,140,1842,127,1304],
'PERSONS SCREENED' : [201,484,126,1073,81,676],
'ITEMS SCREENED' : [28,71,31,394,32,207]}
x = [ (location, check) for location in LOCATIONS for check in CHECKS ]
counts = sum(zip(data['AID CHECKS'], data['PERSONS SCREENED'], data['ITEMS SCREENED']), ()) # like an hstack
source = ColumnDataSource(data=dict(x=x, counts=counts))
p = figure(x_range=FactorRange(*x), plot_height=600, plot_width=990, title="NPS Locations by Security Checks",
tools="pan,wheel_zoom,box_zoom,reset, save")
p.xaxis.axis_label_text_font_size = "5pt"
p.xaxis.axis_label_text_font_style='bold'
p.vbar(x='x', top='counts', width=0.9, source=source)
p.add_tools(HoverTool(tooltips=[("LOCATION", "#location"), ("TOTAL", "#check")]))
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
show(p)

Adjust the following line:
p.add_tools(HoverTool(tooltips=[("LOCATION", "#x"), ("TOTAL", "#counts")]))
See the documentation:
Field names that begin with # are associated with columns in a
ColumnDataSource. For instance the field name "#price" will display
values from the "price" column whenever a hover is triggered. If the
hover is for the 17th glyph, then the hover tooltip will
correspondingly display the 17th price value.

Related

How to colour code points using Bokeh with gmap

I currently have a gmap displaying gps points, however, I was hoping there was a way to colour code my GPS points based on which month they were recorded ? I have looked around online but am struggling to implement it into my own code. My dataset consists of GPS points collected throughout 2017, with a localDate index (in datetime format), and a longitude and latitude:
2017-11-12 |5.043978|118.715237
Bokeh and gmap code:
def plot(lat, lng, zoom=10, map_type='roadmap'):
gmap_options = GMapOptions(lat=lat, lng=lng,
map_type=map_type, zoom=zoom)
# the tools are defined below:
hover = HoverTool(
tooltips = [
# #price refers to the price column
# in the ColumnDataSource.
('Date', '#{Local Date}{%c}'),
('Lat', '#Lat'),
('Lon', '#Lon'),
],
formatters={'#{Local Date}': 'datetime'}
)
# below we replaced 'hover' (the default hover tool),
# by our custom hover tool
p = gmap(api_key, gmap_options, title='Malaysia',
width=bokeh_width, height=bokeh_height,
tools=[hover, 'reset', 'wheel_zoom', 'pan'])
source = ColumnDataSource(day2017Averageddf)
center = p.circle('Lon', 'Lat', size=4, alpha=0.5,
color='yellow', source=source)
show(p)
return p
p = plot(Lat, Lon, map_type='satellite')
The base idea is to pass the colors to the color keyword in p.circle(). You are using one color, but you could create also a list of colors with the correct length and implement your own logic or you could make use of a mapper.
The code below is a copy from the original documentation about mappers.
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure, output_notebook, show
from bokeh.transform import linear_cmap
output_notebook()
x = [1,2,3,4,5,7,8,9,10]
y = [1,2,3,4,5,7,8,9,10]
#Use the field name of the column source
mapper = linear_cmap(field_name='y', palette=Spectral6 ,low=min(y) ,high=max(y))
source = ColumnDataSource(dict(x=x,y=y))
p = figure(width=300, height=300, title="Linear Color Map Based on Y")
p.circle(x='x', y='y', line_color=mapper,color=mapper, fill_alpha=1, size=12, source=source)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8)
p.add_layout(color_bar, 'right')
show(p)
To come back to you problem. If the items in your Local Date column are of type pd.Timestamp, you can create a column "month" by this line
day2017Averageddf["month"] = day2017Averageddf["Local Date"].month
and use it for the mapper.

Adding simple dropdown to Bokeh plot using pandas data

I am very new to Bokeh and cannot seem to find any really good examples of a simple dropdown plot with data from a pandas dataframe. I am working with a dictionary that has 4 different keys, where each key contains a dataframe.
df_vals.keys()
dict_keys(['corn', 'soybeans', 'winterwheat', 'springwheat'])
df_vals['corn'].head()
time 2m_temp_prod 2m_temp_area total_precip_prod total_precip_area
0 2020-09-16 00:00:00 299.346777 299.799234 0.000000 0.000000
1 2020-09-16 06:00:00 294.039512 294.443352 0.191070 0.286952
2 2020-09-16 12:00:00 292.959274 293.182931 0.155765 0.216606
3 2020-09-16 18:00:00 301.318046 301.767516 0.421768 0.485691
4 2020-09-17 00:00:00 300.623567 300.979650 0.363572 0.501164
Next, I can add this data to a source.
source=ColumnDataSource(data=df_vals['corn'])
Plotting from here is simple.
p1=figure(x_axis_type='datetime')
p1.line(x='time', y='2m_temp_prod',source=source)
show(p1)
This does exactly what I want. It plots a line plot with datetime as the x-axis. However, now I want to add a dropdown widget to switch between 2 columns in df_vals['corn'] (2m_temp_prod and total_precip_prod). I have tried this code below but it is not working and I am not sure if that's even the right way to go about it.
def update_plot(attr, old, new):
if new == 'total_precip_prod':
source.data = {
'x' : df_vals['corn']['time'],
'y' : df_vals['corn']['total_precip_prod'].cumsum()
}
select = Select(title="hi", options=['2m_temp_area', 'total_precip_prod'], value='2m_temp_area')
select.on_change('value', update_plot)
# Create layout and add to current document
layout = row(select, p1)
curdoc().add_root(layout)
Ideally, the button would have two options: temps and precip. How would I go about do this?
You can use Bokeh CustomJS callbacks to toggle visibility of the plots according to what's selected in a dropdown. A basic example would look like this:
from bokeh.models import ColumnDataSource, CustomJS, Range1d, Select
from bokeh.plotting import figure, output_notebook, show
from bokeh.layouts import column
import pandas as pd
import numpy as np
# you can also output to an HTML file
output_notebook()
# toy dataframe with two y columns you can switch between
df = pd.DataFrame(data={
"x" : range(0,100),
"y1" : np.random.randint(10, 20, 100),
"y2" : np.random.randint(40, 50, 100)
})
# fix the y_range to be the same for two lines
p = figure(y_range=Range1d(0, 60), plot_width=600, plot_height=400)
# shared datasource
source = ColumnDataSource(df)
plot_1 = p.line(x="x", y="y1", color="teal", source=source, line_width=1)
plot_2 = p.line(x="x", y="y2", color="firebrick", source=source, line_width=1)
# initialise the plot with only y1 visible - to match the dropdown default
plot_2.visible = False
# dropdown widget + Javascript code for interactivity
select = Select(title="Plot to show:", value="Line 1", options=["Line 1", "Line 2"])
select.js_on_change("value", CustomJS(args=dict(line_1=plot_1, line_2=plot_2), code="""
line_1.visible = true
line_2.visible = true
if (this.value === "Line 1") {
line_2.visible = false
} else {
line_1.visible = false
}
"""))
layout = column(select, p)
show(layout)

python bokeh: update scatter plot colors on callback

I only started to use Bokeh recently. I have a scatter plot in which I would like to color each marker according to a certain third property (say a quantity, while the x-axis is a date and the y-axis is a given value at that point in time).
Assuming my data is in a data frame, I managed to do this using a linear color map as follows:
min_q = df.quantity.min()
max_q = df.quantity.max()
mapper = linear_cmap(field_name='quantity', palette=palettes.Spectral6, low=min_q, high=max_q)
source = ColumnDataSource(data=get_data(df))
p = figure(x_axis_type="datetime")
p.scatter(x="date_column", y="value", marker="triangle", fill_color=mapper, line_color=None, source=source)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
p.add_layout(color_bar, 'right')
This seems to work as expected. Below is the plot I get upon starting the bokeh server.
Then I have a callback function update() triggered upon changing value in some widget (a select or a time picker).
def update():
# get new df (according to new date/select)
df = get_df()
# update min/max for colormap
min_q = df.quantity.min()
max_q = df.quantity.max()
# I think I should not create a new mapper but doing so I get closer
mapper = linear_cmap(field_name='quantity', palette=palettes.Spectral6 ,low=min_q, high=max_q)
color_bar.color_mapper=mapper['transform']
source.data = get_data(df)
# etc
This is the closest I could get. The color map is updated with new values, but it seems that the colors of the marker still follow the original pattern. See picture below (given that quantity I would expect green, but it is blue as it still seen as < 4000 as in the map of the first plot before the callback).
Should I just add a "color" column to the data frame? I feel there is an easier/more convenient way to do that.
EDIT: Here is a minimal working example using the answer by bigreddot:
from bokeh.io import curdoc
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import Button, ColumnDataSource, ColorBar, HoverTool
from bokeh.palettes import Spectral6
from bokeh.transform import linear_cmap
import numpy as np
x = [1,2,3,4,5,7,8,9,10]
y = [1,2,3,4,5,7,8,9,10]
z = [1,2,3,4,5,7,8,9,10]
source = ColumnDataSource(dict(x=x, y=y, z=z))
#Use the field name of the column source
mapper = linear_cmap(field_name='z', palette=Spectral6 ,low=min(y) ,high=max(y))
p = figure(plot_width=300, plot_height=300, title="Linear Color Map Based on Y")
p.circle(x='x', y='y', line_color=mapper,color=mapper, fill_alpha=1, size=12, source=source)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
p.add_tools(HoverTool(tooltips="#z", show_arrow=False, point_policy='follow_mouse'))
p.add_layout(color_bar, 'right')
b = Button()
def update():
new_z = np.exp2(z)
mapper = linear_cmap(field_name='z', palette=Spectral6 ,low=min(new_z), high=max(new_z))
color_bar.color_mapper=mapper['transform']
source.data = dict(x=x, y=y, z=new_z)
b.on_click(update)
curdoc().add_root(column(b, p))
Upon update, the circles will be colored according to the original scale: everything bigger than 10 will be red. Instead, I would expect everything blue until the last 3 circle on tops that should be colored green yellow and red respectively.
It's possible that is a bug, feel free to open a GitHub issue.
That said, the above code does not represent best practices for Bokeh usage, which is: always make the smallest update possible. In this case, this means setting new property values on the existing color transform, rather than replacing the existing color transform.
Here is a complete working example (made with Bokeh 1.0.2) that demonstrates the glyph's colormapped colors updating in response to the data column changing:
from bokeh.io import curdoc
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import Button, ColumnDataSource, ColorBar
from bokeh.palettes import Spectral6
from bokeh.transform import linear_cmap
x = [1,2,3,4,5,7,8,9,10]
y = [1,2,3,4,5,7,8,9,10]
z = [1,2,3,4,5,7,8,9,10]
#Use the field name of the column source
mapper = linear_cmap(field_name='z', palette=Spectral6 ,low=min(y) ,high=max(y))
source = ColumnDataSource(dict(x=x, y=y, z=z))
p = figure(plot_width=300, plot_height=300, title="Linear Color Map Based on Y")
p.circle(x='x', y='y', line_color=mapper,color=mapper, fill_alpha=1, size=12, source=source)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
p.add_layout(color_bar, 'right')
b = Button()
def update():
new_z = np.exp2(z)
# update the existing transform
mapper['transform'].low=min(new_z)
mapper['transform'].high=max(new_z)
source.data = dict(x=x, y=y, z=new_z)
b.on_click(update)
curdoc().add_root(column(b, p))
Here is the original plot:
And here is the update plot after clicking the button

Bokeh multiline plot

I am trying to plot RPI, CPI and CPIH on one chart with a HoverTool showing the value of each when you pan over a given area of the chart.
I initially tried adding each line separately using line() which kind of worked:
However, the HoverTool only works correctly when you scroll over the individual lines.
I have tried using multi_line() like:
combined_inflation_metrics = 'combined_inflation_metrics.csv'
df_combined_inflation_metrics = pd.read_csv(combined_inflation_metrics)
combined_source = ColumnDataSource(df_combined_inflation_metrics)
l.multi_line(xs=['Date','Date','Date'],ys=['RPI', 'CPI', 'CPIH'], source=combined_source)
#l.multi_line(xs=[['Date'],['Date'],['Date']],ys=[['RPI'], ['CPI'], ['CPIH']], source=combined_source)
show(l)
However, this is throwing the following:
RuntimeError:
Supplying a user-defined data source AND iterable values to glyph methods is
not possibe. Either:
Pass all data directly as literals:
p.circe(x=a_list, y=an_array, ...)
Or, put all data in a ColumnDataSource and pass column names:
source = ColumnDataSource(data=dict(x=a_list, y=an_array))
p.circe(x='x', y='y', source=source, ...)
But I am not too sure why this is?
Update:
I figured out a workaround by adding all of the values in each of the data sources. It works, but doesn't feel most efficient and would still like to know how to do this properly.
Edit - Code request:
from bokeh.plotting import figure, output_file, show
from bokeh.models import NumeralTickFormatter, DatetimeTickFormatter, ColumnDataSource, HoverTool, CrosshairTool, SaveTool, PanTool
import pandas as pd
import os
os.chdir(r'path')
#output_file('Inflation.html', title='Inflation')
RPI = 'RPI.csv'
CPI = 'CPI.csv'
CPIH = 'CPIH.csv'
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
def to_date_time(data_frame, data_series):
data_frame[data_series] = data_frame[data_series].astype('datetime64[ns]')
to_date_time(df_RPI, 'Date')
to_date_time(df_CPI, 'Date')
to_date_time(df_CPIH, 'Date')
RPI_source = ColumnDataSource(df_RPI)
CPI_source = ColumnDataSource(df_CPI)
CPIH_source = ColumnDataSource(df_CPIH)
l = figure(title="Historic Inflaiton Metrics", logo=None)
l.plot_width = 1200
l.xaxis[0].formatter=DatetimeTickFormatter(
days=["%d %B %Y"],
months=["%d %B %Y"],
years=["%d %B %Y"],
)
glyph_1 = l.line('Date','RPI',source=RPI_source, legend='TYPE', color='red')
glyph_2 = l.line('Date','CPI',source=CPI_source, legend='TYPE', color='blue')
glyph_3 = l.line('Date','CPIH',source=CPIH_source, legend='TYPE', color='gold')
hover = HoverTool(renderers=[glyph_1],
tooltips=[ ("Date","#Date{%F}"),
("RPI","#RPI"),
("CPI","#CPI"),
("CPIH","#CPIH")],
formatters={"Date": "datetime"},
mode='vline'
)
l.tools = [SaveTool(), PanTool(), hover, CrosshairTool()]
show(l)
The hover tool looks up the data to show in the ColumnDataSource. Because you created a new ColumnDataSource for each line and restricted the hover tool to line1 it can only lookup data in the data source there.
The general solution is to only create one ColumnDataSource and reuse that in each line:
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
df = df_RPI.merge(dfd_CPI, on="date")
df = df.merge(df_CPIH, on="date")
source = ColumnDataSource(df)
l = figure(title="Historic Inflation Metrics", logo=None)
glyph_1 = l.line('Date','RPI',source=source, legend='RPI', color='red')
l.line('Date','CPI',source=source, legend='CPI', color='blue')
l.line('Date','CPIH',source=source, legend='CPIH', color='gold')
hover = HoverTool(renderers=[glyph_1],
tooltips=[ ("Date","#Date{%F}"),
("RPI","#RPI"),
("CPI","#CPI"),
("CPIH","#CPIH")],
formatters={"Date": "datetime"},
mode='vline'
)
show(l)
This is of course only possible if you all your dataframes can be merged into one, i.e. the measurement timepoints are the same. If they are not besides resampling/interpolating I do not know a good method to do what you want.

Select corresponding/multiple bars on bokeh vbar with taptool

I'm currently working with data from a sports related test. I want to visualize the (multidimensional) test-data of one athlete from several tests (different dates), so I made grouped vbar with the dates at the lowest level of grouping. Now I want to tap on one bar to select it and the corresponding ones from the same date should be selected (and highlighted), too.
Till now I was searching on stackoverflow with "[python][bokeh]taptool" query, I looked up the whole issues section on git/bokeh with the tag "taptool" and did a google with similar queries, but I can't find a matching thread.
To clarify my needs, I modified the grouped_bars_example from the bokeh repository. My goal is to select all bars of one manufacturer by clicking on one bar. (I know it's possible to hold shift-key for multiselections, but it is quiet annoying to select, for example, 6 corresponding bars out of 120 bars. Thatswhy I'm looking for an efficient way to do so with one click)
#### basic example code from ~/latest/docs/user_guide/categorical.html#grouped
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool, TapTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
# output_file('bars.html')
# preparing data for figure
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)
group = df.groupby(('cyl', 'mfr'))
source = ColumnDataSource(group)
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=sorted(df.cyl.unique()), end=1)
# setting up figure
p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
x_range=group, toolbar_location=None, tools="")
# adding grouped vbar
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
line_color="white", fill_color=index_cmap, )
# figurestyling
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
# adding Tools
p.add_tools(HoverTool(tooltips=[("MPG", "#mpg_mean"), ("Cyl, Mfr", "#cyl_mfr")]))
p.add_tools(TapTool())
#### my additional code
import pandas as pd
import sys
from bokeh.plotting import curdoc, figure
# redirect output in files (just for debugging)
save_stderr = sys.stderr
f_err = open('error.log', 'w')
sys.stderr = f_err
save_stdout = sys.stdout
f_info = open('info.log', 'w')
sys.stdout = f_info
# callbackfunction to obtain selected bars
def callback_tap(attr, old, new):
# write selected indices to file
output = source.selected['1d']['indices'] # indices of selected bars
print(output, type(output))
# make calculations only, if one bar is selected
if len(output) == 1:
# find all corresponding indices to manufacturer based on selected bar
# get manufacturer corresponding to retrieved index
man = source.data['cyl_mfr'][output][1]
# temporary DataFrame
tmp = pd.DataFrame(source.data['cyl_mfr'].tolist(), columns=['cyl', 'mfr'])
# look up all corresponding indices for manufacturer "man"
indices = tmp.index[tmp.mfr == man].values.tolist()
# assing list of indices
source.selected['1d']['indices'] = indices
# assigning callbackfunction
source.on_change('selected', callback_tap)
curdoc().add_root(p)
Please note, that I change the output to run a bokeh server in order to have custom python callback. For debugging reasons, I redirected the outputs to files.
In my callback function, the first part is working fine, and I retrieve the indicees of the selected bar. Additionally, I find the corresponding indicees with an DataFrame, but at the end I'm struggle to assign the new indicees in a way, that the vbar figure is updated.
I'll be very happy, if someone can help me.

Categories