I am creating an interactive world map with Bokeh (Bokeh server). The countries are represented by patches. Countries should be selectable by the Taptool. However, some countries consist of several patches. By clicking on one country patch, the entire country, i.e. all corresponding patches should appear as selected.
I can solve this with the following code. However, there is a visible time lag between the selection of the patch I click on and the selection of the other patches that belong to the same country. Is there a more efficient or simpler way to achieve this?
from bokeh.models import ColumnDataSource, Patches
from bokeh.plotting import figure
from bokeh.layouts import row
from bokeh.io import curdoc
import pandas as pd
from bokeh.models.selections import Selection
x = [[5,2,4], [3,5,6], [6,9,7], [8,7,6]]
y = [[5,3,2], [6,5,8], [3,1,6], [1,2,1]]
country = ['A', 'A', 'B', 'B']
id = [0,1,2,3]
df = pd.DataFrame(data=dict(x=x, y=y, country=country, id=id))
source = ColumnDataSource(df)
p = figure(tools="tap")
renderer = p.patches('x', 'y', source=source)
def my_tap_handler(attr, old, new):
    """Extend a single-patch tap selection to every patch of the same country.

    Used as a Bokeh property-change callback. ``attr``, ``old`` and ``new``
    are supplied by Bokeh and are not used; the current selection is read
    from the module-level ``source`` and the country lookup from ``df``.
    """
    indices = source.selected.indices
    # Only a selection of exactly one patch is expanded. An empty selection
    # (tap on empty canvas) has no indices[0] to look up, and a multi-patch
    # selection is normally the one this handler wrote itself.
    if len(indices) != 1:
        return
    country_name = source.data['country'][indices[0]]
    country_indices = df['id'][df['country'] == country_name]
    # Plain ints rather than pandas/NumPy scalars.
    new_indices = [int(i) for i in country_indices]
    # A country made of a single patch is already fully selected; writing the
    # same selection again would only trigger this callback once more.
    if new_indices == list(indices):
        return
    source.selected = Selection(indices=new_indices)
renderer.data_source.on_change("selected", my_tap_handler)
curdoc().add_root(row(p, width=800))
Run in terminal: bokeh serve filename.py --show
Related
I'm using JupyterLab (v3.2.1) and Bokeh to create a webpage that allows a user to load a .csv file containing a matrix, and a slider to optionally set a threshold on the displayed results. The matrix simply contains numerical values. The result should be an interactive heatmap displayed below the confirmation button. With my code the webpage is displayed correctly, but the final plot is displayed in a new tab:
import warnings
warnings.filterwarnings('ignore')
import jupyter_bokeh
import ipywidgets as widgets
import pandas as pd
import io
from bokeh.io import show
from bokeh.models import ColorBar, ColumnDataSource, CategoricalColorMapper
from bokeh.plotting import figure
from bokeh.transform import transform
import bokeh.palettes
from IPython.display import display, clear_output, display_html
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.layouts import layout
#Display the webpage
file = widgets.FileUpload(accept=".txt, .csv, .dat", multiple=False)
# Integer slider (0-20) holding the count threshold; its value is only
# updated when the user releases the handle (continuous_update=False).
threshold = widgets.IntSlider(
    value=0,
    min=0,
    max=20,
    step=1,
    description="Threshold:",
    disabled=False,
    continuous_update=False,
    orientation='horizontal',  # keyword must be spelled "orientation" to take effect
    readout=True,
    readout_format="d",
)
button = widgets.Button(description='Run code')
text_0 = widgets.HTML(value="<header><h1>Phenotype Major Categories vs Genes Heatmap</h1></header>")
text_1 = widgets.HTML(value="<h3>Welcome to the heatmap plotter. By loading a csv file containing the counts of phenoypes for a gene into an IMPC major phenotype category, it will display an interactive heatmap.</h3>")
text_2 = widgets.HTML(value="Please load yor file (accepted formats: csv, txt, dat):")
text_3 = widgets.HTML(value="If desired, set a threshold for counts to be displayed:")
text_4 = widgets.HTML(value="<h2>Heatmap:</h2>")
vbox_head = widgets.VBox([text_0, text_1])
page_layout_plot = [text_2, file, text_3, threshold, button]
vbox_text = widgets.VBox(page_layout_plot)
page = widgets.VBox([vbox_head,vbox_text])
display(page)
#Set the endpage button to run the code
def on_button_clicked(result):
    """Build the heatmap from the uploaded matrix and show it.

    Reads the uploaded file from the module-level ``file`` widget and the
    cut-off from the ``threshold`` slider, drops the rows whose maximum is
    below the cut-off, and draws one coloured rectangle per matrix cell.

    Args:
        result: Argument handed over by the caller (the button click
            machinery); it is not used.
    """
    # Load the file and set the threshold
    inp = list(file.value.values())[0]  # if multiple is set to True, this will not work!
    content = io.StringIO(inp['content'].decode('utf-8'))
    mat = pd.read_csv(content, sep="\t", index_col=0)
    mat.index.name = 'MGI_id'
    mat.columns.name = 'phen_sys'

    # Filtering phase: keep every row that is not strictly below the threshold.
    # The mask is negated (instead of using >=) so rows whose maximum is NaN
    # are kept, exactly as a per-row "max < x" test would keep them.
    x = int(threshold.value)
    if x != 0:
        mat = mat.loc[~(mat.max(axis=1) < x)]

    # Create a custom palette and a mapper from values to colours. The values
    # are converted to strings so that a categorical colour mapper can be
    # used, which only contains the values actually present in the matrix.
    df = mat.stack(dropna=False).rename("value").reset_index()
    fact = df["value"].unique()
    fact.sort()  # sort numerically before the values become strings
    fact = fact.astype(str)
    df["value"] = df["value"].astype(str)
    mapper = CategoricalColorMapper(
        palette=bokeh.palettes.inferno(len(df["value"].unique())),
        factors=fact,
        nan_color='gray')

    # Define a figure. Tooltip fields reference data-source columns with "@".
    p = figure(
        plot_width=1280,
        plot_height=800,
        x_range=list(df.phen_sys.drop_duplicates()[::-1]),
        y_range=list(df.MGI_id.drop_duplicates()),
        tooltips=[('Phenotype system', '@phen_sys'), ('Gene', '@MGI_id'), ('Phenotypes', '@value')],
        x_axis_location="above",
        output_backend="webgl")

    # Create rectangles for heatmap
    p.rect(
        x="phen_sys",
        y="MGI_id",
        width=1,
        height=1,
        source=ColumnDataSource(df),
        fill_color=transform('value', mapper))
    p.xaxis.major_label_orientation = 45

    # Add legend
    color_bar = ColorBar(
        color_mapper=mapper,
        label_standoff=6,
        border_line_color=None)
    p.add_layout(color_bar, 'right')

    # NOTE(review): where show() renders depends on the output mode configured
    # elsewhere (e.g. output_notebook()); none is configured in this function.
    show(p)
button.on_click(on_button_clicked)
I already tried to use output_notebook() at the beginning but in that case nothing is displayed.
How can I fix it? It would be useful to display in real time the plot by changing the threshold without the need to click the confirmation button every time.
Thank you for all the help.
You might need to observe the value attribute of your threshold object to refresh your plot. Add something like this at the end of your code:
def on_value_change(change):
    """Re-run the button handler when an observed widget value changes.

    ``change`` is the notification passed in by the observer mechanism; it is
    ignored, and the button handler is called with ``None`` as its argument.
    """
    on_button_clicked(None)
threshold.observe(on_value_change, names='value')
More from the doc: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html#Signatures
I've studied the post
"How do I link the CrossHairTool in bokeh over several plots?" (see the linked question of that title).
I used the function written by Hamid Fadishei on June 2020 within this post but cannot manage to get the CrossHairTool to correctly display over several plots.
In my implementation, the crosshair displays only within the plot hovered over. I am currently using Bokeh version 2.1.1 with Python Anaconda version 3.7.6 using the Python extension in VSCode version 1.48. I am not familiar with Javascript, so any help to debug my code to correctly display the crosshair across the two plots will be welcomed.
My code:
# Importing libraries:
import pandas as pd
import random
from datetime import datetime, timedelta
from bokeh.models import CustomJS, CrosshairTool, ColumnDataSource, DatetimeTickFormatter, HoverTool
from bokeh.layouts import gridplot
from bokeh.plotting import figure, output_file, show
# Function written by Hamid Fadishei to enable a linked crosshair within gridplot:
def add_vlinked_crosshairs(figs):
    """Give every figure a CrosshairTool and wire JS callbacks between them.

    Each figure receives its own ``CrosshairTool``. On ``mousemove`` inside a
    figure's x/y ranges, the generated JavaScript sets
    ``spans.height.computed_location`` of every *other* figure's tool to the
    event's ``sx``; outside the ranges, and on ``mouseleave``, it sets that
    location to ``null``.

    Args:
        figs: Sequence of figures to link.
    """
    other_count = len(figs) - 1

    # One JS statement per "other" crosshair, numbered other0, other1, ...
    follow_lines = ''.join(
        '\t\t\tother%d.spans.height.computed_location = cb_obj.sx\n' % idx
        for idx in range(other_count)
    )
    reset_lines = ''.join(
        '\t\t\tother%d.spans.height.computed_location = null\n' % idx
        for idx in range(other_count)
    )

    js_move = (
        'if(cb_obj.x >= fig.x_range.start && cb_obj.x <= fig.x_range.end &&\n'
        'cb_obj.y >= fig.y_range.start && cb_obj.y <= fig.y_range.end){\n'
        + follow_lines
        + '}else{\n'
        + reset_lines
        + '}'
    )
    js_leave = reset_lines

    crosses = [CrosshairTool() for _ in figs]
    for position, fig in enumerate(figs):
        fig.add_tools(crosses[position])
        # Every crosshair except this figure's own, in original order.
        siblings = crosses[:position] + crosses[position + 1:]
        args = {'fig': fig}
        for k, tool in enumerate(siblings):
            args['other%d' % k] = tool
        fig.js_on_event('mousemove', CustomJS(args=args, code=js_move))
        fig.js_on_event('mouseleave', CustomJS(args=args, code=js_leave))
# Create dataframe consisting of 5 random numbers within column A and B as a function of an arbitrary time range:
startDate = datetime(2020,5,1)
timeStep = timedelta(minutes = 5)
df = pd.DataFrame({
"Date": [startDate + (i * timeStep) for i in range(5)],
"A": [random.randrange(1, 50, 1) for i in range(5)],
"B": [random.randrange(1, 50, 1) for i in range(5)]})
# Generate output file as html file:
output_file("test_linked_crosshair.html", title='Results')
# Define selection tools within gridplot:
select_tools = ["xpan", "xwheel_zoom", "box_zoom", "reset", "save"]
sample = ColumnDataSource(df)
# Define figures:
fig_1 = figure(plot_height=250,
plot_width=800,
x_axis_type="datetime",
x_axis_label='Time',
y_axis_label='A',
toolbar_location='right',
tools=select_tools)
fig_1.line(x='Date', y='A',
source=sample,
color='blue',
line_width=1)
fig_2 = figure(plot_height=250,
plot_width=800,
x_range=fig_1.x_range,
x_axis_type="datetime",
x_axis_label='Time',
y_axis_label='B',
toolbar_location='right',
tools=select_tools)
fig_2.line(x='Date', y='B',
source=sample,
color='red',
line_width=1)
# Define hover tools showing the time step and the value under the crosshair.
# Tooltip fields refer to data-source columns with an "@" prefix, and the
# formatters key uses the same "@column" spelling as the tooltip field.
fig_1.add_tools(HoverTool(tooltips=[('', '@Date{%F,%H:%M}'),
                                    ('', '@A{0.00 a}')],
                          formatters={'@Date': 'datetime'}, mode='vline'))
fig_2.add_tools(HoverTool(tooltips=[('', '@Date{%F,%H:%M}'),
                                    ('', '@B{0.00 a}')],
                          formatters={'@Date': 'datetime'}, mode='vline'))
# Calling function to enable linked crosshairs within gridplot:
add_vlinked_crosshairs([fig_1, fig_2])
# Generate gridplot:
p = gridplot([[fig_1], [fig_2]])
show(p)
[Image: myGraph]
Here's a solution that works as of Bokeh 2.2.1: Just use the same crosshair tool object for all the plots that need it linked. Like so:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
from bokeh.models import CrosshairTool
# Six figures, each with its own random line.
plots = [figure() for _ in range(6)]
for plot in plots:
    plot.line(np.arange(10), np.random.random(10))

# The same CrosshairTool instance is added to every plot.
linked_crosshair = CrosshairTool(dimensions="both")
for plot in plots:
    plot.add_tools(linked_crosshair)

show(gridplot(children=plots, ncols=3))
I'm trying to label a pandas DataFrame (containing time-series data) with the help of
a Bokeh line plot, the box_select tool and a TextInput widget in a Jupyter notebook. How can I access the data points selected by the box_select tool?
I tried to edit a similar problems code (Get selected data contained within box select tool in Bokeh) by changing the CustomJS to something like:
source.callback = CustomJS(args=dict(p=p), code="""
var inds = cb_obj.get('selected')['1d'].indices;
[source.data['xvals'][i] for i in inds] = 'b'
"""
)
but couldn't apply a change on the source of the selected points.
So the shortterm goal is to manipulate a specific column of source of the selected points.
Longterm I want to use a TextInput widget to label the selected points by the supplied Textinput. That would look like:
EDIT:
That's the current code I'm trying in the notebook, to reconstruct the issue:
from random import random
import bokeh as bk
from bokeh.layouts import row
from bokeh.models import CustomJS, ColumnDataSource, HoverTool
from bokeh.plotting import figure, output_file, show, output_notebook
output_notebook()
x = [random() for x in range(20)]
y = [random() for y in range(20)]
# "$index" is a special variable; "@label" looks up the "label" column of the data source.
hovertool = HoverTool(tooltips=[("Index", "$index"), ("Label", "@label")])
source = ColumnDataSource(data=dict(x=x, y=y, label=[i for i in "a"*20]))
p1 = figure(plot_width=400, plot_height=400, tools="box_select", title="Select Here")
p1.circle('x', 'y', source=source, alpha=0.6)
p1.add_tools(hovertool)
source.selected.js_on_change('indices', CustomJS(args=dict(source=source), code="""
var inds = cb_obj.indices;
for (var i = 0; i < inds.length; i++) {
source.data['label'][inds[i]] = 'b'
}
source.change.emit();
""")
)
layout = row(p1)
show(layout)
The main thing to note is that BokehJS can only automatically notice updates when actual assignments are made, e.g.
source.data = some_new_data
That would trigger an update. If you update the data "in place" then BokehJS is not able to notice that. You will have to be explicit and call source.change.emit() to let BokehJS know something has been updated.
However, you should also know that you are using three different things that are long-deprecated and will be removed in the release after next.
cb_obj.get('selected')
There is no need to ever use .get; you can just access properties directly:
cb_obj.selected
The ['1d'] syntax. This dict approach was very clumsy and will be removed very soon. For most selections you want the indices property of the selection:
source.selected.indices
source.callback
This is an ancient ad-hoc callback. There is a newer general mechanism for callbacks on properties that should always be used instead
source.selected.js_on_change('indices', CustomJS(...))
Note that in this case, the cb_obj is the selection, not the data source.
With the help of this guide on how to embed a bokeh server in the notebook I figured out the following minimal example for my purpose:
from random import random
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.models import Button
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, BoxSelectTool
from bokeh.models.widgets import TextInput
output_notebook()
def modify_doc(doc):
    """Add a plot, a label text box and a "Label Data" button to ``doc``.

    The plot draws a line plus scatter markers over the same data source and
    offers a width-only box-select tool. Clicking the button writes the text
    box content into the ``label`` column of every selected point, prints the
    data and exports it to ``new_data.csv``.

    Args:
        doc: The Bokeh document to which the layout is added as a root.
    """
    # create a plot and style its properties
    TOOLS = "pan,wheel_zoom,reset"
    p = figure(title="My chart", tools=TOOLS)
    p.xaxis.axis_label = 'X'
    p.yaxis.axis_label = 'Y'
    # "@label" looks up the "label" column of the data source.
    hovertool = HoverTool(tooltips=[("Index", "$index"), ("Label", "@label")])
    source = ColumnDataSource(
        data=dict(
            xvals=list(range(0, 10)),
            yvals=list(np.random.normal(0, 1, 10)),
            label=["a"] * 10,
        ))
    p.scatter("xvals", "yvals", source=source, color="white")
    p.line("xvals", "yvals", source=source)
    p.add_tools(BoxSelectTool(dimensions="width"))
    p.add_tools(hovertool)

    # create a callback that labels the currently selected points
    def callback():
        new_label = label_input.value.strip()
        inds = source.selected.indices
        if inds:
            # patch() announces the change to Bokeh; assigning into
            # source.data[...] in place is not noticed automatically.
            source.patch({'label': [(i, new_label) for i in inds]})
        print(source.data)
        new_data = pd.DataFrame(source.data)
        new_data.to_csv("new_data.csv", index=False)

    # TextInput to specify the label
    label_input = TextInput(title="Label")
    # add a button widget and configure with the call back
    button = Button(label="Label Data")
    button.on_click(callback)
    # put the button and plot in a layout and add to the document
    doc.add_root(column(button, label_input, p))
show(modify_doc, notebook_url="http://localhost:8888")
That generates the following UI:
BTW: Due to the non-existing box_select tool for the line glyph I use a workaround by combining it with invisible scatter points.
So far so good, is there a more elegant way to access the data.source/new_data df in the notebook outside modify_doc() than exporting it within the callback?
I have a piece of sample Python that makes a waterfall visual.
It uses the bokeh lib
It looks great and works well in Jupyter but when I come to use it in PowerBI I get an error saying that no image was created
the code uses show(p) which seems to open an internet explorer page when I run it in PowerBI
I tried a matplotlib example and it uses :
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
is there something similar for bokeh lib ?
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd
#output_notebook()
# Create the initial dataframe
index = ['sales', 'returns', 'credit fees', 'rebates', 'late charges', 'shipping']
data = {'amount': [350000, -30000, -7500, -25000, 95000, -7000]}
df = pd.DataFrame(data=data, index=index)

# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()
df['running_total'] = df['amount'].cumsum()
# Each bar starts where the previous running total ended.
df['y_start'] = df['running_total'] - df['amount']

# Where do we want to place the label?
df['label_pos'] = df['running_total']

# Closing "net" bar: runs from 0 up to the net total.
df_net = pd.DataFrame.from_records([(net, net, 0, net)],
                                   columns=['amount', 'running_total', 'y_start', 'label_pos'],
                                   index=["net"])
# DataFrame.append was removed in pandas 2.0; pd.concat appends the row instead.
df = pd.concat([df, df_net])

# Colour by sign of the amount; amounts above 300000 override green with blue.
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'
df.loc[df.amount > 0, 'color'] = 'green'
df.loc[df.amount > 300000, 'color'] = 'blue'

# Shift the labels of negative bars down by 10000.
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000
df["bar_label"] = df["amount"].map('{:,.0f}'.format)
TOOLS = "box_zoom,reset,save"
source = ColumnDataSource(df)
p = figure(tools=TOOLS, x_range=list(df.index), y_range=(0, net+40000),
plot_width=800, title = "Sales Waterfall")
p.segment(x0='index', y0='y_start', x1="index", y1='running_total',
source=source, color="color", line_width=55)
p.grid.grid_line_alpha=0.3
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"
labels = LabelSet(x='index', y='label_pos', text='bar_label',
text_font_size="8pt", level='glyph',
x_offset=-20, y_offset=0, source=source)
p.add_layout(labels)
show(p)
There is a chapter of the User's Guide dedicated to Exporting Plots:
from bokeh.io import export_png
export_png(plot, filename="plot.png")
Note that you will need to have the necessary optional dependencies (PhantomJS and selenium) installed.
I am trying to plot RPI, CPI and CPIH on one chart with a HoverTool showing the value of each when you pan over a given area of the chart.
I initially tried adding each line separately using line() which kind of worked:
However, the HoverTool only works correctly when you scroll over the individual lines.
I have tried using multi_line() like:
combined_inflation_metrics = 'combined_inflation_metrics.csv'
df_combined_inflation_metrics = pd.read_csv(combined_inflation_metrics)
combined_source = ColumnDataSource(df_combined_inflation_metrics)
l.multi_line(xs=['Date','Date','Date'],ys=['RPI', 'CPI', 'CPIH'], source=combined_source)
#l.multi_line(xs=[['Date'],['Date'],['Date']],ys=[['RPI'], ['CPI'], ['CPIH']], source=combined_source)
show(l)
However, this is throwing the following:
RuntimeError:
Supplying a user-defined data source AND iterable values to glyph methods is
not possibe. Either:
Pass all data directly as literals:
p.circe(x=a_list, y=an_array, ...)
Or, put all data in a ColumnDataSource and pass column names:
source = ColumnDataSource(data=dict(x=a_list, y=an_array))
p.circe(x='x', y='y', source=source, ...)
But I am not too sure why this is?
Update:
I figured out a workaround by adding all of the values in each of the data sources. It works, but doesn't feel most efficient and would still like to know how to do this properly.
Edit - Code request:
from bokeh.plotting import figure, output_file, show
from bokeh.models import NumeralTickFormatter, DatetimeTickFormatter, ColumnDataSource, HoverTool, CrosshairTool, SaveTool, PanTool
import pandas as pd
import os
os.chdir(r'path')
#output_file('Inflation.html', title='Inflation')
RPI = 'RPI.csv'
CPI = 'CPI.csv'
CPIH = 'CPIH.csv'
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
def to_date_time(data_frame, data_series):
    """Convert one column of a DataFrame to ``datetime64[ns]`` in place.

    Args:
        data_frame: DataFrame whose column is overwritten.
        data_series: Label of the column to convert.

    Returns:
        None; ``data_frame`` is modified directly.
    """
    as_datetime = data_frame[data_series].astype('datetime64[ns]')
    data_frame[data_series] = as_datetime
to_date_time(df_RPI, 'Date')
to_date_time(df_CPI, 'Date')
to_date_time(df_CPIH, 'Date')
RPI_source = ColumnDataSource(df_RPI)
CPI_source = ColumnDataSource(df_CPI)
CPIH_source = ColumnDataSource(df_CPIH)
l = figure(title="Historic Inflaiton Metrics", logo=None)
l.plot_width = 1200
l.xaxis[0].formatter=DatetimeTickFormatter(
days=["%d %B %Y"],
months=["%d %B %Y"],
years=["%d %B %Y"],
)
glyph_1 = l.line('Date','RPI',source=RPI_source, legend='TYPE', color='red')
glyph_2 = l.line('Date','CPI',source=CPI_source, legend='TYPE', color='blue')
glyph_3 = l.line('Date','CPIH',source=CPIH_source, legend='TYPE', color='gold')
# Hover tool attached to the first line only; it looks the tooltip fields up
# in that renderer's data source. Fields are referenced with an "@" prefix.
# NOTE(review): newer Bokeh releases expect the formatters key to carry the
# same "@" prefix ("@Date") - confirm against the installed version.
hover = HoverTool(renderers=[glyph_1],
                  tooltips=[("Date", "@Date{%F}"),
                            ("RPI", "@RPI"),
                            ("CPI", "@CPI"),
                            ("CPIH", "@CPIH")],
                  formatters={"Date": "datetime"},
                  mode='vline'
                  )
l.tools = [SaveTool(), PanTool(), hover, CrosshairTool()]
show(l)
The hover tool looks up the data to show in the ColumnDataSource. Because you created a new ColumnDataSource for each line and restricted the hover tool to line1 it can only lookup data in the data source there.
The general solution is to only create one ColumnDataSource and reuse that in each line:
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
# Join the three tables on their shared "Date" column so that a single
# ColumnDataSource holds the RPI, CPI and CPIH values.
df = df_RPI.merge(df_CPI, on="Date")
df = df.merge(df_CPIH, on="Date")
source = ColumnDataSource(df)
l = figure(title="Historic Inflation Metrics", logo=None)
glyph_1 = l.line('Date','RPI',source=source, legend='RPI', color='red')
l.line('Date','CPI',source=source, legend='CPI', color='blue')
l.line('Date','CPIH',source=source, legend='CPIH', color='gold')
# Hover tool attached to the first line only; the tooltip fields it shows are
# referenced with an "@" prefix.
# NOTE(review): newer Bokeh releases expect the formatters key to carry the
# same "@" prefix ("@Date") - confirm against the installed version.
hover = HoverTool(renderers=[glyph_1],
                  tooltips=[("Date", "@Date{%F}"),
                            ("RPI", "@RPI"),
                            ("CPI", "@CPI"),
                            ("CPIH", "@CPIH")],
                  formatters={"Date": "datetime"},
                  mode='vline'
                  )
show(l)
This is of course only possible if all your dataframes can be merged into one, i.e. the measurement timepoints are the same. If they are not, then apart from resampling/interpolating I do not know of a good method to do what you want.