How to put interactive bokeh HTML image in Jupyterlab page

How to put interactive bokeh HTML image in Jupyterlab page - python

I'm using Jupyterlab (v 3.2.1) and bokeh to create a webpage that allows a user to load a .csv file containing a matrix, and a slider to optionally set a threshold on displayed results. The matrix simply contains some numerical values. The result should be an interactive heatmap displayed below the confirmation button. With my code the webpage is displayed correctly, but the final plot is displayed in a new tab:
import warnings
warnings.filterwarnings('ignore')
import jupyter_bokeh
import ipywidgets as widgets
import pandas as pd
import io
from bokeh.io import show
from bokeh.models import ColorBar, ColumnDataSource, CategoricalColorMapper
from bokeh.plotting import figure
from bokeh.transform import transform
import bokeh.palettes
from IPython.display import display, clear_output, display_html
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.layouts import layout
#Display the webpage
# Upload control for the matrix file (single file only).
file = widgets.FileUpload(accept=".txt, .csv, .dat", multiple=False)
# Threshold slider. The keyword is `orientation`; the previous spelling
# (`orintation`) is not a slider trait, so the setting never took effect.
threshold = widgets.IntSlider(
    value=0,
    min=0,
    max=20,
    step=1,
    description="Threshold:",
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format="d",
)
button = widgets.Button(description='Run code')
# Static HTML captions shown around the controls.
text_0 = widgets.HTML(value="<header><h1>Phenotype Major Categories vs Genes Heatmap</h1></header>")
text_1 = widgets.HTML(value="<h3>Welcome to the heatmap plotter. By loading a csv file containing the counts of phenoypes for a gene into an IMPC major phenotype category, it will display an interactive heatmap.</h3>")
text_2 = widgets.HTML(value="Please load yor file (accepted formats: csv, txt, dat):")
text_3 = widgets.HTML(value="If desired, set a threshold for counts to be displayed:")
text_4 = widgets.HTML(value="<h2>Heatmap:</h2>")
# Header on top, then the upload / threshold / run controls.
vbox_head = widgets.VBox([text_0, text_1])
page_layout_plot = [text_2, file, text_3, threshold, button]
vbox_text = widgets.VBox(page_layout_plot)
page = widgets.VBox([vbox_head, vbox_text])
display(page)
#Set the endpage button to run the code
def on_button_clicked(result):
    """Build the heatmap from the uploaded matrix and show it.

    Registered as the click handler of ``button``. ``result`` is whatever
    the caller passes in and is not used. The matrix is read from the
    ``file`` upload widget and the cut-off from the ``threshold`` slider.
    """
    if not file.value:
        # Nothing has been uploaded yet, so there is no matrix to plot.
        return
    #Load the file and set the threshold
    inp = list(file.value.values())[0] #if multiple setted to true, will not work!
    content = io.StringIO(inp['content'].decode('utf-8'))
    mat = pd.read_csv(content, sep="\t", index_col=0)
    mat.index.name = 'MGI_id'
    mat.columns.name = 'phen_sys'
    #filtering phase
    x = int(threshold.value)
    if x != 0:
        # Drop every row whose largest value is below the threshold.
        # Negating "< x" (rather than testing ">= x") keeps rows whose
        # maximum is NaN, exactly as the row-by-row comparison did.
        too_low = mat.max(axis=1) < x
        mat = mat.loc[~too_low]
    #Create a custom palette and add a specific mapper to map color with values, we are converting them to strings to create a categorical color mapper to include only the
    #values that we have in the matrix and retrieve a better representation
    df = mat.stack(dropna=False).rename("value").reset_index()
    fact = df.value.unique()
    fact.sort()  # sort numerically before the values become strings
    fact = fact.astype(str)
    df.value = df.value.astype(str)
    mapper = CategoricalColorMapper(
        palette=bokeh.palettes.inferno(len(fact)),
        factors=fact,
        nan_color='gray')
    #Define a figure
    p = figure(
        plot_width=1280,
        plot_height=800,
        x_range=list(df.phen_sys.drop_duplicates()[::-1]),
        y_range=list(df.MGI_id.drop_duplicates()),
        # Data-source columns are referenced with "@name" in hover tooltips.
        tooltips=[('Phenotype system', '@phen_sys'), ('Gene', '@MGI_id'), ('Phenotypes', '@value')],
        x_axis_location="above",
        output_backend="webgl")
    #Create rectangles for heatmap
    p.rect(
        x="phen_sys",
        y="MGI_id",
        width=1,
        height=1,
        source=ColumnDataSource(df),
        fill_color=transform('value', mapper))
    p.xaxis.major_label_orientation = 45
    #Add legend
    color_bar = ColorBar(
        color_mapper=mapper,
        label_standoff=6,
        border_line_color=None)
    p.add_layout(color_bar, 'right')
    show(p)
button.on_click(on_button_clicked)
I already tried to use output_notebook() at the beginning but in that case nothing is displayed.
How can I fix it? It would be useful to display in real time the plot by changing the threshold without the need to click the confirmation button every time.
Thank you for all the help.

You might need to observe the value attribute of your threshold object to refresh your plot. So add something like this at the end of your code:
def on_value_change(change):
    """Redraw the plot when the observed widget value changes."""
    # The click handler does not use its argument, so None stands in for it.
    placeholder = None
    on_button_clicked(placeholder)
threshold.observe(on_value_change, names='value')
More from the doc: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html#Signatures

Related

How to plot visualization with Interactive Feature Selection in Bokeh, Python

The task is to automate the visualization. The CSV file contains a large number of features (columns) — currently 32, and this may increase in the future. The task is to plot an interactive visualization. All the examples I found hard-code the features instead of selecting them dynamically.
But the requirement is to make the stuff dynamic. How to make it dynamic? Please guide.
I have successfully plotted the graph dynamically, but could not connect the interactive part. The code is as follows:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import CustomJS,HoverTool,ColumnDataSource,Select
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.annotations import Title, Legend
import itertools
from bokeh.palettes import inferno
from bokeh.layouts import row
def creat_plot(dataframe):
    """Plot the feature columns of ``dataframe`` against time with a checkbox list.

    ``dataframe`` must contain a ``timestamp`` column. The frame is modified
    in place: ``timestamp`` is converted to datetimes and two helper columns,
    ``date`` and ``time``, are appended for the hover tool. The figure and
    the checkbox group are shown side by side; nothing is returned.
    """
    data = dataframe
    #Converting the timestamp Column to Timestamp datatype so that it can be used for Plotting on X-axis
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    #Segregating Date and Time from timestamp column. It will be used in Hover Tool
    stamp_text = data['timestamp'].map(str)
    data['date'] = stamp_text.str[:10]
    data['time'] = stamp_text.str[11:]
    #Converting whole dataframe ColumnDatasource for easy usage in hover tool
    source = ColumnDataSource(data)
    # List all the tools that you want in your plot separated by comas, all in one string.
    TOOLS = "crosshair,pan,wheel_zoom,box_zoom,reset,hover"
    # New figure
    t = figure(x_axis_type="datetime", width=1500, height=600, tools=TOOLS, title="Plot for Interactive Features")
    #X-axis Legend Formatter
    t.xaxis.formatter.days = '%d/%m/%Y'
    #Axis Labels
    t.yaxis.axis_label = 'Count'
    t.xaxis.axis_label = 'Date and Time Span'
    #Grid Line Formatter
    t.ygrid.minor_grid_line_color = 'navy'
    t.ygrid.minor_grid_line_alpha = 0.1
    t.xgrid.visible = True
    t.ygrid.visible = True
    #Hover Tool Usage ("@name" pulls the value from the data-source column)
    t.select_one(HoverTool).tooltips = [('Date', '@date'), ('Time', '@time')]
    #A color iterator creation
    colors = itertools.cycle(inferno(len(data.columns)))
    #A Line type iterator creation
    line_types = ['solid', 'dashed', 'dotted', 'dotdash', 'dashdot']
    lines = itertools.cycle(line_types)
    #Looping over the columns to plot the Data
    # The first two columns and the two helper columns added above are skipped.
    column_name = list(data.columns[2:-2])
    for m in column_name:
        t.line(data.columns[0], m, color=next(colors), source=source, line_dash=next(lines), alpha=1)
    #Adding Label Selection Check Box List
    # Pre-tick up to three boxes, but never more boxes than exist.
    checkboxes = CheckboxGroup(labels=column_name, active=list(range(min(3, len(column_name)))))
    show(row(t, checkboxes))
The above function can be used as follows:
dataframe= pd.read_csv('data.csv')
creat_plot(dataframe)
The above code was run with the following versions:
Bokeh version: 2.2.3
Panda Version: 1.1.3
The plot should be linked with the checkbox values. The features selected through the checkboxes shall be plotted only.
The solution to the above requirement is as follows:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show,output_file
from bokeh.models import CustomJS,HoverTool,ColumnDataSource,Select
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.annotations import Title, Legend
import itertools
from bokeh.palettes import inferno
from bokeh.layouts import row
def creat_plot(dataframe):
    """Plot the feature columns of ``dataframe`` with checkbox-controlled visibility.

    ``dataframe`` must contain a ``timestamp`` column. The frame is modified
    in place: ``timestamp`` is converted to datetimes and two helper columns,
    ``date`` and ``time``, are appended for the hover tool. Every feature line
    starts hidden; ticking a checkbox shows the matching line. The result is
    written to ``Interactive_data_visualization.html`` and shown.
    """
    data = dataframe
    #Converting the timestamp Column to Timestamp datatype so that it can be used for Plotting on X-axis
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    #Segregating Date and Time from timestamp column. It will be used in Hover Tool
    stamp_text = data['timestamp'].map(str)
    data['date'] = stamp_text.str[:10]
    data['time'] = stamp_text.str[11:]
    #Converting whole dataframe ColumnDatasource for easy usage in hover tool
    source = ColumnDataSource(data)
    # List all the tools that you want in your plot separated by comas, all in one string.
    TOOLS = "crosshair,pan,wheel_zoom,box_zoom,reset,hover"
    # New figure
    t = figure(x_axis_type="datetime", width=1500, height=600, tools=TOOLS, title="Plot for Interactive Visualization")
    #X-axis Legend Formatter
    t.xaxis.formatter.days = '%d/%m/%Y'
    #Axis Labels
    t.yaxis.axis_label = 'Count'
    t.xaxis.axis_label = 'Date and Time Span'
    #Grid Line Formatter
    t.ygrid.minor_grid_line_color = 'navy'
    t.ygrid.minor_grid_line_alpha = 0.1
    t.xgrid.visible = True
    t.ygrid.visible = True
    #Hover Tool Usage ("@name" pulls the value from the data-source column)
    t.select_one(HoverTool).tooltips = [('Date', '@date'), ('Time', '@time')]
    #A color iterator creation
    colors = itertools.cycle(inferno(len(data.columns)))
    #A Line type iterator creation
    line_types = ['solid', 'dashed', 'dotted', 'dotdash', 'dashdot']
    lines = itertools.cycle(line_types)
    #Looping over the columns to plot the Data
    # The first two columns and the two helper columns added above are skipped.
    column_name = list(data.columns[2:-2])
    #Solution to my question is here
    # Keep each renderer so the JS callback can toggle it; all start hidden.
    feature_lines = [
        t.line(data.columns[0], m, color=next(colors), source=source,
               line_dash=next(lines), alpha=1, visible=False)
        for m in column_name
    ]
    #Adding Label Selection Check Box List
    #Solution to my question,
    checkbox = CheckboxGroup(labels=column_name, active=[])
    #Solution to my question
    # `checkbox.active` holds the ticked positions. Membership must be tested
    # with includes(): the JS `in` operator checks array indices, not values.
    callback = CustomJS(args=dict(feature_lines=feature_lines, checkbox=checkbox), code="""
    for (let i = 0; i < feature_lines.length; ++i) {
        feature_lines[i].visible = checkbox.active.includes(i)
    }
    """)
    checkbox.js_on_change('active', callback)
    output_file('Interactive_data_visualization.html')
    show(row(t, checkbox))

Dynamically adding and removing Bokeh legends

I'm trying to develop a relatively complex plotting application, which has a huge selection of data to plot. Using dropdowns, the user can select which lines they would like to plot. I've developed a largely simplified version of the code (shown below) to illustrate what my application is like.
import bokeh.plotting.figure as bk_figure
import random
import numpy as np
from bokeh.io import show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook # enables plot interface in J notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
global x, ys
output_notebook()
plot = bk_figure(plot_width=950, plot_height=800, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines
for i in range(len(lines)):
ys.append(x*i)
#add line 0 to plot initially
source = ColumnDataSource(data={'x':x,'y':ys[0]})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
def change_line(attr, old, new):
    """Rebuild the plot so it holds one line per entry selected in ``line_select``.

    Bokeh ``on_change`` callback; ``attr``, ``old`` and ``new`` are unused.
    """
    # Iterate over a snapshot because the renderer list shrinks as we go.
    for stale in tuple(plot.renderers):
        plot.renderers.remove(stale)
    # One fresh data source and Line glyph per selected option.
    for selected in line_select.value:
        series = ColumnDataSource(data={'x': x, 'y': ys[int(selected)]})
        plot.add_glyph(series, Line(x='x', y='y'))
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
    """Attach the widget/plot layout to ``doc`` and set the document title."""
    root = row(layout, width=800)
    doc.add_root(root)
    doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)
I've decided to dynamically add and remove line glyphs from the plot as they are selected in the MultiSelect. This is because if I simply hide the lines, the performance of the program suffers, given that there are so many line options in the real dataset.
Problem:
I want to add a legend to the plot which only contains entries for the Line glyphs that are currently in the plot (there are far too many line options in the real dataset to have all of them visible in the legend at all times.) I've been having issues finding any resources to help with this: for most applications, something like this is sufficient, but this doesn't work with the way I've defined the lines I'm plotting.
I've been adding legends manually, for example:
#add line 0 to plot initially
source = ColumnDataSource(data={'x':x,'y':ys[0]})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
#create first legend
legend_item = [LegendItem(label=lines[0],\
renderers=[glyph])]
legend = Legend(items=legend_item)
plot.add_layout(legend,place='right')
but I can't figure out how to effectively remove the legend layouts from the plot once I've added them. After reading the source code for add_layout, I realized that you could get a list of layouts in a given location by using something like getattr(plot,'right'). Trying to use this, I replaced the change_line function with the following:
def change_line(attr, old, new):
    """Replace all lines and the right-hand legend to match ``line_select``.

    Bokeh ``on_change`` callback; ``attr``, ``old`` and ``new`` are unused.
    """
    # Snapshots are iterated because both lists are mutated while looping.
    for stale in tuple(plot.renderers):
        plot.renderers.remove(stale)
    # Drop everything currently placed on the right side of the plot.
    for placed in tuple(plot.right):
        plot.right.remove(placed)
    entries = []
    for selected in line_select.value:
        series = ColumnDataSource(data={'x': x, 'y': ys[int(selected)]})
        renderer = plot.add_glyph(series, Line(x='x', y='y'))
        entries.append(LegendItem(label='line ' + str(selected), renderers=[renderer]))
    # A new legend listing only the lines that were just added.
    plot.add_layout(Legend(items=entries), place='right')
Checking the attributes of the plot, this appears to add and remove legends and lines correctly, but it causes the plot to completely stop visually updating.
Does anyone know how to accomplish this behavior? It's possible that I'm not even adding the legend in the correct way, but I couldn't figure out how else to add them when lines are defined as Glyph objects.

Basic glyphs provide much flexibility compared to chart/model classes. A basic line (not Line) glyph can be used here.
In the code below, I am adding basic glyphs to the chart. I am saving the glyphs in a dictionary which can be actioned later (as OP said its a complex application, I am sure this will be used later). I have commented the ColumnDataSource creation, as it will accessible through data_source.data of respective glyphs (now saved in dictionary).
Also, since now we are creating lines one by one, color needs to be provided for different lines. I have used a bokeh.palette function to generate a number of colors. More on this can be read here
import bokeh.plotting.figure as bk_figure
import random
import numpy as np
from bokeh.io import show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook # enables plot interface in J notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
import bokeh.palettes
#change the number as per the max number of glyphs in system
palette = bokeh.palettes.inferno(5)
global x, ys
output_notebook()
plot = bk_figure(plot_width=950, plot_height=800, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines
for i in range(len(lines)):
ys.append(x*i)
linedict = {}
#add line 0 to plot initially
#source = ColumnDataSource(data={'x':x,'y':ys[0]})
#glyph = Line(x='x',y='y')
#glyph = plot.add_glyph(source,glyph)
l1 = plot.line(x = x, y= ys[0], legend=str(0), color = palette[0])
linedict[str(0)] = l1
def change_line(attr, old, new):
    """Redraw the plot with one ``plot.line`` per entry selected in ``line_select``.

    Bokeh ``on_change`` callback; ``attr``, ``old`` and ``new`` are unused.
    """
    #remove old lines
    for renderer in list(plot.renderers):
        plot.renderers.remove(renderer)
    #add selected lines to plot
    for i, line in enumerate(line_select.value):
        # plot.line builds the data source, the glyph and the legend entry in
        # one call. The extra Line(...)/add_glyph(source, ...) pair that used
        # to follow referenced an undefined `source` and passed `legend` /
        # `color`, which are not Line properties, so it has been removed.
        # The modulo keeps the colour lookup valid if more lines are selected
        # than the palette has entries.
        plot.line(x=x, y=ys[int(line)], legend=line, color=palette[i % len(palette)])
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
    """Attach the widget/plot layout to ``doc`` and set the document title."""
    root = row(layout, width=800)
    doc.add_root(root)
    doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

After much anguish, I finally figured it out (this link was helpful). @Eugene Pakhomov was correct that removing lines and legends in my initial code was a problem. Instead, the key was to initialize a new line only when the user requested to plot a new maximum number of lines. In all other cases, you can simply edit the data_source of existing lines. This allows the program to avoid having all the lines plotted and hidden when the user only wants to plot one or two of the total options.
Instead of deleting and remaking the legend, you can set it to be empty on every update, then add entries as needed.
The following code worked for me in a Jupyter Notebook running bokeh 1.4.0:
from bokeh.io import show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.palettes import Category10 as palette
output_notebook()
plot = bk_figure(plot_width=750, plot_height=600, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines with 0,1, and 2 slope
for i in range(len(lines)):
ys.append(x*i)
#add line 0 to plot initially
source = ColumnDataSource(data={'x':x,'y':ys[0]})
glyph = Line(x='x',y='y')
glyph = plot.add_glyph(source,glyph)
#intialize Legend
legend = Legend(items=[LegendItem(label=lines[0],renderers=[glyph])])
plot.add_layout(legend)
def change_line(attr, old, new):
    """Show exactly the lines selected in ``line_select``, reusing renderers.

    Bokeh ``on_change`` callback; ``attr``, ``old`` and ``new`` are unused.
    Existing renderers get new data instead of being removed; a renderer is
    only created when more lines are requested than have ever been drawn.
    The legend is emptied and refilled on every call.
    """
    plot.legend.items = [] #reset the legend
    selected = line_select.value
    #add selected lines to plot
    for i, line in enumerate(selected):
        line_num = int(line)
        if i < len(plot.renderers):
            #if i lines have already been plotted in the past, just edit an existing line
            renderer = plot.renderers[i]
            # Use the public data_source attribute, not _property_values.
            renderer.data_source.data = {'x': x, 'y': ys[line_num]}
        else:
            #otherwise, initialize an entirely new line with a new data source
            source = ColumnDataSource(data={'x': x, 'y': ys[line_num]})
            renderer = plot.add_glyph(source, Line(x='x', y='y', line_color=palette[10][i]))
        #Add a new legend entry
        plot.legend.items.append(LegendItem(label=line, renderers=[renderer]))
    #'Remove' all extra lines by making them contain no data
    #instead of outright deleting them, which Bokeh dislikes
    # Slicing by the selection length also works when nothing is selected,
    # where the loop variable above would never have been bound.
    for extra in plot.renderers[len(selected):]:
        extra.data_source.data = {'x': [], 'y': []}
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
    """Attach the widget/plot layout to ``doc`` and set the document title."""
    root = row(layout, width=800)
    doc.add_root(root)
    doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

bokeh: How to edit a df or CDS-object through box_select?

I'm trying to label a pandas-df (containing timeseries data) with the help of
a bokeh-lineplot, box_select tool and a TextInput widget in a jupyter-notebook. How can I access the data points selected with box_select?
I tried to edit a similar problems code (Get selected data contained within box select tool in Bokeh) by changing the CustomJS to something like:
source.callback = CustomJS(args=dict(p=p), code="""
var inds = cb_obj.get('selected')['1d'].indices;
[source.data['xvals'][i] for i in inds] = 'b'
"""
)
but couldn't apply a change on the source of the selected points.
So the shortterm goal is to manipulate a specific column of source of the selected points.
Longterm I want to use a TextInput widget to label the selected points by the supplied Textinput. That would look like:
EDIT:
That's the current code I'm trying in the notebook, to reconstruct the issue:
from random import random
import bokeh as bk
from bokeh.layouts import row
from bokeh.models import CustomJS, ColumnDataSource, HoverTool
from bokeh.plotting import figure, output_file, show, output_notebook
output_notebook()
# Twenty random points, all initially labelled "a".
x = [random() for _ in range(20)]
y = [random() for _ in range(20)]
# "$index" is a built-in hover field; "@label" reads the `label` column of
# the data source (a leading "#" would be shown literally).
hovertool = HoverTool(tooltips=[("Index", "$index"), ("Label", "@label")])
source = ColumnDataSource(data=dict(x=x, y=y, label=["a"] * 20))
p1 = figure(plot_width=400, plot_height=400, tools="box_select", title="Select Here")
p1.circle('x', 'y', source=source, alpha=0.6)
p1.add_tools(hovertool)
# Relabel the selected points in the browser. The data is edited in place,
# so change.emit() is called to tell BokehJS about the update.
source.selected.js_on_change('indices', CustomJS(args=dict(source=source), code="""
    var inds = cb_obj.indices;
    for (var i = 0; i < inds.length; i++) {
        source.data['label'][inds[i]] = 'b'
    }
    source.change.emit();
    """)
)
layout = row(p1)
show(layout)

The main thing to note is that BokehJS can only automatically notice updates when actual assignments are made, e.g.
source.data = some_new_data
That would trigger an update. If you update the data "in place" then BokehJS is not able to notice that. You will have to be explicit and call source.change.emit() to let BokehJS know something has been updated.
However, you should also know that you are using three different things that are long-deprecated and will be removed in the release after next.
cb_obj.get('selected')
There is no need to ever use .get You can just access properties directly:
cb_obj.selected
The ['1d'] syntax. This dict approach was very clumsy and will be removed very soon. For most selections you want the indices property of the selection:
source.selected.indices
source.callback
This is an ancient ad-hoc callback. There is a newer general mechanism for callbacks on properties that should always be used instead
source.selected.js_on_change('indices', CustomJS(...))
Note that in this case, the cb_obj is the selection, not the data source.

With the help of this guide on how to embed a bokeh server in the notebook I figured out the following minimal example for my purpose:
from random import random
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.models import Button
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, BoxSelectTool
from bokeh.models.widgets import TextInput
output_notebook()
def modify_doc(doc):
    """Build the labeling app (plot, text input, button) and add it to ``doc``.

    Points are selected with the box-select tool; pressing the button writes
    the text-input value into the ``label`` column for the selected points
    and saves the whole data source to ``new_data.csv``.
    """
    # create a plot and style its properties
    TOOLS = "pan,wheel_zoom,reset"
    p = figure(title="My chart", tools=TOOLS)
    p.xaxis.axis_label = 'X'
    p.yaxis.axis_label = 'Y'
    # "@label" reads the `label` column of the data source.
    hovertool = HoverTool(tooltips=[("Index", "$index"), ("Label", "@label")])
    source = ColumnDataSource(
        data=dict(
            xvals=list(range(0, 10)),
            yvals=list(np.random.normal(0, 1, 10)),
            label=["a"] * 10,
        ))
    # The white scatter points exist only so that box-select has something
    # to select; the line drawn on top is what is visible.
    p.scatter("xvals", "yvals", source=source, color="white")
    p.line("xvals", "yvals", source=source)
    p.add_tools(BoxSelectTool(dimensions="width"))
    p.add_tools(hovertool)

    # create a callback that applies the typed label to the selected points
    def callback():
        inds = source.selected.indices
        new_label = label_input.value.strip()
        if inds:
            # patch() sends the change to the browser; assigning into the
            # list held by source.data would not be noticed by Bokeh.
            source.patch({'label': [(i, new_label) for i in inds]})
        print(source.data)
        new_data = pd.DataFrame(source.data)
        new_data.to_csv("new_data.csv", index=False)

    # TextInput to specify the label
    label_input = TextInput(title="Label")
    # add a button widget and configure with the call back
    button = Button(label="Label Data")
    button.on_click(callback)
    # put the button and plot in a layout and add to the document
    doc.add_root(column(button, label_input, p))
show(modify_doc, notebook_url="http://localhost:8888")
That generates the following UI:
BTW: Due to the non-existing box_select tool for the line glyph I use a workaround by combining it with invisible scatter points.
So far so good, is there a more elegant way to access the data.source/new_data df in the notebook outside modify_doc() than exporting it within the callback?

Bokeh not interpreting correct scale with SQL data

Explored lots of various solutions here but not finding one that works. I'm using sqlite and pandas to read data from a SQL database, but Bokeh doesn't like the date. I've tried conversions to datetime, unixepoch, etc. and they all seem to yield the same result.
EDIT: Here's the full code:
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN ds ELSE df END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# proj['projdate'] = [datetime.datetime.strptime(x, "%Y-%m-%d").date() for x in proj['projdate']]
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
    """Return the rows of ``proj`` whose year lies within the two sliders.

    The range is inclusive: ``min_year.value <= year <= max_year.value``.
    """
    # `year` is produced by strftime('%Y', ...) in the SQL and arrives as
    # text, so convert it before comparing with the numeric slider values.
    years = pd.to_numeric(proj.year, errors="coerce")
    # The upper bound must be "<=" (it was ">=", which ignored the start
    # slider whenever the end slider was larger).
    in_range = (years >= min_year.value) & (years <= max_year.value)
    return proj[in_range]
def update():
    """Load the rows chosen by ``select_rep`` into ``source``."""
    selected = select_rep()
    # `source` is declared with x, y and year columns; supply all three so
    # the replacement data keeps the same set of columns.
    source.data = dict(
        x=selected["x"],
        y=selected["y"],
        year=selected["year"],
    )
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"
The SQLite output in Terminal.app looks like this:
SQL
The result is, that the x-axis displays in milliseconds. Also, the y-axis is showing up as exponential notation:
Bokeh Plot
The issue seems somehow related to pandas use of indexing, and thus I can't reference "x" here. I rename the columns and force list sets which, by themselves, will print correctly... and should therefore plot into the line properly but as you'll see below, they don't:
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
To get the line to render in Bokeh, I have to pass it the index because passing it anything else doesn't seem to get the glyph to render. So currently I have this:
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
Tried converting to unixepoch in the SQL, same result.
Tried converting to unixepoch in the data, same result.
Tried using DatetimeTickFormatter, which just shows all 5-6 years as one year (I think it's displaying the milliseconds as years rather than converting them from milliseconds to days).
I've looked here and in github, up and down, and tried different things but ultimately I can't find one working example where the source is a sql query not a csv.

None of these things have anything to do with SQL, Bokeh only cares about the data that you give it, not where it came from. You have specified that you want a datetime axis on the x-axis:
x_axis_type='datetime'
So, Bokeh will set up the plot with a ticker that picks "nice" values on a datetime scale, and with a tick formatter that displays tick locations as formatted dates. What is important, however, is that the data coordinates are in the appropriate units, which are floating point milliseconds since epoch.
You can provide x values directly in these units, but Bokeh will also automatically convert common datetime types (e.g. python stdlib, numpy, or pandas) to the right units automatically. So the easiest thing for you to do is pass a column of datetime values as the x values to line.
To be clear, this statement:
To render the line in Bokeh, it has to use the index
is incorrect. You can pass any dataframe column you like as the x-values, and I am suggesting you pass a column of datetimes.

I changed a line of the SQL to:
CASE WHEN df is null THEN strftime('%Y',ds) ELSE strftime('%Y',df) END AS 'projdate',
However, when I try expanding that specifier to %Y-%m-%d %H-%m-%s it just reads it as a string all over again.
And also by re-importing the data I was able to pass the date through here without using Index:
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
But then I get this weird output: link.
So it's clear that it can read the year, but I need to pass through the full date to display the time series forecast. And it's still displaying the dates and y-values in the incorrect scale, regardless.
Going to noodle on this some more but if anyone has other suggestions, I'm thankful.

SOLVED the datetime problem. Added this after the SQL query:
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
Which in turn yields this:
Bokeh Plot
Still got a problem with the x-axis but since that's a straight numerical value, x_axis_type should fix it.
So far the working code looks like this (again, still iterating to add other controls but everything about the Bokeh plot itself works as intended):
# main.py
# created by: <>
# version: 0.1.2
# created date: 07-Aug-2018
# modified date: 09-Aug-2018
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
# Open the SQLite database file that the forecast/actuals query below reads.
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
# NOTE(review): this cursor is not used by the code visible here; the query
# below goes through pandas with `con=conn` instead.
c = conn.cursor()
## Run the SQL
# Build one frame with three columns:
#   projdate - `df` when it is not null, otherwise `ds`, formatted as YYYY-MM-DD
#   projvol  - `yhat` when it is not null, otherwise `y`
#   year     - the four-digit year of `ds`
# NOTE(review): strftime() produces text, so `year` presumably arrives as
# strings rather than integers - confirm before comparing it to numbers.
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN strftime('%Y-%m-%d',ds) ELSE strftime('%Y-%m-%d',df) END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
# Cast the formatted date text to datetime64 so the column can be plotted on a
# datetime x-axis instead of being treated as plain strings.
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
# HTML index page and inline CSS stylesheet
# Read the description page inside a context manager so the file handle is
# closed as soon as the text has been loaded, rather than whenever the
# garbage collector gets to it.
with open("/Users/<>/Documents/python_scripts/reptool/description.html") as description_file:
    desc = Div(text=description_file.read(), width=800)
## Rename Columns and create list sets
# Give the query columns the short names used for plotting, then pull them out
# as plain Python lists.
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# Create input controls
# NOTE(review): the radio button group is created here but is not placed in
# the layout by the code visible in this script.
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
# Year sliders; their `value` is read by select_rep() to filter `proj`.
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
# Starts empty; update() assigns its data.
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
# x_axis_type='datetime' makes the x-axis interpret the datetime values as
# dates instead of raw numbers.
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
# NOTE(review): the line is drawn from the `x`/`y` lists, not from `source`,
# so later changes to `source.data` will not redraw this glyph.
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
# Rotate the tick labels by 45 degrees (pi/4 radians).
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
    """Return the rows of ``proj`` whose year lies within the slider range.

    The range is inclusive on both ends: ``min_year.value <= year <=
    max_year.value``.

    Returns:
        The filtered subset of the module-level ``proj`` frame.
    """
    # The query builds ``year`` with strftime(), which yields text; convert it
    # so the comparison with the integer slider values is numeric. Values that
    # cannot be parsed become NaN and therefore never match.
    years = pd.to_numeric(proj.year, errors="coerce")
    selected = proj[
        (years >= min_year.value) &
        (years <= max_year.value)  # upper bound: was `>=`, which ignored the start slider
    ]
    return selected
def update():
    """Refresh ``source`` with the rows currently selected by the sliders.

    Assigning ``source.data`` replaces the whole column set, so every column
    the source was declared with (``x``, ``y``, ``year``) is supplied; sending
    only ``year`` would leave the source without any coordinates to plot.
    """
    # Use a distinct local name so the module-level ``proj`` is not shadowed.
    selected = select_rep()
    source.data = dict(
        x=selected["x"],
        y=selected["y"],
        year=selected["year"],
    )
def _on_slider_change(attr, old, new):
    """Bokeh property callback: recompute the selection, ignoring the event details."""
    update()


# Both year sliders trigger the same refresh when their value changes.
controls = [min_year, max_year]
for control in controls:
    control.on_change('value', _on_slider_change)

sizing_mode = 'fixed'  # 'scale_width' also looks nice with this example

## Build the html page and inline CSS
inputs = widgetbox(*controls)
# One row each for the description, the plot and the slider box.
rows = [
    [desc],
    [p],
    [inputs],
]
l = layout(rows)
# update()
document = curdoc()
document.add_root(l)
document.title = "REP"

Set the zoom level of a bokeh map when using a tile provider

I've followed the example here: http://docs.bokeh.org/en/latest/docs/user_guide/geo.html#tile-providers
I got a basic map loading a GeoJSON file with a list of polygons (already projected to Web Mercator EPSG:3857) so then I could use STAMEN_TONER as a tile provider.
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.tile_providers import STAMEN_TONER, STAMEN_TERRAIN
from bokeh.models import Range1d, GeoJSONDataSource
# bokeh configuration for jupyter
from bokeh.io import output_notebook
output_notebook()

# bounding box (x,y web mercator projection, not lon/lat)
# The start/end of these ranges define the area the plot shows.
mercator_extent_x = {'start': x_low, 'end': x_high, 'bounds': None}
mercator_extent_y = {'start': y_low, 'end': y_high, 'bounds': None}
x_range1d = Range1d(**mercator_extent_x)
y_range1d = Range1d(**mercator_extent_y)

fig = figure(
    x_range=x_range1d,
    y_range=y_range1d,
    plot_width=800,
    plot_height=600,
    tools='pan, zoom_in, zoom_out, box_zoom, reset, save',
)
fig.axis.visible = False
fig.add_tile(STAMEN_TERRAIN)

# the GeoJSON is already in x,y web mercator projection, not lon/lat
with open('/path/to/my_polygons.geojson', 'r') as f:
    geojson_text = f.read()
my_polygons_geo_json = GeoJSONDataSource(geojson=geojson_text)

# Draw the polygon outlines on top of the tiles.
outline_style = dict(line_color='black', line_width=1)
fig.multi_line(xs='xs', ys='ys', source=my_polygons_geo_json, **outline_style)

show(fig)
However I am not able to set a default zoom level for the tiles. I thought it could have been a tool setting (http://docs.bokeh.org/en/latest/docs/user_guide/tools.html) but in there I can not find a default value for the zoom capabilities.
How can I set a default value for the zoom level of the tiles?

This is an old question, but I'm answering in case someone else has the same problem. Set the ranges for your map; that way the plot opens on the desired area on load. Below is an example for Papua New Guinea:
# Range tuples are (start, end). Keep start < end so latitude increases upward
# (north at the top); with the larger value first the y-axis is drawn flipped.
p = figure(title="PNG Highlands Earthquake 7.5 Affected Villages",
           y_range=(-7.0341, -4.31509), x_range=(141.26667, 145.56598))
p.xaxis.axis_label = 'longitude'
p.yaxis.axis_label = 'latitude'

I've just run into this issue myself, and found a good solution that should work under most circumstances. This requires making sure that both the data and the x_range/y_range are projected properly (I used Proj and transform from pyproj, but I'm sure there are other packages that will work the same way).
Import modules:
import pandas as pd
import numpy as np
from pyproj import Proj, transform
import datashader as ds
from datashader import transfer_functions as tf
from datashader.bokeh_ext import InteractiveImage
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from IPython.core.display import HTML, display
from bokeh.plotting import figure, output_notebook, output_file, show
from bokeh.tile_providers import CARTODBPOSITRON
from bokeh.tile_providers import STAMEN_TONER
from bokeh.tile_providers import STAMEN_TERRAIN
from bokeh.embed import file_html
from functools import partial
output_notebook()
Read in data (I took a few extra steps to try and clean the coordinates since I'm working with an extremely messy dataset that contains NaN and broken text in the coordinates columns):
df = pd.read_csv('data.csv', usecols=['latitude', 'longitude'])
# Turn broken text into NaN and drop those rows. Two fixes over the original:
# the keyword value is 'coerce' (not 'coerced'), and the result is assigned
# back, because apply()/dropna() return new frames instead of modifying `df`.
df = df.apply(lambda col: pd.to_numeric(col, errors='coerce')).dropna()
# Keep only coordinates strictly inside the valid latitude/longitude bounds.
df = df.loc[(df['latitude'] > - 90) & (df['latitude'] < 90) & (df['longitude'] > -180) & (df['longitude'] < 180)]
Reproject data:
# Source CRS: WGS 84
inProj = Proj(init='epsg:4326')
# Target CRS: WGS84 Pseudo Web Mercator, projection for most WMS services
outProj = Proj(init='epsg:3857')
# Reproject all points in one call and store the results as new columns.
lon_values = df['longitude'].values
lat_values = df['latitude'].values
x_web, y_web = transform(inProj, outProj, lon_values, lat_values)
df['xWeb'] = x_web
df['yWeb'] = y_web
Reproject the x_range and y_range. This is critical, as these values set the extent of the bokeh map - their coordinates need to match the projection. To make sure you have the correct coordinates, I suggest using http://bboxfinder.com to create a bounding box AOI and get the correct min/max x and min/max y coordinates (making sure EPSG:3857 - WGS 84/Pseudo-Mercator is selected). Using this method, just copy the coordinates next to "box" - these are in the order minx,miny,maxx,maxy and should then be reordered as minx,maxx,miny,maxy (x_range = (minx,maxx))(y_range=(miny,maxy)):
# Map extent in Web Mercator coordinates: (min, max) per axis.
x_range = (-18706892.5544, 21289852.6142)
y_range = (-7631472.9040, 12797393.0236)
world = (x_range, y_range)

# Canvas size in pixels; the height is the width floor-divided by 1.2.
plot_width = 950
plot_height = int(plot_width // 1.2)
def base_plot(tools='pan,wheel_zoom,save,reset', plot_width=plot_width,
              plot_height=plot_height, **plot_args):
    """Create a figure covering the module-level ``x_range``/``y_range``.

    The figure has no outline, zero borders, hidden axes and no grid lines.
    Any extra keyword arguments are forwarded to ``figure``.
    """
    zero_borders = dict(
        min_border=0,
        min_border_left=0,
        min_border_right=0,
        min_border_top=0,
        min_border_bottom=0,
    )
    p = figure(
        tools=tools,
        plot_width=plot_width,
        plot_height=plot_height,
        x_range=x_range,
        y_range=y_range,
        outline_line_color=None,
        **zero_borders,
        **plot_args
    )
    p.axis.visible = False
    for grid in (p.xgrid, p.ygrid):
        grid.grid_line_color = None
    return p
# Marker styling options.
options = {'line_color': None, 'fill_color': 'blue', 'size': 1.5, 'alpha': 0.25}
background = "black"
# Pre-bound helpers: image export with the chosen background, and a colormap
# selector whose `reverse` flag is set only when the background is white.
export = partial(export_image, background=background, export_path="export")
cm = partial(colormap_select, reverse=(background == "white"))
def create_image(x_range, y_range, w=plot_width, h=plot_height):
    """Render the points of ``df`` for the given extent as a shaded image.

    Args:
        x_range: x extent passed to the datashader canvas.
        y_range: y extent passed to the datashader canvas.
        w: canvas width in pixels.
        h: canvas height in pixels.

    Returns:
        The result of ``tf.dynspread`` applied to the shaded aggregate.
    """
    magma = ['#3B0F6F', '#8C2980', '#DD4968', '#FD9F6C', '#FBFCBF']
    canvas = ds.Canvas(plot_width=w, plot_height=h,
                       x_range=x_range, y_range=y_range)
    # Aggregate the projected point columns onto the canvas grid.
    counts = canvas.points(df, 'xWeb', 'yWeb')
    # how='linear', 'log', 'eq_hist'
    shaded = tf.shade(counts, cmap=magma, how='eq_hist')
    return tf.dynspread(shaded, threshold=.05, max_px=15)
p = base_plot()
p.add_tile("WMS service")

# used to export image (without the WMS)
world_image = create_image(*world)
export(world_image, "TweetGeos")

# call interactive image
InteractiveImage(p, create_image)

The notion of a zoom "level" only applies to GMapPlot and there only because google controls the presentation of the maps very carefully, and that is the API they provide. All other Bokeh plots have explicitly user-settable x_range and y_range properties. You can set the start and end of these ranges to be whatever you want, and the plot will display the corresponding area defined by those bounds.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to put interactive bokeh HTML image in Jupyterlab page - python

Related

How to plot visualization with Interactive Feature Selection in Bokeh, Python

Dynamically adding and removing Bokeh legends

bokeh: How to edit a df or CDS-object through box_select?

Bokeh not interpreting correct scale with SQL data

Set the zoom level of a bokeh map when using a tile provider

Categories

Resources