The task is to automate the Visualization. The CSV file contains large nos of features (column names e:g. 32 nos it may increase in future). The task is to plot Interactive Visualization. All the examples I found are hardcoded for the dynamic features selection.
But the requirement is to make the stuff dynamic. How to make it dynamic? Please guide.
I have successfully plotted the graph dynamically, but could not connect the interactive part. The code is as follows:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import CustomJS,HoverTool,ColumnDataSource,Select
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.annotations import Title, Legend
import itertools
from bokeh.palettes import inferno
from bokeh.layouts import row
def creat_plot(dataframe):
data=dataframe
#Converting the timestamp Column to Timestamp datatype so that it can be used for Plotting on X-axis
data['timestamp'] = pd.to_datetime(data['timestamp'])
#Segregating Date and Time from timestamp column. It will be used in Hover Tool
date = lambda x: str(x)[:10]
data['date'] = data[['timestamp']].applymap(date)
time= lambda x: str(x)[11:]
data['time'] = data[['timestamp']].applymap(time)
#Converting whole dataframe ColumnDatasource for easy usage in hover tool
source = ColumnDataSource(data)
# List all the tools that you want in your plot separated by comas, all in one string.
TOOLS="crosshair,pan,wheel_zoom,box_zoom,reset,hover"
# New figure
t = figure(x_axis_type = "datetime", width=1500, height=600,tools=TOOLS,title="Plot for Interactive Features")
#X-axis Legend Formatter
t.xaxis.formatter.days = '%d/%m/%Y'
#Axis Labels
t.yaxis.axis_label = 'Count'
t.xaxis.axis_label = 'Date and Time Span'
#Grid Line Formatter
t.ygrid.minor_grid_line_color = 'navy'
t.ygrid.minor_grid_line_alpha = 0.1
t.xgrid.visible = True
t.ygrid.visible= True
#Hover Tool Usage
t.select_one(HoverTool).tooltips = [('Date', '#date'),('Time', '#time')]
#A color iterator creation
colors = itertools.cycle(inferno(len(data.columns)))
#A Line type iterator creation
line_types= ['solid','dashed','dotted','dotdash','dashdot']
lines= itertools.cycle(line_types)
column_name=[]
#Looping over the columns to plot the Data
for m in data.columns[2:len(data.columns)-2]:
column_name.append(m)
a=t.line(data.columns[0], m ,color=next(colors),source=source,line_dash=next(lines), alpha= 1)
#Adding Label Selection Check Box List
column_name= list(column_name)
checkboxes = CheckboxGroup(labels = column_name, active= [0,1,2])
show(row(t,checkboxes))
The above function can be used as follows:
dataframe= pd.read_csv('data.csv')
creat_plot(dataframe)
**The above code is executed on following requirements:
Bokeh version: 2.2.3
Panda Version: 1.1.3
The plot should be linked with the checkbox values. The features selected through the checkboxes shall be plotted only.
The solution to the above requirement is as follows:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show,output_file
from bokeh.models import CustomJS,HoverTool,ColumnDataSource,Select
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.annotations import Title, Legend
import itertools
from bokeh.palettes import inferno
from bokeh.layouts import row
def creat_plot(dataframe):
data=dataframe
#Converting the timestamp Column to Timestamp datatype so that it can be used for Plotting on X-axis
data['timestamp'] = pd.to_datetime(data['timestamp'])
#Segregating Date and Time from timestamp column. It will be used in Hover Tool
date = lambda x: str(x)[:10]
data['date'] = data[['timestamp']].applymap(date)
time= lambda x: str(x)[11:]
data['time'] = data[['timestamp']].applymap(time)
#Converting whole dataframe ColumnDatasource for easy usage in hover tool
source = ColumnDataSource(data)
# List all the tools that you want in your plot separated by comas, all in one string.
TOOLS="crosshair,pan,wheel_zoom,box_zoom,reset,hover"
# New figure
t = figure(x_axis_type = "datetime", width=1500, height=600,tools=TOOLS,title="Plot for Interactive Visualization")
#X-axis Legend Formatter
t.xaxis.formatter.days = '%d/%m/%Y'
#Axis Labels
t.yaxis.axis_label = 'Count'
t.xaxis.axis_label = 'Date and Time Span'
#Grid Line Formatter
t.ygrid.minor_grid_line_color = 'navy'
t.ygrid.minor_grid_line_alpha = 0.1
t.xgrid.visible = True
t.ygrid.visible= True
#Hover Tool Usage
t.select_one(HoverTool).tooltips = [('Date', '#date'),('Time', '#time')]
#A color iterator creation
colors = itertools.cycle(inferno(len(data.columns)))
#A Line type iterator creation
line_types= ['solid','dashed','dotted','dotdash','dashdot']
lines= itertools.cycle(line_types)
feature_lines = []
column_name=[]
#Looping over the columns to plot the Data
for m in data.columns[2:len(data.columns)-2]:
column_name.append(m)
#Solution to my question is here
feature_lines.append(t.line(data.columns[0], m ,color=next(colors),source=source,line_dash=next(lines), alpha= 1, visible=False))
#Adding Label Selection Check Box List
column_name= list(column_name)
#Solution to my question,
checkbox = CheckboxGroup(labels=column_name, active=[])
#Solution to my question
callback = CustomJS(args=dict(feature_lines=feature_lines, checkbox=checkbox), code="""
for (let i=0; i<feature_lines.length; ++i) {
feature_lines[i].visible = i in checkbox.active
}
""")
checkbox.js_on_change('active', callback)
output_file('Interactive_data_visualization.html')
show(row(t, checkbox))
Related
I'm using Jupyterlab (v 3.2.1) and bokeh to create a webpage that allows a user to load a .csv file containing a matrix, and a slider to optionally set a threshold on displayed results. The matrix contains simply some numerical values. The result would be an interactive heatmap displayed below the confirmation button. Whit my code the webpage is displayed correctly but the final plot is displayed in a new tab:
import warnings
warnings.filterwarnings('ignore')
import jupyter_bokeh
import ipywidgets as widgets
import pandas as pd
import io
from bokeh.io import show
from bokeh.models import ColorBar, ColumnDataSource, CategoricalColorMapper
from bokeh.plotting import figure
from bokeh.transform import transform
import bokeh.palettes
from IPython.display import display, clear_output, display_html
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.layouts import layout
#Display the webpage
file = widgets.FileUpload(accept=".txt, .csv, .dat", multiple=False)
threshold=widgets.IntSlider(value=0, min=0, max=20, step=1, description="Threshold:", disabled=False, continuous_update=False, orintation='horizontal', readout=True, readout_format="d")
button = widgets.Button(description='Run code')
text_0 = widgets.HTML(value="<header><h1>Phenotype Major Categories vs Genes Heatmap</h1></header>")
text_1 = widgets.HTML(value="<h3>Welcome to the heatmap plotter. By loading a csv file containing the counts of phenoypes for a gene into an IMPC major phenotype category, it will display an interactive heatmap.</h3>")
text_2 = widgets.HTML(value="Please load yor file (accepted formats: csv, txt, dat):")
text_3 = widgets.HTML(value="If desired, set a threshold for counts to be displayed:")
text_4 = widgets.HTML(value="<h2>Heatmap:</h2>")
vbox_head = widgets.VBox([text_0, text_1])
page_layout_plot = [text_2, file, text_3, threshold, button]
vbox_text = widgets.VBox(page_layout_plot)
page = widgets.VBox([vbox_head,vbox_text])
display(page)
#Set the endpage button to run the code
def on_button_clicked(result):
#Load the file and set the threshold
inp = list(file.value.values())[0] #if multiple setted to true, will not work!
content = inp['content']
content = io.StringIO(content.decode('utf-8'))
mat = pd.read_csv(content, sep="\t", index_col=0)
mat.index.name = 'MGI_id'
mat.columns.name = 'phen_sys'
#filtering phase
rem=[]
x = int(threshold.value)
if x != 0:
for i in mat.index:
if mat.loc[i].max() < x:
rem.append(i)
mat.drop(rem,inplace=True,axis=0)
#Create a custom palette and add a specific mapper to map color with values, we are converting them to strings to create a categorical color mapper to include only the
#values that we have in the matrix and retrieve a better representation
df = mat.stack(dropna=False).rename("value").reset_index()
fact= df.value.unique()
fact.sort()
fact = fact.astype(str)
df.value = df.value.astype(str)
mapper = CategoricalColorMapper(palette=bokeh.palettes.inferno(len(df.value.unique())), factors= fact, nan_color = 'gray')
#Define a figure
p = figure(
plot_width=1280,
plot_height=800,
x_range=list(df.phen_sys.drop_duplicates()[::-1]),
y_range=list(df.MGI_id.drop_duplicates()),
tooltips=[('Phenotype system','#phen_sys'),('Gene','#MGI_id'),('Phenotypes','#value')],
x_axis_location="above",
output_backend="webgl")
#Create rectangles for heatmap
p.rect(
x="phen_sys",
y="MGI_id",
width=1,
height=1,
source=ColumnDataSource(df),
fill_color=transform('value', mapper))
p.xaxis.major_label_orientation = 45
#Add legend
color_bar = ColorBar(
color_mapper=mapper,
label_standoff=6,
border_line_color=None)
p.add_layout(color_bar, 'right')
show(p)
button.on_click(on_button_clicked)
I already tried to use output_notebook() at the beginning but in that case nothing is displayed.
How can I fix it? It would be useful to display in real time the plot by changing the threshold without the need to click the confirmation button every time.
Thank you for all the help.
You might need to observe the value attribute of your treshold object to refresh your plot. So add something like this at the end of your code:
def on_value_change(change):
on_button_clicked(None)
threshold.observe(on_value_change, names='value')
More from the doc: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html#Signatures
I've studied the post:
"How do I link the CrossHairTool in bokeh over several plots?" (See How do I link the CrossHairTool in bokeh over several plots?.
I used the function written by Hamid Fadishei on June 2020 within this post but cannot manage to get the CrossHairTool to correctly display over several plots.
In my implementation, the crosshair displays only within the plot hovered over. I am currently using Bokeh version 2.1.1 with Python Anaconda version 3.7.6 using the Python extension in VSCode version 1.48. I am not familiar with Javascript, so any help to debug my code to correctly display the crosshair across the two plots will be welcomed.
My code:
# Importing libraries:
import pandas as pd
import random
from datetime import datetime, timedelta
from bokeh.models import CustomJS, CrosshairTool, ColumnDataSource, DatetimeTickFormatter, HoverTool
from bokeh.layouts import gridplot
from bokeh.plotting import figure, output_file, show
# Function wrote by Hamid Fadishei to enable a linked crosshair within gridplot:
def add_vlinked_crosshairs(figs):
js_leave = ''
js_move = 'if(cb_obj.x >= fig.x_range.start && cb_obj.x <= fig.x_range.end &&\n'
js_move += 'cb_obj.y >= fig.y_range.start && cb_obj.y <= fig.y_range.end){\n'
for i in range(len(figs)-1):
js_move += '\t\t\tother%d.spans.height.computed_location = cb_obj.sx\n' % i
js_move += '}else{\n'
for i in range(len(figs)-1):
js_move += '\t\t\tother%d.spans.height.computed_location = null\n' % i
js_leave += '\t\t\tother%d.spans.height.computed_location = null\n' % i
js_move += '}'
crosses = [CrosshairTool() for fig in figs]
for i, fig in enumerate(figs):
fig.add_tools(crosses[i])
args = {'fig': fig}
k = 0
for j in range(len(figs)):
if i != j:
args['other%d'%k] = crosses[j]
k += 1
fig.js_on_event('mousemove', CustomJS(args=args, code=js_move))
fig.js_on_event('mouseleave', CustomJS(args=args, code=js_leave))
# Create dataframe consisting of 5 random numbers within column A and B as a function of an arbitrary time range:
startDate = datetime(2020,5,1)
timeStep = timedelta(minutes = 5)
df = pd.DataFrame({
"Date": [startDate + (i * timeStep) for i in range(5)],
"A": [random.randrange(1, 50, 1) for i in range(5)],
"B": [random.randrange(1, 50, 1) for i in range(5)]})
# Generate output file as html file:
output_file("test_linked_crosshair.html", title='Results')
# Define selection tools within gridplot:
select_tools = ["xpan", "xwheel_zoom", "box_zoom", "reset", "save"]
sample = ColumnDataSource(df)
# Define figures:
fig_1 = figure(plot_height=250,
plot_width=800,
x_axis_type="datetime",
x_axis_label='Time',
y_axis_label='A',
toolbar_location='right',
tools=select_tools)
fig_1.line(x='Date', y='A',
source=sample,
color='blue',
line_width=1)
fig_2 = figure(plot_height=250,
plot_width=800,
x_range=fig_1.x_range,
x_axis_type="datetime",
x_axis_label='Time',
y_axis_label='B',
toolbar_location='right',
tools=select_tools)
fig_2.line(x='Date', y='B',
source=sample,
color='red',
line_width=1)
# Define hover tool for showing timestep and value of crosshair on graph:
fig_1.add_tools(HoverTool(tooltips=[('','#Date{%F,%H:%M}'),
('','#A{0.00 a}')],
formatters={'#Date':'datetime'},mode='vline'))
fig_2.add_tools(HoverTool(tooltips=[('','#Date{%F,%H:%M}'),
('','#B{0.00 a}')],
formatters={'#Date':'datetime'},mode='vline'))
# Calling function to enable linked crosshairs within gridplot:
add_vlinked_crosshairs([fig_1, fig_2])
# Generate gridplot:
p = gridplot([[fig_1], [fig_2]])
show(p)
myGraphenter code here
Here's a solution that works as of Bokeh 2.2.1: Just use the same crosshair tool object for all the plots that need it linked. Like so:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
from bokeh.models import CrosshairTool
plots = [figure() for i in range(6)]
[plot.line(np.arange(10), np.random.random(10)) for plot in plots]
linked_crosshair = CrosshairTool(dimensions="both")
for plot in plots:
plot.add_tools(linked_crosshair)
show(gridplot(children=[plot for plot in plots], ncols=3))
I have a dataset where I want to plot make plots with 2 different variables on the X-axis (in 2 different plots), but I want to get the other value into the Hovertool
from io import StringIO
import pandas as pd
data = """,item_id,start,station,rejects
0,item1,2019-10-14 19:00:00,assembly,4.297994269340974
1,item1,2019-10-14 19:00:00,ST1,0.20546537908362442
2,item1,2019-10-14 19:00:00,ST2,0.494539460127756
3,item1,2019-10-14 19:00:00,ST3,0.6892230576441103
4,item2,2019-10-14 23:30:00,assembly,4.432249894470241
5,item2,2019-10-14 23:30:00,ST1,0.19071837253655435
6,item2,2019-10-14 23:30:00,ST2,0.7651434643995749
7,item2,2019-10-14 23:30:00,ST3,0.7748600947051227
8,item3,2019-10-15 04:00:00,assembly,3.55576079427384
9,item3,2019-10-15 04:00:00,ST1,0.37002775208140615
10,item3,2019-10-19 04:00:00,ST2,0.7195914577530177
11,item3,2019-10-19 04:00:00,ST3,0.492379835873388
12,item4,2019-10-19 10:30:00,assembly,4.02656704026567
13,item4,2019-10-19 10:30:00,ST1,0.22926219258024177
14,item4,2019-10-19 10:30:00,ST2,0.690376569037657
15,item4,2019-10-19 10:30:00,ST3,0.838745695410320"""
data_reduced = pd.read_csv(StringIO(data), parse_dates=["start"], index_col=0)
I want to produce a graph with the item_id on the x-axis and with the start date on the x-axis. I want to track the rejects per station, and the combined of the assembly.
import holoviews as hv
import bokeh
from holoviews import opts
hv.extension('bokeh')
bokeh.plotting.output_notebook()
def plot(data_reduced, x_axis="item_id"):
x_label = x_axis if x_axis in {"start", "item_id"} else "item_id"
key_dimensions = [(x_label, x_label), ("station", "station")]
value_dimensions = [
("rejects", "rejects"),
("start", "start"),
("item_id", "item_id"),
("start", "start"),
]
datatable = hv.Table(
data_reduced, kdims=key_dimensions, vdims=value_dimensions
)
scatter_plot = datatable.to.scatter(x_label, ["rejects"])
overlay = scatter_plot.overlay("station")
tooltips = [
("item_id", "#item_id"),
("start", "#start{%Y-%m-%d %H:%M}"),
("station", "#station"),
("rejects", "#rejects"),
]
hover = bokeh.models.HoverTool(
tooltips=tooltips, formatters={"start": "datetime"}
)
return overlay.opts(
opts.Scatter(
color=hv.Cycle("Category10"),
show_grid=True,
padding=0.1,
height=400,
tools=[hover],
),
opts.NdOverlay(
legend_position="right", show_frame=False, xrotation=90
),
)
And then I make the graphs with plot(data_reduced, x_axis="start") or plot(data_reduced, x_axis="item_id")
plot(data_reduced, x_axis="start")
plot(data_reduced, x_axis="item_id")
How do I get the ??? filled in?
If I want to get the data from an individual line (list(p.items())[0][1].data), I get:
,item_id,start,station,rejects
1,item1,2019-10-14 19:00:00,ST1,0.2054653790836244
5,item2,2019-10-14 23:30:00,ST1,0.19071837253655435
9,item3,2019-10-15 04:00:00,ST1,0.37002775208140615
13,item4,2019-10-19 10:30:00,ST1,0.22926219258024175
So the data seems to be in the source
In cases like this I prefer to use hvplot which is a library built on top of holoviews, made by the same group of developers. This really makes life I think a lot easier and creates your plot all in one go.
1) With Hvplot you can specify extra hover columns easily with keyword hover_cols=['your_column']:
# with this import you can use .hvplot() on your df and create interactive holoviews plots
import hvplot.pandas
item_plot = data_reduced.hvplot(
kind='scatter',
x='item_id',
y='rejects',
by='station', # this creates the overlay
hover_cols=['start'],
padding=0.1,
)
start_plot = data_reduced.hvplot(
kind='scatter',
x='start',
y='rejects',
by='station',
hover_cols=['item_id'],
padding=0.1,
)
2) If you want a pure Holoviews solution, you can do:
import holoviews as hv
from holoviews import opts
hv_df = hv.Dataset(
data_reduced,
kdims=['item_id', 'station'],
vdims=['rejects', 'start'],
)
hv_df.to(hv.Scatter).overlay().opts(opts.Scatter(tools=['hover']))
Example plot with extra hover columns:
Explored lots of various solutions here but not finding one that works. I'm using sqlite and pandas to read data from a SQL database, but Bokeh doesn't like the date. I've tried conversions to datetime, unixepoch, etc. and they all seem to yield the same result.
EDIT: Here's the full code:
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN ds ELSE df END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# proj['projdate'] = [datetime.datetime.strptime(x, "%Y-%m-%d").date() for x in proj['projdate']]
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
selected = proj[
(proj.year >= min_year.value) &
(proj.year >= max_year.value)
]
return selected
def update():
proj = select_rep()
source.data = dict(
year=proj["year"]
)
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"
The SQLite output in Terminal.app looks like this:
SQL
The result is, that the x-axis displays in milliseconds. Also, the y-axis is showing up as exponential notation:
Bokeh Plot
The issue seems somehow related to pandas use of indexing, and thus I can't reference "x" here. I rename the columns and force list sets which, by themselves, will print correctly... and should therefore plot into the line properly but as you'll see below, they don't:
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
To get the line to render in Bokeh, I have to pass it the index because passing it anything else doesn't seem to get the glyph to render. So currently I have this:
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
Tried converting to unixepoch in the SQL, same result.
Tried converting to unixepoch in the data, same result.
Tried using DateTimeTickFormatter, just shows all 5-6 years as one year (thinking it's just displaying the milliseconds as years rather than changing them from milliseconds to days.
I've looked here and in github, up and down, and tried different things but ultimately I can't find one working example where the source is a sql query not a csv.
None of these things have anything to do with SQL, Bokeh only cares about the data that you give it, not where it came from. You have specified that you want a datetime axis on the x-axis:
x_axis_type='datetime'
So, Bokeh will set up the plot with a ticker that picks "nice" values on a datetime scale, and with a tick formatter that displays tick locations as formatted dates. What is important, however, is that the data coordinates are in the appropriate units, which are floating point milliseconds since epoch.
You can provide x values directly in these units, but Bokeh will also automatically convert common datetime types (e.g. python stdlib, numpy, or pandas) to the right units automatically. So the easiest thing for you to do is pass a column of datetime values as the x values to line.
To be clear, this statement:
To render the line in Bokeh, it has to use the index
is incorrect. You can pass any dataframe column you like as the x-values, and I am suggesting you pass a column of datetimes.
I changed a line of the SQL to:
CASE WHEN df is null THEN strftime('%Y',ds) ELSE strftime('%Y',df) END AS 'projdate',
However, when I try expanding that specifier to %Y-%m-%d %H-%m-%s it just reads it as a string all over again.
And also by re-importing the data I was able to pass the date through here without using Index:
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
But then I get this weird output: link.
So it's clear that it can read the year, but I need to pass through the full date to display the time series forecast. And it's still displaying the dates and y-values in the incorrect scale, regardless.
Going to noodle on this some more but if anyone has other suggestions, I'm thankful.
SOLVED the datetime problem. Added this after the SQL query:
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
Which in turn yields this:
Bokeh Plot
Still got a problem with the x-axis but since that's a straight numerical value, x_axis_type should fix it.
So far the working code looks like this (again, still iterating to add other controls but everything about the Bokeh plot itself works as intended):
# main.py
# created by: <>
# version: 0.1.2
# created date: 07-Aug-2018
# modified date: 09-Aug-2018
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN strftime('%Y-%m-%d',ds) ELSE strftime('%Y-%m-%d',df) END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
selected = proj[
(proj.year >= min_year.value) &
(proj.year >= max_year.value)
]
return selected
def update():
proj = select_rep()
source.data = dict(
year=proj["year"]
)
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"
I am trying to plot RPI, CPI and CPIH on one chart with a HoverTool showing the value of each when you pan over a given area of the chart.
I initially tried adding each line separately using line() which kind of worked:
However, the HoverTool only works correctly when you scroll over the individual lines.
I have tried using multi_line() like:
combined_inflation_metrics = 'combined_inflation_metrics.csv'
df_combined_inflation_metrics = pd.read_csv(combined_inflation_metrics)
combined_source = ColumnDataSource(df_combined_inflation_metrics)
l.multi_line(xs=['Date','Date','Date'],ys=['RPI', 'CPI', 'CPIH'], source=combined_source)
#l.multi_line(xs=[['Date'],['Date'],['Date']],ys=[['RPI'], ['CPI'], ['CPIH']], source=combined_source)
show(l)
However, this is throwing the following:
RuntimeError:
Supplying a user-defined data source AND iterable values to glyph methods is
not possibe. Either:
Pass all data directly as literals:
p.circe(x=a_list, y=an_array, ...)
Or, put all data in a ColumnDataSource and pass column names:
source = ColumnDataSource(data=dict(x=a_list, y=an_array))
p.circe(x='x', y='y', source=source, ...)
But I am not too sure why this is?
Update:
I figured out a workaround by adding all of the values in each of the data sources. It works, but doesn't feel most efficient and would still like to know how to do this properly.
Edit - Code request:
from bokeh.plotting import figure, output_file, show
from bokeh.models import NumeralTickFormatter, DatetimeTickFormatter, ColumnDataSource, HoverTool, CrosshairTool, SaveTool, PanTool
import pandas as pd
import os
os.chdir(r'path')
#output_file('Inflation.html', title='Inflation')
RPI = 'RPI.csv'
CPI = 'CPI.csv'
CPIH = 'CPIH.csv'
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
def to_date_time(data_frame, data_series):
data_frame[data_series] = data_frame[data_series].astype('datetime64[ns]')
to_date_time(df_RPI, 'Date')
to_date_time(df_CPI, 'Date')
to_date_time(df_CPIH, 'Date')
RPI_source = ColumnDataSource(df_RPI)
CPI_source = ColumnDataSource(df_CPI)
CPIH_source = ColumnDataSource(df_CPIH)
l = figure(title="Historic Inflaiton Metrics", logo=None)
l.plot_width = 1200
l.xaxis[0].formatter=DatetimeTickFormatter(
days=["%d %B %Y"],
months=["%d %B %Y"],
years=["%d %B %Y"],
)
glyph_1 = l.line('Date','RPI',source=RPI_source, legend='TYPE', color='red')
glyph_2 = l.line('Date','CPI',source=CPI_source, legend='TYPE', color='blue')
glyph_3 = l.line('Date','CPIH',source=CPIH_source, legend='TYPE', color='gold')
hover = HoverTool(renderers=[glyph_1],
tooltips=[ ("Date","#Date{%F}"),
("RPI","#RPI"),
("CPI","#CPI"),
("CPIH","#CPIH")],
formatters={"Date": "datetime"},
mode='vline'
)
l.tools = [SaveTool(), PanTool(), hover, CrosshairTool()]
show(l)
The hover tool looks up the data to show in the ColumnDataSource. Because you created a new ColumnDataSource for each line and restricted the hover tool to line1 it can only lookup data in the data source there.
The general solution is to only create one ColumnDataSource and reuse that in each line:
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
df = df_RPI.merge(dfd_CPI, on="date")
df = df.merge(df_CPIH, on="date")
source = ColumnDataSource(df)
l = figure(title="Historic Inflation Metrics", logo=None)
glyph_1 = l.line('Date','RPI',source=source, legend='RPI', color='red')
l.line('Date','CPI',source=source, legend='CPI', color='blue')
l.line('Date','CPIH',source=source, legend='CPIH', color='gold')
hover = HoverTool(renderers=[glyph_1],
tooltips=[ ("Date","#Date{%F}"),
("RPI","#RPI"),
("CPI","#CPI"),
("CPIH","#CPIH")],
formatters={"Date": "datetime"},
mode='vline'
)
show(l)
This is of course only possible if you all your dataframes can be merged into one, i.e. the measurement timepoints are the same. If they are not besides resampling/interpolating I do not know a good method to do what you want.