I have a dataset for which I want to make two plots, each with a different variable on the x-axis, but I want the other variable to show up in the HoverTool.
from io import StringIO
import pandas as pd
data = """,item_id,start,station,rejects
0,item1,2019-10-14 19:00:00,assembly,4.297994269340974
1,item1,2019-10-14 19:00:00,ST1,0.20546537908362442
2,item1,2019-10-14 19:00:00,ST2,0.494539460127756
3,item1,2019-10-14 19:00:00,ST3,0.6892230576441103
4,item2,2019-10-14 23:30:00,assembly,4.432249894470241
5,item2,2019-10-14 23:30:00,ST1,0.19071837253655435
6,item2,2019-10-14 23:30:00,ST2,0.7651434643995749
7,item2,2019-10-14 23:30:00,ST3,0.7748600947051227
8,item3,2019-10-15 04:00:00,assembly,3.55576079427384
9,item3,2019-10-15 04:00:00,ST1,0.37002775208140615
10,item3,2019-10-19 04:00:00,ST2,0.7195914577530177
11,item3,2019-10-19 04:00:00,ST3,0.492379835873388
12,item4,2019-10-19 10:30:00,assembly,4.02656704026567
13,item4,2019-10-19 10:30:00,ST1,0.22926219258024177
14,item4,2019-10-19 10:30:00,ST2,0.690376569037657
15,item4,2019-10-19 10:30:00,ST3,0.838745695410320"""
data_reduced = pd.read_csv(StringIO(data), parse_dates=["start"], index_col=0)
I want to produce one graph with the item_id on the x-axis and another with the start date on the x-axis. I want to track the rejects per station, as well as the combined rejects of the assembly.
import holoviews as hv
import bokeh
from holoviews import opts
hv.extension('bokeh')
bokeh.plotting.output_notebook()
def plot(data_reduced, x_axis="item_id"):
    """Scatter the rejects per station against ``item_id`` or ``start``.

    Args:
        data_reduced: DataFrame with ``item_id``, ``start``, ``station`` and
            ``rejects`` columns.
        x_axis: Column to put on the x-axis, ``"item_id"`` or ``"start"``.
            Any other value falls back to ``"item_id"``.

    Returns:
        The styled overlay holding one scatter element per station.
    """
    x_label = x_axis if x_axis in {"start", "item_id"} else "item_id"
    # Whichever of the two columns is not on the x-axis is carried along as
    # an extra value dimension so that the hover tool can look it up.
    hover_column = "start" if x_label == "item_id" else "item_id"

    key_dimensions = [(x_label, x_label), ("station", "station")]
    # Every dimension is declared exactly once: the x column is already a
    # key dimension, so it is not repeated among the value dimensions.
    value_dimensions = [
        ("rejects", "rejects"),
        (hover_column, hover_column),
    ]
    datatable = hv.Table(
        data_reduced, kdims=key_dimensions, vdims=value_dimensions
    )
    scatter_plot = datatable.to.scatter(x_label, ["rejects", hover_column])
    overlay = scatter_plot.overlay("station")

    # Bokeh tooltips reference data columns with the "@" prefix.
    tooltips = [
        ("item_id", "@item_id"),
        ("start", "@start{%Y-%m-%d %H:%M}"),
        ("station", "@station"),
        ("rejects", "@rejects"),
    ]
    # NOTE(review): newer Bokeh releases appear to expect the formatter key
    # with the "@" prefix ("@start") - confirm against the installed version.
    hover = bokeh.models.HoverTool(
        tooltips=tooltips, formatters={"start": "datetime"}
    )
    return overlay.opts(
        opts.Scatter(
            color=hv.Cycle("Category10"),
            show_grid=True,
            padding=0.1,
            height=400,
            tools=[hover],
        ),
        opts.NdOverlay(
            legend_position="right", show_frame=False, xrotation=90
        ),
    )
And then I make the graphs with plot(data_reduced, x_axis="start") or plot(data_reduced, x_axis="item_id")
plot(data_reduced, x_axis="start")
plot(data_reduced, x_axis="item_id")
How do I get the ??? filled in?
If I want to get the data from an individual line (list(p.items())[0][1].data), I get:
,item_id,start,station,rejects
1,item1,2019-10-14 19:00:00,ST1,0.2054653790836244
5,item2,2019-10-14 23:30:00,ST1,0.19071837253655435
9,item3,2019-10-15 04:00:00,ST1,0.37002775208140615
13,item4,2019-10-19 10:30:00,ST1,0.22926219258024175
So the data seems to be in the source
In cases like this I prefer to use hvplot, a library built on top of HoloViews by the same group of developers. I think it makes life a lot easier, and it creates your plot all in one go.
1) With Hvplot you can specify extra hover columns easily with keyword hover_cols=['your_column']:
# with this import you can use .hvplot() on your df and create interactive holoviews plots
import hvplot.pandas
# Options shared by both plots; ``by='station'`` creates the overlay.
_scatter_kwargs = dict(kind='scatter', y='rejects', by='station', padding=0.1)

# item_id on the x-axis, start shown in the hover tooltip.
item_plot = data_reduced.hvplot(
    x='item_id', hover_cols=['start'], **_scatter_kwargs
)
# start on the x-axis, item_id shown in the hover tooltip.
start_plot = data_reduced.hvplot(
    x='start', hover_cols=['item_id'], **_scatter_kwargs
)
2) If you want a pure Holoviews solution, you can do:
import holoviews as hv
from holoviews import opts
# Declare item_id/station as key dimensions; rejects and start travel along
# as value dimensions of the dataset.
_key_dims = ['item_id', 'station']
_value_dims = ['rejects', 'start']
hv_df = hv.Dataset(data_reduced, kdims=_key_dims, vdims=_value_dims)

# One Scatter per station, overlaid, with the default hover tool enabled.
_hover_opts = opts.Scatter(tools=['hover'])
hv_df.to(hv.Scatter).overlay().opts(_hover_opts)
Example plot with extra hover columns:
Related
The task is to automate the visualization. The CSV file contains a large number of features (column names; currently 32, and the number may increase in future). The task is to plot an interactive visualization. All the examples I found hard-code the features to be selected.
But the requirement is to make the stuff dynamic. How to make it dynamic? Please guide.
I have successfully plotted the graph dynamically, but could not connect the interactive part. The code is as follows:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import CustomJS,HoverTool,ColumnDataSource,Select
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.annotations import Title, Legend
import itertools
from bokeh.palettes import inferno
from bokeh.layouts import row
def creat_plot(dataframe):
    """Draw the feature columns of *dataframe* as lines on a datetime axis.

    The frame must contain a ``timestamp`` column. The first column supplies
    the x values and every column from the third one onwards is drawn as its
    own line. A checkbox group listing the plotted column names is shown
    next to the figure; no callback is attached to it here.

    Args:
        dataframe: pandas DataFrame with the raw data. It is left unmodified.
    """
    # Work on a copy so the helper columns do not leak into the caller's frame.
    data = dataframe.copy()
    # Converting the timestamp column to a datetime dtype for the x-axis
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    # Split date and time out of the timestamp text; used by the hover tool
    data['date'] = data['timestamp'].map(lambda ts: str(ts)[:10])
    data['time'] = data['timestamp'].map(lambda ts: str(ts)[11:])
    # Converting the whole dataframe to a ColumnDataSource for the hover tool
    source = ColumnDataSource(data)
    # All the tools wanted in the plot, comma separated, in one string.
    TOOLS = "crosshair,pan,wheel_zoom,box_zoom,reset,hover"
    # New figure
    t = figure(x_axis_type="datetime", width=1500, height=600, tools=TOOLS,
               title="Plot for Interactive Features")
    # X-axis tick formatter
    t.xaxis.formatter.days = '%d/%m/%Y'
    # Axis labels
    t.yaxis.axis_label = 'Count'
    t.xaxis.axis_label = 'Date and Time Span'
    # Grid line formatting
    t.ygrid.minor_grid_line_color = 'navy'
    t.ygrid.minor_grid_line_alpha = 0.1
    t.xgrid.visible = True
    t.ygrid.visible = True
    # Hover tool: "@name" looks the value up in the column called "name"
    t.select_one(HoverTool).tooltips = [('Date', '@date'), ('Time', '@time')]
    # Colour and line-dash iterators, cycled across the features
    colors = itertools.cycle(inferno(len(data.columns)))
    line_types = ['solid', 'dashed', 'dotted', 'dotdash', 'dashdot']
    lines = itertools.cycle(line_types)
    column_name = []
    # Loop over the feature columns; the last two columns are the
    # date/time helper columns added above and are skipped.
    for m in data.columns[2:len(data.columns) - 2]:
        column_name.append(m)
        t.line(data.columns[0], m, color=next(colors), source=source,
               line_dash=next(lines), alpha=1)
    # Label selection checkbox list
    checkboxes = CheckboxGroup(labels=column_name, active=[0, 1, 2])
    show(row(t, checkboxes))
The above function can be used as follows:
dataframe= pd.read_csv('data.csv')
creat_plot(dataframe)
The above code was run with the following versions:
Bokeh version: 2.2.3
Panda Version: 1.1.3
The plot should be linked with the checkbox values. The features selected through the checkboxes shall be plotted only.
The solution to the above requirement is as follows:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show,output_file
from bokeh.models import CustomJS,HoverTool,ColumnDataSource,Select
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.annotations import Title, Legend
import itertools
from bokeh.palettes import inferno
from bokeh.layouts import row
def creat_plot(dataframe):
    """Plot feature columns as lines that are toggled by a checkbox group.

    The frame must contain a ``timestamp`` column. The first column supplies
    the x values and every column from the third one onwards becomes a line.
    All lines start hidden; ticking a checkbox shows the matching line. The
    result is written to ``Interactive_data_visualization.html`` and shown.

    Args:
        dataframe: pandas DataFrame with the raw data. It is left unmodified.
    """
    # Work on a copy so the helper columns do not leak into the caller's frame.
    data = dataframe.copy()
    # Converting the timestamp column to a datetime dtype for the x-axis
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    # Split date and time out of the timestamp text; used by the hover tool
    data['date'] = data['timestamp'].map(lambda ts: str(ts)[:10])
    data['time'] = data['timestamp'].map(lambda ts: str(ts)[11:])
    # Converting the whole dataframe to a ColumnDataSource for the hover tool
    source = ColumnDataSource(data)
    # All the tools wanted in the plot, comma separated, in one string.
    TOOLS = "crosshair,pan,wheel_zoom,box_zoom,reset,hover"
    # New figure
    t = figure(x_axis_type="datetime", width=1500, height=600, tools=TOOLS,
               title="Plot for Interactive Visualization")
    # X-axis tick formatter
    t.xaxis.formatter.days = '%d/%m/%Y'
    # Axis labels
    t.yaxis.axis_label = 'Count'
    t.xaxis.axis_label = 'Date and Time Span'
    # Grid line formatting
    t.ygrid.minor_grid_line_color = 'navy'
    t.ygrid.minor_grid_line_alpha = 0.1
    t.xgrid.visible = True
    t.ygrid.visible = True
    # Hover tool: "@name" looks the value up in the column called "name"
    t.select_one(HoverTool).tooltips = [('Date', '@date'), ('Time', '@time')]
    # Colour and line-dash iterators, cycled across the features
    colors = itertools.cycle(inferno(len(data.columns)))
    line_types = ['solid', 'dashed', 'dotted', 'dotdash', 'dashdot']
    lines = itertools.cycle(line_types)
    feature_lines = []
    column_name = []
    # Loop over the feature columns; the last two columns are the
    # date/time helper columns added above and are skipped.
    for m in data.columns[2:len(data.columns) - 2]:
        column_name.append(m)
        # Each renderer is kept (initially hidden) so the callback can toggle it.
        feature_lines.append(
            t.line(data.columns[0], m, color=next(colors), source=source,
                   line_dash=next(lines), alpha=1, visible=False)
        )
    # Label selection checkbox list, nothing ticked at start
    checkbox = CheckboxGroup(labels=column_name, active=[])
    # ``active`` holds the ticked positions, so membership has to be tested by
    # value with includes(); the JS ``in`` operator would test array indices.
    callback = CustomJS(args=dict(feature_lines=feature_lines, checkbox=checkbox), code="""
        for (let i = 0; i < feature_lines.length; ++i) {
            feature_lines[i].visible = checkbox.active.includes(i)
        }
    """)
    checkbox.js_on_change('active', callback)
    output_file('Interactive_data_visualization.html')
    show(row(t, checkbox))
I posed a question at Plotly: How to add a horizontal scrollbar to a plotly express figure? asking how to add a horizontal scrollbar to a plotly express figure for purposes of visualizing a long multivariate time series. A solution for a simple example consisting of three series having 100K points each was given as follows:
import plotly.express as px
import numpy as np
import pandas as pd
# Fixed seed so the three noise signals are reproducible.
np.random.seed(123)
e = np.random.randn(100000, 3)
df = pd.DataFrame(e, columns=list('abc'))
df['x'] = df.index
# Long format: one row per (x, variable) pair, as plotly express expects.
df_melt = df.melt(id_vars="x", value_vars=df.columns[:-1])
fig = px.line(df_melt, x="x", y="value", color="variable")
# Add range slider
fig.update_layout(xaxis={"rangeslider": {"visible": True}, "type": "linear"})
fig.show()
This code is nice, but I'd like to have the plots not superimposed on a single set of axes--instead one above the other as would be done with subplot. For example, signal 'a' would appear above signal 'b', which would appear above signal 'c'.
Because my actual time series have at least 50 channels, a vertical scrollbar will likely be necessary.
As far as I know, it may be possible in dash, but it does not exist in plotly. The question you quoted also suggests a range slider as a substitute for the scroll function. At the same time, the range slider is integrated with the graph, so if you don't make the slider function independent, it will disappear on scrolling, which is not a good idea. I think the solution at the moment is to have 50 channels side by side and add a slider.
import plotly.graph_objects as go
import numpy as np
import pandas as pd
# Fixed seed so the three noise signals are reproducible.
np.random.seed(123)
e = np.random.randn(100000, 3)
df = pd.DataFrame(e, columns=list('abc'))
df['x'] = df.index
df_melt = df.melt(id_vars="x", value_vars=df.columns[:-1])

fig = go.Figure()
# One trace per signal, each bound to its own y-axis.
for _signal, _axis_ref in (('a', 'y'), ('b', 'y2'), ('c', 'y3')):
    _rows = df_melt[df_melt['variable'] == _signal]
    fig.add_trace(go.Scatter(x=_rows['x'], y=_rows['value'], yaxis=_axis_ref))

# Each y-axis is anchored to the shared x-axis and given its own vertical
# slice (domain) of the figure.
_axis_layout = {
    _axis_name: dict(
        anchor='x',
        domain=_domain,
        linecolor=_color,
        type='linear',
        zeroline=False,
    )
    for _axis_name, _domain, _color in (
        ('yaxis', [0, 0.33], 'blue'),
        ('yaxis2', [0.33, 0.66], 'red'),
        ('yaxis3', [0.66, 1.0], 'green'),
    )
}
# Add range slider
fig.update_layout(
    xaxis={"rangeslider": {"visible": True}, "type": "linear"},
    **_axis_layout,
)
fig.show()
Explored lots of various solutions here but not finding one that works. I'm using sqlite and pandas to read data from a SQL database, but Bokeh doesn't like the date. I've tried conversions to datetime, unixepoch, etc. and they all seem to yield the same result.
EDIT: Here's the full code:
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN ds ELSE df END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# proj['projdate'] = [datetime.datetime.strptime(x, "%Y-%m-%d").date() for x in proj['projdate']]
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
    """Return the rows of ``proj`` whose year lies within the slider range.

    Both ends are inclusive: ``min_year.value <= year <= max_year.value``.
    """
    # ``year`` is produced by SQLite's strftime('%Y', ...) and is text, so it
    # is converted before being compared with the numeric slider values.
    years = pd.to_numeric(proj["year"])
    in_range = (years >= min_year.value) & (years <= max_year.value)
    return proj[in_range]
def update():
    """Push the rows selected by the year sliders into ``source``."""
    # A distinct local name avoids shadowing the module-level ``proj`` frame.
    selected = select_rep()
    # Supply every column the source was declared with (x, y, year);
    # assigning only ``year`` would drop the other two.
    # NOTE(review): the line glyph has to be created with ``source=source``
    # for this update to change what is drawn.
    source.data = dict(
        x=selected["x"],
        y=selected["y"],
        year=selected["year"],
    )
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"
The SQLite output in Terminal.app looks like this:
SQL
The result is, that the x-axis displays in milliseconds. Also, the y-axis is showing up as exponential notation:
Bokeh Plot
The issue seems somehow related to pandas use of indexing, and thus I can't reference "x" here. I rename the columns and force list sets which, by themselves, will print correctly... and should therefore plot into the line properly but as you'll see below, they don't:
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
To get the line to render in Bokeh, I have to pass it the index because passing it anything else doesn't seem to get the glyph to render. So currently I have this:
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=proj.index, y=y, line_width=2, line_alpha=0.6)
Tried converting to unixepoch in the SQL, same result.
Tried converting to unixepoch in the data, same result.
Tried using DatetimeTickFormatter; it just shows all 5-6 years as one year (I think it's just displaying the milliseconds as years rather than converting them from milliseconds to days).
I've looked here and in github, up and down, and tried different things but ultimately I can't find one working example where the source is a sql query not a csv.
None of these things have anything to do with SQL, Bokeh only cares about the data that you give it, not where it came from. You have specified that you want a datetime axis on the x-axis:
x_axis_type='datetime'
So, Bokeh will set up the plot with a ticker that picks "nice" values on a datetime scale, and with a tick formatter that displays tick locations as formatted dates. What is important, however, is that the data coordinates are in the appropriate units, which are floating point milliseconds since epoch.
You can provide x values directly in these units, but Bokeh will also automatically convert common datetime types (e.g. python stdlib, numpy, or pandas) to the right units automatically. So the easiest thing for you to do is pass a column of datetime values as the x values to line.
To be clear, this statement:
To render the line in Bokeh, it has to use the index
is incorrect. You can pass any dataframe column you like as the x-values, and I am suggesting you pass a column of datetimes.
I changed a line of the SQL to:
CASE WHEN df is null THEN strftime('%Y',ds) ELSE strftime('%Y',df) END AS 'projdate',
However, when I try expanding that specifier to %Y-%m-%d %H-%m-%s it just reads it as a string all over again.
And also by re-importing the data I was able to pass the date through here without using Index:
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
But then I get this weird output: link.
So it's clear that it can read the year, but I need to pass through the full date to display the time series forecast. And it's still displaying the dates and y-values in the incorrect scale, regardless.
Going to noodle on this some more but if anyone has other suggestions, I'm thankful.
SOLVED the datetime problem. Added this after the SQL query:
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
Which in turn yields this:
Bokeh Plot
Still got a problem with the x-axis but since that's a straight numerical value, x_axis_type should fix it.
So far the working code looks like this (again, still iterating to add other controls but everything about the Bokeh plot itself works as intended):
# main.py
# created by: <>
# version: 0.1.2
# created date: 07-Aug-2018
# modified date: 09-Aug-2018
from os.path import dirname, join
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import sqlite3
import os
from math import pi
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Div, DatetimeTickFormatter
from bokeh.models.widgets import Slider, Select, RadioButtonGroup
from bokeh.layouts import layout, widgetbox
import warnings
import datetime
warnings.filterwarnings('ignore')
## Set up the SQL Connection
conn = sqlite3.connect('/Users/<>/Documents/python_scripts/reptool/reptool_db')
c = conn.cursor()
## Run the SQL
proj = pd.read_sql(
"""
SELECT
CASE WHEN df is null THEN strftime('%Y-%m-%d',ds) ELSE strftime('%Y-%m-%d',df) END AS 'projdate',
CASE WHEN yhat is null THEN y ELSE yhat END AS 'projvol',
strftime('%Y',ds) as 'year'
FROM forecast
LEFT JOIN actuals
ON forecast.ds = actuals.df
""", con=conn)
proj['projdate'] = proj['projdate'].astype('datetime64[ns]')
# HTML index page and inline CSS stylesheet
desc = Div(text=open("/Users/<>/Documents/python_scripts/reptool/description.html").read(), width=800)
## Rename Columns and create list sets
proj.rename(columns={'projdate': 'x', 'projvol': 'y'}, inplace=True)
x=list(proj['x'])
y=list(proj['y'])
# Create input controls
radio_button_group = RadioButtonGroup(
labels=["Actuals", "Forecast","FY Projection"], active=0)
min_year = Slider(title="Period Start", start=2012, end=2018, value=2013, step=1)
max_year = Slider(title="Period End", start=2012, end=2018, value=2017, step=1)
## Declare systemic source
source = ColumnDataSource(data=dict(x=[], y=[], year=[]))
## Bokeh tools
TOOLS="pan,wheel_zoom,box_zoom,reset,xbox_select"
## Set up plot
p = figure(title="REP Forecast", plot_width=900, plot_height=300, tools=TOOLS, x_axis_label='date', x_axis_type='datetime', y_axis_label='volume', active_drag="xbox_select")
p.line(x=x, y=y, line_width=2, line_alpha=0.6)
p.xaxis.major_label_orientation = pi/4
# p.xaxis.formatter = DatetimeTickFormatter(seconds=["%Y:%M"],
# minutes=["%Y:%M"],
# minsec=["%Y:%M"],
# hours=["%Y:%M"])
# axis map
# definitions
def select_rep():
    """Return the rows of ``proj`` whose year lies within the slider range.

    Both ends are inclusive: ``min_year.value <= year <= max_year.value``.
    """
    # ``year`` is produced by SQLite's strftime('%Y', ...) and is text, so it
    # is converted before being compared with the numeric slider values.
    years = pd.to_numeric(proj["year"])
    in_range = (years >= min_year.value) & (years <= max_year.value)
    return proj[in_range]
def update():
    """Push the rows selected by the year sliders into ``source``."""
    # A distinct local name avoids shadowing the module-level ``proj`` frame.
    selected = select_rep()
    # Supply every column the source was declared with (x, y, year);
    # assigning only ``year`` would drop the other two.
    # NOTE(review): the line glyph has to be created with ``source=source``
    # for this update to change what is drawn.
    source.data = dict(
        x=selected["x"],
        y=selected["y"],
        year=selected["year"],
    )
controls = [min_year, max_year]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
sizing_mode = 'fixed' # 'scale_width' also looks nice with this example
## Build the html page and inline CSS
inputs = widgetbox(*controls)
l = layout([
[desc],
[p],
[inputs],
], )
# update()
curdoc().add_root(l)
curdoc().title = "REP"
I am trying to plot RPI, CPI and CPIH on one chart with a HoverTool showing the value of each when you pan over a given area of the chart.
I initially tried adding each line separately using line() which kind of worked:
However, the HoverTool only works correctly when you scroll over the individual lines.
I have tried using multi_line() like:
combined_inflation_metrics = 'combined_inflation_metrics.csv'
df_combined_inflation_metrics = pd.read_csv(combined_inflation_metrics)
combined_source = ColumnDataSource(df_combined_inflation_metrics)
l.multi_line(xs=['Date','Date','Date'],ys=['RPI', 'CPI', 'CPIH'], source=combined_source)
#l.multi_line(xs=[['Date'],['Date'],['Date']],ys=[['RPI'], ['CPI'], ['CPIH']], source=combined_source)
show(l)
However, this is throwing the following:
RuntimeError:
Supplying a user-defined data source AND iterable values to glyph methods is
not possibe. Either:
Pass all data directly as literals:
p.circe(x=a_list, y=an_array, ...)
Or, put all data in a ColumnDataSource and pass column names:
source = ColumnDataSource(data=dict(x=a_list, y=an_array))
p.circe(x='x', y='y', source=source, ...)
But I am not too sure why this is?
Update:
I figured out a workaround by adding all of the values in each of the data sources. It works, but doesn't feel most efficient and would still like to know how to do this properly.
Edit - Code request:
from bokeh.plotting import figure, output_file, show
from bokeh.models import NumeralTickFormatter, DatetimeTickFormatter, ColumnDataSource, HoverTool, CrosshairTool, SaveTool, PanTool
import pandas as pd
import os
os.chdir(r'path')
#output_file('Inflation.html', title='Inflation')
RPI = 'RPI.csv'
CPI = 'CPI.csv'
CPIH = 'CPIH.csv'
df_RPI = pd.read_csv(RPI)
df_CPI = pd.read_csv(CPI)
df_CPIH = pd.read_csv(CPIH)
def to_date_time(data_frame, data_series):
    """Convert one column of *data_frame* to ``datetime64[ns]`` in place.

    Args:
        data_frame: DataFrame to modify.
        data_series: Name of the column to convert.
    """
    converted = data_frame[data_series].astype('datetime64[ns]')
    data_frame[data_series] = converted
to_date_time(df_RPI, 'Date')
to_date_time(df_CPI, 'Date')
to_date_time(df_CPIH, 'Date')
RPI_source = ColumnDataSource(df_RPI)
CPI_source = ColumnDataSource(df_CPI)
CPIH_source = ColumnDataSource(df_CPIH)
l = figure(title="Historic Inflaiton Metrics", logo=None)
l.plot_width = 1200
l.xaxis[0].formatter=DatetimeTickFormatter(
days=["%d %B %Y"],
months=["%d %B %Y"],
years=["%d %B %Y"],
)
glyph_1 = l.line('Date','RPI',source=RPI_source, legend='TYPE', color='red')
glyph_2 = l.line('Date','CPI',source=CPI_source, legend='TYPE', color='blue')
glyph_3 = l.line('Date','CPIH',source=CPIH_source, legend='TYPE', color='gold')
# Tooltip fields reference data-source columns with the "@" prefix. The tool
# is restricted to glyph_1, so values are looked up in that glyph's source.
hover = HoverTool(
    renderers=[glyph_1],
    tooltips=[
        ("Date", "@Date{%F}"),
        ("RPI", "@RPI"),
        ("CPI", "@CPI"),
        ("CPIH", "@CPIH"),
    ],
    formatters={"Date": "datetime"},
    mode='vline',
)
l.tools = [SaveTool(), PanTool(), hover, CrosshairTool()]
show(l)
The hover tool looks up the data to show in the ColumnDataSource. Because you created a new ColumnDataSource for each line and restricted the hover tool to line1 it can only lookup data in the data source there.
The general solution is to only create one ColumnDataSource and reuse that in each line:
# Load the three series; "Date" is parsed up front because it is drawn on
# the x-axis and formatted as a datetime in the tooltip.
df_RPI = pd.read_csv(RPI, parse_dates=["Date"])
df_CPI = pd.read_csv(CPI, parse_dates=["Date"])
df_CPIH = pd.read_csv(CPIH, parse_dates=["Date"])

# Merge on the shared "Date" column so that every row carries all three metrics.
df = df_RPI.merge(df_CPI, on="Date")
df = df.merge(df_CPIH, on="Date")

# A single data source backs all three lines, so the hover tool attached to
# one glyph can look up the RPI, CPI and CPIH values.
source = ColumnDataSource(df)
l = figure(title="Historic Inflation Metrics", logo=None)
glyph_1 = l.line('Date', 'RPI', source=source, legend='RPI', color='red')
l.line('Date', 'CPI', source=source, legend='CPI', color='blue')
l.line('Date', 'CPIH', source=source, legend='CPIH', color='gold')

# Tooltip fields reference data-source columns with the "@" prefix.
hover = HoverTool(
    renderers=[glyph_1],
    tooltips=[
        ("Date", "@Date{%F}"),
        ("RPI", "@RPI"),
        ("CPI", "@CPI"),
        ("CPIH", "@CPIH"),
    ],
    formatters={"Date": "datetime"},
    mode='vline',
)
# The tool has to be attached to the figure, otherwise it is never used.
l.add_tools(hover)
show(l)
This is of course only possible if all your dataframes can be merged into one, i.e. the measurement timepoints are the same. If they are not, I do not know a good method to do what you want other than resampling/interpolating.
I am trying out Holoviews for the first time, and I'd like to reproduce this animated "Gapminder" plot as described here.
The code runs but I do not know how to handle the output so that it is displayed in a Jupyter Notebook (I assume that is possible, since Jupyter can display arbitrary HTML).
# Get HoloViews plot and attach document
doc = curdoc()
hvplot = BokehRenderer.get_plot(hvgapminder, doc)
# Make a bokeh layout and add it as the Document root
plot = layout([[hvplot.state], [slider, button]], sizing_mode='fixed')
doc.add_root(plot)
Specifically, what should I do with the resulting doc or hvplot objects?
That particular example combines both HoloViews and bokeh components and bokeh widgets cannot easily communicate with Python in the notebook. You can however use the holoviews 'scrubber' widget to achieve the same thing:
import pandas as pd
import numpy as np
import holoviews as hv
from bokeh.sampledata import gapminder
hv.extension('bokeh')
# Switch to sending data 'live' and using the scrubber widget
%output widgets='live' holomap='scrubber'
# Declare dataset
# pd.Panel no longer exists in pandas. The same long-format frame is built by
# stacking each wide (Country x Year) table into a Series and aligning the
# three Series column-wise.
_indicators = {'Fertility': gapminder.fertility,
               'Population': gapminder.population,
               'Life expectancy': gapminder.life_expectancy}
gapminder_df = (
    pd.concat({name: wide.stack() for name, wide in _indicators.items()}, axis=1)
    .dropna()  # keep only (Country, Year) pairs present in all three tables
    .rename_axis(index=['Country', 'Year'])
    .reset_index()
)
gapminder_df = gapminder_df.merge(gapminder.regions.reset_index(), on='Country')
gapminder_df['Country'] = gapminder_df['Country'].astype('str')
gapminder_df['Group'] = gapminder_df['Group'].astype('str')
gapminder_df.Year = gapminder_df.Year.astype('f')
ds = hv.Dataset(gapminder_df)
# Apply dimension labels and ranges
kdims = ['Fertility', 'Life expectancy']
vdims = ['Country', 'Population', 'Group']
dimensions = {
'Fertility' : dict(label='Children per woman (total fertility)', range=(0, 10)),
'Life expectancy': dict(label='Life expectancy at birth (years)', range=(15, 100)),
'Population': ('population', 'Population')
}
# Create Points plotting fertility vs life expectancy indexed by Year
gapminder_ds = ds.redim(**dimensions).to(hv.Points, kdims, vdims, 'Year')
# Define annotations
text = gapminder_ds.clone({yr: hv.Text(1.2, 25, str(int(yr)), fontsize=30)
for yr in gapminder_ds.keys()})
# Define options
opts = {'plot': dict(width=1000, height=600,tools=['hover'], size_index='Population',
color_index='Group', size_fn=np.sqrt, title_format="{label}"),
'style': dict(cmap='Set1', size=0.3, line_color='black', alpha=0.6)}
text_opts = {'style': dict(text_font_size='52pt', text_color='lightgray')}
# Combine Points and Text
(gapminder_ds({'Points': opts}) * text({'Text': text_opts})).relabel('Gapminder Demo')