How to add annotation to heatmap cells? - python

This is a follow-up question of this one.
I would like to add text to the cells in the heatmap. I thought I could use LabelSet as described here. However, unfortunately, I don't see any labels when I run the following code:
import pandas as pd
from bokeh.io import show
from bokeh.models import (CategoricalColorMapper, LinearColorMapper,
BasicTicker, PrintfTickFormatter, ColorBar,
ColumnDataSource, LabelSet)
from bokeh.plotting import figure
from bokeh.palettes import all_palettes
from bokeh.transform import transform
# Long-format table: one record per (row, column) pair with its content
# label and an amount (stored as single-character strings).
df = pd.DataFrame({
    'row': list('xxxxxxyyyyyyzzzzzz'),
    'column': list('aabbccaabbccaabbcc'),
    'content': ['c1', 'c2', 'c3', 'c1', 'c2', 'c3'] * 3,
    'amount': list('123212123212123212')})
# Keep only the first record of every (row, column) pair so each heatmap
# cell is drawn once.
df = df.drop_duplicates(subset=['row', 'column'])
source = ColumnDataSource(df)

# Categorical axes and colour factors are taken from the data itself.
rows = df['row'].unique()
columns = df['column'].unique()
content = df['content'].unique()

# Ask for a palette of at least 3 colours, even with fewer categories.
colors = all_palettes['Viridis'][max(len(content), 3)]
mapper = CategoricalColorMapper(palette=colors, factors=content)

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
p = figure(title="My great heatmap",
           x_range=rows, y_range=columns,
           x_axis_location="above", plot_width=600, plot_height=400,
           tools=TOOLS, toolbar_location='below',
           # Hover fields reference data-source columns with the "@" prefix.
           tooltips=[('cell content', '@content'), ('amount', '@amount')])

# Strip grid lines, axis lines and ticks so only the cells remain.
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0

# One unit-sized rectangle per cell, coloured by its 'content' category.
p.rect(x="row", y="column", width=1, height=1,
       source=source,
       fill_color=transform('content', mapper))

# Text annotation for every cell, read from the same data source.
# NOTE: level='glyph' is kept exactly as posted; it is the setting this
# question is about.
labels = LabelSet(x='row', y='column', text='content', level='glyph',
                  x_offset=1, y_offset=1, source=source,
                  render_mode='canvas')
p.add_layout(labels)
show(p)
I see the heatmap, but no labels. How can I display the text?

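Bokeh has five render levels: "image, underlay, glyph, annotation, overlay". The glyphs drawn by p.rect are at the glyph level.
If you don't set the level argument of LabelSet, its level defaults to annotation, which is drawn on top of
the glyph level. So remove level='glyph' from the LabelSet call and the labels will be drawn above the rectangles.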

Interestingly, OP's code worked for me. I came here because I had the same problem. Turns out that the annotation data should be a string. After converting the respective column in ColumnDataSource() my annotations (numbers) showed up in the heatmap.

Related

How do you make the RangeTool in Bokeh select over multiple plots?

In reference to
https://docs.bokeh.org/en/latest/docs/gallery/range_tool.html
where you have the range tool control the main top chart.
Can you modify this so that you can select over several charts? So far what I tried displays the charts but only the chart I synch with x_range is the chart that moves. I tried passing a list, a series, nothing works. Can someone assist?
Sample code:
import numpy as np
from bokeh.io import show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.plotting import figure
from bokeh.sampledata.stocks import AAPL, GOOG
from bokeh.layouts import gridplot
# Shared data: one date column plus the adjusted close of each stock.
dates = np.array(AAPL['date'], dtype=np.datetime64)
source = ColumnDataSource(data={
    'date': dates,
    'aapl': AAPL['adj_close'],
    'goog': GOOG['adj_close'],
})


def _detail_plot(series):
    """Build one 800x300 datetime chart showing column `series` of `source`.

    Every call passes its own (start, end) tuple as x_range, exactly as the
    two hand-written figure() calls in the question did.
    """
    fig = figure(
        plot_height=300,
        plot_width=800,
        tools="xpan",
        toolbar_location=None,
        x_axis_type="datetime",
        x_axis_location="above",
        background_fill_color="#efefef",
        x_range=(dates[1500], dates[2500]),
    )
    fig.line('date', series, source=source)
    fig.yaxis.axis_label = 'Price'
    return fig


p1 = _detail_plot('aapl')
p2 = _detail_plot('goog')
p = gridplot([[p1, p2]])

# Overview strip underneath the two detail charts; it shares p1's y range.
select = figure(
    title="Drag the middle and edges of the selection box to change the range above",
    plot_height=130,
    plot_width=1600,
    y_range=p1.y_range,
    x_axis_type="datetime",
    y_axis_type=None,
    tools="",
    toolbar_location=None,
    background_fill_color="#efefef",
)
select.line('date', 'aapl', source=source)
select.line('date', 'goog', source=source)
select.ygrid.grid_line_color = None

# The range tool is bound to p1's x range only.
range_tool = RangeTool(x_range=p1.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

show(column(p, select))
Output:
You will also have to configure all the plots that you want to be synchronized, with the same range e.g.
p2 = figure(..., x_range=p1.x_range)

Exporting Bokeh Plots as images

I have a piece of sample Python that makes a waterfall visual.
It uses the Bokeh library.
It looks great and works well in Jupyter, but when I try to use it in Power BI I get an error saying that no image was created.
The code uses show(p), which seems to open an Internet Explorer page when I run it in Power BI.
I tried a matplotlib example, and it uses:
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
Is there something similar for the Bokeh library?
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd
#output_notebook()
# Create the initial dataframe: one signed amount per transaction type.
index = ['sales', 'returns', 'credit fees', 'rebates', 'late charges', 'shipping']
data = {'amount': [350000, -30000, -7500, -25000, 95000, -7000]}
df = pd.DataFrame(data=data, index=index)

# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()

# Each bar runs from the previous running total (y_start) to the new one.
df['running_total'] = df['amount'].cumsum()
df['y_start'] = df['running_total'] - df['amount']

# Where do we want to place the label?
df['label_pos'] = df['running_total']

# Extra "net" bar that starts at 0 and ends at the net value.
df_net = pd.DataFrame.from_records(
    [(net, net, 0, net)],
    columns=['amount', 'running_total', 'y_start', 'label_pos'],
    index=["net"])
# DataFrame.append was removed in pandas 2.0; pd.concat yields the same
# row-wise concatenation and keeps the existing index labels.
df = pd.concat([df, df_net])

# Colour by sign, with a separate colour for amounts above 300000.
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'
df.loc[df.amount > 0, 'color'] = 'green'
df.loc[df.amount > 300000, 'color'] = 'blue'

# Shift the label position of negative amounts down by 10000.
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000

# Thousands-separated text shown next to each bar, e.g. "350,000".
df["bar_label"] = df["amount"].map('{:,.0f}'.format)
TOOLS = "box_zoom,reset,save"
source = ColumnDataSource(df)

# Categorical x axis taken from the dataframe index; the y axis leaves
# 40000 of headroom above the net value.
p = figure(
    tools=TOOLS,
    x_range=list(df.index),
    y_range=(0, net+40000),
    plot_width=800,
    title="Sales Waterfall",
)

# Each bar is a thick vertical segment from y_start to running_total.
p.segment(
    x0='index', y0='y_start',
    x1="index", y1='running_total',
    source=source,
    color="color",
    line_width=55,
)

p.grid.grid_line_alpha = 0.3
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"

# Text for every bar, read from the 'bar_label' column at 'label_pos'.
labels = LabelSet(
    x='index', y='label_pos', text='bar_label',
    text_font_size="8pt", level='glyph',
    x_offset=-20, y_offset=0,
    source=source,
)
p.add_layout(labels)
show(p)
There is a chapter of the User's Guide dedicated to Exporting Plots:
from bokeh.io import export_png
# Writes the plot to "plot.png". `plot` is a placeholder name here —
# presumably the figure built in the question (`p`); substitute your own.
export_png(plot, filename="plot.png")
Note that you will need to have the necessary optional dependencies (PhantomJS and selenium) installed.

Bokeh: how to add legend to patches glyph with GeoJSONDataSource and CategoricalColorMapper?

I'm trying to add a legend to a Bokeh patches figure, but I end up with only one legend item (and with the wrong label).
I have a shape file with polygons. Each polygon has an attribute called 'category', which can take the values 'A', 'B', 'C', 'D' and 'E'. I convert the shape file to geojson and subsequently create a Bokeh patches figure, using CategoricalColorMapper to add a colour to each polygon depending on the 'category' it is in. Now I want the legend to show the five category options and their respective colours.
Here's my code:
import geopandas as gpd
from bokeh.io import show, output_notebook, output_file, export_png
from bokeh.models import GeoJSONDataSource, CategoricalColorMapper, Legend, LegendItem
from bokeh.plotting import figure, reset_output
from bokeh.transform import factor_cmap
import selenium
import numpy as np
# Read the shapefile and hand it to Bokeh as a GeoJSON string.
gdf = gpd.GeoDataFrame.from_file("test.shp")
gdf_json = gdf.to_json()
source_shape = GeoJSONDataSource(geojson=gdf_json)

# One fixed colour per category value.
cmap = CategoricalColorMapper(
    palette=["black", "purple", "pink", "brown", "blue"],
    factors=['A', 'B', 'C', 'D', 'E'],
)

p = figure(
    height=500,
    match_aspect=True,
    h_symmetry=False,
    v_symmetry=False,
    min_border=0,
)

# All polygons are drawn by a single patches glyph, coloured via the mapper.
# The legend argument is kept exactly as posted; it is what the question
# is about.
p.patches(
    'xs', 'ys',
    source=source_shape,
    fill_color={'field': 'category', 'transform': cmap},
    line_color='black',
    line_width=0.5,
    legend='category',
)
export_png(p, filename="map.png")
However, the output I get is as follows:
map.png output
The legend shows only one item, with the label 'category' rather than the actual category names. How can I fix this such that the legend shows all 5 categories with their labels (A,B,C,D,E)?
This code does what you want, however, I think it could be easier to manipulate the GeoDataFrame directly instead of converting to JSON. This code is compatible with Bokeh v1.0.4.
from bokeh.models import GeoJSONDataSource, CategoricalColorMapper
from bokeh.plotting import figure, show
from bokeh.io import export_png
import geopandas as gpd
import random
import json
# Load the shapefile and work on its GeoJSON representation as a dict.
gdf = gpd.GeoDataFrame.from_file("Judete/Judete.shp")
gdf_json = gdf.to_json()
gjson = json.loads(gdf_json)

categories = ['A', 'B', 'C', 'D', 'E']

# One empty FeatureCollection per category; each becomes its own data
# source (and so its own legend entry) below.
source_shapes = {}
for category in categories:
    source_shapes[category] = {"type": "FeatureCollection", "features": []}

# Demo data: assign every feature a random category, then file the
# feature under that category's collection.
for item in gjson['features']:
    item['properties']['category'] = random.choice(categories)
    source_shapes[item['properties']['category']]['features'].append(item)

p = figure(match_aspect=True, min_border=0,
           h_symmetry=False, v_symmetry=False,
           x_axis_location=None, y_axis_location=None)

cmap = CategoricalColorMapper(palette=["orange", "purple", "pink", "brown", "blue"],
                              factors=['A', 'B', 'C', 'D', 'E'])

# Draw one patches glyph per category so the legend gets one item per
# category, labelled with the category name.
for category in categories:
    source_shape = GeoJSONDataSource(geojson=json.dumps(source_shapes[category]))
    p.patches('xs', 'ys', fill_color={'field': 'category', 'transform': cmap},
              line_color='black', line_width=0.5,
              legend=category, source=source_shape)

# Clicking a legend entry hides the corresponding category.
p.legend.click_policy = 'hide'
show(p)  # export_png(p, filename = "map.png")
Result:
It seems that the legend is not currently working with GeoJSONDataSource as there is an open issue Legend not working with GeoJSONDataSource #5904 that is still unresolved.

Bokeh: how to give different colours to tick labels

I am trying to give each y tick label a different colour. "Very Poor" in Red, "Poor" in Orange, "Fair" in Yellow, "Good" in Green and "Very Good" in Blue, for example. Is there a way to do this with Bokeh?
Below is the code of the plot.
from bokeh.plotting import output_file, show,figure
from bokeh.models.sources import ColumnDataSource
from bokeh.transform import linear_cmap
from bokeh.models import Range1d, FuncTickFormatter
import pandas as pd
output_file("gridbands.html")

# The data: five scores indexed by date, sorted chronologically.
df = pd.DataFrame(
    {'Score': [4.5, 9.32, 3.4, 7.1, 1.4]},
    index=pd.to_datetime([
        '2000-12-30 22:00:00',
        '2001-12-30 22:00:00',
        '2002-12-30 22:00:00',
        '2003-12-30 22:00:00',
        '2004-12-30 22:00:00',
    ]),
)
df.index.name = 'Date'
df = df.sort_index()
source = ColumnDataSource(df)

# Prepare the plot area: datetime x axis, y axis fixed to 0..10.
p = figure(x_axis_type="datetime", plot_width=800, plot_height=500)
p.y_range = Range1d(0, 10)
# Tick formatter callback: maps the tick values 1, 3, 5, 7, 9 to the five
# rating labels. `tick` is not a parameter; it is a free name that is
# expected to be supplied by the environment the function runs in (it is
# handed to FuncTickFormatter.from_py_func further down in this script).
def custom_label():
    new_labels = ["Very Poor", "Poor", "Fair", "Good", "Very Good"]
    # Floor division keeps the index an integer (1 -> 0, 3 -> 1, ..., 9 -> 4).
    return new_labels[(tick - 1) // 2]
# Fixed tick positions, rendered through the custom label function.
p.yaxis.ticker = [1, 3, 5, 7, 9]
p.yaxis.formatter = FuncTickFormatter.from_py_func(custom_label)

# Draw: markers coloured by score, joined by a semi-transparent line.
mapper = linear_cmap(
    field_name='Score',
    palette=["red", "orange", "yellow", "green", "blue"],
    low=0,
    high=10,
)
p.circle(
    'Date', 'Score',
    source=source,
    line_width=5,
    line_color=mapper,
    color=mapper,
)
p.line(
    'Date', 'Score',
    source=source,
    line_width=2,
    line_color="gray",
    color=mapper,
    line_alpha=0.5,
)
show(p)
I can set all y tick labels to one colour but not to different colours. As a workaround, I tried having 5 y axes, each with one label, so I can set colour of them individually. But it seems I cannot control their position to make them overlap.
I want to achieve something like this for y tick labels.
Any suggestions? Thanks

In Bokeh, Weird Date Axis Issue

I'm trying to plot some data with Bokeh via pandas. The x-axis is date, and I can get Bokeh to plot the axis "mostly" correct (the range may be off). However, the line it outputs is all over the place.
For example:
It looks like maybe it's one big, continuous line?
Here's my code:
# library imports
import pandas as pd
from bokeh.io import output_file, show, vform
from bokeh.plotting import figure, output_file, ColumnDataSource, show
from bokeh.models import HoverTool, BoxAnnotation, BoxSelectTool, BoxZoomTool, WheelZoomTool, ResetTool
# Import csv into pandas dataframe, parsing the time column as dates
df = pd.read_csv(r"C:\Users\paul.shapiro\Documents\kwdata.csv", parse_dates=['Interest over time_time'])
# Shorten the exported column names to keyword / date / volume.
df.rename(columns={'Search Term': 'keyword', 'Interest over time_time': 'date', 'Weekly Volume': 'volume'}, inplace=True)
source = ColumnDataSource(data=dict(x=df['date'], y=df['volume'], desc=df['keyword']))
# Hover fields reference data-source columns with the "@" prefix.
TOOLS = [HoverTool(tooltips=[("Keyword", "@desc"), ("Date", "@x"), ("Search Volume", "@y")]),
         BoxZoomTool(), WheelZoomTool(), ResetTool()]
# Output html for embedding
output_file("line.html")
p = figure(plot_width=800, plot_height=800, tools=TOOLS, x_axis_type="datetime")
# add both a line and circles on the same plot
# NOTE: the whole date/volume columns go into a single line glyph here,
# exactly as posted in the question.
p.line(df['date'], df['volume'], line_width=2, color=df['keyword'], source=source)
p.circle(df['date'], df['volume'], fill_color="white", size=8, source=source)
show(p)
It's also interesting to note, that if you plot it using bokeh.charts (if I did this the tooltips wouldn't work, so it's not an option), it plots fine:
# bokeh.charts variant of the same plot. `defaults` and `Line` are not
# imported in the visible snippet — presumably they come from bokeh.charts.
defaults.width = 800
defaults.height = 800
TOOLS = [BoxZoomTool(), WheelZoomTool(), ResetTool()]
# One line per distinct 'keyword' value is requested through color='keyword'.
line = Line(df, x='date', y='volume', color='keyword', source=source, tools=TOOLS)
show(line)
# NOTE(review): output_file is called after show() here, as in the original post.
output_file("line.html", title="Search Volume")
Any help would be much appreciated. This has been driving me crazy!
SOLVED using multi_line() and a for loop:
import pandas as pd
from bokeh.io import output_file, show, vform
from bokeh.plotting import figure, output_file, ColumnDataSource, show
from bokeh.models import HoverTool, BoxAnnotation, BoxSelectTool, BoxZoomTool, WheelZoomTool, ResetTool
# Load the CSV, parsing the time column as dates, and shorten column names.
df = pd.read_csv(r"C:\Users\paul.shapiro\Documents\kwdata.csv", parse_dates=['Interest over time_time'])
df.rename(columns={'Search Term': 'keyword', 'Interest over time_time': 'date', 'Weekly Volume': 'volume'}, inplace=True)
source = ColumnDataSource(data=dict(x=df['date'], y=df['volume'], desc=df['keyword']))
# Hover fields reference data-source columns with the "@" prefix.
TOOLS = [HoverTool(tooltips=[("Keyword", "@desc"), ("Date", "@x"), ("Search Volume", "@y")]),
         BoxZoomTool(), WheelZoomTool(), ResetTool()]
p = figure(plot_width=800, plot_height=800, tools=TOOLS, x_axis_type="datetime")

# Group the rows by keyword so every keyword gets its own line instead of
# one continuous line through all rows.
gp = df.groupby('keyword')
# gp.groups is a dict mapping each keyword to the index labels of its rows
for keyword, row_labels in gp.groups.items():
    p.multi_line(xs=[df.loc[row_labels, 'date']], ys=[df.loc[row_labels, 'volume']])

# Markers for all points, drawn once on top of the per-keyword lines.
p.circle(df['date'], df['volume'], fill_color="white", size=8, source=source)
output_file("newline.html")
show(p)
I cannot see anything wrong with your code. Try to see how different the dataframe df is from a simple nested list of values as per the bokeh example. Maybe by doing some manipulation to the dataframe you can get this working.
http://docs.bokeh.org/en/latest/docs/reference/plotting.html
from bokeh.plotting import figure, output_file, show
# Minimal multi_line example: two polylines given as nested lists, one
# colour per line.
p = figure(plot_width=300, plot_height=300)
p.multi_line(
    xs=[[1, 2, 3], [2, 3, 4]],
    ys=[[6, 7, 2], [4, 5, 7]],
    color=['red', 'green'],
)
show(p)

Categories