Bokeh how to have an overlay histogram - python

I made a histogram in bokeh and now I want to plot two histograms in the same graph (overlay histogram). How can I do that?
this is my code for one histogram:
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
import numpy as np
import pandas as pd
def generate_time_differences(n=1000, skew_p=0.1,mean=0,std=1,skew_mean=1,skew_std=6):
normal_dist = np.random.normal(loc=mean, scale=std, size=int(n * (1-skew_p)))
skewed_dist = np.random.normal(loc=skew_mean, scale=skew_std, size=int(n * skew_p))
return np.append(normal_dist, skewed_dist)
def generate_plot_data(data, density=True, bins=50):
hist, edges = np.histogram(data, density=density, bins=bins)
return pd.DataFrame({'top': hist,
'left': edges[:-1],
'right': edges[1:]})
data = generate_time_differences(n=1000, skew_p=0.1,mean=1000,std=100,skew_mean=2000,skew_std=500)
plot_data = generate_plot_data(data, density=True, bins=50)
# Create the blank plot
p = figure(plot_height = 300, plot_width = 600,
title = 'Test Histogram',
x_axis_label = 'Milliseconds',
y_axis_label = 'Frequency')
# Add a quad glyph
p.quad(bottom=0, top=plot_data['top'],
left=plot_data['left'], right=plot_data['right'],
fill_color='blue', line_color='blue', fill_alpha=0.5,line_alpha=0.5 )
# Show the plot
show(p)

Bokeh just plots the glyphs you ask for, in the order you ask for. If you want to add a second histogram, make more calls to quad with the new data.

Related

Seaborn boxplot : set median color and set tick label colors to boxes color

I'm using this nice boxplot graph, answer from #Parfait.
I got an out of bound error on j and had to use range(i*5,i*5+5). Why?
I'd like to set the median to a particular color, let's say red. medianprops=dict(color="red") won't work. How to do it?
How to set the y-axis tick labels to the same color as the boxes?
Disclaimer: I don't know what I'm doing.
Here's the code using random data :
# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import string
import matplotlib.colors as mc
import colorsys
# data
df = pd.DataFrame(np.random.normal(np.random.randint(5,15),np.random.randint(1,5),size=(100, 16)), columns=list(string.ascii_uppercase)[:16])
# Boxplot
fig, ax = plt.subplots(figsize=(9, 10))
medianprops=dict(color="red")
ax = sns.boxplot(data=df, orient="h", showfliers=False, palette = "husl")
ax = sns.stripplot(data=df, orient="h", jitter=True, size=7, alpha=0.5, palette = "husl") # show data points
ax.set_title("Title")
plt.xlabel("X label")
def lighten_color(color, amount=0.5):
# --------------------- SOURCE: #IanHincks ---------------------
try:
c = mc.cnames[color]
except:
c = color
c = colorsys.rgb_to_hls(*mc.to_rgb(c))
return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
for i,artist in enumerate(ax.artists):
# Set the linecolor on the artist to the facecolor, and set the facecolor to None
col = lighten_color(artist.get_facecolor(), 1.2)
artist.set_edgecolor(col)
# Each box has 6 associated Line2D objects (to make the whiskers, fliers, etc.)
# Loop over them here, and use the same colour as above
for j in range(i*5,i*5+5):
line = ax.lines[j]
line.set_color(col)
line.set_mfc(col)
line.set_mec(col)
#line.set_linewidth(0.5)
To change the color of the median, you can use the medianprops in sns.boxplot(..., medianprops=...). If you also set a unique label, that label can be tested again when iterating through the lines.
To know how many lines belong to each boxplot, you can divide the number of lines by the number of artists (just after the boxplot has been created, before other elements have been added to the plot). Note that a line potentially has 3 colors: the line color, the marker face color and the marker edge color. Matplotlib creates the fliers as an invisible line with markers. The code below thus also changes these colors to make it more robust to different options and possible future changes.
Looping simultaneously through the boxes and the y tick labels allows copying the color. Making them a bit larger and darker helps for readability.
import matplotlib.pyplot as plt
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb, to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
def enlighten(color, factor=0.5):
h, s, v = rgb_to_hsv(to_rgb(color))
return hsv_to_rgb((h, s, 1 - factor * (1 - v)))
def endarken(color, factor=0.5):
h, s, v = rgb_to_hsv(to_rgb(color))
return hsv_to_rgb((h, s, factor * v))
df = pd.DataFrame(np.random.normal(1, 5, size=(100, 16)).cumsum(axis=0),
columns=['Hydrogen', 'Helium', 'Lithium', 'Beryllium', 'Boron', 'Carbon', 'Nitrogen', 'Oxygen',
'Fluorine', 'Neon', 'Sodium', 'Magnesium', 'Aluminum', 'Silicon', 'Phosphorus', 'Sulfur'])
sns.set_style('white')
fig, ax = plt.subplots(figsize=(9, 10))
colors = sns.color_palette("husl", len(df.columns))
sns.boxplot(data=df, orient="h", showfliers=False, palette='husl',
medianprops=dict(color="yellow", label='median'), ax=ax)
lines_per_boxplot = len(ax.lines) // len(ax.artists)
for i, (box, ytick) in enumerate(zip(ax.artists, ax.get_yticklabels())):
ytick.set_color(endarken(box.get_facecolor()))
ytick.set_fontsize(20)
color = enlighten(box.get_facecolor())
box.set_color(color)
for lin in ax.lines[i * lines_per_boxplot: (i + 1) * lines_per_boxplot]:
if lin.get_label() != 'median':
lin.set_color(color)
lin.set_markerfacecolor(color)
lin.set_markeredgecolor(color)
sns.stripplot(data=df, orient="h", jitter=True, size=7, alpha=0.5, palette='husl', ax=ax)
sns.despine(ax=ax)
ax.set_title("Title")
ax.set_xlabel("X label")
plt.tight_layout()
plt.show()
I just answer point 2. of my question.
After tinkering, I found this to work :
# Each box has 5 associated Line2D objects (the whiskers and median)
# Loop over them here, and use the same colour as above
n=5 # this was for tinkering
for j in range(i*n,i*n+n):
if j != i*n+4 : line = ax.lines[j] # not the median
line.set_color(col)
Again, I don't know what I'm doing. So someone more knowledgeable may provide a more valuable answer.
I removed the stripplot for better clarity.

Change color of chart bars based on mouse press event

I am trying to create an interactive bar chart where the bars of the chart change color when the user selects a value (based on mouse click). The selected value displays at the bottom of the chart and the bars are supposed to change color dependent on the probability of the selected value being above or below the mean of the a sample.
I am stuck on the coloring of the bars. When I click on the chart only the first bar changes colors and then does not update with subsequent clicks.
Overall expected result is to allow multiple values to be selected based on mouse click events. Intention is then to draw the horizontal line at the selected value and then recolor the bars based on the probability of the selected value being within the range of the sample mean. This is being run in jupyter.
I am still new to this so certainly appreciate any advice that you may have.
import numpy as np
from scipy import stats
from scipy.stats import norm
import math
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import ipywidgets as wdg
from matplotlib.cm import ScalarMappable
%matplotlib notebook
###Set up dummy data
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(32000,200000,3650),
np.random.normal(43000,100000,3650),
np.random.normal(43500,140000,3650),
np.random.normal(48000,70000,3650)],
index=[1992,1993,1994,1995])
###Calculate statistics incl confidence interval for the mean. Calculate 97.5% interquantile range of the normal distribution (being 1.96 x standard error)
df = df.T
stats = df.describe(percentiles = [0.025, 0.25, 0.5, 0.75, 0.975])
mean = stats.loc['mean']
onesd_meanerror = df.sem(axis = 0)
error_low = onesd_meanerror*1.96
error_high = onesd_meanerror*1.96
###Setup initial chart and plot bar chart
fig = plt.figure()
ax = fig.add_subplot(111)
x_axis_label = df.columns.values
plt.xticks(x_axis_label)
bars = (ax.bar(x_axis_label, mean, width=0.85, alpha=0.9, align='center',
yerr = (error_low, error_high), error_kw={'capsize': 10, 'elinewidth': 2, 'alpha':1}))
###Create and display textarea widget
txt = wdg.Textarea(
value='',
placeholder='',
description='Y Value:',
disabled=False)
display(txt)
### Formats color bar. Need the scalar mapable to enable use of the color bar.
my_cmap = plt.cm.get_cmap('coolwarm')
sm = ScalarMappable(cmap=my_cmap, norm=plt.Normalize(0,1))
sm.set_array([])
cbar = plt.colorbar(sm)
cbar.set_label('Probability', rotation=270,labelpad=25)
ydataselect = 40000
class ClickChart(object):
def __init__(self, ax):
self.fig=ax.figure
self.ax = ax
self.horiz_line = ax.axhline(y=ydataselect, color='black', linewidth=2)
self.fig.canvas.mpl_connect('button_press_event', self.onclick)
### Event handlers
def onclick(self, event):
self.horiz_line.remove()
self.ypress = event.ydata
self.horiz_line = ax.axhline(y=self.ypress, color='red', linewidth=2)
txt.value = str(event.ydata)
self.color_bar(event)
def color_bar(self, event):
for index, bar in enumerate(bars):
bar.set_color(c=my_cmap(self.calc_prob(index)))
print(index)
def calc_prob(self, index):
global mean, onesd_meanerror
mean = mean.iloc[index]
err = onesd_meanerror.iloc[index]
result = norm.cdf(self.ypress, loc=mean, scale=err)
return result
click = ClickChart(ax)```
You are so close! The problem is you are re-defining mean inside of calc_prob(). Making changes to avoid this will fix the code and give the behavior you want:
def calc_prob(self, index):
global mean, onesd_meanerror
mean2 = mean.iloc[index] # Changed
err = onesd_meanerror.iloc[index]
result = norm.cdf(self.ypress, loc=mean2, scale=err) # Changed
return result

Histogram with slider filter

I would like to create a histogram with a density plot combined in bokeh with a slider filter. Atm, I have the blocks to create a bokeh histogram with a density plot from another thread. I dont know how to create the callback function to update the data and rerender the plot.
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from numpy import histogram, linspace
from scipy.stats.kde import gaussian_kde
pdf = gaussian_kde(df.hp)
x = linspace(0,250,50)
p = figure(plot_height=300)
p.line(x, pdf(x))
# plot actual hist for comparison
hist, edges = histogram(df.hp, density=True, bins=20)
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], alpha=0.4)
show(p)
There are two ways to implement callbacks in Bokeh:
with JS code. In that case, the plot remains a standalone object, the constraint being you need to do any data manipulation within Javascript (there is a small caveat to that statement but not relevant here: scipy can't be called from such a callback)
by having the callback executed in Bokeh server, in which case you have the full arsenal of python available to you. The cost being, there's a bit more to plotting and distributing the graph than in the first case (but it's not difficult, see example).
Considering you need to refit the kde each time you change the filter condition, the second way is the only option (unless you want to do that in javascript...).
That's how you would do it (example with a filter on cyl):
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Select
from bokeh.sampledata.autompg import autompg as df
from numpy import histogram, linspace
from scipy.stats.kde import gaussian_kde
output_notebook()
def modify_doc(doc):
x = linspace(0,250,50)
source_hist = ColumnDataSource({'top': [], 'left': [], 'right': []})
source_kde = ColumnDataSource({'x': [], 'y': []})
p = figure(plot_height=300)
p.line(x='x', y='y', source=source_kde)
p.quad(top='top', bottom=0, left='left', right='right', alpha=0.4, source=source_hist)
def update(attr, old, new):
if new == 'All':
filtered_df = df
else:
condition = df.cyl == int(new)
filtered_df = df[condition]
hist, edges = histogram(filtered_df.hp, density=True, bins=20)
pdf = gaussian_kde(filtered_df.hp)
source_hist.data = {'top': hist, 'left': edges[:-1], 'right': edges[1:]}
source_kde.data = {'x': x, 'y': pdf(x)}
update(None, None, 'All')
select = Select(title='# cyl', value='All', options=['All'] + [str(i) for i in df.cyl.unique()])
select.on_change('value', update)
doc.add_root(column(select, p))
# To run it in the notebook:
plot = Application(FunctionHandler(modify_doc))
show(plot)
# Or to run it stand-alone with `bokeh serve --show myapp.py`
# in which case you need to remove the `output_notebook()` call
# from bokeh.io import curdoc
# modify_doc(curdoc())
A few notes:
this is made to be run in jupyter notebook (see the output_notebook() and the last uncommented two lines).
to run it outside, comment the notebook lines (see above) and uncomment the last two lines. Then you can run it from the command line.
Select will only handle str values so you need to convert in (when creating it) and out (when using the values: old and new)
for multiple filters, you need to access the state of each Select at the same time. You do that by instantiating the Selects before defining the update function (but without any callbacks, yet!) and keeping a reference to them, access their value with your_ref.value and build your condition with that. After the update definition, you can then attach the callback on each Select.
Finally, an example with multiple selects:
def modify_doc(doc):
x = linspace(0,250,50)
source_hist = ColumnDataSource({'top': [], 'left': [], 'right': []})
source_kde = ColumnDataSource({'x': [], 'y': []})
p = figure(plot_height=300)
p.line(x='x', y='y', source=source_kde)
p.quad(top='top', bottom=0, left='left', right='right', alpha=0.4, source=source_hist)
select_cyl = Select(title='# cyl', value='All', options=['All'] + [str(i) for i in df.cyl.unique()])
select_ori = Select(title='origin', value='All', options=['All'] + [str(i) for i in df.origin.unique()])
def update(attr, old, new):
all = pd.Series(True, index=df.index)
if select_cyl.value == 'All':
cond_cyl = all
else:
cond_cyl = df.cyl == int(select_cyl.value)
if select_ori.value == 'All':
cond_ori = all
else:
cond_ori = df.origin == int(select_ori.value)
filtered_df = df[cond_cyl & cond_ori]
hist, edges = histogram(filtered_df.hp, density=True, bins=20)
pdf = gaussian_kde(filtered_df.hp)
source_hist.data = {'top': hist, 'left': edges[:-1], 'right': edges[1:]}
source_kde.data = {'x': x, 'y': pdf(x)}
update(None, None, 'All')
select_ori.on_change('value', update)
select_cyl.on_change('value', update)
doc.add_root(column(select_ori, select_cyl, p))

Add either a density or box plot to the margins of a plot in Matplotlib

I have a scatter plot in linear scale. I want to add a box plot to the margins (left and bottom) of my scatter plot like this figure from Marginal Histograms and Box Charts?
Update
Here is my current working solution, share your thoughts on it or make a better suggestion.
ax.plot(df['vcnt'], df['ecnt'], 'ko', alpha=0.5)
# Save the default tick positions, so we can reset them..
tcksx = ax.get_xticks()
tcksy = ax.get_yticks()
ax.boxplot(df['ecnt'], positions=[min(tcksx)], notch=True, widths=1.)
ax.boxplot(df['vcnt'], positions=[min(tcksy)], vert=False, notch=True, widths=1.)
ax.set_yticks(tcksy) # pos = tcksy
ax.set_xticks(tcksx) # pos = tcksx
ax.set_yticklabels([int(j) for j in tcksy])
ax.set_xticklabels([int(j) for j in tcksx])
ax.set_ylim([min(tcksy-1),max(tcksy)])
ax.set_xlim([min(tcksx-1),max(tcksx)])
You can achieve this by creating additional axes for the bar plots.
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
x_data = np.random.randn(100)
y_data = -x_data + np.random.randn(100)*0.5
df = pd.DataFrame()
df['vcnt'] = x_data
df['ecnt'] = y_data
left = 0.1
bottom = 0.1
top = 0.8
right = 0.8
main_ax = plt.axes([left,bottom,right-left,top-bottom])
# create axes to the top and right of the main axes and hide them
top_ax = plt.axes([left,top,right - left,1-top])
plt.axis('off')
right_ax = plt.axes([right,bottom,1-right,top-bottom])
plt.axis('off')
main_ax.plot(df['vcnt'], df['ecnt'], 'ko', alpha=0.5)
# Save the default tick positions, so we can reset them..
tcksx = main_ax.get_xticks()
tcksy = main_ax.get_yticks()
right_ax.boxplot(df['ecnt'], positions=[0], notch=True, widths=1.)
top_ax.boxplot(df['vcnt'], positions=[0], vert=False, notch=True, widths=1.)
main_ax.set_yticks(tcksy) # pos = tcksy
main_ax.set_xticks(tcksx) # pos = tcksx
main_ax.set_yticklabels([int(j) for j in tcksy])
main_ax.set_xticklabels([int(j) for j in tcksx])
main_ax.set_ylim([min(tcksy-1),max(tcksy)])
main_ax.set_xlim([min(tcksx-1),max(tcksx)])
# set the limits to the box axes
top_ax.set_xlim(main_ax.get_xlim())
top_ax.set_ylim(-1,1)
right_ax.set_ylim(main_ax.get_ylim())
right_ax.set_xlim(-1,1)
plt.show()

How to remove axis in pyplot.bar?

Is there any possibility to do a bar plot without y-(x-)axis? In presentations all redundant informations have to be erased, so I would like to begin to delete the axis. I did not see helpful informations in the matplotlib documentation. Maybe you have better solutions than pyplot..?
Edit: I would like to have lines around the bars except the axis at the bottom. Is this possible
#!/usr/bin/env python
import matplotlib.pyplot as plt
ind = (1,2,3)
width = 0.8
fig = plt.figure(1)
p1 = plt.bar(ind,ind)
# plt.show()
fig.savefig("test.svg")
Edit: I did not see using plt.show()
that there is still the yaxis without ticks.
To make the axes not visible, try something like
import matplotlib.pyplot as plt
ind = (1,2,3)
width = 0.8
fig,a = plt.subplots()
p1 = a.bar(ind,ind)
a.xaxis.set_visible(False)
a.yaxis.set_visible(False)
plt.show()
Is this what you meant?
Here is the code I used at the end. It is not minimal anymore. Maybe it helps.
import matplotlib.pyplot as plt
import numpy as np
def adjust_spines(ax,spines):
for loc, spine in ax.spines.items():
if loc in spines:
spine.set_smart_bounds(True)
else:
spine.set_color('none') # don't draw spine
# turn off ticks where there is no spine
if 'left' in spines:
ax.yaxis.set_ticks_position('left')
else:
# no yaxis ticks
ax.yaxis.set_ticks([])
def nbar(samples, data, err, bWidth=0.4, bSafe=True, svgName='out'):
fig,a = plt.subplots(frameon=False)
if len(data)!=len(samples):
print("length(data) must be equal to length(samples)!")
return
ticks = np.arange(len(data))
p1 = plt.bar(ticks, data, bWidth, yerr=err)
plt.xticks(ticks+bWidth/2., samples )
adjust_spines(a,['bottom'])
a.xaxis.tick_bottom()
if bSafe:
fig.savefig(svgName+".svg")
samples = ('Sample1', 'Sample2','Sample3')
qyss = (91, 44, 59)
qysserr = (1,5,4)
nbar(samples,qyss,qysserr,svgName="test")
Thx to all contributors.

Categories