I am trying out Holoviews for the first time, and I'd like to reproduce this animated "Gapminder" plot as described here.
The code runs but I do not know how to handle the output so that it is displayed in a Jupyter Notebook (I assume that is possible, since Jupyter can display arbitrary HTML).
# Get HoloViews plot and attach document
# NOTE(review): curdoc, BokehRenderer, layout, hvgapminder, slider and button
# are not defined in this excerpt; presumably they come from the original
# Gapminder example this snippet was cut from -- confirm before running.
doc = curdoc()
hvplot = BokehRenderer.get_plot(hvgapminder, doc)
# Make a bokeh layout and add it as the Document root
# hvplot.state fills the first row; the slider and button share the second.
plot = layout([[hvplot.state], [slider, button]], sizing_mode='fixed')
doc.add_root(plot)
Specifically, what should I do with the resulting doc or hvplot objects?
That particular example combines both HoloViews and bokeh components and bokeh widgets cannot easily communicate with Python in the notebook. You can however use the holoviews 'scrubber' widget to achieve the same thing:
import pandas as pd
import numpy as np
import holoviews as hv
from bokeh.sampledata import gapminder
hv.extension('bokeh')
# Switch to sending data 'live' and using the scrubber widget
# (IPython magic syntax: this line is only valid inside a notebook cell)
%output widgets='live' holomap='scrubber'
# Declare dataset
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25,
# so this section needs an older pandas -- confirm the target version.
panel = pd.Panel({'Fertility': gapminder.fertility,
'Population': gapminder.population,
'Life expectancy': gapminder.life_expectancy})
# Flatten the panel into a frame and expose its 'minor' index level as Year
gapminder_df = panel.to_frame().reset_index().rename(columns={'minor': 'Year'})
# Attach the region columns by matching on Country
gapminder_df = gapminder_df.merge(gapminder.regions.reset_index(), on='Country')
# Cast Country and Group to str and Year to float ('f')
gapminder_df['Country'] = gapminder_df['Country'].astype('str')
gapminder_df['Group'] = gapminder_df['Group'].astype('str')
gapminder_df.Year = gapminder_df.Year.astype('f')
ds = hv.Dataset(gapminder_df)
# Apply dimension labels and ranges
kdims = ['Fertility', 'Life expectancy']
vdims = ['Country', 'Population', 'Group']
# Overrides passed to ds.redim(); the Population entry is a tuple,
# presumably (name, label) -- verify against the HoloViews redim docs.
dimensions = {
'Fertility' : dict(label='Children per woman (total fertility)', range=(0, 10)),
'Life expectancy': dict(label='Life expectancy at birth (years)', range=(15, 100)),
'Population': ('population', 'Population')
}
# Create Points plotting fertility vs life expectancy indexed by Year
gapminder_ds = ds.redim(**dimensions).to(hv.Points, kdims, vdims, 'Year')
# Define annotations
# One Text element per key of gapminder_ds, showing the key as an integer
# at position (1.2, 25).
text = gapminder_ds.clone({yr: hv.Text(1.2, 25, str(int(yr)), fontsize=30)
for yr in gapminder_ds.keys()})
# Define options
opts = {'plot': dict(width=1000, height=600,tools=['hover'], size_index='Population',
color_index='Group', size_fn=np.sqrt, title_format="{label}"),
'style': dict(cmap='Set1', size=0.3, line_color='black', alpha=0.6)}
text_opts = {'style': dict(text_font_size='52pt', text_color='lightgray')}
# Combine Points and Text
# The options are applied by calling each object with a {type name: options}
# mapping; the overlay is then relabelled. As the last expression of a
# notebook cell, this is what gets displayed.
(gapminder_ds({'Points': opts}) * text({'Text': text_opts})).relabel('Gapminder Demo')
Related
I'm using JupyterLab (v 3.2.1) and bokeh to create a webpage that allows a user to load a .csv file containing a matrix, and a slider to optionally set a threshold on displayed results. The matrix simply contains some numerical values. The result would be an interactive heatmap displayed below the confirmation button. With my code the webpage is displayed correctly, but the final plot is displayed in a new tab:
import warnings
warnings.filterwarnings('ignore')
import jupyter_bokeh
import ipywidgets as widgets
import pandas as pd
import io
from bokeh.io import show
from bokeh.models import ColorBar, ColumnDataSource, CategoricalColorMapper
from bokeh.plotting import figure
from bokeh.transform import transform
import bokeh.palettes
from IPython.display import display, clear_output, display_html
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.layouts import layout
#Display the webpage
# Input controls: a single-file upload, a count-threshold slider and a run
# button.
file = widgets.FileUpload(accept=".txt, .csv, .dat", multiple=False)
threshold = widgets.IntSlider(
    value=0,
    min=0,
    max=20,
    step=1,
    description="Threshold:",
    disabled=False,
    # Only fire change events when the handle is released, not while dragging.
    continuous_update=False,
    # Was misspelled "orintation", so the option was never actually applied.
    orientation='horizontal',
    readout=True,
    readout_format="d",
)
button = widgets.Button(description='Run code')

# Static HTML fragments shown on the page.
text_0 = widgets.HTML(value="<header><h1>Phenotype Major Categories vs Genes Heatmap</h1></header>")
text_1 = widgets.HTML(value="<h3>Welcome to the heatmap plotter. By loading a csv file containing the counts of phenotypes for a gene into an IMPC major phenotype category, it will display an interactive heatmap.</h3>")
text_2 = widgets.HTML(value="Please load your file (accepted formats: csv, txt, dat):")
text_3 = widgets.HTML(value="If desired, set a threshold for counts to be displayed:")
text_4 = widgets.HTML(value="<h2>Heatmap:</h2>")

# Page layout: the heading box on top, the prompts and controls below it.
vbox_head = widgets.VBox([text_0, text_1])
page_layout_plot = [text_2, file, text_3, threshold, button]
vbox_text = widgets.VBox(page_layout_plot)
page = widgets.VBox([vbox_head, vbox_text])
display(page)
#Set the endpage button to run the code
def on_button_clicked(result):
    """Build the heatmap from the uploaded matrix and show it.

    Reads the first uploaded file from the ``file`` widget as tab-separated
    text, drops every row whose maximum is below the ``threshold`` slider
    value (a value of 0 disables the filter) and draws one coloured rectangle
    per matrix cell.

    Args:
        result: Value passed in by the caller (this function is registered
            as the button's ``on_click`` handler); it is not used.
    """
    # Load the file and set the threshold.
    # Only the first upload is read, so this will not work with multiple=True.
    inp = list(file.value.values())[0]
    content = io.StringIO(inp['content'].decode('utf-8'))
    mat = pd.read_csv(content, sep="\t", index_col=0)
    mat.index.name = 'MGI_id'
    mat.columns.name = 'phen_sys'

    # Filtering phase: drop the rows that never reach the threshold.
    x = int(threshold.value)
    if x != 0:
        rem = [i for i in mat.index if mat.loc[i].max() < x]
        mat.drop(rem, inplace=True, axis=0)

    # Create a custom palette and a mapper from values to colours. The values
    # are converted to strings so that a categorical colour mapper can be
    # built from only the values that actually occur in the matrix.
    df = mat.stack(dropna=False).rename("value").reset_index()
    fact = df.value.unique()
    fact.sort()
    fact = fact.astype(str)
    df.value = df.value.astype(str)
    mapper = CategoricalColorMapper(
        palette=bokeh.palettes.inferno(len(df.value.unique())),
        factors=fact,
        nan_color='gray',
    )

    # Define a figure.
    p = figure(
        plot_width=1280,
        plot_height=800,
        x_range=list(df.phen_sys.drop_duplicates()[::-1]),
        y_range=list(df.MGI_id.drop_duplicates()),
        # Bokeh looks up data-source columns with the "@column" syntax; the
        # previous "#column" strings would have been shown as literal text.
        tooltips=[
            ('Phenotype system', '@phen_sys'),
            ('Gene', '@MGI_id'),
            ('Phenotypes', '@value'),
        ],
        x_axis_location="above",
        output_backend="webgl",
    )

    # Create rectangles for the heatmap, one per (phen_sys, MGI_id) row of df.
    p.rect(
        x="phen_sys",
        y="MGI_id",
        width=1,
        height=1,
        source=ColumnDataSource(df),
        fill_color=transform('value', mapper),
    )
    p.xaxis.major_label_orientation = 45

    # Add legend.
    color_bar = ColorBar(
        color_mapper=mapper,
        label_standoff=6,
        border_line_color=None,
    )
    p.add_layout(color_bar, 'right')
    show(p)


# Run the handler whenever the "Run code" button is clicked.
button.on_click(on_button_clicked)
I already tried to use output_notebook() at the beginning but in that case nothing is displayed.
How can I fix it? It would be useful to display in real time the plot by changing the threshold without the need to click the confirmation button every time.
Thank you for all the help.
You might need to observe the value attribute of your threshold object to refresh your plot. So add something like this at the end of your code:
def on_value_change(change):
    """Re-run the button handler when an observed widget value changes.

    Args:
        change: Change notification delivered by ``observe``; it is ignored.
    """
    on_button_clicked(None)


# Call the handler each time the slider's ``value`` trait changes.
threshold.observe(on_value_change, names='value')
More from the doc: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html#Signatures
I've created a waterfall graph using Jupyter and Atom (as I'm looking for a decent substitute for Jupyter, especially when it comes to dataframe visualisation).
Thing is that I used the same exact code in both editors but the output of the graph is different.
Does someone have an explanation?
Here is the code used:
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objs as go
#read the semicolon-separated csv file and lower-case the Measure column
df=pd.read_csv('C:/Users/Usuario/Desktop/python/HP/waterfall.csv',sep=';')
df['Measure']=df['Measure'].str.lower()
# NOTE(review): display is not imported in this excerpt; presumably it is the
# builtin that Jupyter/IPython injects -- confirm it exists in other editors.
display(df)
#store values in different variables
x=df['Deal ID']
y=df['deal value (USD)']
measure = df['Measure']
# the bar labels reuse the same column as the bar heights
text=df['deal value (USD)']
#let's create the figure
fig = go.Figure(go.Waterfall(
measure=measure,
x=x,
y=y,
text=text,
textposition="outside",
decreasing = {"marker":{"color":"Maroon", "line":{"color":"red", "width":2}}},
increasing = {"marker":{"color":"Teal"}},
totals = {"marker":{"color":"deep sky blue", "line":{"color":"blue", "width":3}}},
showlegend=False
))
# hide the grid lines on both axes and the whole y axis
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False, visible=False)
fig.update_traces(hovertemplate=None)
# title, size, margins, hover mode, axis titles, colours and fonts
# NOTE(review): nothing in this excerpt renders the figure (no fig.show());
# presumably the notebook displayed it another way -- confirm.
fig.update_layout(title='Total deal value per customer X', height=470,
margin=dict(t=90, b=20, l=70, r=70),
hovermode="x unified",
xaxis_title='QvsQ ', yaxis_title="deal value in USD",
plot_bgcolor='rgba(0,0,0,0)',
#paper_bgcolor='#333',
title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
font=dict(color='#8a8d93'))
Atom output:
Jupyter output: [![Jupyter output][2]][2]
Thanks
[2]: https://i.stack.imgur.com/wYPEG.png
I am very new to using Python and especially new to using the Bokeh library. I am trying to plot a Choropleth map of the United States with the fill color of each state corresponding to their bee population of a year.
It shows the value when you hover over it, but only the states with a value of zero have color.
Link to an image of the output plot is here.
I know there is a big difference in the range (minimum:0, maximum: 310,000) which I believe is causing the problem. How can I change the range of the color map to not fill all of the higher values with grey?
Code for reference below:
from bokeh.models import LogColorMapper
from bokeh.palettes import YlGnBu9 as YlGnBu
from bokeh.sampledata.us_states import data as us_states
import pandas as pd
import numpy as np
# Load the bee population table and turn the state outlines into a frame
# (transposed, so that the original keys become the row index).
bee_pop = pd.read_csv('./BeePopulation.csv')
us_states_df = pd.DataFrame(us_states).T

# Drop the regions that are left off this map. The last name used to be split
# across two source lines, which is an unterminated string literal in Python.
us_states_df = us_states_df[
    ~us_states_df["name"].isin(['Alaska', "Hawaii", "District of Columbia"])
]
us_states_df["lons"] = us_states_df.lons.values.tolist()
us_states_df["lats"] = us_states_df.lats.values.tolist()
us_states_df = us_states_df.reset_index()

# Attach the 2016 population to each state. The left join keeps every state,
# so states without a matching row end up with a missing Pop value.
bee_2016 = bee_pop[bee_pop['Year'] == 2016]
us_states_df = us_states_df.merge(
    bee_2016[["State", "Pop"]],
    how="left",
    left_on="index",
    right_on="State",
)
us_states_df.head()

# Column-oriented data for the patches glyph.
us_states_datasource = {}
us_states_datasource["lons"] = us_states_df.lons.values.tolist()
us_states_datasource["lats"] = us_states_df.lats.values.tolist()
us_states_datasource["name"] = us_states_df.name.values.tolist()
us_states_datasource["BeePop"] = us_states_df.Pop.values.tolist()

fig = figure(
    plot_width=900,
    plot_height=600,
    title="United Bee Population Per State Choropleth Map",
    x_axis_location=None,
    y_axis_location=None,
    # Bokeh looks up data-source columns with "@column"; the previous
    # "#column" strings would have been shown as literal text.
    tooltips=[
        ("Name", "@name"), ("Bee Population", "@BeePop"),
    ],
)
fig.grid.grid_line_color = None
# One patch per state, filled according to BeePop through a log colour
# mapper using the reversed palette.
fig.patches(
    "lons",
    "lats",
    source=us_states_datasource,
    fill_color={'field': 'BeePop', 'transform': LogColorMapper(palette=YlGnBu[::-1])},
    fill_alpha=0.7,
    line_color="white",
    line_width=0.5,
)
show(fig)
Thank you in advance!
The LogColorMapper has configurable high and low properties. Another option, of course, is to use a different color mapper, e.g. LinearColorMapper or CategoricalColorMapper in conjunction with some categorical binning.
Here is my dataset after cleaning csv file
Here is output what I want
What I want is to display the years on the x axis and the column values on the y axis, and to show bubbles with different colors and sizes, together with a play button for the animation.
I am new to data science , can someone help me ,how can I achieve this?
Judging by your dataset and attached image, what you're asking for is something like this:
But I'm not sure that is what you actually want. You see, with your particular dataset there aren't enough dimensions to justify an animation. Or even a bubble plot. This is because you're only looking at one value. So you end up showing the same value through the bubble sizes and on the y axis. And there's really no need to change your dataset given that your provided screenshot is in fact your desired plot. But we can talk more about that if you'd like.
Since you haven't provided a sample dataset, I've used a dataset that's available through plotly express and reshaped it so that it matches your dataset:
Complete code:
# imports
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import math
import numpy as np
# color cycle (the palette is repeated so that indexing by column never runs out)
colors = px.colors.qualitative.Alphabet*10

# sample data with similar structure as OP:
# rows are years (as strings, ascending), columns are the selected countries
df = px.data.gapminder().query("continent=='Americas'")
dfp = df.pivot(index='year', columns='country', values='pop')
dfp = dfp[['United States', 'Mexico', 'Argentina', 'Brazil', 'Colombia']]
dfp = dfp.sort_values(by='United States', ascending=False)
dfp = dfp.T
dfp.columns = [str(yr) for yr in dfp.columns]
dfp = dfp[dfp.columns[::-1]].T

# build figure and add traces: one trace per country, drawn on its own
# horizontal line (y = column position) with the marker area showing the value
fig = go.Figure()
for col, country in enumerate(dfp):
    vals = dfp[country].values
    yVals = [col]*len(vals)
    fig.add_traces(go.Scatter(
        y=yVals,
        x=dfp.index,
        mode='markers',
        marker=dict(color=colors[col],
                    size=vals,
                    sizemode='area',
                    # sizeref is shared by all traces so that bubbles are
                    # comparable across countries; a per-trace scale would be
                    # sizeref=2.*max(vals)/(40.**2)
                    sizeref=2.*max(dfp.max())/(40.**2),
                    sizemin=4),
        name=country
    ))

# edit y tick layout: one tick per plotted country. The count must come from
# dfp (the plotted columns), not from the raw df, so that tickvals and
# ticktext have the same length.
tickVals = np.arange(0, len(dfp.columns))
fig.update_layout(
    yaxis=dict(tickmode='array',
               tickvals=tickVals,
               ticktext=dfp.columns.tolist()))
fig.show()
I have a dataset where I want to make plots with 2 different variables on the X-axis (in 2 different plots), but I want to get the other value into the HoverTool.
from io import StringIO
import pandas as pd
# Inline CSV sample: an unnamed leading index column followed by
# item_id, start, station and rejects.
data = """,item_id,start,station,rejects
0,item1,2019-10-14 19:00:00,assembly,4.297994269340974
1,item1,2019-10-14 19:00:00,ST1,0.20546537908362442
2,item1,2019-10-14 19:00:00,ST2,0.494539460127756
3,item1,2019-10-14 19:00:00,ST3,0.6892230576441103
4,item2,2019-10-14 23:30:00,assembly,4.432249894470241
5,item2,2019-10-14 23:30:00,ST1,0.19071837253655435
6,item2,2019-10-14 23:30:00,ST2,0.7651434643995749
7,item2,2019-10-14 23:30:00,ST3,0.7748600947051227
8,item3,2019-10-15 04:00:00,assembly,3.55576079427384
9,item3,2019-10-15 04:00:00,ST1,0.37002775208140615
10,item3,2019-10-19 04:00:00,ST2,0.7195914577530177
11,item3,2019-10-19 04:00:00,ST3,0.492379835873388
12,item4,2019-10-19 10:30:00,assembly,4.02656704026567
13,item4,2019-10-19 10:30:00,ST1,0.22926219258024177
14,item4,2019-10-19 10:30:00,ST2,0.690376569037657
15,item4,2019-10-19 10:30:00,ST3,0.838745695410320"""
# Parse the text in memory: the first column becomes the index and the
# "start" column is converted to datetimes.
data_reduced = pd.read_csv(
    StringIO(data),
    index_col=0,
    parse_dates=["start"],
)
I want to produce one graph with the item_id on the x-axis and another with the start date on the x-axis. I want to track the rejects per station, and the combined rejects of the assembly.
import holoviews as hv
import bokeh
from holoviews import opts
# Select the bokeh plotting backend for HoloViews
hv.extension('bokeh')
# Send plain Bokeh output to the notebook as well
bokeh.plotting.output_notebook()
def plot(data_reduced, x_axis="item_id"):
    """Scatter the rejects per station against ``item_id`` or ``start``.

    Args:
        data_reduced: Table with the columns ``item_id``, ``start``,
            ``station`` and ``rejects``.
        x_axis: Column to put on the x-axis, ``"item_id"`` or ``"start"``.
            Any other value falls back to ``"item_id"``.

    Returns:
        The overlay with one scatter per station and the display options
        applied.
    """
    x_label = x_axis if x_axis in {"start", "item_id"} else "item_id"
    # The column that is NOT on the x-axis has to travel along as an extra
    # value dimension of the Scatter; otherwise it is missing from the
    # plotted data and the hover tooltip renders it as "???".
    hover_label = "start" if x_label == "item_id" else "item_id"
    key_dimensions = [x_label, "station"]
    value_dimensions = ["rejects", hover_label]
    datatable = hv.Table(
        data_reduced, kdims=key_dimensions, vdims=value_dimensions
    )
    scatter_plot = datatable.to.scatter(x_label, ["rejects", hover_label])
    overlay = scatter_plot.overlay("station")

    # Bokeh looks up data-source columns with "@column"; the previous
    # "#column" strings would have been shown as literal text.
    tooltips = [
        ("item_id", "@item_id"),
        ("start", "@start{%Y-%m-%d %H:%M}"),
        ("station", "@station"),
        ("rejects", "@rejects"),
    ]
    # The formatter key must match the field reference used in the tooltip,
    # including its "@" prefix.
    hover = bokeh.models.HoverTool(
        tooltips=tooltips, formatters={"@start": "datetime"}
    )
    return overlay.opts(
        opts.Scatter(
            color=hv.Cycle("Category10"),
            show_grid=True,
            padding=0.1,
            height=400,
            tools=[hover],
        ),
        opts.NdOverlay(
            legend_position="right", show_frame=False, xrotation=90
        ),
    )
And then I make the graphs with plot(data_reduced, x_axis="start") or plot(data_reduced, x_axis="item_id")
# The same data plotted twice: once against the start time, once against the item id
plot(data_reduced, x_axis="start")
plot(data_reduced, x_axis="item_id")
How do I get the ??? filled in?
If I want to get the data from an individual line (list(p.items())[0][1].data), I get:
,item_id,start,station,rejects
1,item1,2019-10-14 19:00:00,ST1,0.2054653790836244
5,item2,2019-10-14 23:30:00,ST1,0.19071837253655435
9,item3,2019-10-15 04:00:00,ST1,0.37002775208140615
13,item4,2019-10-19 10:30:00,ST1,0.22926219258024175
So the data seems to be in the source
In cases like this I prefer to use hvplot which is a library built on top of holoviews, made by the same group of developers. This really makes life I think a lot easier and creates your plot all in one go.
1) With Hvplot you can specify extra hover columns easily with keyword hover_cols=['your_column']:
# with this import you can use .hvplot() on your df and create interactive holoviews plots
import hvplot.pandas
# Rejects against item_id, one scatter per station, with the start time
# added to the hover tooltip
item_plot = data_reduced.hvplot(
kind='scatter',
x='item_id',
y='rejects',
by='station', # this creates the overlay
hover_cols=['start'],
padding=0.1,
)
# Rejects against the start time, one scatter per station, with the item_id
# added to the hover tooltip
start_plot = data_reduced.hvplot(
kind='scatter',
x='start',
y='rejects',
by='station',
hover_cols=['item_id'],
padding=0.1,
)
2) If you want a pure Holoviews solution, you can do:
import holoviews as hv
from holoviews import opts
# Declare item_id and station as key dimensions and carry start along as a
# second value dimension next to rejects
hv_df = hv.Dataset(
data_reduced,
kdims=['item_id', 'station'],
vdims=['rejects', 'start'],
)
# Convert to Scatter elements, overlay them and enable the default hover tool
hv_df.to(hv.Scatter).overlay().opts(opts.Scatter(tools=['hover']))
Example plot with extra hover columns: