Create multivalue bar chart bokeh python - python

For the following sample dataframe df2, I want to create bar charts for every row only when there are positive values, using Bokeh.
import pandas as pd
import numpy as np
# create dataset
df = pd.DataFrame({'Temperature': ['Hot', 'Cold', 'Warm', 'Cold'],
})
# create dummy variables
df2=pd.get_dummies(df)
Please advise.
Edit.
I found the following example which works
from bokeh.plotting import figure, output_file, show
from bokeh.transform import dodge
labs = ['label_1', 'label_2', 'label_3']
vals = ['val_1','val_2','val_3']
my_data = {'labs':labs,
'val_1':[2,5,11],
'val_2':[34,23,1],
'val_3':[25, 34, 23]
}
fig = figure(x_range = labs, plot_width = 300, plot_height = 300)
fig.vbar(x = dodge('labs', -0.25, range = fig.x_range), top = 'val_1',
width = 0.2,source = my_data, color = "green")
fig.vbar(x = dodge('labs', 0.0, range = fig.x_range), top = 'val_2',
width = 0.2, source = my_data,color = "cyan")
fig.vbar(x = dodge('labs', 0.25, range = fig.x_range), top = 'val_3',
width = 0.2,source = my_data,color = "blue")
show(fig)
However my data source is pandas dataframe so I am confused how to achieve the outcome. Thanks in advance.

Related

plot data interactive as rgb

I have a bunch of pictures I'd like to plot with hvplot.rgb.
The user should be able to switch between the pictures with a widget like a slider.
My problem is, that i don't know how to use the hvplot.rgb()-function for interactive dataframes.
here's what i have so far:
import numpy as np
import pandas as pd
import hvplot
import hvplot.pandas
import hvplot.xarray
import panel as pn
######################## generating a Dataframe with random rgb-arrays
height = 2
length = 2
count = 3 # number of pictures in dataframe
def rgb(height,lenght):
image = np.random.randint(255, size=(height,length,3))
x = np.arange(length)
y = np.arange(height)
channel = np.arange(len(image[0][0]))
im_xr = xr.DataArray(image,coords={'y': y,'x': x,'channel': channel},dims=["y", "x", "channel"])
return im_xr # returning an xarray in order to use hv.plot.rgb()
list_rgb = []
for i in range(count):
list_rgb.append(rgb(height,length))
df = pd.DataFrame([list_rgb]).T
df.rename(columns = {0:'rgb'}, inplace = True)
dfi = df.interactive()
slider = pn.widgets.IntSlider(name='slider', start=0, end=count-1, step=1)
pipe = dfi[(dfi.index == slider.param.value)]
so at this point i can already plot the generated pictures in the DataFrame with
df.rgb[0].hvplot.rgb(x='x',
y='y',
bands='channel',
data_aspect=1,
flip_yaxis=True,
xaxis=False,
yaxis=None,
widget_type='scrubber',
widget_location='bottom'
)
How can I call the .hvplot.rgb() function to plot the "pipe"-interactive dataframe?
so here's how to do it:
import numpy as np
import pandas as pd
import hvplot
import hvplot.pandas
import hvplot.xarray
import panel as pn
######################## generating a Dataframe with random rgb-arrays
height = 2 # height of the generated picture
width = 2 # width of the generated picture
count = 3 # number of pictures in dataframe
def rgb(height,width):
image = np.random.randint(255, size=(height,width,3))
return image
list_rgb = []
for i in range(count):
list_rgb.append(rgb(height,width))
x = np.arange(width)
y = np.arange(height)
channel = np.arange(3)
time = np.arange(count)
xar = xr.DataArray(list_rgb,
dims=("time", "y", "x", "channel")
,
coords={"x": x,
"y": y,
"channel": channel,
"time" : time},
name="rgb")
xar.hvplot.rgb(x='x',
y='y',
bands='channel',
data_aspect=1,
flip_yaxis=True,
xaxis=False,
yaxis=None
)
additionally the scrubber tool is a pretty nice thing to create movies out of pictures with hvplot, like for example timelaps-shows...
xar.hvplot.rgb(x='x',
y='y',
bands='channel',
data_aspect=1,
flip_yaxis=True,
xaxis=False,
yaxis=None,
widget_type='scrubber',
widget_location='bottom'
)

Bokeh Map Not Showing in Jupyter Notebook

I'm struggling to get a Bokeh map. The cell runs but does not show anything. It takes about 50s. I can get a blank map to display, but nothing I have tried has worked.
Jupyter version 6.4.12 run through Anaconda 2.3.2
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from bokeh.models import ColumnDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
from bokeh.palettes import PRGn, RdYlGn
from bokeh.transform import linear_cmap, factor_cmap
from bokeh.layouts import row, column
from bokeh.resources import INLINE
pd.set_option('display.max_columns', None)
output_notebook(INLINE)
I have Lat & Lon coordinates in my dataset, which I discovered I need to convert to mercator coordinates.
# Define function to switch from lat/long to mercator coordinates
def x_coord(x, y):
lat = x
lon = y
r_major = 6378137.000
x = r_major * np.radians(lon)
scale = x/lon
y = 180.0/np.pi * np.log(np.tan(np.pi/4.0 + lat * (np.pi/180.0)/2.0)) * scale
return (x, y)
# Define coord as tuple (lat,long)
df['coordinates'] = list(zip(df['LATITUDE'], df['LONGITUDE']))
# Obtain list of mercator coordinates
mercators = [x_coord(x, y) for x, y in df['coordinates'] ]
# Create mercator column in our df
df['mercator'] = mercators
# Split that column out into two separate columns - mercator_x and mercator_y
df[['mercator_x', 'mercator_y']] = df['mercator'].apply(pd.Series)
From there, this is my code cell for the plot:
tile = get_provider('CARTODBPOSITRON')
source = ColumnDataSource(data = df)
palette = PRGn[11]
color_mapper = linear_cmap(field_name = 'FIRE_SIZE', palette = palette,
low=df['FIRE_SIZE'].min(), high = df['FIRE_SIZE'].max())
tooltips = [('Fire Year', '#FIRE_YEAR'),('State','#STATE')]
p = figure(title = 'Fire Locations',
x_axis_type = 'mercator',
y_axis_type = 'mercator',
x_axis_label = 'Longitude',
y_axis_label = 'Latitude',
tooltips = tooltips)
p.add_tile(tile)
p.circle(x = 'mercator_x',
y = 'mercator_y',
color = color_mapper,
size = 10,
fill_alpha = 0.7,
source = source)
color_bar = ColorBar(color_mapper = color_mapper['transform'],
formatter = NumeralTickFormatter(format='0.0[0000]'),
`your text` label_standoff = 13, width = 8, location = (0,0))
p.add_layout(color_bar, 'right')
show(p)
The cell runs, but nothing shows. There are no errors. I confirmed that I can get a plot to display using this code:
#Test
tile = get_provider('CARTODBPOSITRON')
p = figure(x_range = (-2000000, 2000000),
y_range = (1000000, 7000000),
x_axis_type = 'mercator',
y_axis_type = 'mercator')
p.add_tile(tile)
show(p)
This is a large dataset, with 2,303,566 entries. I have checked that I have no null values in any of the columns that I am using, as well as verifying the correct data types (lat/lon are float64).
Returning to answer my own question here. After doing some more testing based on helpful comments I received from #mosc9575 and #bigreddot, I determined that the size of my dataset is the reason for Bokeh failing to display the map. I used a single point first, and then a small slice of my dataframe - and the map displayed just fine.
I hope this is helpful to someone else at some point!
Thanks to everyone who assisted.

How to add labels to subplots in plotly?

I am trying to plot a candlestick with volume, using the plotly. However I can not get the proper x and yaxis label.please help.I need y labels for both plot but xlabel for just the bottom one, also one title for both. Bellow is the code.
** one more question, how can I change the line color in the volume plot.Thank you
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import tools
stock = 'AAPL'
df = web.DataReader(stock, data_source='yahoo', start='01-01-2019')
def chart_can_vol(df):
fig = tools.make_subplots(
rows=3, cols=1,
specs=[[{"rowspan": 2}],
[None],
[{}]],
shared_xaxes=True,
vertical_spacing=0.1)
fig.add_trace(go.Candlestick(x = df.index,
open = df['Open'],
close = df['Close'],
low = df['Low'],
high = df['High']),
row = 1, col = 1)
fig.update_layout(xaxis_rangeslider_visible = False)
fig.update_layout(
yaxis_title = 'Apple Stock Price USD ($)'
)
fig.add_trace(go.Scatter(x = df.index,
y = df['Volume']),
row = 3, col = 1)
fig.update_layout(
yaxis_title = 'Volume',
xaxis_title = 'Date'
)
fig.update_layout(title_text="Apple Stock")
fig.update_layout(width=900, height=900)
return fig
chart_can_vol(df)
When you make your subplots, you can add the subplot_titles attribute. In the code below, I used the titles "test1" and "test2". When you change your axis labels, you can use update_xaxes and update_yaxes, just make sure that the row and column values are the same for the update_axes method and the subplot.
To change the color of the line, you can add the line attribute within the scatterplot method and set it equal to a dictionary with a hex value of the color you want.
P.S. You should update plotly, because the tools.make_subplots was deprecated. Once you update, you can simply use make_subplots. Also, you are using pandas, when you should use pandas-datareader. See import statements.
Code:
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import tools
stock = 'AAPL'
df = web.DataReader(stock, data_source='yahoo', start='01-01-2019')
def chart_can_vol(df):
subplot_titles=["test1", "test2"]
rows = 2
cols = 2
height = 300 * rows
fig = make_subplots(
rows=3, cols=1,
specs=[[{"rowspan": 2}],
[None],
[{}]],
shared_xaxes=True,
subplot_titles=("test1", "test2"),
vertical_spacing=0.1)
fig.add_trace(go.Candlestick(x = df.index,
open = df['Open'],
close = df['Close'],
low = df['Low'],
high = df['High']),
row = 1, col = 1)
fig.update_layout(xaxis_rangeslider_visible = False)
fig.update_layout(
yaxis_title = 'Apple Stock Price USD ($)'
)
fig.add_trace(go.Scatter(x = df.index,
y = df['Volume'],
line= dict(color="#ffe476")),
row = 3, col = 1)
fig.update_xaxes(title_text="Date", row = 3, col = 1)
fig.update_yaxes(title_text="Volume", row = 3, col = 1)
fig.update_layout(title_text="Apple Stock")
fig.update_layout(width=900, height=900)
return fig
chart_can_vol(df).show()

how to plot a range with a line in the center with Plotly, in Python [duplicate]

How can I use Plotly to produce a line plot with a shaded standard deviation? I am trying to achieve something similar to seaborn.tsplot. Any help is appreciated.
The following approach is fully flexible with regards to the number of columns in a pandas dataframe and uses the default color cycle of plotly. If the number of lines exceed the number of colors, the colors will be re-used from the start. As of now px.colors.qualitative.Plotly can be replaced with any hex color sequence that you can find using px.colors.qualitative:
Alphabet = ['#AA0DFE', '#3283FE', '#85660D', '#782AB6', '#565656', '#1...
Alphabet_r = ['#FA0087', '#FBE426', '#B00068', '#FC1CBF', '#C075A6', '...
[...]
Complete code:
# imports
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np
# sample data in a pandas dataframe
np.random.seed(1)
df=pd.DataFrame(dict(A=np.random.uniform(low=-1, high=2, size=25).tolist(),
B=np.random.uniform(low=-4, high=3, size=25).tolist(),
C=np.random.uniform(low=-1, high=3, size=25).tolist(),
))
df = df.cumsum()
# define colors as a list
colors = px.colors.qualitative.Plotly
# convert plotly hex colors to rgba to enable transparency adjustments
def hex_rgba(hex, transparency):
col_hex = hex.lstrip('#')
col_rgb = list(int(col_hex[i:i+2], 16) for i in (0, 2, 4))
col_rgb.extend([transparency])
areacol = tuple(col_rgb)
return areacol
rgba = [hex_rgba(c, transparency=0.2) for c in colors]
colCycle = ['rgba'+str(elem) for elem in rgba]
# Make sure the colors run in cycles if there are more lines than colors
def next_col(cols):
while True:
for col in cols:
yield col
line_color=next_col(cols=colCycle)
# plotly figure
fig = go.Figure()
# add line and shaded area for each series and standards deviation
for i, col in enumerate(df):
new_col = next(line_color)
x = list(df.index.values+1)
y1 = df[col]
y1_upper = [(y + np.std(df[col])) for y in df[col]]
y1_lower = [(y - np.std(df[col])) for y in df[col]]
y1_lower = y1_lower[::-1]
# standard deviation area
fig.add_traces(go.Scatter(x=x+x[::-1],
y=y1_upper+y1_lower,
fill='tozerox',
fillcolor=new_col,
line=dict(color='rgba(255,255,255,0)'),
showlegend=False,
name=col))
# line trace
fig.add_traces(go.Scatter(x=x,
y=y1,
line=dict(color=new_col, width=2.5),
mode='lines',
name=col)
)
# set x-axis
fig.update_layout(xaxis=dict(range=[1,len(df)]))
fig.show()
I was able to come up with something similar. I post the code here to be used by someone else or for any suggestions for improvements.
import matplotlib
import random
import plotly.graph_objects as go
import numpy as np
#random color generation in plotly
hex_colors_dic = {}
rgb_colors_dic = {}
hex_colors_only = []
for name, hex in matplotlib.colors.cnames.items():
hex_colors_only.append(hex)
hex_colors_dic[name] = hex
rgb_colors_dic[name] = matplotlib.colors.to_rgb(hex)
data = [[1, 3, 5, 4],
[2, 3, 5, 4],
[1, 1, 4, 5],
[2, 3, 5, 4]]
#calculating mean and standard deviation
mean=np.mean(data,axis=0)
std=np.std(data,axis=0)
#draw figure
fig = go.Figure()
c = random.choice(hex_colors_only)
fig.add_trace(go.Scatter(x=np.arange(4), y=mean+std,
mode='lines',
line=dict(color=c,width =0.1),
name='upper bound'))
fig.add_trace(go.Scatter(x=np.arange(4), y=mean,
mode='lines',
line=dict(color=c),
fill='tonexty',
name='mean'))
fig.add_trace(go.Scatter(x=np.arange(4), y=mean-std,
mode='lines',
line=dict(color=c, width =0.1),
fill='tonexty',
name='lower bound'))
fig.show()
Great custom responses posted by others. In case someone is interested in code from the official plotly website, see here: https://plotly.com/python/continuous-error-bars/
I wrote a function to extend plotly.express.line with the same high level interface of Plotly Express. The line function (source code below) is used in the same exact way as plotly.express.line but allows for continuous error bands with the flag argument error_y_mode which can be either 'band' or 'bar'. In the second case it produces the same result as the original plotly.express.line. Here is an usage example:
import plotly.express as px
df = px.data.gapminder().query('continent=="Americas"')
df = df[df['country'].isin({'Argentina','Brazil','Colombia'})]
df['lifeExp std'] = df['lifeExp']*.1 # Invent some error data...
for error_y_mode in {'band', 'bar'}:
fig = line(
data_frame = df,
x = 'year',
y = 'lifeExp',
error_y = 'lifeExp std',
error_y_mode = error_y_mode, # Here you say `band` or `bar`.
color = 'country',
title = f'Using error {error_y_mode}',
markers = '.',
)
fig.show()
which produces the following two plots:
The source code of the line function that extends plotly.express.line is this:
import plotly.express as px
import plotly.graph_objs as go
def line(error_y_mode=None, **kwargs):
"""Extension of `plotly.express.line` to use error bands."""
ERROR_MODES = {'bar','band','bars','bands',None}
if error_y_mode not in ERROR_MODES:
raise ValueError(f"'error_y_mode' must be one of {ERROR_MODES}, received {repr(error_y_mode)}.")
if error_y_mode in {'bar','bars',None}:
fig = px.line(**kwargs)
elif error_y_mode in {'band','bands'}:
if 'error_y' not in kwargs:
raise ValueError(f"If you provide argument 'error_y_mode' you must also provide 'error_y'.")
figure_with_error_bars = px.line(**kwargs)
fig = px.line(**{arg: val for arg,val in kwargs.items() if arg != 'error_y'})
for data in figure_with_error_bars.data:
x = list(data['x'])
y_upper = list(data['y'] + data['error_y']['array'])
y_lower = list(data['y'] - data['error_y']['array'] if data['error_y']['arrayminus'] is None else data['y'] - data['error_y']['arrayminus'])
color = f"rgba({tuple(int(data['line']['color'].lstrip('#')[i:i+2], 16) for i in (0, 2, 4))},.3)".replace('((','(').replace('),',',').replace(' ','')
fig.add_trace(
go.Scatter(
x = x+x[::-1],
y = y_upper+y_lower[::-1],
fill = 'toself',
fillcolor = color,
line = dict(
color = 'rgba(255,255,255,0)'
),
hoverinfo = "skip",
showlegend = False,
legendgroup = data['legendgroup'],
xaxis = data['xaxis'],
yaxis = data['yaxis'],
)
)
# Reorder data as said here: https://stackoverflow.com/a/66854398/8849755
reordered_data = []
for i in range(int(len(fig.data)/2)):
reordered_data.append(fig.data[i+int(len(fig.data)/2)])
reordered_data.append(fig.data[i])
fig.data = tuple(reordered_data)
return fig

Plotting a Heat Table Based on bokeh

I am trying to make a heat map like this one from bokeh:
Where all the code is here: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I got pretty close, but for some reason it is only printing the values in a diagonal order.
I tried to format my data the same way and just substitute it, but it got a little more complicated than that. Here is my data:
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import ColumnDataSource, figure, show, output_file
from bokeh.models import HoverTool
import pandas.util.testing as tm; tm.N = 3
df = pd.read_csv('MYDATA.csv', usecols=[1, 16])
df = df.set_index('recvd_dttm')
df.index = pd.to_datetime(df.index, format='%m/%d/%Y %H:%M')
result = df.groupby([lambda idx: idx.month, 'CompanyName']).agg(len).reset_index()
result.columns = ['Month', 'CompanyName', 'NumberCalls']
pivot_table = result.pivot(index='Month', columns='CompanyName', values='NumberCalls').fillna(0)
s = pivot_table.sum().sort(ascending=False,inplace=False)
pivot_table = pivot_table.ix[:,s.index[:46]]
pivot_table = pivot_table.transpose()
pivot_table.to_csv('pivot_table.csv')
pivot_table = pivot_table.reset_index()
pivot_table['CompanyName'] = [str(x) for x in pivot_table['CompanyName']]
Companies = list(pivot_table['CompanyName'])
months = ["1","2","3","4","5","6","7","8","9","10","11","12"]
pivot_table = pivot_table.set_index('CompanyName')
# this is the colormap from the original plot
colors = [
"#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
"#ddb7b1", "#cc7878", "#933b41", "#550b1d"
]
# Set up the data for plotting. We will need to have values for every
# pair of year/month names. Map the rate to a color.
month = []
company = []
color = []
rate = []
for y in pivot_table.index:
for m in pivot_table.columns:
month.append(m)
company.append(y)
num_calls = pivot_table.loc[y,m]
rate.append(num_calls)
color.append(colors[min(int(num_calls)-2, 8)])
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
output_file('heatmap.html')
TOOLS = "resize,hover,save,pan,box_zoom,wheel_zoom"
p = figure(title="Customer Calls This Year",
x_range=Companies, y_range=list(reversed(months)),
x_axis_location="above", plot_width=1400, plot_height=900,
toolbar_location="left", tools=TOOLS)
p.rect("Companies", "months", 1, 1, source=source,
color="color", line_color=None)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
('Company Name', '#Companies'),
('Number of Calls', '#rate'),
])
show(p) # show the plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# just following your previous post to simulate your data
np.random.seed(0)
dates = np.random.choice(pd.date_range('2015-01-01 00:00:00', '2015-06-30 00:00:00', freq='1h'), 10000)
company = np.random.choice(['company' + x for x in '1 2 3 4 5'.split()], 10000)
df = pd.DataFrame(dict(recvd_dttm=dates, CompanyName=company)).set_index('recvd_dttm').sort_index()
df['C'] = 1
df.columns = ['CompanyName', '']
result = df.groupby([lambda idx: idx.month, 'CompanyName']).agg({df.columns[1]: sum}).reset_index()
result.columns = ['Month', 'CompanyName', 'counts']
pivot_table = result.pivot(index='CompanyName', columns='Month', values='counts')
x_labels = ['Month'+str(x) for x in pivot_table.columns.values]
y_labels = pivot_table.index.values
fig, ax = plt.subplots()
x = ax.imshow(pivot_table, cmap=plt.cm.winter)
plt.colorbar(mappable=x, ax=ax)
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)
ax.set_xlabel('Month')
ax.set_ylabel('Company')
ax.set_title('Customer Calls This Year')
The answer was in this line:
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
It should have been:
source = ColumnDataSource(
data=dict(month=months, company=company, color=color, rate=rate)
)

Categories