For the following sample dataframe df2, I want to create bar charts for every row only when there are positive values, using Bokeh.
import pandas as pd
import numpy as np
# Sample data: a single categorical column holding temperature labels.
df = pd.DataFrame(data={'Temperature': ['Hot', 'Cold', 'Warm', 'Cold']})
# One-hot encode the frame: every distinct label gets its own indicator
# column, so each row of df2 has exactly one "on" entry.
df2 = pd.get_dummies(data=df)
Please advise.
Edit.
I found the following example which works
from bokeh.plotting import figure, output_file, show
from bokeh.transform import dodge
# Categories shown on the x axis, and the data columns drawn for each one.
labs = ['label_1', 'label_2', 'label_3']
vals = ['val_1', 'val_2', 'val_3']
my_data = {'labs': labs,
           'val_1': [2, 5, 11],
           'val_2': [34, 23, 1],
           'val_3': [25, 34, 23]
           }
# `width`/`height` are used instead of `plot_width`/`plot_height`, which
# Bokeh deprecated in 2.4 and removed in 3.0.
fig = figure(x_range=labs, width=300, height=300)
# One bar series per value column. dodge() shifts each series sideways
# within its category so the three bars sit next to each other instead of
# being drawn on top of one another.
for column, offset, color in zip(vals, (-0.25, 0.0, 0.25), ("green", "cyan", "blue")):
    fig.vbar(x=dodge('labs', offset, range=fig.x_range), top=column,
             width=0.2, source=my_data, color=color)
show(fig)
However my data source is pandas dataframe so I am confused how to achieve the outcome. Thanks in advance.
Suppose I have the following pandas data frame:
import pandas as pd
# Sample data: 2 people x 3 times of day x 3 colors, one Energy value each.
# The Time labels are spelled 'Morning' (not 'Morining') so that lookups
# keyed on the correctly spelled label work against this frame.
d = {'Person': ['Bob']*9 + ['Alice']*9,
     'Time': (['Morning']*3 + ['Noon']*3 + ['Evening']*3) * 2,
     'Color': ['Red', 'Blue', 'Green']*6,
     'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1]}
df = pd.DataFrame(d)
How can I create a plot like this?
(Excuse the crude plotting)
I've tried tricking scatter, strip and box plots into this, but with no success.
Thank you!
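Generate one scatter trace per Person.
Apply a bit of logic to x so that each person's markers are offset sideways. Because x then becomes numeric, the Time labels are restored through the hover text and custom x-axis ticks.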
import plotly.graph_objects as go
# Give every distinct Time label an integer x position (0, 1, 2, ...) in
# order of first appearance.
time_labels = pd.Series(df["Time"].unique())
xbase = time_labels.reset_index().rename(columns={"index": "x", 0: "Time"})
# Attach the numeric position to every row and index by Person so that one
# person's rows can be selected with .loc.
dfp = df.merge(xbase, on="Time").set_index("Person")

# One marker trace per person. Person number i is shifted right by i/10 so
# that people sharing a Time do not overlap; i also selects the symbol.
traces = []
for i, p in enumerate(dfp.index.get_level_values("Person").unique()):
    traces.append(
        go.Scatter(
            name=p,
            x=dfp.loc[p, "x"] + i/10,
            y=dfp.loc[p, "Energy"],
            # The Time label is carried as text so the hover box can show it
            # in place of the shifted numeric x.
            text=dfp.loc[p, "Time"],
            mode="markers",
            marker={"color": dfp.loc[p, "Color"], "symbol": i, "size": 10},
            hovertemplate="(%{text},%{y})",
        )
    )

# Relabel the numeric tick positions with the original Time strings.
go.Figure(traces).update_layout(
    xaxis={"tickmode": "array", "tickvals": xbase["x"], "ticktext": xbase["Time"]}
)
You've already received some great suggestions, but since you're still wondering about:
What if I also want the colors to show in the legend?
I'd just like to chip in that px.scatter comes really close to being an optimal approach right out of the box. The only thing that's missing is jitter. Still, the plot below can be produced by these few lines of code:
fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')


def _relabel(trace):
    # Assumes each trace name has the form "<Color>,<Person>" (comma
    # separated): the first part becomes the marker colour, the second part
    # (kept verbatim, including any leading space) becomes the legend name.
    # The categorical x values are replaced by numeric positions 1, 2, 3.
    trace.update(marker_color=trace.name.split(',')[0],
                 name=trace.name.split(',')[1],
                 x=[1, 2, 3])


def _shift_alice(trace):
    # Jitter: move Alice's markers 0.2 to the right. The compared name
    # carries the leading space left over from the split in _relabel.
    if trace.name == ' Alice':
        trace.update(x=tuple(value + 0.2 for value in trace.x))


fig.for_each_trace(_relabel)
fig.for_each_trace(_shift_alice)
Complete code:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
# data
# 'Morning' is spelled identically for both people; a misspelled variant for
# one of them would create a fourth Time category and a wrong tick label.
d = {'Person': ['Bob']*9 + ['Alice']*9,
     'Time': (['Morning']*3 + ['Noon']*3 + ['Evening']*3) * 2,
     'Color': ['Red', 'Blue', 'Green']*6,
     'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1]}
df = pd.DataFrame(d)

# figure setup
fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')

# some customizations in order to get to the desired result:
# each trace name is assumed to have the form "<Color>,<Person>"; the first
# part is used as the marker colour, the second as the legend name, and the
# categorical x values are replaced by numeric positions 1, 2, 3.
fig.for_each_trace(lambda t: t.update(marker_color=t.name.split(',')[0],
                                      name=t.name.split(',')[1],
                                      x=[1, 2, 3]))

# jitter
# only Alice's traces are shifted; the name still carries the leading space
# left over from the split above.
fig.for_each_trace(lambda t: t.update(x=tuple(x + 0.2 for x in t.x)) if t.name == ' Alice' else ())

# layout
# put the Time labels back on the numeric tick positions.
fig.update_layout(xaxis={"tickmode": "array", "tickvals": [1, 2, 3], "ticktext": df.Time.unique()})

fig.show()
Room for improvement:
Some elements of the snippet above could undoubtedly be made more dynamic, like x = [1,2,3] which should take into account a varying number of elements on the x-axis. The same goes for the number of people and the arguments used for jitter. But I can look into that too if this is something you can use.
You can go through each row of the DataFrame using itertuples (better performance than iterrows), map the 'Morning', 'Noon', and 'Evening' values to 1, 2, 3, respectively, and then jitter the x-values by mapping 'Bob' to -0.05 and 'Alice' to 0.05 and adding these offsets to each of the x-values. You can also pass the 'Color' information to the marker_color argument.
Then map the tickvalues of 1,2,3 back to 'Morning','Noon' and 'Evening' and also use a legendgroup to get only one Bob and one Alice legend marker to display (to stop the marker for each trace from displaying in the legend)
import pandas as pd
import plotly.graph_objects as go
d = {'Person': ['Bob']*9 + ['Alice']*9,
     'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
     'Color': ['Red', 'Blue', 'Green']*6,
     'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1]}
df = pd.DataFrame(d)

# Per-person marker symbol, numeric x position of each Time label, and the
# horizontal jitter applied to each person.
shapes = {'Bob': 'circle', 'Alice': 'diamond'}
time = {'Morning': 1, 'Noon': 2, 'Evening': 3}
jitter = {'Bob': -0.05, 'Alice': 0.05}

fig = go.Figure()

# Index labels of the rows whose Person differs from the previous row, i.e.
# the first row of every run of the same person.
s = df.Person.shift() != df.Person
name_changes = s[s].index.values

# Each tuple is (Index, Person, Time, Color, Energy). One single-point trace
# is added per row; only the first row of each person shows up in the
# legend, and legendgroup ties the person's remaining traces to that entry.
for row_label, person, time_of_day, color, energy in df.itertuples():
    fig.add_trace(go.Scatter(
        x=[time[time_of_day] + jitter[person]],
        y=[energy],
        legendgroup=person,
        name=person,
        mode='markers',
        marker_symbol=shapes[person],
        marker_color=color,
        showlegend=bool(row_label in name_changes),
    ))

fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')))

# Show the Time labels at their numeric tick positions.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(time.values()),
        ticktext=list(time.keys()),
    )
)
fig.show()
In case you only want to go with matplotlib and don't want any extra dependencies, here is a sample code. (Pandas operations groupbys etc are left for you to optimize)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
from matplotlib.lines import Line2D
df = pd.DataFrame(
    {
        'Person': ['Bob'] * 9 + ['Alice'] * 9,
        'Time': ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3
        + ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3,
        'Color': ['Red', 'Blue', 'Green'] * 6,
        'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
    }
)

plt.figure()
# Category order along the x axis.
x = ['Morning', 'Noon', 'Evening']


def offset(p):
    # Horizontal translation of p / 72 units in the figure's dpi-scaled
    # coordinate system, with no vertical shift.
    return transforms.ScaledTranslation(p / 72.0, 0, plt.gcf().dpi_scale_trans)


trans = plt.gca().transData

# Starting multiple for the per-person shift (used to centre the groups).
start_offset = -len(df['Person'].unique()) // 2

# One marker style per person; needs at least as many entries as people.
markers = ['o', '^']

# Handles for the hand-built legend (one entry per person).
custom_legend = []

# Aggregate duplicate (Person, Time, Color) rows, then index by Time so the
# values for each time of day can be looked up below.
df = df.groupby(['Person', 'Time', 'Color'])['Energy'].sum().reset_index()
df = df.set_index('Time')

for i, (person, pgroup) in enumerate(df.groupby('Person')):
    pts = (i + start_offset) * 10
    marker = markers[i]
    transform = trans + offset(pts)

    # Legend-only proxy artist; it is never drawn on the axes.
    custom_legend.append(
        Line2D([0], [0], color='w', markerfacecolor='black',
               marker=marker, markersize=10, label=person)
    )

    for color, cgroup in pgroup.groupby('Color'):
        # Energy for this person/colour at each time of day, in x order.
        energies = [
            cgroup.loc[cgroup.index == label, 'Energy'].values[0] for label in x
        ]
        # The unshifted group is plotted without a transform; per the
        # original author, at least one scatter must be drawn untransformed.
        shift = {} if pts == 0 else {'transform': transform}
        plt.scatter(x, energies, c=color.lower(), s=25, marker=marker, **shift)

plt.ylabel('Energy')
plt.xlabel('Time')
plt.legend(handles=custom_legend)
plt.margins(x=0.5)
plt.show()
I want to add data labels to the tops of bar charts in plotly express. I'm using two different columns from the data frame so I can't use the "colors" method. I want to define "text" for each bar so it shows the data on top of the bar. Here is an MRE.
import pandas as pd
import plotly.express as px
# Bar categories, followed by the two value series drawn for each category.
x = ['Aaron', 'Bob', 'Chris']
y1, y2 = [5, 10, 6], [8, 16, 12]

# Wide-form input: each list passed in `y` becomes its own bar trace, and
# barmode='group' places the traces side by side.
fig = px.bar(x=x, y=[y1, y2], barmode='group')
fig.show()
I tried:
# Attempt from the question: the two value lists are also passed as `text`.
# The author reports that this call does not work.
fig = px.bar(x=x, y=[y1,y2],text=[y1,y2], barmode='group')
But this doesn't work.
Using your setup, just add the following to the mix:
# One list of labels per bar trace, in the same order as the traces.
texts = [y1, y2]
for position, labels in enumerate(texts):
    bar_trace = fig.data[position]
    bar_trace.text = labels
    # Draw each label outside (above) its bar rather than inside it.
    bar_trace.textposition = 'outside'
Result:
Complete code:
import pandas as pd
import plotly.express as px
# Bar categories, followed by the two value series drawn for each category.
x = ['Aaron', 'Bob', 'Chris']
y1, y2 = [5, 10, 6], [8, 16, 12]

fig = px.bar(x=x, y=[y1, y2], barmode='group')

# One list of labels per bar trace, in the same order as the traces.
texts = [y1, y2]
for position, labels in enumerate(texts):
    bar_trace = fig.data[position]
    bar_trace.text = labels
    # Draw each label outside (above) its bar rather than inside it.
    bar_trace.textposition = 'outside'

fig.show()
I found an answer that is better.
Let's take as example this dictionary:
# Keyword arguments for px.bar, collected in one mapping: the data itself,
# the column used for x, the columns plotted as bars, and the text options.
data_dictionary = dict(
    data_frame=dict(
        x=["Aaron", "Bob", "Chris"],
        y1=[5, 10, 6],
        y2=[8, 16, 12],
    ),
    x="x",
    y=["y1", "y2"],
    barmode="group",
    # No explicit text; text_auto asks for the labels to be generated.
    text=None,
    text_auto=True,
)
After that let's create a figure:
# Unpack the prepared mapping into px.bar's keyword arguments.
fig = px.bar(**data_dictionary)
If you type fig.show(), you'll see a graph similar to vestland's graph.
The only thing you need to do is to set text as None and text_auto as True.
I hope that helps you.
I'd like to be able to toggle the display of the points on and off in the below chart. The 2 lines are the means of the points in groups 1 and 2. I thought there would be a way to do this using interactive but cannot find any examples. Any help is much appreciated.
import math
import numpy as np
import pandas as pd
import altair as alt
# Clean signal: sin(x) sampled every 0.1 on [0, pi).
x = np.arange(0, math.pi, 0.1)
y = np.sin(x)
# Lower and upper bound of the uniform noise added to each batch.
a, b = -0.2, 0.2

# Two groups of five batches; every batch is the clean signal plus its own
# noise drawn from [a, b).
frames = []
for i in range(2):
    for j in range(5):
        rand_y = (b - a)*np.random.rand(len(y)) + a
        df = pd.DataFrame({
            'group': i,
            'batch': j,
            'x': x,
            'y': y + rand_y,
        })
        frames.append(df)
# Stack all batches into one long frame (the original row labels are kept).
summary_df = pd.concat(frames)
# Shared base chart: both layers read summary_df and use the same size.
base = alt.Chart(summary_df).properties(width=200, height=400)

# Layer 1: per-group mean of y at every x, drawn as a line. The selection is
# bound to the legend; groups outside the selection are dimmed to 0.2.
mean_selection = alt.selection_multi(fields=['group'], bind='legend')
mean_line = (
    base.mark_line(size=2)
    .encode(
        x=alt.X('x:Q'),
        y=alt.Y('y:Q', aggregate='mean', axis=alt.Axis(title='y')),
        color='group:N',
        opacity=alt.condition(mean_selection, alt.value(1), alt.value(0.2)),
    )
    .add_selection(mean_selection)
    .interactive()
)

# Layer 2: every individual point, with its batch shown in the tooltip. It
# has its own legend-bound selection with the same dimming rule.
all_selection = alt.selection_multi(fields=['group'], bind='legend')
all_points = (
    base.mark_square(size=10)
    .encode(
        y=alt.Y('y:Q', axis=alt.Axis(title='y')),
        x=alt.X('x:Q',),
        color='group:N',
        tooltip='batch:N',
        opacity=alt.condition(all_selection, alt.value(1), alt.value(0.2)),
    )
    .add_selection(all_selection)
    .interactive()
)

# Overlay the points on the mean lines.
(mean_line + all_points)