I've been using plotnine recently and want to plot a horizontal histogram (i.e., a histogram with horizontal bars).
The following example illustrates the vertical histogram:
from plotnine import aes, after_stat, geom_histogram, ggplot
import numpy as np
import pandas as pd  # needed for pd.DataFrame below; the snippet raised NameError without it

# 1000 draws from np.random.normal(0, 10), the first 500 labelled 'a' and the
# remaining 500 labelled 'b'.
df = pd.DataFrame({'values': np.random.normal(0,10,1000), 'group': ['a']*500 + ['b']*500})
#
# Vertical histogram: 'values' is binned along x (bin width 5), the bar height
# is the per-bin count, and bars are filled by 'group'.
(
    ggplot(df, aes(x = 'values', y = after_stat('count'), fill = 'group'))
    + geom_histogram(binwidth = 5)
)
And the result:
Simply changing the axes in aes doesn't work:
# Deliberately failing attempt, kept verbatim: the x and y mappings from the
# working example are swapped, which raises the error quoted on the last line.
(
ggplot(df, aes(y = 'values', x = after_stat('count'), fill = 'group'))
+ geom_histogram(binwidth = 5)
)
#PlotnineError: 'stat_bin() must not be used with a y aesthetic.'
How can I achieve the desired result?
Use coord_flip to achieve the desired result:
from plotnine import aes, after_stat, coord_flip, geom_histogram, ggplot
import numpy as np
import pandas as pd  # needed for pd.DataFrame below; the snippet raised NameError without it

# 1000 draws from np.random.normal(0, 10), the first 500 labelled 'a' and the
# remaining 500 labelled 'b'.
df = pd.DataFrame({'values': np.random.normal(0,10,1000), 'group': ['a']*500 + ['b']*500})
#
# Build the ordinary (vertical) histogram with 'values' on x, then flip the
# coordinate system so the bars are drawn horizontally.
(
    ggplot(df, aes(x = 'values', y = after_stat('count'), fill = 'group'))
    + geom_histogram(binwidth = 5)
    + coord_flip()
)
For the following sample dataframe df2, I want to create bar charts for every row only when there are positive values, using Bokeh.
import pandas as pd
import numpy as np

# Sample data: a single categorical column of temperature labels.
temperature_labels = ['Hot', 'Cold', 'Warm', 'Cold']
df = pd.DataFrame({'Temperature': temperature_labels})

# One indicator column per distinct label (dummy / one-hot encoding).
df2 = pd.get_dummies(df)
Please advise.
Edit.
I found the following example which works
from bokeh.plotting import figure, output_file, show
from bokeh.transform import dodge

# Category labels for the x axis and the names of the three value columns.
labs = ['label_1', 'label_2', 'label_3']
vals = ['val_1','val_2','val_3']
my_data = {
    'labs': labs,
    'val_1': [2,5,11],
    'val_2': [34,23,1],
    'val_3': [25, 34, 23],
}
fig = figure(x_range = labs, plot_width = 300, plot_height = 300)

# One vbar glyph per value column; each series is shifted sideways inside its
# category by the given dodge offset so the three bars sit next to each other.
dodge_offsets = (-0.25, 0.0, 0.25)
bar_colors = ("green", "cyan", "blue")
for column, shift, bar_color in zip(vals, dodge_offsets, bar_colors):
    fig.vbar(
        x = dodge('labs', shift, range = fig.x_range),
        top = column,
        width = 0.2,
        source = my_data,
        color = bar_color,
    )
show(fig)
However, my data source is a pandas DataFrame, so I am unsure how to achieve the same outcome. Thanks in advance.
I want to change the hover text and hover data for a python plotly boxplot. Instead of 5 separate hover boxes for max, q3, median, q1, and min, I want one condensed hover box for Median, Mean, IQR and date. I have played around with every "hover" variable with no luck. My sample code is found below.
import numpy as np
import pandas as pd  # was missing: pd.DataFrame / pd.concat / pd.merge below need it
import plotly.express as px

# 20 rows labelled '2020' followed by 25 rows labelled '2021'.
lst = [['2020'], ['2021']]
numbers = [20 , 25]
r = [x for i, j in zip(lst, numbers) for x in i*j]
df = pd.DataFrame(r, columns=['year'])
# Observations are 1..len(df), all scaled by one shared random factor.
df['obs'] = np.arange(1,len(df)+1) * np.random.random()

# Per-year statistics of 'obs'; the grouping is built once and reused.
by_year = df.groupby('year')[['obs']]
mean = by_year.mean()
median = by_year.median()
iqr = by_year.quantile(0.75) - by_year.quantile(0.25)
stats = pd.concat([mean,median,iqr], axis=1)
stats.columns = ['Mean','Median','IQR']

# Attach the yearly statistics to every observation of that year.
tot_df = pd.merge(df,stats, right_index=True, left_on='year', how = 'left')

# The attempt described in the question: hover_data is passed to the box plot.
fig = px.box(tot_df, x="year", y="obs", points=False, hover_data=['year','Mean','Median','IQR'])
fig.show()
In this case I tried to use "hover_data", which does not raise an error, but also does not change the plot, as shown above. I have tried both express and graph_objects with no luck. My plotly version is 4.9.0. Thank you!
I have used the technique of overlaying a bar trace on top of the boxplot trace.
The bar trace can be configured to show the information you want.
For the sake of demonstration I have set the opacity to 0.05; it can be set to 0 to make the bars fully invisible.
I built this against plotly 5.2.1 and have not tested it against 4.9.0.
import numpy as np
import plotly.express as px
import pandas as pd

# Sample data: 20 rows for '2020' followed by 25 rows for '2021'.
lst = [['2020'], ['2021']]
numbers = [20 , 25]
r = [year for years, count in zip(lst, numbers) for year in years*count]
df = pd.DataFrame(r, columns=['year'])
df['obs'] = np.arange(1,len(df)+1) * np.random.random()

# Per-year mean, median and inter-quartile range of 'obs'.
mean = df.groupby('year').mean()[['obs']]
median = df.groupby('year').median()[['obs']]
iqr = df.groupby('year').quantile(0.75)[['obs']] - df.groupby('year').quantile(0.25)[['obs']]
stats = pd.concat([mean,median,iqr], axis=1)
stats.columns = ['Mean','Median','IQR']
tot_df = pd.merge(df,stats, right_index=True, left_on='year', how = 'left')

# The box plot itself.
fig = px.box(tot_df, x="year", y="obs", points=False)

# Every column other than the two plotted ones is shown in the hover box.
hover_cols = [c for c in tot_df.columns if c not in ["year", "obs"]]

# One bar per year, starting at the yearly minimum ("base") and spanning up to
# the yearly maximum ("bar" is max - min).
bar_extent = tot_df.groupby("year", as_index=False).agg(
    base=("obs", "min"), bar=("obs", lambda s: s.max() - s.min())
)
# The first value of each hover column within the year.
hover_values = tot_df.groupby("year", as_index=False).agg(
    {c: "first" for c in hover_cols}
)

fig2 = px.bar(
    bar_extent.merge(hover_values, on="year"),
    x="year",
    y="bar",
    base="base",
    hover_data={
        **{c: True for c in hover_cols},
        **{"base": False, "bar": False},
    },
)
# Nearly transparent, so only the hover box of the bar is noticeable.
fig2.update_traces(opacity=0.05)
fig.add_traces(fig2.data)
fig2 without named aggregations
# Same overlay bar as the named-aggregation version, but base/bar are computed
# with groupby(...).apply returning a Series per year.
hover_cols = [c for c in tot_df.columns if c not in ["year", "obs"]]

bar_extent = tot_df.groupby("year", as_index=False)["obs"].apply(
    lambda s: pd.Series({"base": s.min(), "bar": s.max() - s.min()})
)
# The first value of each hover column within the year.
hover_values = tot_df.groupby("year", as_index=False).agg(
    {c: "first" for c in hover_cols}
)

fig2 = px.bar(
    bar_extent.merge(hover_values, on="year"),
    x="year",
    y="bar",
    base="base",
    hover_data={
        **{c: True for c in hover_cols},
        **{"base": False, "bar": False},
    },
)
fig2.update_traces(opacity=0.05)
Suppose I have the following pandas data frame:
import pandas as pd

# Long-format sample data: for each person, one Energy reading per colour at
# each of three times of day. ('Morning' was previously misspelled 'Morining',
# which would have ended up verbatim on the axis labels.)
d = {'Person': ['Bob']*9 + ['Alice']*9,
     'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
     'Color': ['Red','Blue','Green']*6,
     'Energy': [1,5,4,7,3,6,8,4,2,9,8,5,2,6,7,3,8,1]}
df = pd.DataFrame(d)
How can I create a plot like this?
(Excuse the crude plotting)
I've tried tricking scatter, strip and box plots into this, but with no success.
Thank you!
Generate one scatter trace per Person.
The x values are mapped to numeric positions so that each person can be offset slightly; hence the custom hover text and x-axis tick labels.
import plotly.graph_objects as go

# Number the distinct Time values 0, 1, 2, ... in order of first appearance;
# that number becomes the numeric x position of the category.
xbase = pd.Series(df["Time"].unique()).reset_index().rename(columns={"index":"x",0:"Time"})
# Attach the x position to every row and index by Person for per-person lookup.
dfp = df.merge(xbase, on="Time").set_index("Person")

person_traces = []
for i, p in enumerate(dfp.index.get_level_values("Person").unique()):
    person_traces.append(
        go.Scatter(
            name=p,
            # Shift each successive person 0.1 to the right of the previous one.
            x=dfp.loc[p, "x"] + i/10,
            y=dfp.loc[p, "Energy"],
            text=dfp.loc[p, "Time"],
            mode="markers",
            # The person's position in the loop doubles as the marker symbol.
            marker={"color": dfp.loc[p, "Color"], "symbol":i, "size":10},
            hovertemplate="(%{text},%{y})"
        )
    )

# x is numeric, so the tick labels are mapped back to the Time names.
go.Figure(person_traces).update_layout(
    xaxis={"tickmode":"array", "tickvals":xbase["x"], "ticktext":xbase["Time"]}
)
You've already received some great suggestions, but since you're still wondering about:
What if I also want the colors to show in the legend?
I'd just like to chip in that px.scatter comes really close to being an optimal approach right out of the box. The only thing that's missing is jitter. Still, the plot below can be produced by these few lines of code:
fig = px.scatter(df, x = 'Time', y = 'Energy', color = 'Color', symbol = 'Person')

# Each trace name is split on ','; the first part is used as the marker colour
# and the second part becomes the new trace name. x is replaced by positions 1..3.
for trace in fig.data:
    name_parts = trace.name.split(',')
    trace.update(marker_color = name_parts[0], name = name_parts[1], x = [1,2,3])

# Jitter: move the traces now named ' Alice' (note the leading space left over
# from the split) 0.2 to the right.
for trace in fig.data:
    if trace.name == ' Alice':
        trace.update(x=tuple([pos + 0.2 for pos in list(trace.x)]))
Complete code:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

# data
# NOTE: Bob's first label used to be spelled 'Morining' while Alice's was
# 'Morning', which produced four distinct Time values for only three tick
# positions. Both now use 'Morning'.
d = {'Person': ['Bob']*9 + ['Alice']*9,
     'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
     'Color': ['Red','Blue','Green']*6,
     'Energy': [1,5,4,7,3,6,8,4,2,9,8,5,2,6,7,3,8,1]}
df = pd.DataFrame(d)

# figure setup
fig = px.scatter(df, x = 'Time', y = 'Energy', color = 'Color', symbol = 'Person')

# some customizations in order to get to the desired result:
# each trace name is split on ','; the first part is used as the marker colour,
# the second part becomes the trace name, and x is replaced by positions 1..3.
fig.for_each_trace(lambda t: t.update(marker_color = t.name.split(',')[0],
                                      name = t.name.split(',')[1],
                                      x = [1,2,3]))

# jitter: shift the traces now named ' Alice' (leading space left over from the
# split) 0.2 to the right; other traces are left untouched.
fig.for_each_trace(lambda t: t.update(x=tuple([x + 0.2 for x in list(t.x)])) if t.name == ' Alice' else ())

# layout: label the numeric positions 1..3 with the three Time names.
fig.update_layout(xaxis={"tickmode":"array","tickvals":[1,2,3],"ticktext":list(df.Time.unique())})
fig.show()
Room for improvement:
Some elements of the snippet above could undoubtedly be made more dynamic, like x = [1,2,3] which should take into account a varying number of elements on the x-axis. The same goes for the number of people and the arguments used for jitter. But I can look into that too if this is something you can use.
You can go through each row of the DataFrame using itertuples (better performance than iterrows), map the 'Morning', 'Noon', and 'Evening' values to 1, 2, and 3, respectively, and then jitter the x-values by mapping 'Bob' to -0.05 and 'Alice' to 0.05 and adding these offsets to each of the x-values. You can also pass the 'Color' information to the marker_color argument.
Then map the tickvalues of 1,2,3 back to 'Morning','Noon' and 'Evening' and also use a legendgroup to get only one Bob and one Alice legend marker to display (to stop the marker for each trace from displaying in the legend)
import pandas as pd
import plotly.graph_objects as go

d = {'Person': ['Bob']*9 + ['Alice']*9,
     'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
     'Color': ['Red','Blue','Green']*6,
     'Energy': [1,5,4,7,3,6,8,4,2,9,8,5,2,6,7,3,8,1]}
df = pd.DataFrame(d)

# Lookup tables: marker symbol and x offset per person, x position per time.
shapes = {'Bob': 'circle', 'Alice': 'diamond'}
time = {'Morning':1, 'Noon':2, 'Evening':3}
jitter = {'Bob': -0.05, 'Alice': 0.05}

fig = go.Figure()

# Index labels of the rows where Person differs from the previous row, i.e.
# the first row of each person's run. Only those rows get a legend entry.
s = df.Person.shift() != df.Person
name_changes = s[s].index.values

# One single-point trace per row. The two formerly duplicated branches differed
# only in showlegend, so that flag is now computed directly.
for row in df.itertuples():
    fig.add_trace(go.Scatter(
        x=[time[row.Time] + jitter[row.Person]],
        y=[row.Energy],
        legendgroup=row.Person,
        name=row.Person,
        mode='markers',
        marker_symbol=shapes[row.Person],
        marker_color=row.Color,
        showlegend=row.Index in name_changes,
    ))

fig.update_traces(marker=dict(size=12,line=dict(width=2,color='DarkSlateGrey')))

# Label the numeric x positions with the time-of-day names.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(time.values()),
        ticktext=list(time.keys())
    )
)
fig.show()
In case you only want to use matplotlib and don't want any extra dependencies, here is some sample code. (The pandas operations, such as the groupbys, are left for you to optimize.)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
from matplotlib.lines import Line2D

df = pd.DataFrame(
    {
        'Person': ['Bob'] * 9 + ['Alice'] * 9,
        'Time': ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3
        + ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3,
        'Color': ['Red', 'Blue', 'Green'] * 6,
        'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
    }
)

plt.figure()
x = ['Morning', 'Noon', 'Evening']


# Transform function
def offset(p):
    """Return a horizontal translation of p / 72 inches (i.e. p points)."""
    return transforms.ScaledTranslation(
        p / 72.0, 0, plt.gcf().dpi_scale_trans
    )


trans = plt.gca().transData

# Use this to center transformation.
# -(n // 2) rather than -n // 2: the latter floors the *negative* number, which
# is off-centre for odd n and never yields a zero offset for a single person.
start_offset = -(df['Person'].nunique() // 2)

# Define as many markers as people you have
markers = ['o', '^']

# Use this for custom legend
custom_legend = []

# Do this if you need to aggregate
df = df.groupby(['Person', 'Time', 'Color'])['Energy'].sum().reset_index()
df = df.set_index('Time')

for i, (person, pgroup) in enumerate(df.groupby('Person')):
    # Horizontal shift for this person, in steps of 10 points.
    pts = (i + start_offset) * 10
    marker = markers[i]

    # This is for legend, not plotted
    custom_legend.append(
        Line2D(
            [0],
            [0],
            color='w',
            markerfacecolor='black',
            marker=marker,
            markersize=10,
            label=person,
        )
    )

    # Per the original author's note, at least one scatter has to be drawn
    # without a custom transform, so the zero-offset person gets none.
    scatter_kwargs = {} if pts == 0 else {'transform': trans + offset(pts)}

    for color, cgroup in pgroup.groupby('Color'):
        # Energy value of this person/colour at each time of day, in x order.
        energies = [
            cgroup.loc[cgroup.index == t, 'Energy'].values[0] for t in x
        ]
        plt.scatter(
            x,
            energies,
            c=color.lower(),
            s=25,
            marker=marker,
            **scatter_kwargs,
        )

plt.ylabel('Energy')
plt.xlabel('Time')
plt.legend(handles=custom_legend)
plt.margins(x=0.5)
plt.show()
My goal is to produce this plot:
With this code:
from io import StringIO

import pandas as pd
import plotly.graph_objects as go

premium_growth_data = '{"Market":{"0":"Company"},"Class":{"0":"Total"},"total_2019_ytd":{"0":712725553},"total_2018_ytd":{"0":626756526},"total_2018":{"0":1211397008},"total_2017":{"0":1071172608},"total_2016":{"0":784732527},"yoy_2019":{"0":0.137164949},"yoy_2018":{"0":0.13090738},"yoy_2017":{"0":0.365016195},"rank_2018":{"0":7},"rank_2019":{"0":5},"share_2016":{"0":0.030046288},"share_2017":{"0":0.035641317},"share_2018":{"0":0.037034574},"share_2019":{"0":0.041049906},"gc_yoy_2019":{"0":0.050436263},"gc_yoy_2018":{"0":0.088362134},"gc_yoy_2017":{"0":0.150733851}}'
# Wrap the literal in StringIO: passing a raw JSON string straight to
# read_json is deprecated in recent pandas releases.
premium_growth_df = pd.read_json(StringIO(premium_growth_data))
current_rank = premium_growth_df.rank_2018[0]

# NOTE(review): premium_and_rank_df is not defined anywhere in this snippet;
# presumably it has a "premium" and a "rank" column - confirm with the author.
x2 = premium_and_rank_df["premium"]
y2 = premium_and_rank_df["rank"]

# Replace the company's own rank with the label "You" on the axis.
y2_text = ["You" if elm == current_rank else elm for elm in y2]
# Bar labels: premium in whole millions, prefixed with "$".
text = "$" + round(((x2.astype(float)) / 1000000)).astype("str")

# All bars steelblue except the one labelled "You".
colors = ['steelblue',] * len(y2)
colors[y2_text.index("You")] = 'crimson'

trace3 = go.Bar(x=x2, y=y2, marker_color=colors, text=text, textposition="outside", width=0.2, orientation="h", showlegend=False)
fig = go.Figure([trace3])
fig.show()
I achieved this:
I've been googling quite extensively, but I did not find a solution for:
"framing" the bar plot with a white box
making the corners of the bars "round"