Sorting two plotly bar charts by column value

Sorting two plotly bar charts by column value - python

Let's suppose this is my dataset:
my_data = pd.DataFrame.from_dict({
'Countries':['Australia', 'Canada', 'China', 'Netherlands'],
'A':[1.89,1.45,1.22,0.94],
'B':[0.8,1,1.45,1.6]
})
I have this two bar plots:
I want to order the countries by the value of 'A' in both plots, but I tried a lot of things and just can't. I will be very grateful for any help.
This is my code:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
fig = make_subplots(rows=1, cols=2)
fig.add_trace(
go.Bar(
x=my_data['A'],
y=my_data['Countries'],
orientation='h',
text = my_data['A'],
name = 'A'),
row=1, col=1
)
fig.add_trace(
go.Bar(
x=my_data['B'],
y=my_data['Countries'],
orientation='h',
text = my_data['B'],
name = 'B'),
row=1, col=2
)
fig.update_layout(barmode='stack', yaxis={'categoryorder':'total ascending'})
fig.update_layout(height=600, width=1000, title_text="Side By Side Subplots")
fig.show()

There may be a more plotly-specific way to do this, but you can use fig.update_yaxes(categoryorder='array', categoryarray=desired_order) to update all the subplot y axes, where desired_order is a list to use for ordering. So if you want to order by column A, desired_order = my_data.sort_values('A')['Countries'].to_list(). All together:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
my_data = pd.DataFrame.from_dict({
'Countries':['Australia', 'Canada', 'China', 'Netherlands'],
'A':[1.89,1.45,1.22,0.94],
'B':[0.8,1,1.45,1.6]
})
desired_order = my_data.sort_values('A')['Countries'].to_list()
print(desired_order)
fig = make_subplots(rows=1, cols=2)
fig.add_trace(
go.Bar(
x=my_data['A'],
y=my_data['Countries'],
orientation='h',
text = my_data['A'],
name = 'A'),
row=1, col=1
)
fig.add_trace(
go.Bar(
x=my_data['B'],
y=my_data['Countries'],
orientation='h',
text = my_data['B'],
name = 'B'),
row=1, col=2
)
fig.update_yaxes(categoryorder='array', categoryarray=desired_order)
fig.update_layout(barmode='stack')
fig.update_layout(height=600, width=1000, title_text="Side By Side Subplots")
fig.show()
produces
I couldn't get the same behavior using just update_layout, not sure if that's a bug or a usage issue (I don't know plotly super well).
Edit: I figured out how to use update_layout. As noted in this thread: https://community.plotly.com/t/fig-update-layout-only-affecting-first-subplot/29648 , yaxis is only the first subplot, you can specify yaxis2 to reference the second subplot. So if you want to use update_layout (and not update_yaxes), you could do the following:
fig.update_layout(barmode='stack', yaxis={'categoryorder':'array', 'categoryarray':desired_order},
yaxis2={'categoryorder':'array', 'categoryarray':desired_order})

Related

Make subplots using plotly express with values coming from a dataframe

Assuming I have a toy model df which lists the model of the car and customer rating of one car showroom.
CustomerID Model Cust_rating
1 Corolla A
2 Corolla B
3 Forester A
4 GLC C
5 Xterra A
6 GLC A
Using plotly express, I created pie charts of percentage of cars by model and by Cust_rating, respectively as two separate graphs:
import plotly.express as px
px.pie(df,names='Model',title='Proportion Of each Model')
px.pie(df,names='Cust_rating',title='Proportion Of each Rating')
Now, I want to create subplots, and all the ways of doing it using the documentation are throwing up errors:
ValueError: Trace type 'pie' is not compatible with subplot type 'xy'
at grid position (1, 1)
This is what I tried:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Pie(values=df['Model']), row=1, col=1)
fig.add_trace(go.Pie(values=df['Cust_rating']), row=1, col=2)
fig.update_layout(height=700, showlegend=False)
fig.show()

A pie chart in a graph object requires a pair of labels and values. You must also specify the plot type in the subplot. See this for an example of a subplot type.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(rows=1, cols=2, subplot_titles=("Model", "Rating"), specs=[[{'type': 'domain'},{'type': 'domain'}]])
fig.add_trace(go.Pie(labels=df['Model'].value_counts().index,
values=df['Model'].value_counts(),
legendgroup='model',
legendgrouptitle=dict(text='Model'),
),
row=1, col=1)
fig.add_trace(go.Pie(labels=df['Cust_rating'].value_counts().index,
values=df['Cust_rating'].value_counts(),
legendgroup='rating',
legendgrouptitle=dict(text='Rating')),
row=1, col=2)
fig.update_layout(height=400, width=600, showlegend=True)
fig.show()

Use matplotlib and seaborn:
import matplotlib.pyplot as plt
import seaborn as sns
if you want two and more plot use
fig, ax = plt.subplots(2,2, figsize=(20, 15))
And use ax=ax[0,1], row and col,
sns.boxplot(x = 'bedrooms', y = 'price', data = dataset_df, ax=ax[0,1])
sns.boxplot(x = 'floor, y = 'price', data = dataset_df, ax=ax[0,2])

Setting boundaries for datetime x axis on Plotly (Python)

I have to plot some chronologically-ordered values (one value per month, in my case) on a Plotly (Python) graph. Also, I have to add a "end of period label" (i.e. a marker with text indicating the last value of the series) that has to be positioned at 'middle right'.
A working example would be something like this:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
date_range = pd.to_datetime(pd.date_range(start='1/1/2013', end='9/1/2022', freq='M').tolist()).date
values = np.random.randint(100, size=len(date_range)).tolist()
fig = go.Figure(
)
fig.add_trace(go.Scatter(
showlegend=False,
x=date_range,
y=values,
mode='lines',
line=dict(
width=2,
color="red",
)
)
)
fig.add_trace(go.Scatter(
showlegend=False,
x=[date_range[-1]],
y=[values[-1]],
text=[values[-1]],
textposition='middle right',
texttemplate="%{text:.3f}",
mode='markers+text',
line=dict(
width=2,
color="red",
)
)
)
fig.update_layout(
xaxis=dict(
tickformat="%m\n<b>%Y", dtick="M3",
)
)
which produces the following plot:
I am facing the following problem: the end of period label "extends" beyond the last value of the date range and makes the x axis go into the green area, which are all undesired months (for example, those that extend beyond the last value of the date range and into 2023).
I tried several things to "erase" or delete that undesired part of the x axis, but nothing worked properly: either the end of period label was cut in half or the whole x axis disappeared.
Thank you in advance for any help or suggestion.

as per #r0beginners comments
given text is outside graph area use an annotation for the text
make marker scatter just mode=markers
explicitly state xaxis range range=date_range[[0,-1]]
import pandas as pd
import numpy as np
import plotly.graph_objects as go
date_range = pd.to_datetime(
pd.date_range(start="1/1/2013", end="9/1/2022", freq="M").tolist()
).date
values = np.random.randint(100, size=len(date_range)).tolist()
fig = go.Figure()
fig.add_trace(
go.Scatter(
showlegend=False,
x=date_range,
y=values,
mode="lines",
line=dict(
width=2,
color="red",
),
)
)
fig.add_trace(go.Scatter(
showlegend=False,
x=[date_range[-1]],
y=[values[-1]],
mode='markers',
marker_size=15
)
)
fig.add_annotation(
x = date_range[-1],
y = values[-1],
text = values[-1],
xshift=10,
yshift=0,
showarrow=False
)
fig.update_layout(
xaxis=dict(
tickformat="%m\n<b>%Y",
dtick="M3",
range=date_range[[0,-1]]
)
)

Python Plotly display other information on Hover

Here is the code that I have tried:
# import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
df = pd.read_csv("resultant_data.txt", index_col = 0, sep = ",")
display=df[["Velocity", "WinLoss"]]
pos = lambda col : col[col > 0].sum()
neg = lambda col : col[col < 0].sum()
Related_Display_Info = df.groupby("RacerCount").agg(Counts=("Velocity","count"),
WinLoss=("WinLoss","sum"),
Positives=("WinLoss", pos),
Negatives=("WinLoss", neg),
)
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(
go.Scatter(x=display.index, y=display["Velocity"], name="Velocity", mode="markers"),
secondary_y=False
)
fig.add_trace(
go.Scatter(x=Related_Display_Info.index,
y=Related_Display_Info["WinLoss"],
name="Win/Loss",
mode="markers",
marker=dict(
color=(
(Related_Display_Info["WinLoss"] < 0)
).astype('int'),
colorscale=[[0, 'green'], [1, 'red']]
)
),
secondary_y=True,
)
# Add figure title
fig.update_layout(
title_text="Race Analysis"
)
# Set x-axis title
fig.update_xaxes(title_text="<b>Racer Counts</b>")
# Set y-axes titles
fig.update_yaxes(title_text="<b>Velocity</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Win/Loss/b>", secondary_y=True)
fig.update_layout(hovermode="x unified")
fig.show()
The output is:
But I was willing to display the following information when I hover on the point:
RaceCount = From Display dataframe value Number of the race corresponding to the dot I hover on.
Velocity = From Display Dataframe value Velocity at that point
Counts = From Related_Display_Info Column
WinLoss = From Related_Display_Info Column
Positives = From Related_Display_Info Column
Negatives = From Related_Display_Info Column
Please can anyone tell me what to do to get this information on my chart?
I have checked this but was not helpful since I got many errors: Python/Plotly: How to customize hover-template on with what information to show?
Data:
RacerCount,Velocity,WinLoss
111,0.36,1
141,0.31,1
156,0.3,1
141,0.23,1
147,0.23,1
156,0.22,1
165,0.2,1
174,0.18,1
177,0.18,1
183,0.18,1
114,0.32,1
117,0.3,1
120,0.29,1
123,0.29,1
126,0.28,1
129,0.27,1
120,0.32,1
144,0.3,1
147,0.3,1
159,0.27,1
165,0.26,1
168,0.25,1
156,0.29,1
165,0.26,1
168,0.26,1
165,0.28,1
213,0.17,1
243,0.15,1
249,0.14,1
228,0.54,1
177,0.67,1
180,0.66,1
183,0.65,1
192,0.66,1
195,0.62,1
198,0.6,1
180,0.66,1
222,0.56,1
114,0.41,1
81,0.82,1
102,0.56,1
111,0.55,1
90,1.02,1
93,1.0,1
90,1.18,1
90,1.18,1
93,1.1,1
96,1.07,1
99,1.04,1
102,0.99,1
105,0.94,1
108,0.92,1
111,0.9,1
162,0.66,1
159,0.63,1
162,0.65,-1
162,0.66,-1
168,0.64,-1
159,0.68,-1
162,0.67,-1
174,0.62,-1
168,0.65,-1
171,0.64,-1
198,0.55,-1
300,0.47,-1
201,0.56,-1
174,0.63,-1
180,0.61,-1
171,0.64,-1
174,0.62,-1
303,0.47,-1
312,0.48,-1
258,0.51,-1
261,0.51,-1
264,0.5,-1
279,0.47,-1
288,0.48,-1
294,0.47,-1
258,0.52,-1
261,0.51,-1
267,0.5,-1
222,0.53,-1
171,0.64,-1
177,0.63,-1
177,0.63,-1

Essentially, this code ungroups the data frame before plotting to create the hovertemplate you're looking for.
As stated in the comments, the data has to have the same number of rows to be shown in the hovertemplate. At the end of my answer, I added the code all in one chunk.
Since you have hovermode as x unified, you probably only want one of these traces to have hover content.
I slightly modified the creation of Related_Display_Info. Instead of WinLoss, which is already in the parent data frame, I modified it to WinLoss_sum, so there wouldn't be a naming conflict when I ungrouped.
Related_Display_Info = df.groupby("RacerCount").agg(
Counts=("Velocity","count"), WinLoss_sum=("WinLoss","sum"),
Positives=("WinLoss", pos), Negatives=("WinLoss", neg))
Now it's time to ungroup the data you grouped. I created dui (stands for display info ungrouped).
dui = pd.merge(df, Related_Display_Info, how = "outer", on="RacerCount",
suffixes=(False, False))
I created the hovertemplate for both traces. I passed the entire ungrouped data frame to customdata. It looks like the only column that isn't in the template is the original WinLoss.
# create hover template for all traces
ht="<br>".join(["<br>RacerCount: %{customdata[0]}",
"Velocity: %{customdata[1]:.2f}",
"Counts: %{customdata[3]}",
"Winloss: %{customdata[4]}",
"Positives: %{customdata[5]}",
"Negatives: %{customdata[6]}<br>"])
The creation of fig is unchanged. However, the traces are both based on dui. Additionally, the index isn't RacerCount, so I used the literal field instead.
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(go.Scatter(x=dui["RacerCount"], y=dui["Velocity"],
name="Velocity", mode="markers",
customdata=dui, hovertemplate=ht),
secondary_y=False)
fig.add_trace(
go.Scatter(x = dui["RacerCount"], y=dui["WinLoss_sum"], customdata=dui,
name="Win/Loss", mode="markers",
marker=dict(color=((dui["WinLoss_sum"] < 0)).astype('int'),
colorscale=[[0, 'green'], [1, 'red']]),
hovertemplate=ht),
secondary_y=True)
All the code altogether (for easier copy + paste)
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
df = pd.read_clipboard(sep = ',')
display=df[["Velocity", "WinLoss"]]
pos = lambda col : col[col > 0].sum()
neg = lambda col : col[col < 0].sum()
Related_Display_Info = df.groupby("RacerCount").agg(
Counts=("Velocity","count"), WinLoss_sum=("WinLoss","sum"),
Positives=("WinLoss", pos), Negatives=("WinLoss", neg))
# ungroup the data for the hovertemplate
dui = pd.merge(df, Related_Display_Info, how = "outer", on="RacerCount",
suffixes=(False, False))
# create hover template for all traces
ht="<br>".join(["<br>RacerCount: %{customdata[0]}",
"Velocity: %{customdata[1]:.2f}",
"Counts: %{customdata[3]}",
"Winloss: %{customdata[4]}",
"Positives: %{customdata[5]}",
"Negatives: %{customdata[6]}<br>"])
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(go.Scatter(x=dui["RacerCount"], y=dui["Velocity"],
name="Velocity", mode="markers",
customdata=dui, hovertemplate=ht),
secondary_y=False)
fig.add_trace(
go.Scatter(x = dui["RacerCount"], y=dui["WinLoss_sum"], customdata=dui,
name="Win/Loss", mode="markers",
marker=dict(color=((dui["WinLoss_sum"] < 0)).astype('int'),
colorscale=[[0, 'green'], [1, 'red']]),
hovertemplate=ht),
secondary_y=True)
# Add figure title
fig.update_layout(
title_text="Race Analysis"
)
# Set x-axis title
fig.update_xaxes(title_text="<b>Racer Counts</b>")
# Set y-axes titles
fig.update_yaxes(title_text="<b>Velocity</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Win/Loss/b>", secondary_y=True)
fig.update_layout(hovermode="x unified")
fig.show()

How to set properties on a row/column in a grid of plotly plots?

Suppose I'm plotting 2 charts on each row, 10 rows, using plotly:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
N=10
fig = make_subplots(rows=N, cols=2)
fig.add_trace(
go.Scatter(x=x, y=y),
row=1, col=1
)
fig.add_trace(
go.Candlestick(
x=df_kline.index,
open=df_kline['O'],
high=df_kline['H'],
low=df_kline['L'],
close=df_kline['C']
),
row=1, col=2
)
:
fig.show()
How can I set a yaxis_title for each row?
How can I set the y-axis range to be [1,10] for the entire first column, and only show the ticklabels at the bottom of the plot?
I hope this qualifies as a single question rather than two, as it's dealing with group-by-row / group-by-col.
FOOTNOTE:
Following from the comments in the accepted answer, one can set settings on multiple subplots thus:
subplot_settings = {
'rangeslider_visible': True,
'rangeslider_thickness': 0.05
}
kwargs = {
f'xaxis{k}' : subplot_settings
for k in range(2, 2*N, 2)
}
fig.update_layout(**kwargs)
(Untested)

Since no data was presented, I responded to the challenge with four subplots using a certain stock price; the title and range of the y-axis for each row in the first one can be set in the y-axis settings. Also, in the settings section of the subplot, if you set the shared axis to x-axis, only the bottom x-axis will be available.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
import pandas as pd
x = np.linspace(0,1, 100)
y = np.cumsum(x)
import yfinance as yf
df_kline = yf.download("AAPL", start="2021-01-01", end="2021-03-01")
df_kline.rename(columns={'Open':'O','High':'H','Low':'L','Close':'C'}, inplace=True)
N=2
fig = make_subplots(rows=N, cols=2,
shared_xaxes=True, )# vertical_spacing=0.1
fig.add_trace(
go.Scatter(x=x, y=y),
row=1, col=1
)
fig.add_trace(
go.Candlestick(
x=df_kline.index,
open=df_kline['O'],
high=df_kline['H'],
low=df_kline['L'],
close=df_kline['C'],
),
row=1, col=2,
)
fig.add_trace(
go.Scatter(x=x, y=y),
row=2, col=1
)
fig.add_trace(
go.Candlestick(
x=df_kline.index,
open=df_kline['O'],
high=df_kline['H'],
low=df_kline['L'],
close=df_kline['C'],
),
row=2, col=2
)
fig.update_layout(autosize=False, height=600, width=1000, showlegend=False)
# rangeslider visible false
fig.update_layout(title='Custome subplots',
xaxis2=dict(rangeslider=dict(visible=False)),
xaxis4=dict(rangeslider=dict(visible=False)))
# yxais customize
fig.update_layout(yaxis1=dict(range=[0,10], title='test'),
yaxis3=dict(range=[0,10], title='test2'))
fig.show()

How to get rid of scribbled lines in plotly line plot?

I am trying to plot a subplot using plotly where I have some line plots and all the plots in the subplot needs to share the same x-axis as shown.
fig = make_subplots(
rows=5,
cols=1,
vertical_spacing=0.05,
subplot_titles=['Count / Anzahl', 'min_nValue', 'max_nValue', 'avg_nValue', 'sum_nValue'],
shared_xaxes=True,
)
fig.append_trace(go.Scatter(
x=df_dict_nValueAgg['Erste_15_Minuten']['KurzName'],
y=df_dict_nValueAgg['Erste_15_Minuten']['min_nValueNorm'],
name = "min_nValue_" + "Erste_15_Minuten",
mode='lines+markers',
#legendgroup = 2
), row=2, col=1)
fig.append_trace(go.Scatter(
x=df_dict_nValueAgg['Erste_15_Minuten']['KurzName'],
y=df_dict_nValueAgg['Erste_15_Minuten']['max_nValueNorm'],
name = "max_nValue_" + "Erste_15_Minuten",
mode='lines+markers',
#legendgroup = 2
), row=2, col=1)
.
.
.
# couple of plots more
.
.
fig.update_layout(
legend_orientation="v",
height=1000,
width=2000,
title_text=currentEventTitle+pastEventTitle+nAttributes,
)
fig.update_xaxes(tickangle=45)
fig.write_image('fig1.png')
fig.show()
which gives me this figure
So I filter the data for each
The last three plots produces scribbled lines. Now I understand that since I am filtering the data based on four values of a column i.e. Erste_15_Minuten, Zweite_15_Minuten, Dritte_15_Minuten and Letzte_15_Minuten the number of xticks for the last three plots are unequal or maybe in different order. Is there a way where I can avoid this problem? Switching to Bar Plot would avoid this problem but I need to use only line plot. Thank you in advance.

from looking at your code. There are multiple data frames of same format in a dict
there is no guarantee that these dataframes are in same KurzName order
have simulated data to match above understanding
then have provided a way to re-order data frames to be consistent with third for generating line plots
merge to first dataframe on KurzName
use index of first dataframe to define sort order
looking at image
bar chart - ok, not order dependent
first line chart is scribbled, second is not
hence forcing order of data frames has resolved the issue
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# fmt: off
words = ['adipisci', 'aliquam', 'amet', 'consectetur', 'dolor', 'dolore', 'dolorem', 'eius', 'est', 'etincidunt', 'ipsum', 'labore', 'magnam', 'modi', 'neque', 'non', 'numquam', 'porro', 'quaerat', 'quiquia', 'quisquam', 'sed', 'sit', 'tempora', 'ut', 'velit', 'voluptatem']
# fmt: on
r = np.random.choice(words, [2, 30])
r = np.char.add(r[0], np.char.add("_", r[1]))
# Erste_15_Minuten, Zweite_15_Minuten, Dritte_15_Minuten and Letzte_15_Minuten the number
df_dict_nValueAgg = {}
for k in [
"Erste_15_Minuten",
"Zweite_15_Minuten",
"Dritte_15_Minuten",
"Letzte_15_Minuten",
]:
np.random.shuffle(r)
df_dict_nValueAgg[k] = pd.DataFrame(
{
"KurzName": r,
"Count": np.random.randint(100, 300, len(r)),
"min_nValueNorm": np.random.uniform(0, 0.5, len(r)),
"max_nValueNorm": np.random.uniform(0.5, 1, len(r)),
}
)
fig = make_subplots(
rows=5,
cols=1,
vertical_spacing=0.05,
subplot_titles=[
"Count / Anzahl",
"min_nValue",
"max_nValue",
"avg_nValue",
"sum_nValue",
],
shared_xaxes=True,
)
for k in df_dict_nValueAgg.keys():
fig.add_trace(
go.Bar(
x=df_dict_nValueAgg[k]["KurzName"], y=df_dict_nValueAgg[k]["Count"], name=k
),
row=1,
col=1,
)
# this will be scibbled as each dataframe is in a different order
for k in df_dict_nValueAgg.keys():
fig.add_trace(
go.Scatter(
x=df_dict_nValueAgg[k]["KurzName"],
y=df_dict_nValueAgg[k]["max_nValueNorm"],
name=k + " scribble max",
),
row=4,
col=1,
)
# force order of dataframes to be same as first
for i, k in enumerate(df_dict_nValueAgg.keys()):
df = df_dict_nValueAgg[k]
if i > 0:
df = df.merge(
df_dict_nValueAgg[list(df_dict_nValueAgg.keys())[0]]
.loc[:, "KurzName"]
.reset_index(),
on="KurzName",
).sort_values("index")
fig.add_trace(
go.Scatter(
x=df["KurzName"],
y=df["max_nValueNorm"],
name=k + " max",
),
row=5,
col=1,
)
fig

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Sorting two plotly bar charts by column value - python

Related

Make subplots using plotly express with values coming from a dataframe

Setting boundaries for datetime x axis on Plotly (Python)

Python Plotly display other information on Hover

How to set properties on a row/column in a grid of plotly plots?

How to get rid of scribbled lines in plotly line plot?

Categories

Resources