I want to change the hover text and hover data for a python plotly boxplot. Instead of 5 separate hover boxes for max, q3, median, q1, and min, I want one condensed hover box for Median, Mean, IQR and date. I have played around with every "hover" variable with no luck. My sample code is found below.
import numpy as np
import pandas as pd  # fix: pd.DataFrame / pd.concat / pd.merge are used below
import plotly.express as px

# Sample data: 20 rows labelled '2020' followed by 25 rows labelled '2021'.
lst = [['2020'], ['2021']]
numbers = [20, 25]
r = [x for i, j in zip(lst, numbers) for x in i * j]
df = pd.DataFrame(r, columns=['year'])
# 1..len(df) scaled by one random factor that is drawn a single time.
df['obs'] = np.arange(1, len(df) + 1) * np.random.random()

# Per-year summary statistics of 'obs', each a one-column frame indexed by year.
mean = df.groupby('year').mean()[['obs']]
median = df.groupby('year').median()[['obs']]
iqr = df.groupby('year').quantile(0.75)[['obs']] - df.groupby('year').quantile(0.25)[['obs']]
stats = pd.concat([mean, median, iqr], axis=1)
stats.columns = ['Mean', 'Median', 'IQR']

# Attach the per-year statistics to every observation row.
tot_df = pd.merge(df, stats, right_index=True, left_on='year', how='left')

fig = px.box(tot_df, x="year", y="obs", points=False, hover_data=['year', 'Mean', 'Median', 'IQR'])
fig.show()
In this case I tried to use "hover_data", which does not raise an error but also does not change the plot, as shown above. I have tried both express and graph_objects with no luck. My plotly version is 4.9.0. Thank you!
I have used the technique of overlaying a bar trace on top of the boxplot trace.
The bar trace can be configured to show the information you want.
For the sake of demonstration I have set the opacity to 0.05; it can be set to 0 to make the bars fully invisible.
I built this against plotly 5.2.1 and have not tested it against 4.9.0.
import numpy as np
import plotly.express as px
import pandas as pd

# --- sample data: 20 rows of '2020' followed by 25 rows of '2021' ---
lst = [['2020'], ['2021']]
numbers = [20, 25]
r = [label for labels, count in zip(lst, numbers) for label in labels * count]
df = pd.DataFrame(r, columns=['year'])
df['obs'] = np.arange(1, len(df) + 1) * np.random.random()

# --- per-year statistics, merged back onto every observation row ---
by_year = df.groupby('year')
mean = by_year.mean()[['obs']]
median = by_year.median()[['obs']]
iqr = by_year.quantile(0.75)[['obs']] - by_year.quantile(0.25)[['obs']]
stats = pd.concat([mean, median, iqr], axis=1)
stats.columns = ['Mean', 'Median', 'IQR']
tot_df = pd.merge(df, stats, right_index=True, left_on='year', how='left')

fig = px.box(tot_df, x="year", y="obs", points=False)

# --- overlay: one bar per year spanning min..max of 'obs' ---
# Every column other than the key and the raw observation is shown on hover.
stat_cols = [c for c in tot_df.columns if c not in ["year", "obs"]]
year_groups = tot_df.groupby("year", as_index=False)
bar_extent = year_groups.agg(
    base=("obs", "min"),
    bar=("obs", lambda s: s.max() - s.min()),
)
# The statistics are constant within a year, so the first row of each group suffices.
first_stats = year_groups.agg({c: "first" for c in stat_cols})
bar_df = bar_extent.merge(first_stats, on="year")

# Show the statistics, hide the helper columns that only position the bar.
hover_cfg = {c: True for c in stat_cols}
hover_cfg.update({"base": False, "bar": False})

fig2 = px.bar(
    bar_df,
    x="year",
    y="bar",
    base="base",
    hover_data=hover_cfg,
).update_traces(opacity=0.05)
fig.add_traces(fig2.data)
fig2 without named aggregations
# Build the min..max overlay bars with groupby-apply rather than named aggregation.
hover_cols = [c for c in tot_df.columns if c not in ["year", "obs"]]

# Per year: where the bar starts ("base") and how tall it is ("bar").
obs_extent = tot_df.groupby("year", as_index=False)["obs"].apply(
    lambda s: pd.Series({"base": s.min(), "bar": s.max() - s.min()})
)
# Per year: first value of every remaining column, to be shown on hover.
first_per_year = tot_df.groupby("year", as_index=False).agg(
    {c: "first" for c in hover_cols}
)

fig2 = px.bar(
    obs_extent.merge(first_per_year, on="year"),
    x="year",
    y="bar",
    base="base",
    hover_data={**dict.fromkeys(hover_cols, True), "base": False, "bar": False},
).update_traces(opacity=0.05)
Related
Given a dataframe with 2 groups: (group1, group2), that have values > and < than 0:
plot:
Bar plot
x = x
y = values, divided by group1, group2
color = red if value<0, green if value>0
legend shows group1, group2 with different colors.
My current code, however, is not coloring the bars as I would expect, and the legend entries are shown with the same color:
import pandas as pd
import numpy as np
import plotly.express as px

# Wide frame indexed by x with one column per group; NaN marks a missing value.
df = pd.DataFrame(
    {
        "x": [1, 2, 3],
        "group1": [np.nan, 1, -0.5],
        "group2": [np.nan, -0.2, 1],
    }
).set_index("x")

# Long format: one row per (x, variable) pair.
df_ = df.reset_index().melt(id_vars='x')

fig = px.bar(df_, x='x', y='value', color='variable', barmode='group')

# One colour per row of df_ (NaN is not < 0, so it maps to 'green').
# The same full-length list is handed to every trace by update_traces.
sign_colors = []
for val in df_['value']:
    sign_colors.append('red' if val < 0 else 'green')

fig.update_traces(
    marker_color=sign_colors,
    marker_line_color='black',
    marker_line_width=1.5,
)
fig.show()
OUT with indications of what i want to achieve:
Since this cannot be achieved with express, we use graph objects to draw one bar trace for each group. The colour test is deliberately written differently for group 1 (`val < 0` gives red, otherwise green) and group 2 (`val > 0` gives green, otherwise red); the reason is that the legend will not be colour-coded per group unless this is done.
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

df = pd.DataFrame(
    {
        "x": [1, 2, 3],
        "group1": [np.nan, 1, -0.5],
        "group2": [np.nan, -0.2, 1],
    }
).set_index("x")
df_ = df.reset_index().melt(id_vars='x')

fig = go.Figure()

# group1: negative -> red, everything else (including NaN) -> green.
g1 = df_.query('variable =="group1"')
g1_colors = ['red' if val < 0 else 'green' for val in g1['value']]
fig.add_trace(
    go.Bar(
        x=g1['x'],
        y=g1['value'],
        marker_color=g1_colors,
        marker_line_color='black',
        marker_line_width=1.5,
        name='group1',
    )
)

# group2: positive -> green, everything else (including NaN) -> red.
# The test intentionally differs from group1's, so NaN maps to a different colour here.
g2 = df_.query('variable =="group2"')
g2_colors = ['green' if val > 0 else 'red' for val in g2['value']]
fig.add_trace(
    go.Bar(
        x=g2['x'],
        y=g2['value'],
        marker_color=g2_colors,
        marker_line_color='black',
        marker_line_width=1.5,
        name='group2',
    )
)

fig.update_layout(
    barmode='group',
    xaxis=dict(title_text='x'),
    yaxis=dict(title_text='value'),
)
fig.show()
If you want to color each bar according to the sign of its value (and not by variable group), you have to update the traces individually (or draw the graph objects manually):
fig = px.bar(df_, x='x', y='value', color='variable', barmode='group')


def _color_by_sign(trace):
    """Colour the bars of *trace* red where the value is negative, green elsewhere."""
    # Rows of df_ that belong to this trace are matched through the trace name.
    group_values = df_.loc[df_['variable'].eq(trace.name), 'value']
    return trace.update(marker_color=np.where(group_values < 0, 'red', 'green'))


fig.for_each_trace(_color_by_sign)
fig.update_layout(showlegend=False)  # Hide legend because there is no distinct group
fig.show()
Output:
To stick with plotly.express, I would add a column to your dataframe, e.g. df_['positive'] with a boolean, and then color your plot by this variable.
It would look like this:
import pandas as pd
import numpy as np
import plotly.express as px

df = pd.DataFrame(
    {
        "x": [1, 2, 3],
        "group1": [np.nan, 1, -0.5],
        "group2": [np.nan, -0.2, 1],
    }
).set_index("x")
df_ = df.reset_index().melt(id_vars='x')

# Boolean helper column that drives the colouring (NaN >= 0 evaluates to False).
df_['positive'] = df_['value'] >= 0

sign_palette = {True: 'green', False: 'red'}
fig = px.bar(
    df_,
    x='x',
    y='value',
    barmode='group',
    color='positive',
    color_discrete_map=sign_palette,
)
fig.update_traces(marker_line_color='black', marker_line_width=1.5)
fig.show('browser')
which yields the following :
EDIT following comments
If you want to keep the colors AND the group distinction within plotly.express, one way could be to add patterns...
Solution 1 : Every combination has its legend entry
df = pd.DataFrame(
    {
        "x": [1, 2, 3],
        "group1": [np.nan, 1, -0.5],
        "group2": [np.nan, -0.2, 1],
    }
).set_index("x")
df_ = df.reset_index().melt(id_vars='x')

# Boolean flag plus a readable label derived from it.
positive = df_['value'] >= 0
df_['positive'] = positive
df_['sign'] = ['positive' if flag else 'negative' for flag in df_['positive']]

# Every combination of colour (sign) and pattern (variable) gets its own legend entry.
sign_palette = {'positive': 'green', 'negative': 'red'}
fig = px.bar(
    df_,
    x='x',
    y='value',
    barmode='group',
    color='sign',
    color_discrete_map=sign_palette,
    pattern_shape="variable",
)
fig.update_layout(legend_title="Groups & Signs", bargap=0.5, bargroupgap=0.1)
fig.show('browser')
which outputs the following
Solution 2 : Legend only reflects patterns
# Legend distinguishes the groups by pattern only; bar colours follow the sign.
fig = px.bar(
    df_,
    x='x',
    y='value',
    color='variable',
    barmode='group',
    pattern_shape="variable",
)
fig.update_layout(legend_title="Groups", bargap=0.5, bargroupgap=0.1)


def _color_by_sign(trace):
    """Colour the bars of *trace* red where the value is negative, green elsewhere."""
    group_values = df_.loc[df_['variable'].eq(trace.name), 'value']
    return trace.update(marker_color=np.where(group_values < 0, 'red', 'green'))


fig.for_each_trace(_color_by_sign)
fig.show('browser')
which outputs :
However I was not able to 'remove' the green color from the legend...
I am trying to make a categorical scatter plot like this:
This is supposed to be a categorical scatter plot with discrete x values represented by counts of elements in locations(Wells) vs Time Point.
I have written this code:
df = pd.read_excel(r'file.xlsx')
fig = go.Figure()

# All four well columns are passed together as a single x argument
# (a list of four Series), against one 'Time Point' column for y.
well_columns = [df['Well A'], df['Well B'], df['Well C'], df['Well D']]
fig.add_trace(
    go.Scatter(x=well_columns, y=df['Time Point'], mode='markers')
)
And I get this as a result:
Which is crazy, I have no idea what is even happening there.
Could you please help?..
P.S. My df looks like this:
Please, help :(
If this is your desired result:
# Add one marker-only trace per column after the first, with 'Time Point' on x.
for c in df.columns[1:]:
    fig.add_trace(
        go.Scatter(
            x=df['Time Point'],
            y=df[c],
            mode='markers',
            name=c,
        )
    )
Complete code:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# Number of time points; drives every column length below.
obs = 8
df = pd.DataFrame(
    {
        'Time Point': np.arange(1, obs + 1),
        'Well A': np.random.randint(10, size=obs),
        'Well B': np.random.randint(10, size=obs),
        'Well C': np.random.randint(10, size=obs),
        'Well D': np.random.randint(10, size=obs),
    }
)

fig = go.Figure()
# One marker-only trace per well column (every column after 'Time Point').
for c in df.columns[1:]:
    fig.add_trace(
        go.Scatter(
            x=df['Time Point'],
            y=df[c],
            mode='markers',
            name=c,
        )
    )
fig.show()
I need help calculating levels based on OHLC data in Python; please find the sample code below. The expected output is all the price levels with their datetimes, which would help me plot ranges on the charts. More details are given in the comments in the code. Any help here would be really helpful.
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px

df = pd.read_csv("https://raw.githubusercontent.com/IRPK16/SampleShare/5372e0c9fe07f6e31ac2729c86209684f6af69d1/CADCHF.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Candlestick chart of the whole series, without the range slider.
candles = go.Candlestick(
    x=df['Date'],
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
fig = go.Figure(data=candles)
fig.update(layout_xaxis_rangeslider_visible=False)
fig.update_layout(title='CADCHF', yaxis_title='Price', xaxis_title='Date')

# Scenario 1: normal range. The window is hard-coded to the final 22 rows;
# the goal is to obtain such ranges (array[range1, range2, ...]) programmatically.
range_line = df.iloc[-22:].copy()
range_line['min_line'] = range_line['Close'].min()
range_line['mid_line'] = range_line['Close'].mean()
range_line['max_line'] = range_line['Close'].max()

# Horizontal lines across the window: lowest, highest, then mean close.
for level in ('min_line', 'max_line', 'mid_line'):
    fig.add_trace(
        go.Scatter(
            mode='lines',
            x=range_line['Date'],
            y=range_line[level],
            line={'color': 'black', 'width': 1},
        )
    )

print(df)

# Scenario 2: squeeze range. The window is hard-coded to rows 53..61 (iloc[53:62]);
# the goal is again to obtain such ranges programmatically.
range_line2 = df.iloc[53:62].copy()
range_line2['min_line'] = range_line2['Close'].min()
range_line2['mid_line'] = range_line2['Close'].mean()
range_line2['max_line'] = range_line2['Close'].max()

# Only the mean close is drawn for this window.
fig.add_trace(
    go.Scatter(
        mode='lines',
        x=range_line2['Date'],
        y=range_line2['mid_line'],
        line={'color': 'black', 'width': 1},
    )
)

fig.update_traces(showlegend=False)
fig.show()
expected output is as below:
def detect_level_method_2(df):
    """Collect candidate price levels from sliding High/Low windows.

    For each position ``i`` a window of rows around ``i`` is inspected. A
    window maximum (minimum) is recorded as a level once the same extreme
    value has been seen in five consecutive windows and ``isFarFromLevel``
    accepts it.

    Args:
        df: DataFrame with ``'High'`` and ``'Low'`` columns.

    Returns:
        A list of ``(index_label, price)`` tuples in the order they were
        found; empty when ``df`` has 10 rows or fewer.
    """
    levels = []
    max_list = []
    min_list = []
    for i in range(5, len(df) - 5):
        # NOTE(review): the High window covers 9 rows (i-5 .. i+3) while the
        # Low window below covers 10 rows (i-5 .. i+4). Kept as found;
        # confirm whether the asymmetry is intended.
        high_range = df['High'].iloc[i - 5:i + 4]
        current_max = high_range.max()
        # A new maximum restarts the run of identical maxima.
        if current_max not in max_list:
            max_list = []
        max_list.append(current_max)
        # isFarFromLevel is defined outside this block; presumably it rejects
        # prices too close to an already recorded level. TODO confirm.
        if len(max_list) == 5 and isFarFromLevel(current_max, levels, df):
            levels.append((high_range.idxmax(), current_max))

        low_range = df['Low'].iloc[i - 5:i + 5]
        current_min = low_range.min()
        # A new minimum restarts the run of identical minima.
        if current_min not in min_list:
            min_list = []
        min_list.append(current_min)
        if len(min_list) == 5 and isFarFromLevel(current_min, levels, df):
            levels.append((low_range.idxmin(), current_min))
    return levels
Update:
I am able to identify based on previous value, but unable to group the values based on close nearest range. Please help me in solving this problem.
# Load the CADCHF OHLC sample and flag rows whose close lies inside the previous row's range.
df = pd.read_csv("https://raw.githubusercontent.com/IRPK16/SampleShare/5372e0c9fe07f6e31ac2729c86209684f6af69d1/CADCHF.csv")
df['Date'] = pd.to_datetime(df['Date'])
# NOTE(review): despite the "Desc" suffix, ascending=[True] sorts oldest to newest.
dfOrderedByDateDesc = df.sort_values(['Date'], ascending=[True])
bar = dfOrderedByDateDesc.iloc[-1:].copy() # One-row frame holding the last row of the date-sorted copy
# NOTE(review): `median` is not used anywhere in the visible code.
from statistics import median
# In-place re-index of df; the sorted copy above was taken before this and is unaffected.
df.set_index('Date', inplace=True)
# Previous row's High / Low (shift moves values down one row; the first row becomes NaN).
df['Prev_High'] = df['High'].shift()
df['Prev_Low'] = df['Low'].shift()
# True where Close is strictly between the previous row's Low and High.
df['isInRange'] = ((df['Close'] < df['Prev_High']) & (df['Close'] > df['Prev_Low']))
print(df.tail(3))
I have a relatively simple issue, but cannot find any answer online that addresses it. Starting from a simple boxplot:
import plotly.express as px

# Iris sample data: box plot of sepal length per species.
df = px.data.iris()
fig = px.box(df, x='species', y='sepal_length')

# Number of rows per species, to be displayed on the plot.
species_col = df['species']
val_counts = species_col.value_counts()
I would now like to add val_counts (in this dataset, 50 for each species) to the plots, preferably on either of the following places:
On top of the median line
On top of the max/min line
Inside the hoverbox
How can I achieve this?
The snippet below will set count = 50 for all unique values of df['species'] on top of the max line using fig.add_annotation like this:
# Put the row count of each species just above that species' maximum value.
for s in df.species.unique():
    subset = df[df['species'] == s]  # filter once, reuse for both y and text
    fig.add_annotation(
        x=s,
        y=subset['sepal_length'].max(),
        text=str(len(subset)),
        yshift=10,  # shift the label by 10 so it does not sit on the whisker
        showarrow=False,
    )
Plot:
Complete code:
import plotly.express as px

df = px.data.iris()
fig = px.box(df, x='species', y='sepal_length')

# Put the row count of each species just above that species' maximum value.
for s in df.species.unique():
    subset = df[df['species'] == s]  # filter once, reuse for both y and text
    fig.add_annotation(
        x=s,
        y=subset['sepal_length'].max(),
        text=str(len(subset)),
        yshift=10,  # shift the label by 10 so it does not sit on the whisker
        showarrow=False,
    )

# NOTE(review): `f` is not used in the visible code; kept as in the original.
f = fig.full_figure_for_development(warn=False)
fig.show()
Using same approach that I presented in this answer: Change Plotly Boxplot Hover Data
calculate all the measures a box plot calculates plus the additional measure you want count
overlay bar traces over box plot traces so hover has all measures required
import plotly.express as px

df = px.data.iris()

# Measures to compute per species: a plain string is both the output column
# name and the aggregation; a tuple is (column name, aggregation callable).
measures = [
    "max",
    ("q75", lambda s: s.quantile(0.75)),
    "median",
    ("q25", lambda s: s.quantile(0.25)),
    "min",
    "count",
]
named_aggs = {}
for m in measures:
    label, func = (m, m) if isinstance(m, str) else m
    named_aggs[label] = ("sepal_length", func)

# Summarise sepal_length per species, then add the bar height (max - min).
df2 = df.groupby("species").agg(**named_aggs).reset_index()
df2 = df2.assign(y=lambda d: d["max"] - d["min"])

# Overlay a faint bar spanning min..max on the box plot; the hover shows every
# summary column except the helper height and the species key.
hidden = ("y", "species")
px.bar(
    df2,
    x="species",
    y="y",
    base="min",
    hover_data={c: c not in hidden for c in df2.columns},
    hover_name="species",
).update_traces(opacity=0.1).add_traces(px.box(df, x="species", y="sepal_length").data)
This code produces the figure I've attached. Notice the sums are the totals over the df, but I need the columns to only show the totals for that particular month. What do you have to set in the
text = ...
assignment for this to occur?
df = data[['Month', 'A', 'B']]
# One bar trace per distinct value of column 'A'.
for X in df['A'].unique():
    rows = df[df['A'] == X]  # filter once per group
    trace = go.Bar(
        x=rows['Month'],
        y=rows['B'],
        # A single string: the sum of 'B' over all of this group's rows.
        text=str(rows['B'].sum()),
    )
    traces.append(trace)
# Sum within each (Month, TA) pair so every bar carries its own monthly total.
df = data.groupby(['Month', 'TA']).sum().reset_index()
# One bar trace per distinct 'TA', labelled with the per-month 'Studies' value.
for TA in df['TA'].unique():
    rows = df[df['TA'] == TA]  # filter once per group
    trace = go.Bar(
        x=rows['Month'],
        y=rows['Studies'],
        text=rows['Studies'],
        name=TA,
    )
    traces.append(trace)
As long as all values are already showing in your figure, the following will work regardless of how you've built your figure or grouped your data:
# Read the per-bar text of every trace as floats: one inner list per trace.
numbers = [[float(nr) for nr in t.text] for t in fig.data]
# Column-wise sum across traces: the total for each bar position.
sums = [sum(i) for i in zip(*numbers)]
# Show the totals on the last trace only and blank the text of all others.
for i, d in enumerate(fig.data):
    if i == len(fig.data) - 1:
        d.text = sums
    else:
        d.text = ''
fig.show()
Result:
Example of original figure:
Complete code:
# imports
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# data: the first three columns and the last 25 rows of the stocks sample
df = px.data.stocks()
df = df[df.columns[:3]]
df = df.tail(25).copy()  # copy so the column assignment below targets an owned frame
df['date'] = pd.to_datetime(df['date'])

# group by month
dfm = df.groupby(pd.Grouper(key='date', freq='M')).agg('sum').reset_index()

# figure setup: one stacked bar trace per value column, labelled with its value
fig = go.Figure()
for col in dfm.columns[1:]:
    fig.add_trace(
        go.Bar(
            x=dfm.date,
            y=dfm[col],
            # first three characters of the value's string form
            text=[str(v)[:3] for v in dfm[col]],
            textposition='auto',
        )
    )
fig.update_layout(barmode='stack')

# grab and sum data for all bars
numbers = [[float(nr) for nr in t.text] for t in fig.data]
sums = [sum(i) for i in zip(*numbers)]
# Show the totals on the last trace only and blank the text of all others.
for i, d in enumerate(fig.data):
    if i == len(fig.data) - 1:
        d.text = sums
    else:
        d.text = ''
fig.show()