Connecting data points with lines in a Plotly boxplot in Python

Connecting data points with lines in a Plotly boxplot in Python - python

I am working on some boxplots. I found this code very helpful and I managed to replicate it for my needs:
import plotly.express as px
import numpy as np
import pandas as pd
np.random.seed(1)
y0 = np.random.randn(50) - 1
y1 = np.random.randn(50) + 1
df = pd.DataFrame({'graph_name':['trace 0']*len(y0)+['trace 1']*len(y1),
'value': np.concatenate([y0,y1],0),
'color':np.random.choice([0,1,2,3,4,5,6,7,8,9], size=100, replace=True)}
)
fig = px.strip(df,
x='graph_name',
y='value',
color='color',
stripmode='overlay')
fig.add_trace(go.Box(y=df.query('graph_name == "trace 0"')['value'], name='trace 0'))
fig.add_trace(go.Box(y=df.query('graph_name == "trace 1"')['value'], name='trace 1'))
fig.update_layout(autosize=False,
width=600,
height=600,
legend={'traceorder':'normal'})
fig.show()
I am now trying to put some lines connecting the datapoints with the same colors, but I am lost. Any idea?
Something similar to this:

My first idea was to add lines to your figure by using plotly shapes and specifying the start and end points in x- and y-axis coordinates. However, when you use px.strip, plotly implements jittering (adding randomly generated small values, say between -0.1 and 0.1, to the x-coordinates under the hood to avoid points overlapping), but as far as I know, there is no way to retrieve the exact x-coordinates of each point.
However we can get around this by using go.Scatter to plot all the paired points individually, adding jittering as needed to the x-values and connecting each pair of points with a line. We are basically implementing px.strip ourselves but with full control of the exact coordinates of each point.
In order to toggle colors the same way that px.strip allows you to, we need to assign all points of the same color to the same legendgroup, and also only show the legend entry the first time a color is plotted (as we don't want an legend entry for each point)
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
np.random.seed(1)
y0 = np.random.randn(50) - 1
y1 = np.random.randn(50) + 1
## sort both sets of data so we can easily connect them with line annotations
y0.sort()
y1.sort()
df = pd.DataFrame({'graph_name':['trace 0']*len(y0)+['trace 1']*len(y1),
'value': np.concatenate([y0,y1],0)}
# 'color':np.random.choice([0,1,2,3,4,5,6,7,8,9], size=100, replace=True)}
)
fig = go.Figure()
## i will set jittering to 0.1
x0 = np.array([0]*len(y0)) + np.random.uniform(-0.1,0.1,len(y0))
x1 = np.array([1]*len(y0)) + np.random.uniform(-0.1,0.1,len(y0))
## px.colors.sequential.Plasma contains 10 distinct colors
## colors_list = np.random.choice(px.colors.qualitative.D3, size=50)
## for simplicity, we repeat it 5 times instead of selecting randomly
## this guarantees the colors appear in order in the legend
colors_list = px.colors.qualitative.D3*5
color_number = {i:color for color,i in enumerate(px.colors.qualitative.D3)}
## keep track of whether the color is showing up for the first time as we build out the legend
colors_legend = {color:False for color in colors_list}
for x_start,x_end,y_start,y_end,color in zip(x0,x1,y0,y1,colors_list):
## if the color hasn't been added to the legend yet, add a legend entry
if colors_legend[color] == False:
fig.add_trace(
go.Scatter(
x=[x_start,x_end],
y=[y_start,y_end],
mode='lines+markers',
marker=dict(color=color),
line=dict(color="rgba(100,100,100,0.5)"),
legendgroup=color_number[color],
name=color_number[color],
showlegend=True,
hoverinfo='skip'
)
)
colors_legend[color] = True
## otherwise omit the legend entry, but add it to the same legend group
else:
fig.add_trace(
go.Scatter(
x=[x_start,x_end],
y=[y_start,y_end],
mode='lines+markers',
marker=dict(color=color),
line=dict(color="rgba(100,100,100,0.5)"),
legendgroup=color_number[color],
showlegend=False,
hoverinfo='skip'
)
)
fig.add_trace(go.Box(y=df.query('graph_name == "trace 0"')['value'], name='trace 0'))
fig.add_trace(go.Box(y=df.query('graph_name == "trace 1"')['value'], name='trace 1'))
fig.update_layout(autosize=False,
width=600,
height=600,
legend={'traceorder':'normal'})
fig.show()

Related

Plotly to show 2 decimal points when hovering over the chart, not nearest point

I am using Plotly to build a line chart, and when I hover over the line I would like it to display the x and y axis values up to 2 decimal points, instead of displaying the nearest data point on the line chart. To explain better, please see the example:
df = pd.DataFrame({'col1':[0.5,1.5,2.5], 'time':[2,3.5,4.5]})
def plot():
fig = go.Figure()
fig.add_trace(go.Scatter(x = df['time'],
y = df['col1'],
mode='lines', name = 'time plot',
hovertemplate='%{x:.2f}: %{y:.2f}'))
fig.update_layout(title='Plot', xaxis_tickformat = '.3f')
So, when I hover over the line, I can see x and y axis values to the nearest point from my dataset. I would like to be able to see 2 decimal points, for example, if I hover over the line, I want to see the points 2.11, 2.12 etc from the x-axis, even though they are not available on the data points.

I cannot think of a way to do this using plotly methods but I was able to think of a workaround by creating another line plot and setting the opacity to zero.
import plotly.graph_objects as go
import pandas as pd
import numpy as np
# your data
df = pd.DataFrame({'col1':[0.5,1.5,2.5], 'time':[2,3.5,4.5]})
# get the min and max X axis values
min_val, max_val = df['time'].agg([min, max])
# use np.arange to create the range with a step of .01
x = np.arange(min_val, max_val+.01, .01)
# create a zeros array of the same length
y = np.zeros(len(x))
# create your go.Figure object
fig = go.Figure()
# add your traces
fig.add_trace(go.Scatter(x=df['time'],
y=df['col1'],
name='time plot',
hovertemplate='%{x:.2f}: %{y:.2f}'))
fig.add_trace(go.Scatter(x=x,
y=y,
showlegend=False, # remove line from legend
hoverinfo='x',
opacity=0)) # set opacity to zero so it does not display on the graph
# your layout
fig.update_layout(hovermode='x unified', xaxis_tickformat = '.2f', title='Plot')
fig.show()

Rounding Numbers in a Quartile Figures of a Plotly Box Plot

I have been digging around a while trying to figure out how to round the numbers displayed in quartile figures displayed in the hover feature. There must be a straightforward to do this as it is with the x and y coordinates. In this case rounding to two decimals would be sufficient.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/tips.csv")
fig = go.Figure(data=go.Box(y=df['total_bill'],
name='total_bill',
boxmean=True,
)
)
fig.update_layout(width=800, height=800,
hoverlabel=dict(bgcolor="white",
font_size=16,
font_family="Arial",
)
)
fig.show()

Unfortunately this is something that it looks like Plotly cannot easily do. If you modify the hovertemplate, it will only apply to markers that you hover over (the outliers), and the decimals after each of the boxplot statistics will remain unchanged upon hovering. Another issue with plotly-python is that you cannot extract the boxplot statistics because this would require you to interact with the javascript under the hood.
However, you can calculate the boxplot statistics on your own using the same method as plotly and round all of the statistics down to two decimal places. Then you can pass boxplot statistics: lowerfence, q1, median, mean, q3, upperfence to force plotly to construct the boxplot manually, and plot all the outliers as another trace of scatters.
This is a pretty ugly hack because you are essentially redoing all of calculations Plotly already does, and then constructing the boxplot manually, but it does force the boxplot statistics to display to two decimal places.
from math import floor, ceil
from numpy import mean
import pandas as pd
import plotly.graph_objects as go
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/tips.csv")
## calculate quartiles as outlined in the plotly documentation
def get_percentile(data, p):
data.sort()
n = len(data)
x = n*p + 0.5
x1, x2 = floor(x), ceil(x)
y1, y2 = data[x1-1], data[x2-1] # account for zero-indexing
return round(y1 + ((x - x1) / (x2 - x1))*(y2 - y1), 2)
## calculate all boxplot statistics
y = df['total_bill'].values
lowerfence = min(y)
q1, median, q3 = get_percentile(y, 0.25), get_percentile(y, 0.50), get_percentile(y, 0.75)
upperfence = max([y0 for y0 in y if y0 < (q3 + 1.5*(q3-q1))])
## construct the boxplot
fig = go.Figure(data=go.Box(
x=["total_bill"]*len(y),
q1=[q1], median=[median], mean=[round(mean(y),2)],
q3=[q3], lowerfence=[lowerfence],
upperfence=[upperfence], orientation='v', showlegend=False,
)
)
outliers = y[y>upperfence]
fig.add_trace(go.Scatter(x=["total_bill"]*len(outliers), y=outliers, showlegend=False, mode='markers', marker={'color':'#1f77b4'}))
fig.update_layout(width=800, height=800,
hoverlabel=dict(bgcolor="white",
font_size=16,
font_family="Arial",
)
)
fig.show()

for me, setting yaxis_tickformat=",.2f" worked:
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/tips.csv")
fig = go.Figure(data=go.Box(y=df['total_bill'],
name='total_bill',
boxmean=True,
)
)
fig.update_layout(width=800, height=800,
# >>>>
yaxis_tickformat=",.2f",
# <<<<
hoverlabel=dict(bgcolor="white",
font_size=16,
font_family="Arial",
)
)
fig.show()
... you can also override yaxis back by setting text of y ticks:
fig.update_layout(
yaxis = dict(
tickformat=",.2f",
tickmode = 'array',
tickvals = [10, 20, 30, 40, 50],
ticktext =["10", "20", "30", "40", "50"],
))
if you want the y axis ticks unchanged
(tested on plotly 5.8.2)

Plotly: How to set a fill color between two vertical lines?

Using matplotlib, we can "trivially" fill the area between two vertical lines using fill_between() as in the example:
https://matplotlib.org/3.2.1/gallery/lines_bars_and_markers/fill_between_demo.html#selectively-marking-horizontal-regions-across-the-whole-axes
Using matplotlib, I can make what I need:
We have two signals, and I''m computing the rolling/moving Pearson's and Spearman's correlation. When the correlations go either below -0.5 or above 0.5, I want to shade the period (blue for Pearson's and orange for Spearman's). I also darken the weekends in gray in all plots.
However, I'm finding a hard time to accomplish the same using Plotly. And it will also be helpful to know how to do it between two horizontal lines.
Note that I'm using Plotly and Dash to speed up the visualization of several plots. Users asked for a more "dynamic type of thing." However, I'm not a GUI guy and cannot spend time on this, although I need to feed them with initial results.
BTW, I tried Bokeh in the past, and I gave up for some reason I cannot remember. Plotly looks good since I can use either from Python or R, which are my main development tools.
Thanks,
Carlos

I don't think there is any built-in Plotly method that that is equivalent to matplotlib's fill_between() method. However you can draw shapes so a possible workaround is to draw a grey rectangle and set the the parameter layer="below" so that the signal is still visible. You can also set the coordinates of the rectangle outside of your axis range to ensure the rectangle extends to the edges of the plot.
You can fill the area in between horizontal lines by drawing a rectangle and setting the axes ranges in a similar manner.
import numpy as np
import plotly.graph_objects as go
x = np.arange(0, 4 * np.pi, 0.01)
y = np.sin(x)
fig = go.Figure()
fig.add_trace(go.Scatter(
x=x,
y=y
))
# hard-code the axes
fig.update_xaxes(range=[0, 4 * np.pi])
fig.update_yaxes(range=[-1.2, 1.2])
# specify the corners of the rectangles
fig.update_layout(
shapes=[
dict(
type="rect",
xref="x",
yref="y",
x0="4",
y0="-1.3",
x1="5",
y1="1.3",
fillcolor="lightgray",
opacity=0.4,
line_width=0,
layer="below"
),
dict(
type="rect",
xref="x",
yref="y",
x0="9",
y0="-1.3",
x1="10",
y1="1.3",
fillcolor="lightgray",
opacity=0.4,
line_width=0,
layer="below"
),
]
)
fig.show()

You haven't provided a data sample so I'm going to use a synthetical time-series to show you how you can add a number of shapes with defined start and stop dates for several different categories using a custom function bgLevel
Two vertical lines with a fill between them very quickly turns into a rectangle. And rectangles can easily be added as shapes using fig.add_shape. The example below will show you how to find start and stop dates for periods given by a certain critera. In your case these criteria are whether or not the value of a variable is higher or lower than a certain level.
Using shapes instead of traces with fig.add_trace() will let you define the position with regards to plot layers using layer='below'. And the shapes outlines can easily be hidden using line=dict(color="rgba(0,0,0,0)).
Plot 1: Time series figure with random data:
Plot 2: Background is set to an opaque grey when A > 100 :
Plot 2: Background is also set to an opaque red when D < 60
Complete code:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import datetime
pd.set_option('display.max_rows', None)
# data sample
nperiods = 200
np.random.seed(123)
df = pd.DataFrame(np.random.randint(-10, 12, size=(nperiods, 4)),
columns=list('ABCD'))
datelist = pd.date_range(datetime.datetime(2020, 1, 1).strftime('%Y-%m-%d'),periods=nperiods).tolist()
df['dates'] = datelist
df = df.set_index(['dates'])
df.index = pd.to_datetime(df.index)
df.iloc[0] = 0
df = df.cumsum().reset_index()
# function to set background color for a
# specified variable and a specified level
# plotly setup
fig = px.line(df, x='dates', y=df.columns[1:])
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,255,0.1)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,255,0.1)')
def bgLevels(fig, variable, level, mode, fillcolor, layer):
"""
Set a specified color as background for given
levels of a specified variable using a shape.
Keyword arguments:
==================
fig -- plotly figure
variable -- column name in a pandas dataframe
level -- int or float
mode -- set threshold above or below
fillcolor -- any color type that plotly can handle
layer -- position of shape in plotly fiugre, like "below"
"""
if mode == 'above':
m = df[variable].gt(level)
if mode == 'below':
m = df[variable].lt(level)
df1 = df[m].groupby((~m).cumsum())['dates'].agg(['first','last'])
for index, row in df1.iterrows():
#print(row['first'], row['last'])
fig.add_shape(type="rect",
xref="x",
yref="paper",
x0=row['first'],
y0=0,
x1=row['last'],
y1=1,
line=dict(color="rgba(0,0,0,0)",width=3,),
fillcolor=fillcolor,
layer=layer)
return(fig)
fig = bgLevels(fig = fig, variable = 'A', level = 100, mode = 'above',
fillcolor = 'rgba(100,100,100,0.2)', layer = 'below')
fig = bgLevels(fig = fig, variable = 'D', level = -60, mode = 'below',
fillcolor = 'rgba(255,0,0,0.2)', layer = 'below')
fig.show()

I think that fig.add_hrect() and fig.add_vrect() are the simplest approaches to reproducing the MatPlotLib fill_between functionality in this case:
https://plotly.com/python/horizontal-vertical-shapes/
For your example, add_vrect() should do the trick.

Hoverinformation for shapes in plotly

I know there is the hovertemplate/hover_text/ option for traces (marker/line) but I cannot find such a thing for shapes.
Is there a way to have a hover text pop up when moving over a shape? Maybe a workaround?
Example:
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(
x=[1.5, 3],
y=[2.5, 2.5],
text=["Rectangle reference to the plot",
"Rectangle reference to the axes"],
mode="markers",
))
fig.add_shape(
# Rectangle reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0.25,
y0=0,
x1=0.5,
y1=0.5,
line=dict(
color="LightSeaGreen",
width=3,
),
fillcolor="PaleTurquoise",
)
When I hover over the two points, I get a hover-template with information. How can I get something similar for the shape?

It seems that it's not possible to add hoverinfo to shapes directly. But you can obtain something very close to what seems to be the desired effect through the right combination of shapes and traces. The following plot is made from specifying two rectangles in a list like:
shapes = [[2,6,2,6],
[4,7,4,7]]
The rest of the code snippet is set up to be flexible with regards to the number of shapes, and the colors assigned to them and the corresponding traces to make that little dot in the lower right corners of the shapes.
Plot:
If this is something you can use, we can discuss ways to edit what is being displayed in the hoverinfo.
Complete code:
# Imports
import pandas as pd
#import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
# shape definitions
shapes = [[2,6,2,6],
[4,7,4,7]]
# color management
# define colors as a list
colors = px.colors.qualitative.Plotly
# convert plotly hex colors to rgba to enable transparency adjustments
def hex_rgba(hex, transparency):
col_hex = hex.lstrip('#')
col_rgb = list(int(col_hex[i:i+2], 16) for i in (0, 2, 4))
col_rgb.extend([transparency])
areacol = tuple(col_rgb)
return areacol
rgba = [hex_rgba(c, transparency=0.4) for c in colors]
colCycle = ['rgba'+str(elem) for elem in rgba]
# plotly setup
fig = go.Figure()
# shapes
for i, s in enumerate(shapes):
fig.add_shape(dict(type="rect",
x0=s[0],
y0=s[2],
x1=s[1],
y1=s[3],
layer='above',
fillcolor=colCycle[i],
line=dict(
color=colors[i],
width=3)))
# traces as dots in the lower right corner for each shape
for i, s in enumerate(shapes):
fig.add_trace(go.Scatter(x=[s[1]], y=[s[2]], name = "Hoverinfo " +str(i + 1),
showlegend=False,
mode='markers', marker=dict(color = colors[i], size=12)))
# edit layout
fig.update_layout(yaxis=dict(range=[0,8], showgrid=True),
xaxis=dict(range=[0,8], showgrid=True))
fig.show()

I thought of a solution I am happy with.
Simply draw a shape. You won't be able to see a hover text. However, if you add a trace with a fill on top of the shape, then set the trace to opacity=0 you will see the hover text from the trace pop up when moving over the shape.
Again, thanks for your responses!
import plotly.graph_objects as go
# Draw shape (you won't be able to add a hover text for it)
fig = go.Figure()
fig.add_shape(
type="rect",
x0=0, y0=0,
x1=4, y1=3,
fillcolor='LightSkyBlue',
line_color='Blue',
name='Shape 1'
)
# Adding a trace with a fill, setting opacity to 0
fig.add_trace(
go.Scatter(
x=[0,0,4,4,0],
y=[0,3,3,0,0],
fill="toself",
mode='lines',
name='',
text='Custom text on top of shape',
opacity=0
)
)
fig.show()

Plotly: Is there a way to only change one of the add_trace elements rather than all?

In python3, I am trying to edit a specific add_trace() plot. For context, I create a Plotly graph with dropdown menus to change/update the plot itself.
I am plotting a 3D scatter which takes x, y, z. But then I added a planar graph to the same figure, go.Surface, which is the planar estimation for the data.
The problem with this is that when I change an attribute, say 'x' for the X-axis, it also changes the 'x' attribute on the go.Surface, which should only be the estimation of the data rather than the data itself.
Is there a way to separate specific add_trace() attributes, so that updating a data parameter won't effect the go.Surface parameter?

Here is an example is used for editing the left and right y axis
in the arg of the menu you need to add a list with a index. [0] is the first trace added [1] the second etc.
ps I'm not a 'code' expert just 'learned' python last week for ChemE thesis so my code works but it probably isn't the best/most efficient
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
# Data
x = np.linspace(-np.pi, np.pi, 200)
y1 = np.sin(x) # f(x) = sin(x*pi)
y2 = np.cos(x) #f(x) = cos(x*pi)
y3 = np.tan(x) #f(x) = tan(x*pi)
d = {'x': x, 'y1': y1,'y2': y2,'y3': y3}
df = pd.DataFrame(data=d)
name = 'Test'
def Graph(df,name):
#Make fig
fig = make_subplots(specs=[[{"secondary_y": True}]])
#Add first trace
fig.add_trace(
go.Scatter(x=df['x'],
y=df['y1'],
),
secondary_y=False,)
#Add second trace
fig.add_trace(
go.Scatter(x=df['x'],
y=df['y2'],
),
secondary_y=True,)
# buttons for menu 1, contolling first trace
buttons1=[]
#Dynamic list based on df keys
for dfk in df.keys():
buttons1.append(dict(method='restyle',
label=dfk,
visible=True,
args=[{'y':[df[dfk].values]}, [0]], #The [0] 'locks' it to the first trace
)
)
# buttons for menu 2, contolling seccond trace
buttons2=[]
#Dynamic list based on df keys
for dfk in df.keys():
buttons2.append(dict(method='restyle',
label=dfk,
visible=True,
args=[{'y':[df[dfk].values]}, [1]], #The [1] 'locks' it to the second trace
)
)
#Delete x-axis from dropdown
del buttons1[0]
del buttons2[0]
#List for menus
updatemenu=[]
#add dict for buttons
your_menu1=dict()
updatemenu.append(your_menu1)
your_menu2=dict()
updatemenu.append(your_menu2)
#Fill dict
updatemenu[0]['buttons']=buttons1
updatemenu[0]['x']=0.15 #Some styling
updatemenu[0]['y']=1.12 #Some styling
updatemenu[1]['buttons']=buttons2
updatemenu[1]['x']=0.33 #Some styling
updatemenu[1]['y']=1.12 #Some styling
# add dropdown menus to the figure
fig.update_layout(showlegend=True, updatemenus=updatemenu)
# add notations to the dropdown menus
fig.update_layout(
annotations=[
dict(text="Left y-axis:", x=0, xref="paper", y=1.10, yref="paper",
align="left", showarrow=False),
dict(text="Right y-axis::", x=0.17, xref="paper", y=1.10,
yref="paper", showarrow=False)])
name = str(name)+'.html'
fig.write_html(name, auto_open=True)
Graph(df,name)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Connecting data points with lines in a Plotly boxplot in Python - python

Related

Plotly to show 2 decimal points when hovering over the chart, not nearest point

Rounding Numbers in a Quartile Figures of a Plotly Box Plot

Plotly: How to set a fill color between two vertical lines?

Hoverinformation for shapes in plotly

Plotly: Is there a way to only change one of the add_trace elements rather than all?

Categories

Resources