Plotly horizontal bar comparizon - python

The chart attached is from R plotly package. Does this exist or can be done in python using the plotly package?

You can create diverging stacked bars in plotly-python by plotting the bars for male and female populations as separate traces, making the population values negative for the men, and then using the original values in the customdata so the populations for men display positive values.
I followed the method outlined by #empet in his answer here, and modified the categories and hovertemplate to fit your example.
import numpy as np
import pandas as pd
import plotly.graph_objects as go
d = {'Age': ['0-19','20-29','30-39','40-49','50-59','60-Inf'],
'Male': [1000,2000,4200,5000,3500,1000],
'Female': [1000,2500,4000,4800,2000,1000],
}
df = pd.DataFrame(d)
fig = go.Figure()
fig.add_trace(go.Bar(x=-df['Male'].values,
y=df['Age'],
orientation='h',
name='Male',
customdata=df['Male'],
hovertemplate = "Age: %{y}<br>Pop:%{customdata}<br>Gender:Male<extra></extra>"))
fig.add_trace(go.Bar(x= df['Female'],
y =df['Age'],
orientation='h',
name='Female',
hovertemplate="Age: %{y}<br>Pop:%{x}<br>Gender:Female<extra></extra>"))
fig.update_layout(barmode='relative',
height=400,
width=700,
yaxis_autorange='reversed',
bargap=0.01,
legend_orientation ='h',
legend_x=-0.05, legend_y=1.1
)
fig.show()

Related

How to de-dupe legend in faceted choropleth chart?

I'm trying to create faceted maps by the column rank in my df. Each map will display the product for each state. I want the color of the product to be consistent across maps.
With the solution below I can achieve that, but the legend will show multiple entries for the same product, one for each state. How can I have the legend show only one entry per distinct product?
import pandas as pd
import plotly.express as px
from random import randint
df = pd.DataFrame({'rank': [1,1,1,1,2,2,2,2],'product':['A','B','C','D','C','D','Z','X'],'state':['WA','OR','CA','ID','WA','OR','CA','ID']})
unique_hi = df['product'].unique()
color_discrete_map = {unique_hi[k]: '#%06X' % randint(0, 0xFFFFFF) for k in range(len(unique_hi))}
fig = px.choropleth(df, color='product', facet_col="rank",facet_col_wrap=2,
locations="state", #featureidkey="properties.district",
locationmode="USA-states",
projection="mercator",height=600,
color_discrete_map=color_discrete_map,
title='Regional products'
)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":30,"l":0,"b":0})
fig.show()
If you check the contents of the created map in fig.data, you will find the original name of the legend, which is collected and only the names of the non-duplicated.
import pandas as pd
import plotly.express as px
from random import randint
df = pd.DataFrame({'rank': [1,1,1,1,2,2,2,2],'product':['A','B','C','D','C','D','Z','X'],'state':['WA','OR','CA','ID','WA','OR','CA','ID']})
unique_hi = df['product'].unique()
color_discrete_map = {unique_hi[k]: '#%06X' % randint(0, 0xFFFFFF) for k in range(len(unique_hi))}
fig = px.choropleth(df, color='product', facet_col="rank",facet_col_wrap=2,
locations="state", #featureidkey="properties.district",
locationmode="USA-states",
projection="mercator",height=600,
color_discrete_map=color_discrete_map,
title='Regional products'
)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":30,"l":0,"b":0})
# update
names = set()
fig.for_each_trace(
lambda trace:
trace.update(showlegend=False)
if (trace.name in names) else names.add(trace.name))
fig.show()
The way to add a product name as an annotation is not possible to specify it using map coordinates (I referred to this for the rationale), so adding the following code will make the annotation, but all products will need to be manually addressed. Upon further investigation, it seems that a combination of go.choroplethmapbox() and go.scattergeo() would do it. In this case, you will need to rewrite the code from scratch.
fig.add_annotation(
x=0.2,
xref='paper',
y=0.85,
yref='paper',
text='A',
showarrow=False,
font=dict(
color='yellow',
size=14
)
)

How plot points based on categorical variable in plotly

I am using Plotly for visualization. I want to make plot, and give the points colors based on categorical variable.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.Predicted, y=df.Predicted,colors='Category',mode='markers',
))
fig.add_trace(go.Scatter(x=df.Predicted, y=df.real , colors='Category'
))
fig.show()
where Category is column in my dataframe. How can I do this kind of graph
you have implied a data frame structure which I have simulated
it's simpler to use Plotly Express higher level API that graph objects
have used to calls to px.scatter() to generate traces defined in your question. Plus have renamed traces in second call to ensure legend is clear and made them lines
import numpy as np
import pandas as pd
import plotly.express as px
df = pd.DataFrame(
{
"Predicted": np.sort(np.random.uniform(3, 15, 100)),
"real": np.sort(np.random.uniform(3, 15, 100)),
"Category": np.random.choice(list("ABCD"), 100),
}
)
px.scatter(df, x="Predicted", y="Predicted", color="Category").add_traces(
px.line(df, x="Predicted", y="real", color="Category")
.for_each_trace(
lambda t: t.update(name="real " + t.name)
) # make it clear in legend this is second set of traces
.data
)

How to format plotly legend when using marker color?

I want to follow up on this post: Plotly: How to colorcode plotly graph objects bar chart using Python?.
When using plotly express, and specifying 'color', the legend is correctly produced as seen in the post by vestland.
This is my plotly express code:
data = {'x_data': np.random.random_sample((5,)),
'y_data': ['A', 'B', 'C', 'D', 'E'],
'c_data': np.random.randint(1, 100, size=5)
}
df = pd.DataFrame(data=data)
fig = px.bar(df,
x='x_data',
y='y_data',
orientation='h',
color='c_data',
color_continuous_scale='YlOrRd'
)
fig.show()
But when using go.Bar, the legend is incorrectly displayed as illustrated here:
This is my code using graph objects:
bar_trace = go.Bar(name='bar_trace',
x=df['x_data'],
y=df['y_data'],
marker={'color': df['c_data'], 'colorscale': 'YlOrRd'},
orientation='h'
)
layout = go.Layout(showlegend=True)
fig = go.FigureWidget(data=[bar_trace], layout=layout)
fig.show()
I'm learning how to use FigureWidget and it seems it can't use plotly express so I have to learn how to use graph objects to plot. How do I link the legend to the data such that it works like the plotly express example in vestland's post.
This really comes down to understanding what a high level API (plotly express) does. When you specify color in px if it is categorical it creates a trace per value of categorical. Hence the below two ways of creating a figure are mostly equivalent. The legend shows an item for each trace, not for each color.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
df = pd.DataFrame({"x":np.linspace(0,10,10), "y":np.linspace(5,15,10), "color":np.random.choice(list("ABCD"),10)})
px.bar(df, x="x", y="y", color="color", orientation="h").show()
fig = go.Figure()
for g in df.groupby("color"):
fig.add_trace(go.Bar(x=g[1]["x"], y=g[1]["y"], name=g[0], orientation="h"))
fig
supplementary based on comments
you do not have to use graph objects if you are using FigureWidget() as demonstrated by second figure, create with plotly express and then generate FigureWidget()
for continuous data normal pattern is to use a single trace and a colorbar (also demonstrated in second figure). However if you want a discrete legend, create a trace per value in c_data and use https://plotly.com/python-api-reference/generated/plotly.colors.html sample_colorscale()
import plotly.express as px
import plotly.colors
import plotly.graph_objects as go
import numpy as np
import pandas as pd
# simulate data frame...
df = pd.DataFrame(
{
"x_data": np.linspace(0, 10, 10),
"y_data": np.linspace(5, 15, 10),
"c_data": np.random.randint(0, 4, 10),
}
)
# build a trace per value in c_data using graph objects ... correct legend !!??
bar_traces = [
go.Bar(
name="bar_trace",
x=d["x_data"],
y=d["y_data"],
marker={
"color": plotly.colors.sample_colorscale(
"YlOrRd",
d["c_data"] / df["c_data"].max(),
)
},
orientation="h",
)
for c, d in df.groupby("c_data")
]
layout = go.Layout(showlegend=True)
fig = go.FigureWidget(data=bar_traces, layout=layout)
fig.show()
fig = px.bar(
df,
x="x_data",
y="y_data",
color="c_data",
orientation="h",
color_continuous_scale="YlOrRd",
)
fig = go.FigureWidget(data=fig.data, layout=fig.layout)
fig.show()

Plotly Express: Plotting individual columns of a dataframe as multiple plots (scrollable) using plotly express

I posed a question at Plotly: How to add a horizontal scrollbar to a plotly express figure? asking how to add a horizontal scrollbar to a plotly express figure for purposes of visualizing a long multivariate time series. A solution for a simple example consisting of three series having 100K points each was given as follows:
import plotly.express as px
import numpy as np
import pandas as pd
np.random.seed(123)
e = np.random.randn(100000,3)
df=pd.DataFrame(e, columns=['a','b','c'])
df['x'] = df.index
df_melt = pd.melt(df, id_vars="x", value_vars=df.columns[:-1])
fig=px.line(df_melt, x="x", y="value",color="variable")
# Add range slider
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True),
type="linear"))
fig.show()
This code is nice, but I'd like to have the plots not superimposed on a single set of axes--instead one above the other as would be done with subplot. For example, signal 'a' would appear above signal 'b', which would appear above signal 'c'.
Because my actual time series have at least 50 channels, a vertical scrollbar will likely be necessary.
As far as I know, it may be possible in dash, but it does not exist in plotly. The question you quoted also suggests a range slider as a substitute for the scroll function. At the same time, the range slider is integrated with the graph, so if you don't make the slider function independent, it will disappear on scrolling, which is not a good idea. I think the solution at the moment is to have 50 channels side by side and add a slider.
import plotly.graph_objects as go
import numpy as np
import pandas as pd
np.random.seed(123)
e = np.random.randn(100000,3)
df=pd.DataFrame(e, columns=['a','b','c'])
df['x'] = df.index
df_melt = pd.melt(df, id_vars="x", value_vars=df.columns[:-1])
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_melt.query('variable == "a"')['x'],
y=df_melt.query('variable == "a"')['value'], yaxis='y'))
fig.add_trace(go.Scatter(x=df_melt.query('variable == "b"')['x'],
y=df_melt.query('variable == "b"')['value'], yaxis='y2'))
fig.add_trace(go.Scatter(x=df_melt.query('variable == "c"')['x'],
y=df_melt.query('variable == "c"')['value'], yaxis='y3'))
# Add range slider
fig.update_layout(
xaxis=dict(
rangeslider=dict(visible=True),
type="linear"),
yaxis=dict(
anchor='x',
domain=[0, 0.33],
linecolor='blue',
type='linear',
zeroline=False
),
yaxis2=dict(
anchor='x',
domain=[0.33, 0.66],
linecolor='red',
type='linear',
zeroline=False
),
yaxis3=dict(
anchor='x',
domain=[0.66, 1.0],
linecolor='green',
type='linear',
zeroline=False
),
)
fig.show()

Plotly subplot represent same y-axis name with same color and single legend

I am trying to create a plot for two categories in a subplot. 1st column represent category FF and 2nd column represent category RF in the subplot.
The x-axis is always time and y-axis is remaining columns. In other words, it is a plot with one column vs rest.
1st category and 2nd category always have same column names just only the values differs.
I tried to generate the plot in a for loop but the problem is plotly treats each column name as distinct and thereby it represents the lines in different color for y-axis with same name. As a consequence, in legend also an entry is created.
For example, in first row Time vs price2010 I want both subplot FF and RF to be represented in same color (say blue) and a single entry in legend.
I tried adding legendgroup in go.Scatter but it doesn't help.
import pandas as pd
from pandas import DataFrame
from plotly import tools
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from plotly.subplots import make_subplots
CarA = {'Time': [10,20,30,40 ],
'Price2010': [22000,26000,27000,35000],
'Price2011': [23000,27000,28000,36000],
'Price2012': [24000,28000,29000,37000],
'Price2013': [25000,29000,30000,38000],
'Price2014': [26000,30000,31000,39000],
'Price2015': [27000,31000,32000,40000],
'Price2016': [28000,32000,33000,41000]
}
ff = DataFrame(CarA)
CarB = {'Time': [8,18,28,38 ],
'Price2010': [19000,20000,21000,22000],
'Price2011': [20000,21000,22000,23000],
'Price2012': [21000,22000,23000,24000],
'Price2013': [22000,23000,24000,25000],
'Price2014': [23000,24000,25000,26000],
'Price2015': [24000,25000,26000,27000],
'Price2016': [25000,26000,27000,28000]
}
rf = DataFrame(CarB)
Type = {
'FF' : ff,
'RF' : rf
}
fig = make_subplots(rows=len(ff.columns), cols=len(Type), subplot_titles=('FF','RF'),vertical_spacing=0.3/len(ff.columns))
labels = ff.columns[1:]
for indexC, (cat, values) in enumerate(Type.items()):
for indexP, params in enumerate(values.columns[1:]):
trace = go.Scatter(x=values.iloc[:,0], y=values[params], mode='lines', name=params,legendgroup=params)
fig.append_trace(trace,indexP+1, indexC+1)
fig.update_xaxes(title_text=values.columns[0],row=indexP+1, col=indexC+1)
fig.update_yaxes(title_text=params,row=indexP+1, col=indexC+1)
fig.update_layout(height=2024, width=1024,title_text="Car Analysis")
iplot(fig)
It might not be a good solution, but so far I can able to come up only with this hack.
fig = make_subplots(rows=len(ff.columns), cols=len(Type), subplot_titles=('FF','RF'),vertical_spacing=0.2/len(ff.columns))
labels = ff.columns[1:]
colors = [ '#a60000', '#f29979', '#d98d36', '#735c00', '#778c23', '#185900', '#00a66f']
legend = True
for indexC, (cat, values) in enumerate(Type.items()):
for indexP, params in enumerate(values.columns[1:]):
trace = go.Scatter(x=values.iloc[:,0], y=values[params], mode='lines', name=params,legendgroup=params, showlegend=legend, marker=dict(
color=colors[indexP]))
fig.append_trace(trace,indexP+1, indexC+1)
fig.update_xaxes(title_text=values.columns[0],row=indexP+1, col=indexC+1)
fig.update_yaxes(title_text=params,row=indexP+1, col=indexC+1)
fig.update_layout(height=1068, width=1024,title_text="Car Analysis")
legend = False
If you combine your data into a single tidy data frame, you can use a simple Plotly Express call to make the chart: px.line() with color, facet_row and facet_col

Categories