I want to add data labels to the tops of bar charts in plotly express. I'm using two different columns from the data frame so I can't use the "colors" method. I want to define "text" for each bar so it shows the data on top of the bar. Here is an MRE.
import pandas as pd
import plotly.express as px
x = ['Aaron', 'Bob', 'Chris']
y1 = [5, 10, 6]
y2 = [8, 16, 12]
fig = px.bar(x=x, y=[y1,y2],barmode='group')
fig.show()
I tried:
fig = px.bar(x=x, y=[y1,y2],text=[y1,y2], barmode='group')
But this doesn't work.
Using your setup, just add the following to the mix:
texts = [y1, y2]
for i, t in enumerate(texts):
fig.data[i].text = t
fig.data[i].textposition = 'outside'
Result:
Complete code:
import pandas as pd
import plotly.express as px
x = ['Aaron', 'Bob', 'Chris']
y1 = [5, 10, 6]
y2 = [8, 16, 12]
fig = px.bar(x=x, y=[y1,y2],barmode='group')
texts = [y1, y2]
for i, t in enumerate(texts):
fig.data[i].text = t
fig.data[i].textposition = 'outside'
fig.show()
i found a answer that's is better.
Let's take as example this dictionary:
data_dictionary = {
"data_frame":{
"x":["Aaron", "Bob", "Chris"],
"y1":[5, 10, 6],
"y2":[8, 16, 12]
},
"x":"x",
"y":["y1", "y2"],
"barmode":"group",
"text":None,
"text_auto":True
}
After that let's create a figure:
fig = px.bar(
**data_dictionary
)
If you tipe fig.show(), you'll se a graph simillary to the vestland's graph.
The only thing you need to do is to set text as None and text_auto as True.
I hope that helps you.
Related
I have a straightforward for loop that loops through datasets in a set and plots the resultant scatterplot for each dataset using the code below;
for i in dataframes:
x = i['cycleNumber']
y = i['QCharge_mA_h']
plt.figure()
sns.scatterplot(x=x, y=y).set(title=i.name)
This plots the graphs out as expected, one on top of the other. Is there a simple way to get them all to plot onto a grid for better readability?
As an example lets say we have the following datasets and code:
data1 = {'X':[12, 10, 20, 17], 'Y':[9, 8, 5, 3]}
data2 = {'X':[2, 13, 7, 21], 'Y':[17, 18, 4, 6]}
data3 = {'X':[9, 19, 20, 3], 'Y':[6, 12, 4, 1]}
data4 = {'X':[10, 13, 15, 1], 'Y':[6, 12, 5,16]}
data5 = {'X':[12, 10, 5, 3], 'Y':[18, 7, 21, 7]}
data6 = {'X':[5, 10, 8, 17], 'Y':[9, 12, 5, 18]}
df1=pd.DataFrame(data1)
df2=pd.DataFrame(data2)
df3=pd.DataFrame(data3)
df4=pd.DataFrame(data4)
df5=pd.DataFrame(data5)
df6=pd.DataFrame(data6)
lst = [df1, df2, df3, df4, df5, df6]
for i in lst:
plt.figure()
sns.scatterplot(x=i['X'], y=i['Y'])
This returns an output of each scatterplot called printing on top of another i.e. stacked. I cant upload a shot of what that output looks like as it runs across multiple pages (this tidy output that I can capture and display is exactly what it is I'm trying to achieve).
I want it to be in a grid, lets say a 2x3 grid given it has 6 plots. How do I achieve this?
Few ways you could do this.
The Original
import matplotlib # 3.6.0
from matplotlib import pyplot as plt
import numpy as np # 1.23.3
import pandas as pd # 1.5.1
import seaborn as sns # 0.12.1
# make fake data
df = pd.DataFrame({
"cycleNumber": np.random.random(size=(100,)),
"QCharge_mA_h": np.random.random(size=(100,)),
})
# single plot
fig, ax = plt.subplots()
sns.scatterplot(df, x="cycleNumber", y="QCharge_mA_h", ax=ax)
plt.show()
With matplotlib
# make 5 random data frames
dataframes = []
for i in range(5):
np.random.seed(i)
random_df = pd.DataFrame({
"cycleNumber": np.random.random(size=(100,)),
"QCharge_mA_h": np.random.random(size=(100,)),
})
dataframes.append(random_df)
# make len(dataframes) rows using matplotlib
fig, axs = plt.subplots(nrows=len(dataframes))
for df, ax in zip(dataframes, axs):
sns.scatterplot(df, x="cycleNumber", y="QCharge_mA_h", ax=ax)
plt.show()
With seaborn
# make 5 random data frames
dataframes = []
for i in range(5):
np.random.seed(i)
random_df = pd.DataFrame({
"cycleNumber": np.random.random(size=(100,)),
"QCharge_mA_h": np.random.random(size=(100,)),
})
dataframes.append(random_df)
# make len(dataframes) rows using matplotlib
# concat dataframes
dfs = pd.concat(dataframes, keys=range(len(dataframes)), names=["keys"])
# move keys to columns
dfs = dfs.reset_index(level="keys")
# make grid and map scatterplot to each row
grid = sns.FacetGrid(data=dfs, row="keys")
grid.map(sns.scatterplot, "cycleNumber", "QCharge_mA_h")
plt.show()
With col_wrap=3
# make 5 random data frames
dataframes = []
for i in range(5):
np.random.seed(i)
random_df = pd.DataFrame({
"cycleNumber": np.random.random(size=(100,)),
"QCharge_mA_h": np.random.random(size=(100,)),
})
dataframes.append(random_df)
# make len(dataframes) rows using matplotlib
# concat dataframes
dfs = pd.concat(dataframes, keys=range(len(dataframes)), names=["keys"])
# move keys to columns
dfs = dfs.reset_index(level="keys")
# make grid and map scatterplot to each column, wrapping after 3
grid = sns.FacetGrid(data=dfs, col="keys", col_wrap=3)
grid.map(sns.scatterplot, "cycleNumber", "QCharge_mA_h")
plt.show()
As per the Plotly website, in a simple line chart one can change the legend entry from the column name to a manually specified string of text. For example, this code results in the following chart:
import pandas as pd
import plotly.express as px
df = pd.DataFrame(dict(
x = [1, 2, 3, 4],
y = [2, 3, 4, 3]
))
fig = px.line(
df,
x="x",
y="y",
width=800, height=600,
labels={
"y": "Series"
},
)
fig.show()
label changed:
However, when one plots multiple columns to the line chart, this label specification no longer works. There is no error message, but the legend entries are simply not changed. See this example and output:
import pandas as pd
import plotly.express as px
df = pd.DataFrame(dict(
x = [1, 2, 3, 4],
y1 = [2, 3, 4, 3],
y2 = [2, 4, 6, 8]
))
fig = px.line(
df,
x="x",
y=["y1", "y2"],
width=800, height=600,
labels={
"y1": "Series 1",
"y2": "Series 2"
},
)
fig.show()
legend entries not changed:
Is this a bug, or am I missing something? Any idea how this can be fixed?
In case anybody read my previous post, I did some more digging and found the solution to this issue. At the heart, the labels one sees over on the right in the legend are attributes known as "names" and not "labels". Searching for how to revise those names, I came across another post about this issue with a solution Legend Label Update. Using that information, here is a revised version of your program.
import pandas as pd
import plotly.express as px
df = pd.DataFrame(dict(
x = [1, 2, 3, 4],
y1 = [2, 3, 4, 3],
y2 = [2, 4, 6, 8]
))
fig = px.line(df, x="x", y=["y1", "y2"], width=800, height=600)
fig.update_layout(legend_title_text='Variable', xaxis_title="X", yaxis_title="Series")
newnames = {'y1':'Series 1', 'y2': 'Series 2'} # From the other post
fig.for_each_trace(lambda t: t.update(name = newnames[t.name]))
fig.show()
Following is a sample graph.
Try that out to see if that addresses your situation.
Regards.
I have two dictionaries:
days = {'a':[1,2,3], 'b':[3,4,5]}
vals = {'a':[10,20,30], 'b':[9,16,25]}
Using plotly (ideally plotly express) I would like one line plot with two lines: the first line being days['a'] vs vals['a'] and the second line being days['b'] vs vals['b']. Of course in practice I may have many more potential lines. I am not sure how to pull this off. I'm happy to make a dataframe out of this data but not sure what the best structure is.
Thanks! Apologies for a noob question.
You can try the following:
import plotly.graph_objects as go
# your data
days = {'a':[1,2,3], 'b':[3,4,5]}
vals = {'a':[10,20,30], 'b':[9,16,25]}
# generate a plot for each dictionary key
data = []
for k in days.keys():
plot = go.Scatter(x=days[k],
y=vals[k],
mode="lines",
name=k # label for the plot legend
)
data.append(plot)
# create a figure with all plots and display it
fig = go.Figure(data=data)
fig.show()
This gives:
With Plotly Express:
import plotly.express as px
import pandas as pd
days = {'a': [1, 2, 3], 'b': [3, 4, 5]}
vals = {'a': [10, 20, 30], 'b': [9, 16, 25]}
# build DataFrame
df = pd.DataFrame(columns=["days", "vals", "label"])
for k in days.keys():
df = df.append(pd.DataFrame({
"days": days[k],
"vals": vals[k],
"label": k
}))
fig = px.line(df, x="days", y="vals", color="label")
fig.show()
The result is the same as above.
How can I use Plotly to produce a line plot with a shaded standard deviation? I am trying to achieve something similar to seaborn.tsplot. Any help is appreciated.
The following approach is fully flexible with regards to the number of columns in a pandas dataframe and uses the default color cycle of plotly. If the number of lines exceed the number of colors, the colors will be re-used from the start. As of now px.colors.qualitative.Plotly can be replaced with any hex color sequence that you can find using px.colors.qualitative:
Alphabet = ['#AA0DFE', '#3283FE', '#85660D', '#782AB6', '#565656', '#1...
Alphabet_r = ['#FA0087', '#FBE426', '#B00068', '#FC1CBF', '#C075A6', '...
[...]
Complete code:
# imports
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np
# sample data in a pandas dataframe
np.random.seed(1)
df=pd.DataFrame(dict(A=np.random.uniform(low=-1, high=2, size=25).tolist(),
B=np.random.uniform(low=-4, high=3, size=25).tolist(),
C=np.random.uniform(low=-1, high=3, size=25).tolist(),
))
df = df.cumsum()
# define colors as a list
colors = px.colors.qualitative.Plotly
# convert plotly hex colors to rgba to enable transparency adjustments
def hex_rgba(hex, transparency):
col_hex = hex.lstrip('#')
col_rgb = list(int(col_hex[i:i+2], 16) for i in (0, 2, 4))
col_rgb.extend([transparency])
areacol = tuple(col_rgb)
return areacol
rgba = [hex_rgba(c, transparency=0.2) for c in colors]
colCycle = ['rgba'+str(elem) for elem in rgba]
# Make sure the colors run in cycles if there are more lines than colors
def next_col(cols):
while True:
for col in cols:
yield col
line_color=next_col(cols=colCycle)
# plotly figure
fig = go.Figure()
# add line and shaded area for each series and standards deviation
for i, col in enumerate(df):
new_col = next(line_color)
x = list(df.index.values+1)
y1 = df[col]
y1_upper = [(y + np.std(df[col])) for y in df[col]]
y1_lower = [(y - np.std(df[col])) for y in df[col]]
y1_lower = y1_lower[::-1]
# standard deviation area
fig.add_traces(go.Scatter(x=x+x[::-1],
y=y1_upper+y1_lower,
fill='tozerox',
fillcolor=new_col,
line=dict(color='rgba(255,255,255,0)'),
showlegend=False,
name=col))
# line trace
fig.add_traces(go.Scatter(x=x,
y=y1,
line=dict(color=new_col, width=2.5),
mode='lines',
name=col)
)
# set x-axis
fig.update_layout(xaxis=dict(range=[1,len(df)]))
fig.show()
I was able to come up with something similar. I post the code here to be used by someone else or for any suggestions for improvements.
import matplotlib
import random
import plotly.graph_objects as go
import numpy as np
#random color generation in plotly
hex_colors_dic = {}
rgb_colors_dic = {}
hex_colors_only = []
for name, hex in matplotlib.colors.cnames.items():
hex_colors_only.append(hex)
hex_colors_dic[name] = hex
rgb_colors_dic[name] = matplotlib.colors.to_rgb(hex)
data = [[1, 3, 5, 4],
[2, 3, 5, 4],
[1, 1, 4, 5],
[2, 3, 5, 4]]
#calculating mean and standard deviation
mean=np.mean(data,axis=0)
std=np.std(data,axis=0)
#draw figure
fig = go.Figure()
c = random.choice(hex_colors_only)
fig.add_trace(go.Scatter(x=np.arange(4), y=mean+std,
mode='lines',
line=dict(color=c,width =0.1),
name='upper bound'))
fig.add_trace(go.Scatter(x=np.arange(4), y=mean,
mode='lines',
line=dict(color=c),
fill='tonexty',
name='mean'))
fig.add_trace(go.Scatter(x=np.arange(4), y=mean-std,
mode='lines',
line=dict(color=c, width =0.1),
fill='tonexty',
name='lower bound'))
fig.show()
Great custom responses posted by others. In case someone is interested in code from the official plotly website, see here: https://plotly.com/python/continuous-error-bars/
I wrote a function to extend plotly.express.line with the same high level interface of Plotly Express. The line function (source code below) is used in the same exact way as plotly.express.line but allows for continuous error bands with the flag argument error_y_mode which can be either 'band' or 'bar'. In the second case it produces the same result as the original plotly.express.line. Here is an usage example:
import plotly.express as px
df = px.data.gapminder().query('continent=="Americas"')
df = df[df['country'].isin({'Argentina','Brazil','Colombia'})]
df['lifeExp std'] = df['lifeExp']*.1 # Invent some error data...
for error_y_mode in {'band', 'bar'}:
fig = line(
data_frame = df,
x = 'year',
y = 'lifeExp',
error_y = 'lifeExp std',
error_y_mode = error_y_mode, # Here you say `band` or `bar`.
color = 'country',
title = f'Using error {error_y_mode}',
markers = '.',
)
fig.show()
which produces the following two plots:
The source code of the line function that extends plotly.express.line is this:
import plotly.express as px
import plotly.graph_objs as go
def line(error_y_mode=None, **kwargs):
"""Extension of `plotly.express.line` to use error bands."""
ERROR_MODES = {'bar','band','bars','bands',None}
if error_y_mode not in ERROR_MODES:
raise ValueError(f"'error_y_mode' must be one of {ERROR_MODES}, received {repr(error_y_mode)}.")
if error_y_mode in {'bar','bars',None}:
fig = px.line(**kwargs)
elif error_y_mode in {'band','bands'}:
if 'error_y' not in kwargs:
raise ValueError(f"If you provide argument 'error_y_mode' you must also provide 'error_y'.")
figure_with_error_bars = px.line(**kwargs)
fig = px.line(**{arg: val for arg,val in kwargs.items() if arg != 'error_y'})
for data in figure_with_error_bars.data:
x = list(data['x'])
y_upper = list(data['y'] + data['error_y']['array'])
y_lower = list(data['y'] - data['error_y']['array'] if data['error_y']['arrayminus'] is None else data['y'] - data['error_y']['arrayminus'])
color = f"rgba({tuple(int(data['line']['color'].lstrip('#')[i:i+2], 16) for i in (0, 2, 4))},.3)".replace('((','(').replace('),',',').replace(' ','')
fig.add_trace(
go.Scatter(
x = x+x[::-1],
y = y_upper+y_lower[::-1],
fill = 'toself',
fillcolor = color,
line = dict(
color = 'rgba(255,255,255,0)'
),
hoverinfo = "skip",
showlegend = False,
legendgroup = data['legendgroup'],
xaxis = data['xaxis'],
yaxis = data['yaxis'],
)
)
# Reorder data as said here: https://stackoverflow.com/a/66854398/8849755
reordered_data = []
for i in range(int(len(fig.data)/2)):
reordered_data.append(fig.data[i+int(len(fig.data)/2)])
reordered_data.append(fig.data[i])
fig.data = tuple(reordered_data)
return fig
Here you are part of my data.
I count my data
count_interests = interests.count()
then made a graph
count_interests.iplot(kind = 'bar', xTitle='Interests', yTitle='Number of Person', colors='Red')
I tried many times to find a function change columns color with values so bigger and smaller columns looks different colors.
I know there is colorscale and color functions and I tried many times I couldn't find. Does anyone know any function?
You could define a function which returns a color for each value and then pass the colors for each bar in a list.
import pandas as pd
import plotly
def color(val, median, std):
if val > median + std:
return 'darkgreen'
if val < median - std:
return 'darkred'
return 'orange'
df = pd.DataFrame({'cinema': [1, 2, 5, 3, 3, None],
'theatre': [3, 0, 8, 4, 0, 4],
'wine': [3, 2, 5, None, 1, None],
'beer': [4, 8, 2, None, None, None]})
med = df.count().median()
std = df.count().std()
colors = [color(i, med, std) for i in df.count()]
fig = plotly.graph_objs.Bar(x=df.columns,
y=df.count(),
marker=dict(color=colors))
plotly.offline.plot([fig])
The bars could be also colored either by pd.pivot_table() the rows to columns or by creating a separate list of traces for bars. Here, each column was aggregated by taking a sum() as an example. Code below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
import plotly.graph_objs as go
import plotly.offline as pyo
# Create dataframe
INT_M_PUB = [0,0,0,0,0,1,0,0,0,0]
INT_M_CINEMA = [1,1,1,0,0,0,0,0,0,1]
INT_M_THEATRE = [1,0,1,0,0,1,0,1,0,1]
INT_M_GYM = [0,0,0,0,0,1,0,0,0,1]
INT_M_ENTERTAIN = [0,0,1,1,0,1,0,1,0,1]
INT_M_EATOUT = [0,1,1,0,0,1,0,0,1,1]
INT_M_WINE = [0,0,0,0,0,1,0,0,0,1]
interests = pd.DataFrame({'INT_M_PUB':INT_M_PUB, 'INT_M_CINEMA':INT_M_CINEMA, 'INT_M_THEATRE':INT_M_THEATRE,
'INT_M_GYM':INT_M_GYM, 'INT_M_ENTERTAIN':INT_M_ENTERTAIN, 'INT_M_EATOUT':INT_M_EATOUT,
'INT_M_WINE':INT_M_WINE
})
interests.head(2)
dfm = interests.sum().reset_index().rename(columns={'index':'interests', 0:'value'})
dfm
# Re-creating the plot similar to that in question (note: y-axis scales are different)
df = dfm.copy()
col_list = df.columns
df.iplot(kind = 'bar', x='interests', y='value', xTitle='Interests', yTitle='Number of Person', title='These bars need to be colored', color='red')
# Color plots by creating traces
# Initialize empty list named data to collect traces for each bar
data = []
for col_name in col_list:
trace = go.Bar(
x=[col_name],
y=df[col_name],
name=col_name
)
data.append(trace)
data = data
layout = go.Layout(
barmode='group',
title='Interests',
xaxis=dict(title='Interests'),
yaxis=dict(title='Number of Person')
)
fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='grouped-bar')
# Creating plot by pivoting the table
df = pd.pivot_table(dfm, values='value', columns='interests')
df.iplot(kind = 'bar',xTitle='Interests', yTitle='Number of Person')