I'm trying to add custom text inside of a plotly pie chart.
I want the largest share to be labeled Rank 1, second largest Rank 2, and so on...
Is there any way to do this using the texttemplate or some other method?
import plotly.graph_objects as go
labels = list('ABCD')
values = [25,45,13,78]
fig = go.Figure(data=[go.Pie(labels=labels, values=values,
texttemplate=("Rank %i" % 1))])
fig.show()
Passing an array to texttemplate helps
import plotly.graph_objects as go
labels = list('ABCD')
values = [25,45,13,78]
fig = go.Figure(data=[go.Pie(labels=labels, values=values,
texttemplate=([4,2,3,'Rank 1']))])
fig.show()
You need this function:
def get_ranks(lst, begin_with_one=False):
srtd = sorted(lst, reverse=True)
res = [srtd.index(x) for x in lst]
return [x + 1 for x in res] if begin_with_one else res
import plotly.graph_objects as go
import numpy as np
labels = list('ABCD')
values = [25,45,13,78]
fig = go.Figure(data=[go.Pie(labels=labels, values=values,
texttemplate=[f"Rank {x}" for x in get_ranks(values, begin_with_one=True)])])
fig.show()
Use as often as possible the f-strings in Python3 - they are super convenient!
Since this works only with unique list elements, I created a better index assessor:
def map_to_index(lst1, lst2):
"""Return lst1 as indexes of lst2"""
dct = {}
for i, x in enumerate(lst2):
dct[x] = dct.get(x, []) + [i]
indexes = []
for x in lst1:
indexes.append(dct[x][0])
if len(dct[x]) > 0:
dct[x] = dct[x][1:]
return indexes
And an improved get_ranks():
def get_ranks(lst, begin_with_one=False):
srtd = sorted(lst, reverse=True)
res = map_to_index(lst, srtd)
return [x + 1 for x in res] if begin_with_one else res
Then it works also with:
import plotly.graph_objects as go
import numpy as np
labels = list('ABCDEF')
values = [25,45,13,78,45,78] # with elements of same value
fig = go.Figure(data=[go.Pie(labels=labels, values=values,
texttemplate=[f"Rank {x}" for x in get_ranks(values, begin_with_one=True)])])
fig.show()
Related
I have a CSV File with elliptic curve related data which looks as follows (excerpt):
3,19,[(1,4,1),(4,7,44)]
3,13,[(4,1,10)]
5,11,[(4,39,14)]
3,7,[(1,2,1),(1,3622,111),(4,17,10)]
3,5,[(4,1,2),(4,959,124)]
3,23,[(3,5,2)]
5,13,[(2,7,2),(2,8,1),(2,47,70),...]
7,11,[(1,2,1),(1,13,2),(1,53,4),...]
3,29,[(4,13,4)]
5,17,[(2,2,3),(4,6881,498)]
5,19,[(4,71,18)]
3,37,[(4,25,14)]
7,17,[(4,19,30)]
7,19,[(3,66,5)]
The first and second number in each line forms a pair of odd prime numbers and the list after these two primes is used to specify the color of the corresponding matrix cell. I want to generate a matrix plot where the x-axis and y-axis are labeled with the first 200 (odd) primes: 3, 5, 7, 11, 13, 17, 19, 23, ..., 1193, 1201, 1213, 1217, 1223, 1229. The following code works fine to generate the colorized matrix plot.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from sympy import sieve, prime
import itertools
import pandas as pd
matrix_size = 200
matrix = np.zeros((matrix_size, matrix_size))
with open('C:/Users/esultano/git/elliptic_curves/data/elliptic_curves.csv') as f:
for line in f.readlines():
line = line.strip()
p, q, cases = eval(line)
idx_p = sieve.search(p)[0]-2
idx_q = sieve.search(q)[0]-2
if idx_p < matrix_size and idx_q < matrix_size:
cases_set = set()
for case in cases:
cases_set.add(case[0])
cases_list = list(cases_set)
val = sum(i*i for i in cases_list)
matrix[idx_p,idx_q] = val
matrix[idx_q,idx_p] = val
max_prime = prime(matrix_size+2)
axis_labels = list(sieve.primerange(3, max_prime))
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
matrix_plot = ax.matshow(matrix, interpolation='nearest')
ax.set_xticklabels(axis_labels)
ax.set_yticklabels(axis_labels)
plt.show()
The plot looks as follows:
Unfortunatelly the axis labeling is not correct since each axis should span the primes from 3 to 1229. How can I fix the axis labeling? I am not requiring to place each of these 200 primes as axis label (displaying a few labels would be fine).
As documentation of Axes.set_xticklabels() says, obligatory you have to call Axes.set_xticks() before.
I did necessary modifications to your code, so that it works now. Screenshot of output goes after code.
In my code you can see // 9, because of this 10 primes (ticks) on axis is shown. If you want e.g. 20 numbers then do // 19, etc.
Try it online! and See diffs
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from sympy import sieve, prime
import itertools
import pandas as pd
matrix_size = 200
matrix = np.zeros((matrix_size, matrix_size))
with open('elliptic_curves.csv') as f:
for line in f.readlines():
line = line.strip()
p, q, cases = eval(line)
idx_p = sieve.search(p)[0]-2
idx_q = sieve.search(q)[0]-2
if idx_p < matrix_size and idx_q < matrix_size:
cases_set = set()
for case in cases:
cases_set.add(case[0])
cases_list = list(cases_set)
val = sum(i*i for i in cases_list)
matrix[idx_p,idx_q] = val
matrix[idx_q,idx_p] = val
max_prime = prime(matrix_size+2)
axis_labels = list(enumerate(sieve.primerange(3, max_prime)))
axis_labels = axis_labels[::len(axis_labels) // 9][:-1] + [axis_labels[-1]]
ticks = [e[0] for e in axis_labels]
ticklabels = [e[1] for e in axis_labels]
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
matrix_plot = ax.matshow(matrix, interpolation='nearest')
ax.set_xticks(ticks); ax.set_xticklabels(ticklabels)
ax.set_yticks(ticks); ax.set_yticklabels(ticklabels)
plt.show()
Output:
I am new to Python and have mainly used MatLab in the past. I am re-writing one of my MatLab scripts and am wondering how to add plots to figures. It seems in python I can only have one figure open at a time and have to manually close the window before a second figure will open. My original script is a couple hundred lines long, but here is a MWE of what I want to do.
import matplotlib.pyplot as plt
import numpy as np
#from mpl_toolkits import mplot3d
lst = [ 1, 1.5, 2, 4.5]
alpha= np.array(lst)
#initialize tables for plots
xtable = []
ytable = []
y2table = []
#determine whether lst is a vector or an array for number of iterations of inner and outer loops
def size(arr):
if len(arr.shape) == 1:
return arr.shape[0], 1
return arr.shape
[nn,mm] = size(alpha)
#create and plot data
for kk in range(nn):#= 1:nn
x = [i for i in range(0, 10)]
y = [alpha[kk]*i for i in range(0, 10)]
y2 = [alpha[kk]*i**2 for i in range(0, 10)]
#data for plot(s)
xtable += [x]
ytable += [y]
y2table += [y2]
#plot1
plt.plot(xtable,ytable)
plt.hold on
#plot2
plt.plot(xtable,y2table)
plt.hold on
In my script these will actually be 3D plots, but I don't think that's necessary here. I just want the for-loop to run for each value in lst and end up with two figures, each with 4 plots. The size of lst is not fixed or I'd generate the data in the loop and plot later.
Thank you in advance for your help
follow up on tdy's comment:
#create plots:
fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()
#plot data
for kk in range(nn):#= 1:nn
x = [i for i in range(0, 10)]
y = [alpha[kk]*i for i in range(0, 10)]
y2 = [alpha[kk]*i**2 for i in range(0, 10)]
#data for plot(s)
xtable += [x]
ytable += [y]
y2table += [y2]
#plot1
ax1.plot(xtable,ytable)
#plot2
ax2.plot(xtable,y2table)
This code produces the figure I've attached. Notice the sums are the totals over the df, but I need the columns to only show the totals for that particular month. What do you have to set in the
text = ...
assignment for this to occur?
df = data[['Month', 'A', 'B']]
for X in df['A'].unique():
trace = go.Bar(
x = df[df['A']==X]['Month'],
y = df[df['A']==X]['B'],
text = str(df[df['A']==X]['B'].sum())
)
traces.append(trace)
df = data.groupby(['Month','TA']).sum().reset_index()
for TA in df['TA'].unique():
trace = go.Bar(
x = df[df['TA']==TA]['Month'],
y = df[df['TA']==TA]['Studies'],
text = df[df['TA']==TA]['Studies'],
name = TA
)
traces.append(trace)
As long as all values are already showing in your figure, the following will work regardless of how you've built your figure or grouped your data:
numbers = []
fig.for_each_trace(lambda t: numbers.append([float(nr) for nr in t.text]))
sums = [sum(i) for i in zip(*numbers)]
for i,d in enumerate(fig.data):
if i == len(fig.data)-1:
d.text = sums
else:
d.text = ''
fig.show()
Result:
Example of original figure:
Complete code:
# imports
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# data
df = px.data.stocks()
df = df[df.columns[:3]]
df = df.tail(25)
df['date'] = pd.to_datetime(df['date'])
# group py month
dfm = df.groupby(pd.Grouper(key = 'date', freq='M')).agg('sum').reset_index()
# figure setup
fig = go.Figure()
for col in dfm.columns[1:]:
fig.add_trace(go.Bar(x=dfm.date, y = dfm[col], text = [str(v)[:3] for v in dfm[col]], textposition = 'auto'))
fig.update_layout(barmode = 'stack')
# grap and sum data for all bars
numbers = []
fig.for_each_trace(lambda t: numbers.append([float(nr) for nr in t.text]))
sums = [sum(i) for i in zip(*numbers)]
for i,d in enumerate(fig.data):
if i == len(fig.data)-1:
d.text = sums
else:
d.text = ''
fig.show()
How can I use Plotly to produce a line plot with a shaded standard deviation? I am trying to achieve something similar to seaborn.tsplot. Any help is appreciated.
The following approach is fully flexible with regards to the number of columns in a pandas dataframe and uses the default color cycle of plotly. If the number of lines exceed the number of colors, the colors will be re-used from the start. As of now px.colors.qualitative.Plotly can be replaced with any hex color sequence that you can find using px.colors.qualitative:
Alphabet = ['#AA0DFE', '#3283FE', '#85660D', '#782AB6', '#565656', '#1...
Alphabet_r = ['#FA0087', '#FBE426', '#B00068', '#FC1CBF', '#C075A6', '...
[...]
Complete code:
# imports
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np
# sample data in a pandas dataframe
np.random.seed(1)
df=pd.DataFrame(dict(A=np.random.uniform(low=-1, high=2, size=25).tolist(),
B=np.random.uniform(low=-4, high=3, size=25).tolist(),
C=np.random.uniform(low=-1, high=3, size=25).tolist(),
))
df = df.cumsum()
# define colors as a list
colors = px.colors.qualitative.Plotly
# convert plotly hex colors to rgba to enable transparency adjustments
def hex_rgba(hex, transparency):
col_hex = hex.lstrip('#')
col_rgb = list(int(col_hex[i:i+2], 16) for i in (0, 2, 4))
col_rgb.extend([transparency])
areacol = tuple(col_rgb)
return areacol
rgba = [hex_rgba(c, transparency=0.2) for c in colors]
colCycle = ['rgba'+str(elem) for elem in rgba]
# Make sure the colors run in cycles if there are more lines than colors
def next_col(cols):
while True:
for col in cols:
yield col
line_color=next_col(cols=colCycle)
# plotly figure
fig = go.Figure()
# add line and shaded area for each series and standards deviation
for i, col in enumerate(df):
new_col = next(line_color)
x = list(df.index.values+1)
y1 = df[col]
y1_upper = [(y + np.std(df[col])) for y in df[col]]
y1_lower = [(y - np.std(df[col])) for y in df[col]]
y1_lower = y1_lower[::-1]
# standard deviation area
fig.add_traces(go.Scatter(x=x+x[::-1],
y=y1_upper+y1_lower,
fill='tozerox',
fillcolor=new_col,
line=dict(color='rgba(255,255,255,0)'),
showlegend=False,
name=col))
# line trace
fig.add_traces(go.Scatter(x=x,
y=y1,
line=dict(color=new_col, width=2.5),
mode='lines',
name=col)
)
# set x-axis
fig.update_layout(xaxis=dict(range=[1,len(df)]))
fig.show()
I was able to come up with something similar. I post the code here to be used by someone else or for any suggestions for improvements.
import matplotlib
import random
import plotly.graph_objects as go
import numpy as np
#random color generation in plotly
hex_colors_dic = {}
rgb_colors_dic = {}
hex_colors_only = []
for name, hex in matplotlib.colors.cnames.items():
hex_colors_only.append(hex)
hex_colors_dic[name] = hex
rgb_colors_dic[name] = matplotlib.colors.to_rgb(hex)
data = [[1, 3, 5, 4],
[2, 3, 5, 4],
[1, 1, 4, 5],
[2, 3, 5, 4]]
#calculating mean and standard deviation
mean=np.mean(data,axis=0)
std=np.std(data,axis=0)
#draw figure
fig = go.Figure()
c = random.choice(hex_colors_only)
fig.add_trace(go.Scatter(x=np.arange(4), y=mean+std,
mode='lines',
line=dict(color=c,width =0.1),
name='upper bound'))
fig.add_trace(go.Scatter(x=np.arange(4), y=mean,
mode='lines',
line=dict(color=c),
fill='tonexty',
name='mean'))
fig.add_trace(go.Scatter(x=np.arange(4), y=mean-std,
mode='lines',
line=dict(color=c, width =0.1),
fill='tonexty',
name='lower bound'))
fig.show()
Great custom responses posted by others. In case someone is interested in code from the official plotly website, see here: https://plotly.com/python/continuous-error-bars/
I wrote a function to extend plotly.express.line with the same high level interface of Plotly Express. The line function (source code below) is used in the same exact way as plotly.express.line but allows for continuous error bands with the flag argument error_y_mode which can be either 'band' or 'bar'. In the second case it produces the same result as the original plotly.express.line. Here is an usage example:
import plotly.express as px
df = px.data.gapminder().query('continent=="Americas"')
df = df[df['country'].isin({'Argentina','Brazil','Colombia'})]
df['lifeExp std'] = df['lifeExp']*.1 # Invent some error data...
for error_y_mode in {'band', 'bar'}:
fig = line(
data_frame = df,
x = 'year',
y = 'lifeExp',
error_y = 'lifeExp std',
error_y_mode = error_y_mode, # Here you say `band` or `bar`.
color = 'country',
title = f'Using error {error_y_mode}',
markers = '.',
)
fig.show()
which produces the following two plots:
The source code of the line function that extends plotly.express.line is this:
import plotly.express as px
import plotly.graph_objs as go
def line(error_y_mode=None, **kwargs):
"""Extension of `plotly.express.line` to use error bands."""
ERROR_MODES = {'bar','band','bars','bands',None}
if error_y_mode not in ERROR_MODES:
raise ValueError(f"'error_y_mode' must be one of {ERROR_MODES}, received {repr(error_y_mode)}.")
if error_y_mode in {'bar','bars',None}:
fig = px.line(**kwargs)
elif error_y_mode in {'band','bands'}:
if 'error_y' not in kwargs:
raise ValueError(f"If you provide argument 'error_y_mode' you must also provide 'error_y'.")
figure_with_error_bars = px.line(**kwargs)
fig = px.line(**{arg: val for arg,val in kwargs.items() if arg != 'error_y'})
for data in figure_with_error_bars.data:
x = list(data['x'])
y_upper = list(data['y'] + data['error_y']['array'])
y_lower = list(data['y'] - data['error_y']['array'] if data['error_y']['arrayminus'] is None else data['y'] - data['error_y']['arrayminus'])
color = f"rgba({tuple(int(data['line']['color'].lstrip('#')[i:i+2], 16) for i in (0, 2, 4))},.3)".replace('((','(').replace('),',',').replace(' ','')
fig.add_trace(
go.Scatter(
x = x+x[::-1],
y = y_upper+y_lower[::-1],
fill = 'toself',
fillcolor = color,
line = dict(
color = 'rgba(255,255,255,0)'
),
hoverinfo = "skip",
showlegend = False,
legendgroup = data['legendgroup'],
xaxis = data['xaxis'],
yaxis = data['yaxis'],
)
)
# Reorder data as said here: https://stackoverflow.com/a/66854398/8849755
reordered_data = []
for i in range(int(len(fig.data)/2)):
reordered_data.append(fig.data[i+int(len(fig.data)/2)])
reordered_data.append(fig.data[i])
fig.data = tuple(reordered_data)
return fig
I know pandas supports a secondary Y axis, but I'm curious if anyone knows a way to put a tertiary Y axis on plots. Currently I am achieving this with numpy+pyplot, but it is slow with large data sets.
This is to plot different measurements with distinct units on the same graph for easy comparison (eg: Relative Humidity/Temperature/ and Electrical Conductivity).
So really just curious if anyone knows if this is possible in pandas without too much work.
[Edit] I doubt that there is a way to do this(without too much overhead) however I hope to be proven wrong, as this may be a limitation of matplotlib.
I think this might work:
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame
df = DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C'])
fig, ax = plt.subplots()
ax3 = ax.twinx()
rspine = ax3.spines['right']
rspine.set_position(('axes', 1.15))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
fig.subplots_adjust(right=0.7)
df.A.plot(ax=ax, style='b-')
# same ax as above since it's automatically added on the right
df.B.plot(ax=ax, style='r-', secondary_y=True)
df.C.plot(ax=ax3, style='g-')
# add legend --> take advantage of pandas providing us access
# to the line associated with the right part of the axis
ax3.legend([ax.get_lines()[0], ax.right_ax.get_lines()[0], ax3.get_lines()[0]],\
['A','B','C'], bbox_to_anchor=(1.5, 0.5))
Output:
A simpler solution without plt:
ax1 = df1.plot()
ax2 = ax1.twinx()
ax2.spines['right'].set_position(('axes', 1.0))
df2.plot(ax=ax2)
ax3 = ax1.twinx()
ax3.spines['right'].set_position(('axes', 1.1))
df3.plot(ax=ax3)
....
Using function to achieve this:
def plot_multi(data, cols=None, spacing=.1, **kwargs):
from pandas.plotting._matplotlib.style import get_standard_colors
# Get default color style from pandas - can be changed to any other color list
if cols is None: cols = data.columns
if len(cols) == 0: return
colors = get_standard_colors(num_colors=len(cols))
# First axis
ax = data.loc[:, cols[0]].plot(label=cols[0], color=colors[0], **kwargs)
ax.set_ylabel(ylabel=cols[0])
lines, labels = ax.get_legend_handles_labels()
for n in range(1, len(cols)):
# Multiple y-axes
ax_new = ax.twinx()
ax_new.spines['right'].set_position(('axes', 1 + spacing * (n - 1)))
data.loc[:, cols[n]].plot(ax=ax_new, label=cols[n], color=colors[n % len(colors)], **kwargs)
ax_new.set_ylabel(ylabel=cols[n])
# Proper legend position
line, label = ax_new.get_legend_handles_labels()
lines += line
labels += label
ax.legend(lines, labels, loc=0)
return ax
Example:
from random import randrange
data = pd.DataFrame(dict(
s1=[randrange(-1000, 1000) for _ in range(100)],
s2=[randrange(-100, 100) for _ in range(100)],
s3=[randrange(-10, 10) for _ in range(100)],
))
plot_multi(data.cumsum(), figsize=(10, 5))
Output:
I modified the above answer a bit to make it accept custom x column, well-documented, and more flexible.
You can copy this snippet and use it as a function:
from typing import List, Union
import matplotlib.axes
import pandas as pd
def plot_multi(
data: pd.DataFrame,
x: Union[str, None] = None,
y: Union[List[str], None] = None,
spacing: float = 0.1,
**kwargs
) -> matplotlib.axes.Axes:
"""Plot multiple Y axes on the same chart with same x axis.
Args:
data: dataframe which contains x and y columns
x: column to use as x axis. If None, use index.
y: list of columns to use as Y axes. If None, all columns are used
except x column.
spacing: spacing between the plots
**kwargs: keyword arguments to pass to data.plot()
Returns:
a matplotlib.axes.Axes object returned from data.plot()
Example:
>>> plot_multi(df, figsize=(22, 10))
>>> plot_multi(df, x='time', figsize=(22, 10))
>>> plot_multi(df, y='price qty value'.split(), figsize=(22, 10))
>>> plot_multi(df, x='time', y='price qty value'.split(), figsize=(22, 10))
>>> plot_multi(df[['time price qty'.split()]], x='time', figsize=(22, 10))
See Also:
This code is mentioned in https://stackoverflow.com/q/11640243/2593810
"""
from pandas.plotting._matplotlib.style import get_standard_colors
# Get default color style from pandas - can be changed to any other color list
if y is None:
y = data.columns
# remove x_col from y_cols
if x:
y = [col for col in y if col != x]
if len(y) == 0:
return
colors = get_standard_colors(num_colors=len(y))
if "legend" not in kwargs:
kwargs["legend"] = False # prevent multiple legends
# First axis
ax = data.plot(x=x, y=y[0], color=colors[0], **kwargs)
ax.set_ylabel(ylabel=y[0])
lines, labels = ax.get_legend_handles_labels()
for i in range(1, len(y)):
# Multiple y-axes
ax_new = ax.twinx()
ax_new.spines["right"].set_position(("axes", 1 + spacing * (i - 1)))
data.plot(
ax=ax_new, x=x, y=y[i], color=colors[i % len(colors)], **kwargs
)
ax_new.set_ylabel(ylabel=y[i])
# Proper legend position
line, label = ax_new.get_legend_handles_labels()
lines += line
labels += label
ax.legend(lines, labels, loc=0)
return ax
Here's one way to use it:
plot_multi(df, x='time', y='price qty value'.split(), figsize=(22, 10))