How do I put a white frame around a Plotly figure - python

My goal is to produce this plot:
With this code:
from io import StringIO

import pandas as pd
import plotly.graph_objects as go

premium_growth_data = '{"Market":{"0":"Company"},"Class":{"0":"Total"},"total_2019_ytd":{"0":712725553},"total_2018_ytd":{"0":626756526},"total_2018":{"0":1211397008},"total_2017":{"0":1071172608},"total_2016":{"0":784732527},"yoy_2019":{"0":0.137164949},"yoy_2018":{"0":0.13090738},"yoy_2017":{"0":0.365016195},"rank_2018":{"0":7},"rank_2019":{"0":5},"share_2016":{"0":0.030046288},"share_2017":{"0":0.035641317},"share_2018":{"0":0.037034574},"share_2019":{"0":0.041049906},"gc_yoy_2019":{"0":0.050436263},"gc_yoy_2018":{"0":0.088362134},"gc_yoy_2017":{"0":0.150733851}}'
# Passing a literal JSON string to read_json is deprecated in recent pandas;
# wrapping it in StringIO behaves the same on old and new versions.
premium_growth_df = pd.read_json(StringIO(premium_growth_data))
current_rank = premium_growth_df.rank_2018[0]

# NOTE(review): premium_and_rank_df is not defined anywhere in this snippet.
# It has to exist already, with "premium" and "rank" columns, for the rest
# of the script to run.
x2 = premium_and_rank_df["premium"]
y2 = premium_and_rank_df["rank"]

# Mark the company's own rank so its bar can be located below.
y2_text = ["You" if elm == current_rank else elm for elm in y2]
# Bar labels: premium in millions, prefixed with a dollar sign.
text = "$" + round(((x2.astype(float)) / 1000000)).astype("str")

# Every bar is steelblue except the one matching current_rank.
# list.index raises ValueError if no rank equals current_rank.
colors = ['steelblue'] * len(y2)
colors[y2_text.index("You")] = 'crimson'

trace3 = go.Bar(x=x2, y=y2, marker_color=colors, text=text, textposition="outside", width=0.2, orientation="h", showlegend=False)
fig = go.Figure([trace3])
fig.show()
I achieved this:
I've been googling quite extensively, but I did not find a solution for:
"framing" the bar-plot with a white box
making the corners of the bars "round"

Related

Bar Chart Race with matplotlib: Bars changing colors

I’m trying to write a Bar Chart Race with matplotlib. I don’t use the "bar_chart_race" library because I need more options for customization later. But I use the basic explanations of the same author: https://www.dunderdata.com/blog/create-a-bar-chart-race-animation-in-python-with-matplotlib
It works fine, but the bars of the countries keep changing their colors. Each country must have its own color, and that color should not change when the country changes its position.
I think I know where the problem is: my dataset is much bigger than the dataset of the example (230 columns instead of 6) and I only want to show the ten highest values. For this I use ".nlargest(10)", and I think this is the problem. I also tried ".sort_values(ascending=False).head(10)", but that didn't work either. If I don't use "nlargest(10)", I get the Bar Chart Race for all 230 columns.
Furthermore I can't (and don't want to) manually define a color for each of the 230 columns in this dataset and over 400 columns in my next dataset. So this is not an option.
What can I do to keep the country colors the same?
Following a user's advice, here is a minimal code example that shows the problem:
import pandas as pd
from matplotlib.animation import FuncAnimation
import numpy as np
# Sample data: the "year" list becomes the index below; every other key is
# one column of values, one value per listed year.
data = {"year": [1950,1960,1970,1980,1990,2000,2010,2020,2030],
"USA" : [10,20,30,40,50,50,50,50,55],
"GB" : [5,10,15,45,60,70,80,90,95],
"FR" : [5,15,16,17,18,25,50,60,65],
"BEL" : [3,34,11,23,34,23,12,22,27],
"GER" : [5,15,16,23,34,40,23,50,55],
"POL" : [5,14,19,20,23,45,50,70,75],
"KAN" : [1,5,18,22,34,45,46,60,65],
"ISR" : [2,15,25,32,43,57,66,67,70],
"IND" : [3,12,16,17,23,25,45,50,55],
"CH" : [2,19,21,22,22,22,25,26,30],
"AUS" : [4,4,14,17,22,25,30,34,37],
}
# Wide frame: rows are years, columns are the eleven keys above.
df = pd.DataFrame(data).set_index("year")
def nice_axes(ax):
    """Apply the chart styling to *ax* in place.

    Sets a light grey face colour, hides tick marks, draws a white grid on
    the x axis behind the plotted artists and hides every spine.
    """
    ax.set_facecolor('.8')
    ax.tick_params(labelsize=8, length=0)
    ax.grid(True, axis='x', color='white')
    ax.set_axisbelow(True)
    # A plain loop: the original built a throwaway list purely for side effects.
    for spine in ax.spines.values():
        spine.set_visible(False)
def prepare_data(df, steps=5):
    """Insert interpolated rows between the rows of *df* for a smoother animation.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by ``year`` with one numeric column per series.
    steps : int, default 5
        Spacing between original rows after expansion; ``steps - 1`` new
        rows are inserted between each consecutive pair.

    Returns
    -------
    (df_expanded, df_rank_expanded) : tuple of pandas.DataFrame
        Linearly interpolated values, and the linearly interpolated
        row-wise ranks (ties broken by column order), both indexed by the
        forward-filled ``year``.
    """
    df = df.reset_index()
    # Spread the original rows out so the gaps can be filled in.
    df.index = df.index * steps
    last_idx = df.index[-1] + 1
    df_expanded = df.reindex(range(last_idx))
    # .ffill() replaces fillna(method='ffill'), which newer pandas deprecates.
    df_expanded['year'] = df_expanded['year'].ffill()
    df_expanded = df_expanded.set_index('year')
    # Rank before interpolating so the inserted rows get fractional ranks
    # and bars glide between positions instead of jumping.
    df_rank_expanded = df_expanded.rank(axis=1, method='first')
    df_expanded = df_expanded.interpolate()
    df_rank_expanded = df_rank_expanded.interpolate()
    return df_expanded, df_rank_expanded
# Expand the data once; update() reads both frames on every animation frame.
df_expanded, df_rank_expanded = prepare_data(df)
# Ten colours sampled evenly from viridis. update() hands this array to
# barh() unchanged, so a colour belongs to a bar position, not to a country.
# NOTE(review): `plt` is used here, but this snippet's imports do not include
# `import matplotlib.pyplot as plt`.
colors = plt.cm.viridis(np.linspace(0, 1, 10))
def init():
    """Reset the module-level axes; passed to FuncAnimation as ``init_func``."""
    ax.clear()
    nice_axes(ax)
def update(i):
    """Draw animation frame *i*: horizontal bars for the ten top-ranked columns."""
    # Iterate over a copy: removing a container also deletes it from
    # ax.containers, and mutating a list while looping over it skips entries.
    for bar in list(ax.containers):
        bar.remove()
    y = df_rank_expanded.iloc[i].nlargest(10)
    # Take the widths by the same labels as `y`. Two independent nlargest()
    # calls can disagree in order (ties, interpolated rows), which would pair
    # a bar position with another country's value.
    width = df_expanded.iloc[i].loc[y.index]
    # NOTE: `colors` is applied by position in `y`, not by country, so a
    # country changes colour whenever its rank changes. This is the behaviour
    # the question is about and is left as-is here.
    ax.barh(y=y, width=width, color=colors, tick_label=y.index)
# Figure and axes shared by init() and update() through module-level names.
# NOTE(review): `plt` is used here, but this snippet's imports do not include
# `import matplotlib.pyplot as plt`.
fig = plt.Figure(figsize=(8, 3), dpi=144)
ax = fig.add_subplot()
# One animation frame per row of the expanded data, 100 ms apart, played once.
anim = FuncAnimation(fig=fig, func=update, init_func=init, frames=len(df_expanded),
interval=100, repeat=False)
from IPython.display import HTML
# Render the animation as an HTML5 video string and wrap it for display.
# Presumably run in a Jupyter/IPython notebook, where the bare expression on
# the last line shows the video. TODO confirm.
html = anim.to_html5_video()
HTML(html)
I found a possible solution. A random generator assigns a fixed color to all countries. Since the choice of colors isn't the best, I'll have to create a color palette by hand later. But for now it works. The solution:
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.animation import FuncAnimation
import numpy as np
import random
# Sample data: the "year" list becomes the index below; every other key is
# one column of values, one value per listed year.
data = {"year": [1950,1960,1970,1980,1990,2000,2010,2020,2030],
"USA" : [10,20,30,40,50,50,50,50,55],
"GB" : [5,10,15,45,60,70,80,90,95],
"FR" : [5,15,16,17,18,25,50,60,65],
"BEL" : [3,34,11,23,34,23,12,22,27],
"GER" : [5,15,16,23,34,40,23,50,55],
"POL" : [5,14,19,20,23,45,50,70,75],
"KAN" : [1,5,18,22,34,45,46,60,65],
"ISR" : [2,15,25,32,43,57,66,67,70],
"IND" : [3,12,16,17,23,25,45,50,55],
"CH" : [2,19,21,22,22,22,25,26,30],
"AUS" : [4,4,14,17,22,25,30,34,37],
}
# Wide frame: rows are years, columns are the eleven keys above.
df = pd.DataFrame(data).set_index("year")
def nice_axes(ax):
    """Apply the chart styling to *ax* in place.

    Sets a light grey face colour, hides tick marks, draws a white grid on
    the x axis behind the plotted artists and hides every spine.
    """
    ax.set_facecolor('.8')
    ax.tick_params(labelsize=8, length=0)
    ax.grid(True, axis='x', color='white')
    ax.set_axisbelow(True)
    # A plain loop: the original built a throwaway list purely for side effects.
    for spine in ax.spines.values():
        spine.set_visible(False)
# Prepare data (expand the dataframe for a smoother animation)
def prepare_data(df, steps=10):
    """Insert interpolated rows between the rows of *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by ``year`` with one numeric column per series.
    steps : int, default 10
        Spacing between original rows after expansion; ``steps - 1`` new
        rows are inserted between each consecutive pair.

    Returns
    -------
    (df_expanded, df_rank_expanded) : tuple of pandas.DataFrame
        Linearly interpolated values, and the linearly interpolated
        row-wise ranks (ties broken by column order), both indexed by the
        forward-filled ``year``.
    """
    df = df.reset_index()
    # Spread the original rows out so the gaps can be filled in.
    df.index = df.index * steps
    last_idx = df.index[-1] + 1
    df_expanded = df.reindex(range(last_idx))
    # .ffill() replaces fillna(method='ffill'), which newer pandas deprecates.
    df_expanded['year'] = df_expanded['year'].ffill()
    df_expanded = df_expanded.set_index('year')
    # Rank before interpolating so the inserted rows get fractional ranks.
    df_rank_expanded = df_expanded.rank(axis=1, method='first')
    df_expanded = df_expanded.interpolate()
    df_rank_expanded = df_rank_expanded.interpolate()
    return df_expanded, df_rank_expanded
# Expand the data once; the colour setup and update() below read these frames.
df_expanded, df_rank_expanded = prepare_data(df)
# RGB to RGBA
def get_color(r, g, b):
    """Return the colour ``(r, g, b)`` as an RGBA tuple with alpha fixed at 1.0."""
    return (r, g, b, 1.0)
# Randomized colors: one RGBA colour per column, drawn once up front so each
# country keeps the same colour for the whole animation.
color_dict = {}
for country in df_expanded.columns:
    # The draw order (r, b, g) is kept from the original so a seeded run
    # produces the same colours as before.
    r = random.random()
    b = random.random()
    g = random.random()
    color_dict[country] = get_color(r, g, b)
def init():
    """Reset the module-level axes; passed to FuncAnimation as ``init_func``."""
    ax.clear()
    nice_axes(ax)
def update(i):
    """Draw animation frame *i*: bars for the ten top-ranked columns.

    Each bar is coloured through ``color_dict``, keyed by column name, so a
    country keeps its colour when its position changes.
    """
    # ax.clear() already discards the previous frame's bars, so the separate
    # remove-loop over ax.containers was redundant and has been dropped.
    ax.clear()
    nice_axes(ax)
    y = df_rank_expanded.iloc[i].nlargest(10)
    # Take the widths by the same labels as `y`. Two independent nlargest()
    # calls can disagree in order (ties, interpolated rows), which would pair
    # a bar position with another country's value.
    width = df_expanded.iloc[i].loc[y.index]
    # Color for each country
    bar_colors = [color_dict.get(country) for country in y.index]
    # Plot
    ax.barh(y=y, width=width, color=bar_colors, tick_label=y.index, alpha=1.0, align='center')
# Figure and axes shared by init() and update() through module-level names.
fig = plt.Figure(figsize=(8, 3), dpi=144)
ax = fig.add_subplot()
# One animation frame per row of the expanded data, 100 ms apart, played once.
anim = FuncAnimation(fig=fig, func=update, init_func=init, frames=len(df_expanded),
interval=100, repeat=False)
from IPython.display import HTML
# Render the animation as an HTML5 video string and wrap it for display.
# Presumably run in a Jupyter/IPython notebook, where the bare expression on
# the last line shows the video. TODO confirm.
html = anim.to_html5_video()
HTML(html)

Using a loop to plot labels on a scatter plot based on their value

Hi, I'm new to Python and would like to plot the names of the footballers on my scatter plot as labels if their Goals or npxG are greater than the average values I have calculated.
I wondered whether I could use a for/while loop to go through the data and plot the relevant players names?
I've struggled to figure out the most efficient way for this to be done.
Please see the scatter plot and code below for additional context. Any help would be greatly appreciated, Thanks.
df = pd.read_csv('C:/Users/alexo/Documents/Data/football data/shooting_top5_leagues_21_22.csv',encoding = 'ISO-8859-1')
# Forwards with at least fifteen 90s only.
striker_df = df.loc[(df['Pos']=='FW') & (df['90s']>= 15)]
sns.set_style('darkgrid')
sns.set(rc = {'figure.figsize':(15,8)})
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally, so the original call fails on current releases.
graph = sns.scatterplot(
    x=striker_df.Gls,
    y=striker_df.npxG_p90,
    hue=striker_df.League,
    size=striker_df.npxG_pSh,
    edgecolor='black',
)
# average line x axis (hard-coded value calculated beforehand)
graph.axvline(9.751677852348994,c='grey',ls='--')
# average line y axis (hard-coded value calculated beforehand)
graph.axhline(0.34438111920973147,c='grey',ls='--')
#adding label names for specific players
#title
plt.title('Best Strikers across Europes Top 5 leagues 21/22',size=17,c='black')
# add credits
Notes = 'By Alex Orlandini'
CREDIT_1 = "data: statsbomb via fbref"
graph.text(
    36, 0.1, f"{Notes}\n{CREDIT_1}", size=10,
    color="#000000",
    ha="right")
enter image description here
Yes, you can loop through specific players and add the arrow and text.
Just a matter of getting the x, y coordinate of the data point, then deciding where to place the label. I had to pull my own data since you didn't share yours.
I would also avoid hard coding that average. I'd have that as a calculated variable.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#df = pd.read_csv('C:/Users/alexo/Documents/Data/football data/shooting_top5_leagues_21_22.csv',encoding = 'ISO-8859-1')
df = pd.read_html('https://fbref.com/en/comps/Big5/shooting/players/Big-5-European-Leagues-Stats', header=1)[0]
# Drop rows whose 'Rk' cell is the literal header text. The .copy() makes
# the column assignments below act on an independent frame, not a slice.
df = df[df['Rk'].ne('Rk')].copy()
df['npxG'] = df['npxG'].astype(float)
df['90s'] = df['90s'].astype(float)
df['npxG/Sh'] = df['npxG/Sh'].astype(float)
df['Gls'] = df['Gls'].astype(int)
df['npxG_p90'] = df['npxG'] / df['90s']
# `n` must be passed by keyword: pandas 2 made every str.split argument
# after `pat` keyword-only. Column 1 is everything after the first space.
df['League'] = df['Comp'].str.split(' ', n=1, expand=True)[1]
df = df.rename(columns={'npxG/Sh':'npxG_pSh'})
# Forwards with at least fifteen 90s; copied so later snippets can add
# columns without a SettingWithCopyWarning.
striker_df = df.loc[(df['Pos']=='FW') & (df['90s']>= 15)].copy()
sns.set_style('darkgrid')
sns.set(rc = {'figure.figsize':(15,8)})
# Column names are kept in variables so the averages, offsets and labels
# below all refer to the same axes.
x_axis_column = 'Gls'
y_axis_column = 'npxG_p90'
# Colour encodes the league, marker size encodes npxG per shot.
graph = sns.scatterplot(x = striker_df[x_axis_column],
y = striker_df[y_axis_column],
hue = striker_df.League,
size = striker_df.npxG_pSh,
edgecolor = 'black')
# averageline x axis
# Computed from the plotted data instead of being hard-coded.
avgX = striker_df[x_axis_column].mean()
graph.axvline(avgX, c='grey', ls='--')
# average line yaxis
avgY = striker_df[y_axis_column].mean()
graph.axhline(avgY, c='grey', ls='--')
# Label offsets: 10% of each axis' data range.
xOffset = (striker_df[x_axis_column].max() - striker_df[x_axis_column].min()) *.10
yOffset = (striker_df[y_axis_column].max() - striker_df[y_axis_column].min()) *.10
#adding label names for specific players
for player in ['Robert Lewandowski', 'Kylian Mbappé', 'Patrik Schick', 'Arnaut Groeneveld']:
    # One lookup per player instead of filtering the frame twice.
    matches = striker_df.loc[striker_df['Player'] == player]
    if matches.empty:
        # The table is fetched live and filtered, so a listed player may be
        # absent; skip them rather than fail with IndexError on .iloc[0].
        continue
    point = matches.iloc[0]
    # Label coordinate, Custom arrow
    x = point[x_axis_column]
    y = point[y_axis_column]
    plt.annotate(player, xy=(x, y), xytext=(x + xOffset, y + yOffset),
                 horizontalalignment="center",
                 arrowprops=dict(arrowstyle='->', lw=2, color='black')
                 )
#title
plt.title('Best Strikers across Europes Top 5 leagues 21/22',size=17,c='black')
# add credits
Notes = 'By Alex Orlandini'
CREDIT_1 = "data: statsbomb via fbref"
graph.text(
    36, 0.1, f"{Notes}\n{CREDIT_1}", size=10,
    color="#000000",
    ha="right")
Output:
Or you can iterate through a dataframe:
#adding label names for specific players
# assign() returns a new frame, so no column is set on a filtered slice.
striker_df = striker_df.assign(
    calc=striker_df[x_axis_column] + striker_df[y_axis_column]
)
striker_df = striker_df.sort_values('calc', ascending = False)
# Label the eight players with the highest combined score.
top_players = striker_df.head(8)
for idx, row in top_players.iterrows():
    # Label coordinate, Custom arrow
    player = row['Player']
    x = row[x_axis_column]
    y = row[y_axis_column]
    # Labels are shifted horizontally only; the arrow stays level.
    plt.annotate(player, xy=(x, y), xytext=(x + xOffset, y),
                 horizontalalignment="center",
                 arrowprops=dict(arrowstyle='->', lw=2, color='black')
                 )
To get something like this:

Plotly: How to annotate end of multiple lines with text and marker colors that match the lines?

The post Plotly: Annotate marker at the last value in line chart
shows how to annotate end of lines with text and an individual marker. But how can you do the same thing for multiple lines and at the same time set the associated text and markers to match the color of all lines?
Example plot:
Code with sample dataset:
# imports
import pandas as pd
import plotly.express as px
# data: the stocks sample bundled with plotly express, and the T10 palette
df = px.data.stocks()
colors = px.colors.qualitative.T10

# plotly: one line per column other than 'date'
stock_columns = [name for name in df.columns if name != 'date']
fig = px.line(
    df,
    x='date',
    y=stock_columns,
    template='plotly_dark',
    color_discrete_sequence=colors,
    title='Stocks',
)
fig.show()
You can address the features of each trace and build new traces for your end markers and text through:
for i, d in enumerate(fig.data):
fig.add_scatter(x=[d.x[-1]], y = [d.y[-1]], [...])
If you've specified colors when building your figure you can also retrieve trace colors and set colors for markers and fonts like this:
textfont = dict(color=d.line.color),
marker = dict(color = d.line.color, size = 12)
Plot:
The figure was a bit crowded, so I dropped one of the stocks. I also made room for the annotations by changing the position of the legend through fig.layout.legend.x = -0.3
Complete code:
# imports
import pandas as pd
import plotly.express as px
# data
df = px.data.stocks()
df = df.drop('AMZN', axis = 1)
colors = px.colors.qualitative.T10

# plotly
fig = px.line(df,
              x = 'date',
              y = [c for c in df.columns if c != 'date'],
              template = 'plotly_dark',
              color_discrete_sequence = colors,
              title = 'Stocks',
              )

# move legend
fig.layout.legend.x = -0.3

# add traces for annotations and text for end of lines
# fig.data is captured once when the loop starts, so the marker traces added
# inside the loop are not themselves visited. The enumerate index was unused.
for d in fig.data:
    fig.add_scatter(x=[d.x[-1]], y = [d.y[-1]],
                    mode = 'markers+text',
                    text = d.y[-1],
                    # reuse the line's own colour for both text and marker
                    textfont = dict(color=d.line.color),
                    textposition='middle right',
                    marker = dict(color = d.line.color, size = 12),
                    # same legend group, so the marker toggles with its line
                    legendgroup = d.name,
                    showlegend=False)

fig.show()

how to plot a range with a line in the center with Plotly, in Python [duplicate]

How can I use Plotly to produce a line plot with a shaded standard deviation? I am trying to achieve something similar to seaborn.tsplot. Any help is appreciated.
The following approach is fully flexible with regards to the number of columns in a pandas dataframe and uses the default color cycle of plotly. If the number of lines exceed the number of colors, the colors will be re-used from the start. As of now px.colors.qualitative.Plotly can be replaced with any hex color sequence that you can find using px.colors.qualitative:
Alphabet = ['#AA0DFE', '#3283FE', '#85660D', '#782AB6', '#565656', '#1...
Alphabet_r = ['#FA0087', '#FBE426', '#B00068', '#FC1CBF', '#C075A6', '...
[...]
Complete code:
# imports
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np
# sample data in a pandas dataframe
np.random.seed(1)  # fixed seed so the example is reproducible
# (low, high) bounds of the uniform draws for each column, in draw order
_bounds = {'A': (-1, 2), 'B': (-4, 3), 'C': (-1, 3)}
df = pd.DataFrame({
    name: np.random.uniform(low=low, high=high, size=25).tolist()
    for name, (low, high) in _bounds.items()
})
# running totals turn the independent draws into random-walk style series
df = df.cumsum()
# define colors as a list
# (this sequence is converted from hex to rgba strings further down)
colors = px.colors.qualitative.Plotly
# convert plotly hex colors to rgba to enable transparency adjustments
def hex_rgba(hex, transparency):
    """Convert a six-digit hex colour to an ``(r, g, b, a)`` tuple.

    Parameters
    ----------
    hex : str
        Colour such as ``'#AA0DFE'``; a leading ``#`` is optional.
    transparency : float
        Alpha value, placed unchanged in the fourth position.

    Returns
    -------
    tuple
        ``(red, green, blue, transparency)`` with integer channels 0-255.

    Raises
    ------
    ValueError
        If a channel is not valid hexadecimal.
    """
    col_hex = hex.lstrip('#')
    # Two hex digits per channel, at offsets 0, 2 and 4.
    red, green, blue = (int(col_hex[i:i + 2], 16) for i in (0, 2, 4))
    return (red, green, blue, transparency)
# Every palette colour as an (r, g, b, a) tuple with alpha 0.2.
rgba = [hex_rgba(c, transparency=0.2) for c in colors]
# str() of a tuple is "(r, g, b, a)", so prefixing 'rgba' gives strings of
# the form 'rgba(170, 13, 254, 0.2)'.
colCycle = ['rgba'+str(elem) for elem in rgba]
# Make sure the colors run in cycles if there are more lines than colors
def next_col(cols):
    """Yield the items of *cols* in order, restarting from the first forever.

    Raises
    ------
    ValueError
        On the first ``next()`` call if *cols* yields nothing; the original
        loop would spin forever without producing a value.
    """
    while True:
        produced = False
        for col in cols:
            produced = True
            yield col
        if not produced:
            raise ValueError("next_col() needs at least one colour to cycle through.")
line_color = next_col(cols=colCycle)

# plotly figure
fig = go.Figure()

# add line and shaded area for each series and standard deviation
for col in df:
    new_col = next(line_color)
    x = list(df.index.values + 1)
    y1 = df[col]
    # The standard deviation is the same for every point of a column, so it
    # is computed once instead of once per element (twice over).
    std = np.std(df[col])
    y1_upper = (y1 + std).tolist()
    # The lower bound is reversed so upper + lower traces a closed outline.
    y1_lower = (y1 - std).tolist()[::-1]

    # standard deviation area
    fig.add_traces(go.Scatter(x=x + x[::-1],
                              y=y1_upper + y1_lower,
                              fill='tozerox',
                              fillcolor=new_col,
                              line=dict(color='rgba(255,255,255,0)'),
                              showlegend=False,
                              name=col))

    # line trace
    fig.add_traces(go.Scatter(x=x,
                              y=y1,
                              line=dict(color=new_col, width=2.5),
                              mode='lines',
                              name=col)
                   )

# set x-axis
fig.update_layout(xaxis=dict(range=[1, len(df)]))
fig.show()
I was able to come up with something similar. I post the code here to be used by someone else or for any suggestions for improvements.
import matplotlib
import random
import plotly.graph_objects as go
import numpy as np
#random color generation in plotly
hex_colors_dic = {}
rgb_colors_dic = {}
hex_colors_only = []
# `hex_code` instead of `hex`, so the builtin hex() is not shadowed.
for name, hex_code in matplotlib.colors.cnames.items():
    hex_colors_only.append(hex_code)
    hex_colors_dic[name] = hex_code
    rgb_colors_dic[name] = matplotlib.colors.to_rgb(hex_code)

data = [[1, 3, 5, 4],
        [2, 3, 5, 4],
        [1, 1, 4, 5],
        [2, 3, 5, 4]]

#calculating mean and standard deviation (column-wise, across the rows)
mean = np.mean(data, axis=0)
std = np.std(data, axis=0)
# One x position per column, derived from the data instead of a literal 4.
x = np.arange(len(mean))

#draw figure
fig = go.Figure()
c = random.choice(hex_colors_only)
# Trace order matters: each 'tonexty' fill shades the area between a trace
# and the one added just before it.
fig.add_trace(go.Scatter(x=x, y=mean+std,
                         mode='lines',
                         line=dict(color=c,width =0.1),
                         name='upper bound'))
fig.add_trace(go.Scatter(x=x, y=mean,
                         mode='lines',
                         line=dict(color=c),
                         fill='tonexty',
                         name='mean'))
fig.add_trace(go.Scatter(x=x, y=mean-std,
                         mode='lines',
                         line=dict(color=c, width =0.1),
                         fill='tonexty',
                         name='lower bound'))
fig.show()
Great custom responses posted by others. In case someone is interested in code from the official plotly website, see here: https://plotly.com/python/continuous-error-bars/
I wrote a function to extend plotly.express.line with the same high-level interface as Plotly Express. The line function (source code below) is used in exactly the same way as plotly.express.line, but allows for continuous error bands through the argument error_y_mode, which can be either 'band' or 'bar'. In the second case it produces the same result as the original plotly.express.line. Here is a usage example:
import plotly.express as px
df = px.data.gapminder().query('continent=="Americas"')
# .copy() so the new column below is added to an independent frame and not
# to a filtered slice.
df = df[df['country'].isin({'Argentina','Brazil','Colombia'})].copy()
df['lifeExp std'] = df['lifeExp']*.1 # Invent some error data...

# A tuple instead of a set, so the two figures always appear in this order.
for error_y_mode in ('band', 'bar'):
    fig = line(
        data_frame = df,
        x = 'year',
        y = 'lifeExp',
        error_y = 'lifeExp std',
        error_y_mode = error_y_mode, # Here you say `band` or `bar`.
        color = 'country',
        title = f'Using error {error_y_mode}',
        markers = '.',
    )
    fig.show()
which produces the following two plots:
The source code of the line function that extends plotly.express.line is this:
import plotly.express as px
import plotly.graph_objs as go
def _hex_to_rgba(hex_color, alpha='.3'):
    """Return ``'rgba(r,g,b,alpha)'`` (no spaces) for a six-digit hex colour."""
    digits = hex_color.lstrip('#')
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({red},{green},{blue},{alpha})"


def line(error_y_mode=None, **kwargs):
    """Extension of `plotly.express.line` to use error bands.

    Parameters
    ----------
    error_y_mode : {'bar', 'bars', 'band', 'bands', None}
        ``'bar'``/``'bars'``/``None`` return exactly what ``px.line``
        produces. ``'band'``/``'bands'`` replace the error bars with a
        filled band around each line.
    **kwargs
        Forwarded unchanged to ``plotly.express.line``. ``error_y`` is
        required in band mode.

    Returns
    -------
    The figure built by ``px.line``, with one band trace per line in band mode.

    Raises
    ------
    ValueError
        If ``error_y_mode`` is not an accepted value, or band mode is
        requested without ``error_y``.
    """
    ERROR_MODES = {'bar','band','bars','bands',None}
    if error_y_mode not in ERROR_MODES:
        raise ValueError(f"'error_y_mode' must be one of {ERROR_MODES}, received {repr(error_y_mode)}.")
    if error_y_mode in {'bar','bars',None}:
        return px.line(**kwargs)

    # Band mode from here on.
    if 'error_y' not in kwargs:
        raise ValueError("If you provide argument 'error_y_mode' you must also provide 'error_y'.")
    # Build the figure twice: once with error bars, only to read the error
    # arrays plotly computed, and once without them as the base to return.
    figure_with_error_bars = px.line(**kwargs)
    fig = px.line(**{arg: val for arg, val in kwargs.items() if arg != 'error_y'})
    for data in figure_with_error_bars.data:
        x = list(data['x'])
        error = data['error_y']
        y_upper = list(data['y'] + error['array'])
        # Use the symmetric error unless a separate lower error is present.
        lower_error = error['array'] if error['arrayminus'] is None else error['arrayminus']
        y_lower = list(data['y'] - lower_error)
        # NOTE: assumes the line colour is a '#RRGGBB' hex string.
        color = _hex_to_rgba(data['line']['color'])
        fig.add_trace(
            go.Scatter(
                # upper bound left-to-right, then lower bound right-to-left,
                # which traces a closed outline for fill='toself'
                x = x + x[::-1],
                y = y_upper + y_lower[::-1],
                fill = 'toself',
                fillcolor = color,
                line = dict(
                    color = 'rgba(255,255,255,0)'
                ),
                hoverinfo = "skip",
                showlegend = False,
                legendgroup = data['legendgroup'],
                xaxis = data['xaxis'],
                yaxis = data['yaxis'],
            )
        )
    # Reorder data as said here: https://stackoverflow.com/a/66854398/8849755
    # The first half of fig.data holds the lines, the second half the bands;
    # interleave them so each band sits directly before its line.
    half = len(fig.data) // 2
    reordered_data = []
    for i in range(half):
        reordered_data.append(fig.data[i + half])
        reordered_data.append(fig.data[i])
    fig.data = tuple(reordered_data)
    return fig

plotly graph issue with legends

I am having an issue with graph resizing due to legends when using Plotly. This is the code:
# Both traces are plotted against the same x column; only y differs.
random_x = df['column1']
random_x1 = df['column1']
random_y, random_y1 = df['column2'], df['column3']

# The long trace names are what end up in the legend.
trace = go.Scatter(x=random_x, y=random_y, name='abcdefghijklmnop......')
trace1 = go.Scatter(x=random_x1, y=random_y1, name='abcdefghijklmnopadfdsfsdff......')

data = [trace, trace1]
iplot(data, filename='basic-line')
It gives me the graph but since my legend characters are long, it reduces the size of my actual graph. I want the legends to either come at the bottom or go further to the top
# Lay the legend entries out in a row ("h") rather than a column, so long
# names no longer narrow the plotting area.
layout = go.Layout(legend={"orientation": "h"})
figure = go.Figure(data=data, layout=layout)
iplot(figure, filename='basic-line')

Categories