ax.annotate not annotating in correct order? - python

I have the following code to create annotations for a plot. I basically want to plot the win ratio and the win percentage for each page type. But the annotations are labeling in an incorrect order.
Here's some reproducible code. Does anyone know why the labels are not in the correct order?
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# One row per (page_type, signup_field) pair:
# [page_type, signup_field, win_count, total_wins, win_percent]
data = [
    ['Cat page', 'No Fields', 0, 2, 0.000000],
    ['Cat page', 'Fields Included', 2, 2, 1.000000],
    ['Web page', 'No Fields', 3, 6, 0.500000],
    ['Web page', 'Fields Included', 3, 6, 0.500000],
    ['Home page', 'No Fields', 5, 13, 0.384615],
    ['Home page', 'Fields Included', 8, 13, 0.615385],
]
data = pd.DataFrame(
    data,
    columns=['page_type', 'signup_field', 'win_count', 'total_wins', 'win_percent'],
)

# Pin both category orders explicitly so the plot and the label lookup below
# are guaranteed to agree (order of first appearance in the data).
page_order = list(data['page_type'].unique())
field_order = list(data['signup_field'].unique())

plt.figure(figsize=(8, 7))
ax = sns.barplot(
    x="win_count",
    y="page_type",
    hue="signup_field",
    order=page_order,
    hue_order=field_order,
    data=data,
)

# Look rows up by key instead of by a running counter: the bars are drawn one
# hue level at a time (every page for the first hue, then every page for the
# second), which is NOT the row order of the DataFrame.
rows = {(r.signup_field, r.page_type): r for r in data.itertuples(index=False)}

# ax.containers holds one bar container per hue level, in hue order; inside a
# container the bars follow the category (page) order.
for field, container in zip(field_order, ax.containers):
    for page, patch in zip(page_order, container):
        row = rows[(field, page)]
        w, h = patch.get_width(), patch.get_height()
        # A missing bar has a NaN width; anchor its label at x == 0.
        w = 0 if np.isnan(w) else float(np.round(w, 3))
        y = patch.get_y()
        ax.text(
            w,
            h / 2 + y,
            '({},{}/{})'.format(row.win_percent, row.win_count, row.total_wins),
            va='center',
        )
Here's an image of the incorrect labeling. For example, for Home page (No Fields) the label should be (0.384615,5/13), but the bar is showing the label that belongs to Web page instead.

Related

Hiding Duplicate Legend in Plotly

I'm new to Plotly and I'm trying to make two different graphs and show them one at a time using buttons. However, when I do this the legend entries are duplicated, which makes the data hard to read. Here's the code that I'm running right now:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly as ply
import plotly.express as px
import plotly.graph_objects as go
url = "https://raw.githubusercontent.com/m23chaffee/DS100-Repository/main/Aluminum%20Alloy%20Data%20Set.csv"
alloy = pd.read_csv('https://raw.githubusercontent.com/m23chaffee/DS100-Repository/main/Aluminum%20Alloy%20Data%20Set.csv')
del alloy['temper']
alloy = alloy.rename(columns={'aluminum_alloy':'Alloy Number',
'modulus_elastic': 'Elastic Modulus',
'modulus_shear': 'Shear Modulus',
'strength_yield': 'Yield Strength',
'strength_tensile': 'Tensile Strength'
})
bar1 = px.bar(alloy,
x = "Alloy Number",
y = ["Elastic Modulus", "Shear Modulus","Yield Strength","Tensile Strength"],
barmode = 'group',
width = 1100,
height =500,
orientation = 'v',
color_discrete_sequence = px.colors.qualitative.Pastel,
labels={"value": "Data Values"},
template = 'seaborn').update_traces(legendgroup="group").update_layout(showlegend=False)
line1 = px.line(alloy,
x = "Alloy Number",
y = ["Elastic Modulus", "Shear Modulus","Yield Strength","Tensile Strength"],
width = 1100,
height =500,
orientation = 'v',
color_discrete_sequence = px.colors.qualitative.Pastel,
labels={"value": "Data Values"},
template = 'seaborn').update_traces(legendgroup="group", visible = 'legendonly').update_layout(showlegend=False)
# Add buttons
fig.update_layout(
updatemenus=[
dict(
type = "buttons",
direction = "left",
buttons=list([
dict(
args=['type', 'bar'],
label="Bar Graph",
method="restyle",
),
dict(
args=["type", "line"],
label="Line Graph",
method="restyle"
)
]),
pad={"r": 10, "t": 10},
showactive=True,
x=0.11,
xanchor="left",
y=1.1,
yanchor="middle"
),
]
)
fig.show()
and the result of the image would look like this:
Result of the code above
Attempted Solution
I tried to hide the duplicates by updating the traces, following the documentation, but it didn't work for me. I also found a similar Stack Overflow post from 8 years ago and tried its suggestion, but it didn't change anything in my graph.

How to set order of the nodes in Sankey Diagram Plotly

I am trying to write a loop that produces a different Sankey diagram on each iteration. The problem is that, because of Plotly's automatic layout optimization, the nodes end up in different positions from one diagram to the next. I would like to set the standard order to be [Formal, Informal, Unemployed, Inactive].
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
df = pd.read_csv(path, delimiter=",")
Lista_Paises = df["code"].unique().tolist()
Lista_DF = []
for x in Lista_Paises:
DF_x = df[df["code"] == x]
Lista_DF.append(DF_x)
def grafico(df):
    """Turn a frame of labour-status flows into the lists a Sankey plot needs.

    The input must have ``Source``, ``Target`` and ``Value`` columns. The
    frame is copied by ``astype`` before any column is added, so the caller's
    frame is left untouched.

    Returns a 5-tuple ``(TAGS, Origen, Destino, Valor, Color)``:

    * ``TAGS``    -- the distinct ``Source`` labels in sorted-row order,
      repeated twice (once for the source side, once for the target side);
    * ``Origen``  -- source node index of each link;
    * ``Destino`` -- target node index of each link, offset by the number of
      distinct source indices;
    * ``Valor``   -- link values as floats;
    * ``Color``   -- link colour, chosen from the link's ``Source`` label.
    """
    node_index = {"Formal": 0, "Informal": 1, "Unemployed": 2, "Inactive": 3}
    link_colour = {
        "Formal": "#9FB5D5",
        "Informal": "#E3EEF9",
        "Unemployed": "#E298AE",
        "Inactive": "#FCEFBC",
    }

    def category(status):
        # Labels outside the four known statuses yield None.
        return node_index.get(status)

    def color(status):
        # Labels outside the four known statuses yield None.
        return link_colour.get(status)

    flows = df.astype({"Source": "category", "Value": "float", "Target": "category"})
    flows["Source_cat"] = flows["Source"].apply(category).astype("int")
    flows["Target_cat"] = flows["Target"].apply(category).astype("int")
    flows["Color"] = flows["Source"].apply(color).astype("str")
    flows = flows.sort_values(by=["Source_cat", "Target_cat"])

    # Target nodes are numbered after all the source nodes.
    offset = flows["Source_cat"].nunique()
    source_labels = flows["Source"].unique().tolist()
    flows["out"] = flows["Target_cat"] + offset

    return (
        source_labels + source_labels,
        flows["Source_cat"].tolist(),
        flows["out"].tolist(),
        flows["Value"].tolist(),
        flows["Color"].tolist(),
    )
def Sankey(TAGS: object, Origen: object, Destino: object, Valor: object, Color: object, titulo: str,
           output_dir: str = "/Users/agudelo/Desktop/GRAFICOS PNUD/Graficas/MUJERES/") -> object:
    """Draw one Sankey diagram and save it as ``<titulo>.png``.

    Parameters
    ----------
    TAGS : node labels, source-side nodes first, then target-side nodes.
    Origen, Destino, Valor, Color : per-link source node index, target node
        index, value and colour.
    titulo : used in the figure title (``"<titulo>-Mujeres"``) and as the
        file name stem.
    output_dir : directory the PNG is written to.

    Returns
    -------
    None. The only effect is the image written through the kaleido engine.

    The node positions and colours below are laid out for exactly eight
    nodes (four on each side).
    NOTE(review): inputs with fewer than four distinct source labels produce
    fewer nodes than positions -- confirm against the data.
    """
    import os

    link = dict(source=Origen, target=Destino, value=Valor, color=Color)

    # One (x, y) pair per node. y grows downwards in plotly, so listing the
    # values in increasing order places the nodes top-to-bottom in label
    # order. Positions are kept just off 0 and 1 because exact 0/1 values
    # are reported to make the layout misbehave.
    column_y = [0.001, 0.25, 0.5, 0.75]
    node = dict(
        x=[0.001] * 4 + [0.999] * 4,
        y=column_y + column_y,
        label=TAGS,
        pad=35,
        thickness=10,
        color=["#305CA3", "#C1DAF1", "#C9304E", "#F7DC70", "#305CA3", "#C1DAF1", "#C9304E", "#F7DC70"],
    )

    fig = go.Figure(go.Sankey(link=link, node=node, arrangement='snap'))
    fig.update_layout(title_text=titulo + "-" + "Mujeres", font_size=10)
    fig.write_image(os.path.join(output_dir, str(titulo) + ".png"), engine="kaleido")
for y in Lista_DF:
TAGS, Origen, Destino, Valor, Color = grafico(y)
titulo = str(y["code"].unique())
titulo = titulo.replace("[", "")
titulo = titulo.replace("]", "")
titulo = titulo.replace("'", "")
Sankey(TAGS, Origen, Destino, Valor, Color, titulo)
The expected result should be.
The expected result due to the correct order:
The real result i am getting is:
I had a similar problem earlier. I hope this will work for you. As I did not have your data, I created some dummy data. Sorry about the looooong explanation. Here are the steps that should help you reach your goal...
This is what I did:
Order the data and sort it - used pd.Categorical to set the order and then df.sort to sort the data so that the input is sorted by source and then destination.
For the sankey node, you need to set the x and y positions. x=0, y=0 starts at top left. This is important as you are telling plotly the order you want the nodes. One weird thing is that it sometimes errors if x or y is at 0 or 1. Keep it very close, but not the same number... wish I knew why
For the other x and y entries, I used ratios as my total adds up to 285. For eg. Source-Informal starts at x = 0.001 and y = 75/285 as Source-Formal = 75 and this will start right after that
Based on step 1, the link -> source and destination should also be sorted. But, pls do check.
Note: I didn't color the links, but think you already have achieved that...
Hope this helps resolve your issue...
My data - sankey.csv
source,destination,value
Formal,Formal,20
Formal,Informal, 10
Formal,Unemployed,30
Formal,Inactive,15
Informal,Formal,20
Informal,Informal,15
Informal,Unemployed,25
Informal,Inactive,25
Unemployed,Formal,5
Unemployed,Informal,10
Unemployed,Unemployed,10
Unemployed,Inactive,5
Inactive,Formal,30
Inactive,Informal,20
Inactive,Unemployed,20
Inactive,Inactive,25
The code
import plotly.graph_objects as go
import pandas as pd
df = pd.read_csv('sankey.csv')  # Read the CSV shown above

# Fixed display order for both sides of the diagram.
status_order = ['Formal', 'Informal', 'Unemployed', 'Inactive']

# Ordered categoricals make sort_values follow status_order rather than
# alphabetical order.
df['source'] = pd.Categorical(df['source'], status_order)
df['destination'] = pd.Categorical(df['destination'], status_order)

# Both calls return a new frame, so the result has to be assigned.
df = df.sort_values(['source', 'destination']).reset_index(drop=True)

mynode = dict(
    pad = 15,
    thickness = 20,
    line = dict(color = "black", width = 0.5),
    # Nodes 0-3 are the source side, nodes 4-7 the destination side.
    label = status_order + status_order,
    # Kept just off 0 and 1: exact 0/1 positions sometimes raise errors.
    x = [0.001, 0.001, 0.001, 0.001, 0.999, 0.999, 0.999, 0.999],
    # Each node starts where the previous node on its side ends, expressed
    # as a fraction of the grand total (285 for the sample CSV).
    y = [0.001, 75/285, 160/285, 190/285, 0.001, 75/285, 130/285, 215/285],
    color = ["#305CA3", "#C1DAF1", "#C9304E", "#F7DC70", "#305CA3", "#C1DAF1", "#C9304E", "#F7DC70"])

# Derive the link endpoints from the sorted rows so they can never drift out
# of step with `value`; destination nodes come after the source nodes.
mylink = dict(
    source = df['source'].cat.codes.tolist(),
    target = (df['destination'].cat.codes + len(status_order)).tolist(),
    value = df['value'].to_list())

fig = go.Figure(data=[go.Sankey(
    arrangement='snap',
    node = mynode,
    link = mylink
)])
fig.update_layout(title_text="Basic Sankey Diagram", font_size=20)
fig.show()
The output

How to update figure in same window dynamically without opening and redrawing in new tab?

I am creating a 3D scatter plot based on a pandas DataFrame, and I want to redraw it with slightly updated data whenever the user presses a button in my program. I almost have this working, except that the updated figure is drawn in a newly opened tab, when really I just want my original, existing figure to be updated.
Here is my code. First I initialize the plot with 'version 1' of the data, then I set up a simple while loop to wait for the user to request an update. Then ideally once they enter input to ask for the update, I just re-draw everything in the same tab that is open. But instead a new tab is opened (which redraws the data correctly at least).
fig = go.Figure(data=[go.Scatter3d(x=df['x'],y=df['y'],z=df['z'],mode='markers', marker=dict(
size=4,
color=df['y'], # set color to an array/list of desired values
colorscale='Viridis', # choose a colorscale
opacity=0.3
))])
# Column max and mins for plotting:
xmax = df_1.max(axis=0)['x']; xmin = df_1.min(axis=0)['x']
ymax = df_1.max(axis=0)['y']; ymin = df_1.min(axis=0)['y']
zmax = df_1.max(axis=0)['z']; zmin = df_1.min(axis=0)['z']
fig.update_layout(
scene = dict(xaxis = dict(nticks=4, range=[xmin,xmax],),
yaxis = dict(nticks=4, range=[ymin,ymax],),
zaxis = dict(nticks=4, range=[zmin,zmax],),))
f2 = go.FigureWidget(fig)
f2.show()
#fig.show()
while True:
choice = input("> ")
choice = choice.lower() #Convert input to "lowercase"
if choice == 'exit':
print("Good bye.")
break
if choice == 'w':
print("W, moving forward")
cube_origin = cube_origin + np.array([0.1,0,0])
df_cube = createCubeMesh(cube_size, cube_density, cube_origin)
new_df = df_scene_orig.copy()
new_df = new_df.append(df_cube)
fig = go.Figure(data=[go.Scatter3d(x=new_df['x'],y=new_df['y'],z=new_df['z'],mode='markers', marker=dict(
size=4,
color=new_df['y'], # set color to an array/list of desired values
colorscale='Viridis', # choose a colorscale
opacity=0.3
))])
f2 = go.FigureWidget(fig)
f2.show()
I based my code on another answer that said to use go.FigureWidget(fig), but it doesn't seem to work as intended.
Edit
Instead of me using f2.show() at the end, I just want a simple thing analogous to f2.update() that redraws.
This is the case you want.
Everywhere in this page that you see fig.show(), you can display the same figure in a Dash application by passing it to the figure argument of the Graph component from the built-in dash_core_components package like this:
import plotly.graph_objects as go
fig = go.Figure(
data=[go.Scatter(
mode="markers+text",
x=[10, 20],
y=[20, 25],
text=["Point A", "Point B"]
)],
layout=dict(height=400, width=400, template="none")
)
import dash
import dash_core_components as dcc
import dash_html_components as html
app = dash.Dash()
app.layout = html.Div([
dcc.Graph(figure=fig)
])
app.run_server(debug=True, use_reloader=False)
reference: https://plotly.com/python/figure-introspection/
Here is some code that is closer to what you need:
import plotly as py
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import pandas as pd
import numpy as np
py.offline.init_notebook_mode(connected=True)
app = JupyterDash('SimpleExample')
app.layout = html.Div([
dcc.Dropdown(id='dropdown', options=[
{'label': 'W', 'value': 'W'},
{'label': 'exit', 'value': 'exit'}],
value='exit'),
dcc.Graph(id='graph-court')
])
def random_data():
    """Return a reproducible 3x3 sample frame with columns ``x``, ``y``, ``z``.

    The global NumPy random generator is re-seeded with 4 on every call, so
    the same integers (drawn from ``[0, 50)``) come back each time. The first
    row is then overwritten with zeros.
    """
    np.random.seed(4)
    axes = ['x', 'y', 'z']
    frame = pd.DataFrame(
        np.random.randint(50, size=(3, len(axes))),
        columns=axes,
    )
    frame.iloc[0] = 0
    return frame
df = random_data()
def create_figure(df):
    """Build a 3D marker scatter of ``df`` and return it as a FigureWidget.

    ``df`` must have ``x``, ``y`` and ``z`` columns. Markers are coloured by
    ``y`` on the Viridis scale, and each axis range is fixed to the minimum
    and maximum of its own column.
    """
    marker = dict(size=10, color=df['y'], colorscale='Viridis', opacity=0.3)
    points = go.Scatter3d(x=df['x'], y=df['y'], z=df['z'], mode='markers', marker=marker)
    fig = go.Figure(data=[points])

    # Column-wise extremes, used to pin the range of each axis.
    highs = df.max(axis=0)
    lows = df.min(axis=0)
    scene = {
        axis + 'axis': dict(nticks=4, range=[lows[axis], highs[axis]])
        for axis in ('x', 'y', 'z')
    }
    fig.update_layout(scene=scene)

    return go.FigureWidget(fig)
@app.callback(Output('graph-court', 'figure'),
              [Input('dropdown', 'value')])
def update_figure(selected_value):
    """Rebuild the 3D figure whenever the dropdown value changes.

    Registered as the Dash callback that feeds the ``figure`` property of
    the ``graph-court`` component from the ``value`` of ``dropdown``.

    ``'exit'`` (or a cleared dropdown, which arrives as ``None``) adds an
    empty extra trace; any other value adds a single green point at random
    integer coordinates in ``[0, 10)``.

    Returns the figure that Dash should display.
    """
    # A cleared dropdown delivers None; treat it like 'exit' rather than
    # failing on None.lower().
    selected_value = (selected_value or 'exit').lower()  # Convert input to lowercase
    if selected_value == 'exit':
        print("Good bye.")
        new_x, new_y, new_z = [], [], []
    else:
        print("W, moving forward")
        # New data: one random point, unpacked into three length-1 arrays.
        new_x, new_y, new_z = np.random.randint(10, size=(3, 1))
    # Plot: start from the base figure, then overlay the new point.
    fig = create_figure(df)  # Set as global variable or local variable as required
    fig.add_trace(go.Scatter3d(x=new_x, y=new_y, z=new_z, marker=dict(size=10, color='green'), mode='markers'))
    return fig
app.run_server(debug=False, use_reloader=False)
Assuming that by "tab" you mean a browser tab, this is basically not possible with the standard renderer.
The browser renderer starts a one-shot server on a random port, and that server shuts down immediately after the figure has been rendered. You can check this by reloading the graph in the browser.
You can:
generate a static image and serve that yourself in a webapp (e.g. with flask) with f2.write_image("test.svg")
generate a dynamic html content by f2.show(renderer = "iframe") and serve that with e.g. flask
simply use plotly dash, look here for impressions
Try using Plotly for plotting, it has a functionality (Visibility), using that you can update your plot on button click or drop down.
The below example is for dropdown.
import pandas as pd
import numpy as np
import plotly.offline as py_offline
import plotly.graph_objs as go
from plotly import tools
py_offline.init_notebook_mode()
trace = go.Scatter(
x=[1, 2, 3],
y=[4, 5, 6]
)
fig = tools.make_subplots(rows=10, cols=1)
for k in range(10):
fig.append_trace(trace, k+1, 1)
updatemenus=list([
dict(
buttons=[],
direction = 'down',
pad = {'r': 10, 't': 10},
showactive = True,
x = 0,
xanchor = 'left',
y = 1.2,
yanchor = 'top'
),
])
lister = []
for k in range(11):
lister.append(dict(
args=['visible', [True for k in range(10)] if k == 0 else [True if (i+1) == k else False for i in range(10)]],
label='Show Trace ' + str( 'All' if k == 0 else k),
method='restyle'
))
updatemenus[0]['buttons'] = lister
fig['layout']['updatemenus'] = updatemenus
fig['layout'].update(title='subplots')
py_offline.iplot(fig, filename='simple-subplot')

Problem with visualization with Python Plotly Pandas?

I have Pandas data frame like this: data = pd.DataFrame({"Risk":["good", "bad", "good", "good", "bad"], "Age":[22, 50, 43, 27, 19]})
and I want to achieve something like this:
Why does my code not work? Could you repair my code? I get the error: AttributeError: 'list' object has no attribute 'loc'
import plotly.graph_objects as go
import plotly.tools as tls
import chart_studio.plotly as py
df_good = data.loc[data["Risk"] == 'good']['Age'].values.tolist()
df_bad = data.loc[data["Risk"] == 'bad']['Age'].values.tolist()
df_age = data['Age'].values.tolist()
#First plot
trace0 = go.Histogram(
x=df_good,
histnorm='probability',
name="Good Credit"
)
#Second plot
trace1 = go.Histogram(
x=df_bad,
histnorm='probability',
name="Bad Credit"
)
#Third plot
trace2 = go.Histogram(
x=df_age,
histnorm='probability',
name="Overall Age"
)
#Creating the grid
fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
subplot_titles=('Good','Bad', 'General Distribuition'))
#setting the figs
fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 1, 2)
fig.append_trace(trace2, 2, 1)
fig['layout'].update(showlegend=True, title='Age Distribuition', bargap=0.05)
py.iplot(fig, filename='custom-sized-subplot-with-subplot-titles')

Plotly Python Dashboard error py.dashboard_ops.upload

I’m trying to replicate the dashboard example here
I got 2 errors:
1) Using the “fileId_from_url” function I get the raw_fileId as empty so that when I try to access it
raw_fileId = re.findall("~[A-z]+/[0-9]+", url)[0][1: ]
I get the error
IndexError: list index out of range
2) If I manually change “/” to “:” in the urls, I can go on. When I try to create the dashboard with the line:
py.dashboard_ops.upload(my_dboard, ‘My First Dashboard with Pythonvv’)
I get the error:
raise exceptions.PlotlyRequestError(message, status_code, content)
3) What if I want to try to run it offline? If I import “import plotly.offline as pyo” I can’t use “pyo.dashboard_ops.upload” anymore.
Thanks in advance.
I add the code below
import plotly.plotly as py
py.sign_in(username='myname', api_key='mypass')
#%%
import plotly.dashboard_objs as dashboard
import IPython.display
from IPython.display import Image
my_dboard = dashboard.Dashboard()
my_dboard.get_preview()
#%%
import plotly.graph_objs as go
import numpy as np
colorscale = [[0, '#FAEE1C'], [0.33, '#F3558E'], [0.66, '#9C1DE7'], [1, '#581B98']]
trace1 = go.Scatter(
y = np.random.randn(500),
mode='markers',
marker=dict(
size='16',
color = np.random.randn(500),
colorscale=colorscale,
showscale=True
)
)
data = [trace1]
url_1 = py.plot(data, filename='scatter-for-dashboard', auto_open=False)
py.iplot(data, filename='scatter-for-dashboard')
#%%
x0 = np.random.randn(50)
x1 = np.random.randn(50) + 2
x2 = np.random.randn(50) + 4
x3 = np.random.randn(50) + 6
colors = ['#FAEE1C', '#F3558E', '#9C1DE7', '#581B98']
trace0 = go.Box(x=x0, marker={'color': colors[0]})
trace1 = go.Box(x=x1, marker={'color': colors[1]})
trace2 = go.Box(x=x2, marker={'color': colors[2]})
trace3 = go.Box(x=x3, marker={'color': colors[3]})
data = [trace0, trace1, trace2, trace3]
url_2 = py.plot(data, filename='box-plots-for-dashboard', auto_open=False)
py.iplot(data, filename='box-plots-for-dashboard')
#%%
import re
def fileId_from_url(url):
    """Return fileId from a url.

    The first ``~<username>/<number>`` segment of ``url`` is located and
    returned as ``"<username>:<number>"``. The username may contain any
    character except ``/`` and ``~``, so names with digits, dots, hyphens or
    underscores are accepted.

    Raises:
        IndexError: if ``url`` has no ``~<username>/<number>`` segment. The
            exception type is kept for compatibility with callers that
            relied on the previous failure mode.
    """
    match = re.search(r"~([^/~]+)/([0-9]+)", url)
    if match is None:
        raise IndexError(
            "no '~<username>/<number>' segment found in url: {!r}".format(url)
        )
    username, file_number = match.groups()
    return "{}:{}".format(username, file_number)
def sharekey_from_url(url):
    """Return the sharekey from a url.

    Everything after the first ``share_key=`` in ``url`` is returned. When
    the marker is absent, an explanatory message string is returned instead
    of a key.
    """
    _, marker, key = url.partition('share_key=')
    if not marker:
        # partition() leaves the separator empty when it was not found.
        return "This url is not 'sercret'. It does not have a secret key."
    return key
fileId_1 = fileId_from_url(url_1)
fileId_2 = fileId_from_url(url_2)
box_a = {
'type': 'box',
'boxType': 'plot',
'fileId': fileId_1,
'title': 'scatter-for-dashboard'
}
text_for_box = ""
box_b = {
'type': 'box',
'boxType': 'text',
'text': text_for_box,
'title': 'Markdown Options for Text Box'
}
box_c = {
'type': 'box',
'boxType': 'plot',
'fileId': fileId_2,
'title': 'box-for-dashboard',
'shareKey': sharekey_from_url(url_2)
}
my_dboard.insert(box_a)
my_dboard.insert(box_b, 'above', 1)
#%%
my_dboard.get_box(1)
my_dboard.get_box(1)['title'] = 'a new title'
my_dboard.get_box(1)
my_dboard.insert(box_a, 'below', 2)
my_dboard['settings']['logoUrl'] = 'https://images.plot.ly/language-icons/api-home/python-logo.png'
my_dboard['settings']['links'] = []
my_dboard['settings']['links'].append({'title': 'Link to Plotly', 'url': 'https://plot.ly/'})
my_dboard['settings']['links'].append({'title': 'Link to Python Website', 'url': 'https://www.python.org/'})
my_dboard['settings']['foregroundColor'] = '#000000'
my_dboard['settings']['backgroundColor'] = '#adcaea'
my_dboard['settings']['headerForegroundColor'] = '#ffffff'
my_dboard['settings']['headerBackgroundColor'] = '#D232C8'
my_dboard['settings']['boxBackgroundColor'] = '#ffffff'
my_dboard['settings']['boxBorderColor'] = '#000000'
my_dboard['settings']['boxHeaderBackgroundColor'] = '#ffffff'
stacked_dboard = dashboard.Dashboard()
text_box = {
'type': 'box',
'boxType': 'text',
'text': 'empty space'
}
for _ in range(5):
stacked_dboard.insert(text_box, 'below', 1)
#%%
stacked_dboard['layout']['size'] = 3000
my_dboard['layout']['first']['first'] = text_for_box
py.dashboard_ops.upload(my_dboard, 'My First Dashboard with Pythonvv')

Categories