I want to plot something like a biplot in python Plotly ,but using 3 principal components so as to make a 3d plot.
How do I go about plotting the direction vectors(the red lines) of principal components in plotly python?
There is exactly the same question , but for R, here. I am not able to translate the code perfectly.
With much help from here
from plotly.offline import plot
import plotly.graph_objs as go
pca = PCA(n_components=3).fit(iris.data)
X_reduced = pca.transform(iris.data)
trace1 = go.Scatter3d(
x=X_reduced[:,0],
y = X_reduced[:,1],
z = X_reduced[:,2],
mode='markers',
marker=dict(
size=12,
color= target,
opacity=1
)
)
dc_1 = go.Scatter3d( x = [0,pca.components_.T[0][0]],
y = [0,pca.components_.T[0][1]],
z = [0,pca.components_.T[0][2]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "red",
width = 6),
name = "Var1"
)
dc_2 = go.Scatter3d( x = [0,pca.components_.T[1][0]],
y = [0,pca.components_.T[1][1]],
z = [0,pca.components_.T[1][2]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "green",
width = 6),
name = "Var2"
)
dc_3 = go.Scatter3d( x = [0,pca.components_.T[2][0]],
y = [0,pca.components_.T[2][1]],
z = [0,pca.components_.T[2][2]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "blue",
width = 6),
name = "Var3"
)
dc_4 = go.Scatter3d( x = [0,pca.components_.T[3][0]],
y = [0,pca.components_.T[3][1]],
z = [0,pca.components_.T[3][2]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "yellow",
width = 6),
name = "Var4"
)
data = [trace1,dc_1,dc_2,dc_3,dc_4]
layout = go.Layout(
xaxis=dict(
title='PC1',
titlefont=dict(
family='Courier New, monospace',
size=18,
color='#7f7f7f'
)
)
)
fig = go.Figure(data=data, layout=layout)
plot(fig, filename='3d-scatter-tupac-with-mac')
Related
I am going through multiple columns and creating plotly pie charts, to check the distribution of these variables between two types of customer: power user and not power user. However the color order changes every time.
In one plot, Yes will be orange and No will , in the next plot, Yes will be blue and No will be orange.
You can see an example here, if you check the legend between the two charts:
Here is the code I am using. After some googling I tried adding sort=False to go.pie but it didn't change anything:
def plot_pie(column) :
trace1 = go.Pie(values = churn[column].value_counts().values.tolist(),
labels = churn[column].value_counts().keys().tolist(),
hoverinfo = "label+percent+name",
domain = dict(x = [0,.48]),
name = f"{target}",
marker = dict(line = dict(width = 2,
color = "rgb(243,243,243)")
),
hole = .6,
sort=False
)
trace2 = go.Pie(values = not_churn[column].value_counts().values.tolist(),
labels = not_churn[column].value_counts().keys().tolist(),
hoverinfo = "label+percent+name",
marker = dict(line = dict(width = 2,
color = "rgb(243,243,243)")
),
domain = dict(x = [.52,1]),
hole = .6,
name = f"Not {target}" ,
sort=False
)
layout = go.Layout(dict(title = column + f" distribution in {target} ",
plot_bgcolor = "rgb(243,243,243)",
paper_bgcolor = "rgb(243,243,243)",
annotations = [dict(text = f"{target}",
font = dict(size = 13),
showarrow = False,
x = .15, y = .5),
dict(text = f"not {target}",
font = dict(size = 13),
showarrow = False,
x = .88,y = .5
)
]
)
)
data = [trace1,trace2]
fig = go.Figure(data = data,layout = layout)
py.iplot(fig)
Any advice?
This is my code:
trace1 = go.Scatterpolar(
r = r,
theta = theta,
mode='markers',
marker=dict(
size=12,
color= iris.target,
opacity=1
)
)
dc_1 = go.Scatterpolar( r = [0,V_r[0][0]],
theta = [0,V_r[0][1]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "red",
width = 6),
name = "Var1"
)
dc_2 = go.Scatterpolar( r = [0,V_r[1][0]],
theta = [0,V_r[1][1]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "green",
width = 6),
name = "Var2"
)
dc_3 = go.Scatterpolar( r = [0,V_r[2][0]],
theta = [0,V_r[2][1]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "blue",
width = 6),
name = "Var3"
)
dc_4 = go.Scatterpolar( r = [0,V_r[3][0]],
theta = [0,V_r[3][1]],
marker = dict( size = 1,
color = "rgb(84,48,5)"),
line = dict( color = "yellow",
width = 6),
name = "Var4"
)
data = [dc_1,dc_2,dc_3,dc_4, trace1]
layout = go.Layout(
xaxis=dict(
title='PC1',
rangemode='tozero',
titlefont=dict(
family='Courier New, monospace',
size=18,
color='#080707'
)
)
)
fig = go.Figure(data=data, layout=layout)
plot(fig, filename='Scatter polar, PCA.')
And this is the graph resultant:
enter image description here
As you can see, currently the lines of the variables start from "0", but the origin of coordinates is in r = - 5 (since that is the first value it receives) how can I set it to r = 0?
r has to be represented in absolute value, since the address gives you the value of theta. r cannot be < 0:
r = abs(np.squeeze(np.asarray(P[:,0])))
theta = (np.squeeze(np.asarray(P[:,1])))*(180/math.pi)
I'm create a choropleth map using plotly, geojso and matplotlib. Now i want to add a slider.
i'm using python3 and i can't find a answer for this in the internet.all of them used plotly's default choropleth map.
def get_scatter_colors(sm, df):
grey = 'rgba(128,128,128,1)'
return ['rgba' + str(sm.to_rgba(m, bytes = True, alpha = 1)) if not np.isnan(m) else grey for m in df]
for age in columns[3:24]:
age_data = df[age]
age_data.name = 'province'
# print(age_data.head())
df_tmp = age_data.copy()
df_tmp.index = df_tmp.index.map(match_dict)
df_tmp = df_tmp[~df_tmp.index.duplicated(keep=False)]
df_reindexed = df_tmp.reindex(index = provinces_names)
colormap = 'Blues'
cmin = df_reindexed.min()
cmax = df_reindexed.max()
sm = scalarmappable(colormap, cmin, cmax)
scatter_colors = get_scatter_colors(sm, df_reindexed)
colorscale = get_colorscale(sm, df_reindexed, cmin, cmax)
hover_text = get_hover_text(df_reindexed)
scatter_color_list.append(scatter_colors)
tickformat = ""
data = dict(type='scattermapbox',
lat=lats,
lon=lons,
mode='markers',
text=hover_text,
marker=dict(size=10,
color=scatter_colors,
showscale = True,
cmin = df_reindexed.min(),
cmax = df_reindexed.max(),
colorscale = colorscale,
colorbar = dict(tickformat = tickformat)
),
showlegend=False,
hoverinfo='text'
)
data_slider.append(data)
layers=([dict(sourcetype = 'geojson',
source =sources[k],
below="",
type = 'line', # the borders
line = dict(width = 1),
color = 'black',
) for k in range(n_provinces)
] +
# fill_list
[dict(sourcetype = 'geojson',
source =sources[k],
below="water",
type = 'fill',
color = scatter_colors[k],
opacity=0.8,
) for k in range(n_provinces)
]
)
steps = []
for i in range(len(data_slider)):
step = dict(method='restyle',
args=['visible', [False] * len(data_slider)],
label='Age {}' .format(i))
step['args'][1][i] = True
steps.append(step)
sliders = [dict(active=0, steps=steps)]
layout = dict(title="2016 POPULATION",
autosize=False,
width=700,
height=800,
hovermode='closest',
# hoverdistance = 30,
mapbox=dict(accesstoken=MAPBOX_APIKEY,
layers=layers,
bearing=0,
center=dict(
lat=35.715298,
lon=51.404343),
pitch=0,
zoom=4.9,
style = 'light'),
sliders=sliders,
)
when i test this code with data, i want changing color of the map with slides , but the color of map is fixed by the lastest slide color.
Edit step part like this:
visibility = []
for i in range(len(data_slider)):
list = [False] * len(data_slider)
list[i] = True
visibility.append(list)
steps = []
for i in range(len(data_slider)):
step = dict(method='update',
args=[{'visible': visibility[i]},
{'mapbox.layers': layers[i]}],
label='Age {}' .format(i),)
steps.append(step)
And add function for layers:
def get_data_layout(df):
layers = []
for i in range(len(data_slider)):
scatter_colors = df[i]['marker']['color']
layer=([dict(sourcetype = 'geojson',
source =sources[k],
below="",
type = 'line',
line = dict(width = 1),
color = 'black',
) for k in range(n_provinces)
] +
[dict(sourcetype = 'geojson',
source =sources[k],
below="water",
type = 'fill',
color = scatter_colors[k],
opacity=0.8,
) for k in range(n_provinces)]
)
layers.append(layer)
return layers
and change layout to this:
layout = dict(title="IRAN 2016 POPULATION",
autosize=False,
width=700,
height=800,
hovermode='closest',
mapbox=dict(accesstoken=MAPBOX_APIKEY,
bearing=0,
center=dict(
lat=35.715298,
lon=51.404343),
pitch=0,
zoom=4.9,
style = 'dark'),
sliders=sliders,
)
other parts are same:))
please notice barplot('Age',False) runs without any error but fails to plot graph in chromeI'm using spyder to analyse employee attrition of ibm hr employee dataset from kaggle. For plotting graphs I'm using plotly library but I can't plot barplots using this library whereas I can successfully plot scatter plots and pie charts.
Attaching sample code of my work
spyder was successful to print plot_distribution('Age', False) this graph in chrome but can't plot barplot('Age', False)
def barplot(var_select, x_no_numeric) :
tmp1 = data[(data['Attrition'] != 0)]
tmp2 = data[(data['Attrition'] == 0)]
tmp3 = pd.DataFrame(pd.crosstab(data[var_select], data['Attrition']), )
tmp3['Attr%'] = tmp3[1] / (tmp3[1] + tmp3[0]) * 100
if x_no_numeric == True:
tmp3 = tmp3.sort_values(1, ascending=False)
color = ['skyblue', 'gold']
trace1 = go.Bar(
x = tmp1[var_select].value_counts().keys().tolist(),
y = tmp2[var_select].value_counts().values.tolist(),
name = 'yes_attrition', opacity = 0.8,
marker = dict(color = 'gold', line=dict(color='#000000',width=1)))
trace2 = go.Bar(
x = tmp1[var_select].value_counts().keys().tolist(),
y = tmp2[var_select].value_counts().values.tolist(),
name = 'no_attrition', opacity = 0.8,
marker = dict(color='skyblue', line=dict(color='#000000',width=1)))
trace3 = go.Scatter(
x = tmp3.index,
y = tmp3['Attr%'],
yaxis = 'y2',
name = '%Attrition', opacity= 0.5,
marker= dict(color='black',line=dict(color='#000000',width=0.5)))
layout = dict(title= str(var_select),
xaxis = dict(),
yaxis = dict(title='count'),
yaxis2 = dict(range= [-0,75],
overlaying = 'y',
anchor = 'x',
side = 'right',
Zeroline = False,
showgrid = False,
title = '%Attrition'))
fig = go.Figure(data=[trace1, trace2, trace3], layout= layout)
py.plot(fig)
plot_distribution('Age', False)
barplot('Age', False)
I'm trying to create a specific plot with Python and Plotly. I was wondering if it's possible to create a plot with 3 subplots arranged vertically (https://plot.ly/python/subplots/) that have a shared x-axis, along with a range slider that controls the x-axis (https://plot.ly/python/range-slider/)?
As of Jan 2017 you cannot do this, see here: https://github.com/plotly/plotly.js/issues/1250
Today, Jan 24th 2017, I have managed to create stacked plots that share one x-axis, in combination with a range slider. However, the problem is that the range of the y-axis is automatically set. I cannot control it. This is a plroblem for me. My code is:
trace_1 = go.Scatter(
x=time_station1,
y=turb_station1,
mode = 'lines+markers',
name = 'Turbidity',
connectgaps = False,
marker = dict(
size = 5,
color = 'rgb(64, 97, 139)',
line = dict(
width = 1,
color = 'rgb(64, 97, 139)'
)
)
)
trace_2 = go.Scatter(
x=time_station1,
y=battery_station1,
yaxis='y2',
mode = 'lines+markers',
name = 'Battery',
connectgaps = False,
marker = dict(
size = 5,
color = 'rgb(117, 15, 7)',
line = dict(
width = 1,
color = 'rgb(117, 15, 7)'
)
)
)
trace_3 = go.Scatter(
x=time_station1,
y=cond_station1,
yaxis='y3',
mode = 'lines+markers',
name = 'Conductivity',
connectgaps = False,
marker = dict(
size = 5,
color = 'rgb(130, 0, 132)',
line = dict(
width = 1,
color = 'rgb(130, 0, 132)'
)
)
)
trace_4 = go.Scatter(
x=time_station1,
y=depth_station1,
yaxis='y4',
mode = 'lines+markers',
name = 'Depth',
connectgaps = False,
marker = dict(
size = 5,
color = 'rgb(204, 100, 0)',
line = dict(
width = 1,
color = 'rgb(204, 100, 0)'
)
)
)
trace_5 = go.Scatter(
x=time_station1,
y=temp_station1,
yaxis='y5',
mode = 'lines+markers',
name = 'Temperature',
connectgaps = False,
marker = dict(
size = 5,
color = 'rgb(255, 255, 0)',
line = dict(
width = 1,
color = 'rgb(255, 255, 0)'
)
)
)
layout = go.Layout(
title='Station ABC',
xaxis = dict(
rangeselector=dict(
buttons = list([
dict(count=1,
label='1min',
step='minute',
stepmode='backward'),
dict(count=24,
label='24h',
step='hour',
stepmode='backward'),
])
),
rangeslider=dict(),
type='date',
title='Date and Time'
),
yaxis=dict(
domain=[0,0.15]),
yaxis2=dict(
domain=[0.2,0.35]),
yaxis3=dict(
domain=[0.4,0.55]),
yaxis4=dict(
domain=[0.4,0.75]),
yaxis5=dict(
domain=[0.8,1]),
)
data = [trace_1, trace_2, trace_3, trace_4, trace_5]
plot_url = py.plot(fig, filename='offline plot.html')