heatmap and dendrogram (clustermap) error using Plotly - python

The last example in Plotly's documentation for Dendrograms has an error. When executing this code, I get this error in two locations due to 'extend':
AttributeError: 'tuple' object has no attribute 'extend'
They are produced by these lines: figure.add_traces(heatmap) and figure['data'].extend(dendro_side['data'])
If anyone has run into this problem, please see my solution below! Happy coding!

I have a quick and accurate solution to run the last example code in Plotly's documentation for Dendrograms. Note that I am using Plotly offline in a Jupyter Notebook.
Figure has an add_traces method, and it should be used in place of extend.
The three key lines are :
figure.add_traces(dendro_side['data'])
figure.add_traces(heatmap)
plotly.offline.iplot(figure, filename='dendrogram_with_heatmap')
Here is the full example code with my corrections and necessary imports, below:
# Import Useful Things
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
plotly.offline.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff
import numpy as np
from scipy.spatial.distance import pdist, squareform
# Get Data
# Read the tab-separated table: the header row supplies the field names and
# only columns 1-29 are loaded, all as floats.
data = np.genfromtxt(
    "http://files.figshare.com/2133304/ExpRawData_E_TABM_84_A_AFFY_44.tab",
    names=True,
    usecols=tuple(range(1, 30)),
    dtype=float,
    delimiter="\t",
)
# View the structured array as a plain 2-D float array with one column per
# named field.  The builtin ``float`` is used because the ``np.float`` alias
# was deprecated in NumPy 1.20 and removed in NumPy 1.24.
data_array = data.view((float, len(data.dtype.names)))
# Transpose so that every named field becomes one row.
data_array = data_array.transpose()
# The field names double as the dendrogram labels.
labels = data.dtype.names
# Initialize figure by creating upper dendrogram
figure = ff.create_dendrogram(data_array, orientation='bottom', labels=labels)
# Attach every trace of the upper dendrogram to the secondary y axis.
for trace in figure['data']:
    trace['yaxis'] = 'y2'
# Create Side Dendrogram
dendro_side = ff.create_dendrogram(data_array, orientation='right')
# Attach every trace of the side dendrogram to the secondary x axis.
for trace in dendro_side['data']:
    trace['xaxis'] = 'x2'
# Add Side Dendrogram Data to Figure
# (figure['data'] is a tuple, so traces are added through add_traces.)
figure.add_traces(dendro_side['data'])
# Create Heatmap
# Leaf order taken from the side dendrogram's y-axis tick text, as ints.
dendro_leaves = [int(leaf) for leaf in dendro_side['layout']['yaxis']['ticktext']]
# Square pairwise-distance matrix, reordered on rows and then on columns so
# that it follows the dendrogram leaf order.
data_dist = pdist(data_array)
heat_data = squareform(data_dist)
heat_data = heat_data[dendro_leaves, :][:, dendro_leaves]
heatmap_trace = go.Heatmap(
    x=dendro_leaves,
    y=dendro_leaves,
    z=heat_data,
    colorscale='Blues',
)
# Place the heatmap cells on the tick positions used by the two dendrograms.
heatmap_trace['x'] = figure['layout']['xaxis']['tickvals']
heatmap_trace['y'] = dendro_side['layout']['yaxis']['tickvals']
heatmap = [heatmap_trace]
figure.add_traces(heatmap)
# Edit Layout
figure['layout'].update({
    'width': 800,
    'height': 800,
    'showlegend': False,
    'hovermode': 'closest',
})
# Settings shared by all four axes: no mirror, grid, axis line, zero line
# or tick marks.
bare_axis = {
    'mirror': False,
    'showgrid': False,
    'showline': False,
    'zeroline': False,
    'ticks': "",
}
# Edit xaxis (the only axis that keeps its tick labels)
figure['layout']['xaxis'].update({'domain': [.15, 1], **bare_axis})
# Edit xaxis2
figure['layout'].update({
    'xaxis2': {'domain': [0, .15], **bare_axis, 'showticklabels': False},
})
# Edit yaxis
figure['layout']['yaxis'].update(
    {'domain': [0, .85], **bare_axis, 'showticklabels': False}
)
# Edit yaxis2
figure['layout'].update({
    'yaxis2': {'domain': [.825, .975], **bare_axis, 'showticklabels': False},
})
# Plot using Plotly Offline
plotly.offline.iplot(figure, filename='dendrogram_with_heatmap')
This outputs:

Related

y2 axis selectable/dropdown range

new to python and programing.
I am trying to program some data visualization to improve efficiency.
I want to generate a scatter plot with plotly with y1 and y2. On y1 I want to have 2 permanent ranges of data. On y2 I want to have multiple data sets/ranges that can be selected to be shown or not.
Data is imported from an excel file.
I found a way to do it the other way around (plotting the permanent data on y2 and the on/off data on y1), but it does not work well: the legend goes crazy and the format changes to a line plot. In the end this is not what I want; I want the permanent data on y1 and the selectable data on y2.
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# NOTE: `df` is not defined in this snippet; it must be loaded beforehand
# (the question says the data is imported from an Excel file).
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add each trace exactly once.  The earlier `for column in df.columns` loop
# never used `column`; it only re-added the same traces once per column,
# which duplicated legend entries and broke the four-entry `visible` masks.
fig.add_trace(
    go.Scatter(x=df["Lap"], y=df["P_TYR_FA (bar) mean"], name="TYR_FA"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=df["Lap"], y=df["P_TYR_RA (bar) mean"], name="TYR_RA"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=df["Lap"], y=df['P_INT (mbar) mean'], name="TYR_FR"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=df["Lap"], y=df['P_FUE (bar) mean'], name="TYR_FF"),
    secondary_y=True,
)

# (label, visibility mask) for every dropdown entry.  Each mask holds one
# flag per trace, in the order the traces were added above.
button_specs = [
    ('All', [True, True, True, True]),
    ('P_TYR_FA', [True, False, False, True]),
    ('P_TYR_RA', [False, True, False, True]),
    ('P_INT', [False, False, True, True]),
    ('P_FUE', [False, False, False, True]),
]
fig.update_layout(
    updatemenus=[go.layout.Updatemenu(
        active=0,
        buttons=[
            # The title now mirrors the button label; the previous titles
            # (MSFT, AAPL, ...) were leftovers from a stock-price example.
            dict(label=label,
                 method='update',
                 args=[{'visible': mask}, {'title': label}])
            for label, mask in button_specs
        ],
    )]
)
fig.update_layout(template="plotly_dark")
fig.show()
enter image description here

Multiple Bar Plot using Seaborn

I'm making a barplot using 3 datasets in seaborn, however each datapoint overlays the previous, regardless of if it is now hiding the previous plot. eg:
# Draw the three series one after another; every later call is drawn over
# the bars of the earlier ones.
for column, legend_label, colour in (
    ("Portfolio", "Portfolio", "g"),
    ("Benchmark", "Benchmark", "b"),
    ("Active Exposure", "Active", "r"),
):
    sns.barplot(x=column, y="Factor", data=d2, label=legend_label, color=colour)
ax.legend(frameon=True)
ax.set(xlim=(-.1, .5), ylabel="", xlabel="Sector Decomposition")
sns.despine(left=True, bottom=True)
However, I want it to show green, even if the blue being overlayed is greater. Any ideas?
Without being able to see your data I can only guess that your dataframe is not in long-form. There's a section on the seaborn tutorial on the expected shape of DataFrames that seaborn is expecting, I'd take a look there for more info, specifically the section on messy data.
Because I can't see your DataFrame I have made some assumptions about its shape:
import numpy as np
import pandas as pd
import seaborn as sns
# Wide-form demo frame: one row per factor and one random column per series.
columns = {"Factor": list("ABC")}
for series_name in ("Portfolio", "Benchmark", "Active Exposure"):
    columns[series_name] = np.random.random(3)
df = pd.DataFrame(columns)
# Active Exposure Benchmark Factor Portfolio
# 0 0.140177 0.112653 A 0.669687
# 1 0.823740 0.078819 B 0.072474
# 2 0.450814 0.702114 C 0.039068
We can melt this DataFrame to get the long-form data seaborn wants:
# Reshape to long form: "Factor" stays as the identifier, the former column
# names go into "exposure" and their numbers into the default "value" column.
d2 = pd.melt(df, id_vars="Factor", var_name="exposure")
# Factor exposure value
# 0 A Active Exposure 0.140177
# 1 B Active Exposure 0.823740
# 2 C Active Exposure 0.450814
# 3 A Benchmark 0.112653
# 4 B Benchmark 0.078819
# 5 C Benchmark 0.702114
# 6 A Portfolio 0.669687
# 7 B Portfolio 0.072474
# 8 C Portfolio 0.039068
Then, finally, we can plot our bar plot using seaborn's built-in aggregation:
# One horizontal bar per (Factor, exposure) pair, coloured by exposure.
ax = sns.barplot(data=d2, x="value", y="Factor", hue="exposure")
ax.set_ylabel("")
ax.set_xlabel("Sector Decomposition")
# Put the legend outside the axes, to the right of the plot.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
Which produces:
Here's the plot params I used to make this chart:
import matplotlib as mpl
# Plot configuration
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name this
# installation provides.
pastel_style = "seaborn-v0_8-pastel"
if pastel_style not in mpl.style.available:
    pastel_style = "seaborn-pastel"
mpl.style.use(pastel_style)
# White backgrounds, larger font, and no right/top/bottom spines or ticks.
mpl.rcParams.update(
    {
        "font.size": 14,
        "figure.facecolor": "w",
        "axes.facecolor": "w",
        "axes.spines.right": False,
        "axes.spines.top": False,
        "axes.spines.bottom": False,
        "xtick.top": False,
        "xtick.bottom": False,
        "ytick.right": False,
        "ytick.left": False,
    }
)
If you are fine without using seaborn you can use pandas plotting to create a stacked horizontal bar chart (barh):
import pandas as pd
import matplotlib as mpl
# Plot configuration
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name this
# installation provides.
pastel_style = "seaborn-v0_8-pastel"
if pastel_style not in mpl.style.available:
    pastel_style = "seaborn-pastel"
mpl.style.use(pastel_style)
# White backgrounds, larger font, and no right/top/bottom spines or ticks.
mpl.rcParams.update(
    {
        "font.size": 14,
        "figure.facecolor": "w",
        "axes.facecolor": "w",
        "axes.spines.right": False,
        "axes.spines.top": False,
        "axes.spines.bottom": False,
        "xtick.top": False,
        "xtick.bottom": False,
        "ytick.right": False,
        "ytick.left": False,
    }
)
# Fixed demo values, written one row per factor; "Factor" becomes the index.
rows = [
    ("A", 0.669687, 0.112653, 0.140177),
    ("B", 0.072474, 0.078819, 0.823740),
    ("C", 0.039068, 0.702114, 0.450814),
]
df = pd.DataFrame(
    rows,
    columns=["Factor", "Portfolio", "Benchmark", "Active Exposure"],
).set_index("Factor")
# Stacked horizontal bars: one bar per index value, one segment per column.
ax = df.plot(kind="barh", stacked=True)
# Put the legend outside the axes, to the right of the plot.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set(ylabel="", xlabel="Sector Decomposition")
Notice in the code above the index is set to Factor which then becomes the y axis.
If you don't set stacked=True you get almost the same chart as seaborn produced:
# Side-by-side horizontal bars: one group per index value, one bar per column.
ax = df.plot(kind="barh", stacked=False)
# Put the legend outside the axes, to the right of the plot.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set(ylabel="", xlabel="Sector Decomposition")

Boxplot with a boolean column and an int value

I would like to create a boxplot of the distribution of the variable duration according to whether or not the film belongs to the category Dramas (true or false).
Unfortunately these two options do not take into account whether the in_Dramas column is true or false...
Notice that the two columns are in the same DataFrame
# NOTE(review): as written, `column` names the boolean flag and `by` names
# the duration, so the boolean values are plotted grouped by duration.
movies.boxplot(column= 'in_drama', by='duree', figsize= (7,7));
# Second attempt (seaborn), left commented out:
# sns.catplot(x="in_drama", y="duree" , kind="box", data=movies);
For the pandas boxplot, you can set by='in_drama' and column='duree' to get x-values of in_drama == False and in_drama == True, and boxplots taking into account the duree column:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Ten sample films: the first five are not dramas, the last five are.
movies = pd.DataFrame(
    {
        'in_drama': [False] * 5 + [True] * 5,
        'durée': [95, 118, 143, 89, 91, 145, 168, 193, 139, 141],
    }
)
# One box of 'durée' values for each value of 'in_drama'.
movies.boxplot(column='durée', by='in_drama', figsize=(7, 7))
plt.show()
The seaborn plot should also work. As only one subplot is needed, sns.boxplot can be used directly.
# Apply seaborn's default theme, then draw one box of 'durée' per 'in_drama' value.
sns.set()
sns.boxplot(data=movies, x="in_drama", y="durée")
At the left the pandas boxplot, at the right seaborn:

heatmap simple customized binary legend

I have this dataframe with True and False values with a heatmap plot:
import matplotlib.pyplot as plt
import pandas as pd  # needed for pd.DataFrame below; the snippet omitted it
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
# Boolean frame with rows indexed 1-8 and columns A, B and C.
df = pd.DataFrame({'A': {1: False, 2: False, 3: False, 4: True, 5: True, 6: True, 7: False, 8: False},
                   'B': {1: False, 2: False, 3: True, 4: True, 5: False, 6: True, 7: True, 8: False},
                   'C': {1: False, 2: True, 3: False, 4: False, 5: False, 6: False, 7: True, 8: True}})
fig, ax = plt.subplots(figsize=(3,3))
# Two-colour palette, one colour per boolean value; no continuous colour bar.
cmap = sns.mpl_palette("Set2", 2)
sns.heatmap(data=df, cmap=cmap, cbar=False)
plt.xticks(rotation=90, fontsize=10)
plt.yticks(rotation=0, fontsize=10)
plt.show()
I'm trying to add outside the plot a simple legend where the red color = True and the green color = False, with labels "missing value" when is red, and "non missing value" when is green. I'm not looking for a continuous legend as its common on heatmaps (that's why cbar=False).
I have tried multiple solutions (from other plots also) without success: how to add a legend, legend guide, matplotlib legends not working, customizing plot legends, among others, but all of them are far too complicated to adapt to a heatmap. I'm looking for something like:
plt.legend(values=[1,0], colors=["red", "green"], label_legend=["missing value", "non missing value"])
Any suggestions?
You can create a custom legend as follows:
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import seaborn as sns
import pandas as pd
# Boolean frame with rows indexed 1-8 and columns A, B and C.
df = pd.DataFrame(
    {
        'A': [False, False, False, True, True, True, False, False],
        'B': [False, False, True, True, False, True, True, False],
        'C': [False, True, False, False, False, False, True, True],
    },
    index=list(range(1, 9)),
)
fig, ax = plt.subplots(figsize=(3, 3))
# Two-colour palette; indexing it with a boolean picks entry 1 or entry 0.
cmap = sns.mpl_palette("Set2", 2)
sns.heatmap(data=df, cmap=cmap, cbar=False)
plt.xticks(rotation=90, fontsize=10)
plt.yticks(rotation=0, fontsize=10)
# One coloured patch per boolean value serves as the legend entry.
legend_handles = [
    Patch(color=cmap[flag], label=text)
    for flag, text in ((True, 'Missing Value'), (False, 'Non Missing Value'))
]
# Centre the two-column legend just above the axes.
plt.legend(
    handles=legend_handles,
    ncol=2,
    bbox_to_anchor=[0.5, 1.02],
    loc='lower center',
    fontsize=8,
    handlelength=.8,
)
plt.tight_layout()
plt.show()
Patch creates a "patch" (2D filled shape) which by default is rectangular and which can be given a color.
cmap[value] where value is preferably a number between 0 and 1, gives the corresponding color. Note that 'True' and 'False' get converted to 1 and to 0 when used as numeric value. The code above uses 'True' and 'False' for a better readability when comparing with the dataframe. Also note that the syntax is cmap(value) with round brackets for a usual matplotlib colormap (seaborn uses a little different syntax, as it extends on matplotlib functionality).
The handles= for a legend is a list of graphical elements to be put in the legend. Often they are created automatically by many functions, but you can use your own when something non-standard is needed. If the "handles" already have their own labels, they get used in the legend. With labels=, they can be changed.
More information can be found in the linked documentation and for example this tutorial.

Transition not showing for animated polarbar plot with plotly (offline)

I am trying to make a polar barplot rotate smoothly using plotly in offline mode. Following the examples available in the docs, I do this by creating a button with as method "animate" and setting the transition time to a value >0 ms.
The same problem occurred when using a scatterpolar type plot instead of a barplot, however the animation did work for a non-polar type scatter plot.
import plotly.graph_objs as go
import plotly.offline as offline
import pandas as pd
import numpy as np
offline.init_notebook_mode()
#some data to plot:
df = pd.DataFrame({'artist':['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
                   'birth': pd.to_datetime(pd.Series(['1990-04-01T00:00:00.000000000', '1945-12-01T00:00:00.000000000',
                                                      '1955-01-01T00:00:00.000000000', '1956-01-01T00:00:00.000000000',
                                                      '1976-12-01T00:00:00.000000000', '1930-05-01T00:00:00.000000000',
                                                      '1942-01-01T00:00:00.000000000', '1936-11-01T00:00:00.000000000',
                                                      '1971-12-01T00:00:00.000000000', '1952-12-01T00:00:00.000000000'])),
                   'death': pd.to_datetime(pd.Series(['2012-04-01T00:00:00.000000000', '2015-12-01T00:00:00.000000000',
                                                      '2010-01-01T00:00:00.000000000', '2017-01-01T00:00:00.000000000',
                                                      '2016-12-01T00:00:00.000000000', '2017-05-01T00:00:00.000000000',
                                                      '2010-01-01T00:00:00.000000000', '2015-11-01T00:00:00.000000000',
                                                      '2014-12-01T00:00:00.000000000', '2013-12-01T00:00:00.000000000']))} )
#creating the barplot:
# Offset of every birth date from 1970-01-01.  pd.Timestamp is used because
# the pd.datetime alias was deprecated in pandas 1.0 and removed in 2.0.
shift = df['birth'] - pd.Timestamp('1970-01-01')
trace = {
    'name': "to",
    # Death date moved back by the birth offset, as plain dates.
    'r': (df['death'] - shift).dt.date,
    'theta': np.linspace(0, 360, 11),
    'base': df['birth'].dt.date,
    'type': 'barpolar',
}
data = [trace]
nsteps = 20
# Starting angle of each animation frame, spread evenly over 0-360 degrees.
start_thetas = np.linspace(0, 360, nsteps)
# One shallow copy of the trace per frame with only 'theta' replaced, so the
# bars keep their values while the whole plot rotates.
tracedicts = [
    {**trace, 'theta': np.linspace(start, 360 + start, 11)}
    for start in start_thetas
]
frames = [{'data': [tracei]} for tracei in tracedicts]
# Figure layout: a polar plot with a hidden angular axis, a date-typed radial
# axis and a single "Play" button that starts the animation.
layout = {
    'polar': {
        'angularaxis': {
            'visible': False,
        },
        'radialaxis': {
            'showgrid': True,
            'type': 'date',
            'hoverformat': '%m-%Y',
        },
    },
    'updatemenus': [{
        'type': 'buttons',
        'x': 0.1,
        'y': 0,
        'buttons': [{
            'label': 'Play',
            'method': 'animate',
            # args[0] is None (no explicit frame list is given); args[1]
            # holds the animation options.
            'args': [None, {
                'frame': {'duration': 600, 'redraw': True},
                # 'easing' is a transition option, so it belongs inside
                # 'transition' rather than beside it, where it has no effect.
                'transition': {'duration': 400, 'easing': 'linear'},
                'fromcurrent': True,
            }],
        }],
    }],
}
# Assemble the figure from the initial trace list, the layout and the frames.
fig = go.Figure(data=data, layout=layout, frames = frames)
# Display the figure in the notebook; auto_play=False presumably keeps the
# animation from starting before the Play button is pressed (TODO confirm).
offline.iplot(fig,auto_play=False)
The animation works as far as showing the different frames, but the transition does not work.
Is this a feature that simply does not exist for all polar and/or bar plots?
(You'll also notice that I set 'redraw':True - this is because otherwise the animation only worked when setting auto_play=True at the end.)

Categories