Is there a way to keep images as axis values?
Two similar questions here and here does not answer my question.
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset("titanic")
sns.catplot(x="sex", y="survived", hue="class", kind="bar", data=titanic)
I would like to replace the male and female axis values with the corresponding image present in the image link. Can we map the axis values to the image links?
Male:
https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSuGDLqvyU56RbTEFQP3ohzx9d0vJv-nQOk1g&usqp=CAU
Female:
https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRnSlVrt4o9yHIHnJ7H-cPi_fhOC4bePnyOoA&usqp=CAU
The answer using an OffsetBox in the questions you linked is probably the best option
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnchoredOffsetbox
titanic = sns.load_dataset("titanic")
images = ["https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSuGDLqvyU56RbTEFQP3ohzx9d0vJv-nQOk1g&usqp=CAU",
"https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRnSlVrt4o9yHIHnJ7H-cPi_fhOC4bePnyOoA&usqp=CAU"]
pos = [0,1]
fig, ax = plt.subplots()
ax = sns.barplot(x="sex", y="survived", hue="class", data=titanic)
ax.set_xticklabels([])
for m,p in zip(images,pos):
image = plt.imread(m)
im = OffsetImage(image, zoom=0.1)
ab = AnchoredOffsetbox(loc='upper center', child=im,
bbox_to_anchor=(p,0), bbox_transform=ax.get_xaxis_transform(),
frameon=False)
ax.add_artist(ab)
plt.tight_layout()
Related
I have a dataset with two different variables, i want to give colors to each with different color, Can anyone help please? Link to my dataset : "https://github.com/mayuripandey/Data-Analysis/blob/main/word.csv"
import matplotlib.pyplot as plt
import pandas as pd
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x = df['Friends Network-metrics'], y = df['Number of Followers'],cmap = "magma")
plt.xlabel("Friends Network-metrics")
plt.ylabel("Number of Followers")
plt.show()
Not very clear what you want to do here. But I'll provide a solution that may help you a bit.
Could use seaborn to implement the colors on the variables. Otherwise, you'd need to iterate through the points to set the color. Or create a new column that conditionally inputs a color for a value.
I don't know what your variable is, but you just want to put that in for the hue parameter:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df = pd.read_csv('https://raw.githubusercontent.com/mayuripandey/Data-Analysis/main/word.csv')
# Use the 'hue' argument to provide a factor variable
sns.lmplot(x='Friends Network-metrics',
y='Number of Followers',
height=8,
aspect=.8,
data=df,
fit_reg=False,
hue='Sentiment',
legend=True)
plt.xlabel("Friends Network-metrics")
plt.ylabel("Number of Followers")
plt.show()
This can give you a view like this:
If you were looking for color scale for one of the variables though, you would do the below. However, the max value is so big that the range also doesn't make it really an effective visual:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/mayuripandey/Data-Analysis/main/word.csv')
fig, ax = plt.subplots(figsize=(10, 6))
g = ax.scatter(x = df['Friends Network-metrics'],
y = df['Number of Followers'],
c = df['Friends Network-metrics'],
cmap = "magma")
fig.colorbar(g)
plt.xlabel("Friends Network-metrics")
plt.ylabel("Number of Followers")
plt.show()
So you could adjust the scale (I'd also add edgecolors = 'black' as its hard to see the light plots):
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/mayuripandey/Data-Analysis/main/word.csv')
fig, ax = plt.subplots(figsize=(10, 6))
g = ax.scatter(x = df['Friends Network-metrics'],
y = df['Number of Followers'],
c = df['Friends Network-metrics'],
cmap = "magma",
vmin=0, vmax=10000,
edgecolors = 'black')
fig.colorbar(g)
plt.xlabel("Friends Network-metrics")
plt.ylabel("Number of Followers")
plt.show()
I usually use matplotlib, but was playing with pandas plotting and experienced unexpected behaviour. I was assuming the following would return red and green edges rather than alternating. What am I missing here?
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"col1":[1,2,4,5,6], "col2":[4,5,1,2,3]})
def amounts(df):
fig, ax = plt.subplots(1,1, figsize=(3,4))
(df.filter(['col1','col2'])
.plot.bar(ax=ax,stacked=True, edgecolor=["red","green"],
fill=False,linewidth=2,rot=0))
ax.set_xlabel("")
plt.tight_layout()
plt.show()
amounts(df)
I think plotting each column separately and setting the bottom argument to stack the bars provides the output you desire.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"col1":[1,2,4,5,6], "col2":[4,5,1,2,3]})
def amounts(df):
fig, ax = plt.subplots(1,1, figsize=(3,4))
df['col1'].plot.bar(ax=ax, linewidth=2, edgecolor='green', rot=0, fill=False)
df['col2'].plot.bar(ax=ax, bottom=df['col1'], linewidth=2, edgecolor='red', rot=0, fill=False)
plt.legend()
plt.tight_layout()
plt.show()
amounts(df)
I have the next scatterplot
But i want to change the dots on the legend by continuos color map like this:
This is my code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set_style("whitegrid")
gene_list = pd.read_csv('interseccion.csv', header=None)
glist = gene_list.squeeze().str.strip().tolist()
names = gp.get_library_name()
enr = gp.enrichr(gene_list= glist,
gene_sets=['KEGG_2019_Human'],
organism='Human', # don't forget to set organism to the one you desired! e.g. Yeast
description='KEGG',
# no_plot=True,
cutoff=0.5 # test dataset, use lower value from range(0,1)
)
resultados = enr.results.head(15)
resultados['-log10(FDR)'] = -np.log10(resultados['Adjusted P-value'])
resultados['Genes'] = resultados['Genes'].str.split(';')
resultados['Genes'] = resultados['Genes'].apply(lambda x: len(x))
g = sns.scatterplot(data=resultados, x="-log10(FDR)", y="Term", hue='-log10(FDR)', palette="seismic"
, size="Genes", sizes=(30, 300), legend=True)
g.legend(loc=6, bbox_to_anchor=(1, 0.5), ncol=1)
g.fig.colorbar()
plt.ylabel('')
plt.xlabel('-log10(FDR)')
When i try to put a color bar with the funcion plt.colorbar() is not possible
I customized the code in the official sample with the understanding that I wanted to add a legend and color bars to the Seaborn scatterplot. A colormap has been created to match the colors of the sample graph, but it can be drawn without problems by specifying the colormap name. The color bar is customized by getting its position and adjusting it manually in the legend. The height of the color bar is halved to match the legend.
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")
fig, ax = plt.subplots()
g = sns.scatterplot(
data=tips, x="total_bill", y="tip", hue="size", size="size",
sizes=(20, 200), legend="full", ax=ax)
g.legend(loc='upper right', bbox_to_anchor=(1.2, 1.0), ncol=1)
norm = plt.Normalize(tips['size'].min(), tips['size'].max())
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cax = fig.add_axes([ax.get_position().x1+0.05, ax.get_position().y0, 0.06, ax.get_position().height / 2])
ax.figure.colorbar(sm, cax=cax)
plt.show()
I am trying to plot legends for the attached image and I am unable to plot legend with my below query, Could someone please help me resolve this?
#Predicted Labels on PCA
pcadf = pd.DataFrame(preprocessed_data)
pcadf["kmeans"] = pipe["clusterer"]["kmeans"].labels_
pcadf.columns = ['component_1', 'component_2', 'kmeans']
x = pcadf['component_1'].values
y = pcadf['component_2'].values
Cluster = pcadf["kmeans"].values
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
scatter = ax.scatter(x,y,c=Cluster,s=50)
ax.legend()
fig.savefig('KMeans_Cluster.png', bbox_inches='tight', dpi=1200)
See this matplotlib help page for the 2 options. You either loop through the different labels, or use the PathCollection's legend_elements() , and below I use an example for the 2nd option:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
iris = sns.load_dataset('iris')
df = StandardScaler().fit_transform(iris.iloc[:,:4])
pcadf = PCA(n_components=2).fit_transform(df)
pcadf = pd.DataFrame(pcadf,columns = ['component_1','component_2'])
pcadf["kmeans"] = KMeans(n_clusters=2).fit_predict(df)
#Cluster = pcadf["kmeans"].values
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
ax.scatter(pcadf['component_1'],pcadf['component_2'],c=pcadf['kmeans'],s=50)
legend1 = ax.legend(*scatter.legend_elements(),
loc="lower left", title="Clusters")
ax.add_artist(legend1)
I want to change the color of lineborder of violinplots.
I can set lines.linewidth to 0 but I want to show borders not to hide them. How to change the color of the border?
sns.set_context("paper", rc={"lines.linewidth": 0.8})
My code is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import numpy as np
datasets = pd.read_csv("merged.csv", index_col=0);
df = datasets
df.protocol = df.protocol.astype(str)
f, ax = plt.subplots(figsize=(18, 6))
sns.violinplot(x="time",
y="values",
hue="protocol",
data=df,
bw=.5,
scale="count"
)
sns.despine(left=True)
f.suptitle('Title', fontsize=22, fontweight='bold')
ax.set_xlabel("Time",size = 16,alpha=0.7)
ax.set_ylabel("Values",size = 16,alpha=0.7)
ax.set_xticklabels(df.qber, rotation=90)
ax.grid(True)
plt.legend(loc='upper right')
plt.grid(linestyle='--', alpha=0.7)
fig = ax.get_figure()
fig.savefig('time_v.pdf', bbox_inches='tight')
Thank you!
this should be very close to what you're looking for:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import numpy as np
def patch_violinplot(palette, n):
from matplotlib.collections import PolyCollection
ax = plt.gca()
violins = [art for art in ax.get_children() if isinstance(art, PolyCollection)]
colors = sns.color_palette(palette, n_colors=n) * (len(violins)//n)
for i in range(len(violins)):
violins[i].set_edgecolor(colors[i])
datasets = pd.read_csv("merged.csv", index_col=0);
df = datasets
df.protocol = df.protocol.astype(str)
num_cols = df['protocol'].nunique()
f, ax = plt.subplots(figsize=(18, 6))
sns.violinplot(x="time",
y="values",
hue="protocol",
data=df,
bw=.5,
scale="count",
palette="deep"
)
patch_violinplot("deep", num_cols)
sns.despine(left=True)
f.suptitle('Title', fontsize=22, fontweight='bold')
ax.set_xlabel("Time",size = 16,alpha=0.7)
ax.set_ylabel("Values",size = 16,alpha=0.7)
ax.set_xticklabels(df.qber, rotation=90)
ax.grid(True)
plt.legend(loc='upper right')
plt.grid(linestyle='--', alpha=0.7)
fig = ax.get_figure()
fig.savefig('time_v.pdf', bbox_inches='tight')
The patch_violin function came from here.