I'm aiming to plot the mean for a group of subplots. Using below, I'm separating each unique Item into a separate subplots. I'm hoping to plot the relevant mean of Num to each of these subplots.
import pandas as pd
import seaborn as sns
df = pd.DataFrame({
'Num' : [1,2,1,2,3,2,1,3,2,2,1,2,3,3,1,3],
'Label' : ['A','B','C','B','B','C','C','B','B','A','C','A','B','A','C','A'],
'Item' : ['Up','Left','Up','Left','Down','Right','Up','Down','Right','Down','Right','Up','Up','Right','Down','Left'],
})
g = sns.displot(data = df,
x = 'Num',
row = 'Item',
row_order = ['Up','Down','Left','Right'],
discrete = True,
aspect = 4,
height = 2,
)
#for x in df['IMMEDIATE_CONGESTION_PLAYER_COUNT']:
plt.axvline(np.median(df['Num']),color='b', linestyle='--')
Here is a great answer. For the initial graph object, use the map function to add a vertical line.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.DataFrame({
'Num' : [1,2,1,2,3,2,1,3,2,2,1,2,3,3,1,3],
'Label' : ['A','B','C','B','B','C','C','B','B','A','C','A','B','A','C','A'],
'Item' : ['Up','Left','Up','Left','Down','Right','Up','Down','Right','Down','Right','Up','Up','Right','Down','Left'],
})
g = sns.displot(data = df,
x = 'Num',
row = 'Item',
row_order = ['Up','Down','Left','Right'],
discrete = True,
aspect = 4,
height = 2,
)
cols = df['Item'].unique()
for c,ax in zip(cols, g.axes.flat):
ax.axvline(np.median(df[df['Item'] == c]['Num']), color='r', linestyle='--')
plt.show()
Related
I am drawing boxplots with Python Seaborn package. I have facet grid with both rows and columns. That much I've been able to do with the Seaborn function catplot.
I also want to annotate the outliers. I have found some nice examples at SO for annotating the outliers but without facet structure. That's where I'm struggling.
Here is what I've got (borrows heavily from this post: Boxplot : Outliers Labels Python):
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cbook import boxplot_stats
sns.set_style('darkgrid')
Month = np.repeat(np.arange(1, 11), 10)
Id = np.arange(1, 101)
Value = np.random.randn(100)
Row = ["up", "down"]*50
df = pd.DataFrame({'Value': Value, 'Month': Month, 'Id': Id, 'Row': Row})
g = sns.catplot(data=df, x="Month", y="Value", row="Row", kind="box", height=3, aspect=3)
for name, group in df.groupby(["Month", "Row"]):
fliers = [y for stat in boxplot_stats(group["Value"]) for y in stat["fliers"]]
d = group[group["Value"].isin(fliers)]
g.axes.flatten().annotate(d["Id"], xy=(d["Month"] - 1, d["Value"]))
The dataframe d collects all the outliers by patch. The last line aims to match d with the graph g patches. However, that doesn't work, but I haven't found a way to flatten axes to a list where each element would correspond to a grouped dataframe element.
I'd be glad to hear alternative versions for achieving this too.
One way to do it:
for name, group in df.groupby(["Month", "Row"]):
fliers = [y for stat in boxplot_stats(group["Value"]) for y in stat["fliers"]]
d = group[group["Value"].isin(fliers)]
for i in range(len(d)):
ngrid = (0 if d.iloc[i,3]=='up' else 1)
g.fig.axes[ngrid].annotate(d.iloc[i, 2], xy=(d.iloc[i, 1] - 1, d.iloc[i, 0]))
You can loop through g.axes_dict to visit each of the axes.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cbook import boxplot_stats
sns.set_style('darkgrid')
Month = np.repeat(np.arange(1, 11), 10)
Id = np.arange(1, 101)
Value = np.random.randn(100)
Row = ["up", "down"] * 50
df = pd.DataFrame({'Value': Value, 'Month': Month, 'Id': Id, 'Row': Row})
g = sns.catplot(data=df, x="Month", y="Value", row="Row", kind="box", height=3, aspect=3)
for row, ax in g.axes_dict.items():
for month in np.unique(df["Month"]):
group = df.loc[(df["Row"] == row) & (df["Month"] == month), :]
fliers = boxplot_stats(group["Value"])[0]["fliers"]
if len(fliers) > 0:
for mon, val, id in zip(group["Month"], group["Value"], group["Id"]):
if val in fliers:
ax.annotate(f' {id}', xy=(mon - 1, val))
plt.tight_layout()
plt.show()
I was trying to plot multiple lmplots in the same figure. But I am getting too many unwanted subplots.
I found another SO link How to plot 2 seaborn lmplots side-by-side? but that also did not help me.
In this example I want 1 row 2 columns.
MWE
# imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# data
df = sns.load_dataset('titanic')
# plot
m,n = 1,2
figsize=(12,8)
cols1 = ['age','fare']
cols2 = ['fare','age']
target = 'survived'
fontsize = 12
fig, ax = plt.subplots(m,n,figsize=figsize)
for i, (col1,col2) in enumerate(zip(cols1,cols2)):
plt.subplot(m,n,i+1)
sns.lmplot(x=col1,y=col2,data=df,
hue=target, palette='Set1',
scatter_kws={'alpha':0.3})
plt.xlabel(col1,fontsize=fontsize)
plt.ylabel(col2,fontsize=fontsize)
plt.tick_params(axis='both', which='major', labelsize=fontsize)
plt.tight_layout()
for i in range(m*n-len(cols1)):
ax.flat[-(i+1)].set_visible(False)
My attempt so far:
df = pd.DataFrame({'x0':[10,20,30,40],
'y0': [100,200,300,400],
'x1':[0.1,0.2,0.3,0.1],
'y1':[0.01,0.02,0.03,0.01],
'target': [0,1,1,1]
})
df1 = df.append(df)
df1 = df1.reset_index(drop=True)
df1['x0'].iloc[len(df):] = df['x1'].to_numpy()
df1['y0'].iloc[len(df):] = df['y1'].to_numpy()
df1['col'] = ['c0']* len(df) + ['c1'] * len(df)
df1 = df1.drop(['x1','y1'],axis=1)
df1 = df1.rename(columns={'x0':'x','y0':'y'})
sns.lmplot(x='x',y='y',hue='target',data=df1,col='col')
Output:
I can't seem to get the labels on the x-axis to rotate 90 degrees.
Example df:
import pandas as pd
import matplotlib.pyplot as plt
d = ({
'A' : ['1','1','2','2','3','3','3'],
'B' : ['A','B','C','C','D','B','C'],
'C' : ['Foo','Bar','Foo','Bar','Cat','Bar','Cat'],
})
df = pd.DataFrame(data=d)
fig,ax = plt.subplots(figsize = (9,4))
df.assign(A=df.A.astype(int)).pivot_table(index="C", columns="B", values="A",aggfunc='count').rename_axis(None).rename_axis(None,1).plot(kind='bar')
plt.show()
I have tried the basic:
plt.xticks(rotation = 90)
Also tried this but it returns an Attribute Error:
df.assign(A=df.A.astype(int)).pivot_table(index="C", columns="B", values="A",aggfunc='count').rename_axis(None).rename_axis(None,1).plot(kind='bar', rotation = 90)
I have got the labels to rotate through this:
xticklabels = df.C.unique()
ax.set_xticklabels(xticklabels, rotation = 0)
But it returns incorrect ordering. It just takes the values as they appear. Rather than determining the appropriate label
I run the code below to produce the labels with angle 0. I don't understand why there are two plots generated so I deleted the line fig,ax = plt.subplots()
import pandas as pd
import matplotlib.pyplot as plt
d = ({
'A' : ['1','1','2','2','3','3','3'],
'B' : ['A','B','C','C','D','B','C'],
'C' : ['Foo','Bar','Foo','Bar','Cat','Bar','Cat'],
})
df = pd.DataFrame(data=d)
#fig,ax = plt.subplots()
df.assign(A=df.A.astype(int)).pivot_table(index="C", columns="B",
values="A",aggfunc='count').rename_axis(None).rename_axis(None,1).plot(kind='bar')
plt.xticks(rotation = 0)
plt.show()
You can control the xticks labels through creating a subplot and configuring the label settings, like this:
import pandas as pd
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
d = ({
'A' : ['1','1','2','2','3','3','3'],
'B' : ['A','B','C','C','D','B','C'],
'C' : ['Foo','Bar','Foo','Bar','Cat','Bar','Cat'],
})
df = pd.DataFrame(data=d)
udf = (df.assign(A=df.A.astype(int))
.pivot_table(index="C", columns="B", values="A",aggfunc='count')
.rename_axis(None)
.rename_axis(None,1))
udf.plot(kind='bar', ax=ax)
labels = ax.set_xticklabels(udf.index.values, rotation=0, fontsize=14)
The output would be:
One more thing, I think you need 0 degree rotation as the default is 90.
PS: Long chaining in pandas operations really eats away the readability.
I'm using Pandas and matplotlib to try to replicate this graph from tableau:
So far, I have this code:
group = df.groupby(["Region","Rep"]).sum()
total_price = group["Total Price"].groupby(level=0, group_keys=False)
total_price.nlargest(5).plot(kind="bar")
Which produces this graph:
It correctly groups the data, but is it possible to get it grouped similar to how Tableau shows it?
You can create some lines and labels using the respective matplotlib methods (ax.text and ax.axhline).
import pandas as pd
import numpy as np; np.random.seed(5)
import matplotlib.pyplot as plt
a = ["West"]*25+ ["Central"]*10+ ["East"]*10
b = ["Mattz","McDon","Jeffs","Warf","Utter"]*5 + ["Susanne","Lokomop"]*5 + ["Richie","Florence"]*5
c = np.random.randint(5,55, size=len(a))
df=pd.DataFrame({"Region":a, "Rep":b, "Total Price":c})
group = df.groupby(["Region","Rep"]).sum()
total_price = group["Total Price"].groupby(level=0, group_keys=False)
gtp = total_price.nlargest(5)
ax = gtp.plot(kind="bar")
#draw lines and titles
count = gtp.groupby("Region").count()
cum = np.cumsum(count)
for i in range(len(count)):
title = count.index.values[i]
ax.axvline(cum[i]-.5, lw=0.8, color="k")
ax.text(cum[i]-(count[i]+1)/2., 1.02, title, ha="center",
transform=ax.get_xaxis_transform())
# shorten xticklabels
ax.set_xticklabels([l.get_text().split(", ")[1][:-1] for l in ax.get_xticklabels()])
plt.show()
I have three different data sets where I produce a facetplot, each
a = sns.FacetGrid(data1, col="overlap", hue="comp")
a = (g.map(sns.kdeplot, "val",bw=0.8))
b = sns.FacetGrid(data2, col="overlap", hue="comp")
b = (g.map(sns.kdeplot, "val",bw=0.8))
c = sns.FacetGrid(data3, col="overlap", hue="comp")
c = (g.map(sns.kdeplot, "val",bw=0.8))
Each of those plots has three subplots in one row, so in total I have nine plots.
I would like to combine these plots, in a subplots setting like this
f, (ax1, ax2, ax3) = plt.subplots(3,1)
ax1.a
ax2.b
ax3.c
How can I do that?
A FacetGrid creates its own figure. Combining several figures into one is not an easy task. Additionally, there is no such thing as subplot rows which can be added to a figure. So one would need to manipulate the axes individually.
That said, it might be easier to find workarounds. E.g. if the dataframes to show have the same structure as it seems to be from the question code, one can combine the dataframes into a single frame with a new column and use this as the row attribute of the facet grid.
import numpy as np; np.random.seed(3)
import pandas as pd
import seaborn.apionly as sns
import matplotlib.pyplot as plt
def get_data(n=266, s=[5,13]):
val = np.c_[np.random.poisson(lam=s[0], size=n),
np.random.poisson(lam=s[1], size=n)].T.flatten()
comp = [s[0]]*n + [s[1]]*n
ov = np.random.choice(list("ABC"), size=2*n)
return pd.DataFrame({"val":val, "overlap":ov, "comp":comp})
data1 = get_data(s=[9,11])
data2 = get_data(s=[7,19])
data3 = get_data(s=[1,27])
#option1 combine
for i, df in enumerate([data1,data2,data3]):
df["data"] = ["data{}".format(i+1)] * len(df)
data = data1.append(data2)
data = data.append(data3)
bw = 2
a = sns.FacetGrid(data, col="overlap", hue="comp", row="data")
a = (a.map(sns.kdeplot, "val",bw=bw ))
plt.show()
You can also loop over the dataframes and axes to obtain the desired result.
import numpy as np; np.random.seed(3)
import pandas as pd
import seaborn.apionly as sns
import matplotlib.pyplot as plt
def get_data(n=266, s=[5,13]):
val = np.c_[np.random.poisson(lam=s[0], size=n),
np.random.poisson(lam=s[1], size=n)].T.flatten()
comp = [s[0]]*n + [s[1]]*n
ov = np.random.choice(list("ABC"), size=2*n)
return pd.DataFrame({"val":val, "overlap":ov, "comp":comp})
data1 = get_data(s=[9,11])
data2 = get_data(s=[7,19])
data3 = get_data(s=[1,27])
#option2 plot each subplot individually
data = [data1,data2,data3]
bw = 2
fig, axes = plt.subplots(3,3, sharex=True, sharey=True)
for i in range(3):
for j in range(3):
x = data[i]
x = x[x["overlap"] == x["overlap"].unique()[j]]
for hue in x["comp"].unique():
d = x[x["comp"] == hue]
sns.kdeplot(d["val"], ax=axes[i,j], bw=bw, label=hue )
plt.show()