I have three different data sets where I produce a facetplot, each
a = sns.FacetGrid(data1, col="overlap", hue="comp")
a = (g.map(sns.kdeplot, "val",bw=0.8))
b = sns.FacetGrid(data2, col="overlap", hue="comp")
b = (g.map(sns.kdeplot, "val",bw=0.8))
c = sns.FacetGrid(data3, col="overlap", hue="comp")
c = (g.map(sns.kdeplot, "val",bw=0.8))
Each of those plots has three subplots in one row, so in total I have nine plots.
I would like to combine these plots, in a subplots setting like this
f, (ax1, ax2, ax3) = plt.subplots(3,1)
ax1.a
ax2.b
ax3.c
How can I do that?
A FacetGrid creates its own figure. Combining several figures into one is not an easy task. Additionally, there is no such thing as subplot rows which can be added to a figure. So one would need to manipulate the axes individually.
That said, it might be easier to find workarounds. E.g. if the dataframes to show have the same structure as it seems to be from the question code, one can combine the dataframes into a single frame with a new column and use this as the row attribute of the facet grid.
import numpy as np; np.random.seed(3)
import pandas as pd
import seaborn.apionly as sns
import matplotlib.pyplot as plt
def get_data(n=266, s=[5,13]):
val = np.c_[np.random.poisson(lam=s[0], size=n),
np.random.poisson(lam=s[1], size=n)].T.flatten()
comp = [s[0]]*n + [s[1]]*n
ov = np.random.choice(list("ABC"), size=2*n)
return pd.DataFrame({"val":val, "overlap":ov, "comp":comp})
data1 = get_data(s=[9,11])
data2 = get_data(s=[7,19])
data3 = get_data(s=[1,27])
#option1 combine
for i, df in enumerate([data1,data2,data3]):
df["data"] = ["data{}".format(i+1)] * len(df)
data = data1.append(data2)
data = data.append(data3)
bw = 2
a = sns.FacetGrid(data, col="overlap", hue="comp", row="data")
a = (a.map(sns.kdeplot, "val",bw=bw ))
plt.show()
You can also loop over the dataframes and axes to obtain the desired result.
import numpy as np; np.random.seed(3)
import pandas as pd
import seaborn.apionly as sns
import matplotlib.pyplot as plt
def get_data(n=266, s=[5,13]):
val = np.c_[np.random.poisson(lam=s[0], size=n),
np.random.poisson(lam=s[1], size=n)].T.flatten()
comp = [s[0]]*n + [s[1]]*n
ov = np.random.choice(list("ABC"), size=2*n)
return pd.DataFrame({"val":val, "overlap":ov, "comp":comp})
data1 = get_data(s=[9,11])
data2 = get_data(s=[7,19])
data3 = get_data(s=[1,27])
#option2 plot each subplot individually
data = [data1,data2,data3]
bw = 2
fig, axes = plt.subplots(3,3, sharex=True, sharey=True)
for i in range(3):
for j in range(3):
x = data[i]
x = x[x["overlap"] == x["overlap"].unique()[j]]
for hue in x["comp"].unique():
d = x[x["comp"] == hue]
sns.kdeplot(d["val"], ax=axes[i,j], bw=bw, label=hue )
plt.show()
Related
let's assume I have the following data
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.DataFrame(dict(x=[1,2,3]*4,
y=list(range(12)),
row_separator = ["a"]*6 + ["b"]*6,
col_separator = (["a"]*3 + ["b"]*3) * 2
)
)
data
I'd like to have a simple line plot of x and y for each combination of row_separator and col_separator, and I want to do this using sns.relplot(). y-scales are different, which is fine, but I want to have the same lower bound 0 for all sub-plots.
With plt.subplots, it's like this:
fig, axs = plt.subplots(2,2)
for i in [0,1]:
for j in [0,1]:
ind = (data.row_separator == data.row_separator.unique()[i]) &\
(data.col_separator == data.col_separator.unique()[j])
axs[i,j].plot(data.loc[ind,"x"], data.loc[ind,"y"])
axs[i,j].set_ylim(bottom=0)
With seaborn, I'm struggling how to do this. (My) Basis would be
g = sns.relplot(data=data,
x="x",
y="y",
col="row_separator",
row="col_separator",
kind="line"
)
I would expect facet_kws={"ylim":(0, None)} to do what I want, but it isn't (it's rather limiting all plots to 0 and 1).
I was trying to plot multiple lmplots in the same figure. But I am getting too many unwanted subplots.
I found another SO link How to plot 2 seaborn lmplots side-by-side? but that also did not help me.
In this example I want 1 row 2 columns.
MWE
# imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# data
df = sns.load_dataset('titanic')
# plot
m,n = 1,2
figsize=(12,8)
cols1 = ['age','fare']
cols2 = ['fare','age']
target = 'survived'
fontsize = 12
fig, ax = plt.subplots(m,n,figsize=figsize)
for i, (col1,col2) in enumerate(zip(cols1,cols2)):
plt.subplot(m,n,i+1)
sns.lmplot(x=col1,y=col2,data=df,
hue=target, palette='Set1',
scatter_kws={'alpha':0.3})
plt.xlabel(col1,fontsize=fontsize)
plt.ylabel(col2,fontsize=fontsize)
plt.tick_params(axis='both', which='major', labelsize=fontsize)
plt.tight_layout()
for i in range(m*n-len(cols1)):
ax.flat[-(i+1)].set_visible(False)
My attempt so far:
df = pd.DataFrame({'x0':[10,20,30,40],
'y0': [100,200,300,400],
'x1':[0.1,0.2,0.3,0.1],
'y1':[0.01,0.02,0.03,0.01],
'target': [0,1,1,1]
})
df1 = df.append(df)
df1 = df1.reset_index(drop=True)
df1['x0'].iloc[len(df):] = df['x1'].to_numpy()
df1['y0'].iloc[len(df):] = df['y1'].to_numpy()
df1['col'] = ['c0']* len(df) + ['c1'] * len(df)
df1 = df1.drop(['x1','y1'],axis=1)
df1 = df1.rename(columns={'x0':'x','y0':'y'})
sns.lmplot(x='x',y='y',hue='target',data=df1,col='col')
Output:
I'm using Pandas and matplotlib to try to replicate this graph from tableau:
So far, I have this code:
group = df.groupby(["Region","Rep"]).sum()
total_price = group["Total Price"].groupby(level=0, group_keys=False)
total_price.nlargest(5).plot(kind="bar")
Which produces this graph:
It correctly groups the data, but is it possible to get it grouped similar to how Tableau shows it?
You can create some lines and labels using the respective matplotlib methods (ax.text and ax.axhline).
import pandas as pd
import numpy as np; np.random.seed(5)
import matplotlib.pyplot as plt
a = ["West"]*25+ ["Central"]*10+ ["East"]*10
b = ["Mattz","McDon","Jeffs","Warf","Utter"]*5 + ["Susanne","Lokomop"]*5 + ["Richie","Florence"]*5
c = np.random.randint(5,55, size=len(a))
df=pd.DataFrame({"Region":a, "Rep":b, "Total Price":c})
group = df.groupby(["Region","Rep"]).sum()
total_price = group["Total Price"].groupby(level=0, group_keys=False)
gtp = total_price.nlargest(5)
ax = gtp.plot(kind="bar")
#draw lines and titles
count = gtp.groupby("Region").count()
cum = np.cumsum(count)
for i in range(len(count)):
title = count.index.values[i]
ax.axvline(cum[i]-.5, lw=0.8, color="k")
ax.text(cum[i]-(count[i]+1)/2., 1.02, title, ha="center",
transform=ax.get_xaxis_transform())
# shorten xticklabels
ax.set_xticklabels([l.get_text().split(", ")[1][:-1] for l in ax.get_xticklabels()])
plt.show()
I have a Dataframe and I slice the Dataframe into three subsets. Each subset has 3 to 4 rows of data. After I slice the data frame into three subsets, I plot them using Matplotlib.
The problem I have is I am not able to create a plot where each subplot is plotted using sliced DataFrame. For example, in a group of three in a set, I have only one of the plots (last subplot) plotted where there is no data for the remaining two plots initial sets in a group. it looks like the 'r' value does not pass to 'r.plot' for all three subplots.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
df['key1'] = 0
df.key1.iloc[0:3] = 1
df.key1.iloc[3:7] = 2
df.key1.iloc[7:] = 3
df_grouped = df.groupby('key1')
for group_name, group_value in df_grouped:
rows, columns = group_value.shape
fig, axes = plt.subplots(rows, 1, sharex=True, sharey=True, figsize=(15,20))
for i,r in group_value.iterrows():
r = r[0:columns-1]
r.plot(kind='bar', fill=False, log=False)
I think you might want what I call df_subset to be summarized in some way, but here's a way to plot each group in its own panel.
# Your Code Setting Up the Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
df['key1'] = 0
df.key1.iloc[0:3] = 1
df.key1.iloc[3:7] = 2
df.key1.iloc[7:] = 3
# My Code to Plot in Three Panels
distinct_keys = df['key1'].unique()
fig, axes = plt.subplots(len(distinct_keys), 1, sharex=True, figsize=(3,5))
for i, key in enumerate(distinct_keys):
df_subset = df[df.key1==key]
# {maybe insert a line here to summarize df_subset somehow interesting?}
# plot
axes[i] = df_subset.plot(kind='bar', fill=False, log=False)
I have a dataframe as shown below:
PNO,SID,SIZE,N3IC,S4IC,N4TC,KPAC,NAAC,ECTC
SJV026,VIDAIC,FINE,0.0926,0.0446,0.0333,0.0185,0.005,0.0516
SJV028,CHCRUC,FINE,0,0.1472,0.0076,0.0001,0.0025,0.0301
SJV051,AMSUL,FINE,0,0.727,0.273,0,0,0
SJV035,MOVES1,FINE,0.02,0.04092,0,0,0,0.45404
I am looking to plot this using matplotlib or seaborn where there will be 'n' number of subplots for each row of data (that is one bar plot for each row of data).
import matplotlib.pyplot as plt
import pandas as pd
from tkinter.filedialog import askopenfilename
Inp_Filename = askopenfilename()
df = pd.read_csv(Inp_Filename)
rows, columns = df.shape
fig, axes = plt.subplots(rows, 1, figsize=(15, 20))
count = 0
for each in df.iterrows():
row = df.iloc[count,3:]
row.plot(kind='bar')
count = count + 1
plt.show()
The above code output is not what I am looking for. Is there a way to plot each row of the data in the 'fig' and 'axes' above?
In principle the approach is correct. There are just a couple of errors in your code, which, when corrected, give the desired result.
import io
import matplotlib.pyplot as plt
import pandas as pd
u = u"""PNO,SID,SIZE,N3IC,S4IC,N4TC,KPAC,NAAC,ECTC
SJV026,VIDAIC,FINE,0.0926,0.0446,0.0333,0.0185,0.005,0.0516
SJV028,CHCRUC,FINE,0,0.1472,0.0076,0.0001,0.0025,0.0301
SJV051,AMSUL,FINE,0,0.727,0.273,0,0,0
SJV035,MOVES1,FINE,0.02,0.04092,0,0,0,0.45404"""
df = pd.read_csv(io.StringIO(u))
rows, columns = df.shape
fig, axes = plt.subplots(rows, 1, sharex=True, sharey=True)
for i, r in df.iterrows():
row = df.iloc[i,3:]
row.plot(kind='bar', ax=axes[i])
plt.show()