Connecting jittered data points with lines - seaborn python - python

Is there a way to extract the x-axis values of the jittered points in the middle plot as generated by the code below?
See the issue here
# import libraries
import seaborn as sns
import matplotlib.pyplot as plt
# Create plot
# NOTE(review): `fig` and `pupil_long_df` are not defined in this snippet; they
# are assumed to exist already.
ax2 = fig.add_subplot(132)
# Jittered individual observations, one strip per value of "variable".
sns.stripplot(x="variable", y="value", data=pupil_long_df, dodge=True, jitter=True, alpha=.40, zorder=1, size=8, linewidth = 1)
# Point estimate per category with ci=95 error bars; join=False leaves the
# estimates unconnected.
sns.pointplot(x='variable', y='value', ci=95,data=pupil_long_df, join=False, scale=1, zorder=100, color='black', capsize = 0.05, palette = 'Paired')
# Add lines between the points
# NOTE(review): `df` is not defined in this snippet. No x values are passed, so
# the lines are drawn at matplotlib's default x positions rather than at the
# jittered positions chosen by stripplot - this is the issue being asked about.
lines3 = plt.plot([df.iloc[:,0], df.iloc[:,1]], color = 'grey', linewidth = 0.5, linestyle = '--')

I think it would be terribly impractical to extract the x-values of the stripplot... My standard advice is that if you want to do something more than the standard plots offered by seaborn, then it's usually easier to just recreate them by hand. See the code below:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

N = 20
# dummy dataset: four conditions derived from the same N random draws
data = np.random.normal(size=(N,))
df = pd.DataFrame({'condition 1': data,
                   'condition 2': data + 1,
                   'condition 3': data,
                   'condition 4': data - 1})

# One x coordinate per data point: column k is centred on the integer k and
# shifted by normally distributed noise with standard deviation `jitter`.
# Keeping these coordinates in a DataFrame is what makes it possible to
# connect the points afterwards.
jitter = 0.05
df_x_jitter = pd.DataFrame(np.random.normal(loc=0, scale=jitter, size=df.values.shape),
                           columns=df.columns)
df_x_jitter += np.arange(len(df.columns))

fig, ax = plt.subplots()

# Draw the jittered points, one series per condition.
for col in df:
    ax.plot(df_x_jitter[col], df[col], 'o', alpha=.40, zorder=1, ms=8, mew=1)

# Label the integer positions with the condition names.
ax.set_xticks(range(len(df.columns)))
ax.set_xticklabels(df.columns)
ax.set_xlim(-0.5, len(df.columns) - 0.5)

# Connect each row's points within the pairs (1, 2) and (3, 4); zorder=-1
# keeps the connecting lines behind the markers.
for idx in df.index:
    ax.plot(df_x_jitter.loc[idx, ['condition 1', 'condition 2']],
            df.loc[idx, ['condition 1', 'condition 2']],
            color='grey', linewidth=0.5, linestyle='--', zorder=-1)
    ax.plot(df_x_jitter.loc[idx, ['condition 3', 'condition 4']],
            df.loc[idx, ['condition 3', 'condition 4']],
            color='grey', linewidth=0.5, linestyle='--', zorder=-1)

Related

How can I add the number of points to a seaborn.histplot?

I wanted to add a number of points (in my case there's 111 points in the data file, so it would be N = 111) under the legend.
# Density-normalised histogram of `x` with a KDE curve drawn on top.
# NOTE(review): `x` is not defined in this snippet.
kde_options = {'cut': 2, 'bw_adjust': 0.5}
sns.histplot(
    x,
    stat='density',
    binwidth=50,
    kde=True,
    color='red',
    alpha=.3,
    kde_kws=kde_options,
)
plt.legend(labels=["Sample_1"], fontsize=16)
# Both axis captions use the same font size.
for set_caption, caption in ((plt.xlabel, 'Age'), (plt.ylabel, 'Density')):
    set_caption(caption, fontsize=18)
histogram
I'm sure there are different approaches to this than mine, but one is to add it to the legend label. The second is to simply add the annotation anywhere. Since no data was provided, I used data from seaborn reference page.
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
penguins = sns.load_dataset("penguins")

# N is the number of observations, i.e. the count of non-missing values in the
# plotted column. (The tallest bin's count is not the sample size, and filling
# missing values with 0 would add observations that are not in the data.)
n_points = int(penguins['bill_length_mm'].notna().sum())

fig, ax = plt.subplots()
# Draw on the axes created above so the legend and text below target the same plot.
sns.histplot(data=penguins, x='bill_length_mm', stat='density',
             binwidth=10,
             kde=True,
             color='red', alpha=.3,
             kde_kws={'cut': 2, 'bw_adjust': 0.5},
             ax=ax)
# Option 1: put N on a second line of the legend label.
ax.legend(labels=["Sample_1\n N={}".format(n_points)], fontsize=16)
# Option 2: free-standing annotation; transAxes makes (0.1, 0.9) a fraction of
# the axes area instead of a data coordinate.
ax.text(0.1, 0.9, 'N={}'.format(n_points), transform=ax.transAxes)
ax.set_xlabel('Age', fontsize=18)
ax.set_ylabel('Density', fontsize=18)
plt.show()

python: multiple barchart in subplot

In order to plot multiple barcharts I use the following code:
import numpy as np
import matplotlib.pyplot as plt
# Grouped bar chart: for every group, a "Girls" bar and a "Boys" bar are drawn
# side by side around the group's integer position.
X = ['Group A', 'Group B', 'Group C', 'Group D']
Ygirls = [10, 20, 20, 40]
Zboys = [20, 30, 25, 30]
X_axis = np.arange(len(X))

# Girls sit 0.2 to the left of each position, boys 0.2 to the right; every bar
# is 0.4 wide, so the two bars of a group touch.
plt.bar(X_axis - 0.2, Ygirls, 0.4, label='Girls')
plt.bar(X_axis + 0.2, Zboys, 0.4, label='Boys')

# Replace the numeric positions with the group names.
plt.xticks(X_axis, X)

for set_text, text in (
    (plt.xlabel, "Groups"),
    (plt.ylabel, "Number of Students"),
    (plt.title, "Number of Students in each group"),
):
    set_text(text)

plt.legend()
plt.show()
getting this result:
I would like to plot two of these figures attached to each other, using the same y axis.
For a normal bar chart the code would be:
# Two side-by-side axes (1 row, 2 columns) that share one y axis.
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
fig.suptitle('Horizontally stacked subplots')
# NOTE(review): the bar() arguments are left blank as placeholders in the
# question; these two lines are not runnable as written.
ax1.bar(, )
ax2.bar(, )
# wspace=0.0 removes the horizontal gap between the two panels.
fig.subplots_adjust(wspace=0.0)
# Hide the second panel's left spine and give its y ticks zero length.
ax2.spines['left'].set_visible(False)
ax2.tick_params(axis='y', which='both', length=0)
How do I modify the code for the multiple bar charts?
What you want is a stacked bar diagram: you can view this example code here.
For your code, it would be like:
import numpy as np
import matplotlib.pyplot as plt
# Stacked bar chart: the girls' bars are drawn on top of the boys' bars.
X = ['Group A', 'Group B', 'Group C', 'Group D']
Ygirls = [10, 20, 20, 40]
Zboys = [20, 30, 25, 30]
X_axis = np.arange(len(X))
fig, ax = plt.subplots()
ax.bar(X, Zboys, label='Boys')
# bottom=Zboys starts each girls' bar where the matching boys' bar ends.
ax.bar(X, Ygirls, label='Girls', bottom=Zboys)
plt.xticks(X_axis, X)
plt.xlabel("Groups")
plt.ylabel("Number of Students")
plt.title("Number of Students in each group")
ax.legend()
plt.show()
And the output will be like:

plt.ylim not giving me the ranges i want

My code is as follow:
# Line plot of request counts per account over time.
# NOTE(review): `df` is not defined in this snippet.
plt.figure(figsize =(15,4) )
sns.set_style("white")
# The y limits are set here, before anything has been drawn on the axes.
plt.ylim(500,30000)
ax = sns.lineplot(data = df,x = 'date', y = 'number of requests', hue = 'account_name',style="account_name",markers=True, dashes=False)
# NOTE(review): frameon is given the string 'false', not the boolean False; a
# non-empty string is truthy in Python, so this presumably does not switch the
# legend frame off - confirm.
ax.legend(bbox_to_anchor=[0.94, -0.3], frameon='false', ncol=6)
plt.title('Number of requests over the time', fontsize=20)
plt.xlabel('Date', fontsize=15)
plt.ylabel('Requests', fontsize=15)
plt.grid(True, linestyle = ':')
sns.despine()
but the y ranges are: 5000,10000,15000,20000,25000,30000.
My problem is that some accounts have a very low number of requests while others have a very high number, as in the screenshot. How can I control the ranges? This graph is not very readable :(
The easiest way is ax.set_ylim(-5000, 25000). As @GigBen commented, the other ways to adjust the y-axis ticks are to switch to a log scale or to set your own ticks.
simple:
ax.set_ylim([-5000,25000])
log style:
ax.set_yscale('log')  # Axes exposes set_yscale(); only pyplot has a bare yscale() function
yticks customize:
ax.set_yticks(ax.get_yticks()[::2])
full code:
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): `df` is not defined in this snippet; it is the asker's DataFrame.
# Apply the style before the figure is created so the new axes pick it up.
sns.set_style("white")
fig, ax = plt.subplots(figsize=(15, 4))
# Draw explicitly on `ax` instead of relying on the "current axes".
sns.lineplot(data=df, x='date', y='number of requests', hue="account_name",
             style="account_name", markers=True, dashes=False, ax=ax)
# frameon must be the boolean False: the string 'false' is truthy and would
# leave the legend frame switched on.
ax.legend(bbox_to_anchor=[0.94, -0.3], frameon=False, ncol=6)
ax.set_title('Number of requests over the time', fontsize=20)
ax.set_xlabel('Date', fontsize=15)
ax.set_ylabel('Requests', fontsize=15)
ax.grid(True, linestyle=':')
# update: keep every second automatically chosen y tick
ax.set_yticks(ax.get_yticks()[::2])
plt.show()

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want distplot for 2 Genders either in same plot or separately
Any suggestions or help will be highly appreciated.
The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis are the ages themselves, and the y-axis is a probability distribution, which usually are quite small numbers (the area under the curve is normalized to be 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves
it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Synthetic data: NF "female" rows followed by NM "male" rows.
NF = 50
NM = 10
df = pd.DataFrame({'Age': np.concatenate([np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF),
                                          np.random.randint(15, 23, NM)]),
                   'Gender': np.repeat(['female', 'male'], (NF, NM))})
# Replace ages 21 and 23 by 20 so that those two ages are missing from the data.
df['Age'] = df['Age'].where((df['Age'] != 21) & (df['Age'] != 23), 20)

sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))

# Left subplot: count plot with the count written above every bar.
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
    x = rect.get_x() + rect.get_width() / 2.
    y = rect.get_height()
    ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')

# Right subplot: one density curve per gender, both drawn on the same axes.
# NOTE(review): distplot is deprecated in seaborn >= 0.11; the kdeplot call
# shown in the comment below is the direct alternative.
for gender in ('female', 'male'):
    # ax2 = sns.kdeplot(df[df['Gender'] == gender]['Age'], shade=True, ax=axs[1], label=gender)
    ax2 = sns.distplot(df[df['Gender'] == gender]['Age'], hist=False, kde_kws={'shade': True}, ax=axs[1], label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.show()

box plot does not show outliers

This is my function to create box plot. But it does not show the outliers. any help?
def box_plot_draw(data, box_colors):
    """Draw a box plot of *data* on a new figure.

    Args:
        data: The samples to plot; passed unchanged to ``Axes.boxplot``.
        box_colors: Accepted for the caller's convenience; the drawing code
            does not use it.

    Returns:
        None. The figure is created through pyplot; call ``plt.show()`` to
        display it.
    """
    fig, ax = plt.subplots()
    # The window title lives on the figure manager; the canvas-level setter
    # is gone from current matplotlib. Some backends have no manager.
    manager = fig.canvas.manager
    if manager is not None:
        manager.set_window_title('A Boxplot Example')
    fig.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

    # Vertical boxes are the default, so no orientation argument is passed.
    bp = ax.boxplot(data, notch=False, sym='+', whis=1.5)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='black')
    # Set the marker edge explicitly: '+' is drawn only with its edge, so a
    # style that sets the marker edge width to 0 (presumably what happens in
    # the notebook - confirm) would make the outliers invisible.
    plt.setp(bp['fliers'], color='red', marker='+',
             markeredgecolor='red', markeredgewidth=1)

    # which='major' restricts the grid to the major tick positions.
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
    ax.set_axisbelow(True)

    # Title and axis labels.
    ax.set_title('Comparison of the duration of time for two Distributions')
    ax.set_xlabel('Distribution')
    ax.set_ylabel('Value')
You can test the code using this data.
# Demo driver: two reproducible normal samples of 200 draws each.
np.random.seed(10)
sample_size = 200
collectn_1 = np.random.normal(100, 10, sample_size)
collectn_2 = np.random.normal(80, 30, sample_size)

# One list entry per box, in drawing order.
data_to_plot = [collectn_1, collectn_2]
box_colors = ['black', 'green']

box_plot_draw(data_to_plot, box_colors)
plt.show()
I am using the Jupyter notebook and I used styles for other functions. But I did not use any style for this function.

Categories