The x axis shows the year-month of each date over two years, hence 24 values. Because the relplot function only allows numeric types for the x and y axes, the x values are clustered at the two ends, since the integers 201801 to 201912 are not equally spaced. How can I make the ticks equally spaced with the correct labels, like this: 201801, 201802, ..., 201912 (24 date values)?
import seaborn as sns
sns.set(style="ticks")
# Map each distinct Sub-Category value to a colour of the "rocket_r" palette
# (17 colours are requested; zip stops at the shorter of the two sequences).
# NOTE(review): `rel` is not defined in this snippet — presumably the asker's DataFrame.
palette = dict(zip(rel['Sub-Category'].unique(),
sns.color_palette("rocket_r", 17)))
# One line-plot facet per Category, one coloured line per Sub-Category,
# with the YearMonth column on the x axis.
r=sns.relplot(x='YearMonth', y="Profit",
hue="Sub-Category", col="Category",
#size="Year", size_order=["2019", "2018"],
palette=palette,
height=5, aspect=.7, facet_kws=dict(sharex=True),
kind="line", legend="full", data=rel)
# y ticks every 500 from the smallest Profit up to the largest Profit + 50;
# x ticks at every distinct YearMonth value.
r.set(yticks=[i for i in range(int(min(rel['Profit'])), int(max(rel['Profit'])) + 50, 500)],
xticks=[i for i in rel.YearMonth.unique()])
sample output
As described in the comments you just have to convert your YearMonth column into a datetime:
# Input data: one row per (month, category, sub-category) observation
records = [
    ('2018-01', 'Clothing', 'Henkerchief', 16),
    ('2018-01', 'Furniture', 'Table', 40),
    ('2018-02', 'Clothing', 'Skirt', 110),
    ('2018-04', 'Clothing', 'Henkerchief', 33),
    ('2018-03', 'Furniture', 'Table', 44),
    ('2018-05', 'Clothing', 'Skirt', 55),
]
df = pd.DataFrame(records, columns=['YearMonth', 'Category', 'Sub-Category', 'Profit'])
# Create datetime column: parse the 'YYYY-MM' strings into timestamps
df['date'] = pd.to_datetime(df['YearMonth'], format='%Y-%m')
# Plot
sns.set(style="ticks")
# One colour per distinct sub-category, taken from the "rocket_r" palette
sub_categories = df['Sub-Category'].unique()
palette = dict(zip(sub_categories, sns.color_palette("rocket_r", 17)))
# One facet per Category and one line per Sub-Category, with the datetime column on x
r = sns.relplot(
    data=df, kind="line",
    x='date', y="Profit",
    hue="Sub-Category", col="Category",
    palette=palette, legend="full",
    height=5, aspect=.7,
    facet_kws={'sharex': True},
)
# Adjust xticks: one tick on the first day of every month, 2018-01 through 2018-05.
# The range starts at the first wanted month instead of starting one month early
# and dropping the left end with `closed=`, a keyword pandas deprecated in 1.4
# and removed in 2.0.
xticks = pd.date_range(start='2018-01', end='2018-05', freq='MS')
# Use the dates held in `xticks` as the x tick positions of the plot grid
r.set(xticks=xticks)
This is the output figure:
Update:
If you want to rotate the xtick labels you can use:
for ax in r.axes.ravel():
    # Rotate through tick_params: set_xticklabels(get_xticklabels()) would replace
    # the date formatter with fixed strings (empty ones if the labels have not
    # been rendered yet), whereas tick_params only changes the label angle.
    ax.tick_params(axis='x', labelrotation=45)
Related
I have created a barplot for given days of the year and the number of people born on this given day (figure a). I want to set the x-axes in my seaborn barplot to xlim = (0,365) to show the whole year.
But, once I use ax.set_xlim(0,365) the bar plot is simply moved to the left (figure b).
This is the code:
#data
# 159 rows: day numbers 41..199 and one random 'born' value per day
df = pd.DataFrame()
df['day'] = np.arange(41,200)
df['born'] = np.random.randn(159)*100
#plot
f, axes = plt.subplots(4, 4, figsize = (12,12))
# Draw into the top-left panel of the 4x4 grid.
# NOTE(review): the frame built above has no 'time' column, so `df.time` fails on
# this sample data — presumably the column exists in the asker's real data.
ax = sns.barplot(df.day, df.born, data = df, hue = df.time, ax = axes[0,0], color = 'skyblue')
# Remove the x axis label and the x tick labels
ax.get_xaxis().set_label_text('')
ax.set_xticklabels('')
ax.set_yscale('log')
ax.set_ylim(0,10e3)
# This is the call the question is about: after it the bars appear shifted to the left
ax.set_xlim(0,366)
ax.set_title('SE Africa')
How can I set the x-axes limits to day 0 and 365 without the bars being shifted to the left?
IIUC, the expected output given the nature of data is difficult to obtain straightforwardly, because, as per the documentation of seaborn.barplot:
This function always treats one of the variables as categorical and draws data at ordinal positions (0, 1, … n) on the relevant axis, even when the data has a numeric or date type.
This means the function seaborn.barplot creates categories based on the data in x (here, df.day) and they are linked to integers, starting from 0.
Therefore, even though our data only starts at day 41, seaborn places the first category at x = 0, which makes it difficult to adjust the lower limit of the x-axis after the function call.
The following code and corresponding plot clarifies what I explained above:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# data: one seeded random 'born' count for every day number from 41 to 199
rng = np.random.default_rng(101)
day = np.arange(41, 200)
born = rng.integers(low=0, high=10e4, size=day.size)
df = pd.DataFrame(dict(day=day, born=born))
# plot: seaborn's categorical bar plot, then an attempt to show the whole year on x
f, ax = plt.subplots(figsize=(4, 4))
sns.barplot(x='day', y='born', data=df, color='b', ax=ax)
ax.set_title('SE Africa')
ax.set_yscale('log')
# Tick every 30 units; positions and labels use the same values
tick_values = np.arange(0, 365, 30)
ax.set_xlim(0, 365)
ax.set_xticks(ticks=tick_values, labels=tick_values)
plt.tight_layout()
plt.show()
I suggest using matplotlib.axes.Axes.bar to overcome this issue, although handling colors of the bars would be not straightforward compared to sns.barplot(..., hue=..., ...) :
# plot: plain matplotlib bars (instead of sns.barplot) positioned at the day values
f, ax = plt.subplots(figsize=(4, 4))
ax.bar(x=df['day'], height=df['born'])
ax.set_title('SE Africa')
ax.set_yscale('log')
ax.xaxis.set_label_text('')
ax.set_xlim(0, 365)
plt.tight_layout()
plt.show()
Hi I'm trying to plot a pointplot and scatterplot on one graph with the same dataset so I can see the individual points that make up the pointplot.
Here is the code I am using:
xlPath = r'path to data here'
# Read the workbook with sheet_name=None and concatenate the result into one frame
# with a fresh index.
df = pd.concat(pd.read_excel(xlPath, sheet_name=None),ignore_index=True)
# Both calls below use the same 'ID' and 'HM (N/mm2)' columns and draw on the
# current axes; the question is about the two layers not lining up on x.
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright', capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer')
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)')
plt.show()
When I plot, for some reason the points from the scatterplot are offsetting one ID spot right on the x-axis. When I plot the scatter or the point plot separately, they each are in the correct ID spot. Why would plotting them on the same plot cause the scatterplot to offset one right?
Edit: Tried to make the ID column categorical, but that didn't work either.
Seaborn's pointplot creates a categorical x-axis while here the scatterplot uses a numerical x-axis.
Explicitly making the x-values categorical: df['ID'] = pd.Categorical(df['ID']), isn't sufficient, as the scatterplot still sees numbers. Changing the values to strings does the trick. To get them in the correct order, sorting might be necessary.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# first create some test data: 500 measurements with random IDs in 1..48
ids = np.random.choice(np.arange(1, 49), 500)
df = pd.DataFrame({'ID': ids, 'HM (N/mm2)': np.random.uniform(1, 10, 500)})
# IDs come in runs of six per layer, cycling through layers 1..4
layer_number = ((df['ID'] - 1) // 6) % 4 + 1
# shift the measurements by 8 per layer number
df['HM (N/mm2)'] = df['HM (N/mm2)'] + layer_number * 8
df['Layer'] = layer_number.map('Layer {}'.format)
# sort the values numerically first, then convert the 'ID's to strings
df = df.sort_values('ID')
df['ID'] = df['ID'].astype(str)
fig, ax = plt.subplots(figsize=(12, 4))
# Both layers read the same (string) x column and the same y column on one axes
shared = dict(data=df, x='ID', y='HM (N/mm2)', ax=ax)
sns.pointplot(hue='Layer', palette='bright', capsize=0.15, alpha=0.5,
              ci=95, join=True, **shared)
sns.scatterplot(color='purple', **shared)
ax.margins(x=0.02)
plt.tight_layout()
plt.show()
How are x-ticks set manually in seaborn (sns) in Python?
This might be a duplicate of: How to set x axis ticklabels in a seaborn plot, but the solution did not work for us.
We would like the x-ticks to start from 2020-01, but as data is only available from 2020-02, it doesn't automatically start the x-ticks on 2020-01. Although it will be an empty space, we would still like to include 2020-01. Following is the function.
def create_lineplot(dataframe):
    """Plot the 2020 ``temp_20`` / ``temp_60`` readings as lines and save the figure.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``dev_id``, ``temp_20``, ``temp_60`` and
        ``datetime`` (any values ``pd.to_datetime`` can parse).

    Side effects
    ------------
    Writes the figure to ``lineplot.png`` and shows it.
    """
    # Filtering data to only select relevant columns and data from the year 2020.
    # .copy() makes the datetime conversion below write to an independent frame
    # rather than to a slice of the caller's data.
    dataframe = dataframe[['dev_id', 'temp_20', 'temp_60', 'datetime']].copy()
    dataframe["datetime"] = pd.to_datetime(dataframe["datetime"])
    soil = dataframe[dataframe['datetime'].dt.year == 2020]

    # Long format: one row per (datetime, dev_id, temperature column).
    # var_name is passed as a plain string; newer pandas rejects a list here
    # for frames without MultiIndex columns.
    mdf = pd.melt(soil, id_vars=['datetime', 'dev_id'], var_name='Temperature')
    g = sns.relplot(data=mdf, x='datetime', y='value', kind='line', hue='Temperature', height=5, aspect=3)
    g._legend.remove()

    # relplot draws on its own figure, so the tick settings have to be applied
    # to the grid's axes; axes from a separate plt.subplots() call would belong
    # to a different, empty figure that is never saved.
    axes = g.axes.flat[0]
    axes.xaxis.set_major_locator(mdates.MonthLocator())  # every month
    axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    axes.xaxis.set_minor_locator(mdates.MonthLocator())
    plt.xticks(rotation='vertical')
    plt.tight_layout()
    plt.legend(loc='upper right')
    plt.savefig('lineplot.png')
    plt.show()
When we include following:
g.set_xticklabels(['2020-01','2020-02','2020-03','2020-04','2020-05','2020-06','2020-07','2020-08', '2020-09', '2020-10', '2020-11', '2020-12'])
between
g = sns.relplot(data=mdf, x='datetime', y='value', kind='line', hue='Temperature', height=5, aspect=3)
--- HERE ---
g._legend.remove()
then the tick is added as desired, but the values are stretched so it seems like there is data in 2020-01 as well.
Following is an example of the data:
Bonus
How can the ticks be aligned after adding a new one?
I have a data frame with three columns Features, CV-fold, Accuracy, Network. I want to have a boxplot for each Network, grouped by the Features and the CV-fold for the axis (see example image).
df = pd.read_csv(path)
df['Features'] = df["Features"].astype('category')
# Sorted distinct Network values, used below to order the facet rows
ordered_features = sorted(df.Network.value_counts().index)
# Keep rows with Accuracy above 0.1, then scale Accuracy by 100 to match the "(%)" axis label
df = df.loc[df['Accuracy'] > 0.1]
df.Accuracy = df.Accuracy*100
#sns.color_palette("husl", len(df['CV-fold'].value_counts().index))
#sns.set_palette('husl', len(df['CV-fold'].value_counts().index))
# One facet row per Network
g = sns.FacetGrid(df, row="Network", row_order=ordered_features,
height=3, aspect=3, legend_out=True, despine=False)
# NOTE(review): data=df is passed explicitly to the mapped boxplot — confirm each
# facet really receives only its own Network's rows.
g.map(sns.boxplot, x="CV-fold", y="Accuracy", hue="Features", data=df, palette='muted').add_legend()
g.set_axis_labels("", "Accuracy (%)")
Because I have 8 different networks, I would like to not have them all in a column or a row, but formatted in a grid (e.g. 2x4). Additionally, even though sharex is not enabled, the x-axis is only labeled at the very bottom graph.
How can I do that?
You would use the col_wrap keyword argument to get your plots on multiple rows with multiple columns.
For repeating the x-axis labels use ax.tick_params().
Example:
import seaborn as sns, matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
ordered_days = sorted(tips['day'].unique())
# FacetGrid.map calls boxplot once per facet; an explicit category order keeps
# the boxes in the same x positions on every facet.
sex_order = sorted(tips['sex'].unique())
# col_wrap wraps the facets onto a new row after that many columns
# (change it to 4 for a 2x4 grid of eight facets).
g = sns.FacetGrid(tips, col='day', col_order=ordered_days, col_wrap=2)
g.map(sns.boxplot, 'sex', 'total_bill', order=sex_order, palette='muted')
for ax in g.axes.flatten():
    # Show the x tick labels on every facet, not only on the bottom row
    ax.tick_params(labelbottom=True)
plt.tight_layout()
plt.show()
Result:
I have a dataframe which has a number of values per date (datetime field). These values are classified as U (users) or S (sessions) by a column Group. Seaborn is used to visualize two boxplots per date, where the hue is set to Group.
The problem comes when considering that the values corresponding to U (users) are much bigger than those corresponding to S (session), making the S data illegible. Thus, I need to come up with a solution that allows me to plot both series (U and S) in the same figure in an understandable manner.
I wonder if independent Y axes (with different scales) can be set to each hue, so that both Y axes are shown (as when using twinx but without losing hue visualization capabilities).
Any other alternative would be welcome =)
The S boxplot time series boxplot:
The combined boxplot time series using hue. Obviously it's not possible to see any information about the S group because of the scale of the Y axis:
The columns of the dataframe:
| Day (datetime) | n_data (numeric) | Group (S or U)|
The code line generating the combined boxplot:
# One pair of boxes per Day, coloured by Group; both groups are drawn on the same
# axes `ax` and therefore share one y scale. Outlier markers are turned off.
seaborn.boxplot(ax=ax,x='Day', y='n_data', hue='Group', data=df,
palette='PRGn', showfliers=False)
Managed to find a solution by using twinx:
fig, ax = plt.subplots(figsize=(50, 10))
# Work on two copies of the question's dataframe `df`, blanking the other group's
# values in each: every boxplot call still receives both hue levels but only has
# data to draw for one of them.
tmpU = df.copy()
tmpU.loc[tmpU['Group'] != 'U', 'n_data'] = np.nan
tmpS = df.copy()
tmpS.loc[tmpS['Group'] != 'S', 'n_data'] = np.nan
ax = seaborn.boxplot(ax=ax, x='Day', y='n_data', hue='Group', data=tmpU,
                     palette='PRGn', showfliers=False)
# Second y axis on the same x axis, so the S values get their own scale
ax2 = ax.twinx()
seaborn.boxplot(ax=ax2, x='Day', y='n_data', hue='Group', data=tmpS,
                palette='PRGn', showfliers=False)
# Build a single legend from the first two handles of the left axes
handles, labels = ax.get_legend_handles_labels()
l = plt.legend(handles[0:2], labels[0:2], loc=1)
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
# Hide every other x tick label to reduce clutter
for label in ax.get_xticklabels()[::2]:
    label.set_visible(False)
plt.show()
plt.close('all')
The code above generates the following figure:
Which in this case turns out to be too dense to be published. Therefore I would adopt a visualization based on subplots, as Parfait suggested in his/her answer.
It wasn't an obvious solution to me so I would like to thank Parfait for his/her answer.
Consider building separate plots on same figure with y-axes ranges tailored to subsetted data. Below demonstrates with random data seeded for reproducibility (for readers of this post).
Data (with U values higher than S values)
import pandas as pd
import numpy as np
import seaborn
import matplotlib.pyplot as plt
np.random.seed(2018)
# Ten consecutive days listed twice, so each day has two observations per group
days = pd.date_range('2016-10-01', periods=10)
days = days.append(days)
# U values are drawn from a wider range (0-800) than S values (0-200)
u_df = pd.DataFrame({'Day': days, 'n_data': np.random.uniform(0, 800, 20), 'Group': 'U'})
s_df = pd.DataFrame({'Day': days, 'n_data': np.random.uniform(0, 200, 20), 'Group': 'S'})
df = pd.concat([u_df, s_df], ignore_index=True)
# Store the days as strings
df['Day'] = df['Day'].astype(str)
Plot
fig = plt.figure(figsize=(10, 5))
for i, (group_name, group_df) in enumerate(df.groupby('Group')):
    # Create the panel first: plt.title acts on the current axes, so calling it
    # before plt.subplot would put the title on the previously drawn panel.
    plt.subplot(2, 1, i + 1)
    plt.title('N_data of {}'.format(group_name))
    seaborn.boxplot(x="Day", y="n_data", data=group_df, palette="PRGn", showfliers=False)
plt.tight_layout()
plt.show()
plt.clf()
plt.close('all')
To retain original hue and grouping, render all non-group n_data to np.nan:
fig = plt.figure(figsize=(10, 5))
groups = df.Group.unique()
for i, g in enumerate(groups):
    # One panel per group, stacked vertically
    plt.subplot(len(groups), 1, i + 1)
    # Keep every row (so all hue levels stay present) but replace the n_data of
    # the other groups with NaN, leaving only group `g` with data to draw.
    tmp = df.assign(n_data=df['n_data'].where(df['Group'] == g))
    seaborn.boxplot(x="Day", y="n_data", hue="Group", data=tmp,
                    palette="PRGn", showfliers=False)
plt.tight_layout()
plt.show()
plt.clf()
plt.close('all')
So one option for drawing a grouped box plot with two separate axes is to use hue_order=['value', np.nan] in your call to sns.boxplot:
fig = plt.figure(figsize=(14, 8))
# Styling shared by both boxplot calls
box_style = dict(
    width=0.75, showmeans=True,
    meanprops={"marker": "s", "markerfacecolor": "black", "markeredgecolor": "black"},
    linewidth=0.5, palette=customPalette,
)
# Left axis: hue_order lists 'co2' first and np.nan second, so only the 'co2'
# boxes have data while the second hue slot stays empty.
ax = sns.boxplot(x="lon_bucketed", y="value", data=m, hue='name',
                 hue_order=['co2', np.nan], **box_style)
# Right axis: the mirror image — empty first slot, 'g_xco2' in the second.
ax2 = ax.twinx()
ax2 = sns.boxplot(ax=ax2, x="lon_bucketed", y="value", data=m, hue='name',
                  hue_order=[np.nan, 'g_xco2'], **box_style)
ax.grid(alpha=0.5, which='major')
# Replace the automatic legend with one hand-built entry per variable
ax.legend_.remove()
GW = mpatches.Patch(color='seagreen', label='$CO_2$')
WW = mpatches.Patch(color='mediumaquamarine', label='$XCO_2$')
# Only `prop` is passed: matplotlib ignores `fontsize` when `prop` is given
ax2.legend(handles=[GW, WW], loc='upper right', prop={'size': 14})
ax.set_title("$XCO_2$ vs. $CO_2$", fontsize=18)
ax.set_xlabel('Longitude [\u00b0]', fontsize=14)
ax.set_ylabel('$CO_2$ [ppm]', fontsize=14)
ax2.set_ylabel('$XCO_2$ [ppm]', fontsize=14)
ax.tick_params(labelsize=14)
# Lay out last so the title and axis labels set above are taken into account
plt.tight_layout()