Use loops to create small multiples in subplots with groupby.sum?

Use loops to create small multiples in subplots with groupby.sum? - python

I'd like to create 5 x 7 subplots to accommodate 34 line charts, one for each Planning Authority, with the x-axis as Permission Financial Year and the y-value as the sum of Net additional units.
I have used the groupby.sum to find out the sum of net additional units by each authority in each year, and manually created a few subplots for illustration. However, I am sure there must be some more efficient ways like using loop to automate/simplify the generation of all the charts.
Hope someone can help. Many thanks.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import randint
from numpy.random import randint
# Display options used when the frames are printed.
pd.set_option('display.max_colwidth', 0)
pd.set_option('display.max_rows', 1000)
df = pd.read_csv('LDD Permissions for Datastore - Final selected updated 19.04.2020 - including checked05.05 -used.csv', encoding='mac_roman')
# Aggregate the sum of net additional units by Permission Financial Year and
# Planning Authority. The column is selected *before* summing so that only the
# needed column is aggregated; the result has one row per year and one column
# per authority.
total_permitted_unit_per_year = (
    df.groupby(['Permission Financial Year', 'Planning Authority'])['Net additional units']
    .sum()
    .unstack(-1)
)
# Generate subplots of net additional units from 2009 to 2019 for each Planning Authority
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
# Create the one figure that holds every subplot, already at its final size.
# (A second plt.figure(figsize=...) call would open a separate, empty figure
# instead of resizing this one.)
fig = plt.figure(figsize=(30, 15))
ax1 = fig.add_subplot(7,5,1)
ax2 = fig.add_subplot(7,5,2)
ax3 = fig.add_subplot(7,5,3)
ax4 = fig.add_subplot(7,5,4)
ax5 = fig.add_subplot(7,5,5)
ax6 = fig.add_subplot(7,5,6)
ax7 = fig.add_subplot(7,5,7)
ax8 = fig.add_subplot(7,5,8)
ax9 = fig.add_subplot(7,5,9)
ax10 = fig.add_subplot(7,5,10)
ax11 = fig.add_subplot(7,5,11)
ax12 = fig.add_subplot(7,5,12)
# Manually drawn panels, one authority per axes, all on a common 0-350 y-range.
total_permitted_unit_per_year.plot(y='Barking and Dagenham', ax=ax1, legend=False, marker='.', markersize=5)
ax1.set_title('Barking and Dagenham')
ax1.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Barnet', ax=ax2, legend=False, marker='.', markersize=5)
ax2.set_title('Barnet')
ax2.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Bexley', ax=ax3, legend=False, marker='.', markersize=5)
ax3.set_title('Bexley')
ax3.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Sutton', ax=ax4, legend=False, marker='.', markersize=5)
ax4.set_title('Sutton')
ax4.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Merton', ax=ax5, legend=False, marker='.', markersize=5)
ax5.set_title("Merton")
ax5.set_ylim([0, 350])

You can replace your code after the plt.figure line with:
# Draw one panel per Planning Authority column in a 7 x 5 grid.
for col_num in range(1, 13):
    # Subplot positions are 1-based while column positions are 0-based, so
    # shift by one; indexing with col_num directly would skip the first
    # authority and run past the last column.
    authority = total_permitted_unit_per_year.columns[col_num - 1]
    ax = fig.add_subplot(7, 5, col_num)
    ax.set_title(authority)
    ax.set_ylim([0, 350])
    total_permitted_unit_per_year.plot(y=authority, ax=ax, legend=False, marker='.', markersize=5, figsize=(30,15))
Replace 13 with the number of columns you want to plot + 1, which may just be len(total_permitted_unit_per_year.columns)+1
If you want to just plot certain columns in a certain order, you do something like this:
pa = ['Croydon', ... ] # fill it out with the column names
# enumerate from 1 because add_subplot positions are 1-based (0 is rejected).
for i, authority in enumerate(pa, start=1):
    ax = fig.add_subplot(7, 5, i)
    ax.set_title(authority)
    ax.set_ylim([0, 350])
    total_permitted_unit_per_year.plot(y=authority, ax=ax, legend=False, marker='.', markersize=5, figsize=(30,15))

Related

Plotting two weeks of pandas time series data on single axis in matplotlib

I have two datasets that each cover one week. z1 is from 2022-02-07 to 2022-02-14, and z2 is from 2022-01-31 to 2022-02-07. So they both start on a Monday and end on the next Monday.
I want to plot z1 and z2 so they share the same x and y axes, with xticklabels showing Mon, Tue, etc. How do I do this?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Hourly timestamps for each week, both end points included. The random
# sample size is taken from the index length so the two columns always match
# (a hard-coded size that differs from the index length makes the DataFrame
# constructor raise).
this_week = pd.date_range(start='2022-02-07',end='2022-02-14',freq='1H')
z1 = pd.DataFrame(data={'datetime': this_week, 'data1': np.random.randint(5,40,size=len(this_week))})
z1 = z1.set_index('datetime')
z1['day'] = z1.index.day_name()
last_week = pd.date_range(start='2022-01-31',end='2022-02-07',freq='1H')
z2 = pd.DataFrame(data={'datetime': last_week, 'data2': np.random.randint(22,31,size=len(last_week))})
z2 = z2.set_index('datetime')
z2['day'] = z2.index.day_name()
plt.figure(figsize=(14,6))
ax = plt.subplot(111)
ax2 = ax.twinx()
# Each frame is drawn against its own datetime index on its own twin axes.
z1.plot(ax=ax, label='this week', lw=2, color='b', x_compat=True)
z2.plot(ax=ax2, label='last week', lw=2, color='r', x_compat=True)
# ax.plot(z1['day'], z1['data1'], label='this week', lw=2, color='b')
# ax2.plot(z2['day'], z2['data2'], label='last week', lw=2, color='r')
# One major tick per day, labelled with the abbreviated weekday name.
ax.xaxis.set_major_locator(mdates.DayLocator())
xfmt = mdates.DateFormatter('%a')
ax.xaxis.set_major_formatter(xfmt)
ax.tick_params(axis="x", rotation=0)
ax.legend()
ax2.legend()
plt.show()
but I want this:

I don't think it is possible to make a single graph for each of the pandas plots since they have different indices.
Using matplotlib directly, plot both series against a continuous x-axis made of the data-point positions (0 to 168). After creating the graph, use set_xticks to place a tick every 24 points, which is where the day of the week changes, and use set_xticklabels to label each of those ticks with the day name taken from either data frame.
# plt.subplots already returns the axes to draw on; no extra plt.subplot call
# is needed.
fig, ax = plt.subplots(figsize=(14,6))
ax2 = ax.twinx()
# Plot against point positions instead of timestamps so both weeks line up
# on the same x range. Positions are derived from the data length.
ax.plot(np.arange(len(z1)), z1['data1'], label='this week', lw=2, color='b')
ax2.plot(np.arange(len(z2)), z2['data2'], label='last week', lw=2, color='r')
# The data is hourly, so every 24th point starts a new day; label those
# positions with the matching day names.
ax.set_xticks(np.arange(0, len(z1), 24))
ax.set_xticklabels(z1.day[::24])
ax.tick_params(axis="x", rotation=0)
fig.legend(bbox_to_anchor=(0.12, 0.12, 0.1, 0.1))
plt.show()

Two different graph ticks parameters on Y axes from one table

Considering below table:
| country | points    | price     |
|---------|-----------|-----------|
| England | 91.550725 | 51.681159 |
| India   | 90.222222 | 13.333333 |
| Austria | 90.190782 | 30.762772 |
| Germany | 89.836321 | 42.257547 |
| Canada  | 89.377953 | 35.712598 |
# Build a small frame of rounded ratings ('points') and prices per country and
# draw both as horizontal bar charts on twin axes.
# NOTE(review): pd, plt and sns are not imported in this snippet; presumably
# they are imported earlier in the asker's session.
d = {'points': [91.5, 90.2, 90.1, 89.8, 89.3],
'price': [51.6, 13.3,30.7, 42.2, 35.7]}
index=['England', 'India','Austria', 'Germany','Canada']
df = pd.DataFrame(index=index,data=d)
fig, ax1 = plt.subplots(figsize = (10,5))
color = 'tab:purple'
ax1.set_xlabel('Country', fontsize=12)
ax1.set_ylabel('Average Ratings', color=color, fontsize=12)
# No ax= is passed, so the bars go to pyplot's current axes (ax1 at this point).
sns.barplot(x=df['points'],y=df.index, color=color)
ax1.tick_params(axis='y', labelcolor=color, labelsize = 12)
# Second axes sharing the x-axis with ax1; its y-axis is drawn on the right.
ax2 = ax1.twinx()
plt.xlim(12, 92)
color = 'tab:red'
ax2.set_ylabel('Price', color=color, fontsize=12)
# Again no ax= is passed; the statement order matters because the current
# axes is now the twin created above.
sns.barplot(x=df['price'],y=df.index,color=color)
ax2.tick_params(axis='y', labelcolor=color, labelsize = 12)
My question: How can I modify the tick labels of the right-hand y-axis so that they show the values of the price column (in red), together with the axis title?
Pandas: 1.2.4
Seaborn: 0.11.1
Matplotlib: 3.3.4

I assume this comes close to what you want:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
# Sample data: rounded rating points and prices, indexed by country.
d = {
    'points': [91.5, 90.2, 90.1, 89.8, 89.3],
    'price': [51.6, 13.3, 30.7, 42.2, 35.7],
}
index = ['England', 'India', 'Austria', 'Germany', 'Canada']
df = pd.DataFrame(index=index, data=d)

fig, ax1 = plt.subplots(figsize=(10, 5))

# Left-hand axes: ratings in purple. The target axes is passed explicitly
# rather than relying on pyplot's current axes.
color = 'tab:purple'
ax1.set_ylabel('Average country rating (in points)', color=color, fontsize=12)
sns.barplot(x=df['points'], y=df.index, color=color, ax=ax1)
ax1.tick_params(axis='y', labelcolor=color, labelsize=12)

# Right-hand twin axes: prices in red, sharing the x-axis with ax1.
ax2 = ax1.twinx()
ax2.set_xlim(12, 92)
color = 'tab:red'
ax2.set_ylabel('Price (in $)', color=color, fontsize=12)
sns.barplot(x=df['price'], y=df.index, color=color, ax=ax2)
ax2.tick_params(axis='y', labelcolor=color, labelsize=12)
# Relabel the right axis with the price values instead of the country names.
ax2.set_yticklabels(df['price'])

# The shared x-axis carries both quantities, so it gets no label of its own.
ax1.set_xlabel("")
plt.show()
Sample output:
However, I hope you take the point into account that Trenton mentioned in a comment (now deleted). This graph is indeed rather difficult to read. The values on the left have their labels on the right, and vice versa.

How to set space between plot and colormap table

I am using a secondary y-axis and a colormap, but when I plot them together the color bar overlaps my plot.
here is my code
# Scatter of NPV against Parameter2, coloured by lifetime, with a second
# series on a twin y-axis.
# NOTE(review): df_Combine and df_Combine_min_select1 are not defined in this
# snippet; presumably they are created earlier.
fig,ax1=plt.subplots()
ax1 = df_Combine.plot.scatter('Parameter2', 'NPV (MM €)', marker='s', s=500, ylim=(-10,60), c='Lifetime1 (a)', colormap='jet_r', vmin=0, vmax=25, ax=ax1)
# Zero reference line on the scatter axes. The snippet never defines `graph`;
# the axes returned by the scatter call is ax1.
ax1.axhline(0, color='k')
plt.xticks(rotation=90)
ax2 = ax1.twinx()
ax2.plot(df_Combine_min_select1["CumEnergy1 (kWH)"])
plt.show()
and here is my plotting
anyone can help how to solve this issue?
Thank you

When you let pandas automatically create a colorbar, you don't have positioning options. Therefore, you can create the colorbar in a separate step and provide the pad= parameter to set a wider gap. By default, pad is 0.05, meaning 5% of the width of the subplot.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Reproduce the question's plot with random stand-in data, creating the
# colorbar in a separate step so that its padding can be set.
fig, ax1 = plt.subplots()
# Ten random rows; the column names match the ones used in the question.
df_Combine = pd.DataFrame({'Parameter2': np.random.rand(10) * 10,
'NPV (MM €)': np.random.rand(10),
'Lifetime1 (a)': np.random.rand(10) * 25,
})
# colorbar=False stops pandas from adding its own colorbar.
ax1 = df_Combine.plot.scatter('Parameter2', 'NPV (MM €)', marker='s', s=500, ylim=(-10, 60), c='Lifetime1 (a)',
colormap='jet_r', vmin=0, vmax=25, ax=ax1, colorbar=False)
# Attach the colorbar to the scatter collection manually; pad=0.1 widens the
# gap between the axes and the colorbar.
plt.colorbar(ax1.collections[0], ax=ax1, pad=0.1)
# Twin axes for the second series; its y-axis sits in the widened gap.
ax2 = ax1.twinx()
ax2.plot(np.random.rand(10))
plt.show()

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want a distplot for the 2 genders, either in the same plot or separately.
Any suggestions or help would be highly appreciated.

The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis shows the ages themselves, and the y-axis shows a probability density, whose values are usually quite small (the area under the curve is normalized to 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves
it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Sample sizes for the two groups.
NF = 50
NM = 10
# Random example data: NF female ages followed by NM male ages.
df = pd.DataFrame({'Age': np.concatenate([np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF),
                                          np.random.randint(15, 23, NM)]),
                   'Gender': np.repeat(['female', 'male'], (NF, NM))})
# Replace ages 21 and 23 by 20 so that the countplot has missing categories.
df['Age'] = df['Age'].where((df['Age'] != 21) & (df['Age'] != 23), 20)
sns.set_style('darkgrid')
# Two side-by-side subplots: countplot on the left, distributions on the right.
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
# Write each bar's height above it.
for rect in ax.patches:
    x = rect.get_x() + rect.get_width() / 2.
    y = rect.get_height()
    ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
# One shaded density curve per gender, both drawn on the right-hand subplot.
for gender in ('female', 'male'):
    # ax2 = sns.kdeplot(df[df['Gender'] == gender]['Age'], shade=True, ax=axs[1], label=gender)
    ax2 = sns.distplot(df[df['Gender'] == gender]['Age'], hist=False, kde_kws={'shade': True}, ax=axs[1], label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.show()

Plot alignment and formatting help in Matplotlib and Seaborn

I have a dataframe with 15 rows, which I plot using a seaborn heatmap. I have three plots, each with different scale for the heatmap. The first two plots are the first two rows, which are not aligned on the plot.
I have created a grid with 15 rows, I give each of the first two rows 1/15th of the grid so I don't know why it is not aligned.
Another problem with the first two rows of the heatmap is that the text formatting doesn't work either.
So I want to do two things:
Stretch the top two rows of the table to align it with the bottom one and;
To make the formatting work for the top two rows as well.
Maybe also add titles to my white x-axes (l1 and l2) that separate the subgroups in the bottom plot (standard methods like ax.set_title do not work).
My code:
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
# Three stacked heatmaps on a 15-row grid: rows 0 and 1 get one grid row each
# and the remaining rows share the rest.
# NOTE(review): df and cols are not defined in this snippet; presumably they
# are created earlier.
# The grid gets its own name so the imported module alias `gs` is not
# overwritten (rebinding it breaks any later use of the module).
grid = gs.GridSpec(15, 1) # nrows, ncols
f = plt.figure(figsize=(10, 15))
cmap = sns.diverging_palette(220, 10, as_cmap=True)
ax1 = f.add_subplot(grid[0:1, :])
ax2 = f.add_subplot(grid[1:2, :])
ax3 = f.add_subplot(grid[2:15, :])
# These pyplot calls act on the current axes only.
ticksx = plt.xticks(fontsize = 18, fontweight='bold')
ticksy = plt.yticks(fontsize = 18, fontweight='bold')
# Each heatmap has its own colour scale (vmin/vmax); only the last one draws a
# colorbar and x tick labels.
wageplot = sns.heatmap(df[0:1], vmin=3000, vmax=10000, annot=False, square=True, cmap=cmap, ax=ax1, yticklabels=True, cbar=False, xticklabels=False)
tenureplot = sns.heatmap(df[1:2], vmin=45, vmax=100, annot=True, square=True, cmap=cmap, ax=ax2, yticklabels=True, cbar=False, xticklabels=False)
heatmap = sns.heatmap(df[2:15], vmin=0, vmax=1, annot=False, square=True, cmap=cmap, ax=ax3, yticklabels=True, cbar=True, xticklabels=True)
heatmap.set_xticklabels(cols, rotation=45, ha='right')
# Thick white horizontal lines used as visual separators between subgroups.
l1 = plt.axhline(y=1, linewidth=14, color='w', label='Female')
l2 = plt.axhline(y=5, linewidth=14, color='w', label='Education')
f.tight_layout()
I would appreciate being pointed to some information on how to program the needed grid. An example image:

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Use loops to create small multiples in subplots with groupby.sum? - python

Related

Plotting two weeks of pandas time series data on single axis in matplotlib

Two different graph ticks parameters on Y axes from one table

How to set space between plot and colormap table

Create separate distplot from countplot

Plot alignment and formatting help in Matplotlib and Seaborn

Categories

Resources