Two different graph ticks parameters on Y axes from one table - python

Consider the table below:

| country | points    | price     |
|---------|-----------|-----------|
| England | 91.550725 | 51.681159 |
| India   | 90.222222 | 13.333333 |
| Austria | 90.190782 | 30.762772 |
| Germany | 89.836321 | 42.257547 |
| Canada  | 89.377953 | 35.712598 |
# Sample data: average rating (points) and price for each country.
data = {
    'points': [91.5, 90.2, 90.1, 89.8, 89.3],
    'price': [51.6, 13.3, 30.7, 42.2, 35.7],
}
countries = ['England', 'India', 'Austria', 'Germany', 'Canada']
df = pd.DataFrame(data=data, index=countries)

fig, ax1 = plt.subplots(figsize=(10, 5))

# Left y-axis (purple): horizontal bars of the ratings, one per country.
rating_color = 'tab:purple'
ax1.set_xlabel('Country', fontsize=12)
ax1.set_ylabel('Average Ratings', color=rating_color, fontsize=12)
sns.barplot(x=df['points'], y=df.index, color=rating_color, ax=ax1)
ax1.tick_params(axis='y', labelcolor=rating_color, labelsize=12)

# Right y-axis (red): a twin of ax1 that receives the price bars.
ax2 = ax1.twinx()
ax2.set_xlim(12, 92)
price_color = 'tab:red'
ax2.set_ylabel('Price', color=price_color, fontsize=12)
sns.barplot(x=df['price'], y=df.index, color=price_color, ax=ax2)
ax2.tick_params(axis='y', labelcolor=price_color, labelsize=12)
My question: How can I change the tick labels on the right-hand Y axis (red) so that they show the values of the price column, in addition to the axis title?
Pandas: 1.2.4
Seaborn: 0.11.1
Matplotlib: 3.3.4

I assume this comes close to what you want:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
# Sample data: average rating (points) and price for each country.
data = {
    'points': [91.5, 90.2, 90.1, 89.8, 89.3],
    'price': [51.6, 13.3, 30.7, 42.2, 35.7],
}
countries = ['England', 'India', 'Austria', 'Germany', 'Canada']
df = pd.DataFrame(data=data, index=countries)

fig, ax1 = plt.subplots(figsize=(10, 5))

# Left y-axis (purple): horizontal bars of the ratings, one per country.
# No 'Country' x-label is set here -- it is not necessary for this output.
rating_color = 'tab:purple'
ax1.set_ylabel('Average country rating (in points)', color=rating_color, fontsize=12)  # mention unit for rating
sns.barplot(x=df['points'], y=df.index, color=rating_color, ax=ax1)
ax1.tick_params(axis='y', labelcolor=rating_color, labelsize=12)

# Right y-axis (red): a twin of ax1 that receives the price bars.
ax2 = ax1.twinx()
ax2.set_xlim(12, 92)
price_color = 'tab:red'
ax2.set_ylabel('Price (in $)', color=price_color, fontsize=12)  # mention unit for price
sns.barplot(x=df['price'], y=df.index, color=price_color, ax=ax2)
ax2.tick_params(axis='y', labelcolor=price_color, labelsize=12)
ax2.set_yticklabels(df['price'])  # relabel right axis with price values

ax1.set_xlabel("")  # remove x-label because this axis applies to both categories
plt.show()
Sample output:
However, I hope you take into account the point that Trenton made in a (now deleted) comment: this graph is indeed rather difficult to read. The values on the left have their labels on the right, and vice versa.

Related

grouping, percentage, and barchart in Python

I am very new to Python, and I am trying to plot a bar chart that shows the winner_rank_status percentage, and within each bar, I want to show the percentage of the winner (colour).
My dataset is like:
The code that I wrote:
# Keep only the two columns needed for the breakdown.
Q3_df = games_df[['winner', 'winner_rank_status']]

# Count the games for every (rank status, winner) pair, then express each
# count as a percentage of its own rank-status group.
pair_counts = Q3_df.groupby(['winner_rank_status', 'winner']).size()
Q3_df = pair_counts.groupby(level=0).apply(lambda x: round(100 * x / x.sum(), 2))

# Rows: rank status; columns: winner.
Q3_df = Q3_df.unstack()

ax = Q3_df.plot(
    kind='bar',
    stacked=True,
    figsize=(14, 7),
    rot=0,
    title='Effect of piece colour and winner rating status on the result',
    color=['black', 'grey', 'white'],
    edgecolor='black',
)

# Write each segment's value in the middle of the segment.
for c in ax.containers:
    ax.bar_label(c, label_type='center', color='b')
And it's the result that I get:
This result is wrong as it shows 100% for all categories!!! I need to show each category (Equal, Higher, Lower) their true percentage and then within each category the proportion of each colour...
Would you please guide me on how I can achieve it?
I appreciate your help.
You can give a different color to the labels for each set of bars. To get the percentage where all 9 values sum to 100, you could divide by the total number games:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
winner_options = ['black', 'draw', 'white']
rank_options = ['lower', 'equal', 'higher']

# Random stand-in data: 1000 games with a categorical rank status and winner.
Q3_df = pd.DataFrame({'winner_rank_status': pd.Categorical(np.random.choice(rank_options, 1000, p=[.46, .07, .47]), rank_options),
                      'winner': pd.Categorical(np.random.choice(winner_options, 1000, p=[.51, .03, .46]), winner_options)})

# Number of games for every (rank status, winner) pair; observed=False keeps
# category combinations that have no rows.
pair_counts = Q3_df.groupby(['winner_rank_status', 'winner'], observed=False).size()

# Percentage of ALL games, so the nine values sum to 100. This is computed
# directly instead of through groupby(level=0).apply(...): newer pandas
# prepends the group key to the result of apply, which would add a duplicate
# index level and break the unstack() below.
Q3_rank_winner_df = (100 * pair_counts / len(Q3_df)).round(2).unstack()

ax = Q3_rank_winner_df.plot(
    kind='bar',
    stacked=True,
    figsize=(14, 7),
    rot=0,
    title='Effect of piece colour and winner rating status on the result',
    color=['black', 'grey', 'white'],
    edgecolor='black')

# One label colour per winner so the text stays readable on each bar colour.
for bars, color in zip(ax.containers, ['skyblue', 'navy', 'darkblue']):
    ax.bar_label(bars, label_type='center', color=color)

# Anchor the legend's upper-left corner just outside the axes.
ax.legend(bbox_to_anchor=[1.01, 1.02], loc='upper left')
plt.tight_layout()
plt.show()
The new requirements are a bit confusing. One might add the percentages of each rank at the top of the bars:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
winner_options = ['black', 'draw', 'white']
rank_options = ['lower', 'equal', 'higher']

# Random stand-in data: 1000 games with a categorical rank status and winner.
Q3_df = pd.DataFrame(
    {'winner_rank_status': pd.Categorical(np.random.choice(rank_options, 1000, p=[.65, .05, .30]), rank_options),
     'winner': pd.Categorical(np.random.choice(winner_options, 1000, p=[.46, .07, .47]), winner_options)})

# Number of games for every (rank status, winner) pair; observed=False keeps
# category combinations that have no rows.
pair_counts = Q3_df.groupby(['winner_rank_status', 'winner'], observed=False).size()

# Total per rank status, aligned with pair_counts. transform() keeps the
# original index, unlike groupby(level=0).apply(...), which in newer pandas
# prepends the group key and would break the unstack() below.
rank_totals = pair_counts.groupby(level=0, observed=False).transform('sum')

# Percentage within each rank status, so every stacked bar sums to 100.
Q3_rank_winner_df = (100 * pair_counts / rank_totals).round(2).unstack()

ax = Q3_rank_winner_df.plot(
    kind='bar',
    stacked=True,
    figsize=(14, 7),
    rot=0,
    title='Effect of piece colour and winner rating status on the result',
    color=['black', 'grey', 'white'],
    edgecolor='black')

# One label colour per winner so the text stays readable on each bar colour.
for bars, color in zip(ax.containers, ['skyblue', 'navy', 'darkblue']):
    ax.bar_label(bars, label_type='center', color=color)

# Share of all games per rank status, written above the matching bar
# (the bars themselves end at 100).
Q3_rank_df = Q3_df.groupby(['winner_rank_status'], observed=False).size() * 100 / len(Q3_df)
for row, percent in enumerate(Q3_rank_df):
    ax.text(row, 103, f'{percent:.02f} %', color='navy', ha='center', va='center')

ax.margins(y=0.08)  # more space on top
ax.legend(bbox_to_anchor=[1.01, 1.02], loc='upper left')
plt.tight_layout()
plt.show()

Adjusting legend layout for multiple legends associated to one Python plot?

I am creating a Python plot from a dataframe with 3 y-axes. For each y-axis, there are multiple y-values I want to plot. All data sets for the y-axes are plotted against a shared Date x-axis.
The code looks as follows:
df = pd.read_excel(r'test.xlsx', sheet_name='test', engine='openpyxl')

fig, ax = plt.subplots()

# Third y-axis: its right spine is moved outward (1.15 in axes coordinates)
# and the figure is narrowed on the right to make room for it.
ax3 = ax.twinx()
ax3.spines['right'].set_position(('axes', 1.15))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
fig.subplots_adjust(right=0.7)

dates = df['Date']

# First y-axis: gas rates, each with its own line style.
gas_styles = {
    "Gas1": {'color': 'g'},
    "Gas2": {'color': 'b'},
    "Gas3": {'marker': "o", 'markersize': 2, 'color': 'r'},
}
for column, style in gas_styles.items():
    ax.plot(dates, df[column], label=column, **style)
ax.set_xlabel("Date")
ax.set_ylabel("Gas Rate")

# Second y-axis: water.
ax2 = ax.twinx()
for column, line_color in (("Water1", 'k'), ("Water2", 'y')):
    ax2.plot(dates, df[column], label=column, color=line_color)
ax2.set_ylabel("Water")

# Third y-axis: pressure, drawn with the default colours.
for column in ("Pressure1", "Pressure2"):
    ax3.plot(dates, df[column], label=column)
ax3.set_ylabel("Pressure")

# One legend per axes.
for axis in (ax, ax2, ax3):
    axis.legend()
plt.show()
The problem I am having is that I want the legends to be outside of the plot, preferably on the right-hand side after the 2nd y-axis. Is this possible? Right now the legends are just overlaid on the plot and not fully visible. I have tried using the bbox_to_anchor and loc parameters but had no luck. Thank you!
ax.get_legend_handles_labels() collects all the legend handles and their labels. Combining those for each of the axes, a new legend can be created.
bbox_to_anchor= sets an anchor point for the legend, using axes coordinates. loc= needs to be set, to tell which point of the legend's box will get fixed by the anchor.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Random stand-in data: 30 daily dates and a random walk for every series.
series_names = ['Gas1', 'Gas2', 'Gas3', 'Water1', 'Water2', 'Pressure1', 'Pressure2']
df = pd.DataFrame({'Date': pd.date_range('20210401', periods=30, freq='D'),
                   **{name: np.random.randn(30).cumsum() for name in series_names}})

fig, ax = plt.subplots()

# Third y-axis: its right spine is moved outward (1.15 in axes coordinates)
# and the figure is narrowed on the right to make room for it.
ax3 = ax.twinx()
ax3.spines['right'].set_position(('axes', 1.15))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
fig.subplots_adjust(right=0.7)

dates = df['Date']

# First y-axis: gas rates, each with its own line style.
gas_styles = {
    "Gas1": {'color': 'g'},
    "Gas2": {'color': 'b'},
    "Gas3": {'marker': "o", 'markersize': 2, 'color': 'r'},
}
for column, style in gas_styles.items():
    ax.plot(dates, df[column], label=column, **style)
ax.set_ylabel("Gas Rate")
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Second y-axis: water.
ax2 = ax.twinx()
for column, line_color in (("Water1", 'k'), ("Water2", 'y')):
    ax2.plot(dates, df[column], label=column, color=line_color)
ax2.set_ylabel("Water")

# Third y-axis: pressure, drawn with the default colours.
for column in ("Pressure1", "Pressure2"):
    ax3.plot(dates, df[column], label=column)
ax3.set_ylabel("Pressure")

# Gather the handles and labels of all three axes into one legend. Its
# upper-left corner is anchored to the right of the axes (axes coordinates).
all_handles, all_labels = [], []
for axis in (ax, ax2, ax3):
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    all_handles += axis_handles
    all_labels += axis_labels
ax.legend(handles=all_handles,
          labels=all_labels,
          bbox_to_anchor=(1.28, 1.02), loc='upper left')
plt.tight_layout()
plt.show()

How to set space between plot and colormap table

I am using a secondary y-axis and a colormap, but when I plot them together the color bar overlaps my plot.
Here is my code:
fig, ax1 = plt.subplots()

# Scatter plot coloured by 'Lifetime1 (a)'; pandas adds the colorbar itself.
ax1 = df_Combine.plot.scatter('Parameter2', 'NPV (MM €)', marker='s', s=500, ylim=(-10,60), c='Lifetime1 (a)', colormap='jet_r', vmin=0, vmax=25, ax=ax1)

# Horizontal reference line at y = 0. The snippet originally called this on
# `graph`, a name it never defines; ax1 is the axes that holds the scatter.
ax1.axhline(0, color='k')
plt.xticks(rotation=90)

# Secondary y-axis for the cumulative-energy line.
ax2 = ax1.twinx()
ax2.plot(df_Combine_min_select1["CumEnergy1 (kWH)"])
plt.show()
And here is the resulting plot:
Can anyone help me solve this issue?
Thank you
When you let pandas automatically create a colorbar, you don't have positioning options. Therefore, you can create the colorbar in a separate step and provide the pad= parameter to set a wider gap. By default, pad is 0.05, meaning 5% of the width of the subplot.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()

# Random stand-in data with the same column names as in the question.
point_count = 10
df_Combine = pd.DataFrame({
    'Parameter2': np.random.rand(point_count) * 10,
    'NPV (MM €)': np.random.rand(point_count),
    'Lifetime1 (a)': np.random.rand(point_count) * 25,
})

# colorbar=False stops pandas from creating its own colorbar.
ax1 = df_Combine.plot.scatter(
    x='Parameter2',
    y='NPV (MM €)',
    marker='s',
    s=500,
    ylim=(-10, 60),
    c='Lifetime1 (a)',
    colormap='jet_r',
    vmin=0,
    vmax=25,
    ax=ax1,
    colorbar=False,
)

# Create the colorbar separately so its distance from the plot can be set.
scatter_points = ax1.collections[0]
fig.colorbar(scatter_points, ax=ax1, pad=0.1)

# Secondary y-axis with a random line.
ax2 = ax1.twinx()
ax2.plot(np.random.rand(10))
plt.show()

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want a distplot for the 2 genders, either in the same plot or separately.
Any suggestions or help will be highly appreciated.
The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis shows the ages themselves, and the y-axis is a probability distribution, whose values are usually quite small numbers (the area under the curve is normalized to be 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Random stand-in data: NF female and NM male rows with integer ages.
NF = 50
NM = 10
female_ages = np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF)
male_ages = np.random.randint(15, 23, NM)
df = pd.DataFrame({'Age': np.concatenate([female_ages, male_ages]),
                   'Gender': np.repeat(['female', 'male'], (NF, NM))})
# Replace the ages 21 and 23 by 20, so those two ages never occur.
df['Age'] = df['Age'].where(~df['Age'].isin([21, 23]), 20)

sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))

# Left subplot: the countplot, with the count written above every bar.
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
    x = rect.get_x() + rect.get_width() / 2.
    y = rect.get_height()
    ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')

# Right subplot: one shaded density curve per gender, drawn on the same axes.
ax2 = axs[1]
for gender in ('female', 'male'):
    gender_ages = df.loc[df['Gender'] == gender, 'Age']
    # Alternative: sns.kdeplot(gender_ages, shade=True, ax=ax2, label=gender)
    sns.distplot(gender_ages, hist=False, kde_kws={'shade': True}, ax=ax2, label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')

plt.tight_layout()
plt.show()

Use loops to create small multiples in subplots with groupby.sum?

I'd like to create 5 x 7 subplots to accommodate 34 line charts, one for each Planning Authority, with the x-axis as Permission Financial Year and the y-value as the sum of Net additional units.
I have used the groupby.sum to find out the sum of net additional units by each authority in each year, and manually created a few subplots for illustration. However, I am sure there must be some more efficient ways like using loop to automate/simplify the generation of all the charts.
Hope someone can help. Many thanks.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import randint
from numpy.random import randint
pd.set_option('display.max_colwidth', 0)
df = pd.read_csv('LDD Permissions for Datastore - Final selected updated 19.04.2020 - including checked05.05 -used.csv', encoding='mac_roman')

# Aggregate the sum of net additional units by Permission Financial Year and Planning Authority
pd.set_option('display.max_rows', 1000)
# Select the column BEFORE summing so that only 'Net additional units' is
# aggregated, instead of summing every column and discarding the rest.
total_permitted_unit_per_year = (
    df.groupby(['Permission Financial Year', 'Planning Authority'])['Net additional units']
    .sum()
    .unstack(-1)
)

# Generate subplots of net additional units from 2009 to 2019 for each Planning Authority
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
# Create a single figure at its final size. The snippet originally called
# plt.figure() a second time, which opened an extra, empty 200x800 inch
# figure while every subplot below was still added to `fig`.
fig = plt.figure(figsize=(30, 15))
ax1 = fig.add_subplot(7,5,1)
ax2 = fig.add_subplot(7,5,2)
ax3 = fig.add_subplot(7,5,3)
ax4 = fig.add_subplot(7,5,4)
ax5 = fig.add_subplot(7,5,5)
ax6 = fig.add_subplot(7,5,6)
ax7 = fig.add_subplot(7,5,7)
ax8 = fig.add_subplot(7,5,8)
ax9 = fig.add_subplot(7,5,9)
ax10 = fig.add_subplot(7,5,10)
ax11 = fig.add_subplot(7,5,11)
ax12 = fig.add_subplot(7,5,12)

# One line chart per Planning Authority, all with the same y-range.
total_permitted_unit_per_year.plot(y='Barking and Dagenham', ax=ax1, legend=False, marker='.', markersize=5)
ax1.set_title('Barking and Dagenham')
ax1.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Barnet', ax=ax2, legend=False, marker='.', markersize=5)
ax2.set_title('Barnet')
ax2.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Bexley', ax=ax3, legend=False, marker='.', markersize=5)
ax3.set_title('Bexley')
ax3.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Sutton', ax=ax4, legend=False, marker='.', markersize=5)
ax4.set_title('Sutton')
ax4.set_ylim([0, 350])
total_permitted_unit_per_year.plot(y='Merton', ax=ax5, legend=False, marker='.', markersize=5)
ax5.set_title("Merton")
ax5.set_ylim([0, 350])
You can replace your code after the plt.figure line with:
for col_num in range(1, 13):
    # Subplot numbers start at 1 but column positions start at 0, so
    # subplot col_num shows column col_num - 1. Indexing with col_num itself
    # would skip the first column and run past the last one.
    column = total_permitted_unit_per_year.columns[col_num - 1]
    ax = fig.add_subplot(7, 5, col_num)
    ax.set_title(column)
    ax.set_ylim([0, 350])
    total_permitted_unit_per_year.plot(y=column, ax=ax, legend=False, marker='.', markersize=5, figsize=(30,15))
Replace 13 with the number of columns you want to plot + 1, which may just be len(total_permitted_unit_per_year.columns)+1
If you want to just plot certain columns in a certain order, you do something like this:
pa = ['Croyden', ... ] # fill it out with the column names
# Subplot numbers start at 1 (0 is rejected by add_subplot), so count the
# positions from 1 while walking through the chosen column names in order.
for position, name in enumerate(pa, start=1):
    ax = fig.add_subplot(7, 5, position)
    ax.set_title(name)
    ax.set_ylim([0, 350])
    total_permitted_unit_per_year.plot(y=name, ax=ax, legend=False, marker='.', markersize=5, figsize=(30,15))

Categories