Aligning rotated xticklabels and ylabels with their respective xticks and yticks - python

Here is the output of the code :
array = [[64,7,5],
[9,195,1],
[6,17,2]]
df_cm = pd.DataFrame(array, range(3), range(3))
sn.set(font_scale=1.4) # for l)abel size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, cmap='Blues', fmt='g') # font size
class_names = ['dog','cat','bear']
plt.gca().xaxis.tick_top()
plt.gca().xaxis.set_label_position('top')
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45, rotation_mode='anchor')
plt.yticks(tick_marks, class_names, rotation='horizontal')# rotation='horizontal', ha='right', rotation_mode='anchor'
plt.tight_layout()
plt.ylabel('True label',size=14)
plt.xlabel('Predicted label',size=14)
plt.show()
I would like to align labels of x and y with center position, So please how can I change the above

With tick_marks = np.arange(len(class_names)) you're setting new tick marks. Just get the existing ones with ax.get_xticks()/ax.get_yticks():
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
array = [[64,7,5],
[9,195,1],
[6,17,2]]
df_cm = pd.DataFrame(array, range(3), range(3))
sn.set(font_scale=1.4) # for l)abel size
fig, ax = plt.subplots()
sn.heatmap(df_cm, annot_kws={"size": 16}, cmap='Blues', fmt='g') # font size
class_names = ['dog','cat','bear']
plt.gca().xaxis.tick_top()
plt.gca().xaxis.set_label_position('top')
plt.xticks(ax.get_xticks(), class_names, rotation=45, rotation_mode='anchor')
plt.yticks(ax.get_yticks(), class_names, rotation='horizontal')# rotation='horizontal', ha='right', rotation_mode='anchor'
plt.tight_layout()
plt.ylabel('True label',size=14)
plt.xlabel('Predicted label',size=14)
plt.show()
Output:
Edit: you'll achieve the same result by replacing your plt.xticks(...) and plt.yticks(...) with the following:
plt.gca().set_xticklabels(class_names, rotation=45, rotation_mode='anchor')
plt.gca().set_yticklabels(class_names, rotation='horizontal')

Related

Problem with minor thicks and color bar in matplotlib

This is cod for plotting.
Here I have two problems.
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#### part where data are loaded and defined######
tab1 = pd.read_table("tab1.txt", delim_whitespace=True)
tab2 = pd.read_table("tab2.txt", delim_whitespace=True)
delen = (tab1['val2'] / tab1['val3']) *10**9
dist = tab1['val1']
size = abs(tab1['val4'])
m_Es_S0s = tab2['m1'][tab2['#type']==1]
r_Es_S0s = tab2['r1'][tab2['#type']==1]
m_dEs_dS0s = tab2['m1'][tab2['#type']==2]
r_dEs_dS0s = tab2['r1'][tab2['#type']==2]
m_dSphs = tab2['m1'][tab2['#type']==3]
r_dSphs = tab2['r1'][tab2['#type']==3]
m_Nuclear_SC = tab2['m1'][tab2['#type']==4]
r_Nuclear_SC = tab2['r1'][tab2['#type']==4]
m_GCs_UCDs_cEs = tab2['m1'][tab2['#type']==5]
r_GCs_UCDs_cEs = tab2['r1'][tab2['#type']==5]
m_YMCs = tab2['m1'][tab2['#type']==7]
r_YMCs = tab2['r1'][tab2['#type']==7]
#####part related to figure #########
fig1 = plt.figure(figsize=(10,8),dpi=100)
ax = plt.subplot()
ax.tick_params(axis='both', which='both', direction="in")
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xscale('log')
plt.yscale('log')
plt.scatter(delen ,delen/(2*3.141592653*size**2), marker='o', s=80, c=dist, cmap='Greys_r', alpha=0.9, norm=matplotlib.colors.LogNorm(), edgecolors='darkorchid', linewidth=0.5)
a1=plt.scatter(m_Es_S0s ,m_Es_S0s/(2*3.141592653*r_Es_S0s**2), marker='o', facecolors='none', edgecolors='mediumblue', linewidth=0.5, s=20)
a2=plt.scatter(m_dEs_dS0s ,m_dEs_dS0s/(2*3.141592653*r_dEs_dS0s**2), marker='o', facecolors='none', edgecolors='lightgreen', linewidth=0.5, s=20)
#a3=plt.scatter(m_dSphs ,m_dSphs/(2*3.141592653*r_dSphs**2), marker='o', facecolors='none', edgecolors='red', linewidth=0.5, s=20)
a4=plt.scatter(m_Nuclear_SC ,m_Nuclear_SC/(2*3.141592653*r_Nuclear_SC**2), marker='o', facecolors='none', edgecolors='dodgerblue', linewidth=0.8, s=20)
#a5=plt.scatter(m_GCs_UCDs_cEs ,m_GCs_UCDs_cEs/(2*3.141592653*r_GCs_UCDs_cEs**2), marker='o', facecolors='none', edgecolors='dimgrey', linewidth=0.5, s=20)
a6=plt.scatter(m_YMCs ,m_YMCs/(2*3.141592653*r_YMCs**2), marker='o', facecolors='none', edgecolors='olive', linewidth=0.7, s=20)
plt.clim(1.8,6.8)
cb = plt.colorbar(pad=0.004)
cb.set_label(label='dist', size='medium', weight='bold')
cb.ax.tick_params(labelsize='large',direction='in')
plt.ylabel('yaxis', fontsize=18)
plt.xlabel('xaxis', fontsize=18)
plt.show()
Resulting plot looks like this:
But, after uncommenting a3 and a5 (so, including more data points on the plot) I am losing all minor ticks on my plot. Figure looks like this
This is first problem why I am losing minor ticks I would like to keep them. Also I would like to keep all markers .... 10^5,10^6,10^7 ......
Another problem is that color bar does not change color. You can notice that my cmap='Greys_r' and points on the plot are ok, but color bar keeps viridis all the time.
How to change color bar to Greys_r?
Tab1 and Tab2 are here:
https://www.dropbox.com/s/gwj72blzallqjl5/tab1.txt?dl=0
https://www.dropbox.com/s/mj4fr8hetsb45eo/tab2.txt?dl=0
Try this, it seems to work.
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#### part where data are loaded and defined######
tab1 = pd.read_table("tab1.txt", delim_whitespace=True)
tab2 = pd.read_table("tab2.txt", delim_whitespace=True)
delen = (tab1['val2'] / tab1['val3']) *10**9
dist = tab1['val1']
size = abs(tab1['val4'])
m_Es_S0s = tab2['m1'][tab2['#type']==1]
r_Es_S0s = tab2['r1'][tab2['#type']==1]
m_dEs_dS0s = tab2['m1'][tab2['#type']==2]
r_dEs_dS0s = tab2['r1'][tab2['#type']==2]
m_dSphs = tab2['m1'][tab2['#type']==3]
r_dSphs = tab2['r1'][tab2['#type']==3]
m_Nuclear_SC = tab2['m1'][tab2['#type']==4]
r_Nuclear_SC = tab2['r1'][tab2['#type']==4]
m_GCs_UCDs_cEs = tab2['m1'][tab2['#type']==5]
r_GCs_UCDs_cEs = tab2['r1'][tab2['#type']==5]
m_YMCs = tab2['m1'][tab2['#type']==7]
r_YMCs = tab2['r1'][tab2['#type']==7]
#####part related to figure #########
fig1 = plt.figure(figsize=(10,8),dpi=100)
ax = plt.subplot()
ax.tick_params(axis='both', which='both', direction="in")
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xscale('log')
plt.yscale('log')
cc = plt.scatter(delen ,delen/(2*3.141592653*size**2), marker='o', s=80, c=dist, cmap='Greys_r', alpha=0.9, norm=matplotlib.colors.LogNorm(), edgecolors='darkorchid', linewidth=0.5)
a1=plt.scatter(m_Es_S0s ,m_Es_S0s/(2*3.141592653*r_Es_S0s**2), marker='o', facecolors='none', edgecolors='mediumblue', linewidth=0.5, s=20)
a2=plt.scatter(m_dEs_dS0s ,m_dEs_dS0s/(2*3.141592653*r_dEs_dS0s**2), marker='o', facecolors='none', edgecolors='lightgreen', linewidth=0.5, s=20)
a3=plt.scatter(m_dSphs ,m_dSphs/(2*3.141592653*r_dSphs**2), marker='o', facecolors='none', edgecolors='red', linewidth=0.5, s=20)
a4=plt.scatter(m_Nuclear_SC ,m_Nuclear_SC/(2*3.141592653*r_Nuclear_SC**2), marker='o', facecolors='none', edgecolors='dodgerblue', linewidth=0.8, s=20)
a5=plt.scatter(m_GCs_UCDs_cEs ,m_GCs_UCDs_cEs/(2*3.141592653*r_GCs_UCDs_cEs**2), marker='o', facecolors='none', edgecolors='dimgrey', linewidth=0.5, s=20)
a6=plt.scatter(m_YMCs ,m_YMCs/(2*3.141592653*r_YMCs**2), marker='o', facecolors='none', edgecolors='olive', linewidth=0.7, s=20)
plt.clim(1.8,6.8)
cb = plt.colorbar(cc,pad=0.004)
cb.set_label(label='dist', size='medium', weight='bold')
#cb.ax.tick_params(labelsize='large',direction='in')
import matplotlib.ticker
## set y ticks
y_major = matplotlib.ticker.LogLocator(base = 10, numticks = 15)
ax.yaxis.set_major_locator(y_major)
y_minor = matplotlib.ticker.LogLocator(base = 10, subs = np.arange(1.0, 10.0) * 0.1, numticks = 20)
ax.yaxis.set_minor_locator(y_minor)
ax.yaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())
x_major = matplotlib.ticker.LogLocator(base = 10, numticks = 15)
ax.xaxis.set_major_locator(x_major)
x_minor = matplotlib.ticker.LogLocator(base = 10, subs = np.arange(1.0, 10.0) * 0.1, numticks = 20)
ax.xaxis.set_minor_locator(x_minor)
ax.xaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())
plt.ylabel('yaxis', fontsize=18)
plt.xlabel('xaxis', fontsize=18)
#plt.savefig("out1.png")
plt.show()
Output fig is here.
enter image description here

How to prevent scatter legend from overlapping?

I wrote a code that read an excel sheet and plots a scatter figure with the following code:
fig, ax = plt.subplots(figsize=(13, 8))
scatter = ax.scatter(df.Date, df.TopAcc, c="blue", s=df.Param / 10000, alpha=0.2)
plot = ax.plot(dfmax.Date, dfmax.TopAcc, marker="o", c="red")
handles, labels = scatter.legend_elements(num=5, prop="sizes", alpha=0.2, color="blue")
legend = ax.legend(handles, labels, loc="lower right", title="# Parameters", )
plt.grid()
plt.show()
And I got the following figure
I have the following issues: How to prevent the legend balls from overlapping?
You can set columnspacing in the legend object:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
fig, ax = plt.subplots(figsize=(13, 8))
df = pd.DataFrame(np.random.rand(20, 2), columns=['x', 'y'])
df['s'] = 5000 * np.random.rand(20)
scatter = ax.scatter(df.x, df.y, c="blue", s=df.s, alpha=0.2)
handles, labels = scatter.legend_elements(num=5, prop="sizes", alpha=0.2, color="blue")
legend = ax.legend(handles, labels, loc="lower right", title="# Parameters", ncol=6, columnspacing=3, bbox_to_anchor=(1, -0.12), frameon=False)
plt.grid()
plt.show()

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want distplot for 2 Genders either in same plot or separately
Any suggestions or help will be highly appreciable
The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis are the ages themselves, and the y-axis is a probability distribution, which usually are quite small numbers (the area under the curve is normalized to be 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves
it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
NF = 50
NM = 10
df = pd.DataFrame({'Age': np.concatenate([np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF),
np.random.randint(15, 23, NM)]),
'Gender': np.repeat(['female', 'male'], (NF, NM))})
df['Age'] = df['Age'].where((df['Age'] != 21) & (df['Age'] != 23), 20)
sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width() / 2.
y = rect.get_height()
ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
for gender in ('female', 'male'):
# ax2 = sns.kdeplot(df[df['Gender'] == gender]['Age'], shade=True, ax=axs[1], label=gender)
ax2 = sns.distplot(df[df['Gender'] == gender]['Age'], hist=False, kde_kws={'shade': True}, ax=axs[1], label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.show()

Confusion Matrix font size

I have a Confusion Matrix with really small sized numbers but I can't find a way to change them.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) #edited as suggested
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g"); # annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');
ax.set_ylabel('Observed labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['Flase', 'True']);
plt.show()
thats the code I am using and the pic I get looks like:
I would not mind changing the numbers of the classification by hand but I dont really want to do it for the labels aswell.
EDIT: Figures are bigger now but the labels stay very small
Cheers
Use sns.set to change the font size of the heatmap values. You can specify the font size of the labels and the title as a dictionary in ax.set_xlabel, ax.set_ylabel and ax.set_title, and the font size of the tick labels with ax.tick_params.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) # Adjust to fit
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g");
# Labels, title and ticks
label_font = {'size':'18'} # Adjust to fit
ax.set_xlabel('Predicted labels', fontdict=label_font);
ax.set_ylabel('Observed labels', fontdict=label_font);
title_font = {'size':'21'} # Adjust to fit
ax.set_title('Confusion Matrix', fontdict=title_font);
ax.tick_params(axis='both', which='major', labelsize=10) # Adjust to fit
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['False', 'True']);
plt.show()
Use rcParams to change all text in the plot:
fig, ax = plt.subplots(figsize=(10,10))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(clf, Xt, Yt,
display_labels=classes,
cmap=plt.cm.Blues,
normalize=normalize,
ax=ax)
Found it
import itertools
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm,classes,normalize=False,title='Confusion
matrix',cmap=plt.cm.Blues):
plt.figure(figsize=(15,10))
plt.imshow(cm,interpolation='nearest',cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks=np.arange(len(classes))
plt.xticks(tick_marks,classes,rotation=45,fontsize=15)
plt.yticks(tick_marks,classes,fontsize=15,rotation=90)
if normalize:
cm=cm.astype('float')/cm.sum(axis=1)[:,np.newaxis]
cm=np.around(cm,decimals=2)
cm[np.isnan(cm)]=0.0
print('Normalized confusion matrix')
else:
print('Confusion matrix, without normalization')
thresh=cm.max()/2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, cm[i, j],
horizontalalignment="center",fontsize=15,
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label',fontsize=20)
plt.xlabel('Predicted label',fontsize=20)
The code changed as such

How to have 2 different ticklabels in a confusion matrix?

I have plotted the Confusion Matrix that you see below.
I want to change the labels.
Instead of s1 -> s37. I want: s1 -> s21, I1 -> I16
import itertools
def plot_confusion_matrix(cm, title='Confusion matrix RF', cmap=plt.cm.viridis):
plt.imshow(cm, interpolation='nearest', cmap=cmap) #Display an image on the axes
plt.title(title)
plt.colorbar() #the vertical bar at the right side
#tick_marks = np.arange(len(np.unique(y_oos))) #has the length of the # of classes (array)
tick_marks = np.arange(37) #I KNOW IT IS HERE!!!
plt.xticks(tick_marks, rotation=90) #to Rotate the names
ax = plt.gca()
ax.set_xticklabels(['s'+lab for lab in (ax.get_xticks()+1).astype(str)]) # AND ALSO HERE!!!
plt.yticks(tick_marks)
ax.set_yticklabels(['s'+lab for lab in (ax.get_yticks()+1).astype(str)])
plt.tight_layout() #
plt.ylabel('True label')
plt.xlabel('Predicted label')
I passed a custom list; as mentionned above.
import itertools
def plot_confusion_matrix(cm, title='Confusion matrix RF', cmap=plt.cm.viridis):
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar() #the vertical bar at the right side
tick_marks = np.arange(37) #21 + 16 = 37
#THIS IS THE CUSTOM LIST!
labels = ['s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20','s21','i1','i2','i3','i4','i5','i6','i7','i8','i9','i10','i11','i12','i13','i14','i15','i16']
plt.xticks(tick_marks, rotation=90) #to totate the names
ax = plt.gca()
ax.set_xticklabels(labels)
plt.yticks(tick_marks)
ax.set_yticklabels(labels)
plt.tight_layout() #
plt.ylabel('True label')
plt.xlabel('Predicted label')

Categories