How to plot confusion matrix correctly - python

Does anyone know how I can plot a confusion matrix for 100 class labels? I wrote the lines of code below, but I ended up with the confusion matrix attached. The code works fine for a small number of classes such as 5, but when the number of classes is 100, the resulting confusion matrix is not readable.
# Predict on the held-out set and tabulate true vs. predicted labels.
y_pred = model.predict(X_test)
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)

# A fixed 5x5-inch canvas cannot hold a 100x100 grid of annotated cells, so
# let the figure grow with the number of classes (about 0.3 inch per cell).
# Matrices with up to 16 classes still get the original 5x5 inches.
n_rows, n_cols = confmat.shape
side = max(5, 0.3 * max(n_rows, n_cols))
fig, ax = plt.subplots(figsize=(side, side))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)

# Write each count in the centre of its cell (x is the column, y is the row).
for i in range(n_rows):
    for j in range(n_cols):
        ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')

plt.xlabel('predicted label')
plt.ylabel('true label')
plt.show()

Related

Matplotlib returns duplicate legends when only ask to show one for each label

I was trying to plot two regression lines on the same plot using matplotlib and the plot returned duplicate legends for the line labeled as: 'OLS regression line'. I could not figure out why. Could someone explain the possible reasons?
# One figure holding the raw observations and both regression lines.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_xlabel('x', fontsize=12)
ax.set_ylabel('y', fontsize=12)
ax.scatter(x, y)

# Each plot call carries the label that the legend will display.
# NOTE(review): if y_hat is 2-D, plot() draws one line per column and every
# one of them gets the same label -- TODO confirm the shape of y_hat.
ax.plot(x2, y_hat, '-g', label='OLS regression line')
ax.plot(x, y_, '-r', label='population regression line')

# Render the legend, then display the figure.
ax.legend(loc='upper left')
plt.show()
This is the plot:
Output Plot
Thank you.
A simple interpretation of your question is that you need to display the data in text format, so you could omit the label parameter and make use of the text method instead:
rdn = np.random.default_rng(1234)
# generate data: y is x plus standard-normal noise
x = rdn.uniform(0, 10, size=100)
y = x + rdn.normal(size=100)

fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(x, y)
ax.set_ylabel('y', fontsize=12)
ax.set_xlabel('x', fontsize=12)

# simple linear regression; polyfit returns the highest power first, so b is
# the slope and a is the intercept
b, a = np.polyfit(x, y, deg=1)
# Create a sequence of 100 evenly spaced numbers from 0 to 10
xseq = np.linspace(0, 10, num=100)

# Draw both lines from values defined in this snippet. No label= is passed
# because the names are shown in the text box below instead of a legend.
ax.plot(xseq, a + b * xseq, '-g')  # fitted OLS line
ax.plot(xseq, xseq, '-r')  # the data were generated around y = x

# add text box naming the two lines
stats = ('OLS regression line\n'
         'population regression line')
bbox = dict(boxstyle='round', fc='blanchedalmond', ec='orange', alpha=0.5)
ax.text(0.95, 0.07, stats, fontsize=9, bbox=bbox,
        transform=ax.transAxes, horizontalalignment='right')
plt.show()

Same Matplotlib code produces different images on different machines

I have a Matplotlib function to create a confusion matrix and save it to a file:
def pretty_print_conf_matrix(y_true, y_pred,
                             classes,
                             normalize=False,
                             title='{} Confusion matrix'.format(describe_test_setup()),
                             cmap=plt.cm.Blues,
                             out_dir=None):
    """
    Plot an annotated confusion matrix and optionally save it as a PNG.

    Code adapted from: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py

    Args:
        y_true: True labels, passed to ``confusion_matrix``.
        y_pred: Predicted labels, passed to ``confusion_matrix``.
        classes: Label values; given to ``confusion_matrix`` as ``labels`` and
            used as the tick labels on both axes.
        normalize: If true, the cell annotations show each count divided by
            the total of all cells, rounded to two decimals. The image colours
            are drawn from the raw counts before this step.
        title: Figure title. The default is formatted once, when the function
            is defined, not on every call.
        cmap: Colormap handed to ``imshow``.
        out_dir: If not None, directory in which the figure is saved as
            'Confusion Matrix<describe_test_setup()>.png' at 300 dpi.
    """
    fig, ax = plt.subplots(figsize=(15, 15))
    cm = confusion_matrix(y_true, y_pred, labels=classes)

    # Configure Confusion Matrix Plot Aesthetics (no text yet)
    cax = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set_title(title, fontsize=16)
    ax.set_xticks(np.arange(len(classes)))
    ax.set_yticks(np.arange(len(classes)))
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
    plt.colorbar(cax)
    ax.set_ylabel('True label', fontsize=16)
    ax.set_xlabel('Predicted label', fontsize=16, rotation='horizontal')

    # Calculate normalized values (so all cells sum to 1) if desired.
    # Per-row alternative: divide by cm.sum(axis=1)[:, np.newaxis] instead.
    if normalize:
        cm = np.round(cm.astype('float') / cm.sum(), 2)
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2

    # Place Numbers as Text on Confusion Matrix Plot; white text on the dark
    # (high-valued) cells, black text elsewhere.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, cm[i, j],
                ha="center",
                va="center",
                color="white" if cm[i, j] > thresh else "black",
                fontsize=12)

    # Save BEFORE showing: once a window is shown, an interactive backend may
    # shrink a 15x15-inch figure to fit the screen, and a later savefig would
    # then write the resized figure. Saving first keeps the output the same
    # on every machine.
    if out_dir is not None:
        out_file = os.path.join(out_dir, 'Confusion Matrix{}.png'.format(describe_test_setup()))
        fig.savefig(out_file, dpi=300)
    plt.show(block=False)
This works well on two of my machines, but on the third it produces ugly squashed images. They are all running the same source code.
Example of it working properly (resolution 4500x4500):
Example of it working poorly (resolution 1028x715):
I thought this could be caused by me running different matplotlib versions, but using pip freeze I can see matplotlib==3.1.2 on both machines.
Any ideas what the cause might be?

Confusion Matrix font size

I have a Confusion Matrix with really small sized numbers but I can't find a way to change them.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) #edited as suggested
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g")  # annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('Observed labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])  # was misspelled 'Flase'
plt.show()
That's the code I am using, and the picture I get looks like:
I would not mind changing the numbers of the classification by hand, but I don't really want to do it for the labels as well.
EDIT: Figures are bigger now but the labels stay very small
Cheers
Use sns.set to change the font size of the heatmap values. You can specify the font size of the labels and the title as a dictionary in ax.set_xlabel, ax.set_ylabel and ax.set_title, and the font size of the tick labels with ax.tick_params.
from sklearn.metrics import confusion_matrix

# Font settings gathered in one place -- adjust each to fit.
label_font = dict(size='18')
title_font = dict(size='21')

cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()

# font_scale is set before the heatmap is drawn, so it applies to the
# annotated cell values.
sns.set(font_scale=3.0)
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g")

# Axis labels and title take their sizes from the font dictionaries.
ax.set_title('Confusion Matrix', fontdict=title_font)
ax.set_xlabel('Predicted labels', fontdict=label_font)
ax.set_ylabel('Observed labels', fontdict=label_font)

# Tick labels are sized separately through tick_params.
ax.tick_params(axis='both', which='major', labelsize=10)
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])
plt.show()
Use rcParams to change all text in the plot:
# Create the canvas, then raise the global default font size.
fig, ax = plt.subplots(figsize=(10, 10))
plt.rcParams['font.size'] = 16

# NOTE(review): presumably scikit-learn's plot_confusion_matrix, which was
# removed in scikit-learn 1.2 -- confirm which helper is in scope here.
display_options = dict(
    display_labels=classes,
    cmap=plt.cm.Blues,
    normalize=normalize,
    ax=ax,
)
disp = plot_confusion_matrix(clf, Xt, Yt, **display_options)
Found it
import itertools

import matplotlib.pyplot as plt
import numpy as np


def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated image on a new 15x10 figure.

    Args:
        cm: Confusion matrix as a 2-D NumPy array.
        classes: Tick labels for both axes; its length sets the tick count.
        normalize: If true, divide every row by its row sum and round to two
            decimals before plotting. Rows that sum to zero become 0.0.
        title: Title placed above the image.
        cmap: Colormap handed to ``imshow``.
    """
    plt.figure(figsize=(15, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=15)
    plt.yticks(tick_marks, classes, fontsize=15, rotation=90)

    if normalize:
        # An all-zero row gives 0/0 = NaN; silence the warning because those
        # cells are reset to 0.0 just below.
        with np.errstate(invalid='ignore'):
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        cm = np.around(cm, decimals=2)
        cm[np.isnan(cm)] = 0.0
        print('Normalized confusion matrix')
    else:
        print('Confusion matrix, without normalization')

    # Cells above half of the maximum get white text, the rest black text.
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center", fontsize=15,
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label', fontsize=20)
    plt.xlabel('Predicted label', fontsize=20)
    # Lay out last so the axis labels are included in the spacing.
    plt.tight_layout()
The code changed as such

Matplotlib help: Formatting a scatter plot to be square

I have a scatter plot where both axes are limited to -100 and 100. However, when I graph the data, I always get an unappealing plot that is rectangular and has incorrect axis labels. I'd like the plot to be a square with -100 and 100 as the last axis labels. Does anyone have advice for fixing this formatting issue?
My code is as follows:
import matplotlib.pyplot
import numpy as np
import scipy.stats

# Pearson correlation of the two samples; r[0] is the coefficient.
r = scipy.stats.pearsonr(x_val, y_val)

fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x_val, y_val, s=75, color='green', edgecolor='black', linewidth = 2, alpha=0.4)
ax.set_axisbelow(True)

# Pin both axes to [-100, 100] and force equal scaling so the plot is square.
ax.set_xlim(-100, 100)
ax.set_ylim(-100, 100)
ax.set_aspect('equal', adjustable='box')

matplotlib.pyplot.axvline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.axhline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.grid(linewidth=1, color='#bfbfbf')

# arange excludes its stop value, so stop at 101 to keep 100 as the last tick.
ticks = np.arange(-100, 101, 20.0)
matplotlib.pyplot.xticks(ticks, fontsize=14, fontweight='bold',
                         fontname='Helvetica')
matplotlib.pyplot.yticks(ticks, fontsize=14, fontweight='bold',
                         fontname='Helvetica')
matplotlib.pyplot.text(-95, 85,'Pearson\'s r: %.3f'%r[0], fontsize=14, fontweight='bold',
                       fontname='Helvetica')
matplotlib.pyplot.show()

How to have 2 different ticklabels in a confusion matrix?

I have plotted the Confusion Matrix that you see below.
I want to change the labels.
Instead of s1 -> s37. I want: s1 -> s21, I1 -> I16
import itertools


def plot_confusion_matrix(cm, title='Confusion matrix RF', cmap=plt.cm.viridis):
    """Show ``cm`` as an image whose 37 ticks are labelled s1 .. s37."""

    def _s_labels(ticks):
        # Turn 0-based tick positions into 1-based names prefixed with 's'.
        return ['s' + number for number in (ticks + 1).astype(str)]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)  # draw the matrix as an image
    plt.title(title)
    plt.colorbar()  # the vertical bar at the right side

    # The class count (37) is hard-coded; this is the spot to change for
    # a different labelling.
    positions = np.arange(37)
    axes = plt.gca()

    plt.xticks(positions, rotation=90)  # rotate the names
    axes.set_xticklabels(_s_labels(axes.get_xticks()))
    plt.yticks(positions)
    axes.set_yticklabels(_s_labels(axes.get_yticks()))

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
I passed a custom list, as mentioned above.
import itertools


def plot_confusion_matrix(cm, title='Confusion matrix RF', cmap=plt.cm.viridis):
    """Show ``cm`` as an image with ticks labelled s1 .. s21, then i1 .. i16."""
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()  # the vertical bar at the right side

    # The custom list: 21 's' names followed by 16 'i' names (21 + 16 = 37).
    labels = [f's{k}' for k in range(1, 22)] + [f'i{k}' for k in range(1, 17)]
    positions = np.arange(37)
    axes = plt.gca()

    plt.xticks(positions, rotation=90)  # rotate the names
    axes.set_xticklabels(labels)
    plt.yticks(positions)
    axes.set_yticklabels(labels)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

Categories