sklearn confusion_matrix displaying with wrong dimensions / tick marks at wrong spots - python

I'm trying to display a confusion matrix and can't for the life of me figure out why it refuses to display properly. Here's my code:
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.winter):
    """Render a confusion matrix as an annotated image with pyplot.

    Args:
        cm: 2-D array indexed as ``cm[row, col]``; rows are labelled
            'True label' and columns 'Predicted label' on the plot.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: If true, divide each row by its sum and print the cells
            with two decimals; otherwise print them with the 'd' format.
        title: Text placed above the image.
        cmap: Colormap passed to ``plt.imshow``.

    Returns:
        The ``plt`` module itself, as in the original snippet.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        cell_format = '.2f'
    else:
        cell_format = 'd'

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=30)
    plt.colorbar()

    # One tick per class, at integer positions 0..len(classes)-1.
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, fontsize=20)
    plt.yticks(positions, classes, fontsize=20)

    # Cells below half of the maximum get white text, the rest black.
    midpoint = cm.max() / 2.
    n_rows, n_cols = cm.shape
    for row in range(n_rows):
        for col in range(n_cols):
            value = cm[row, col]
            text_color = "white" if value < midpoint else "black"
            # x is the column index and y the row index.
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color=text_color, fontsize=40)

    plt.tight_layout()
    plt.ylabel('True label', fontsize=30)
    plt.xlabel('Predicted label', fontsize=30)
    return plt
# Build the confusion matrix from y_test and y_predicted_counts
# (both are defined elsewhere; they are not shown in this snippet).
cm = confusion_matrix(y_test, y_predicted_counts)
# Create a 10x10-inch figure before plotting; plot_confusion_matrix only issues
# pyplot calls, so it presumably draws on this current figure.
fig = plt.figure(figsize=(10, 10))
plot = plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
plt.show()
# Also print the raw matrix values.
print(cm)
And this is what is displayed:
Any help would be appreciated. Thanks in advance.

For the call to imshow you need to specify origin='lower' (the default is 'upper'; they probably changed this at some time and the scikit-learn docs didn't update their example). So the following should do the trick:
# The only change from the question's call is the added origin='lower' argument.
plt.imshow(cm, interpolation='nearest', cmap=cmap, origin='lower')

Using Matplotlib
If you want to keep your matplotlib implementation, just add plt.ylim(-0.5,2.5) at the end of your plot_confusion_matrix function:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.winter):
    """Draw a confusion matrix as an annotated image on the current figure.

    Args:
        cm: 2-D array indexed as ``cm[row, col]``; rows are labelled
            'True label' and columns 'Predicted label' on the plot.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: If true, divide each row by its sum and print the cells
            with two decimals; otherwise print them with the 'd' format.
        title: Text placed above the image.
        cmap: Colormap passed to ``plt.imshow``.

    Returns:
        The ``plt`` module itself, as in the original snippet.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=30)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, fontsize=20)
    plt.yticks(tick_marks, classes, fontsize=20)
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # x is the column index j, y is the row index i.
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
                 color="white" if cm[i, j] < thresh else "black", fontsize=40)
    plt.tight_layout()
    plt.ylabel('True label', fontsize=30)
    plt.xlabel('Predicted label', fontsize=30)
    # SOLUTION: force the y-limits to cover whole cells (each cell spans
    # index - 0.5 .. index + 0.5). For the three-class example this is exactly
    # plt.ylim(-0.5, 2.5); deriving the upper limit from the matrix makes the
    # same fix work for any number of classes.
    # NOTE: with these limits row 0 is drawn at the bottom of the plot.
    plt.ylim(-0.5, cm.shape[0] - 0.5)
    return plt
Using Seaborn
you can try the seaborn package for plotting heatmaps:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.winter):
    """Plot a confusion matrix as an annotated seaborn heatmap and show it.

    Args:
        cm: 2-D matrix of counts; rows are labelled 'Actual' and columns
            'Predicted'.
        classes: Class names used for both the row index and the columns.
        normalize: If true, divide each row by its sum and annotate the cells
            with two decimals; otherwise annotate with the integer format.
        title: Title placed above the heatmap.
        cmap: Colormap passed to ``sn.heatmap``.
    """
    cm_df = pd.DataFrame(cm, columns=classes, index=classes)
    if normalize:
        # Row-wise normalisation; the integer format "d" cannot render floats.
        cm_df = cm_df.div(cm_df.sum(axis=1), axis=0)
        fmt = ".2f"
    else:
        fmt = "d"
    # The axis names become the heatmap's axis labels.
    cm_df.index.name = 'Actual'
    cm_df.columns.name = 'Predicted'

    plt.figure(figsize=(10, 7))
    sn.set(font_scale=1.4)  # label size
    ax = sn.heatmap(cm_df, cmap=cmap, annot=True,
                    annot_kws={"size": 16}, fmt=fmt)  # annotation font size
    plt.title(title)
    # Heatmap cells span 0..n_rows with row 0 at the top. Setting the limits
    # explicitly repairs the half-cut first/last rows seen with matplotlib
    # 3.1.1 without adding padding on versions that are not affected.
    ax.set_ylim(len(cm_df), 0)
    plt.show()
# Example call for a three-class matrix `cm` (computed elsewhere, e.g. with confusion_matrix).
plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
Confusion Matrix Result
Hope this works for you!

It's likely that you're using matplotlib 3.1.1 which broke the tick default behaviour. Upgrade to 3.1.2 or downgrade to 3.1.0 to fix the issue.

Related

Aligning rotated xticklabels and ylabels with their respective xticks and yticks

Here is the output of the code :
# 3x3 example confusion matrix.
array = [[64,7,5],
[9,195,1],
[6,17,2]]
# Wrap it in a DataFrame with 0..2 as both index and columns.
df_cm = pd.DataFrame(array, range(3), range(3))
sn.set(font_scale=1.4) # for label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, cmap='Blues', fmt='g') # font size
class_names = ['dog','cat','bear']
# Move the x ticks and the x axis label to the top of the plot.
plt.gca().xaxis.tick_top()
plt.gca().xaxis.set_label_position('top')
# These integer positions 0..n-1 replace the tick positions the heatmap already set;
# this is the line the answer below identifies as the cause of the misaligned labels.
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45, rotation_mode='anchor')
plt.yticks(tick_marks, class_names, rotation='horizontal')# rotation='horizontal', ha='right', rotation_mode='anchor'
plt.tight_layout()
plt.ylabel('True label',size=14)
plt.xlabel('Predicted label',size=14)
plt.show()
I would like to align the x and y tick labels with the centers of the cells. How can I change the code above to do that?
With tick_marks = np.arange(len(class_names)) you're setting new tick marks. Just get the existing ones with ax.get_xticks()/ax.get_yticks():
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
# 3x3 example confusion matrix.
array = [[64, 7, 5],
         [9, 195, 1],
         [6, 17, 2]]
df_cm = pd.DataFrame(array, range(3), range(3))
sn.set(font_scale=1.4)  # for label size
fig, ax = plt.subplots()
# annot=True is required: without it annot_kws and fmt have no effect and the
# cells carry no numbers. ax=ax binds the heatmap to the axes created above.
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, cmap='Blues', fmt='g', ax=ax)
class_names = ['dog', 'cat', 'bear']
# Move the x ticks and the x axis label to the top of the plot.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
# Reuse the tick positions the heatmap already placed instead of creating new
# ones with np.arange; only the labels and their rotation change.
plt.xticks(ax.get_xticks(), class_names, rotation=45, rotation_mode='anchor')
plt.yticks(ax.get_yticks(), class_names, rotation='horizontal')
plt.tight_layout()
plt.ylabel('True label', size=14)
plt.xlabel('Predicted label', size=14)
plt.show()
Output:
Edit: you'll achieve the same result by replacing your plt.xticks(...) and plt.yticks(...) with the following:
# Replace only the tick labels on the current axes; the tick positions are left as they are.
plt.gca().set_xticklabels(class_names, rotation=45, rotation_mode='anchor')
plt.gca().set_yticklabels(class_names, rotation='horizontal')

How to change the fontsize of yticks in the matplotlib

I tried to plot a confusion matrix of my model, and the result is as follows:
confusion matrix
I want to know why the font size of the yticks doesn't change when I pass the fontsize parameter, even though it works for the
xticks. The code is as follows:
def plot_matrix(cm, class_num, normalize=False, title=None, cmap=plt.cm.Blues):
    """Plot a confusion matrix with pyplot, annotate every cell and show it.

    Args:
        cm: 2-D array indexed as ``cm[row, col]``; rows are labelled
            'True Label' and columns 'Predicted Label' on the plot.
        class_num: Sequence of class labels used as tick labels on both axes.
        normalize: If true, divide each row by its sum before plotting and
            annotate with values rounded to four decimals; otherwise annotate
            with integers.
        title: Optional title; nothing is drawn when it is falsy.
        cmap: Colormap passed to ``plt.imshow``.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print('normalized confusion matrix')
    else:
        print('Confusin matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    if title:
        plt.title(title, fontsize=20)
    cb = plt.colorbar()
    cb.ax.tick_params(labelsize=20)

    tick_marks = np.arange(len(class_num))
    plt.xticks(tick_marks, class_num, rotation=0, fontsize=20)
    plt.yticks(tick_marks, class_num, rotation=0, fontsize=20)

    plt.axis('equal')
    ax = plt.gca()
    # Pin the left/right spines to the current x-limits and paint all four
    # spines white.
    left, right = plt.xlim()
    ax.spines['left'].set_position(('data', left))
    ax.spines['right'].set_position(('data', right))
    for edge_i in ['top', 'bottom', 'right', 'left']:
        ax.spines[edge_i].set_edgecolor('white')

    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        num = float('{:.4f}'.format(cm[i, j])) if normalize else int(cm[i, j])
        # imshow draws cm[i, j] at x = j (column), y = i (row); the text must
        # use the same order or the annotations end up transposed.
        plt.text(
            j, i, num,
            verticalalignment='center',
            horizontalalignment='center',
            color='white' if num > thresh else 'black',
            fontsize=18
        )

    plt.tight_layout()
    plt.ylabel('True Label', fontsize=24)
    plt.xlabel('Predicted Label', fontsize=24)
    plt.show()

Any idea how to fix this messy confusion matrix plot?

I'm not sure what's wrong. I tried to create a confusion matrix using matplotlib, but it doesn't look right: the boxes aren't all the same size and the values fall outside the matrix. The background is transparent, but I don't mind that.
Here's the code:
# Get confusion matrix
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Open a 5x5-inch figure, draw an annotated confusion matrix and show it.

    Args:
        cm: 2-D array indexed as ``cm[row, col]``; rows are labelled
            'True label' and columns 'Predicted label' on the plot.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: If true, divide each row by its sum and print the cells
            with two decimals; otherwise print them with the 'd' format.
        title: Text placed above the image.
        cmap: Colormap passed to ``plt.imshow``.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        cell_format = '.2f'
    else:
        cell_format = 'd'

    plt.figure(figsize=(5, 5))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One tick per class; x labels are rotated by 45 degrees.
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Cells above half of the maximum get white text, the rest black.
    midpoint = cm.max() / 2.
    n_rows, n_cols = cm.shape
    for row in range(n_rows):
        for col in range(n_cols):
            value = cm[row, col]
            text_color = "white" if value > midpoint else "black"
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
# Predict test data
y_pred = model.predict(X_test)
# Compute confusion matrix. argmax(axis=1) takes the index of the largest entry
# in each row -- presumably turning one-hot / probability rows into class
# indices; confirm against the model's output format.
cnf_matrix = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix (the title now matches normalize=False)
plot_confusion_matrix(cnf_matrix,
                      classes=['benign', 'malignant', 'normal'],
                      normalize=False,
                      title='Confusion matrix, without normalization')
Does anyone know what's wrong and how to fix it?

Same Matplotlib code produces different images on different machines

I have a Matplotlib function to create a confusion matrix and save it to a file:
def pretty_print_conf_matrix(y_true, y_pred,
                             classes,
                             normalize=False,
                             title='{} Confusion matrix'.format(describe_test_setup()),
                             cmap=plt.cm.Blues,
                             out_dir=None):
    """Plot a confusion matrix, optionally save it as a PNG, and show it.

    Code adapted from: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py

    Args:
        y_true: True labels, passed to ``confusion_matrix``.
        y_pred: Predicted labels, passed to ``confusion_matrix``.
        classes: Class labels; used both as the ``labels`` argument of
            ``confusion_matrix`` and as the tick labels.
        normalize: If true, the numbers written in the cells are fractions of
            the grand total, rounded to two decimals. The image colours are
            drawn beforehand from the raw counts.
        title: Plot title. The default is computed once, when the function is
            defined.
        cmap: Colormap passed to ``imshow``.
        out_dir: If not None, directory into which the figure is saved at
            300 dpi.
    """
    fig, ax = plt.subplots(figsize=(15, 15))
    cm = confusion_matrix(y_true, y_pred, labels=classes)

    # Configure Confusion Matrix Plot Aesthetics (no text yet)
    cax = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set_title(title, fontsize=16)
    ax.set_xticks(np.arange(len(classes)))
    ax.set_yticks(np.arange(len(classes)))
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
    plt.colorbar(cax)
    ax.set_ylabel('True label', fontsize=16)
    ax.set_xlabel('Predicted label', fontsize=16, rotation='horizontal')

    # Calculate normalized values (so all cells sum to 1) if desired.
    # This divides by the grand total, not by each row's sum.
    if normalize:
        cm = np.round(cm.astype('float') / cm.sum(), 2)
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2

    # Place Numbers as Text on Confusion Matrix Plot
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, cm[i, j],
                ha="center",
                va="center",
                color="white" if cm[i, j] > thresh else "black",
                fontsize=12)

    # Save before showing: an interactive backend may resize the figure window
    # (for example to fit the screen), and an image saved after that would use
    # the resized dimensions instead of the requested 15x15 inches.
    if out_dir is not None:
        out_file = os.path.join(out_dir, 'Confusion Matrix{}.png'.format(describe_test_setup()))
        fig.savefig(out_file, dpi=300)
    plt.show(block=False)
This works well on two of my machines, but on the third it produces ugly squashed images. They are all running the same source code.
Example of it working properly (resolution 4500x4500):
Example of it working poorly (resolution 1028x715):
I thought this could be caused by me running different matplotlib versions, but using pip freeze I can see matplotlib==3.1.2 on both machines.
Any ideas what the cause might be?

Confusion Matrix font size

I have a Confusion Matrix with really small sized numbers but I can't find a way to change them.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) #edited as suggested
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g"); # annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');
ax.set_ylabel('Observed labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(['False', 'True']);
# Tick label typo fixed ('Flase' -> 'False') so both axes use the same class names.
ax.yaxis.set_ticklabels(['False', 'True']);
plt.show()
That's the code I am using, and the picture I get looks like this:
I would not mind changing the numbers of the classification by hand, but I don't really want to do it for the labels as well.
EDIT: The figures are bigger now, but the labels stay very small.
Cheers
Use sns.set to change the font size of the heatmap values. You can specify the font size of the labels and the title as a dictionary in ax.set_xlabel, ax.set_ylabel and ax.set_title, and the font size of the tick labels with ax.tick_params.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
# Global seaborn font scale, set before the heatmap is drawn.
sns.set(font_scale=3.0) # Adjust to fit
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g");
# Labels, title and ticks
# The axis labels and the title get explicit sizes through fontdict.
label_font = {'size':'18'} # Adjust to fit
ax.set_xlabel('Predicted labels', fontdict=label_font);
ax.set_ylabel('Observed labels', fontdict=label_font);
title_font = {'size':'21'} # Adjust to fit
ax.set_title('Confusion Matrix', fontdict=title_font);
# Tick label size is set separately, for both axes.
ax.tick_params(axis='both', which='major', labelsize=10) # Adjust to fit
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['False', 'True']);
plt.show()
Use rcParams to change all text in the plot:
fig, ax = plt.subplots(figsize=(10,10))
# Raise the global default font size to 16 before the matrix is drawn.
plt.rcParams.update({'font.size': 16})
# NOTE(review): this plot_confusion_matrix takes an estimator and data (clf, Xt, Yt), so it
# looks like scikit-learn's plotting helper rather than the functions defined above -- confirm.
# clf, Xt, Yt, classes and normalize are not defined in this snippet.
disp = plot_confusion_matrix(clf, Xt, Yt,
display_labels=classes,
cmap=plt.cm.Blues,
normalize=normalize,
ax=ax)
Found it
import itertools
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm, classes, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw an annotated confusion matrix on a new 15x10-inch figure.

    Args:
        cm: 2-D array indexed as ``cm[row, col]``; rows are labelled
            'True label' and columns 'Predicted label' on the plot.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: If true, the numbers written in the cells are row-wise
            fractions rounded to two decimals, with NaN replaced by 0.0.
        title: Text placed above the image.
        cmap: Colormap passed to ``plt.imshow``.
    """
    plt.figure(figsize=(15, 10))
    # NOTE(review): the image is drawn before normalisation, so the colours
    # always reflect the raw values passed in; only the cell text changes.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=15)
    plt.yticks(tick_marks, classes, fontsize=15, rotation=90)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        cm = np.around(cm, decimals=2)
        # A row whose sum is zero yields NaN after the division; show 0.0.
        cm[np.isnan(cm)] = 0.0
        print('Normalized confusion matrix')
    else:
        print('Confusion matrix, without normalization')

    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # x is the column index j, y is the row index i.
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center", fontsize=15,
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label', fontsize=20)
    plt.xlabel('Predicted label', fontsize=20)
The code changed as such

Categories