Any idea how to fix this messy confusion matrix plot? - python

I'm not sure what's wrong, I tried to create a confusion matrix using matplotlib but it doesn't look right. The box isn't of the same size & the value is out of the matrix. The background is transparent but I don't mind it.
Here's the code:
# Get confusion matrix
import itertools
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.Blues):
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(5, 5))
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
# Predict test data
y_pred=model.predict(X_test)
# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(cnf_matrix,
classes=['benign', 'malignant', 'normal'],
normalize=False,
title='Confusion matrix, with normalization')
Anyone knows what's wrong and how to fix it?

Related

Same Matplotlib code produces different images on different machines

I have a Matplotlib function to create a confusion matrix and save it to a file:
def pretty_print_conf_matrix(y_true, y_pred,
classes,
normalize=False,
title='{} Confusion matrix'.format(describe_test_setup()),
cmap=plt.cm.Blues,
out_dir=None):
"""
Code adapted from: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py
"""
fig, ax = plt.subplots(figsize=(15, 15))
cm = confusion_matrix(y_true, y_pred, labels=classes)
# Configure Confusion Matrix Plot Aesthetics (no text yet)
cax = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.set_title(title, fontsize=16)
ax.set_xticks(np.arange(len(classes)))
ax.set_yticks(np.arange(len(classes)))
ax.set_xticklabels(classes)
ax.set_yticklabels(classes)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.colorbar(cax)
ax.set_ylabel('True label', fontsize=16)
ax.set_xlabel('Predicted label', fontsize=16, rotation='horizontal')
# Calculate normalized values (so all cells sum to 1) if desired
if normalize:
cm = np.round(cm.astype('float') / cm.sum(), 2) # (axis=1)[:, np.newaxis]
thresh = cm.max() / 1.5 if normalize else cm.max() / 2
# Place Numbers as Text on Confusion Matrix Plot
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
ax.text(j, i, cm[i, j],
ha="center",
va="center",
color="white" if cm[i, j] > thresh else "black",
fontsize=12)
#fig.tight_layout()
plt.show(block=False)
if out_dir is not None:
out_file = os.path.join(out_dir, 'Confusion Matrix{}.png'.format(describe_test_setup()))
fig.savefig(out_file, dpi=300)
This works well on two of my machines, but on the third it produces ugly squashed images. They are all running the same source code.
Example of it working properly (resolution 4500x4500):
Example of it working poorly (resolution 1028x715):
I thought this could be caused by me running different matplotlib versions, but using pip freeze I can see matplotlib==3.1.2 on both machines.
Any ideas what the cause might be?

How to get to know the order of actual labels for Confusion Matrix?

I'm confused that how do I come to know the actual labels in Confusion Matrix? I know to pass the labels but my main question is how we come to know I which sequence I've to pass the label?
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,y_pred_classes)
This returns the result of the confusion_matrix() function:
Then I declared labels and pass the labels to plot the confusion matrix:
import itertools
def plotConfusionMatrix(cm, classes, normalize=False, title='Confusion Matrix', cmap = plt.cm.Blues):
plt.figure(figsize = (10,7))
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print('Normalized Confusion Matrix')
else:
print('Un-normalized Confusion Matrix')
print(cm)
thresh = cm.max()/2
for i,j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j,i, cm[i,j], horizontalalignment='center', color='white' if cm[i,j] > thresh else 'black', fontsize=25, fontweight='bold')
plt.tight_layout()
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
Then called the function and passed the labels:
classes = ['climbingdown','climbingup','jumping','lying','running','sitting','standing','walking']
plotConfusionMatrix(cm, classes)
The output for the plotted confusion matrix was:
Now, my exact question is, I've passed the labels of each class but how will I will come to know the order in which I've to pass?
You can pass the class labels into the confusion matrix function.
If you don't do that it will just use the sorted order of the labels. So i guess it depends on how your y_true and y_pred labels are mapped.

Confusion Matrix font size

I have a Confusion Matrix with really small sized numbers but I can't find a way to change them.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) #edited as suggested
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g"); # annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');
ax.set_ylabel('Observed labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['Flase', 'True']);
plt.show()
thats the code I am using and the pic I get looks like:
I would not mind changing the numbers of the classification by hand but I dont really want to do it for the labels aswell.
EDIT: Figures are bigger now but the labels stay very small
Cheers
Use sns.set to change the font size of the heatmap values. You can specify the font size of the labels and the title as a dictionary in ax.set_xlabel, ax.set_ylabel and ax.set_title, and the font size of the tick labels with ax.tick_params.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) # Adjust to fit
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g");
# Labels, title and ticks
label_font = {'size':'18'} # Adjust to fit
ax.set_xlabel('Predicted labels', fontdict=label_font);
ax.set_ylabel('Observed labels', fontdict=label_font);
title_font = {'size':'21'} # Adjust to fit
ax.set_title('Confusion Matrix', fontdict=title_font);
ax.tick_params(axis='both', which='major', labelsize=10) # Adjust to fit
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['False', 'True']);
plt.show()
Use rcParams to change all text in the plot:
fig, ax = plt.subplots(figsize=(10,10))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(clf, Xt, Yt,
display_labels=classes,
cmap=plt.cm.Blues,
normalize=normalize,
ax=ax)
Found it
import itertools
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm,classes,normalize=False,title='Confusion
matrix',cmap=plt.cm.Blues):
plt.figure(figsize=(15,10))
plt.imshow(cm,interpolation='nearest',cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks=np.arange(len(classes))
plt.xticks(tick_marks,classes,rotation=45,fontsize=15)
plt.yticks(tick_marks,classes,fontsize=15,rotation=90)
if normalize:
cm=cm.astype('float')/cm.sum(axis=1)[:,np.newaxis]
cm=np.around(cm,decimals=2)
cm[np.isnan(cm)]=0.0
print('Normalized confusion matrix')
else:
print('Confusion matrix, without normalization')
thresh=cm.max()/2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, cm[i, j],
horizontalalignment="center",fontsize=15,
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label',fontsize=20)
plt.xlabel('Predicted label',fontsize=20)
The code changed as such

sklearn confusion_matrix displaying with wrong dimensions / tick marks at wrong spots

I'm trying to display a confusion matrix and can't for the life of my figure out why it refuses to display in an appropriate manner. Here's my code:
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title, fontsize=30)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, fontsize=20)
plt.yticks(tick_marks, classes, fontsize=20)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
color="white" if cm[i, j] < thresh else "black", fontsize=40)
plt.tight_layout()
plt.ylabel('True label', fontsize=30)
plt.xlabel('Predicted label', fontsize=30)
return plt
cm = confusion_matrix(y_test, y_predicted_counts)
fig = plt.figure(figsize=(10, 10))
plot = plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
plt.show()
print(cm)
And this is what is displayed:
Any help would be appreciated. Thanks in advance.
For the call to imshow you need to specify origin='lower' (the default is 'upper'; they probably changed this at some time and the scikit-learn docs didn't update their example). So the following should do the trick:
plt.imshow(cm, interpolation='nearest', cmap=cmap, origin='lower')
# ^
# |
# added origin='lower' ------------------------------
Using Matplotlib
If you want to keep your matplotlib implementation, just add plt.ylim(-0.5,2.5) at the end of your plot_confusion_matrix function:
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title, fontsize=30)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, fontsize=20)
plt.yticks(tick_marks, classes, fontsize=20)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
color="white" if cm[i, j] < thresh else "black", fontsize=40)
plt.tight_layout()
plt.ylabel('True label', fontsize=30)
plt.xlabel('Predicted label', fontsize=30)
plt.ylim(-0.5, 2.5) # <-- SOLUTION
return plt
Using Seaborn
you can try the seaborn package for plotting heatmaps:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
cm_df = pd.DataFrame(cm, columns=classes, index = classes)
cm_df.index.name = 'Actual'
cm_df.columns.name = 'Predicted'
plt.figure(figsize = (10,7))
sn.set(font_scale=1.4)#for label size
ax =sn.heatmap(cm_df, cmap=cmap, annot=True,annot_kws={"size": 16},fmt="d")# font size
plt.title(title)
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
plt.show()
plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
Confusion Matrix Result
Hope this works for you!
It's likely that you're using matplotlib 3.1.1 which broke the tick default behaviour. Upgrade to 3.1.2 or downgrade to 3.1.0 to fix the issue.

How to plot confusion matrix correctly

Does anyone know how I can plot confusion matrix for 100 class labels? I did these line of codes but I ended up having a confusion matrix attached. The code is working fine for less class numbers like 5 but as the number of classes is 100, there in no clear confusion matrix.
y_pred = model.predict(X_test)
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for i in range(confmat.shape[0]):
for j in range(confmat.shape[1]):
ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
plt.xlabel('predicted label')
plt.ylabel('true label')
plt.show()

Categories