I have a Matplotlib function to create a confusion matrix and save it to a file:
def pretty_print_conf_matrix(y_true, y_pred,
classes,
normalize=False,
title='{} Confusion matrix'.format(describe_test_setup()),
cmap=plt.cm.Blues,
out_dir=None):
"""
Code adapted from: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py
"""
fig, ax = plt.subplots(figsize=(15, 15))
cm = confusion_matrix(y_true, y_pred, labels=classes)
# Configure Confusion Matrix Plot Aesthetics (no text yet)
cax = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.set_title(title, fontsize=16)
ax.set_xticks(np.arange(len(classes)))
ax.set_yticks(np.arange(len(classes)))
ax.set_xticklabels(classes)
ax.set_yticklabels(classes)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.colorbar(cax)
ax.set_ylabel('True label', fontsize=16)
ax.set_xlabel('Predicted label', fontsize=16, rotation='horizontal')
# Calculate normalized values (so all cells sum to 1) if desired
if normalize:
cm = np.round(cm.astype('float') / cm.sum(), 2) # (axis=1)[:, np.newaxis]
thresh = cm.max() / 1.5 if normalize else cm.max() / 2
# Place Numbers as Text on Confusion Matrix Plot
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
ax.text(j, i, cm[i, j],
ha="center",
va="center",
color="white" if cm[i, j] > thresh else "black",
fontsize=12)
#fig.tight_layout()
plt.show(block=False)
if out_dir is not None:
out_file = os.path.join(out_dir, 'Confusion Matrix{}.png'.format(describe_test_setup()))
fig.savefig(out_file, dpi=300)
This works well on two of my machines, but on the third it produces ugly squashed images. They are all running the same source code.
Example of it working properly (resolution 4500x4500):
Example of it working poorly (resolution 1028x715):
I thought this could be caused by me running different matplotlib versions, but using pip freeze I can see matplotlib==3.1.2 on both machines.
Any ideas what the cause might be?
Related
I'm not sure what's wrong, I tried to create a confusion matrix using matplotlib but it doesn't look right. The box isn't of the same size & the value is out of the matrix. The background is transparent but I don't mind it.
Here's the code:
# Get confusion matrix
import itertools
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.Blues):
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(5, 5))
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
# Predict test data
y_pred=model.predict(X_test)
# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(cnf_matrix,
classes=['benign', 'malignant', 'normal'],
normalize=False,
title='Confusion matrix, with normalization')
Anyone knows what's wrong and how to fix it?
I have a Confusion Matrix with really small sized numbers but I can't find a way to change them.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) #edited as suggested
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g"); # annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');
ax.set_ylabel('Observed labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['Flase', 'True']);
plt.show()
thats the code I am using and the pic I get looks like:
I would not mind changing the numbers of the classification by hand but I dont really want to do it for the labels aswell.
EDIT: Figures are bigger now but the labels stay very small
Cheers
Use sns.set to change the font size of the heatmap values. You can specify the font size of the labels and the title as a dictionary in ax.set_xlabel, ax.set_ylabel and ax.set_title, and the font size of the tick labels with ax.tick_params.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, rf_predictions)
ax = plt.subplot()
sns.set(font_scale=3.0) # Adjust to fit
sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="g");
# Labels, title and ticks
label_font = {'size':'18'} # Adjust to fit
ax.set_xlabel('Predicted labels', fontdict=label_font);
ax.set_ylabel('Observed labels', fontdict=label_font);
title_font = {'size':'21'} # Adjust to fit
ax.set_title('Confusion Matrix', fontdict=title_font);
ax.tick_params(axis='both', which='major', labelsize=10) # Adjust to fit
ax.xaxis.set_ticklabels(['False', 'True']);
ax.yaxis.set_ticklabels(['False', 'True']);
plt.show()
Use rcParams to change all text in the plot:
fig, ax = plt.subplots(figsize=(10,10))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(clf, Xt, Yt,
display_labels=classes,
cmap=plt.cm.Blues,
normalize=normalize,
ax=ax)
Found it
import itertools
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm,classes,normalize=False,title='Confusion
matrix',cmap=plt.cm.Blues):
plt.figure(figsize=(15,10))
plt.imshow(cm,interpolation='nearest',cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks=np.arange(len(classes))
plt.xticks(tick_marks,classes,rotation=45,fontsize=15)
plt.yticks(tick_marks,classes,fontsize=15,rotation=90)
if normalize:
cm=cm.astype('float')/cm.sum(axis=1)[:,np.newaxis]
cm=np.around(cm,decimals=2)
cm[np.isnan(cm)]=0.0
print('Normalized confusion matrix')
else:
print('Confusion matrix, without normalization')
thresh=cm.max()/2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, cm[i, j],
horizontalalignment="center",fontsize=15,
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label',fontsize=20)
plt.xlabel('Predicted label',fontsize=20)
The code changed as such
I'm trying to display a confusion matrix and can't for the life of my figure out why it refuses to display in an appropriate manner. Here's my code:
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title, fontsize=30)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, fontsize=20)
plt.yticks(tick_marks, classes, fontsize=20)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
color="white" if cm[i, j] < thresh else "black", fontsize=40)
plt.tight_layout()
plt.ylabel('True label', fontsize=30)
plt.xlabel('Predicted label', fontsize=30)
return plt
cm = confusion_matrix(y_test, y_predicted_counts)
fig = plt.figure(figsize=(10, 10))
plot = plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
plt.show()
print(cm)
And this is what is displayed:
Any help would be appreciated. Thanks in advance.
For the call to imshow you need to specify origin='lower' (the default is 'upper'; they probably changed this at some time and the scikit-learn docs didn't update their example). So the following should do the trick:
plt.imshow(cm, interpolation='nearest', cmap=cmap, origin='lower')
# ^
# |
# added origin='lower' ------------------------------
Using Matplotlib
If you want to keep your matplotlib implementation, just add plt.ylim(-0.5,2.5) at the end of your plot_confusion_matrix function:
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title, fontsize=30)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, fontsize=20)
plt.yticks(tick_marks, classes, fontsize=20)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
color="white" if cm[i, j] < thresh else "black", fontsize=40)
plt.tight_layout()
plt.ylabel('True label', fontsize=30)
plt.xlabel('Predicted label', fontsize=30)
plt.ylim(-0.5, 2.5) # <-- SOLUTION
return plt
Using Seaborn
you can try the seaborn package for plotting heatmaps:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
cm_df = pd.DataFrame(cm, columns=classes, index = classes)
cm_df.index.name = 'Actual'
cm_df.columns.name = 'Predicted'
plt.figure(figsize = (10,7))
sn.set(font_scale=1.4)#for label size
ax =sn.heatmap(cm_df, cmap=cmap, annot=True,annot_kws={"size": 16},fmt="d")# font size
plt.title(title)
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
plt.show()
plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
Confusion Matrix Result
Hope this works for you!
It's likely that you're using matplotlib 3.1.1 which broke the tick default behaviour. Upgrade to 3.1.2 or downgrade to 3.1.0 to fix the issue.
I found an excellent tutorial on drawing a heatmap for a confusion matrix, but I want to add some errors of commission and omission on the sides.
I'll try to explain using this image:
This means:
I need to insert a number beside each of the boxes containing 0, 6, and 9 just right of the right edge of the image, and to the left of the legend
I need to insert a number above the each of boxes containing 13, 0 and 0 just above the top edge of the image, just below the title.
(so 6 numbers in total)
Is this even possible? I know nothing about the plotting functions in Python, as I'm new to the language. It just seems like a very difficult task from where I'm standing.
Use the following modified function. The idea is following:
Add two twin axes - one to the right and other to the top.
Set the limits of the twin axes equal to that of the original axes
Set the positions of the ticks on the twin axes to be the same as that of the original axes
Hide the tick marks and assign the tick-labels
Shift the title a bit upward using y=1.1
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False,
title=None, cmap=plt.cm.Blues):
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
cm = confusion_matrix(y_true, y_pred)
classes = classes[unique_labels(y_true, y_pred)]
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
fig, ax = plt.subplots(figsize=(6.5,6))
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
xticklabels=classes, yticklabels=classes,
ylabel='True label',
xlabel='Predicted label')
ax.set_title(title, y=1.1)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Adding data to the right
ax2 = ax.twinx()
ax2.set_ylim(ax.get_ylim())
ax2.set_yticks(np.arange(cm.shape[0]))
ax2.set_yticklabels(cm[:, -1])
ax2.tick_params(axis="y", right=False)
# Adding data to the top
ax3 = ax.twiny()
ax3.set_xlim(ax.get_xlim())
ax3.set_xticks(np.arange(cm.shape[0]))
ax3.set_xticklabels(cm[:, 0])
ax3.tick_params(axis="x", top=False)
ax.set_aspect('auto')
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
return ax
You could do this using ticks.
Let me present this approach with the following easy plot:
from matplotlib import pyplot as plt
ax = plt.axes()
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
for i in range(3):
for j in range(3):
ax.fill_between((i, i+1), j, j+1)
ax.fill_between((i, i+1), j, j+1)
ax.fill_between((i, i+1), j, j+1)
plt.show()
I will not focus on the colors neither on the tick style, but know that you can change these very easily.
You can create an Axes object that will share ax's Y axis, with ax.twiny(). Then, you can add X ticks on this new Axes, which will appear on top of the plot:
from matplotlib import pyplot as plt
ax = plt.axes()
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
for i in range(3):
for j in range(3):
ax.fill_between((i, i+1), j, j+1)
ax.fill_between((i, i+1), j, j+1)
ax.fill_between((i, i+1), j, j+1)
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks([0.5, 1.5, 2.5])
ax2.set_xticklabels([13, 0, 0])
plt.show()
In order to display ticks for the X axis, you have to create an Axes object that shares ax's Y axis, with ax.twiny(). This might seem counter-intuitive, but if you used ax.twinx() instead, then modifying ax2's X ticks would modify ax's as well, because they're actually the same.
Then, you want to set the X window of ax2, so that it has three squares.
After that, you can set the ticks: one in every square, at the horizontal center, so at [0.5, 1.5, 2.5].
Finally, you can set the tick labels to display the desired value.
Then, you just do the same with the Y ticks:
from matplotlib import pyplot as plt
ax = plt.axes()
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
for i in range(3):
for j in range(3):
ax.fill_between((i, i+1), j, j+1)
ax.fill_between((i, i+1), j, j+1)
ax.fill_between((i, i+1), j, j+1)
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks([0.5, 1.5, 2.5])
ax2.set_xticklabels([13, 0, 0])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks([0.5, 1.5, 2.5])
ax3.set_yticklabels([0, 6, 9])
plt.show()
A rather manual approach would consist of combining the following items until the result is satisfactory:
using twinx and twiny to get new axes on the top and on the right: twinax = ax.twinx().twiny()
using twinax.set(xlim=ax.get_xlim(), ylim=ax.get_ylim()) to match their range with the range of the original axes, then...
using twinax.set(xticks=ax.get_xticks(), yticks=ax.get_yticks, xticklabels=('0','1','2'), yticklabels = ('0','1','2')) to set the labels on the new axes as was done in your example (these two calls can be combined if you like).
(If you don't want the actual ticks (only the labels) you can give them 0 length through tick_params.)
You can reposition the axes with set_position.
See this question for info on how to move the colorbar.
Does anyone know how I can plot confusion matrix for 100 class labels? I did these line of codes but I ended up having a confusion matrix attached. The code is working fine for less class numbers like 5 but as the number of classes is 100, there in no clear confusion matrix.
y_pred = model.predict(X_test)
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for i in range(confmat.shape[0]):
for j in range(confmat.shape[1]):
ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
plt.xlabel('predicted label')
plt.ylabel('true label')
plt.show()