Recognise text (both numbers and characters) from image - python

I am trying to recognise text from an image. One dataset that contains training data for both digits and letters is EMNIST (see the EMNIST documentation). I am training a neural network to do the job. The code is as follows:
# Train a small CNN on the EMNIST 'balanced' split and keep the best
# checkpoint in OCR.h5.
# NOTE(review): K, keras, Sequential, the layer classes and ModelCheckpoint are
# used below but never imported in this snippet; presumably the imports were
# omitted when the code was pasted -- confirm before running.
from emnist import extract_training_samples
x_train, y_train = extract_training_samples('balanced')
from emnist import extract_test_samples
x_test, y_test = extract_test_samples('balanced')

batch_size = 128
num_classes = 47
epochs = 6

# input image dimensions
img_rows, img_cols = 28, 28

# Add the single channel axis where the backend expects it.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Cast to float and divide by 255 before training.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

# Save only the epoch with the best monitored validation metric.
# NOTE(review): 'val_acc' must match the metric name the installed Keras
# version logs; some releases log 'val_accuracy' for metrics=['accuracy'] --
# TODO confirm, otherwise no checkpoint is written.
checkpoint = ModelCheckpoint('OCR.h5', monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max')
callbacks_list = [checkpoint]

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test), callbacks=callbacks_list)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
However, when this model is used to predict letters inside bounding boxes found with OpenCV's findContours method, it fails to do the job. For example, take this sample image:
It can only predict the numbers. Kindly suggest an alternative training dataset, or suggest what I can do to convert this kind of image into text.

Related

TENSORFLOW Can't find a solution for: ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2, 2))

I'm completely new to CNNs and I'm creating a CNN for image recognition. I'm trying to adapt the cats-vs-dogs structure for my exercise, but an error is popping up and I don't know how to solve it:
Here is my code:
# Two-class CNN adapted from the "cats vs dogs" example: split the data,
# arrange the channel axis for the backend, then train a sigmoid classifier.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
img_width, img_height = 64, 64
img_rows, img_cols = 64, 64

# Prepare data to feed the NN
num_classes = 2

# Ask keras which format to use depending on used backend and arrange data as expected
if K.image_data_format() == 'channels_first':
    # Fixed: the original reshaped the undefined lowercase names
    # x_train / x_test here, which raises NameError in this branch.
    X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)
    input_shape = (3, img_width, img_height)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
    input_shape = (img_width, img_height, 3)

# Incoming data is in uint8. Cast the input data images to be floats in range [0.0-1.0]
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
# NOTE: these two calls are the source of the reported shape error; the model
# below ends in Dense(1) + sigmoid, which expects one label value per sample.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# Single sigmoid unit: output shape is (batch, 1).
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

batch_size = 100
epochs = 10
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, y_test))
And the error:
ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2, 2))
Thank you very much in advance :)
You should remove the lines where you one-hot encode the labels.
In the lines:
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
you have one-hot encoded the labels, giving them the shape (batch_size, 2, 2), but the last layer (Dense) outputs a single number per sample, i.e. shape (batch_size, 1). binary_crossentropy likewise expects logits of shape (batch_size, 1) and labels of shape (batch_size, 1) (for your dataset).
binary_crossentropy documentation

Keras set_session in for loop problem, not run and stop

I want to get reproducible results with Keras, using the TensorFlow backend.
So I use ParameterGrid to check that.
When K.set_session(Session) runs a second time inside the loop, the program
stops and prints nothing. No error! How can I fix it? Thanks.
Here is my code.
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import numpy
import tensorflow
from sklearn.model_selection import ParameterGrid
## TuneParameter
# Every "Batch" entry is 50 and "Epoch" is 2, so each grid point repeats the
# same configuration.
TuneParameter = {}
TuneParameter["Batch"] = [50, 50, 50, 50, 50, 50]
TuneParameter["Epoch"] = [2]
TuneParameter = ParameterGrid(TuneParameter)

## For each pair of parameter
# NOTE(review): indentation was lost in the paste; everything down to the final
# print calls is assumed to belong to the loop body -- confirm.
for p in TuneParameter:
    ## Initial session
    # Re-seed NumPy and TensorFlow identically on every pass.
    numpy.random.seed(2018)
    tensorflow.set_random_seed(2018)
    # NOTE: a fresh Session is created and installed on every pass; the
    # question reports that the second K.set_session call hangs.
    Session = tensorflow.Session(graph=tensorflow.get_default_graph())
    K.set_session(Session)

    # input image dimensions
    img_rows, img_cols = 28, 28

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    model.fit(x_train, y_train,
              batch_size=p["Batch"],
              epochs=p["Epoch"],
              verbose=1,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)
    # Drop the Keras graph/session state before the next grid point.
    K.clear_session()
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
Try using the session that Keras already manages:
session = K.get_session()
and remove these lines:
Session = tensorflow.Session(graph=tensorflow.get_default_graph())
K.set_session(Session)

Using Keras with HDF5Matrix with labels only

I believe that this is my first question in Stack Overflow, so I apologize in advance if I don't follow all guidelines.
I recently started to use Keras for deep learning, and since I work with HDF5 files using h5py to manage large datasets, I searched for a way to train models using keras on very large HDF5 files. I found out that the most common way would be to use HDF5Matrix found in keras.utils.io_utils.
I modified one of the Keras examples (mnist_cnn) as follows:
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
# My Imports
from os.path import exists
import h5py
from keras.utils.io_utils import HDF5Matrix
# MNIST CNN that round-trips samples and one-hot labels through HDF5 files and
# trains from HDF5Matrix views instead of in-memory arrays.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

#-----------------------------------HDF5 files creation---------------------------------------
sample_file_name = "x.hdf5"
solution_file_name = "y.hdf5"
train_name = "train"
test_name = "test"

#Create dataset
# Files are written only when neither exists; existing files are reused
# without checking their contents.
if (not exists(sample_file_name)) and (not exists(solution_file_name)):
    # The context managers close (and thereby flush) both files even if a
    # create_dataset call raises.
    with h5py.File(sample_file_name, mode='a') as samples_file, \
            h5py.File(solution_file_name, mode='a') as solutions_file:
        samples_file.create_dataset(train_name, data=x_train)
        samples_file.create_dataset(test_name, data=x_test)
        solutions_file.create_dataset(train_name, data=y_train)
        solutions_file.create_dataset(test_name, data=y_test)

# Rebind the names to HDF5-backed views of the stored datasets.
x_train = HDF5Matrix(sample_file_name, train_name)
x_test = HDF5Matrix(sample_file_name, test_name)
y_train = HDF5Matrix(solution_file_name, train_name)
y_test = HDF5Matrix(solution_file_name, test_name)
#---------------------------------------------------------------------------------------------

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# If using HDF5Matrix one needs to disable shuffle
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test),
          shuffle=False)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
However, there is something that concerns me. In segmentation problems \ multi-class problems, where the number of classes is very large, saving the solutions in categorical format is very wasteful. Moreover, doing that means that once you add a new class, the entire dataset should be changed accordingly.
That's why I thought of using the normalizer feature of HDF5Matrix, as follows:
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
# My Imports
from os.path import exists
import h5py
from keras.utils.io_utils import HDF5Matrix
# Variant of the HDF5 MNIST script that stores the raw integer labels and
# one-hot encodes them on access through HDF5Matrix's normalizer hook.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

#-----------------------------------HDF5 files creation---------------------------------------
sample_file_name = "x.hdf5"
solution_file_name = "y.hdf5"
train_name = "train"
test_name = "test"

#Create dataset
# Files are written only when neither exists; existing files are reused
# without checking their contents.
if (not exists(sample_file_name)) and (not exists(solution_file_name)):
    # The context managers close (and thereby flush) both files even if a
    # create_dataset call raises.
    with h5py.File(sample_file_name, mode='a') as samples_file, \
            h5py.File(solution_file_name, mode='a') as solutions_file:
        samples_file.create_dataset(train_name, data=x_train)
        samples_file.create_dataset(test_name, data=x_test)
        # Labels are stored as loaded from mnist.load_data(), not one-hot.
        solutions_file.create_dataset(train_name, data=y_train)
        solutions_file.create_dataset(test_name, data=y_test)

x_train = HDF5Matrix(sample_file_name, train_name)
x_test = HDF5Matrix(sample_file_name, test_name)
# NOTE: the normalizer changes the label shape on access; this is the usage
# that triggers the target-shape ValueError reported in the question.
y_train = HDF5Matrix(solution_file_name, train_name,
                     normalizer=lambda solution: keras.utils.to_categorical(solution, num_classes))
y_test = HDF5Matrix(solution_file_name, test_name,
                    normalizer=lambda solution: keras.utils.to_categorical(solution, num_classes))
#---------------------------------------------------------------------------------------------

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# If using HDF5Matrix one needs to disable shuffle
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test),
          shuffle=False)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
However, this yields an error implying that the shape of the solution must match what the model expects, and that the normalizer shouldn't be used that way:
ValueError: Error when checking target: expected dense_2 to have 2 dimensions, but got array with shape (60000, 1, 10)
So, is there a way to save the data in HDF5 (and if not possible, using some other format), and use Keras in a manner that saves the labels (and not the categorical vector) without turning it into a regression problem?
You are getting this error because of the two HDF5Matrix lines that pass a normalizer for y_train and y_test.
Keras checks input shapes before training. The problem is that HDF5Matrix will return the pre-normalized shape if you call .shape, then Keras will believe you have a (60000,) array for y_train and a (10000,) for y_test.
However, when accessing a slice of the matrix, the normalizer is applied so that for example y_train[5:7].shape does have the final expected shape: (2, 10).
This is mainly because the normalizer isn't really expected to change the shape, but Keras could indeed handle this case.
You can fix it by using fit_generator instead of fit so that training only sees normalized data:
def generator(features, labels, size):
    """Yield ``(features, labels)`` batches of ``size`` rows, cycling forever.

    Only full batches are produced: a trailing remainder shorter than
    ``size`` is skipped, so one pass over the data yields exactly
    ``len(features) // size`` batches before starting again from the top.

    Args:
        features: Sliceable sequence of samples that supports ``len()``.
        labels: Sliceable sequence sliced with the same ranges as ``features``.
        size: Number of rows per batch.

    Yields:
        Tuples ``(features[start:start + size], labels[start:start + size])``.

    Raises:
        ValueError: If ``size`` is not positive or exceeds ``len(features)``.
            Raised on the first ``next()`` call, since this is a generator.
    """
    total = len(features)
    if size < 1 or size > total:
        # Without this check the loop below would spin forever without
        # ever yielding a batch.
        raise ValueError(
            "size must be between 1 and len(features) (%d), got %r"
            % (total, size))
    while True:
        # The upper bound total - size + 1 keeps the last full batch; the
        # earlier `end < len(features)` test dropped it whenever the length
        # was an exact multiple of size.
        for start in range(0, total - size + 1, size):
            s = slice(start, start + size)
            # you can actually do the normalization here if you want
            yield features[s], labels[s]
# Train from the batch generator instead of passing the arrays to fit().
# Each epoch draws len(x_train) // batch_size batches; validation draws
# len(x_test) // batch_size batches from a second generator over the test set.
model.fit_generator(
generator(x_train, y_train, batch_size),
steps_per_epoch=len(x_train) // batch_size,
epochs=1,
verbose=1,
validation_data=generator(x_test, y_test, batch_size),
validation_steps=len(x_test) // batch_size,
shuffle=False)
Note that you could do any kind of normalization inside the generator function and that will be transparent to Keras. And you can use different batch sizes for train and validation.
Also, you have to change the evaluation in the same way:
# Evaluate through the same kind of generator, drawing
# len(x_test) // batch_size batches from the test set.
score = model.evaluate_generator(
generator(x_test, y_test, batch_size),
steps=len(x_test) // batch_size)
I think your solution with the normalizer is a good idea, by the way.

Error in getting confusion matrix [duplicate]

This question already has answers here:
Multilabel-indicator is not supported for confusion matrix
(4 answers)
Closed 4 years ago.
I want to get a confusion matrix with the following code (MNIST classification):
from sklearn.metrics import confusion_matrix
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.callbacks import TensorBoard
import numpy as np
# Train the MNIST CNN for one epoch, predict on the test set and build a
# confusion matrix from the predictions.
batch_size = 128
num_classes = 10
epochs = 1

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
# The return value is discarded, so this call has no effect on the script.
model.get_weights()
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

# y_pred holds one row of softmax outputs per test sample.
y_pred = model.predict(x_test)
# NOTE: this is the call that raises the reported ValueError; y_test is
# one-hot and y_pred is per-class probabilities, whereas confusion_matrix
# expects class-label vectors.
confusion_matrix(y_test, y_pred)
But I get the following error:
ValueError: Can't handle mix of multilabel-indicator and continuous-multioutput.
I think I misinterpreted the meaning of y_pred or calculated it incorrectly.
How can I solve this?
confusion_matrix expects the true and predicted class labels, not one-hot/probability distribution representations. Replace the last line with the following:
confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
This converts each 10000x10 array (one-hot labels or predicted probabilities) into a length-10000 vector holding the class index for each sample.

How to get keras+theano to use >1 core

I am testing this code
https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# MNIST CNN example written against the legacy Keras 1 API
# (image_dim_ordering, Convolution2D with positional kernel sizes,
# border_mode, nb_epoch).
batch_size = 128
nb_classes = 10
nb_epoch = 12

# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 'th' ordering puts the channel axis first; otherwise it goes last.
if K.image_dim_ordering() == 'th':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                        border_mode='valid',
                        input_shape=input_shape))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
I am using keras and theano with NO GPU. It runs fine but uses only one core.
How can I get it to use more than one core?
Install Anaconda. It now comes with MKL, so you can set the number of threads:
import mkl; mkl.set_num_threads(n_cores_in_your_cpu)

Categories