Related
Currently I am implementing a TensorFlow model in Android Studio using TensorFlow Lite. I have already checked the TensorFlow model with the TensorFlow interpreter and it gives correct results in Python. The problem is that when I input an image from Android Studio it gives the wrong classification. Here is the code I use to predict in Python:
image1 = cv2.imread("image")
image_fromarray = Image.fromarray(image1,'RGB')
resize_image = image_fromarray.resize((100, 100))
expand_input = np.expand_dims(resize_image,axis=0)
input_data = np.array(expand_input)
input_data = input_data/255
pred = loaded_model.predict(input_data)
result = pred.argmax()
result
And here is the Android Studio code that gets the image from the ImageView and predicts:
public void onClick(View view) {
if (img == null) {
Toast.makeText(MainActivity.this, "No image selected", Toast.LENGTH_SHORT).show();
return;
}
try {
//resize image (100,100)
img = Bitmap.createScaledBitmap(img, imgsize, imgsize, false);
// Get pixels from the bitmap
int[] intValues = new int[imgsize *imgsize];
img.getPixels(intValues, 0, img.getWidth(), 0, 0, img.getWidth(), img.getHeight());
// Convert pixels to float values
float[] floatValues = new float[intValues.length * 3];
for (int i = 0; i < intValues.length; i++) {
final int val = intValues[i];
floatValues[i * 3] = ((val >> 16) & 0xFF) / 255.f;
floatValues[i * 3 + 1] = ((val >> 8) & 0xFF) / 255.f;
floatValues[i * 3 + 2] = (val & 0xFF) / 255.f;
}
TensorBuffer inputBuffer = TensorBuffer.createFixedSize(new int[]{1, imgsize, imgsize, 3}, DataType.FLOAT32);
inputBuffer.loadArray(floatValues);
Model2 model = Model2.newInstance(getApplicationContext());
Model2.Outputs outputs = model.process(inputBuffer);
TensorBuffer outputFeature0 = outputs.getOutputFeature0AsTensorBuffer();
// Releases model resources if no longer used.
model.close();
float[] confidence = outputFeature0.getFloatArray();
int maxPos=-1;
float maxConfidence = -1;
for (int i = 0;i<confidence.length;i++)
{
if(confidence[i]>maxConfidence){
maxConfidence = confidence[i];
maxPos=i;
}
}
String[] classes = {"Ripe Braeburn", "Ripe Red Apple", "Ripe Red Delicious", "Rotten"};
tv.setText(classes[maxPos]);
} catch (IOException e) {
// Model2.newInstance() above can throw if the model file cannot be loaded
e.printStackTrace();
}
}
The problem is that it gives the same single result for any picture I choose from the test set. How can I modify the code in Android Studio so that it gives the same result as in Python?
I'm trying to follow a Python function from here to apply color matching in OpenCV.
This is the Python function (without the mask option):
#!/usr/bin/env python
import cv2 # Import the OpenCV library
import numpy as np # Import Numpy library
import matplotlib.pyplot as plt # Import matplotlib functionality
import sys # Enables the passing of arguments
# Define the file name of the images
SOURCE_IMAGE = "aspens_in_fall.jpg"
REFERENCE_IMAGE = "forest_resized.jpg"
MASK_IMAGE = "mask.jpg"
OUTPUT_IMAGE = "aspens_in_fall_forest_output"
OUTPUT_MASKED_IMAGE = "aspens_in_fall_forest_output_masked.jpg"
def calculate_cdf(histogram):
    """
    This method calculates the cumulative distribution function
    :param array histogram: The values of the histogram
    :return: normalized_cdf: The normalized cumulative distribution function
    :rtype: array
    """
    # Get the cumulative sum of the elements
    cdf = histogram.cumsum()
    # Normalize the cdf
    normalized_cdf = cdf / float(cdf.max())
    return normalized_cdf

def calculate_lookup(src_cdf, ref_cdf):
    """
    This method creates the lookup table
    :param array src_cdf: The cdf for the source image
    :param array ref_cdf: The cdf for the reference image
    :return: lookup_table: The lookup table
    :rtype: array
    """
    lookup_table = np.zeros(256)
    lookup_val = 0
    for src_pixel_val in range(len(src_cdf)):
        for ref_pixel_val in range(len(ref_cdf)):
            if ref_cdf[ref_pixel_val] >= src_cdf[src_pixel_val]:
                lookup_val = ref_pixel_val
                break
        lookup_table[src_pixel_val] = lookup_val
    return lookup_table
def match_histograms(src_image, ref_image):
    """
    This method matches the source image histogram to the
    reference signal
    :param image src_image: The original source image
    :param image ref_image: The reference image
    :return: image_after_matching
    :rtype: image (array)
    """
    # Split the images into the different color channels
    # b means blue, g means green and r means red
    src_b, src_g, src_r = cv2.split(src_image)
    ref_b, ref_g, ref_r = cv2.split(ref_image)

    # Compute the b, g, and r histograms separately
    # The flatten() Numpy method returns a copy of the array
    # collapsed into one dimension.
    src_hist_blue, bin_0 = np.histogram(src_b.flatten(), 256, [0, 256])
    src_hist_green, bin_1 = np.histogram(src_g.flatten(), 256, [0, 256])
    src_hist_red, bin_2 = np.histogram(src_r.flatten(), 256, [0, 256])
    ref_hist_blue, bin_3 = np.histogram(ref_b.flatten(), 256, [0, 256])
    ref_hist_green, bin_4 = np.histogram(ref_g.flatten(), 256, [0, 256])
    ref_hist_red, bin_5 = np.histogram(ref_r.flatten(), 256, [0, 256])

    # Compute the normalized cdf for the source and reference image
    src_cdf_blue = calculate_cdf(src_hist_blue)
    src_cdf_green = calculate_cdf(src_hist_green)
    src_cdf_red = calculate_cdf(src_hist_red)
    ref_cdf_blue = calculate_cdf(ref_hist_blue)
    ref_cdf_green = calculate_cdf(ref_hist_green)
    ref_cdf_red = calculate_cdf(ref_hist_red)

    # Make a separate lookup table for each color
    blue_lookup_table = calculate_lookup(src_cdf_blue, ref_cdf_blue)
    green_lookup_table = calculate_lookup(src_cdf_green, ref_cdf_green)
    red_lookup_table = calculate_lookup(src_cdf_red, ref_cdf_red)

    # Use the lookup function to transform the colors of the original
    # source image
    blue_after_transform = cv2.LUT(src_b, blue_lookup_table)
    green_after_transform = cv2.LUT(src_g, green_lookup_table)
    red_after_transform = cv2.LUT(src_r, red_lookup_table)

    # Put the image back together
    image_after_matching = cv2.merge([
        blue_after_transform, green_after_transform, red_after_transform])
    image_after_matching = cv2.convertScaleAbs(image_after_matching)
    return image_after_matching
And this is my C++ attempt:
Mat Flatten(const Mat& mat)
{
auto m2 = mat.reshape(1, 1);
return m2;
}
Mat calculate_cdf(Mat m)
{
cv::Mat accumulatedHist = m.clone();
for (int i = 1; i < m.rows; i++) {
{
float& f1 = accumulatedHist.at<float>(i);
f1 += accumulatedHist.at<float>(i - 1);
}
}
float maxx = 0;
for (int i = 0; i < m.rows; i++) {
if (accumulatedHist.at<float>(i) > maxx)
maxx = accumulatedHist.at<float>(i);
}
for (int i = 0; i < m.rows; i++) {
accumulatedHist.at<float>(i) /= maxx;
}
return accumulatedHist;
}
Mat calculate_lookup(Mat src_cdf, Mat ref_cdf)
{
Mat lookup_table = Mat::zeros(256, 1, CV_32FC1);
float lookup_val = 0;
for (int src_pixel_val = 0; src_pixel_val < src_cdf.rows; src_pixel_val++)
{
float lookup_val = 0;
for (int ref_pixel_val = 0; ref_pixel_val < ref_cdf.rows; ref_pixel_val++)
{
if (ref_cdf.data[ref_pixel_val] >= src_cdf.data[src_pixel_val])
{
lookup_val = ref_pixel_val;
break;
}
}
lookup_table.data[src_pixel_val] = lookup_val;
}
return lookup_table;
}
Mat hm(Mat src_image, Mat ref_image)
{
// Split images
Mat src[3];
split(src_image,src);
Mat ref[3];
split(ref_image, ref);
// Compute the b, g, and r histograms separately
float range[] = { 0, 256 };
const float* histRange = { range };
bool uniform = 1, accumulate = 0;
Mat src_hist_blue, src_hist_green, src_hist_red;
Mat ref_hist_blue, ref_hist_green, ref_hist_red;
int histSize = 256; // expected
calcHist(&Flatten(src[0]), 1, 0, Mat(), src_hist_blue, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Flatten(src[1]), 1, 0, Mat(), src_hist_green, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Flatten(src[2]), 1, 0, Mat(), src_hist_red, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Flatten(ref[0]), 1, 0, Mat(), ref_hist_blue, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Flatten(ref[1]), 1, 0, Mat(), ref_hist_green, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Flatten(ref[2]), 1, 0, Mat(), ref_hist_red, 1, &histSize, &histRange, uniform, accumulate);
auto src_cdf_blue = calculate_cdf(src_hist_blue);
auto src_cdf_green = calculate_cdf(src_hist_green);
auto src_cdf_red = calculate_cdf(src_hist_red);
auto ref_cdf_blue = calculate_cdf(ref_hist_blue);
auto ref_cdf_green = calculate_cdf(ref_hist_green);
auto ref_cdf_red = calculate_cdf(ref_hist_red);
auto blue_lookup_table = calculate_lookup(src_cdf_blue, ref_cdf_blue);
auto green_lookup_table = calculate_lookup(src_cdf_green, ref_cdf_green);
auto red_lookup_table = calculate_lookup(src_cdf_red, ref_cdf_red);
Mat at[3];
auto to = src[0].total();
to = blue_lookup_table.total();
to = blue_lookup_table.channels();
LUT(src[0], blue_lookup_table,at[2]);
LUT(src[1], green_lookup_table, at[1]);
LUT(src[2], red_lookup_table, at[0]);
Mat image_after_matching;
merge(at, 3, image_after_matching);
Mat dst;
convertScaleAbs(image_after_matching, dst);
return dst;
}
int main()
{
Mat image_src = imread("r:\\15.jpg");
Mat image_ref = imread("r:\\130.jpg");
Mat i3 = hm(image_src, image_ref);
DeleteFile(L"r:\\r.jpg");
imwrite("r:\\r.jpg", i3);
ShellExecute(0, L"open", L"r:\\r.jpg", 0, 0, 0);
}
And this is my result:
If the accumulate parameter is true, the results are equally wrong. I'm not sure if I have converted the Python code correctly, so any help would be appreciated.
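One detail that may be worth double-checking (a guess based on the code above, not a verified fix): cv::Mat::data is a uchar*, so indexing it in calculate_lookup compares and stores the raw bytes of the CV_32F CDF values rather than the float values themselves; &Flatten(src[0]) also takes the address of a temporary, and writing the blue result into at[2] before merging swaps the channel order relative to the Python version, which keeps BGR order. A sketch of calculate_lookup that reads the CDFs with at<float>() and builds a 256-entry CV_8U table, which cv::LUT can apply directly to 8-bit channels, could look like this:
// Hypothetical rework: read the CDFs as floats and build a CV_8U lookup table
Mat calculate_lookup(const Mat& src_cdf, const Mat& ref_cdf)
{
    Mat lookup_table = Mat::zeros(256, 1, CV_8U);
    for (int src_pixel_val = 0; src_pixel_val < src_cdf.rows; src_pixel_val++)
    {
        int lookup_val = 0;
        for (int ref_pixel_val = 0; ref_pixel_val < ref_cdf.rows; ref_pixel_val++)
        {
            if (ref_cdf.at<float>(ref_pixel_val) >= src_cdf.at<float>(src_pixel_val))
            {
                lookup_val = ref_pixel_val;
                break;
            }
        }
        lookup_table.at<uchar>(src_pixel_val) = (uchar)lookup_val;
    }
    return lookup_table;
}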
I have trained a model for semantic segmentation using this repo, got good results, and tried to use this net in a small library written with the TensorFlow C API. I turned my Keras model into a protobuf file using this repo and run the session using this code:
typedef struct model_t {
TF_Graph* graph;
TF_Session* session;
TF_Status* status;
TF_Output input, target, output;
TF_Operation *init_op, *train_op, *save_op, *restore_op;
TF_Output checkpoint_file;
} model_t;
typedef struct NetProperties {
int width;
int height;
int border;
int classes;
int inputSize;
} NetProperties;
static model_t * model;
static NetProperties * properties;
extern "C" EXPORT int ModelCreate(const char* nnFilename, const char* inputName, const char* outputName, int pictureWidth, int pictureHeight, int border, int classes) {
ModelDestroy();
model = (model_t*)malloc(sizeof(model_t));
model->status = TF_NewStatus();
model->graph = TF_NewGraph();
properties = (NetProperties*)malloc(sizeof(NetProperties));
properties->width = pictureWidth;
properties->height = pictureHeight;
properties->border = border;
properties->classes = classes;
properties->inputSize = (pictureWidth + border * 2) * (pictureHeight + border * 2) * 3;
{
// Create the session.
TF_SessionOptions* opts = TF_NewSessionOptions();
model->session = TF_NewSession(model->graph, opts, model->status);
TF_DeleteSessionOptions(opts);
if (!Okay(model->status)) return 0;
}
TF_Graph* g = model->graph;
{
// Import the graph.
TF_Buffer* graph_def = read_file(nnFilename);
if (graph_def == NULL) return 0;
printf("Read GraphDef of %zu bytes\n", graph_def->length);
TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions();
TF_GraphImportGraphDef(g, graph_def, opts, model->status);
TF_DeleteImportGraphDefOptions(opts);
TF_DeleteBuffer(graph_def);
if (!Okay(model->status)) return 0;
}
// Handles to the interesting operations in the graph.
model->input.oper = TF_GraphOperationByName(g, inputName);
model->input.index = 0;
model->target.oper = TF_GraphOperationByName(g, "target");
model->target.index = 0;
model->output.oper = TF_GraphOperationByName(g, outputName);
model->output.index = 0;
model->init_op = TF_GraphOperationByName(g, "init");
model->train_op = TF_GraphOperationByName(g, "train");
model->save_op = TF_GraphOperationByName(g, "save/control_dependency");
model->restore_op = TF_GraphOperationByName(g, "save/restore_all");
model->checkpoint_file.oper = TF_GraphOperationByName(g, "save/Const");
model->checkpoint_file.index = 0;
// first prediction is slow
unsigned char * randomData = (unsigned char*)malloc(properties->inputSize * sizeof(unsigned char));
for (int i = 0; i < properties->inputSize; i++) {
randomData[i] = (unsigned char)100;
}
ModelPredict(randomData);
free(randomData);
return 1;
}
extern "C" EXPORT void ModelDestroy() {
if (model == nullptr) return;
TF_DeleteSession(model->session, model->status);
Okay(model->status);
TF_DeleteGraph(model->graph);
TF_DeleteStatus(model->status);
free(model);
}
extern "C" EXPORT unsigned char* ModelPredict(unsigned char * batch1) {
if (model == NULL) return NULL;
const int64_t dims[4] = { 1, properties->height + properties->border * 2, properties->width + properties->border * 2, 3 };
size_t nbytes = properties->inputSize;
// can be faster
float * arrayOfFloats = (float*)malloc(nbytes * sizeof(float));
//float sumUp = 0;
for (int i = 0; i < properties->inputSize; i++) {
arrayOfFloats[i] = batch1[i] * (1.f / 255.f);
//sumUp += arrayOfFloats[i];
}
//std::cout << sumUp << std::endl;
// removed due to jdehesa answer
//float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));
//inputFloats[0] = arrayOfFloats;
// Optionally, you can check that your input_op and input tensors are correct
//// by using some of the functions provided by the C API.
//std::cout << "Input op info: " << TF_OperationNumOutputs(input_op) << "\n";
//std::cout << "Input data info: " << TF_Dim(input, 0) << "\n";
std::vector<TF_Output> inputs;
std::vector<TF_Tensor*> input_values;
TF_Operation* input_op = model->input.oper;
TF_Output input_opout = { input_op, 0 };
inputs.push_back(input_opout);
// reworked due to jdehesa answer
//TF_Tensor* input = TF_NewTensor(TF_FLOAT, dims, 4, (void*)inputFloats, //nbytes * sizeof(float), &Deallocator, NULL);
TF_Tensor* input = TF_NewTensor(TF_FLOAT, dims, 4, (void*)arrayOfFloats, nbytes * sizeof(float), &Deallocator, NULL);
input_values.push_back(input);
int outputSize = properties->width * properties->height * properties->classes;
int64_t out_dims[] = { 1, properties->height, properties->width, properties->classes };
// Create vector to store graph output operations
std::vector<TF_Output> outputs;
TF_Operation* output_op = model->output.oper;
TF_Output output_opout = { output_op, 0 };
outputs.push_back(output_opout);
// Create TF_Tensor* vector
//std::vector<TF_Tensor*> output_values(outputs.size(), nullptr);
// Similar to creating the input tensor, however here we don't yet have the
// output values, so we use TF_AllocateTensor()
TF_Tensor* output_value = TF_AllocateTensor(TF_FLOAT, out_dims, 4, outputSize * sizeof(float));
//output_values.push_back(output_value);
//// As with inputs, check the values for the output operation and output tensor
//std::cout << "Output: " << TF_OperationName(output_op) << "\n";
//std::cout << "Output info: " << TF_Dim(output_value, 0) << "\n";
TF_SessionRun(model->session, NULL,
&inputs[0], &input_values[0], inputs.size(),
&outputs[0], &output_value, outputs.size(),
/* No target operations to run */
NULL, 0, NULL, model->status);
if (!Okay(model->status)) return NULL;
TF_DeleteTensor(input_values[0]);
// memory allocations take place here
float* prediction = (float*)TF_TensorData(output_value);
//float* prediction = (float*)malloc(sizeof(float) * properties->inputSize / 3 * properties->classes);
//memcpy(prediction, TF_TensorData(output_value), sizeof(float) * properties->inputSize / 3 * properties->classes);
unsigned char * charPrediction = new unsigned char[outputSize * sizeof(unsigned char)];
//sumUp = 0;
for (int i = 0; i < outputSize; i++) {
charPrediction[i] = (unsigned char)((prediction[i] * 255));
//sumUp += prediction[i];
}
//std::cout << sumUp << std::endl << std::endl;
//free(prediction);
TF_DeleteTensor(output_value);
return charPrediction;
}
The problem is that the prediction result is always the same. I tried to pass random data and real images, but the result was equal. However, different trained models give different prediction results, but for each model it is always the same. As you can see in the code snippet, I checked that I pass different data and get the same prediction every time:
// first is float sum of passed picture, second is the float sum of answer
724306
22982.6
692004
22982.6
718490
22982.6
692004
22982.6
720861
22982.6
692004
22982.6
I tried to write my own Keras-to-TensorFlow .pb converter, but the result was the same.
import os, argparse
import tensorflow as tf
from tensorflow.keras.utils import get_custom_objects
from segmentation_models.losses import bce_dice_loss,dice_loss,cce_dice_loss
from segmentation_models.metrics import iou_score
# some custom functions from segmentation_models
get_custom_objects().update({
'dice_loss': dice_loss,
'bce_dice_loss': bce_dice_loss,
'cce_dice_loss': cce_dice_loss,
'iou_score': iou_score,
})
def freeze_keras(model_name):
    tf.keras.backend.set_learning_phase(0)
    model = tf.keras.models.load_model(model_name)
    sess = tf.keras.backend.get_session()
    constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), [out.op.name for out in model.outputs])
    tf.train.write_graph(constant_graph, './', 'saved_model.pb', as_text=False)

freeze_keras('best-weights.hdf5')
Help me find out how to fix the prediction result in the C API.
UPDATE 1: Reworked input array as jdehesa suggested
UPDATE 2: Added definition of model and NetProperties
I think you are not setting the input data correctly. Let's see.
float * arrayOfFloats1 = (float*)malloc(nbytes * sizeof(float));
float sumUp = 0;
Here you create arrayOfFloats1 to hold all the image data.
for (int i = 0; i < properties->inputSize; i++) {
arrayOfFloats1[i] = batch1[i] * (1.f / 255.f);
sumUp += arrayOfFloats1[i];
}
std::cout << sumUp << std::endl;
Here you set arrayOfFloats1 to the image data. This is all fine.
But then:
float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));
Here you have inputFloats, which has space for nbytes float pointers. First, you probably would want to allocate space for float values, not float pointers (which probably do not have the same size). And then:
inputFloats[0] = arrayOfFloats1;
Here you are setting the first of those nbytes pointers to the pointer arrayOfFloats1. And then inputFloats is used as input to the model. But the remaining nbytes - 1 pointers have not been set to anything; malloc does not initialize the memory, so they contain whatever happened to be there.
If you just want to make an "array of arrays of floats" with arrayOfFloats1 you don't need to allocate any memory, you can simply do:
float ** inputFloats = &arrayOfFloats1;
But then you actually use inputFloats like this:
TF_Tensor* input = TF_NewTensor(
TF_FLOAT, dims, 4, (void*)inputFloats, nbytes * sizeof(float), &Deallocator, NULL);
So here you are saying that input is made up of the data in inputFloats, which will be a pointer to arrayOfFloats1 and then uninitialized memory. Probably you actually want something like:
TF_Tensor* input = TF_NewTensor(
TF_FLOAT, dims, 4, (void*)arrayOfFloats1, nbytes * sizeof(float), &Deallocator, NULL);
Which means input will be a tensor made up of the data in arrayOfFloats1 that you copied before. In fact, I don't think your code needs inputFloats at all.
Otherwise, from what I can tell the rest of the code seems correct. You should ensure that all allocated memory is properly freed in all cases (e.g. when you do if (!Okay(model->status)) return NULL; you should probably delete the input and output tensors before returning), but that is a different issue.
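A sketch of what that early-exit cleanup could look like at the point mentioned above (hypothetical, assuming the input_values and output_value variables from ModelPredict are in scope):
// Release both tensors before bailing out when the session run fails.
if (!Okay(model->status)) {
    TF_DeleteTensor(input_values[0]);
    TF_DeleteTensor(output_value);
    return NULL;
}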
The issue was in the model. I had trained it on non-normalized image data (pixel values between 0.0 and 255.0) and tried to run inference on normalized data (I divided each pixel value by 255, arrayOfFloats[i] = batch1[i] * (1.f / 255.f);, and got values between 0.0 and 1.0), so my model thought it was getting black images every time and gave me similar answers. So I removed the normalization and the model started to predict.
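For reference, a minimal sketch of the change described above, assuming the rest of ModelPredict stays unchanged:
// Feed the network the value range it was trained on (raw 0..255 pixels),
// i.e. drop the 1/255 normalization when filling the input buffer.
for (int i = 0; i < properties->inputSize; i++) {
    arrayOfFloats[i] = (float)batch1[i];
}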
I'd like to remove shadows before image binarization using OpenCV. I've tried Otsu's method and adaptive thresholding; however, for images with large regions of shadow, these two methods do not give good results.
Any better solutions? Thanks in advance.
Since you didn't specify any language, I'll assume Python to illustrate.
A decent starting point might be taking the approach I show in this answer and expanding it to work with multiple channels.
Something along the lines of
import cv2
import numpy as np
img = cv2.imread('shadows.png', -1)
rgb_planes = cv2.split(img)
result_planes = []
result_norm_planes = []
for plane in rgb_planes:
    dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
    bg_img = cv2.medianBlur(dilated_img, 21)
    diff_img = 255 - cv2.absdiff(plane, bg_img)
    norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    result_planes.append(diff_img)
    result_norm_planes.append(norm_img)
result = cv2.merge(result_planes)
result_norm = cv2.merge(result_norm_planes)
cv2.imwrite('shadows_out.png', result)
cv2.imwrite('shadows_out_norm.png', result_norm)
The non-normalized result looks as follows:
And the normalized result:
Example C++ implementation provided by @ruben-estrada-marmolejo
Added as requested: C/C++ code, without relying on using namespace.
//Compile with:
//g++ example.cpp -o salida `pkg-config --cflags --libs opencv4`
//Ruben Estrada Marmolejo
//ruben.estrada#hetpro.com.mx
//Original idea: https://stackoverflow.com/questions/44752240/how-to-remove-shadow-from-scanned-images-using-opencv/44752405#44752405
#include<opencv4/opencv2/cvconfig.h>
#include<opencv2/core/core.hpp>
#include<opencv2/ml/ml.hpp>
//#include<opencv/cv.h>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/video/background_segm.hpp>
#include<opencv2/videoio.hpp>
#include<opencv2/imgcodecs.hpp>
#include <iostream>
void removeShadow(cv::Mat const& src, cv::Mat &result1_diff_img, cv::Mat &result2_norm_img){
std::vector<cv::Mat> channels;
cv::split(src, channels);
cv::Mat zero = cv::Mat::zeros(src.size(), CV_8UC1);
cv::Mat kernel;
kernel = getStructuringElement(cv::MORPH_RECT, cv::Size(7, 7)); // 7x7 rectangle, matching np.ones((7,7)) in the Python version
cv::Mat diff_img[3];
cv::Mat norm_img[3];
for (int i =0; i<3;i++){
cv::Mat dilated_img;
dilate(channels[i],dilated_img,kernel,cv::Point(-1,-1),1,cv::BORDER_CONSTANT,cv::morphologyDefaultBorderValue());
cv::Mat bg_img;
cv::medianBlur(channels[i], bg_img, 21);
cv::absdiff(channels[i], bg_img, diff_img[i]);
cv::bitwise_not(diff_img[i],diff_img[i]);
cv::normalize(diff_img[i], norm_img[i], 0, 255, cv::NORM_MINMAX, CV_8UC1, cv::noArray());
}
std::vector<cv::Mat> R1B1 = { diff_img[0], zero, zero };
std::vector<cv::Mat> R1G1 = { zero, diff_img[1], zero };
std::vector<cv::Mat> R1R1 = { zero, zero, diff_img[2] };
cv::Mat result1_B;
cv::Mat result1_G;
cv::Mat result1_R;
cv::merge(R1B1, result1_B);
cv::merge(R1G1, result1_G);
cv::merge(R1R1, result1_R);
cv::bitwise_or(result1_B, result1_G, result1_G);
cv::bitwise_or(result1_G, result1_R, result1_diff_img);
std::vector<cv::Mat> R2B1 = { norm_img[0], zero, zero };
std::vector<cv::Mat> R2G1 = { zero, norm_img[1], zero };
std::vector<cv::Mat> R2R1 = { zero, zero, norm_img[2] };
cv::Mat result2_B;
cv::Mat result2_G;
cv::Mat result2_R;
cv::merge(R2B1, result2_B);
cv::merge(R2G1, result2_G);
cv::merge(R2R1, result2_R);
cv::bitwise_or(result2_B, result2_G, result2_G);
cv::bitwise_or(result2_G, result2_R, result2_norm_img);
}
int main(){
cv::Mat img = cv::imread("test.jpg", cv::IMREAD_COLOR);
if(img.empty())
{
std::cout << "Could not read the image: " << std::endl;
return 1;
}
cv::Mat result1;
cv::Mat result2;
removeShadow(img, result1, result2);
imshow("Display window", result1);
int k = cv::waitKey(0); // Wait for a keystroke in the window
if(k == 's')
{
cv::imwrite("result1.png", result1);
}
return 0;
}
I'm trying to recognize the characters of license plates using OCR, but my license plate images are of poor quality.
I'm trying to somehow improve character recognition for OCR, but my best result is this: result.
And even Tesseract does not recognize any characters in this picture. My code is:
#include <cv.h> // open cv general include file
#include <highgui.h> // open cv GUI include file
#include <iostream> // standard C++ I/O
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
using namespace cv;
int main( int argc, char** argv )
{
Mat src;
Mat dst;
Mat const structure_elem = getStructuringElement(
MORPH_RECT, Size(2,2));
src = imread(argv[1], CV_LOAD_IMAGE_COLOR); // Read the file
cvtColor(src,src,CV_BGR2GRAY);
imshow( "plate", src );
GaussianBlur(src, src, Size(1,1), 1.5, 1.5);
imshow( "blur", src );
equalizeHist(src, src);
imshow( "equalize", src );
adaptiveThreshold(src, src, 255, ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 15, -1);
imshow( "threshold", src );
morphologyEx(src, src, MORPH_CLOSE, structure_elem);
imshow( "morphological operation", src );
imwrite("end.jpg", src);
waitKey(0);
return 0;
}
And my question is: do you know how to achieve better results, a clearer image, despite the poor quality of my license plate, so that OCR (for example Tesseract) could read it?
Thank you for your answers. I really do not know how to do this.
One possible algorithm to clean up the images is as follows:
Scale the image up, so that the letters are more substantial.
Reduce the image to only 8 colours by k-means clustering.
Threshold the image, and erode it to fill in any small gaps and make the letters more substantial.
Invert the image to make masking easier.
Create a blank mask image of the same size, set to all zeros
Find contours in the image. For each contour:
Find bounding box of the contour
Find the area of the bounding box
If the area is too small or too large, drop the contour (I chose 1000 and 10000 as limits)
Otherwise draw a filled rectangle corresponding to the bounding box on the mask with white colour (255)
Store the bounding box and the corresponding image ROI
For each separated character (bounding box + image)
Recognise the character
Note: I prototyped this in Python 2.7 with OpenCV 3.1. C++ ports of this code are near the end of this answer.
Character Recognition
I took inspiration for the character recognition from this question on SO.
Then I found an image that we can use to extract training images for the correct font. I cut them down to only include digits and letters without accents.
train_digits.png:
train_letters.png:
Then I wrote a script that splits out the individual characters, scales them up, and prepares training images that contain a single character per file:
import os
import cv2
import numpy as np
# ============================================================================
def extract_chars(img):
    bw_image = cv2.bitwise_not(img)
    contours = cv2.findContours(bw_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[1]

    char_mask = np.zeros_like(img)
    bounding_boxes = []
    for contour in contours:
        x,y,w,h = cv2.boundingRect(contour)
        x,y,w,h = x-2, y-2, w+4, h+4
        bounding_boxes.append((x,y,w,h))

    characters = []
    for bbox in bounding_boxes:
        x,y,w,h = bbox
        char_image = img[y:y+h,x:x+w]
        characters.append(char_image)

    return characters
# ============================================================================
def output_chars(chars, labels):
    for i, char in enumerate(chars):
        filename = "chars/%s.png" % labels[i]
        char = cv2.resize(char
            , None
            , fx=3
            , fy=3
            , interpolation=cv2.INTER_CUBIC)
        cv2.imwrite(filename, char)
# ============================================================================
if not os.path.exists("chars"):
os.makedirs("chars")
img_digits = cv2.imread("train_digits.png", 0)
img_letters = cv2.imread("train_letters.png", 0)
digits = extract_chars(img_digits)
letters = extract_chars(img_letters)
DIGITS = [0, 9, 8, 7, 6, 5, 4, 3, 2, 1]
LETTERS = [chr(ord('A') + i) for i in range(25,-1,-1)]
output_chars(digits, DIGITS)
output_chars(letters, LETTERS)
# ============================================================================
The next step was to generate the training data from the character files we created with the previous script.
I followed the algorithm from the answer to the question mentioned above, resizing each character image to 10x10 and using all the pixels as keypoints.
I save the training data as char_samples.data and char_responses.data
Script to generate training data:
import cv2
import numpy as np
CHARS = [chr(ord('0') + i) for i in range(10)] + [chr(ord('A') + i) for i in range(26)]
# ============================================================================
def load_char_images():
    characters = {}
    for char in CHARS:
        char_img = cv2.imread("chars/%s.png" % char, 0)
        characters[char] = char_img
    return characters
# ============================================================================
characters = load_char_images()
samples = np.empty((0,100))
for char in CHARS:
    char_img = characters[char]
    small_char = cv2.resize(char_img,(10,10))
    sample = small_char.reshape((1,100))
    samples = np.append(samples,sample,0)
responses = np.array([ord(c) for c in CHARS],np.float32)
responses = responses.reshape((responses.size,1))
np.savetxt('char_samples.data',samples)
np.savetxt('char_responses.data',responses)
# ============================================================================
Once we have the training data created, we can run the main script:
import cv2
import numpy as np
# ============================================================================
def reduce_colors(img, n):
    Z = img.reshape((-1,3))

    # convert to np.float32
    Z = np.float32(Z)

    # define criteria, number of clusters(K) and apply kmeans()
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    K = n
    ret,label,center = cv2.kmeans(Z,K,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)

    # Now convert back into uint8, and make original image
    center = np.uint8(center)
    res = center[label.flatten()]
    res2 = res.reshape((img.shape))

    return res2
# ============================================================================
def clean_image(img):
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    resized_img = cv2.resize(gray_img
        , None
        , fx=5.0
        , fy=5.0
        , interpolation=cv2.INTER_CUBIC)
    resized_img = cv2.GaussianBlur(resized_img,(5,5),0)
    cv2.imwrite('licence_plate_large.png', resized_img)

    equalized_img = cv2.equalizeHist(resized_img)
    cv2.imwrite('licence_plate_equ.png', equalized_img)

    reduced = cv2.cvtColor(reduce_colors(cv2.cvtColor(equalized_img, cv2.COLOR_GRAY2BGR), 8), cv2.COLOR_BGR2GRAY)
    cv2.imwrite('licence_plate_red.png', reduced)

    ret, mask = cv2.threshold(reduced, 64, 255, cv2.THRESH_BINARY)
    cv2.imwrite('licence_plate_mask.png', mask)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    mask = cv2.erode(mask, kernel, iterations = 1)
    cv2.imwrite('licence_plate_mask2.png', mask)

    return mask
# ============================================================================
def extract_characters(img):
    bw_image = cv2.bitwise_not(img)
    contours = cv2.findContours(bw_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[1]

    char_mask = np.zeros_like(img)
    bounding_boxes = []
    for contour in contours:
        x,y,w,h = cv2.boundingRect(contour)
        area = w * h
        center = (x + w/2, y + h/2)
        if (area > 1000) and (area < 10000):
            x,y,w,h = x-4, y-4, w+8, h+8
            bounding_boxes.append((center, (x,y,w,h)))
            cv2.rectangle(char_mask,(x,y),(x+w,y+h),255,-1)

    cv2.imwrite('licence_plate_mask3.png', char_mask)

    clean = cv2.bitwise_not(cv2.bitwise_and(char_mask, char_mask, mask = bw_image))

    bounding_boxes = sorted(bounding_boxes, key=lambda item: item[0][0])

    characters = []
    for center, bbox in bounding_boxes:
        x,y,w,h = bbox
        char_image = clean[y:y+h,x:x+w]
        characters.append((bbox, char_image))

    return clean, characters
def highlight_characters(img, chars):
    output_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    for bbox, char_img in chars:
        x,y,w,h = bbox
        cv2.rectangle(output_img,(x,y),(x+w,y+h),255,1)
    return output_img
# ============================================================================
img = cv2.imread("licence_plate.jpg")
img = clean_image(img)
clean_img, chars = extract_characters(img)
output_img = highlight_characters(clean_img, chars)
cv2.imwrite('licence_plate_out.png', output_img)
samples = np.loadtxt('char_samples.data',np.float32)
responses = np.loadtxt('char_responses.data',np.float32)
responses = responses.reshape((responses.size,1))
model = cv2.ml.KNearest_create()
model.train(samples, cv2.ml.ROW_SAMPLE, responses)
plate_chars = ""
for bbox, char_img in chars:
    small_img = cv2.resize(char_img,(10,10))
    small_img = small_img.reshape((1,100))
    small_img = np.float32(small_img)
    retval, results, neigh_resp, dists = model.findNearest(small_img, k = 1)
    plate_chars += str(chr(results[0][0]))
print("Licence plate: %s" % plate_chars)
Script Output
Enlarged 5x:
Equalized:
Reduced to 8 colours:
Thresholded:
Eroded:
Mask selecting only characters:
Clean image with bounding boxes:
Console output:
Licence plate: 2B99996
C++ code, using OpenCV 2.4.11 and Boost.Filesystem to iterate over files in a directory.
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
// ============================================================================
namespace fs = boost::filesystem;
// ============================================================================
typedef std::vector<std::string> string_list;
struct char_match_t
{
cv::Point2i position;
cv::Mat image;
};
typedef std::vector<char_match_t> char_match_list;
// ----------------------------------------------------------------------------
string_list find_input_files(std::string const& dir)
{
string_list result;
fs::path dir_path(dir);
fs::directory_iterator end_itr;
for (fs::directory_iterator i(dir_path); i != end_itr; ++i) {
if (!fs::is_regular_file(i->status())) continue;
if (i->path().extension() == ".png") {
result.push_back(i->path().string());
}
}
return result;
}
// ----------------------------------------------------------------------------
cv::Mat reduce_image(cv::Mat const& img, int K)
{
int n = img.rows * img.cols;
cv::Mat data = img.reshape(1, n);
data.convertTo(data, CV_32F);
std::vector<int> labels;
cv::Mat1f colors;
cv::kmeans(data, K, labels
, cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001)
, 5, cv::KMEANS_PP_CENTERS, colors);
for (int i = 0; i < n; ++i) {
data.at<float>(i, 0) = colors(labels[i], 0);
}
cv::Mat reduced = data.reshape(1, img.rows);
reduced.convertTo(reduced, CV_8U);
return reduced;
}
// ----------------------------------------------------------------------------
cv::Mat clean_image(cv::Mat const& img)
{
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(), 5.0, 5.0, cv::INTER_CUBIC);
cv::Mat equalized_img;
cv::equalizeHist(resized_img, equalized_img);
cv::Mat reduced_img(reduce_image(equalized_img, 8));
cv::Mat mask;
cv::threshold(reduced_img
, mask
, 64
, 255
, cv::THRESH_BINARY);
cv::Mat kernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)));
cv::erode(mask, mask, kernel, cv::Point(-1, -1), 1);
return mask;
}
// ----------------------------------------------------------------------------
cv::Point2i center(cv::Rect const& bounding_box)
{
return cv::Point2i(bounding_box.x + bounding_box.width / 2
, bounding_box.y + bounding_box.height / 2);
}
// ----------------------------------------------------------------------------
char_match_list extract_characters(cv::Mat const& img)
{
cv::Mat inverse_img;
cv::bitwise_not(img, inverse_img);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(inverse_img.clone(), contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
char_match_list result;
double const MIN_CONTOUR_AREA(1000.0);
double const MAX_CONTOUR_AREA(6000.0);
for (uint32_t i(0); i < contours.size(); ++i) {
cv::Rect bounding_box(cv::boundingRect(contours[i]));
int bb_area(bounding_box.area());
if ((bb_area >= MIN_CONTOUR_AREA) && (bb_area <= MAX_CONTOUR_AREA)) {
int PADDING(2);
bounding_box.x -= PADDING;
bounding_box.y -= PADDING;
bounding_box.width += PADDING * 2;
bounding_box.height += PADDING * 2;
char_match_t match;
match.position = center(bounding_box);
match.image = img(bounding_box);
result.push_back(match);
}
}
std::sort(begin(result), end(result)
, [](char_match_t const& a, char_match_t const& b) -> bool
{
return a.position.x < b.position.x;
});
return result;
}
// ----------------------------------------------------------------------------
std::pair<float, cv::Mat> train_character(char c, cv::Mat const& img)
{
cv::Mat small_char;
cv::resize(img, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
return std::pair<float, cv::Mat>(
static_cast<float>(c)
, small_char_linear);
}
// ----------------------------------------------------------------------------
std::string process_image(cv::Mat const& img, cv::KNearest& knn)
{
cv::Mat clean_img(clean_image(img));
char_match_list characters(extract_characters(clean_img));
std::string result;
for (char_match_t const& match : characters) {
cv::Mat small_char;
cv::resize(match.image, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
float p = knn.find_nearest(small_char_linear, 1);
result.push_back(char(p));
}
return result;
}
// ============================================================================
int main()
{
string_list train_files(find_input_files("./chars"));
cv::Mat samples, responses;
for (std::string const& file_name : train_files) {
cv::Mat char_img(cv::imread(file_name, 0));
std::pair<float, cv::Mat> tinfo(train_character(file_name[file_name.size() - 5], char_img));
responses.push_back(tinfo.first);
samples.push_back(tinfo.second);
}
cv::KNearest knn;
knn.train(samples, responses);
string_list input_files(find_input_files("./input"));
for (std::string const& file_name : input_files) {
cv::Mat plate_img(cv::imread(file_name, 0));
std::string plate(process_image(plate_img, knn));
std::cout << file_name << " : " << plate << "\n";
}
}
// ============================================================================
C++ code, using OpenCV 3.1 and Boost.Filesystem to iterate over files in a directory.
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
// ============================================================================
namespace fs = boost::filesystem;
// ============================================================================
typedef std::vector<std::string> string_list;
struct char_match_t
{
cv::Point2i position;
cv::Mat image;
};
typedef std::vector<char_match_t> char_match_list;
// ----------------------------------------------------------------------------
string_list find_input_files(std::string const& dir)
{
string_list result;
fs::path dir_path(dir);
boost::filesystem::directory_iterator end_itr;
for (boost::filesystem::directory_iterator i(dir_path); i != end_itr; ++i) {
if (!boost::filesystem::is_regular_file(i->status())) continue;
if (i->path().extension() == ".png") {
result.push_back(i->path().string());
}
}
return result;
}
// ----------------------------------------------------------------------------
cv::Mat reduce_image(cv::Mat const& img, int K)
{
int n = img.rows * img.cols;
cv::Mat data = img.reshape(1, n);
data.convertTo(data, CV_32F);
std::vector<int> labels;
cv::Mat1f colors;
cv::kmeans(data, K, labels
, cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001)
, 5, cv::KMEANS_PP_CENTERS, colors);
for (int i = 0; i < n; ++i) {
data.at<float>(i, 0) = colors(labels[i], 0);
}
cv::Mat reduced = data.reshape(1, img.rows);
reduced.convertTo(reduced, CV_8U);
return reduced;
}
// ----------------------------------------------------------------------------
cv::Mat clean_image(cv::Mat const& img)
{
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(), 5.0, 5.0, cv::INTER_CUBIC);
cv::Mat equalized_img;
cv::equalizeHist(resized_img, equalized_img);
cv::Mat reduced_img(reduce_image(equalized_img, 8));
cv::Mat mask;
cv::threshold(reduced_img
, mask
, 64
, 255
, cv::THRESH_BINARY);
cv::Mat kernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)));
cv::erode(mask, mask, kernel, cv::Point(-1, -1), 1);
return mask;
}
// ----------------------------------------------------------------------------
cv::Point2i center(cv::Rect const& bounding_box)
{
return cv::Point2i(bounding_box.x + bounding_box.width / 2
, bounding_box.y + bounding_box.height / 2);
}
// ----------------------------------------------------------------------------
char_match_list extract_characters(cv::Mat const& img)
{
cv::Mat inverse_img;
cv::bitwise_not(img, inverse_img);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(inverse_img.clone(), contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
char_match_list result;
double const MIN_CONTOUR_AREA(1000.0);
double const MAX_CONTOUR_AREA(6000.0);
for (int i(0); i < contours.size(); ++i) {
cv::Rect bounding_box(cv::boundingRect(contours[i]));
int bb_area(bounding_box.area());
if ((bb_area >= MIN_CONTOUR_AREA) && (bb_area <= MAX_CONTOUR_AREA)) {
int PADDING(2);
bounding_box.x -= PADDING;
bounding_box.y -= PADDING;
bounding_box.width += PADDING * 2;
bounding_box.height += PADDING * 2;
char_match_t match;
match.position = center(bounding_box);
match.image = img(bounding_box);
result.push_back(match);
}
}
std::sort(begin(result), end(result)
, [](char_match_t const& a, char_match_t const& b) -> bool
{
return a.position.x < b.position.x;
});
return result;
}
// ----------------------------------------------------------------------------
std::pair<float, cv::Mat> train_character(char c, cv::Mat const& img)
{
cv::Mat small_char;
cv::resize(img, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
return std::pair<float, cv::Mat>(
static_cast<float>(c)
, small_char_linear);
}
// ----------------------------------------------------------------------------
std::string process_image(cv::Mat const& img, cv::Ptr<cv::ml::KNearest> knn)
{
cv::Mat clean_img(clean_image(img));
char_match_list characters(extract_characters(clean_img));
std::string result;
for (char_match_t const& match : characters) {
cv::Mat small_char;
cv::resize(match.image, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
cv::Mat tmp;
float p = knn->findNearest(small_char_linear, 1, tmp);
result.push_back(char(p));
}
return result;
}
// ============================================================================
int main()
{
string_list train_files(find_input_files("./chars"));
cv::Mat samples, responses;
for (std::string const& file_name : train_files) {
cv::Mat char_img(cv::imread(file_name, 0));
std::pair<float, cv::Mat> tinfo(train_character(file_name[file_name.size() - 5], char_img));
responses.push_back(tinfo.first);
samples.push_back(tinfo.second);
}
cv::Ptr<cv::ml::KNearest> knn(cv::ml::KNearest::create());
cv::Ptr<cv::ml::TrainData> training_data =
cv::ml::TrainData::create(samples
, cv::ml::SampleTypes::ROW_SAMPLE
, responses);
knn->train(training_data);
string_list input_files(find_input_files("./input"));
for (std::string const& file_name : input_files) {
cv::Mat plate_img(cv::imread(file_name, 0));
std::string plate(process_image(plate_img, knn));
std::cout << file_name << " : " << plate << "\n";
}
}
// ============================================================================