How to remove shadow from scanned images using OpenCV? - python

I'd like to remove shadows before image binarization using OpenCV. I've tried Otsu's method and adaptive thresholding, but for images with large regions of shadow, neither gives good results.
Any better solutions? Thanks in advance.

Since you didn't specify any language, I'll assume Python to illustrate.
A decent starting point might be to take the approach I show in this answer and expand it to work with multiple channels.
Something along the lines of:
import cv2
import numpy as np

img = cv2.imread('shadows.png', -1)

rgb_planes = cv2.split(img)

result_planes = []
result_norm_planes = []
for plane in rgb_planes:
    dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
    bg_img = cv2.medianBlur(dilated_img, 21)
    diff_img = 255 - cv2.absdiff(plane, bg_img)
    norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    result_planes.append(diff_img)
    result_norm_planes.append(norm_img)

result = cv2.merge(result_planes)
result_norm = cv2.merge(result_norm_planes)

cv2.imwrite('shadows_out.png', result)
cv2.imwrite('shadows_out_norm.png', result_norm)
The non-normalized result looks as follows:
And the normalized result:
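Since the question is ultimately about binarization, you could then threshold the shadow-free result; a minimal sketch using Otsu on the normalized output (assuming the merged result has 3 channels):

gray = cv2.cvtColor(result_norm, cv2.COLOR_BGR2GRAY)
# Otsu picks the threshold automatically on the now roughly uniform background
_, binarized = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imwrite('shadows_out_binarized.png', binarized)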
Example C++ implementation provided by @ruben-estrada-marmolejo
Added as requested: C/C++ code, without relying on using namespace.
//Compile with:
//g++ example.cpp -o salida `pkg-config --cflags --libs opencv4`
//Ruben Estrada Marmolejo
//ruben.estrada@hetpro.com.mx
//Original idea: https://stackoverflow.com/questions/44752240/how-to-remove-shadow-from-scanned-images-using-opencv/44752405#44752405
#include<opencv4/opencv2/cvconfig.h>
#include<opencv2/core/core.hpp>
#include<opencv2/ml/ml.hpp>
//#include<opencv/cv.h>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/video/background_segm.hpp>
#include<opencv2/videoio.hpp>
#include<opencv2/imgcodecs.hpp>
#include <iostream>
void removeShadow(cv::Mat const& src, cv::Mat &result1_diff_img, cv::Mat &result2_norm_img){
    std::vector<cv::Mat> channels;
    cv::split(src, channels);
    cv::Mat zero = cv::Mat::zeros(src.size(), CV_8UC1);
    // 7x7 rectangular kernel, matching the np.ones((7,7)) of the Python version
    // (note: MORPH_OPEN is an operation, not an element shape, and a 1x1 kernel would be a no-op)
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(7,7));
    cv::Mat diff_img[3];
    cv::Mat norm_img[3];
    for (int i = 0; i < 3; i++){
        cv::Mat dilated_img;
        cv::dilate(channels[i], dilated_img, kernel, cv::Point(-1,-1), 1, cv::BORDER_CONSTANT, cv::morphologyDefaultBorderValue());
        cv::Mat bg_img;
        cv::medianBlur(channels[i], bg_img, 21);
        cv::absdiff(channels[i], bg_img, diff_img[i]);
        cv::bitwise_not(diff_img[i], diff_img[i]);
        cv::normalize(diff_img[i], norm_img[i], 0, 255, cv::NORM_MINMAX, CV_8UC1, cv::noArray());
    }
    std::vector<cv::Mat> R1B1 = { diff_img[0], zero, zero };
    std::vector<cv::Mat> R1G1 = { zero, diff_img[1], zero };
    std::vector<cv::Mat> R1R1 = { zero, zero, diff_img[2] };
    cv::Mat result1_B;
    cv::Mat result1_G;
    cv::Mat result1_R;
    cv::merge(R1B1, result1_B);
    cv::merge(R1G1, result1_G);
    cv::merge(R1R1, result1_R);
    cv::bitwise_or(result1_B, result1_G, result1_G);
    cv::bitwise_or(result1_G, result1_R, result1_diff_img);
    std::vector<cv::Mat> R2B1 = { norm_img[0], zero, zero };
    std::vector<cv::Mat> R2G1 = { zero, norm_img[1], zero };
    std::vector<cv::Mat> R2R1 = { zero, zero, norm_img[2] };
    cv::Mat result2_B;
    cv::Mat result2_G;
    cv::Mat result2_R;
    cv::merge(R2B1, result2_B);
    cv::merge(R2G1, result2_G);
    cv::merge(R2R1, result2_R);
    cv::bitwise_or(result2_B, result2_G, result2_G);
    cv::bitwise_or(result2_G, result2_R, result2_norm_img);
}

int main(){
    cv::Mat img = cv::imread("test.jpg", cv::IMREAD_COLOR);
    if(img.empty())
    {
        std::cout << "Could not read the image." << std::endl;
        return 1;
    }
    cv::Mat result1;
    cv::Mat result2;
    removeShadow(img, result1, result2);
    cv::imshow("Display window", result1);
    int k = cv::waitKey(0); // wait for a keystroke in the window
    if(k == 's')
    {
        cv::imwrite("result1.png", result1);
    }
    return 0;
}


Unable to get object Pose and draw axis with 4 markers
I am trying to get the object pose by following this tutorial for pose estimation. In the video the author uses a chessboard pattern (24,17) and mentions in the comments that any object with detectable markers can be used to estimate the pose.
I am using this object with only 4 circular markers. I am able to detect the markers and get the (x,y) points (imagePoints), and the objectPoints with an arbitrary reference. I have my camera calibrated (cameraMatrix and distortion coefficients). Following the tutorial, I am unable to draw the object frame.
This is what I was able to do so far.
# (x,y) points of detected markers; another function processes the image and returns the 4 points
Points = [(x1,y1),(x2,y2),(x3,y3),(x4,y4)]

image_points = np.array([
    Points[0],
    Points[1],
    Points[2],
    Points[3]
], dtype=np.float32)
image_points = np.ascontiguousarray(image_points).reshape((4,1,2))

object_points = np.array([
    (80,56,0),
    (80,72,0),
    (57,72,0),
    (57,88,0)],
    dtype=np.float32).reshape((4,1,3))  # set Z as 0

axis = np.float32([[5,0,0], [0,5,0], [0,0,-5]]).reshape(-1,3)
imgpts, jac = cv2.projectPoints(axis, rotation_vector, translation_vector, mtx, dist)
What am I missing?
This is what I am trying to achieve.
Goal
This is the current result
Current
Camera Distance from the object is fixed. I need to track Translation and Rotation in x and y.
EDIT:
Sample Image markings
Updated Object
Updated Result
Pixel Values of markers from Top-Left to bottom-right
Point_A = (1081, 544)
Point_B = (1090, 782)
Point_C = (824, 785) #Preferred Origin Point
Point_D = (826, 1050)
Camera Parameters
mtx: [[2.34613584e+03 0.00000000e+00 1.24680613e+03]
[0.00000000e+00 2.34637787e+03 1.11379469e+03]
[0.00000000e+00 0.00000000e+00 1.00000000e+00]]
dist:
[[-0.05595266 0.07570472 0.00200983 0.00073797 -0.30768105]]
Python Code
Here's an example in C++ with your image and object, but I extracted the image points again (because the provided values didn't match the image) and I used a pinhole camera (no distortion). Results should be similar or better if you use the actual camera parameters.
int main()
{
    cv::Mat img = cv::imread("C:/data/StackOverflow/solvePnp.png");

    // assuming a pinhole camera, because the actual intrinsics/distortion is unknown.
    cv::Mat intrinsics = cv::Mat::eye(3, 3, CV_64FC1);
    intrinsics.at<double>(0, 2) = img.cols / 2.0;
    intrinsics.at<double>(1, 2) = img.rows / 2.0;
    intrinsics.at<double>(0, 0) = 1000;
    intrinsics.at<double>(1, 1) = 1000;

    // assumed: no distortion.
    std::vector<double> distCoeffs;
    distCoeffs.resize(4, 0);

    // provided object points
    std::vector<cv::Point3f> objPoints;
    objPoints.push_back({ 80.0f, 56.0f, 0.0f });
    objPoints.push_back({ 80.0f, 72.0f, 0.0f });
    objPoints.push_back({ 57.0f, 72.0f, 0.0f });
    objPoints.push_back({ 57.0f, 88.0f, 0.0f });

    // we want the third point to be the origin of the object, so we have to shift the coordinate system:
    cv::Point3f origin = objPoints[2];
    for (int i = 0; i < objPoints.size(); ++i)
    {
        objPoints[i] = objPoints[i] - origin;
    }

    std::vector<cv::Point2f> imgPoints;
    /*
    // WRONG PROVIDED VALUES!
    imgPoints.push_back({ 1081, 544 });
    imgPoints.push_back({ 1090, 782 });
    imgPoints.push_back({ 824, 785 });
    imgPoints.push_back({ 826, 1050 });
    */
    // image points read from the image
    imgPoints.push_back({ 1123, 558 });
    imgPoints.push_back({ 1132, 814 });
    imgPoints.push_back({ 851, 818 });
    imgPoints.push_back({ 852, 1097 });

    cv::Vec3d rot;
    cv::Vec3d trans;
    cv::solvePnP(objPoints, imgPoints, intrinsics, distCoeffs, rot, trans);

    // project the object-space axes into the image
    std::vector<cv::Point3f> axis;
    axis.push_back({ 0, 0, 0 });
    axis.push_back({ 10, 0, 0 });
    axis.push_back({ 0, 10, 0 });
    axis.push_back({ 0, 0, 10 });
    std::vector<cv::Point2f> axisImg;
    cv::projectPoints(axis, rot, trans, intrinsics, distCoeffs, axisImg);

    cv::line(img, axisImg[0], axisImg[1], cv::Scalar(0, 0, 255), 5);
    cv::line(img, axisImg[0], axisImg[2], cv::Scalar(0, 255, 0), 5);
    cv::line(img, axisImg[0], axisImg[3], cv::Scalar(255, 0, 0), 5);

    std::cout << axisImg[0] << std::endl;
    std::cout << axisImg[1] << std::endl;
    std::cout << axisImg[2] << std::endl;
    std::cout << axisImg[3] << std::endl;

    for (int i = 0; i < imgPoints.size(); ++i)
    {
        cv::circle(img, imgPoints[i], 5, cv::Scalar(i * 255, (i == 0) ? 255 : 0, i * 50));
    }

    cv::imwrite("C:/data/StackOverflow/solvePnp_RESULT.png", img);
    cv::resize(img, img, cv::Size(), 0.25, 0.25);
    cv::imshow("img", img);
    cv::waitKey(0);
}
red = X
green = Y
blue = Z
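For reference, the step missing from the Python code in the question is the cv2.solvePnP call that produces rotation_vector and translation_vector before cv2.projectPoints is called. A minimal sketch, assuming image_points, object_points, mtx and dist as defined in the question:

success, rotation_vector, translation_vector = cv2.solvePnP(
    object_points, image_points, mtx, dist)
axis = np.float32([[5,0,0], [0,5,0], [0,0,-5]]).reshape(-1,3)
imgpts, jac = cv2.projectPoints(axis, rotation_vector, translation_vector, mtx, dist)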

Adjusting and Improving X-ray image's contrast and its quality

I want to adjust/improve the contrast of X-ray images of the hand. I have about 10000 of these X-ray images. Instead of editing them manually, I want to code or automate the process. Most of the images in the dataset have qualities like these three images.
I've tried the suggestions from here and here. However, when I run some image samples through, I get the same output as the input.
Are there any better ways to improve the contrast, particularly for hand X-ray images? From these three inputs, I want them to look like these. If it can be done automatically, that would be awesome too. How would one code this?
I would suggest using skimage's rescale_intensity in Python/OpenCV/Skimage to stretch the dynamic range (after covering over the label with the average color). It automatically finds the input min and max values and stretches those to full black and full white (i.e. 0 and 255), or you can compute the min and max values yourself and bias them if desired.
- Read the input
- Convert to grayscale
- Blur
- Apply morphology close
- Threshold
- Get contours and exclude those that are too small or too large, so that only the label is selected
- Draw a filled contour, with value equal to the mean of the grayscale image, on a copy of the grayscale image, in order to cover over the label
- Use skimage to stretch the min and max values to 0 and 255 respectively. If you do not need to bias the min or max, you can replace (minval,maxval) with 'image'; it will then compute them automatically and you will not need to use Numpy to find them (see the variant after this list)
- Save the output
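For reference, the automatic variant mentioned in the last step would look like this (a sketch, shown for one of the masked grayscale images):

result1 = skimage.exposure.rescale_intensity(masked1, in_range='image', out_range=(0,255)).astype(np.uint8)

The full script: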
import cv2
import numpy as np
import skimage.exposure
# load images
img1 = cv2.imread('xray1.webp')
img2 = cv2.imread('xray2.webp')
img3 = cv2.imread('xray3.webp')
# convert to gray
gray1 = cv2.cvtColor(img1,cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
gray3 = cv2.cvtColor(img3,cv2.COLOR_BGR2GRAY)
# blur
blur1 = cv2.GaussianBlur(gray1, (0,0), sigmaX=6, sigmaY=6)
blur2 = cv2.GaussianBlur(gray2, (0,0), sigmaX=6, sigmaY=6)
blur3 = cv2.GaussianBlur(gray3, (0,0), sigmaX=6, sigmaY=6)
# morphology
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (45,45))
morph1 = cv2.morphologyEx(blur1, cv2.MORPH_CLOSE, kernel)
morph2 = cv2.morphologyEx(blur2, cv2.MORPH_CLOSE, kernel)
morph3 = cv2.morphologyEx(blur3, cv2.MORPH_CLOSE, kernel)
# threshold
thresh1 = cv2.threshold(morph1, 0, 255, cv2.THRESH_OTSU)[1]
thresh2 = cv2.threshold(morph2, 0, 255, cv2.THRESH_OTSU)[1]
thresh3 = cv2.threshold(morph3, 0, 255, cv2.THRESH_OTSU)[1]
# get contours and filter on size
masked1 = gray1.copy()
meanval = int(np.mean(masked1))
contours = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 500 and area < 50000:
        cv2.drawContours(masked1, [cntr], 0, (meanval), -1)
masked2 = gray2.copy()
meanval = int(np.mean(masked2))
contours = cv2.findContours(thresh2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 500 and area < 50000:
        cv2.drawContours(masked2, [cntr], 0, (meanval), -1)
masked3 = gray3.copy()
meanval = int(np.mean(masked3))
contours = cv2.findContours(thresh3, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 500 and area < 50000:
        cv2.drawContours(masked3, [cntr], 0, (meanval), -1)
# stretch
minval = int(np.amin(masked1))
maxval = int(np.amax(masked1))
result1 = skimage.exposure.rescale_intensity(masked1, in_range=(minval,maxval), out_range=(0,255)).astype(np.uint8)
minval = int(np.amin(masked2))
maxval = int(np.amax(masked2))
result2 = skimage.exposure.rescale_intensity(masked2, in_range=(minval,maxval), out_range=(0,255)).astype(np.uint8)
minval = int(np.amin(masked3))
maxval = int(np.amax(masked3))
result3 = skimage.exposure.rescale_intensity(masked3, in_range=(minval,maxval), out_range=(0,255)).astype(np.uint8)
# save output
cv2.imwrite('xray1_stretched.png', result1)
cv2.imwrite('xray2_stretched.png', result2)
cv2.imwrite('xray3_stretched.png', result3)
# Display various images to see the steps
cv2.imshow('thresh1', thresh1)
cv2.imshow('thresh2', thresh2)
cv2.imshow('thresh3', thresh3)
cv2.imshow('result1', result1)
cv2.imshow('result2', result2)
cv2.imshow('result3', result3)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result 1:
Result 2:
Result 3:
I tried a local-type enhancement (with C++).
Looking at the results, I'm worried about the strong halo...
Note: I tested with 50%-size images, because the originals are too large for my monitor.
class MyTryImpl
{
public:
    MyTryImpl( double Sigmoid_A=5, unsigned char T_min=5, unsigned char T_max=250 )
    { MakeTable( Sigmoid_A, T_min, T_max ); }

public:
    void Proc( const cv::Mat &Src8UC1, const cv::Mat &GlobalLMap, cv::Mat &Dst8UC1 ) const
    {
        if( Src8UC1.empty() || Src8UC1.type()!=CV_8UC1 ){ throw std::invalid_argument( "must : Src is 8UC1" ); }
        if( GlobalLMap.empty() || GlobalLMap.type()!=CV_8UC1 ){ throw std::invalid_argument( "must : GlobalLMap is 8UC1" ); }
        if( Src8UC1.size() != GlobalLMap.size() ){ throw std::invalid_argument( "must : Src8UC1.size() == GlobalLMap.size()" ); }

        Dst8UC1.create( Src8UC1.size(), CV_8UC1 );
        for( int y=0; y<Src8UC1.rows; ++y )
        {
            const unsigned char *pS = Src8UC1.ptr<unsigned char>(y);
            const unsigned char *pB = GlobalLMap.ptr<unsigned char>(y);
            unsigned char *pR = Dst8UC1.ptr<unsigned char>(y);
            for( int x=0; x<Src8UC1.cols; ++x, ++pS,++pB,++pR )
            { *pR = m_ResultTbl[ *pS ][ *pB ]; }
        }
    }

    void Proc(
        const cv::Mat &Src8UC1, cv::Mat &Dst8UC1,
        int BlurKernelSize,
        bool WithGaussian=true // if true GaussianFilter, else BoxFilter
    ) const
    {
        if( Src8UC1.empty() || Src8UC1.type()!=CV_8UC1 )
        { throw std::invalid_argument( "must : Src is 8UC1" ); }

        cv::Mat Blurred;
        {
            int s = std::max( 3, BlurKernelSize | 0x01 );
            if( WithGaussian ){ cv::GaussianBlur( Src8UC1, Blurred, cv::Size(s,s), 0 ); }
            else { cv::blur( Src8UC1, Blurred, cv::Size(s,s) ); }
        }
        Proc( Src8UC1, Blurred, Dst8UC1 );
    }

private:
    void MakeTable( double Sigmoid_A, unsigned char T_min, unsigned char T_max )
    {
        if( T_min==0 ){ throw std::invalid_argument( "must : T_min > 0" ); }
        if( T_max==255 ){ throw std::invalid_argument( "must : T_max < 255" ); }
        if( T_min>=T_max ){ throw std::invalid_argument( "must : T_min < T_max" ); }

        unsigned char B = 0;
        while( true )
        {
            double b = std::max( T_min, std::min(T_max,B) ) / 255.0;
            const double g = log(0.5) / log( b<=0.5 ? b : 1.0-b );

            unsigned char S = 0;
            while( true )
            {
                double s = S / 255.0;
                double c = Sig( Gam( (b<=0.5 ? s : 1.0-s), g ), Sigmoid_A );
                m_ResultTbl[S][B] = cvRound( 255.0 * (b<=0.5 ? c : 1.0-c) );
                if( S==0xFF )break;
                ++S;
            }
            if( B==0xFF )break;
            ++B;
        }
    }

    static double Sig( double x, double a )
    {
        double exp1 = exp( -a*(2*x - 1) );
        double exp2 = exp( -a );
        double nume = (1-exp1)*(1+exp2);
        double denom = (1+exp1)*(1-exp2);
        return 0.5 * ( 1 + nume/denom );
    }

    static double Gam( double x, double g ){ return pow( x, g ); }

private:
    unsigned char m_ResultTbl[256][256];
};

int main()
{   // test for 3 imgs
    const std::string SrcImgFileNames[3] = { "Xray1.png", "Xray2.png", "Xray3.png" };
    const std::string SaveFileNames[3] = { "Result1.png", "Result2.png", "Result3.png" };

    MyTryImpl MyTest;

    for( int i=0; i<3; ++i )
    {
        cv::Mat SrcImg = cv::imread( SrcImgFileNames[i], cv::IMREAD_GRAYSCALE );
        if( SrcImg.empty() )return 0;

        cv::Mat ResultImg;
        MyTest.Proc( SrcImg, ResultImg, SrcImg.cols/10, true );
        cv::imwrite( SaveFileNames[i], ResultImg );
    }
    return 0;
}
Results:
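As an aside, if the halo from local enhancement is a problem, contrast-limited adaptive histogram equalization (CLAHE) is another common automated option for X-ray images. A minimal Python sketch; the clipLimit and tileGridSize values are guesses that would need tuning per dataset:

import cv2

gray = cv2.imread('xray1.webp', cv2.IMREAD_GRAYSCALE)
# the clip limit caps per-tile contrast amplification, which limits halo and noise blow-up
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
cv2.imwrite('xray1_clahe.png', enhanced)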

Python API to C++ - return pointer to object instead of full object

I created a Python module to call some C++ functions of my library.
It uses OpenCV to manipulate matrices.
My python code
ratio = 2
image = cv2.imread("zebre_1080p.png")
rows, cols, dim = image.shape
input_image = cv2.resize(image, (int(cols - cols % ratio), int(rows - rows % ratio)))
output_image = image
cv2.imshow('input', input_image)
cv2.waitKey(0)
test = py_module.testProcess(input_image, output_image)  # increase the resolution of the input image
cv2.imshow('output', output_image)  # same as image
cv2.waitKey(0)
cv2.imshow('output', test)  # input_image with new resolution
cv2.waitKey(0)
My C++ code
static PyObject* testProcess(PyObject* self, PyObject* args) {
    PyArrayObject *input, *output;
    if ( ! PyArg_ParseTuple(args, "OO", &input, &output) ) return NULL;
    // in_dims/out_dims: the array dimensions, e.g. obtained via PyArray_SHAPE()
    cv::Mat in = cv::Mat(cv::Size(in_dims[0], in_dims[1]), CV_8UC3, PyArray_DATA(input));
    cv::Mat out = cv::Mat(cv::Size(out_dims[0], out_dims[1]), CV_8UC3);
    int ret = Process(in, out);
    if(ret < 0){
        printf("Erreur Process : %x\n", ret);
        return NULL;
    }
    npy_intp dimens[] = {out.rows, out.cols};
    PyObject* obj = PyArray_SimpleNewFromData(out.dims, dimens, NPY_UINT8, out.data);
    return obj;
}
So this solution works, but I want the Python call to behave like the C++ function: return the status code, and write the out matrix into the output_image object, like this:
ret = py_module.testModule(input_image, output_image)  # increase the resolution of the input image
cv2.imshow('output', output_image)  # input_image with new resolution
cv2.waitKey(0)
I don't know if it is possible; maybe with:
Py_BuildValue("O", obj);
or something else?
Thanks.
I managed to do it like this:
static PyObject* testProcess(PyObject* self, PyObject* args) {
    _import_array();
    PyArrayObject *input, *output;
    if ( ! PyArg_ParseTuple(args, "OO", &input, &output) ) return NULL;
    npy_intp* in_shape = PyArray_SHAPE(input);
    npy_intp* out_shape = PyArray_SHAPE(output);
    cv::Mat in = cv::Mat(in_shape[0], in_shape[1], CV_8UC3, PyArray_DATA(input));
    cv::Mat out = cv::Mat(out_shape[0], out_shape[1], CV_8UC3, PyArray_DATA(output));
    int ret = Process(in, out);
    if(ret < 0){
        printf("Erreur Process : %x\n", ret);
        return NULL;
    }
    PyArrayObject *obj = (PyArrayObject *)PyArray_SimpleNewFromData(out.channels(), out_shape, PyArray_TYPE(output), out.data);
    PyArray_MoveInto(output, obj);
    return PyLong_FromLong( 0 );
}
My Python script:
def main():
    ratio = 2
    image = "zebre.png"

    # read image
    orgHRImage = cv2.imread(image)
    cols, rows, dim = orgHRImage.shape

    # crop to a multiple of 2
    orgHRImage = cv2.resize(orgHRImage, (int(cols - cols % ratio), int(rows - rows % ratio)))

    # downscale image
    downSize = (int(cols/ratio), int(rows/ratio))
    downScaledImage = cv2.resize(orgHRImage, downSize)

    # create empty output image
    output = np.zeros(orgHRImage.shape, orgHRImage.dtype)

    #cv2.imshow('downScaledImage', downScaledImage)
    #cv2.waitKey(0)
    #cv2.imshow('output', output)
    #cv2.waitKey(0)
    py_module.testProcess(downScaledImage, output)
    #cv2.imshow('output', output)
    #cv2.waitKey(0)
So I create one input matrix (cv::Mat in) from the data of input (which is an image) and one output matrix (cv::Mat out) from the data of output (which was initialized to zeros in my Python script).
I call the Process function to fill my output matrix.
I create a new PyArrayObject with the same properties as output and with the fresh data of the matrix out.
I move the new object into my old output pointer, so output now holds the new data.
I was able to see the picture resized to half its size, then a black image, and at the end the resized image from output.
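A quick way to check this in-place behaviour from Python (a sketch, reusing the names from the script above):

output = np.zeros(orgHRImage.shape, orgHRImage.dtype)
ret = py_module.testProcess(downScaledImage, output)
print(ret)           # 0 on success
print(output.any())  # True once Process has written into the buffer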

Python vs. C++ OpenCV matchTemplate

I have a weird problem with OpenCV. I was doing template matching with OpenCV in both Python and C++; however, even though Python uses the C++ methods under the hood, I get very different results. The Python method gives me a really accurate location, while the C++ one is not even close. What is the reason for this? Is it my C++ code or something else?
I use Python 2.7.11, Apple LLVM version 7.3.0 (clang-703.0.29), and OpenCV 3.0.
My Python Code:
def toGray(img):
    _, _, channels = img.shape
    if channels == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    return gray

def template_match(img, template):
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)
    cv2.rectangle(img, top_left, bottom_right, 255, 2)
    plt.subplot(121), plt.imshow(res, cmap='gray')
    plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
    plt.subplot(122), plt.imshow(img, cmap='gray')
    plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
    plt.suptitle("TM_CCOEFF_NORMED")
    plt.show()

if __name__ == "__main__":
    img_name = sys.argv[1]
    img_name2 = sys.argv[2]
    img_rgb = cv2.imread(img_name)
    img_rgb2 = cv2.imread(img_name2)
    gimg1 = toGray(img_rgb)
    gimg2 = toGray(img_rgb2)
    template_match(gimg1, gimg2)
My C++ code (it is exactly the same as the OpenCV documentation):
Mat img; Mat templ; Mat result;
char* image_window = "Source Image";
char* result_window = "Result window";
int match_method;
int max_Trackbar = 5;

/// Function Headers
void MatchingMethod( int, void* );

/** @function main */
int main( int argc, char** argv )
{
    /// Load image and template
    img = imread( argv[1], 1 );
    templ = imread( argv[2], 1 );

    /// Create windows
    namedWindow( image_window, CV_WINDOW_AUTOSIZE );
    namedWindow( result_window, CV_WINDOW_AUTOSIZE );

    /// Create Trackbar
    char* trackbar_label = "Method: \n 0: SQDIFF \n 1: SQDIFF NORMED \n 2: TM CCORR \n 3: TM CCORR NORMED \n 4: TM COEFF \n 5: TM COEFF NORMED";
    createTrackbar( trackbar_label, image_window, &match_method, max_Trackbar, MatchingMethod );

    MatchingMethod( 0, 0 );

    waitKey(0);
    return 0;
}

/**
 * @function MatchingMethod
 * @brief Trackbar callback
 */
void MatchingMethod( int, void* )
{
    /// Source image to display
    Mat img_display;
    img.copyTo( img_display );

    /// Create the result matrix
    int result_cols = img.cols - templ.cols + 1;
    int result_rows = img.rows - templ.rows + 1;
    result.create( result_rows, result_cols, CV_32FC1 );

    /// Do the Matching and Normalize
    matchTemplate( img, templ, result, match_method );
    normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );

    /// Localizing the best match with minMaxLoc
    double minVal; double maxVal; Point minLoc; Point maxLoc;
    Point matchLoc;
    minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );

    /// For SQDIFF and SQDIFF_NORMED, the best matches are lower values. For all the other methods, the higher the better
    if( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED )
    { matchLoc = minLoc; }
    else
    { matchLoc = maxLoc; }

    /// Show me what you got
    rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols, matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
    rectangle( result, matchLoc, Point( matchLoc.x + templ.cols, matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );

    imshow( image_window, img_display );
    imshow( result_window, result );
    cv::imwrite("rec.jpg", img_display);

    return;
}
Original Images:
Python Output:
C++ Output:
Looking through the two implementations, the most evident difference between them is the colour format of the images used.
In the Python version, you load the images in colour, but then explicitly convert them to grayscale with toGray() before matching, so the template matching runs on single-channel images.
img_rgb = cv2.imread(img_name)
img_rgb2 = cv2.imread(img_name2)
gimg1 = toGray(img_rgb)
gimg2 = toGray(img_rgb2)
In C++, however, you load the images in colour, since you pass 1 (IMREAD_COLOR) as the second parameter, and never convert them, so the matching runs on 3-channel images.
img = imread( argv[1], 1 );
templ = imread( argv[2], 1 );
According to cv::matchTemplate documentation:
In case of a color image, template summation in the numerator and each
sum in the denominator is done over all of the channels and separate
mean values are used for each channel. That is, the function can take
a color template and a color image. The result will still be a
single-channel image, which is easier to analyze.
That would suggest that it's quite possible to get different results when applying it to a 3-channel image than when applying it to a single-channel version of the same image.
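A quick way to see the effect from Python (a sketch, reusing img_rgb/img_rgb2 and gimg1/gimg2 from the question) is to compare the best-match locations on the colour and grayscale versions:

res_colour = cv2.matchTemplate(img_rgb, img_rgb2, cv2.TM_CCOEFF_NORMED)
res_gray = cv2.matchTemplate(gimg1, gimg2, cv2.TM_CCOEFF_NORMED)
print(cv2.minMaxLoc(res_colour)[3])  # best-match location on colour images
print(cv2.minMaxLoc(res_gray)[3])    # best-match location on grayscale images

To make the two programs comparable, it should be enough to load the images with IMREAD_GRAYSCALE on the C++ side as well.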

Recognize the characters of license plate

I am trying to recognize the characters of license plates using OCR, but my licence plates are of poor quality.
I'm trying to somehow improve character recognition for OCR, but my best result is this: result.
Even Tesseract does not recognize any character in this picture. My code is:
#include <cv.h>       // OpenCV general include file
#include <highgui.h>  // OpenCV GUI include file
#include <iostream>   // standard C++ I/O
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>

using namespace cv;

int main( int argc, char** argv )
{
    Mat src;
    Mat dst;
    Mat const structure_elem = getStructuringElement(
        MORPH_RECT, Size(2,2));

    src = imread(argv[1], CV_LOAD_IMAGE_COLOR);   // read the file
    cvtColor(src, src, CV_BGR2GRAY);
    imshow( "plate", src );

    GaussianBlur(src, src, Size(1,1), 1.5, 1.5);
    imshow( "blur", src );

    equalizeHist(src, src);
    imshow( "equalize", src );

    adaptiveThreshold(src, src, 255, ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 15, -1);
    imshow( "threshold", src );

    morphologyEx(src, src, MORPH_CLOSE, structure_elem);
    imshow( "morphological operation", src );

    imwrite("end.jpg", src);
    waitKey(0);
    return 0;
}
And my question is: do you know how to achieve better results, i.e. a clearer image despite the poor quality of my licence plate, so that OCR (for example Tesseract) could read the result?
Thank you for your answers. I really do not know how to do it.
One possible algorithm to clean up the images is as follows:

- Scale the image up, so that the letters are more substantial.
- Reduce the image to only 8 colours by k-means clustering.
- Threshold the image, and erode it to fill in any small gaps and make the letters more substantial.
- Invert the image to make masking easier.
- Create a blank mask image of the same size, set to all zeros.
- Find contours in the image. For each contour:
  - Find the bounding box of the contour.
  - Find the area of the bounding box.
  - If the area is too small or too large, drop the contour (I chose 1000 and 10000 as limits).
  - Otherwise draw a filled rectangle, corresponding to the bounding box, on the mask in white (255).
  - Store the bounding box and the corresponding image ROI.
- For each separated character (bounding box + image):
  - Recognise the character.
Note: I prototyped this in Python 2.7 with OpenCV 3.1. C++ ports of this code are near the end of this answer.
Character Recognition
I took inspiration for the character recognition from this question on SO.
Then I found an image that we can use to extract training images for the correct font. I cut them down to only include digits and letters without accents.
train_digits.png:
train_letters.png:
Then I wrote a script that splits out the individual characters, scales them up, and prepares training images containing a single character per file:
import os
import cv2
import numpy as np

# ============================================================================

def extract_chars(img):
    bw_image = cv2.bitwise_not(img)
    contours = cv2.findContours(bw_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[1]

    char_mask = np.zeros_like(img)
    bounding_boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        x, y, w, h = x - 2, y - 2, w + 4, h + 4
        bounding_boxes.append((x, y, w, h))

    characters = []
    for bbox in bounding_boxes:
        x, y, w, h = bbox
        char_image = img[y:y+h, x:x+w]
        characters.append(char_image)

    return characters

# ============================================================================

def output_chars(chars, labels):
    for i, char in enumerate(chars):
        filename = "chars/%s.png" % labels[i]
        char = cv2.resize(char
            , None
            , fx=3
            , fy=3
            , interpolation=cv2.INTER_CUBIC)
        cv2.imwrite(filename, char)

# ============================================================================

if not os.path.exists("chars"):
    os.makedirs("chars")

img_digits = cv2.imread("train_digits.png", 0)
img_letters = cv2.imread("train_letters.png", 0)

digits = extract_chars(img_digits)
letters = extract_chars(img_letters)

DIGITS = [0, 9, 8, 7, 6, 5, 4, 3, 2, 1]
LETTERS = [chr(ord('A') + i) for i in range(25, -1, -1)]

output_chars(digits, DIGITS)
output_chars(letters, LETTERS)

# ============================================================================
The next step was to generate the training data from the character files we created with the previous script.
I followed the algorithm from the answer to the question mentioned above, resizing each character image to 10x10 and using all the pixels as keypoints.
I save the training data as char_samples.data and char_responses.data
Script to generate training data:
import cv2
import numpy as np

CHARS = [chr(ord('0') + i) for i in range(10)] + [chr(ord('A') + i) for i in range(26)]

# ============================================================================

def load_char_images():
    characters = {}
    for char in CHARS:
        char_img = cv2.imread("chars/%s.png" % char, 0)
        characters[char] = char_img
    return characters

# ============================================================================

characters = load_char_images()
samples = np.empty((0, 100))
for char in CHARS:
    char_img = characters[char]
    small_char = cv2.resize(char_img, (10, 10))
    sample = small_char.reshape((1, 100))
    samples = np.append(samples, sample, 0)
responses = np.array([ord(c) for c in CHARS], np.float32)
responses = responses.reshape((responses.size, 1))

np.savetxt('char_samples.data', samples)
np.savetxt('char_responses.data', responses)

# ============================================================================
Once we have the training data created, we can run the main script:
import cv2
import numpy as np

# ============================================================================

def reduce_colors(img, n):
    Z = img.reshape((-1, 3))

    # convert to np.float32
    Z = np.float32(Z)

    # define criteria, number of clusters (K) and apply kmeans()
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    K = n
    ret, label, center = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # now convert back into uint8, and remake the original image
    center = np.uint8(center)
    res = center[label.flatten()]
    res2 = res.reshape((img.shape))

    return res2

# ============================================================================

def clean_image(img):
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized_img = cv2.resize(gray_img
        , None
        , fx=5.0
        , fy=5.0
        , interpolation=cv2.INTER_CUBIC)
    resized_img = cv2.GaussianBlur(resized_img, (5, 5), 0)
    cv2.imwrite('licence_plate_large.png', resized_img)

    equalized_img = cv2.equalizeHist(resized_img)
    cv2.imwrite('licence_plate_equ.png', equalized_img)

    reduced = cv2.cvtColor(reduce_colors(cv2.cvtColor(equalized_img, cv2.COLOR_GRAY2BGR), 8), cv2.COLOR_BGR2GRAY)
    cv2.imwrite('licence_plate_red.png', reduced)

    ret, mask = cv2.threshold(reduced, 64, 255, cv2.THRESH_BINARY)
    cv2.imwrite('licence_plate_mask.png', mask)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    mask = cv2.erode(mask, kernel, iterations=1)
    cv2.imwrite('licence_plate_mask2.png', mask)

    return mask

# ============================================================================

def extract_characters(img):
    bw_image = cv2.bitwise_not(img)
    contours = cv2.findContours(bw_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[1]

    char_mask = np.zeros_like(img)
    bounding_boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h
        center = (x + w/2, y + h/2)
        if (area > 1000) and (area < 10000):
            x, y, w, h = x - 4, y - 4, w + 8, h + 8
            bounding_boxes.append((center, (x, y, w, h)))
            cv2.rectangle(char_mask, (x, y), (x+w, y+h), 255, -1)

    cv2.imwrite('licence_plate_mask3.png', char_mask)

    clean = cv2.bitwise_not(cv2.bitwise_and(char_mask, char_mask, mask=bw_image))

    bounding_boxes = sorted(bounding_boxes, key=lambda item: item[0][0])

    characters = []
    for center, bbox in bounding_boxes:
        x, y, w, h = bbox
        char_image = clean[y:y+h, x:x+w]
        characters.append((bbox, char_image))

    return clean, characters

def highlight_characters(img, chars):
    output_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    for bbox, char_img in chars:
        x, y, w, h = bbox
        cv2.rectangle(output_img, (x, y), (x+w, y+h), 255, 1)

    return output_img

# ============================================================================

img = cv2.imread("licence_plate.jpg")

img = clean_image(img)
clean_img, chars = extract_characters(img)

output_img = highlight_characters(clean_img, chars)
cv2.imwrite('licence_plate_out.png', output_img)

samples = np.loadtxt('char_samples.data', np.float32)
responses = np.loadtxt('char_responses.data', np.float32)
responses = responses.reshape((responses.size, 1))

model = cv2.ml.KNearest_create()
model.train(samples, cv2.ml.ROW_SAMPLE, responses)

plate_chars = ""
for bbox, char_img in chars:
    small_img = cv2.resize(char_img, (10, 10))
    small_img = small_img.reshape((1, 100))
    small_img = np.float32(small_img)
    retval, results, neigh_resp, dists = model.findNearest(small_img, k=1)
    plate_chars += str(chr(int(results[0][0])))  # chr() needs an int; results holds floats

print("Licence plate: %s" % plate_chars)
Script Output
Enlarged 5x:
Equalized:
Reduced to 8 colours:
Thresholded:
Eroded:
Mask selecting only characters:
Clean image with bounding boxes:
Console output:
Licence plate: 2B99996
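Since the question mentions Tesseract: once the plate image has been cleaned up like this, it may also be worth trying OCR on it directly. A sketch, assuming the pytesseract wrapper (not used above) is installed, run on the cleaned image written by the main script:

import cv2
import pytesseract

clean_img = cv2.imread('licence_plate_out.png')
# restrict Tesseract to a single text line of uppercase letters and digits
config = '--psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
text = pytesseract.image_to_string(clean_img, config=config)
print("Licence plate: %s" % text.strip())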
C++ code, using OpenCV 2.4.11 and Boost.Filesystem to iterate over files in a directory.
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
// ============================================================================
namespace fs = boost::filesystem;
// ============================================================================
typedef std::vector<std::string> string_list;

struct char_match_t
{
    cv::Point2i position;
    cv::Mat image;
};

typedef std::vector<char_match_t> char_match_list;
// ----------------------------------------------------------------------------
string_list find_input_files(std::string const& dir)
{
    string_list result;

    fs::path dir_path(dir);
    fs::directory_iterator end_itr;

    for (fs::directory_iterator i(dir_path); i != end_itr; ++i) {
        if (!fs::is_regular_file(i->status())) continue;

        if (i->path().extension() == ".png") {
            result.push_back(i->path().string());
        }
    }
    return result;
}
// ----------------------------------------------------------------------------
cv::Mat reduce_image(cv::Mat const& img, int K)
{
    int n = img.rows * img.cols;
    cv::Mat data = img.reshape(1, n);
    data.convertTo(data, CV_32F);

    std::vector<int> labels;
    cv::Mat1f colors;
    cv::kmeans(data, K, labels
        , cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001)
        , 5, cv::KMEANS_PP_CENTERS, colors);

    for (int i = 0; i < n; ++i) {
        data.at<float>(i, 0) = colors(labels[i], 0);
    }

    cv::Mat reduced = data.reshape(1, img.rows);
    reduced.convertTo(reduced, CV_8U);
    return reduced;
}
// ----------------------------------------------------------------------------
cv::Mat clean_image(cv::Mat const& img)
{
    cv::Mat resized_img;
    cv::resize(img, resized_img, cv::Size(), 5.0, 5.0, cv::INTER_CUBIC);

    cv::Mat equalized_img;
    cv::equalizeHist(resized_img, equalized_img);

    cv::Mat reduced_img(reduce_image(equalized_img, 8));

    cv::Mat mask;
    cv::threshold(reduced_img
        , mask
        , 64
        , 255
        , cv::THRESH_BINARY);

    cv::Mat kernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)));
    cv::erode(mask, mask, kernel, cv::Point(-1, -1), 1);

    return mask;
}
// ----------------------------------------------------------------------------
cv::Point2i center(cv::Rect const& bounding_box)
{
    return cv::Point2i(bounding_box.x + bounding_box.width / 2
        , bounding_box.y + bounding_box.height / 2);
}
// ----------------------------------------------------------------------------
char_match_list extract_characters(cv::Mat const& img)
{
    cv::Mat inverse_img;
    cv::bitwise_not(img, inverse_img);

    std::vector<std::vector<cv::Point>> contours;
    std::vector<cv::Vec4i> hierarchy;

    cv::findContours(inverse_img.clone(), contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);

    char_match_list result;

    double const MIN_CONTOUR_AREA(1000.0);
    double const MAX_CONTOUR_AREA(6000.0);
    for (uint32_t i(0); i < contours.size(); ++i) {
        cv::Rect bounding_box(cv::boundingRect(contours[i]));
        int bb_area(bounding_box.area());
        if ((bb_area >= MIN_CONTOUR_AREA) && (bb_area <= MAX_CONTOUR_AREA)) {
            int PADDING(2);
            bounding_box.x -= PADDING;
            bounding_box.y -= PADDING;
            bounding_box.width += PADDING * 2;
            bounding_box.height += PADDING * 2;

            char_match_t match;
            match.position = center(bounding_box);
            match.image = img(bounding_box);
            result.push_back(match);
        }
    }

    std::sort(begin(result), end(result)
        , [](char_match_t const& a, char_match_t const& b) -> bool
        {
            return a.position.x < b.position.x;
        });

    return result;
}
// ----------------------------------------------------------------------------
std::pair<float, cv::Mat> train_character(char c, cv::Mat const& img)
{
    cv::Mat small_char;
    cv::resize(img, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);

    cv::Mat small_char_float;
    small_char.convertTo(small_char_float, CV_32FC1);

    cv::Mat small_char_linear(small_char_float.reshape(1, 1));

    return std::pair<float, cv::Mat>(
        static_cast<float>(c)
        , small_char_linear);
}
// ----------------------------------------------------------------------------
std::string process_image(cv::Mat const& img, cv::KNearest& knn)
{
    cv::Mat clean_img(clean_image(img));
    char_match_list characters(extract_characters(clean_img));

    std::string result;
    for (char_match_t const& match : characters) {
        cv::Mat small_char;
        cv::resize(match.image, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);

        cv::Mat small_char_float;
        small_char.convertTo(small_char_float, CV_32FC1);

        cv::Mat small_char_linear(small_char_float.reshape(1, 1));

        float p = knn.find_nearest(small_char_linear, 1);
        result.push_back(char(p));
    }
    return result;
}
// ============================================================================
int main()
{
    string_list train_files(find_input_files("./chars"));

    cv::Mat samples, responses;
    for (std::string const& file_name : train_files) {
        cv::Mat char_img(cv::imread(file_name, 0));
        std::pair<float, cv::Mat> tinfo(train_character(file_name[file_name.size() - 5], char_img));
        responses.push_back(tinfo.first);
        samples.push_back(tinfo.second);
    }

    cv::KNearest knn;
    knn.train(samples, responses);

    string_list input_files(find_input_files("./input"));
    for (std::string const& file_name : input_files) {
        cv::Mat plate_img(cv::imread(file_name, 0));
        std::string plate(process_image(plate_img, knn));

        std::cout << file_name << " : " << plate << "\n";
    }
}
// ============================================================================
// ============================================================================
C++ code, using OpenCV 3.1 and Boost.Filesystem to iterate over files in a directory.
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
// ============================================================================
namespace fs = boost::filesystem;
// ============================================================================
typedef std::vector<std::string> string_list;

struct char_match_t
{
    cv::Point2i position;
    cv::Mat image;
};

typedef std::vector<char_match_t> char_match_list;
// ----------------------------------------------------------------------------
string_list find_input_files(std::string const& dir)
{
    string_list result;

    fs::path dir_path(dir);
    boost::filesystem::directory_iterator end_itr;

    for (boost::filesystem::directory_iterator i(dir_path); i != end_itr; ++i) {
        if (!boost::filesystem::is_regular_file(i->status())) continue;

        if (i->path().extension() == ".png") {
            result.push_back(i->path().string());
        }
    }
    return result;
}
// ----------------------------------------------------------------------------
cv::Mat reduce_image(cv::Mat const& img, int K)
{
    int n = img.rows * img.cols;
    cv::Mat data = img.reshape(1, n);
    data.convertTo(data, CV_32F);

    std::vector<int> labels;
    cv::Mat1f colors;
    cv::kmeans(data, K, labels
        , cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001)
        , 5, cv::KMEANS_PP_CENTERS, colors);

    for (int i = 0; i < n; ++i) {
        data.at<float>(i, 0) = colors(labels[i], 0);
    }

    cv::Mat reduced = data.reshape(1, img.rows);
    reduced.convertTo(reduced, CV_8U);
    return reduced;
}
// ----------------------------------------------------------------------------
cv::Mat clean_image(cv::Mat const& img)
{
    cv::Mat resized_img;
    cv::resize(img, resized_img, cv::Size(), 5.0, 5.0, cv::INTER_CUBIC);

    cv::Mat equalized_img;
    cv::equalizeHist(resized_img, equalized_img);

    cv::Mat reduced_img(reduce_image(equalized_img, 8));

    cv::Mat mask;
    cv::threshold(reduced_img
        , mask
        , 64
        , 255
        , cv::THRESH_BINARY);

    cv::Mat kernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)));
    cv::erode(mask, mask, kernel, cv::Point(-1, -1), 1);

    return mask;
}
// ----------------------------------------------------------------------------
cv::Point2i center(cv::Rect const& bounding_box)
{
    return cv::Point2i(bounding_box.x + bounding_box.width / 2
        , bounding_box.y + bounding_box.height / 2);
}
// ----------------------------------------------------------------------------
char_match_list extract_characters(cv::Mat const& img)
{
    cv::Mat inverse_img;
    cv::bitwise_not(img, inverse_img);

    std::vector<std::vector<cv::Point>> contours;
    std::vector<cv::Vec4i> hierarchy;

    cv::findContours(inverse_img.clone(), contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);

    char_match_list result;

    double const MIN_CONTOUR_AREA(1000.0);
    double const MAX_CONTOUR_AREA(6000.0);
    for (int i(0); i < contours.size(); ++i) {
        cv::Rect bounding_box(cv::boundingRect(contours[i]));
        int bb_area(bounding_box.area());
        if ((bb_area >= MIN_CONTOUR_AREA) && (bb_area <= MAX_CONTOUR_AREA)) {
            int PADDING(2);
            bounding_box.x -= PADDING;
            bounding_box.y -= PADDING;
            bounding_box.width += PADDING * 2;
            bounding_box.height += PADDING * 2;

            char_match_t match;
            match.position = center(bounding_box);
            match.image = img(bounding_box);
            result.push_back(match);
        }
    }

    std::sort(begin(result), end(result)
        , [](char_match_t const& a, char_match_t const& b) -> bool
        {
            return a.position.x < b.position.x;
        });

    return result;
}
// ----------------------------------------------------------------------------
std::pair<float, cv::Mat> train_character(char c, cv::Mat const& img)
{
    cv::Mat small_char;
    cv::resize(img, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);

    cv::Mat small_char_float;
    small_char.convertTo(small_char_float, CV_32FC1);

    cv::Mat small_char_linear(small_char_float.reshape(1, 1));

    return std::pair<float, cv::Mat>(
        static_cast<float>(c)
        , small_char_linear);
}
// ----------------------------------------------------------------------------
std::string process_image(cv::Mat const& img, cv::Ptr<cv::ml::KNearest> knn)
{
    cv::Mat clean_img(clean_image(img));
    char_match_list characters(extract_characters(clean_img));

    std::string result;
    for (char_match_t const& match : characters) {
        cv::Mat small_char;
        cv::resize(match.image, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);

        cv::Mat small_char_float;
        small_char.convertTo(small_char_float, CV_32FC1);

        cv::Mat small_char_linear(small_char_float.reshape(1, 1));

        cv::Mat tmp;
        float p = knn->findNearest(small_char_linear, 1, tmp);
        result.push_back(char(p));
    }
    return result;
}
// ============================================================================
int main()
{
    string_list train_files(find_input_files("./chars"));

    cv::Mat samples, responses;
    for (std::string const& file_name : train_files) {
        cv::Mat char_img(cv::imread(file_name, 0));
        std::pair<float, cv::Mat> tinfo(train_character(file_name[file_name.size() - 5], char_img));
        responses.push_back(tinfo.first);
        samples.push_back(tinfo.second);
    }

    cv::Ptr<cv::ml::KNearest> knn(cv::ml::KNearest::create());
    cv::Ptr<cv::ml::TrainData> training_data =
        cv::ml::TrainData::create(samples
            , cv::ml::SampleTypes::ROW_SAMPLE
            , responses);
    knn->train(training_data);

    string_list input_files(find_input_files("./input"));
    for (std::string const& file_name : input_files) {
        cv::Mat plate_img(cv::imread(file_name, 0));
        std::string plate(process_image(plate_img, knn));

        std::cout << file_name << " : " << plate << "\n";
    }
}
// ============================================================================
