I want to adjust/improve the contrast of X-ray images of the hand. I have about 10,000 of these X-ray images, so instead of editing them manually I want to automate the processing in code. Most of the images in the dataset have quality similar to these three images.
I've tried the suggestions from here and here. However, when I run some image samples through them, the output is identical to the input.
Are there better ways to improve the contrast, particularly for hand X-ray images? From these three inputs, I want the results to look like these. If it can be done automatically, that would be awesome too. How would one code this?
I would suggest using skimage's rescale_intensity in Python/OpenCV/Skimage to stretch the dynamic range (after covering the label with the average gray value). It automatically finds the input min and max values and stretches them to full black and full white (i.e. 0 and 255), or you can compute the min and max values yourself and bias them if desired.
Read the input
Convert to grayscale
Blur
Apply morphology close
Threshold
Get contours and filter by size, excluding contours that are too small or too large, so that only the label is selected.
Draw each selected contour, filled with the mean gray value of the image, on a copy of the grayscale image in order to cover over the label.
Use Skimage's rescale_intensity to stretch the min and max values to 0 and 255 respectively. If you do not need to bias the min or max, you can replace (minval,maxval) with 'image'; it will then compute the min and max automatically and you will not need NumPy to find them (see the short sketch just after this list).
Save the output
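For example, here is a minimal sketch of the fully automatic variant (no explicit min/max and no label masking), just to show the call:
import cv2
import skimage.exposure

gray = cv2.imread('xray1.webp', cv2.IMREAD_GRAYSCALE)
# in_range='image' tells rescale_intensity to use the image's own min and max
auto = skimage.exposure.rescale_intensity(gray, in_range='image', out_range=(0, 255)).astype('uint8')
cv2.imwrite('xray1_auto.png', auto)
The full script, including the label-masking step, follows.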
import cv2
import numpy as np
import skimage.exposure
# load images
img1 = cv2.imread('xray1.webp')
img2 = cv2.imread('xray2.webp')
img3 = cv2.imread('xray3.webp')
# convert to gray
gray1 = cv2.cvtColor(img1,cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
gray3 = cv2.cvtColor(img3,cv2.COLOR_BGR2GRAY)
# blur
blur1 = cv2.GaussianBlur(gray1, (0,0), sigmaX=6, sigmaY=6)
blur2 = cv2.GaussianBlur(gray2, (0,0), sigmaX=6, sigmaY=6)
blur3 = cv2.GaussianBlur(gray3, (0,0), sigmaX=6, sigmaY=6)
# morphology
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (45,45))
morph1 = cv2.morphologyEx(blur1, cv2.MORPH_CLOSE, kernel)
morph2 = cv2.morphologyEx(blur2, cv2.MORPH_CLOSE, kernel)
morph3 = cv2.morphologyEx(blur3, cv2.MORPH_CLOSE, kernel)
# threshold
thresh1 = cv2.threshold(morph1, 0, 255, cv2.THRESH_OTSU)[1]
thresh2 = cv2.threshold(morph2, 0, 255, cv2.THRESH_OTSU)[1]
thresh3 = cv2.threshold(morph3, 0, 255, cv2.THRESH_OTSU)[1]
# get contours and filter on size
masked1 = gray1.copy()
meanval = int(np.mean(masked1))
contours = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 500 and area < 50000:
cv2.drawContours(masked1, [cntr], 0, (meanval), -1)
masked2 = gray2.copy()
meanval = int(np.mean(masked2))
contours = cv2.findContours(thresh2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 500 and area < 50000:
cv2.drawContours(masked2, [cntr], 0, (meanval), -1)
masked3 = gray3.copy()
meanval = int(np.mean(masked3))
contours = cv2.findContours(thresh3, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 500 and area < 50000:
cv2.drawContours(masked3, [cntr], 0, (meanval), -1)
# stretch
minval = int(np.amin(masked1))
maxval = int(np.amax(masked1))
result1 = skimage.exposure.rescale_intensity(masked1, in_range=(minval,maxval), out_range=(0,255)).astype(np.uint8)
minval = int(np.amin(masked2))
maxval = int(np.amax(masked2))
result2 = skimage.exposure.rescale_intensity(masked2, in_range=(minval,maxval), out_range=(0,255)).astype(np.uint8)
minval = int(np.amin(masked3))
maxval = int(np.amax(masked3))
result3 = skimage.exposure.rescale_intensity(masked3, in_range=(minval,maxval), out_range=(0,255)).astype(np.uint8)
# save output
cv2.imwrite('xray1_stretched.png', result1)
cv2.imwrite('xray2_stretched.png', result2)
cv2.imwrite('xray3_stretched.png', result3)
# Display various images to see the steps
cv2.imshow('thresh1', thresh1)
cv2.imshow('thresh2', thresh2)
cv2.imshow('thresh3', thresh3)
cv2.imshow('result1', result1)
cv2.imshow('result2', result2)
cv2.imshow('result3', result3)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result 1:
Result 2:
Result 3:
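Since there are about 10,000 images, the whole thing is easy to run unattended. A minimal batch sketch (the folder names xrays/ and stretched/ and the .webp extension are assumptions; for best results the label-masking step above should be folded into the loop as well):
import glob
import os
import cv2
import skimage.exposure

os.makedirs('stretched', exist_ok=True)
for path in glob.glob('xrays/*.webp'):
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        continue
    # stretch each image's own min/max to 0..255
    result = skimage.exposure.rescale_intensity(gray, in_range='image', out_range=(0, 255)).astype('uint8')
    name = os.path.splitext(os.path.basename(path))[0]
    cv2.imwrite(os.path.join('stretched', name + '_stretched.png'), result)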
I tried a simple local-type enhancement (in C++).
Looking at the results, I'm worried about the strong halo...
Note: I tested with 50%-size images, because the originals are too large for my monitor.
class MyTryImpl
{
public:
MyTryImpl( double Sigmoid_A=5, unsigned char T_min=5, unsigned char T_max=250 )
{ MakeTable( Sigmoid_A, T_min, T_max ); }
public:
void Proc( const cv::Mat &Src8UC1, const cv::Mat &GlobalLMap, cv::Mat &Dst8UC1 ) const
{
if( Src8UC1.empty() || Src8UC1.type()!=CV_8UC1 ){ throw std::invalid_argument( "must : Src is 8UC1" ); }
if( GlobalLMap.empty() || GlobalLMap.type()!=CV_8UC1 ){ throw std::invalid_argument( "must : GlobalLMap is 8UC1" ); }
if( Src8UC1.size() != GlobalLMap.size() ){ throw std::invalid_argument( "must : Src8UC1.size() == GlobalLMap.size()" ); }
Dst8UC1.create( Src8UC1.size(), CV_8UC1 );
for( int y=0; y<Src8UC1.rows; ++y )
{
const unsigned char *pS = Src8UC1.ptr<unsigned char>(y);
const unsigned char *pB = GlobalLMap.ptr<unsigned char>(y);
unsigned char *pR = Dst8UC1.ptr<unsigned char>(y);
for( int x=0; x<Src8UC1.cols; ++x, ++pS,++pB,++pR )
{ *pR = m_ResultTbl[ *pS ][ *pB ]; }
}
}
void Proc(
const cv::Mat &Src8UC1, cv::Mat &Dst8UC1,
int BlurKernelSize,
bool WithGaussian=true // if true use GaussianBlur, else box filter
) const
{
if( Src8UC1.empty() || Src8UC1.type()!=CV_8UC1 )
{ throw std::invalid_argument( "must : Src is 8UC1" ); }
cv::Mat Blurred;
{
int s = std::max( 3, BlurKernelSize | 0x01 );
if( WithGaussian ){ cv::GaussianBlur( Src8UC1, Blurred, cv::Size(s,s), 0 ); }
else { cv::blur( Src8UC1, Blurred, cv::Size(s,s) ); }
}
Proc( Src8UC1, Blurred, Dst8UC1 );
}
private:
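// MakeTable precomputes a 256x256 lookup table indexed by (source value S, blurred local
// brightness B): a gamma curve whose exponent maps B to mid-gray is applied first, then a
// sigmoid of slope Sigmoid_A, so contrast is stretched around each pixel's neighbourhood
// brightness. T_min/T_max clamp B so the exponent never degenerates (log(0) or division by log(1)).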
void MakeTable( double Sigmoid_A, unsigned char T_min, unsigned char T_max )
{
if( T_min==0 ){ throw std::invalid_argument( "must : T_min > 0" ); }
if( T_max==255 ){ throw std::invalid_argument( "must : T_max < 255" ); }
if( T_min>=T_max ){ throw std::invalid_argument( "must : T_min < T_max" ); }
unsigned char B = 0;
while( true )
{
double b = std::max( T_min, std::min(T_max,B) ) / 255.0;
const double g = log(0.5) / log( b<=0.5 ? b : 1.0-b);
unsigned char S=0;
while( true )
{
double s = S / 255.0;
double c = Sig( Gam( (b<=0.5 ? s : 1.0-s), g ), Sigmoid_A );
m_ResultTbl[S][B] = cvRound( 255.0 * (b<=0.5 ? c : 1.0-c) );
if( S==0xFF )break;
++S;
}
if( B==0xFF )break;
++B;
}
}
static double Sig( double x, double a )
{
double exp1 = exp( -a*(2*x -1) );
double exp2 = exp( -a );
double nume = (1-exp1)*(1+exp2);
double denom = (1+exp1)*(1-exp2);
return 0.5 * ( 1 + nume/denom );
}
static double Gam( double x, double g ){ return pow( x, g ); }
private:
unsigned char m_ResultTbl[256][256];
};
int main()
{//Test for 3 imgs
const std::string SrcImgFileNames[3] = { "Xray1.png", "Xray2.png", "Xray3.png" };
const std::string SaveFileNames[3] = { "Result1.png", "Result2.png", "Result3.png" };
MyTryImpl MyTest;
for( int i=0; i<3; ++i )
{
cv::Mat SrcImg = cv::imread( SrcImgFileNames[i], cv::IMREAD_GRAYSCALE );
if( SrcImg.empty() )return 0;
cv::Mat ResultImg;
MyTest.Proc( SrcImg, ResultImg, SrcImg.cols/10, true );
cv::imwrite( SaveFileNames[i], ResultImg );
}
return 0;
}
Results:
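For comparison, if the halo from this local mapping is a concern, OpenCV's built-in CLAHE (contrast limited adaptive histogram equalization) is a common baseline for local contrast in X-rays. It is a different technique from the lookup table above; this is just a quick Python sketch to compare against (the clipLimit and tileGridSize values are guesses to tune):
import cv2

gray = cv2.imread('Xray1.png', cv2.IMREAD_GRAYSCALE)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
cv2.imwrite('Result1_clahe.png', enhanced)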
I have recently been studying deep learning with DICOM images, and I want to move my DICOM image processing code from Python to C++.
The following is my Python code.
def window_img(dcm, width=None, level=None):
pixels = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
lower = level - (width / 2)
upper = level + (width / 2)
pixels[pixels<lower] = lower
pixels[pixels>upper] = upper
pixels = (pixels - (level - width//2)) / width
return pixels
def meta_dicom_convert(dcm, ww, wc):
b = window_img(dcm, ww[0], wc[0])
g = window_img(dcm, ww[1], wc[1])
r = window_img(dcm, ww[2], wc[2])
image = np.concatenate([b, g, r], axis=2)
return image
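For clarity, the np.concatenate call above just stacks the three windowed planes along the channel axis (this assumes each plane from window_img comes out with shape (H, W, 1); with 2-D planes, np.dstack does the same job). A tiny self-contained sketch of that stacking:
import numpy as np

# three fake single-channel planes standing in for the b, g and r windows
h, w = 4, 5
b = np.zeros((h, w, 1))
g = np.ones((h, w, 1))
r = np.full((h, w, 1), 2.0)

image = np.concatenate([b, g, r], axis=2)            # shape (4, 5, 3)
same = np.dstack([b[..., 0], g[..., 0], r[..., 0]])  # identical result from 2-D planes
print(image.shape, np.array_equal(image, same))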
I am trying to do the same thing in C++. However, the only method I found is cv::merge, and when I use it the result seems a little different from the image combined with np.concatenate.
My C++ code:
DcmFileFormat dfile;
OFCondition result = dfile.loadFile("D:\\python\\RSNA\\stage_2_train\\ID_c8355f255.dcm");
DcmDataset *dcmdataset = dfile.getDataset();
double slope, intercept;
dcmdataset->findAndGetFloat64(DCM_RescaleSlope, slope);
dcmdataset->findAndGetFloat64(DCM_RescaleIntercept, intercept);
DicomImage dcmimage_brain("D:\\python\\RSNA\\stage_2_train\\ID_c8355f255.dcm");
DicomImage dcmimage_subdural("D:\\python\\RSNA\\stage_2_train\\ID_c8355f255.dcm");
DicomImage dcmimage_bone("D:\\python\\RSNA\\stage_2_train\\ID_c8355f255.dcm");
int nWidth = dcmimage_brain.getWidth();
int nHeight = dcmimage_brain.getHeight();
cout << "size = " << nWidth << " x " << nHeight << endl;
int wcenter = 40, wwidth = 80;
int lower = wcenter - (wwidth / 2);
int upper = wcenter + (wwidth / 2);
dcmimage_brain.setWindow(wcenter, wwidth);
Uint8 *pixeldata_brain = (Uint8*)(dcmimage_brain.getOutputData(8));
if (pixeldata_brain)
{
for (int i = 0; i < nWidth * nHeight; ++i) {
if (pixeldata_brain[i] < lower)
pixeldata_brain[i] = lower;
if (pixeldata_brain[i] > upper)
pixeldata_brain[i] = upper;
pixeldata_brain[i] = (255 * (pixeldata_brain[i] - lower)) / (upper - lower);
}
}
Mat brain = Mat(nWidth, nHeight, CV_8UC1, pixeldata_brain);
//brain.convertTo(brain, CV_8UC1);
wcenter = 80;
wwidth = 200;
lower = wcenter - (wwidth / 2);
upper = wcenter + (wwidth / 2);
dcmimage_subdural.setWindow(wcenter, wwidth);
Uint8 *pixeldata_subdural = (Uint8*)(dcmimage_subdural.getOutputData(8));
if (pixeldata_subdural)
{
for (int i = 0; i < nWidth * nHeight; ++i) {
if (pixeldata_subdural[i] < lower)
pixeldata_subdural[i] = lower;
if (pixeldata_subdural[i] > upper)
pixeldata_subdural[i] = upper;
pixeldata_subdural[i] = (255 * (pixeldata_subdural[i] - lower)) / (upper - lower);
}
}
Mat subdural = Mat(nWidth, nHeight, CV_8UC1, pixeldata_subdural);
//subdural.convertTo(subdural, CV_8UC1);
wcenter = 40;
wwidth = 380;
lower = wcenter - (wwidth / 2);
upper = wcenter + (wwidth / 2);
dcmimage_bone.setWindow(wcenter, wwidth);
Uint8 *pixeldata_bone = (Uint8*)(dcmimage_bone.getOutputData(8));
if (pixeldata_bone) {
for (int i = 0; i < nWidth * nHeight; ++i) {
if (pixeldata_bone[i] < lower)
pixeldata_bone[i] = lower;
if (pixeldata_bone[i] > upper)
pixeldata_bone[i] = upper;
pixeldata_bone[i] = (255 * (pixeldata_bone[i] - lower) / (upper - lower));
}
}
Mat bone = Mat(nWidth, nHeight, CV_8UC1, pixeldata_bone);
imshow("bone", bone);
//bone.convertTo(bone, CV_8UC1);
//Mat bone = Mat(nWidth, nHeight, CV_16UC1, pbone);
//bone.convertTo(bone, CV_8UC1);
// new Image size
Mat image = Mat::zeros(nWidth, nHeight, CV_8UC3);
vector<Mat> channels;
split(image, channels);
channels.at(0) = brain;
channels.at(1) = subdural;
channels.at(2) = bone;
merge(channels, image);
imshow("mergeImage", image);
waitKey();
result image: (left side: cv::merge) (right side: np.concatenate)
Is cv::merge not just a concatenation of images along the channel dimension?
Is there any other method I can use in C++ to do the same thing as np.concatenate?
Any reply would be appreciated. Thanks.
I am using OpenCV and Python. I am trying to draw the longest line inside a contour.
I have a contour named cnt. The image is binary; the inside of the contour is white and the outside is black. I would like to draw the longest line that fits inside the white contour. I found how to draw lines using cv2.line, but I didn't find how to draw the longest one. Do you have any ideas?
img_copy = cv2.dilate(copy.deepcopy(img), np.ones((2,2),np.uint8),iterations = 2)
contours, hierarchy = cv2.findContours(copy.deepcopy(img_copy),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
areas = [cv2.contourArea(c) for c in contours]
max_index = np.argmax(areas)
cnt = contours[max_index]
The method below can be used to draw the lines found in the image and to get the angle (in degrees) with the highest count. Try it; it is working fine for me.
Mat Compute_skewAngle (Mat& src,Mat& src_gray,int drawLine) {
int thresh = 100;
RNG rng(12345);
// 1. Load Gray Scale Image
// 2. Get Size of Image
cv::Size size = src_gray.size();
// 3. blur the Grayscale image
cv::blur(src_gray, src_gray, cv::Size(3,3) );
cv::Mat threshold_output;
std::vector<std::vector<cv::Point> > contours;
std::vector<Vec4i> hierarchy;
// 4. Detect edges using Threshold / Canny edge Detector
//cv::threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY );
Mat dst, cdst;
cv::Canny(src_gray, dst, thresh, 200, 3);
// 5. Gray Image to BGR
cvtColor(dst, cdst, CV_GRAY2BGR);
#if 0
vector<Vec2f> lines;
HoughLines(dst, lines, 1, CV_PI/180, 100, 0, 0 );
for( size_t i = 0; i < lines.size(); i++ )
{
float rho = lines[i][0], theta = lines[i][1];
Point pt1, pt2;
double a = cos(theta), b = sin(theta);
double x0 = a*rho, y0 = b*rho;
pt1.x = cvRound(x0 + 1000*(-b));
pt1.y = cvRound(y0 + 1000*(a));
pt2.x = cvRound(x0 - 1000*(-b));
pt2.y = cvRound(y0 - 1000*(a));
line( cdst, pt1, pt2, Scalar(0,0,255), 3, CV_AA);
}
#else
vector<Vec4i> lines;
double angle = 0.;
int countNegative = 0;
int countPositive =0;
HoughLinesP(dst, lines, 1, CV_PI/180, 100, 10, 100);
NSMutableDictionary *angleCountDict = [[NSMutableDictionary alloc] init];
for( size_t i = 0; i < lines.size(); i++ )
{
if(drawLine == 1) { // draw line while pass flag value 1
Vec4i l = lines[i];
line( cdst, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]), Scalar(0,0,255), 3, CV_AA);
}
double delta_y = lines[i][3] - lines[i][1];
double delta_x = lines[i][2] - lines[i][0];
double currentAngle =atan2(delta_y,delta_x);
int angleAsDeg = abs(currentAngle * 180 / CV_PI);
NSString *_retValue = [angleCountDict objectForKey:[NSString stringWithFormat:@"%d", angleAsDeg]];
int angleCount = [_retValue intValue];
[angleCountDict setObject:[NSNumber numberWithInt:angleCount + 1] forKey:[NSString stringWithFormat:@"%d", angleAsDeg]];
double slope = delta_y / delta_x ; // find the slope to detect the angle " - " or " + "
if(slope < 0)
countNegative ++;
else
countPositive ++;
}
#endif
// sort the dictionary to get the largest value of degree count
NSArray *blockSortedKeys = [angleCountDict keysSortedByValueUsingComparator: ^(id obj1, id obj2) {
return [obj2 compare:obj1];
}];
NSString *degreeVal;
if([blockSortedKeys count] > 0)
degreeVal = [blockSortedKeys objectAtIndex:0];
angle = [degreeVal doubleValue];
if(countNegative > countPositive) {
angle = - angle;
}
Mat outPut;
outPut = rotateMatImage(src,angle,cdst);
return outPut;
}
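For readers who do not want the Objective-C dictionary handling, the same idea (histogram the angles of the HoughLinesP segments, take the most frequent one, and sign it by the dominant slope) can be sketched in Python roughly like this. This is a sketch of the idea, not a line-by-line port of the function above:
import cv2
import numpy as np
from collections import Counter

def estimate_skew_angle(gray):
    # edge map, then probabilistic Hough to get line segments
    edges = cv2.Canny(cv2.blur(gray, (3, 3)), 100, 200, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                            minLineLength=10, maxLineGap=100)
    if lines is None:
        return 0.0
    angle_counts = Counter()
    neg, pos = 0, 0
    for x1, y1, x2, y2 in lines[:, 0]:
        dy, dx = float(y2 - y1), float(x2 - x1)
        deg = abs(int(round(np.degrees(np.arctan2(dy, dx)))))
        angle_counts[deg] += 1                 # count occurrences of each angle
        if dx != 0 and dy / dx < 0:
            neg += 1
        else:
            pos += 1
    angle = float(angle_counts.most_common(1)[0][0])
    return -angle if neg > pos else angle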
I have a weird problem with OpenCV. I was doing template matching with OpenCV in both Python and C++; however, even though Python uses the C++ methods under the hood, I get very different results. The Python version gives me a really accurate location, while the C++ one is not even close. What is the reason for this? Is it my C++ code, or something else?
I use Python 2.7.11, Apple LLVM version 7.3.0 (clang-703.0.29), and OpenCV 3.0.
My Python Code:
def toGray(img):
_, _, channels = img.shape
if channels == 3:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
else:
gray = img
return gray
def template_match(img, template):
w, h = template.shape[::-1]
res = cv2.matchTemplate(img,template,cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(img,top_left, bottom_right, 255, 2)
plt.subplot(121),plt.imshow(res,cmap = 'gray')
plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(img,cmap = 'gray')
plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
plt.suptitle("TM_CCOEFF_NORMED")
plt.show()
if __name__ == "__main__":
img_name = sys.argv[1]
img_name2 = sys.argv[2]
img_rgb = cv2.imread(img_name)
img_rgb2 = cv2.imread(img_name2)
gimg1 = toGray(img_rgb)
gimg2 = toGray(img_rgb2)
template_match(gimg1, gimg2)
My C++ code (it is exactly the same as the OpenCV documentation sample):
Mat img; Mat templ; Mat result;
char* image_window = "Source Image";
char* result_window = "Result window";
int match_method;
int max_Trackbar = 5;
/// Function Headers
void MatchingMethod( int, void* );
/** @function main */
int main( int argc, char** argv )
{
/// Load image and template
img = imread( argv[1], 1 );
templ = imread( argv[2], 1 );
/// Create windows
namedWindow( image_window, CV_WINDOW_AUTOSIZE );
namedWindow( result_window, CV_WINDOW_AUTOSIZE );
/// Create Trackbar
char* trackbar_label = "Method: \n 0: SQDIFF \n 1: SQDIFF NORMED \n 2: TM CCORR \n 3: TM CCORR NORMED \n 4: TM COEFF \n 5: TM COEFF NORMED";
createTrackbar( trackbar_label, image_window, &match_method, max_Trackbar, MatchingMethod );
MatchingMethod( 0, 0 );
waitKey(0);
return 0;
}
/**
* @function MatchingMethod
* @brief Trackbar callback
*/
void MatchingMethod( int, void* )
{
/// Source image to display
Mat img_display;
img.copyTo( img_display );
/// Create the result matrix
int result_cols = img.cols - templ.cols + 1;
int result_rows = img.rows - templ.rows + 1;
result.create( result_rows, result_cols, CV_32FC1 );
/// Do the Matching and Normalize
matchTemplate( img, templ, result, match_method );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
/// Localizing the best match with minMaxLoc
double minVal; double maxVal; Point minLoc; Point maxLoc;
Point matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
/// For SQDIFF and SQDIFF_NORMED, the best matches are lower values. For all the other methods, the higher the better
if( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED )
{ matchLoc = minLoc; }
else
{ matchLoc = maxLoc; }
/// Show me what you got
rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
imshow( image_window, img_display );
imshow( result_window, result );
cv::imwrite("rec.jpg", img_display);
return;
}
Original Images:
Python Output:
C++ Output:
Looking through the two implementations, the most evident difference between them is the colour format of the images used for matching.
In the Python version, you load the images "as-is", but then convert them to grayscale with toGray() before calling cv2.matchTemplate, so the matching is done on single-channel images:
img_rgb = cv2.imread(img_name)
img_rgb2 = cv2.imread(img_name2)
gimg1 = toGray(img_rgb)
gimg2 = toGray(img_rgb2)
In C++, however, you pass 1 (IMREAD_COLOR) as the second parameter to imread, so the matching is done on 3-channel BGR images:
img = imread( argv[1], 1 );
templ = imread( argv[2], 1 );
According to cv::matchTemplate documentation:
In case of a color image, template summation in the numerator and each
sum in the denominator is done over all of the channels and separate
mean values are used for each channel. That is, the function can take
a color template and a color image. The result will still be a
single-channel image, which is easier to analyze.
That suggests it is quite possible to get different results when applying it to a 3-channel image than when applying it to a single-channel version of the same image.
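So the quickest check is to feed both programs the same colour space. For example, here is a minimal Python sketch that matches in colour, the same as the C++ program with flag 1 (alternatively, load with flag 0 on the C++ side to match the grayscale Python path):
import sys
import cv2

# colour (3-channel) matching, i.e. what the C++ program does with flag 1
img = cv2.imread(sys.argv[1], cv2.IMREAD_COLOR)
templ = cv2.imread(sys.argv[2], cv2.IMREAD_COLOR)

res = cv2.matchTemplate(img, templ, cv2.TM_CCOEFF_NORMED)
_, max_val, _, max_loc = cv2.minMaxLoc(res)
h, w = templ.shape[:2]
cv2.rectangle(img, max_loc, (max_loc[0] + w, max_loc[1] + h), (0, 0, 255), 2)
cv2.imwrite('rec_colour.png', img)
print(max_val, max_loc)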
I am trying to recognize the characters of license plates using OCR, but my licence plate images are of poor quality.
I'm trying to somehow improve the characters for OCR, but my best result is this: result.
Even Tesseract does not recognize any characters in this picture. My code is:
#include <cv.h> // open cv general include file
#include <highgui.h> // open cv GUI include file
#include <iostream> // standard C++ I/O
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
using namespace cv;
int main( int argc, char** argv )
{
Mat src;
Mat dst;
Mat const structure_elem = getStructuringElement(
MORPH_RECT, Size(2,2));
src = imread(argv[1], CV_LOAD_IMAGE_COLOR); // Read the file
cvtColor(src,src,CV_BGR2GRAY);
imshow( "plate", src );
GaussianBlur(src, src, Size(1,1), 1.5, 1.5);
imshow( "blur", src );
equalizeHist(src, src);
imshow( "equalize", src );
adaptiveThreshold(src, src, 255, ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 15, -1);
imshow( "threshold", src );
morphologyEx(src, src, MORPH_CLOSE, structure_elem);
imshow( "morphological operation", src );
imwrite("end.jpg", src);
waitKey(0);
return 0;
}
My question is: do you know how to achieve better results, i.e. a clearer image, despite the poor quality of my licence plate, so that the output can be read by OCR (for example Tesseract)?
Thank you for your answers; I really do not know how to do this.
One possible algorithm to clean up the images is as follows:
Scale the image up, so that the letters are more substantial.
Reduce the image to only 8 colours by k-means clustering.
Threshold the image, and erode it to fill in any small gaps and make the letters more substantial.
Invert the image to make masking easier.
Create a blank mask image of the same size, set to all zeros
Find contours in the image. For each contour:
Find bounding box of the contour
Find the area of the bounding box
If the area is too small or too large, drop the contour (I chose 1000 and 10000 as limits)
Otherwise draw a filled rectangle corresponding to the bounding box on the mask with white colour (255)
Store the bounding box and the corresponding image ROI
For each separated character (bounding box + image)
Recognise the character
Note: I prototyped this in Python 2.7 with OpenCV 3.1. C++ ports of this code are near the end of this answer.
Character Recognition
I took inspiration for the character recognition from this question on SO.
Then I found an image that we can use to extract training images for the correct font. I cut them down to only include digits and letters without accents.
train_digits.png:
train_letters.png:
Then I wrote a script that splits out the individual characters, scales them up and prepares training images that contain a single character per file:
import os
import cv2
import numpy as np
# ============================================================================
def extract_chars(img):
bw_image = cv2.bitwise_not(img)
contours = cv2.findContours(bw_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[1]
char_mask = np.zeros_like(img)
bounding_boxes = []
for contour in contours:
x,y,w,h = cv2.boundingRect(contour)
x,y,w,h = x-2, y-2, w+4, h+4
bounding_boxes.append((x,y,w,h))
characters = []
for bbox in bounding_boxes:
x,y,w,h = bbox
char_image = img[y:y+h,x:x+w]
characters.append(char_image)
return characters
# ============================================================================
def output_chars(chars, labels):
for i, char in enumerate(chars):
filename = "chars/%s.png" % labels[i]
char = cv2.resize(char
, None
, fx=3
, fy=3
, interpolation=cv2.INTER_CUBIC)
cv2.imwrite(filename, char)
# ============================================================================
if not os.path.exists("chars"):
os.makedirs("chars")
img_digits = cv2.imread("train_digits.png", 0)
img_letters = cv2.imread("train_letters.png", 0)
digits = extract_chars(img_digits)
letters = extract_chars(img_letters)
DIGITS = [0, 9, 8, 7, 6, 5, 4, 3, 2, 1]
LETTERS = [chr(ord('A') + i) for i in range(25,-1,-1)]
output_chars(digits, DIGITS)
output_chars(letters, LETTERS)
# ============================================================================
The next step was to generate the training data from the character files we created with the previous script.
I followed the algorithm from the answer to the question mentioned above, resizing each character image to 10x10 and using all the pixels as keypoints.
I saved the training data as char_samples.data and char_responses.data.
Script to generate training data:
import cv2
import numpy as np
CHARS = [chr(ord('0') + i) for i in range(10)] + [chr(ord('A') + i) for i in range(26)]
# ============================================================================
def load_char_images():
characters = {}
for char in CHARS:
char_img = cv2.imread("chars/%s.png" % char, 0)
characters[char] = char_img
return characters
# ============================================================================
characters = load_char_images()
samples = np.empty((0,100))
for char in CHARS:
char_img = characters[char]
small_char = cv2.resize(char_img,(10,10))
sample = small_char.reshape((1,100))
samples = np.append(samples,sample,0)
responses = np.array([ord(c) for c in CHARS],np.float32)
responses = responses.reshape((responses.size,1))
np.savetxt('char_samples.data',samples)
np.savetxt('char_responses.data',responses)
# ============================================================================
Once we have the training data created, we can run the main script:
import cv2
import numpy as np
# ============================================================================
def reduce_colors(img, n):
Z = img.reshape((-1,3))
# convert to np.float32
Z = np.float32(Z)
# define criteria, number of clusters(K) and apply kmeans()
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
K = n
ret,label,center=cv2.kmeans(Z,K,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
# Now convert back into uint8, and make original image
center = np.uint8(center)
res = center[label.flatten()]
res2 = res.reshape((img.shape))
return res2
# ============================================================================
def clean_image(img):
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
resized_img = cv2.resize(gray_img
, None
, fx=5.0
, fy=5.0
, interpolation=cv2.INTER_CUBIC)
resized_img = cv2.GaussianBlur(resized_img,(5,5),0)
cv2.imwrite('licence_plate_large.png', resized_img)
equalized_img = cv2.equalizeHist(resized_img)
cv2.imwrite('licence_plate_equ.png', equalized_img)
reduced = cv2.cvtColor(reduce_colors(cv2.cvtColor(equalized_img, cv2.COLOR_GRAY2BGR), 8), cv2.COLOR_BGR2GRAY)
cv2.imwrite('licence_plate_red.png', reduced)
ret, mask = cv2.threshold(reduced, 64, 255, cv2.THRESH_BINARY)
cv2.imwrite('licence_plate_mask.png', mask)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
mask = cv2.erode(mask, kernel, iterations = 1)
cv2.imwrite('licence_plate_mask2.png', mask)
return mask
# ============================================================================
def extract_characters(img):
bw_image = cv2.bitwise_not(img)
contours = cv2.findContours(bw_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[1]
char_mask = np.zeros_like(img)
bounding_boxes = []
for contour in contours:
x,y,w,h = cv2.boundingRect(contour)
area = w * h
center = (x + w/2, y + h/2)
if (area > 1000) and (area < 10000):
x,y,w,h = x-4, y-4, w+8, h+8
bounding_boxes.append((center, (x,y,w,h)))
cv2.rectangle(char_mask,(x,y),(x+w,y+h),255,-1)
cv2.imwrite('licence_plate_mask3.png', char_mask)
clean = cv2.bitwise_not(cv2.bitwise_and(char_mask, char_mask, mask = bw_image))
bounding_boxes = sorted(bounding_boxes, key=lambda item: item[0][0])
characters = []
for center, bbox in bounding_boxes:
x,y,w,h = bbox
char_image = clean[y:y+h,x:x+w]
characters.append((bbox, char_image))
return clean, characters
def highlight_characters(img, chars):
output_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
for bbox, char_img in chars:
x,y,w,h = bbox
cv2.rectangle(output_img,(x,y),(x+w,y+h),255,1)
return output_img
# ============================================================================
img = cv2.imread("licence_plate.jpg")
img = clean_image(img)
clean_img, chars = extract_characters(img)
output_img = highlight_characters(clean_img, chars)
cv2.imwrite('licence_plate_out.png', output_img)
samples = np.loadtxt('char_samples.data',np.float32)
responses = np.loadtxt('char_responses.data',np.float32)
responses = responses.reshape((responses.size,1))
model = cv2.ml.KNearest_create()
model.train(samples, cv2.ml.ROW_SAMPLE, responses)
plate_chars = ""
for bbox, char_img in chars:
small_img = cv2.resize(char_img,(10,10))
small_img = small_img.reshape((1,100))
small_img = np.float32(small_img)
retval, results, neigh_resp, dists = model.findNearest(small_img, k = 1)
    plate_chars += str(chr(int(results[0][0])))
print("Licence plate: %s" % plate_chars)
Script Output
Enlarged 5x:
Equalized:
Reduced to 8 colours:
Thresholded:
Eroded:
Mask selecting only characters:
Clean image with bounding boxes:
Console output:
Licence plate: 2B99996
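Since the original question mentions Tesseract, the cleaned, eroded mask written by clean_image() can also be handed to it as a cross-check. A small sketch, assuming pytesseract and a Tesseract binary are installed (this is not part of the pipeline above):
import cv2
import pytesseract

mask = cv2.imread('licence_plate_mask2.png', 0)  # output of clean_image() above
# --psm 7: treat the image as a single line of text
text = pytesseract.image_to_string(mask, config='--psm 7')
print("Tesseract says: %s" % text.strip())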
C++ code, using OpenCV 2.4.11 and Boost.Filesystem to iterate over files in a directory.
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
// ============================================================================
namespace fs = boost::filesystem;
// ============================================================================
typedef std::vector<std::string> string_list;
struct char_match_t
{
cv::Point2i position;
cv::Mat image;
};
typedef std::vector<char_match_t> char_match_list;
// ----------------------------------------------------------------------------
string_list find_input_files(std::string const& dir)
{
string_list result;
fs::path dir_path(dir);
fs::directory_iterator end_itr;
for (fs::directory_iterator i(dir_path); i != end_itr; ++i) {
if (!fs::is_regular_file(i->status())) continue;
if (i->path().extension() == ".png") {
result.push_back(i->path().string());
}
}
return result;
}
// ----------------------------------------------------------------------------
cv::Mat reduce_image(cv::Mat const& img, int K)
{
int n = img.rows * img.cols;
cv::Mat data = img.reshape(1, n);
data.convertTo(data, CV_32F);
std::vector<int> labels;
cv::Mat1f colors;
cv::kmeans(data, K, labels
, cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001)
, 5, cv::KMEANS_PP_CENTERS, colors);
for (int i = 0; i < n; ++i) {
data.at<float>(i, 0) = colors(labels[i], 0);
}
cv::Mat reduced = data.reshape(1, img.rows);
reduced.convertTo(reduced, CV_8U);
return reduced;
}
// ----------------------------------------------------------------------------
cv::Mat clean_image(cv::Mat const& img)
{
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(), 5.0, 5.0, cv::INTER_CUBIC);
cv::Mat equalized_img;
cv::equalizeHist(resized_img, equalized_img);
cv::Mat reduced_img(reduce_image(equalized_img, 8));
cv::Mat mask;
cv::threshold(reduced_img
, mask
, 64
, 255
, cv::THRESH_BINARY);
cv::Mat kernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)));
cv::erode(mask, mask, kernel, cv::Point(-1, -1), 1);
return mask;
}
// ----------------------------------------------------------------------------
cv::Point2i center(cv::Rect const& bounding_box)
{
return cv::Point2i(bounding_box.x + bounding_box.width / 2
, bounding_box.y + bounding_box.height / 2);
}
// ----------------------------------------------------------------------------
char_match_list extract_characters(cv::Mat const& img)
{
cv::Mat inverse_img;
cv::bitwise_not(img, inverse_img);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(inverse_img.clone(), contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
char_match_list result;
double const MIN_CONTOUR_AREA(1000.0);
double const MAX_CONTOUR_AREA(6000.0);
for (uint32_t i(0); i < contours.size(); ++i) {
cv::Rect bounding_box(cv::boundingRect(contours[i]));
int bb_area(bounding_box.area());
if ((bb_area >= MIN_CONTOUR_AREA) && (bb_area <= MAX_CONTOUR_AREA)) {
int PADDING(2);
bounding_box.x -= PADDING;
bounding_box.y -= PADDING;
bounding_box.width += PADDING * 2;
bounding_box.height += PADDING * 2;
char_match_t match;
match.position = center(bounding_box);
match.image = img(bounding_box);
result.push_back(match);
}
}
std::sort(begin(result), end(result)
, [](char_match_t const& a, char_match_t const& b) -> bool
{
return a.position.x < b.position.x;
});
return result;
}
// ----------------------------------------------------------------------------
std::pair<float, cv::Mat> train_character(char c, cv::Mat const& img)
{
cv::Mat small_char;
cv::resize(img, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
return std::pair<float, cv::Mat>(
static_cast<float>(c)
, small_char_linear);
}
// ----------------------------------------------------------------------------
std::string process_image(cv::Mat const& img, cv::KNearest& knn)
{
cv::Mat clean_img(clean_image(img));
char_match_list characters(extract_characters(clean_img));
std::string result;
for (char_match_t const& match : characters) {
cv::Mat small_char;
cv::resize(match.image, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
float p = knn.find_nearest(small_char_linear, 1);
result.push_back(char(p));
}
return result;
}
// ============================================================================
int main()
{
string_list train_files(find_input_files("./chars"));
cv::Mat samples, responses;
for (std::string const& file_name : train_files) {
cv::Mat char_img(cv::imread(file_name, 0));
std::pair<float, cv::Mat> tinfo(train_character(file_name[file_name.size() - 5], char_img));
responses.push_back(tinfo.first);
samples.push_back(tinfo.second);
}
cv::KNearest knn;
knn.train(samples, responses);
string_list input_files(find_input_files("./input"));
for (std::string const& file_name : input_files) {
cv::Mat plate_img(cv::imread(file_name, 0));
std::string plate(process_image(plate_img, knn));
std::cout << file_name << " : " << plate << "\n";
}
}
// ============================================================================
C++ code, using OpenCV 3.1 and Boost.Filesystem to iterate over files in a directory.
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
// ============================================================================
namespace fs = boost::filesystem;
// ============================================================================
typedef std::vector<std::string> string_list;
struct char_match_t
{
cv::Point2i position;
cv::Mat image;
};
typedef std::vector<char_match_t> char_match_list;
// ----------------------------------------------------------------------------
string_list find_input_files(std::string const& dir)
{
string_list result;
fs::path dir_path(dir);
boost::filesystem::directory_iterator end_itr;
for (boost::filesystem::directory_iterator i(dir_path); i != end_itr; ++i) {
if (!boost::filesystem::is_regular_file(i->status())) continue;
if (i->path().extension() == ".png") {
result.push_back(i->path().string());
}
}
return result;
}
// ----------------------------------------------------------------------------
cv::Mat reduce_image(cv::Mat const& img, int K)
{
int n = img.rows * img.cols;
cv::Mat data = img.reshape(1, n);
data.convertTo(data, CV_32F);
std::vector<int> labels;
cv::Mat1f colors;
cv::kmeans(data, K, labels
, cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001)
, 5, cv::KMEANS_PP_CENTERS, colors);
for (int i = 0; i < n; ++i) {
data.at<float>(i, 0) = colors(labels[i], 0);
}
cv::Mat reduced = data.reshape(1, img.rows);
reduced.convertTo(reduced, CV_8U);
return reduced;
}
// ----------------------------------------------------------------------------
cv::Mat clean_image(cv::Mat const& img)
{
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(), 5.0, 5.0, cv::INTER_CUBIC);
cv::Mat equalized_img;
cv::equalizeHist(resized_img, equalized_img);
cv::Mat reduced_img(reduce_image(equalized_img, 8));
cv::Mat mask;
cv::threshold(reduced_img
, mask
, 64
, 255
, cv::THRESH_BINARY);
cv::Mat kernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)));
cv::erode(mask, mask, kernel, cv::Point(-1, -1), 1);
return mask;
}
// ----------------------------------------------------------------------------
cv::Point2i center(cv::Rect const& bounding_box)
{
return cv::Point2i(bounding_box.x + bounding_box.width / 2
, bounding_box.y + bounding_box.height / 2);
}
// ----------------------------------------------------------------------------
char_match_list extract_characters(cv::Mat const& img)
{
cv::Mat inverse_img;
cv::bitwise_not(img, inverse_img);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(inverse_img.clone(), contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
char_match_list result;
double const MIN_CONTOUR_AREA(1000.0);
double const MAX_CONTOUR_AREA(6000.0);
for (int i(0); i < contours.size(); ++i) {
cv::Rect bounding_box(cv::boundingRect(contours[i]));
int bb_area(bounding_box.area());
if ((bb_area >= MIN_CONTOUR_AREA) && (bb_area <= MAX_CONTOUR_AREA)) {
int PADDING(2);
bounding_box.x -= PADDING;
bounding_box.y -= PADDING;
bounding_box.width += PADDING * 2;
bounding_box.height += PADDING * 2;
char_match_t match;
match.position = center(bounding_box);
match.image = img(bounding_box);
result.push_back(match);
}
}
std::sort(begin(result), end(result)
, [](char_match_t const& a, char_match_t const& b) -> bool
{
return a.position.x < b.position.x;
});
return result;
}
// ----------------------------------------------------------------------------
std::pair<float, cv::Mat> train_character(char c, cv::Mat const& img)
{
cv::Mat small_char;
cv::resize(img, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
return std::pair<float, cv::Mat>(
static_cast<float>(c)
, small_char_linear);
}
// ----------------------------------------------------------------------------
std::string process_image(cv::Mat const& img, cv::Ptr<cv::ml::KNearest> knn)
{
cv::Mat clean_img(clean_image(img));
char_match_list characters(extract_characters(clean_img));
std::string result;
for (char_match_t const& match : characters) {
cv::Mat small_char;
cv::resize(match.image, small_char, cv::Size(10, 10), 0, 0, cv::INTER_LINEAR);
cv::Mat small_char_float;
small_char.convertTo(small_char_float, CV_32FC1);
cv::Mat small_char_linear(small_char_float.reshape(1, 1));
cv::Mat tmp;
float p = knn->findNearest(small_char_linear, 1, tmp);
result.push_back(char(p));
}
return result;
}
// ============================================================================
int main()
{
string_list train_files(find_input_files("./chars"));
cv::Mat samples, responses;
for (std::string const& file_name : train_files) {
cv::Mat char_img(cv::imread(file_name, 0));
std::pair<float, cv::Mat> tinfo(train_character(file_name[file_name.size() - 5], char_img));
responses.push_back(tinfo.first);
samples.push_back(tinfo.second);
}
cv::Ptr<cv::ml::KNearest> knn(cv::ml::KNearest::create());
cv::Ptr<cv::ml::TrainData> training_data =
cv::ml::TrainData::create(samples
, cv::ml::SampleTypes::ROW_SAMPLE
, responses);
knn->train(training_data);
string_list input_files(find_input_files("./input"));
for (std::string const& file_name : input_files) {
cv::Mat plate_img(cv::imread(file_name, 0));
std::string plate(process_image(plate_img, knn));
std::cout << file_name << " : " << plate << "\n";
}
}
// ============================================================================