Python number recognition (on colored screen)

I searched for image recognition using Python. It seems there is no tutorial about extracting numbers from a colored background, so I followed THIS TUTORIAL:
import cv2
import matplotlib.pyplot as plt

def detect_edge(image):
    ''' function Detecting Edges '''
    image_with_edges = cv2.Canny(image, 100, 200)
    images = [image, image_with_edges]
    location = [121, 122]
    for loc, img in zip(location, images):
        plt.subplot(loc)
        plt.imshow(img, cmap='gray')
    plt.savefig('edge.png')
    plt.show()

image = cv2.imread('myscreenshot.png', 0)
detect_edge(image)
This is my image:
This is the result:
Any solution to print out these numbers?

Here is some code for getting clean canny edges for this image.
import cv2
import numpy as np
# load image
img = cv2.imread("numbers.png");
# change to hue colorspace
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV);
h,s,v = cv2.split(hsv);
# use clahe to improve contrast
# (the contrast is pretty good already, so not much change, but good habit to have here)
clahe = cv2.createCLAHE(clipLimit = 10)
contrast = clahe.apply(v);
# use canny
canny = cv2.Canny(contrast, 20, 110);
# show
cv2.imshow('i', img);
cv2.imshow('v', v);
cv2.imshow('c', contrast);
cv2.imshow("canny", canny);
cv2.waitKey(0);
# save
cv2.imwrite("edges.png", canny);
Without using an OCR like pytesseract, I don't see an obvious way to consistently turn this image into "text" numbers. I'll leave that for someone else who might know how to solve it without any pattern-recognition tools, because I don't even know where to begin without them. If you're willing to forgo that restriction, then pytesseract should have no problem with this, possibly even without preprocessing like this.
OK, I filled in the numbers for the image. OpenCV's findContours hierarchy wasn't cooperating for some reason, so I had to do it manually, which makes this code pretty janky. Honestly, if I were to try this again from scratch, I'd try to find colors that contribute a small number of total pixels, threshold on each, and combine the masks (there's a rough sketch of that idea after the code).
import cv2
import numpy as np

# check if the small box is inside the big box
def contained(big, small):
    # big corners
    x, y, w, h = big
    big_tl = [x, y]
    big_br = [x + w, y + h]
    # small corners
    x, y, w, h = small
    small_tl = [x, y]
    small_br = [x + w, y + h]
    # check
    if small_tl[0] > big_tl[0] and small_br[0] < big_br[0]:
        if small_tl[1] > big_tl[1] and small_br[1] < big_br[1]:
            return True
    return False

# load image
img = cv2.imread("numbers.png")

# change to hue colorspace
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)

# use clahe to improve contrast
# (the contrast is pretty good already, so not much change, but good habit to have here)
clahe = cv2.createCLAHE(clipLimit=10)
contrast = clahe.apply(v)

# rescale
scale = 2.0
h, w = img.shape[:2]
h = int(h * scale)
w = int(w * scale)
contrast = cv2.resize(contrast, (w, h), cv2.INTER_LINEAR)
img = cv2.resize(img, (w, h), cv2.INTER_LINEAR)

# use canny
canny = cv2.Canny(contrast, 10, 60)

# show
cv2.imshow('i', img)
cv2.imshow('v', v)
cv2.imshow('c', contrast)
cv2.imshow("canny", canny)
cv2.waitKey(0)

# try to fill in contours
_, contours, hierarchy = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# filter contours by size
# (filter out noisy bits and the big grid boxes)
filtered = []
for contour in contours:
    perimeter = cv2.arcLength(contour, True)
    if 50 < perimeter < 750:
        filtered.append(contour)

# draw the filtered contours onto a mask
mask = np.zeros_like(contrast)
mask = cv2.drawContours(mask, filtered, -1, 255, -1)

# close to get rid of annoying little gaps
kernel = np.ones((3, 3), np.uint8)
mask = cv2.dilate(mask, kernel, iterations=1)
mask = cv2.erode(mask, kernel, iterations=1)

# contours
_, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# alright, hierarchy is being stupid, plan B
# SUUUUUPEEERRR JAAAANK
outer_cntrs = [a for a in range(len(contours))]
children = []
for a in range(len(contours)):
    if a in outer_cntrs:
        # get current box
        big_box = cv2.boundingRect(contours[a])
        # check against all other boxes
        for b in range(0, len(contours)):
            if b in outer_cntrs:
                small_box = cv2.boundingRect(contours[b])
                # remove any children
                if contained(big_box, small_box):
                    outer_cntrs.remove(b)
                    children.append(contours[b])

# select by hierarchy
top_cntrs = []
for a in range(len(contours)):
    if a in outer_cntrs:
        top_cntrs.append(contours[a])

# create a mask of the contoured image
mask = np.zeros_like(contrast)
mask = cv2.drawContours(mask, top_cntrs, -1, 255, -1)
mask = cv2.drawContours(mask, children, -1, 255, -1)

# close
kernel = np.ones((3, 3), np.uint8)
mask = cv2.dilate(mask, kernel, iterations=1)
mask = cv2.erode(mask, kernel, iterations=1)

# do contours again because opencv is being super difficult
# honestly, at this point, a fill method would've been better
_, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# fill in the outer shapes, then punch the children back out
for con in contours:
    cv2.fillPoly(mask, pts=[con], color=255)
for con in children:
    cv2.fillPoly(mask, pts=[con], color=0)

# resize back down
h, w = mask.shape
h = int(h / scale)
w = int(w / scale)
mask = cv2.resize(mask, (w, h))

# show
cv2.imshow("mask", mask)
cv2.waitKey(0)

# save
cv2.imwrite("filled.png", mask)

You can find the digits in three steps:
1. Apply an adaptive threshold
2. Apply erosion
3. Read the result using pytesseract
Adaptive-threshold result:
Here we see that the 9 and the 0 are different from the rest of the digits. We need to remove the boundary around the 9.
Erosion result:
Pytesseract result:
8 | 1
5 9
4 #
3 | 3
6 | 1
There are multiple page-segmentation modes available for pytesseract.
If you want to remove the | characters from the output, you can use re.sub:
text = re.sub('[^A-Za-z0-9]+', '\n', text)
Result will be:
8
1
5
9
4
3
3
6
1
Code:
import cv2
import pytesseract
import re
import numpy as np

image = cv2.imread("7UUGYHw.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# adaptive threshold, then erode to remove the box borders around the digits
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 13, 2)
erode = cv2.erode(thresh, np.ones((7, 7), np.uint8), iterations=1)

# --psm 6 treats the image as a single uniform block of text
text = pytesseract.image_to_string(erode, config="--psm 6")
text = re.sub('[^A-Za-z0-9]+', '\n', text)
print(text)

Related

Remove white borders from segmented images

I am trying to segment lung CT images using K-means with the code below:
import os
import numpy as np
from PIL import Image
from scipy.ndimage import binary_dilation, generate_binary_structure
from skimage import measure, morphology
from skimage.morphology import convex_hull_image
from sklearn.cluster import KMeans

def process_mask(mask):
    convex_mask = np.copy(mask)
    for i_layer in range(convex_mask.shape[0]):
        mask1 = np.ascontiguousarray(mask[i_layer])
        if np.sum(mask1) > 0:
            mask2 = convex_hull_image(mask1)
            if np.sum(mask2) > 2 * np.sum(mask1):
                mask2 = mask1
        else:
            mask2 = mask1
        convex_mask[i_layer] = mask2
    struct = generate_binary_structure(3, 1)
    dilatedMask = binary_dilation(convex_mask, structure=struct, iterations=10)
    return dilatedMask

def lumTrans(img):
    lungwin = np.array([-1200., 600.])
    newimg = (img - lungwin[0]) / (lungwin[1] - lungwin[0])
    newimg[newimg < 0] = 0
    newimg[newimg > 1] = 1
    newimg = (newimg * 255).astype('uint8')
    return newimg

def lungSeg(imgs_to_process, output, name):
    if os.path.exists(output + '/' + name + '_clean.npy'):
        return
    imgs_to_process = Image.open(imgs_to_process)
    img_to_save = imgs_to_process.copy()
    img_to_save = np.asarray(img_to_save).astype('uint8')
    imgs_to_process = lumTrans(imgs_to_process)
    imgs_to_process = np.expand_dims(imgs_to_process, axis=0)
    x, y, z = imgs_to_process.shape
    img_array = imgs_to_process.copy()
    A1 = int(y / (512. / 100))
    A2 = int(y / (512. / 400))
    A3 = int(y / (512. / 475))
    A4 = int(y / (512. / 40))
    A5 = int(y / (512. / 470))
    for i in range(len(imgs_to_process)):
        img = imgs_to_process[i]
        print(img.shape)
        x, y = img.shape
        # Standardize the pixel values
        allmean = np.mean(img)
        allstd = np.std(img)
        img = img - allmean
        img = img / allstd
        # Find the average pixel value near the lungs
        # to renormalize washed-out images
        middle = img[A1:A2, A1:A2]
        mean = np.mean(middle)
        max = np.max(img)
        min = np.min(img)
        kmeans = KMeans(n_clusters=2).fit(np.reshape(middle, [np.prod(middle.shape), 1]))
        centers = sorted(kmeans.cluster_centers_.flatten())
        threshold = np.mean(centers)
        thresh_img = np.where(img < threshold, 1.0, 0.0)  # threshold the image
        eroded = morphology.erosion(thresh_img, np.ones([4, 4]))
        dilation = morphology.dilation(eroded, np.ones([10, 10]))
        labels = measure.label(dilation)
        label_vals = np.unique(labels)
        regions = measure.regionprops(labels)
        good_labels = []
        for prop in regions:
            B = prop.bbox
            if B[2] - B[0] < A3 and B[3] - B[1] < A3 and B[0] > A4 and B[2] < A5:
                good_labels.append(prop.label)
        mask = np.ndarray([x, y], dtype=np.int8)
        mask[:] = 0
        for N in good_labels:
            mask = mask + np.where(labels == N, 1, 0)
        mask = morphology.dilation(mask, np.ones([10, 10]))  # one last dilation
        imgs_to_process[i] = mask
    m1 = imgs_to_process
    convex_mask = m1
    dm1 = process_mask(m1)
    dilatedMask = dm1
    Mask = m1
    extramask = dilatedMask ^ Mask
    bone_thresh = 180
    pad_value = 0
    img_array[np.isnan(img_array)] = -2000
    sliceim = img_array
    sliceim = sliceim * dilatedMask + pad_value * (1 - dilatedMask).astype('uint8')
    bones = sliceim * extramask > bone_thresh
    sliceim[bones] = pad_value
    x, y, z = sliceim.shape
    if not os.path.exists(output):
        os.makedirs(output)
    img_to_save[sliceim.squeeze() == 0] = 0
    im = Image.fromarray(img_to_save)
    im.save(output + name + '.png', 'PNG')
The problem is that the segmented lung still contains white borders like this:
Segmented lung (output):
Unsegmented lung (input):
The full code can be found in this Google Colab notebook.
And a sample of the dataset is here.
For this problem, I don't recommend using Kmeans color quantization since this technique is usually reserved for a situation where there are various colors and you want to segment them into dominant color blocks. Take a look at this previous answer for a typical use case. Since your CT scan images are grayscale, Kmeans would not perform very well. Here's a potential solution using simple image processing with OpenCV:
Obtain binary image. Load input image, convert to grayscale, Otsu's threshold, and find contours.
Create a blank mask to extract desired objects. We can use np.zeros() to create an empty mask with the same size as the input image.
Filter contours using contour area and aspect ratio. We search for the lung objects by ensuring that contours are within a specified area threshold as well as aspect ratio. We use cv2.contourArea(), cv2.arcLength(), and cv2.approxPolyDP() for contour perimeter and contour shape approximation. If we have found our lung object, we utilize cv2.drawContours() to fill in our mask with white to represent the objects that we want to extract.
Bitwise-and mask with original image. Finally we convert the mask to grayscale and bitwise-and with cv2.bitwise_and() to obtain our result.
Here is our image processing pipeline visualized step-by-step:
Grayscale -> Otsu's threshold
Detected objects to extract highlighted in green -> Filled mask
Bitwise-and to get our result -> Optional result with white background instead
Code
import cv2
import numpy as np

image = cv2.imread('1.png')
highlight = image.copy()
original = image.copy()

# Convert image to grayscale, Otsu's threshold, and find contours
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# Create black mask to extract desired objects
mask = np.zeros(image.shape, dtype=np.uint8)

# Search for objects by filtering using contour area and aspect ratio
for c in contours:
    # Contour area
    area = cv2.contourArea(c)
    # Contour perimeter
    peri = cv2.arcLength(c, True)
    # Contour approximation
    approx = cv2.approxPolyDP(c, 0.035 * peri, True)
    (x, y, w, h) = cv2.boundingRect(approx)
    aspect_ratio = w / float(h)
    # Draw filled contour onto mask if it passes the filter
    # (these are arbitrary values and may need to change depending on the input image)
    if aspect_ratio <= 1.2 or area < 5000:
        cv2.drawContours(highlight, [c], 0, (0, 255, 0), -1)
        cv2.drawContours(mask, [c], 0, (255, 255, 255), -1)

# Convert 3-channel mask to grayscale then bitwise-and with original image for result
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(original, original, mask=mask)

# Uncomment if you want background to be white instead of black
# result[mask==0] = (255,255,255)

# Display
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('highlight', highlight)
cv2.imshow('mask', mask)
cv2.imshow('result', result)

# Save images
# cv2.imwrite('gray.png', gray)
# cv2.imwrite('thresh.png', thresh)
# cv2.imwrite('highlight.png', highlight)
# cv2.imwrite('mask.png', mask)
# cv2.imwrite('result.png', result)
cv2.waitKey(0)
A simpler approach to this problem is morphological erosion. It's just that you will then have to tune the threshold values.
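For completeness, a minimal sketch of that erosion idea; the kernel size, iteration count, and filename are assumptions that would need tuning per image:

import cv2
import numpy as np

img = cv2.imread('1.png', cv2.IMREAD_GRAYSCALE)

# binarize, then erode so thin white borders disappear while the
# large regions (mostly) survive
_, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
eroded = cv2.erode(binary, kernel, iterations=2)

cv2.imshow('eroded', eroded)
cv2.waitKey(0)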

Is there any way to crop an image inside a box?

I want to crop the image only inside the box or rectangle. I tried so many approaches but nothing worked.
import cv2
import numpy as np

img = cv2.imread("C:/Users/hp/Desktop/segmentation/add.jpeg", 0)
h, w = img.shape[:2]
# print(img.shape)

kernel = np.ones((3, 3), np.uint8)
img2 = img.copy()
img2 = cv2.medianBlur(img2, 5)
img2 = cv2.adaptiveThreshold(img2, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                             cv2.THRESH_BINARY, 11, 2)
img2 = 255 - img2
img2 = cv2.dilate(img2, kernel)
img2 = cv2.medianBlur(img2, 9)
img2 = cv2.medianBlur(img2, 9)
cv2.imshow('anything', img2)
cv2.waitKey(0)
cv2.destroyAllWindows()

position = np.where(img2 != 0)
x0 = position[0].min()
x1 = position[0].max()
y0 = position[1].min()
y1 = position[1].max()
print(x0, x1, y0, y1)

result = img[x0:x1, y0:y1]
cv2.imshow('anything', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
The output should be the image inside the square.
You can use contour detection for this. If your image basically contains only a hand-drawn rectangle, I think it's good enough to assume it's the largest closed contour in the image. From that contour, we can figure out a polygon/quadrilateral approximation and then finally get an approximate rectangle. I'll define some utilities at the beginning, which I generally use to make my life easier when messing around with images:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def load_image(filename):
    return cv2.imread(filename)

def bnw(image):
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

def col(image):
    return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

def fixrgb(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def show_image(image, figsize=(7, 7), cmap=None):
    cmap = cmap if len(image.shape) == 3 else 'gray'
    plt.figure(figsize=figsize)
    plt.imshow(image, cmap=cmap)
    plt.show()

def AdaptiveThresh(gray):
    blur = cv2.medianBlur(gray, 5)
    adapt_type = cv2.ADAPTIVE_THRESH_GAUSSIAN_C
    thresh_type = cv2.THRESH_BINARY_INV
    return cv2.adaptiveThreshold(blur, 255, adapt_type, thresh_type, 11, 2)

def get_rect(pts):
    xmin = pts[:, 0, 1].min()
    ymin = pts[:, 0, 0].min()
    xmax = pts[:, 0, 1].max()
    ymax = pts[:, 0, 0].max()
    return (ymin, xmin), (ymax, xmax)
Let's load the image and convert it to grayscale:
image_name = 'test.jpg'
image_original = fixrgb(load_image(image_name))
image_gray = 255-bnw(image_original)
show_image(image_gray)
Use some morph ops to enhance the image:
kernel = np.ones((3,3),np.uint8)
d = 255-cv2.dilate(image_gray,kernel,iterations = 1)
show_image(d)
Find the edges and enhance/denoise:
e = AdaptiveThresh(d)
show_image(e)
m = cv2.dilate(e,kernel,iterations = 1)
m = cv2.medianBlur(m,11)
m = cv2.dilate(m,kernel,iterations = 1)
show_image(m)
Contour detection:
contours, hierarchy = cv2.findContours(m, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
total_area = np.prod(image_gray.shape)
max_area = 0
for cnt in contours:
# Simplify contour
perimeter = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)
area = cv2.contourArea(approx)
# Shape is recrangular, so 4 points approximately and it's convex
if (len(approx) == 4 and cv2.isContourConvex(approx) and max_area<area<total_area):
max_area = cv2.contourArea(approx)
quad_polygon = approx
img1 = image_original.copy()
img2 = image_original.copy()
cv2.polylines(img1,[quad_polygon],True,(0,255,0),10)
show_image(img1)
tl, br = get_rect(quad_polygon)
cv2.rectangle(img2, tl, br, (0,255,0), 10)
show_image(img2)
So you can see the approximate polygon and the corresponding rectangle, from which you can get your crop. I suggest you play around with median blur and morphological ops like erosion, dilation, opening, and closing, and see which set of operations suits your images best; I can't really say what's good from just one image. You can crop using the top-left and bottom-right coordinates:
show_image(image_original[tl[1]:br[1],tl[0]:br[0],:])
Draw the square in a different color (e.g. red) so it can be distinguished from other writing and the background. Then threshold the image so you get a black-and-white image: the red line will be white in this image. Get the coordinates of the white pixels; from this set, select only the two pairs (minX, minY) and (maxX, maxY). They are the top-left and bottom-right points of the box (remember that in an image the (0, 0) point is at the top left), and you can use them to crop the image.
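A minimal sketch of this idea, assuming the box was drawn in a pure-ish red; the HSV ranges and the filename are assumptions that would need tuning:

import cv2
import numpy as np

img = cv2.imread("boxed.png")  # hypothetical filename
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# red wraps around the hue axis, so combine two ranges
mask = cv2.inRange(hsv, (0, 100, 100), (10, 255, 255)) | \
       cv2.inRange(hsv, (170, 100, 100), (180, 255, 255))

# the white pixels of the mask give the box; take their extremes and crop
ys, xs = np.nonzero(mask)
crop = img[ys.min():ys.max(), xs.min():xs.max()]
cv2.imwrite("crop.png", crop)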

Is there any way I can remove background colour from the enclosed spaces in the image?

I am trying to remove the background colour from an image in JPG format and save it in PNG format. The background colour should be removed completely, even from the enclosed spaces present in the image (see example below).
I have used the solution provided here. I modified it according to my use case and was able to successfully remove the background colour outside the object.
For example:
JPG image:
PNG image:
I need to remove the background colour from the small enclosed spaces near the right of the neck and the hair. Can we implement this using Python OpenCV?
file_name = "hello.jpg"
img = cv2.imread(file_name)
# == Parameters =======================================================================
BLUR = 5
CANNY_THRESH_1 = 127
CANNY_THRESH_2 = 255
MASK_DILATE_ITER = 2
MASK_ERODE_ITER = 2
MASK_COLOR = (0.0, 0.0, 1.0) # In BGR format
# == Processing =======================================================================
# -- Change the image to gray scale image -----------------------------------------------
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# -- Edge detection -------------------------------------------------------------------
edges = cv2.Canny(gray, CANNY_THRESH_1, CANNY_THRESH_2)
edges = cv2.dilate(edges, None)
edges = cv2.erode(edges, None)
# -- Find contours in edges, sort by area ---------------------------------------------
contour_info = []
# _, contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
# Previously, for a previous version of cv2, this line was:
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
for c in contours:
contour_info.append((
c,
cv2.isContourConvex(c),
cv2.contourArea(c),
))
contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
max_contour = contour_info[0]
# -- Create empty mask, draw filled polygon on it corresponding to largest contour ----
# Mask is black, polygon is white
mask = np.zeros(edges.shape)
cv2.fillConvexPoly(mask, max_contour[0], (255))
# -- Smooth mask, then blur it --------------------------------------------------------
mask = cv2.dilate(mask, None, iterations=MASK_DILATE_ITER)
mask = cv2.erode(mask, None, iterations=MASK_ERODE_ITER)
mask = cv2.GaussianBlur(mask, (BLUR, BLUR), 0)
mask_stack = np.dstack([mask]*3) # Create 3-channel alpha mask
# -- Blend masked img into MASK_COLOR background --------------------------------------
mask_stack = mask_stack.astype('float32') / 255.0 # Use float matrices,
img = img.astype('float32') / 255.0 # for easy blending
masked = (mask_stack * img) + ((1-mask_stack) * MASK_COLOR) # Blend
masked = (masked * 255).astype('uint8') # Convert back to 8-bit
# Display
cv2.imshow('img', masked)
cv2.waitKey(5000)
# split image into channels
c_red, c_green, c_blue = cv2.split(img)
# merge with mask got on one of a previous steps
img_a = cv2.merge((c_red, c_green, c_blue, mask.astype('float32') / 255.0))
# save to disk
cv2.imwrite("/Documents/jpg-to-png/converted.png", img_a*255)

contour for ID card

I'm trying to detect the contour of an ID card, but it never works. I tried four_point_transform, boundingRect, boundaries, active contours, and the Hough transform, all with the same result. The contour will be used to scan just the ID card.
The ID looks like this: here
This is what the code looks like:
from trans import four_point_transform
from skimage.filters import threshold_local
import numpy as np
import cv2
import imutils

def edgeDetection(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(image, (5, 5), 0)
    edged = cv2.Canny(gray, 200, 150)
    return edged

def detectrectarrondi(image, edged):
    orig = image.copy()
    gray = cv2.cvtColor(orig, cv2.COLOR_BGR2GRAY)
    edged = cv2.Canny(gray, 50, 40)
    orig_edged = edged.copy()
    (_, contours, _) = cv2.findContours(orig_edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    for contour in contours:
        c = max(contours, key=cv2.contourArea)
        (x, y, w, h) = cv2.boundingRect(c)
        screen = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return screen

def scan(screen, image):
    ratio = image.shape[0] / 500.0
    warped = four_point_transform(image, screen.reshape(4, 2) * ratio)
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    T = threshold_local(warped, 11, offset=10, method="gaussian")
    warped = (warped > T).astype("uint8") * 255
    return warped
As I can't write a comment because I don't have 50 reputation, I will give you some steps to follow here:
1/ Convert your image to grayscale using cvtColor.
2/ Apply GaussianBlur to reduce noise.
3/ Apply the Canny edge detector; you need to play with the lower and higher threshold values to get the best result.
4/ This step is not strictly needed but can be helpful: apply a morphological close operation (MORPH_CLOSE) to close incomplete contours.
5/ Find your contours using findContours.
6/ Loop through the found contours and draw the bounding rectangle of the one with the biggest area.
I hope this helps you; tell me if you want to see some code.
EDIT:
Imgproc.cvtColor(origMat, mGray, Imgproc.COLOR_BGR2GRAY);
Imgproc.GaussianBlur(mGray, mGray, new Size(5, 5), 5);
Imgproc.Canny(mGray, mGray, 30, 80, 3, false);
Mat kernell = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(9, 9));
Imgproc.morphologyEx(mGray, mGray, Imgproc.MORPH_CLOSE, kernell);
Imgproc.dilate(mGray, mGray, Imgproc.getStructuringElement(Imgproc.MORPH_CROSS, new Size(3, 3)));

List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
Mat hierarchy = new Mat();
Imgproc.findContours(mGray, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE);

MatOfPoint2f approxCurve = new MatOfPoint2f();
double largest_area = 0;
int largest_contour_index = 0;
Rect rect = new Rect();
for (int idx = 0; idx < contours.size(); idx++) {
    double a = Imgproc.contourArea(contours.get(idx));  // Find the area of the contour
    if (a > largest_area) {
        largest_area = a;
        largest_contour_index = idx;
        rect = Imgproc.boundingRect(contours.get(idx));
    }
}
Imgproc.rectangle(origMat, rect.tl(), rect.br(), new Scalar(0, 255, 0));
return origMat;
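For reference, since the question uses Python, here is a rough Python translation of the same pipeline (an untested sketch; the filename is a placeholder):

import cv2

orig = cv2.imread("id_card.png")  # hypothetical filename
gray = cv2.cvtColor(orig, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 5)
edges = cv2.Canny(gray, 30, 80)

# close incomplete contours, then dilate slightly
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
closed = cv2.dilate(closed, cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3)))

# keep the bounding rectangle of the largest external contour
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
c = max(contours, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(c)
cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow("result", orig)
cv2.waitKey(0)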
You can have a look at this good answer to set Canny threshold values automatically using the median value of your image:
https://stackoverflow.com/a/41895229
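That median-based trick looks roughly like this (a short sketch; sigma = 0.33 is the commonly used default):

import cv2
import numpy as np

def auto_canny(image, sigma=0.33):
    # pick the lower/upper thresholds as a band around the median intensity
    v = np.median(image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    return cv2.Canny(image, lower, upper)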

How to extract white region in an image

I have a sample image like this
I'm looking for a way to black out the noise from the image such that I end up with an image that just has black text on white background so that I may send it to tesseract.
I've tried morphing with
kernel = np.ones((4,4),np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
cv2.imshow("opening", opening)
but it doesn't seem to work.
I've also tried to find contours
img = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
(cnts, _) = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    roi = rotated[y:y+h, x:x+w].copy()
    cv2.imwrite("roi.png", roi)
With the above code, I get the following contours:
which leads to this image when cropped:
which is still not good enough. I want black text on white background, so that I can send it to tesseract OCR and have good success rate.
Is there anything else I can try?
Update
Here is an additional similar image. This one is a bit easier because it has a smooth rectangle in it
The following works for your given example, although it might need tweaking for a wider range of images.
import numpy as np
import cv2

image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 250, 255, 0)

image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255, 255, 255, 255), -1)
dst = cv2.bitwise_and(image_src, mask)
mask = 255 - mask
roi = cv2.add(dst, mask)

roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(roi_gray, 250, 255, 0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

max_x = 0
max_y = 0
min_x = image_src.shape[1]
min_y = image_src.shape[0]
for c in contours:
    if 150 < cv2.contourArea(c) < 100000:
        x, y, w, h = cv2.boundingRect(c)
        min_x = min(x, min_x)
        min_y = min(y, min_y)
        max_x = max(x + w, max_x)
        max_y = max(y + h, max_y)

roi = roi[min_y:max_y, min_x:max_x]
cv2.imwrite("roi.png", roi)
Giving you the following type of output images:
And...
The code works by first locating the largest contour area. From this, a mask is created, which is used first to select only the area inside, i.e. the text. The inverse of the mask is then added to the image to turn the area outside the mask white.
Lastly, contours are found again for this new image. Any contour areas outside a suitable size range are discarded (this ignores small noise areas), and a bounding rect is found for each remaining contour. An outer bounding rect covering all of them is then calculated, and a crop is made using these values to give the final image.
Update - To get the remainder of the image, i.e. with the above area removed, the following could be used:
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 10, 255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
image_remainder = cv2.bitwise_and(image_src, 255 - mask)
cv2.imwrite("remainder.png", image_remainder)
I get this:
Result
Source Code:
import copy
import cv2
import numpy as np

if __name__ == '__main__':
    # note: CV_LOAD_IMAGE_GRAYSCALE and the cv2.cv.* constants are OpenCV 2.4-era names
    SrcImg = cv2.imread('./Yahi9.png', cv2.CV_LOAD_IMAGE_GRAYSCALE)
    _, BinImg = cv2.threshold(SrcImg, 80, 255, cv2.THRESH_OTSU)

    Contours, Hierarchy = cv2.findContours(image=copy.deepcopy(SrcImg),
                                           mode=cv2.cv.CV_RETR_EXTERNAL,
                                           method=cv2.cv.CV_CHAIN_APPROX_NONE)
    MaxContour, _ = getMaxContour(Contours)

    Canvas = np.ones(SrcImg.shape, np.uint8)
    cv2.drawContours(image=Canvas, contours=[MaxContour], contourIdx=0, color=(255), thickness=-1)
    mask = (Canvas != 255)

    RoiImg = copy.deepcopy(BinImg)
    RoiImg[mask] = 255
    RoiImg = cv2.morphologyEx(src=RoiImg, op=cv2.MORPH_CLOSE, kernel=np.ones((3, 3)), iterations=4)

    cv2.imshow('RoiImg', RoiImg)
    cv2.waitKey(0)
Function:
def getMaxContour(contours):
    MaxArea = 0
    Location = 0
    for idx in range(0, len(contours)):
        Area = cv2.contourArea(contours[idx])
        if Area > MaxArea:
            MaxArea = Area
            Location = idx
    MaxContour = np.array(contours[Location])
    return MaxContour, MaxArea
Ehh, it's Python code.
It only works when the white region is the max contour.
The basic idea of this answer is to use the border around the text.
1) Erode horizontally with a very large kernel, say 100 px wide, or about 8 times the width of a single expected character. This should be done row-wise. The extreme ordinates will give the y-locations of the boundaries around the text.
2) Process vertically the same way to get the x-locations of the boundaries around the text. Then use these locations to crop out the image you want (a rough sketch follows the demos below).
-- One benefit of this method is that you will get every sentence/word segmented separately, which, I presume, is good for an OCR.
Happy Coding :)
Edited in by Mark Setchell
Here is a demo of 1)
Here is a demo of 2)
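Below is a rough sketch of the idea, assuming a white border on a dark binary image; the 100 px kernel width is just the ballpark figure suggested above, and the filename is a placeholder.

import cv2
import numpy as np

binary = cv2.imread("text.png", cv2.IMREAD_GRAYSCALE)  # hypothetical input

# 1) erode row-wise with a very wide kernel; only long horizontal runs survive,
#    and their extreme ordinates give the y-extent of the border around the text
horiz = cv2.erode(binary, np.ones((1, 100), np.uint8))
rows = np.where(horiz.max(axis=1) > 0)[0]

# 2) the same thing column-wise for the x-extent
vert = cv2.erode(binary, np.ones((100, 1), np.uint8))
cols = np.where(vert.max(axis=0) > 0)[0]

# crop between the detected boundaries
crop = binary[rows.min():rows.max(), cols.min():cols.max()]
cv2.imwrite("crop.png", crop)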
