I've been trying to clean this image for OCR but getting mixed results:
Best I achieved:
def image_smoothening(img):
    """Binarize ``img`` with a fixed threshold followed by two Otsu passes.

    The image is first thresholded at 180, then re-thresholded with Otsu,
    passed through a (1, 1) Gaussian blur and thresholded with Otsu again.
    Returns the final thresholded image.
    """
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, fixed = cv2.threshold(img, 180, 255, cv2.THRESH_BINARY)
    _, first_otsu = cv2.threshold(fixed, 0, 255, otsu_binary)
    blurred = cv2.GaussianBlur(first_otsu, (1, 1), 0)
    # Only the image half of the (retval, image) pair is of interest.
    return cv2.threshold(blurred, 0, 255, otsu_binary)[1]
def remove_noise_and_smooth(img):
    """Combine an adaptive threshold of ``img`` with ``image_smoothening``.

    ``img`` is cast to uint8 and mean-adaptive-thresholded (block size 45,
    constant 3), then opened and closed with a 1x1 kernel. The result is
    OR-ed with the output of ``image_smoothening(img)`` and returned.
    """
    unit_kernel = np.ones((1, 1), np.uint8)
    cleaned = cv2.adaptiveThreshold(
        img.astype(np.uint8), 255,
        cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 45, 3)
    # Opening first, then closing, both with the same 1x1 kernel.
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        cleaned = cv2.morphologyEx(cleaned, morph_op, unit_kernel)
    return cv2.bitwise_or(image_smoothening(img), cleaned)
Any clue as to what I'm missing?
My MATLAB code to solve it. I know you are writing in Python so you'll have to translate.
%Read in
im = imread('DuQy7.png');
%Convert to grayscale and rescale
img = rescale(rgb2gray(im));
%Binarize with threshold of 0.7/1.0
imbw = imbinarize(img, 0.7/1);
%Flip blacks/whites
imbw = imcomplement(imbw);
%Label, L is labelled image, n is # of labels
[L, n] = bwlabeln(imbw);
%Get pixel count for each label.
%L is deliberately NOT cast to uint8: the cast saturates every label above
%255 to 255, which merges those regions and corrupts their counts.
count = accumarray(L(L > 0), 1, [n 1]);
%Find label with most pixels (first one wins on ties); 0 means "no labels"
if n > 0
    [~, maxi] = max(count);
else
    maxi = 0;
end
%Drop the largest region and color every other label white (255)
out = uint8(255 * (L > 0 & L ~= maxi));
%view and save
imshow(out)
imwrite(out, 'outputTXT.bmp');
You could probably better adjust the threshold to better cut out background regions that got included. You could also look for labelled regions that are very small and remove them since they are probably erroneously included.
Some parts of the background are going to be impossible to get rid of since they are indistinguishable from the actual symbols. For example, between symbol x2,y1 and x2,y2 there is a black background region between the outlined white which is the same value as the symbols. Therefore it would be very difficult to parse out.
You can do "division normalization" in Python/OpenCV to remove the background. But that will not help with the outline font issue.
Input:
import cv2
import numpy as np
# load the picture and reduce it to a single gray channel
bgr = cv2.imread('img.png')
gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

# dilate with a 3x3 rectangle to get the image the gray is divided by
rect3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
smooth = cv2.morphologyEx(gray, cv2.MORPH_DILATE, rect3)
# alternate: blur instead of morphology
#smooth = cv2.GaussianBlur(gray, (15,15), 0)

# division normalization: gray / smooth, scaled by 255
division = cv2.divide(gray, smooth, scale=255)

# Otsu threshold; keep only the image from the (retval, image) pair
_, result = cv2.threshold(division, 0, 255, cv2.THRESH_OTSU)

# write the thresholded image, then display every stage
cv2.imwrite('img_thresh.png', result)
for title, frame in (('smooth', smooth),
                     ('division', division),
                     ('result', result)):
    cv2.imshow(title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Related
I need to remove the gray drawing from the image background and only need symbols drawn over it.
Here is my code to do that using morphologyEx but it did not remove the entire gray drawing that is in background.
# Read the drawing and apply one morphological closing with a 2x2 kernel.
source = cv2.imread("images/new_drawing.png")
close_kernel = np.ones((2, 2), dtype=np.uint8)
closed = cv2.morphologyEx(source, cv2.MORPH_CLOSE, close_kernel, iterations=1)

# Show the result until a key is pressed.
cv2.imshow('Without background', closed)
cv2.waitKey(0)
cv2.destroyAllWindows()
I also tried this and got the expected result in grayscale, but I am unable to convert it back to BGR.
Here is my code
# Load the drawing and work on its grayscale version.
gray = cv2.cvtColor(cv2.imread('images/new_drawing.png'), cv2.COLOR_BGR2GRAY)

# Median-filter (3x3), then binarize at 190.
denoised = cv2.medianBlur(gray, ksize=3)
bright = cv2.threshold(denoised, 190, 255, cv2.THRESH_BINARY)[1]

# Weighted sum of the grayscale image (0.5) and the binary image (0.9).
blended = cv2.addWeighted(gray, 0.5, bright, 0.9, gamma=0)
cv2.imshow("blending", blended)
I also used contours to identify the symbols and draw them onto a white image, but the problem is that this also picks up the background drawing, which I don't want.
Input image
Expected output image
Also the drawing will be always in gray color as in image.
Please help me out to get better result.
You are almost there...
Instead of using cv2.inRange to "catch" the non-gray pixel I suggest using cv2.inRange for catching all the pixels you want to change to white color:
mask = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))
The hue range is irrelevant.
The saturation is close to zero (shades of gray).
The brightness excludes the black pixels (you like to keep).
In order to get a nicer solution, I also used the following additional stages:
Build a mask of non-black pixels:
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))
Erode the above mask:
nzmask = cv2.erode(nzmask, np.ones((3,3)))
Apply a bitwise AND between mask and nzmask:
mask = mask & nzmask
The above stages keep the gray pixels around the black text.
Without the above stages, the black text gets thinner.
The last stage is replacing mask pixels with white:
new_img = img.copy()
new_img[np.where(mask)] = 255
Here is the code:
import numpy as np
import cv2
bgr = cv2.imread("new_drawing.png")
hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)

# Candidate pixels: any hue, saturation <= 5, value >= 100.
gray_mask = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))
cv2.imshow('mask before and with nzmask', gray_mask)

# Non-black pixels (value >= 5), eroded by 3x3 so that pixels next to
# black ones are excluded from the mask.
non_black = cv2.erode(cv2.inRange(hsv, (0, 0, 5), (255, 255, 255)),
                      np.ones((3, 3)))
cv2.imshow('nzmask', non_black)

# Keep only candidates that are also in the eroded non-black mask,
# then paint them white on a copy of the input.
whiten = gray_mask & non_black
cleaned = bgr.copy()
cleaned[whiten != 0] = 255

cv2.imshow('mask', whiten)
cv2.imshow('new_img', cleaned)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Here is one way to do that in Python/OpenCV.
Read the input
Convert to HSV and separate channels
Threshold the saturation channel
Threshold the value channel and invert
Combine the two threshold images as a mask
Apply the mask to the input to write white where the mask is black
Save the result
Input:
import cv2
import numpy as np
# load the input and split it into HSV channels
img = cv2.imread('symbols.png')
h, s, v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

# saturation above 92 -> white
_, thresh1 = cv2.threshold(s, 92, 255, cv2.THRESH_BINARY)

# value above 128 -> black, everything else white (inverted binary threshold)
_, thresh2 = cv2.threshold(v, 128, 255, cv2.THRESH_BINARY_INV)

# saturating add of the two threshold images gives the mask
mask = cv2.add(thresh1, thresh2)

# wherever the mask is black, overwrite the input with white
result = img.copy()
result[mask == 0] = (255, 255, 255)

# display IN and OUT images
for title, frame in (('IMAGE', img), ('SAT', s), ('VAL', v),
                     ('THRESH1', thresh1), ('THRESH2', thresh2),
                     ('MASK', mask), ('RESULT', result)):
    cv2.imshow(title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

# save output images
for filename, frame in (('symbols_thresh1.png', thresh1),
                        ('symbols_thresh2.png', thresh2),
                        ('symbols_mask.png', mask),
                        ('symbols_cleaned.png', result)):
    cv2.imwrite(filename, frame)
Saturation channel thresholded:
Value channel thresholded and inverted:
Mask:
Result:
I want to change this background into the original black. This background is not pure black. Its values contain 1, 2 or 3. After using the following code I got the background value very near to black but not black. Although the background looks black
img = cv2.imread("images.bmp")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binary threshold at 0 with maxval 255: the output holds only 0 or 255.
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)
# NOTE: since thresh contains only 0/255, comparing it with 5 selects
# nothing; the line is kept because it is what the question is asking about.
img[thresh == 5] = 0

# Erode once with a 5x5 elliptical kernel and show the result.
ellipse5 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
erosion = cv2.erode(img, ellipse5, iterations=1)
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.imshow("image", erosion)
cv2.waitKey(0)
cv2.destroyAllWindows()
This should fix your problem, to the best of my understanding.
import cv2
# Pixels strictly below this value are forced to pure black.
thresh_val = 5
gray = cv2.imread(r"brain.png", cv2.IMREAD_GRAYSCALE)
too_dark = gray < thresh_val
gray[too_dark] = 0
Besides that, watch out that
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)
is basically going to set the whole image to 255, since the second argument is the threshold and every pixel above threshold is set to the third value, which is 255.
Try replacing this:
ret, thresh = cv2.threshold(gray, 0, 255, cv2. THRESH_BINARY)
img[thresh == 5] = 0
with this:
# threshold to 10% of the maximum
threshold = 0.10 * np.max(img)
img[gray <= threshold] = 0
The issue is that cv2.threshold() does not compute a threshold for you, but applies one and, for example, thresh in your code is already the thresholded image.
(EDITED)
This is the image. I am trying to give a proper shape to the images in my folder, but I am unable to get a perfect result. The following is one example of this type of image:
Following is the coding that I have done for my folder containing this type of images:
''''code''''
import cv2
import numpy as np
import glob
path = r'C:\Users\User\Desktop\A\*.jpg'
def k_function(image, k):
    """Quantize ``image`` to ``k`` values using k-means on its pixels.

    Args:
        image: 2-D (single channel) or 3-D (rows, cols, channels) array.
        k: number of clusters.

    Returns:
        A uint8 array with the same shape as ``image`` in which every pixel
        is replaced by the centre of the cluster it was assigned to.
    """
    # One sample per pixel. The previous hard-coded width of 4 grouped four
    # neighbouring gray pixels into one sample and failed outright when the
    # pixel count was not a multiple of 4.
    channels = 1 if image.ndim == 2 else image.shape[2]
    samples = np.float32(image.reshape((-1, channels)))
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(
        samples, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    centers = np.uint8(centers)
    # Map every sample to its cluster centre and restore the input shape.
    return centers[labels.flatten()].reshape(image.shape)
def noise_function(image):
    """Close ``image`` and keep pixels far enough from the background.

    The image is closed twice with a 2x2 kernel, its L2 distance transform
    is computed, and pixels whose distance exceeds 2% of the maximum
    distance are set to 255.

    Returns:
        The thresholded distance transform (same dtype as the distance
        transform, not uint8).
    """
    kernel = np.ones((2, 2), np.uint8)
    closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=2)
    # The former `bg = cv2.dilate(closing, ...)` result was never used, so
    # the call is dropped.
    # DIST_MASK_PRECISE and THRESH_BINARY are the named forms of the literal
    # 0 arguments used before.
    dist_transform = cv2.distanceTransform(
        closing, cv2.DIST_L2, cv2.DIST_MASK_PRECISE)
    _, fg = cv2.threshold(
        dist_transform, 0.02 * dist_transform.max(), 255, cv2.THRESH_BINARY)
    return fg
def filling(thresh):
    """Fill holes in ``thresh`` using a flood fill seeded at (60, 60).

    A copy of ``thresh`` is flood-filled with 255 from the seed point; the
    inverse of that copy is OR-ed with ``thresh`` and returned.
    """
    rows, cols = thresh.shape[:2]
    flooded = thresh.copy()
    # floodFill needs a mask two pixels larger than the image in each axis.
    flood_mask = np.zeros((rows + 2, cols + 2), np.uint8)
    cv2.floodFill(flooded, flood_mask, (60, 60), 255)
    return thresh | cv2.bitwise_not(flooded)
# Run every image matched by `path` through the pipeline:
# grayscale -> 2-cluster k-means -> inverted threshold -> fill -> noise filter.
for index, image_path in enumerate(glob.glob(path)):
    gray = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
    quantized = k_function(gray, 2)
    inverted = cv2.threshold(quantized, 127, 255, cv2.THRESH_BINARY_INV)[1]
    cleaned = noise_function(filling(inverted))
    cv2.imwrite(r'C:\Users\User\Desktop\New folder\image{}.jpg'.format(index), cleaned)
Try using copyMakeBorder to make a border. It looks like you're trying to use floodFill and I've never figured out how that is supposed to work.
import cv2
# Border widths in pixels, in the order copyMakeBorder expects them.
top, bottom, left, right = 10, 0, 0, 10
bordered = cv2.copyMakeBorder(cv2.imread('elbow.png'),
                              top, bottom, left, right, cv2.BORDER_CONSTANT)
cv2.imwrite('elbow_border.png', bordered)
elbow.png:
elbow_border.png:
I would approach it a bit differently in Python/OpenCV. I would convert to HSV and threshold the saturation channel. Then use morphology open to smooth outline.
Input (cropped from your post):
import cv2
# read the image and pull out the saturation channel (index 1 of HSV)
bgr = cv2.imread("finger.png")
sat = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)[:, :, 1]

# binary threshold of the saturation channel at 25
_, thresh = cv2.threshold(sat, 25, 255, cv2.THRESH_BINARY)

# morphological open with a 19x19 ellipse to smooth the outline
ellipse19 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (19, 19))
smoothed = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, ellipse19)

# write result to disk, then show every stage
cv2.imwrite("finger_smoothed.png", smoothed)
for title, frame in (("SAT", sat), ("THRESH", thresh), ("SMOOTHED", smoothed)):
    cv2.imshow(title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
I have this source image below (after cropped) and I try to do some image processing before I read text.
With python and opencv, I tried to remove the lines in the background with k-means with k =2, and the result is
I tried to smooth the image using this code below
def process_image_for_ocr(file_path):
    """Rescale the image at ``file_path`` and return its cleaned-up version.

    The image is first written to a temporary file by ``set_image_dpi`` and
    that file is then processed by ``remove_noise_and_smooth``.

    Returns:
        Whatever ``remove_noise_and_smooth`` returns for the rescaled image.
    """
    import contextlib
    import os

    # TODO : Implement using opencv
    temp_filename = set_image_dpi(file_path)
    try:
        return remove_noise_and_smooth(temp_filename)
    finally:
        # The temp file is created with delete=False and was previously
        # leaked on every call. Removal is best-effort so a cleanup failure
        # never masks the real result or exception.
        with contextlib.suppress(OSError):
            os.remove(temp_filename)
def set_image_dpi(file_path):
    """Upscale the image by an integer factor and save it as a 300-dpi JPEG.

    The factor is ``int(IMAGE_SIZE / width)``, but never less than 1, so the
    image is never shrunk.

    Returns:
        Path of the temporary ``.jpg`` file that was written. The file is
        not deleted automatically; the caller owns it.
    """
    # Close the source file as soon as the resized copy exists.
    with Image.open(file_path) as im:
        length_x, width_y = im.size
        factor = max(1, int(IMAGE_SIZE / length_x))
        size = factor * length_x, factor * width_y
        # size = (1800, 1800)
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in
        # Pillow 10; LANCZOS is the same filter under its current name.
        im_resized = im.resize(size, Image.LANCZOS)
    # Only the unique name is needed; close the handle before Pillow
    # reopens the path for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
        temp_filename = temp_file.name
    im_resized.save(temp_filename, dpi=(300, 300))
    return temp_filename
def image_smoothening(img):
    """Threshold ``img`` at ``BINARY_THREHOLD`` and refine with Otsu twice.

    Sequence: fixed binary threshold, Otsu threshold, (1, 1) Gaussian blur,
    Otsu threshold again. Returns the last thresholded image.
    """
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    stage = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)[1]
    stage = cv2.threshold(stage, 0, 255, otsu_binary)[1]
    stage = cv2.GaussianBlur(stage, (1, 1), 0)
    stage = cv2.threshold(stage, 0, 255, otsu_binary)[1]
    return stage
def remove_noise_and_smooth(file_name):
    """Read ``file_name`` as grayscale and return a cleaned binary image.

    The grayscale image is mean-adaptive-thresholded (block size 41,
    constant 3), opened and closed with a 1x1 kernel, and OR-ed with the
    output of ``image_smoothening`` applied to the same grayscale image.
    """
    gray = cv2.imread(file_name, 0)  # flag 0 = load as grayscale
    adaptive = cv2.adaptiveThreshold(
        gray.astype(np.uint8), 255,
        cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 41, 3)
    unit_kernel = np.ones((1, 1), np.uint8)
    opened = cv2.morphologyEx(adaptive, cv2.MORPH_OPEN, unit_kernel)
    closed = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, unit_kernel)
    smoothed = image_smoothening(gray)
    return cv2.bitwise_or(smoothed, closed)
And the result is
Can you help me (any idea) to remove the lines on the background of the source image?
One approach to achieve this is by computing a k-means unsupervised segmentation of the image. You just need to play with the k and i_val values to get the desired output.
First, you need to create a function which will find the k threshold values. It also calculates an image histogram, which is returned alongside the k-means centers. .ravel() just converts your numpy array to a 1-D array. np.reshape(img, (-1,1)) then converts it to a 2-D array of shape (n, 1). Next we carry out the k-means clustering as described here.
The function takes the input gray-scale image, your number of k intervals and the value you want to threshold from (i_val). It returns the threshold value at your desired i_val.
def kmeans(input_img, k, i_val):
    """Cluster the pixel values of ``input_img`` into ``k`` groups.

    Args:
        input_img: gray-scale image.
        k: number of clusters.
        i_val: index into the sorted cluster centres to use as threshold.

    Returns:
        A tuple ``(threshold, centers, hist)``: the ``i_val``-th sorted
        centre cast to int, all sorted centres, and the 256-bin histogram
        of ``input_img``.
    """
    hist = cv2.calcHist([input_img], [0], None, [256], [0, 256])

    # One float32 sample per pixel, shaped (n, 1).
    samples = input_img.reshape(-1, 1).astype(np.float32)

    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, _, centers = cv2.kmeans(
        samples, k, None, stop_criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    centers = np.sort(centers, axis=0)
    threshold = centers[i_val].astype(int)
    return threshold, centers, hist
# Load as grayscale, take the third-lowest of 8 k-means centres as the
# threshold, binarize and save.
img = cv2.imread('Y8CSE.jpg', 0)
cutoff = kmeans(input_img=img, k=8, i_val=2)[0]
thresh = cv2.threshold(img, cutoff, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite('text.png', thresh)
The output for this looks like:
You could carry on with this method by using morphological operators, or pre-mask the image using a hough transform as seen in the first answer here.
I would like to increase the size of the letters in an image that contains letters distributed over multiple lines, keep their coordinates the same, and also remove the lines in between the letters.
For Example:
I have applied morphological transformations. They help, but not enough for character recognition with Tesseract OCR; the problem in this image is the character 4.
I have applied erode and cv2.MORPH_CLOSE
# Erode once with a 2x2 kernel, then close with a 3x3 kernel.
erode_kernel = np.ones((2, 2), np.uint8)
close_kernel = np.ones((3, 3), np.uint8)
erosion = cv2.erode(img, erode_kernel, iterations=1)
closing = cv2.morphologyEx(erosion, cv2.MORPH_CLOSE, close_kernel)
and I get this output :
Edited
The Input Image
https://ibb.co/i2FdkT
https://ibb.co/igiQX8
The Complete code i used
import cv2
import numpy as np
# Load as grayscale and detect line segments on the Canny edge map.
img = cv2.imread('total_2.png', 0)
edges = cv2.Canny(img, 50, 150, apertureSize=3)
minLineLength = 100
lines = cv2.HoughLinesP(image=edges, rho=1, theta=np.pi / 180, threshold=100,
                        minLineLength=minLineLength, maxLineGap=80)

# HoughLinesP returns None when it finds no segment, so guard before
# iterating instead of crashing on `.shape`.
if lines is not None:
    for x1, y1, x2, y2 in lines[:, 0]:
        dx = x1 - x2
        dy = y1 - y2
        # Paint over segments whose |slope| is below 1 in white.
        if dx != 0 and abs(dy / dx) < 1:
            cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)),
                     (255, 255, 255), 1, cv2.LINE_AA)

# Close, denoise, then binarize (inverted threshold followed by bitwise_not).
se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 3))
gray = cv2.morphologyEx(img, cv2.MORPH_CLOSE, se)
img = cv2.fastNlMeansDenoising(gray, None, 65, 5, 21)
img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)[1]
img = cv2.bitwise_not(img)

# NOTE(review): this structuring element is all zeros; presumably np.ones
# was intended -- confirm before relying on this erode.
k1 = np.zeros((3, 3), np.uint8)
img = cv2.erode(img, k1, iterations=1)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY)

# `cv2.cv2.MORPH_RECT` depended on a nested `cv2.cv2` module that current
# opencv-python builds no longer expose; the constant lives on `cv2` itself.
element = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
erosion = cv2.erode(img, element, iterations=1)
closing = cv2.morphologyEx(erosion, cv2.MORPH_CLOSE, element)
Another approach with cv2.findContours
# threshold the gray image to binarize, and negate it
_, binary = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY)
if flag:
    binary = cv2.bitwise_not(binary)
# find external contours of all shapes
# findContours returns (image, contours, hierarchy) on OpenCV 3 but
# (contours, hierarchy) on OpenCV 2 and 4; index -2 is the contour list in
# every version, whereas three-value unpacking only works on OpenCV 3.
contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
# create a mask for floodfill function, see documentation
h, w = image.shape
mask = np.zeros((h + 2, w + 2), np.uint8)
# determine which contour belongs to a square or rectangle
for cnt in contours:
    poly = cv2.approxPolyDP(cnt, 0.05 * cv2.arcLength(cnt, True), True)
    if len(poly) == 4:
        # if the contour has 4 vertices then floodfill that contour with black color
        # The seed is the contour's first point, converted to plain ints
        # rather than passed as numpy scalars.
        seed = tuple(int(v) for v in cnt[0][0])
        _, binary, _, _ = cv2.floodFill(binary, mask, seed, 0)
# convert image back to original color
if flag:
    binary = cv2.bitwise_not(binary)
The problem with cv2.findContours is that I apply the same preprocessing technique to other images that are unboxed, and this approach destroys some of their letters.
For Example this Image :
My ideal solution would be increasing the letter sizes and also make the letter more clear