Not able to create mask on text in image - python

import cv2
import numpy as np
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Create rectangular structuring element and dilate
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations=4)
cv2.imshow('dilate', dilate)
cv2.waitKey()
I am trying to mask text in image using this code. But it is working for only one image.

Related

Detecting handwritten boxes using OpenCV

I have the following image:
I want to extract the boxed diagrams as so:
Here's what I've attempted:
import cv2
import matplotlib.pyplot as plt
# Load the image
image = cv2.imread('diagram.jpg')
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply thresholding to create a binary image
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
# Find contours
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Draw the contours
cv2.drawContours(image, contours, -1, (0, 0, 255), 2)
# Show the final image
plt.imshow(image), plt.show()
However, I've realized it'll be difficult to extract the diagrams because the contours aren't closed:
I've tried using morphological closing to close the gaps in the box edges:
# Define a rectangular kernel for morphological closing
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# Perform morphological closing to close the gaps in the box edges
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
But this changes almost nothing. How should I approach this problem?
We may replace morphological closing with dilate then erode, but filling the contours between the dilate and erode.
For filling the gaps, the kernel size should be much larger than 5x5 (I used 51x51).
Assuming the handwritten boxes are colored, we may convert from BGR to HSV, and apply the threshold on the saturation channel of HSV:
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # Convert from BGR to HSV color space
gray = hsv[:, :, 1] # Use saturation from HSV channel as "gray".
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU) # Apply automatic thresholding (use THRESH_OTSU).
Apply dilate with large kernel, and use drawContours for filling the contours:
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (51, 51)) # Use relatively large kernel for closing the gaps
dilated = cv2.dilate(thresh, kernel) # Dilate with large kernel
contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(dilated, contours, -1, 255, -1)
Apply erode after filling the contours
Erode after dilate is equivalent to closing, but here we are closing after filling.
closed = cv2.erode(dilated, kernel)
Code sample:
import cv2
import numpy as np
# Load the image
image = cv2.imread('diagram.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # Convert from BGR to HSV color space
# Convert to grayscale
#gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = hsv[:, :, 1] # Use saturation from HSV channel as "gray".
# Apply thresholding to create a binary image
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU) # Apply automatic thresholding (use THRESH_OTSU).
thresh = np.pad(thresh, ((100, 100), (100, 100))) # Add zero padding (required due to large dilate kernels).
# Define a rectangular kernel for morphological operations.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (51, 51)) # Use relatively large kernel for closing the gaps
dilated = cv2.dilate(thresh, kernel) # Dilate with large kernel
# Fill the contours, before applying erode.
contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(dilated, contours, -1, 255, -1)
closed = cv2.erode(dilated, kernel) # Apply erode after filling the contours.
closed = closed[100:-100, 100:-100] # Remove the padding.
# Find contours
contours, hierarchy = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Draw the contours
cv2.drawContours(image, contours, -1, (255, 0, 0), 2)
# Show images for testing
# plt.imshow(image), plt.show()
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('dilated', dilated)
cv2.imshow('closed', closed)
cv2.imshow('image', image)
cv2.waitKey()
cv2.destroyAllWindows()
Result:
gray (saturation channel):
thresh:
dilated (after filling):
closed:
Just need to dilate the image to make the rectangle closed, then define a threshold for the area of the contours:
import cv2
# Load the image
image = cv2.imread('diagram.jpg')
# Convert to grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# Apply thresholding to create a binary image
ret,thresh = cv2.threshold(gray,200,255,1)
# Need to dilate the image to make the contours closed
dilate = cv2.dilate(thresh,None)
erode = cv2.erode(dilate,None)
# Find contours
contours,hierarchy = cv2.findContours(erode,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE)
for i,cnt in enumerate(contours):
# Check if it is an external contour and its area is more than 8000
if hierarchy[0,i,3] == -1 and cv2.contourArea(cnt)>8000:
x,y,w,h = cv2.boundingRect(cnt)
cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imwrite('template {0}.jpg'.format(i), image[y:y+h,x:x+w])
cv2.imshow('img',image)
You will get :

Extracting text from image - OCR

I'm using cv2 and pytesseract library to extract text from image. Here is the image (image3_3.png) and the python code:
def threshold_image(img_src):
"""Grayscale image and apply Otsu's threshold"""
# Grayscale
img_gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)
# Binarisation and Otsu's threshold
img_thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
return img_thresh
img = np.array(Image.open('image3_3.png'))
# Apply dilation and erosion to remove some noise
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
# normalise the image
norm_img = np.zeros((img.shape[0], img.shape[1]))
img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)
# Apply blur to smooth out the edges
img = cv2.GaussianBlur(img, (5, 5), 0)
string_ocr = pytesseract.image_to_string(threshold_image(img), lang = 'eng', config = '--psm 6')
print(string_ocr)
Here is the result:
Image A3. This is image A3 with more texts.
ISAS Visual Analytics
INow everyone can easily discover and share powerful
Nsights that inspire action
Why am I not getting the same exact text? Any help highly appreciated.

How to extract the stain from the image?

I have an image that has a stain (the green stain on the surface of the water), and my goal is to extract that stain. It was guiding me with this solution but it doesn't extract it properly. Is there any way to extract using Python?
Input image:
The attempt:
img = cv2.imread('/content/001.jpg')
# blur
blur = cv2.GaussianBlur(img, (3,3), 0)
# convert to hsv and get saturation channel
sat = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)[:,:,1]
# threshold saturation channel
thresh = cv2.threshold(sat, 90, 255, cv2.THRESH_BINARY)[1]
# apply morphology close and open to make mask
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
mask = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel, iterations=1)
# do OTSU threshold to get circuit image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# write black to otsu image where mask is black
otsu_result = otsu.copy()
otsu_result[mask==0] = 0
# write black to input image where mask is black
img_result = img.copy()
img_result[mask==0] = 0
The result:
I followed your approach but used the LAB color space instead.
img = cv2.imread(image_path)
# blur
blur = cv2.GaussianBlur(img, (3,3), 0)
# convert to LAB space and get a-channel
lab = cv2.cvtColor(blur, cv2.COLOR_BGR2LAB)
a = lab[:,:,1]
thresh = cv2.threshold(a, 95, 255, cv2.THRESH_BINARY_INV)[1]
thresh = cv2.threshold(lab[:,:,1], 95, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel, iterations=1)
inv_morph = cv2.bitwise_not(morph)
img_result = img.copy()
img_result[inv_morph==0] = 0
cv2.imshow('Final result', img_result)
The result is not accurate. You can alter the threshold value and/or morphological kernel to get the desired result.

Remove letter artifacts from mammography image

I want to remove the letter artifacts "L:CC and Strin" from breast mammography using python. How could I get that done? this is my image
Here is one way to do that in Python/OpenCV.
Read the input
Convert to grayscale
Threshold
Dilate as mask
Apply mask to change white letters to black
Save the results
import cv2
import numpy as np
# read image
img = cv2.imread('mammogram_letters.png')
# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# create mask
thresh = cv2.threshold(gray, 247, 255, cv2.THRESH_BINARY)[1]
# dilate mask
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
mask = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)
# apply change
result = img.copy()
result[mask == 255] = (0,0,0)
# save result
cv2.imwrite("mammogram_letters_thresh.png", thresh)
cv2.imwrite("mammogram_letters_mask.png", mask)
cv2.imwrite("mammogram_letters_blackened.png", result)
# show results
cv2.imshow("THRESH", thresh)
cv2.imshow("MASK", mask)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
Threshold image:
Mask image:
Result:
You have to get pixel coordinate of the box containing test, if they are always the same my code will work.
from PIL import Image
im = Image.open('SqbIx.png')
img =im.load()
for i in range (73,116):
for j in range (36,57):
img[i,j]= (0, 0, 0)
im.save('mod.png')

How to process and extract text from image

I'm trying to extract text from image using python cv2. The result is pathetic and I can't figure out a way to improve my code.
I believe the image needs to be processed before the extraction of text but not sure how.
I've tried to convert it into black and white but no luck.
import cv2
import os
import pytesseract
from PIL import Image
import time
pytesseract.pytesseract.tesseract_cmd='C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
cam = cv2.VideoCapture(1,cv2.CAP_DSHOW)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 8000)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 6000)
while True:
return_value,image = cam.read()
image=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
image = image[127:219, 508:722]
#(thresh, image) = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imwrite('test.jpg',image)
print('Text detected: {}'.format(pytesseract.image_to_string(Image.open('test.jpg'))))
time.sleep(2)
cam.release()
#os.system('del test.jpg')
Preprocessing to clean the image before performing text extraction can help. Here's a simple approach
Convert image to grayscale and sharpen image
Adaptive threshold
Perform morpholgical operations to clean image
Invert image
First we convert to grayscale then sharpen the image using a sharpening kernel
Next we adaptive threshold to obtain a binary image
Now we perform morphological transformations to smooth the image
Finally we invert the image
import cv2
import numpy as np
image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)
thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
result = 255 - close
cv2.imshow('sharpen', sharpen)
cv2.imshow('thresh', thresh)
cv2.imshow('close', close)
cv2.imshow('result', result)
cv2.waitKey()

Categories