I want to remove the color from the image below; because of this color I am unable to extract the text clearly from the image.
I am using the code below, but I am still not getting clear text:
import numpy as np
from PIL import Image
im = Image.open('my_file.tif')
im = im.convert('RGBA')
data = np.array(im)
# just use the rgb values for comparison
rgb = data[:,:,:3]
color = [246, 213, 139] # Original value
black = [0,0,0, 255]
white = [255,255,255,255]
mask = np.all(rgb == color, axis = -1)
# change all pixels that match color to white
data[mask] = white
# change all pixels that don't match color to black
##data[np.logical_not(mask)] = black
new_im = Image.fromarray(data)
new_im.save('new_file.tif')
and
def black_and_white(input_image_path, output_image_path):
    color_image = Image.open(input_image_path)
    bw = color_image.convert('L')
    bw.save(output_image_path)
Please help me with this...
Image 2:
I'm assuming you want to extract the quote. To do this, you can do a series of filtering operations to remove non-text contours. Once you have the processed result you can use an OCR tool such as Pytesseract for text extraction.
Result from OCR
On behalf of the hundreds of ACLU activists who
called on Governor Walker to veto House Bill
156, we are disappointed that he did not put
students or the Constitution first today.”
—Joshua A. Decker
Executive Director
Code
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image and threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Connect text with a horizontal shaped kernel
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10,3))
dilate = cv2.dilate(thresh, kernel, iterations=3)
# Remove non-text contours using aspect ratio filtering
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    aspect = w/h
    if aspect < 3:
        cv2.drawContours(thresh, [c], -1, (0,0,0), -1)
# Invert image and OCR
result = 255 - thresh
data = pytesseract.image_to_string(result, lang='eng',config='--psm 6')
print(data)
cv2.imshow('result', result)
cv2.waitKey()
Try OpenCV conversion, but remember to use 3 channels, otherwise you'll get an error:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
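For instance, a minimal sketch of that conversion (the filename and the single-channel guard are my assumptions, not part of the original answer):
import cv2
# hypothetical filename for illustration
image = cv2.imread('my_file.tif')  # imread returns a 3-channel BGR array by default
# COLOR_BGR2GRAY raises an error if the input is already single-channel,
# so guard against grayscale inputs before converting
if image.ndim == 3 and image.shape[2] == 3:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
    gray = image
cv2.imwrite('gray_file.tif', gray)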
Here's an example image ->
I would like to extract text that has text-decoration/styling of strikethrough.
So for the above image I would like to extract - de location
How would I do this?
Here's what I have so far using OpenCV and python :
import cv2
import numpy as np
import matplotlib.pyplot as plt
im = cv2.imread(<image>)
kernel = np.ones((1,44), np.uint8)
morphed = cv2.morphologyEx(im, cv2.MORPH_CLOSE, kernel)
plt.imshow(morphed)
This gives me the horizontal lines ->
I am new to image processing and hence having a difficult time isolating only the text that has strikethroughs.
Bonus -> Along with the strikethrough text, I would like to also extract neighboring text so that I can correctly style/mark the strikethrough text information back along with other text.
UPDATE 1:
Based on the first answer, I did the following:
import cv2
import matplotlib.pyplot as plt
# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('image.png')
result = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Detect horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=10)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (36,255,12), 2)
plt.imshow(result)
I was able to get this image -
I tried playing with the values for the horizontal kernel, but had no luck.
UPDATE 2:
I modified the above snippet further and got this -
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('image.png')
result = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = np.ones((4,2), np.uint8)
erosion = cv2.erode(thresh, kernel, iterations=1)
dilation = cv2.dilate(thresh, kernel, iterations=1)
trans = dilation
# plt.imshow(erosion)
# Detect horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8,1))
detect_horizontal = cv2.morphologyEx(trans, cv2.MORPH_OPEN, horizontal_kernel, iterations=10)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (36,255,12), 2)
plt.imshow(result)
I was able to get this image -
And this solution applies to my other image types as well -
This is not a 100% accurate solution (it failed to get the 'de' strikethrough text), but I like the performance so far.
Now, I am struggling with how to check if the neighboring pixels are black or white to isolate the strikethrough.
One way you can achieve this (a rough sketch follows the steps below):
Binarise the image (https://docs.opencv.org/master/d7/d4d/tutorial_py_thresholding.html)
Find horizontal lines (Horizontal Line detection with OpenCV)
For each line, check whether the pixels directly above and below it are white or not
If the pixels above and below are non-white, that region corresponds to strikethrough text
Do a connected component labeling of the image (connected component labeling in Python)
Check the labels corresponding to the lines detected previously and mask those labels to get the strikethrough text.
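A minimal, untested sketch of those steps (the filename, kernel width, and the one-pixel neighbour check are assumptions to tune for your images):
import cv2
import numpy as np
# hypothetical input file
img = cv2.imread('strikethrough.png', cv2.IMREAD_GRAYSCALE)
# 1. Binarise: text becomes white on black
thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# 2. Find horizontal lines with a wide, flat kernel
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))
lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel)
# 3./4. Keep only line pixels whose immediate top and bottom neighbours are also
# text, i.e. the line passes through characters rather than running under them
strike = np.zeros_like(lines)
ys, xs = np.nonzero(lines)
for y, x in zip(ys, xs):
    if 0 < y < thresh.shape[0] - 1 and thresh[y - 1, x] and thresh[y + 1, x]:
        strike[y, x] = 255
# 5. Connected component labeling of the binarised image
num_labels, labels = cv2.connectedComponents(thresh)
# 6. Keep the components that contain strikethrough pixels
keep = set(labels[strike > 0].ravel()) - {0}
mask = (np.isin(labels, list(keep)) * 255).astype(np.uint8)
cv2.imwrite('strikethrough_only.png', mask)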
You can use a strikethrough property such as thickness: the strikethrough line is thinner than the underline. It can be selected by morphology, and the connected components restored by morphological reconstruction.
import cv2
img = cv2.imread('juFpe.png', cv2.IMREAD_GRAYSCALE)
thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,5))
kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (8,8))
detect_thin = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
detect_thin = cv2.morphologyEx(detect_thin, cv2.MORPH_DILATE, kernel2)
marker = cv2.compare(detect_thin, thresh, cv2.CMP_LT)  # thin lines
while True:  # morphological reconstruction
    tmp = marker.copy()
    marker = cv2.dilate(marker, kernel2)
    marker = cv2.min(thresh, marker)
    difference = cv2.subtract(marker, tmp)
    if cv2.countNonZero(difference) == 0:
        break
cv2.imwrite('lines.png', marker)
Result:
I have a problem: I have photos of individual license plate registrations, and I would like to extract the registration number from each photo. Unfortunately, the accuracy of the code I wrote is very low, and I would like to ask for help in achieving better results. Any tips?
In the first phase, the photo looks like this
Then I transform the photo so that only the black color stands out:
import cv2
import numpy as np
img = cv2.imread('plate.png')  # placeholder filename
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# define range of black color in HSV
lower_val = np.array([0,0,0])
upper_val = np.array([179,100,130])
# Threshold the HSV image to get only black colors
mask = cv2.inRange(hsv, lower_val, upper_val)
and I get this:
What can I add or change to improve the accuracy of the program? Is there a way to make it read the registrations more reliably? Will this help:
configr = ('-l eng --oem 1 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
text = pytesseract.image_to_string(mask,lang='eng', config=configr)
print(text)
Here's an approach:
Color threshold to extract black text. We load the image, convert to HSV colorspace, define a lower and upper color range, and use cv2.inRange() to color threshold and obtain a binary mask
Perform morphological operations. Create a kernel and perform morph close to fill holes in the contours.
Filter license plate contours. Find contours and filter using bounding rectangle area. If a contour passes this filter, we extract the ROI and paste it onto a new blank mask.
OCR using Pytesseract. We invert the image so desired text is black and throw it into Pytesseract.
Here's a visualization of each step:
Obtained mask from color thresholding + morph closing
Filter for license plate contours highlighted in green
Pasted plate contours onto a blank mask
Inverted image ready for Tesseract
Result from Tesseract OCR
PZ 689LR
Code
import numpy as np
import pytesseract
import cv2
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, create blank mask, convert to HSV, define thresholds, color threshold
image = cv2.imread('1.png')
result = np.zeros(image.shape, dtype=np.uint8)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0,0,0])
upper = np.array([179,100,130])
mask = cv2.inRange(hsv, lower, upper)
# Perform morph close and merge for 3-channel ROI extraction
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=1)
extract = cv2.merge([close,close,close])
# Find contours, filter using contour area, and extract using Numpy slicing
cnts = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    area = w * h
    if area < 5000 and area > 2500:
        cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
        result[y:y+h, x:x+w] = extract[y:y+h, x:x+w]
# Invert image and throw into Pytesseract
invert = 255 - result
data = pytesseract.image_to_string(invert, lang='eng',config='--psm 6')
print(data)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('result', result)
cv2.imshow('invert', invert)
cv2.waitKey()
I want to detect and then extract the letters and numbers from this image. I have just started to learn OpenCV and I think this can be done with that library. The image I used and the desired output are below.
This is the code that I have:
import cv2
# read original image
img = cv2.imread('image.jpg')
cv2.imshow('original', img)
cv2.waitKey(0)
# convert it to gray and apply filter
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
gray = cv2.bilateralFilter(gray, 11, 17, 17)
cv2.imshow('gray', gray)
cv2.waitKey(0)
# apply threshold
thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_OTSU)[1]
cv2.imshow('thresh', thresh)
cv2.waitKey(0)
This is the image:
My goal is to get separated images of each letter and number (I did this in paint):
So, what should I do to get this?
It would be perfect to keep the same order of letters and numbers, for example:
MXF51051
Here's an approach using simple thresholding + contour filtering
Convert image to grayscale and Otsu's threshold
Find contours and filter using contour area
Extract and save ROI
We begin by converting to grayscale and then applying Otsu's threshold to obtain a binary image.
Next we find contours using cv2.findContours(). To keep the same order of letters/numbers, we use imutils.contours.sort_contours() with the left-to-right parameter to ensure that when we iterate through the contours, we have each contour in the correct order. For each contour, we filter using a minimum and maximum area threshold to ensure that we only keep contours with the desired text. Once we have the filtered ROI, we extract/save the ROI using Numpy slicing. Here's the filtered mask with only the desired text
Detected numbers and letters
The extracted ROIs in the correct order
import cv2
import numpy as np
from imutils import contours
image = cv2.imread('1.jpg')
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="left-to-right")
ROI_number = 0
for c in cnts:
    area = cv2.contourArea(c)
    if area < 800 and area > 200:
        x,y,w,h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255,255,255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        ROI_number += 1
cv2.imshow('mask', mask)
cv2.imshow('thresh', thresh)
cv2.waitKey()
I am working on OCRing a document image. I want to detect all pictures and remove them from the document image while retaining the tables. Once I detect the pictures I will remove them and then OCR the rest. I tried finding contours and detecting all the larger areas; unfortunately it detects tables as well. How can I remove those objects while keeping the other data in the document image? I am using OpenCV and Python.
Here's my code
import os
import cv2
import numpy as np
from PIL import Image
import pytesseract
img = cv2.imread('block2.jpg', 0)
mask = np.ones(img.shape[:2], dtype="uint8") * 255
ret, thresh1 = cv2.threshold(img, 127, 255, 0)
contours, sd = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
areacontainer = []
for cnt in contours:
    area = cv2.contourArea(cnt)
    areacontainer.append(area)
avgArea = sum(areacontainer)/len(areacontainer)
for c in contours:  # average area heuristics
    if cv2.contourArea(c) > 6*avgArea:
        cv2.drawContours(mask, [c], -1, 0, -1)
binary = cv2.bitwise_and(img, img, mask=mask)  # subtracting
cv2.imwrite("bin.jpg", binary)
cv2.imwrite("mask.jpg", mask)
Here's an approach:
Convert image to grayscale and Gaussian blur
Perform canny edge detection
Perform morphological operations to smooth image
Find contours and filter using a minimum/maximum threshold area
Remove portrait images
Here are the detected portraits, highlighted in green.
Now that we have the bounding box ROIs, we can effectively remove the pictures by filling them in with white. Here's the result
import cv2
image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
canny = cv2.Canny(blur, 120, 255, 1)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
close = cv2.morphologyEx(canny, cv2.MORPH_CLOSE, kernel, iterations=2)
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area > 15000 and area < 35000:
        x,y,w,h = cv2.boundingRect(c)
        cv2.rectangle(image, (x, y), (x + w, y + h), (255,255,255), -1)
cv2.imshow('image', image)
cv2.waitKey()
I'm trying to use Tesseract in the code below to extract the two lines of the image. I tried to improve the image quality, but even so it didn't work.
Can anyone help me?
from PIL import Image, ImageEnhance, ImageFilter
import pytesseract
img = Image.open(r'C:\ocr\test00.jpg')
new_size = tuple(4*x for x in img.size)
img = img.resize(new_size, Image.ANTIALIAS)
img.save(r'C:\\test02.jpg', 'JPEG')
print( pytesseract.image_to_string( img ) )
Given the comment by @barny I don't know if this will work, but you can try the code below. I created a script that selects the display area and warps it into a straight image, then thresholds to a black-and-white mask of the characters and cleans up the result a bit.
See if it improves recognition. If it does, also look at the intermediate stages so you understand everything that happens.
Update: it seems Tesseract prefers black text on a white background, so I inverted and dilated the result.
Result:
Updated result:
Code:
import numpy as np
import cv2
# load image
image = cv2.imread('disp.jpg')
# create grayscale
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# perform threshold
retr, mask = cv2.threshold(gray_image, 190, 255, cv2.THRESH_BINARY)
# findcontours
ret, contours, hier = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# select the largest contour
largest_area = 0
for cnt in contours:
    if cv2.contourArea(cnt) > largest_area:
        cont = cnt
        largest_area = cv2.contourArea(cnt)
# find the rectangle (and the cornerpoints of that rectangle) that surrounds the contours / photo
rect = cv2.minAreaRect(cont)
box = cv2.boxPoints(rect)
box = np.int0(box)
#### Warp image to square
# assign cornerpoints of the region of interest
pts1 = np.float32([box[2],box[3],box[1],box[0]])
# provide new coordinates of cornerpoints
pts2 = np.float32([[0,0],[500,0],[0,110],[500,110]])
# determine and apply transformationmatrix
M = cv2.getPerspectiveTransform(pts1,pts2)
tmp = cv2.warpPerspective(image,M,(500,110))
# create grayscale
gray_image2 = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)
# perform threshold
retr, mask2 = cv2.threshold(gray_image2, 160, 255, cv2.THRESH_BINARY_INV)
# remove noise / close gaps
kernel = np.ones((5,5),np.uint8)
result = cv2.morphologyEx(mask2, cv2.MORPH_CLOSE, kernel)
#draw rectangle on original image
cv2.drawContours(image, [box], 0, (255,0,0), 2)
# dilate result to make characters more solid
kernel2 = np.ones((3,3),np.uint8)
result = cv2.dilate(result,kernel2,iterations = 1)
#invert to get black text on white background
result = cv2.bitwise_not(result)
#show image
cv2.imshow("Result", result)
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()