I am trying to detect and crop handwritten characters from an image. Some characters are recognized and enclosed in a rectangle, but for others the same parameters do not work. How can I generalize it?
Raw Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Detect characters on a scanned page: blur, binarise, box every contour and
# save the large regions as separate images.

# Load the page as a single-channel grayscale image.
im = cv2.imread('mission.png', 0)
if im is None:
    raise FileNotFoundError("mission.png could not be read")

# Smooth the image, then binarise it at a fixed gray level.
img_blured = cv2.GaussianBlur(im, (5, 5), 7)
# NOTE(review): the kernel argument is the bare tuple (31,31), not a 31x31
# structuring element such as cv2.getStructuringElement(cv2.MORPH_RECT, (31, 31)).
# It is kept unchanged because a real 31x31 closing would alter the result —
# confirm which one is intended.
closing = cv2.morphologyEx(img_blured, cv2.MORPH_CLOSE, (31,31))
thresh = 195
ret, bw_img = cv2.threshold(closing, thresh, 255, cv2.THRESH_BINARY)

# findContours returns (image, contours, hierarchy) in OpenCV 3.x but only
# (contours, hierarchy) in 2.x/4.x; taking the last two values works everywhere.
contours, hierarchy = cv2.findContours(
    bw_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Draw on a BGR copy so the boxes are really green; on a single-channel image
# only the first component of the colour tuple would be used.
boxed = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

i = 0
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(boxed, (x, y), (x + w, y + h), (0, 255, 0), 3)
    # Only regions larger than 150x150 px are saved as character crops.
    if w > 150 and h > 150:
        cv2.imwrite(str(i)+".jpg", bw_img[y:y + h, x:x + w])
        i = i + 1

# Matplotlib expects RGB channel order, OpenCV stores BGR.
plt.imshow(cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB))
plt.show()
cv2.imwrite("output.png", boxed)
Processed Image
Maybe this can help you:
# Import preprocessors
import os
import cv2
import numpy as np
# Read image
# Read the image that sits next to this script
base_dir = os.path.abspath(os.path.dirname(__file__))
im = cv2.imread(os.path.join(base_dir, 'nvCXT.png'))
if im is None:
    raise FileNotFoundError("nvCXT.png could not be read from " + base_dir)

# Add white padding around the original image
# (np.full(..., 255) gives true white; ~np.ones(...) would give 254)
pad = 5
h, w = im.shape[:2]
padded = np.full((h + pad * 2, w + pad * 2, 3), 255, dtype=np.uint8)
padded[pad:pad + h, pad:pad + w] = im
im = padded

# Blur it to remove noise
im = cv2.GaussianBlur(im, (5, 5), 5)

# Gray and B/W version
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
bw = cv2.threshold(im, 200, 255, cv2.THRESH_BINARY)[1]

# Find contours and sort them left to right.
# sorted() is used instead of list.sort() because findContours may return a
# tuple, which has no .sort() method.
cnts, _ = cv2.findContours(bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnts = sorted(cnts, key=lambda c: cv2.boundingRect(c)[0])

# Size limits are derived from the width of the original (unpadded) image
s1, s2 = w / 2, w / 10

# Boxes are drawn on a colour copy so the saved crops stay clean and the
# rectangles are actually green
out = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

# Find and save blocks
i = 0
prev_x, prev_w = 0, 0
for cnt in cnts:
    bx, by, bw_box, bh_box = cv2.boundingRect(cnt)
    size_ok = s2 < bw_box + bh_box < s1
    # Skip boxes that start inside the previously accepted box
    right_of_previous = i == 0 or (prev_x + prev_w) < bx
    if size_ok and right_of_previous:
        i += 1
        cv2.imwrite(os.path.join(base_dir, '_' + str(i) + ".jpg"),
                    im[by:by + bh_box, bx:bx + bw_box])
        cv2.rectangle(out, (bx, by), (bx + bw_box, by + bh_box), (0, 255, 0), 2)
        prev_x, prev_w = bx, bw_box

# Save the processed images
cv2.imwrite(os.path.join(base_dir, 'out.png'), out)
cv2.imwrite(os.path.join(base_dir, 'out_bw.png'), bw)
Related
I wrote code that detects the differences between two pages, but I want boxes that are close to each other to be merged into a single box, like the purple box in the picture I added.
How can I do this? I have the coordinates of all the boxes.
The other scenario;
from calendar import c
import cv2
import imutils
import numpy as np
from skimage.metrics import structural_similarity as ssim
# Compare two scans of the same page with SSIM and box every region that differs.
img1 = cv2.imread(r'C:\deneme\19371\19371-028.jpg')
img2 = cv2.imread(r'C:\deneme\19371_Ogretmen\19371_Ogretmen-028.jpg')
print(img1.shape)
img_height = img1.shape[0]

gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

# full=True also returns the per-pixel similarity map
(similar, diff) = ssim(gray1, gray2, full=True)
print("Level of similarity : {}".format(similar))
diff = (diff*255).astype("uint8")

# Inverted Otsu threshold: pixels with low similarity become white
thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)

# Collected (x, y, w, h) boxes. The original script printed len(liste) without
# ever defining it, which raised NameError.
liste = []
counter = 0
for contour in contours:
    # Ignore tiny differences
    if cv2.contourArea(contour) <= 10:
        continue
    x, y, w, h = cv2.boundingRect(contour)
    liste.append((x, y, w, h))
    # Mark the same region, with the same label, on both pages
    for page in (img1, img2):
        cv2.rectangle(page, (x, y), (x+w, y+h), (0,0,255), 2)
        cv2.putText(page, str(counter), (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 2)
    counter = counter + 1
    print("{}".format(counter), x, y, w, h)

# 10-pixel black spacer between the two pages in the side-by-side output
spacer = np.zeros((img_height, 10, 3), np.uint8)
result = np.hstack((img1, spacer, img2))
cv2.imwrite('cikti.jpg', result)
print(len(liste))
Image Source => https://drive.google.com/drive/folders/1bsmp7WePSngmygrOYmo7NxbrYUmVLDok?usp=sharing
I am trying to loop over a folder of images in OpenCV. Each image is a line of text. I would like to segment each line into words and then save each word as an image. I have used the following code, which works well on a single image at a time. But when I try to apply it to all the files in the folder, it exports the words of only one of them instead of looping over them all.
I'll appreciate your help and thank you in advance.
ayaasiin
from PIL import Image
import cv2
import numpy as np
from pathlib import Path
import glob
# Segment every line image in data/ into words and save each word separately.
corpus_dir = Path('data')
files = sorted(corpus_dir.glob('*.jpg'))

for f in files:
    image = cv2.imread(str(f))
    if image is None:
        print("skipping unreadable file:", f)
        continue

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    ret, thresh1 = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

    # Every output name carries the source file's stem; with fixed names each
    # file overwrote the results of the previous one.
    cv2.imwrite(f"{f.stem}_threshold_image.jpg", thresh1)

    # Dilate so that the letters of one word merge into a single blob
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 30))
    dilation = cv2.dilate(thresh1, rect_kernel, iterations=6)
    cv2.imwrite(f"{f.stem}_dilation_image.jpg", dilation)

    contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

    # Boxes are drawn on a copy; crops come from the untouched image so they
    # do not contain the green outline.
    im2 = image.copy()

    # The sorted list is now the one that is iterated (it was computed but
    # discarded before).
    # NOTE(review): the key orders by (y, w, h); for left-to-right reading
    # order within a line, cv2.boundingRect(c)[0] would be the key — confirm.
    ordered = sorted(contours, key=lambda c: cv2.boundingRect(c)[1:])

    crop_number = 1
    for cnt in ordered:
        x, y, w, h = cv2.boundingRect(cnt)
        if h > 50 and w > 50:
            # Draw the bounding box on the text area
            cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Crop the bounding box area
            cropped = image[y:y + h, x:x + w]
            cv2.imwrite(f"sawir {f.stem} {crop_number}.jpeg", cropped)
            crop_number += 1
I am trying to put a bounding box around a sequence of contours like the following. A top contour and a bottom contour
image1
I wrote the following basic code and this was the result image2
import cv2
import numpy as np
# Mask every pixel whose HSV value channel is at least 46, clean the mask and
# draw one box around everything that was found.
img = cv2.imread('light2.png')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, (0, 0, 46), (179, 255, 255))

# Opening removes small specks, closing fills small holes
kernel = np.ones((5,5),np.uint8)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# The hierarchy is not used below. The explicit check replaces a bare
# try/except that could hide unrelated errors.
hierarchy = [] if hierarchy is None else hierarchy[0]

height, width, _ = img.shape
# Start from an "empty" extent so the first contour initialises it
min_x, min_y = width, height
max_x = max_y = 0

for contour in contours:
    (x, y, w, h) = cv2.boundingRect(contour)
    # The union extent grows with every contour, regardless of its size
    min_x, max_x = min(x, min_x), max(x+w, max_x)
    min_y, max_y = min(y, min_y), max(y+h, max_y)
    # Individual boxes are drawn only for large contours
    if w > 80 and h > 80:
        cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

# Draw the union box only if at least one contour produced a non-empty extent
if max_x - min_x > 0 and max_y - min_y > 0:
    cv2.rectangle(img, (min_x, min_y), (max_x, max_y), (255, 0, 0), 2)
I am kind of struggling with the logic when there are other contours in the environment like in image3. And still want to put a bounding box around the top and bottom contour detection only (something like this image4). But with the current code, it puts the bounding box like this image5. Any help is appreciated.
You need to explain to the computer what you want, using the tools that you have. I suggest: threshold -> connectedComponents -> filter out wrong bboxes -> find 2 bboxes with the same X position and ≈ area (not implemented) -> union the bboxes
Code example:
import cv2
import numpy as np
def drawStats(img: np.ndarray, arr: np.ndarray) -> None:
    """Draw one rectangle on *img* for every row of *arr*.

    Args:
        img: Image that is drawn on in place.
        arr: Rows indexed by the ``cv2.CC_STAT_*`` constants (left, top,
            width, height), e.g. the stats array returned by
            ``cv2.connectedComponentsWithStats``. An empty array draws nothing.
    """
    for row in arr:
        # Cast to plain Python ints so the points are accepted by the drawing
        # call regardless of the integer dtype of the stats array.
        left = int(row[cv2.CC_STAT_LEFT])
        top = int(row[cv2.CC_STAT_TOP])
        width = int(row[cv2.CC_STAT_WIDTH])
        height = int(row[cv2.CC_STAT_HEIGHT])
        cv2.rectangle(img, (left, top), (left + width, top + height), (20, 0, 255), 3)
def filterStats(arr: np.ndarray) -> np.ndarray:
    """Keep only the rows whose width is more than four times their height.

    Args:
        arr: 2-D array with one row per component, indexed by the
            ``cv2.CC_STAT_*`` constants.

    Returns:
        A new array with the selected rows. The column layout is preserved
        even when no row matches (shape ``(0, n_columns)``), whereas building
        the result from a list would collapse it to shape ``(0,)``.
    """
    widths = arr[:, cv2.CC_STAT_WIDTH]
    heights = arr[:, cv2.CC_STAT_HEIGHT]
    return arr[widths > heights * 4]
# Load the picture as grayscale and keep only the pixels brighter than 230
img = cv2.imread("/Users/alex/Downloads/exo7R.jpg", cv2.IMREAD_GRAYSCALE)
img2 = cv2.threshold(img, 230, 255, cv2.THRESH_BINARY)[1]

# Label the connected blobs; the third returned item is the per-component stats
comp = cv2.connectedComponentsWithStats(img2, connectivity=8)
stats = filterStats(comp[2])

# Draw the surviving boxes on a colour copy of the input
debugImg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
drawStats(debugImg, stats)

for title, picture in (("threshold", img2), ("found components", debugImg)):
    cv2.imshow(title, picture)
cv2.waitKey()
I'm trying to align letters from an image in order to obtain the full word with tesseract OCR:
import cv2
import numpy as np
# Load the captcha as grayscale and enlarge it five-fold
img = cv2.imread("captcha.png", 0)
h1, w1 = img.shape
img = cv2.resize(img, (w1*5, h1*5))

# Threshold the image (inverted) and find the contour tree
thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)[1]
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# White 200x200 background image to paste the letters on
bg = np.full((200, 200), 255, np.uint8)
left = 5

# Walk the contours together with their hierarchy entries
for contour, h in zip(contours, hierarchy[0]):
    # Entries with a parent are inner parts (the hole in a 'p' or 'b'): skip them
    if h[3] != -1:
        continue
    # Bounding rectangle of the outer contour (h is reused for the height)
    x, y, w, h = cv2.boundingRect(contour)
    # Paste it onto the background, left to right with a 5-pixel gap
    bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
    left += (w + 5)

cv2.imshow('thresh', bg)
cv2.waitKey()
And the image that I want to process is this one
However, I got this message:
>Traceback (most recent call last):
File ".\img.py", line 24, in <module>
bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
ValueError: could not broadcast input array from shape (72,750) into shape (72,195)
Just with tesseract OCR I got "acba" without the zero and four so I need to reorder the letters to obtain it. Any suggestions?
You are trying to put a bigger image into a smaller area, but both must have the same shape.
You can get the shapes of both regions, take the min() of the widths and of the heights, and use those:
# Measure the destination slot on the background and the source patch;
# the two shapes can differ (see the broadcast error above).
h1, w1 = bg[5:5+h, left:left+w].shape
h2, w2 = img[y:y+h, x:x+w].shape
# Copy only the part that fits in both regions.
min_h = min(h1, h2)
min_w = min(w1, w2)
bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
EDIT:
OR maybe you should use x,y instead of 5 and left (also 5)
# Alternative: paste each patch at its original position instead of packing it from the left.
bg[y:y+h, x:x+w] = img[y:y+h, x:x+w]
And maybe you should create bg with the same size as img (after resizing)
# Size the background from the (already resized) grayscale image.
h1, w1 = img.shape
# np.zeros gives an all-zero background of that size.
bg = np.zeros((h1, w1), np.uint8)
EDIT:
Full working code with other changes.
I read the image in color to see which contours it found, because it seems it found something different than you may expect.
import cv2
import numpy as np
print('CV:', cv2.__version__)

# IMREAD_COLOR always yields a 3-channel BGR image, which both the gray
# conversion and the 3-channel contour preview below rely on.
img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_COLOR)
if img_color is None:
    raise FileNotFoundError("ZzSgt.png could not be read")

h, w = img_color.shape[:2]
print('original shape (W,H):', w, h)

img_color = cv2.resize(img_color, (w*5, h*5))
h, w = img_color.shape[:2]
print('resized shape (W,H) :', w, h)

img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Create a white background image to paste the letters on
bg = np.full((h, w), 255, np.uint8)

# Create image to display contours
img_contours = np.full((h, w, 3), 255, np.uint8)

# Keep only top-level contours (no parent); inner parts such as the circle in
# a 'p' or 'b' are ignored. hierarchy is None when nothing was found.
if hierarchy is None:
    outer = []
else:
    outer = [c for c, info in zip(contours, hierarchy[0]) if info[3] == -1]

# Paste the letters in reading order: findContours does not return them
# sorted by position, so sort by the left edge of each bounding box.
outer.sort(key=lambda c: cv2.boundingRect(c)[0])

left = 5
for contour in outer:
    # Bounding rectangle; separate names so the image size h, w is not overwritten
    x, y, box_w, box_h = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', x, y, box_w, box_h)

    # Paste it onto the background, clamped to what fits in both regions
    dst_h, dst_w = bg[5:5+box_h, left:left+box_w].shape
    src_h, src_w = img[y:y+box_h, x:x+box_w].shape
    min_h = min(dst_h, src_h)
    min_w = min(dst_w, src_w)
    bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
    left += (box_w + 5)

    # Copy color regions and draw contours
    img_contours[y:y+box_h, x:x+box_w] = img_color[y:y+box_h, x:x+box_w]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0,0,255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()
cv2.destroyAllWindows()
contours
background
EDIT:
I get a better result if I reverse (invert) the image with img = ~img and change the threshold from 123 to 30
thresh
contours
background (and now I see it could have size even (75, 255) or safer (100, 300))
import cv2
import numpy as np
print('CV:', cv2.__version__)

# IMREAD_COLOR always yields a 3-channel BGR image, which both the gray
# conversion and the 3-channel contour preview below rely on.
# (The question's file is named "captcha.png".)
img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_COLOR)
if img_color is None:
    raise FileNotFoundError("ZzSgt.png could not be read")

h, w = img_color.shape[:2]
print('original shape (W,H):', w, h)

img_color = cv2.resize(img_color, (w*5, h*5))
h, w = img_color.shape[:2]
print('resized shape (W,H) :', w, h)

img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
# Invert the gray image before thresholding
img = ~img

# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('thresh', thresh)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Create a white background image to paste the letters on
bg = np.full((h, w), 255, np.uint8)

# Create image to display contours
img_contours = np.full((h, w, 3), 255, np.uint8)

# Keep only top-level contours (no parent); inner parts such as the circle in
# a 'p' or 'b' are ignored. hierarchy is None when nothing was found.
if hierarchy is None:
    outer = []
else:
    outer = [c for c, info in zip(contours, hierarchy[0]) if info[3] == -1]

# Paste the letters in reading order: findContours does not return them
# sorted by position, so sort by the left edge of each bounding box.
outer.sort(key=lambda c: cv2.boundingRect(c)[0])

left = 5
for contour in outer:
    # Bounding rectangle; separate names so the image size h, w is not overwritten
    x, y, box_w, box_h = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', x, y, box_w, box_h)

    # Paste it onto the background, clamped to what fits in both regions
    dst_h, dst_w = bg[5:5+box_h, left:left+box_w].shape
    src_h, src_w = img[y:y+box_h, x:x+box_w].shape
    min_h = min(dst_h, src_h)
    min_w = min(dst_w, src_w)
    bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
    left += (box_w + 5)

    # Copy (color) region and draw contour
    img_contours[y:y+box_h, x:x+box_w] = img_color[y:y+box_h, x:x+box_w]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0,0,255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()
cv2.destroyAllWindows()
I have a batch of screenshots like here:
and I try to detect the region with six digits and recognize them. The second part works like a charm. I have a problem detecting the correct region because it can be placed with a shift depending on screen dimensions. For example, crop image looks like this:
Seems it looks ok, but I have to add some workaround in code to select the right place.
My code:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
# Structuring elements: a wide rectangle for the top-hat / first closing and a
# square for the final closing
rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 6))
sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))

# Load the screenshot and shrink it in place to fit within 720x1423
img0 = Image.open('./data/test.png')
img0.thumbnail((720, 1423))
img = np.array(img0)

# The magic from https://www.pyimagesearch.com/2017/07/17/credit-card-ocr-with-opencv-and-python/
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, rectKernel)

# Absolute horizontal gradient, rescaled to the 0..255 range
gradX = np.absolute(cv2.Sobel(tophat, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1))
(minVal, maxVal) = (np.min(gradX), np.max(gradX))
gradX = (255 * ((gradX - minVal) / (maxVal - minVal))).astype("uint8")

# Close gaps, binarise with Otsu, then close again with the square kernel
gradX = cv2.morphologyEx(gradX, cv2.MORPH_CLOSE, rectKernel)
thresh = cv2.threshold(gradX, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, sqKernel)

cnts, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Keep the bounding boxes whose position, size and aspect ratio match the digit strip
locs = [
    (bx, by, bw, bh)
    for (bx, by, bw, bh) in map(cv2.boundingRect, cnts)
    if 140 < bx < 220 and bw > 100 and 12 < bh < 20 and 4 <= bw / bh <= 6
]

# Calculate the crop rectangle (fixed fallback when nothing was detected)
LEFT_TOP, RIGHT_BOTTOM = (181, 316), (299, 346)
if locs:
    (x, y, w, h) = locs[0]
    LEFT_TOP = (x - 5, y - 5) # workaround place
    RIGHT_BOTTOM = (x + w + 5, y + h) # workaround place
print(LEFT_TOP, RIGHT_BOTTOM)
img1 = img0.crop(LEFT_TOP + RIGHT_BOTTOM)
Selected contour looks like:
It selects a contour smaller than the actual region. Why? How to fix it?
Thank you!
Test file:
There is no magic in software...
Inappropriate filters "eat" part of your digits.
Remove the tophat filter.
Remove the Sobel filter.
Replace cv2.THRESH_BINARY with cv2.THRESH_BINARY_INV.
Increase the size of sqKernel.
I recommend you to draw the contours, and show (or save) intermediate results for testing.
Here is the modified code:
import cv2
import numpy as np
#from matplotlib import pyplot as plt
from PIL import Image
# Compared with the question's code, the top-hat filter, the Sobel gradient and
# the rectangular kernel are removed, the threshold is inverted, and the
# closing kernel is larger.
sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 11))

# Load and resize image to standard size
img0 = Image.open('./data/test.png')
img0.thumbnail((720, 1423))
# Normalise to 3-channel RGB so the conversions below work whether or not the
# file has an alpha channel. img0 itself is left untouched for the final crop.
img = np.array(img0.convert("RGB"))

gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

# Inverted Otsu threshold straight on the gray image, then close the gaps
# between neighbouring digits so they merge into one blob
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, sqKernel)

cnts, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw contours for testing
tmp_im = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
cv2.drawContours(tmp_im, cnts, -1, (0, 255, 0), 1)  # Draw green line around the contour
cv2.imshow('tmp_im', tmp_im)
cv2.waitKey()
cv2.destroyAllWindows()
cv2.imwrite('./data/tmp_im.png', tmp_im)

# Keep the bounding boxes whose position, size and aspect ratio match the digit strip
locs = []
for c in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    ar = w / h
    if 140 < x < 220 and w > 100 and 12 < h < 20 and 4 <= ar <= 6:
        locs.append((x, y, w, h))

# Calculate the crop rectangle (fixed fallback when nothing was detected)
LEFT_TOP = (181, 316)
RIGHT_BOTTOM = (299, 346)
if locs:
    (x, y, w, h) = locs[0]
    # The extra margins from the question's workaround are dropped here
    LEFT_TOP = (x, y)
    RIGHT_BOTTOM = (x + w, y + h)
print(LEFT_TOP, RIGHT_BOTTOM)

img1 = img0.crop(LEFT_TOP + RIGHT_BOTTOM)
img1.show()
img1.save('./data/digits.png')
Result:
tmp_img (for testing):