How could I crop image in exact location where numbers are situated? - python

I have my code prototype:
import cv2
import numpy as np
# Rotate the scan, detect edges and overlay every line segment found by the
# probabilistic Hough transform in red.
image_path = '/home/follia/Pictures/scan.jpg'
img = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising.
if img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")

# Rotate by 15.5 degrees around the image centre, keeping the canvas size.
h, w = img.shape[:2]
M = cv2.getRotationMatrix2D((w / 2, h / 2), 15.5, 1)
img = cv2.warpAffine(img, M, (w, h))

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 200, apertureSize=3)
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 80)

# HoughLinesP returns None (not an empty array) when no segment is detected,
# so guard before iterating. reshape(-1, 4) flattens the result to one
# (x1, y1, x2, y2) row per segment.
if lines is not None:
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 2)

cv2.imshow("origin", img)
cv2.waitKey(0)
Original image:
and it returns this image:
And I need this image to be cropped and show only numbers:
Could you please help me out, how could I cut this location?
And then, how could I recognize numbers and extract it from image to text?

Try this:
The basic idea of this solution is to apply threshold() to the image, find its contours, and pick the biggest contour among them.
INPUT:
CODE:
import cv2
# Rotate the scan, binarise it, and crop the bounding box of the largest
# external contour.
image = cv2.imread("test.jpg", 1)
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError("could not read image: test.jpg")

h, w = image.shape[:2]
M = cv2.getRotationMatrix2D((w / 2, h / 2), 15.5, 1)
# flags/borderMode are passed by keyword: the 4th positional parameter of
# cv2.warpAffine in Python is `dst`, so passing them positionally would put
# INTER_LINEAR into `dst` and BORDER_CONSTANT into `flags`.
image = cv2.warpAffine(image, M, (w, h),
                       flags=cv2.INTER_LINEAR,
                       borderMode=cv2.BORDER_CONSTANT,
                       borderValue=(255, 255, 255))

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
threshold = 80
# Pixels <= threshold become 255, the rest 0 (equivalent to THRESH_BINARY
# followed by bitwise_not).
_, img = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)
cv2.imshow("Result", img)
cv2.waitKey(0)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x and
# (contours, hierarchy) on 2.x/4.x; [-2] selects the contours on all of them.
contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
if contours:
    # find the biggest area
    c = max(contours, key=cv2.contourArea)
    x, y, box_w, box_h = cv2.boundingRect(c)
    # Copy the crop before drawing, so the green outline is not part of it.
    crop_img = image[y:y + box_h, x:x + box_w].copy()
    cv2.rectangle(image, (x, y), (x + box_w, y + box_h), (0, 255, 0), 2)
    cv2.imshow("Result", crop_img)
    cv2.waitKey(0)
cv2.imshow("Result", image)
cv2.waitKey(0)
OUTPUT:

Related

How to iterate/loop over a folder of images in opencv?

I am trying to loop over a folder of images in OpenCV. Each image is a line of text. I would like to segment each line into words and then save each word as an image. I have used the following code, which works well on a single image at a time. But when I try to apply it to all the files in the folder, it exports only one of them and does not loop over the rest.
I'll appreciate your help and thank you in advance.
ayaasiin
from PIL import Image
import cv2
import numpy as np
from pathlib import Path
import glob
# Segment every *.jpg under data/ into word-sized crops and save each crop
# as its own image.
corpus_dir = Path('data')
files = list(corpus_dir.glob(pattern='*.jpg'))

for f in files:
    image = cv2.imread(str(f))
    # cv2.imread returns None for unreadable files; skip them instead of
    # crashing in cvtColor.
    if image is None:
        continue

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    ret, thresh1 = cv2.threshold(blur, 0, 255,
                                 cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    # Debug output; overwritten on every iteration.
    cv2.imwrite('threshold_image.jpg', thresh1)

    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 30))
    dilation = cv2.dilate(thresh1, rect_kernel, iterations=6)
    # Debug output; overwritten on every iteration.
    cv2.imwrite('dilation_image.jpg', dilation)

    # [-2:] keeps (contours, hierarchy) whether findContours returns two
    # values (OpenCV 2.x/4.x) or three (3.x).
    contours, hierarchy = cv2.findContours(
        dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # Iterate the *sorted* list; the original sorted the contours but then
    # looped over the unsorted ones.
    ordered = sorted(contours, key=lambda c: cv2.boundingRect(c)[1:])

    crop_number = 1
    for cnt in ordered:
        x, y, w, h = cv2.boundingRect(cnt)
        if h > 50 and w > 50:
            # Crop the bounding box area from the current file's image
            # (the original read from an undefined `img`).
            cropped = image[y:y + h, x:x + w]
            # Prefix with the source file's stem so crops from different
            # input files do not overwrite each other.
            cv2.imwrite(f"{f.stem}_sawir {crop_number}.jpeg", cropped)
            crop_number += 1

Python - How to process a binary image to align sparse letters in a row

I'm trying to align letters from an image in order to obtain the full word with tesseract OCR:
import cv2
import numpy as np
# Load the captcha with imread flag 0 and upscale it 5x in both directions.
img = cv2.imread("captcha.png", 0)
h1, w1 = img.shape
img = cv2.resize(img, (w1*5, h1*5))
# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Create a white background image to paste the letters on
# NOTE: the canvas is a fixed 200x200, independent of the upscaled img size.
bg = np.zeros((200, 200), np.uint8)
bg[:] = 255
# Column in bg where the next letter will be pasted
left = 5
# Iterate through the contours
# NOTE: `h` starts as this contour's hierarchy row and is rebound to the
# rectangle height by the boundingRect unpacking below.
for contour, h in zip(contours, hierarchy[0]):
    # Ignore inside parts (circle in a 'p' or 'b')
    if h[3] == -1:
        # Get the bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        # Paste it onto the background
        # The destination slice is clipped to bg's bounds, so its shape can
        # differ from the source slice's shape.
        bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
        # Advance past the pasted letter plus a 5-pixel gap
        left += (w + 5)
cv2.imshow('thresh', bg)
cv2.waitKey()
And the image that I want to process is this one
However, I got this message:
>Traceback (most recent call last):
File ".\img.py", line 24, in <module>
bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
ValueError: could not broadcast input array from shape (72,750) into shape (72,195)
Just with tesseract OCR I got "acba" without the zero and four so I need to reorder the letters to obtain it. Any suggestions?
You are trying to put a bigger image into a smaller area, but both must have the same shape.
You can get the shapes of both slices, take the min() of their widths and heights, and use those:
# Shape of the destination slice (clipped if it runs past the edge of bg)
h1, w1 = bg[5:5+h, left:left+w].shape
# Shape of the source slice taken from img
h2, w2 = img[y:y+h, x:x+w].shape
# Copy only the area that both slices can hold
min_h = min(h1, h2)
min_w = min(w1, w2)
bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
EDIT:
OR maybe you should use x,y instead of 5 and left (also 5)
# Paste the letter at its original (x, y) position instead of at (left, 5)
bg[y:y+h, x:x+w] = img[y:y+h, x:x+w]
And maybe you should create bg with the same size as img (after resizing)
# Allocate bg with the same height and width as img
h1, w1 = img.shape
# np.zeros yields an all-zero array of that shape
bg = np.zeros((h1, w1), np.uint8)
EDIT:
Full working code with other changes.
I read the image in color so I can see which contours were found, because it seems to find something different than you might expect.
import cv2
import numpy as np
# Upscale the captcha, find its top-level contours and paste each one side by
# side onto a white canvas; also show the contours on a colour canvas.
print('CV:', cv2.__version__)

img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_UNCHANGED)
img_h, img_w = img_color.shape[:2]
print('original shape (W,H):', img_w, img_h)

img_color = cv2.resize(img_color, (img_w*5, img_h*5))
img_h, img_w = img_color.shape[:2]
print('resized shape (W,H) :', img_w, img_h)

img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Create a white background image to paste the letters on
bg = np.full((img_h, img_w), 255, np.uint8)

# Create image to display contours
img_contours = np.full((img_h, img_w, 3), 255, np.uint8)

# Keep only top-level contours: entry [3] of a hierarchy row is the parent
# index, and -1 means "no parent". This ignores inside parts (the circle in
# a 'p' or 'b'). hierarchy is None when no contour was found at all.
top_level = []
if hierarchy is not None:
    top_level = [contour
                 for contour, node in zip(contours, hierarchy[0])
                 if node[3] == -1]

# findContours does not guarantee left-to-right order; sort by the left edge
# of each bounding box so the letters are pasted in reading order.
top_level.sort(key=lambda contour: cv2.boundingRect(contour)[0])

left = 5

for contour in top_level:
    # Get the bounding rectangle
    x, y, w, h = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', x, y, w, h)

    # Paste it onto the background, clipped to what both slices can hold
    # (the destination slice is truncated at the edge of bg).
    dst_h, dst_w = bg[5:5+h, left:left+w].shape
    src_h, src_w = img[y:y+h, x:x+w].shape
    min_h = min(dst_h, src_h)
    min_w = min(dst_w, src_w)
    bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
    left += (w + 5)

    # Copy color regions and draw contours
    img_contours[y:y+h, x:x+w] = img_color[y:y+h, x:x+w]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0, 0, 255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()
cv2.destroyAllWindows()
contours
background
EDIT:
I get a better result if I invert the image (img = ~img) and change the threshold from 123 to 30
thresh
contours
background (and now I see it could have size even (75, 255) or safer (100, 300))
import cv2
import numpy as np
# Variant that inverts the grayscale image and thresholds at 30 before
# finding contours; top-level contours are pasted side by side onto a white
# canvas and also shown on a colour canvas.
print('CV:', cv2.__version__)

img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_UNCHANGED)
img_h, img_w = img_color.shape[:2]
print('original shape (W,H):', img_w, img_h)

img_color = cv2.resize(img_color, (img_w*5, img_h*5))
img_h, img_w = img_color.shape[:2]
print('resized shape (W,H) :', img_w, img_h)

img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
# Invert the grayscale image
img = ~img

# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('thresh', thresh)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Create a white background image to paste the letters on
bg = np.full((img_h, img_w), 255, np.uint8)

# Create image to display contours
img_contours = np.full((img_h, img_w, 3), 255, np.uint8)

# Keep only top-level contours: entry [3] of a hierarchy row is the parent
# index, and -1 means "no parent". This ignores inside parts (the circle in
# a 'p' or 'b'). hierarchy is None when no contour was found at all.
top_level = []
if hierarchy is not None:
    top_level = [contour
                 for contour, node in zip(contours, hierarchy[0])
                 if node[3] == -1]

# findContours does not guarantee left-to-right order; sort by the left edge
# of each bounding box so the letters are pasted in reading order.
top_level.sort(key=lambda contour: cv2.boundingRect(contour)[0])

left = 5

for contour in top_level:
    # Get the bounding rectangle
    x, y, w, h = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', x, y, w, h)

    # Paste it onto the background, clipped to what both slices can hold
    # (the destination slice is truncated at the edge of bg).
    dst_h, dst_w = bg[5:5+h, left:left+w].shape
    src_h, src_w = img[y:y+h, x:x+w].shape
    min_h = min(dst_h, src_h)
    min_w = min(dst_w, src_w)
    bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
    left += (w + 5)

    # Copy (color) region and draw contour
    img_contours[y:y+h, x:x+w] = img_color[y:y+h, x:x+w]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0, 0, 255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()
cv2.destroyAllWindows()

Remove Contours OpenCV

My Image
I want to get
https://ibb.co/t8hNkM2
I could only get
I was able to find the maximum contour
def img_counter_max(
        image_file: str,
        output_file: str = "C:\\Users\\dennn\\PycharmProjects\\untitled2\\imag\\roi1.jpg"):
    """Crop the bounding box of the largest contour and preview the result.

    The image is converted to grayscale, binarised with an inverted
    threshold at 127, and its contours are extracted. The bounding box of
    the contour with the largest ``cv2.contourArea`` is written to
    *output_file*; the source image with that box outlined is then shown,
    resized to 512x512, until a key is pressed.

    Args:
        image_file: Path of the image to process.
        output_file: Where the cropped region is written. Defaults to the
            path that used to be hard-coded in this function.

    Raises:
        FileNotFoundError: If *image_file* cannot be read.
        ValueError: If no contour is found in the image.
    """
    img = cv2.imread(image_file)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"could not read image: {image_file}")

    # grayscale: convert the colour model from BGR to single-channel gray
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # binarize
    ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # find contours; [-2] selects the contour list whether findContours
    # returns two values (OpenCV 2.x/4.x) or three (3.x)
    ctrs = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
    if not ctrs:
        raise ValueError(f"no contours found in {image_file}")

    # sort contours left to right, so that ties on area resolve to the
    # left-most contour (max() returns the first maximal element)
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    biggest_contour = max(sorted_ctrs, key=cv2.contourArea)

    x, y, w, h = cv2.boundingRect(biggest_contour)
    roi = img[y:y + h, x:x + w]
    # Written before the outline is drawn, so the saved crop has no outline.
    cv2.imwrite(output_file, roi)

    cv2.rectangle(img, (x, y), (x + w, y + h), (90, 255, 0), 2)
    resize_img = cv2.resize(img, (512, 512))
    cv2.namedWindow("Display frame", cv2.WINDOW_AUTOSIZE)
    cv2.imshow('Display frame', resize_img)
    cv2.waitKey(0)
How do I get the image I need?
I found that sorting by contourArea() gives wrong results. It probably measures the area enclosed by the contour's points rather than the area of the rectangle around the contour, and that rectangle can be bigger.
Instead I use boundingRect() to get the rectangle of each contour, compute its size as w*h, and sort by that size, which orders the contours correctly.
I use a for-loop to display the image with the different rectangles and see which contour gives the expected region. This way I see that the third contour gives the expected region, so I can use [2] to get it and save it.
Alternatively, I could use the size to select the region whose w*h falls within some range:
expected_region_size - range < w*h < expected_region_size + range
Or I could use the for-loop that displays the image with the different rectangles to choose manually which rectangle to save to a file.
import cv2
# Rank contours by the size of their bounding rectangle, preview the five
# largest one at a time, then save the third-largest region.
source = cv2.imread('image.jpg')

# grayscale: convert the colour model from BGR to single-channel gray
grey = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)

# binarize (inverted, threshold 127)
_, binary = cv2.threshold(grey, 127, 255, cv2.THRESH_BINARY_INV)

# find contours
found, _hierarchy = cv2.findContours(binary.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Build one [contour, rect, size] record per contour, where size is the
# bounding rectangle's w*h. cv2.contourArea is deliberately not used:
# the author notes that sorting by it gives wrong results.
candidates = []
for outline in found:
    box = cv2.boundingRect(outline)
    candidates.append([outline, box, box[2] * box[3]])

# largest rectangle first
candidates.sort(key=lambda record: record[2], reverse=True)

# Show each of the five largest candidates on a fresh copy of the image.
for rank, (outline, (x, y, w, h), size) in enumerate(candidates[:5]):
    print(rank, '->', size, '(', x, y, w, h, ')')
    preview = source.copy()
    cv2.rectangle(preview, (x, y), (x + w, y + h), (0, 0, 255), 15)
    cv2.imshow('frame', cv2.resize(preview, (512, 512)))
    cv2.waitKey(0)
cv2.destroyAllWindows()

# --- save image ---
# The third-largest rectangle ([2]) is the one that is saved.
_, (x, y, w, h), _ = candidates[2]
cv2.imwrite('output.jpg', source[y:y + h, x:x + w])
Preview:
Output:
The code finds the characters well, but outputs them out of order.
I found a piece of code that should solve this problem, but I can't get it to work.
After finding the contours with contours = cv2.findContours(), use:
# One (x, y, w, h) bounding box per contour.
boundary = [cv2.boundingRect(cnt) for cnt in contours]
count = np.asarray(boundary)

# Largest x + w over all boxes, and the tallest box.
max_width = count[:, [0, 2]].sum(axis=1).max()
max_height = count[:, 3].max()
nearest = max_height * 1.4

# np.lexsort uses the LAST key as the primary one: order by y, then by x.
ind_list = np.lexsort((count[:, 0], count[:, 1]))
c = count[ind_list]
Find symbols
# Segment the glyphs of the cropped image: smooth, threshold, erode, dilate,
# find the large external contours, and cut each one out as a 32x32 symbol.


def _show_stage(window, title, position, stage_image):
    """Show *stage_image* in a cv2 window and in one cell of the 2x3 figure.

    The subplot is selected BEFORE the title is set; plt.title() applies to
    the current axes, so setting it first would retitle the previous panel.
    """
    cv2.imshow(window, stage_image)
    plt.subplot(2, 3, position)
    plt.title(title)
    plt.imshow(stage_image, 'gray')
    plt.xticks([])
    plt.yticks([])


img = "C:\\Users\\dennn\\PycharmProjects\\untitled2\\output.jpg"
base_dir = os.curdir
path = os.path.join(base_dir, img)

# Flag 0 loads the file as a single-channel grayscale image.
raw_image = cv2.imread(path, 0)
_show_stage("original", "Original", 1, raw_image)

sm_image = cv2.blur(raw_image, (8, 8))
_show_stage("smoothed", "Smoothed", 2, sm_image)

ret, bw_image = cv2.threshold(sm_image, 160, 255, cv2.THRESH_BINARY_INV)
_show_stage("thresholded", "Thresholded", 3, bw_image)

kernel = np.ones((4, 4), np.uint8)
er_image = cv2.erode(bw_image, kernel)
_show_stage("eroded", "Eroded", 4, er_image)

kernel = np.ones((2, 2), np.uint8)
di_image = cv2.dilate(er_image, kernel)
_show_stage("dilated", "Dilated", 5, di_image)

# mo_image receives the rectangle overlay; di_image stays untouched.
mo_image = di_image.copy()

# findContours returns (image, contours, hierarchy) on OpenCV 3.x and
# (contours, hierarchy) on 2.x/4.x; [-2] is the contour list on all of them.
contour0 = cv2.findContours(mo_image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = [cv2.approxPolyDP(cnt, 3, True) for cnt in contour0[-2]]

# Keep only contours whose area exceeds areaRatio of the largest one.
maxArea = max((cv2.contourArea(ctr) for ctr in contours), default=0)
areaRatio = 0.05
rect = [cv2.boundingRect(cv2.approxPolyDP(ctr, 1, True))
        for ctr in contours
        if cv2.contourArea(ctr) > maxArea * areaRatio]

symbols = []
for x, y, w, h in rect:
    cv2.rectangle(mo_image, (x, y), (x + w, y + h), 255, 2)
    # Cut the glyph from the un-annotated image so the rectangle drawn on
    # mo_image does not end up inside the symbol.
    glyph = cv2.resize(di_image[y:y + h, x:x + w], (32, 32))
    symbols.append(glyph.reshape(1024).astype("uint8"))
testset_data = np.array(symbols)

_show_stage("segmented", "Segmented", 6, mo_image)
# plt.show()

# garbage collection
cv2.destroyAllWindows()
plt.close()

# show glyphs, one per key press, centred on a 64x64 black canvas
for symbol in symbols:
    # uint8 canvas: cv2.imshow multiplies floating-point pixel values by 255,
    # which would wash out the glyph's gray levels on a float64 canvas.
    canvas = np.zeros((64, 64), np.uint8)
    canvas[15:47, 15:47] = symbol.reshape((32, 32))
    cv2.imshow("sym", canvas)
    cv2.waitKey(0)
cv2.destroyAllWindows()
plt.close()

Python: Showing every Object of an image in its own window

I've written some code, to crop an object (in this case the Data Matrix Code) from an image:
import numpy as np
import cv2
# Crop the region of the last external contour and show it with a white
# 10-pixel border.
image = cv2.imread("datamatrixc.png")
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
img_height, img_width = image.shape[:2]
WHITE = [255, 255, 255]

# Threshold filter
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV)

# Get Contours. findContours returns (image, contours, hierarchy) on
# OpenCV 3.x and (contours, hierarchy) on 2.x/4.x; [-2] works on all of them.
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
if not contours:
    raise SystemExit("no contours found in datamatrixc.png")

# Get last element of the contours object (without shadowing builtin max)
cnt = contours[-1]

# Get coordinates for the bounding box
x, y, w, h = cv2.boundingRect(cnt)

# NOTE(review): the row range is derived from the image height rather than
# from the rectangle's own y; kept exactly as in the original.
row_offset = ((img_height / 2) - h) / 2
image_region = image[int(row_offset):int(row_offset + h), int(x):int(x + w)]

dmc = cv2.copyMakeBorder(image_region, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=WHITE)
cv2.imshow("Test", dmc)
cv2.waitKey(0)
cv2.destroyAllWindows()
The code works fine and I received as result:
However, the next image is a little more complicated.
I receive the same result as in the previous image, but I have no idea how to detect the two other objects.
Is there an easier way every object showing in its window?
For this specific image, take the biggest contours you have and check whether each object is a four-sided shape. If the midpoint between each pair of the bounding box's corners (see the pairs below) is in the contour array, then voilà, problem solved.
Pairs: TopRight-TopLeft, TopRight-BottomRight, TopLeft-BottomLeft, BottomLeft-BottomRight
Or you could check whether there are pixels that are not black/white inside the bounding box.
And to show each object individually, just wrap a for-loop around what you already have.
How about this?
import numpy as np
import cv2
# Show every connected component of at least size_thresh pixels, one after
# another, cropped to its bounding box.
image = cv2.imread("datamatrixc.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, bin_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# NOTE(review): the original also computed a morphological closing of bin_img
# (3x3 kernel, 4 iterations) but never used it; the labelling below has always
# run on bin_img. The dead computation is removed. If objects turn out to be
# split into several components, labelling a closed image may be worth trying.
n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_img)

size_thresh = 5000
# Label 0 is skipped, as in the original loop.
for i in range(1, n_labels):
    if stats[i, cv2.CC_STAT_AREA] >= size_thresh:
        print(stats[i, cv2.CC_STAT_AREA])
        x = stats[i, cv2.CC_STAT_LEFT]
        y = stats[i, cv2.CC_STAT_TOP]
        w = stats[i, cv2.CC_STAT_WIDTH]
        h = stats[i, cv2.CC_STAT_HEIGHT]
        cv2.imshow('img', image[y:y+h, x:x+w])
        cv2.waitKey(0)
cv2.destroyAllWindows()

Set white color outside boundingBox (Python, OpenCV)

I have this image:
(or this..)
How can I set to white all the area outside the boundingBox'es ?
I would like to obtain this result:
Thanks
As mentioned in the comments, if you have the positions of the ROIs, you can paste them onto an image with a white background that has the same shape as the original.
import cv2
import numpy as np
# Copy every accepted bounding-box region of the image onto an all-white
# canvas of the same shape, leaving everything outside the boxes white.
image = cv2.imread(r'C:\Users\Desktop\rus.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
white_bg = 255*np.ones_like(image)

ret, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
blur = cv2.medianBlur(thresh, 1)
kernel = np.ones((10, 20), np.uint8)
img_dilation = cv2.dilate(blur, kernel, iterations=1)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x and
# (contours, hierarchy) on 2.x/4.x; [-2:] keeps the last two on all of them.
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Process regions left to right (by the x of each bounding box).
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    roi = image[y:y + h, x:x + w]
    # Keep boxes wider than 50 px whose height is between 50 and 200 px.
    if (h > 50 and w > 50) and h < 200:
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 1)
        cv2.imshow('{}.png'.format(i), roi)
        # --- paste ROIs on image with white background
        white_bg[y:y+h, x:x+w] = roi

cv2.imshow('white_bg_new', white_bg)
cv2.waitKey(0)
cv2.destroyAllWindows()
The result:
Take a mask with the dimensions same as that of your image. The mask is an array with values 255 (white). I assume if you are drawing the bounding box, you definitely have the coordinates for each of them. For each of the bounding box you simply replace the region in the mask with the region bounded by the bounding box as below:
mask[y:y+h,x:x+w] = image[y:y+h,x:x+w], where mask is your final output with your desired result and image is your input image on which the processing is to take place. The values x,y,w,h is the same for both the image as we have made sure that the dimensions for the mask and input image are the same.

Categories