I am following this tutorial to recognize six digits from the following image
The threshold looks very good to me.
However, when I reach the contour definition, digits 7, 1, 0 (and possibly more) are always split into two or more boxes.
By definition, a contour is the boundary of a continuous entity, so digits whose segments are joined only by a thin ligature cannot be detected as a single contour. What can I do in this case? My first instinct is to merge these small boxes (a sketch of that idea follows the code below). I have already tried playing around with the height and width of the contour, with no success. The code is written below.
# https://pyimagesearch.com/2017/02/13/recognizing-digits-with-opencv-and-python/
# import the necessary packages
from imutils.perspective import four_point_transform
from imutils import contours
import imutils
import cv2
# define the dictionary of digit segments so we can identify
# each digit on the thermostat
DIGITS_LOOKUP = {
(1, 1, 1, 0, 1, 1, 1): 0,
(0, 0, 1, 0, 0, 1, 0): 1,
(1, 0, 1, 1, 1, 1, 0): 2,
(1, 0, 1, 1, 0, 1, 1): 3,
(0, 1, 1, 1, 0, 1, 0): 4,
(1, 1, 0, 1, 0, 1, 1): 5,
(1, 1, 0, 1, 1, 1, 1): 6,
(1, 0, 1, 0, 0, 1, 0): 7,
(1, 1, 1, 1, 1, 1, 1): 8,
(1, 1, 1, 1, 0, 1, 1): 9
}
# load the example image
image = cv2.imread('DSC_01922.JPG', 1)
# pre-process the image by resizing it, converting it to
# grayscale, blurring it, and computing an edge map
# image = imutils.resize(image, height=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# edged = cv2.Canny(blurred, 50, 200, 255)
# threshold the warped image, then apply a series of morphological
# operations to cleanup the thresholded image
thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
# cv2.imshow('thresh', thresh)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# find contours in the thresholded image, then initialize the
# digit contours lists
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
digitCnts = []
# loop over the digit area candidates
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # if the contour is sufficiently large, it must be a digit
    if h >= 90 and h <= 300:
        digitCnts.append(c)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow('image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
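As an aside, here is a minimal sketch of the box-merging idea mentioned above. It assumes the split pieces of one digit overlap, or nearly overlap, in their horizontal extent; merge_split_boxes and the x_gap tolerance are hypothetical, not from the tutorial:

def merge_split_boxes(boxes, x_gap=10):
    # merge bounding boxes whose horizontal spans touch or come within
    # x_gap pixels of each other, assuming pieces of one digit share a column
    merged = []
    for (x, y, w, h) in sorted(boxes):
        if merged and x <= merged[-1][0] + merged[-1][2] + x_gap:
            (mx, my, mw, mh) = merged[-1]
            right = max(mx + mw, x + w)
            bottom = max(my + mh, y + h)
            top = min(my, y)
            merged[-1] = (mx, top, right - mx, bottom - top)
        else:
            merged.append((x, y, w, h))
    return merged

Collecting the (x, y, w, h) tuples of all candidate contours and passing the list through this helper would give one box per digit, which can then be drawn or cropped as before.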
Update 1
Using MORPH_CLOSE instead of MORPH_OPEN and enlarging the kernel, as suggested by @Croolman, improves the results, as can be seen below.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 7))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
Note that I am doing this as a hobby and I am not familiar with the existing OpenCV/Python tools. Thank you in advance.
Update 2
This solution works.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 15))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
Threshold and resulting contours:
This is the complete answer. It required tweaking the kernel values and using MORPH_CLOSE.
# https://pyimagesearch.com/2017/02/13/recognizing-digits-with-opencv-and-python/
# import the necessary packages
from imutils.perspective import four_point_transform
from imutils import contours
import imutils
import cv2
# define the dictionary of digit segments so we can identify
# each digit on the thermostat
DIGITS_LOOKUP = {
(1, 1, 1, 0, 1, 1, 1): 0,
(0, 0, 1, 0, 0, 1, 0): 1,
(1, 0, 1, 1, 1, 1, 0): 2,
(1, 0, 1, 1, 0, 1, 1): 3,
(0, 1, 1, 1, 0, 1, 0): 4,
(1, 1, 0, 1, 0, 1, 1): 5,
(1, 1, 0, 1, 1, 1, 1): 6,
(1, 0, 1, 0, 0, 1, 0): 7,
(1, 1, 1, 1, 1, 1, 1): 8,
(1, 1, 1, 1, 0, 1, 1): 9
}
# load the example image
image = cv2.imread('DSC_01922.JPG', 1)
# pre-process the image by resizing it, converting it to
# grayscale, blurring it, and computing an edge map
# image = imutils.resize(image, height=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# edged = cv2.Canny(blurred, 50, 200, 255)
# threshold the warped image, then apply a series of morphological
# operations to cleanup the thresholded image
thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 15))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
cv2.imshow('thresh', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
# find contours in the thresholded image, then initialize the
# digit contours lists
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
digitCnts = []
# loop over the digit area candidates
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # if the contour is sufficiently large, it must be a digit
    if h >= 90 and h <= 300:
        digitCnts.append(c)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow('image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Related
I am trying to recognize six digits from a meter using Python/OpenCV. It is surprisingly hard to get the morphological operations working in the right way, given the time I have spent adjusting the focus/distance of my Raspberry Pi camera to the meter screen; I have even bought a separate LED lamp to get the lighting as uniform as possible. This is a template image,
and I've tried using and adjusting the code from two sources, reproduced below, without any progress. I got stuck right at the start, when setting the thresholding options. Thank you in advance for any help.
# Code 1
import cv2
import numpy as np
import pytesseract
# Load the image
img = cv2.imread("test.jpg")
# Color-segmentation to get binary mask
lwr = np.array([43, 0, 71])
upr = np.array([103, 255, 130])
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
msk = cv2.inRange(hsv, lwr, upr)
cv2.imwrite("msk.png", msk)
# Extract digits
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=5)
res = 255 - cv2.bitwise_and(dlt, msk)
cv2.imwrite("res.png", res)
# Displaying digits and OCR
txt = pytesseract.image_to_string(res, config="--psm 6 digits")
print(''.join(t for t in txt if t.isalnum()))
cv2.imshow("res", res)
cv2.waitKey(0)
cv2.destroyAllWindows()
# code 2
# https://pyimagesearch.com/2017/02/13/recognizing-digits-with-opencv-and-python/
# import the necessary packages
# from imutils.perspective import four_point_transform
from imutils import contours
import imutils
import cv2
import numpy as np
from numpy.linalg import norm
# define the dictionary of digit segments so we can identify
# each digit on the thermostat
DIGITS_LOOKUP = {
(1, 1, 1, 0, 1, 1, 1): 0,
(1, 0, 1, 0, 1, 0, 1): 1,
(1, 0, 1, 1, 1, 0, 1): 2,
(1, 0, 1, 1, 0, 1, 1): 3,
(0, 1, 1, 1, 0, 1, 0): 4,
(1, 1, 0, 1, 0, 1, 1): 5,
(1, 1, 0, 1, 1, 1, 1): 6,
(1, 1, 1, 0, 0, 1, 0): 7,
(1, 1, 1, 1, 1, 1, 1): 8,
(1, 1, 1, 1, 0, 1, 1): 9
}
images = 'test.jpg'
image = cv2.imread(images, 1)
# pre-process the image by resizing it, converting it to
# graycale, blurring it, and computing an edge map
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
# gray = cv2.medianBlur(blurred, 1)
# threshold the warped image, then apply a series of morphological
# operations to cleanup the thresholded image
(T, thresh) = cv2.threshold(blurred, 0, 255,
cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
cv2.imshow('thresh', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
mask = np.zeros((image.shape[0] + 2, image.shape[1] + 2), np.uint8)
cv2.floodFill(thresh, mask, (0, 0), 0)
cv2.floodFill(thresh, mask, (image.shape[1]-1, 0), 0)
cv2.floodFill(thresh, mask, (round(image.shape[1]/2.4), 0), 0)
cv2.floodFill(thresh, mask, (image.shape[1]//2, 0), 0)
cv2.floodFill(thresh, mask, (0, image.shape[0]-1), 0)
cv2.floodFill(thresh, mask, (image.shape[1]-1, image.shape[0]-1), 0)
kernel = np.ones((2, 2), np.uint8)
thresh = cv2.erode(thresh, kernel, iterations=2)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 13))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
# cv2.imshow('thresh', thresh)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# find contours in the thresholded image, then initialize the
# digit contours lists
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
digitCnts = []
# loop over the digit area candidates
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # if the contour is sufficiently large, it must be a digit
    if w <= 300 and (h >= 130 and h <= 300):
        digitCnts.append(c)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
# cv2.imshow('image', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# sort the contours from left-to-right, then initialize the
# actual digits themselves
digitCnts = contours.sort_contours(digitCnts, method="left-to-right")[0]
digits = []
clao = 0
# loop over each of the digits
for c in digitCnts:
    clao = clao + 1
    # extract the digit ROI
    (x, y, w, h) = cv2.boundingRect(c)
    roi = thresh[y:y + h, x:x + w]
    # compute the width and height of each of the 7 segments
    # we are going to examine
    (roiH, roiW) = roi.shape
    (dW, dH) = (int(roiW * 0.25), int(roiH * 0.15))
    dHC = int(roiH * 0.05)
    # define the set of 7 segments
    segments = [
        ((0, 0), (w, dH)),                           # top
        ((0, 0), (dW, h // 2)),                      # top-left
        ((w - dW, 0), (w, h // 2)),                  # top-right
        ((0, (h // 2) - dHC), (w, (h // 2) + dHC)),  # center
        ((0, h // 2), (dW, h)),                      # bottom-left
        ((w - dW, h // 2), (w, h)),                  # bottom-right
        ((0, h - dH), (w, h))                        # bottom
    ]
    on = [0] * len(segments)
    # loop over the segments
    for (i, ((xA, yA), (xB, yB))) in enumerate(segments):
        # extract the segment ROI, count the total number of
        # thresholded pixels in the segment, and then compute
        # the area of the segment
        segROI = roi[yA:yB, xA:xB]
        total = cv2.countNonZero(segROI)
        area = (xB - xA) * (yB - yA)
        # if the fraction of non-zero pixels exceeds the tuned
        # per-digit threshold, mark the segment as "on"
        if clao == 1:
            if total / float(area) > 0.34:
                if area < 1500:
                    on = [1, 0, 1, 0, 1, 0, 1]
                else:
                    on[i] = 1
        else:
            if total / float(area) > 0.39:
                if area < 1500:
                    on = [1, 0, 1, 0, 1, 0, 1]
                else:
                    on[i] = 1
    # lookup the digit and draw it on the image
    digit = DIGITS_LOOKUP.get(tuple(on)) or DIGITS_LOOKUP[
        min(DIGITS_LOOKUP.keys(), key=lambda key: norm(np.array(key) - np.array(on)))]
    # digit = DIGITS_LOOKUP[tuple(on)]
    digits.append(digit)
    # print(digits)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 1)
    cv2.putText(image, str(digit), (x - 10, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 0), 2)
# display the digits
print(digits)
cv2.imshow("Input", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Update
Apologies for my late reply, but I have been quite busy with work.
I have captured 22 images throughout the day and used @fmw42's code (with some amendments) to apply thresholding and morphological operations. I am making the images available here, and the code I am using is available below. Overall the performance is quite robust, although 1s (and sometimes 8s) get mixed up with 2s. I am happy to accept a code that provides improved performance. Note: I think one problem is that the vertical strokes of the digits are slightly slanted; a deskew sketch follows the code below. Thank you in advance.
import cv2
import numpy as np
from numpy.linalg import norm
from imutils import contours
import imutils
import os
# define the dictionary of digit segments so we can identify
# each digit on the thermostat
DIGITS_LOOKUP = {
(1, 1, 1, 0, 1, 1, 1): 0,
(1, 0, 1, 0, 1, 0, 1): 1,
(1, 0, 1, 1, 1, 0, 1): 2,
(1, 0, 1, 1, 0, 1, 1): 3,
(0, 1, 1, 1, 0, 1, 0): 4,
(1, 1, 0, 1, 0, 1, 1): 5,
(1, 1, 0, 1, 1, 1, 1): 6,
(1, 1, 1, 0, 0, 1, 0): 7,
(1, 1, 1, 1, 1, 1, 1): 8,
(1, 1, 1, 1, 0, 1, 1): 9
}
path_of_the_directory = "/home/myusername/mypathdirectory"
ext = ('.jpg')
for files in os.listdir(path_of_the_directory):
    if files.endswith(ext):
        # load image
        print(files)
        img = cv2.imread(os.path.join(path_of_the_directory, files))  # join handles the missing trailing slash
        # convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # blur
        blur = cv2.GaussianBlur(gray, (0, 0), sigmaX=51, sigmaY=51)
        # divide
        divide = cv2.divide(gray, blur, scale=255)
        # threshold
        thresh = cv2.threshold(divide, 235, 255, cv2.THRESH_BINARY)[1]
        # apply morphology
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (41, 41))
        morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (41, 41))
        morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
        morph = cv2.bitwise_not(morph)  # reverse
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 70))
        morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
        # write result to disk
        cv2.imwrite("digits_division.jpg", divide)
        cv2.imwrite("digits_threshold.jpg", thresh)
        cv2.imwrite("digits_morph.jpg", morph)
        # display it
        cv2.imshow("divide", divide)
        cv2.imshow("thresh", thresh)
        cv2.imshow("morph", morph)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        # find contours in the thresholded image, then initialize the
        # digit contours lists
        cnts = cv2.findContours(morph.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        digitCnts = []
        # loop over the digit area candidates
        for c in cnts:
            # compute the bounding box of the contour
            (x, y, w, h) = cv2.boundingRect(c)
            # if the contour is sufficiently large, it must be a digit
            if w >= 60 and (h >= 300 and h <= 800):
                digitCnts.append(c)
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.imshow('image', img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        # sort the contours from left-to-right, then initialize the
        # actual digits themselves
        digitCnts = contours.sort_contours(digitCnts, method="left-to-right")[0]
        digits = []
        clao = 0
        # loop over each of the digits
        for c in digitCnts:
            clao = clao + 1
            # extract the digit ROI
            (x, y, w, h) = cv2.boundingRect(c)
            roi = morph[y:y + h, x:x + w]
            # compute the width and height of each of the 7 segments
            # we are going to examine
            (roiH, roiW) = roi.shape
            (dW, dH) = (int(roiW * 0.25), int(roiH * 0.15))
            dHC = int(roiH * 0.05)
            # define the set of 7 segments
            segments = [
                ((0, 0), (w, dH)),                           # top
                ((0, 0), (dW, h // 2)),                      # top-left
                ((w - dW, 0), (w, h // 2)),                  # top-right
                ((0, (h // 2) - dHC), (w, (h // 2) + dHC)),  # center
                ((0, h // 2), (dW, h)),                      # bottom-left
                ((w - dW, h // 2), (w, h)),                  # bottom-right
                ((0, h - dH), (w, h))                        # bottom
            ]
            on = [0] * len(segments)
            # loop over the segments
            for (i, ((xA, yA), (xB, yB))) in enumerate(segments):
                # extract the segment ROI, count the total number of
                # thresholded pixels in the segment, and then compute
                # the area of the segment
                segROI = roi[yA:yB, xA:xB]
                total = cv2.countNonZero(segROI)
                area = (xB - xA) * (yB - yA)
                # if the fraction of non-zero pixels exceeds the tuned
                # per-digit threshold, mark the segment as "on"
                if clao == 1:
                    if total / float(area) > 0.34:
                        if area < 1500:
                            on = [1, 0, 1, 0, 1, 0, 1]
                        else:
                            on[i] = 1
                else:
                    if total / float(area) > 0.42:
                        if area < 1500:
                            on = [1, 0, 1, 0, 1, 0, 1]
                        else:
                            on[i] = 1
            # lookup the digit and draw it on the image
            digit = DIGITS_LOOKUP.get(tuple(on)) or DIGITS_LOOKUP[
                min(DIGITS_LOOKUP.keys(), key=lambda key: norm(np.array(key) - np.array(on)))]
            # digit = DIGITS_LOOKUP[tuple(on)]
            digits.append(digit)
            # print(digits)
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
            cv2.putText(img, str(digit), (x - 10, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 0), 2)
        # display the digits
        print(digits)
        cv2.imshow("Input", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    else:
        continue
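On the slant hypothesis from above: a minimal deskew sketch, assuming the digits lean by a small constant shear that an affine warp can undo. The shear value is a made-up starting point to tune by eye, not something measured from the images:

import cv2
import numpy as np

def deskew(binary_img, shear=0.1):
    # shear the image horizontally so slanted vertical strokes become upright;
    # the sign and magnitude of shear control the direction and amount of correction
    h, w = binary_img.shape[:2]
    M = np.float32([[1, shear, 0],
                    [0, 1, 0]])
    return cv2.warpAffine(binary_img, M, (w, h),
                          flags=cv2.INTER_NEAREST, borderValue=0)

Calling morph = deskew(morph) before the contour search might make the seven-segment boxes line up better with the strokes.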
Perhaps division normalization in Python/OpenCV will help you.
Input:
import cv2
import numpy as np
# load image
img = cv2.imread("digits.jpg")
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# blur
blur = cv2.GaussianBlur(gray, (0,0), sigmaX=51, sigmaY=51)
# divide
divide = cv2.divide(gray, blur, scale=255)
# threshold
thresh = cv2.threshold(divide, 235, 255, cv2.THRESH_BINARY)[1]
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11,11))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11,11))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# write result to disk
cv2.imwrite("digits_division.jpg", divide)
cv2.imwrite("digits_threshold.jpg", thresh)
cv2.imwrite("digits_morph.jpg", morph)
# display it
cv2.imshow("divide", divide)
cv2.imshow("thresh", thresh)
cv2.imshow("morph", morph)
cv2.waitKey(0)
cv2.destroyAllWindows()
Division normalized image:
Thresholded image:
Morphology processed image:
You can then clean up further by getting contours and removing small contours and very long horizontal contours.
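A hedged sketch of that cleanup, assuming small blobs and long flat components can be filtered by area and aspect ratio; the 100-pixel and 5:1 cutoffs are placeholders to tune, not values from the result above:

import cv2
import numpy as np

cnts, _ = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 4.x
clean = np.zeros_like(morph)
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    # keep a component only if it is big enough and not a long horizontal strip
    if cv2.contourArea(c) > 100 and w < 5 * h:
        cv2.drawContours(clean, [c], -1, 255, cv2.FILLED)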
The key to getting this working is cleaning the image up, which I have done to a good enough level to make it work, using the scikit-image library.
I then look at certain squares on the image and take an average reading from each area.
On the right-hand image I've marked some of those locations with red squares.
The script I used to get this result:
import numpy as np
from pathlib import Path
import imageio.v3 as iio
import skimage.filters as skif
from skimage.color import rgb2gray
from skimage.util import img_as_ubyte
from skimage.restoration import denoise_bilateral
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib.cm as cm

threshold = 125
digit_loc = [1600, 1300, 1000, 730, 420, 155]
size = 20
x_mid = 80
x_right = 160
y_top = 130
y_mt = 250
y_mid = 380
y_bm = 520
y_bot = 630


def img_with_threshold(orig_img):
    block_size = 255
    local_thresh = skif.threshold_local(
        orig_img,
        block_size,
        method="mean",
    )
    binary_local = orig_img > local_thresh
    u8_val = img_as_ubyte(binary_local)
    return u8_val


def image_denoise(orig_img):
    return denoise_bilateral(orig_img, win_size=10, bins=10)


def plot_imgs(orig_img, mod_img):
    # Display the image
    fig, axes = plt.subplots(1, 2, figsize=(8, 8), sharex=True, sharey=True)
    ax = axes.ravel()
    ax[0].imshow(orig_img, cmap=cm.Greys_r)
    ax[1].imshow(mod_img, cmap=cm.Greys_r)
    # Create a Rectangle patch
    for x_loc in digit_loc:
        rect1 = Rectangle((x_loc + x_mid, y_top), size, size, linewidth=1, edgecolor='r', facecolor='none')
        rect2 = Rectangle((x_loc, y_mt), size, size, linewidth=1, edgecolor='r', facecolor='none')
        rect3 = Rectangle((x_loc + x_right, y_mt), size, size, linewidth=1, edgecolor='r', facecolor='none')
        rect4 = Rectangle((x_loc + x_mid, y_mid), size, size, linewidth=1, edgecolor='r', facecolor='none')
        rect5 = Rectangle((x_loc, y_bm), size, size, linewidth=1, edgecolor='r', facecolor='none')
        rect6 = Rectangle((x_loc + x_right, y_bm), size, size, linewidth=1, edgecolor='r', facecolor='none')
        rect7 = Rectangle((x_loc + x_mid, y_bot), size, size, linewidth=1, edgecolor='r', facecolor='none')
        # Add the patch to the Axes
        ax[1].add_patch(rect1)
        ax[1].add_patch(rect2)
        ax[1].add_patch(rect3)
        ax[1].add_patch(rect4)
        ax[1].add_patch(rect5)
        ax[1].add_patch(rect6)
        ax[1].add_patch(rect7)
    plt.show()


def seg_to_digit(segments, location):
    digit_values = {0b1110111: 0,
                    0b0010010: 1,
                    0b1011101: 2,
                    0b1011011: 3,
                    0b0111010: 4,
                    0b1101011: 5,
                    0b1101111: 6,
                    0b1110010: 7,
                    0b1111111: 8,
                    0b1111011: 9,
                    }
    result = int("".join(["1" if i < threshold else "0" for i in segments]), 2)
    # print("score:", result)
    return digit_values.get(result, 0) * 10 ** location


def get_digit(location, mod_img):
    """
     a
    b c
     d
    e f
     g
    """
    x_loc = digit_loc[location]
    m_loc = (x_loc + x_mid, x_loc + x_mid + size)
    l_loc = (x_loc, x_loc + size)
    r_loc = (x_loc + x_right, x_loc + x_right + size)
    seg_a = np.average(mod_img[y_top:y_top + size, m_loc[0]:m_loc[1]])
    seg_b = np.average(mod_img[y_mt:y_mt + size, l_loc[0]:l_loc[1]])
    seg_c = np.average(mod_img[y_mt:y_mt + size, r_loc[0]:r_loc[1]])
    seg_d = np.average(mod_img[y_mid:y_mid + size, m_loc[0]:m_loc[1]])
    seg_e = np.average(mod_img[y_bm:y_bm + size, l_loc[0]:l_loc[1]])
    seg_f = np.average(mod_img[y_bm:y_bm + size, r_loc[0]:r_loc[1]])
    seg_g = np.average(mod_img[y_bot:y_bot + size, m_loc[0]:m_loc[1]])
    segments = [seg_a, seg_b, seg_c, seg_d, seg_e, seg_f, seg_g]
    # print(f"x loc: {x_loc}, digit index: {location}, segment values: {segments}")
    # create an integer from the bits
    # print('value:', result)
    return seg_to_digit(segments, location)


def main():
    data_dir = Path(__file__).parent.joinpath('data')
    meter_img = data_dir.joinpath('meter_test.jpg')
    img = iio.imread(meter_img)
    gray_img = img_as_ubyte(rgb2gray(img))
    img_result = image_denoise(gray_img)
    img_result1 = img_with_threshold(img_result)
    reading = 0
    for dig_loc in range(6):
        reading += get_digit(dig_loc, img_result1)
        print(f"{reading:>21}")
    print("Final reading:", reading)
    plot_imgs(gray_img, img_result1)


if __name__ == '__main__':
    main()
This gave the following output:
7
77
677
4677
24677
924677
Final reading: 924677
I have a PDF from which I want to extract text. I use Tesseract for OCR, which does a good job, but it does not recognize the two-column format of the document and hence merges the two columns together.
I want to split the document on the vertical line (in the middle of the page) and the horizontal line (at the top of the page) and then feed the pieces to Tesseract. So I do the following preprocessing steps:
# color to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# edge detection
edges = cv2.Canny(gray, 500, 1000, apertureSize=7)
# dilate
kernel = np.ones((5,5),np.float32)/25
edges = cv2.dilate(edges, kernel, iterations=1)
# blur
blur = cv2.GaussianBlur(edges, (7, 7), 0)
These steps produce:
Now, I do line detection:
minLineLength = 1000
maxLineGap = 500
# note: minLineLength and maxLineGap must be passed as keyword arguments;
# passed positionally they are taken as HoughLinesP's optional `lines` output
lines = cv2.HoughLinesP(processed_img, 1, np.pi, 2,
                        minLineLength=minLineLength, maxLineGap=maxLineGap)
for line in lines:
    x1, y1, x2, y2 = line[0]
    cv2.line(img, (x1, y1), (x2, y2), (0, 0, 0), 1)
The final result (after stitching all the images back into a pdf) looks like this.
I have tried various combinations for theta, minLineLength and maxLineGap and this was the best result I could get. Any help/pointers would be greatly appreciated!
One of the possible solutions is described below:
1) Detect the horizontal line. Below is one way to do this:
import cv2
import numpy as np
def discard(image):
    image = np.uint8(image)
    _, im_label, stts, _ = cv2.connectedComponentsWithStats(image, connectivity=4)
    msk1 = np.isin(im_label, np.where(stts[:, cv2.CC_STAT_WIDTH] > 500)[0])
    msk2 = np.isin(im_label, np.where(stts[:, cv2.CC_STAT_HEIGHT] > 500)[0])
    image[(msk1 | msk2)] = 0
    return image
img = cv2.imread("page_1.jpg", 0)
img = cv2.resize(img, None, fx=0.35, fy=0.35, interpolation=cv2.INTER_LINEAR)
height, width = img.shape[:2]
# Binarization
thresh = 255 - img
ret, thresh = cv2.threshold(thresh, 5, 255, cv2.THRESH_BINARY)
# Discarding long connected components
without_lines = discard(thresh.copy())
just_lines = cv2.bitwise_xor(thresh, without_lines)
horizontal = just_lines.copy()
# separating horizontal line
h_kernel_large = np.array([[0, 0, 0, 0, 0],
                           [0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1],
                           [0, 0, 0, 0, 0],
                           [0, 0, 0, 0, 0]], np.uint8)
horizontal = cv2.morphologyEx(horizontal, cv2.MORPH_OPEN, h_kernel_large, iterations=2)
cv2.imshow("horizontal_line", horizontal)
This is what we get in the horizontal matrix:
2) Use findContours and boundingRect to get the coordinates of that horizontal line, then use those coordinates to crop the image horizontally.
upper_portion = img
lower_portion = img
contours, hierarchy = cv2.findContours(horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    upper_portion = img[0:y, 0:width]
    lower_portion = img[y+h:height, 0:width]
cv2.imshow("upper_portion", upper_portion)
cv2.imshow("lower_portion", lower_portion)
cv2.waitKey(0)
Below are images after cropping.
upper_portion:
lower_portion:
3) Detect the vertical line and crop the lower_portion image using the same procedure described in step 1 (a sketch follows below).
In step one, I basically used "Connected Component Analysis" followed by an "Opening operation". Read them here and here
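For step 3, a hedged sketch of the vertical counterpart, simply transposing the kernel from step 1; kernel size and iterations are assumptions to tune. The x coordinate can be reused on lower_portion because the earlier crop only removed rows:

# separate the vertical line with a transposed kernel
v_kernel_large = np.array([[0, 0, 1, 0, 0],
                           [0, 0, 1, 0, 0],
                           [0, 0, 1, 0, 0],
                           [0, 0, 1, 0, 0],
                           [0, 0, 1, 0, 0]], np.uint8)
vertical = cv2.morphologyEx(just_lines, cv2.MORPH_OPEN, v_kernel_large, iterations=2)
contours, hierarchy = cv2.findContours(vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    left_column = lower_portion[:, 0:x]
    right_column = lower_portion[:, x + w:]

Each column can then be fed to Tesseract separately.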
I am trying to draw multiple contours on an image, and so far I have managed to draw them by applying different thresholds. The only problem is that most of the contour regions overlap, and I am stuck on how to deal with that. What I would ideally want is that whenever there is an overlap, the contours are divided into individual regions. For instance, in the conceptual image there are 4 regions (contours): orange, green, blue, and black. Wherever there is an overlap, it should split into the purple regions. It seems very tricky and I am not even sure that is possible. If not, I would want all the overlapping regions to merge. Can anyone help with how to solve this issue?
Sample image
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
im = cv.imread('images/sample.jpg')
imgray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
ret1, thresh1 = cv.threshold(imgray, 30, 80, 0)
ret2, thresh2 = cv.threshold(imgray, 80, 110, 0)
ret3, thresh3 = cv.threshold(imgray, 110, 150, 0)
ret4, thresh4 = cv.threshold(imgray, 150, 200, 0)
ret5, thresh5 = cv.threshold(imgray, 200, 255, 0)
# OpenCV 3.x findContours returns (image, contours, hierarchy)
_, contours1, hierarchy1 = cv.findContours(thresh1, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
_, contours2, hierarchy2 = cv.findContours(thresh2, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
_, contours3, hierarchy3 = cv.findContours(thresh3, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
_, contours4, hierarchy4 = cv.findContours(thresh4, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
_, contours5, hierarchy5 = cv.findContours(thresh5, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
cv.drawContours(im, contours1, -1, (0, 0, 255), 1)
cv.drawContours(im, contours2, -1, (0, 255, 0), 1)
cv.drawContours(im, contours3, -1, (0, 0, 255), 1)
cv.drawContours(im, contours4, -1, (10, 200, 200), 1)
cv.drawContours(im, contours5, -1, (255, 255, 0), 1)
cv.imshow("im", im)
cv.waitKey(0)
As mentioned before, generating masks from your contours and then calculating the pairwise intersections as well as the "exclusive" parts of the original masks will certainly give you the desired regions, but this approach will tend to be expensive. From your sample image and your code, I couldn't figure out what you actually want to do, so I'll stick to a very basic example to illustrate the approach.
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Generate some dummy images, whose (main) contours overlap
img1 = cv2.circle(np.zeros((400, 400, 3), np.uint8), (150, 150), 100, (0, 255, 0), cv2.FILLED)
img2 = cv2.rectangle(np.zeros((400, 400, 3), np.uint8), (175, 175), (325, 325), (0, 0, 255), cv2.FILLED)
# Find contours (OpenCV 4.x)
contours1, _ = cv2.findContours(cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours2, _ = cv2.findContours(cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# Find contours (OpenCV 3.x)
#_, contours1, _ = cv2.findContours(cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
#_, contours2, _ = cv2.findContours(cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# Generate masks of (main) contours; Attention: Hard-coded selection of first contour here!
mask1 = cv2.drawContours(np.zeros((400, 400), np.uint8), [contours1[0]], -1, 255, cv2.FILLED)
mask2 = cv2.drawContours(np.zeros((400, 400), np.uint8), [contours2[0]], -1, 255, cv2.FILLED)
# Find intersection of both masks
mask_combined = cv2.bitwise_and(mask1, mask2)
# Generate "exclusive" masks, i.e. masks without the intersection parts
mask1_excl = cv2.bitwise_xor(mask1, mask_combined)
mask2_excl = cv2.bitwise_xor(mask2, mask_combined)
# Visualization
plt.figure()
plt.subplot(3, 3, 1), plt.imshow(img1), plt.ylabel('img1')
plt.subplot(3, 3, 2), plt.imshow(img2), plt.ylabel('img2')
plt.subplot(3, 3, 3), plt.imshow(img1 + img2), plt.ylabel('img1 + img2')
plt.subplot(3, 3, 4), plt.imshow(mask1, cmap='gray'), plt.ylabel('mask1')
plt.subplot(3, 3, 5), plt.imshow(mask2, cmap='gray'), plt.ylabel('mask2')
plt.subplot(3, 3, 6), plt.imshow(mask_combined, cmap='gray'), plt.ylabel('mask_combined')
plt.subplot(3, 3, 7), plt.imshow(mask1_excl, cmap='gray'), plt.ylabel('mask1_excl')
plt.subplot(3, 3, 8), plt.imshow(mask2_excl, cmap='gray'), plt.ylabel('mask2_excl')
plt.subplot(3, 3, 9), plt.imshow(mask_combined, cmap='gray'), plt.ylabel('mask_combined')
plt.show()
Visualization:
Now, this has to be done for each tuple of contours, not only pairs, since you can have intersections of three or more contours. Keeping track of all the resulting masks will most likely be memory-intensive, but not that computationally expensive. In the end, all approaches will somehow need to store the resulting regions as some kind of masks.
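For illustration, a hedged sketch of the pairwise pass using itertools.combinations, assuming a list masks of single-channel masks built the same way as mask1 and mask2 above; triples and larger tuples would follow the same pattern:

from itertools import combinations
import cv2

pair_intersections = {}
for (i, m1), (j, m2) in combinations(enumerate(masks), 2):
    inter = cv2.bitwise_and(m1, m2)
    # keep only pairs that actually overlap
    if cv2.countNonZero(inter) > 0:
        pair_intersections[(i, j)] = inter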
Hope that helps!
I am extracting the length of individual bars from a chart image. It works fine in most cases, but in some cases the contour groups two bars as one, which is detrimental to my cause. I tried different combinations of Canny, dilate, erode, and color schemes; it improved the result only slightly. How can I avoid the grouping? Here is the complete code and one image; you can run it with this image to see the problem.
from scipy.spatial import distance as dist
from imutils import perspective
from imutils import contours
import numpy as np
import argparse
import imutils
import cv2
def midpoint(ptA, ptB):
    return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5)
image = cv2.imread("somefile.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
#edged=cv2.Laplacian(gray, cv2.CV_8U, gray, ksize=7)
edged = cv2.Canny(gray, 30, 50)
cv2.imwrite("test00.png", edged)
edged = cv2.dilate(edged, None, iterations=1)
cv2.imwrite("test01.png", edged)
edged = cv2.erode(edged, None, iterations=1)
cv2.imwrite("test02.png", edged)
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
pixelsPerMetric = 100
for c in cnts:
    orig = image.copy()
    box = cv2.minAreaRect(c)
    box = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
    box = np.array(box, dtype="int")
    print(box)
    box = perspective.order_points(box)
    cv2.drawContours(orig, [box.astype("int")], -1, (0, 255, 0), 2)
    for (x, y) in box:
        cv2.circle(orig, (int(x), int(y)), 5, (0, 0, 255), -1)
    (tl, tr, br, bl) = box
    (tltrX, tltrY) = midpoint(tl, tr)
    (blbrX, blbrY) = midpoint(bl, br)
    (tlblX, tlblY) = midpoint(tl, bl)
    (trbrX, trbrY) = midpoint(tr, br)
    cv2.circle(orig, (int(tltrX), int(tltrY)), 5, (255, 0, 0), -1)
    cv2.circle(orig, (int(blbrX), int(blbrY)), 5, (255, 0, 0), -1)
    cv2.circle(orig, (int(tlblX), int(tlblY)), 5, (255, 0, 0), -1)
    cv2.circle(orig, (int(trbrX), int(trbrY)), 5, (255, 0, 0), -1)
    cv2.line(orig, (int(tltrX), int(tltrY)), (int(blbrX), int(blbrY)),
             (255, 0, 255), 2)
    cv2.line(orig, (int(tlblX), int(tlblY)), (int(trbrX), int(trbrY)),
             (255, 0, 255), 2)
    dA = dist.euclidean((tltrX, tltrY), (blbrX, blbrY))
    dB = dist.euclidean((tlblX, tlblY), (trbrX, trbrY))
    dimA = dA / pixelsPerMetric
    dimB = dB / pixelsPerMetric
    cv2.putText(orig, "{:.1f}in".format(dimA),
                (int(tltrX - 15), int(tltrY - 10)), cv2.FONT_HERSHEY_SIMPLEX,
                0.65, (255, 255, 255), 2)
    cv2.putText(orig, "{:.1f}in".format(dimB),
                (int(trbrX + 10), int(trbrY)), cv2.FONT_HERSHEY_SIMPLEX,
                0.65, (255, 255, 255), 2)
    cv2.imshow("Image", orig)
    cv2.waitKey(0)
This image is trivial to segment. The color of the bars is exactly RGB=(245,222,179). You can use OpenCV's function inRange to find pixels of this color. In this function, we need to give the color in BGR order, because that is how OpenCV reads in images by default. Here I'm picking a slightly larger range in case the image used JPEG compression (which is lossy and therefore changes pixel values slightly):
image = cv2.imread("somefile.png")
mask = cv2.inRange(image, (177, 220, 243), (181, 224, 247))
This image mask now has perfectly separated bars:
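From there, a hedged sketch of measuring each bar straight off the mask, assuming one connected component per bar:

import cv2

cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 4.x
# sort bars left-to-right and report each height in pixels
for c in sorted(cnts, key=lambda c: cv2.boundingRect(c)[0]):
    x, y, w, h = cv2.boundingRect(c)
    print("bar at x={}: length {} px".format(x, h))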
I am trying to build an OCR for recognising a seven-segment display, as shown below.
Using OpenCV's preprocessing tools I got it to here:
Now I am trying to follow this tutorial - https://www.pyimagesearch.com/2017/02/13/recognizing-digits-with-opencv-and-python/
But at this part
digitCnts = contours.sort_contours(digitCnts,
                                   method="left-to-right")[0]
digits = []
I am getting this error:
File "/Users/ms/anaconda3/lib/python3.6/site-packages/imutils/contours.py", line 25, in sort_contours
key=lambda b: b[1][i], reverse=reverse))
ValueError: not enough values to unpack (expected 2, got 0)
The error is solved by using THRESH_BINARY_INV, but the OCR is still not working; any fix would be great.
Any idea how to solve this and make my OCR a working model?
My whole code is :
# import the necessary packages
import numpy as np
from imutils.perspective import four_point_transform
from imutils import contours
import imutils
import cv2
# define the dictionary of digit segments so we can identify
# each digit on the thermostat
DIGITS_LOOKUP = {
(1, 1, 1, 0, 1, 1, 1): 0,
(0, 0, 1, 0, 0, 1, 0): 1,
(1, 0, 1, 1, 1, 1, 0): 2,
(1, 0, 1, 1, 0, 1, 1): 3,
(0, 1, 1, 1, 0, 1, 0): 4,
(1, 1, 0, 1, 0, 1, 1): 5,
(1, 1, 0, 1, 1, 1, 1): 6,
(1, 0, 1, 0, 0, 1, 0): 7,
(1, 1, 1, 1, 1, 1, 1): 8,
(1, 1, 1, 1, 0, 1, 1): 9
}
# load image
image = cv2.imread('d4.jpg')
# create hsv
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# set lower and upper color limits
low_val = (60,180,160)
high_val = (179,255,255)
# Threshold the HSV image
mask = cv2.inRange(hsv, low_val,high_val)
# find contours in mask
ret, cont, hierarchy = cv2.findContours(mask,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# select the largest contour
largest_area = 0
for cnt in cont:
    if cv2.contourArea(cnt) > largest_area:
        cont = cnt
        largest_area = cv2.contourArea(cnt)
# get the parameters of the boundingbox
x,y,w,h = cv2.boundingRect(cont)
# create and show subimage
roi = image[y:y+h, x:x+w]
cv2.imshow("Result", roi)
# draw box on original image and show image
cv2.rectangle(image, (x,y),(x+w,y+h), (0,0,255),2)
cv2.imshow("Image", image)
grayscaled = cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY)
retval, threshold = cv2.threshold(grayscaled, 10, 255, cv2.THRESH_BINARY)
retval2,threshold2 = cv2.threshold(grayscaled,125,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2.imshow('threshold',threshold2)
cv2.waitKey(0)
cv2.destroyAllWindows()
# find contours in the thresholded image, then initialize the
# digit contours lists
cnts = cv2.findContours(threshold2.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
digitCnts = []
# loop over the digit area candidates
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # if the contour is sufficiently large, it must be a digit
    if w >= 15 and (h >= 30 and h <= 40):
        digitCnts.append(c)
# sort the contours from left-to-right, then initialize the
# actual digits themselves
digitCnts = contours.sort_contours(digitCnts,
                                   method="left-to-right")[0]
digits = []
# make a color copy of the display ROI to draw results on
# (`output` was never defined in the original code)
output = roi.copy()
# loop over each of the digits
for c in digitCnts:
    # extract the digit ROI
    (x, y, w, h) = cv2.boundingRect(c)
    roi = threshold2[y:y + h, x:x + w]  # was `thresh`, which is undefined here
    # compute the width and height of each of the 7 segments
    # we are going to examine
    (roiH, roiW) = roi.shape
    (dW, dH) = (int(roiW * 0.25), int(roiH * 0.15))
    dHC = int(roiH * 0.05)
    # define the set of 7 segments
    segments = [
        ((0, 0), (w, dH)),                           # top
        ((0, 0), (dW, h // 2)),                      # top-left
        ((w - dW, 0), (w, h // 2)),                  # top-right
        ((0, (h // 2) - dHC), (w, (h // 2) + dHC)),  # center
        ((0, h // 2), (dW, h)),                      # bottom-left
        ((w - dW, h // 2), (w, h)),                  # bottom-right
        ((0, h - dH), (w, h))                        # bottom
    ]
    on = [0] * len(segments)
    # loop over the segments
    for (i, ((xA, yA), (xB, yB))) in enumerate(segments):
        # extract the segment ROI, count the total number of
        # thresholded pixels in the segment, and then compute
        # the area of the segment
        segROI = roi[yA:yB, xA:xB]
        total = cv2.countNonZero(segROI)
        area = (xB - xA) * (yB - yA)
        # if the total number of non-zero pixels is greater than
        # 50% of the area, mark the segment as "on"
        if total / float(area) > 0.5:
            on[i] = 1
    # lookup the digit and draw it on the image
    digit = DIGITS_LOOKUP[tuple(on)]
    digits.append(digit)
    cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 1)
    cv2.putText(output, str(digit), (x - 10, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 0), 2)
# display the digits
print(u"{}{}.{}{}.{}{} \u00b0C".format(*digits))
cv2.imshow("Input", image)
cv2.imshow("Output", output)
cv2.waitKey(0)
Any help in fixing my OCR would be great.
I think the lookup table you created is for a seven-segment display, not for seven-segment OCR. Since the size of the display is fixed, I think you can try to segment it into separate regions and recognise each one using template matching or k-means.
This is my preprocessed steps:
(1) Find the light green display in HSV:
mask = cv2.inRange(hsv, (50, 100, 180), (70, 255, 255))
(2) Try to separate the digits by projection and recognise standard seven-segment digits using the LUT:
(3) Try it on the detected green display:
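A hedged sketch of the projection idea from step (2): sum the mask column-wise and split wherever the sum drops to zero, assuming at least one blank column between digits:

import numpy as np

def split_digits_by_projection(mask):
    # column-wise count of lit pixels; zero runs mark the gaps between digits
    col_sum = (mask > 0).sum(axis=0)
    digits, start = [], None
    for x, v in enumerate(col_sum):
        if v > 0 and start is None:
            start = x                        # a digit begins
        elif v == 0 and start is not None:
            digits.append(mask[:, start:x])  # a digit ends
            start = None
    if start is not None:
        digits.append(mask[:, start:])
    return digits

Each returned slice can then be matched against templates of 0-9.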
So, as I said in the comments, there were two problems:
You were trying to find black contours on a white background, which is the opposite of what the OpenCV documentation describes. This was solved by using the THRESH_BINARY_INV flag instead of THRESH_BINARY.
Due to the numbers not being connected, a full contour for each number couldn't be found, so I tried some morphological operations. Following are the steps:
2a) Opening on the above image with the following code:
threshold2 = cv2.morphologyEx(threshold, cv2.MORPH_OPEN, np.ones((3,3), np.uint8))
2b) Dilation on the previous image:
threshold2 = cv2.dilate(threshold2, np.ones((5,1), np.uint8), iterations=1)
2c) Crop the top part of the image to separate the numbers, since dilation merged them into the top border:
height, width = threshold2.shape[:2]
threshold2 = threshold2[5:height,5:width]
Note: somehow, the images are displayed here without the white border that I am talking about. Try opening the image in a new window and you will see what I mean.
So, after solving these issues, the contours were pretty good, just as they were supposed to be, as seen here:
cnts = cv2.findContours(threshold2.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
digitCnts = []
# loop over the digit area candidates
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # if the contour is sufficiently large, it must be a digit
    if w <= width * 0.5 and h >= height * 0.2:
        digitCnts.append(c)
# draw the detected digit contours for inspection
# (image2 is assumed to be a copy of the input image)
cv2.drawContours(image2, digitCnts, -1, (0, 0, 255))
cv2.imwrite("cnts-sort.jpg", image2)
As you can see below, the contours are being drawn in red.
Now, for recognising the digits, this part somehow doesn't work, and I blame the lookup table for it. As you can see from the images below, the bounding rects for all the numbers are correctly cropped, but the lookup table fails to recognise them.
# loop over each of the digits
j = 0
for c in digitCnts:
    # extract the digit ROI
    (x, y, w, h) = cv2.boundingRect(c)
    roi = threshold2[y:y + h, x:x + w]
    cv2.imwrite("roi" + str(j) + ".jpg", roi)
    j += 1
    # compute the width and height of each of the 7 segments
    # we are going to examine
    (roiH, roiW) = roi.shape
    (dW, dH) = (int(roiW * 0.25), int(roiH * 0.15))
    dHC = int(roiH * 0.05)
    # define the set of 7 segments
    segments = [
        ((0, 0), (w, dH)),                           # top
        ((0, 0), (dW, h // 2)),                      # top-left
        ((w - dW, 0), (w, h // 2)),                  # top-right
        ((0, (h // 2) - dHC), (w, (h // 2) + dHC)),  # center
        ((0, h // 2), (dW, h)),                      # bottom-left
        ((w - dW, h // 2), (w, h)),                  # bottom-right
        ((0, h - dH), (w, h))                        # bottom
    ]
    on = [0] * len(segments)
    # loop over the segments
    for (i, ((xA, yA), (xB, yB))) in enumerate(segments):
        # extract the segment ROI, count the total number of
        # thresholded pixels in the segment, and then compute
        # the area of the segment
        segROI = roi[yA:yB, xA:xB]
        total = cv2.countNonZero(segROI)
        area = (xB - xA) * (yB - yA)
        # if the total number of non-zero pixels is greater than
        # 50% of the area, mark the segment as "on"
        if area != 0:
            if total / float(area) > 0.5:
                on[i] = 1
    # lookup the digit and draw it on the image
    try:
        digit = DIGITS_LOOKUP[tuple(on)]
        digits.append(digit)
        cv2.rectangle(roi, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.putText(roi, str(digit), (x - 10, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 0), 2)
    except KeyError:
        continue
I read through the website you mentioned in the question, and from the comments it seems some of the entries in the LUT might be wrong, so I am going to leave it to you to figure that out. Following are the individual digits found (but not recognised):
Alternatively, you could use tesseract instead to recognise these detected digits.
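A hedged sketch of that alternative, assuming pytesseract is installed and using the roi crops saved above; --psm 10 tells tesseract to treat the image as a single character:

import cv2
import pytesseract

roi = cv2.imread("roi0.jpg", cv2.IMREAD_GRAYSCALE)
# whitelist digits so tesseract does not guess letters
txt = pytesseract.image_to_string(
    roi, config="--psm 10 -c tessedit_char_whitelist=0123456789")
print(txt.strip())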
Hope it helps!