I am following this tutorial to recognize six digits from the following image
The threshold seems (to me) to be very good
However, when I reach the contour definition, digits 7, 1, 0 (and possibly more) are always split in two or more boxes.
By definition, a contour is a boundary of a continuous entity, which means that these digits separated by a small ligature cannot be classified as such. What to do in this case? My first instinct is to try and merge these small boxes? I have already tried to play around with the height and width of the contour with no success. The code is written below.
# import the necessary packages
from imutils.perspective import four_point_transform
from imutils import contours
import imutils
import cv2
# define the dictionary of digit segments so we can identify
# each digit on the thermostat
# Each key is a 7-tuple of on/off flags, one per segment. Judging from the
# entries below the order is: top, top-left, top-right, center, bottom-left,
# bottom-right, bottom.
# NOTE(review): the name DIGITS_LOOKUP is reconstructed (the assignment line
# was missing) -- confirm against the code that consumes this table.
DIGITS_LOOKUP = {
    (1, 1, 1, 0, 1, 1, 1): 0,
    (0, 0, 1, 0, 0, 1, 0): 1,
    # A "2" lights the bottom segment, not the bottom-right one; the previous
    # pattern (1, 0, 1, 1, 1, 1, 0) disagreed with every other entry.
    (1, 0, 1, 1, 1, 0, 1): 2,
    (1, 0, 1, 1, 0, 1, 1): 3,
    (0, 1, 1, 1, 0, 1, 0): 4,
    (1, 1, 0, 1, 0, 1, 1): 5,
    (1, 1, 0, 1, 1, 1, 1): 6,
    (1, 0, 1, 0, 0, 1, 0): 7,
    (1, 1, 1, 1, 1, 1, 1): 8,
    (1, 1, 1, 1, 0, 1, 1): 9,
}
# load the example image
image = cv2.imread('DSC_01922.JPG', 1)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None
    raise FileNotFoundError("could not read 'DSC_01922.JPG'")
# pre-process the image by converting it to grayscale and blurring it
# image = imutils.resize(image, height=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# edged = cv2.Canny(blurred, 50, 200, 255)
# threshold the blurred image (inverted, Otsu-selected level), then apply
# a morphological opening to clean up the thresholded image
thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
# cv2.imshow('thresh', thresh)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# find contours in the thresholded image, then initialize the
# digit contours list
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
digitCnts = []
# loop over the digit area candidates
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # keep the contour only if its bounding box is between 90 and 300
    # pixels tall (only the height is checked, not the width)
    if 90 <= h <= 300:
        digitCnts.append(c)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
# show the annotated image once; without waitKey the window never renders
cv2.imshow('image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Update 1
Using MORPH_CLOSE instead of MORPH_OPEN and enlarging the kernel, as suggested by @Croolman, improves the results, as can be seen below:
# Update 1: switch from opening to closing, with a 1-wide, 7-tall elliptical
# structuring element (ksize is given as (width, height)).
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 7))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
Note that I am doing this as a hobby and I am not familiar with/doing research on existent tools of OpenCV/python. Thank you in advance.
Update 2
This solution works.
# Update 2: same closing operation, kernel height raised from 7 to 15
# (ksize is given as (width, height)).
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 15))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
This is the complete answer. It required tweaking the kernel values and using MORPH_CLOSE.
I'm trying to get the coordinates of every end point of every line, but I couldn't come up with a solution. This is what I've currently got, but it's finding the outline of the lines, not the lines themselves:
import cv2
import numpy as np
# Detect line segments with the probabilistic Hough transform and overlay
# them on the input image.
img = cv2.imread('out copy.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
low_threshold = 50
high_threshold = 150
edges = cv2.Canny(blur_gray, low_threshold, high_threshold)
rho = 1  # distance resolution in pixels of the Hough grid
theta = np.pi / 180  # angular resolution in radians of the Hough grid
threshold = 15  # minimum number of votes (intersections in Hough grid cell)
min_line_length = 50  # minimum number of pixels making up a line
max_line_gap = 20  # maximum gap in pixels between connectable line segments
line_image = np.zeros_like(img)  # creating a blank to draw lines on
# Run Hough on edge detected image
# Output "lines" is an array containing endpoints of detected line segments,
# or None when nothing was detected
lines = cv2.HoughLinesP(edges, rho, theta, threshold,
                        minLineLength=min_line_length,
                        maxLineGap=max_line_gap)
if lines is not None:
    for line in lines:
        for x1, y1, x2, y2 in line:
            # NOTE(review): the drawing call was missing from the source;
            # colour and thickness here are a reconstruction.
            cv2.line(line_image, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 5)
# Blend the drawn segments over the original image
lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
cv2.imshow('out copy.png', lines_edges)
cv2.waitKey(0)
The hit-or-miss transform can be used to find end points of a line after skeletonization.
# Locate the end points of thin lines: binarise, skeletonise, then run a
# hit-or-miss transform with one end-point kernel per direction.
img = cv2.imread('image.png')
img2 = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# inverse binary image, to make the lines in white
th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# obtain binary skeleton
sk = cv2.ximgproc.thinning(th, None, 1)
# kernels to find endpoints in all 4 directions
# (1 = must be foreground, -1 = must be background, 0 = don't care)
k1 = np.array(([0, 0, 0], [-1, 1, -1], [-1, -1, -1]), dtype="int")
k2 = np.array(([0, -1, -1], [0, 1, -1], [0, -1, -1]), dtype="int")
k3 = np.array(([-1, -1, 0], [-1, 1, 0], [-1, -1, 0]), dtype="int")
k4 = np.array(([-1, -1, -1], [-1, 1, -1], [0, 0, 0]), dtype="int")
# perform hit-miss transform for every kernel
o1 = cv2.morphologyEx(sk, cv2.MORPH_HITMISS, k1)
o2 = cv2.morphologyEx(sk, cv2.MORPH_HITMISS, k2)
o3 = cv2.morphologyEx(sk, cv2.MORPH_HITMISS, k3)
o4 = cv2.morphologyEx(sk, cv2.MORPH_HITMISS, k4)
# combine the four results with a bitwise OR rather than `+`: the images
# are uint8, so a pixel matched by two kernels (e.g. a diagonal end point
# matches both k1 and k2) would wrap 255 + 255 -> 254 and be missed below
out = o1 | o2 | o3 | o4
# find points in white (255) and draw them on original image
pts = np.argwhere(out == 255)
for pt in pts:
    # argwhere yields (row, col); cv2.circle wants (x, y) as plain ints
    img2 = cv2.circle(img2, (int(pt[1]), int(pt[0])), 15, (0, 255, 0), -1)
I am new to Histogram comparisons.
This code uses these images to make a histogram comparison. The result was an impressive 0.99 (99 %); however, I think the score is that high only because of the background color. Can someone tell me how I can ignore the white color and compare only the actual fruit?
The following code was found here.
# Read both pictures from disk
img1 = cv2.imread('D:/downloads/app1.jpg')
img2 = cv2.imread('D:/downloads/app2.jpg')
# Switch both from BGR to HSV
img1_hsv = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)
img2_hsv = cv2.cvtColor(img2, cv2.COLOR_BGR2HSV)
# Shared settings for a 2-D histogram over channels 0 and 1 (hue, saturation)
hs_channels = [0, 1]
hs_bins = [180, 256]
hs_ranges = [0, 180, 0, 256]
# Build each histogram, then rescale it in place to the 0..1 range
hist_img1 = cv2.calcHist([img1_hsv], hs_channels, None, hs_bins, hs_ranges)
hist_img2 = cv2.calcHist([img2_hsv], hs_channels, None, hs_bins, hs_ranges)
cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
# Compare the two histograms with the Bhattacharyya method
metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_BHATTACHARYYA)
Using some mask as Fred suggested seems to be the cleanest solution, but Fred's comment regarding the HSV color space is even more important here! But, first of all, the reported metric value of 0.99... (also in the linked article) was obtained using cv2.HISTCMP_CORREL, not using cv2.HISTCMP_BHATTACHARYYA!
Now, let's stick to OpenCV's common BGR color space, and adapt the code:
import cv2
# Read both pictures from disk
img1 = cv2.imread('app1.png')
img2 = cv2.imread('app2.png')
# Shared settings for a full 3-D histogram over the B, G and R channels
bgr_channels = [0, 1, 2]
bgr_bins = [256, 256, 256]
bgr_ranges = [0, 256, 0, 256, 0, 256]
# Build each histogram, then rescale it in place to the 0..1 range
hist_img1 = cv2.calcHist([img1], bgr_channels, None, bgr_bins, bgr_ranges)
hist_img2 = cv2.calcHist([img2], bgr_channels, None, bgr_bins, bgr_ranges)
cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
# Compare the two histograms by correlation
metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CORREL)
# 0.9995753648895891
The metric value is still something at 99.9 %.
So, now, let's ignore all white pixels by manually setting hist_imgx[255, 255, 255] = 0:
import cv2
# Read both pictures from disk
img1 = cv2.imread('app1.png')
img2 = cv2.imread('app2.png')
# Shared settings for a full 3-D histogram over the B, G and R channels
bgr_channels = [0, 1, 2]
bgr_bins = [256, 256, 256]
bgr_ranges = [0, 256, 0, 256, 0, 256]
hist_img1 = cv2.calcHist([img1], bgr_channels, None, bgr_bins, bgr_ranges)
hist_img2 = cv2.calcHist([img2], bgr_channels, None, bgr_bins, bgr_ranges)
# Drop the pure-white bin (255, 255, 255) from both histograms *before*
# rescaling them in place to the 0..1 range
hist_img1[255, 255, 255] = 0
hist_img2[255, 255, 255] = 0
cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
# Compare the two histograms by correlation
metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CORREL)
# 0.6199666001215806
And, the metric value drops to 62 %!
So, your assumption seems to be correct, the white background distorts the whole histogram comparison.
System information
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
PyCharm: 2021.1.1
OpenCV: 4.5.1
You'll only need to raise the minimum saturation value for an HSV mask to effectively mask away all the white background:
import cv2
import numpy as np
def get_masked(img):
    """Return a copy of *img* keeping only pixels with HSV saturation >= 50.

    The image is converted from BGR to HSV and tested against the range
    H 0..179, S 50..255, V 0..255; pixels outside that range come back
    as zeros in the result.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    saturated = cv2.inRange(
        hsv,
        np.array([0, 50, 0]),
        np.array([179, 255, 255]),
    )
    return cv2.bitwise_and(img, img, mask=saturated)
# Show both apples with the low-saturation (white) background masked away.
img1 = cv2.imread("apple1.png")
img2 = cv2.imread("apple2.png")
cv2.imshow("Apple 1", get_masked(img1))
cv2.imshow("Apple 2", get_masked(img2))
# imshow only renders once the GUI event loop runs; block until a key press
cv2.waitKey(0)
cv2.destroyAllWindows()
I have a pdf from which I want to extract text. I use tesseract for OCR which does a good job. But my problem is that it does not recognize the 2 column format of the document and hence it merges the 2 columns together.
I want to split the document on the vertical (in the middle of the page) and horizontal (on top of the page) lines and then feed it to tesseract. So I do the following
Preprocessing steps:
# color to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# edge detection
edges = cv2.Canny(gray, 500, 1000, apertureSize=7)
# dilate
# The kernel of a dilation is a structuring element, not a convolution
# filter: only which entries are non-zero matters, so a plain uint8 block of
# ones replaces the former normalised averaging kernel (ones / 25).
kernel = np.ones((5, 5), np.uint8)
edges = cv2.dilate(edges, kernel, iterations=1)
# blur
blur = cv2.GaussianBlur(edges, (7, 7), 0)
These steps produce:
Now, I do line detection:
minLineLength = 1000
maxLineGap = 500
# minLineLength / maxLineGap must be passed by keyword: the fifth positional
# parameter of HoughLinesP is the optional `lines` output array, so passing
# them positionally shifted both values into the wrong slots.
# NOTE(review): an angular resolution of np.pi presumably leaves a single
# angle bin, so only one line orientation can be found -- confirm, and
# consider np.pi / 2 if both vertical and horizontal rules are wanted.
lines = cv2.HoughLinesP(processed_img, 1, np.pi, 2,
                        minLineLength=minLineLength, maxLineGap=maxLineGap)
# HoughLinesP returns None (not an empty array) when nothing is detected
if lines is not None:
    for line in lines:
        x1, y1, x2, y2 = line[0]
        cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 0), 1)
The final result (after stitching all the images back into a pdf) looks like this.
I have tried various combinations for theta, minLineLength and maxLineGap and this was the best result I could get. Any help/pointers would be greatly appreciated!
One of the possible solutions is described below:
1) Detect the horizontal line. Below is one way to do this:
import cv2
import numpy as np
def discard(image):
    """Zero out every 4-connected component wider or taller than 500 px.

    *image* is cast to uint8, labelled with connected-component analysis,
    and each pixel whose label has a bounding-box width or height above
    500 is set to 0. The uint8 image is returned.
    """
    image = np.uint8(image)
    _, labels, stats, _ = cv2.connectedComponentsWithStats(image, connectivity=4)
    # Label ids whose bounding box exceeds the limit in either direction
    too_wide = np.flatnonzero(stats[:, cv2.CC_STAT_WIDTH] > 500)
    too_tall = np.flatnonzero(stats[:, cv2.CC_STAT_HEIGHT] > 500)
    oversized = np.isin(labels, too_wide) | np.isin(labels, too_tall)
    image[oversized] = 0
    return image
# Load the page as grayscale and shrink it to 35 % of its size
img = cv2.imread("page_1.jpg", cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, None, fx=0.35, fy=0.35, interpolation=cv2.INTER_LINEAR)
height, width = img.shape[:2]
# Binarization: invert, then mark everything brighter than 5 as foreground
thresh = 255 - img
ret, thresh = cv2.threshold(thresh, 5, 255, cv2.THRESH_BINARY)
# Remove the long connected components, then XOR against the full
# binarization so that only those long components remain
without_lines = discard(thresh.copy())
just_lines = cv2.bitwise_xor(thresh, without_lines)
# Separating the horizontal line: 5x5 kernel whose middle row is all ones
h_kernel_large = np.zeros((5, 5), np.uint8)
h_kernel_large[2, :] = 1
horizontal = cv2.morphologyEx(
    just_lines.copy(), cv2.MORPH_OPEN, h_kernel_large, iterations=2
)
cv2.imshow("horizontal_line", horizontal)
This is what we get in the horizontal matrix:
2) Use findContours and boundingRect to get the coordinates of that horizontal line. Then use that coordinate to crop the image horizontally.
# Split the page into the parts above and below the detected horizontal rule.
# Both default to the whole page when no rule is found.
upper_portion = img
lower_portion = img
# [-2:] keeps (contours, hierarchy) whether findContours returns two values
# or three (older OpenCV versions prepend the image)
contours, hierarchy = cv2.findContours(
    horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)[-2:]
if len(contours) > 0:
    # Use the widest bounding box instead of whichever contour comes last,
    # so leftover specks cannot decide where the page is cut
    x, y, w, h = max(
        (cv2.boundingRect(cnt) for cnt in contours),
        key=lambda box: box[2],
    )
    upper_portion = img[0:y, 0:width]
    lower_portion = img[y + h:height, 0:width]
cv2.imshow("upper_portion", upper_portion)
cv2.imshow("lower_portion", lower_portion)
# imshow only renders once the GUI event loop runs; block until a key press
cv2.waitKey(0)
cv2.destroyAllWindows()
Below are images after cropping.
3) Detect the vertical line and crop lower_portion image using the same procedure described in step 1.
In step one, I basically used "Connected Component Analysis" followed by an "Opening operation". Read them here and here
I am using cv2.VideoWriter() as an intermediate step in a larger image processing workflow. Basically I have a stack of images that need to be turned into a timelapse, and then the frames in that are processed and then used downstream to mask original imagery. My masking isn't working because array sizes do not correspond with one another, and I've diagnosed the problem to arise from cv2.VideoWriter(). My time lapse assembly process came from here.
There are a ton of posts about cv2.VideoWriter() not working because the frame size is wrong etc. but my problem is not that the video won't write - it's that dimensions of my imagery are being changed. In fact, I'm not even sure if the top row or bottom row is what's being cut off, or if there is some underlying resampling step or something.
import cv2
import numpy as np
import glob
# Assemble every JPEG in the working directory into a timelapse, then read
# the first frame back to compare its shape with the written frame size.
# NOTE(review): the loop bodies and the read call were missing from the
# source and are reconstructed here.
imgs = glob.glob('*.jpg')
img_array = []
for filename in imgs:
    img = cv2.imread(filename)
    height, width, layers = img.shape
    size = (width, height)  # VideoWriter expects (width, height)
    img_array.append(img)
size  # calling `size` returns (250,187)
out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
for frame in img_array:
    out.write(frame)
out.release()  # finalise the file before reading it back
cap = cv2.VideoCapture('project.avi')
mycap = cap.read()  # (success_flag, frame)
cap.release()
mycap[1].shape  # this returns (186,250,3)
I would have expected mycap[1].shape to have the same attributes as size but while size indicates I have a 250 pixel wide and 187 pixel tall array, mycap[1].shape shows that the video has dimensions 250x186.
After some testing I confirmed that cv2.VideoWriter() is not simply clipping an image with odd dimension values, but is instead altering values in the arrays while changing dimensions:
import numpy as np
import pylab as plt
import cv2 as cv
# Create RGB layers: three identical rows per channel, 9 columns wide, so
# both the width (9) and the height (3) are odd
r = np.array([[255, 0, 255, 0, 255, 0, 255, 0, 255]] * 3, dtype=np.uint8)
g = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0]] * 3, dtype=np.uint8)
b = np.array([[10, 0, 10, 0, 10, 0, 255, 0, 255]] * 3, dtype=np.uint8)
# Create a few image layers
rgb1 = np.dstack((r, g, b))
rgb2 = np.dstack((r, g, b))
rgb3 = np.dstack((r, g, b))
rgb4 = np.dstack((r, g, b))
imgs = [rgb1, rgb2, rgb3, rgb4]
# Create timelapse
# NOTE(review): the loop bodies were missing from the source and are
# reconstructed here.
img_array = []
for img in imgs:
    height, width, layers = img.shape
    size = (width, height)  # VideoWriter expects (width, height)
    img_array.append(img)
out = cv.VideoWriter('SO_question.avi', cv.VideoWriter_fourcc(*'DIVX'), 15, size)
for frame in img_array:
    out.write(frame)
out.release()  # finalise the file before reading it back
# Read video in
cap = cv.VideoCapture('SO_question.avi')
cap.read()[1].shape  # cap.read() returns (success_flag, frame)
plt.imshow(rgb1) produces the following image:
But plt.imshow(cap.read()[1]) produces the following image:
Furthermore, using print(cap.read()[1]) shows that array values are not maintained across the process. Thus, I conclude that a resampling process is occurring (rather than a simple crop step) when width and height are an odd number of pixels.