I want to crop the image to only extract the text sections. There are thousands of them with different sizes so I can't hardcode coordinates. I'm trying to remove the unwanted lines on the left and on the bottom. How can I do this?
Determine the minimum spanning bounding box by finding all the non-zero points in the image, then crop your image using this bounding box. Finding contours is time-consuming and unnecessary here, especially because your text is axis-aligned. You can accomplish your goal by combining cv2.findNonZero and cv2.boundingRect.
I hope this works:
import numpy as np
import cv2
# Read in the image. cv2.imread reports failure by returning None instead of
# raising, so check before slicing.
img = cv2.imread(r"W430Q.png")
if img is None:
    raise FileNotFoundError("Could not read image 'W430Q.png'")
img = img[:-20, :-20]  # Perform pre-cropping: drop the last 20 rows and columns
# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Pixels darker than 50 become 255 and everything else 0 (dark text turns white)
gray = 255*(gray < 50).astype(np.uint8)
# Perform noise filtering: morphological opening with a 2x2 kernel
gray = cv2.morphologyEx(gray, cv2.MORPH_OPEN, np.ones((2, 2), dtype=np.uint8))
coords = cv2.findNonZero(gray)  # Find all non-zero points (text)
if coords is None:
    # Nothing survived thresholding and opening, so there is no box to compute
    raise ValueError("No dark (text) pixels found in 'W430Q.png'")
x, y, w, h = cv2.boundingRect(coords)  # Find minimum spanning bounding box
# Crop the image - note we do this on the original image
rect = img[y:y+h, x:x+w]
cv2.imshow("Cropped", rect)  # Show it
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
In the code above, the line `gray = 255*(gray < 50).astype(np.uint8)` is where I threshold at 50 so that the dark text becomes white. Because the comparison produces a boolean array, I convert it to uint8 and then scale it by 255. The text is effectively inverted.
Then, using cv2.findNonZero, we find all of the non-zero locations in this image. We then pass these to cv2.boundingRect, which returns the top-left corner of the bounding box as well as its width and height. Finally, we use this to crop the image. The crop is done on the original image, not the inverted version.
Here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, then Otsu's threshold to obtain a binary black/white image.
Remove horizontal lines. Since we're trying to only extract text, we remove horizontal lines to aid us in our next step so incorrect contours will not merge together.
Merge text into a single contour. The idea is that characters which are adjacent to each other are part of the wall of text. So we can dilate individual contours together to obtain a single contour to extract.
Find contours and extract ROI. We find contours, sort contours by area, then extract the largest contour ROI using Numpy slicing.
Here's the visualization of each step:
Binary image -> Removed horizontal lines in green
Dilate to combine into a single contour -> Detected ROI to extract in green
import cv2
import numpy as np
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image '1.png'")
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove horizontal lines: opening with a 25x1 kernel keeps only long
# horizontal runs, which are then painted black in the threshold image
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(thresh, [c], -1, 0, -1)

# Dilate to merge into a single contour
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,30))
dilate = cv2.dilate(thresh, vertical_kernel, iterations=3)

# Find contours, sort for largest contour and extract ROI.
# The list is sorted by area in descending order, so [:1] keeps only the
# largest contour ([:-1] would keep everything except the smallest).
cnts, _ = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:1]
ROI = None  # stays None when no contour was found
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 4)
    ROI = original[y:y+h, x:x+w]

cv2.imshow('image', image)
cv2.imshow('dilate', dilate)
cv2.imshow('thresh', thresh)
if ROI is not None:
    cv2.imshow('ROI', ROI)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
I have the following image of a lego board with some bricks on it
Now I am trying to detect the thick black lines (connecting the white squares) with OpenCV. I have already experimented a lot with HoughLinesP, converted the image to gray or b/w before, applied blur, ...
Nothing led to usable results.
# Read image as single-channel grayscale
img = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)
# Resize image by a factor of 0.25 along each axis
img = cv2.resize(img, (0,0), fx=0.25, fy=0.25)
# Initialize output: BGR version of the resized image, used for drawing the lines
out = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# Median blurring to get rid of the noise
# NOTE(review): the original comment also said "invert image", but no inversion is visible here
img = cv2.medianBlur(img, 5)
# Adaptive threshold
# NOTE(review): the call below is cut off after the line continuation; its
# remaining arguments are not visible in this copy and must be restored
bw = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
# HoughLinesP
# NOTE(review): the second argument is 500; if that is rho (distance
# resolution in pixels) it looks far too coarse - confirm against the OpenCV docs
linesP = cv2.HoughLinesP(bw, 500, np.pi / 180, 50, None, 50, 10)
# Draw lines: each entry of linesP wraps one 4-element segment, drawn in red on `out`
# NOTE(review): the indentation of the if/for bodies below was lost in this copy
if linesP is not None:
for i in range(0, len(linesP)):
l = linesP[i][0]
cv2.line(out, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv2.LINE_AA)
The adaptive threshold lets you see the edges quite well, but with HoughLinesP you don't get anything usable out of it.
What am I doing wrong?
Thanks, both @fmw42 and @jeru-luke, for your great solutions to this problem! I liked isolating / masking the green board, so I combined both:
import cv2
import numpy as np
# Load the photo and shrink it to scale_percent of its original size
img = cv2.imread("image.jpg")
scale_percent = 50  # percent of original size
height, width = (int(side * scale_percent / 100) for side in img.shape[:2])
dim = (width, height)
img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# Inverse-binary Otsu threshold on the a-channel of the LAB image
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
a_component = lab[..., 1]
th = cv2.threshold(a_component, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

# All-zero single-channel canvas with the same height and width as img
black = np.zeros(img.shape[:2], np.uint8)
def get_region(image):
    """Return a filled mask of the largest external contour found in *image*.

    Args:
        image: single-channel binary image passed to cv2.findContours.

    Returns:
        A new uint8 array with the same height and width as *image*: 255
        inside the largest contour (by cv2.contourArea), 0 elsewhere.

    Raises:
        ValueError: if no contour is found in *image*.
    """
    # [-2:] copes with findContours returning either 2 or 3 values,
    # depending on the OpenCV version
    contours, hierarchy = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
    if len(contours) == 0:
        raise ValueError("get_region: no contours found in image")
    c = max(contours, key = cv2.contourArea)
    # Draw on a fresh canvas for every call; reusing one module-level buffer
    # would let earlier results leak into later masks.
    canvas = np.zeros(image.shape[:2], np.uint8)
    mask = cv2.drawContours(canvas, [c], 0, 255, -1)
    return mask
mask = get_region(th)
# Keep only the green block, then turn the region outside it white.
# bitwise_and alone leaves the outside black, which the black threshold
# below would pick up as well.
green_block = cv2.bitwise_and(img, img, mask = mask)
green_block[mask == 0] = (255, 255, 255)
# median blur
median = cv2.medianBlur(green_block, 5)
# threshold on black
lower = (0,0,0)
upper = (15,15,15)
thresh = cv2.inRange(median, lower, upper)
# apply morphology open (3x3) and then close (29x29)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (29,29))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# filter contours on area
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result = green_block.copy()
for c in contours:
    area = cv2.contourArea(c)
    if area > 1000:
        # outline every sufficiently large dark region in red
        cv2.drawContours(result, [c], -1, (0, 0, 255), 2)
# view result
cv2.imshow("result", result)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
Here I am presenting a repeated segmentation approach using color.
This answer is based on the usage of LAB color space
1. Isolating the green lego block
# Load the image and convert it to the LAB colour space
img = cv2.imread(image_path)
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
a_component = lab[..., 1]
# inverse-binary Otsu threshold on the a-channel
th = cv2.threshold(a_component, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# function to obtain the largest contour in given image after filling it
def get_region(image):
    """Return a filled mask of the largest external contour found in *image*.

    Args:
        image: single-channel binary image passed to cv2.findContours.

    Returns:
        A new uint8 array with the same height and width as *image*: 255
        inside the largest contour (by cv2.contourArea), 0 elsewhere.

    Raises:
        ValueError: if no contour is found in *image*.
    """
    # [-2:] copes with findContours returning either 2 or 3 values,
    # depending on the OpenCV version
    contours, hierarchy = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
    if len(contours) == 0:
        raise ValueError("get_region: no contours found in image")
    c = max(contours, key = cv2.contourArea)
    black = np.zeros((image.shape[0], image.shape[1]), np.uint8)
    # thickness -1 fills the contour instead of outlining it
    mask = cv2.drawContours(black,[c],0,255, -1)
    return mask
# filled mask of the largest contour in the thresholded a-channel
mask = get_region(th)
# keep the image pixels selected by the mask
# NOTE(review): the original comment said the region outside the green block
# is turned white, but no statement here does that - confirm
green_block = cv2.bitwise_and(img, img, mask = mask)
2. Segmenting the road
To get an approximate region of the road, I subtracted th from the mask.
cv2.subtract() performs saturating arithmetic subtraction: for 8-bit images, results that would be negative are clipped to 0 instead of wrapping around.
# subtract the thresholded a-channel (th) from the green-block mask
road = cv2.subtract(mask,th)
# `road` contains some unwanted spots/contours which are removed using the function "get_region"
# (get_region keeps only the largest contour, filled)
only_road = get_region(road)
Masking only the road segment with the original image gives
# apply the road mask to the original image
road_colored = cv2.bitwise_and(img, img, mask = only_road)
From the above image only the black regions (road) are present, which is easy to segment:
# Inverse-binary Otsu threshold on channel index 1 of the masked image.
# NOTE(review): this is one colour channel (presumably green, given OpenCV's
# BGR order), not a true grayscale conversion - confirm this is intended
th2 = cv2.threshold(road_colored[:,:,1],127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)[1]
# using portion of the code from fmw42's answer, to get contours above certain area
contours = cv2.findContours(th2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result = img.copy()
for c in contours:
    area = cv2.contourArea(c)
    if area > 1000:
        # outline every sufficiently large region in red on a copy of the input
        cv2.drawContours(result, [c], -1, (0, 0, 255), 4)
To clean up the end result, you can apply morphological operations on th2 before drawing contours.
Here is one way to do that in Python/OpenCV.
Read the image
Apply median blur
Threshold on black color using cv2.inRange()
Apply morphology to clean it up
Get contours and filter on area
Draw contours on input
Save the result
import cv2
import numpy as np
# read image
img = cv2.imread('black_lines.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image 'black_lines.jpg'")
# median blur
median = cv2.medianBlur(img, 5)
# threshold on black: pixels whose three channels all lie in 0..15
lower = (0,0,0)
upper = (15,15,15)
thresh = cv2.inRange(median, lower, upper)
# apply morphology open (3x3) and then close (29x29)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (29,29))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# filter contours on area
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result = img.copy()
for c in contours:
    area = cv2.contourArea(c)
    if area > 1000:
        cv2.drawContours(result, [c], -1, (0, 0, 255), 2)
# save result
cv2.imwrite("black_lines_threshold.jpg", thresh)
cv2.imwrite("black_lines_morphology.jpg", morph)
cv2.imwrite("black_lines_result.jpg", result)
# view result
cv2.imshow("threshold", thresh)
cv2.imshow("morphology", morph)
cv2.imshow("result", result)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
Threshold image:
Morphology image:
I am trying to detect cells in bill image:
I have this image
Removed the stamp with this code:
import cv2
import numpy as np
# read image
img = cv2.imread('dummy1.PNG')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image 'dummy1.PNG'")
# threshold on yellow (bounds are given per channel, in the image's channel order)
lower = (0, 200, 200)
upper = (100, 255, 255)
thresh = cv2.inRange(img, lower, upper)
# apply dilate morphology with a 9x9 kernel
kernel = np.ones((9, 9), np.uint8)
mask = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)
# get largest contour
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
if len(contours) == 0:
    # max() on an empty sequence would fail with an opaque message
    raise ValueError("No region inside the yellow threshold was found in 'dummy1.PNG'")
big_contour = max(contours, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(big_contour)
# draw filled white contour on input
result = img.copy()
cv2.drawContours(result, [big_contour], 0, (255, 255, 255), -1)
cv2.imwrite('removed.png', result)
# show the images
cv2.imshow("RESULT", result)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
And obtained this image:
Then applied grayscale, inverted, detected vertical and horizontal kernel and merged through this :
# Imports
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
# Prefer Pillow's namespaced import; fall back to the legacy top-level
# `Image` module when `PIL` is not importable.
try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract
# Point pytesseract at the local Tesseract executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Load the stamp-free image as single-channel grayscale
file = 'removed.png'
img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)

# Binarise with Otsu's method, then invert the binary image
thresh, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
img_bin = 255 - img_bin
cv2.imwrite(r'C:\Users\marou\Desktop\cv_inverted.png', img_bin)
plotting = plt.imshow(img_bin, cmap='gray')

# Line-detection kernels: their length is 1/100 of the image width
kernel_len = np.shape(img)[1] // 100
ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_len))  # vertical lines
hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))  # horizontal lines
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))  # small 2x2 kernel

#### Vertical lines ####
# Erode then dilate with the vertical kernel; save the result as a jpg
image_1 = cv2.erode(img_bin, ver_kernel, iterations=5)
vertical_lines = cv2.dilate(image_1, ver_kernel, iterations=5)
cv2.imwrite(r'C:\Users\marou\Desktop\vertical.jpg', vertical_lines)
plotting = plt.imshow(image_1, cmap='gray')

#### Horizontal lines ####
# Same procedure with the horizontal kernel
image_2 = cv2.erode(img_bin, hor_kernel, iterations=5)
horizontal_lines = cv2.dilate(image_2, hor_kernel, iterations=5)
cv2.imwrite(r'C:\Users\marou\Desktop\horizontal.jpg', horizontal_lines)
plotting = plt.imshow(image_2, cmap='gray')

#### Combine H and V ####
# Blend both line images with equal weight
img_vh = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
# Invert, erode with the 2x2 kernel, then threshold again
img_vh = cv2.erode(cv2.bitwise_not(img_vh), kernel, iterations=2)
thresh, img_vh = cv2.threshold(img_vh, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imwrite(r'C:\Users\marou\Desktop\img_vh.jpg', img_vh)
plotting = plt.imshow(img_vh, cmap='gray')
To get this :
Now I am trying to fill the voids in my lines that happened due to the watermark removal, to be able to apply correct OCR.
I tried following the steps in this thread but I can't seem to get it right.
When I try to fill the grid holes :
# Fill individual grid holes
# NOTE(review): RETR_EXTERNAL returns only the outermost contours, and the
# whole bounding box of each is filled with 255. A single contour spanning
# the table would therefore blank the image - likely the cause of the blank
# output; confirm. Also check that `result` is a single-channel binary image.
cnts = cv2.findContours(result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(result, (x, y), (x + w, y + h), 255, -1)
cv2.imshow('result', result)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
I get blank image:
I have outlined an approach to fill the missing lines in the table using the second image as input.
# Load the image, convert it to grayscale and binarise it (inverse Otsu)
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
Now to create a separate mask for the horizontal lines:
# rectangular structuring element of size (50,1) for horizontal lines
h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
# contains only the horizontal lines
h_mask = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, h_kernel, iterations=1)
# performing repeated iterations to join lines
h_mask = cv2.dilate(h_mask, h_kernel, iterations=7)
And a separate mask for the vertical lines:
# rectangular structuring element of size (1,50) for vertical lines
v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
# opening with the vertical kernel; unlike h_mask, no dilation follows
v_mask = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, v_kernel, iterations=1)
Upon combining the above results we get the following:
# union of the vertical and horizontal line masks
joined_lines = cv2.bitwise_or(v_mask, h_mask)
The result above is not what you expected, the lines have extended beyond the boundaries of the table. In order to avoid this, I created a separate mask bounding the table region.
# dilate the thresholded image once with a 5x5 elliptical kernel
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations=1)
Now find the largest contour in the above image and draw it on another binary image to create the mask.
# Outer contours of the dilated image; the one with the largest area is kept
contours, hierarchy = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
c = max(contours, key=cv2.contourArea)
# Draw that contour, filled, on an all-zero canvas the size of the image
black = np.zeros(image.shape[:2], np.uint8)
mask = cv2.drawContours(black, [c], 0, 255, thickness=cv2.FILLED)
Using the above image as mask over the joined_lines created further above
# restrict the joined lines to the table region given by the mask
fin = cv2.bitwise_and(joined_lines, joined_lines, mask = mask)
You can perform more iterations over the morphological operations to better join the discontinuous lines
Let's imagine the following newspaper article needs to be analyzed to determine its number of columns (the answer should be 3 text columns). I tried to retrieve the number of columns using the cv2 library with Python and found the following suggestion on StackOverflow: Detect number of rows and columns in table image with OpenCV
However, because the table in that solution is well structured, the number of columns and rows can be extracted quite easily. Based on that solution, here is what I came up with:
import numpy as np
from imutils import contours
import cv2
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('example_newspaper_article.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
# NOTE(review): THRESH_OTSU is set, so the explicit 240 is presumably ignored
# in favour of Otsu's computed value - confirm against the OpenCV docs
thresh = cv2.threshold(blur, 240, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Find contours and remove text inside cells: every contour with area below
# 10000 is redrawn with a 30-pixel-thick outline
# NOTE(review): the indentation of all loop and if bodies in this snippet was lost
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
if area < 10000:
cv2.drawContours(thresh, [c], -1, (255, 255, 255), 30)
# Invert image
# NOTE(review): no inversion happens here; `invert` is just another name for `thresh`
invert = thresh
offset, old_cY, first = 10, 0, True
visualize = cv2.cvtColor(invert, cv2.COLOR_GRAY2BGR)
# Find contours, sort from top-to-bottom and then sum up column/rows
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")
for c in cnts:
# Find centroid from the contour's moments
# NOTE(review): M["m00"] is used as a divisor with no zero check - confirm it cannot be 0
M = cv2.moments(c)
cX = int(M["m10"] / M["m00"])
cY = int(M["m01"] / M["m00"])
# New row: the centroid moved down by more than `offset` pixels
# NOTE(review): no visible statement ever appends to `row` or `table`, so the
# counts printed below cannot work as shown - lines appear to be missing
if (abs(cY) - abs(old_cY)) > offset:
if first:
row, table = [], []
first = False
old_cY = cY
row = []
# Cell in same row
# NOTE(review): this `if` has no body left in this copy
if ((abs(cY) - abs(old_cY)) <= offset) or first:
# Uncomment to visualize
# NOTE(review): the drawing call that stood here is garbled; only its
# trailing arguments survive: (cX, cY), 10, (36, 255, 12), -1)
#cv2.imshow('visualize', visualize)
print('Rows: {}'.format(len(table)))
print('Columns: {}'.format(len(table[1])))
cv2.imshow('invert', invert)
cv2.imshow('thresh', thresh)
I thought that increasing the thickness argument of the drawContours method would help somehow, but unfortunately that does not do the trick. The result looks like this:
I assume, that drawing rectangles over the text area would be more helpful?
Does anyone know a solution and could help me out?
Thanks in advance!
Whenever there's such a task, I tend to count pixels along the y-axis, and try to find (large) differences between neighbouring columns. That'd be my complete pipeline:
Convert image to grayscale; inverse binary threshold using Otsu's to get white pixels on black background.
Do some morphological closing, here using a large vertical line kernel to connect all pixels in the same column.
Count all white pixels; calculate the absolute difference between neighbouring columns.
Find peaks in that "signal" – manually or, as shown here, by using scipy.signal.find_peaks. The peaks identify the start and end of each text column, so the number of text columns is half the number of peaks.
Here's the whole code including some visualization:
import cv2
import matplotlib.pyplot as plt # Only for visualization output
import numpy as np
from scipy import signal
from skimage import io # Only for web grabbing images
# Read image from web (attention: RGB order here, scikit-image)
# NOTE(review): the image URL is empty in this copy; supply the image location
image = io.imread('')

# Convert image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# Inverse binary threshold by Otsu's
thr = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

# Morphological closing with large vertical line kernel (as tall as the image)
thr_mod = cv2.morphologyEx(thr, cv2.MORPH_CLOSE, np.ones((image.shape[0], 1)))

# Count white pixels along y-axis (one count per image column)
y_count = np.sum(thr_mod / 255, 0)

# Calculate absolute difference between neighbouring x-axis values
y_count_diff = np.abs(np.diff(y_count))

# Find peaks in that "signal"
peaks = signal.find_peaks(y_count_diff, distance=50)[0]

# Number of columns is half the number of found peaks.
# Reconstructed from the garbled `n_cols =[0] / 2)`; an odd peak count is
# rounded up so the result stays an integer.
n_cols = int(np.ceil(peaks.shape[0] / 2))

# Text output
print('Number of columns: ' + str(n_cols))

# Some visualization output: one subplot per title, so the titles no longer
# overwrite each other on a single axes
plt.figure(0, figsize=(10, 10))
plt.subplot(2, 2, 1)
plt.imshow(image)
plt.title('Original image')
plt.subplot(2, 2, 2)
plt.imshow(thr_mod, cmap='gray')
plt.title('Thresholded, morphlogically closed image')
plt.subplot(2, 2, 3)
plt.plot(y_count)
plt.plot(peaks, y_count[peaks], 'r.')
plt.title('Summed white pixels along y-axis')
plt.subplot(2, 2, 4)
plt.plot(y_count_diff)
plt.plot(peaks, y_count_diff[peaks], 'r.')
plt.title('Absolute difference in summed white pixels')
plt.tight_layout()
plt.show()
The textual output:
Number of columns: 3
The visualization output:
Limitations: If your image is tilted, etc. you might get bad results. If you have a lot of (large) images crossing text columns, you also might get bad results. In general, you'll need to adapt the details in the given implementation to meet your actual requirements (no more examples were given).
System information
Platform: Windows-10-10.0.16299-SP0
Python: 3.8.5
Matplotlib: 3.3.1
NumPy: 1.19.1
OpenCV: 4.4.0
SciPy: 1.5.2
You could prep the image a little differently before searching for columns. For example you can connect the text horizontally first (with some morphological operation). That will give you contours with a certain height (the heading will be connected vertically as one contour per row and the text in columns will be connected as one contour per row). Then search for all contours and draw bounding rectangles over the ones that are higher than certain value you set (can be calculated or set manually). After that perform the morphological operation again with bigger kernel (horizontal and vertical) so you get all the remaining text connected if it is close together.
Here is an example code:
import cv2
import numpy as np
img = cv2.imread("columns.png")  # read image
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image 'columns.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale transform
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)[1]  # OTSU threshold
kernel = np.ones((5, 10), dtype=np.uint8)  # kernel for first closing procedure (connect blobs in x direction)
closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)  # closing
cv2.imwrite("closing1.png", closing)

# search for contours; findContours returns 2 or 3 values depending on the
# OpenCV version, so pick the contour list accordingly instead of using [0]
contours = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]

# heights of the bounding rectangles of all contours
heights = [cv2.boundingRect(cnt)[3] for cnt in contours]
boundary = np.mean(heights, axis=0)  # mean of heights will serve as boundary but
# this will probably not be the case on other samples - you would need to make
# a function to determine this boundary or manually set it

# iterate through contours and black out every contour taller than the boundary
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)  # bounding rectangle's coordinates, width and height
    if h > boundary:  # condition - contour must be higher than height boundary
        cv2.rectangle(closing, (x, y), (x+w, y+h), (0, 0, 0), -1)  # draw filled rectangle on the closing image
cv2.imwrite("closing1-filled.png", closing)

kernel = np.ones((25, 25), dtype=np.uint8)  # kernel for second closing (connect blobs in x and y direction)
closing = cv2.morphologyEx(closing, cv2.MORPH_CLOSE, kernel)  # closing again
cv2.imwrite("closing2.png", closing)

# search for contours again
contours = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]
print("Number of columns: ", len(contours))  # this is the number of columns

# iterate through contours
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)  # these are the coordinates, width and height of the columns
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 3)  # draw bounding rectangle on original image
cv2.imwrite("result.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs
Number of columns: 3
Step 1:
Step 2:
Step 3:
I have pictures of apple slices that have been soaked in an iodine solution. The goal is to segment the apples into individual regions of interest and evaluate the starch level of each one. This is for a school project so my goal is to test different methods of segmentation and objectively find the best solution whether it be a single technique or a combination of multiple techniques.
The problem is that so far I have only come close on one method. That method is using HoughCircles. I had originally planned to use the Watershed method, Morphological operations, or simple thresholding. This plan failed when I couldn't modify any of them to work.
The original images look similar to this, with varying shades of darkness of the apple
I've tried removing the background tray using cv2.inRange with HSV values, but it doesn't work well with darker apples.
This is what the HoughCircles produced on the original image with a grayscale and median blur applied, also with an attempted mask of the tray.
Any advice or direction on where to look next would be greatly appreciated. I can supply the code I'm using if that will help.
Thank you!
EDIT 1 : Adding some code and clarifying the question
Thank you for the responses. My real question is are there any other methods of segmentation that this scenario lends itself well to? I would like to try a couple different methods and compare results on a large set of photos. My next in line to try is using k-means segmentation. Also I'll add some code below to show what I've tried so far.
import cv2
import numpy as np
# Load image
image = cv2.imread('ApplePic.jpg')
# Set minimum and max HSV values to display
lower = np.array([0, 0, 0])
upper = np.array([105, 200, 255])
# Create HSV image and threshold it into the [lower, upper] range
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower, upper)
# keep only the pixels of the original image selected by the mask
maskedImage = cv2.bitwise_and(image, image, mask=mask)
# Show image
# NOTE(review): the window is titled 'HSV Mask' but shows the unmasked
# `image`, not `maskedImage` - confirm which one was intended
cv2.imshow('HSV Mask', image)
# import the necessary packages
import numpy as np
import argparse
import cv2
import os
# Process every .jpg file in the sample directory
# NOTE(review): the indentation of all loop and if bodies in this snippet was lost
directory = os.fsencode('Photos\\Sample N 100')
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith('.jpg'):
# Load the image
image = cv2.imread('Photos\\Sample N 100\\' + filename)
# Calculate scale so the resized image is 800 pixels high
scale_factor = 800 / image.shape[0]
width = int(image.shape[1] * scale_factor)
height = 800
dimension = (width, height)
# Radius search range: 80% to 120% of one tenth of the resized width
min_radius = int((width / 10) * .8)
max_radius = int((width / 10) * 1.2)
# Resize image
image = cv2.resize(image, dimension, interpolation=cv2.INTER_AREA)
# Copy Image
output = image.copy()
# Grayscale image, then median blur with aperture 5
gray = cv2.medianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 5)
# Detect circles in image
# NOTE(review): all arguments are positional; the 5th one (4) may land on the
# optional `circles` output parameter rather than a tuning value - confirm
# against the cv2.HoughCircles signature
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, min_radius * 2, 4, 60, 20, min_radius, max_radius)
# ensure at least some circles were found
if circles is not None:
# convert the (x, y) coordinates and radius of the circles to integers
circles = np.round(circles[0, :]).astype("int")
# loop over the (x, y) coordinates and radius of the circles
for (x, y, r) in circles:
# draw the circle in the output image, then draw a rectangle
# corresponding to the center of the circle
# NOTE(review): the circle-drawing call is garbled in this copy; only its
# trailing arguments survive: (x, y), r, (0, 255, 0), 4)
cv2.rectangle(output, (x - 5, y - 5), (x + 5, y + 5), (0, 128, 255), -1)
# NOTE(review): the putText call below is cut off; its remaining arguments are not visible
cv2.putText(output, '(' + str(x) + ',' + str(y) + ',' + str(r) + ')', (x, y),
# show the output image
# NOTE(review): `maskedImage` is not defined in this snippet; presumably it
# comes from the HSV masking code shown earlier - confirm
cv2.imshow("output", np.hstack([image, output, maskedImage]))
An alternative approach to segmenting the apples is to perform Kmeans color segmentation before thresholding then using contour filtering to isolate the apple objects:
Apply Kmeans color segmentation. We load the image, resize smaller using imutils.resize then apply Kmeans color segmentation. Depending on the number of clusters, we can segment the image into the desired number of colors.
Obtain binary image. Next we convert to grayscale, Gaussian blur and Otsu's threshold.
Filter using contour approximation. We filter out non-circle contours and small noise.
Morphological operations. We perform a morph close to fill adjacent contours
Draw minimum enclosing circles using contour area as filter. We find contours and draw the approximated circles. For this we use two sections, one where there was a good threshold and another where we approximate the radius.
Kmeans color quantization with clusters=3 and binary image
Morph close and result
The "good" contours, whose radius was calculated automatically using cv2.minEnclosingCircle, are highlighted in green, while the approximated contours are highlighted in teal. These approximated contours were not segmented well by the thresholding process, so we average the radius of the "good" contours and use that to draw the circle.
import cv2
import numpy as np
import imutils
# Kmeans color segmentation
def kmeans_color_quantization(image, clusters=8, rounds=1):
    """Reduce *image* to at most *clusters* colours using k-means.

    Args:
        image: 3-channel image array of shape (height, width, 3).
        clusters: number of cluster centres (colours) to quantise to.
        rounds: number of k-means attempts passed to cv2.kmeans.

    Returns:
        A uint8 array with the same shape as *image* in which every pixel is
        replaced by the centre of the cluster it was assigned to.
    """
    # One float32 row per pixel, in row-major order; this replaces the
    # original per-pixel Python copy loop with the same layout.
    samples = image.reshape(-1, 3).astype(np.float32)

    # Stop after 10000 iterations or once the centres move less than 0.0001
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10000, 0.0001)
    # NOTE(review): the original call was truncated after `samples,`. K and
    # attempts are taken from the otherwise unused `clusters` and `rounds`
    # parameters; the initialisation flag KMEANS_RANDOM_CENTERS is an
    # assumption - confirm.
    compactness, labels, centers = cv2.kmeans(
        samples, clusters, None, criteria, rounds, cv2.KMEANS_RANDOM_CENTERS)

    # Map every pixel's label back to its (integer) cluster colour
    centers = np.uint8(centers)
    res = centers[labels.flatten()]
    return res.reshape((image.shape))
# Load image, resize smaller, perform kmeans, grayscale
# Apply Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image '1.jpg'")
image = imutils.resize(image, width=600)
kmeans = kmeans_color_quantization(image, clusters=3)
gray = cv2.cvtColor(kmeans, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Filter out contours not circle: contours whose polygon approximation has
# fewer than 4 vertices are painted black in the threshold image
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.04 * peri, True)
    if len(approx) < 4:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Morph close
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find contours and draw minimum enclosing circles
# using contour area as filter
approximated_radius = 63
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    # The circle-drawing calls were fused into the assignments in this copy;
    # they are restored here from the surviving argument lists.
    # Large circles: draw with the measured enclosing radius
    if 6000 < area < 15000:
        ((x, y), r) = cv2.minEnclosingCircle(c)
        cv2.circle(image, (int(x), int(y)), int(r), (36, 255, 12), 2)
    # Small circles: poorly segmented, so draw with the approximated radius
    elif 1000 < area < 6000:
        ((x, y), r) = cv2.minEnclosingCircle(c)
        cv2.circle(image, (int(x), int(y)), approximated_radius, (200, 255, 12), 2)

cv2.imshow('kmeans', kmeans)
cv2.imshow('thresh', thresh)
cv2.imshow('close', close)
cv2.imshow('image', image)
cv2.waitKey(0)  # imshow only renders once the GUI event loop runs