I'm new to opencv and for a school project i need to detect a red and a green circle with a camera, so i've use blobdetection, but it detect me the two colors, i think that my mask is bad, each color is linked to a specific action.
At the moment my code detect well red and green circle on the same page but i want it to detect only red circle on a white page.
Thank for your help
# Standard imports
import cv2
import numpy as np;
# Read image
im = cv2.VideoCapture(0)
# Setup SimpleBlobDetector parameters.
params = cv2.SimpleBlobDetector_Params()
# Change thresholds
params.minThreshold = 100;
params.maxThreshold = 200;
# Filter by Area.
params.filterByArea = True
params.minArea = 200
params.maxArea = 20000
# Filter by Circularity
params.filterByCircularity = True
params.minCircularity = 0.1
# Filter by Convexity
params.filterByConvexity = True
params.minConvexity = 0.1
# Filter by Inertia
params.filterByInertia = True
params.minInertiaRatio = 0.1
blueLower = (0,85,170) #100,130,50
blueUpper = (140,110,255) #200,200,130
while(1):
ret, frame=im.read()
mask = cv2.inRange(frame, blueLower, blueUpper)
mask = cv2.erode(mask, None, iterations=0)
mask = cv2.dilate(mask, None, iterations=0)
frame = cv2.bitwise_and(frame,frame,mask = mask)
# Set up the detector with default parameters.
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs.
keypoints = detector.detect(mask)
# Draw detected blobs as red circles.
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the circle corresponds to the size of blob
im_with_keypoints = cv2.drawKeypoints(mask, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# Display the resulting frame
frame = cv2.bitwise_and(frame,im_with_keypoints,mask = mask)
cv2.imshow('frame',frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
im.release()
cv2.destroyAllWindows()
EDIT 1: Code update
Now i got a issue where my full circle isn't detected.
No Blob Detection
Second Version
# Standard imports
import cv2
import numpy as np;
# Read image
im = cv2.VideoCapture(0)
while(1):
ret, frame=im.read()
lower = (130,150,80) #130,150,80
upper = (250,250,120) #250,250,120
mask = cv2.inRange(frame, lower, upper)
lower, contours, upper = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
blob = max(contours, key=lambda el: cv2.contourArea(el))
M = cv2.moments(blob)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
canvas = im.copy()
cv2.circle(canvas, center, 2, (0,0,255), -1)
cv2.imshow('frame',frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
im.release()
cv2.destroyAllWindows()
You need to work out what the BGR numbers for your green are (let's say for arguments sake [0, 255, 0]), then create a mask that ignores any colours outside a tolerance around your green:
mask = cv2.inRange(image, lower, upper)
Take a look at this tutorial for a step by step.
Play around with lower and upper to get the right behaviour. Then you can find the contours in the mask:
_, contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_NONE)
Then go through the contours list to find the biggest one (filter out any possible noise):
blob = max(contours, key=lambda el: cv2.contourArea(el))
And that's your final 'blob'. You can find the center by doing:
M = cv2.moments(blob)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
You can draw this center onto a copy of your image, for checking:
canvas = im.copy()
cv2.circle(canvas, center, 2, (0,0,255), -1)
Obviously, this makes the assumption that there's only one green ball and nothing else green in the image. But it's a start.
EDIT - RESPONSE TO SECOND POST
I think the following should work. I haven't tested it, but you should be able to at least do a bit more debugging with the canvas and mask displayed:
# Standard imports
import cv2
import numpy as np;
# Read image
cam = cv2.VideoCapture(0)
while(1):
ret, frame = cam.read()
if not ret:
break
canvas = frame.copy()
lower = (130,150,80) #130,150,80
upper = (250,250,120) #250,250,120
mask = cv2.inRange(frame, lower, upper)
try:
# NB: using _ as the variable name for two of the outputs, as they're not used
_, contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
blob = max(contours, key=lambda el: cv2.contourArea(el))
M = cv2.moments(blob)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
cv2.circle(canvas, center, 2, (0,0,255), -1)
except (ValueError, ZeroDivisionError):
pass
cv2.imshow('frame',frame)
cv2.imshow('canvas',canvas)
cv2.imshow('mask',mask)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
im.release()
cv2.destroyAllWindows()
You should use HSV color space for better results if you wanna make filter by color.
ret, frame=im.read()
frame= cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) # Add this to your code
mask = cv2.inRange(frame, blueLower, blueUpper)
Related
My code does the following :
Grab the video frame.
Convert the video frame to the HSV colour space.
Split the frame into individual components (separate images for H, S, and V).
Apply a threshold to each component
Locates the centroid and applies the bounding circle
I can find the two largest contours. From the two largest indexed contours how do I calculate the moments for these contours to find the centroid?
Here is an example image to help. Blue are the centroids, Red is the minimum enclosing circle and white are the laser pointers (After the threshold has been applied).
#Dependacies import
from cmath import inf
from pickle import FRAME
import cv2
from matplotlib.pyplot import hsv
import numpy as np
import imutils
# Video Capture
cap = cv2.VideoCapture(0)
if (cap.isOpened()== False):
print("Error opening video stream or file")
# Read until video is completed
while(cap.isOpened()):
# Capture frame-by-frame
ret, frame = cap.read() #reading video
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) #Making hsv frame
red_lower = np.array ([16, 132, 0])
red_upper = np.array ([20, 236, 255])
mask = cv2.inRange (hsv,red_lower,red_upper)
res = cv2.bitwise_and(frame,frame, mask= mask)
thresh = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
if ret== True:
contours, hiearachy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnt = sorted(contours,key=cv2.contourArea)
print ('Number of contours found = ', len(contours))
if len(contours) >= 2:
def get_contour_areas(contours):
all_areas = []
for cnt in contours:
area = cv2.contourArea(cnt)
all_areas.append(area)
return all_areas
sorted_contours= sorted(contours, key=cv2.contourArea, reverse= True)
largest_item= sorted_contours[0]
second_item= sorted_contours[1]
cv2.drawContours(frame, largest_item, -1, (255,0,0),1)
cv2.drawContours(frame, second_item, -1, (255,0,0),1)
if ret == True:
cv2.imshow('Frame',frame)
cv2.imshow('mask', mask)
cv2.imshow('gray', thresh)
# Press Q on keyboard to exit
if cv2.waitKey(25) & 0xFF == ord('q'):
break
# Break the loop
else:
break
# When everything done, release the video capture object
cap.release()
# Closes all the frames
cv2.destroyAllWindows()
I followed a tutorial on watershed segmentation and used it to segment each red blood cell in an image. I'm new to openCV and I would like to know if it is possible to draw circles around the cells by using watershed segmentation? If so, could you please show how it is done.
Original image
Output of Watershed segmentation
Code is given below
import numpy as np
import cv2
from matplotlib import pyplot as plt
def fillHoles(otsuImg):
# find contours
contours, _ = cv2.findContours(otsuImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# filter out contours by size
small_cntrs = []
for con in contours:
area = cv2.contourArea(con)
# print(area)
if area < 1000: # size threshold
small_cntrs.append(con)
cv2.drawContours(otsuImg, small_cntrs, -1, 0, -1)
# load the image
img = cv2.imread('resources/rbc2.png')
img_pyr = cv2.pyrMeanShiftFiltering(img, 21, 51)
img_median = cv2.medianBlur(img_pyr, 9)
img_gray = cv2.cvtColor(img_median, cv2.COLOR_BGR2GRAY)
ret, img_thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# fill holes of RBC
fillHoles(img_thresh)
# invert the image
img_thresh = cv2.bitwise_not(img_thresh)
# noise removal
kernel = np.ones((3,3),np.uint8)
opening = cv2.morphologyEx(img_thresh,cv2.MORPH_OPEN,kernel, iterations=2)
# sure background area
sure_bg = cv2.dilate(opening,kernel,iterations=3)
# Finding sure foreground area
dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2, 5)
ret, sure_fg = cv2.threshold(dist_transform,0.1*dist_transform.max(),255,0)
# _, sure_fg = cv2.threshold(np.uint8(dist_transform), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Finding unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Marker labelling
ret, markers = cv2.connectedComponents(sure_fg)
# Add one to all labels so that sure background is not 0, but 1
markers = markers+1
# Now, mark the region of unknown with zero
markers[unknown==255] = 0
markers = cv2.watershed(img,markers)
img[markers == -1] = [255,0,0]
cv2.imshow('markers2', np.uint8(markers))
cv2.imshow('Final output', img)
cv2.waitKey(0)
I had the same problem and ended up using skimage.segmentation.watershed for the last step, to get the labels I could use to calculate the contours of the watershed cells. Once you have the contours, you can calculate and plot the enclosing circle as usual:
# your code above
ret, markers = cv2.connectedComponents(sure_fg)
from skimage.segmentation import watershed
labels = watershed(-dist_transform,
markers,
mask=img_thresh,
watershed_line=False)
watershed_contours = list(map(lambda l: cv2.findContours((labels == l).astype(np.uint8),
cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0][0],
np.unique(labels)[1:]))
output = img.copy()
for contour in watershed_contours:
(x,y),radius = cv2.minEnclosingCircle(contour)
center = (int(x),int(y))
radius = int(radius)
cv2.circle(output,center,radius,(0,255,0),2)
I am pretty new to OpenCV and am trying to achieve drawing simple contours along the outline of my hand using a webcam. I decided on using cv2.adaptiveThreshold() to deal with the different light intensities when the camera adjusts to the hand moving. Everything seems to work fine except that it is struggling with finding the fingers and then also drawing closed contours.
See here:
I thought about trying to detect a convex hull and detect anything deviating from it somehow.
How do I go about this best? Firstly I need to manage to maybe not find weird closed contours and then go from there?
Here's the code, I fixed the trackbar values for you :)
import cv2
import numpy as np
#####################################
winWidth = 640
winHeight = 840
brightness = 100
cap = cv2.VideoCapture(0)
cap.set(3, winWidth)
cap.set(4, winHeight)
cap.set(10, brightness)
kernel = (7, 7)
#######################################################################
def empty(a):
pass
cv2.namedWindow("TrackBars")
cv2.resizeWindow("TrackBars", 640, 240)
cv2.createTrackbar("cVal", "TrackBars", 10, 40, empty)
cv2.createTrackbar("bSize", "TrackBars", 77, 154, empty)
def preprocessing(frame, value_BSize, cVal):
imgGray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# mask = cv2.inRange(imgHsv, lower, upper)
imgBlurred = cv2.GaussianBlur(imgGray, kernel, 4)
gaussC = cv2.adaptiveThreshold(imgBlurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, value_BSize,
cVal)
imgDial = cv2.dilate(gaussC, kernel, iterations=3)
imgErode = cv2.erode(imgDial, kernel, iterations=1)
return imgDial
def getContours(imPrePro):
contours, hierarchy = cv2.findContours(imPrePro, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
area = cv2.contourArea(cnt)
if area > 60:
cv2.drawContours(imgCon, cnt, -1, (0, 255, 0), 2, cv2.FONT_HERSHEY_SIMPLEX)
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
#######################################################################################################
while cap.isOpened():
success, frame = cap.read()
cVal = cv2.getTrackbarPos("cVal", "TrackBars")
bVal = cv2.getTrackbarPos("bVal", "TrackBars")
value_BSize = cv2.getTrackbarPos("bSize", "TrackBars")
value_BSize = max(3, value_BSize)
if (value_BSize % 2 == 0):
value_BSize += 1
if success == True:
frame = cv2.flip(frame, 1)
imgCon = frame.copy()
imPrePro = preprocessing(frame, value_BSize, cVal)
getContours(imPrePro)
cv2.imshow("Preprocessed", imPrePro)
cv2.imshow("Original", imgCon)
if cv2.waitKey(1) & 0xFF == ord("q"):
cv2.destroyAllWindows()
break
L*a*b color space can help find objects brighter than the background. One advantage is that color space is hardware independent, so it should yield relatively similar results from any camera. Using the OTSU option to threshold the image can help it work in different lightning conditions, as it calculates the optimal threshold intensity to separate bright and dark areas in the image. Obviously it is not a silver bullet and will NOT work perfectly in every situation, especially in extreme cases, but as long your hand's brightness is relatively different from the background, it should work.
lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
tv, thresh = cv2.threshold(lab[:,:,0], 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
plt.imshow(thresh)
Once the hand is properly thresholded, you can proceed to find the contours and do your analysis as needed.
Note: the artifacts in the threholded image are caused by removing the green contour lines from the original posted image.
I'm using a light threshold so this might work differently depending on the image, but here's what works for this one.
import cv2
import numpy as np
# load image
img = cv2.imread("hand.jpg");
# lab
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
l,a,b = cv2.split(lab);
# threshold
thresh = cv2.inRange(l, 90, 255);
# contour
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE);
# filter contours by size
marked = img.copy();
cv2.drawContours(marked, contours, -1, (0, 255, 0), 3);
# show
cv2.imshow("marked", marked);
cv2.imshow("Thresh", thresh);
cv2.waitKey(0);
# save
cv2.imwrite("marked_hand.png", marked);
Have you looked into google's mediapipe, as an alternate to opencv?
Also, I wonder if adding a thin bottom black border to the frame would help the contour to know to wrap around the wrist.
So, for a school project i have to create an app that would make a tatoo appear on your arm.
At the moment openCV uses the color of the skin to detect which part of the image is skin.
My problem is this one: on the last step of the code, where the mask with the tattoo is merged to the video feed, the size of the array changes
#attempt to save the ROI coordinates
fy1=y1
fy2=y1+tatHeight
fx1=x1
fx2=x1+tatWidth
#create a ROI mask
roi = frame[fy1:fy2,fx1:fx2]
#merge the roi mask with the tatoo and the inverted tatoo masks
roi_bg = cv2.bitwise_and(roi,roi,mask = mask2inv)
roi_fg = cv2.bitwise_and(tatoo,tatoo,mask = mask2)
#merge the background and foreground ROI masks
dst = cv2.add(roi_bg,roi_fg)
# add the merged mask to the video feed
roiColor[fy1:fy2,fx1:fx2]=dst #the problem is here
I get this error
ValueError: could not broadcast input array from shape (33,2,3) into shape (0,0,3)
Could someone help me figure out why the value of fx and fy change?
You can find the repo with the full code here
Thanks to anyone that can help
EDIT : This is the website where I found some inspiration for my code
EDIT 2: Here is the code
# USAGE
# python ball_tracking.py --video ball_tracking_example.mp4
# python ball_tracking.py
# import the necessary packages
from collections import deque
import numpy as np
import argparse
import imutils
import cv2
#load tatoo image
imgTatoo=cv2.imread('mustache.png',-1)
tatMask=imgTatoo[:,:,3]
#create a mask from the image
invTatMask=cv2.bitwise_not(tatMask)
imgTatoo=imgTatoo[:,:,0:3]
#define original sizes for the tatoo
tatOrigHeight,tatOrigWidth = imgTatoo.shape[:2]
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64,
help="max buffer size")
args = vars(ap.parse_args())
# define the lower and upper boundaries of the "green"
# ball in the HSV color space, then initialize the
# list of tracked points
greenLower = (0, 0, 73)
greenUpper = (35, 93, 255)
pts = deque(maxlen=args["buffer"])
# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
camera = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
camera = cv2.VideoCapture(args["video"])
# keep looping
while True:
# grab the current frame
(grabbed, frame) = camera.read()
# if we are viewing a video and we did not grab a frame,
# then we have reached the end of the video
if args.get("video") and not grabbed:
break
# resize the frame, blur it, and convert it to the HSV
# color space
frame = imutils.resize(frame, width=600)
frame = cv2.bilateralFilter(frame, 11, 17, 17)
# blurred = cv2.GaussianBlur(frame, (11, 11), 0)
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# construct a mask for the color "green", then perform
# a series of dilations and erosions to remove any small
# blobs left in the mask
mask = cv2.inRange(hsv, greenLower, greenUpper)
mask = cv2.erode(mask, None, iterations=2)
mask = cv2.dilate(mask, None, iterations=2)
# find contours in the mask and initialize the current
# (x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
center = None
# only proceed if at least one contour was found
if len(cnts) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
M = cv2.moments(c)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
# only proceed if the radius meets a minimum size
if radius > 10:
#draw contour of desired shape
cv2.drawContours( frame, c, -1, (239, 0, 0),6 )
#create the smallest box containing that contour
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
box = np.int0(box)
#draw the box
cv2.drawContours(frame,[box],0,(0,0,255),2)
#Save the box parameters (center,height,width and angle)
areaCenter=rect[0]
areaX,areaY=int(areaCenter[0]),int(areaCenter[1])
areaSize=rect[1]
areaHeight=int(areaSize[0])
areaWidth=int(areaSize[1])
areaAngle=rect[2]
#define the tattoo size
tatWidth=int(0.2*areaWidth)
tatHeight=tatWidth * tatOrigHeight // tatOrigWidth
#face = cv2.rectangle(frame,(areaX-areaWidth//4,areaY-areaHeight//4),(areaX+areaWidth//4,areaY+areaHeight//4),(255,0,0),2)
#roiGray=gray[areaY-areaHeight//2:areaY+areaHeight//2, areaX-areaWidth//2:areaX+areaWidth//2]
#create a mask from the video feed with the size of the region of interest (box created before)
roiColor=frame[areaY-areaHeight//2:areaY+areaHeight//2, areaX-areaWidth//2:areaX+areaWidth//2]
# print(areaX,areaY,areaWidth,areaHeight)
# print(tatWidth,tatHeight)
# save the center of the region of interest (ROI)
x1 = areaX - (tatWidth//2)
x2 = areaX + (tatWidth//2)
y1 = areaY - (tatHeight//2)
y2 = areaY + (tatHeight//2)
# protect from wierd center coordinates (outside of the frame)
if x1 < 0:
x1 = 0
if y1 < 0:
y1 = 0
if x2 > areaWidth:
x2 = areaWidth
if y2 > areaHeight:
y2 = areaHeight
print(x1,x2,y1,y2)
# resize the tattoo to match the ROI size
tatHeight=tatWidth * tatOrigHeight // tatOrigWidth
tatWidth=x2-x1
# protect from wierd (negative) tatoo sizes
if tatHeight<=0:
tatHeight=1
if tatWidth<=0:
tatWidth=2
print(tatHeight)
print(tatWidth)
# resize all the masks to the same size in order to merge them
tatoo=cv2.resize(imgTatoo,(tatWidth,tatHeight),interpolation=cv2.INTER_AREA)
mask2=cv2.resize(tatMask,(tatWidth,tatHeight),interpolation=cv2.INTER_AREA)
mask2inv=cv2.resize(invTatMask,(tatWidth,tatHeight),interpolation=cv2.INTER_AREA)
print(mask2inv.shape)
#attempt to save the ROI coordinates
fy1=y1
fy2=y1+tatHeight
fx1=x1
fx2=x1+tatWidth
#create a ROI mask
roi = frame[fy1:fy2,fx1:fx2]
print(roi.shape)
#merge the roi mask with the tatoo and the inverted tatoo masks
roi_bg = cv2.bitwise_and(roi,roi,mask = mask2inv)
roi_fg = cv2.bitwise_and(tatoo,tatoo,mask = mask2)
print(roi_bg.shape,roi_fg.shape)
#merge the background and foreground ROI masks
dst = cv2.add(roi_bg,roi_fg)
print("dst: ",dst.shape)
print("roi: ",roiColor.shape)
print(fy1,fy2,fy2-fy1)
print(fx1,fx2,fx2-fx1)
# add the merged mask to the video feed
roiColor[fy1:fy2,fx1:fx2]=dst
# show the frame to our screen
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
Ok so thanks to Dan Masek, if solved the problem by bypassing the roiColor that was useless with the following line:
frame[fy1:fy2,fx1:fx2]=dst
In order to be sure that the area that I want can be included in the original image.
I have a sample image like this
I'm looking for a way to black out the noise from the image such that I end up with an image that just has black text on white background so that I may send it to tesseract.
I've tried morphing with
kernel = np.ones((4,4),np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
cv2.imshow("opening", opening)
but it doesn't seem to work.
I've also tried to find contours
img = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
(cnts, _) = cv2.findContours(img, cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:1]
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
roi=rotated[y:y+h,x:x+w].copy()
cv2.imwrite("roi.png", roi)
With the above code, I get the following contours:
which leads to this image when cropped:
which is still not good enough. I want black text on white background, so that I can send it to tesseract OCR and have good success rate.
Is there anything else I can try?
Update
Here is an additional similar image. This one is a bit easier because it has a smooth rectangle in it
The following works for your given example, although it might need tweaking for a wider range of images.
import numpy as np
import cv2
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
dst = cv2.bitwise_and(image_src, mask)
mask = 255 - mask
roi = cv2.add(dst, mask)
roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(roi_gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
max_x = 0
max_y = 0
min_x = image_src.shape[1]
min_y = image_src.shape[0]
for c in contours:
if 150 < cv2.contourArea(c) < 100000:
x, y, w, h = cv2.boundingRect(c)
min_x = min(x, min_x)
min_y = min(y, min_y)
max_x = max(x+w, max_x)
max_y = max(y+h, max_y)
roi = roi[min_y:max_y, min_x:max_x]
cv2.imwrite("roi.png", roi)
Giving you the following type of output images:
And...
The code works by first locating the largest contour area. From this a mask is created which is used to first select only the area inside, i.e. the text. The inverse of the mask is then added to the image to convert the area outside the mask to white.
Lastly contours are found again for this new image. Any contour areas outside a suitable size range are discarded (this is used to ignore any small noise areas), and a bounding rect is found for each. With each of these rectangles, an outer bounding rect is calculated for all of the remaining contours, and a crop is made using these values to give the final image.
Update - To get the remainder of the image, i.e. with the above area removed, the following could be used:
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 10, 255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
image_remainder = cv2.bitwise_and(image_src, 255 - mask)
cv2.imwrite("remainder.png", image_remainder)
I get this:
Result
Source Code:
if __name__ == '__main__':
SrcImg = cv2.imread('./Yahi9.png', cv2.CV_LOAD_IMAGE_GRAYSCALE)
_, BinImg = cv2.threshold(SrcImg, 80, 255, cv2.THRESH_OTSU)
Contours, Hierarchy = cv2.findContours(image=copy.deepcopy(SrcImg),
mode=cv2.cv.CV_RETR_EXTERNAL,
method=cv2.cv.CV_CHAIN_APPROX_NONE)
MaxContour, _ = getMaxContour(Contours)
Canvas = np.ones(SrcImg.shape, np.uint8)
cv2.drawContours(image=Canvas, contours=[MaxContour], contourIdx=0, color=(255), thickness=-1)
mask = (Canvas != 255)
RoiImg = copy.deepcopy(BinImg)
RoiImg[mask] = 255
RoiImg = cv2.morphologyEx(src=RoiImg, op=cv2.MORPH_CLOSE, kernel=np.ones((3,3)), iterations=4)
cv2.imshow('RoiImg', RoiImg)
cv2.waitKey(0)
Function:
def getMaxContour(contours):
MaxArea = 0
Location = 0
for idx in range(0, len(contours)):
Area = cv2.contourArea(contours[idx])
if Area > MaxArea:
MaxArea = Area
Location = idx
MaxContour = np.array(contours[Location])
return MaxContour, MaxArea
Ehh, it's python code.
It only works when the white region is the max contour.
Basic idea of this answer is to use border around text.
1) Erode horizontally with a very large kernel, say size of 100 px or 8 times size of single expected character, something like that. It should be done row-wise. The extreme ordinate will give y-location of boundaries around text.
2) Process vertically same way to get x-location of boundaries around text. Then use these locations to crop out image you want.
-- One benefit of this method is you will get every sentence/word segmented separately which, I presume, is good for an OCR.
Happy Coding :)
Edited in by Mark Setchell
Here is a demo of 1)
Here is a demo of 2)