The code below implements SIFT feature detection with a FLANN matcher on an image captured from the webcam. For some reason the error occurs on the knnMatch line that deals with the captured image (the attached image link shows the offending line). It would be great if someone could suggest a solution to this issue; please comment below if you need specific details.
import cv2
import numpy as np
MIN_MATCH_COUNT = 30
detector = cv2.xfeatures2d.SIFT_create()
FLANN_INDEX_KDITREE = 0
flannParam = dict(algorithm=FLANN_INDEX_KDITREE,tree=5)
searchParam = dict(check = 50)
flann=cv2.FlannBasedMatcher(flannParam,searchParam)
trainImg=cv2.imread("E:\\EXCHANGE_Courses\\training_img1.jpg")
trainImg1 = cv2.cvtColor(trainImg,cv2.COLOR_BGR2GRAY)
trainKP,trainDecs = detector.detectAndCompute(trainImg1,None)
cam = cv2.VideoCapture(1)
print(cam.isOpened())
for i in range(1):
    return_value, image = cam.read()
    cv2.imwrite('capture'+str(i)+'.jpg', image)
del(cam)
while True:
    QImage = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    queryKP,queryDesc = detector.detectAndCompute(QImage,None)
    # Now match the key descriptions from the training image and the query image
    # np.asarray(des1,np.float32),np.asarray(des2,np.float32),k=2
    # queryDesc,trainDecs, k=2
    matches=flann.knnMatches(queryDesc,trainDecs, k=2)
    print("upper part clear")
    # Filter the pool of keypoints as we need to collect the key points of interest only with the object in mind
    goodMatch=[]
    for m,n in matches:
        if(m.distance<0.75*n.distance):
            goodMatch.append(m)
    print("all ok here")
    if(len(goodMatch)>MIN_MATCH_COUNT):
        tp=[]
        qp=[]
        for m in goodMatch:
            tp.append(trainKP[m.trainIdx].pt)
            qp.append(queryKP[m.queryIdx].pt)
        tp,qp = np.float32((tp,qp))
        H,status = cv2.findHomography(tp,qp,cv2.RANSAC,3.0)
        h,w=trainImg.shape
        trainBorder = np.float32([[[0,0],[0,h-1],[w-1,h-1],[0,w-1]]])
        queryBorder = cv2.perspectiveTransform(trainBorder,H)
        # changed QImageBGR to image
        cv2.polylines(QImage,[np.uint8(queryBorder)],True,(0,255,0),3)
    else:
        print("Not enough matches - %d/%d" %len(goodMatch),MIN_MATCH_COUNT)
    cv2.imshow('results',QImage)
    #print ("Not enough matches are found - %d/%d" % (len(goodMatch),MIN_MATCH_COUNT))
    #matchesMask = None
    #draw_params = dict(matchColor = (0,255,0), # draw matches in green color
    #                   singlePointColor = None,
    #                   matchesMask = matchesMask, # draw only inliers
    #                   flags = 2)
    #img3 = cv2.drawMatches(trainImg1,trainKP,QImage,queryKP,goodMatch,None,**draw_params)
    #plt.imshow(img3, 'gray'),plt.show()
    if cv2.waitKey(10)==ord('q'):
        break
#cam.release()
#cv2.destroyAllWindows()
A bit late to the party, but I'm guessing you meant knnMatch rather than knnMatches.
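For reference, the corrected line would look something like this (a minimal sketch using the variable names from the question):

# knnMatch (singular) is the method cv2.FlannBasedMatcher actually exposes
matches = flann.knnMatch(queryDesc, trainDecs, k=2)

If that still errors, the commented-out np.asarray(..., np.float32) cast from the question is the usual next thing to try, since the FLANN KD-tree matcher only accepts float32 descriptors.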
I've got three images I'm trying to run object detection on. Each contains either 1, 2 or 3 of the same pentagon; the scale will vary, but only by a few percent.
I tried using different sections of the screen (they always appear in the same location) with matchTemplate, i.e. the first full pentagon and the centre section of the second pentagon, in a loop to try to catch the correct scale. This worked some of the time, but rarely, and it felt a bit hacky.
Instead I've moved over to keypoint detection. I figured I'm likely to get roughly double the number of matches on the second image and triple the number on the third, so I can compare the match count against a threshold to determine which one has been found.
I've written the keypoint detection code and it works, but I can't figure out which variable to use to determine how many keypoints were found.
When I try print good I get this: [<DMatch 0000018FDE162B90>, <DMatch 0000018FDE162ED0>, <DMatch 0000018FDE180150>, <DMatch 0000018FDE180210>, <DMatch 0000018FDE1807D0>, <DMatch 0000018FDE180810>, <DMatch 0000018FDE1820D0>] which I assumed would be 7 keypoints detected, but there's vastly more being found in the cv.imshow.
Which variable can I use to identify the number of matched keypoints?
N.B. this is functional MVP code.
import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob
crewstars = glob.glob(r"C:\Users\images\crewstars\*.png")
def get_haystack_image():
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[...,:3]
    img = np.ascontiguousarray(img)
    return img

def loadImages(directory):
    # Intialise empty array
    image_list = []
    # Add images to array
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list

def preProcessNeedle(image_list):
    needle_kp1_desc = []
    for i in image_list:
        img = i[0]
        orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc

def match_keypoints(descriptors_needle, keypoint_haystack, min_match_count):
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(keypoint_haystack, None)
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        return None, None, [], []
    good = []
    points = []
    for pair in matches:
        if len(pair) == 2:
            if pair[0].distance < 0.7*pair[1].distance:
                good.append(pair[0])
    if len(good) > min_match_count:
        for match in good:
            points.append(keypoints_haystack[match.trainIdx].pt)
    return keypoints_haystack, good, points

def keypointDetection(needle_kp1_desc):
    res = False
    # Object Detection
    for i, img in enumerate(needle_kp1_desc):
        kp1 = img[0]
        descriptors_needle = img[1]
        needle_img = img[2]
        # get an updated image of the screen & crop it
        keypoint_haystack = get_haystack_image()
        keypoint_haystack = keypoint_haystack[170:230, 800:1200]
        kp2, matches, match_points = match_keypoints(descriptors_needle, keypoint_haystack, min_match_count=40)
        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        cv.moveWindow("Keypoint Search",1940,30)
        cv.waitKey(0)
        if match_points:
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(0)
            res = True
            break
    return res

while True:
    crewstarsdir = loadImages(crewstars)
    needle_kp1_desc = preProcessNeedle(crewstarsdir)
    if keypointDetection(needle_kp1_desc):
        pass
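As an aside on the counting question itself: the printed list holds one DMatch per ratio-test survivor, so its length is the number of matched keypoints, while cv.drawMatches with default flags also draws every unmatched keypoint as a single point, which is why the window shows far more points. A minimal counting sketch, reusing names from keypointDetection above (nothing here is from the original post):

# Hypothetical snippet, placed inside keypointDetection() right after
# kp2, matches, match_points = match_keypoints(...):
if kp2 is not None:
    print(f"haystack keypoints detected: {len(kp2)}")    # everything ORB found in the crop
    print(f"ratio-test matches: {len(matches)}")         # the DMatch list, e.g. 7
    print(f"points above min_match_count: {len(match_points)}")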
With this code (my entire code, a school project on augmented reality), everything worked perfectly until I tried to run the video:
import cv2
import numpy as np
cap=cv2.VideoCapture(2)
imgTarget=cv2.imread('F1racecars.jpeg')
vidTarget= cv2.VideoCapture('F1racecars.mp4')
success, vidTarget = vidTarget.read()
imgTarget=cv2.resize(imgTarget,(640,360))
hT, wT, cT = imgTarget.shape
vidTarget=cv2.resize(vidTarget,(wT,hT))
orb = cv2.ORB_create(nfeatures=1000)
kp1, des1 = orb.detectAndCompute(imgTarget,None)
detect = False
fcount = 0
while True:
    success, imgWebcam= cap.read()
    imgAug = imgWebcam.copy()
    imgWarp = np.zeros((imgWebcam.shape[1], imgWebcam.shape[0],imgWebcam.shape[2]))
    masknew = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1],imgWebcam.shape[2]), np.uint8)
    maskInv = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    Mergecamfeed = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    ARfinal = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    if detect is False:
        vidTarget.set(cv2.CAP_PROP_POS_FRAMES,0)
        fcount =0
    else:
        if fcount == vidTarget.get(cv2.CAP_PROP_FRAME_COUNT, 0):
            vidTarget.set(cv2.CAP_PROP_POS_FRAMES, 0)
            fcount = 0
        success, vidTarget= vidTarget.read()
        vidTarget= cv2.resize(vidTarget, (wT, hT))
    kp2, des2 = orb.detectAndCompute(imgWebcam,None)
    if des2 is None: print(False)
    else:
        bf = cv2.BFMatcher()
        featmatch = bf.knnMatch(des1,des2,k=2)
        good=[]
        for m,n in featmatch:
            if m.distance < 0.75 * n.distance: good.append(m)
        print(len(good))
        if len(good)>20:
            detect = True
            srcpts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
            dstpts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
            matrix, mask = cv2.findHomography(srcpts,dstpts, cv2.RANSAC, 5)
            print(matrix)
            pts = np.float32([[0,0],[0,360],[640,360],[640,0]]).reshape(-1, 1, 2)
            dst = cv2.perspectiveTransform(pts,matrix)
            cv2.polylines(imgWebcam,[np.int32(dst)],True,(255,0,255),3)
            imgWarp = cv2.warpPerspective(vidTarget,matrix, (imgWebcam.shape[1],imgWebcam.shape[0]))
            cv2.fillPoly(masknew, [np.int32(dst)], (255,255,255))
            maskInv = cv2.bitwise_not(masknew)
            Mergecamfeed = cv2.bitwise_and(imgAug,imgAug,None, mask = maskInv[:,:,0])
            ARfinal = cv2.bitwise_or(imgWarp, Mergecamfeed)
    cv2.imshow('imgTarget', imgTarget)
    cv2.imshow('imgTargetVdo', vidTarget)
    cv2.imshow('webcam', imgWebcam)
    cv2.imshow('warp', imgWarp)
    cv2.imshow('mask', masknew)
    cv2.imshow('Modified mask', maskInv)
    cv2.imshow('Aug Image', Mergecamfeed)
    cv2.imshow('Augmented Reality Final O/P', ARfinal)
    cv2.waitKey(1)
    fcount += 1
It throws this error:
AttributeError: 'numpy.ndarray' object has no attribute 'set'
Normally we ask for the full error message, with the traceback; that makes it easier to identify where the error occurs. In this case, though, set is only used a couple of times.
vidTarget.set(cv2.CAP_PROP_POS_FRAMES,0)
What is this vidTarget thing? The error says it's a numpy array, and it is clear that such an object does not have a set method; experienced numpy users know that too. So what kind of object did you expect it to be?
We see attribute errors for one of two reasons: either the code writer did not read the documentation and tried to use a non-existent method, or the variable in question is not what they expected. You should know, at every step, what each variable is - not just guess or hope. Test if necessary.
Edit:
Initially
vidTarget= cv2.VideoCapture('F1racecars.mp4')
From a quick read of cv2 docs, this has get/set methods
but then you do
success, vidTarget = vidTarget.read()
# and resize
That redefines vidTarget.
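In other words, keep the VideoCapture object and the frame it returns in separate variables. A minimal sketch (vidCap and vidFrame are names I'm introducing here; wT and hT are as in the question):

vidCap = cv2.VideoCapture('F1racecars.mp4')   # the capture object: has get/set/read
success, vidFrame = vidCap.read()             # the frame: a plain numpy array
vidFrame = cv2.resize(vidFrame, (wT, hT))

# later, seek on the capture object, not on the frame
vidCap.set(cv2.CAP_PROP_POS_FRAMES, 0)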
I am trying to develop a script which will detect pixelation in LiveTV from an external camera. To test my script I have been using a short snippet of LiveTV which contains two instances of pixelation.
See Google Drive below for video:
https://drive.google.com/file/d/1f339HJSWKhyPr1y5sf9tWW4vcXgBOVbz/view?usp=sharing
Currently I am able to filter out most of the noise in the video, and detect the pixelation. However, I am also detecting the white text (given the intensity of the text it gets picked up by the kernel I am applying).
See the code below:
import cv2
import numpy as np
cap = cv2.VideoCapture("./hgtv_short.ts")
while True:
    success, image = cap.read()
    gray = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2GRAY)
    sharpen_kernel = np.array([[.4, .4], [-2.25, -2.25], [.4, .4]])
    sharpen = cv2.filter2D(src=gray, ddepth=-1, kernel=sharpen_kernel)
    sharpe = sharpen + 128
    canny = cv2.Canny(image=sharpe, threshold1=245, threshold2=255, edges=1, apertureSize=3, L2gradient=True)
    white = np.where(canny != [0])
    coordinates = zip(white[1], white[0])
    for p in coordinates:
        cv2.circle(canny, p, 30, (200, 0, 0), 2)
    cv2.imshow('image', image)
    cv2.imshow('edges', canny)
    cv2.waitKey(1)
What I would like to do is apply a threshold and findContours to the given coordinates to see if text is in the region. Then I can discern between actual pixelation and text.
NOTE:
If anyone has any other ideas on finding pixelation I am open to suggestions.
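As a rough sketch of that threshold-plus-findContours idea (not from the original post; every size limit here is a placeholder that would need tuning), one could inspect a small region around each candidate point and count character-sized blobs:

import cv2

def looks_like_text(gray, x, y, half=30):
    # Crop a small ROI around the candidate point from the grayscale frame.
    roi = gray[max(0, y - half):y + half, max(0, x - half):x + half]
    # Bright overlay text pops out with a high fixed threshold.
    _, thresh = cv2.threshold(roi, 200, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    glyphs = 0
    for c in contours:
        _, _, w, h = cv2.boundingRect(c)
        if 5 < h < 40 and 2 < w < 40:   # roughly character-sized blobs
            glyphs += 1
    return glyphs >= 3                  # several glyph-like blobs => probably text

Coordinates flagged by the Canny pass that also test positive here could then be ignored as text rather than treated as macro-blocking.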
UPDATE
Here is a screenshot from the video showing the type of pixelation I am looking for (macro-blocking, to be specific).
Image
Edges
From the above Images you can see that I am detecting the macro-blocking, but also the white text. I would like to be able to discern between text and actual macro-blocking.
SECOND UPDATE
After more trial and error, I found that it will be best to use some sort of reference model to help predict when an image is showing macro-blocking, pixelation, artifacts, etc...
I have decided to use the HOG (Histogram of Oriented Gradients) descriptor to create my feature vector. I created two functions: one loops through the GOOD images and the other through the BAD images:
def pos_train_set(self):
    print("Starting to Gather Positive Photos")
    for pos_file in glob.iglob(os.path.join(self.base_path, "Bad_Images", "*.jpg")):
        pos_img = cv2.imread(pos_file, 1)
        pos_img = cv2.resize(pos_img, self.winSize, interpolation=cv2.CV_32F)
        pos_des = self.hog.compute(pos_img)
        pos_des = cv2.normalize(pos_des, None)
        self.labels.append(1)
        self.training_data.append(pos_des)
    print("Gathered Positive Photos")

def neg_train_set(self):
    print("Starting to Gather Negative Photos")
    for neg_file in glob.iglob(os.path.join(self.base_path, "Good_Images", "*.jpg")):
        neg_img = cv2.imread(neg_file, 1)
        neg_img = cv2.resize(neg_img, self.winSize, interpolation=cv2.CV_32F)
        neg_des = self.hog.compute(neg_img)
        neg_des = cv2.normalize(neg_des, None)
        self.labels.append(0)
        self.training_data.append(neg_des)
    print("Gathered Negative Photos")
I then train my model using the SVM (Support Vector Machine) classification algorithm.
def train_set(self):
    print("Starting to Convert")
    td = np.float32(self.training_data)
    lab = np.array(self.labels)
    print("Converted List")
    print("Starting Shuffle")
    rand = np.random.RandomState(10)
    shuffle = rand.permutation(len(td))
    td = td[shuffle]
    lab = lab[shuffle]
    print("Shuffled List")
    print("Starting SVM")
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    # Exponential Chi2 kernel, similar to the RBF kernel:
    # K(xi,xj) = exp(-γ·χ²(xi,xj)), χ²(xi,xj) = (xi-xj)²/(xi+xj), γ > 0.
    svm.setKernel(cv2.ml.SVM_CHI2)
    svm.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
    svm.setGamma(5.383)
    svm.setC(2.67)
    print("Starting Training")
    svm.train(td, cv2.ml.ROW_SAMPLE, lab)
    print("Saving to .yml")
    svm.save(os.path.join(self.base_path, "svm_model.yml"))
I then use that SVM model to try to predict whether an image is a 1 (bad image) or a 0 (good image), with the help of the kernel and edge detection I used in my first attempt:
def predict(self):
    svm = cv2.ml.SVM_load("./svm_model.yml")
    for file in self.files:
        os.mkdir(os.path.join(self.base_path, "1_Frames", os.path.basename(file)))
        print(f"Starting predict on {file}")
        cap = cv2.VideoCapture(file)
        while cap.isOpened():
            success, image = cap.read(1)
            if success:
                img = cv2.resize(image, self.winSize, interpolation=cv2.CV_32F)
                test_data = self.hog.compute(img)
                test_data = cv2.normalize(test_data, None)
                test_data = np.float32(test_data)
                test_data = np.transpose(test_data)
                if not np.any(test_data):
                    print("Invalid Dimension")
                    success, image = cap.read(1)
                    print(f"New Frame {success}")
                else:
                    response = svm.predict(test_data)[1]
                    if response == 1:
                        gray = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2GRAY)
                        sharpen_kernel = np.array([[.4, .4], [-2.25, -2.25], [.4, .4]])
                        sharpen = cv2.filter2D(src=gray, ddepth=-1, kernel=sharpen_kernel)
                        sharpe = sharpen + 128
                        canny = cv2.Canny(image=sharpe, threshold1=245, threshold2=255, edges=1, apertureSize=3, L2gradient=True)
                        white = np.where(canny != [0])
                        if not len(white[0]) == 0:
                            cv2.imwrite(os.path.join(self.base_path, '1_Frames', os.path.basename(file), f'found_{self.x}.jpg'), image)
                            success, image = cap.read(1)
                            self.x += 1
                        else:
                            success, image = cap.read(1)
                            pass
                    else:
                        cv2.imwrite(os.path.join(self.base_path, '0_Frames', f'found_{self.y}.jpg'), image)
                        success, image = cap.read(1)
                        self.y += 1
            else:
                break
        cap.release()
        cv2.destroyAllWindows()
This method seems to work well, but I am still open to any further ideas or suggestions. I posted this update in the hope that it may assist someone else looking for suggestions on how to detect issues in images.
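For completeness, a hypothetical driver showing how the pieces above might be wired together (the class name comes from the constructor sketch earlier; the attributes mirror the ones predict() uses and are not from the original post):

detector = PixelationDetector(base_path="./dataset")
detector.pos_train_set()
detector.neg_train_set()
detector.train_set()
detector.files = ["./hgtv_short.ts"]   # videos to scan for macro-blocking
detector.x = detector.y = 0            # frame counters used by predict()
detector.predict()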
I have a goal to do homography on a live video by capturing my screen and processing it.
In order to do so, I took the code from this link, and manipulated it inside a while loop as follows:
from __future__ import print_function
import cv2 as cv
import numpy as np
from windowcapture import WindowCapture
# initialize the WindowCapture class
capture = WindowCapture('My Window')
bar_img = cv.imread('hammer.jpg',cv.IMREAD_GRAYSCALE)
while(True):
    # get an updated image of the game
    screenshot = capture.get_screenshot()
    screenshot = cv.cvtColor(screenshot,cv.IMREAD_GRAYSCALE)
    if bar_img is None or screenshot is None:
        print('Could not open or find the images!')
        exit(0)
    #-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors
    minHessian = 400
    detector = cv.SIFT_create()
    keypoints_obj, descriptors_obj = detector.detectAndCompute(bar_img, None)
    keypoints_scene, descriptors_scene = detector.detectAndCompute(screenshot, None)
    #-- Step 2: Matching descriptor vectors with a FLANN based matcher
    # Since SURF is a floating-point descriptor NORM_L2 is used
    matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_FLANNBASED)
    knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2)
    #-- Filter matches using the Lowe's ratio test
    ratio_thresh = 0.75
    good_matches = []
    for m,n in knn_matches:
        if m.distance < ratio_thresh * n.distance:
            good_matches.append(m)
    #-- Draw matches
    img_matches = np.empty((max(bar_img.shape[0], screenshot.shape[0]), bar_img.shape[1]+screenshot.shape[1], 3), dtype=np.uint8)
    cv.drawMatches(bar_img, keypoints_obj, screenshot, keypoints_scene, good_matches, img_matches, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    #-- Localize the object
    obj = np.empty((len(good_matches),2), dtype=np.float32)
    scene = np.empty((len(good_matches),2), dtype=np.float32)
    for i in range(len(good_matches)):
        #-- Get the keypoints from the good matches
        obj[i,0] = keypoints_obj[good_matches[i].queryIdx].pt[0]
        obj[i,1] = keypoints_obj[good_matches[i].queryIdx].pt[1]
        scene[i,0] = keypoints_scene[good_matches[i].trainIdx].pt[0]
        scene[i,1] = keypoints_scene[good_matches[i].trainIdx].pt[1]
    H, _ = cv.findHomography(obj, scene, cv.RANSAC)
    #-- Get the corners from the image_1 ( the object to be "detected" )
    obj_corners = np.empty((4,1,2), dtype=np.float32)
    obj_corners[0,0,0] = 0
    obj_corners[0,0,1] = 0
    obj_corners[1,0,0] = bar_img.shape[1]
    obj_corners[1,0,1] = 0
    obj_corners[2,0,0] = bar_img.shape[1]
    obj_corners[2,0,1] = bar_img.shape[0]
    obj_corners[3,0,0] = 0
    obj_corners[3,0,1] = bar_img.shape[0]
    scene_corners = cv.perspectiveTransform(obj_corners, H)
    #-- Draw lines between the corners (the mapped object in the scene - image_2 )
    cv.line(img_matches, (int(scene_corners[0,0,0] + bar_img.shape[1]), int(scene_corners[0,0,1])),\
        (int(scene_corners[1,0,0] + bar_img.shape[1]), int(scene_corners[1,0,1])), (0,255,0), 4)
    cv.line(img_matches, (int(scene_corners[1,0,0] + bar_img.shape[1]), int(scene_corners[1,0,1])),\
        (int(scene_corners[2,0,0] + bar_img.shape[1]), int(scene_corners[2,0,1])), (0,255,0), 4)
    cv.line(img_matches, (int(scene_corners[2,0,0] + bar_img.shape[1]), int(scene_corners[2,0,1])),\
        (int(scene_corners[3,0,0] + bar_img.shape[1]), int(scene_corners[3,0,1])), (0,255,0), 4)
    cv.line(img_matches, (int(scene_corners[3,0,0] + bar_img.shape[1]), int(scene_corners[3,0,1])),\
        (int(scene_corners[0,0,0] + bar_img.shape[1]), int(scene_corners[0,0,1])), (0,255,0), 4)
    #-- Show detected matches
    cv.imshow('Good Matches & Object detection', img_matches)
    cv.waitKey()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break
print('Done.')
The WindowCapture class I used relies on win32gui to capture the window (maybe it makes a difference that I obtain the frame this way rather than with imread?).
I get the following error when I run the code:
C:\Users\Tester\AppData\Local\Temp\pip-req-build-1i5nllza\opencv\modules\calib3d\src\fundam.cpp:385: error: (-28:Unknown error code -28) The input arrays should have at least 4 corresponding point sets to calculate Homography in function 'cv::findHomography'
Any idea why it happens?
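The error text itself points at the most likely cause: on some frames the ratio test leaves fewer than four good matches, so cv.findHomography has nothing to estimate from. A sketch of one way to guard the homography step inside the loop (a workaround for the crash, not an explanation of why so few matches survive on those frames):

# Skip the homography/drawing step on frames with too few good matches.
if len(good_matches) >= 4:                    # findHomography needs >= 4 point pairs
    H, _ = cv.findHomography(obj, scene, cv.RANSAC)
    if H is not None:
        scene_corners = cv.perspectiveTransform(obj_corners, H)
        # ... draw the outline as in the loop above ...
else:
    cv.imshow('Good Matches & Object detection', img_matches)   # just show the matches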
I am trying to detect text in images, especially images with quotes, using OpenCV in Python. To do that I first train on some text images: I detect each character of the text in the training image. For images with a regular font the characters are detected properly, but for some images the text (character) regions cannot be detected properly. The code is attached below. How can I modify it so that the characters are detected correctly?
import sys
import numpy as np
import cv2
import os
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
def main():
    imgTrainingNumbers = cv2.imread("E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png")
    if imgTrainingNumbers is None:
        print ("error: image not read from file \n\n")
        os.system("pause")
        return
    imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
    imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0)
    imgThresh = cv2.adaptiveThreshold(imgBlurred,
                                      255,
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY_INV,
                                      11,
                                      2)
    cv2.imshow("imgThresh", imgThresh)
    imgThreshCopy = imgThresh.copy()
    imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
                                                              cv2.RETR_EXTERNAL,
                                                              cv2.CHAIN_APPROX_SIMPLE)
    npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
    intClassifications = []
    intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
                     ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'),
                     ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'),
                     ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z'),ord('a'),ord('b'),ord('c'),ord('d'),
                     ord('e'),ord('f'),ord('g'),ord('h'),ord('i'),ord('j'),ord('k'),ord('l'),ord('m'),ord('n'),ord('o'),
                     ord('p'),ord('q'),ord('r'),ord('s'),ord('t'),ord('u'),ord('v'),ord('w'),ord('x'),ord('y'),ord('z') ]
    for npaContour in npaContours:
        if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:
            [intX, intY, intW, intH] = cv2.boundingRect(npaContour)
            cv2.rectangle(imgTrainingNumbers,
                          (intX, intY),
                          (intX+intW,intY+intH),
                          (0, 0, 255),
                          2)
            imgROI = imgThresh[intY:intY+intH, intX:intX+intW]
            imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
            cv2.imshow("imgROI", imgROI)
            cv2.imshow("imgROIResized", imgROIResized)
            cv2.imshow("training_numbers.png", imgTrainingNumbers)
            intChar = cv2.waitKey(0)
            if intChar == 27:
                sys.exit()
            elif intChar in intValidChars:
                print(intChar)
                intClassifications.append(intChar)
                print(intChar)
                npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
                npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0)
    fltClassifications = np.array(intClassifications, np.float32)
    npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))
    print ("\n\ntraining complete !!\n")
    np.savetxt("classificationsNEWG.txt", npaClassifications)
    np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
    cv2.destroyAllWindows()
    return

if __name__ == "__main__":
    main()
What you are trying to do is a very naive approach; just applying a threshold and detecting contours won't work here. A lot of research papers have been published on this task. You can refer to those and try to implement them, or you can use the image_to_boxes function of the well-known Tesseract OCR. You can download Tesseract from here, and since you are using Python you can install pytesseract, the Python wrapper for Tesseract, from here, and use the following code to achieve what you are expecting.
import pytesseract
import cv2
originalImg = cv2.imread('tp.png')
originalImg = cv2.resize(originalImg, None, fx=2.5, fy=2.5)
img = cv2.cvtColor(originalImg, cv2.COLOR_BGR2GRAY)
_,img = cv2.threshold(img,100,255,cv2.THRESH_BINARY)
h, w = img.shape
letters = pytesseract.image_to_boxes(img)
letters = letters.split('\n')
letters = [letter.split() for letter in letters]
for letter in letters:
    # each row is: char x1 y1 x2 y2 page (origin at the bottom-left); skip empty/malformed rows
    if len(letter) == 6:
        cv2.rectangle(originalImg, (int(letter[1]), h - int(letter[2])), (int(letter[3]), h - int(letter[4])), (0,0,255), 1)
cv2.imshow('', originalImg)
cv2.waitKey(0)
The resultant image
Note that there are many false detections, you need to ignore them in your training process.
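If filtering those false detections proves awkward with image_to_boxes alone, one possible alternative (an assumption on my part, not part of the answer above) is pytesseract's image_to_data, which also reports a per-word confidence that can be thresholded:

import cv2
import pytesseract

img = cv2.imread('tp.png')
data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
for i, conf in enumerate(data['conf']):
    # Keep only boxes Tesseract is reasonably confident about; 60 is an illustrative cut-off.
    if float(conf) > 60 and data['text'][i].strip():
        x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1)
cv2.imshow('filtered boxes', img)
cv2.waitKey(0)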