OpenCV Python Error in simple digit recognition - python

import cv2
import numpy as np
# ---------------------------------------------------------------------------
# Training part: load the saved samples/responses and fit a k-NN model.
# ---------------------------------------------------------------------------
samples = np.loadtxt('generalsamples.data', np.float32)
responses = np.loadtxt('generalresponses.data', np.float32)
# Reshape the responses into a single column (one response per row).
responses = responses.reshape((responses.size, 1))

# NOTE: this is the line that raises the AttributeError reported in the post.
model = cv2.KNearest()
model.train(samples,responses)

# ---------------------------------------------------------------------------
# Testing part: find digit-sized contours and label each one.
# ---------------------------------------------------------------------------
im = cv2.imread('/home/manoj/Pictures/Untitled-1.jpg')
out = np.zeros(im.shape, np.uint8)  # blank canvas for the recognised digits
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# Same numeric arguments as before (1, 1), spelled with their named constants.
thresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
)
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
)

for cnt in contours:
    # Ignore small contours.
    if cv2.contourArea(cnt) <= 50:
        continue
    x, y, w, h = cv2.boundingRect(cnt)
    # Ignore boxes that are not tall enough.
    if h <= 28:
        continue

    cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
    roi = thresh[y:y + h, x:x + w]
    # Shrink to 10x10 and flatten into one float32 row of 100 features.
    roismall = np.float32(cv2.resize(roi, (10, 10)).reshape((1, 100)))
    retval, results, neigh_resp, dists = model.find_nearest(roismall, k=1)
    string = str(int(results[0][0]))
    cv2.putText(out, string, (x, y + h), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))

cv2.imshow('im', im)
cv2.imshow('out', out)
cv2.waitKey(0)
I used this python code with Opencv for character recognition but I get this error while running the code.
AttributeError: 'module' object has no attribute 'KNearest'

In newer versions of OpenCV, KNearest has moved into the cv2.ml module, so you need to replace
model = cv2.KNearest()
by:
model = cv2.ml.KNearest_create()
Moreover, the line:
model.train(samples,responses)
will also raise an error; this should fix it:
model.train(samples,cv2.ml.ROW_SAMPLE,responses) # Might be adapted
Hope this will help.

Related

AttributeError: 'numpy.ndarray' object has no attribute 'set'

when I write this code: (my entire code, school project on Augmented Reality)
Everything worked perfectly until I tried to run the video.
...........................................................................................................................................................................................................
import cv2
import numpy as np
# --- Set-up: webcam, target image, target video and ORB features -----------
cap = cv2.VideoCapture(2)
imgTarget = cv2.imread('F1racecars.jpeg')
vidTarget= cv2.VideoCapture('F1racecars.mp4')
# NOTE: from here on ``vidTarget`` no longer names the VideoCapture; it is
# rebound to the frame returned by read(). The later ``vidTarget.set(...)``
# calls are made on that frame, which is where the reported AttributeError
# comes from.
success, vidTarget = vidTarget.read()
imgTarget = cv2.resize(imgTarget, (640, 360))
hT, wT, cT = imgTarget.shape
vidTarget = cv2.resize(vidTarget, (wT, hT))

orb = cv2.ORB_create(nfeatures=1000)
kp1, des1 = orb.detectAndCompute(imgTarget, None)

detect = False
fcount = 0

while True:
    success, imgWebcam = cap.read()
    imgAug = imgWebcam.copy()

    rows, cols, chans = imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]
    # NOTE(review): unlike the buffers below, this one swaps rows/cols and
    # uses numpy's default dtype; it is overwritten once a target is found.
    imgWarp = np.zeros((cols, rows, chans))
    masknew = np.zeros((rows, cols, chans), np.uint8)
    maskInv = np.zeros((rows, cols, chans), np.uint8)
    Mergecamfeed = np.zeros((rows, cols, chans), np.uint8)
    ARfinal = np.zeros((rows, cols, chans), np.uint8)

    if not detect:
        # Target not seen yet: rewind the overlay video.
        vidTarget.set(cv2.CAP_PROP_POS_FRAMES,0)
        fcount = 0
    else:
        # NOTE(review): get() is called with two arguments here - confirm
        # against the VideoCapture API.
        if fcount == vidTarget.get(cv2.CAP_PROP_FRAME_COUNT, 0):
            vidTarget.set(cv2.CAP_PROP_POS_FRAMES, 0)
            fcount = 0
        success, vidTarget = vidTarget.read()
        vidTarget = cv2.resize(vidTarget, (wT, hT))

    kp2, des2 = orb.detectAndCompute(imgWebcam, None)
    if des2 is None:
        print(False)
    else:
        bf = cv2.BFMatcher()
        featmatch = bf.knnMatch(des1, des2, k=2)
        # Ratio test: keep a match only if it is clearly better than the
        # second-best candidate.
        good = [m for m, n in featmatch if m.distance < 0.75 * n.distance]
        print(len(good))

        if len(good) > 20:
            detect = True
            srcpts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
            dstpts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
            matrix, mask = cv2.findHomography(srcpts, dstpts, cv2.RANSAC, 5)
            print(matrix)

            # Project the corners of the 640x360 target into the webcam frame.
            pts = np.float32([[0, 0], [0, 360], [640, 360], [640, 0]]).reshape(-1, 1, 2)
            dst = cv2.perspectiveTransform(pts, matrix)
            cv2.polylines(imgWebcam, [np.int32(dst)], True, (255, 0, 255), 3)

            # Warp the overlay into place and composite it over the webcam
            # image using the filled target polygon as a mask.
            imgWarp = cv2.warpPerspective(
                vidTarget, matrix, (imgWebcam.shape[1], imgWebcam.shape[0])
            )
            cv2.fillPoly(masknew, [np.int32(dst)], (255, 255, 255))
            maskInv = cv2.bitwise_not(masknew)
            Mergecamfeed = cv2.bitwise_and(imgAug, imgAug, None, mask=maskInv[:, :, 0])
            ARfinal = cv2.bitwise_or(imgWarp, Mergecamfeed)

    # Show every intermediate stage in its own window.
    for title, picture in (
        ('imgTarget', imgTarget),
        ('imgTargetVdo', vidTarget),
        ('webcam', imgWebcam),
        ('warp', imgWarp),
        ('mask', masknew),
        ('Modified mask', maskInv),
        ('Aug Image', Mergecamfeed),
        ('Augmented Reality Final O/P', ARfinal),
    ):
        cv2.imshow(title, picture)
    cv2.waitKey(1)
    fcount += 1
It shows like this:
AttributeError: 'numpy.ndarray' object has no attribute 'set'
Normally we ask for the full error message, with traceback. That makes it easier to identify where the error occurs. In this case though, set is only used a couple of times.
vidTarget.set(cv2.CAP_PROP_POS_FRAMES,0)
What's this thing vidTarget? The error says it's a numpy array, and is clear that such an object does not have a set method. Experienced numpy users also know that. So what kind of object did you expect it to be?
We see attribute errors for one of two reasons. Either the code writer did not read the documentation and tried to use a non-existent method, or the variable in question is not what they expected. You should know, at every step, what the variable is - not just guess or hope, but know. Test if necessary.
edit
Initially
vidTarget= cv2.VideoCapture('F1racecars.mp4')
From a quick read of cv2 docs, this has get/set methods
but then you do
success, vidTarget = vidTarget.read()
# and resize
That redefines vidTarget.

Perform Macro-Blocking detection within given x, y coordinates using Python OpenCV

I am trying to develop a script which will detect pixelation from LiveTV from an external camera. To test my script I have been using a short snippet of LiveTV which has two instances of pixelation.
See Google Drive below for video:
https://drive.google.com/file/d/1f339HJSWKhyPr1y5sf9tWW4vcXgBOVbz/view?usp=sharing
Currently I am able to filter out most of the noise in the video, and detect the pixelation. However, I am also detecting the white text (given the intensity of the text it gets picked up by the kernel I am applying).
See the code below:
import cv2
import numpy as np
# Detect candidate macro-blocking edges in every frame of the sample clip.
cap = cv2.VideoCapture("./hgtv_short.ts")
# The kernel never changes, so build it once instead of once per frame.
sharpen_kernel = np.array([[.4, .4], [-2.25, -2.25], [.4, .4]])

while True:
    success, image = cap.read()
    if not success:
        # End of stream (or read error): ``image`` is None, so stop here
        # rather than passing it on to cvtColor.
        break

    gray = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2GRAY)
    sharpen = cv2.filter2D(src=gray, ddepth=-1, kernel=sharpen_kernel)
    # NOTE(review): if ``sharpen`` is uint8 (the usual case for decoded
    # frames) this addition wraps modulo 256 - confirm that is intended.
    sharpe = sharpen + 128
    canny = cv2.Canny(image=sharpe, threshold1=245, threshold2=255, edges=1,
                      apertureSize=3, L2gradient=True)

    # np.where returns (rows, cols); cv2.circle wants (x, y) == (col, row).
    edge_rows, edge_cols = np.where(canny != [0])
    for x, y in zip(edge_cols, edge_rows):
        # Plain ints keep cv2.circle happy across OpenCV/numpy versions.
        cv2.circle(canny, (int(x), int(y)), 30, (200, 0, 0), 2)

    cv2.imshow('image', image)
    cv2.imshow('edges', canny)
    cv2.waitKey(1)

# Free the capture and close the preview windows once the clip is done.
cap.release()
cv2.destroyAllWindows()
What I would like to do is apply a threshold and findContours to the given coordinates to see if text is in the region. Then I can discern between actual pixelation and text.
NOTE:
If anyone has any other ideas on finding pixelation I am open to suggestions.
UPDATE
Here is a screenshot from the video showing the type of pixelation I am looking for in this video (macro-blocking) to be specific.
Image
Edges
From the above Images you can see that I am detecting the macro-blocking, but also the white text. I would like to be able to discern between text and actual macro-blocking.
SECOND UPDATE
After more trial and error, I found that it will be best to use some sort of reference model to help predict when an image is showing macro-blocking, pixelation, artifacts, etc...
I have decided to use the HOG (Histogram of Oriented Gradients) descriptor to create my feature vector. I have created two functions: one loops through the GOOD images and the other through the BAD images:
def pos_train_set(self):
    """Collect HOG descriptors for the positive ("Bad_Images") class.

    Every ``*.jpg`` under ``<base_path>/Bad_Images`` is resized to
    ``self.winSize``, described with ``self.hog`` and normalised. The
    descriptor is appended to ``self.training_data`` and the label ``1`` to
    ``self.labels``. Files that OpenCV cannot decode are skipped.
    """
    print("Starting to Gather Positive Photos")
    pattern = os.path.join(self.base_path, "Bad_Images", "*.jpg")
    for pos_file in glob.iglob(pattern):
        pos_img = cv2.imread(pos_file, cv2.IMREAD_COLOR)
        if pos_img is None:
            # imread reports failure by returning None instead of raising.
            print(f"Skipping unreadable image {pos_file}")
            continue
        # The original passed cv2.CV_32F (a depth constant) as the
        # interpolation flag. Its numeric value is the same as
        # INTER_LINEAR_EXACT, so naming that flag keeps the results
        # unchanged while saying what actually happens.
        pos_img = cv2.resize(pos_img, self.winSize,
                             interpolation=cv2.INTER_LINEAR_EXACT)
        pos_des = self.hog.compute(pos_img)
        pos_des = cv2.normalize(pos_des, None)
        self.labels.append(1)
        self.training_data.append(pos_des)
    print("Gathered Positive Photos")
def neg_train_set(self):
    """Collect HOG descriptors for the negative ("Good_Images") class.

    Every ``*.jpg`` under ``<base_path>/Good_Images`` is resized to
    ``self.winSize``, described with ``self.hog`` and normalised. The
    descriptor is appended to ``self.training_data`` and the label ``0`` to
    ``self.labels``. Files that OpenCV cannot decode are skipped.
    """
    print("Starting to Gather Negative Photos")
    pattern = os.path.join(self.base_path, "Good_Images", "*.jpg")
    for neg_file in glob.iglob(pattern):
        neg_img = cv2.imread(neg_file, cv2.IMREAD_COLOR)
        if neg_img is None:
            # imread reports failure by returning None instead of raising.
            print(f"Skipping unreadable image {neg_file}")
            continue
        # The original passed cv2.CV_32F (a depth constant) as the
        # interpolation flag. Its numeric value is the same as
        # INTER_LINEAR_EXACT, so naming that flag keeps the results
        # unchanged while saying what actually happens.
        neg_img = cv2.resize(neg_img, self.winSize,
                             interpolation=cv2.INTER_LINEAR_EXACT)
        neg_des = self.hog.compute(neg_img)
        neg_des = cv2.normalize(neg_des, None)
        self.labels.append(0)
        self.training_data.append(neg_des)
    print("Gathered Negative Photos")
I then train my model using the SVM(Support Vector Machines) classification algorithm.
def train_set(self):
    """Train a C-SVC model on the gathered descriptors and save it.

    Reads ``self.training_data`` (one descriptor per sample) and
    ``self.labels`` (one integer class per sample), shuffles them with a
    fixed seed, trains an SVM with the exponential chi-squared kernel and
    writes it to ``<base_path>/svm_model.yml``.

    Raises:
        ValueError: if no samples were gathered, or if the number of labels
            does not match the number of samples.
    """
    if not self.training_data:
        raise ValueError("no training data gathered; run the *_train_set steps first")
    if len(self.training_data) != len(self.labels):
        raise ValueError(
            f"{len(self.training_data)} samples but {len(self.labels)} labels"
        )

    print("Starting to Convert")
    td = np.float32(self.training_data)
    # One row per sample, whatever shape each descriptor had on its own.
    td = td.reshape(len(td), -1)
    # Use an explicit int32 dtype for the class labels instead of relying on
    # the platform's default integer width.
    lab = np.array(self.labels, dtype=np.int32)
    print("Converted List")

    print("Starting Shuffle")
    rand = np.random.RandomState(10)  # fixed seed, so the order is reproducible
    shuffle = rand.permutation(len(td))
    td = td[shuffle]
    lab = lab[shuffle]
    print("Shuffled List")

    print("Starting SVM")
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    # Exponential Chi2 kernel, similar to the RBF kernel:
    #   K(xi, xj)    = exp(-gamma * chi2(xi, xj))
    #   chi2(xi, xj) = (xi - xj)^2 / (xi + xj),   gamma > 0
    svm.setKernel(cv2.ml.SVM_CHI2)
    svm.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
    svm.setGamma(5.383)
    svm.setC(2.67)

    print("Starting Training")
    svm.train(td, cv2.ml.ROW_SAMPLE, lab)

    print("Saving to .yml")
    svm.save(os.path.join(self.base_path, "svm_model.yml"))
I then use that SVM model to try and predict if an image is a 1 (Bad Image) or a 0 (Good Image). With the help of the kernel and edge detection I used in my first attempt:
def predict(self):
    """Classify every frame of each file in ``self.files`` and save the hits.

    Each frame is resized to ``self.winSize``, described with ``self.hog``
    and passed to the saved SVM. Frames predicted as class 1 are additionally
    run through the sharpen + Canny edge check; if any edge pixel survives,
    the frame is written to ``<base_path>/1_Frames/<file name>/found_<x>.jpg``.
    Frames predicted as class 0 are written to
    ``<base_path>/0_Frames/found_<y>.jpg``. ``self.x`` and ``self.y`` are the
    running counters used in those file names.

    Changes from the previous version:
      * every frame is analysed; the old code read a second frame in each
        branch which the next loop iteration overwrote, so every other frame
        was skipped;
      * output directories are created with ``makedirs(exist_ok=True)``, so a
        rerun or a missing parent folder no longer fails, and ``0_Frames`` is
        created at all;
      * the model is loaded from ``base_path``, which is where ``train_set``
        saves it.
    """
    svm = cv2.ml.SVM_load(os.path.join(self.base_path, "svm_model.yml"))

    good_dir = os.path.join(self.base_path, "0_Frames")
    os.makedirs(good_dir, exist_ok=True)

    # The kernel is constant, so build it once rather than once per frame.
    sharpen_kernel = np.array([[.4, .4], [-2.25, -2.25], [.4, .4]])

    for file in self.files:
        bad_dir = os.path.join(self.base_path, "1_Frames", os.path.basename(file))
        os.makedirs(bad_dir, exist_ok=True)
        print(f"Starting predict on {file}")

        cap = cv2.VideoCapture(file)
        try:
            while cap.isOpened():
                success, image = cap.read()
                if not success:
                    break

                # INTER_LINEAR_EXACT has the same numeric value as the
                # cv2.CV_32F constant that was passed here before, so the
                # resize result is unchanged.
                img = cv2.resize(image, self.winSize,
                                 interpolation=cv2.INTER_LINEAR_EXACT)
                test_data = self.hog.compute(img)
                test_data = cv2.normalize(test_data, None)
                # A single sample as one row, whether the descriptor came
                # back as a column vector or as a flat array.
                test_data = np.float32(test_data).reshape(1, -1)

                if not np.any(test_data):
                    # All-zero descriptor: nothing to classify in this frame.
                    print("Invalid Dimension")
                    continue

                response = svm.predict(test_data)[1]
                if int(response[0][0]) == 1:
                    gray = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2GRAY)
                    sharpen = cv2.filter2D(src=gray, ddepth=-1, kernel=sharpen_kernel)
                    # NOTE(review): if ``sharpen`` is uint8 this addition
                    # wraps modulo 256 - confirm that is intended.
                    sharpe = sharpen + 128
                    canny = cv2.Canny(image=sharpe, threshold1=245, threshold2=255,
                                      edges=1, apertureSize=3, L2gradient=True)
                    if canny.any():
                        # At least one edge pixel: keep the frame as a hit.
                        cv2.imwrite(os.path.join(bad_dir, f'found_{self.x}.jpg'), image)
                        self.x += 1
                else:
                    cv2.imwrite(os.path.join(good_dir, f'found_{self.y}.jpg'), image)
                    self.y += 1
        finally:
            # Release the capture even if a frame raised part-way through.
            cap.release()

    cv2.destroyAllWindows()
This method seems to work well, but I am still open to any further ideas or suggestions. I posted this new update in the hope that it may assist someone else looking for suggestions on how to detect issues in images.

Using OpenCV homography with python error in method findHomography

I have a goal to do homography on a live video by capturing my screen and processing it.
In order to do so, I took the code from this link, and manipulated it inside a while loop as follows:
from __future__ import print_function
import cv2 as cv
import numpy as np
from windowcapture import WindowCapture
# Set up the window-capture source and load the template to look for.
capture = WindowCapture('My Window')
bar_img = cv.imread('hammer.jpg', cv.IMREAD_GRAYSCALE)

while True:
    # Grab the current contents of the captured window.
    screenshot = capture.get_screenshot()
    # NOTE(review): cv.IMREAD_GRAYSCALE is an imread flag, not a COLOR_*
    # conversion code; confirm this call really produces a grayscale image
    # (cv.COLOR_BGR2GRAY is the usual code for that).
    screenshot = cv.cvtColor(screenshot, cv.IMREAD_GRAYSCALE)
    if bar_img is None or screenshot is None:
        print('Could not open or find the images!')
        exit(0)

    #-- Step 1: detect keypoints and compute descriptors (SIFT is used here)
    minHessian = 400
    detector = cv.SIFT_create()
    keypoints_obj, descriptors_obj = detector.detectAndCompute(bar_img, None)
    keypoints_scene, descriptors_scene = detector.detectAndCompute(screenshot, None)

    #-- Step 2: match the descriptor vectors with a FLANN based matcher
    matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_FLANNBASED)
    knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2)

    #-- Keep only the matches that pass Lowe's ratio test
    ratio_thresh = 0.75
    good_matches = [
        m for m, n in knn_matches if m.distance < ratio_thresh * n.distance
    ]

    #-- Draw matches (template on the left, screenshot on the right)
    canvas_h = max(bar_img.shape[0], screenshot.shape[0])
    canvas_w = bar_img.shape[1] + screenshot.shape[1]
    img_matches = np.empty((canvas_h, canvas_w, 3), dtype=np.uint8)
    cv.drawMatches(bar_img, keypoints_obj, screenshot, keypoints_scene,
                   good_matches, img_matches,
                   flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

    #-- Localize the object: matched point coordinates as (N, 2) float32
    obj = np.float32(
        [keypoints_obj[gm.queryIdx].pt for gm in good_matches]
    ).reshape(-1, 2)
    scene = np.float32(
        [keypoints_scene[gm.trainIdx].pt for gm in good_matches]
    ).reshape(-1, 2)
    # NOTE: the error quoted in the post is raised by this call when fewer
    # than 4 matches survive the ratio test.
    H, _ = cv.findHomography(obj, scene, cv.RANSAC)

    #-- Corners of the template image (the object to be "detected")
    tpl_h, tpl_w = bar_img.shape[0], bar_img.shape[1]
    obj_corners = np.float32(
        [[0, 0], [tpl_w, 0], [tpl_w, tpl_h], [0, tpl_h]]
    ).reshape(4, 1, 2)
    scene_corners = cv.perspectiveTransform(obj_corners, H)

    #-- Outline the mapped object; x is shifted by the template width
    #   because the screenshot sits to the right of it in img_matches.
    x_shift = bar_img.shape[1]
    for start, end in ((0, 1), (1, 2), (2, 3), (3, 0)):
        cv.line(
            img_matches,
            (int(scene_corners[start, 0, 0] + x_shift), int(scene_corners[start, 0, 1])),
            (int(scene_corners[end, 0, 0] + x_shift), int(scene_corners[end, 0, 1])),
            (0, 255, 0), 4,
        )

    #-- Show detected matches
    cv.imshow('Good Matches & Object detection', img_matches)
    # NOTE(review): waitKey() with no delay blocks until a key is pressed,
    # so the loop advances one frame per key press - confirm this is wanted.
    cv.waitKey()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break

print('Done.')
The class WindowCapture that I used uses win32gui to capture the window (maybe it makes a difference if I used it like this and not imread?)
I get the following error when I run the code:
C:\Users\Tester\AppData\Local\Temp\pip-req-build-1i5nllza\opencv\modules\calib3d\src\fundam.cpp:385: error: (-28:Unknown error code -28) The input arrays should have at least 4 corresponding point sets to calculate Homography in function 'cv::findHomography'
Any idea why it happens?

'cv2.FlannBasedMatcher' object has no attribute 'knnMatches'

The code is for the implementation of a SIFT-based algorithm with FLANN matcher on a captured image from the webcam. The error for some reason is in the knnMatch where we deal with the captured image. The attached image link shows the error causing line. It would be great if someone could provide some solution to this issue, please comment below for specific details.
import cv2
import numpy as np
MIN_MATCH_COUNT = 30
detector = cv2.xfeatures2d.SIFT_create()

# FLANN's KD-tree index is algorithm id 1 (0 selects the linear index), and
# the parameter keys are ``trees`` / ``checks`` - the previous ``tree`` and
# ``check`` keys were not the names FLANN reads.
FLANN_INDEX_KDITREE = 1
flannParam = dict(algorithm=FLANN_INDEX_KDITREE, trees=5)
searchParam = dict(checks=50)
flann = cv2.FlannBasedMatcher(flannParam, searchParam)

trainImg = cv2.imread("E:\\EXCHANGE_Courses\\training_img1.jpg")
trainImg1 = cv2.cvtColor(trainImg, cv2.COLOR_BGR2GRAY)
trainKP, trainDecs = detector.detectAndCompute(trainImg1, None)

# Grab a single frame from the camera and keep a copy on disk.
cam = cv2.VideoCapture(1)
print(cam.isOpened())
for i in range(1):
    return_value, image = cam.read()
    if not return_value:
        cam.release()
        raise RuntimeError("could not read a frame from camera 1")
    cv2.imwrite('capture' + str(i) + '.jpg', image)
# Release the device explicitly instead of relying on ``del``.
cam.release()

while True:
    QImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    queryKP, queryDesc = detector.detectAndCompute(QImage, None)

    # Match the descriptors of the query image against the training image.
    # NOTE: this call is left exactly as posted because it is the subject of
    # the reported error; the matcher method is named ``knnMatch``.
    matches=flann.knnMatches(queryDesc,trainDecs, k=2)
    print("upper part clear")

    # Ratio test: keep a match only when it is clearly better than the
    # runner-up. Entries without two neighbours are skipped.
    goodMatch = [
        pair[0] for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]
    print("all ok here")

    if len(goodMatch) > MIN_MATCH_COUNT:
        tp = np.float32([trainKP[m.trainIdx].pt for m in goodMatch])
        qp = np.float32([queryKP[m.queryIdx].pt for m in goodMatch])
        H, status = cv2.findHomography(tp, qp, cv2.RANSAC, 3.0)
        if H is not None:
            # Use the grayscale image: the colour one has three dimensions
            # and cannot be unpacked into (h, w).
            h, w = trainImg1.shape
            # Corners in drawing order; the last one is the top-right corner
            # (w-1, 0), not (0, w-1).
            trainBorder = np.float32([[[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]])
            queryBorder = cv2.perspectiveTransform(trainBorder, H)
            # int32, not uint8: pixel coordinates routinely exceed 255.
            cv2.polylines(QImage, [np.int32(queryBorder)], True, (0, 255, 0), 3)
    else:
        # Both values must be passed as one tuple to the % operator.
        print("Not enough matches - %d/%d" % (len(goodMatch), MIN_MATCH_COUNT))

    cv2.imshow('results', QImage)
    if cv2.waitKey(10) == ord('q'):
        break

cv2.destroyAllWindows()
A bit late to the party, but I'm guessing you meant knnMatch rather than knnMatches.

How to enhance Text detection in image using Python

I am trying to detect text in images, especially images with quotes, using OpenCV in Python. To do that, I first train on some text images: I detect each character of the text in the image and label it for training. For images with a regular font style the characters are detected properly, but for some images the text (character) areas cannot be detected properly. I have attached the code below. How can I modify it so that the characters are detected properly?
import sys
import numpy as np
import cv2
import os
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
def main():
    """Interactively label character contours and save them as training data.

    The training image is thresholded and its external contours are found.
    Every contour larger than ``MIN_CONTOUR_AREA`` is shown in turn; the key
    pressed by the user (0-9, A-Z, a-z) becomes its label, and Esc exits the
    program. Labels are written to ``classificationsNEWG.txt`` and the
    flattened, resized character images to ``flattened_imagesNEWG.txt``.
    """
    # Raw string: the backslashes in this Windows path are not escape
    # sequences. The resulting path is the same as before.
    imgTrainingNumbers = cv2.imread(
        r"E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png"
    )
    if imgTrainingNumbers is None:
        print ("error: image not read from file \n\n")
        os.system("pause")
        return

    imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
    imgBlurred = cv2.GaussianBlur(imgGray, (5, 5), 0)
    imgThresh = cv2.adaptiveThreshold(imgBlurred,
                                      255,
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY_INV,
                                      11,
                                      2)
    cv2.imshow("imgThresh", imgThresh)

    # findContours returns (image, contours, hierarchy) in OpenCV 3 but
    # (contours, hierarchy) in OpenCV 2 and 4; the contours are always the
    # second-to-last item. A copy is passed in because older versions modify
    # their input.
    npaContours = cv2.findContours(imgThresh.copy(),
                                   cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)[-2]

    flattenedSize = RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT
    flattenedRows = []
    intClassifications = []
    # Key codes accepted as labels.
    intValidChars = {
        ord(ch)
        for ch in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    }

    for npaContour in npaContours:
        if cv2.contourArea(npaContour) <= MIN_CONTOUR_AREA:
            continue

        intX, intY, intW, intH = cv2.boundingRect(npaContour)
        cv2.rectangle(imgTrainingNumbers,
                      (intX, intY),
                      (intX + intW, intY + intH),
                      (0, 0, 255),
                      2)
        imgROI = imgThresh[intY:intY + intH, intX:intX + intW]
        imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

        cv2.imshow("imgROI", imgROI)
        cv2.imshow("imgROIResized", imgROIResized)
        cv2.imshow("training_numbers.png", imgTrainingNumbers)

        # Mask to the low byte: some platforms set extra high bits in the
        # value returned by waitKey.
        intChar = cv2.waitKey(0) & 0xFF
        if intChar == 27:  # Esc
            sys.exit()
        elif intChar in intValidChars:
            print(intChar)
            intClassifications.append(intChar)
            flattenedRows.append(imgROIResized.reshape(flattenedSize))

    fltClassifications = np.array(intClassifications, np.float32)
    npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))
    # Build the sample matrix once at the end instead of growing it with
    # np.append on every key press; float64 keeps the saved text format the
    # same as before.
    npaFlattenedImages = np.array(flattenedRows, np.float64).reshape(-1, flattenedSize)

    print ("\n\ntraining complete !!\n")
    np.savetxt("classificationsNEWG.txt", npaClassifications)
    np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
    cv2.destroyAllWindows()
    return
if __name__ == "__main__":
main()
What you are trying to do is a very naive approach; just applying a threshold and detecting contours won't work here. A lot of research papers have been published on this task. You may refer to those and try to implement one of them, or you can use the image_to_boxes function of the well-known Tesseract OCR engine. You can download it from here and, as you are using Python, you can install pytesseract (a Python wrapper for Tesseract) from here and use the following code to achieve what you are expecting.
import pytesseract
import cv2
# Load the image, enlarge it and binarise it before handing it to Tesseract.
originalImg = cv2.imread('tp.png')
if originalImg is None:
    # imread returns None when the file is missing or cannot be decoded.
    raise FileNotFoundError("could not read 'tp.png'")
originalImg = cv2.resize(originalImg, None, fx=2.5, fy=2.5)
img = cv2.cvtColor(originalImg, cv2.COLOR_BGR2GRAY)
_, img = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY)
h, w = img.shape

# image_to_boxes yields one line per character: "<char> <x1> <y1> <x2> <y2> ...".
for line in pytesseract.image_to_boxes(img).splitlines():
    fields = line.split()
    if len(fields) < 5:
        # Blank or malformed line: there is no box to draw.
        continue
    x1, y1, x2, y2 = (int(value) for value in fields[1:5])
    # The y values are subtracted from the image height, as in the original
    # drawing call, to convert them to OpenCV's top-left origin.
    cv2.rectangle(originalImg, (x1, h - y1), (x2, h - y2), (0, 0, 255), 1)

cv2.imshow('', originalImg)
# Without waitKey the window is never drawn and the script exits at once.
cv2.waitKey(0)
cv2.destroyAllWindows()
The resultant image
Note that there are many false detections, you need to ignore them in your training process.

Categories