Background extraction in Python using OpenCV

I am trying to extract a background image from a video so I can detect moving objects in it.
I found functions like cv2.BackgroundSubtractorMOG(), but I just can't get them to work.
Does anyone have experience with this?
I created the object with mog = cv2.BackgroundSubtractorMOG(300, -1, -1, -1).
Then I try mog.apply(Nmat, Nforemat, -1), but that doesn't seem to work; I get the following
error:
......\OpenCV-2.4.0\modules\video\src\bgfg_gaussmix.cpp:117: error: (-215) CV_MAT_DEPTH(frameType) == CV_8U
Nmat and Nforemat are numpy arrays, because I was also getting an error if they weren't.
Here is the work in progress:
import cv
import cv2
import numpy as np

if __name__ == '__main__':
    cv.NamedWindow("test1", cv.CV_WINDOW_AUTOSIZE)
    cv.NamedWindow("test2", cv.CV_WINDOW_AUTOSIZE)
    capture = cv.CreateFileCapture('test.avi')
    frame = cv.QueryFrame(capture)
    img = cv.CreateImage(cv.GetSize(frame), 8, 1)
    thresh = cv.CreateImage(cv.GetSize(frame), 8, 1)
    foreground = cv.CreateImage(cv.GetSize(frame), 8, 1)
    foremat = cv.GetMat(foreground)
    Nforemat = np.array(foremat, dtype=np.float32)
    thresh = cv.CreateImage(cv.GetSize(img), 8, 1)
    mog = cv2.BackgroundSubtractorMOG()
    loop = True
    nframes = 0
    while loop:
        frame = cv.QueryFrame(capture)
        mat = cv.GetMat(frame)
        Nmat = np.array(mat, dtype=np.float32)
        cv.CvtColor(frame, img, cv.CV_BGR2GRAY)
        if frame == None:
            break
        mog.apply(Nmat, Nforemat, -1)
        cv.Threshold(img, thresh, 100, 255, cv.CV_THRESH_BINARY)
        cv.ShowImage("test1", thresh)
        cv.ShowImage("test2", frame)
        char = cv.WaitKey(50)
        if char != -1:
            if char == 27:
                break
    cv.DestroyWindow("test1")
    cv.DestroyWindow("test2")

Change
Nmat = np.array(mat, dtype=np.float32)
to
Nmat = np.array(mat, dtype=np.uint8)
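The assertion (-215) CV_MAT_DEPTH(frameType) == CV_8U is the subtractor refusing anything other than 8-bit input, which is why the uint8 conversion fixes it. For reference, here is a minimal sketch of the same loop using only the cv2 API, which sidesteps the cv/cv2 conversions entirely (assuming OpenCV 2.4.x, where cv2.BackgroundSubtractorMOG exists, and a video named test.avi):

import cv2

# Frames read through cv2.VideoCapture are already uint8 numpy arrays,
# so they can be fed straight into the subtractor.
cap = cv2.VideoCapture('test.avi')
mog = cv2.BackgroundSubtractorMOG()

while True:
    ok, frame = cap.read()
    if not ok:  # end of video
        break
    fgmask = mog.apply(frame)  # 8-bit single-channel foreground mask
    cv2.imshow('foreground', fgmask)
    if cv2.waitKey(50) & 0xFF == 27:  # Esc quits
        break

cap.release()
cv2.destroyAllWindows()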

Why are you using these lines:
thresh = cv.CreateImage(cv.GetSize(img),8,1)
and
cv.Threshold(img,thresh,100,255,cv.CV_THRESH_BINARY)
?

Related

cv error on image stacking using cv.hconcat

I'm trying to read an image, convert it to a different color space, split it into its individual components, and display all four in a tile, as shown below:
[ Original , Component_1
Component_2, Component_3 ]
But when I try to do it, I get this error:
File "chromaKey.py", line 64, in <module>
img_obj.colorSpaceComponents(option,path)
File "chromaKey.py", line 31, in colorSpaceComponents
img_2d_tile = cv.vconcat([cv.hconcat(img_list) for img_list in img_2d])
File "chromaKey.py", line 31, in <listcomp>
img_2d_tile = cv.vconcat([cv.hconcat(img_list) for img_list in img_2d])
cv2.error: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\matrix_operations.cpp:67: error: (-215:Assertion failed) src[i].dims <= 2 && src[i].rows == src[0].rows && src[i].type() == src[0].type() in function 'cv::hconcat'
I've also checked the dimensions and they seem to be the same for all 4 images. I'm not sure why they won't stack properly.
Here's my code below:
# Importing Packages
import sys
import cv2 as cv
import numpy as np

class Image_Handler:
    def __init__(self):
        pass

    def colorSpaceComponents(self, option, path):
        # reading images
        img_original = cv.imread(path)
        c1 = c2 = c3 = ''
        # Converting the images to different color-spaces and grey-scaling them
        if option == '-XYZ':
            img_converted = cv.cvtColor(img_original, cv.COLOR_BGR2XYZ)
            c1, c2, c3 = cv.split(img_converted)
        elif option == '-YCrCb':
            img_converted = cv.cvtColor(img_original, cv.COLOR_BGR2YCrCb)
            c1, c2, c3 = cv.split(img_converted)
        elif option == '-Lab':
            img_converted = cv.cvtColor(img_original, cv.COLOR_BGR2Lab)
            c1, c2, c3 = cv.split(img_converted)
        elif option == '-HSB':
            img_converted = cv.cvtColor(img_original, cv.COLOR_BGR2HSV)
            c1, c2, c3 = cv.split(img_converted)
        elif option == '-RGB':
            c1, c2, c3 = cv.split(img_original)
        # Structuring the images to tiles
        img_2d = [[img_original, c1], [c2, c3]]
        img_2d_tile = cv.vconcat([cv.hconcat(img_list) for img_list in img_2d])
        # Creating the windows
        cv.namedWindow("Stacked_Image", cv.WINDOW_NORMAL)
        cv.resizeWindow("Stacked_Image", 1280, 720)
        # Displaying the result
        cv.imshow('Stacked_Image', img_2d_tile)
        cv.waitKey(0)
        # Destroying the windows
        cv.destroyAllWindows()
        return

if __name__ == "__main__":
    # Getting the arguments given to the program
    argument_one = sys.argv[1]
    argument_two = sys.argv[2]
    # Checking the first argument, to determine if it's task 1 or task 2
    task_flag = 0
    if '-' in argument_one:
        task_flag = 1
    else:
        task_flag = 2
    # Creating an object for the class Image_Handler
    img_obj = Image_Handler()
    # Calling the function according to task flag
    if task_flag == 1:
        path = str(sys.argv[2])
        option = str(sys.argv[1])
        img_obj.colorSpaceComponents(option, path)
        print('Task 01 complete')
Thanks for your answers in advance. Also attaching an image of the desired result.
The problem is that I'm concatenating images with different shapes: the original has 3 channels, while the individual components c1, c2 and c3 are greyscale images, so their shape has only a width and a height. All we need to do is add a channel to each component using:
c1 = np.stack((c1,)*3, axis=-1)
c2 = np.stack((c2,)*3, axis=-1)
c3 = np.stack((c3,)*3, axis=-1)
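As a side note (my addition, not part of the original answer), cv.cvtColor can be used to the same effect, since COLOR_GRAY2BGR simply replicates the single channel three times:

# Assumed alternative to np.stack: expand each greyscale component to 3 channels
c1 = cv.cvtColor(c1, cv.COLOR_GRAY2BGR)
c2 = cv.cvtColor(c2, cv.COLOR_GRAY2BGR)
c3 = cv.cvtColor(c3, cv.COLOR_GRAY2BGR)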

AttributeError: 'numpy.ndarray' object has no attribute 'set'

I get this error when I run the code below (my entire code, a school project on Augmented Reality). Everything worked perfectly until I tried to run the video.
import cv2
import numpy as np

cap = cv2.VideoCapture(2)
imgTarget = cv2.imread('F1racecars.jpeg')
vidTarget = cv2.VideoCapture('F1racecars.mp4')
success, vidTarget = vidTarget.read()
imgTarget = cv2.resize(imgTarget, (640, 360))
hT, wT, cT = imgTarget.shape
vidTarget = cv2.resize(vidTarget, (wT, hT))

orb = cv2.ORB_create(nfeatures=1000)
kp1, des1 = orb.detectAndCompute(imgTarget, None)
detect = False
fcount = 0

while True:
    success, imgWebcam = cap.read()
    imgAug = imgWebcam.copy()
    imgWarp = np.zeros((imgWebcam.shape[1], imgWebcam.shape[0], imgWebcam.shape[2]))
    masknew = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    maskInv = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    Mergecamfeed = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    ARfinal = np.zeros((imgWebcam.shape[0], imgWebcam.shape[1], imgWebcam.shape[2]), np.uint8)
    if detect is False:
        vidTarget.set(cv2.CAP_PROP_POS_FRAMES, 0)
        fcount = 0
    else:
        if fcount == vidTarget.get(cv2.CAP_PROP_FRAME_COUNT, 0):
            vidTarget.set(cv2.CAP_PROP_POS_FRAMES, 0)
            fcount = 0
    success, vidTarget = vidTarget.read()
    vidTarget = cv2.resize(vidTarget, (wT, hT))
    kp2, des2 = orb.detectAndCompute(imgWebcam, None)
    if des2 is None:
        print(False)
    else:
        bf = cv2.BFMatcher()
        featmatch = bf.knnMatch(des1, des2, k=2)
        good = []
        for m, n in featmatch:
            if m.distance < 0.75 * n.distance:
                good.append(m)
        print(len(good))
        if len(good) > 20:
            detect = True
            srcpts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
            dstpts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
            matrix, mask = cv2.findHomography(srcpts, dstpts, cv2.RANSAC, 5)
            print(matrix)
            pts = np.float32([[0, 0], [0, 360], [640, 360], [640, 0]]).reshape(-1, 1, 2)
            dst = cv2.perspectiveTransform(pts, matrix)
            cv2.polylines(imgWebcam, [np.int32(dst)], True, (255, 0, 255), 3)
            imgWarp = cv2.warpPerspective(vidTarget, matrix, (imgWebcam.shape[1], imgWebcam.shape[0]))
            cv2.fillPoly(masknew, [np.int32(dst)], (255, 255, 255))
            maskInv = cv2.bitwise_not(masknew)
            Mergecamfeed = cv2.bitwise_and(imgAug, imgAug, None, mask=maskInv[:, :, 0])
            ARfinal = cv2.bitwise_or(imgWarp, Mergecamfeed)
    cv2.imshow('imgTarget', imgTarget)
    cv2.imshow('imgTargetVdo', vidTarget)
    cv2.imshow('webcam', imgWebcam)
    cv2.imshow('warp', imgWarp)
    cv2.imshow('mask', masknew)
    cv2.imshow('Modified mask', maskInv)
    cv2.imshow('Aug Image', Mergecamfeed)
    cv2.imshow('Augmented Reality Final O/P', ARfinal)
    cv2.waitKey(1)
    fcount += 1
It fails with:
AttributeError: 'numpy.ndarray' object has no attribute 'set'
Normally we ask for the full error message, with traceback. That makes it easier to identify where the error occurs. In this case though, set is only used a couple of times.
vidTarget.set(cv2.CAP_PROP_POS_FRAMES,0)
What's this thing vidTarget? The error says it's a numpy array, and it is clear that such an object does not have a set method. Experienced numpy users also know that. So what kind of object did you expect it to be?
We see attribute errors for one of two reasons: either the code writer did not read the documentation and tried to use a non-existent method, or the variable in question is not what they expected. You should know, at every step, what the variable is - not just guess or hope, know. Test if necessary.
edit
Initially
vidTarget= cv2.VideoCapture('F1racecars.mp4')
From a quick read of the cv2 docs, this has get/set methods.
But then you do
success, vidTarget = vidTarget.read()
# and resize
That rebinds vidTarget to the returned frame, a numpy array, so the later vidTarget.set(...) call fails.
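A minimal sketch of the fix (my illustration; the variable name vidFrame is hypothetical): keep the capture object and the current frame in separate variables, so the capture keeps its get/set methods.

import cv2

vidTarget = cv2.VideoCapture('F1racecars.mp4')  # capture object: has get/set/read
success, vidFrame = vidTarget.read()            # frame: a plain numpy array

# later, inside the loop:
vidTarget.set(cv2.CAP_PROP_POS_FRAMES, 0)       # rewinding works on the capture
success, vidFrame = vidTarget.read()
vidFrame = cv2.resize(vidFrame, (640, 360))     # resize the frame, not the capture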

How to enhance Text detection in image using Python

I am trying to detect text in images, especially images with quotes, using OpenCV in Python. For that I first train on some text images: I detect each character of the text in the training image. For images with a regular font the characters are detected properly, but for some images the text (character) areas can't be detected properly. I attached the code below. How can I modify it so that the characters are detected properly?
import sys
import numpy as np
import cv2
import os

MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30

def main():
    imgTrainingNumbers = cv2.imread("E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png")
    if imgTrainingNumbers is None:
        print("error: image not read from file \n\n")
        os.system("pause")
        return
    imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
    imgBlurred = cv2.GaussianBlur(imgGray, (5, 5), 0)
    imgThresh = cv2.adaptiveThreshold(imgBlurred,
                                      255,
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY_INV,
                                      11,
                                      2)
    cv2.imshow("imgThresh", imgThresh)
    imgThreshCopy = imgThresh.copy()
    imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
                                                              cv2.RETR_EXTERNAL,
                                                              cv2.CHAIN_APPROX_SIMPLE)
    npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
    intClassifications = []
    intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
                     ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'),
                     ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'),
                     ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z'), ord('a'), ord('b'), ord('c'), ord('d'),
                     ord('e'), ord('f'), ord('g'), ord('h'), ord('i'), ord('j'), ord('k'), ord('l'), ord('m'), ord('n'), ord('o'),
                     ord('p'), ord('q'), ord('r'), ord('s'), ord('t'), ord('u'), ord('v'), ord('w'), ord('x'), ord('y'), ord('z')]
    for npaContour in npaContours:
        if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:
            [intX, intY, intW, intH] = cv2.boundingRect(npaContour)
            cv2.rectangle(imgTrainingNumbers,
                          (intX, intY),
                          (intX + intW, intY + intH),
                          (0, 0, 255),
                          2)
            imgROI = imgThresh[intY:intY+intH, intX:intX+intW]
            imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
            cv2.imshow("imgROI", imgROI)
            cv2.imshow("imgROIResized", imgROIResized)
            cv2.imshow("training_numbers.png", imgTrainingNumbers)
            intChar = cv2.waitKey(0)
            if intChar == 27:
                sys.exit()
            elif intChar in intValidChars:
                print(intChar)
                intClassifications.append(intChar)
                print(intChar)
                npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
                npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0)
    fltClassifications = np.array(intClassifications, np.float32)
    npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))
    print("\n\ntraining complete !!\n")
    np.savetxt("classificationsNEWG.txt", npaClassifications)
    np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
    cv2.destroyAllWindows()
    return

if __name__ == "__main__":
    main()
What you are trying is a very naive approach; just applying a threshold and detecting contours won't work here. A lot of research papers have been published on this task. You may refer to those and try to implement them, or you can use the image_to_boxes function of the well-known Tesseract OCR. You can download it from here, and since you are using Python you can install pytesseract, the Python wrapper for Tesseract, from here, then use the following code to achieve what you are expecting.
import pytesseract
import cv2

originalImg = cv2.imread('tp.png')
originalImg = cv2.resize(originalImg, None, fx=2.5, fy=2.5)
img = cv2.cvtColor(originalImg, cv2.COLOR_BGR2GRAY)
_, img = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY)
h, w = img.shape
letters = pytesseract.image_to_boxes(img)
letters = letters.split('\n')
letters = [letter.split() for letter in letters]
for letter in letters:
    cv2.rectangle(originalImg, (int(letter[1]), h - int(letter[2])), (int(letter[3]), h - int(letter[4])), (0, 0, 255), 1)
cv2.imshow('', originalImg)
cv2.waitKey(0)  # added so the window stays open until a key is pressed
The resulting image is attached. Note that there are many false detections; you need to ignore them in your training process.
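One detail worth spelling out (my note, not part of the original answer): image_to_boxes returns one line per character in Tesseract's box format, char x1 y1 x2 y2 page, with coordinates measured from the bottom-left corner of the image, while OpenCV measures from the top-left. That is why the code above flips the y values with h - int(...). A tiny sketch of the conversion, using a made-up box line:

# Hypothetical line as returned by pytesseract.image_to_boxes()
line = 'T 36 92 53 116 0'
char, x1, y1, x2, y2, page = line.split()
h = 150  # image height, taken from img.shape in the answer's code

# Tesseract: origin bottom-left; OpenCV: origin top-left
top_left = (int(x1), h - int(y2))
bottom_right = (int(x2), h - int(y1))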

OpenCV + Numpy Script

The issue I'm having is that the two scripts below both output this error: https://i.imgur.com/sLH6Mv4.png
TypeError: FeatureDetector.detect() takes at most 2 arguments (3 given)
I can avoid it in script 2 below by deleting
useProvidedKeypoints = False
from the end of
kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False)
which leads to this error in the 2nd script: https://i.imgur.com/ap0odal.png
TypeError: float() argument must be a string or a number
and this error in the first script: i.imgur.com/UVzNvP1.png (2-link limit, added manually)
TypeError: trainData data type = 17 is not supported
Any help would be greatly appreciated; the main thing I want to come out of this with is a script I can tweak and edit until I understand the functions involved a little better.
Summary: I'm not really sure why kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False) is telling me there are too many arguments, because the person who helped me write this seemed to think it should work.
Script 1:
import cv2
import numpy as np

img = cv2.imread('win18.jpg')
imgg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
surf = cv2.SURF()
kp, descritors = surf.detect(imgg, None, useProvidedKeypoints = False)
samples = np.array(descritors)
responses = np.arange(len(kp), dtype = np.float32)
knn = cv2.KNearest()
knn.train(samples, responses)

template = cv2.imread('win17.jpg')
templateg = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
keys, desc = surf.detect(templateg, None, useProvidedKeypoints = False)

for h, des in enumerate(desc):
    des = np.array(des, np.float32).reshape((1, 128))
    retval, results, neigh_resp, dists = knn.find_nearest(des, 1)
    res, dist = int(results[0][0]), dists[0][0]
    if dist < 0.1:
        color = (0, 0, 255)
    else:
        print dist
        color = (255, 0, 0)
    x, y = kp[res].pt
    center = (int(x), int(y))
    cv2.circle(img, center, 2, color, -1)
    x, y = keys[h].pt
    center = (int(x), int(y))
    cv2.circle(template, center, 2, color, -1)

cv2.imshow('img', img)
cv2.imshow('tm', template)
cv2.waitKey(0)
cv2.destroyAllWindows()
Script 2:
import cv2
import numpy

opencv_haystack = cv2.imread('win12.jpg')
opencv_needle = cv2.imread('win1.jpg')
ngrey = cv2.cvtColor(opencv_needle, cv2.COLOR_BGR2GRAY)
hgrey = cv2.cvtColor(opencv_haystack, cv2.COLOR_BGR2GRAY)

hessian_threshold = 85
detector = cv2.SURF(hessian_threshold)
(hkeypoints, hdescriptors) = detector.detect(hgrey, None, useProvidedKeypoints = False)
(nkeypoints, ndescriptors) = detector.detect(ngrey, None, useProvidedKeypoints = False)

rowsize = len(hdescriptors) / len(hkeypoints)
if rowsize > 1:
    hrows = numpy.array(hdescriptors, dtype = numpy.float32).reshape((-1, rowsize))
    nrows = numpy.array(ndescriptors, dtype = numpy.float32).reshape((-1, rowsize))
else:
    hrows = numpy.array(hdescriptors, dtype = numpy.float32)
    nrows = numpy.array(ndescriptors, dtype = numpy.float32)
    rowsize = len(hrows[0])

samples = hrows
responses = numpy.arange(len(hkeypoints), dtype = numpy.float32)
knn = cv2.KNearest()
knn.train(samples, responses)

# classify each needle descriptor against the haystack set
# (this loop header appears to have been lost from the listing above;
# it mirrors the loop in script 1)
for i, descriptor in enumerate(nrows):
    descriptor = numpy.array(descriptor, dtype = numpy.float32).reshape((1, rowsize))
    retval, results, neigh_resp, dists = knn.find_nearest(descriptor, 1)
    res, dist = int(results[0][0]), dists[0][0]
    if dist < 0.1:
        color = (0, 0, 255)
    else:
        color = (255, 0, 0)
    x, y = hkeypoints[res].pt
    center = (int(x), int(y))
    cv2.circle(opencv_haystack, center, 2, color, -1)
    x, y = nkeypoints[i].pt
    center = (int(x), int(y))
    cv2.circle(opencv_needle, center, 2, color, -1)

cv2.imshow('haystack', opencv_haystack)
cv2.imshow('needle', opencv_needle)
cv2.waitKey(0)
cv2.destroyAllWindows()
Hi, I know it's late, but for those still facing this problem: try replacing detect() with detectAndCompute().
That got rid of the error for me.
when in doubt, ...
>>> s = cv2.SURF()
>>> help(s.detect)
Help on built-in function detect:

detect(...)
    detect(image[, mask]) -> keypoints
so, your assumptions about the args to SURF.detect() were quite off.
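Putting the two answers together, a minimal sketch (my illustration; it assumes a build where cv2.SURF is available and exposes detectAndCompute, as the first answer suggests):

import cv2

img = cv2.imread('win18.jpg')
imgg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

surf = cv2.SURF()
# detect() only takes (image[, mask]) and returns keypoints;
# detectAndCompute() returns both the keypoints and their descriptors
kp, descriptors = surf.detectAndCompute(imgg, None)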

data type errors for input images of cv2.calcOpticalFlowPyrLK

I'm running OpenCV 2.4.1 through the Python bindings and am having difficulty calculating the optical flow.
Specifically this section of code:
# calculate the optical flow
if prev_saturation_thresh_img == None:
    prev_saturation_thresh_img = saturation_img
if i >= 0:
    prev_img = prev_saturation_thresh_img
    next_img = saturation_thresh_img
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_img, next_img, tracks_np, **lk_params)
Returns the error:
<unknown> is not a numpy array
So then I try to convert the images to numpy arrays:
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
Now I have a new error:
<unknown> data type = 17 is not supported
In a last-ditch effort I convert the images to cvmat (from iplimage) before converting them to numpy arrays, just to see what happens, and get:
error: ..\..\..\OpenCV-2.4.1\modules\video\src\lkpyramid.cpp:607: error: (-215) nextPtsMat.checkVector(2, CV_32F, true) == npoints
So now I'm stuck. Below is the code in its entirety for reference:
import cv
import cv2
import numpy as np

class Target:
    def __init__(self):
        self.capture = cv.CaptureFromFile("raw_gait_cropped.avi")

    def run(self):
        # initiate font
        font = cv.InitFont(cv.CV_FONT_HERSHEY_SIMPLEX, 1, 1, 0, 3, 8)
        # instantiate images
        img_size = cv.GetSize(cv.QueryFrame(self.capture))
        hsv_img = cv.CreateImage(img_size, 8, 3)
        saturation_img = cv.CreateImage(img_size, 8, 1)
        saturation_thresh_img = cv.CreateImage(img_size, 8, 1)
        prev_saturation_thresh_img = None
        # create params for GoodFeaturesToTrack and calcOpticalFlowPyrLK
        gftt_params = dict(cornerCount=11,
                           qualityLevel=0.2,
                           minDistance=5,
                           mask=None,
                           useHarris=True)
        lk_params = dict(winSize=(15, 15),
                         maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
                         flags=cv2.OPTFLOW_USE_INITIAL_FLOW,
                         minEigThreshold=1)
        tracks = []
        writer = cv.CreateVideoWriter("angle_tracking.avi", cv.CV_FOURCC('M', 'J', 'P', 'G'), 30, cv.GetSize(hsv_img), 1)
        i = 0
        while True:
            # grab a frame from the video capture
            img = cv.QueryFrame(self.capture)
            # break the loop when the video is over
            if img == None:
                break
            # convert the image to HSV
            cv.CvtColor(img, hsv_img, cv.CV_BGR2HSV)
            # Get Saturation channel
            cv.MixChannels([hsv_img], [saturation_img], [(1, 0)])
            # Apply threshold to saturation channel
            cv.InRangeS(saturation_img, 145, 255, saturation_thresh_img)
            # locate initial features to track
            if i == 0:
                eig_image = temp_image = cv.CreateMat(img.height, img.width, cv.CV_32FC1)
                for (x, y) in cv.GoodFeaturesToTrack(saturation_thresh_img, eig_image, temp_image, **gftt_params):
                    tracks.append([(x, y)])
                    cv.Circle(saturation_thresh_img, (int(x), int(y)), 5, (255, 255, 255), -1, cv.CV_AA, 0)
                tracks_np = np.float32(tracks).reshape(-1, 2)
                print tracks
            # calculate the optical flow
            if prev_saturation_thresh_img == None:
                prev_saturation_thresh_img = saturation_img
            if i >= 0:
                prev_img = prev_saturation_thresh_img
                next_img = saturation_thresh_img
                p1, st, err = cv2.calcOpticalFlowPyrLK(prev_img, next_img, tracks_np, **lk_params)
                prev_saturation_thresh_img = saturation_img
            i = i + 1
            print i
            # display frames to users
            cv.ShowImage("Raw Video", img)
            cv.ShowImage("Saturation Channel", saturation_img)
            cv.ShowImage("Saturation Thresholded", saturation_thresh_img)
            # Listen for ESC or ENTER key
            c = cv.WaitKey(7) % 0x100
            if c == 27 or c == 10:
                break
        # close all windows once video is done
        cv.DestroyAllWindows()

if __name__ == "__main__":
    t = Target()
    t.run()
OpenCV can be very picky about the data formats it accepts. The following code extract works for me:
prev = cv.LoadImage('images/' + file_list[0])
prev = np.asarray(prev[:, :])
prev_gs = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
current = cv.LoadImage('images/' + file)
current = np.asarray(current[:, :])
current_gs = cv2.cvtColor(current, cv2.COLOR_BGR2GRAY)
features, status, track_error = cv2.calcOpticalFlowPyrLK(prev_gs, current_gs, good_features, None,
                                                         **lk_params)
Note the [:,:] when converting from images to numpy arrays; I have found that it is required.
I hope that this may solve your problem.
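For what it's worth, a minimal sketch of the same pipeline using only the cv2 API (my own illustration, assuming two consecutive frames from a capture). Note that calcOpticalFlowPyrLK wants the input points as an Nx1x2 float32 array, which is exactly what the checkVector(2, CV_32F) assertion above is testing:

import cv2
import numpy as np

cap = cv2.VideoCapture('raw_gait_cropped.avi')
ok, prev = cap.read()
ok, curr = cap.read()
prev_gs = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
curr_gs = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)

# goodFeaturesToTrack already returns an Nx1x2 float32 array,
# exactly the shape calcOpticalFlowPyrLK expects
p0 = cv2.goodFeaturesToTrack(prev_gs, maxCorners=11, qualityLevel=0.2, minDistance=5)

lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gs, curr_gs, p0, None, **lk_params)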
