I'm trying to read an image and convert it to a different color space and split all it's individual components and display all four in the form of a tile as mentioned below :
[ Original , Component_1
Component_2, Component_3 ]
But, when I try to do it. I get this error:
File "chromaKey.py", line 64, in <module>
img_obj.colorSpaceComponents(option,path)
File "chromaKey.py", line 31, in colorSpaceComponents
img_2d_tile = cv.vconcat([cv.hconcat(img_list) for img_list in img_2d])
File "chromaKey.py", line 31, in <listcomp>
img_2d_tile = cv.vconcat([cv.hconcat(img_list) for img_list in img_2d])
cv2.error: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\matrix_operations.cpp:67: error: (-215:Assertion failed) src[i].dims <= 2 && src[i].rows == src[0].rows && src[i].type() == src[0].type() in function 'cv::hconcat'
I've also checked the dimensions and they seem to be the same for all 4 images. I'm not sure why they won't stack properly.
Here's my code below:
# Importing Packages
import sys
import cv2 as cv
import numpy as np
class Image_Handler:
def __init__(self):
pass
def colorSpaceComponents(self,option,path):
# reading images
img_original = cv.imread(path)
c1 = c2 = c3 = ''
# Converting the images to different color-spaces and grey-scaling them
if option == '-XYZ':
img_converted = cv.cvtColor(img_original,cv.COLOR_BGR2XYZ)
c1,c2,c3 = cv.split(img_converted)
elif option == '-YCrCb':
img_converted = cv.cvtColor(img_original,cv.COLOR_BGR2YCrCb)
c1,c2,c3 = cv.split(img_converted)
elif option == '-Lab':
img_converted = cv.cvtColor(img_original,cv.COLOR_BGR2Lab)
c1,c2,c3 = cv.split(img_converted)
elif option == '-HSB':
img_converted = cv.cvtColor(img_original,cv.COLOR_BGR2HSV)
c1,c2,c3 = cv.split(img_converted)
elif option == '-RGB':
c1,c2,c3 = cv.split(img_original)
# Structuring the images to tiles
img_2d = [[img_original,c1],[c2,c3]]
img_2d_tile = cv.vconcat([cv.hconcat(img_list) for img_list in img_2d])
# Creating the windows
cv.namedWindow("Stacked_Image", cv.WINDOW_NORMAL)
cv.resizeWindow("Stacked_Image", 1280, 720)
# Displaying the result
cv.imshow('Stacked_Image',img_2d_tile)
cv.waitKey(0)
# Destroying the windows
cv.destroyAllWindows()
return
if __name__ == "__main__":
# Getting the arguments given to the program
argument_one = sys.argv[1]
argument_two = sys.argv[2]
# Checking the first argument, to determine if it's task 1 or task 2
task_flag = 0
if('-' in argument_one):
task_flag = 1
else:
task_flag = 2
# Creating an object for the class Image_handler
img_obj = Image_Handler()
# Calling the function according to task flag
if task_flag == 1:
path = str(sys.argv[2])
option = str(sys.argv[1])
img_obj.colorSpaceComponents(option,path)
print('Task 01 complete')
Thanks for your answers in advance. Also attaching an image of the desired result.
The problem is I'm trying to concatenate images with different shapes, most particularly the original has 3 channels where the individual components c1, c2 and c3 are greyscale images so they only have a width and height in their shape so all we need to do is add a channel to the components using.
c1 = np.stack((c1,)*3, axis=-1)
c2 = np.stack((c2,)*3, axis=-1)
c3 = np.stack((c3,)*3, axis=-1)
Related
I am posting this while using a translation.
I would like to use a live stream on YouTube to action recognition.
Action recognition
https://github.com/felixchenfy/Realtime-Action-Recognition
Read Youtube live stream with opencv
How to read Youtube live stream using openCV python?
Youtube URL
https://www.youtube.com/watch?v=DjdUEyjx8GM
We have programmed it with reference to the above.The program runs normally for a few hours, but after 4 or 5 hours, I get an error saying that communication from the remote host has been lost.Checking the youtube video, the delivery does not seem to be interrupted.
A partial code for videocapture can be found here.
url = "https://www.youtube.com/watch?v=DjdUEyjx8GM"
video = pafy.new(url)
best = video.getbest(preftype="mp4")
class ReadFromVideo(object):
def __init__(self, video_path, sample_interval=1):
''' A video reader class for reading video frames from video.
Arguments:
video_path
sample_interval {int}: sample every kth image.
''' assert isinstance(sample_interval, int) and sample_interval >= 1
self.cnt_imgs = 0
self._is_stoped = False
self._video = cv2.VideoCapture(best.url)
ret, image = self._video.read()
ret, image2 = self._video2.read()
self._next_image = image
self._next_image2 = image2
self._sample_interval = sample_interval
self._fps = self.get_fps()
if not self._fps >= 0.0001:
import warnings
warnings.warn("Invalid fps of video: {}".format(video_path))
def has_image(self):
return self._next_image is not None
def get_curr_video_time(self):
return 1.0 / self._fps * self.cnt_imgs
def read_image(self):
image = self._next_image
image2 = self._next_image2
for i in range(self._sample_interval):
if self._video.isOpened():
ret, frame = self._video.read()
self._next_image = frame
ret, frame2 = self._video2.read()
self._next_image2 = frame2
else:
self._next_image = None
self._next_image2 = None
break
self.cnt_imgs += 1
return image, image2
def stop(self):
self._video.release()
self._video2.release()
self._is_stoped = True
def __del__(self):
if not self._is_stoped:
self.stop()
def get_fps(self):
# Find OpenCV version
(major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.')
# With webcam get(CV_CAP_PROP_FPS) does not work.
# Let's see for ourselves.
# Get video properties
if int(major_ver) < 3:
fps = self._video.get(cv2.cv.CV_CAP_PROP_FPS)
else:
fps = self._video.get(cv2.CAP_PROP_FPS)
return fps
#!/usr/bin/env python
# coding: utf-8
'''
Test action recognition on
(1) a video, (2) a folder of images, (3) or web camera.
Input:
model: model/trained_classifier.pickle
Output:
result video: output/${video_name}/video.avi
result skeleton: output/${video_name}/skeleton_res/XXXXX.txt
visualization by cv2.imshow() in img_displayer
'''
The entire source code is here
'''
Example of usage:
(1) Test on video file:
python src/s5_test.py \
--model_path model/trained_classifier.pickle \
--data_type video \
--data_path data_test/exercise.avi \
--output_folder output
(2) Test on a folder of images:
python src/s5_test.py \
--model_path model/trained_classifier.pickle \
--data_type folder \
--data_path data_test/apple/ \
--output_folder output
(3) Test on web camera:
python src/s5_test.py \
--model_path model/trained_classifier.pickle \
--data_type webcam \
--data_path 0 \
--output_folder output
if True: # Include project path
import sys
import os
ROOT = os.path.dirname(os.path.abspath(__file__))+"/../"
CURR_PATH = os.path.dirname(os.path.abspath(__file__))+"/"
sys.path.append(ROOT)
import utils.lib_images_io as lib_images_io
import utils.lib_plot as lib_plot
import utils.lib_commons as lib_commons
from utils.lib_openpose import SkeletonDetector
from utils.lib_tracker import Tracker
from utils.lib_tracker import Tracker
from utils.lib_classifier import ClassifierOnlineTest
from utils.lib_classifier import * # Import all sklearn related libraries
def par(path): # Pre-Append ROOT to the path if it's not absolute
return ROOT + path if (path and path[0] != "/") else path
# -- Command-line input
def get_command_line_arguments():
def parse_args():
parser = argparse.ArgumentParser(
description="Test action recognition on \n"
"(1) a video, (2) a folder of images, (3) or web camera.")
parser.add_argument("-m", "--model_path", required=False,
default='model/trained_classifier.pickle')
parser.add_argument("-t", "--data_type", required=False, default='webcam',
choices=["video", "folder", "webcam"])
parser.add_argument("-p", "--data_path", required=False, default="",
help="path to a video file, or images folder, or webcam. \n"
"For video and folder, the path should be "
"absolute or relative to this project's root. "
"For webcam, either input an index or device name. ")
parser.add_argument("-o", "--output_folder", required=False, default='output/',
help="Which folder to save result to.")
args = parser.parse_args()
return args
args = parse_args()
if args.data_type != "webcam" and args.data_path and args.data_path[0] != "/":
# If the path is not absolute, then its relative to the ROOT.
args.data_path = ROOT + args.data_path
return args
def get_dst_folder_name(src_data_type, src_data_path):
''' Compute a output folder name based on data_type and data_path.
The final output of this script looks like this:
DST_FOLDER/folder_name/vidoe.avi
DST_FOLDER/folder_name/skeletons/XXXXX.txt
'''
assert(src_data_type in ["video", "folder", "webcam"])
if src_data_type == "video": # /root/data/video.avi --> video
folder_name = str("video")
elif src_data_type == "folder": # /root/data/video/ --> video
folder_name = src_data_path.rstrip("/").split("/")[-1]
elif src_data_type == "webcam":
# month-day-hour-minute-seconds, e.g.: 02-26-15-51-12
folder_name = lib_commons.get_time_string()
return folder_name
args = get_command_line_arguments()
SRC_DATA_TYPE = args.data_type
SRC_DATA_PATH = args.data_path
SRC_MODEL_PATH = args.model_path
DST_FOLDER_NAME = get_dst_folder_name(SRC_DATA_TYPE, SRC_DATA_PATH)
# -- Settings
cfg_all = lib_commons.read_yaml(ROOT + "config/config.yaml")
cfg = cfg_all["s5_test.py"]
CLASSES = np.array(cfg_all["classes"])
SKELETON_FILENAME_FORMAT = cfg_all["skeleton_filename_format"]
# Action recognition: number of frames used to extract features.
WINDOW_SIZE = int(cfg_all["features"]["window_size"])
# Output folder
DST_FOLDER = args.output_folder + "/" + DST_FOLDER_NAME + "/"
DST_SKELETON_FOLDER_NAME = cfg["output"]["skeleton_folder_name"]
DST_VIDEO_NAME = cfg["output"]["video_name"]
# framerate of output video.avi
DST_VIDEO_FPS = float(cfg["output"]["video_fps"])
# writer
fmt = cv2.VideoWriter_fourcc(*'MP4V')
fps = 10.0
size = (800, 480)
writer = cv2.VideoWriter('outtest.mp4', fmt, fps, size)
backup_time = 3600 # sec
time = 0 # init
now = datetime.datetime.now()
s_now = now.strftime('%Y-%m-%d-%H-%M-%S')
back_up = cv2.VideoWriter(f'{s_now}.mp4',fmt, fps, size)
# Video setttings
# If data_type is webcam, set the max frame rate.
SRC_WEBCAM_MAX_FPS = float(cfg["settings"]["source"]
["webcam_max_framerate"])
# If data_type is video, set the sampling interval.
# For example, if it's 3, then the video will be read 3 times faster.
SRC_VIDEO_SAMPLE_INTERVAL = int(cfg["settings"]["source"]
["video_sample_interval"])
# Openpose settings
OPENPOSE_MODEL = cfg["settings"]["openpose"]["model"]
OPENPOSE_IMG_SIZE = cfg["settings"]["openpose"]["img_size"]
# Display settings
img_disp_desired_rows = int(cfg["settings"]["display"]["desired_rows"])
# -- Function
def select_images_loader(src_data_type, src_data_path):
if src_data_type == "video":
images_loader = lib_images_io.ReadFromVideo(
src_data_path,
sample_interval=SRC_VIDEO_SAMPLE_INTERVAL)
elif src_data_type == "folder":
images_loader = lib_images_io.ReadFromFolder(
folder_path=src_data_path)
elif src_data_type == "webcam":
if src_data_path == "":
webcam_idx = 0
elif src_data_path.isdigit():
webcam_idx = int(src_data_path)
else:
webcam_idx = src_data_path
images_loader = lib_images_io.ReadFromWebcam(
SRC_WEBCAM_MAX_FPS, webcam_idx)
return images_loader
class MultiPersonClassifier(object):
''' This is a wrapper around ClassifierOnlineTest
for recognizing actions of multiple people.
'''
def __init__(self, model_path, classes):
self.dict_id2clf = {} # human id -> classifier of this person
# Define a function for creating classifier for new people.
self._create_classifier = lambda human_id: ClassifierOnlineTest(
model_path, classes, WINDOW_SIZE, human_id)
def classify(self, dict_id2skeleton):
''' Classify the action type of each skeleton in dict_id2skeleton '''
# Clear people not in view
old_ids = set(self.dict_id2clf)
cur_ids = set(dict_id2skeleton)
humans_not_in_view = list(old_ids - cur_ids)
for human in humans_not_in_view:
del self.dict_id2clf[human]
# Predict each person's action
id2label = {}
for id, skeleton in dict_id2skeleton.items():
if id not in self.dict_id2clf: # add this new person
self.dict_id2clf[id] = self._create_classifier(id)
classifier = self.dict_id2clf[id]
id2label[id] = classifier.predict(skeleton) # predict label
# print("\n\nPredicting label for human{}".format(id))
# print(" skeleton: {}".format(skeleton))
# print(" label: {}".format(id2label[id]))
return id2label
def get_classifier(self, id):
''' Get the classifier based on the person id.
Arguments:
id {int or "min"}
'''
if len(self.dict_id2clf) == 0:
return None
if id == 'min':
id = min(self.dict_id2clf.keys())
return self.dict_id2clf[id]
def remove_skeletons_with_few_joints(skeletons):
''' Remove bad skeletons before sending to the tracker '''
good_skeletons = []
for skeleton in skeletons:
px = skeleton[2:2+13*2:2]
py = skeleton[3:2+13*2:2]
num_valid_joints = len([x for x in px if x != 0])
num_leg_joints = len([x for x in px[-6:] if x != 0])
total_size = max(py) - min(py)
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# IF JOINTS ARE MISSING, TRY CHANGING THESE VALUES:
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
if num_valid_joints >= 5 and total_size >= 0.1 and num_leg_joints >= 0:
# add this skeleton only when all requirements are satisfied
good_skeletons.append(skeleton)
return good_skeletons
def draw_result_img(img_disp, ith_img, humans, dict_id2skeleton,
skeleton_detector, multiperson_classifier):
''' Draw skeletons, labels, and prediction scores onto image for display '''
# Resize to a proper size for display
r, c = img_disp.shape[0:2]
desired_cols = int(1.0 * c * (img_disp_desired_rows / r))
img_disp = cv2.resize(img_disp,
dsize=(desired_cols, img_disp_desired_rows))
# Draw all people's skeleton
skeleton_detector.draw(img_disp, humans)
# Draw bounding box and label of each person
if len(dict_id2skeleton):
for id, label in dict_id2label.items():
skeleton = dict_id2skeleton[id]
# scale the y data back to original
skeleton[1::2] = skeleton[1::2] / scale_h
# print("Drawing skeleton: ", dict_id2skeleton[id], "with label:", label, ".")
lib_plot.draw_action_result(img_disp, id, skeleton, label)
# Add blank to the left for displaying prediction scores of each class
img_disp = lib_plot.add_white_region_to_left_of_image(img_disp)
cv2.putText(img_disp, "Frame:" + str(ith_img),
(20, 20), fontScale=1.5, fontFace=cv2.FONT_HERSHEY_PLAIN,
color=(0, 0, 0), thickness=2)
# Draw predicting score for only 1 person
if len(dict_id2skeleton):
classifier_of_a_person = multiperson_classifier.get_classifier(
id='min')
classifier_of_a_person.draw_scores_onto_image(img_disp)
return img_disp
# Time Display
cv2.putText(img2_disp, str(now_time.isoformat(timespec='seconds')),
(20, 20), fontScale=1.5, fontFace=cv2.FONT_HERSHEY_PLAIN,
color=(0, 0, 0), thickness=2)
# Draw predicting score for only 1 person
if len(dict2_id2skeleton):
classifier_of_a_person = multiperson_classifier.get_classifier(
id='min')
classifier_of_a_person.draw_scores_onto_image(img2_disp)
return img2_disp
def get_the_skeleton_data_to_save_to_disk(dict_id2skeleton):
'''
In each image, for each skeleton, save the:
human_id, label, and the skeleton positions of length 18*2.
So the total length per row is 2+36=38
'''
skels_to_save = []
for human_id in dict_id2skeleton.keys():
label = dict_id2label[human_id]
skeleton = dict_id2skeleton[human_id]
skels_to_save.append([[human_id, label] + skeleton.tolist()])
return skels_to_save
# -- Main
if __name__ == "__main__":
# -- Detector, tracker, classifier
skeleton_detector = SkeletonDetector(OPENPOSE_MODEL, OPENPOSE_IMG_SIZE)
multiperson_tracker = Tracker()
multiperson_classifier = MultiPersonClassifier(SRC_MODEL_PATH, CLASSES)
# -- Image reader and displayer
images_loader = select_images_loader(SRC_DATA_TYPE, SRC_DATA_PATH)
img_displayer = lib_images_io.ImageDisplayer()
# -- Init output
# output folder
os.makedirs(DST_FOLDER, exist_ok=True)
os.makedirs(DST_FOLDER + DST_SKELETON_FOLDER_NAME, exist_ok=True)
# -- Read images and process
try:
ith_img = -1
while images_loader.has_image():
# Timer
now = datetime.datetime.now()
now_time = now.time()
# -- Read image
#img = images_loader.read_image()
img = images_loader.read_image()
ith_img += 1
img_disp = img.copy()
# print(f"\nProcessing {ith_img}th image ...")
# -- Detect skeletons
humans = skeleton_detector.detect(img)
skeletons, scale_h = skeleton_detector.humans_to_skels_list(humans)
)
skeletons = remove_skeletons_with_few_joints(skeletons)
# -- Track people
dict_id2skeleton = multiperson_tracker.track(skeletons)
# -- Log & Recognize action of each person
log.append(str(now_time))
if len(dict_id2skeleton):
dict_id2label = multiperson_classifier.classify(
dict_id2skeleton)
# -- Draw
img_disp = draw_result_img(img_disp, ith_img, humans, dict_id2skeleton, skeleton_detector, multiperson_classifier)
# Print label of a person
if len(dict_id2skeleton):
min_id = min(dict_id2skeleton.keys())
# print("prediced label is :", dict_id2label[min_id])
# -- Display image, and write to video.avi
# video writer
if time >= backup_time*fps:
back_up.release()
now = datetime.datetime.now()
s_now = now.strftime('%Y-%m-%d-%H-%M-%S')
back_up = cv2.VideoWriter(f'{s_now}.mp4', fmt, fps, size)
time = 0
time += 1
writer.write(img_disp)
back_up.write(img_disp)
img_displayer.display(img_disp, wait_key_ms=1)
# -- Get skeleton data and save to file
skels_to_save = get_the_skeleton_data_to_save_to_disk(dict_id2skeleton)
lib_commons.save_listlist(DST_FOLDER + DST_SKELETON_FOLDER_NAME +SKELETON_FILENAME_FORMAT.format(ith_img),skels_to_save)
if cv2.waitKey(1) == 27:
break
finally:
writer.release()
back_up.release()
cv2.destroyAllWindows()
Thanks for your help.
Been messing around with gym-retro and OpenCV. I have been looking at other code examples and tutorials. Several of them seem to be coded the same way but when I do it I get the following error. Has there been some type of update or something? Any suggestions on a fix welcome. I can comment out the reshaping and conversion to greyscale and it works. However then I am feeding too much info to my NN.
import retro
import numpy as np
import cv2
import neat
import pickle
env = retro.make('SuperMarioBros3-Nes', '1Player.World1.Level1')
def eval_genomes(genomes, config):
for genome_id,genome in genomes:
ob = env.reset()
ac = env.action_space.sample()
inx, iny, inc = env.observation_space.shape
inx = int(inx/8)
iny = int(iny/8)
net = neat.nn.recurrent.RecurrentNetwork.create(genome, config)
current_max_fitness = 0
fitness_current = 0
frame = 0
counter = 0
xpos = 0
xpos_max = 0
done = False
while not done:
env.render()
frame += 1
#print(ob)
ob = cv2.resize(ob, (inx,iny))
ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
ob = np.reshape(ob, (inx,iny))
imgarray = np.ndarray.flatten(ob)
nnOutput = net.activate(imgarray)
ob, rew, done, info = env.step(nnOutput)
#imgarray.clear()
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,neat.DefaultSpeciesSet,neat.DefaultStagnation,'config-feedforward')
p = neat.Population(config)
winner = p.run(eval_genomes)
(gameai) C:\Users\dgilk\anaconda3\envs\gameai>python mario.py
Traceback (most recent call last):
File "mario.py", line 45, in <module>
winner = p.run(eval_genomes)
File "C:\Users\dgilk\anaconda3\envs\gameai\lib\site-packages\neat\population.py", line 89, in run
fitness_function(list(iteritems(self.population)), self.config)
File "mario.py", line 32, in eval_genomes
ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
cv2.error: OpenCV(4.4.0) c:\users\appveyor\appdata\local\temp\1\pip-req-build-k8sx3e60\opencv\modules\imgproc\src\color.simd_helpers.hpp:92: error: (-2:Unspecified error) in function '__cdecl cv::impl::`anonymous-namespace'::CvtHelper<struct cv::impl::`anonymous namespace'::Set<3,4,-1>,struct cv::impl::A0xbf2c9cd3::Set<1,-1,-1>,struct cv::impl::A0xbf2c9cd3::Set<0,2,5>,2>::CvtHelper(const class cv::_InputArray &,const class cv::_OutputArray &,int)'
> Invalid number of channels in input image:
> 'VScn::contains(scn)'
> where
> 'scn' is 1
UPDATE:
Here is an output of the shrunken image. There appears to be color.
snippet of observation window
you are creating a cv2 object with shape (inx,iny) here
ob = cv2.resize(ob, (inx,iny)) # 1
ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY) # 2
cv2.COLOR_BGR2GRAY is expecting a color image which would have shape ( inx, iny, 3) , so check what shape you need your 'ob' to be , what do you need line 2 for ?
The issue I'm having is that the two scripts below are both outputting this error: https://i.imgur.com/sLH6Mv4.png
TypeError: FeatureDetector.detect() takes at most 2 arguments (3 given)
which I can avoid in the script 2 below by deleting:
useProvidedKeypoints = False
from the end of
kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False)
which leads to this error in the 2nd script :https://i.imgur.com/ap0odal.png
TypeError: float() argument must be a string or a number
And this error in the first script: i.imgur.com/UVzNvP1.png (2 link limit add manually)
TypeError: trainData data type = 17 is not supported
Any help would be greatly appreciated and the main thing I want to come out of this is with a script I can tweak and edit till I understand the functions involved slightly better.
Summary; I'm not really sure why kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False) is telling me there too many arguments because the person who helped me write this seemed to think this should work.
1
import cv2
import numpy as np
img =cv2.imread('win18.jpg')
imgg =cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
surf = cv2.SURF()
kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False)
samples = np.array(descritors)
responses = np.arange(len(kp),dtype = np.float32)
knn = cv2.KNearest()
knn.train(samples,responses)
template = cv2.imread('win17.jpg')
templateg= cv2.cvtColor(template,cv2.COLOR_BGR2GRAY)
keys,desc = surf.detect(templateg,None,useProvidedKeypoints = False)
for h,des in enumerate(desc):
des = np.array(des,np.float32).reshape((1,128))
retval, results, neigh_resp, dists = knn.find_nearest(des,1)
res,dist = int(results[0][0]),dists[0][0]
if dist<0.1:
color = (0,0,255)
else:
print dist
color = (255,0,0)
x,y = kp[res].pt
center = (int(x),int(y))
cv2.circle(img,center,2,color,-1)
x,y = keys[h].pt
center = (int(x),int(y))
cv2.circle(template,center,2,color,-1)
cv2.imshow('img',img)
cv2.imshow('tm',template)
cv2.waitKey(0)
cv2.destroyAllWindows()
2
import cv2
import numpy
opencv_haystack =cv2.imread('win12.jpg')
opencv_needle =cv2.imread('win1.jpg')
ngrey = cv2.cvtColor(opencv_needle, cv2.COLOR_BGR2GRAY)
hgrey = cv2.cvtColor(opencv_haystack, cv2.COLOR_BGR2GRAY)
hessian_threshold = 85
detector = cv2.SURF(hessian_threshold)
(hkeypoints, hdescriptors) = detector.detect(hgrey, None, useProvidedKeypoints = False)
(nkeypoints, ndescriptors) = detector.detect(ngrey, None, useProvidedKeypoints = False)
rowsize = len(hdescriptors) / len(hkeypoints)
if rowsize > 1:
hrows = numpy.array(hdescriptors, dtype = numpy.float32).reshape((-1, rowsize))
nrows = numpy.array(ndescriptors, dtype = numpy.float32).reshape((-1, rowsize))
else:
hrows = numpy.array(hdescriptors, dtype = numpy.float32)
nrows = numpy.array(ndescriptors, dtype = numpy.float32)
rowsize = len(hrows[0])
samples = hrows
responses = numpy.arange(len(hkeypoints), dtype = numpy.float32)
knn = cv2.KNearest()
knn.train(samples,responses)
if dist < 0.1:
color = (0, 0, 255)
else:
color = (255, 0, 0)
x,y = hkeypoints[res].pt
center = (int(x),int(y))
cv2.circle(opencv_haystack,center,2,color,-1)
x,y = nkeypoints[i].pt
center = (int(x),int(y))
cv2.circle(opencv_needle,center,2,color,-1)
cv2.imshow('haystack',opencv_haystack)
cv2.imshow('needle',opencv_needle)
cv2.waitKey(0)
cv2.destroyAllWindows()
Hi I know it's late but for the ones still facing the problem, try replacing detect() with detectAndCompute().
I got the error removed this way.
when in doubt, ...
>>> s = cv2.SURF()
>>> help(s.detect)
Help on built-in function detect:
detect(...)
detect(image[, mask]) -> keypoints
so, your assumptions about the args to SURF.detect() were quite off.
I'm running opencv 2.4.1 using python bindings and am having difficulty calculating the optical flow.
Specifically this section of code:
#calculate the opticalflow
if prev_saturation_thresh_img==None:
prev_saturation_thresh_img=saturation_img
if i >=0:
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
p1, st, err = cv2.calcOpticalFlowPyrLK(prev_img,next_img,tracks_np,**lk_params)
Returns the error:
<unknown> is not a numpy array
So then I try to convert the images to numpy arrays:
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
Now I have a new error:
<unknown> data type = 17 is not supported
In a last-ditch effort I convert the images to cvmat (from iplimage) before converting it to a numpy array, just to see what happens
error: ..\..\..\OpenCV-2.4.1\modules\video\src\lkpyramid.cpp:607: error: (-215) nextPtsMat.checkVector(2, CV_32F, true) == npoints
So now I'm stuck. Below is the code in it's entirety for reference
import cv
import cv2
import numpy as np
class Target:
def __init__(self):
self.capture = cv.CaptureFromFile("raw_gait_cropped.avi")
def run(self):
#initiate font
font = cv.InitFont(cv.CV_FONT_HERSHEY_SIMPLEX, 1, 1, 0, 3, 8)
#instantiate images
img_size=cv.GetSize(cv.QueryFrame(self.capture))
hsv_img=cv.CreateImage(img_size,8,3)
saturation_img=cv.CreateImage(img_size,8,1)
saturation_thresh_img=cv.CreateImage(img_size,8,1)
prev_saturation_thresh_img=None
#create params for GoodFeaturesToTrack and calcOpticalFlowPyrLK
gftt_params = dict( cornerCount=11,
qualityLevel=0.2,
minDistance=5,
mask=None,
useHarris=True
)
lk_params = dict( winSize = (15, 15),
maxLevel = 2,
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
flags = cv2.OPTFLOW_USE_INITIAL_FLOW,
minEigThreshold=1
)
tracks=[]
writer=cv.CreateVideoWriter("angle_tracking.avi",cv.CV_FOURCC('M','J','P','G'),30,cv.GetSize(hsv_img),1)
i=0
while True:
#grab a frame from the video capture
img=cv.QueryFrame(self.capture)
#break the loop when the video is over
if img == None:
break
#convert the image to HSV
cv.CvtColor(img,hsv_img,cv.CV_BGR2HSV)
#Get Saturation channel
cv.MixChannels([hsv_img],[saturation_img],[(1,0)])
#Apply threshold to saturation channel
cv.InRangeS(saturation_img,145,255,saturation_thresh_img)
#locate initial features to track
if i==0:
eig_image=temp_image = cv.CreateMat(img.height, img.width, cv.CV_32FC1)
for (x,y) in cv.GoodFeaturesToTrack(saturation_thresh_img, eig_image, temp_image, **gftt_params):
tracks.append([(x,y)])
cv.Circle(saturation_thresh_img,(int(x),int(y)),5,(255,255,255),-1,cv.CV_AA,0)
tracks_np=np.float32(tracks).reshape(-1,2)
print tracks
#calculate the opticalflow
if prev_saturation_thresh_img==None:
prev_saturation_thresh_img=saturation_img
if i >=0:
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
p1, st, err = cv2.calcOpticalFlowPyrLK(prev_img,next_img,tracks_np,**lk_params)
prev_saturation_thresh_img=saturation_img
i=i+1
print i
#display frames to users
cv.ShowImage("Raw Video",img)
cv.ShowImage("Saturation Channel",saturation_img)
cv.ShowImage("Saturation Thresholded",saturation_thresh_img)
# Listen for ESC or ENTER key
c = cv.WaitKey(7) % 0x100
if c == 27 or c == 10:
break
#close all windows once video is done
cv.DestroyAllWindows()
if __name__=="__main__":
t = Target()
t.run()
OpenCV can be very picky about the data formats it accepts. The following code extract works for me:
prev = cv.LoadImage('images/'+file_list[0])
prev = np.asarray(prev[:,:])
prev_gs = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
current = cv.LoadImage('images/'+file)
current = np.asarray(current[:,:])
current_gs = cv2.cvtColor(current, cv2.COLOR_BGR2GRAY)
features, status, track_error = cv2.calcOpticalFlowPyrLK(prev_gs, current_gs, good_features, None,
**lk_params)
Note the [:,:] when converting from images to numpy arrays, I have found that they are required.
I hope that this may solve your problem.
I am trying to extract a background image from a video so I can detect moving objects in it.
I have found functions like cv2.BackgroundSubtractorMOG(), however I just can't get it to work.
Does someone have some experience using this ?
I have created object mog = cv2.BackgroundSubtractorMOG(300,-1,-1,-1)
Then I try mog.apply(Nmat,Nforemat,-1), but that doesnt seem to work, I get the following
error:
......\OpenCV-2.4.0\modules\video\src\bgfg_gaussmix.cpp:117: error: (-215) CV_MAT_DEPTH(frameType) == CV_8U
Nmat and N foremat are numpy arrays because i was also getting an error if they weren't.
Here is work in progress...
import cv
import cv2
import numpy as np
if __name__ == '__main__':
cv.NamedWindow("test1", cv.CV_WINDOW_AUTOSIZE)
cv.NamedWindow("test2", cv.CV_WINDOW_AUTOSIZE)
capture = cv.CreateFileCapture('test.avi')
frame = cv.QueryFrame(capture)
img = cv.CreateImage(cv.GetSize(frame),8,1)
thresh = cv.CreateImage(cv.GetSize(frame),8,1)
foreground = cv.CreateImage(cv.GetSize(frame),8,1)
foremat = cv.GetMat(foreground)
Nforemat = np.array(foremat, dtype=np.float32)
thresh = cv.CreateImage(cv.GetSize(img),8,1)
mog = cv2.BackgroundSubtractorMOG()
loop = True
nframes=0
while(loop):
frame = cv.QueryFrame(capture)
mat = cv.GetMat(frame)
Nmat = np.array(mat, dtype=np.float32)
cv.CvtColor(frame,img,cv.CV_BGR2GRAY)
if (frame == None):
break
mog.apply(Nmat,Nforemat,-1)
cv.Threshold(img,thresh,100,255,cv.CV_THRESH_BINARY)
cv.ShowImage("test1", thresh)
cv.ShowImage("test2",frame)
char = cv.WaitKey(50)
if (char != -1):
if (char == 27):
break
cv.DestroyWindow("test1")
cv.DestroyWindow("test2")
change
Nmat = np.array(mat, dtype=np.float32)
for
Nmat = np.array(mat, dtype=np.uint8)
Why are you using these lines:
thresh = cv.CreateImage(cv.GetSize(img),8,1)
and
cv.Threshold(img,thresh,100,255,cv.CV_THRESH_BINARY)
?