Save video from OpenCV - Python

I created a space-detection script using grayscale conversion and Gaussian blur, but now I don't know where to put the code that saves my OpenCV video.
I already tried inserting it at a random line, but the output file can't be played and is only 5.6 KB, even when I record for a long time.
My code runs fine without the save feature, but I want to add it. This is the save code I tried:
fourcc = open_cv.VideoWriter_fourcc(*'DIVX')
out = open_cv.VideoWriter('output.avi',fourcc, 20.0, (640,480))
And this is the code where I want to add the saving logic from above:
import cv2 as open_cv
import numpy as np
import logging

from drawing_utils import draw_contours
from colors import COLOR_GREEN, COLOR_WHITE, COLOR_BLUE


class MotionDetector:
    LAPLACIAN = 1.4
    DETECT_DELAY = 1

    def __init__(self, video, coordinates, start_frame):
        self.video = video
        self.coordinates_data = coordinates
        self.start_frame = start_frame
        self.contours = []
        self.bounds = []
        self.mask = []

    def detect_motion(self):
        capture = open_cv.VideoCapture(self.video)
        capture.set(open_cv.CAP_PROP_POS_FRAMES, self.start_frame)

        coordinates_data = self.coordinates_data
        logging.debug("coordinates data: %s", coordinates_data)

        for p in coordinates_data:
            coordinates = self._coordinates(p)
            logging.debug("coordinates: %s", coordinates)

            rect = open_cv.boundingRect(coordinates)
            logging.debug("rect: %s", rect)

            new_coordinates = coordinates.copy()
            new_coordinates[:, 0] = coordinates[:, 0] - rect[0]
            new_coordinates[:, 1] = coordinates[:, 1] - rect[1]
            logging.debug("new_coordinates: %s", new_coordinates)

            self.contours.append(coordinates)
            self.bounds.append(rect)

            mask = open_cv.drawContours(
                np.zeros((rect[3], rect[2]), dtype=np.uint8),
                [new_coordinates],
                contourIdx=-1,
                color=255,
                thickness=-1,
                lineType=open_cv.LINE_8)

            mask = mask == 255
            self.mask.append(mask)
            logging.debug("mask: %s", self.mask)

        statuses = [False] * len(coordinates_data)
        times = [None] * len(coordinates_data)

        while capture.isOpened():
            result, frame = capture.read()
            if frame is None:
                break

            if not result:
                raise CaptureReadError("Error reading video capture on frame %s" % str(frame))

            blurred = open_cv.GaussianBlur(frame.copy(), (5, 5), 3)
            grayed = open_cv.cvtColor(blurred, open_cv.COLOR_BGR2GRAY)
            new_frame = frame.copy()
            logging.debug("new_frame: %s", new_frame)

            position_in_seconds = capture.get(open_cv.CAP_PROP_POS_MSEC) / 1000.0

            for index, c in enumerate(coordinates_data):
                status = self.__apply(grayed, index, c)

                if times[index] is not None and self.same_status(statuses, index, status):
                    times[index] = None
                    continue

                if times[index] is not None and self.status_changed(statuses, index, status):
                    if position_in_seconds - times[index] >= MotionDetector.DETECT_DELAY:
                        statuses[index] = status
                        times[index] = None
                    continue

                if times[index] is None and self.status_changed(statuses, index, status):
                    times[index] = position_in_seconds

            for index, p in enumerate(coordinates_data):
                coordinates = self._coordinates(p)
                color = COLOR_GREEN if statuses[index] else COLOR_BLUE
                draw_contours(new_frame, coordinates, str(p["id"] + 1), COLOR_WHITE, color)

            open_cv.imshow(str(self.video), new_frame)
            k = open_cv.waitKey(1)
            if k == ord("q"):
                break

        capture.release()
        open_cv.destroyAllWindows()

    def __apply(self, grayed, index, p):
        coordinates = self._coordinates(p)
        logging.debug("points: %s", coordinates)

        rect = self.bounds[index]
        logging.debug("rect: %s", rect)

        roi_gray = grayed[rect[1]:(rect[1] + rect[3]), rect[0]:(rect[0] + rect[2])]
        laplacian = open_cv.Laplacian(roi_gray, open_cv.CV_64F)
        logging.debug("laplacian: %s", laplacian)

        coordinates[:, 0] = coordinates[:, 0] - rect[0]
        coordinates[:, 1] = coordinates[:, 1] - rect[1]

        status = np.mean(np.abs(laplacian * self.mask[index])) < MotionDetector.LAPLACIAN
        logging.debug("status: %s", status)
        return status

    @staticmethod
    def _coordinates(p):
        return np.array(p["coordinates"])

    @staticmethod
    def same_status(coordinates_status, index, status):
        return status == coordinates_status[index]

    @staticmethod
    def status_changed(coordinates_status, index, status):
        return status != coordinates_status[index]


class CaptureReadError(Exception):
    pass

Create the file for the video to go in. You can think of this file like a book, but a book with no pages. This code is used to create the file:
fourcc = open_cv.VideoWriter_fourcc(*'DIVX')
out = open_cv.VideoWriter('output.avi',fourcc, 20.0, (640,480))
Not all codecs work on all systems. I use Ubuntu, and this is the code I use to create a video file:
out = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 30,(w,h))
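If the chosen codec isn't available on your system, OpenCV can silently create a writer that produces an empty file. A quick sanity check (using the standard cv2 API) right after constructing the writer catches this early:
if not out.isOpened():
    raise IOError("VideoWriter failed to open - try a different codec/container combination")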
If you try to play this video, nothing will happen. There are no frames in the video, so there is nothing to play (like a book with no pages). After you process each frame, the frame needs to be written to the video (like putting a page in a book):
out.write(new_frame)
A natural place to do this is right where the frame is already displayed with .imshow():
out.write(new_frame)
open_cv.imshow(str(self.video), new_frame)
k = open_cv.waitKey(1)
if k == ord("q"):
break
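Putting it together, here is a minimal sketch of where the writer fits in your detect_motion (reading the frame size from the capture is an assumption on my part; the size passed to VideoWriter must match the frames you write, or you will again get a tiny file that won't play). Also remember to release the writer at the end, next to capture.release(), so the file is finalized:
def detect_motion(self):
    capture = open_cv.VideoCapture(self.video)
    capture.set(open_cv.CAP_PROP_POS_FRAMES, self.start_frame)

    # Create the writer once, before the loop; size must match new_frame
    width = int(capture.get(open_cv.CAP_PROP_FRAME_WIDTH))
    height = int(capture.get(open_cv.CAP_PROP_FRAME_HEIGHT))
    fourcc = open_cv.VideoWriter_fourcc(*'DIVX')
    out = open_cv.VideoWriter('output.avi', fourcc, 20.0, (width, height))

    # ... your existing setup code ...

    while capture.isOpened():
        # ... your existing per-frame processing that produces new_frame ...
        out.write(new_frame)                       # put the "page" in the "book"
        open_cv.imshow(str(self.video), new_frame)
        if open_cv.waitKey(1) == ord("q"):
            break

    out.release()                                  # finalize the file so it is playable
    capture.release()
    open_cv.destroyAllWindows()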

Related

I want to process a live stream from YouTube with OpenCV

I am posting this using machine translation.
I would like to use a YouTube live stream for action recognition.
Action recognition:
https://github.com/felixchenfy/Realtime-Action-Recognition
Reading a YouTube live stream with OpenCV:
How to read Youtube live stream using openCV python?
YouTube URL:
https://www.youtube.com/watch?v=DjdUEyjx8GM
We programmed it with reference to the above. The program runs normally for a few hours, but after 4 or 5 hours I get an error saying that communication from the remote host has been lost. Checking the YouTube video, the stream itself does not appear to be interrupted.
Here is the part of the code that handles video capture:
import cv2
import pafy

url = "https://www.youtube.com/watch?v=DjdUEyjx8GM"
video = pafy.new(url)
best = video.getbest(preftype="mp4")

class ReadFromVideo(object):
    def __init__(self, video_path, sample_interval=1):
        ''' A video reader class for reading video frames from video.
        Arguments:
            video_path
            sample_interval {int}: sample every kth image.
        '''
        assert isinstance(sample_interval, int) and sample_interval >= 1
        self.cnt_imgs = 0
        self._is_stoped = False
        self._video = cv2.VideoCapture(best.url)
        ret, image = self._video.read()
        # NOTE: self._video2 is used below but never opened in this snippet
        # (presumably the second stream is created in code omitted here).
        ret, image2 = self._video2.read()
        self._next_image = image
        self._next_image2 = image2
        self._sample_interval = sample_interval
        self._fps = self.get_fps()
        if not self._fps >= 0.0001:
            import warnings
            warnings.warn("Invalid fps of video: {}".format(video_path))

    def has_image(self):
        return self._next_image is not None

    def get_curr_video_time(self):
        return 1.0 / self._fps * self.cnt_imgs

    def read_image(self):
        image = self._next_image
        image2 = self._next_image2
        for i in range(self._sample_interval):
            if self._video.isOpened():
                ret, frame = self._video.read()
                self._next_image = frame
                ret, frame2 = self._video2.read()
                self._next_image2 = frame2
            else:
                self._next_image = None
                self._next_image2 = None
                break
        self.cnt_imgs += 1
        return image, image2

    def stop(self):
        self._video.release()
        self._video2.release()
        self._is_stoped = True

    def __del__(self):
        if not self._is_stoped:
            self.stop()

    def get_fps(self):
        # Find OpenCV version
        (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.')
        # With webcam get(CV_CAP_PROP_FPS) does not work.
        # Let's see for ourselves.
        # Get video properties
        if int(major_ver) < 3:
            fps = self._video.get(cv2.cv.CV_CAP_PROP_FPS)
        else:
            fps = self._video.get(cv2.CAP_PROP_FPS)
        return fps
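The thread doesn't include a fix, but a common way to survive drops on long-running streams is to treat a failed read as a signal to reopen the capture. A rough sketch under that assumption (the helper names and retry limits are illustrative, not from the original code; note that pafy's direct stream URLs can expire, so the URL is re-resolved on reconnect):
import time
import pafy
import cv2

def open_stream(url):
    best = pafy.new(url).getbest(preftype="mp4")
    return cv2.VideoCapture(best.url)

def read_with_reconnect(cap, url, max_retries=5):
    ret, frame = cap.read()
    retries = 0
    while not ret and retries < max_retries:
        time.sleep(2)            # brief back-off before reconnecting
        cap.release()
        cap = open_stream(url)   # re-resolve: the direct stream URL may have expired
        ret, frame = cap.read()
        retries += 1
    return cap, frame            # frame is None if every retry failed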
The entire source code is here:
#!/usr/bin/env python
# coding: utf-8

'''
Test action recognition on
(1) a video, (2) a folder of images, (3) or web camera.
Input:
    model: model/trained_classifier.pickle
Output:
    result video: output/${video_name}/video.avi
    result skeleton: output/${video_name}/skeleton_res/XXXXX.txt
    visualization by cv2.imshow() in img_displayer

Example of usage:
(1) Test on video file:
    python src/s5_test.py \
        --model_path model/trained_classifier.pickle \
        --data_type video \
        --data_path data_test/exercise.avi \
        --output_folder output
(2) Test on a folder of images:
    python src/s5_test.py \
        --model_path model/trained_classifier.pickle \
        --data_type folder \
        --data_path data_test/apple/ \
        --output_folder output
(3) Test on web camera:
    python src/s5_test.py \
        --model_path model/trained_classifier.pickle \
        --data_type webcam \
        --data_path 0 \
        --output_folder output
'''
if True:  # Include project path
    import sys
    import os
    ROOT = os.path.dirname(os.path.abspath(__file__)) + "/../"
    CURR_PATH = os.path.dirname(os.path.abspath(__file__)) + "/"
    sys.path.append(ROOT)

    # These imports are used below but were missing from the pasted snippet:
    import argparse
    import datetime
    import numpy as np
    import cv2

    import utils.lib_images_io as lib_images_io
    import utils.lib_plot as lib_plot
    import utils.lib_commons as lib_commons
    from utils.lib_openpose import SkeletonDetector
    from utils.lib_tracker import Tracker
    from utils.lib_classifier import ClassifierOnlineTest
    from utils.lib_classifier import *  # Import all sklearn related libraries

def par(path):  # Pre-append ROOT to the path if it's not absolute
    return ROOT + path if (path and path[0] != "/") else path
# -- Command-line input
def get_command_line_arguments():
    def parse_args():
        parser = argparse.ArgumentParser(
            description="Test action recognition on \n"
            "(1) a video, (2) a folder of images, (3) or web camera.")
        parser.add_argument("-m", "--model_path", required=False,
                            default='model/trained_classifier.pickle')
        parser.add_argument("-t", "--data_type", required=False, default='webcam',
                            choices=["video", "folder", "webcam"])
        parser.add_argument("-p", "--data_path", required=False, default="",
                            help="path to a video file, or images folder, or webcam. \n"
                            "For video and folder, the path should be "
                            "absolute or relative to this project's root. "
                            "For webcam, either input an index or device name. ")
        parser.add_argument("-o", "--output_folder", required=False, default='output/',
                            help="Which folder to save result to.")
        args = parser.parse_args()
        return args

    args = parse_args()
    if args.data_type != "webcam" and args.data_path and args.data_path[0] != "/":
        # If the path is not absolute, then it's relative to the ROOT.
        args.data_path = ROOT + args.data_path
    return args
def get_dst_folder_name(src_data_type, src_data_path):
    ''' Compute an output folder name based on data_type and data_path.
    The final output of this script looks like this:
        DST_FOLDER/folder_name/video.avi
        DST_FOLDER/folder_name/skeletons/XXXXX.txt
    '''
    assert(src_data_type in ["video", "folder", "webcam"])
    if src_data_type == "video":  # /root/data/video.avi --> video
        folder_name = str("video")
    elif src_data_type == "folder":  # /root/data/video/ --> video
        folder_name = src_data_path.rstrip("/").split("/")[-1]
    elif src_data_type == "webcam":
        # month-day-hour-minute-seconds, e.g.: 02-26-15-51-12
        folder_name = lib_commons.get_time_string()
    return folder_name
args = get_command_line_arguments()
SRC_DATA_TYPE = args.data_type
SRC_DATA_PATH = args.data_path
SRC_MODEL_PATH = args.model_path
DST_FOLDER_NAME = get_dst_folder_name(SRC_DATA_TYPE, SRC_DATA_PATH)

# -- Settings
cfg_all = lib_commons.read_yaml(ROOT + "config/config.yaml")
cfg = cfg_all["s5_test.py"]
CLASSES = np.array(cfg_all["classes"])
SKELETON_FILENAME_FORMAT = cfg_all["skeleton_filename_format"]

# Action recognition: number of frames used to extract features.
WINDOW_SIZE = int(cfg_all["features"]["window_size"])

# Output folder
DST_FOLDER = args.output_folder + "/" + DST_FOLDER_NAME + "/"
DST_SKELETON_FOLDER_NAME = cfg["output"]["skeleton_folder_name"]
DST_VIDEO_NAME = cfg["output"]["video_name"]
# framerate of output video.avi
DST_VIDEO_FPS = float(cfg["output"]["video_fps"])

# writer
fmt = cv2.VideoWriter_fourcc(*'MP4V')
fps = 10.0
size = (800, 480)
writer = cv2.VideoWriter('outtest.mp4', fmt, fps, size)
backup_time = 3600  # sec
time = 0  # init
now = datetime.datetime.now()
s_now = now.strftime('%Y-%m-%d-%H-%M-%S')
back_up = cv2.VideoWriter(f'{s_now}.mp4', fmt, fps, size)

# Video settings
# If data_type is webcam, set the max frame rate.
SRC_WEBCAM_MAX_FPS = float(cfg["settings"]["source"]["webcam_max_framerate"])
# If data_type is video, set the sampling interval.
# For example, if it's 3, then the video will be read 3 times faster.
SRC_VIDEO_SAMPLE_INTERVAL = int(cfg["settings"]["source"]["video_sample_interval"])

# Openpose settings
OPENPOSE_MODEL = cfg["settings"]["openpose"]["model"]
OPENPOSE_IMG_SIZE = cfg["settings"]["openpose"]["img_size"]

# Display settings
img_disp_desired_rows = int(cfg["settings"]["display"]["desired_rows"])
# -- Function
def select_images_loader(src_data_type, src_data_path):
    if src_data_type == "video":
        images_loader = lib_images_io.ReadFromVideo(
            src_data_path,
            sample_interval=SRC_VIDEO_SAMPLE_INTERVAL)
    elif src_data_type == "folder":
        images_loader = lib_images_io.ReadFromFolder(
            folder_path=src_data_path)
    elif src_data_type == "webcam":
        if src_data_path == "":
            webcam_idx = 0
        elif src_data_path.isdigit():
            webcam_idx = int(src_data_path)
        else:
            webcam_idx = src_data_path
        images_loader = lib_images_io.ReadFromWebcam(
            SRC_WEBCAM_MAX_FPS, webcam_idx)
    return images_loader
class MultiPersonClassifier(object):
    ''' This is a wrapper around ClassifierOnlineTest
    for recognizing actions of multiple people.
    '''

    def __init__(self, model_path, classes):
        self.dict_id2clf = {}  # human id -> classifier of this person

        # Define a function for creating classifier for new people.
        self._create_classifier = lambda human_id: ClassifierOnlineTest(
            model_path, classes, WINDOW_SIZE, human_id)

    def classify(self, dict_id2skeleton):
        ''' Classify the action type of each skeleton in dict_id2skeleton '''
        # Clear people not in view
        old_ids = set(self.dict_id2clf)
        cur_ids = set(dict_id2skeleton)
        humans_not_in_view = list(old_ids - cur_ids)
        for human in humans_not_in_view:
            del self.dict_id2clf[human]

        # Predict each person's action
        id2label = {}
        for id, skeleton in dict_id2skeleton.items():
            if id not in self.dict_id2clf:  # add this new person
                self.dict_id2clf[id] = self._create_classifier(id)
            classifier = self.dict_id2clf[id]
            id2label[id] = classifier.predict(skeleton)  # predict label
            # print("\n\nPredicting label for human{}".format(id))
            # print("  skeleton: {}".format(skeleton))
            # print("  label: {}".format(id2label[id]))
        return id2label

    def get_classifier(self, id):
        ''' Get the classifier based on the person id.
        Arguments:
            id {int or "min"}
        '''
        if len(self.dict_id2clf) == 0:
            return None
        if id == 'min':
            id = min(self.dict_id2clf.keys())
        return self.dict_id2clf[id]
def remove_skeletons_with_few_joints(skeletons):
    ''' Remove bad skeletons before sending to the tracker '''
    good_skeletons = []
    for skeleton in skeletons:
        px = skeleton[2:2+13*2:2]
        py = skeleton[3:2+13*2:2]
        num_valid_joints = len([x for x in px if x != 0])
        num_leg_joints = len([x for x in px[-6:] if x != 0])
        total_size = max(py) - min(py)
        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        # IF JOINTS ARE MISSING, TRY CHANGING THESE VALUES:
        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        if num_valid_joints >= 5 and total_size >= 0.1 and num_leg_joints >= 0:
            # add this skeleton only when all requirements are satisfied
            good_skeletons.append(skeleton)
    return good_skeletons
def draw_result_img(img_disp, ith_img, humans, dict_id2skeleton,
                    skeleton_detector, multiperson_classifier):
    ''' Draw skeletons, labels, and prediction scores onto image for display '''

    # Resize to a proper size for display
    r, c = img_disp.shape[0:2]
    desired_cols = int(1.0 * c * (img_disp_desired_rows / r))
    img_disp = cv2.resize(img_disp,
                          dsize=(desired_cols, img_disp_desired_rows))

    # Draw all people's skeleton
    skeleton_detector.draw(img_disp, humans)

    # Draw bounding box and label of each person
    if len(dict_id2skeleton):
        for id, label in dict_id2label.items():
            skeleton = dict_id2skeleton[id]
            # scale the y data back to original
            skeleton[1::2] = skeleton[1::2] / scale_h
            # print("Drawing skeleton: ", dict_id2skeleton[id], "with label:", label, ".")
            lib_plot.draw_action_result(img_disp, id, skeleton, label)

    # Add blank to the left for displaying prediction scores of each class
    img_disp = lib_plot.add_white_region_to_left_of_image(img_disp)

    cv2.putText(img_disp, "Frame:" + str(ith_img),
                (20, 20), fontScale=1.5, fontFace=cv2.FONT_HERSHEY_PLAIN,
                color=(0, 0, 0), thickness=2)

    # Draw predicting score for only 1 person
    if len(dict_id2skeleton):
        classifier_of_a_person = multiperson_classifier.get_classifier(
            id='min')
        classifier_of_a_person.draw_scores_onto_image(img_disp)
    return img_disp

    # NOTE: the block below is unreachable as pasted (it follows the return
    # statement); it looks like part of a second-stream variant of this
    # function that handles img2_disp / dict2_id2skeleton.
    # Time Display
    cv2.putText(img2_disp, str(now_time.isoformat(timespec='seconds')),
                (20, 20), fontScale=1.5, fontFace=cv2.FONT_HERSHEY_PLAIN,
                color=(0, 0, 0), thickness=2)
    # Draw predicting score for only 1 person
    if len(dict2_id2skeleton):
        classifier_of_a_person = multiperson_classifier.get_classifier(
            id='min')
        classifier_of_a_person.draw_scores_onto_image(img2_disp)
    return img2_disp
def get_the_skeleton_data_to_save_to_disk(dict_id2skeleton):
    '''
    In each image, for each skeleton, save the:
        human_id, label, and the skeleton positions of length 18*2.
    So the total length per row is 2+36=38.
    '''
    skels_to_save = []
    for human_id in dict_id2skeleton.keys():
        label = dict_id2label[human_id]
        skeleton = dict_id2skeleton[human_id]
        skels_to_save.append([[human_id, label] + skeleton.tolist()])
    return skels_to_save
# -- Main
if __name__ == "__main__":

    # -- Detector, tracker, classifier
    skeleton_detector = SkeletonDetector(OPENPOSE_MODEL, OPENPOSE_IMG_SIZE)
    multiperson_tracker = Tracker()
    multiperson_classifier = MultiPersonClassifier(SRC_MODEL_PATH, CLASSES)

    # -- Image reader and displayer
    images_loader = select_images_loader(SRC_DATA_TYPE, SRC_DATA_PATH)
    img_displayer = lib_images_io.ImageDisplayer()

    # -- Init output
    # output folder
    os.makedirs(DST_FOLDER, exist_ok=True)
    os.makedirs(DST_FOLDER + DST_SKELETON_FOLDER_NAME, exist_ok=True)

    # -- Read images and process
    try:
        ith_img = -1
        while images_loader.has_image():

            # Timer
            now = datetime.datetime.now()
            now_time = now.time()

            # -- Read image
            img = images_loader.read_image()
            ith_img += 1
            img_disp = img.copy()
            # print(f"\nProcessing {ith_img}th image ...")

            # -- Detect skeletons
            humans = skeleton_detector.detect(img)
            skeletons, scale_h = skeleton_detector.humans_to_skels_list(humans)
            skeletons = remove_skeletons_with_few_joints(skeletons)

            # -- Track people
            dict_id2skeleton = multiperson_tracker.track(skeletons)

            # -- Log & Recognize action of each person
            # NOTE: 'log' is not defined anywhere in this snippet.
            log.append(str(now_time))
            if len(dict_id2skeleton):
                dict_id2label = multiperson_classifier.classify(
                    dict_id2skeleton)

            # -- Draw
            img_disp = draw_result_img(img_disp, ith_img, humans,
                                       dict_id2skeleton, skeleton_detector,
                                       multiperson_classifier)

            # Print label of a person
            if len(dict_id2skeleton):
                min_id = min(dict_id2skeleton.keys())
                # print("prediced label is :", dict_id2label[min_id])

            # -- Display image, and write to video.avi
            # video writer: roll over to a fresh backup file every backup_time seconds
            if time >= backup_time * fps:
                back_up.release()
                now = datetime.datetime.now()
                s_now = now.strftime('%Y-%m-%d-%H-%M-%S')
                back_up = cv2.VideoWriter(f'{s_now}.mp4', fmt, fps, size)
                time = 0
            time += 1

            writer.write(img_disp)
            back_up.write(img_disp)
            img_displayer.display(img_disp, wait_key_ms=1)

            # -- Get skeleton data and save to file
            skels_to_save = get_the_skeleton_data_to_save_to_disk(dict_id2skeleton)
            lib_commons.save_listlist(
                DST_FOLDER + DST_SKELETON_FOLDER_NAME + SKELETON_FILENAME_FORMAT.format(ith_img),
                skels_to_save)

            if cv2.waitKey(1) == 27:
                break
    finally:
        writer.release()
        back_up.release()
        cv2.destroyAllWindows()
Thanks for your help.

ModuleNotFoundError: No module named 'gtk'

I've tried to follow several solutions on the internet, including this one, but no luck. I'm in the process of implementing an object detection system. I am on Windows 10, using PyCharm with Python 3.8.
I am getting errors for the packages. I've tried to add them through the package installer and through the terminal, with no luck. Here's the error:
Traceback (most recent call last):
File "D:/CabaleGame/runner.py", line 2, in <module>
import processCards
File "D:\CabaleGame\processCards.py", line 1, in <module>
import gtk.gdk
ModuleNotFoundError: No module named 'gtk'
Process finished with exit code 1
I've downloaded the package from here and here, which one is correct?
My program file:
import gtk.gdk
import cv
import time
import os
import string

def takeScreenCapture(screenShotNum=""):
    time.sleep(1)
    w = gtk.gdk.get_default_root_window()
    sz = w.get_size()
    #print "The size of the window is %d x %d" % sz
    pb = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, False, 8, sz[0], sz[1])
    pb = pb.get_from_drawable(w, w.get_colormap(), 0, 0, 0, 0, sz[0], sz[1])
    # Convert gtk.PixelBuf to a NumPy array
    array = pb.get_pixels_array()
    # Convert NumPy array to CvMat
    mat = cv.fromarray(array)
    # Convert RGB to BGR
    cv.CvtColor(mat, mat, cv.CV_RGB2BGR)
    #cv.ShowImage("win", mat)
    #cv.WaitKey(0)
    return mat

def getMeaningFromCards(cards):
    """
    This takes a dictionary of the form:
        (x, y) : Card image
    and returns a dictionary of the form:
        (x, y) : (number, suit)
    (x, y) are the coordinates of the top left of the card
    """
    imgdir = "LibraryImages"
    templatesNums = os.listdir(os.path.join(imgdir, "Numbers"))
    templatesSuits = os.listdir(os.path.join(imgdir, "Suits"))
    #templates = filter(lambda s: s[-4:] == ".png", templates)
    templatesNums = map(lambda s: os.path.join(imgdir, "Numbers", s), templatesNums)
    templatesSuits = map(lambda s: os.path.join(imgdir, "Suits", s), templatesSuits)
    for k in cards.keys():
        card = cards[k]
        cardImg = cv.CreateImageHeader((card.width, card.height), 8, 3)
        cv.SetData(cardImg, card.tostring())
        numAndSuit3 = cv.GetSubRect(cardImg, (0, 0, 30, 80))
        numAndSuit1 = cv.CreateImage((numAndSuit3.width, numAndSuit3.height), 8, 1)
        cv.CvtColor(numAndSuit3, numAndSuit1, cv.CV_RGB2GRAY)
        # Convert the 1 channel grayscale to 3 channel grayscale
        # (GRAY2RGB doesn't actually introduce color)
        cv.CvtColor(numAndSuit1, numAndSuit3, cv.CV_GRAY2RGB)
        num = findBestTemplateMatch(templatesNums, numAndSuit3)
        suit = findBestTemplateMatch(templatesSuits, numAndSuit3)
        #print num, suit
        # If this image was recognized as a card, but didn't match
        # any template, it shouldn't be in the list in the first place
        if num == None or suit == None:
            del cards[k]
            continue
        num = string.split(os.path.basename(num), '.')[0]
        suit = string.split(os.path.basename(suit), '.')[0]
        # The alternate file names have underscores after their names
        if num[-1] == '_':
            num = num[:-1]
        if suit[-1] == '_':
            suit = suit[:-1]
        cards[k] = (num, suit)
        #cv.ShowImage("NumandSuit", numAndSuit)
        #cv.WaitKey(0)
    print cards
    return cards

def findBestTemplateMatch(tplList, img):
    """
    Compares img against a list of templates.
    tplList is a list of string filenames of template images
    Returns a tuple (num, suit) if a template is suitably matched
    or None if not
    """
    minTpl = 200  # arbitrarily large number
    tString = None
    for t in tplList:
        tpl = cv.LoadImage(t)
        w = img.width - tpl.width + 1
        h = img.height - tpl.height + 1
        result = cv.CreateImage((w, h), 32, 1)
        cv.MatchTemplate(img, tpl, result, cv.CV_TM_SQDIFF_NORMED)
        (minVal, maxVal, minLoc, maxLoc) = cv.MinMaxLoc(result)
        #print t
        #print (minVal, maxVal, minLoc, maxLoc)
        # 0.2 found by experiment (the non-card images end up being around
        # 0.25 - 0.28, and all the card images were around 0.08 and less)
        if minVal < minTpl and minVal < 0.2:
            minTpl = minVal
            tString = t
    #print minTpl, tString
    #cv.ShowImage("win", img)
    #cv.ShowImage("win2", result)
    #cv.WaitKey(0)
    return tString

def extractCards(fileName=None):
    """
    Given an image, this will extract the cards from it.
    This takes a filename as an optional argument
    This filename should be the name of an image file.
    This returns a dictionary of the form:
        (x, y) : Card image
    It is likely that the output from this will go to the
    getMeaningFromCards() function.
    """
    if fileName == None:
        mat = takeScreenCapture()
    else:
        mat = cv.LoadImage(fileName)
    # First crop the image: but only crop out the bottom.
    # It is useful to have all dimensions accurate to the screen
    # because otherwise they will throw off the mouse moving and clicking.
    # Cropping out the bottom does not change anything in terms of the mouse.
    unnec_top_distance = 130
    unnec_bottom_distance = 40
    margin = 50
    submat = cv.GetSubRect(mat, (0, 0, mat.width, mat.height - unnec_bottom_distance))
    subImg = cv.CreateImageHeader((submat.width, submat.height), 8, 3)
    cv.SetData(subImg, submat.tostring())
    gray = cv.CreateImage((submat.width, submat.height), 8, 1)
    cv.CvtColor(submat, gray, cv.CV_RGB2GRAY)
    thresh = 250
    max_value = 255
    cv.Threshold(gray, gray, thresh, max_value, cv.CV_THRESH_BINARY)
    cv.Not(gray, gray)
    #cv.ShowImage("sub", submat)
    #cv.WaitKey(0)
    storage = cv.CreateMemStorage(0)
    cpy = cv.CloneImage(gray)
    contours = cv.FindContours(cpy, storage, cv.CV_RETR_LIST, cv.CV_CHAIN_APPROX_SIMPLE, (0, 0))
    #contours = cv.ApproxPoly(contours, cv.CreateMemStorage(), cv.CV_POLY_APPROX_DP, 3, 1)
    bboxes = []
    if contours:
        while contours:
            area = cv.ContourArea(contours)
            # It turns out that all the cards are about 44000 in area...
            # It would definitely be nice to have a better way to do this:
            # ie, find the size of the card programmatically and use it then
            if area > 44000 and area < submat.width * submat.height * 2 / 3:
                bb = cv.BoundingRect(contours)
                bboxes.append(bb)
            contours = contours.h_next()
    #drawBoundingBoxes(bboxes, submat)
    # cards is a dictionary of the form:
    #   (x, y) : card
    cards = {}
    for box in bboxes:
        card = cv.GetSubRect(subImg, box)
        #cv.ShowImage("card", card)
        #cv.WaitKey(0)
        cards[(box[0], box[1])] = card
    return cards

def drawBoundingBoxes(bb, img):
    for b in bb:
        x = b[0]
        y = b[1]
        width = b[2]
        height = b[3]
        cv.Rectangle(img, (x, y), (x + width, y + height), (0, 255, 0, 0))
    cv.ShowImage("bb", img)
    cv.WaitKey(0)

def drawSquares(listWithPoints, img):
    for l in listWithPoints:
        for p in range(len(l) - 1):
            cv.Line(img, l[p], l[p + 1], (0, 0, 255, 0), 2)
        cv.Line(img, l[-1], l[0], (0, 0, 255, 0), 2)
    #cv.ShowImage("sub", img)
    #cv.WaitKey(0)

def contourToPointList(contour):
    plist = []
    for (x, y) in contour:
        plist.append((x, y))
    return plist

if __name__ == '__main__':
    cards = extractCards('CardImages/4_heart.jpg')
    print cards
    #c = cards[cards.keys()[0]]
    #print c
Is it possible to add the package manually through the folders? Would it work if I put it here:
C:\Users\User1\AppData\Local\Programs\Python\Python38\include
From the Python documentation: replace import gtk.gdk with:
import gi
gi.require_version("Gtk", "insert your gtk version")
from gi.repository import Gtk
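For reference, a rough PyGObject equivalent of the old takeScreenCapture() might look like the sketch below. This is untested guesswork for your setup: it assumes GTK 3 with a 3-channel RGB pixbuf and OpenCV's modern cv2 API, and note that installing GTK on Windows is its own task.
import gi
gi.require_version("Gdk", "3.0")
from gi.repository import Gdk
import numpy as np
import cv2

def take_screen_capture():
    window = Gdk.get_default_root_window()
    w, h = window.get_width(), window.get_height()
    pixbuf = Gdk.pixbuf_get_from_window(window, 0, 0, w, h)
    assert pixbuf.get_n_channels() == 3        # sketch assumes plain RGB
    data = pixbuf.get_pixels()                 # RGB bytes, rows padded to rowstride
    stride = pixbuf.get_rowstride()
    rows = [np.frombuffer(data[r * stride : r * stride + w * 3], dtype=np.uint8).reshape(w, 3)
            for r in range(h)]
    array = np.stack(rows)                     # h x w x 3, RGB order
    return cv2.cvtColor(array, cv2.COLOR_RGB2BGR)  # OpenCV expects BGR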

python pyparrot image processing question

I'm trying to build code that flies a camera drone using demoMamboVisionGUI.py below. When the code is executed, the camera screen comes up, and pressing a button starts the flight. The first script below displays four camera windows and detects straight lines while isolating a specified color value, blue (via BGR2HSV). Using these two scripts, the camera recognizes the blue line and flies forward little by little, turning left and right at certain angles; when it recognizes ground of a specified color (red), it lands, and another button starts the flight again. I want to make the code recognize green and land. I would appreciate a simple hint.
import cv2
import numpy as np

def im_trim(img):
    x = 160
    y = 50
    w = 280
    h = 180
    img_trim = img[y:y + h, x:x + w]
    return img_trim

def go():
    minimum = 9999
    min_theta = 0
    try:
        cap = cv2.VideoCapture(0)
    except:
        return
    while True:
        ret, P = cap.read()
        img1 = P
        cv2.imshow('asdf', img1)
        img_HSV = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)
        img_h, img_s, img_v = cv2.split(img_HSV)
        cv2.imshow("HSV", img_HSV)
        lower_b = np.array([100, 80, 100])
        upper_b = np.array([120, 255, 255])
        blue = cv2.inRange(img_HSV, lower_b, upper_b)
        cv2.imshow('root', blue)
        edges = cv2.Canny(blue, 50, 150, apertureSize=3)
        lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
        if lines is not None:
            for line in lines:
                r, theta = line[0]
                #if (r < minimum and r > 0) and (np.rad2deg(theta) > -90 and np.rad2deg(theta) < 90):
                #    minimum = r
                #    min_theta = theta
                #if (r > 0 and r < 250) and (np.rad2deg(theta) > 170 or np.rad2deg(theta) < 10):
                #    self.drone_object.fly_direct(pitch=0, roll=-7, yaw=0, vertical_movement=0,
                #                                 duration=1)
                #    print("right")
                #elif (r > 400 and r < 650) and (np.rad2deg(theta) > 170 or np.rad2deg(theta) < 10):
                #    self.drone_object.fly_direct(pitch=0, roll=7, yaw=0, vertical_movement=0,
                #                                 duration=1)
                print(r, np.rad2deg(theta))
                # Repeat the if-statement below inside the while loop to straighten
                # the path before proceeding.
                # if np.rad2deg(min_theta) >= some angle (or <= some angle):
                #     below -> turn left, above -> turn right, in between -> go straight
                a = np.cos(theta)
                b = np.sin(theta)
                x0 = a * r
                y0 = b * r
                x1 = int(x0 + 1000 * (-b))
                y1 = int(y0 + 1000 * a)
                x2 = int(x0 - 1000 * (-b))
                y2 = int(y0 - 1000 * a)
                cv2.line(img1, (x1, y1), (x2, y2), (0, 255, 0), 3)
        cv2.imshow('hough', img1)
        k = cv2.waitKey(1)
        if k == 27:
            break
    cv2.destroyAllWindows()

if __name__ == "__main__":
    go()
    print("??")
================================================================================================
"""
Demo of the Bebop vision using DroneVisionGUI that relies on libVLC. It is a different
multi-threaded approach than DroneVision
Author: Amy McGovern
"""
from pyparrot.Minidrone import Mambo
from pyparrot.DroneVisionGUI import DroneVisionGUI
import cv2
# set this to true if you want to fly for the demo
testFlying = True
class UserVision:
def __init__(self, vision):
self.index = 0
self.vision = vision
def save_pictures(self, args):
# print("in save pictures on image %d " % self.index)
img = self.vision.get_latest_valid_picture()
if (img is not None):
filename = "test_image_%06d.png" % self.index
# uncomment this if you want to write out images every time you get a new one
#cv2.imwrite(filename, img)
self.index +=1
#print(self.index)
def demo_mambo_user_vision_function(mamboVision, args):
"""
Demo the user code to run with the run button for a mambo
:param args:
:return:
"""
mambo = args[0]
if (testFlying):
print("taking off!")
mambo.safe_takeoff(5)
if (mambo.sensors.flying_state != "emergency"):
print("flying state is %s" % mambo.sensors.flying_state)
print("Flying direct: going up")
mambo.fly_direct(roll=0, pitch=0, yaw=0, vertical_movement=15, duration=2)
print("flip left")
print("flying state is %s" % mambo.sensors.flying_state)
success = mambo.flip(direction="left")
print("mambo flip result %s" % success)
mambo.smart_sleep(5)
print("landing")
print("flying state is %s" % mambo.sensors.flying_state)
mambo.safe_land(5)
else:
print("Sleeeping for 15 seconds - move the mambo around")
mambo.smart_sleep(15)
# done doing vision demo
print("Ending the sleep and vision")
mamboVision.close_video()
mambo.smart_sleep(5)
print("disconnecting")
mambo.disconnect()
if __name__ == "__main__":
# you will need to change this to the address of YOUR mambo
mamboAddr = "B0:FC:36:F4:37:F9"
# make my mambo object
# remember to set True/False for the wifi depending on if you are using the wifi or the BLE to connect
mambo = Mambo(mamboAddr, use_wifi=True)
print("trying to connect to mambo now")
success = mambo.connect(num_retries=3)
print("connected: %s" % success)
if (success):
# get the state information
print("sleeping")
mambo.smart_sleep(1)
mambo.ask_for_state_update()
mambo.smart_sleep(1)
print("Preparing to open vision")
mamboVision = DroneVisionGUI(mambo, is_bebop=False, buffer_size=200,
user_code_to_run=demo_mambo_user_vision_function, user_args=(mambo, ))
userVision = UserVision(mamboVision)
mamboVision.set_user_callback_function(userVision.save_pictures, user_callback_args=None)
mamboVision.open_video()
==========================================================================

pool map no performance gain

I'm doing video capture with OpenCV using the following:
frame_count = 90000
while f < frame_count:
    ret, frame = cap.read()
    f += 1
    if (f > 41300):
        dst = cv2.warpPerspective(frame.copy(), matrix, (frame.shape[1], frame.shape[0]))
        pipeline(pool_1, pool_2, frame, car_tracker, ped_tracker, df_region,
                 region_buffered, df_line_car, df_line_ped, det, dst, matrix)
        cv2.imshow("frame", frame)
        cv2.imshow('dst', dst)
        cv2.waitKey(1)
In function pipeline:
df_cars= pool_1.map(compute, cars)
df_peds = pool_2.map(compute, peds)
compute is:
def compute(v):
    gb = ('trajectory_id',)
    global general_pd
    id = v[0]
    x = v[1]
    y = v[2]
    frame_id = v[3]
    general_pd.loc[len(general_pd)] = [id, x, y, frame_id]
    grouped = general_pd.loc[general_pd['trajectory_id'] == id]
    df_region = v[4]
    region_buffered = v[5]
    df_line = v[6]
    rpp = RawParameterProcessor(grouped, df_line, frame_idx, df_region,
                                region_buffered, gb=gb)
    df_parameter_car = rpp.compute()
    return df_parameter_car
I don't know what I'm doing wrong. I create two pools, and for each frame of the video capture I dispatch the jobs to them asynchronously, but I don't get any performance gain.
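A general note on why this pattern often shows no speedup (my reading of the code, not from the original thread): pool.map pickles every argument and every result, and module-level state such as general_pd is copied into each worker process rather than shared, so the .loc append inside compute never updates the parent's DataFrame, and each call pays IPC overhead that can exceed the work itself. Passing a chunksize at least amortizes the round-trips; a minimal sketch (the value 32 is illustrative):
# Illustrative only: each worker receives a batch of items per IPC
# round-trip instead of one item at a time.
df_cars = pool_1.map(compute, cars, chunksize=32)
df_peds = pool_2.map(compute, peds, chunksize=32)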

Finding the Wrong Homography

I am trying to do an image-stitching project with OpenCV in Python, where I use point matches, calculated by tracking points between frames of a video with the Lucas-Kanade algorithm, to find homography matrices. After writing the program, when it came time to stitch the frames together, I ran a test where I simply displayed the perspective-warped version of each image onto a black canvas to see how the homography matrix had warped them. Instead of shifting over bit by bit between frames, the frames were translated further and further distances, far more than a slight nudge between frames:
[----------------------------------------------------------------------------Empty Space------------------------------------]
[Frame0------------------------------------------------------------------------------------------------------------------------]
[------------Frame1----------------------------------------------------------------------------------------------------------- ]
[-------------------------------------------Frame 2----------------------------------------------------------------------------]
[------------------------------------------------------------------------------------------------------------Frame 3-----------]
Subsequent frames would be out of visual range. I am not quite sure why this is happening. I implemented a back-projection error check to make sure only points with accurate optical-flow calculations were passed on. I also set the back-projection threshold for findHomography to 10, 1, and then 0.5, all to no avail. Since I am stitching multiple images, I multiply my homography matrices between frames, which seems to be compounding the error. Why is this happening, and how can I fix my homography matrices? Here is my code (ignore the commented-out tests; some indentation may have been mangled while copying over to the forum):
import numpy as np
import sys
import cv2
import math

lastFeatures = None
currentFeatures = None
opticFlow = None
panRow = None
Rows = None
finalPanorama = None

def loadRow(dirPath, fType, numImages, column):
    imageRow = []
    for i in range(0, numImages):
        imageRow.append(cv2.imread("%s/%i_%i.%s" % (dirPath, column, i, fType), cv2.IMREAD_COLOR))
    return imageRow

def findNthFeatures(prevImg, prevPnts, nxtImg):
    back_threshold = 0.5
    nxtDescriptors = []
    prevGrey = None
    nxtGrey = None
    nxtPnts = prevPnts[:]
    prevGrey = cv2.cvtColor(prevImg, cv2.COLOR_BGR2GRAY)
    nxtGrey = cv2.cvtColor(nxtImg, cv2.COLOR_BGR2GRAY)
    lucasKanadeParams = dict(winSize=(19, 19), maxLevel=100,
                             criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    nxtPnts, status, err = cv2.calcOpticalFlowPyrLK(prevGrey, nxtGrey, prevPnts, None, **lucasKanadeParams)
    backProjections, status, err = cv2.calcOpticalFlowPyrLK(nxtGrey, prevGrey, nxtPnts, None, **lucasKanadeParams)
    d = abs(prevPnts - backProjections).reshape(-1, 2).max(-1)
    status = d < back_threshold
    goodNew = nxtPnts[status].copy()
    goodLast = prevPnts[status].copy()
    return goodLast, goodNew

def getHomographies(videoName):
    color = np.random.randint(0, 255, (100, 3))
    lastFrame = None
    currentFrame = None
    lastKeypoints = None
    currentKeypoints = None
    firstImage = True
    featureRefreshRate = 5
    feature_params = dict(maxCorners=100,
                          qualityLevel=0.1,
                          minDistance=8,
                          blockSize=15)
    frameCount = 0
    Homographies = []
    cv2.namedWindow('display', cv2.WINDOW_NORMAL)
    cap = cv2.VideoCapture(videoName)
    flags, frame = cap.read()
    while flags:
        if firstImage:
            firstImage = False
            lastFrame = frame[:, :].copy()
            lastGray = cv2.cvtColor(lastFrame, cv2.COLOR_BGR2GRAY)
            lastKeypoints = cv2.goodFeaturesToTrack(lastGray, mask=None, **feature_params)
            flags, frame = cap.read()
            frameCount += 1
        else:
            mask = np.zeros_like(lastFrame)
            currentFrame = frame[:, :].copy()
            frameCount += 1
            lastKeypoints, currentKeypoints = findNthFeatures(lastFrame, lastKeypoints, currentFrame)
            # for i, (new, old) in enumerate(zip(currentKeypoints, lastKeypoints)):
            #     a, b = new.ravel()
            #     c, d = old.ravel()
            #     mask = cv2.line(mask, (a,b), (c,d), color[i].tolist(), 2)
            #     frame = cv2.circle(frame, (a,b), 5, color[i].tolist(), -1)
            # img = cv2.add(frame, mask)
            # cv2.imshow('display', img)
            # cv2.waitKey(0)
            homographyMatrix, homographyStatus = cv2.findHomography(currentKeypoints, lastKeypoints, cv2.RANSAC, 0.5)
            Homographies.append(homographyMatrix)
            lastFrame = currentFrame
            lastKeypoints = currentKeypoints
            if frameCount % featureRefreshRate == 0:
                grayBuf = cv2.cvtColor(lastFrame, cv2.COLOR_BGR2GRAY)
                lastKeypoints = cv2.goodFeaturesToTrack(grayBuf, mask=None, **feature_params)
            flags, frame = cap.read()
    return Homographies

def stitchRow(videoName):
    cv2.namedWindow('display', cv2.WINDOW_NORMAL)
    frameCount = 0
    cap = cv2.VideoCapture(videoName)
    ret, initialImage = cap.read()
    homographyMatrices = []
    homographyMatrices = getHomographies(videoName)
    warpHMat = homographyMatrices[frameCount]
    while ret:
        ret, nextImg = cap.read()
        frameCount += 1
        result = cv2.warpPerspective(nextImg, warpHMat,
                                     (initialImage.shape[1] + nextImg.shape[1], nextImg.shape[0]))
        #result[0:initialImage.shape[0], 0:initialImage.shape[1]] = initialImage
        cv2.imshow('display', result)
        cv2.waitKey(0)
        # cv2.imshow('display', initialImage)
        # cv2.waitKey(0)
        warpHMat = homographyMatrices[frameCount]
        for j in range(frameCount, 0, -1):
            warpHMat = warpHMat * homographyMatrices[j - 1]
        # initialImage = result[:, :].copy()

stitchRow(sys.argv[1])
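One concrete thing to check (an observation about the code above, not a confirmed diagnosis): cv2.findHomography returns a NumPy ndarray, and * on ndarrays is elementwise multiplication, so warpHMat * homographyMatrices[j-1] never actually composes the homographies. Composition needs true matrix multiplication, and normalizing so H[2, 2] stays 1 keeps the chain numerically stable. A minimal sketch:
import numpy as np

def compose_homographies(h_list):
    # H maps the newest frame into the first frame's coordinates.
    H = np.eye(3)
    for h in h_list:
        H = H @ h          # matrix product, not elementwise '*'
    H /= H[2, 2]           # keep the homography normalized
    return H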
