The server is sending video by video using the same RTSP URL(rtsp://192.168.0.2:8554/)
I can capture and display video using opencv.
import numpy as np
import cv2 as cv
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
cap = cv.VideoCapture('rtsp://192.168.0.2:8554/')
while cap.isOpened():
ret, frame = cap.read()
# if frame is read correctly ret is True
if not ret:
print("Can't receive frame (stream end?). Exiting ...")
break
cv.imshow('frame', frame)
if cv.waitKey(1) == ord('q'):
break
cap.release()
cv.destroyAllWindows()
This program returns error when going on to the next video.
I tried this, but this didn't work.
import cv2 as cv
import os
import time
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
cap = cv.VideoCapture('rtsp://192.168.0.26:8554/')
if not cap.isOpened():
print("Cannot open camera")
exit()
while True:
try:
time.sleep(2)
# Capture frame-by-frame
ret, frame = cap.read()
# if frame is read correctly ret is True
# Our operations on the frame come here
# Display the resulting frame
cv.imshow('frame',frame)
if cv.waitKey(1) == ord('q'):
break
except:
print("Exception!!")
# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
Can I get some help?
Thanks in advance!
I solved this by using multi-threaded program.
Main file
from datasets import LoadStreams
import threading
import os
import logging
import cv2
import torch
import time
logger = logging.getLogger(__name__)
def select_device(device='', batch_size=None):
# device = 'cpu' or '0' or '0,1,2,3'
cpu_request = device.lower() == 'cpu'
if device and not cpu_request: # if device requested other than 'cpu'
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availablity
cuda = False if cpu_request else torch.cuda.is_available()
if cuda:
c = 1024 ** 2 # bytes to MB
ng = torch.cuda.device_count()
if ng > 1 and batch_size: # check that batch_size is compatible with device_count
assert batch_size % ng == 0, f'batch-size {batch_size} not multiple of GPU count {ng}'
x = [torch.cuda.get_device_properties(i) for i in range(ng)]
s = f'Using torch {torch.__version__} '
for i, d in enumerate((device or '0').split(',')):
if i == 1:
s = ' ' * len(s)
logger.info(f"{s}CUDA:{d} ({x[i].name}, {x[i].total_memory / c}MB)")
else:
logger.info(f'Using torch {torch.__version__} CPU')
logger.info('') # skip a line
return torch.device('cuda:0' if cuda else 'cpu')
def detect(rtsp_url):
dataset = LoadStreams(rtsp_url)
device = select_device('')
count = 0
view_img = True
# img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
try:
for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): # for every frame
count += 1
im0 = im0s[0].copy()
if view_img:
cv2.imshow(str(path), im0)
# if cv2.waitKey(1) == ord('q'): # q to quit
# raise StopIteration
except:
print("finish execption")
dataset.stop()
return "good"
if __name__ == '__main__':
rtsp_url = "rtsp://192.168.0.26:8554/"
while True:
for thread in threading.enumerate():
print(thread.name)
print(detect(rtsp_url))
dataset class file
import glob
import logging
import math
import os
import random
import shutil
import time
import re
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
self.mode = 'stream'
self.img_size = img_size
self.capture = None
self.my_thread = None
self.stopFlag = False
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = [clean_str(x) for x in sources] # clean source names for later
s = sources[0]
# for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
# print('%g/%g: %s... ' % (i + 1, n, s), end='')
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
assert cap.isOpened(), 'Failed to open %s' % s
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
self.ret, self.imgs[0] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([0, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
thread.start()
self.capture = cap
self.my_thread = thread
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened() and not self.stopFlag:
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
_, self.imgs[index] = cap.retrieve()
n = 0
time.sleep(0.01) # wait time
def stop(self):
self.stopFlag = True
try:
# self.capture.release()
# self.my_thrsead.join()
print("stop thread!!")
except:
print("ERROR stopping thread!!")
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
if not self.ret:
print("error!!!")
self.stop()
# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
# def stop(self):
def clean_str(s):
# Cleans a string by replacing special characters with underscore _
return re.sub(pattern="[|##!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
while cap.isOpened() and not self.stopFlag:
this line is especially important because
without this line the threads will be stacked and will have memory error
as the stack stacks up.
Related
I'm trying to implement a code that takes a video and divides it into frames while also filtering the frames and saving them to a different directory.
I am running into an error that says:
TypeError: cannot pickle 'cv2.VideoCapture' object
I have tried to understand why this problem occurs but I'm still unsure why.
here is my code:
import cv2
import os
import time
import matplotlib.image as pltim
from multiprocessing import Process, Lock
import matplotlib.pyplot as plt
def saveFramesUnfiltered(vid, lock):
currentFrame = 0 # counter for frames (to organize them by order)
while True:
success, frame = vid.read()
lock.acquire()
cv2.imwrite("./framesBefore/frame" + str(currentFrame) + '.jpg',
frame) # save unfiltered frame to folder and show the video (using the frames)
lock.release()
currentFrame = currentFrame + 1
if cv2.waitKey(1) & 0xFF == ord('q'):
break # end loop when finished
time.sleep(0.01)
def saveFramesFiltered(lock):
currentFrame = 0
framemax = 215
while currentFrame < framemax:
if os.path.exists("framesBefore/frame" + str(currentFrame) + '.jpg'):
lock.acquire()
image = pltim.imread("./framesBefore/frame" + str(currentFrame) + '.jpg')
lock.release()
r, g, b = image[:, :, 0], image[:, :, 1], image[:, :, 2]
grayImage = 0.299 * r + 0.587 * g + 0.114 * b
plt.plot(grayImage, cmap="gray")
plt.axis("off")
lock.acquire()
plt.savefig("./framesAfter/grayImage" + str(currentFrame) + ".jpg", bbox_inches='tight', pad_inches=0)
lock.release()
time.sleep(0.01)
def main():
if not os.path.exists('framesBefore'):
os.makedirs('framesBefore') # create a folder for the unfiltered frames
if not os.path.exists('framesAfter'):
os.makedirs('framesAfter') # create a folder for the filtered frames
lock = Lock()
vid = cv2.VideoCapture("maxwell cat.mp4") # getting the video
unfiltered_process = Process(target=saveFramesUnfiltered, args=(vid, lock))
filtered_process = Process(target=saveFramesFiltered, args=lock)
unfiltered_process.start()
filtered_process.start()
unfiltered_process.join()
filtered_process.join()
vid.release()
cv2.destroyAllWindows() # clear memory
if __name__ == '__main__':
main()
I am also new at using threads in python and in general so I would like to know if the way I implemented it is correct.
thanks!
I am trying to do real time prediciton inference for an arm64 computer. I am using Mobilenet_V1. Unfortunately, I am getting different key errors everytime I run the prediction. It seems the error is due to some problem with the label index (I am not sure about it).
Here's the code and error I am getting.
from paddlelite.lite import *
import cv2
import numpy as np
import sys
import time
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
def create_predictor(model_dir):
config = MobileConfig()
config.set_model_from_file(model_dir)
predictor = create_paddle_predictor(config)
return predictor
def process_img(image, input_image_size):
origin = image
img = origin.resize(input_image_size, Image.BILINEAR)
resized_img = img.copy()
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) # HWC to CHW
img -= 127.5
img *= 0.007843
img = img[np.newaxis, :]
return origin,img
def predict(image, predictor, input_image_size):
input_tensor = predictor.get_input(0)
input_tensor.resize([1, 3, input_image_size[0], input_image_size[1]])
image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA))
origin, img = process_img(image, input_image_size)
image_data = np.array(img).flatten().tolist()
input_tensor.set_float_data(image_data)
predictor.run()
output_tensor = predictor.get_output(0)
print("output_tensor.float_data()[:] : ", output_tensor.float_data()[:])
res = output_tensor.float_data()[:]
return res
def post_res(label_dict, res):
# print(max(res))
target_index = res.index(max(res))
print("predicted result:" + " " + label_dict[target_index], "accuracy:", max(res))
if __name__ == '__main__':
label_dict = {0:"metal", 1:"paper", 2:"plastic", 3:"glass"}
model_dir = "../model/mobilenet_v1_opt.nb"
image_size = (224, 224)
predictor = create_predictor(model_dir)
while True:
ret, frame = cap.read()
print('Prediction Start')
time_start=time.time()
res = predict(frame, predictor, image_size)
post_res(label_dict, res)
print('Time Cost:{}'.format(time.time()-time_start) , "s")
print('Predict End')
cv2.imshow("frame", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
Error:
Any suggestion/tips would be really helpful.
I followed a video from Youtube about how to run Yolov4 using Darknet.
The thing is that when I run the program using the .exe file, it run perfectly with great FPS.
And now I have to change some part of the code to do the things I want, meaning that I have to run the code using its python code and not using the .exe file anymore.
But before any change, I tested to run it with no change but I only have the video running with LOW FPS and no bounding box shown up.
After some research I found that the "detections" variable is empty, and the problem comes on the "detect_image" function it self. The thing is that I can't figure out why it is empty even I know that there is something he should detect on it.
Here are the code from darknet_video.py
from ctypes import *
import random
import os
import cv2
import time
import darknet
import argparse
from threading import Thread, enumerate
from queue import Queue
def parser():
parser = argparse.ArgumentParser(description="YOLO Object Detection")
parser.add_argument("--input", type=str, default=0,
help="video source. If empty, uses webcam 0 stream")
parser.add_argument("--out_filename", type=str, default="",
help="inference video name. Not saved if empty")
parser.add_argument("--weights", default="yolov4.weights",
help="yolo weights path")
parser.add_argument("--dont_show", action='store_true',
help="windown inference display. For headless systems")
parser.add_argument("--ext_output", action='store_true',
help="display bbox coordinates of detected objects")
parser.add_argument("--config_file", default="./cfg/yolov4.cfg",
help="path to config file")
parser.add_argument("--data_file", default="./cfg/coco.data",
help="path to data file")
parser.add_argument("--thresh", type=float, default=.25,
help="remove detections with confidence below this value")
return parser.parse_args()
def str2int(video_path):
"""
argparse returns and string althout webcam uses int (0, 1 ...)
Cast to int if needed
"""
try:
return int(video_path)
except ValueError:
return video_path
def check_arguments_errors(args):
assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
if not os.path.exists(args.config_file):
raise(ValueError("Invalid config path {}".format(os.path.abspath(args.config_file))))
if not os.path.exists(args.weights):
raise(ValueError("Invalid weight path {}".format(os.path.abspath(args.weights))))
if not os.path.exists(args.data_file):
raise(ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file))))
if str2int(args.input) == str and not os.path.exists(args.input):
raise(ValueError("Invalid video path {}".format(os.path.abspath(args.input))))
def set_saved_video(input_video, output_video, size):
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
fps = int(input_video.get(cv2.CAP_PROP_FPS))
video = cv2.VideoWriter(output_video, fourcc, fps, size)
return video
def convert2relative(bbox):
"""
YOLO format use relative coordinates for annotation
"""
x, y, w, h = bbox
_height = darknet_height
_width = darknet_width
return x/_width, y/_height, w/_width, h/_height
def convert2original(image, bbox):
x, y, w, h = convert2relative(bbox)
image_h, image_w, __ = image.shape
orig_x = int(x * image_w)
orig_y = int(y * image_h)
orig_width = int(w * image_w)
orig_height = int(h * image_h)
bbox_converted = (orig_x, orig_y, orig_width, orig_height)
return bbox_converted
def convert4cropping(image, bbox):
x, y, w, h = convert2relative(bbox)
image_h, image_w, __ = image.shape
orig_left = int((x - w / 2.) * image_w)
orig_right = int((x + w / 2.) * image_w)
orig_top = int((y - h / 2.) * image_h)
orig_bottom = int((y + h / 2.) * image_h)
if (orig_left < 0): orig_left = 0
if (orig_right > image_w - 1): orig_right = image_w - 1
if (orig_top < 0): orig_top = 0
if (orig_bottom > image_h - 1): orig_bottom = image_h - 1
bbox_cropping = (orig_left, orig_top, orig_right, orig_bottom)
return bbox_cropping
def video_capture(frame_queue, darknet_image_queue):
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (darknet_width, darknet_height),
interpolation=cv2.INTER_LINEAR)
frame_queue.put(frame)
img_for_detect = darknet.make_image(darknet_width, darknet_height, 3)
darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())
darknet_image_queue.put(img_for_detect)
cap.release()
def inference(darknet_image_queue, detections_queue, fps_queue):
while cap.isOpened():
darknet_image = darknet_image_queue.get()
prev_time = time.time()
print("Network: ", network)
detections = darknet.detect_image(network, class_names, darknet_image, thresh=args.thresh)
detections_queue.put(detections)
fps = int(1/(time.time() - prev_time))
fps_queue.put(fps)
print("FPS: {}".format(fps))
darknet.print_detections(detections, args.ext_output)
darknet.free_image(darknet_image)
cap.release()
def drawing(frame_queue, detections_queue, fps_queue):
random.seed(3) # deterministic bbox colors
video = set_saved_video(cap, args.out_filename, (video_width, video_height))
while cap.isOpened():
frame = frame_queue.get()
detections = detections_queue.get()
fps = fps_queue.get()
detections_adjusted = []
if frame is not None:
for label, confidence, bbox in detections:
bbox_adjusted = convert2original(frame, bbox)
detections_adjusted.append((str(label), confidence, bbox_adjusted))
image = darknet.draw_boxes(detections_adjusted, frame, class_colors)
if len(detections):
name = []
count = []
for detect in detections:
name_tag = detect[0].decode()
if name_tag in name:
count[name.index(name_tag)] += 1
else:
name.append(name_tag)
count.append(1)
for index in len(name):
cv2.putText(image, name[index] + ": " + str(count[index]), (10, 30 + 30 * index), cv2.FONT_HERSHEY_SIMPLEX,0.5, (0, 0, 255), 2)
if not args.dont_show:
cv2.imshow('Inference', image)
if args.out_filename is not None:
video.write(image)
if cv2.waitKey(fps) == 27:
break
cap.release()
video.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
frame_queue = Queue()
darknet_image_queue = Queue(maxsize=1)
detections_queue = Queue(maxsize=1)
fps_queue = Queue(maxsize=1)
args = parser()
check_arguments_errors(args)
network, class_names, class_colors = darknet.load_network(
args.config_file,
args.data_file,
args.weights,
batch_size=1
)
darknet_width = darknet.network_width(network)
darknet_height = darknet.network_height(network)
input_path = str2int(args.input)
cap = cv2.VideoCapture(input_path)
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()
Thread(target=inference, args=(darknet_image_queue, detections_queue, fps_queue)).start()
Thread(target=drawing, args=(frame_queue, detections_queue, fps_queue)).start()
Thank you for helping
I am working on an experiment with plants in a pressure chamber. I need to be able to identify with a computer vision algorithm the exact moment when water starts to appear at the cut end of the stem. In the case of this video - taken from a USB microscope, this is the interval between 0:30 and 0:34 seconds, approximately.
I tried to use MOG, MOG2 and GMG as a background subtractor, and compare the histograms of each frame (using chi-squared, bhattacharyya, correlation), looking for changes that could be significant, however still without success. Is there a better alternative for this type of work?
Below, some code (made with the help of a friend)
import numpy as np
import sys
import time
import cv2
from matplotlib import pyplot as plt
video_filename = 'M20201022_004.mov'
capture = cv2.VideoCapture(video_filename)
#fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
fgbg = cv2.createBackgroundSubtractorMOG2()
#kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
#fgbg = cv2.bgsegm.createBackgroundSubtractorGMG()
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = capture.get(cv2.CAP_PROP_FPS)
num_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print(' height: {}\n width: {}\n fps: {}\n num_frames: {}\n'.format(height, width,frames_per_second, num_frames))
frameCounter = 0
t = time.process_time()
dist_hist = 0 # distance between histograms
frame_hist = 0
time_hist = 0
#write file
file1 = open("resultado.txt","w")
if not capture.isOpened():
print("Could not open video")
print('frameCounter: {}'.format(frameCounter))
sys.exit(1)
while capture.isOpened():
success, frame = capture.read()
frameCounter += 1
# Test for read error
if not success:
print('Failed to read video - Video Capture EOF or Error')
print('frameCounter:{}'.format(frameCounter))
if frameCounter == num_frames + 1:
print('EOF found')
else:
print('error')
break
#sys.exit(1)
else:
if frameCounter % 1000 == 0:
print('type:{} size:{} dtype:{} len(shape):{} contador:{}'.format(type(frame),frame.size,frame.dtype,len(frame.shape),frameCounter))
if len(frame.shape) < 3: # grayscale
h, w = frame.shape
print('h:{} w:{}'.format(h, w))
else: # color image
h, w, ch = frame.shape
print('h:{} w:{} ch:{}'.format(h, w, ch))
fgmask = fgbg.apply(frame)
#fgmask = fgbg.apply(frame)
#fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)
# Initial histogram Test
if frameCounter == 1:
hist_initial = cv2.calcHist([fgmask], [0],None,[16],[0, 256])
# print('hist_initial:{}'.format(hist_initial))
#elapsed_time = time.process_time() - t
elapsed_time = frameCounter / frames_per_second
# Process Histogram
hist_process = cv2.calcHist([fgmask], [0],None,[16],[0, 256])
dist = cv2.compareHist(hist_initial, hist_process,cv2.HISTCMP_CHISQR)
str1 = str(frameCounter) + "," + str(dist) + "," + str(dist_hist) + "," + str(elapsed_time)
file1.write(str1)
file1.write("\n")
if dist > dist_hist: # Depending on compareHist method
dist_hist = dist
time_hist = elapsed_time
frame_hist = frameCounter
# Print line at image
strfmt = 'frame: {} elapsed_time: {:7.2f}'.format(frameCounter, elapsed_time)
cv2.putText(frame, strfmt, (0, 50),cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,0,255), 1, cv2.LINE_AA)
cv2.imshow('frame', frame)
cv2.imshow('fgmask', fgmask)
if cv2.waitKey(1) & 0xff == 27: # ESC pressed
break
print('---> frame:{} dist:{:10.6f} time:{:7.2f}'.format(frame_hist, dist_hist,time_hist))
capture.release()
cv2.destroyAllWindows()
file1.close()
Any help appreciated!
I have two python files that contain 2 classes. I want to get a variable that is within one function within one class into a function in a different class.
Specifically, the class CaptureFrames creates a variable called frames_count, which keeps track of the current frame of the video being processed. The class ProcessMasks does processing on a frame and returns a variable hr_fft. When this variable is returned, I want to create a tuple containing the hr_fft and the frames_count variable together, so I can keep track of what specific frame is associated with the hr_fft variable.
I've tried labeling it as CaptureFrames.frames_count and CaptureFrames.capture_frames.frames_count but it doesn't work. How do I do this?
The classes look like this:
import cv2
import numpy as np
import torch
from torch import nn
from models import LinkNet34
import torchvision.transforms as transforms
from torch.autograd import Variable
from PIL import Image, ImageFilter
import time
import sys
class CaptureFrames():
def __init__(self, bs, source, show_mask=False):
self.frame_counter = 0
self.batch_size = bs
self.stop = False
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.model = LinkNet34()
self.model.load_state_dict(torch.load('linknet.pth', map_location='cpu'))
self.model.eval()
self.model.to(self.device)
self.show_mask = show_mask
def __call__(self, pipe, source):
self.pipe = pipe
self.capture_frames(source)
def capture_frames(self, source):
img_transform = transforms.Compose([
transforms.Resize((256,256)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
camera = cv2.VideoCapture(source)
time.sleep(1)
self.model.eval()
(grabbed, frame) = camera.read()
time_1 = time.time()
#######################################################
self.frames_count = 0
#######################################################
while grabbed:
(grabbed, orig) = camera.read()
if not grabbed:
continue
shape = orig.shape[0:2]
frame = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame,(256,256), cv2.INTER_LINEAR )
k = cv2.waitKey(1)
if k != -1:
self.terminate(camera)
break
a = img_transform(Image.fromarray(frame))
a = a.unsqueeze(0)
imgs = Variable(a.to(dtype=torch.float, device=self.device))
pred = self.model(imgs)
pred= torch.nn.functional.interpolate(pred, size=[shape[0], shape[1]])
mask = pred.data.cpu().numpy()
mask = mask.squeeze()
# im = Image.fromarray(mask)
# im2 = im.filter(ImageFilter.MinFilter(3))
# im3 = im2.filter(ImageFilter.MaxFilter(5))
# mask = np.array(im3)
mask = mask > 0.8
orig[mask==0]=0
self.pipe.send([orig])
if self.show_mask:
cv2.imshow('mask', orig)
if self.frames_count % 30 == 29:
time_2 = time.time()
sys.stdout.write(f'\rFPS: {30/(time_2-time_1)}')
sys.stdout.flush()
time_1 = time.time()
#######################################################
self.frames_count+=1
#######################################################
self.terminate(camera)
def terminate(self, camera):
self.pipe.send(None)
cv2.destroyAllWindows()
camera.release()
and
import cv2
import numpy as np
from pulse import Pulse
import time
from threading import Lock, Thread
from plot_cont import DynamicPlot
from capture_frames import CaptureFrames
import pandas as pd
from matplotlib import pyplot as plt
from utils import *
import multiprocessing as mp
import sys
class ProcessMasks():
def __init__(self, sz=270, fs=30, bs=30, size=256):
print('init')
self.stop = False
self.masked_batches = []
self.batch_mean = []
self.signal_size = sz
self.batch_size = bs
self.signal = np.zeros((sz, 3))
self.pulse = Pulse(fs, sz, bs, size)
self.hrs = []
self.save_results = True
def __call__(self, pipe, plot_pipe, source):
self.pipe = pipe
self.plot_pipe = plot_pipe
self.source = source
compute_mean_thread = Thread(target=self.compute_mean)
compute_mean_thread.start()
extract_signal_thread = Thread(target=self.extract_signal)
extract_signal_thread.start()
self.rec_frames()
compute_mean_thread.join()
extract_signal_thread.join()
def rec_frames(self):
while True and not self.stop:
data = self.pipe.recv()
if data is None:
self.terminate()
break
batch = data[0]
self.masked_batches.append(batch)
def process_signal(self, batch_mean):
size = self.signal.shape[0]
b_size = batch_mean.shape[0]
self.signal[0:size-b_size] = self.signal[b_size:size]
self.signal[size-b_size:] = batch_mean
p = self.pulse.get_pulse(self.signal)
p = moving_avg(p, 6)
hr = self.pulse.get_rfft_hr(p)
if len(self.hrs) > 300: self.hrs.pop(0)
self.hrs.append(hr)
if self.plot_pipe is not None and self.stop:
self.plot_pipe.send(None)
elif self.plot_pipe is not None:
self.plot_pipe.send([p, self.hrs])
else:
#######################################################
# I want to insert frames_count here, so when I print
# hr_fft, I can also print the frame_count for that
# observation.
#######################################################
hr_fft = moving_avg(self.hrs, 3)[-1] if len(self.hrs) > 5 else self.hrs[-1]
print(hr_fft, CaptureFrames.frames_count)
sys.stdout.write(f'\rHr: {round(hr_fft, 0)}')
sys.stdout.flush()
def extract_signal(self):
signal_extracted = 0
while True and not self.stop:
if len(self.batch_mean) == 0:
time.sleep(0.01)
continue
mean_dict = self.batch_mean.pop(0)
mean = mean_dict['mean']
if mean_dict['face_detected'] == False:
if self.plot_pipe is not None:
self.plot_pipe.send('no face detected')
continue
if signal_extracted >= self.signal_size:
self.process_signal(mean)
else:
self.signal[signal_extracted: signal_extracted + mean.shape[0]] = mean
signal_extracted+=mean.shape[0]
def compute_mean(self):
curr_batch_size = 0
batch = None
while True and not self.stop:
if len(self.masked_batches) == 0:
time.sleep(0.01)
continue
mask = self.masked_batches.pop(0)
if batch is None:
batch = np.zeros((self.batch_size, mask.shape[0], mask.shape[1], mask.shape[2]))
if curr_batch_size < (self.batch_size - 1):
batch[curr_batch_size] = mask
curr_batch_size+=1
continue
batch[curr_batch_size] = mask
curr_batch_size = 0
non_zero_pixels = (batch!=0).sum(axis=(1,2))
total_pixels = batch.shape[1] * batch.shape[2]
avg_skin_pixels = non_zero_pixels.mean()
m = {'face_detected': True, 'mean': np.zeros((self.batch_size, 3))}
if (avg_skin_pixels + 1) / (total_pixels) < 0.05:
m['face_detected'] = False
else:
m['mean'] = np.true_divide(batch.sum(axis=(1,2)), non_zero_pixels+1e-6)
self.batch_mean.append(m)
def terminate(self):
if self.plot_pipe is not None:
self.plot_pipe.send(None)
self.savePlot(self.source)
self.saveresults()
self.stop = True
def saveresults(self):
"""
saves numpy array of heart rates as hrs
saves numpy array of power spectrum as fft_spec
"""
np.save('hrs', np.array(self.hrs))
np.save('fft_spec', np.array(self.pulse.fft_spec))
For reference, they are called together an run with this file:
import cv2
import numpy as np
from pulse import Pulse
import time
from threading import Lock, Thread
from plot_cont import DynamicPlot
from capture_frames import CaptureFrames
from process_mask import ProcessMasks
from utils import *
import multiprocessing as mp
import sys
from optparse import OptionParser
class RunPOS():
def __init__(self, sz=270, fs=28, bs=30, plot=False):
self.batch_size = bs
self.frame_rate = fs
self.signal_size = sz
self.plot = plot
def __call__(self, source):
time1=time.time()
mask_process_pipe, chil_process_pipe = mp.Pipe()
self.plot_pipe = None
if self.plot:
self.plot_pipe, plotter_pipe = mp.Pipe()
self.plotter = DynamicPlot(self.signal_size, self.batch_size)
self.plot_process = mp.Process(target=self.plotter, args=(plotter_pipe,), daemon=True)
self.plot_process.start()
process_mask = ProcessMasks(self.signal_size, self.frame_rate, self.batch_size)
mask_processer = mp.Process(target=process_mask, args=(chil_process_pipe, self.plot_pipe, source, ), daemon=True)
mask_processer.start()
capture = CaptureFrames(self.batch_size, source, show_mask=True)
capture(mask_process_pipe, source)
mask_processer.join()
if self.plot:
self.plot_process.join()
time2=time.time()
time2=time.time()
print(f'time {time2-time1}')
def get_args():
parser = OptionParser()
parser.add_option('-s', '--source', dest='source', default=0,
help='Signal Source: 0 for webcam or file path')
parser.add_option('-b', '--batch-size', dest='batchsize', default=30,
type='int', help='batch size')
parser.add_option('-f', '--frame-rate', dest='framerate', default=25,
help='Frame Rate')
(options, _) = parser.parse_args()
return options
if __name__=="__main__":
args = get_args()
source = args.source
runPOS = RunPOS(270, args.framerate, args.batchsize, True)
runPOS(source)