I am using multiprocessing to get frames of a video using Opencv in python.
My class looks like this :-
import cv2
from multiprocessing import Process, Queue
class StreamVideos:
def __init__(self):
self.image_data = Queue()
def start_proces(self):
p = Process(target=self.echo)
p.start()
def echo(self):
cap = cv2.VideoCapture('videoplayback.mp4')
while cap.isOpened():
ret,frame = cap.read()
self.image_data.put(frame)
# print("frame")
I start the process "echo" using :-
p = Process(target=self.echo)
p.start()
the echo function looks like this :-
def echo(self):
cap = cv2.VideoCapture('videoplayback.mp4')
while cap.isOpened():
ret,frame = cap.read()
self.image_data.put(frame)
in which i am using queue where i put these frames
self.image_data.put(frame)
and then in another process I start reviving these frames
self.obj = StreamVideos()
def start_process(self):
self.obj.start_proces()
p = Process(target=self.stream_videos)
p.start()
def stream_videos(self):
while True:
self.img = self.obj.image_data.get()
print(self.img)
but as soon as I start putting frames to queue, the ram gets filled very quickly and the system gets stuck. The video I am using is just 25 fps and 39mb in size, so it does not make any sense.
One thing I noticed is that the "echo" process is putting a lot of frames in the queue before the "stream_videos" process retrives it.
What could be the root of this problem?
Thanks in advance.
Expectations: -
Able to retrieve the frames continuosly.
Tried :-
Not putting frames in queue, in which case the ram is not filled.
The following is a general purpose single producer/multiple consumer implementation. The producer (class StreamVideos) creates a shared memory array whose size is the number of bytes in the video frame. One or more consumers (you specify the number of consumers to StreamVideos) can then call StreamVideos.get_next_frame() to retrieve the next frame. This method converts the shared array back into a numpy array for subsequent processing. The producer will only read the next frame into the shared array after all consumers have called get_next_frame:
#!/usr/bin/env python3
import multiprocessing
import numpy as np
import ctypes
import cv2
class StreamVideos:
def __init__(self, path, n_consumers):
"""
path is the path to the video:
n_consumers is the number of tasks to which we will be sreaming this.
"""
self._path = path
self._event = multiprocessing.Event()
self._barrier = multiprocessing.Barrier(n_consumers + 1, self._reset_event)
# Discover how large a framesize is by getting the first frame
cap = cv2.VideoCapture(self._path)
ret, frame = cap.read()
if ret:
self._shape = frame.shape
frame_size = self._shape[0] * self._shape[1] * self._shape[2]
self._arr = multiprocessing.RawArray(ctypes.c_ubyte, frame_size)
else:
self._arr = None
cap.release()
def _reset_event(self):
self._event.clear()
def start_streaming(self):
cap = cv2.VideoCapture(self._path)
while True:
self._barrier.wait()
ret, frame = cap.read()
if not ret:
# No more readable frames:
break
# Store frame into shared array:
temp = np.frombuffer(self._arr, dtype=frame.dtype)
temp[:] = frame.flatten(order='C')
self._event.set()
cap.release()
self._arr = None
self._event.set()
def get_next_frame(self):
# Tell producer that this consumer is through with the previous frame:
self._barrier.wait()
# Wait for next frame to be read by the producer:
self._event.wait()
if self._arr is None:
return None
# Return shared array as a numpy array:
return np.ctypeslib.as_array(self._arr).reshape(self._shape)
def consumer(producer, id):
frame_name = f'Frame - {id}'
while True:
frame = producer.get_next_frame()
if frame is None:
break
cv2.imshow(frame_name, frame)
cv2.waitKey(1)
cv2.destroyAllWindows()
def main():
producer = StreamVideos('videoplayback.mp4', 2)
consumer1 = multiprocessing.Process(target=consumer, args=(producer, 1))
consumer1.start()
consumer2 = multiprocessing.Process(target=consumer, args=(producer, 2))
consumer2.start()
"""
# Run as a child process:
producer_process = multiprocessing.Process(target=producer.start_streaming)
producer_process.start()
producer_process.join()
"""
# Run in main process:
producer.start_streaming()
consumer1.join()
consumer2.join()
if __name__ == '__main__':
main()
Related
This is a followup on this where #Aaron helped quite a lot.
Context:
OpenCV, Python, a USB Webcam and multiprocessing - Grabbing all the time from the camera on a subprocess and returning a single frame upon request to the main process.
#Aaron did most of the heavylifting, of not all :). What I am missing is how to turn this into a process that gets a single frame upon request.
Help Please?
import numpy as np
import cv2
from multiprocessing import Process, Queue
from multiprocessing.shared_memory import SharedMemory
def produce_frames(q):
#get the first frame to calculate size of buffer
cap = cv2.VideoCapture(0)
success, frame = cap.read()
shm = SharedMemory(create=True, size=frame.nbytes)
framebuffer = np.ndarray(frame.shape, frame.dtype, buffer=shm.buf) #could also maybe use array.array instead of numpy, but I'm familiar with numpy
framebuffer[:] = frame #in case you need to send the first frame to the main process
q.put(shm) #send the buffer back to main
q.put(frame.shape) #send the array details
q.put(frame.dtype)
try:
while True:
cap.read(framebuffer)
except KeyboardInterrupt:
pass
finally:
shm.close() #call this in all processes where the shm exists
shm.unlink() #call this in at least one process
def consume_frames(q):
shm = q.get() #get the shared buffer
shape = q.get()
dtype = q.get()
framebuffer = np.ndarray(shape, dtype, buffer=shm.buf) #reconstruct the array
try:
while True:
cv2.imshow("window title", framebuffer)
cv2.waitKey(100)
except KeyboardInterrupt:
pass
finally:
shm.close()
if __name__ == "__main__":
q = Queue()
producer = Process(target=produce_frames, args=(q,))
producer.start()
consume_frames(q)
Recently, I want to show tello stream with image detection. My first thought is to save model’s output to my local with save() method, and show it by cv2.imshow() method. It works but the stream with objects detection will have a delay about 4~5 second.
My code:
from threading import Thread
from djitellopy import Tello
import cv2, math, time
import torch
import os
import numpy as np
import asyncio
import imutils
from PIL import Image
path = r'C:\yolov5-master'
model = torch.hub.load(path, 'yolov5s',source='local', pretrained=True)
tello = Tello()
tello.connect()
tello.streamon()
frame_read = tello.get_frame_read()
class VideoStreamWidget(object):
def __init__(self, src=0):
# Start the thread to read frames from the video stream
self.thread = Thread(target=self.update, args=())
self.thread.daemon = True
self.thread.start()
def update(self):
# Read the next frame from the stream
global frame
while True:
self.frame = cv2.cvtColor(frame_read.frame,cv2.COLOR_RGB2BGR)
time.sleep(.01)
def show_frame(self):
# Display frames in main program
wee = model(self.frame)
arr = wee.datah().cpu().numpy()
img = Image.fromarray.fromarray(arr, 'RGB')
result = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
cv2.imshow('frame', result)
key = cv2.waitKey(1)
if __name__ == '__main__':
video_stream_widget = VideoStreamWidget()
time.sleep(1)
while True:
try:
video_stream_widget.show_frame()
except AttributeError:
pass
I'm wondering what data type is the output of model( ).
And I tried:
wee = model(self.frame)
print( type( wee ) )
output:
<class 'models.common.Detections'>
How can I convert this kind of data to the thing fit cv2.imshow( ) method? Or is there any way to show a real-time stream with object detection without delay?
Appreciate.
I am trying to use 2 threads in python to solve face-recognition task. The first thread is used to capture frame and append it into deque, and the second one is used to read the frame from deque to solve face-recognition task, deque is a global variable .When i start 2 threads, it show the result as below.
This is result of program. In second thread, process function don't execute.
and here is my code. How to when a new frame is appended into deque in first thread, and the second thread is detect and recognize the face in new frame.
import face_recognition
import cv2
from threading import Thread
from collections import deque
import pickle
svm = pickle.load(open('svm.pkl','rb'))
deque = deque()
def open_camera(url):
print('start camera')
#capture = cv2.VideoCapture('rtsp://'+str(url))
capture = cv2.VideoCapture(0)
while capture.isOpened():
_,frame = capture.read()
frame_resize = cv2.resize(frame,(640,480))
cv2.imshow('Video',frame_resize)
if cv2.waitKey(1) & 0xFF==ord('q'):
break
# cvtColor
frame_resize = frame_resize[:, :, ::-1]
deque.append(frame_resize)
capture.release()
cv2.destroyAllWindows()
def spin(seconds):
time.sleep(seconds)
def process_camera():
print("start process")
global deque
if deque:
time.sleep(0.1)
print(deque[-1])
for i in deque:
process(frame=i)
else:
for i in deque:
process(frame=i)
def process(frame):
print('process')
frame = cv2.imread(frame)
face_locations = face_recognition.face_locations(frame)
face_encodings = face_recognition.face_encodings(frame,face_locations)
face_names = []
for face_encoding in face_encodings:
name = svm.predict(face_encoding.reshape(1, -1))
face_names.append(name)
print(face_names)
thread1= Thread(target=open_camera, args=(0,))
thread2 = Thread(target=process_camera, args=())
thread1.start()
thread2.start()
print('\texit main')
Thanks you so much!
I have a tello ryze drone, which has got a camera onboard. I am connecting to the drone in order to receive the video over wifi. My system is windows 10 and I am using python 2.7.
I am receiving a h264 bytecode and i use the libh264 decoder from tello in order to get the frames of the video, which i also display in my UI.
What i need to do is to save this video as a file on my computer and i have problems with this. I am able to make a snapshot with opencv and save it as an image, this isn't a problem. But making a video isn't working for some reason.
I read many posts here like this one but they dont work out for me. I am getting either an error or i get a very small video file, which doesnt open.
My Frames are List of List with RGB values like:
[[255,200,100][55,200,100][25,20,100]]
Here is my code for a better understanding in order to help me
This is the UI Part(i just copy here the needed code only):
def videoLoop(self):
try:
# start the thread that get GUI image and draw skeleton
time.sleep(0.5)
self.sending_command_thread.start()
while not self.stopEvent.is_set():
system = platform.system()
# read the frame for GUI show
self.frame = self.drone.read()
if self.frame is None or self.frame.size == 0:
continue
# transfer the format from frame to image
image = Image.fromarray(self.frame)
# we found compatibility problem between Tkinter,PIL and Macos,and it will
# sometimes result the very long preriod of the "ImageTk.PhotoImage" function,
# so for Macos,we start a new thread to execute the _updateGUIImage function.
if system == "Windows" or system == "Linux":
self.refreshUI(image)
else:
thread_tmp = threading.Thread(target=self.refreshUI, args=(image,))
thread_tmp.start()
time.sleep(0.03)
except RuntimeError as e:
print("[INFO] caught a RuntimeError")
def refreshUI(self, image):
image = ImageTk.PhotoImage(image)
# if the imagePanel none ,we need to initial it
if self.imagePanel is None:
self.imagePanel = tki.Label(image=image)
self.imagePanel.image = image
self.imagePanel.pack(side="left", fill="both",
expand="yes", padx=10, pady=10)
# otherwise, simply update the imagePanel
else:
self.imagePanel.configure(image=image)
self.imagePanel.image = image
def takeSnapshot(self):
# grab the current timestamp and use it to construct the filename
ts = datetime.datetime.now()
filename = "{}.jpg".format(ts.strftime("%d-%m-%Y_%H-%M-%S"))
p = os.path.sep.join((self.screenShotPath, filename))
# save the file
cv2.imwrite(p, cv2.cvtColor(self.frame, cv2.COLOR_RGB2BGR))
print("[INFO] saved {}".format(filename))
Below you can find the Tello code (only the needed part too):
def read(self):
"""Return the last frame from camera."""
if self.is_freeze:
return self.last_frame
else:
return self.frame
def video_freeze(self, is_freeze=True):
"""Pause video output -- set is_freeze to True"""
self.is_freeze = is_freeze
if is_freeze:
self.last_frame = self.frame
def _receive_video_thread(self):
"""
Listens for video streaming (raw h264) from the Tello.
Runs as a thread, sets self.frame to the most recent frame Tello captured.
"""
packet_data = b''
while True:
try:
res_string, ip = self.socket_video.recvfrom(2048)
packet_data += res_string
# end of frame
if len(res_string) != 1460:
for frame in self._h264_decod(packet_data):
self.frame = frame
packet_data = b''
except socket.error as exc:
print(("Caught exception sock.error : %s" % exc))
def _h264_decod(self, packet_data):
"""
decode raw h264 format data from Tello
:param packet_data: raw h264 data array
:return: a list of decoded frame
"""
res_frame_list = []
frames = self.decoder.decode(packet_data)
for framedata in frames:
(frame, w, h, ls) = framedata
if frame is not None:
# print ('frame size %i bytes, w %i, h %i, linesize %i' % (len(frame), w, h, ls))
frame = np.frombuffer(frame, dtype=np.ubyte, count=len(frame))
frame = (frame.reshape((h, ls // 3, 3)))
frame = frame[:, :w, :]
res_frame_list.append(frame)
return res_frame_list
It would be very kind of you if someone could help me write a method like this pseudocode:
def saveVideo(self, frame_or_whatever_i_need_here):
out = cv2.VideoWriter('output.avi_or_other_format', -1, 20.0, (640,480))
out.write(frame_or_whatever_i_need_here)
out.release()
Edit 1:
i found an option making a video out of my snapshots, this means that i coud make snapshot over a thread and later save them to a video. That would be an option. The problem is, that it would consume too much space in the disk. The workaround link is here
I found the solution, so i will post it here if someone needs the same thing. I used the following blog and modified the code to do my work, you can find the post here
self.frame = None
self.frame_array = []
def videoLoop(self):
try:
# start the thread that get GUI image and draw skeleton
time.sleep(0.5)
self.sending_command_thread.start()
while not self.stopEvent.is_set():
system = platform.system()
# read the frame for GUI show
self.frame = self.drone.read()
self.frame_array.append(self.frame)
if self.frame is None or self.frame.size == 0:
continue
# transfer the format from frame to image
image = Image.fromarray(self.frame)
if system == "Windows" or system == "Linux":
self.refreshUI(image)
else:
thread_tmp = threading.Thread(target=self.refreshUI, args=(image,))
thread_tmp.start()
time.sleep(0.03)
except RuntimeError as e:
print("[INFO] caught a RuntimeError")
def convert_frames_to_video(self, pathOut, fps):
size = (self.videoWidth, self.videoHeight)
out = cv2.VideoWriter(pathOut, cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
for i in range(len(self.frame_array)):
# writing to a image array
out.write(self.frame_array[i])
out.release()
def onClose(self):
print("[INFO] closing...")
self.convert_frames_to_video('video.avi', 25.0)
self.stopEvent.set()
del self.drone
self.root.quit()
I'm using OpenCv and Dlib to execute facial recognition w/ landmarks, live from the webcam stream. The language is Python. It works fine on my macbook laptop, but I need it to run from a desktop computer 24/7. The computer is a PC Intel® Core™2 Quad CPU Q6600 # 2.40GHz 32bit running Debian Jessie. The drop in performance is drastic : there is a 10 seconds delay due to processing !
I therefore looked into multi-threading to gain performance :
I first tried the sample code by OpenCv, and the result is great! All four cores hit 100%, and the performance is much better.
I then replaced the frame processing code with my code, and it doesn't improve performance at all ! Only one core hits the 100%, the other ones stay very low. I even think it's worse with multi-threading on.
I got the facial landmark code from the dlib sample code. I know it can probably be optimized, but I want to understand why am I not able to use my (old) computer's full power with multi-threading ?
I'll drop my code below, thanks a lot for reading :)
from __future__ import print_function
import numpy as np
import cv2
import dlib
from multiprocessing.pool import ThreadPool
from collections import deque
from common import clock, draw_str, StatValue
import video
class DummyTask:
def __init__(self, data):
self.data = data
def ready(self):
return True
def get(self):
return self.data
if __name__ == '__main__':
import sys
print(__doc__)
try:
fn = sys.argv[1]
except:
fn = 0
cap = video.create_capture(fn)
#Face detector
detector = dlib.get_frontal_face_detector()
#Landmarks shape predictor
predictor = dlib.shape_predictor("landmarks/shape_predictor_68_face_landmarks.dat")
# This is where the facial detection takes place
def process_frame(frame, t0, detector, predictor):
# some intensive computation...
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
clahe_image = clahe.apply(gray)
detections = detector(clahe_image, 1)
for k,d in enumerate(detections):
shape = predictor(clahe_image, d)
for i in range(1,68): #There are 68 landmark points on each face
cv2.circle(frame, (shape.part(i).x, shape.part(i).y), 1, (0,0,255), thickness=2)
return frame, t0
threadn = cv2.getNumberOfCPUs()
pool = ThreadPool(processes = threadn)
pending = deque()
threaded_mode = True
latency = StatValue()
frame_interval = StatValue()
last_frame_time = clock()
while True:
while len(pending) > 0 and pending[0].ready():
res, t0 = pending.popleft().get()
latency.update(clock() - t0)
draw_str(res, (20, 20), "threaded : " + str(threaded_mode))
draw_str(res, (20, 40), "latency : %.1f ms" % (latency.value*1000))
draw_str(res, (20, 60), "frame interval : %.1f ms" % (frame_interval.value*1000))
cv2.imshow('threaded video', res)
if len(pending) < threadn:
ret, frame = cap.read()
t = clock()
frame_interval.update(t - last_frame_time)
last_frame_time = t
if threaded_mode:
task = pool.apply_async(process_frame, (frame.copy(), t, detector, predictor))
else:
task = DummyTask(process_frame(frame, t, detector, predictor))
pending.append(task)
ch = cv2.waitKey(1)
if ch == ord(' '):
threaded_mode = not threaded_mode
if ch == 27:
break
cv2.destroyAllWindows()
Performance issue was due to a bad compilation of dlib. Do not use pip install dlib which runs very very slowly for some reason compared to the proper compilation. I went from almost 10 seconds lag to about 2 seconds this way. So finally I didn't need multi-threading/processing, but I'm working on it to enhance the speed even more. Thanks for the help :)
i tried a simplified approach like P.Ro mentioned in his answer with processes writing to an output queue but somehow the queue got locked most of the time because all the processes wrote to it at the same time. (just my guess) i probably did something wrong.
in the end i ended up using pipes.
the code is nasty. but if i was me a few hours ago. i would still be glad to find an example that actually runs without effort.
from multiprocessing import Process, Queue, Manager,Pipe
import multiprocessing
import face_recognition as fik
import cv2
import time
video_input = 0
obama_image = fik.load_image_file("obama.png")
obama_face_encoding = fik.face_encodings(obama_image)[0]
quality = 0.7
def f(id,fi,fl):
import face_recognition as fok
while True:
small_frame = fi.get()
print("running thread"+str(id))
face_locations = fok.face_locations(small_frame)
if(len(face_locations)>0):
print(face_locations)
for (top7, right7, bottom7, left7) in face_locations:
small_frame_c = small_frame[top7:bottom7, left7:right7]
fl.send(small_frame_c)
fps_var =0
if __name__ == '__main__':
multiprocessing.set_start_method('spawn')
# global megaman
with Manager() as manager:
video_capture = cv2.VideoCapture(video_input)
fi = Queue(maxsize=14)
threads = 8
proc = []
parent_p = []
thread_p = []
# procids = range(0,threads)
for t in range(0,threads):
p_t,c_t = Pipe()
parent_p.append(p_t)
thread_p.append(c_t)
print(t)
proc.append(Process(target=f, args=(t,fi,thread_p[t])))
proc[t].start()
useframe = False
frame_id = 0
while True:
# Grab a single frame of video
ret, frame = video_capture.read()
effheight, effwidth = frame.shape[:2]
if effwidth < 20:
break
# Resize frame of video to 1/4 size for faster face recognition processing
xxx = 930
yyy = 10/16 #0.4234375
small_frame = cv2.resize(frame, (xxx, int(xxx*yyy)))
if frame_id%2 == 0:
if not fi.full():
fi.put(small_frame)
print(frame_id)
cv2.imshow('Video', small_frame)
print("FPS: ", int(1.0 / (time.time() - fps_var)))
fps_var = time.time()
#GET ALL DETECTIONS
for t in range(0,threads):
if parent_p[t].poll():
small_frame_c = parent_p[t].recv()
cv2.imshow('recc', small_frame_c)
height34, width34 = small_frame_c.shape[:2]
# print fsizeee
if(width34<20):
print("face 2 small")
print(width34)
break
face_encodings_cam = fik.face_encodings(small_frame_c,[(0, width34, height34, 0)])
match = fik.compare_faces([obama_face_encoding], face_encodings_cam[0])
name = "Unknown"
if match[0]:
name = "Barack"
print(name)
break
frame_id += 1
# Hit 'q' on the keyboard to quit!
if cv2.waitKey(1) & 0xFF == ord('q'):
break
Do not have much experience with using ThreadPool, but I always just use Process like shown below. You should be able to easily edit this code to fit your needs. I wrote this with your implementation in mind.
This code will get the number of cores and start however many worker processes that will all be implementing the desired function in parallel. They all share a Queue of frames for input and all put to the same output Queue for the main to get and show. Each Queue has a maximum size, in this case 5. This ensures that despite the CPU time it takes to process, it will always be relatively live time.
import numpy as np
import cv2
from multiprocessing import Process, Queue
import time
#from common import clock, draw_str, StatValue
#import video
class Canny_Process(Process):
def __init__(self,frame_queue,output_queue):
Process.__init__(self)
self.frame_queue = frame_queue
self.output_queue = output_queue
self.stop = False
#Initialize your face detectors here
def get_frame(self):
if not self.frame_queue.empty():
return True, self.frame_queue.get()
else:
return False, None
def stopProcess(self):
self.stop = True
def canny_frame(self,frame):
# some intensive computation...
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 100)
#To simulate CPU Time
#############################
for i in range(1000000):
x = 546*546
res = x/(i+1)
#############################
'REPLACE WITH FACE DETECT CODE HERE'
if self.output_queue.full():
self.output_queue.get_nowait()
self.output_queue.put(edges)
def run(self):
while not self.stop:
ret, frame = self.get_frame()
if ret:
self.canny_frame(frame)
if __name__ == '__main__':
frame_sum = 0
init_time = time.time()
def put_frame(frame):
if Input_Queue.full():
Input_Queue.get_nowait()
Input_Queue.put(frame)
def cap_read(cv2_cap):
ret, frame = cv2_cap.read()
if ret:
put_frame(frame)
cap = cv2.VideoCapture(0)
threadn = cv2.getNumberOfCPUs()
threaded_mode = True
process_list = []
Input_Queue = Queue(maxsize = 5)
Output_Queue = Queue(maxsize = 5)
for x in range((threadn -1)):
canny_process = Canny_Process(frame_queue = Input_Queue,output_queue = Output_Queue)
canny_process.daemon = True
canny_process.start()
process_list.append(canny_process)
ch = cv2.waitKey(1)
cv2.namedWindow('Threaded Video', cv2.WINDOW_NORMAL)
while True:
cap_read(cap)
if not Output_Queue.empty():
result = Output_Queue.get()
cv2.imshow('Threaded Video', result)
ch = cv2.waitKey(5)
if ch == ord(' '):
threaded_mode = not threaded_mode
if ch == 27:
break
cv2.destroyAllWindows()
This should do the trick just change my canny function to do your face detection. I wrote this from your code and compared the two. This is significantly faster. I am using multiprocessing.Process here. In python processes are truly parallel and threads are not quite because of the GIL. I am using 2 queues to send data back and forth between the main and the processes. Queues are both Thread and Process safe.
you may use this, multithreaded:
from imutils.video import VideoStream
# Initialize multithreading the video stream.
videostream = "rtsp://192.168.x.y/user=admin=xxxxxxx_channel=vvvv=1.sdp?params"
vs = VideoStream(src=videostream, resolution=frameSize,
framerate=32).start()
frame = vs.read()