OpenCV / Python : multi-threading for live facial recognition - python

I'm using OpenCV and Dlib to run facial recognition with landmarks, live from the webcam stream. The language is Python. It works fine on my MacBook laptop, but I need it to run on a desktop computer 24/7. The computer is a PC with an Intel® Core™2 Quad CPU Q6600 @ 2.40GHz (32-bit) running Debian Jessie. The drop in performance is drastic: there is a 10-second delay due to processing!
I therefore looked into multi-threading to gain performance :
I first tried the sample code by OpenCv, and the result is great! All four cores hit 100%, and the performance is much better.
I then replaced the frame processing code with my code, and it doesn't improve performance at all ! Only one core hits the 100%, the other ones stay very low. I even think it's worse with multi-threading on.
I got the facial landmark code from the dlib sample code. I know it can probably be optimized, but I want to understand why am I not able to use my (old) computer's full power with multi-threading ?
I'll drop my code below, thanks a lot for reading :)
from __future__ import print_function
import numpy as np
import cv2
import dlib
from multiprocessing.pool import ThreadPool
from collections import deque
from common import clock, draw_str, StatValue
import video
class DummyTask:
    """Already-finished stand-in for the object returned by ``pool.apply_async``.

    It wraps a value that was computed synchronously so the main loop can
    call ``ready()`` and ``get()`` on it exactly as it does for pool tasks.
    """

    def __init__(self, data):
        # The result is stored as-is; nothing is computed later.
        self.data = data

    def get(self):
        """Return the wrapped result."""
        return self.data

    def ready(self):
        """Report completion; always true because the work ran before construction."""
        return True
if __name__ == '__main__':
    import sys

    print(__doc__)

    # The first command-line argument selects the video source; without one,
    # fall back to source 0.
    try:
        fn = sys.argv[1]
    except IndexError:
        fn = 0
    cap = video.create_capture(fn)

    #Face detector
    detector = dlib.get_frontal_face_detector()
    #Landmarks shape predictor
    predictor = dlib.shape_predictor("landmarks/shape_predictor_68_face_landmarks.dat")

    # This is where the facial detection takes place
    def process_frame(frame, t0, detector, predictor):
        """Detect faces in ``frame`` and draw their landmarks onto it.

        Args:
            frame: BGR image; it is drawn on in place.
            t0: capture timestamp, passed through unchanged so the caller can
                compute latency.
            detector: callable returning face rectangles for a grayscale image.
            predictor: callable returning the landmark shape for one rectangle.

        Returns:
            The tuple ``(frame, t0)``.
        """
        # some intensive computation...
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        clahe_image = clahe.apply(gray)

        detections = detector(clahe_image, 1)
        for d in detections:
            shape = predictor(clahe_image, d)
            # There are 68 landmark points on each face, indexed 0..67.
            # range(1, 68) silently skipped the first one.
            for i in range(68):
                point = shape.part(i)
                cv2.circle(frame, (point.x, point.y), 1, (0, 0, 255), thickness=2)
        return frame, t0

    threadn = cv2.getNumberOfCPUs()
    pool = ThreadPool(processes=threadn)
    pending = deque()

    threaded_mode = True

    latency = StatValue()
    frame_interval = StatValue()
    last_frame_time = clock()
    while True:
        # Display every finished result, oldest first, so frames stay in order.
        while len(pending) > 0 and pending[0].ready():
            res, t0 = pending.popleft().get()
            latency.update(clock() - t0)
            draw_str(res, (20, 20), "threaded      :  " + str(threaded_mode))
            draw_str(res, (20, 40), "latency        :  %.1f ms" % (latency.value*1000))
            draw_str(res, (20, 60), "frame interval :  %.1f ms" % (frame_interval.value*1000))
            cv2.imshow('threaded video', res)
        # Keep at most one in-flight frame per worker.
        if len(pending) < threadn:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged / end of stream):
                # stop cleanly instead of crashing on frame.copy().
                break
            t = clock()
            frame_interval.update(t - last_frame_time)
            last_frame_time = t
            if threaded_mode:
                task = pool.apply_async(process_frame, (frame.copy(), t, detector, predictor))
            else:
                task = DummyTask(process_frame(frame, t, detector, predictor))
            pending.append(task)
        ch = cv2.waitKey(1)
        if ch == ord(' '):
            threaded_mode = not threaded_mode
        if ch == 27:
            break
    cv2.destroyAllWindows()

The performance issue was due to a badly compiled dlib. Do not use `pip install dlib`: for some reason the resulting build runs far more slowly than one compiled properly from source. Building it myself took the lag from almost 10 seconds down to about 2 seconds. So in the end I didn't need multi-threading/processing, but I'm still working on it to improve the speed even more. Thanks for the help :)

I tried a simplified approach like the one P.Ro describes in his answer, with processes writing to an output queue, but the queue seemed to be locked most of the time, presumably because all the processes wrote to it at the same time. (That is just my guess; I probably did something wrong.)
In the end I used pipes instead.
The code is nasty, but a few hours ago I would have been glad to find an example that actually runs without effort.
from multiprocessing import Process, Queue, Manager,Pipe
import multiprocessing
import face_recognition as fik
import cv2
import time
# Capture source handed to cv2.VideoCapture in the main block.
video_input = 0
# Reference image; faces cropped from the camera are compared against it.
obama_image = fik.load_image_file("obama.png")
# [0] keeps the first encoding returned for the reference image
# (an IndexError here means no encoding was returned).
obama_face_encoding = fik.face_encodings(obama_image)[0]
# NOTE(review): `quality` is not referenced anywhere else in the visible code.
quality = 0.7
def f(id, fi, fl):
    """Worker loop: locate faces in queued frames and send each crop to the parent.

    Args:
        id: worker number, used only in the progress message.
        fi: queue-like object; ``get()`` yields the next frame to analyse.
        fl: pipe-like object; ``send()`` receives one cropped face per detection.

    The loop never returns.
    """
    import face_recognition as fok

    banner = "running thread" + str(id)
    while True:
        small_frame = fi.get()
        print(banner)
        face_locations = fok.face_locations(small_frame)
        if len(face_locations) == 0:
            continue
        print(face_locations)
        for box in face_locations:
            top7, right7, bottom7, left7 = box
            # Crop the detected rectangle out of the frame and ship it back.
            fl.send(small_frame[top7:bottom7, left7:right7])
fps_var = 0
if __name__ == '__main__':
    multiprocessing.set_start_method('spawn')

    video_capture = cv2.VideoCapture(video_input)

    # Bounded queue of frames waiting for a worker.
    fi = Queue(maxsize=14)

    threads = 8
    proc = []
    parent_p = []
    for t in range(threads):
        # One pipe per worker: the worker sends face crops, the parent polls.
        p_t, c_t = Pipe()
        parent_p.append(p_t)
        print(t)
        # Workers loop forever, so run them as daemons; otherwise the program
        # can never exit once the main loop ends.
        worker = Process(target=f, args=(t, fi, c_t), daemon=True)
        worker.start()
        proc.append(worker)

    frame_id = 0
    try:
        while True:
            # Grab a single frame of video
            ret, frame = video_capture.read()
            if not ret:
                # No frame delivered: stop instead of failing on frame.shape.
                break
            effheight, effwidth = frame.shape[:2]
            if effwidth < 20:
                break
            # Resize the frame to a fixed 930-pixel width with a 16:10 aspect ratio.
            xxx = 930
            yyy = 10/16
            small_frame = cv2.resize(frame, (xxx, int(xxx*yyy)))
            # Only every second frame is offered to the workers, and only when
            # the queue has room, so the display never waits for detection.
            if frame_id % 2 == 0:
                if not fi.full():
                    fi.put(small_frame)
                    print(frame_id)
            cv2.imshow('Video', small_frame)
            now = time.time()
            elapsed = now - fps_var
            if elapsed > 0:
                print("FPS: ", int(1.0 / elapsed))
            fps_var = now

            #GET ALL DETECTIONS
            # At most one detection is handled per displayed frame (note the breaks).
            for t in range(threads):
                if parent_p[t].poll():
                    small_frame_c = parent_p[t].recv()
                    cv2.imshow('recc', small_frame_c)
                    height34, width34 = small_frame_c.shape[:2]
                    if width34 < 20:
                        print("face 2 small")
                        print(width34)
                        break
                    # The crop is already a face, so pass its full extent as
                    # the known location (top, right, bottom, left).
                    face_encodings_cam = fik.face_encodings(small_frame_c, [(0, width34, height34, 0)])
                    if not face_encodings_cam:
                        break
                    match = fik.compare_faces([obama_face_encoding], face_encodings_cam[0])
                    name = "Unknown"
                    if match[0]:
                        name = "Barack"
                    print(name)
                    break
            frame_id += 1
            # Hit 'q' on the keyboard to quit!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        for worker in proc:
            worker.terminate()
        for worker in proc:
            worker.join()
        video_capture.release()
        cv2.destroyAllWindows()

Do not have much experience with using ThreadPool, but I always just use Process like shown below. You should be able to easily edit this code to fit your needs. I wrote this with your implementation in mind.
This code will get the number of cores and start however many worker processes that will all be implementing the desired function in parallel. They all share a Queue of frames for input and all put to the same output Queue for the main to get and show. Each Queue has a maximum size, in this case 5. This ensures that despite the CPU time it takes to process, it will always be relatively live time.
import numpy as np
import cv2
from multiprocessing import Process, Queue
import time
#from common import clock, draw_str, StatValue
#import video
class Canny_Process(Process):
    """Worker process: takes frames from one queue, puts edge images on another.

    Several workers share both queues, so every queue access is a single
    attempt that tolerates ``Empty``/``Full`` instead of a check followed by
    a separate call (another worker may act in between).
    """

    def __init__(self, frame_queue, output_queue):
        # Local import keeps this class self-contained.
        from multiprocessing import Event

        Process.__init__(self)
        self.frame_queue = frame_queue
        self.output_queue = output_queue
        # Kept for backward compatibility; a plain attribute changed in the
        # parent is not visible to an already-started child.
        self.stop = False
        # Shared flag that is visible across the process boundary.
        self._stop_event = Event()
        #Initialize your face detectors here

    def get_frame(self):
        """Return ``(True, frame)`` or ``(False, None)`` if none arrives shortly."""
        from queue import Empty

        try:
            # A short blocking wait avoids a 100% CPU spin while idle.
            return True, self.frame_queue.get(timeout=0.05)
        except Empty:
            return False, None

    def stopProcess(self):
        """Ask the worker loop to finish after the current frame."""
        self.stop = True
        self._stop_event.set()

    def canny_frame(self, frame):
        """Compute the Canny edges of ``frame`` and publish them, dropping the oldest result if full."""
        from queue import Empty, Full

        # some intensive computation...
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 100)

        #To simulate CPU Time
        #############################
        for i in range(1000000):
            x = 546*546
            res = x/(i+1)
        #############################
        # REPLACE WITH FACE DETECT CODE HERE

        try:
            self.output_queue.put_nowait(edges)
        except Full:
            # Make room by discarding the oldest result, then retry once.
            # If another worker refilled the queue meanwhile, drop this result
            # rather than block: staying live matters more than completeness.
            try:
                self.output_queue.get_nowait()
            except Empty:
                pass
            try:
                self.output_queue.put_nowait(edges)
            except Full:
                pass

    def run(self):
        """Process frames until a stop is requested."""
        while not (self.stop or self._stop_event.is_set()):
            ret, frame = self.get_frame()
            if ret:
                self.canny_frame(frame)
if __name__ == '__main__':
    import queue  # only for the Empty/Full exception types (Python 3 module name)

    def put_frame(frame):
        """Queue ``frame`` for the workers, discarding the oldest queued frame when full."""
        try:
            Input_Queue.put_nowait(frame)
        except queue.Full:
            # Workers consume concurrently, so the queue may have changed
            # since the failed put; tolerate both outcomes.
            try:
                Input_Queue.get_nowait()
            except queue.Empty:
                pass
            try:
                Input_Queue.put_nowait(frame)
            except queue.Full:
                pass

    def cap_read(cv2_cap):
        """Read one frame from ``cv2_cap`` and queue it if the read succeeded."""
        ret, frame = cv2_cap.read()
        if ret:
            put_frame(frame)

    cap = cv2.VideoCapture(0)

    threadn = cv2.getNumberOfCPUs()

    process_list = []
    # Small bounded queues keep the displayed result close to live time.
    Input_Queue = Queue(maxsize=5)
    Output_Queue = Queue(maxsize=5)

    # Leave one core for this process, but always start at least one worker.
    for x in range(max(1, threadn - 1)):
        canny_process = Canny_Process(frame_queue=Input_Queue, output_queue=Output_Queue)
        canny_process.daemon = True
        canny_process.start()
        process_list.append(canny_process)

    cv2.namedWindow('Threaded Video', cv2.WINDOW_NORMAL)
    try:
        while True:
            cap_read(cap)

            # This process is the only reader of Output_Queue, so a
            # non-empty check followed by get() cannot block here.
            if not Output_Queue.empty():
                result = Output_Queue.get()
                cv2.imshow('Threaded Video', result)
            ch = cv2.waitKey(5)
            if ch == 27:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
This should do the trick just change my canny function to do your face detection. I wrote this from your code and compared the two. This is significantly faster. I am using multiprocessing.Process here. In python processes are truly parallel and threads are not quite because of the GIL. I am using 2 queues to send data back and forth between the main and the processes. Queues are both Thread and Process safe.

you may use this, multithreaded:
from imutils.video import VideoStream
# Initialize multithreading the video stream.
videostream = "rtsp://192.168.x.y/user=admin=xxxxxxx_channel=vvvv=1.sdp?params"
# `frameSize` was used without ever being defined, which raises NameError.
# NOTE(review): (320, 240) is an assumed placeholder — set it to the size you need.
frameSize = (320, 240)
vs = VideoStream(src=videostream, resolution=frameSize,
                 framerate=32).start()
frame = vs.read()

Related

Python multiprocessing queue using a lot of resources with opencv

I am using multiprocessing to get frames of a video using Opencv in python.
My class looks like this :-
import cv2
from multiprocessing import Process, Queue
class StreamVideos:
    """Reads a video in a child process and hands frames over through a queue."""

    def __init__(self, max_frames=64):
        """Create the frame queue.

        Args:
            max_frames: upper bound on frames waiting in the queue. Once it
                is reached, ``put()`` blocks until a consumer takes a frame,
                so a fast reader cannot fill RAM with decoded frames.
        """
        self.image_data = Queue(maxsize=max_frames)

    def start_proces(self):
        """Start ``echo`` in a child process and return the process object."""
        p = Process(target=self.echo)
        p.start()
        return p

    def echo(self):
        """Put every frame of the video on the queue, then a single ``None`` as end marker."""
        cap = cv2.VideoCapture('videoplayback.mp4')
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # isOpened() stays true after the last frame; without this
                    # check the loop would queue None forever.
                    break
                self.image_data.put(frame)
                # print("frame")
        finally:
            cap.release()
            # Tell the consumer that no more frames will arrive.
            self.image_data.put(None)
I start the process "echo" using :-
p = Process(target=self.echo)
p.start()
the echo function looks like this :-
def echo(self):
cap = cv2.VideoCapture('videoplayback.mp4')
while cap.isOpened():
ret,frame = cap.read()
self.image_data.put(frame)
in which i am using queue where i put these frames
self.image_data.put(frame)
and then in another process I start receiving these frames
self.obj = StreamVideos()
def start_process(self):
self.obj.start_proces()
p = Process(target=self.stream_videos)
p.start()
def stream_videos(self):
while True:
self.img = self.obj.image_data.get()
print(self.img)
But as soon as I start putting frames into the queue, the RAM fills up very quickly and the system gets stuck. The video I am using is just 25 fps and 39 MB in size, so this does not make any sense to me.
One thing I noticed is that the "echo" process puts a lot of frames into the queue before the "stream_videos" process retrieves them.
What could be the root of this problem?
Thanks in advance.
Expectations: -
Able to retrieve the frames continuosly.
Tried :-
Not putting frames in queue, in which case the ram is not filled.
The following is a general purpose single producer/multiple consumer implementation. The producer (class StreamVideos) creates a shared memory array whose size is the number of bytes in the video frame. One or more consumers (you specify the number of consumers to StreamVideos) can then call StreamVideos.get_next_frame() to retrieve the next frame. This method converts the shared array back into a numpy array for subsequent processing. The producer will only read the next frame into the shared array after all consumers have called get_next_frame:
#!/usr/bin/env python3
import multiprocessing
import numpy as np
import ctypes
import cv2
class StreamVideos:
    """Single producer / multiple consumers of video frames via a shared array.

    The producer copies each frame into one shared-memory array. A barrier
    sized for all consumers plus the producer ensures the next frame is only
    read after every consumer has asked for it.
    """

    def __init__(self, path, n_consumers):
        """
        path is the path to the video:
        n_consumers is the number of tasks to which we will be streaming this.
        """
        self._path = path
        self._event = multiprocessing.Event()
        # End-of-stream flag. It must be a shared object: a plain attribute
        # changed by the producer is not visible in the consumer processes,
        # which would then block on the barrier forever.
        self._done = multiprocessing.Event()
        self._barrier = multiprocessing.Barrier(n_consumers + 1, self._reset_event)
        self._shape = None
        self._arr = None
        # Discover how large a framesize is by getting the first frame
        cap = cv2.VideoCapture(self._path)
        try:
            ret, frame = cap.read()
        finally:
            cap.release()
        if ret:
            self._shape = frame.shape
            # One unsigned byte per array element of the frame.
            self._arr = multiprocessing.RawArray(ctypes.c_ubyte, frame.size)

    def _reset_event(self):
        # Barrier action: runs once each time all parties have arrived.
        self._event.clear()

    def start_streaming(self):
        """Producer loop: publish frames one at a time until the video ends."""
        cap = cv2.VideoCapture(self._path)
        try:
            while True:
                # Wait until every consumer is done with the previous frame.
                self._barrier.wait()
                ret, frame = cap.read()
                if not ret:
                    # No more readable frames:
                    break
                # Store frame into shared array:
                temp = np.frombuffer(self._arr, dtype=frame.dtype)
                temp[:] = frame.flatten(order='C')
                self._event.set()
        finally:
            cap.release()
        # Flag the end before waking the consumers so they observe it.
        self._done.set()
        self._arr = None
        self._event.set()

    def get_next_frame(self):
        """Return the next frame as a numpy view of the shared array, or None at end of stream."""
        # Tell producer that this consumer is through with the previous frame:
        self._barrier.wait()
        # Wait for next frame to be read by the producer:
        self._event.wait()
        if self._done.is_set() or self._arr is None:
            return None

        # Return shared array as a numpy array:
        return np.ctypeslib.as_array(self._arr).reshape(self._shape)
def consumer(producer, id):
    """Show frames from ``producer`` in a window named after ``id`` until it returns None."""
    frame_name = f'Frame - {id}'
    frame = producer.get_next_frame()
    while frame is not None:
        cv2.imshow(frame_name, frame)
        # waitKey gives the GUI a chance to draw the frame.
        cv2.waitKey(1)
        frame = producer.get_next_frame()
    cv2.destroyAllWindows()
def main():
    """Stream 'videoplayback.mp4' to two consumer processes, producing in this process."""
    producer = StreamVideos('videoplayback.mp4', 2)

    workers = [
        multiprocessing.Process(target=consumer, args=(producer, number))
        for number in (1, 2)
    ]
    for worker in workers:
        worker.start()

    # Alternative: run the producer as a child process:
    #   producer_process = multiprocessing.Process(target=producer.start_streaming)
    #   producer_process.start()
    #   producer_process.join()

    # Run in main process:
    producer.start_streaming()

    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()

How to measure cpu usage (as a percentage) of a single code line in Python?

I need to know how much this particular line of code loads my cpu when whole program is executed:
cap.set(cv.CAP_PROP_POS_FRAMES,random_frame)
This line is part of a certain program. But I'm only interested in how this particular line loads the cpu. I don't know exactly how to measure it.
To be more precised, this is my full code:
import cv2 as cv
import random
cap = cv.VideoCapture('file_name.avi')
random_frame = random.randint(1, 99999)
# The statement whose CPU load is in question: seek to the chosen frame.
cap.set(cv.CAP_PROP_POS_FRAMES, random_frame)
ret, frame = cap.read()
if ret:
    cv.imshow("random_frame", frame)
    # Keep the window open until 'q' is pressed.
    while cap.isOpened():
        if cv.waitKey(1) == ord('q'):
            break
else:
    # The read failed (for example the index is past the end of the file);
    # passing None to imshow would raise an error.
    print("could not read frame", random_frame)
cap.release()
cv.destroyAllWindows()
Windows 7 operating system
import multiprocessing as mp
import psutil
import random
import cv2 as cv
import random
# Module level on purpose: with the "spawn" start method the child re-imports
# this module and builds its own capture object, so the measured function
# contains nothing but the seek itself.
cap = cv.VideoCapture('file_name.avi')
random_frame = random.randint(1, 99999)


def monitorFunction():
    """Run the single statement whose CPU usage is being measured."""
    cap.set(cv.CAP_PROP_POS_FRAMES, random_frame)


def monitor(target):
    """Run ``target`` in a child process and sample that process's CPU usage.

    Args:
        target: callable to run in the child process.

    Returns:
        List of ``cpu_percent()`` readings taken roughly every 10 ms while
        the child was alive.
    """
    import time  # the snippet's import block never imported it

    worker_process = mp.Process(target=target)
    worker_process.start()
    p = psutil.Process(worker_process.pid)

    # log cpu usage of `worker_process` every 10 ms
    cpu_percents = []
    while worker_process.is_alive():
        try:
            cpu_percents.append(p.cpu_percent())
        except psutil.NoSuchProcess:
            # The child exited between the liveness check and the sample.
            break
        time.sleep(0.01)

    worker_process.join()
    return cpu_percents


# The guard is required where child processes are spawned (e.g. Windows):
# the child re-imports this module and must not start monitoring again.
if __name__ == '__main__':
    cpu_percents = monitor(target=monitorFunction)
    print(cpu_percents)
This should work.

how to run multiple camera in threading using python

below is the code i used to play multiple videos in parallel using multi threading pool. but only one video is playing for each input. i want each video to open separately. not combined
import concurrent.futures
import cv2  # used below but missing from the snippet's imports

RTSP_URL = "rtsp://wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mp4"
RTSP_List = [RTSP_URL, RTSP_URL, RTSP_URL, RTSP_URL]


def url_to_video(url):
    """Play the stream at ``url`` until it ends or 'q' is pressed."""
    video = cv2.VideoCapture(url)
    try:
        while True:
            ok, frame = video.read()
            if not ok:
                # Stream ended or could not be read.
                break
            # Every worker draws into the same window name "RTSP".
            cv2.imshow("RTSP", frame)
            k = cv2.waitKey(1)
            if k == ord('q'):
                break
    finally:
        video.release()
        cv2.destroyAllWindows()


# Run once: an outer `while True` would relaunch all streams as soon as they finish.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # list() consumes the results so an exception raised in a worker is re-raised here.
    list(executor.map(url_to_video, RTSP_List))
how to play each video separately.
You just need each thread to use a different window name in cv2.imshow, so that each thread creates its own window. You should also place the windows at distinct positions so that they don't appear on top of one another. I added an index to each entry, so that every stream gets its own title and its own position on screen. Also, you shouldn't destroy all windows when only one stream is done.
import concurrent.futures
import cv2
RTSP_URL = "rtsp://wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mp4"
RTSP_List = [(RTSP_URL, 0), (RTSP_URL, 1), (RTSP_URL, 2), (RTSP_URL, 3)]


def url_to_video(tup):
    """Play one stream in its own window.

    Args:
        tup: ``(url, index)``; ``index`` selects the window title and its
            horizontal position on screen.
    """
    url, index = tup
    window = f"RTSP {index}"
    video = cv2.VideoCapture(url)
    placed = False
    try:
        while True:
            ok, frame = video.read()
            if not ok:
                # Stream ended or could not be read.
                break
            cv2.imshow(window, frame)
            if not placed:
                # Position the window once; repeating this every frame
                # would snap it back whenever the user moves it.
                cv2.moveWindow(window, index*300, 0)
                placed = True
            k = cv2.waitKey(1)
            if k == ord('q'):
                break
    finally:
        video.release()


# Run once: inside a `while True` the cleanup below could never be reached.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # list() consumes the results so an exception raised in a worker is re-raised here.
    list(executor.map(url_to_video, RTSP_List))
cv2.destroyAllWindows()

Extract specific frames of youtube video without downloading video

I need to extract specific frames of an online video to work on an algorithm but I don't want to download the whole video because that would make it highly inefficient.
For starters, I tried working with youtube videos. I can download whole of the video using youtube-dl in this way:
# 'outtmpl' is the only option passed to the downloader.
ydl_opts = dict(outtmpl=r'OUTPUT_DIRECTORY_HERE')
downloader = youtube_dl.YoutubeDL(ydl_opts)
with downloader as ydl:
    # Downloads the complete video for every URL in the list.
    ydl.download([url])
And then I can capture individual frames.
I need to avoid downloading the whole video. After some research, I have found that ffmpeg might help me do this. I found no way to download just the frames so if this is not possible, the second option is that I can download specific portions of the video. One such example in linux is here but I couldn't find any solution for python.
What is a good way to download just the frames, or portions of videos (in python) without downloading the entire thing?
Just to add on to the current answer, performance can further be enhanced using multiprocessing. For example, if you wanted to split up the video into frames and process them independently in num_cpu processes:
import os
from functools import partial
from multiprocessing.pool import Pool
import cv2
import youtube_dl
def process_video_parallel(url, skip_frames, process_number):
    """Save up to 10 frames from this process's share of the video.

    The video is divided into ``os.cpu_count()`` equal segments and segment
    number ``process_number`` is sampled every ``skip_frames`` frames.

    Args:
        url: source passed to ``cv2.VideoCapture``.
        skip_frames: distance in frames between two saved frames.
        process_number: zero-based index of the segment to process.
    """
    cap = cv2.VideoCapture(url)
    try:
        num_processes = os.cpu_count() or 1
        frames_per_process = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) // num_processes
        start_frame = frames_per_process * process_number
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        x = 0
        count = 0
        while x < 10 and count < frames_per_process:
            ret, frame = cap.read()
            if not ret:
                break
            # The process number is part of the name; otherwise every process
            # would overwrite the same shot0..shot9 files.
            filename = r"PATH\shot" + str(process_number) + "_" + str(x) + ".png"
            x += 1
            cv2.imwrite(filename, frame)
            count += skip_frames  # Skip 300 frames i.e. 10 seconds for 30 fps
            # Seek relative to this segment's start. Seeking to `count` alone
            # is an absolute position, so all processes read the same frames.
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame + count)
    finally:
        cap.release()
# The guard is required where worker processes are spawned: each worker
# re-imports this module and must not start the download logic again.
if __name__ == '__main__':
    video_url = "..."  # The Youtube URL
    ydl_opts = {}
    ydl = youtube_dl.YoutubeDL(ydl_opts)
    info_dict = ydl.extract_info(video_url, download=False)

    formats = info_dict.get('formats', None) or []

    print("Obtaining frames")
    # Use the first 144p entry only; processing every 144p entry would save
    # the same frames again.
    url = None
    for f in formats:
        if f.get('format_note', None) == '144p':
            url = f.get('url', None)
            break

    if url is None:
        print("No 144p format available")
    else:
        cpu_count = os.cpu_count() or 1
        with Pool(cpu_count) as pool:
            pool.map(partial(process_video_parallel, url, 300), range(cpu_count))
For the purposes of this application, since images are just being saved from the video, this may not result in a huge improvement (maybe a few seconds), but if additional algorithms needed to be applied on the frames, it could be beneficial.
I tried what @AyeshaKhan shared in the comments.
After importing cv2,numpy,youtube-dl:
video_url = saved_url  # The Youtube URL
ydl_opts = {}
ydl = youtube_dl.YoutubeDL(ydl_opts)
# The original assigned the page URL to `url` but passed the undefined name
# `video_url` here, which raises NameError.
info_dict = ydl.extract_info(video_url, download=False)

formats = info_dict.get('formats', None)

print("Obtaining frames")
for f in formats:
    if f.get('format_note', None) == '144p':
        url = f.get('url', None)
        cap = cv2.VideoCapture(url)
        x = 0
        count = 0
        # x limits the number of saved frames to 10.
        while x < 10:
            ret, frame = cap.read()
            if not ret:
                break
            filename = r"PATH\shot" + str(x) + ".png"
            x += 1
            cv2.imwrite(filename, frame)
            count += 300  # Skip 300 frames i.e. 10 seconds for 30 fps
            # Property 1 is the frame position: jump to the next frame to save.
            cap.set(1, count)
        cap.release()
        # One 144p stream is enough; do not repeat for further entries.
        break
The answer in the comments was downloading all of the frames. The frames are skipped here by advancing count by 300 and seeking with cap.set(1, count); calling .format(count) on the filename has no effect, because the name contains no {} placeholder.
Additionally, x here limits the number to 10.
Although, I am still not sure whether this method is actually capturing the specified frames, or if it is capturing all the frames and just saving the specified frames to my local storage. I needed the former thing.
But this is still fast enough and works for me!
An alternative to @danielcahall's answer:
This method uses Ray for parallelization instead of Pool
Note: For the first time, it might take more time for initializing the ray components, After the first run, this will be fine
from timeit import default_timer as timer
import os, ray, shutil
import cv2
import youtube_dl
# Start from an empty output directory: remove any previous one, then create it.
# ignore_errors only covers the "directory does not exist yet" case for rmtree;
# a real failure still surfaces from makedirs instead of being swallowed by a
# bare `except`.
shutil.rmtree('test_fol', ignore_errors=True)
os.makedirs('test_fol')
ray.init()


# The decorator was garbled into the comment "#ray.remote"; without it the
# function has no `.remote(...)` attribute, which the driver code calls.
@ray.remote
def process_video_parallel(url, total_frames, process_number):
    """Write every frame of this process's segment to ``test_fol/<frame>.jpg``.

    Args:
        url: source passed to ``cv2.VideoCapture``.
        total_frames: frame count of the whole video (the caller estimates it).
        process_number: zero-based index of the segment to process.
    """
    cap = cv2.VideoCapture(url)
    try:
        num_processes = os.cpu_count() or 1
        frames_per_process = int(total_frames) // num_processes
        first_frame = frames_per_process * process_number
        cap.set(cv2.CAP_PROP_POS_FRAMES, first_frame)
        count = first_frame
        while count < first_frame + frames_per_process:
            ret, frame = cap.read()
            if not ret:
                break
            filename = f"test_fol/{count}.jpg"
            cv2.imwrite(filename, frame)
            count += 1
    finally:
        cap.release()
t1 = timer()

video_url = "..."  # The Youtube URL
ydl_opts = {}
ydl = youtube_dl.YoutubeDL(ydl_opts)
info_dict = ydl.extract_info(video_url, download=False)

duration = info_dict['duration']
formats = info_dict.get('formats', None)

# Pick the first 144p entry, if there is one.
chosen = next(
    (entry for entry in formats if entry.get('format_note', None) == '144p'),
    None,
)
if chosen is not None:
    url = chosen.get('url', None)
    cpu_count = os.cpu_count()
    # One remote task per CPU; duration*31 is the frame count handed to each task.
    pending = [
        process_video_parallel.remote(url, int(duration*31), x)
        for x in range(cpu_count)
    ]
    data = ray.get(pending)

print("Total Time", timer()-t1)

How do I terminate processes in an infinite loop in python?

I have to write a code that converts video files from RGB to black and white using an equation that converts every frame to black and white.
and I have to do that in parallel with multiprocessing and queue and with the help of Opencv.
I did write the code but I have a problem with the termination of the processes in the infinite loop. How can I terminate the processes when I am finished with reading the frames, because the father is waiting for the children to finish and they never finish.
this is my code..
#! /usr/bin/python
import numpy as np
import cv2
import multiprocessing as mp
import time
def read_frames(q1, q2):
    """Worker loop: convert queued frames to grayscale until the 'Done' marker arrives.

    Args:
        q1: input queue of ``(index, frame)`` tuples; a tuple whose second
            item is the string 'Done' ends the loop.
        q2: output queue receiving ``(index, gray_frame)`` tuples.
    """
    while True:
        NumAndFrame = q1.get()
        frame = NumAndFrame[1]
        # Check the type first: comparing a numpy array with a string is an
        # element-wise operation and cannot be used as a condition.
        if isinstance(frame, str) and frame == 'Done':
            # Returning ends the process; a child cannot terminate the
            # parent's list of processes.
            break
        j = NumAndFrame[0]
        # OpenCV delivers frames in B, G, R channel order.
        B = frame[:, :, 0]
        G = frame[:, :, 1]
        R = frame[:, :, 2]
        y = (np.uint8)((0.299 * R) + (0.587 * G) + (0.114 * B))
        q2.put((j, y))
if __name__ == '__main__':
    q1 = mp.Queue()
    q2 = mp.Queue()
    processes = [mp.Process(target=read_frames, args=(q1, q2)) for _ in range(4)]
    for p in processes:
        p.start()

    # feed the processes
    # read input file and send to the processes the frames:
    cap = cv2.VideoCapture('gou.avi')
    lines = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cols = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    fourcc_ver = int(cap.get(cv2.CAP_PROP_FOURCC))
    # The final False is passed as the isColor argument: the output is grayscale.
    out = cv2.VideoWriter('output.avi', fourcc_ver, fps, (cols, lines), False)

    frames_sent = 0
    while cap.isOpened():
        ret, frame = cap.read()
        # as long as new frames are there
        if not ret:
            break
        frames_sent += 1
        q1.put((frames_sent, frame))

    # One end marker per worker: each worker consumes exactly one and exits.
    for _ in processes:
        q1.put((1, 'Done'))

    # Collect one result per frame BEFORE joining. A child cannot exit while
    # data it queued is still unread, so joining first would deadlock.
    result = [q2.get() for _ in range(frames_sent)]

    for p in processes:
        p.join()

    # Workers finish in arbitrary order; restore the frame order by index.
    result.sort(key=lambda item: item[0])
    for _, gray in result:
        out.write(gray)

    # Release everything if job is finished
    print('final finish')
    cap.release()
    out.release()
    cv2.destroyAllWindows()
You might want to try to pare your question down to a smaller example, but if you're just interested in stopping the computation in the middle of a loop that is running indefinitely, you can spam Ctrl-C until it halts. Alternatively, you can just close the shell window.
I have not tested this, for the same reason others gave in their comments:
You should rather call "terminate" on each process within the main part, than call it in the child function:
...
for p in processes:
p.terminate()
p.join()
Consider using multiprocessing.Pool because it does most of the heavy lifting for you.
You need a "done" message for each child process. The child should send some sort of acknowledgement back to the parent and terminate. You also need some sort of error handling policy in the worker so that an exception doesn't just silently exit the worker.
You have other problems such as this code that confuses number of processes with number of messages processed.
for p in processes:
result.append(q2.get())
Instead you should read all messages, counting the number of termination acknowledgements it gets on the way so that you know when to stop reading.
Your script is long and I'm not going to pretend that I've gotten it all right (please be friendly and post smaller examples in the future!) but here is a first go at cleaning it up.
#! /usr/bin/python
import numpy as np
import cv2
import multiprocessing as mp
import time
def read_frames(q1, q2):
    """Worker loop: convert queued frames to grayscale and acknowledge the end marker.

    Args:
        q1: input queue of ``(index, frame)`` tuples; a tuple whose second
            item is the string 'Done' ends the loop.
        q2: output queue. It receives ``(index, gray_frame)`` per frame, the
            string 'Done' once when this worker finishes, and a string
            starting with 'Error: ' for every frame that failed.
    """
    while True:
        try:
            NumAndFrame = q1.get()
            frame = NumAndFrame[1]
            # Check the type first: comparing a numpy array with a string is
            # an element-wise operation and cannot be used as a condition.
            if isinstance(frame, str) and frame == 'Done':
                q2.put('Done')
                break
            j = NumAndFrame[0]
            # OpenCV delivers frames in B, G, R channel order.
            B = frame[:, :, 0]
            G = frame[:, :, 1]
            R = frame[:, :, 2]
            y = (np.uint8)((0.299 * R) + (0.587 * G) + (0.114 * B))
            q2.put((j, y))
        except Exception as e:
            # Report the failure and keep serving; exiting silently would
            # leave the parent waiting for this worker's 'Done'.
            q2.put('Error: ' + str(e))
if __name__ == '__main__':
    q1 = mp.Queue()
    q2 = mp.Queue()
    processes = [mp.Process(target=read_frames, args=(q1, q2)) for _ in range(4)]
    for p in processes:
        p.start()

    # feed the processes
    # read input file and send to the processes the frames:
    cap = cv2.VideoCapture('gou.avi')
    lines = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cols = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    fourcc_ver = int(cap.get(cv2.CAP_PROP_FOURCC))
    # The final False is passed as the isColor argument: the output is grayscale.
    out = cv2.VideoWriter('output.avi', fourcc_ver, fps, (cols, lines), False)

    j = 1
    while cap.isOpened():
        ret, frame = cap.read()
        # as long as new frames are there
        if not ret:
            break
        q1.put((j, frame))
        j += 1

    # One end marker per worker (iterating the list; len() itself is not iterable).
    for _ in processes:
        q1.put((1, 'Done'))

    # Read results until every worker has acknowledged with 'Done'. This must
    # happen BEFORE join(): a child cannot exit while data it queued is unread.
    result = []
    done_count = 0
    while done_count < len(processes):
        data = q2.get()
        if isinstance(data, str):
            if data == 'Done':
                done_count += 1
            else:
                # Error report from a worker; show it instead of writing it as a frame.
                print(data)
        else:
            result.append(data)

    for p in processes:
        p.join()

    # Workers finish in arbitrary order; restore the frame order by index.
    result.sort(key=lambda item: item[0])
    for _, gray in result:
        out.write(gray)

    # Release everything if job is finished
    print('final finish')
    cap.release()
    out.release()
    cv2.destroyAllWindows()
You end up holding the entire returned dataset in the parent, so you may hit memory problems. And since (1) you have a large data payload being copied from parent to child and back, and (2) numpy releases the GIL, you may find that threads perform better than processes. You can check this rather quickly by substituting Thread for Process when you create the workers.

Categories