Apply mask to Webcam streaming in opencv - python

I applied a mask to a video feed using OpenCV and want to display the live stream on a website, but the following code stops streaming right after it starts. I've been trying to wrap my head around it but couldn't figure out a solution. Any help would be greatly appreciated.
def gen(frame):
while True:
# frame = camera.get_frame()
yield (b'--frame\r\n'
b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
def seasoncolor(request):
while True:
return StreamingHttpResponse(gen(color_detection.color_detection(0)),
content_type='multipart/x-mixed-replace; boundary=frame')
import numpy as np
import cv2
import sys
ML object detection algo(haarcascade)used to identify objects.
the XML file consists of trained Haar Cascade models.
def color_detection(season):
face_cascade = cv2.CascadeClassifier(
# 'accounts/personal_color/self_detection/haarcascade_frontalface_default.xml'
# initialize video from the webcam
video = cv2.VideoCapture(1)
# Spring/summer/fall/winter
while True:
# ret tells if the camera works properly. Frame is an actual frame from the video feed
ret, frame =
# make sure port is working and read the image
if frame is not None and video.isOpened():
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
Detect the faces within the subregions of the image in scales
scaleFactor indicates how much the image size is reduced at each image scale.
minNeighbors: Higher value results in higher quality of the detected face.
faces = face_cascade.detectMultiScale(
gray, scaleFactor=1.1, minNeighbors=6)
# Draw the rectangle around each face
for (x, y, w, h) in faces:
# Use the stcoordinates to find the center of the face and from that point draw a rectangle of radius w/2 or h/2.
center_coordinates = x + w // 2, y + h // 2
radius = w // 2 # or can be h / 2 or can be anything based on your requirements
# background color(black)
mask = np.zeros(frame.shape[:2], dtype="uint8")
# Draw the desired region to crop out in white, center_coordinates, radius, (255, 255, 255), -1)
masked = cv2.bitwise_and(frame, frame, mask=mask)
if int(season) ==0: # Spring
# Replace all (0,0,0)channel with Coral pink
masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [121, 131, 248]
elif int(season) ==1: # Summer
#Replace all (0,0,0)channel with Rose Red
elif int(season) ==2: # Fall
#Replace all (0,0,0)channel with Red Brown /Cinnamon
else: # Winter
#Replace all (0,0,0)channel with Burgundy Red
# cv2.imshow('mask applied', masked)
ret, jpeg = cv2.imencode('.jpg', masked)
return jpeg.tobytes()
if cv2.waitKey(30) & 0xff == 27:
if __name__ == '__main__':
arg = sys.argv[1]
import React, { useState, useEffect } from 'react';
import ReactDOM from 'react-dom';
import CameraScreen from './CameraScreen';
import { StyleSheet, Text, View, Image } from 'react-native';
import { NavigationContainer } from '#react-navigation/native';
import { createStackNavigator } from '#react-navigation/stack';
import axios from 'axios';
function Self_color_diagnosis({navigation,route}) {
return (
<View style={styles.title_container}>
<Image style={} source={{
uri: 'http://localhost:8000/seasoncolor/',}}/>
const styles = StyleSheet.create({
video: {
width: 500,
height: 500
title_container: {
flex: 1,
justifyContent: 'center'
export default Self_color_diagnosis;
The above code results in the pic below. The streaming stops and does not change at all.

gen() runs a loop that yields the same frame every time, because the frame is passed in only once as an argument.
You have to get a new frame inside this loop.
def gen():
    """Yield an endless sequence of multipart chunks, one JPEG frame each.

    A fresh frame is requested from color_detection.color_detection(0) on
    every pass through the loop; falsy results are skipped.
    """
    part_header = b'--frame\r\nContent-Type: image/jpeg\r\n\r\n'
    part_trailer = b'\r\n\r\n'
    while True:
        jpeg_bytes = color_detection.color_detection(0)
        if not jpeg_bytes:
            # print('no frame')
            continue
        yield part_header + jpeg_bytes + part_trailer
But color_detection() should run without its own loop, so that each call grabs and returns a single frame.
And you should create VideoCapture(1) only once, outside the function.
And you should return the frame even if no face was detected.
# Created once at import time so that every call to color_detection() reuses
# the same classifier and the same capture device.
# cv2.data.haarcascades is the folder where the opencv-python package keeps
# its bundled Haar cascade XML files.
path = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
face_cascade = cv2.CascadeClassifier(path)
video = cv2.VideoCapture(1)
def color_detection(season):
    """Read one frame from the shared camera and return it as JPEG bytes.

    For each detected face a circular mask is built around the face and the
    area outside the circle is painted with the colour selected by
    ``season`` (0 = spring, 1 = summer, 2 = fall, anything else = winter).
    A new mask is created per face, so with several faces only the last one
    stays visible.  When no face is detected the unmodified frame is
    encoded instead.

    Returns ``None`` when no frame could be read from the camera.
    """
    # ret tells if the camera works properly. Frame is an actual frame from the video feed
    ret, frame = video.read()

    # make sure port is working and the image was read
    if frame is None or not video.isOpened():
        return None

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect the faces within the subregions of the image in scales.
    # scaleFactor indicates how much the image size is reduced at each image scale.
    # minNeighbors: higher value results in higher quality of the detected face.
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=6)

    # Colours are in OpenCV's BGR channel order.
    if season == 0:    # Spring - Coral pink
        color = [121, 131, 248]
    elif season == 1:  # Summer - Rose Red
        color = [86, 30, 194]
    elif season == 2:  # Fall - Red Brown / Cinnamon
        color = [30, 105, 210]
    else:              # Winter - Burgundy Red
        color = [31, 2, 141]

    # Start from the raw frame so that something is returned even when no
    # face is detected.
    masked = frame

    # Draw circle around each face
    for (x, y, w, h) in faces:
        # Use the coordinates to find the center of the face and from that
        # point draw a circle of radius w/2.
        center_coordinates = int(x + w // 2), int(y + h // 2)
        radius = int(w // 2)  # or can be h / 2 or can be anything based on your requirements

        # background color (black)
        mask = np.zeros(frame.shape[:2], dtype="uint8")

        # Draw the desired region to crop out in white (thickness -1 = filled)
        cv2.circle(mask, center_coordinates, radius, 255, -1)

        masked = cv2.bitwise_and(frame, frame, mask=mask)

        # Paint everything outside the circle with the season colour.
        masked[mask == 0] = color

    # Encode once, after the loop: either the last masked image or, when no
    # face was found, the untouched frame.
    ret, jpeg = cv2.imencode('.jpg', masked)
    return jpeg.tobytes()
I see another problem. When it detects many faces, it creates a new mask for every face and applies each mask to the original image, so every mask discards the result of the previous one - it shows only the last face and hides the other faces. You should first create one mask containing all the circles and then apply it to the image.
I don't know what web framework you use so I used Flask to create minimal working example.
import os
from flask import Flask, Response
import cv2
import numpy as np
app = Flask(__name__)
# cv2.data.haarcascades is the folder where the opencv-python package keeps
# its bundled Haar cascade XML files.
path = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
face_cascade = cv2.CascadeClassifier(path)

# Open the camera only once, at import time; color_detection() reuses it.
# The number is the camera index - change it if your webcam is not device 0
# (the code in the question used index 1).
video = cv2.VideoCapture(0)
def color_detection(season):
    """Read one frame from the shared camera and return it as JPEG bytes.

    All detected faces are kept inside filled circles drawn on a single
    mask; everything outside the circles is painted with the colour selected
    by ``season`` (0 = spring, 1 = summer, 2 = fall, anything else =
    winter).  When no face is detected the unmodified frame is encoded.

    Returns ``None`` when no frame could be read or JPEG encoding failed, so
    the caller can skip this iteration.
    """
    ret, frame = video.read()

    if frame is None or not video.isOpened():
        return None

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=6)

    # test two circles on image 640x480
    #faces = [[100, 100, 250, 250], [640-100-250, 480-100-250, 250, 250]]

    # it can't be `if faces:` because `faces` can be a numpy array, whose
    # truth value is ambiguous - check its length instead
    if len(faces) > 0:
        # background color (black)
        mask = np.zeros(frame.shape[:2], dtype="uint8")

        # draw all circles on one mask
        for (x, y, w, h) in faces:
            #print(x, y, w, h)

            # use the coordinates to find the center of the face and from
            # that point draw a circle of radius max(w, h) / 2
            center_coordinates = int(x + w // 2), int(y + h // 2)
            radius = int(max(w, h) // 2)  # or can be anything based on your requirements

            # draw the desired region to crop out in white (thickness -1 = filled)
            cv2.circle(mask, center_coordinates, radius, 255, -1)

        # use mask with all circles
        masked = cv2.bitwise_and(frame, frame, mask=mask)

        # colours are in OpenCV's BGR channel order
        if season == 0:    # Spring - Coral pink
            color = [121, 131, 248]
        elif season == 1:  # Summer - Rose Red
            color = [86, 30, 194]
        elif season == 2:  # Fall - Red Brown / Cinnamon
            color = [30, 105, 210]
        else:              # Winter - Burgundy Red
            color = [31, 2, 141]

        # Recolour by mask instead of by "pixel is black", so genuinely
        # black pixels inside a face circle are left untouched.
        masked[mask == 0] = color
    else:  # no faces
        masked = frame

    ok, jpeg = cv2.imencode('.jpg', masked)
    if not ok:
        return None
    return jpeg.tobytes()
def gen():
    """Yield an endless sequence of multipart chunks, one JPEG frame each.

    A fresh frame is requested from color_detection(0) on every pass through
    the loop; falsy results are skipped.
    """
    part_header = b'--frame\r\nContent-Type: image/jpeg\r\n\r\n'
    part_trailer = b'\r\n\r\n'
    while True:
        jpeg_bytes = color_detection(0)
        if not jpeg_bytes:
            # print('no frame')
            continue
        yield (part_header + jpeg_bytes + part_trailer)
@app.route('/')
def index():
    """Serve a minimal page whose image element loads the /seasoncolor stream."""
    return '<img src="/seasoncolor">'
@app.route('/seasoncolor')
def seasoncolor():
    """Stream the frames produced by gen() as a multipart/x-mixed-replace response."""
    return Response(gen(), mimetype='multipart/x-mixed-replace; boundary=frame')
if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    #app.debug = True
    app.run()
To be clear: VideoCapture works only with a local camera attached to the machine where the web server runs. It can't work with a remote camera on the user's computer; only the user's browser has access to that camera. To work with a remote camera on the user's computer you would have to use JavaScript to access the camera in the user's browser and send frames to the server - like in my examples in my GitHub python-examples: web camera in browser - canvas - take image and upload to server


OpenCV wont object detect with MSS screen capture

I'm new to Python and want to learn it bit by bit, so I decided to write a simple program that would, in real time, capture my screen and do object detection. Through a lot of googling and reading, I was able to make this script, however, no matter what I do, it won't do object detection (m1.png).
Can you please assist me with the reason why it is like this?
import time
import cv2
import mss
import numpy
#template and dimensions
template = cv2.imread("m2.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_BGRA2GRAY)
template_w, template_h = template_gray.shape[::-1]
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 523, "left": 247, "width": 875, "height": 679}
while True:
last_time = time.time()
# Get raw pixels from the screen, save it to a Numpy array
img = numpy.array(sct.grab(monitor))
# Display the picture
cv2.imshow("Normal", img)
# Display the picture in grayscale
img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
res = cv2.matchTemplate(
image = img_gray,
templ = template_gray,
method= cv2.TM_CCOEFF_NORMED
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
if max_val >= 0.5:
img = cv2.rectangle(
img = img,
pt1 = max_loc,
pt2 = (
max_loc[0] + template_w, # = pt2 x
max_loc[1] + template_h # = pt2 y
color = (0,255,0),
thickness = 3 #fill the rectangle
print("fps: {}".format(1 / (time.time() - last_time)))
# Press "q" to quit
if cv2.waitKey(25) & 0xFF == ord("q"):
I spent three days trying to figure it out, and the only similar code that works but with a poor frame rate is this one:
from re import template
import cv2
import pyautogui
from time import sleep
#No cooldown time
pyautogui.PAUSE = 0
#template and dimensions
template = cv2.imread("b1.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_RGB2GRAY)
template_w, template_h = template_gray.shape[::-1]
# game window dimensions
x, y, w, h = 523, 247, 875, 679
while True:
#screenshot = img
pyautogui.screenshot("image.png", (x, y, w, h))
image = cv2.imread("image.png")
while True:
image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
result = cv2.matchTemplate(
image = image_gray,
templ = template_gray,
method = cv2.TM_CCOEFF_NORMED
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
if max_val >= 0.1:
# x = max_loc[0] + x, #screen x
# y = max_loc[1] + y #screen y
image = cv2.rectangle(
img = image,
pt1 = max_loc,
pt2 = (
max_loc[0] + template_w, # = pt2 x
max_loc[1] + template_h # = pt2 y
color = (0,0,255),
thickness = -1 #fill the rectangle
The structure looks similar; the only difference is that this template uses pyautogui with OpenCV, whereas I'm trying to use mss. So does that mean the issue in my code is that there's no physical (on-disk) copy of the screen capture? If so, does that mean it's impossible to do object detection with mss? You would make my day if you could solve this mystery with the code!!
UPD: I was able to solve this! The issue was that, firstly, I had misspelled the .png file name, and secondly, to see an object being detected, cv2.imshow has to be placed after the if statement. Although it works, it's not perfect, so I'm trying to use cv2.Canny(), but now I don't get any output. So here I'm raising the question of whether a different approach is needed when Canny is used:
import time
import Options.settings as set
import time
import pyautogui as pt
from time import sleep
import cv2
import mss
import numpy
x = 0
offset = set.offset
create_logs = set.create_logs
#template and dimensions
template = cv2.imread("m2.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_BGRA2GRAY)
template_canny = cv2.Canny(template_gray, 79, 100)
template_w, template_h = template_canny.shape[::-1]
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 523, "left": 1600, "width": 230, "height": 359}
while True:
last_time = time.time()
# Get raw pixels from the screen, save it to a Numpy array
img = numpy.array(sct.grab(monitor))
# Display the picture
cv2.imshow("Normal", img)
# Display the picture in grayscale
img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
img_canny = cv2.Canny(img_gray, 100, 115)
res = cv2.matchTemplate(
image = img_canny,
templ = template_canny,
method= cv2.TM_CCOEFF_NORMED
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
if max_val >= 0.6:
x = x + 1
print(f'{x} is detected')
img = cv2.rectangle(
img = img,
pt1 = max_loc,
pt2 = (
max_loc[0] + template_w, # = pt2 x
max_loc[1] + template_h # = pt2 y
color = (0,255,0),
thickness = 3 #fill the rectangle
# Display the picture
cv2.imshow("Normal", img)
#print("fps: {}".format(1 / (time.time() - last_time)))
# Press "q" to quit
if cv2.waitKey(25) & 0xFF == ord("q"):
UPD 2:
As @fmw42 suggested, I tried different OpenCV methods, but whichever one I try, it constantly reacts if there's any object in the screen capture field, no matter how I change the threshold in `if max_val >= ...`
Please find attached
m2.png =
Example of what the screen capture look like =
Alright, I somehow got the idea behind the issue:
I had to move the following lines after the if block, and now it's working ^^
# Display the picture
cv2.imshow("Normal", img)

Open Cv add jewelry image to captured face issue

I have implemented code that captures an image and saves it. After that, I have another piece of code which adds jewelry to the captured image, but I am facing an issue while adding jewelry to the captured face. The error is: "face_landmarks = face_landmarks_list[0]
IndexError: list index out of range"
Can someone help me with a solution?
image capturing code
import cv2
cam = cv2.VideoCapture(0)
img_counter = 0
while True:
ret, frame =
if not ret:
print("failed to grab frame")
cv2.imshow("test", frame)
k = cv2.waitKey(1)
if k%256 == 27:
# ESC pressed
print("Escape hit, closing...")
elif k%256 == 32:
# SPACE pressed
img_name = "opencv_frame_{}.png".format(img_counter)
cv2.imwrite(img_name, frame)
print("{} written!".format(img_name))
img_counter += 1
-------- below code for adding image --------
import cv2
import face_recognition
from PIL
import Image, ImageDraw
import numpy
jewel_img = cv2.imread("jewelery.png")
frame = cv2.imread('akash.jpg')
frame = cv2.resize(frame, (432, 576))
# Returns a list of face landmarks present on frame
face_landmarks_list = face_recognition.face_landmarks(frame)
# For demo images only one person is present in image
face_landmarks = face_landmarks_list[0]
shape_chin = face_landmarks['chin']
# x, y cordinates on frame where jewelery will be added
x = shape_chin[3][0]
y = shape_chin[6][1]
# Jewelry width & height calculated using face chin cordinates
img_width = abs(shape_chin[3][0] - shape_chin[14][0])
img_height = int(1.02 * img_width)
jewel_img = cv2.resize(jewel_img, (img_width, img_height), interpolation = cv2.INTER_AREA)
jewel_gray = cv2.cvtColor(jewel_img, cv2.COLOR_BGR2GRAY)
# All pixels greater than 230 will be converted to white and others will be converted to black
thresh, jewel_mask = cv2.threshold(jewel_gray, 230, 255, cv2.THRESH_BINARY)
# Convert to black the background of jewelry image
jewel_img[jewel_mask == 255] = 0
# Crop out jewelry area from original frame
jewel_area = frame[y: y + img_height, x: x + img_width]
# bitwise_and will convert all black regions in any image to black in resulting image
masked_jewel_area = cv2.bitwise_and(jewel_area, jewel_area, mask = jewel_mask)
# add both images so that the black region in any image will result in another image non black regions being rendered over that area
final_jewel = cv2.add(masked_jewel_area, jewel_img)
# replace original frame jewel area with newly created jewel_area
frame[y: y + img_height, x: x + img_width] = final_jewel
# convert image to RGB format to read it in pillow library
rgb_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(rgb_img)
draw = ImageDraw.Draw(pil_img, 'RGBA')
draw.polygon(face_landmarks['left_eyebrow'], fill = (23, 26, 31, 100))
draw.polygon(face_landmarks['right_eyebrow'], fill = (23, 26, 31, 100))
draw.polygon(face_landmarks['top_lip'], fill = (158, 63, 136, 100))
draw.polygon(face_landmarks['bottom_lip'], fill = (158, 63, 136, 100))
draw.polygon(face_landmarks['left_eye'], fill = (23, 26, 31, 100))
draw.polygon(face_landmarks['right_eye'], fill = (23, 26, 31, 100))
# calculate x, y, radius
for ellipse to be drawn between two eyebrows
x_centre_eyebrow = face_landmarks['nose_bridge'][0][0]
y_centre_eyebrow = face_landmarks['left_eyebrow'][4][1]
r = int(1 / 4 * abs(face_landmarks['left_eyebrow'][4][0] - face_landmarks['right_eyebrow'][0][0]))
draw.ellipse((x_centre_eyebrow - r, y_centre_eyebrow - r, x_centre_eyebrow + r, y_centre_eyebrow + r), fill = (128, 0, 128, 100))
The error message itself points to the answer:
face_landmarks = face_landmarks_list[0]
You need to check whether at least one face was detected.
Your second script should start by checking the length of the returned list.
If the length is greater than 0, meaning at least one face was detected, then continue.
# Load and resize the photo first: the landmarks have to be computed on the
# same image, at the same size, that the jewelry is later pasted onto.
frame = cv2.imread('akash.jpg')
if frame is None:
    # cv2.imread returns None instead of raising when the file can't be read
    raise FileNotFoundError("could not read image 'akash.jpg'")
frame = cv2.resize(frame, (432, 576))

# Returns a list of face landmarks present on frame
# NOTE(review): face_recognition's own examples pass RGB images, while
# cv2.imread yields BGR - confirm, and convert with
# cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) before detection if needed.
face_landmarks_list = face_recognition.face_landmarks(frame)

if face_landmarks_list:
    jewel_img = cv2.imread("jewelery.png")
    # For demo images only one person is present in image
    face_landmarks = face_landmarks_list[0]
else:
    # An empty list means no face was found; indexing [0] would raise IndexError.
    print("No face detected in the image")

How to crop multiple ROI in image using Python and OpenCV

I have an image that converted from PDF to PNG. The converted image contains several keywords that I wanted to extracted using OCR Tesseract.
Right now, I need to determine each ROI manually in order to crop it. Since I have more than 5 ROIs to apply, what would be the most efficient way to define them instead of finding the exact locations by trial and error?
Below is the code:
def cropped(self, event):
    """Cut two fixed regions out of Output.png, save them, and reload them.

    The reloaded images are what gets handed to the OCR engine.
    """
    # One entry per ROI: (first-axis start, first-axis stop,
    #                     second-axis start, second-axis stop, output file)
    regions = (
        (405, 425, 20, 230, "Cropped.png"),   # 1st ROI
        (305, 525, 30, 330, "Cropped1.png"),  # 2nd ROI
    )

    # open the converted image
    source = cv2.imread("Output.png")

    # perform image cropping
    pieces = [source[a0:a1, b0:b1] for a0, a1, b0, b1, _ in regions]

    # save the cropped images
    for (*_, filename), piece in zip(regions, pieces):
        cv2.imwrite(filename, piece)

    # open the cropped images and pass them to the OCR engine
    im, im1 = [cv2.imread(filename) for *_, filename in regions]

    ## Do the text extraction here
You can use mouse events to select multiple ROIs and crop based on their locations.
#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os
drawing = False # true if mouse is pressed
ix,iy = -1,-1
refPt = []
img = ""
clone = ""
ROIRegion = []
# mouse callback function
def draw_rectangle(event,x,y,flags,param):
global ix,iy,drawing,img,clone,refPt, ROIRegion
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
ix,iy = x,y
refPt = [(x, y)]
#clone = img.copy()
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
img = clone.copy()
if a != x | b != y:
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
img = clone.copy()
cv2.rectangle(img, (ix,iy),(x,y), (0, 255, 0), 2)
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())
# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
img = np.array(img)
clone = img.copy()
k = cv2.waitKey(1) & 0xFF
if k == ord("r"):
del ROIRegion[-1]
del refPt[-1]
img = clone.copy()
elif k == 27:
#Do your cropping here
for region in range(len(ROIRegion)):
cv2.rectangle(img, ROIRegion[region][0],ROIRegion[region][1], (0, 255, 0), 2)
roi = clone[ROIRegion[region][0][1]:ROIRegion[region][1][1], ROIRegion[region][0][0]:ROIRegion[region][1][0]]
roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
Here is one way in Python/OpenCV.
Read the input
Threshold on box outline color
Apply morphology to ensure closed
Get the external contours
Loop over each contour, get its bounding box, crop the region in the input and write the output
import cv2
import numpy as np
# read image
img = cv2.imread('text_boxes.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file can't be read
    raise FileNotFoundError("could not read image 'text_boxes.jpg'")

# threshold on box outline color
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)

# apply morphology to ensure regions are filled and remove extraneous noise
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get contours
# findContours returns 2 values in some OpenCV versions and 3 in others;
# pick the contour list in either case
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# get bounding boxes and write one numbered crop per contour (starting at 1)
for i, cntr in enumerate(contours, start=1):
    x,y,w,h = cv2.boundingRect(cntr)
    crop = img[y:y+h, x:x+w]
    cv2.imwrite("text_boxes_crop_{0}.png".format(i), crop)

# save threshold
cv2.imwrite("text_boxes_thresh.png", thresh)

# show thresh and result
cv2.imshow("thresh", thresh)
# the window is only drawn while waitKey runs; wait for a key press, then close
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Cropped Images:

How to read a frame (raw_Image) from a virtual turtlebot's camera in Gazebo, and manipulate it using OpenCV and publish for viewing in Rviz

For my class robotics project, we are required to read in video feed frame by frame from a virtual turtlebot being simulated using gazebo. Then we detect colors in this image (red, blue, and green) if they are at the center of the image, within a rectangle that we have to draw in the center of the frame. If one of the colors are detected, the rectangle should change from green to red, and if the 's' button is clicked while the rectangle is red (ie it detects a color) then it should publish to a topic saying that the target was shot. The majority of this code was written for a previous assignment, in which we had to detect any of these colors anywhere in a frame from a webcam and indicate it with a bounding box or circle, and my code worked for that assignment. Now I have changed my code slightly to read frames from the virtual camera, and I added some code from some example code that our professor provided us with to try to publish the modified image so that I can view it using Rviz. Whatever I added is making the code no longer work as it should and I am getting an error, but I don't understand the code deeply enough to find my error.
The error that I am receiving is as follows:
[ERROR] [1575253473.863051, 2724.055000]: bad callback:
Traceback (most recent call last):
File "/opt/ros/melodic/lib/python2.7/dist-packages/rospy/", line 750, in _invoke_callback
File "/home/project_ws/src/project_gazebo/scripts/", line 49, in ImageCallback
upper_left(int(w/4), h)
TypeError: 'NoneType' object is not callable
I have no idea what this means or how to fix it. I know that it is probably a problem with the subscriber callback function, but I tried to write it exactly how the professor's example did for face detection (rather than color detection). Other than that, I have no idea how to fix this issue. I could really use some guidance.
Here is what the file looks like (I would only post a snippet, but since I am unsure where the issue is I am posting all of it):
#!/usr/bin/env python
from __future__ import print_function
from collections import deque
import numpy as np
import cv2
import sys
import rospy
import rospkg
from std_msgs.msg import String
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
hit_pub = rospy.Publisher('Cait/hit', String, queue_size=10)
image_pub = rospy.Publisher("Cait/image_topic_2",Image, queue_size=10)
bridge = CvBridge()
frame = None
height= None
width= None
inRange= False
targetHit = False
def ImageCallback(data):
global frame
frame = bridge.imgmsg_to_cv2(data, "bgr8")
#frame =
global height
global width
(h, w) = frame.shape[:2] #w:image-width and h:image-height
height = h
width = w
global centerPoint_x
global centerPoint_y
centerPoint_x = w/2
centerpoint_y= h/2
global upper_left
global lower_right
upper_left(int(w/4), h)
lower_right(int(w*3/4), 0)
except CvBridgeError as e:
image_sub = rospy.Subscriber("Cait/camera/rgb/image_raw",Image,ImageCallback)
def Image_converter():
# define the lower and upper boundaries of the colors in the HSV color space
lower = {'red':(166, 84, 141), 'green':(66, 122, 129), 'yellow':(23, 59, 119)}
upper = {'red':(186,255,255), 'green':(86,255,255), 'yellow':(54,255,255)}
# grab frames from webcam -> now grab fromm rviz raw_image in subscriber
#camera = cv2.VideoCapture(0)
# keep looping
while True:
# grab the current frame -> now done by ImageCallback()
#(grabbed, frame) =
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
#for each color in dictionary check object in frame
#draw rectangle in the center of the frame
cv2.rectangle(frame, upper_left, lower_right, (0, 255, 0), 2)
for key, value in upper.items():
# construct a mask for the color from dictionary`1, then remove any small blobs left in the mask
kernel = np.ones((9,9),np.uint8)
mask = cv2.inRange(hsv, lower[key], upper[key])
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
# find contours in the mask and initialize the current (x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
center = None
# only proceed if at least one contour was found
if len(cnts) > 0:
# find the largest contour in the mask, then use it to compute the minimum enclosing circle
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
#check to see if the point found is at the center of the screen, and if yes change the color of the rectangle
global inRange
if (x > int(width/4) & x< int(width*3/4)):
if(y > 0 & y < height):
cv2.rectangle(frame, upper_left, lower_right, (0, 0, 255), 2)
inRange = True
cv2.rectangle(frame, upper_left, lower_right, (0, 255, 0), 2)
inRange= False
# show the frame to our screen
cv2.imshow("Image window", frame)
key = cv2.waitKey(1) & 0xFF
image_pub.publish(bridge.cv2_to_imgmsg(frame, "bgr8"))
except CvBridgeError as e:
# if the 's' key is pressed, publish an answer to the topic
if key == ord("s"):
if (inRange):
hit_pub.publish("Target hit!")
hit_pub.publish("Sorry. Nothing was hit.")
if key == ord("q"): #q quits loop
def main(args):
rospy.init_node('image_converter', anonymous=True)
except KeyboardInterrupt:
print("Shutting down")
if __name__ == '__main__':

Python: record video on motion, but release VideoWriter when motion not present

My goal of this script is to start recording to a video file when motion is detected. When motion is no longer detected, the writer will be released and the script will create a new video the next time that motion is detected...and so on.
With the below script, I'm able to start writing to a video file once motion is detected, but I have to press the q button to release the writer and make the video playable. If I don't press q, the recording will stop, but the next time there's motion it will just be added to the existing video. I've tried writer.release() in a few places without success.
# import the necessary packages
from pyimagesearch.tempimage import TempImage
import argparse
import warnings
import datetime
import imutils
import json
import numpy as np
import time
import cv2
print("[INFO] Kicking off script - " +"%Y-%m-%d_%H_%M_%S"))
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True,
help="path to the JSON configuration file")
args = vars(ap.parse_args())
# filter warnings, load the configuration and initialize the Dropbox
# client
conf = json.load(open(args["conf"]))
client = None
# initialize the camera and grab a reference to the raw camera capture
# if the video argument is None, then we are reading from webcam
if not conf["use_ip_cam"]:
camera = cv2.VideoCapture(0)
# otherwise, we are reading from a video input
camera = cv2.VideoCapture(conf["ip_cam_addr"])
# allow the camera to warmup, then initialize the average frame, last
# uploaded timestamp, and frame motion counter
print("[INFO] warming up...")
avg = None
lastUploaded =
motionCounter = 0
fourcc = 0x00000020 # a little hacky, but works for now
writer = None
(h, w) = (None, None)
zeros = None
output = None
# capture frames from the camera
# for f in camera.capture_continuous(rawCapture, format="bgr",
# use_video_port=True):
while True:
# grab the raw NumPy array representing the image and initialize
# the timestamp and occupied/unoccupied text
(grabbed, frame) =
# frame = f.array
timestamp =
motion_detected = False
# if the frame could not be grabbed, then we have reached the end
# of the video
if not grabbed:
# resize the frame, convert it to grayscale, and blur it
frame = imutils.resize(frame, width=500)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (21, 21), 0)
# if the average frame is None, initialize it
if avg is None:
print("[INFO] starting background model...")
avg = gray.copy().astype("float")
# frame.truncate(0)
# accumulate the weighted average between the current frame and
# previous frames, then compute the difference between the current
# frame and running average
cv2.accumulateWeighted(gray, avg, 0.5)
frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg))
# threshold the delta image, dilate the thresholded image to fill
# in holes, then find contours on thresholded image
thresh = cv2.threshold(frameDelta, conf["delta_thresh"], 255,
thresh = cv2.dilate(thresh, None, iterations=2)
(_, cnts, _) = cv2.findContours(thresh.copy(),
# loop over the contours
for c in cnts:
# if the contour is too small, ignore it
if cv2.contourArea(c) < conf["min_area"]:
# compute the bounding box for the contour, draw it on the frame,
# and update the text
(x, y, w1, h1) = cv2.boundingRect(c)
cv2.rectangle(frame, (x, y), (x + w1, y + h1), (0, 255, 0), 2)
motion_detected = True
fps = camera.get(cv2.CAP_PROP_FPS)
ts = timestamp.strftime("%Y-%m-%d_%H_%M_%S")
time_and_fps = ts + " - fps: " + str(fps)
# draw the text and timestamp on the frame
cv2.putText(frame, "Motion Detected: {}".format(motion_detected), (10, 20),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.putText(frame, time_and_fps, (10, frame.shape[0] - 10),
0.35, (0, 0, 255), 1)
# check to see if the room is occupied
if motion_detected:
motionCounter += 1
# check to see if the number of frames with consistent motion is
# high enough
if motionCounter >= conf["min_motion_frames"]:
# check if the writer is None
if writer is None:
print("hitting writer is none")
# store the image dimensions, initialzie the video
# writer, and construct the zeros array
(h2, w2) = frame.shape[:2]
writer = cv2.VideoWriter("/Users/user/Library/Mobile Documents/com~apple~CloudDocs/testMotionDetection/" +"%Y-%m-%d_%H_%M_%S") + ".mp4",
fourcc, fps,
(w2, h2), True)
zeros = np.zeros((h2, w2), dtype="uint8")
# construct the final output frame, storing the
# original frame
output = np.zeros((h2, w2, 3), dtype="uint8")
output[0:h2, 0:w2] = frame
# write the output frame to file
# otherwise, there is no motion
# Traceback (most recent call last):
# File "", line 178, in <module>
# writer.release()
# AttributeError: 'NoneType' object has no attribute 'release'
motionCounter = 0
# check to see if the frames should be displayed to screen
if conf["show_video"]:
# display the security feed
cv2.imshow("Security Feed", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key is pressed, break from the loop
if key == ord("q"):
# cleanup the camera and close any open windows
print("[INFO] cleaning up...")
# writer.release() - only releases writer when q is pressed
I was able to get this working correctly. This script creates a VideoWriter object for every frame with a temporary file. If motion is not detected within that frame, the writer is released and the file that was created with VideoWriter gets deleted.
If motion is detected, the file is kept and written to. Once motion is no longer detected, a countdown timer is started so it continues recording as long as you set. If motion is detected before the timer hits 0, then it continues to record and resets the timer and so on.
This is still being tuned, but works pretty well.
import argparse
import warnings
import datetime
import imutils
import json
import numpy as np
import os
import time
import cv2
print("[INFO] Kicking off script - " +"%Y-%m-%d_%H_%M_%S"))
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True,
help="path to the JSON configuration file")
args = vars(ap.parse_args())
# filter warnings, load the configuration
conf = json.load(open(args["conf"]))
# initialize the camera and grab a reference to the raw camera capture
# if the config does not enable the IP camera, read from the local webcam
if not conf["use_ip_cam"]:
camera = cv2.VideoCapture(0)
# otherwise, read from the IP camera address given in the config
camera = cv2.VideoCapture(conf["ip_cam_addr"])
# allow the camera to warmup, then initialize the average frame, last
# uploaded timestamp, and frame motion counter
print("[INFO] warming up...")
avg = None
lastUploaded =
motion_counter = 0
non_motion_timer = conf["nonMotionTimer"]
fourcc = 0x00000020 # a little hacky, but works for now
writer = None
(h, w) = (None, None)
zeros = None
output = None
made_recording = False
# capture frames from the camera
while True:
# grab the raw NumPy array representing the image and initialize
# the timestamp and occupied/unoccupied text
(grabbed, frame) =
timestamp =
motion_detected = False
# if the frame could not be grabbed, then we have reached the end
# of the video
if not grabbed:
print("[INFO] Frame couldn't be grabbed. Breaking - " +"%Y-%m-%d_%H_%M_%S"))
# resize the frame, convert it to grayscale, and blur it
frame = imutils.resize(frame, width=conf["resizeWidth"])
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (21, 21), 0)
# if the average frame is None, initialize it
if avg is None:
print("[INFO] starting background model...")
avg = gray.copy().astype("float")
# frame.truncate(0)
# accumulate the weighted average between the current frame and
# previous frames, then compute the difference between the current
# frame and running average
cv2.accumulateWeighted(gray, avg, 0.5)
frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg))
# threshold the delta image, dilate the thresholded image to fill
# in holes, then find contours on thresholded image
thresh = cv2.threshold(frameDelta, conf["delta_thresh"], 255,
thresh = cv2.dilate(thresh, None, iterations=2)
(_, cnts, _) = cv2.findContours(thresh.copy(),
# loop over the contours
for c in cnts:
# if the contour is too small, ignore it
if cv2.contourArea(c) < conf["min_area"]:
# compute the bounding box for the contour, draw it on the frame,
# and update the text
(x, y, w1, h1) = cv2.boundingRect(c)
cv2.rectangle(frame, (x, y), (x + w1, y + h1), (0, 255, 0), 2)
motion_detected = True
fps = int(round(camera.get(cv2.CAP_PROP_FPS)))
record_fps = 10
ts = timestamp.strftime("%Y-%m-%d_%H_%M_%S")
time_and_fps = ts + " - fps: " + str(fps)
# draw the text and timestamp on the frame
cv2.putText(frame, "Motion Detected: {}".format(motion_detected), (10, 20),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.putText(frame, time_and_fps, (10, frame.shape[0] - 10),
0.35, (0, 0, 255), 1)
# Check if writer is None TODO: make path configurable
if writer is None:
filename ="%Y-%m-%d_%H_%M_%S")
file_path = (conf["userDir"] + "/Library/Mobile Documents/"
file_path = file_path.format(filename=filename)
(h2, w2) = frame.shape[:2]
writer = cv2.VideoWriter(file_path, fourcc, record_fps, (w2, h2), True)
zeros = np.zeros((h2, w2), dtype="uint8")
def record_video():
# construct the final output frame, storing the original frame
output = np.zeros((h2, w2, 3), dtype="uint8")
output[0:h2, 0:w2] = frame
# write the output frame to file
# print("[DEBUG] Recording....")
if motion_detected:
# increment the motion counter
motion_counter += 1
# check to see if the number of frames with motion is high enough
if motion_counter >= conf["min_motion_frames"]:
if conf["create_image"]:
# create image TODO: make path configurable
image_path = (conf["userDir"] + "/Library/Mobile Documents/"
cv2.imwrite(image_path, frame)
made_recording = True
non_motion_timer = conf["nonMotionTimer"]
# If there is no motion, continue recording until timer reaches 0
# Else clean everything up
else: # TODO: implement a max recording time
# print("[DEBUG] no motion")
if made_recording is True and non_motion_timer > 0:
non_motion_timer -= 1
# print("[DEBUG] first else and timer: " + str(non_motion_timer))
# print("[DEBUG] hit else")
motion_counter = 0
if writer is not None:
# print("[DEBUG] hit if 1")
writer = None
if made_recording is False:
# print("[DEBUG] hit if 2")
made_recording = False
non_motion_timer = conf["nonMotionTimer"]
# check to see if the frames should be displayed to screen
if conf["show_video"]:
cv2.imshow("Security Feed", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key is pressed, break from the loop
if key == ord("q"):
# cleanup the camera and close any open windows
print("[INFO] cleaning up...")
Example config file:
"show_video": true,
"use_ip_cam": false,
"ip_cam_addr": "rtsp://<ip>/live0.264",
"create_image": true,
"min_upload_seconds": 5,
"min_motion_frames": 12,
"camera_warmup_time": 2.5,
"delta_thresh": 5,
"resolution": [640, 480],
"fps": 16,
"min_area": 500,
"userDir": "/Path/to/user",
"resizeWidth": 500,
"nonMotionTimer": 36
