I am trying to generate grayscale Motion History Images (MHIs) from a live image feed (webcam) for processing by a CNN model I have built. From the updateMotionHistory function I get an output of:
[width x height] array of type np.float32
I want to convert this array into a grayscale image in which more recent motion appears brighter, like this:
EDIT: Added code example below
import cv2
import numpy as np
import time
import genMHI_util
# Build a grayscale Motion History Image (MHI) from a live webcam feed:
# pixels that moved most recently are brightest and fade to black over
# genMHI_util.MHI_DURATION seconds.
cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorMOG2(history=1000, detectShadows=False)
colourThreshold = 0.975
frame_reduction_counter = 0
# NumPy shapes are (rows, cols) == (height, width), whereas cv2.resize takes
# (width, height); the two must describe the same image.
mhi = np.zeros((genMHI_util.MHI_HEIGHT, genMHI_util.MHI_WIDTH), np.float32)

while True:
    ret, frame = cap.read()
    if not ret:
        # Camera unavailable or stream ended.
        break
    # Optional: blur the frame here (e.g. cv2.medianBlur(frame, 5)) to
    # suppress sensor noise before background subtraction.
    if frame_reduction_counter >= 0:
        # With a threshold of 0 every frame is processed; raise it to skip frames.
        frame_reduction_counter = 0
        timestamp = cv2.getTickCount() / cv2.getTickFrequency()
        # Do background subtraction on frame to get silhouette
        silhouette = fgbg.apply(frame)
        silhouette = cv2.resize(
            silhouette, (genMHI_util.MHI_WIDTH, genMHI_util.MHI_HEIGHT))
        # Update MHI. The duration must match the one used for the
        # normalisation below, otherwise the fade does not span 0..255.
        cv2.motempl.updateMotionHistory(
            silhouette, mhi, timestamp, genMHI_util.MHI_DURATION)
        # Map [timestamp - duration, timestamp] linearly onto [0, 255].
        # cv2.convertScaleAbs is unsuitable here: it takes the absolute value,
        # so pixels that never moved (large negative value) saturate to white.
        mask = np.uint8(
            np.clip(
                (mhi - (timestamp - genMHI_util.MHI_DURATION))
                / genMHI_util.MHI_DURATION,
                0, 1) * 255)
        # Preview images
        cv2.imshow('original', frame)
        cv2.imshow('silhouette', silhouette)
        cv2.imshow('mhi', mask)
    frame_reduction_counter += 1
    # imshow only refreshes while the GUI event loop is pumped by waitKey.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
Output without movement - bit of camera noise
Output with movement - kind of works but very badly
I have the code below, and I expected it to change the width of the frame according to the loop value i (from 1 to 1000) in real time. In other words, I expected the window's width to change while the code was running.
import cv2
import numpy as np
# Show each video frame at widths 1..999 with a horizontal motion blur applied.
vid = cv2.VideoCapture("C:\\users\\USER\\downloads\\man.mp4")

# The blur kernel never changes, so build it once instead of per iteration.
size = 16
# Create motion blur kernel
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[int((size - 1) / 2), :] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size

quit_requested = False
while not quit_requested:
    ret, frame = vid.read()
    if not ret:
        # End of video (or read failure): frame is None, so stop.
        break
    for i in range(1, 1000):
        # Always resize from the untouched source frame. Overwriting `frame`
        # makes every later size an upscale of a 1-pixel-wide image, which is
        # what produced the glitchy output.
        resized = cv2.resize(frame, (i, 450))
        # Apply motion kernel motion blur
        result = cv2.filter2D(resized, -1, kernel_motion_blur)
        cv2.imshow('Motion Blur Applied', result)
        if cv2.waitKey(1) == ord('q'):
            quit_requested = True
            break

vid.release()
cv2.destroyAllWindows()
It works, but when I execute the code the video frame gets glitchy and does not display the correct image from the actual video. How do I fix that?
The video before and after
I'm not sure I totally understand the question, or why you have a for loop inside of a while loop, but I think I was able to achieve the effect you are going for. Take a look:
import cv2
import numpy as np
# Reveal each video frame from left to right, with motion blur applied.
# control how fast the image slides left to right
step_size = 10
# Raw string: "\d" is not a valid escape sequence in a normal string literal.
vid = cv2.VideoCapture(r"videos\demo.mp4")

# The kernel is loop-invariant, so build it once.
size = 16
# Create motion blur kernel
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[int((size - 1) / 2), :] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size

running = True
while running:
    ret, frame = vid.read()
    if not ret:
        # End of video: frame is None, so stop before touching frame.shape.
        break
    width = frame.shape[1]
    for n in range(0, width, step_size):
        # Crop instead of resizing, so the visible part keeps its proportions.
        frame_to_show = frame[:, :n + step_size]
        # Apply motion kernel motion blur
        result = cv2.filter2D(frame_to_show, -1, kernel_motion_blur)
        cv2.imshow('Motion Blur Applied', result)
        key = cv2.waitKey(1)
        if key == ord('q'):
            running = False
            break

vid.release()
cv2.destroyAllWindows()
Or perhaps this is more what you are looking for:
import cv2
import numpy as np
# Play the video while a left-to-right reveal sweeps across it repeatedly.
# control how fast the image slides left to right
step_size = 10
# Raw string: "\d" is not a valid escape sequence in a normal string literal.
vid = cv2.VideoCapture(r"videos\demo.mp4")
end = 0

# The kernel is loop-invariant, so build it once.
size = 16
# Create motion blur kernel
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[int((size - 1) / 2), :] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size

while True:
    ret, frame = vid.read()
    if not ret:
        # End of video: frame is None, so stop before touching frame.shape.
        break
    width = frame.shape[1]
    # Restart the sweep once the whole frame has been revealed.
    if end > width:
        end = 0
    end += step_size
    frame_to_show = frame[:, :end]
    # Apply motion kernel motion blur
    result = cv2.filter2D(frame_to_show, -1, kernel_motion_blur)
    cv2.imshow('Motion Blur Applied', result)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

vid.release()
cv2.destroyAllWindows()
I want to avoid saving duplicate objects: when the camera opens, it captures the first frame and saves it to disk; then, when the next object appears in the scene, it saves that frame (it should not save the same frame consecutively).
I have written code to compare two consecutive webcam frames. I want to store one frame in an array (max limit 3) to compare it with the current frame, so the first frame is saved to disk and is compared against until the next object appears (I use a threshold value for this purpose).
How can i save the frame to an array and compare with current frame?
from skimage.metrics import structural_similarity
import imutils
import sys
import datetime
import cv2
import time
import numpy as np
# Compare two webcam frames taken ~20 ms apart with SSIM and save the second
# frame when the thresholded difference image meets the condition below.
# NOTE(review): indentation and some lines of this snippet are missing in the
# visible source; the comments describe only what is shown.
cap = cv2.VideoCapture(0)
while (True):
# Capture frame-by-frame
ret, frame1 = cap.read(0) # first image
time.sleep(1/50) # slight delay
ret, frame2 = cap.read(0) # second image
# NOTE(review): `ret` is never checked, so a failed read passes None to cvtColor.
gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
# compute the Structural Similarity Index (SSIM) between the two
# images, ensuring that the difference image is returned
(score, diff) = structural_similarity (gray1, gray2, full=True)
# Scale the difference image by 255 and convert it to uint8 for thresholding/display.
diff = (diff * 255).astype ("uint8")
print ("SSIM: {}".format (score))
# threshold the difference image, followed by finding contours to
# obtain the regions of the two input images that differ
# NOTE(review): the call below is truncated in the visible source (the
# threshold-type argument and closing parenthesis are missing), and no
# contour-finding code is visible.
thresh = cv2.threshold (diff, 0, 255,
if np.mean (thresh) < 0.4 :
print ("New object Detected")
# NOTE(review): the timestamp contains ':' characters, which are not valid in
# Windows file names — confirm the target platform.
date_string = datetime.datetime.now ( ).strftime ("%Y-%m-%d-%H:%M:%S")
# NOTE(review): x, y, w and h are not defined anywhere in the visible code.
cv2.imwrite ('img/img-' + date_string + '.png', frame2[y:y+h+30, x:x+w+30])
# Display the resulting frame
cv2.imshow ('frame1', frame1)
cv2.imshow('frame2', frame2)
cv2.imshow ("Diff", diff)
cv2.imshow ("Thresh", thresh)
# NOTE(review): the body of this `if` (presumably a `break`) is not visible.
if cv2.waitKey(1) & 0xFF == ord('q'):
# When everything is done, release the capture
Not hard.
Actually, you can get the image as a NumPy array.
The shape is (720, 1280, 3).
To save it, try this
ret, frame1 = cap.read(0) # first image
# Reverse the last (channel) axis of the captured frame.
rgb_frame1 = frame1[..., ::-1]
# NOTE(review): `Image` is not imported in the visible code — presumably
# PIL's Image module; confirm and add the import.
im = Image.fromarray(rgb_frame1)
time.sleep(1/50) # slight delay
Note: you need to reverse the channel order or you will get a blue-tinted image, because OpenCV returns frames in BGR format.
Then you can store the frame:
I made a convolutional neural network that predicts faces and returns coordinates (y1, x1, y2, x2). I am able to create a rectangle that serves as a mask covering the desired coordinates. I need a way to cover the images in real time. Is there a way to get a live image sequence without saving the frames, just overwriting them, and how do I extract the coordinates in OpenCV? I was using pyplot and saving the images, which is slow and ineffective.
Yeah, so I managed to come up with a solution, but I found out that, 1 frame takes about 0.54s to compute, so 2FPS, not great for live streaming, so I am switching to haarcascade.
Code below is used to configure and call the model.
from numpy import expand_dims
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from mrcnn.model import mold_image
import cv2
import time
# define the prediction configuration
class PredictionConfig(Config):
    """Inference-time Mask R-CNN configuration for the face detector."""

    # define the name of the configuration
    NAME = "face_cfg"
    # number of classes (background + face)
    NUM_CLASSES = 1 + 1
    # simplify GPU config: one image per batch, because classify_image()
    # passes a single frame to model.detect()
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
def classify_image(image, model, cfg):
    """Run *model* on a single image and return the detected boxes.

    The image is passed through ``mold_image`` with *cfg*, wrapped into a
    one-sample batch and handed to ``model.detect``. The elapsed detection
    time in seconds is printed.

    Returns:
        The ``'rois'`` entry of the first detection result.
    """
    # Preprocess, then add a leading batch axis.
    batch = expand_dims(mold_image(image, cfg), 0)
    started = time.time()
    first_result = model.detect(batch, verbose=0)[0]
    print(time.time() - started)
    return first_result['rois']
def image_bnd_highlight(image, coordinates):
    """Draw a white, 5-px-thick rectangle on *image* for every box.

    Args:
        image: Image to draw on; passed to ``cv2.rectangle`` for each box.
        coordinates: Iterable of ``(y1, x1, y2, x2)`` boxes. May be empty.

    Returns:
        The result of the last ``cv2.rectangle`` call, or *image* itself
        when *coordinates* is empty (previously this raised
        ``UnboundLocalError`` whenever no face was detected).
    """
    new_img = image
    for box in coordinates:
        # get coordinates
        y1, x1, y2, x2 = box
        # create the shape; plain ints avoid argument-parsing errors when the
        # coordinates arrive as NumPy scalars
        new_img = cv2.rectangle(
            image, (int(x1), int(y1)), (int(x2), int(y2)), (255, 255, 255), 5)
    return new_img
# Instantiate the inference configuration.
cfg: PredictionConfig = PredictionConfig()
# Build the Mask R-CNN model in inference mode.
model = MaskRCNN(
    mode='inference',
    model_dir='./',
    config=cfg,
)
# Load the trained weights by layer name.
model_path = 'mask_rcnn_face_cfg_0029.h5'
model.load_weights(model_path, by_name=True)
# Name under which the ready-to-use model is imported by other scripts.
definitive_model = model
Then I call my functions, that I created above.
import cv2 as cv
import acapture
from RealTime import definitive_model
from RealTime import cfg
from RealTime import classify_image
from RealTime import image_bnd_highlight
import time
# Grab webcam frames, run the face detector on a downscaled copy and show the
# frame with the detected boxes drawn on it.
# cap = acapture.open(0)
cap = cv.VideoCapture(0)
cap.set(cv.CAP_PROP_FRAME_WIDTH, 128)   # set frame width
cap.set(cv.CAP_PROP_FRAME_HEIGHT, 128)  # set frame height
cap.set(cv.CAP_PROP_FPS, 2)  # adjusting fps to 2

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break
    # let's resize our image to be 150 pixels wide, but in order to
    # prevent our resized image from being skewed/distorted, we must
    # first calculate the ratio of the *new* width to the *old* width
    r = 150.0 / frame.shape[1]
    dim = (150, int(frame.shape[0] * r))
    # perform the actual resizing of the image
    resized = cv.resize(frame, dim, interpolation=cv.INTER_AREA)
    coords = classify_image(resized, definitive_model, cfg)
    image = image_bnd_highlight(resized, coords)
    # Display the resulting frame
    cv.imshow('frame', image)
    if cv.waitKey(1) == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
Here is a code to get the optical flow output from a stabilized video (no camera movement) and save it as a set of frames
import cv2 as cv
import numpy as np
# Compute Farneback dense optical flow for every frame of a stabilized video
# and save a colour-coded crop of the flow field per frame.
# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")
# ret = a boolean return value from getting the frame, first_frame = the first frame in the entire video sequence
ret, first_frame = cap.read()
if not ret:
    raise SystemExit("Could not read the first frame of 2_stable_video.avi")
# Converts frame to grayscale because we only need the luminance channel for detecting edges - less computationally expensive
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)
# Creates an image filled with zero intensities with the same dimensions as the frame
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
count = 0

while cap.isOpened():
    # ret = a boolean return value from getting the frame, frame = the current frame being projected in the video
    ret, frame = cap.read()
    if not ret:
        # End of video: frame is None and must not reach cvtColor.
        break
    # Opens a new window and displays the input frame
    cv.imshow("input", frame)
    # Converts each frame to grayscale - we previously only converted the first frame to grayscale
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # Calculates dense optical flow by Farneback method
    flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    # Computes the magnitude and angle of the 2D vectors
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])
    # Sets image hue according to the optical flow direction
    mask[..., 0] = angle * 180 / np.pi / 2
    # Sets image value according to the optical flow magnitude (normalized)
    mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)
    # Converts HSV to RGB (BGR) color representation
    rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)
    # Opens a new window and displays the output frame
    cv.imshow("dense optical flow", rgb[40:150, 120:220])
    cv.imwrite("frames_modified_2/%d.png" % count, rgb[40:150, 120:220])
    count += 1
    # Updates previous frame
    prev_gray = gray
    # Frames are read by intervals of 1 millisecond. The program breaks out of the while loop when the user presses the 'q' key
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
Can someone please suggest how to quantify the difference between the frames? i.e. to estimate speed/velocity ?
Here's an example that obtains the pixel translation magnitude from .bsq frames. You can modify the code to take a video file as input instead. You are probably most interested in the get_translation() function. Example:
Graph displaying pixel translation from frame-to-frame
import numpy as np
import argparse
import os
import cv2
from matplotlib import pyplot as plt
from matplotlib import cm
import time
import random
# Usage: python translate_analyzer.py -p <filename.bsq>
# Automatic brightness and contrast optimization with optional histogram clipping
def automatic_brightness_and_contrast(image, clip_hist_percent=25):
    """Stretch brightness/contrast of *image* after clipping histogram tails.

    Args:
        image: Single-channel image, or a 3-channel BGR image. A 3-channel
            image is converted to grayscale for the histogram only; the
            scaling itself is applied to *image*.
        clip_hist_percent: Total percentage of pixels to clip, split evenly
            between the dark and the bright end of the histogram.

    Returns:
        Tuple ``(auto_result, alpha, beta)`` where ``auto_result`` is
        ``cv2.convertScaleAbs(image, alpha=alpha, beta=beta)``.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Calculate grayscale histogram
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
    hist_size = len(hist)

    # Calculate cumulative distribution from the histogram. cumsum includes
    # bin 0, which the manual loop (starting at index 1) needed as a seed.
    accumulator = np.cumsum(hist.ravel(), dtype=np.float64)

    # Locate points to clip
    maximum = accumulator[-1]
    clip_hist_percent *= (maximum / 100.0)
    clip_hist_percent /= 2.0

    # Locate left cut (bounded so it can never run past the last bin)
    minimum_gray = 0
    while (minimum_gray < hist_size - 1
           and accumulator[minimum_gray] < clip_hist_percent):
        minimum_gray += 1

    # Locate right cut (bounded so it can never cross the left cut)
    maximum_gray = hist_size - 1
    while (maximum_gray > minimum_gray
           and accumulator[maximum_gray] >= (maximum - clip_hist_percent)):
        maximum_gray -= 1

    # Calculate alpha and beta values. A flat image leaves no range to
    # stretch; fall back to unit gain instead of dividing by zero.
    span = maximum_gray - minimum_gray
    alpha = 255 / span if span > 0 else 1.0
    beta = -minimum_gray * alpha

    auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return (auto_result, alpha, beta)
# Draw flow
def draw_flow(img, flow, step=30):
    """Return a BGR copy of grayscale *img* with flow vectors drawn on it.

    Flow is sampled on a grid with spacing *step*; each sample is drawn as a
    line from the grid point to the grid point plus its flow vector, with a
    filled dot at the grid point.
    """
    rows, cols = img.shape[:2]
    grid = np.mgrid[step/2:rows:step, step/2:cols:step]
    y, x = grid.reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T

    # One (start, end) point pair per sample, rounded to the nearest pixel.
    segments = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
    segments = np.int32(segments + 0.5)

    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.polylines(vis, segments, 1, (36, 255, 12))
    for start, _end in segments:
        x1, y1 = start
        cv2.circle(vis, (x1, y1), 2, (36, 255, 12), -1)
    return vis
# Return translation value
def get_translation(img, flow, step=30):
    """Return the median of flow channel 0 and that channel, transposed.

    *img* and *step* are accepted but not used.

    Returns:
        Tuple ``(median, channel)`` where ``channel`` is ``flow[:, :, 0].T``
        and ``median`` is its median value.
    """
    first_channel = flow[:, :, 0].T
    median_shift = np.median(first_channel)
    return (median_shift, first_channel)
# Script section: load a .bsq file, run Farneback optical flow between
# consecutive frames and display the flow field.
# NOTE(review): indentation and some lines of this snippet are missing in the
# visible source; the comments describe only what is shown.
# Get file path
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--path", help="Path to the directory")
args = vars(ap.parse_args())
# NOTE(review): no exit follows the usage message in the visible code, so
# execution would continue with args['path'] unset.
if not args['path']:
print('Usage: python translate_analyzer.py -p <directory>')
# Extract file name
bsq_fname = os.path.split(args['path'])[-1]
# NOTE(review): likewise no exit is visible after this error message.
if '.bsq' not in bsq_fname:
print('ERROR: Invalid bsq file. Select correct file.')
width = 640
height = 512
# File size divided by bytes per frame (2 bytes per pixel, matching the uint16 read below).
# NOTE(review): getsize/fromfile use the bare file name, not the full path
# given on the command line — this only works from the file's own directory.
frame_count = int(os.path.getsize(bsq_fname)/(2*height*width))
# Crop window applied to every frame: the 100 leftmost columns, 512 rows.
x,y,w,h = 0,0,100,512
# Simulates calibrated frames to display on video frame
data_file = np.fromfile(bsq_fname, dtype=np.uint16, count=-1)
data_file = data_file.reshape((width, height, frame_count), order='F')
data_file = np.rot90(data_file)
# NOTE(review): `fname` is not used in the visible code.
fname = bsq_fname.split()[0]
# Prepare the first frame: integer-divide by 64, auto-contrast, then crop.
prev = data_file[:,:,0].copy()
prev //= 64
prev = automatic_brightness_and_contrast(prev)[0]
prev = prev[y:y+h, x:x+w]
# NOTE(review): translation_data is never appended to in the visible code.
translation_data = []
frame_direction = []
start = time.time()
for index in range(1, frame_count):
# Same preparation as for the first frame.
data = data_file[:,:,index].copy()
data //= 64
data = automatic_brightness_and_contrast(data)[0]
data = data[y:y+h, x:x+w]
flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
translation, pixel_direction = get_translation(data, flow)
prev = data
cv2.imshow('flow', draw_flow(data, flow))
frame_direction = pixel_direction
# NOTE(review): reassigning the loop variable has no effect on the next
# iteration of a `for ... in range(...)` loop.
index = (index+1) % frame_count
end = time.time()
print('Time:', end - start)
# NOTE(review): no plotting or plt.show() call is visible after the title.
plt.title("Pixel Direction")
I wrote a program that find 2 ROIs, selects them in 2 separate frames and then counts number of green pixels of each frame.
It works fine, but when I apply morphology masks for camera it gives me error: /home/pi/opencv/opencv-3.4.0/modules/core/src/arithm.cpp:1769: error: (-209) The lower boundary is neither an array of the same size and same type as src, nor a scalar in function inRange
How can I fix this?
Problem occurs here
maskFinal=maskClose ... for i in range(len(conts)):
x,y,w,h=cv2.boundingRect(conts[i]) area=maskFinal[y:y+h, x:x+w] pixcount =
cv2.inRange(area,lowerBound,upperBound ) pixNum = cv2.countNonZero(pixcount)
Full code
import cv2
import numpy as np
from picamera.array import PiRGBArray
from picamera import PiCamera
import time
import sys
# Capture frames from the Pi camera, find contours in a mask and show one
# window per contour region.
# NOTE(review): indentation and some lines of this snippet are missing in the
# visible source; the comments describe only what is shown.
prevNrOfContours = 0
camera = PiCamera()
camera.rotation = 180
camera.resolution = (640, 480)
camera.framerate = 30
rawCapture = PiRGBArray(camera, size=(640, 480))
# NOTE(review): both bounds are all zeros and are not used in the visible
# code — presumably placeholders for the real HSV range; confirm.
GREEN_MIN = np.array([0, 0, 0])
GREEN_MAX = np.array([0, 0, 0])
# allow the camera to warmup
# capture frames from the camera
for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
img = frame.array
imgHSV= cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
# NOTE(review): maskFinal is not defined in the visible code.
# The three-value unpacking matches the OpenCV 3.x findContours return value.
_, conts, _=cv2.findContours(maskFinal.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
nrOfContours = len(conts)
for i in range(len(conts)):
# NOTE(review): x, y, w and h are not assigned in the visible code.
area=maskFinal[y:y+h, x:x+w] ######## problem is here
# NOTE(review): lowerBound/upperBound are not defined in the visible code.
pixcount = cv2.inRange(area,lowerBound,upperBound )
pixNum = cv2.countNonZero(pixcount)
# NOTE(review): prints `no_black`, which is not defined in the visible code;
# the count computed above is stored in `pixNum`.
print("Area No."+str(i), "Green pixels = " + str(no_black))
cv2.rectangle(img,(x,y),(x+w,y+h),(0,0,255), 2)
cv2.imshow("area" + str(i), area)
# close unnecessary windows
if prevNrOfContours > nrOfContours:
for i in range(nrOfContours, prevNrOfContours):
cv2.destroyWindow("area" + str(i))
prevNrOfContours = nrOfContours
# clear the stream in preparation for the next frame
# NOTE(review): no call that clears rawCapture is visible after this comment.
key = cv2.waitKey(1) & 0xFF
# NOTE(review): the body of this `if` (presumably a `break`) is not visible.
if key == ord("q"):
You're creating a subimage of maskFinal. maskFinal is a binary image, which is incompatible with the 3 channel lowerBound.
To solve it, you can simply drop the line pixcount = cv2.inRange(area,lowerBound,upperBound ). The masked image is white in the green areas, so counting the non-zero pixels is enough.
Note: the current subimage includes the morphological transformations. If you do not want to count the pixels caused by those, you'll have to create a subimage of mask