OpenCV - overwriting live video - python

I made a convolutional neural network that predicts faces and returns coordinates (y1, x1, y2, x2). I am able to create a rectangle that serves as a mask covering the desired coordinates. I need a way to cover the faces in the images in real time. Is there a way to get a live image sequence without saving the frames, just overwriting them, and how do I extract the coordinates in OpenCV? I was using pyplot and saving the images, which is slow and ineffective.

Yeah, so I managed to come up with a solution, but I found out that one frame takes about 0.54 s to compute (roughly 2 FPS), which is not great for live streaming, so I am switching to a Haar cascade (a minimal sketch of that approach follows the code below).
Code below is used to configure and call the model.
from numpy import expand_dims
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from mrcnn.model import mold_image
import cv2
import time
# define the prediction configuration
class PredictionConfig(Config):
    # define the name of the configuration
    NAME = "face_cfg"
    # number of classes (background + face)
    NUM_CLASSES = 1 + 1
    # simplify GPU config
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

def classify_image(image, model, cfg):
    # convert pixel values (e.g. center)
    scaled_image = mold_image(image, cfg)
    # convert image into one sample
    sample = expand_dims(scaled_image, 0)
    # make prediction
    tic = time.time()
    yhat = model.detect(sample, verbose=0)[0]
    print(time.time() - tic)
    return yhat['rois']

def image_bnd_highlight(image, coordinates):
    for box in coordinates:
        # get coordinates
        y1, x1, y2, x2 = box
        # create the shape
        new_img = cv2.rectangle(image, (x1, y1), (x2, y2), (255, 255, 255), 5)
    return new_img

# create config
cfg: PredictionConfig = PredictionConfig()
# define the model
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
# load model weights
model_path = 'mask_rcnn_face_cfg_0029.h5'
model.load_weights(model_path, by_name=True)
definitive_model = model
Then I call the functions that I created above.
import cv2 as cv
import acapture
from RealTime import definitive_model
from RealTime import cfg
from RealTime import classify_image
from RealTime import image_bnd_highlight
import time
# cap = acapture.open(0)
cap = cv.VideoCapture(0)
cap.set(3,128) #set frame width
cap.set(4,128) #set frame height
cap.set(cv.CAP_PROP_FPS, 2) #adjusting fps to 2
# cap.set(cv.CAP_PROP_BUFFERSIZE,3)
# if not cap.isOpened():
# print("Cannot open camera")
# exit()
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break
    # let's resize our image to be 150 pixels wide, but in order to
    # prevent our resized image from being skewed/distorted, we must
    # first calculate the ratio of the *new* width to the *old* width
    r = 150.0 / frame.shape[1]
    dim = (150, int(frame.shape[0] * r))
    # perform the actual resizing of the image
    resized = cv.resize(frame, dim, interpolation=cv.INTER_AREA)
    # tic = time.time()
    coords = classify_image(resized, definitive_model, cfg)
    # print(time.time() - tic)
    image = image_bnd_highlight(resized, coords)
    # Display the resulting frame
    cv.imshow('frame', image)
    if cv.waitKey(1) == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
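For reference, here is a minimal sketch of the Haar cascade alternative mentioned above, using the frontal-face cascade bundled with opencv-python. The cascade file and the detectMultiScale parameters are assumptions meant to illustrate the idea, not a tuned solution:

import cv2

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # detectMultiScale returns (x, y, w, h) boxes and runs much faster than Mask R-CNN on CPU
    for (x, y, w, h) in face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5):
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 5)
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()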

Related

I want to add a boundary box

I have code that detects the object, but I want to add a bounding box to the detections.
import cv2
import numpy as np
from keras.models import load_model
# Load the model
model = load_model('keras_model.h5')
# CAMERA can be 0 or 1 based on default camera of your computer.
camera = cv2.VideoCapture(0)
# Grab the labels from the labels.txt file. This will be used later.
labels = open('labels.txt', 'r').readlines()
while True:
    # Grab the webcamera's image.
    ret, image = camera.read()
    # Resize the raw image into (224-height, 224-width) pixels.
    image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
    # Show the image in a window
    cv2.imshow('Webcam Image', image)
    # Make the image a numpy array and reshape it to the model's input shape.
    image = np.asarray(image, dtype=np.float32).reshape(1, 224, 224, 3)
    # Normalize the image array
    image = (image / 127.5) - 1
    # Have the model predict what the current image is. Model.predict
    # returns an array of probabilities. Example: [0.2, 0.8] means it is 20% sure
    # it is the first label and 80% sure it is the second label.
    probabilities = model.predict(image)
    # Print the label with the highest probability
    print(labels[np.argmax(probabilities)])
    # Listen to the keyboard for presses.
    keyboard_input = cv2.waitKey(1)
    # 27 is the ASCII code for the esc key on your keyboard.
    if keyboard_input == 27:
        break

camera.release()
cv2.destroyAllWindows()
Desired result: a nice bounding box around the predicted object.
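For context, keras_model.h5 here is a plain image classifier, so model.predict only returns class probabilities, never coordinates; drawing a box needs a separate localization step. Below is a hypothetical minimal sketch of one such step (not the original poster's code): it diffs the current frame against a previously captured background frame, takes the bounding rectangle of the largest foreground contour, and labels it with the predicted class. The threshold value and the static-background assumption are illustrative only.

import cv2
import numpy as np

def draw_rough_box(frame, background, label, thresh_val=40):
    # difference between the current frame and a stored background frame
    diff = cv2.absdiff(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY),
                       cv2.cvtColor(background, cv2.COLOR_BGR2GRAY))
    _, mask = cv2.threshold(diff, thresh_val, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        # bounding rectangle of the largest foreground region
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, label.strip(), (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return frame

In the loop above you would capture one background frame at startup and call this on the display frame with labels[np.argmax(probabilities)] before showing it.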

Getting only 30 frames from videos with varying number of frames using VideoCapture in OpenCV Python

I am trying to extract keypoints using MediaPipe to train an LSTM model to detect sign language actions (only for a set of adjectives for now). The training dataset is parsed and passed to VideoCapture, which is then used to extract keypoints through MediaPipe's holistic model; these are saved as numpy arrays, one .npy file per frame, in the respective directories. However, due to varying video lengths, the total number of frames captured, and thus the number of numpy arrays generated, varies for each video, which is not suitable for the training architecture. It would be straightforward to simply break out of the loop once 30 frames have been captured, but that would extract data only for the first 30 frames, which is not desirable. Is there a way to avoid skipping over data while still getting a fixed number of frames per video?
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
DATASET_PATH = "/home/kuna71/Dev/HearMySign/Datasets/Adjectives_1of8/Adjectives"
KEYPOINT_PATH = "/home/kuna71/Dev/HearMySign/Keypoints"
sequence_len = 30
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
def mediapipe_detection(image, model):
    print("In mediapipe_detection function")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                   # Image is no longer writeable (just to save on some memory)
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
    return image, results

def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])
#opencv code to capture frames
directories = os.listdir(DATASET_PATH)
for d in directories:
    vids = os.listdir(os.path.join(DATASET_PATH, d))
    for v in vids:
        videopath = os.path.join(DATASET_PATH, d, v)
        print("\n\n\n\n\n__________________CHANGING VIDEO__________________\n\n\n\n\n")
        print("\n\n________VIDEOPATH:" + videopath)
        cap = cv2.VideoCapture(videopath)
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print("LENGTH:" + str(length))
        # Set mediapipe model
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            count = 0
            while cap.isOpened():
                count = count + 1
                print("Reading feed")
                ret, frame = cap.read()
                if frame is None:
                    break
                image, result = mediapipe_detection(frame, holistic)
                result_test = extract_keypoints(result)
                print(result_test)
                if not os.path.exists(os.path.join(KEYPOINT_PATH, d, v)):
                    os.makedirs(os.path.join(KEYPOINT_PATH, d, v))
                NPY_PATH = os.path.join(KEYPOINT_PATH, d, v, str(count))
                np.save(NPY_PATH, result_test)
                # Show to screen
                cv2.imshow('OpenCV Feed', image)
                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        cap.release()
        cv2.destroyAllWindows()
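One possible approach, not from the original post: instead of taking the first 30 frames, pick 30 frame indices spread evenly over the whole clip using CAP_PROP_FRAME_COUNT and seek to them with CAP_PROP_POS_FRAMES, so the sampled keypoints cover the entire video. A minimal sketch, assuming the videos are seekable:

import cv2
import numpy as np

def sample_frames(videopath, target=30):
    cap = cv2.VideoCapture(videopath)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # target indices spread evenly from the first to the last frame
    indices = np.linspace(0, total - 1, num=target, dtype=int)
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ret, frame = cap.read()
        if ret:
            frames.append(frame)
    cap.release()
    return frames

Each sampled frame can then be passed through mediapipe_detection and extract_keypoints exactly as in the loop above, producing a fixed-length sequence per video.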

Taking pictures with different exposure times automatically and in real time with OpenCV

I'm trying to make a Python program with OpenCV that opens the webcam and takes several images with different exposures in real time (40 ms, 95 ms, 150 ms) and averages them at the end.
I tried to create a loop in which I change the exposure time, update the frame and save it in a list, but the problem is that the display remains static and the frame hardly changes (which, after merging the images, gives an image whose exposure time is almost 40).
I assumed that after setting the exposure time the frame needs some time to update, so I added time.sleep to suspend execution for 3 seconds, but it was in vain.
Here is my code
import numpy as np
import cv2
import os
import time
capture = cv2.VideoCapture(0, cv2.CAP_V4L2)
while True:
    (grabbed, frame) = capture.read()
    if not grabbed:
        break
    # Resize frame
    width = 1500
    height = 1000
    dim = (width, height)
    frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    cv2.imshow('RGB', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if cv2.waitKey(1) == ord('h') or cv2.waitKey(1) == ord('H'):
        # repertory = input("Enter the name of the directory: ")
        if not os.path.exists(repertory):
            os.mkdir(repertory)
        exposure = [40, 95, 150]
        ims = []
        for i in exposure:
            capture.set(cv2.CAP_PROP_EXPOSURE, i)  # Setting exposure
            (grabbed, frame) = capture.read()      # Updating frame
            if grabbed:
                cv2.imshow('RGB', frame)  # Display
                ims.append(frame)
        # Convert to numpy array
        ims = np.array([np.array(im) for im in ims])
        # Average and convert to uint8
        imave = np.average(ims, axis=0)
        imave = imave.astype(np.uint8)
        # HDR image
        cv2.imwrite(repertory + '/' + repertory + '_HDR8.jpg', imave)

capture.release()
cv2.destroyAllWindows()
Is there an optimal solution that allows taking pictures with different exposure times in real time and automatically?
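A possible workaround (my assumption, not from the original post): many webcams keep a small buffer of already-captured frames and may ignore CAP_PROP_EXPOSURE while auto exposure is on, so after changing the exposure it can help to switch to manual exposure and discard a few frames before keeping one. A minimal sketch; the CAP_PROP_AUTO_EXPOSURE value is driver-dependent (with V4L2, 1 often means manual and 3 automatic), so treat it as illustrative:

import cv2

capture = cv2.VideoCapture(0, cv2.CAP_V4L2)
capture.set(cv2.CAP_PROP_AUTO_EXPOSURE, 1)  # try to force manual exposure (driver-dependent)

def grab_with_exposure(cap, exposure, flush=5):
    cap.set(cv2.CAP_PROP_EXPOSURE, exposure)
    # discard a few buffered frames so the new exposure setting actually takes effect
    for _ in range(flush):
        cap.grab()
    grabbed, frame = cap.read()
    return frame if grabbed else None

frames = [grab_with_exposure(capture, e) for e in (40, 95, 150)]
frames = [f for f in frames if f is not None]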

How to crop a video (such as a zoom call) into multiple files?

I have a conference call video with different people's tiles arranged in a grid.
Example: [image of a Zoom gallery-view grid]
Can I crop every video tile to a separate file using Python or Node.js?
Yes, you can achieve that using the OpenCV library:
1. Read the video in OpenCV using the VideoCapture API. Note down the framerate while reading.
2. Parse through each frame and crop it.
3. Write the cropped frame to a video using the OpenCV VideoWriter.
Here is example code using (640, 480) as the new dimensions:
cap = cv2.VideoCapture(<video_file_name>)
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter('<output video file name>', -1, fps, (640, 480))
while cap.isOpened():
    ret, frame = cap.read()
    crop_frame = frame[y:y+h, x:x+w]
    # write the cropped frame
    out.write(crop_frame)
# Release reader and writer after parsing all frames
cap.release()
out.release()
Here's the code (tested). It works by initialising a number of video outputs, then for each frame of the input video it crops each region of interest (roi) and writes it to the relevant output video. You might need to make tweaks depending on input video dimensions, number of tiles, offsets, etc.
import numpy as np
import cv2
import time
cap = cv2.VideoCapture('in.mp4')
ret, frame = cap.read()
(h, w, d) = np.shape(frame)
horiz_divisions = 5 # Number of tiles stacked horizontally
vert_divisions = 5 # Number of tiles stacked vertically
divisions = horiz_divisions*vert_divisions # Total number of tiles
seg_h = int(h/vert_divisions) # Tile height
seg_w = int(w/horiz_divisions) # Tile width
# Initialise the output videos
outvideos = [0] * divisions
for i in range(divisions):
    outvideos[i] = cv2.VideoWriter('out{}.avi'.format(str(i)), cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (seg_w, seg_h))

# main code
while cap.isOpened():
    ret, frame = cap.read()
    if ret == True:
        vid = 0  # video counter
        for i in range(vert_divisions):
            for j in range(horiz_divisions):
                # Get the coordinates (top left corner) of the current tile
                row = i * seg_h
                col = j * seg_w
                roi = frame[row:row+seg_h, col:col+seg_w, 0:3]  # Copy the region of interest
                outvideos[vid].write(roi)
                vid += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

# Release all the objects
cap.release()
for i in range(divisions):
    outvideos[i].release()
# Release everything if job is finished
cv2.destroyAllWindows()
Hope this helps!

Parse output of opencv updateMotionHistory in Python

I am trying to generate grayscale Motion History Images from a live image feed (webcam) for processing by a CNN model I have built. From the updateMotionHistory function I get an output of:
a [width x height] array of type np.float32
I want to convert this array into a grayscale image in which newer motion appears brighter, like in the example image.
EDIT: Added code example below
import cv2
import numpy as np
import time
import genMHI_util
cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorMOG2(history=1000,detectShadows=False)
colourThreshold = 0.975
frame_reduction_counter = 0
mhi = np.zeros((genMHI_util.MHI_WIDTH, genMHI_util.MHI_HEIGHT), np.float32)
while True:
    ret, frame = cap.read()
    # kernel = np.ones((5, 5), np.float32) / 25
    # frame = cv2.medianBlur(frame, 5)
    # frame = cv2.filter2D(frame, -1, kernel)  # Blur image
    if frame_reduction_counter >= 0:
        # Disregarding frames that do not contain enough movement, below the set threshold
        frame_reduction_counter = 0
        timestamp = cv2.getTickCount() / cv2.getTickFrequency()
        # Do background subtraction on frame to get silhouette
        silhouette = fgbg.apply(frame)
        silhouette = cv2.resize(silhouette, (genMHI_util.MHI_WIDTH, genMHI_util.MHI_HEIGHT))
        # Update MHI
        cv2.motempl.updateMotionHistory(silhouette, mhi, timestamp, 0.5)
        # Do something with 'mhi' object (300x300 float array) ?
        # Convert float array to int
        mask = cv2.convertScaleAbs(mhi,
                                   alpha=(255 / genMHI_util.MHI_DURATION),
                                   beta=((genMHI_util.MHI_DURATION - timestamp) * 255 / genMHI_util.MHI_DURATION))
        # Preview images
        cv2.imshow('original', frame)
        cv2.imshow('silhouette', silhouette)
        cv2.imshow('mhi', mask)
        cv2.waitKey(1)
    frame_reduction_counter += 1

cap.release()
cv2.destroyAllWindows()
CONSTANTS:
MHI_DURATION = 5
MHI_WIDTH = 300
MHI_HEIGHT = 300
Output without movement - bit of camera noise
Output with movement - kind of works but very badly
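For reference, a minimal sketch of one common way to turn the float32 MHI into an 8-bit grayscale image where the most recent motion is brightest: the MHI stores the timestamp of the last motion at each pixel, so everything older than MHI_DURATION is clipped to black and the rest is scaled linearly. The function below is my own illustration, not part of the original code:

import numpy as np

def mhi_to_gray(mhi, timestamp, duration):
    # pixels updated within the last `duration` seconds map to (0, 255];
    # older pixels are clipped to 0, and the newest motion is brightest
    scaled = (mhi - (timestamp - duration)) / duration
    gray = np.clip(scaled, 0.0, 1.0) * 255.0
    return gray.astype(np.uint8)

This mirrors the scaling done by the convertScaleAbs call above, except that np.clip zeroes out stale pixels instead of taking their absolute value.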
