I have code that detects the object, but I want to add a bounding box to the detections.
import cv2
import numpy as np
from keras.models import load_model
# Load the model
model = load_model('keras_model.h5')
# CAMERA can be 0 or 1 based on default camera of your computer.
camera = cv2.VideoCapture(0)
# Grab the labels from the labels.txt file. This will be used later.
labels = open('labels.txt', 'r').readlines()
while True:
    # Grab the webcam's image.
    ret, image = camera.read()
    # Resize the raw image into (224-height, 224-width) pixels.
    image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
    # Show the image in a window.
    cv2.imshow('Webcam Image', image)
    # Make the image a numpy array and reshape it to the model's input shape.
    image = np.asarray(image, dtype=np.float32).reshape(1, 224, 224, 3)
    # Normalize the image array.
    image = (image / 127.5) - 1
    # Have the model predict what the current image is. model.predict
    # returns an array of probabilities. Example: [0.2, 0.8] means the model
    # is 20% sure it is the first label and 80% sure it is the second label.
    probabilities = model.predict(image)
    # Print the label with the highest probability.
    print(labels[np.argmax(probabilities)])
    # Listen to the keyboard for presses.
    keyboard_input = cv2.waitKey(1)
    # 27 is the ASCII code for the Esc key on your keyboard.
    if keyboard_input == 27:
        break
camera.release()
cv2.destroyAllWindows()
What I want is a nice bounding box drawn around the predicted object.
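Note that a classification model like this only returns per-class probabilities, not box coordinates, so a true bounding box would need a detection model. A minimal sketch that at least overlays the top label on the shown frame (my addition; the window name, font, and placement are illustrative):
    # Keep a BGR copy of the resized frame for display before it is
    # reshaped/normalized into the model's float input.
    display = image.copy()  # `image` right after cv2.resize
    # ... reshape, normalize, and model.predict as in the original loop ...
    label = labels[np.argmax(probabilities)].strip()
    cv2.putText(display, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.8, (0, 255, 0), 2)  # position/font/color are illustrative
    cv2.imshow('Webcam Image', display)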
I want to remove the duplication of objects: when the camera opens, it captures the first frame and saves it to disk, and then it does not save another frame until the next object appears in the scene (it never saves the same frame consecutively).
I have written code to compare two consecutive webcam frames. I want to store one frame in an array (max limit 3) and compare it with the current frame, so the first frame is saved to disk and compared until the next object appears (I used a threshold value for this purpose).
How can I save a frame to an array and compare it with the current frame?
from skimage.metrics import structural_similarity
import imutils
import sys
import datetime
import cv2
import time
import numpy as np
cap = cv2.VideoCapture(0)
while True:
    # Capture frame-by-frame
    ret, frame1 = cap.read()  # first image
    time.sleep(1 / 50)  # slight delay
    ret, frame2 = cap.read()  # second image
    gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    # compute the Structural Similarity Index (SSIM) between the two
    # images, ensuring that the difference image is returned
    (score, diff) = structural_similarity(gray1, gray2, full=True)
    diff = (diff * 255).astype("uint8")
    print("SSIM: {}".format(score))
    # threshold the difference image, followed by finding contours to
    # obtain the regions of the two input images that differ
    thresh = cv2.threshold(diff, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    if np.mean(thresh) < 0.4:
        print("New object Detected")
        # find the changed regions and crop around the largest one
        cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        if cnts:
            x, y, w, h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
            # avoid ':' in the filename (invalid on Windows)
            date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            cv2.imwrite('img/img-' + date_string + '.png',
                        frame2[y:y + h + 30, x:x + w + 30])
    # Display the resulting frames
    cv2.imshow('frame1', frame1)
    cv2.imshow('frame2', frame2)
    cv2.imshow("Diff", diff)
    cv2.imshow("Thresh", thresh)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
Not hard. Actually, you already have the image as a NumPy array; the shape is (720, 1280, 3). To save it, try this:
from PIL import Image  # required for Image.fromarray

...
ret, frame1 = cap.read()  # first image
print(frame1.shape)
rgb_frame1 = frame1[..., ::-1]  # reverse channel order: BGR -> RGB
im = Image.fromarray(rgb_frame1)
im.save("your_file.jpeg")
time.sleep(1 / 50)  # slight delay
...
Note: you need to change the channel order or you will get a blue image, because OpenCV stores the frame in BGR order rather than RGB.
Then you can store the frame:
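As a sketch of the storage-and-compare part the question asked about (my assumption of the intent, using a collections.deque with maxlen=3 as the "array" and a mean absolute difference as the change test):
from collections import deque
import cv2
import numpy as np

cap = cv2.VideoCapture(0)
saved = deque(maxlen=3)  # holds at most the last 3 reference frames
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # save the frame only if it differs enough from the last stored one
    if not saved or np.mean(cv2.absdiff(gray, saved[-1])) > 10:  # threshold is illustrative
        saved.append(gray)
        cv2.imwrite('img/new-object.png', frame)  # filename is illustrative
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()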
I made a convolutional neural network that predicts faces and returns coordinates (y1, x1, y2, x2). I am able to create a rectangle that serves as a mask covering the desired coordinates. I need a way to cover the images in real time. Is there a way to get a live image sequence without saving the frames, just overwriting them, and how do I extract the coordinates in OpenCV? I was using pyplot and saving the images, which is slow and ineffective.
Yeah, so I managed to come up with a solution, but I found out that one frame takes about 0.54 s to compute, i.e. roughly 2 FPS, which is not great for live streaming, so I am switching to a Haar cascade.
The code below is used to configure and call the model.
from numpy import expand_dims
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from mrcnn.model import mold_image
import cv2
import time
# define the prediction configuration
class PredictionConfig(Config):
    # define the name of the configuration
    NAME = "face_cfg"
    # number of classes (background + face)
    NUM_CLASSES = 1 + 1
    # simplify GPU config
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

def classify_image(image, model, cfg):
    # convert pixel values (e.g. center)
    scaled_image = mold_image(image, cfg)
    # convert image into one sample
    sample = expand_dims(scaled_image, 0)
    # make prediction
    tic = time.time()
    yhat = model.detect(sample, verbose=0)[0]
    print(time.time() - tic)
    return yhat['rois']

def image_bnd_highlight(image, coordinates):
    for box in coordinates:
        # get coordinates
        y1, x1, y2, x2 = box
        # draw the rectangle (cv2.rectangle draws on `image` in place)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 255, 255), 5)
    return image
# create config
cfg: PredictionConfig = PredictionConfig()
# define the model
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
# load model weights
model_path = 'mask_rcnn_face_cfg_0029.h5'
model.load_weights(model_path, by_name=True)
definitive_model = model
Then I call the functions that I created above.
import cv2 as cv
import acapture
from RealTime import definitive_model
from RealTime import cfg
from RealTime import classify_image
from RealTime import image_bnd_highlight
import time
# cap = acapture.open(0)
cap = cv.VideoCapture(0)
cap.set(3,128) #set frame width
cap.set(4,128) #set frame height
cap.set(cv.CAP_PROP_FPS, 2) #adjusting fps to 2
# cap.set(cv.CAP_PROP_BUFFERSIZE,3)
# if not cap.isOpened():
# print("Cannot open camera")
# exit()
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if the frame is read correctly, ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break
    # let's resize our image to be 150 pixels wide, but in order to
    # prevent our resized image from being skewed/distorted, we must
    # first calculate the ratio of the *new* width to the *old* width
    r = 150.0 / frame.shape[1]
    dim = (150, int(frame.shape[0] * r))
    # perform the actual resizing of the image
    resized = cv.resize(frame, dim, interpolation=cv.INTER_AREA)
    # tic = time.time()
    coords = classify_image(resized, definitive_model, cfg)
    # print(time.time() - tic)
    image = image_bnd_highlight(resized, coords)
    # Display the resulting frame
    cv.imshow('frame', image)
    if cv.waitKey(1) == ord('q'):
        break
# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
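For reference, since the post ends with a switch to a Haar cascade, here is a minimal sketch of that faster alternative, using the frontal-face cascade bundled with opencv-python (the cv.data.haarcascades path and the detection parameters are assumptions, not part of the original post):
import cv2 as cv

# load the frontal-face Haar cascade shipped with opencv-python
cascade = cv.CascadeClassifier(
    cv.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # detectMultiScale returns (x, y, w, h) boxes, unlike the (y1, x1, y2, x2) rois above
    for (x, y, w, h) in cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5):
        cv.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 5)
    cv.imshow('frame', frame)
    if cv.waitKey(1) == ord('q'):
        break
cap.release()
cv.destroyAllWindows()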
I want to get each frame from a video as an image. The background to this is the following: I have written a neural network which is able to recognize hand signs. Now I want to start a video stream, where each image/frame of the stream is put through the neural network. To fit my neural network, I want to render each frame and reduce the image to 28*28 pixels. In the end it should look similar to this: https://www.youtube.com/watch?v=JfSao30fMxY
I have searched through the web and found out that I can use cv2.VideoCapture to get the stream. But how can I pick each image of the frame, render it, and print the result back on the screen? My code looks like this so far:
import numpy as np
import cv2
import tensorflow as tf  # needed by imageToLabel below

cap = cv2.VideoCapture(0)
# Todo: each Frame/Image from the video should be saved as a variable and open imageToLabel()
# Todo: before the image is handed to the method, it needs to be translated into a 28*28 np Array
# Todo: the returned Label should be printed onto the video (otherwise it can be )
i = 0
while True:
    # Capture frame-by-frame
    # Load model once and pass it as a parameter
    ret, frame = cap.read()
    i += 1
    cv2.imwrite('database/{index}.png'.format(index=i), frame)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # fixed: was COLOR_BGR2BGRAY
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()

def imageToLabel(imgArr, checkpointLoad):
    new_model = tf.keras.models.load_model(checkpointLoad)
    imgArrNew = imgArr.reshape(1, 28, 28, 1) / 255
    prediction = new_model.predict(imgArrNew)
    label = np.argmax(prediction)
    return label
frame is the color image you get from the stream (note that OpenCV delivers it in BGR channel order).
gray is the grayscale-converted image.
I suppose your network takes grayscale images because of its shape. Therefore you need to first resize the image to (28, 28) and then pass it to your imageToLabel function:
resizedImg = cv2.resize(gray,(28,28))
label = imageToLabel(resizedImg,yourModel)
Now that you know the prediction, you can draw it on the frame using e.g. cv2.putText() and then display that annotated frame instead of the original frame.
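A minimal sketch of that drawing step (the position, font, and color are illustrative choices):
# draw the predicted label in the top-left corner of the frame
cv2.putText(frame, str(label), (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('frame', frame)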
edit:
If you want to use parts of the image for your network you can slice the image like this:
slicedImg = gray[50:150,50:150]
resizedImg = cv2.resize(slicedImg,(28,28))
label = imageToLabel(resizedImg,yourModel)
If you're not that familiar with indexing in Python, you might want to take a look at this.
Also, if you want it to look like the linked video, you can draw a rectangle from e.g. (50,50) to (150,150) that is green (0,255,0):
cv2.rectangle(frame,(50,50),(150,150),(0,255,0))
I captured video using cv2.VideoCapture and displayed it. The captured video is displayed live, not saved. How can I insert an image into this captured video so it is displayed at the same time?
Assuming you want to add the image directly to the video frames at a certain (x, y) location without doing any color blending or image transparency, you can use the following Python code:
#!/usr/bin/python3
import cv2
# load the overlay image. size should be smaller than video frame size
img = cv2.imread('logo.png')
# Get Image dimensions
img_height, img_width, _ = img.shape
# Start Capture
cap = cv2.VideoCapture(0)
# Get frame dimensions
frame_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH )
frame_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT )
# Print dimensions
print('image dimensions (HxW):',img_height,"x",img_width)
print('frame dimensions (HxW):',int(frame_height),"x",int(frame_width))
# Decide X,Y location of overlay image inside video frame.
# following should be valid:
# * image dimensions must be smaller than frame dimensions
# * x+img_width <= frame_width
# * y+img_height <= frame_height
# otherwise you can resize image as part of your code if required
x = 50
y = 50
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # add image to frame
    frame[y:y + img_height, x:x + img_width] = img
    # Display the resulting frame
    cv2.imshow('frame', frame)
    # Exit if ESC key is pressed
    if cv2.waitKey(20) & 0xFF == 27:
        break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
Please give more details if my assumption was wrong.
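If blending or transparency is wanted after all, a minimal sketch using cv2.addWeighted on the overlay region (the 0.5/0.5 weights are an illustrative choice, not part of the answer above):
# blend the overlay with the underlying region instead of replacing it
roi = frame[y:y + img_height, x:x + img_width]
frame[y:y + img_height, x:x + img_width] = cv2.addWeighted(roi, 0.5, img, 0.5, 0)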
I'm building some code to adaptively detect skin from webcam video. I have it almost working; however, when outputting the video, it shows 9 screens of the "skin" mask instead of just one. It seems like I'm missing something simple, but I can't figure it out.
image shown here
Code below:
import cv2
import numpy as np

# first let's train the data (ReadData and TrainTree are user-defined helpers, not shown)
data, labels = ReadData()
classifier = TrainTree(data, labels)

# get the webcam. The input is either a video file or the camera number.
# Since we use a laptop webcam (only 1 cam), the input is 0. A 2nd cam would be input 1.
camera = cv2.VideoCapture(0)

while True:
    # reads in the current frame
    # .read() returns True if the frame was read correctly, and False otherwise
    ret, frame = camera.read()  # frame.shape: (480,640,3)
    if ret:
        # reshape the frame to follow the format of the training data (rows*cols, 3)
        data = np.reshape(frame, (frame.shape[0] * frame.shape[1], 3))
        bgr = np.reshape(data, (data.shape[0], 1, 3))
        hsv = cv2.cvtColor(np.uint8(bgr), cv2.COLOR_BGR2HSV)
        # once we have converted to HSV, we reshape back to the original shape of (307200,3)
        data = np.reshape(hsv, (hsv.shape[0], 3))
        predictedLabels = classifier.predict(data)
        # the AND operator applies the skin mask to the image
        # predictedLabels consists of 1 (skin) and 2 (non-skin); change to 255 (skin) and 0 (non-skin)
        predictedMask = (-(predictedLabels - 1) + 1) * 255  # predictedMask.shape: (307200,)
        # resize to match the frame shape
        imgLabels = np.resize(predictedMask, (frame.shape[0], frame.shape[1], 3))  # imgLabels.shape: (480,640,3)
        # masks require 1 channel, not 3, so change from BGR to GRAYSCALE
        imgLabels = cv2.cvtColor(np.uint8(imgLabels), cv2.COLOR_BGR2GRAY)  # imgLabels.shape: (480,640)
        # do bitwise AND to pull out skin pixels. All skin pixels are ANDed with 255 and all others with 0
        skin = cv2.bitwise_and(frame, frame, mask=imgLabels)  # skin.shape: (480,640,3)
        # show the skin in the image along with the mask, side-by-side
        # **********THE BELOW LINE OUTPUTS 9 screens of the skin mask instead of just 1 ****************
        cv2.imshow("images", np.hstack([frame, skin]))
        # if the 'q' key is pressed, stop the loop
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        break

# release the video capture
camera.release()
cv2.destroyAllWindows()
You're working with bitmaps. To get an idea of what they hold, cv2.imshow them individually; then you're going to see (literally) where the data goes wrong.
Now, the culprit is most probably np.resize():
np.resize(a, new_shape)
    Return a new array with the specified shape.

    If the new array is larger than the original array, then the new
    array is filled with repeated copies of a. Note that this behavior
    is different from a.resize(new_shape), which fills with zeros
    instead of repeated copies of a.
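A quick illustration of that repetition (my own example):
import numpy as np
a = np.array([1, 2, 3])
print(np.resize(a, (2, 3)))
# [[1 2 3]
#  [1 2 3]]  -- the data is tiled, not interpolated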
To scale a bitmap (i.e., resize it while striving to preserve the same visual image), use cv2.resize() as per OpenCV: Geometric Transformations of Images.
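In the code above no scaling is actually needed: predictedMask already holds one value per pixel, so a plain reshape gives the single-channel mask directly. A minimal sketch of that fix (my reading of the code, not from the original answer):
# reshape the flat (480*640,) prediction into a (480, 640) mask;
# no tiling via np.resize and no BGR2GRAY conversion is needed
imgLabels = predictedMask.reshape(frame.shape[0], frame.shape[1]).astype(np.uint8)
skin = cv2.bitwise_and(frame, frame, mask=imgLabels)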