I am using Opencv to capture the stream from a ZED camera and display it using cv2.imshow.
But I am having issues resending the ZED stream back out as a RTSP stream.
Now the reason I am using Opencv is because I need to extract data from the stream before it is sent out as a RTSP stream.
Here is the code for the Zed camera OpenCV stream capture:
def main() :
# Create a ZED camera object
zed = sl.Camera()
# Set configuration parameters
input_type = sl.InputType()
if len(sys.argv) >= 2 :
input_type.set_from_svo_file(sys.argv[1])
init = sl.InitParameters(input_t=input_type)
init.camera_resolution = sl.RESOLUTION.HD1080
init.depth_mode = sl.DEPTH_MODE.ULTRA
init.coordinate_units = sl.UNIT.METER
# Open the camera
err = zed.open(init)
if err != sl.ERROR_CODE.SUCCESS :
print(repr(err))
zed.close()
exit(1)
# Set runtime parameters after opening the camera
runtime = sl.RuntimeParameters()
runtime.sensing_mode = sl.SENSING_MODE.FILL
# Setting the depth confidence parameters
runtime.confidence_threshold = 100
runtime.textureness_confidence_threshold = 100
# Prepare new image size to retrieve half-resolution images
image_size = zed.get_camera_information().camera_resolution
image_size.width = image_size.width /2
image_size.height = image_size.height /2
# Declare your sl.Mat matrices
image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4)
point_cloud = sl.Mat()
key = ' '
while key != 113 :
err = zed.grab(runtime)
def click_event (event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
# Retrieve the left image, depth image in the half-resolution
zed.retrieve_image(image_zed, sl.VIEW.LEFT, sl.MEM.CPU, image_size)
# Retrieve the RGBA point cloud in half resolution
zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, image_size)
err, point_cloud_value = point_cloud.get_value(x, y)
distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] +
point_cloud_value[1] * point_cloud_value[1] +
point_cloud_value[2] * point_cloud_value[2])
print("Distance to Camera at ({}, {}) (image center): {:1.3} m".format(x, y, distance))
if err == sl.ERROR_CODE.SUCCESS :
# Retrieve the left image, depth image in the half-resolution
zed.retrieve_image(image_zed, sl.VIEW.LEFT, sl.MEM.CPU, image_size)
# Retrieve the RGBA point cloud in half resolution
zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, image_size)
# To recover data from sl.Mat to use it with opencv, use the get_data() method
# It returns a numpy array that can be used as a matrix with opencv
image_ocv = image_zed.get_data()
cv2.imshow("Image", image_ocv)
cv2.setMouseCallback('Image', click_event)
key = cv2.waitKey(10)
cv2.destroyAllWindows()
zed.close()
print("\nFINISH")
if __name__ == "__main__":
main()
Here is the code for the RTSP server:
How do I send the captured Opencv cv2.imshow out as a RTSP stream
import gi
gi.require_version('Gst','1.0')
gi.require_version('GstVideo','1.0')
gi.require_version('GstRtspServer','1.0')
from gi.repository import GObject, Gst, GstVideo, GstRtspServer
Gst.init(None)
mainloop = GObject.MainLoop()
server = GstRtspServer.RTSPServer()
mounts = server.get_mount_points()
factory = GstRtspServer.RTSPMediaFactory()
factory.set_launch('( zedsrc stream-type=0 ! videoconvert ! videoscale ! video/x-
raw,format=YUY2,width=1280,height=720,framerate=30/1 ! nvvidconv ! nvv4l2h264enc insert-
sps-pps=1 idrinterval=1 insert-vui=1 ! rtph264pay name=pay0 pt=96 )')
#factory.set_launch('(v4l2src device=/dev/video0 io-mode=2 !
image/jpeg,width=1280,height=720,framerate=30/1 ! nvjpegdec ! video/x-raw ! nvvidconv !
nvv4l2h264enc ! rtph264ay name=pay0 pt=96)')
mounts.add_factory('/test', factory)
server.attach(None)
print('stream ready at rtsp://127.0.0.1:8554/test')
mainloop.run()
Related
I'm new to tensorflow and object detetion, and any help would be greatly appreciated! I got a database of 50 photos, used this video to get me started, and it DID work with Google's Sample Model (I'm using a RPi4B with 8 GB of RAM), then I wanted to create my own model. I tried a couple of options, but ultimately failed since the type of files I needed were a .TFLITE and a .txt one with the labels. I only managed to get a .LITE file which from what I tested didn't work
I tried his google collab sheet but the terminal got stuck at step 5 when I pressed the button to train the model, so I tried Edge Impulse but the output models were all in a .LITE file, and didn't provide a labelmap.txt file for the code. I tried manually changing the extension from .LITE to .TFLITE since according to this thread it was supposed to work, but it didn't!
I need this to be ready in 3 days from now... Isn't there a more beginner-friendly way to do this? How can I get a valid .TFLITE model to work with my RPI4? If I have to, I will change the code for this to work. Here's the code the tutorial provided:
######## Webcam Object Detection Using Tensorflow-trained Classifier #########
#
# Author: Evan Juras
# Date: 10/27/19
# Description:
# This program uses a TensorFlow Lite model to perform object detection on a live webcam
# feed. It draws boxes and scores around the objects of interest in each frame from the
# webcam. To improve FPS, the webcam object runs in a separate thread from the main program.
# This script will work with either a Picamera or regular USB webcam.
#
# This code is based off the TensorFlow Lite image classification example at:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/examples/python/label_image.py
#
# I added my own method of drawing boxes and labels using OpenCV.
# Import packages
import os
import argparse
import cv2
import numpy as np
import sys
import time
from threading import Thread
import importlib.util
# Define VideoStream class to handle streaming of video from webcam in separate processing thread
# Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/
class VideoStream:
"""Camera object that controls video streaming from the Picamera"""
def _init_(self,resolution=(640,480),framerate=30):
# Initialize the PiCamera and the camera image stream
self.stream = cv2.VideoCapture(0)
ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
ret = self.stream.set(3,resolution[0])
ret = self.stream.set(4,resolution[1])
# Read first frame from the stream
(self.grabbed, self.frame) = self.stream.read()
# Variable to control when the camera is stopped
self.stopped = False
def start(self):
# Start the thread that reads frames from the video stream
Thread(target=self.update,args=()).start()
return self
def update(self):
# Keep looping indefinitely until the thread is stopped
while True:
# If the camera is stopped, stop the thread
if self.stopped:
# Close camera resources
self.stream.release()
return
# Otherwise, grab the next frame from the stream
(self.grabbed, self.frame) = self.stream.read()
def read(self):
# Return the most recent frame
return self.frame
def stop(self):
# Indicate that the camera and thread should be stopped
self.stopped = True
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
default='detect.lite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
default=0.5)
parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
default='1280x720')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
resW, resH = args.resolution.split('x')
imW, imH = int(resW), int(resH)
use_TPU = args.edgetpu
# Import TensorFlow libraries
# If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
# If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
# If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
if (GRAPH_NAME == 'detect.lite'):
GRAPH_NAME = 'edgetpu.tflite'
# Get path to current working directory
CWD_PATH = os.getcwd()
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
del(labels[0])
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT,
experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
# Check output layer name to determine if this model was created with TF2 or TF1,
# because outputs are ordered differently for TF2 and TF1 models
outname = output_details[0]['name']
if ('StatefulPartitionedCall' in outname): # This is a TF2 model
boxes_idx, classes_idx, scores_idx = 1, 3, 0
else: # This is a TF1 model
boxes_idx, classes_idx, scores_idx = 0, 1, 2
# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
# Initialize video stream
videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
time.sleep(1)
#for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
while True:
# Start timer (for calculating frame rate)
t1 = cv2.getTickCount()
# Grab frame from video stream
frame1 = videostream.read()
# Acquire frame and resize to expected shape [1xHxWx3]
frame = frame1.copy()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)
# Normalize pixel values if using a floating model (i.e. if model is non-quantized)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
# Perform the actual detection by running the model with the image as input
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
# Retrieve detection results
boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0] # Confidence of detected objects
# Loop over all detections and draw detection box if confidence is above minimum threshold
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
# Get bounding box coordinates and draw box
# Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
# Draw label
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
if object_name=='person' and int(scores[i]*100)>65:
print("YES")
else:
print("NO")
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
# Draw framerate in corner of frame
cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)
# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)
# Calculate framerate
t2 = cv2.getTickCount()
time1 = (t2-t1)/freq
frame_rate_calc= 1/time1
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
break
# Clean up
cv2.destroyAllWindows()
videostream.stop()
```
Easy, just downgrade to OpenCV version 3.4.16, and use Tensorflow 1.0 instead of 2.0 and that should solve all your problems. That will allow the use of .LITE files, as well that of .TFLITE
Also, try increasing the resolution to a 720x1280, most likely that can cause a ton of errors as well when working with tensorflow
Take a look here: https://www.tensorflow.org/tutorials/images/classification
This notebook sets up a new classification model, and ends with "Convert the Keras Sequential model to a TensorFlow Lite model"
https://www.tensorflow.org/tutorials/images/classification#convert_the_keras_sequential_model_to_a_tensorflow_lite_model
# Convert the model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
# Save the model.
with open('model.tflite', 'wb') as f:
f.write(tflite_model)
This reliably produces a tflite model from a standard tf model.
trt_yolo.py
import os
import time
import argparse
import cv2
import pycuda.autoinit # This is needed for initializing CUDA driver
from utils.yolo_classes import get_cls_dict
from utils.camera import add_camera_args, Camera
from utils.display import open_window, set_display, show_fps
from utils.visualization import BBoxVisualization
from utils.yolo_with_plugins import TrtYOLO
WINDOW_NAME = 'TrtYOLODemo'
def parse_args():
"""Parse input arguments."""
desc = ('Capture and display live camera video, while doing '
'real-time object detection with TensorRT optimized '
'YOLO model on Jetson')
parser = argparse.ArgumentParser(description=desc)
parser = add_camera_args(parser)
parser.add_argument(
'-c', '--category_num', type=int, default=80,
help='number of object categories [80]')
parser.add_argument(
'-t', '--conf_thresh', type=float, default=0.3,
help='set the detection confidence threshold')
parser.add_argument(
'-m', '--model', type=str, required=True,
help=('[yolov3-tiny|yolov3|yolov3-spp|yolov4-tiny|yolov4|'
'yolov4-csp|yolov4x-mish|yolov4-p5]-[{dimension}], where '
'{dimension} could be either a single number (e.g. '
'288, 416, 608) or 2 numbers, WxH (e.g. 416x256)'))
parser.add_argument(
'-l', '--letter_box', action='store_true',
help='inference with letterboxed image [False]')
args = parser.parse_args()
return args
def loop_and_detect(cam, trt_yolo, conf_th, vis):
"""Continuously capture images from camera and do object detection.
# Arguments
cam: the camera instance (video source).
trt_yolo: the TRT YOLO object detector instance.
conf_th: confidence/score threshold for object detection.
vis: for visualization.
"""
full_scrn = False
fps = 0.0
tic = time.time()
while True:
if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
break
img = cam.read()
if img is None:
break
boxes, confs, clss = trt_yolo.detect(img, conf_th)
img = vis.draw_bboxes(img, boxes, confs, clss)
img = show_fps(img, fps)
cv2.imshow(WINDOW_NAME, img)
toc = time.time()
curr_fps = 1.0 / (toc - tic)
# calculate an exponentially decaying average of fps number
fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
tic = toc
key = cv2.waitKey(1)
if key == 27: # ESC key: quit program
break
elif key == ord('F') or key == ord('f'): # Toggle fullscreen
full_scrn = not full_scrn
set_display(WINDOW_NAME, full_scrn)
def main():
args = parse_args()
if args.category_num <= 0:
raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
if not os.path.isfile('yolo/%s.trt' % args.model):
raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)
cam = Camera(args)
if not cam.isOpened():
raise SystemExit('ERROR: failed to open camera!')
cls_dict = get_cls_dict(args.category_num)
vis = BBoxVisualization(cls_dict)
trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)
open_window(
WINDOW_NAME, 'Camera TensorRT YOLO Demo',
cam.img_width, cam.img_height)
loop_and_detect(cam, trt_yolo, args.conf_thresh, vis=vis)
cam.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
Hello, I'm doing drone recognition and tracking with yolov4 in my project. My weight file is ready. Since I'm using Jetson Nano, I converted the code to TensorRT. However, I could not add the tracking algorithm to the TensorRT code. What I want is, in Real Time, the camera will follow the drone based on its distance from the midpoint, and the servo motors will rotate the camera following the drone. Can you write this algorithm in the code I added?
Trying to accept multiple RTSP addresses via command line but I keep on failing. This would be my input down below:
python rtsp.py --rtsp --uri rtsp://admin:password#192.168.1.63:554 rtsp://admin:password#192.168.1.61:554 rtsp://admin:password#192.168.1.62:554
It doesn't show the videos simultaneously which I want it to, it shows one by one after you close them.
import sys
import argparse
import cv2
from multiprocessing import Process
WINDOW_NAME = 'Network Camera Demo'
def parse_args():
# Parse input arguments
desc = 'Capture and display live video feed'
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('--rtsp', dest='use_rtsp',
help='use IP CAM (remember to also set --uri)',
action='store_true')
parser.add_argument('--uri', dest='rtsp_uri',
help='RTSP URI, e.g. rtsp://192.168.1.64:554',
default=None, type=str)
parser.add_argument('--latency', dest='rtsp_latency',
help='latency in ms for RTSP [200]',
default=25, type=int)
parser.add_argument('--width', dest='image_width',
help='image width [1920]',
default=1920, type=int)
parser.add_argument('--height', dest='image_height',
help='image height [1080]',
default=1080, type=int)
args = parser.parse_args()
return args
def open_cam_rtsp(uri, width, height, latency):
gst_str = ('rtspsrc location={} latency={} ! '
'rtph264depay ! h264parse ! omxh264dec ! '
'nvvidconv ! '
'video/x-raw, width=(int){}, height=(int){}, '
'format=(string)BGRx ! '
'videoconvert ! appsink').format(uri, latency, width, height)
return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def open_window(width, height):
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.resizeWindow(WINDOW_NAME, width, height)
cv2.moveWindow(WINDOW_NAME, 0, 0)
cv2.setWindowTitle(WINDOW_NAME, 'Network Cam Demo')
def read_cam(cap):
show_help = True
full_scrn = False
help_text = '"Esc" to Quit, "H" for Help, "F" to Toggle Fullscreen'
font = cv2.FONT_HERSHEY_PLAIN
while True:
if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
# Check to see if the user has closed the window
# If yes, terminate the program
break
_, img = cap.read() # grab the next image frame from camera
if show_help:
cv2.putText(img, help_text, (11, 20), font,
1.0, (32, 32, 32), 4, cv2.LINE_AA)
cv2.putText(img, help_text, (10, 20), font,
1.0, (240, 240, 240), 1, cv2.LINE_AA)
cv2.imshow(WINDOW_NAME, img)
key = cv2.waitKey(10)
if key == 27: # ESC key: quit program
break
def main(rtsp_uri):
# args = parse_args()
print('Called with args:')
print(args)
print('OpenCV version: {}'.format(cv2.__version__))
cap = open_cam_rtsp(rtsp_uri,
args.image_width,
args.image_height,
args.rtsp_latency)
if not cap.isOpened():
sys.exit('Failed to open camera!')
open_window(args.image_width, args.image_height)
read_cam(cap)
cap.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
args = parse_args()
for rtsp_uri in args.rtsp_uri:
process = Process(target=main, args=(rtsp_uri))
process.start()
process.join()
I am kind of stuck on how to actually get it to accept multiple arguments like certain frameworks e.g.
gst-launch-1.0 rtspsrc location=rtsp://admin:password#192.168.1.61 ! rtph264depay ! h264parse ! omxh264dec ! nvvidconv ! videoconvert ! appsink
rtspsrc location=rtsp://admin:password#192.168.1.62 ! rtph264depay ! h264parse ! omxh264dec ! nvvidconv ! videoconvert ! appsink
Edit: With the help of Right Leg, I have managed to solve the problem that exists within my code which was due to a misunderstanding of how the Process actually works in multiprocessing.
For some strange reason, action=append and nargs='+ has to be used in order to accept multiple arguments from the command line.
Final code as shown below:
parser.add_argument('--uri', dest='rtsp_uri', action='append'
help='RTSP URI, e.g. rtsp://192.168.1.64:554',
default=None, type=str, nargs='+')
...
if __name__ == '__main__':
args = parse_args()
processes = []
for args.rtsp_uri in args.rtsp_uri:
process = Process(target=main, args=(args.rtsp_uri))
processes.append(process)
process.start()
for process in processes:
process.join()
In your main, for each url, you create a process, start it, and join it.
if __name__ == '__main__':
args = parse_args()
for rtsp_uri in args.rtsp_uri:
process = Process(target=main, args=(rtsp_uri))
process.start()
process.join()
The problem is that Process.join waits for the process to end.
As a result, the processes are created, started, and thus run sequentially: only once a process has ended will the next start.
You need to start all the processes, and only then join them:
if __name__ == '__main__':
args = parse_args()
processes = []
for rtsp_uri in args.rtsp_uri:
process = Process(target=main, args=(rtsp_uri))
processes.append(process)
process.start()
for process in processes:
process.join()
I'm currently working with a d435 and I want to display IR images (both left and right but for the moments just focus on one), following my code:
import pyrealsense2 as rs
import numpy as np
import cv2
# We want the points object to be persistent so we can display the
#last cloud when a frame drops
points = rs.points()
# Create a pipeline
pipeline = rs.pipeline()
#Create a config and configure the pipeline to stream
config = rs.config()
config.enable_stream(rs.stream.infrared, 1, 1280, 720, rs.format.y8, 30)
# Start streaming
profile = pipeline.start(config)
# Streaming loop
try:
while True:
# Get frameset of color and depth
frames = pipeline.wait_for_frames()
ir1_frame = frames.get_infrared_frame(1) # Left IR Camera, it allows 1, 2 or no input
image = np.asanyarray(ir1_frame)
cv2.namedWindow('IR Example', cv2.WINDOW_AUTOSIZE)
cv2.imshow('IR Example', image)
key = cv2.waitKey(1)
# Press esc or 'q' to close the image window
if key & 0xFF == ord('q') or key == 27:
cv2.destroyAllWindows()
break
finally:
pipeline.stop()
Everything works fine till the line:
cv2.imshow('IR Example', image)
I get the error:
TypeError: mat data type = 17 is not supported
I found this link:
TypeError: src data type = 17 is not supported
but I still can't figure out how to show my image.
Does anyone have some ideas? Please share, i'm a newbie with opencv.
image.shape = ()
image.dtype = dtype('O')
Cheers
You need to call get_data() to get the image from the frame.
image = np.asanyarray(ir1_frame.get_data())
I am trying to capture and display with Python a network video stream. The stream has been created (on my laptop) with the following command:
gst-launch-1.0 v4l2src ! videorate ! video/x-raw,framerate=2/1,width=640,height=480 ! x264enc pass=qual quantizer=20 tune=zerolatency ! rtph264pay config-interval=10 pt=96 ! udpsink host=127.0.0.1 port=5000
It takes the webcam input and streams it over a UDP port. I can capture the stream and display it with the following command:
gst-launch-1.0 udpsrc port=5000 ! "application/x-rtp, payload=127" ! rtph264depay ! avdec_h264 ! xvimagesink sync=false
Now I am trying to do the same (capture) with a Python script, but without lack. Here is my code:
import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst
udpPipe = Gst.pipeline("player")
source = Gst.ElementFactory.make('udpsrc', None)
source.set_property("port", 5000)
source.set_property("host", "127.0.0.1")
rdepay = Gst.ElementFactory.make('rtph264depay', 'rdepay')
vdecode = Gst.ElementFactory.make('avdec_h264', 'vdecode')
sink = Gst.ElementFactory.make('xvimagesink', None)
udpPipe.add(source, rdepay, vdecode, sink)
gst.element_link_many(source, rdepay, vdecode, sink)
udpPipe.set_state(gst.STATE_PLAYING)
The error I am getting is:
/usr/lib/python2.7/dist-packages/gi/overrides/Gst.py:56: Warning: /build/glib2.0-prJhLS/glib2.0-2.48.2/./gobject/gsignal.c:1674: parameter 1 of type '<invalid>' for signal "GstBus::sync_message" is not a value type
Gst.Bin.__init__(self, name=name)
/usr/lib/python2.7/dist-packages/gi/overrides/Gst.py:56: Warning: /build/glib2.0-prJhLS/glib2.0-2.48.2/./gobject/gsignal.c:1674: parameter 1 of type '<invalid>' for signal "GstBus::message" is not a value type
Gst.Bin.__init__(self, name=name)
Traceback (most recent call last):
File "getUdp.py", line 13, in <module>
source = Gst.ElementFactory.make('udpsrc', None)
File "/usr/lib/python2.7/dist-packages/gi/overrides/Gst.py", line 217, in make
return Gst.ElementFactory.make(factory_name, instance_name)
TypeError: unbound method fake_method() must be called with ElementFactory instance as first argument (got str instance instead)
Any ideas? :-(
I also got the same error on Debian 9.3 (stretch) today.
Explicitly calling Gst.init resolved the problem.
Following code popped up a xvimagesink window on my system with both python 2.7 and 3.5.
#!/usr/bin/python
import sys
import gi
gi.require_version('GLib', '2.0')
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
Gst.init(sys.argv)
udpPipe = Gst.Pipeline("player")
source = Gst.ElementFactory.make('udpsrc', None)
source.set_property("port", 5000)
#source.set_property("host", "127.0.0.1")
caps = Gst.caps_from_string("application/x-rtp, payload=127")
source.set_property("caps", caps)
rdepay = Gst.ElementFactory.make('rtph264depay', 'rdepay')
vdecode = Gst.ElementFactory.make('avdec_h264', 'vdecode')
sink = Gst.ElementFactory.make('xvimagesink', None)
sink.set_property("sync", False)
udpPipe.add(source, rdepay, vdecode, sink)
#Gst.element_link_many(source, rdepay, vdecode, sink)
source.link(rdepay)
rdepay.link(vdecode)
vdecode.link(sink)
udpPipe.set_state(Gst.State.PLAYING)
GLib.MainLoop().run()
I think it is necessary to call Gst.init and run mainloop to convert gst-launch command line into python script with PyGObject.
You can use "mjpeg" stream as follows:
gst-launch-1.0 videotestsrc ! videoconvert ! videoscale ! video/x-raw,format=I420,width=800,height=600,framerate=25/1 ! jpegenc ! rtpjpegpay ! udpsink host=127.0.0.1 port=5000
In python3 you can get the frames like this:
#!/usr/bin/env python
import cv2
import gi
import numpy as np
gi.require_version('Gst', '1.0')
from gi.repository import Gst
class Video():
"""BlueRov video capture class constructor
Attributes:
port (int): Video UDP port
video_codec (string): Source h264 parser
video_decode (string): Transform YUV (12bits) to BGR (24bits)
video_pipe (object): GStreamer top-level pipeline
video_sink (object): Gstreamer sink element
video_sink_conf (string): Sink configuration
video_source (string): Udp source ip and port
"""
def __init__(self, port=5000):
"""Summary
Args:
port (int, optional): UDP port
"""
Gst.init(None)
self.port = port
self._frame = None
# [Software component diagram](https://www.ardusub.com/software/components.html)
# UDP video stream (:5000)
self.video_source = 'udpsrc port={}'.format(self.port)
# [Rasp raw image](http://picamera.readthedocs.io/en/release-0.7/recipes2.html#raw-image-capture-yuv-format)
# Cam -> CSI-2 -> H264 Raw (YUV 4-4-4 (12bits) I420)
# self.video_codec = '! application/x-rtp, payload=96 ! rtph264depay ! h264parse ! avdec_h264'
self.video_codec = '! application/x-rtp, payload=26 ! rtpjpegdepay ! jpegdec'
# Python don't have nibble, convert YUV nibbles (4-4-4) to OpenCV standard BGR bytes (8-8-8)
self.video_decode = \
'! decodebin ! videoconvert ! video/x-raw,format=(string)BGR ! videoconvert'
# Create a sink to get data
self.video_sink_conf = \
'! appsink emit-signals=true sync=false max-buffers=2 drop=true'
self.video_pipe = None
self.video_sink = None
self.run()
def start_gst(self, config=None):
""" Start gstreamer pipeline and sink
Pipeline description list e.g:
[
'videotestsrc ! decodebin', \
'! videoconvert ! video/x-raw,format=(string)BGR ! videoconvert',
'! appsink'
]
Args:
config (list, optional): Gstreamer pileline description list
"""
if not config:
config = \
[
'videotestsrc ! decodebin',
'! videoconvert ! video/x-raw,format=(string)BGR ! videoconvert',
'! appsink'
]
command = ' '.join(config)
self.video_pipe = Gst.parse_launch(command)
self.video_pipe.set_state(Gst.State.PLAYING)
self.video_sink = self.video_pipe.get_by_name('appsink0')
#staticmethod
def gst_to_opencv(sample):
"""Transform byte array into np array
Args:
sample (TYPE): Description
Returns:
TYPE: Description
"""
buf = sample.get_buffer()
caps = sample.get_caps()
array = np.ndarray(
(
caps.get_structure(0).get_value('height'),
caps.get_structure(0).get_value('width'),
3
),
buffer=buf.extract_dup(0, buf.get_size()), dtype=np.uint8)
return array
def frame(self):
""" Get Frame
Returns:
iterable: bool and image frame, cap.read() output
"""
return self._frame
def frame_available(self):
"""Check if frame is available
Returns:
bool: true if frame is available
"""
return type(self._frame) != type(None)
def run(self):
""" Get frame to update _frame
"""
self.start_gst(
[
self.video_source,
self.video_codec,
self.video_decode,
self.video_sink_conf
])
self.video_sink.connect('new-sample', self.callback)
def callback(self, sink):
sample = sink.emit('pull-sample')
new_frame = self.gst_to_opencv(sample)
self._frame = new_frame
return Gst.FlowReturn.OK
if __name__ == '__main__':
# Create the video object
# Add port= if is necessary to use a different one
video = Video(port=5000)
while True:
# Wait for the next frame
if not video.frame_available():
continue
frame = video.frame()
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
REF
1: http://www.einarsundgren.se/gstreamer-basic-real-time-streaming-tutorial/
2: https://gist.github.com/patrickelectric/443645bb0fd6e71b34c504d20d475d5a