Convert OpenCV video to Streamlit video in-memory - python

I have a list of image frames, frames, that I would like to display in a Streamlit application: st.video(frames_converted).
Challenges:
Streamlit renders video with an HTML5 player, which requires H.264-encoded video
Want to complete all processing in memory (as opposed to the much more common approach of saving to a temporary file)
Current attempt:
## Convert frames to video for streamlit
height, width, layers = frames[0].shape
codec = cv.VideoWriter_fourcc(*'H264')
fps = 1
video = cv.VideoWriter("temp_video",codec, fps, (width,height)) # Initialize video object
for frame in frames:
    video.write(frame)
cv.destroyAllWindows()
video.release()
st.video(video)
Current Blocker
RuntimeError: Invalid binary data format: <class 'cv2.VideoWriter'>

We may encode an "in memory" MP4 video using PyAV as described in my answer to the question below - the video is stored in a BytesIO object.
We may pass the BytesIO object as input to Streamlit (or convert the BytesIO object to a bytes object and use that as input).
Code sample:
import numpy as np
import cv2 # OpenCV is used only for writing text on image (for testing).
import av
import io
import streamlit as st
n_frames = 100 # Select number of frames (for testing).
width, height, fps = 192, 108, 10 # Select video resolution and framerate.
output_memory_file = io.BytesIO() # Create BytesIO "in memory file".
output = av.open(output_memory_file, 'w', format="mp4") # Open "in memory file" as MP4 video output
stream = output.add_stream('h264', str(fps)) # Add H.264 video stream to the MP4 container, with framerate = fps.
stream.width = width # Set frame width
stream.height = height # Set frame height
#stream.pix_fmt = 'yuv444p' # Select yuv444p pixel format (better quality than default yuv420p).
stream.pix_fmt = 'yuv420p' # Select yuv420p pixel format for wider compatibility.
stream.options = {'crf': '17'} # Select low crf for high quality (the price is larger file size).
def make_sample_image(i):
    """ Build synthetic "raw BGR" image for testing """
    p = width//60
    img = np.full((height, width, 3), 60, np.uint8)
    cv2.putText(img, str(i+1), (width//2-p*10*len(str(i+1)), height//2+p*10), cv2.FONT_HERSHEY_DUPLEX, p, (255, 30, 30), p*2)  # Blue number
    return img
# Iterate the created images, encode and write to MP4 memory file.
for i in range(n_frames):
    img = make_sample_image(i)  # Create OpenCV image for testing (resolution 192x108, pixel format BGR).
    frame = av.VideoFrame.from_ndarray(img, format='bgr24')  # Convert image from NumPy Array to frame.
    packet = stream.encode(frame)  # Encode video frame
    output.mux(packet)  # "Mux" the encoded frame (add the encoded frame to MP4 file).
# Flush the encoder
packet = stream.encode(None)
output.mux(packet)
output.close()
output_memory_file.seek(0) # Seek to the beginning of the BytesIO.
#video_bytes = output_memory_file.read() # Convert BytesIO to bytes array
#st.video(video_bytes)
st.video(output_memory_file) # Streamlit supports BytesIO object - we don't have to convert it to bytes array.
# Write BytesIO from RAM to file, for testing:
#with open("output.mp4", "wb") as f:
# f.write(output_memory_file.getbuffer())
#video_file = open('output.mp4', 'rb')
#video_bytes = video_file.read()
#st.video(video_bytes)
We can't use cv.VideoWriter, because it does not support in-memory video encoding (cv.VideoWriter requires a "true file").
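For completeness, the temporary-file route mentioned in the question is sketched below (a sketch only, not part of the in-memory solution; frames_to_video_bytes is a hypothetical helper name, and the 'avc1' FourCC only works if the OpenCV build includes an H.264 encoder - otherwise the HTML5 player may not play the result):

import os
import tempfile
import cv2 as cv
import streamlit as st

def frames_to_video_bytes(frames, fps=1):
    """Fallback: write frames to a temporary MP4 on disk and return its bytes."""
    height, width, _ = frames[0].shape
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    tmp.close()
    writer = cv.VideoWriter(tmp.name, cv.VideoWriter_fourcc(*'avc1'), fps, (width, height))  # H.264, if available in this build
    for frame in frames:
        writer.write(frame)
    writer.release()
    with open(tmp.name, "rb") as f:
        video_bytes = f.read()
    os.remove(tmp.name)
    return video_bytes

#st.video(frames_to_video_bytes(frames))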

Related

Creating video from images using PyAV

I am trying to write a function that creates a new MP4 video from a set of frames taken from another video. The frames will be given in PIL.Image format and are often cropped to include only a part of the input video, but all images will have the same dimensions.
What I have tried:
def modify_image(img):
    return img

test_input = av.open('input_vid.mp4')
test_output = av.open('output_vid.mp4', 'w')

in_stream = test_input.streams.video[0]
out_stream = test_output.add_stream(template=in_stream)

for frame in test_input.decode(in_stream):
    img_frame = frame.to_image()

    # Some possible modifications to img_frame...
    img_frame = modify_image(img_frame)

    out_frame = av.VideoFrame.from_image(img_frame)
    out_packet = out_stream.encode(out_frame)
    print(out_packet)

test_input.close()
test_output.close()
And the error that I got:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[23], line 11
8 img_frame = frame.to_image()
10 out_frame = av.VideoFrame.from_image(img_frame)
---> 11 out_packet = out_stream.encode(out_frame)
12 print(out_packet)
15 test_input.close()
File av\stream.pyx:153, in av.stream.Stream.encode()
File av\codec\context.pyx:490, in av.codec.context.CodecContext.encode()
File av\frame.pyx:52, in av.frame.Frame._rebase_time()
ValueError: Cannot rebase to zero time.
I followed the answer given in How to create a video out of frames without saving it to disk using python?, and met with the same issue.
Comparing the original VideoFrame and the VideoFrame created from the image, I found that the pts value of the new frames is None instead of an integer value. Overwriting the pts value of the new frame with the original values still causes the same error, and overwriting the dts value of the new frame gives the following error:
AttributeError: attribute 'dts' of 'av.frame.Frame' objects is not writable
Is there a way to modify the dts value, or possibly another method to create a video from a set of PIL.Image objects?
Using add_stream(template=in_stream) is only documented in the Remuxing example.
It's probably possible to use template=in_stream when re-encoding, but we have to set the time-base, and set the PTS timestamp of each encoded packet.
I found a discussion here (I didn't try it).
Instead of using template=in_stream, we may stick to the code sample from my other answer, and copy a few parameters from the input stream to the output stream.
Example:
in_stream = test_input.streams.video[0]
codec_name = in_stream.codec_context.name # Get the codec name from the input video stream.
fps = in_stream.codec_context.rate # Get the framerate from the input video stream.
out_stream = test_output.add_stream(codec_name, str(fps))
out_stream.width = in_stream.codec_context.width # Set frame width to be the same as the width of the input stream
out_stream.height = in_stream.codec_context.height # Set frame height to be the same as the height of the input stream
out_stream.pix_fmt = in_stream.codec_context.pix_fmt # Copy pixel format from input stream to output stream
#out_stream.options = {'crf': '17'} # Select low crf for high quality (the price is larger file size).
We also have to "Mux" the video frame:
test_output.mux(out_packet)
At the end, we have to flush the encoder before closing the file:
out_packet = out_stream.encode(None)
test_output.mux(out_packet)
Code sample:
import av
# Build input_vid.mp4 using FFmpeg CLI (for testing):
# ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=100 -vcodec libx264 -crf 10 -pix_fmt yuv444p input_vid.mp4
test_input = av.open('input_vid.mp4')
test_output = av.open('output_vid.mp4', 'w')
in_stream = test_input.streams.video[0]
#out_stream = test_output.add_stream(template=in_stream) # Using template=in_stream is not working (probably meant to be used for re-muxing and not for re-encoding).
codec_name = in_stream.codec_context.name # Get the codec name from the input video stream.
fps = in_stream.codec_context.rate # Get the framerate from the input video stream.
out_stream = test_output.add_stream(codec_name, str(fps))
out_stream.width = in_stream.codec_context.width # Set frame width to be the same as the width of the input stream
out_stream.height = in_stream.codec_context.height # Set frame height to be the same as the height of the input stream
out_stream.pix_fmt = in_stream.codec_context.pix_fmt # Copy pixel format from input stream to output stream
#out_stream.options = {'crf': '17'} # Select low crf for high quality (the price is larger file size).
for frame in test_input.decode(in_stream):
    img_frame = frame.to_image()
    out_frame = av.VideoFrame.from_image(img_frame)  # Note: to_image and from_image are not required in this specific example.
    out_packet = out_stream.encode(out_frame)  # Encode video frame
    test_output.mux(out_packet)  # "Mux" the encoded frame (add the encoded frame to MP4 file).
    print(out_packet)
# Flush the encoder
out_packet = out_stream.encode(None)
test_output.mux(out_packet)
test_input.close()
test_output.close()
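As a quick sanity check (not part of the original answer), the re-encoded file can be opened again with PyAV and its decoded frames counted; the count should match the input video (100 frames for the testsrc command above):

import av

container = av.open('output_vid.mp4')
n_decoded = sum(1 for _ in container.decode(video=0))  # Count decoded video frames
container.close()
print(n_decoded)  # Expected: 100 for the test input generated above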

How to create a video out of frames without saving it to disk using python?

I have a function that returns a frame as result. I wanted to know how to make a video out of a for-loop with this function without saving every frame and then creating the video.
What I have for now is something similar to:
import cv2

out = cv2.VideoWriter('video.mp4', cv2.VideoWriter_fourcc(*'DIVX'), 14.25, (500, 258))

for frame in frames:
    img_result = MyImageTreatmentFunction(frame)  # returns a numpy array image
    out.write(img_result)

out.release()
Then the video is created as video.mp4 and I can read it back into memory. I'm asking myself if there's a way to have this video in a variable that I can easily convert to bytes later. My purpose for that is to send the video via HTTP POST.
I've looked at ffmpeg-python and OpenCV, but I didn't find anything that applies to my case.
We may use PyAV for encoding to an "in memory file".
PyAV is a Pythonic binding for the FFmpeg libraries.
The interface is relatively low level, but it allows us to do things that are not possible using other FFmpeg bindings.
Here are the main stages for creating MP4 in memory using PyAV:
Create BytesIO "in memory file":
output_memory_file = io.BytesIO()
Use PyAV to open "in memory file" as MP4 video output file:
output = av.open(output_memory_file, 'w', format="mp4")
Add H.264 video stream to the MP4 container, and set codec parameters:
stream = output.add_stream('h264', str(fps))
stream.width = width
stream.height = height
stream.pix_fmt = 'yuv444p'
stream.options = {'crf': '17'}
Iterate the OpenCV images, convert image to PyAV VideoFrame, encode, and "Mux":
for i in range(n_frames):
    img = make_sample_image(i)  # Create OpenCV image for testing (resolution 192x108, pixel format BGR).
    frame = av.VideoFrame.from_ndarray(img, format='bgr24')
    packet = stream.encode(frame)
    output.mux(packet)
Flush the encoder and close the "in memory" file:
packet = stream.encode(None)
output.mux(packet)
output.close()
The following code sample encodes 100 synthetic images into an "in memory" MP4 file.
Each synthetic image is an OpenCV (BGR) image with a sequential blue frame number (used for testing).
At the end, the memory file is written to an output.mp4 file for testing.
import numpy as np
import cv2
import av
import io
n_frames = 100 # Select number of frames (for testing).
width, height, fps = 192, 108, 23.976 # Select video resolution and framerate.
output_memory_file = io.BytesIO() # Create BytesIO "in memory file".
output = av.open(output_memory_file, 'w', format="mp4") # Open "in memory file" as MP4 video output
stream = output.add_stream('h264', str(fps)) # Add H.264 video stream to the MP4 container, with framerate = fps.
stream.width = width # Set frame width
stream.height = height # Set frame height
stream.pix_fmt = 'yuv444p' # Select yuv444p pixel format (better quality than default yuv420p).
stream.options = {'crf': '17'} # Select low crf for high quality (the price is larger file size).
def make_sample_image(i):
    """ Build synthetic "raw BGR" image for testing """
    p = width//60
    img = np.full((height, width, 3), 60, np.uint8)
    cv2.putText(img, str(i+1), (width//2-p*10*len(str(i+1)), height//2+p*10), cv2.FONT_HERSHEY_DUPLEX, p, (255, 30, 30), p*2)  # Blue number
    return img
# Iterate the created images, encode and write to MP4 memory file.
for i in range(n_frames):
    img = make_sample_image(i)  # Create OpenCV image for testing (resolution 192x108, pixel format BGR).
    frame = av.VideoFrame.from_ndarray(img, format='bgr24')  # Convert image from NumPy Array to frame.
    packet = stream.encode(frame)  # Encode video frame
    output.mux(packet)  # "Mux" the encoded frame (add the encoded frame to MP4 file).
# Flush the encoder
packet = stream.encode(None)
output.mux(packet)
output.close()
# Write BytesIO from RAM to file, for testing
with open("output.mp4", "wb") as f:
    f.write(output_memory_file.getbuffer())
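Since the original question's goal is to send the video via HTTP POST, the in-memory MP4 can be posted directly from the BytesIO object; a minimal sketch using the requests library (the URL and form field name are hypothetical):

import requests

output_memory_file.seek(0)  # Rewind the BytesIO before sending.

# Hypothetical endpoint and field name - adapt to the actual server API.
response = requests.post("http://example.com/upload",
                         files={"video": ("output.mp4", output_memory_file, "video/mp4")})
print(response.status_code)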

Create 7 hour video from +250.000 frames

I have 14 videos of 30 minutes (7 hours of video data). I read in every video separately, perform some morphological processing on each frame and then use cv2.imwrite() to save each processed frame. I'd like to make one big 7-hour video file from all processed frames. So far, I've been trying to use this code:
import cv2
import numpy as np
import glob

img_array = []
for filename in glob.glob('C:/New folder/Images/*.jpg'):
    img = cv2.imread(filename)
    height, width, layers = img.shape
    size = (width, height)
    img_array.append(img)

out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)

for i in range(len(img_array)):
    out.write(img_array[i])
out.release()
But an error is given when creating the img_array (memory overload). Is there any other way to make a 7 hour video from +250.000 frames?
Thank you.
Check that all pictures are of the same size.
As stated by others, don't read all pictures at once; it's not necessary.
Usually I'd prefer to create the VideoWriter before the loop, but you need the size for that, and you only know that after you've read the first image. That's why I initialize that variable to None and create the VideoWriter once I have the first image.
Also: DIVX and .avi may work, but that's not the best option. The built-in option is to use MJPG (with .avi), which is always available in OpenCV. I would however recommend .mkv and avc1 (H.264) for general video (a short variant is sketched after the code sample below), or you could look for a lossless codec that stores data in RGB instead of YUV (which may distort color information from screenshots... and also drawn lines and other hard edges). You could try the "rle " codec (note the trailing space in the FourCC), which is a lossless codec based on run-length encoding.
import cv2  # `import cv2 as cv` is preferred these days
import numpy as np
import glob

out = None  # VideoWriter initialized after reading the first image
outsize = None

for filename in glob.glob('C:/New folder/Images/*.jpg'):
    img = cv2.imread(filename)
    assert img is not None, filename  # file could not be read

    (height, width, layers) = img.shape
    thissize = (width, height)

    if out is None:  # this happens once at the beginning
        outsize = thissize
        out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, outsize)
        assert out.isOpened()
    else:  # error checking for every following image
        assert thissize == outsize, (outsize, thissize, filename)

    out.write(img)

# finalize the video file (write headers/footers)
out.release()
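If the .mkv/avc1 (H.264) route recommended above is preferred, only the VideoWriter line in the sample above changes; note this is a sketch, and the 'avc1' FourCC is only usable if the OpenCV build includes an H.264 encoder:

# H.264 in a Matroska container; 'avc1' availability depends on the OpenCV build.
out = cv2.VideoWriter('project.mkv', cv2.VideoWriter_fourcc(*'avc1'), 15, outsize)
assert out.isOpened(), "avc1 encoder not available in this OpenCV build"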
You could also do this with an invocation of ffmpeg on the command line (or from your program):
How to create a video from images with FFmpeg?
You don't need to store each frame inside an array.
You can read the frame and write it to the video directly.
You can modify your code as:
import cv2
import numpy as np
import glob

out = None

for filename in glob.glob('C:/New folder/Images/*.jpg'):
    img = cv2.imread(filename)
    if out is None:
        height, width, layers = img.shape
        size = (width, height)
        out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
    out.write(img)

out.release()

How to shrink data set output from avi file

I'm trying to create a data set from an avi file I have and I know I've made a mistake somewhere.
The AVI file I have is 1,827 KB (4:17), but after running my code to convert the frames into arrays of numbers I now have a file that is 1,850,401 KB. This seems a little large to me.
How can I reduce the size of my data set / where did I go wrong?
# Program To Read video
# and Extract Frames

import cv2
import numpy as np
import time

# Function to extract frames
def FrameCapture(path):
    # Path to video file
    vidObj = cv2.VideoCapture(path)

    # Used as counter variable
    count = 0

    # checks whether frames were extracted
    success = 1

    newDataSet = []

    try:
        while success:
            # vidObj object calls read
            # function extract frames
            success, image = vidObj.read()

            img_reverted = cv2.bitwise_not(image)
            new_img = img_reverted / 255.0
            newDataSet.append(new_img)
            #new_img >> "frame%d.txt" % count

            # Saves the frames with frame-count
            #cv2.imwrite("frame%d.jpg" % count, image)

            count += 1
    except:
        timestr = time.strftime("%Y%m%d-%H%M%S")
        np.save("DataSet" + timestr, newDataSet)

# Driver Code
if __name__ == '__main__':
    # Calling the function
    FrameCapture("20191212-150041output.avi")
I'm going to guess that the video mainly consists of similar pixels blocked together, which the video codec has compressed to such a low file size. When you load single images into arrays, all that compression goes away, and depending on the fps of the video you will have thousands of uncompressed images. When you first load an image it will be saved as a NumPy array of dtype uint8, and the image size will be WIDTH * HEIGHT * N_COLOR_CHANNELS bytes. After you divide it by 255.0 to normalize between 0 and 1, the dtype changes to float64 and the image size increases eightfold. You can use this information to calculate the expected size of the images.
So your options are to either decrease the height and width of your images (downscale), change to grayscale, or, if your application allows it, stick with uint8 values. If the images don't change too much and you don't need thousands of them, you could also save only every 10th one or whatever seems reasonable. If you need them all as-is but they don't fit in memory, consider using a generator to load them on demand. It will be slower, but at least it will run.
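To illustrate the arithmetic above (a sketch with assumed numbers; plug in the real resolution, channel count and frame count of the video):

# Assumed values for illustration: 640x480 RGB video at 25 fps, 4 min 17 s long.
width, height, channels = 640, 480, 3
n_frames = 25 * (4 * 60 + 17)

bytes_uint8 = width * height * channels  # One frame as uint8
bytes_float64 = bytes_uint8 * 8          # Dividing by 255.0 promotes to float64 (eight times larger)

total_gib = n_frames * bytes_float64 / 1024**3
print(f"{n_frames} frames, {bytes_float64 / 1024**2:.1f} MiB per frame, ~{total_gib:.1f} GiB total")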

How to create JPEG compressed DICOM dataset using pydicom?

I am trying to create a JPEG compressed DICOM image using pydicom. A nice source of material about colorful DICOM images can be found here, but it's mostly theory and C++. In the code example below I create a pale blue ellipse inside output-raw.dcm (uncompressed), which looks fine:
import io
from PIL import Image, ImageDraw
from pydicom.dataset import Dataset
from pydicom.uid import generate_uid, JPEGExtended
from pydicom._storage_sopclass_uids import SecondaryCaptureImageStorage
WIDTH = 100
HEIGHT = 100
def ensure_even(stream):
    # Very important for some viewers
    if len(stream) % 2:
        return stream + b"\x00"
    return stream
def bob_ross_magic():
    image = Image.new("RGB", (WIDTH, HEIGHT), color="red")
    draw = ImageDraw.Draw(image)
    draw.rectangle([10, 10, 90, 90], fill="black")
    draw.ellipse([30, 20, 70, 80], fill="cyan")
    draw.text((11, 11), "Hello", fill=(255, 255, 0))
    return image
ds = Dataset()
ds.is_little_endian = True
ds.is_implicit_VR = True
ds.SOPClassUID = SecondaryCaptureImageStorage
ds.SOPInstanceUID = generate_uid()
ds.fix_meta_info()
ds.Modality = "OT"
ds.SamplesPerPixel = 3
ds.BitsAllocated = 8
ds.BitsStored = 8
ds.HighBit = 7
ds.PixelRepresentation = 0
ds.PhotometricInterpretation = "RGB"
ds.Rows = HEIGHT
ds.Columns = WIDTH
image = bob_ross_magic()
ds.PixelData = ensure_even(image.tobytes())
image.save("output.png")
ds.save_as("output-raw.dcm", write_like_original=False) # File is OK
#
# Create compressed image
#
output = io.BytesIO()
image.save(output, format="JPEG")
ds.PixelData = ensure_even(output.getvalue())
ds.PhotometricInterpretation = "YBR_FULL_422"
ds.file_meta.TransferSyntaxUID = JPEGExtended
ds.save_as("output-jpeg.dcm", write_like_original=False) # File is corrupt
At the very end I am trying to create compressed DICOM: I tried setting various transfer syntaxes, compressions with PIL, but no luck. I believe the generated DICOM file is corrupt. If I were to convert the raw DICOM file to JPEG compressed with gdcm-tools:
$ gdcmconv -J output-raw.dcm output-jpeg.dcm
By doing a dcmdump on this converted file we could see an interesting structure, which I don't know how to reproduce using pydicom:
$ dcmdump output-jpeg.dcm
# Dicom-File-Format
# Dicom-Meta-Information-Header
# Used TransferSyntax: Little Endian Explicit
(0002,0000) UL 240 # 4, 1 FileMetaInformationGroupLength
(0002,0001) OB 00\01 # 2, 1 FileMetaInformationVersion
(0002,0002) UI =SecondaryCaptureImageStorage # 26, 1 MediaStorageSOPClassUID
(0002,0003) UI [1.2.826.0.1.3680043.8.498.57577581978474188964358168197934098358] # 64, 1 MediaStorageSOPInstanceUID
(0002,0010) UI =JPEGLossless:Non-hierarchical-1stOrderPrediction # 22, 1 TransferSyntaxUID
(0002,0012) UI [1.2.826.0.1.3680043.2.1143.107.104.103.115.2.8.4] # 48, 1 ImplementationClassUID
(0002,0013) SH [GDCM 2.8.4] # 10, 1 ImplementationVersionName
(0002,0016) AE [gdcmconv] # 8, 1 SourceApplicationEntityTitle
# Dicom-Data-Set
# Used TransferSyntax: JPEG Lossless, Non-hierarchical, 1st Order Prediction
...
... ### How to do the magic below?
...
(7fe0,0010) OB (PixelSequence #=2) # u/l, 1 PixelData
(fffe,e000) pi (no value available) # 0, 1 Item
(fffe,e000) pi ff\d8\ff\ee\00\0e\41\64\6f\62\65\00\64\00\00\00\00\00\ff\c3\00\11... # 4492, 1 Item
(fffe,e0dd) na (SequenceDelimitationItem) # 0, 0 SequenceDelimitationItem
I tried to use pydicom's encaps module, but I think it's mostly for reading data, not writing. Anyone else have any ideas how to deal with this issue, how to create/encode these PixelSequences? Would love to create JPEG compressed DICOMs in plain Python without running external tools.
DICOM requires compressed Pixel Data be encapsulated (see the tables especially). Once you have your compressed image data you can use the encaps.encapsulate() method to create bytes suitable for use with Pixel Data:
from pydicom.encaps import encapsulate
# encapsulate() requires a list of bytes, one item per frame
ds.PixelData = encapsulate([ensure_even(output.getvalue())])
# Need to set this flag to indicate the Pixel Data is compressed
ds['PixelData'].is_undefined_length = True # Only needed for < v1.4
ds.PhotometricInterpretation = "YBR_FULL_422"
ds.file_meta.TransferSyntaxUID = JPEGExtended
ds.save_as("output-jpeg.dcm", write_like_original=False)
Trying the solution from @scaramallion, with a bit more detail, seems to work:
import io
import numpy as np
from PIL import Image
from pydicom.encaps import encapsulate

# set some parameters
num_frames = 4
img_size = 10

# Create a fake RGB dataset
random_image_array = (np.random.random((num_frames, img_size, img_size, 3))*255).astype('uint8')

# Convert to PIL
imlist = []
for i in range(num_frames):  # convert the multiframe image into RGB of single frames (Required for compression)
    imlist.append(Image.fromarray(random_image_array[i]))

# Save the multipage tiff with jpeg compression
f = io.BytesIO()
imlist[0].save(f, format='tiff', append_images=imlist[1:], save_all=True, compression='jpeg')

# The BytesIO object cursor is at the end of the object, so I need to tell it to go back to the front
f.seek(0)
img = Image.open(f)

# Get each one of the frames converted to even numbered bytes
img_byte_list = []
for i in range(num_frames):
    try:
        img.seek(i)
        with io.BytesIO() as output:
            img.save(output, format='jpeg')
            img_byte_list.append(output.getvalue())
    except EOFError:
        # Not enough frames in img
        break

ds.PixelData = encapsulate([x for x in img_byte_list])
ds['PixelData'].is_undefined_length = True
ds.is_implicit_VR = False
ds.LossyImageCompression = '01'
ds.LossyImageCompressionRatio = 10  # default jpeg
ds.LossyImageCompressionMethod = 'ISO_10918_1'
ds.file_meta.TransferSyntaxUID = '1.2.840.10008.1.2.4.51'
ds.save_as("output-jpeg.dcm", write_like_original=False)
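As a quick sanity check (not part of the original answer), the saved file can be read back with pydicom to confirm the transfer syntax; fully decoding the JPEG frames via pixel_array additionally requires an image handler such as Pillow or pylibjpeg, and NumberOfFrames to be set for multi-frame data:

import pydicom

ds_check = pydicom.dcmread("output-jpeg.dcm")
print(ds_check.file_meta.TransferSyntaxUID)       # Expected: 1.2.840.10008.1.2.4.51 (JPEG Extended)
print(ds_check['PixelData'].is_undefined_length)  # True for encapsulated (compressed) Pixel Data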
