I am building a web application for the determination of room impulse responses.
I'm currently using streamlit for the GUI part, this is an extract of my code relevant to sounddevice:
from pathlib import Path
import streamlit as st
import numpy as np
import sounddevice as sd
from numba import jit
from scipy import signal
from scipy.io import wavfile
# Streamlit page for measuring a room impulse response with an exponential sweep.
# NOTE(review): indentation was lost in this listing; presumably everything up to
# the final playrec_sweep(...) call forms the body of this function -- confirm.
def app_room_measurements():
# Directory (relative path) the WAV files are written to and read from.
audio_files_path = r"data/audio_files"
# File names; empty here and filled in once the user has typed a base name.
sweep_string = ""
inv_filter_string = ""
# Not assigned anywhere else in the visible code.
ir_string = ""
#jit(nopython=True)
def fade(data, gain_start, gain_end):
    """
    Apply a linear gain ramp to ``data`` in place.

    Parameters
    ----------
    data : mutable sequence or numpy array
        The samples to fade; they are overwritten in place.
    gain_start : float
        Gain applied to the first sample.
    gain_end : float
        Gain the ramp reaches at the last sample.

    Returns
    -------
    data : object
        The same object that was passed in, with the fade applied.
    """
    n_samples = len(data)
    if n_samples == 0:
        return data
    # A single sample has no ramp to walk along; dividing by (n_samples - 1)
    # would raise ZeroDivisionError, so use a step of zero instead.
    delta = (gain_end - gain_start) / (n_samples - 1) if n_samples > 1 else 0.0
    for i in range(n_samples):
        # Derive the gain from the index instead of accumulating `delta`, so
        # rounding errors do not build up along the ramp.
        data[i] = data[i] * (gain_start + i * delta)
    return data
#jit(nopython=True)
def generate_exponential_sweep(
    sweep_duration, sr, starting_frequency, ending_frequency
):
    """
    Generate an exponential sine sweep using Farina's log sweep theory.

    Sample ``n`` (at time ``t = n / sr``) is

        sin(2*pi*f1*T / ln(f2/f1) * (exp(t/T * ln(f2/f1)) - 1))

    with ``T = sweep_duration``, ``f1 = starting_frequency`` and
    ``f2 = ending_frequency``. The last 50 samples (or the whole signal when
    it is shorter) are multiplied by a linear ramp from 1 down to 0.

    Parameters
    ----------
    sweep_duration : int or float
        Duration of the excitation signal in seconds.
    sr : int
        Sampling frequency in Hz.
    starting_frequency : float
        Starting frequency of the excitation signal.
    ending_frequency : float
        Ending frequency of the excitation signal.

    Returns
    -------
    exponential_sweep : numpy.ndarray
        One-dimensional float64 array of ``round(sweep_duration * sr)`` samples.
    """
    # Round to an int so that float durations (e.g. 0.5 s) are accepted too.
    time_in_samples = int(round(sweep_duration * sr))
    log_ratio = np.log(ending_frequency / starting_frequency)

    # Evaluate the whole sweep in one vectorized expression instead of a
    # per-sample Python loop.
    t = np.arange(time_in_samples, dtype=np.double) / sr
    phase = (
        (2.0 * np.pi * starting_frequency * sweep_duration)
        / log_ratio
        * (np.exp((t / sweep_duration) * log_ratio) - 1.0)
    )
    exponential_sweep = np.sin(phase)

    # Short linear fade-out so the sweep does not stop on a non-zero sample.
    number_of_samples = min(50, time_in_samples)
    if number_of_samples:
        exponential_sweep[-number_of_samples:] *= np.linspace(
            1.0, 0.0, number_of_samples
        )
    return exponential_sweep
#jit(nopython=True)
def generate_inverse_filter(
    sweep_duration, sr, exponential_sweep, starting_frequency, ending_frequency
):
    """
    Generate an inverse filter using Farina's log sweep theory.

    The filter is the time-reversed sweep multiplied by an amplitude envelope
    that decays linearly in dB over the filter length, by
    ``6 * log2(ending_frequency / starting_frequency)`` dB in total.

    Parameters
    ----------
    sweep_duration : int or float
        Duration of the excitation signal in seconds.
    sr : int
        Sampling frequency in Hz.
    exponential_sweep : array-like
        Result of ``generate_exponential_sweep()``; must hold at least
        ``round(sweep_duration * sr)`` samples.
    starting_frequency : float
        Starting frequency of the excitation signal.
    ending_frequency : float
        Ending frequency of the excitation signal.

    Returns
    -------
    inverse_filter : numpy.ndarray
        float64 array of ``round(sweep_duration * sr)`` samples.

    Raises
    ------
    ValueError
        If ``exponential_sweep`` is shorter than the requested filter.
    """
    time_in_samples = int(round(sweep_duration * sr))
    sweep = np.asarray(exponential_sweep, dtype=np.double)
    if sweep.size < time_in_samples:
        raise ValueError(
            "exponential_sweep has fewer samples than sweep_duration * sr"
        )

    # Envelope in dB is decay_db * (n / N); the factor 0.05 (= 1/20) converts
    # dB to a base-10 amplitude exponent.
    decay_db = -6 * np.log2(ending_frequency / starting_frequency)
    position = np.arange(time_in_samples, dtype=np.double) / time_in_samples
    amplitude_envelope = np.power(10.0, decay_db * position * 0.05)

    # Reverse with [::-1]: indexing with [-n] would read sample 0 for n == 0
    # and shift the reversed sweep by one sample.
    reversed_sweep = sweep[::-1][:time_in_samples]
    return reversed_sweep * amplitude_envelope
# Sample rate in Hz used when generating and saving the sweep.
sample_rate_option = st.selectbox("Select the desired sample rate", (44100, 48000))
# Sweep length, passed to the generator functions as sweep_duration (seconds).
sweep_duration_option = st.selectbox("Select the duration of the sweep", (3, 7, 14))
# Expected reverb decay time (presumably in seconds -- confirm).
max_reverb_option = st.selectbox(
"Select the expected maximum reverb decay time", (1, 2, 3, 5, 10)
)
# Hint shown to the user below the selectors.
st.caption(
"""
Note that longer sweeps provide more accuracy,
but even short sweeps can be used to measure long decays
"""
)
def write_wav_file(file_name, rate, data):
    """
    Write ``data`` as a WAV file inside ``audio_files_path`` and report it in the UI.

    Parameters
    ----------
    file_name : str
        Name of the file to create inside ``audio_files_path``.
    rate : int
        Sample rate written to the WAV header.
    data : numpy.ndarray
        Samples to write.
    """
    # pathlib is used for the path handling because `os` is not imported by
    # this file, while `Path` is.
    target_dir = Path(audio_files_path)
    # Writing fails if the folder is missing, so create it on first use.
    target_dir.mkdir(parents=True, exist_ok=True)
    save_file_path = target_dir / file_name
    wavfile.write(str(save_file_path), rate, data)
    st.success(f"File successfully written to audio_files_path as:>> {file_name}")
def playrec_sweep(wavefile_name, tail_seconds=None):
    """
    Play a WAV file while recording the room, then save the recording.

    ``sd.playrec`` records exactly as many frames as it plays, so the played
    signal is padded with trailing silence; the recording then keeps running
    while the reverb decays after the sweep has ended.

    Parameters
    ----------
    wavefile_name : str
        Name of the WAV file inside ``audio_files_path`` to play.
    tail_seconds : float, optional
        Length of the silence appended after the sweep. Defaults to the
        user-selected ``max_reverb_option``.
    """
    # pathlib is used because `os` is not imported by this file.
    read_file_path = Path(audio_files_path) / wavefile_name
    sample_rate, data = wavfile.read(str(read_file_path))

    if tail_seconds is None:
        tail_seconds = max_reverb_option
    tail_frames = int(round(tail_seconds * sample_rate))
    if tail_frames > 0:
        # Keep dtype and channel layout of the file so concatenation is exact.
        silence = np.zeros((tail_frames,) + data.shape[1:], dtype=data.dtype)
        data = np.concatenate((data, silence))

    stop_button = st.button("Stop")
    if "stop_button_state" not in st.session_state:
        st.session_state.stop_button_state = False
    # blocking=True: this call returns only after playback/recording finished.
    user_sweep = sd.playrec(data, sample_rate, channels=1, blocking=True)
    if stop_button or st.session_state.stop_button_state:
        st.session_state.stop_button_state = True
        sd.stop()
    else:
        # Save with the rate the recording was actually made at.
        write_wav_file(
            file_name=user_sweep_string, rate=sample_rate, data=user_sweep
        )
    print("Sweep done playing")
# Base name typed by the user; every generated file name is derived from it.
user_input = str(st.text_input("Name your file: "))
if user_input:
sweep_string = user_input + "_exponential_sweep.wav"
inv_filter_string = user_input + "_inverse_filter.wav"
# Read by playrec_sweep() as the name of the recorded file.
# NOTE(review): only bound when user_input is non-empty -- confirm the Play
# branch below cannot run before a name has been entered.
user_sweep_string = user_input + "_user_exponential_sweep.wav"
st.write(sweep_string)
play_button = st.button("Play")
# Remember in the session state that Play has been pressed.
if "play_button_state" not in st.session_state:
st.session_state.play_button_state = False
if play_button or st.session_state.play_button_state:
st.session_state.play_button_state = True
# 20 and 24000 are the start and end frequencies of the sweep.
# NOTE(review): 24000 Hz is above half of the 44100 Hz sample-rate option --
# confirm this is intended.
sweep = generate_exponential_sweep(
sweep_duration_option, sample_rate_option, 20, 24000
)
# NOTE(review): inv_filter is computed but not saved or used in the visible code.
inv_filter = generate_inverse_filter(
sweep_duration_option, sample_rate_option, sweep, 20, 24000
)
write_wav_file(file_name=sweep_string, rate=sample_rate_option, data=sweep)
playrec_sweep(sweep_string)
In short: I let the user choose the desired sample rate, the desired duration of the excitation signal (the sweep) and the maximum expected reverb decay time.
After that the user can name the file and start the simultaneous playback and recording of the created file with sd.playrec().
The problem is: I would like to extend the duration of the recording by adding the user-inputted parameter max_reverb_option to the duration value, as the recording should include the tail of the reverb, but apparently sd.playrec() does not accept a duration parameter. How can I do it? Are there other options that I'm missing?
I am trying to hash many image blocks, but Python does not use the full power of the CPU: it only consumes about 25%. I tried moving the heavy processing into threads, but it made no difference. I come from Node.js, where the sharp library performs the same task and uses all of the CPU. How can I make Python use the full CPU power?
import cv2
import math
import datetime
import hashlib
import threading
def thread_function(image, yPos, xPos, wSizeBlock, hSizeBlock):
    """
    Print (and return) the SHA-256 hex digest of one rectangular image block.

    Parameters
    ----------
    image : numpy.ndarray
        Image indexed as ``image[row, column, ...]``.
    yPos : int
        Row of the block's top-left corner.
    xPos : int
        Column of the block's top-left corner.
    wSizeBlock : int
        Block width (number of columns).
    hSizeBlock : int
        Block height (number of rows).

    Returns
    -------
    str
        The hex digest that was printed.
    """
    # Rows span the block height and columns span the block width.
    block = image[yPos:yPos + hSizeBlock, xPos:xPos + wSizeBlock]
    digest = hashlib.sha256(block.tobytes()).hexdigest()
    print(digest)
    return digest
# Hash every 16x16 block of one frame, 500 times over, and print the wall-clock
# start and end times of the whole run.
image = cv2.imread('frame323.jpg', cv2.IMREAD_COLOR)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read image 'frame323.jpg'")
dimension = {
    'width': image.shape[1],
    'height': image.shape[0]
}
wSizeBlock = int(16)
hSizeBlock = int(16)
# Number of whole blocks that fit along each axis.
wBlockLength = math.floor(dimension['width'] / wSizeBlock)
hBlockLength = math.floor(dimension['height'] / hSizeBlock)
count = 0
start_time = datetime.datetime.now()
print(start_time)
for k in range(0, 500):
    workers = []
    for i in range(0, wBlockLength):
        for j in range(0, hBlockLength):
            xPos = int(i*wSizeBlock)
            yPos = int(j*hSizeBlock)
            # thread_function takes (image, yPos, xPos, ...): row first, then column.
            x = threading.Thread(target=thread_function, args=(image, yPos, xPos, wSizeBlock, hSizeBlock))
            x.start()
            workers.append(x)
            count += 1
    # Wait for this pass to finish; otherwise end_time is taken while hashes
    # are still being computed.
    for worker in workers:
        worker.join()
    count = 0
end_time = datetime.datetime.now()
print(end_time)
For CPU-intensive operations that can be split up into smaller tasks, you would want to use the multiprocessing module. It is similar to the threading module in that it allows multiple functions to be run at once. The syntax looks something like this:
import multiprocessing as mp
def add(a, b):
    """Return ``a + b``; used here as a minimal example of a worker function."""
    return a + b


# Start child processes only when the file is run as a script: with the
# "spawn" start method the child imports this module again, and an unguarded
# start() would then try to launch another process during that import.
if __name__ == "__main__":
    p = mp.Process(target=add, args=(1, 2))
    p.start()
    # Wait for the child so the parent does not exit or continue before it is done.
    p.join()
I am trying to create an image array from scratch.
I got the code running, but it takes around 30 seconds to run.
I feel it could be faster by using numpy native functions.
How can I do this?
import cv2
import numpy as np
import time
# Synthetic input: 10000 random integer "volume" samples (low=0, high=200).
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
# Shape of every rendered frame: rows, columns, channels.
image_heigh = 128
image_width = 256
image_channel = 3
# Set to True to display each frame with OpenCV while the loop runs.
show_img = False
def nomralized(data, data_min, data_max, maximum_value):
    """
    Linearly rescale ``data`` so that ``data_min`` maps to 0 and ``data_max``
    maps to ``maximum_value``.
    """
    shifted = data - data_min
    span = data_max - data_min
    return maximum_value * (shifted / span)
# Baseline: build one frame per window position, sliding the window one sample at a time.
start_time = time.time()
for ii in range(len(volumes)-image_width):
# ===================== part to optimize start
# A fresh all-zero frame is allocated for every window position.
final_image = np.zeros((image_heigh, image_width, image_channel))
start = ii
end = ii + image_width
# The image_width samples covered by the current window.
current_vols = volumes[start:end]
# normalize data
vol_min = 0
vol_max = np.max(current_vols)
# Scale the window so that its maximum maps to image_heigh.
vol_norm = nomralized(data=current_vols,
data_min=vol_min,
data_max=vol_max,
maximum_value=image_heigh)
# One bar per column: rows [0, int(height)) of column xxx are set to 1 in all channels.
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
# ===================== part to optimize end
if show_img:
image = np.float32(final_image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
cv2.imshow("ok", image)
cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
How can I make this image array creation faster?
I need to create the image at every timestep because I want to simulate a real live data stream in which new data arrives at every timestep.
This is why I would like to optimize only this part of the code:
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
How can I do this?
The first and simplest optimizations are the following:
1) Compare the values against np.arange(...) instead of running the inner loop.
2) Use a gray image instead of a 3-channel RGB image: 3 times less data to process.
3) Use the np.uint8 type instead of np.float32; it is faster to process and does not need converting to float32 for visualizing with cv2.
Together, the optimizations above give a huge speedup (about 10x): my running time is 2.6 sec instead of the 27 sec before.
Another very useful optimization, which I did not implement here, is that you do not need to recompute the previous image pixels when the max/min of the data within the current window has not changed. You only need to recompute the previous image data when the max/min has changed. I expect that your real-life data changes gradually, like Forex or Bitcoin prices, so the max/min within a window should change only rarely.
Optimizations 1)-3) mentioned above are implemented in the following code:
import cv2
import numpy as np
import time
# Synthetic input: 10000 random integer "volume" samples (low=0, high=200).
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
# Frame height and width in pixels.
image_heigh = 128
image_width = 256
# Not used by the loop in this listing, which renders a single-channel image.
image_channel = 3
# Set to True to display each frame with OpenCV while the loop runs.
show_img = False
def nomralized(data, data_min, data_max, maximum_value):
    """Map ``data`` from the range [data_min, data_max] onto [0, maximum_value]."""
    fraction = (data - data_min) / (data_max - data_min)
    return maximum_value * fraction
# Vectorized variant: each frame is produced by one broadcast comparison.
start_time = time.time()
# Column vector of row indices 0 .. image_heigh-1, built once outside the loop.
aranges = np.arange(image_heigh, dtype = np.int32)[:, None]
for ii in range(len(volumes)-image_width):
# ===================== part to optimize start
#final_image = np.zeros((image_heigh, image_width, image_channel), dtype = np.float32)
start = ii
end = ii + image_width
# The image_width samples covered by the current window.
current_vols = volumes[start:end]
# normalize data
vol_min = 0
vol_max = np.max(current_vols)
vol_norm = nomralized(data=current_vols,
data_min=vol_min,
data_max=vol_max,
maximum_value=image_heigh)
# Broadcasting (rows, 1) < (1, columns): a pixel is 255 where its row index is
# below the integer bar height of its column, else 0.
final_image = (aranges < vol_norm[None, :].astype(np.int32)).astype(np.uint8) * 255
# ===================== part to optimize end
if show_img:
cv2.imshow('ok', final_image)
cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
For the above code I did just one more optimization of the inner loop, which sped the code up a further 2x, down to 1.3 sec. I also put back the 3 channels and float32, which reduced the speed again and resulted in a final time of 2.8 sec; here is the code:
A further optimization is possible if recomputing the old image data is not needed.
The main thing to optimize was that you were recomputing almost the same whole image at each step, with a 1-pixel shift along the width. Instead, you should compute the whole image once and then step not by 1 pixel but by the whole image width.
After this optimization the running time is 0.08 sec.
Do the 1-pixel stepping only for showing the animation, not for computing the image data; the image data should be computed just once if you need speed.
import cv2
import numpy as np
import time
# Synthetic input: 10000 random integer "volume" samples (low=0, high=200).
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh = 128
# The image is as wide as the whole data set, so it is rendered in one pass.
image_width = volumes.size #256
image_channel = 3
# Width of the window that is slid over the wide image when displaying it.
screen_width = 256
show_img = False
def nomralized(data, data_min, data_max, maximum_value):
    """Scale ``data`` linearly: ``data_min`` becomes 0, ``data_max`` becomes ``maximum_value``."""
    numerator, denominator = data - data_min, data_max - data_min
    return maximum_value * (numerator / denominator)
# Render the full-width image; the outer loop steps by image_width, which here
# equals len(volumes), so its body runs once.
start_time = time.time()
for ii in range(0, len(volumes), image_width):
# ===================== part to optimize start
final_image = np.zeros((image_heigh, image_width, image_channel))
start = ii
end = ii + image_width
current_vols = volumes[start:end]
# normalize data
vol_min = 0
vol_max = np.max(current_vols)
vol_norm = nomralized(data=current_vols,
data_min=vol_min,
data_max=vol_max,
maximum_value=image_heigh)
# One bar per column: rows [0, int(height)) of column xxx are set to 1 in all channels.
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
# ===================== part to optimize end
if show_img:
# Animation only: slide a screen_width-wide window over the finished image.
for start in range(0, final_image.shape[1] - screen_width):
image = np.float32(final_image[:, start : start + screen_width])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
cv2.imshow("ok", image)
cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
I also created animation image out of your data:
If you want to create same animation just append next piece of code to the end of script above:
# Needs: python -m pip install pillow
import PIL.Image
# One frame per 6-pixel step of a screen_width-wide window over final_image;
# the 0/1 pixel values are cast to uint8 and scaled to 0/255.
imgs = [PIL.Image.fromarray(final_image[:, start : start + screen_width].astype(np.uint8) * 255) for start in range(0, final_image.shape[1] - screen_width, 6)]
# Save the first frame with the remaining frames appended (save_all=True),
# requesting 100 per frame via `duration`.
imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
I've also implemented a simulation of real-time live-stream data rendering/visualization.
live_stream() is a generator that emits a random amount of data at random points in time, to simulate the data generation process.
stream_fetcher() listens to the live stream and records all received data into the Python queue q0; this fetcher runs in its own thread.
renderer() takes the data recorded by the fetcher and renders it into an image using your mathematical formulas and normalization process; it renders as much data as is available, resulting in images of varying widths, and the rendered images are put into another queue, q1.
visualizer() visualizes the rendered data by fetching as many rendered images as are available.
All functions run in separate threads so as not to block the whole process. Also, if any of the threads works too slowly, it skips some of the data to catch up with the current real-time data, so that no queue overflows.
You may also notice that the visualized process is jumpy; this is not because the functions are slow, but because the live stream emits a different amount of data at each time step, which is how real-time data usually behaves.
In the following code I also implemented the extra optimization mentioned before, namely not recomputing the image if the min/max did not change.
import cv2, numpy as np
import time, random, threading, queue
# Size in pixels of the rendered window: rows and (maximum) columns.
image_height = 256
image_width = 512
# Make results reproducible and deterministic
# (both NumPy's and the standard library's generators are used by live_stream).
np.random.seed(0)
random.seed(0)
def live_stream():
    """
    Endless generator simulating a live data feed.

    Each item is a float64 array of 1 to 20 values forming a random walk:
    the cumulative sum of uniform steps in [-1, 1), continuing from the last
    value of the previous item. After an item has been consumed the generator
    sleeps for up to 0.1 s before producing the next one.
    """
    previous_end = 0.
    while True:
        chunk_len = random.randint(1, 20)
        steps = np.random.uniform(low=-1., high=1., size=chunk_len)
        chunk = steps.astype(np.float64).cumsum() + previous_end
        yield chunk
        previous_end = chunk[-1]
        time.sleep(random.random() * 0.1)
# Raw chunks travel from the fetcher thread to the renderer through this queue.
q0 = queue.Queue()


def stream_fetcher():
    """Forward every chunk produced by live_stream() into q0."""
    for chunk in live_stream():
        q0.put(chunk)


# Daemon thread, so it does not keep the interpreter alive on exit.
threading.Thread(target=stream_fetcher, daemon=True).start()
# Column vector of row indices 0 .. image_height-1; compared against the
# per-column bar heights in renderer() to rasterize the bars.
aranges = np.arange(image_height, dtype = np.int32)[:, None]
# Rendered images travel from renderer() to visualizer() through this queue.
q1 = queue.Queue()
# Turn the chunks from q0 into bar-chart images of at most image_width columns
# and put them on q1. Cached columns are reused while the value range of the
# window is unchanged; otherwise the whole window is rendered again.
def renderer():
# Linear rescale: data_min -> 0, data_max -> maximum_value.
def normalized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
# Cache of the columns rendered so far and of the samples they were rendered from.
prev_image = np.zeros((image_height, 0), dtype = np.uint8)
prev_vols = np.zeros((0,), dtype = np.float64)
while True:
# Block until one chunk is available, then drain the rest without blocking.
data = []
data.append(q0.get())
try:
while True:
data.append(q0.get(block = False))
except queue.Empty:
pass
# Keep at most the newest image_width samples of the new data.
vols = np.concatenate(data)[-image_width:]
# Keep only as many old samples as still fit next to the new ones. When vols
# fills the whole window, -(0) is falsy, so the slice starts at prev_vols.size
# and the result is empty.
prev_vols = prev_vols[-(image_width - vols.size) or prev_vols.size:]
concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
# The new samples widen the value range of the window (beyond a 1e-8
# tolerance): drop the cached columns and render the whole window again.
if prev_vols.size > 0 and (vols_min < np.amin(prev_vols) - 10 ** -8 or vols_max > np.amax(prev_vols) + 10 ** -8):
vols = concat_vols
prev_image = prev_image[:, :-prev_vols.size]
prev_vols = prev_vols[:0]
# NOTE(review): vols_min == vols_max (e.g. a first chunk with a single sample)
# makes normalized() divide by zero -- confirm whether this needs handling.
vols_norm = normalized(
data = vols, data_min = vols_min,
data_max = vols_max, maximum_value = image_height,
)
# A pixel is 255 where its row index is below the integer bar height of its column.
image = (aranges < vols_norm.astype(np.int32)[None, :]).astype(np.uint8) * 255
# Append the new columns to the cached ones and keep the newest image_width columns.
whole_image = np.concatenate((prev_image, image), axis = 1)[:, -image_width:]
q1.put(whole_image)
prev_image = whole_image
prev_vols = concat_vols
threading.Thread(target = renderer, daemon = True).start()
def visualizer():
    """
    Show the rendered images from q1 with OpenCV and, when Pillow is
    installed, save the first 1000 displayed frames to 'result.png'.

    Runs forever; it is meant to be executed in its own thread.
    """
    # Resolve the optional Pillow dependency once. Only a failed import turns
    # recording off; any other error is allowed to surface.
    try:
        # Needs: python -m pip install pillow
        import PIL.Image
        imgs = []
    except ImportError:
        imgs = None
    while True:
        # Block until one image is available, then drain the rest without blocking.
        data = [q1.get()]
        try:
            while True:
                data.append(q1.get(block=False))
        except queue.Empty:
            pass
        # Keep the newest image_width columns of everything that was fetched.
        image = np.concatenate(data, axis=1)[:, -image_width:]
        cv2.imshow('ok', image)
        cv2.waitKey(1)
        # imgs is None when Pillow is missing or once the animation was saved.
        if imgs is not None:
            # Left-pad narrow images with zeros so that all frames share one width.
            padded = np.pad(
                image,
                ((0, 0), (image_width - image.shape[1], 0)),
                constant_values=0,
            )
            imgs.append(PIL.Image.fromarray(padded))
            if len(imgs) >= 1000:
                print('saving...', flush=True)
                imgs[0].save('result.png', append_images=imgs[1:], save_all=True, lossless=True, duration=100)
                imgs = None
                print('saved!', flush=True)


threading.Thread(target=visualizer, daemon=True).start()
# Keep the main thread alive; all workers are daemon threads.
while True:
    time.sleep(0.1)
Above live process simulation is rendered into result.png which I show down below:
I've also decided to improve visualization, by using more advanced matplotlib instead of cv2 to be able to show axes and doing real-time plot drawing. Visualization image is down below:
Next is a matplotlib-based code corresponding to last image above:
import cv2, numpy as np
import time, random, threading, queue
# Size in pixels of the rendered window: rows and (maximum) columns.
image_height = 256
image_width = 512
# The saved animation has round(save_nsec * fps) frames.
save_nsec = 20
# dpi sizes the matplotlib figure; fps sets the animation rate and also scales
# the pause between chunks in live_stream().
dpi, fps = 100, 15
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)
def live_stream():
    """
    Endless generator simulating a live data feed with sample positions.

    Each item is a tuple ``(values, first_index, last_index)``: ``values`` is
    a float64 array of 1 to 30 random-walk values (cumulative sum of uniform
    steps in [-1, 1), continuing from the previous item), and the two indices
    give the inclusive range of stream positions the values occupy. After an
    item has been consumed the generator sleeps for up to 2.2 / fps seconds.
    """
    previous_end = 0.
    next_index = 0
    while True:
        chunk_len = random.randint(1, 30)
        steps = np.random.uniform(low=-1., high=1., size=chunk_len)
        chunk = steps.astype(np.float64).cumsum() + previous_end
        yield chunk, next_index, next_index + chunk.size - 1
        next_index += chunk.size
        previous_end = chunk[-1]
        time.sleep(random.random() * 2.2 / fps)
# Raw (values, first_index, last_index) items travel from the fetcher thread
# to the renderer through this queue.
q0 = queue.Queue()


def stream_fetcher():
    """Forward every item produced by live_stream() into q0."""
    for item in live_stream():
        q0.put(item)


# Daemon thread, so it does not keep the interpreter alive on exit.
threading.Thread(target=stream_fetcher, daemon=True).start()
# Column vector of row indices 0 .. image_height-1; compared against the
# per-column bar heights in renderer() to rasterize the bars.
aranges = np.arange(image_height, dtype = np.int32)[:, None]
# Rendered images plus their axis ranges travel from renderer() to visualizer().
q1 = queue.Queue()
# Turn the items from q0 into bar-chart images of at most image_width columns
# and put (image, min x, max x, min value, max value) tuples on q1. Cached
# columns are reused while the value range of the window is unchanged.
def renderer():
# Linear rescale: data_min -> 0, data_max -> maximum_value.
def normalized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
# Cache of the columns rendered so far and of the samples they were rendered from.
prev_image = np.zeros((image_height, 0), dtype = np.uint8)
prev_vols = np.zeros((0,), dtype = np.float64)
while True:
# Block until one item is available, then drain the rest without blocking.
data = []
data.append(q0.get())
try:
while True:
data.append(q0.get(block = False))
except queue.Empty:
pass
# Split the items into their value arrays and the stream positions they span.
data_vols = [e[0] for e in data]
data_minx, data_maxx = data[0][1], data[-1][2]
# Keep at most the newest image_width samples of the new data.
vols = np.concatenate(data_vols)[-image_width:]
# Keep only as many old samples as still fit next to the new ones. When vols
# fills the whole window, -(0) is falsy, so the slice starts at prev_vols.size
# and the result is empty.
prev_vols = prev_vols[-(image_width - vols.size) or prev_vols.size:]
concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
# The new samples widen the value range of the window (beyond a 1e-8
# tolerance): drop the cached columns and render the whole window again.
if prev_vols.size > 0 and (vols_min < np.amin(prev_vols) - 10 ** -8 or vols_max > np.amax(prev_vols) + 10 ** -8):
vols = concat_vols
prev_image = prev_image[:, :-prev_vols.size]
prev_vols = prev_vols[:0]
# NOTE(review): vols_min == vols_max (e.g. a first chunk with a single sample)
# makes normalized() divide by zero -- confirm whether this needs handling.
vols_norm = normalized(
data = vols, data_min = vols_min,
data_max = vols_max, maximum_value = image_height,
)
# A pixel is 255 where its row index is below the integer bar height of its column.
image = (aranges < vols_norm.astype(np.int32)[None, :]).astype(np.uint8) * 255
# Append the new columns to the cached ones and keep the newest image_width columns.
whole_image = np.concatenate((prev_image, image), axis = 1)[:, -image_width:]
# The x range is counted back from the newest stream position by the image width.
q1.put((whole_image, data_maxx - whole_image.shape[1] + 1, data_maxx, vols_min, vols_max))
prev_image = whole_image
prev_vols = concat_vols
threading.Thread(target = renderer, daemon = True).start()
def visualizer():
    """
    Animate the rendered images from q1 with matplotlib, using the data
    ranges as axis extents, and save the animation to 'result.gif'.
    """
    import matplotlib.animation
    import matplotlib.pyplot as plt

    def images():
        """Yield (image, minx, maxx, miny, maxy) for every batch read from q1."""
        while True:
            # Block until one item is available, then drain the rest without blocking.
            data = [q1.get()]
            try:
                while True:
                    data.append(q1.get(block=False))
            except queue.Empty:
                pass
            # Axis limits of the batch: smallest lower bound and largest upper bound.
            minx = min(e[1] for e in data)
            maxx = max(e[2] for e in data)
            miny = min(e[3] for e in data)
            maxy = max(e[4] for e in data)
            image = np.concatenate([e[0] for e in data], axis=1)[:, -image_width:]
            # Left-pad narrow images with zeros to the full width, then repeat
            # the single channel three times to get an RGB array.
            image = np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values=0)
            image = np.repeat(image[:, :, None], 3, axis=-1)
            yield image, minx, maxx, miny, maxy

    it = images()
    im = None
    fig = plt.figure(figsize=(image_width / dpi, image_height / dpi), dpi=dpi)

    def animate_func(i):
        """Draw the next batch; the image artist is created on the first call."""
        nonlocal im
        image, minx, maxx, miny, maxy = next(it)
        print('.', end='', flush=True)
        if im is None:
            im = plt.imshow(image, interpolation='none', aspect='auto')
        else:
            im.set_array(image)
        im.set_extent((minx, maxx, miny, maxy))
        return [im]

    anim = matplotlib.animation.FuncAnimation(fig, animate_func, frames=round(save_nsec * fps), interval=1000 / fps)
    print('saving...', end='', flush=True)
    #anim.save('result.mp4', fps = fps, dpi = dpi, extra_args = ['-vcodec', 'libx264'])
    anim.save('result.gif', fps=fps, dpi=dpi, writer='imagemagick')
    print('saved!', end='', flush=True)
    plt.show()


threading.Thread(target=visualizer, daemon=True).start()
# Keep the main thread alive; all workers are daemon threads.
while True:
    time.sleep(0.1)
Then I've decided to play a bit and colored last image with RGB palette, the higher the peak is more red-ish it is, if it is more in the middle then it is more green-ish, if it is low enough then it is more blue-ish. Resulting image below was achieved by this coloring code:
And another one colored animation below, line-style instead of bar-style, with the help of this code:
I wrote some Python code that opens a USB camera and grabs frames from it. I use my code for an HTTP stream. For JPEG encoding I use the libturbojpeg library, on a 64-bit OS.
product: Raspberry Pi 3 Model B Rev 1.2
serial: 00000000f9307746
width: 64 bits
capabilities: smp cp15_barrier setend swp
I do some test with different resolutions.
Resolution FPS Time for encode
640 x 480 ~35 ~0.01
1280 x 720 ~17 ~0.028
And this is my code
import time
import os
import re
import uvc
from turbojpeg import TurboJPEG, TJPF_GRAY, TJSAMP_GRAY
# Shared TurboJPEG encoder, loaded from an explicit library path; used by ProcessJPG.run().
jpeg = TurboJPEG("/opt/libjpeg-turbo/lib64/libturbojpeg.so")
# Capture device; stays None until the camera has been opened further below.
camera = None
import numpy as np
from threading import Thread
class ProcessJPG(Thread):
    """Thread that encodes one frame with the module-level ``jpeg`` encoder."""

    def __init__(self, data):
        super().__init__()
        # Input for the encoder, and the slot that run() fills with its result.
        self.data = data
        self.jpeg_data = None

    def run(self):
        """Encode ``self.data`` and keep the result in ``self.jpeg_data``."""
        encoded = jpeg.encode(self.data)
        self.jpeg_data = encoded

    def get_frame(self):
        """Fetch a frame from the module-level ``camera`` into ``self.frame``."""
        self.frame = camera.get_frame()
# Open the second enumerated UVC device and select its capture mode.
# No `global camera` statement is needed (or valid) here: this runs at module
# level, where `camera` is already a module global.
dev_list = uvc.device_list()
print("devices: ", dev_list)
camera = uvc.Capture(dev_list[1]['uid'])
# Python comments start with `#`; `//` is the floor-division operator.
camera.frame_size = camera.frame_sizes[2]  # set 1280 x 720
camera.frame_rate = camera.frame_rates[0]  # set 30 fps
class GetFrame(Thread):
    """Thread that fetches a single frame from the module-level ``camera``."""

    def __init__(self):
        super().__init__()
        # Filled in by run(); None until the thread has done its work.
        self.frame = None

    def run(self):
        """Grab one frame and keep it in ``self.frame``."""
        self.frame = camera.get_frame()
# Frame-rate bookkeeping for the capture loop below.
# With _fps = -1 the `count_to_fps >= _fps` test is true on every iteration,
# so the FPS figure is recomputed after every frame.
_fps = -1
count_to_fps = 0
_real_fps = 0
# NOTE(review): this rebinds the name `time` from the module imported earlier
# to the function time.time.
from time import time
_real_fps = ""
cfps_time = time()
# Endless loop: grab a frame, JPEG-encode it, print the encode time and the FPS.
while True:
if camera:
# Grab one frame on a helper thread and wait for it right away.
t = GetFrame()
t.start()
t.join()
img = t.frame
timestamp = img.timestamp
# From here on `img` is the frame's pixel data rather than the frame object.
img = img.img
ret = 1
# Time only the encoding step.
t_start = time()
t = ProcessJPG(img)
t.start()
t.join()
jpg = t.jpeg_data
t_end = time()
print(t_end - t_start)
count_to_fps += 1
if count_to_fps >= _fps:
# FPS as the inverse of the time elapsed since the previous measurement.
t_to_fps = time() - cfps_time
_real_fps = 1.0 / t_to_fps
cfps_time = time()
count_to_fps = 0
print("FPS, ", _real_fps)
Encoding line is: jpeg.encode((self.data))
My question is: is it possible to increase the FPS for the 1280 x 720 resolution (e.g. to 30 fps), or should I use a more powerful device? When I look at htop during the computation, the CPU is not used at 100%.
EDIT:
Camera formats:
[video4linux2,v4l2 # 0xa705c0] Raw : yuyv422 : YUYV 4:2:2 : 640x480 1280x720 960x544 800x448 640x360 424x240 352x288 320x240 800x600 176x144 160x120 1280x800
[video4linux2,v4l2 # 0xa705c0] Compressed: mjpeg : Motion-JPEG : 640x480 1280x720 960x544 800x448 640x360 800x600 416x240 352x288 176x144 320x240 160x120
It is possible and you don't need more powerful hardware.
From the pyuvc
README.md,
* Capture instance will always grab mjpeg conpressed frames from cameras.
When your code accesses the .img property, that invokes jpeg2yuv (see
here and
here). Then
you are re-encoding with jpeg_encode(). Try using frame.jpeg_buffer after
the capture and don't touch .img at all.
I took a look at pyuvc on an RPi2 with a Logitech
C310 and made a
simplified example,
import uvc
import time
def main():
    """
    Capture 100 frames from the first UVC device at 1280x720 / 30 fps and
    print, per frame, the type and size of its JPEG buffer and the time
    elapsed since the previous frame.
    """
    devices = uvc.device_list()
    cap = uvc.Capture(devices[0]["uid"])
    cap.frame_mode = (1280, 720, 30)
    previous = time.time()
    for _ in range(100):
        frame = cap.get_frame_robust()
        payload = frame.jpeg_buffer
        print(f"{type(payload)} ({len(payload):d} bytes)")
        #img = frame.img
        current = time.time()
        print(f"{current - previous:.3f}")
        previous = current
    # Drop the reference to the capture object.
    cap = None


main()
I get ~.033s per frame, which works out to ~30fps at ~8%CPU. If I uncomment the #img = frame.img line it goes up to ~.054s/frame or ~18fps at 99%CPU (the decode time limits the capture rate).