Where can I set a default audio input in Pyaudio?

Where can I set a default audio input in Pyaudio? - python

I work on a sequencial started audio recorder on Raspberry pi, my problem is the following:
I tried several ways ,I described under the code, to get Pyaudio open a stream after the second time it recorded and saved the stream in .wav format.
in the second run it wont start over opening the stream and gives me this error,
I simply don't get why the default input device only gets accepted once is it my still used by the last stream even If I terminated it?
I am using an external USB as input since raspberry still has no option to take Mic. input.
I also took .open() and tried every possible input device I found.
Traceback (most recent call last):
File "/home/fabian/Documents/Zusammenschluss_REC()_mit_Interrupt_BACKUP1.py", line 122, in <module>
stream = audio.open(format= pyaudio.paInt16, channels = 1, rate=44100, input= True, frames_per_buffer=1024)
File "/home/fabian/.local/lib/python3.9/site-packages/pyaudio.py", line 754, in open
stream = Stream(self, *args, **kwargs)
File "/home/fabian/.local/lib/python3.9/site-packages/pyaudio.py", line 445, in __init__
self._stream = pa.open(**arguments)
OSError: [Errno -9996] Invalid input device (no default output device)
#imports to be made:
import RPi.GPIO as GPIO
import time
import os
import sys
import signal
import datetime
import pyaudio
import wave
#variables to be defined:
BUTTON_GPIO = 16
LED_REC_GPIO = 20
LED_ON_GPIO = 21
GPIO.setmode(GPIO.BCM)
GPIO.setwarnings(False)
global TIME_LAST_PRESSED
TIME_LAST_PRESSED = time.time()
global TIME_LAST_SWITCHED
TIME_LAST_SWITCHED = time.time()
global timeout
timeout = 1
TIME_MIN = 0.2
TIME_MAX = 0.6
Loop = True
REC = False
audio = pyaudio.PyAudio()
parent_dir = "/home/fabian/Dokumente/Speicherordner_Audiofiles"
global person
person = "fabian"
#Definition of functions:
def signal_handler(sig, frame):
GPIO.cleanup()
sys.exit(0)
def button_pressed_callback(channel):
print("Button has been pressed")
global TIME_LAST_PRESSED
global TIME_LAST_SWITCHED
global REC
print("CALLBACK activated, now checking sequence")
x = time.time() - TIME_LAST_PRESSED
print(x)
if x > TIME_MIN and x < TIME_MAX:
print("Sequence succeded! Sequence time:" + str(x))
y = time.time() - TIME_LAST_SWITCHED
if timeout < y:
TIME_LAST_SWITCHED = time.time()
REC = not REC
print("REC has now the value off:" + str(REC))
TIME_LAST_PRESSED = time.time()
#setup of GPIO:
GPIO.setmode(GPIO.BCM)
GPIO.setup(BUTTON_GPIO,GPIO.IN, pull_up_down=GPIO.PUD_UP)
GPIO.setup(LED_REC_GPIO, GPIO.OUT)
GPIO.setup(LED_ON_GPIO, GPIO.OUT)
GPIO.add_event_detect(BUTTON_GPIO, GPIO.FALLING, callback = button_pressed_callback, bouncetime = 100)
signal.signal(signal.SIGINT, signal_handler)
#setup of savingpath:
saving_path = parent_dir + "/" + person
if os.path.isfile(saving_path):
os.makedirs(saving_path)
#"Main" loop:
GPIO.output(LED_ON_GPIO, GPIO.HIGH)
while Loop:
if REC:
stream = audio.open(format= pyaudio.paInt16, channels = 1, rate=44100, input= True, frames_per_buffer=1024)
frames = []
REC_TIME = time.time()
GPIO.output(LED_REC_GPIO, GPIO.HIGH)
while REC:
data = stream.read(1024)
frames.append(data)
GPIO.remove_event_detect(BUTTON_GPIO)
stream.stop_stream()
stream.close()
audio.terminate()
now = datetime.datetime.now()
REC_TIME = time.time()-REC_TIME
date =str(now.day) + "." + str(now.month) + "." + str(now.year) + " " + str(now.hour) + ":" + str(now.minute) + " "
os.makedirs(saving_path +"/"+ date + str(REC_TIME))
sound_file = wave.open(saving_path + date + str(REC_TIME), "w")
sound_file.setnchannels(1)
sound_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
sound_file.setframerate(44100)
sound_file.writeframes(b"".join(frames))
sound_file.close()
GPIO.add_event_detect(BUTTON_GPIO, GPIO.FALLING, callback = button_pressed_callback, bouncetime = 100)
else:
GPIO.output(LED_REC_GPIO, GPIO.LOW)
if Loop == False:
print ("cleanup")
GPIO.cleanup
I created .asoundrc set the default card and device after I looked it up with aplay -l and arecord -l
I changed the default in alsa.conf
First run everything works just fine, second time in the recording loop it won't take the input device.

Related

Trying to record audio with python-sounddevice but result is just static

I'm trying to record some audio by using the sounddevice library in python. This is to create a sound-activated recording for a scanner.
I cannot use the record function as it requires a time specified which is not suitable for my program as each transmission may be of different length, so I added a callback to the stream and when the audio is above a certain level will trigger the recording by creating a new file and writing the buffer data to the wave file.
I have implemented the trigger successfully, however when I attempt to write to the file and open it, it is not what the transmission was, instead it is just corrupt static.
Here's my code:
import time as timer
import wave
import numpy as np
import sounddevice as sd
RecordActivate = False
StartTime = 0
def print_sound(indata, outdata, frames, time, status):
global RecordActivate
global StartTime
volume_norm = np.linalg.norm(indata) * 10
VolumeLevel = int(volume_norm)
global f
if RecordActivate == False:
print("itls false" + str(VolumeLevel))
if VolumeLevel > 16:
RecordActivate = True
print("Begin")
StartTime = timer.time()
ThingTime = timer.strftime(
"%Y-%m-%d %H:%M:%S", timer.localtime(timer.time())
)
print("Transmission detected at " + ThingTime)
f = wave.open("scan.wav", "w")
f.setnchannels(1)
f.setsampwidth(1)
f.setframerate(44100)
if RecordActivate == True:
if VolumeLevel < 16:
print("Transmission ceased.")
RecordActivate = False
else:
f.writeframes(indata.tobytes())
print("recording..")
with sd.Stream(callback=print_sound):
while True:
thing = 0

Using Python on Raspberry Pi, I'm unable to record audio with PyAudio via Thonny/terminal

Using the example from: https://realpython.com/playing-and-recording-sound-python/#python-sounddevice_1 , I'm getting the following error when using Thonny: "Backend terminated or disconnected. Use 'Stop/Restart' to restart.". When I run the program in terminal, I get this as the error: "OSError: [Errno -9981] Input overflowed". The example code (which isn't threaded) can work in both terminal and Thonny if I modify it to not throw an exception on overflow "data = stream.read(chunk, exception_on_overflow = False)" but does not work when in a new thread. I've also tried changing the chunk size to be larger and smaller to no avail. When I have the overflow exception for the threaded version, I get a different error in terminal: "Segmentation fault". I'm running Raspbian 10 Buster and Python 3.7.3, if someone could test/see if it works, thanks.
import time
import board
import busio
import digitalio
from adafruit_mcp230xx.mcp23017 import MCP23017
from adafruit_debouncer import Debouncer
i2c = busio.I2C(board.SCL, board.SDA)
mcp = MCP23017(i2c)
import threading
import pyaudio
import wave
import sys
import subprocess
record = False
def background_audio_recording():
#chunk = 1024 # Record in chunks of 1024 samples
chunk = 1024
sample_format = pyaudio.paInt16 # 16 bits per sample
channels = 2
fs = 44100 # Record at 44100 samples per second
#seconds = 3
filename = "output.wav"
p = pyaudio.PyAudio() # Create an interface to PortAudio
stream = p.open(format=sample_format,
channels=channels,
rate=fs,
frames_per_buffer=chunk,
#input_device_index = 2,
input=True)
frames = [] # Initialize array to store frames
while record:
#data = stream.read(chunk, exception_on_overflow = False)
data = stream.read(chunk)
frames.append(data)
'''
for i in range(0, int(fs / chunk * seconds)):
data = stream.read(chunk)
frames.append(data)
'''
# Stop and close the stream
stream.stop_stream()
stream.close()
# Terminate the PortAudio interface
p.terminate()
print('Finished recording')
# Save the recorded data as a WAV file
wf = wave.open(filename, 'wb')
wf.setnchannels(channels)
wf.setsampwidth(p.get_sample_size(sample_format))
wf.setframerate(fs)
wf.writeframes(b''.join(frames))
wf.close()
print("File Saved")
return
button1PinSetup = mcp.get_pin(0) # GPA0
button1PinSetup.direction = digitalio.Direction.INPUT
button1PinSetup.pull = digitalio.Pull.UP
button1Pin = Debouncer(button1PinSetup)
button2PinSetup = mcp.get_pin(1) # GPA1
button2PinSetup.direction = digitalio.Direction.INPUT
button2PinSetup.pull = digitalio.Pull.UP
button2Pin = Debouncer(button2PinSetup)
while True:
button1Pin.update()
button2Pin.update()
if button1Pin.fell:
print("Record")
if record == True:
print("Already Recording")
else:
record = True
threading.Thread(target=background_audio_recording).start()
if button2Pin.fell:
print("Recording Stopped")
record = False

It seems as though, I needed to create the PyAudio object as a global, as well as within the thread (for each additional recording attempt). I got it working with the following code
import pyaudio
import wave
import time
import threading
import board
import busio
import digitalio
from adafruit_mcp230xx.mcp23017 import MCP23017
from adafruit_debouncer import Debouncer
i2c = busio.I2C(board.SCL, board.SDA)
mcp = MCP23017(i2c)
form_1 = pyaudio.paInt16 # 16-bit resolution
chans = 2 # 1 channel
samp_rate = 44100 # 44.1kHz sampling rate
chunk = 4096 # 2^12 samples for buffer
#record_secs = 3 # seconds to record
dev_index = 2 # device index found by p.get_device_info_by_index(ii)
wav_output_filename = 'test1.wav' # name of .wav file
audio = pyaudio.PyAudio() # create pyaudio instantiation
record = False
def background_audio_recording():
audio = pyaudio.PyAudio() # create pyaudio instantiation
# create pyaudio stream
stream = audio.open(format = form_1,rate = samp_rate,channels = chans, \
input_device_index = dev_index,input = True, \
frames_per_buffer=chunk)
print("recording")
frames = []
# loop through stream and append audio chunks to frame array
#for ii in range(0,int((samp_rate/chunk)*record_secs)):
while record == True:
data = stream.read(chunk)
frames.append(data)
print("finished recording")
# stop the stream, close it, and terminate the pyaudio instantiation
stream.stop_stream()
stream.close()
audio.terminate()
# save the audio frames as .wav file
wavefile = wave.open(wav_output_filename,'wb')
wavefile.setnchannels(chans)
wavefile.setsampwidth(audio.get_sample_size(form_1))
wavefile.setframerate(samp_rate)
wavefile.writeframes(b''.join(frames))
wavefile.close()
return
button1PinSetup = mcp.get_pin(0) # GPA0
button1PinSetup.direction = digitalio.Direction.INPUT
button1PinSetup.pull = digitalio.Pull.UP
button1Pin = Debouncer(button1PinSetup)
button2PinSetup = mcp.get_pin(1) # GPA1
button2PinSetup.direction = digitalio.Direction.INPUT
button2PinSetup.pull = digitalio.Pull.UP
button2Pin = Debouncer(button2PinSetup)
while True:
button1Pin.update()
button2Pin.update()
if button1Pin.fell:
print("Record")
if record == True:
print("Already Recording")
else:
record = True
threading.Thread(target=background_audio_recording).start()
if button2Pin.fell:
print("Recording Stopped")
record = False

Real time continuous sounds with Pydub

I'm trying to make a program that generates sounds from a wifi probe log, so that the number of devices (within a certain distance) generate a tone, and that the rssi is to be the frequence.
I'm trying to make it as real time as possible, but can't figure out how to make the tones continuous and change the frequence based on value change.
'''
This program takes a log file from a
wifi probe and translates it into sound
'''
import time
import math #import needed modules
import pyaudio #sudo apt-get install python-pyaudio
import threading
from threading import Thread
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play
import signal
def logData():
'''
Takes log file data and puts it into database
updates every 1 sec
'''
global dic
global tone
tone = []
dic = {}
while True:
with open("/Users/CWT/Documents/VÆRKER/probemon.log") as f:
for line in f:
(key, val) = line.split()
if val <= str(-50):
dic[(key)] = val
print (dic)
time.sleep(1)
def sound():
'''
Generate sounds
'''
# Play final tone
while (True):
with open("/Users/CWT/Documents/VÆRKER/probemon.log") as i:
try:
tone1 = Sine(abs(int(list(dic.values())[0]))).to_audio_segment(3000)
tone2 = Sine(abs(int(list(dic.values())[1]))).to_audio_segment(3000)
tone3 = Sine(abs(int(list(dic.values())[2]))).to_audio_segment(3000)
except:
print('Index error')
try:
multitone1 = tone1
multitone2 = tone1.overlay(tone2)
multitone3 = tone3.overlay(multitone2)
except:
print('Multitone error')
try:
if len(dic) <= 1:
play(multitone1.fade_in(250).fade_out(250))
elif len(dic) == 2:
play(multitone2.fade_in(250).fade_out(250))
elif len(dic) >= 3:
play(multitone3.fade_in(250).fade_out(250))
except:
print('Playback error')
if __name__ == '__main__':
try:
Thread(target = logData).start()
time.sleep(1)
Thread(target = sound).start()
except KeyboardInterrupt:
print('Interrupted')

I was able to craft a boiler-plate solution that you can tailor per your needs.
Here's central idea
1) Read last line of log file using os.popen in a continuous loop that repeats every second
2) the RSSI value is very small and the difference between these values is also small. We multiply that by constant 100 here to create noticeable difference. You can try different values.
3) Using pydub we create sine tones and play them
Code
from pydub.generators import Sine
from pydub import AudioSegment
from pydub.playback import play
import os
import time
sr = 44100 # sample rate
bd = 16 # bit depth
l = 50.0 # duration in millisec
last_line = "" #to avoid same line played again
log_file = "probemon.log"
while True:
line = os.popen('tail -n 1 {}'.format(log_file)).read()
if last_line == line:
pass
else:
key, val = line.split()
f = abs(int(val)) * 100
#create sine wave of given freq
sine_wave = Sine(f, sample_rate=sr, bit_depth=bd)
#Convert waveform to audio_segment for playback and export
sine_segment = sine_wave.to_audio_segment(duration=l)
print "mac:{} , rssi:{}".format(key,val)
#Play audio segment
play(sine_segment)
last_line = line
time.sleep(1) #sleep 1 sec, synch this with log file fill
I tested by filling up probemon.log file line by line from a different terminal with a delay of 1 second. The loop will wait if there is no new data.
EDIT1
Audio "tone" have "frequency" , when you change Frequency, tone changes.
Based on our discussion, since we need tone to vary in real-time, we can't use pydub which is mostly good for offline manipulation.
pyaudio has a non-blocking method using callback, which allows manipulating stream data while its being played in realtime.
This solution plays audio based on last line of log continuously until log data changes.
This solution also eliminated popping / cracking sound that occurs when merging two tones.
Inspiration from here.
import pyaudio
import numpy as np
from time import time,sleep
import os
CHANNELS = 2
RATE = 44100
TT = time()
freq = 100
newfreq = 100
phase = 0
log_file = "probemon.log"
def callback(in_data, frame_count, time_info, status):
global TT,phase,freq,newfreq
if newfreq != freq:
phase = 2*np.pi*TT*(freq-newfreq)+phase
freq=newfreq
left = (np.sin(phase+2*np.pi*freq*(TT+np.arange(frame_count)/float(RATE))))
data = np.zeros((left.shape[0]*2,),np.float32)
data[0::2] = left #left data
data[1::2] = left #right data
TT+=frame_count/float(RATE)
return (data, pyaudio.paContinue)
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
channels=CHANNELS,
rate=RATE,
output=True,
stream_callback=callback)
stream.start_stream()
tmphold = ""
try:
while True:
line = os.popen('tail -n 1 {}'.format(log_file)).read()
try:
key, val = line.split()
except:
key, val = "default", 0.0
f = abs(int(val))
newfreq = f * 10 #update freq per log
if newfreq != tmphold:
tmphold = newfreq
print "mac:{} , rssi:{} , freq:{}
finally:
stream.stop_stream()
stream.close()
p.terminate()
Result
mac:default , rssi:0.0 , freq:0 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-43 , freq:430 Hz
mac:04:4f:4c:77:72:8f , rssi:-51 , freq:510 Hz
mac:20:39:56:af:51:49 , rssi:-39 , freq:390 Hz
mac:20:39:56:af:51:49 , rssi:-45 , freq:450 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-47 , freq:470 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-49 , freq:490 Hz
mac:12:84:16:9c:75:ee , rssi:-43 , freq:430 Hz
mac:da:a1:19:71:4d:0c , rssi:-55 , freq:550 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-49 , freq:490 Hz

Live recognition with Python and Pocketsphinx

I have recently been working with pocket sphinx in python. I have successfully got the
example below to work recognising a recorded wav.
#!/usr/bin/env python
import sys,os
def decodeSpeech(hmmd,lmdir,dictp,wavfile):
"""
Decodes a speech file
"""
try:
import pocketsphinx as ps
import sphinxbase
except:
print """Pocket sphinx and sphixbase is not installed
in your system. Please install it with package manager.
"""
speechRec = ps.Decoder(hmm = hmmd, lm = lmdir, dict = dictp)
wavFile = file(wavfile,'rb')
wavFile.seek(44)
speechRec.decode_raw(wavFile)
result = speechRec.get_hyp()
return result[0]
if __name__ == "__main__":
hmdir = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/hmm/wsj1"
lmd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.3e-7.vp.tg.lm.DMP"
dictd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.dic"
wavfile = "/home/jaganadhg/Desktop/Docs_New/kgisl/sa1.wav"
recognised = decodeSpeech(hmdir,lmd,dictd,wavfile)
print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
print recognised
print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
The problem is how can I do real time speech recognition from a microphone? In
a while loop with a if statement so that if a set word is recognised from the microphone
a function can be called?

The code for realtime recognition looks like this:
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)
import pyaudio
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
stream.start_stream()
in_speech_bf = False
decoder.start_utt()
while True:
buf = stream.read(1024)
if buf:
decoder.process_raw(buf, False, False)
if decoder.get_in_speech() != in_speech_bf:
in_speech_bf = decoder.get_in_speech()
if not in_speech_bf:
decoder.end_utt()
print 'Result:', decoder.hyp().hypstr
decoder.start_utt()
else:
break
decoder.end_utt()
You can also use gstreamer python bindings in pocketsphinx, check livedemo.py

Try this. Pocketsphinx is now a GStreamer plugin.

This is the code I see on the internet and I've modified a few things to really listen to the words very bad and slow
You can help me modify it for good. It is built on ubuntu 16.04 LTS
I do not know much about programming
Looking forward to help
# -*- encoding: utf-8 -*-
#!/usr/bin/env python
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math;import Mic
"""
Written by Sophie Li, 2016
http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
"""
class SpeechDetector:
def __init__(self):
# Microphone stream config.
self.CHUNK = 1024 # CHUNKS of bytes to read each time from mic
self.FORMAT = pyaudio.paInt16
self.CHANNELS = 1
self.RATE = 16000
self.SILENCE_LIMIT = 1 # Silence limit in seconds. The max ammount of seconds where
# only silence is recorded. When this time passes the
# recording finishes and the file is decoded
self.PREV_AUDIO = 0.5 # Previous audio (in seconds) to prepend. When noise
# is detected, how much of previously recorded audio is
# prepended. This helps to prevent chopping the beginning
# of the phrase.
self.THRESHOLD = 4500
self.num_phrases = -1
# These will need to be modified according to where the pocketsphinx folder is
MODELDIR = "/home/l/Desktop/pocketsphinx/model/en-us"
# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us/'))
config.set_string('-lm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us.lm.bin'))
config.set_string('-dict', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/cmudict-en-us.dict'))
config.set_string('-keyphrase', 'no one')
config.set_float('-kws_threshold', 1e+20)
# Creaders decoder object for streaming data.
self.decoder = Decoder(config)
def setup_mic(self, num_samples=50):
""" Gets average audio intensity of your mic sound. You can use it to get
average intensities while you're talking and/or silent. The average
is the avg of the .2 of the largest intensities recorded.
"""
#print "Getting intensity values from mic."
p = pyaudio.PyAudio()
stream = p.open(format=self.FORMAT,
channels=self.CHANNELS,
rate=self.RATE,
input=True,
frames_per_buffer=self.CHUNK)
values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
for x in range(num_samples)]
values = sorted(values, reverse=True)
r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
#print " Finished "
#print " Average audio intensity is ", r
stream.close()
p.terminate()
if r < 3000:
self.THRESHOLD = 3500
else:
self.THRESHOLD = r + 100
def save_speech(self, data, p):
"""
Saves mic data to temporary WAV file. Returns filename of saved
file
"""
filename = 'output_'+str(int(time.time()))
# writes data to WAV file
data = ''.join(data)
wf = wave.open(filename + '.wav', 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
wf.setframerate(16000) # TODO make this value a function parameter?
wf.writeframes(data)
wf.close()
return filename + '.wav'
def decode_phrase(self, wav_file):
self.decoder.start_utt()
stream = open(wav_file, "rb")
while True:
buf = stream.read(1024)
if buf:
self.decoder.process_raw(buf, False, False)
else:
break
self.decoder.end_utt()
words = []
[words.append(seg.word) for seg in self.decoder.seg()]
return words
def run(self):
"""
Listens to Microphone, extracts phrases from it and calls pocketsphinx
to decode the sound
"""
self.setup_mic()
#Open stream
p = pyaudio.PyAudio()
stream = p.open(format=self.FORMAT,
channels=self.CHANNELS,
rate=self.RATE,
input=True,
frames_per_buffer=self.CHUNK)
audio2send = []
cur_data = '' # current chunk of audio data
rel = self.RATE/self.CHUNK
slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
#Prepend audio from 0.5 seconds before noise was detected
prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
started = False
while True:
cur_data = stream.read(self.CHUNK)
slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
if sum([x > self.THRESHOLD for x in slid_win]) > 0:
if started == False:
print "Bắt đầu ghi âm"
started = True
audio2send.append(cur_data)
elif started:
print "Hoàn thành ghi âm"
filename = self.save_speech(list(prev_audio) + audio2send, p)
r = self.decode_phrase(filename)
print "RESULT: ", r
# hot word for me " no one" if r.count('one') and r.count("no") > 0 the end programs
if r.count("one") > 0 and r.count("no") > 0:
Mic.playaudiofromAudio().play("/home/l/Desktop/PROJECT/Audio/beep_hi.wav")
os.remove(filename)
return
# Removes temp audio file
os.remove(filename)
# Reset all
started = False
slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
prev_audio = deque(maxlen= 0.5 * rel)
audio2send = []
print "Chế độ nghe ..."
else:
prev_audio.append(cur_data)
print "* Hoàn thành nghe"
stream.close()
p.terminate()

How to capture a video (AND audio) in python, from a camera (or webcam)

i'm looking for a solution, either in linux or in windows, that allows me to
record video (+audio) from my webcam & microphone, simultaneously.
save it as a file.AVI (or mpg or whatever)
display the video on the screen while recording it
Compression is NOT an issue in my case, and i actually prefer to capture RAW and compress it later.
So far i've done it with an ActiveX component in VB which took care of everything, and i'd like to progress with python (the VB solution is unstable, unreliable).
so far i've seen code that captures VIDEO only, or individual frames...
I've looked so far at
OpenCV - couldn't find audio capture there
PyGame - no simultaneous audio capture (AFAIK)
VideoCapture - provide only single frames.
SimpleCV - no audio
VLC - binding to VideoLAN program into wxPthon - hopefully it will do (still investigating this option)
kivy - just heard about it, didn't manage to get it working under windows SO FAR.
The question - is there a video & audio capture library for python?
or - what are the other options if any?

Answer: No. There is no single library/solution in python to do video/audio recording simultaneously. You have to implement both separately and merge the audio and video signal in a smart way to end up with a video/audio file.
I got a solution for the problem you present. My code addresses your three issues:
Records video + audio from webcam and microphone simultaneously.
It saves the final video/audio file as .AVI
Un-commenting lines 76, 77 and 78 will make the video to be displayed to screen while recording.
My solution uses pyaudio for audio recording, opencv for video recording, and ffmpeg for muxing the two signals. To be able to record both simultaneously, I use multithreading. One thread records video, and a second one the audio. I have uploaded my code to github and also have included all the essential parts it here.
https://github.com/JRodrigoF/AVrecordeR
Note: opencv is not able to control the fps at which the webcamera does the recording. It is only able to specify in the encoding of the file the desired final fps, but the webcamera usually behaves differently according to specifications and light conditions (I found). So the fps have to be controlled at the level of the code.
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
class VideoRecorder():
# Video class based on openCV
def __init__(self):
self.open = True
self.device_index = 0
self.fps = 6 # fps should be the minimum constant rate at which the camera can
self.fourcc = "MJPG" # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (640,480) # video formats and sizes also depend and vary according to the camera used
self.video_filename = "temp_video.avi"
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
# Video starts being recorded
def record(self):
# counter = 1
timer_start = time.time()
timer_current = 0
while(self.open==True):
ret, video_frame = self.video_cap.read()
if (ret==True):
self.video_out.write(video_frame)
# print str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current)
self.frame_counts += 1
# counter += 1
# timer_current = time.time() - timer_start
time.sleep(0.16)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
# 0.16 delay -> 6 fps
#
# Finishes the video recording therefore the thread too
def stop(self):
if self.open==True:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
else:
pass
# Launches the video recording function using a thread
def start(self):
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
# Audio class based on pyAudio and Wave
def __init__(self):
self.open = True
self.rate = 44100
self.frames_per_buffer = 1024
self.channels = 2
self.format = pyaudio.paInt16
self.audio_filename = "temp_audio.wav"
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
# Audio starts being recorded
def record(self):
self.stream.start_stream()
while(self.open == True):
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
if self.open==False:
break
# Finishes the audio recording therefore the thread too
def stop(self):
if self.open==True:
self.open = False
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
pass
# Launches the audio recording function using a thread
def start(self):
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder()
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename):
global audio_thread
audio_thread = AudioRecorder()
audio_thread.start()
return filename
def stop_AVrecording(filename):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print "total frames " + str(frame_counts)
print "elapsed time " + str(elapsed_time)
print "recorded fps " + str(recorded_fps)
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
# Merging audio and video signal
if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
print "Re-encoding"
cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
subprocess.call(cmd, shell=True)
print "Muxing"
cmd = "ffmpeg -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
else:
print "Normal recording\nMuxing"
cmd = "ffmpeg -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
print ".."
# Required and wanted processing of final files
def file_manager(filename):
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
if os.path.exists(str(local_path) + "/" + filename + ".avi"):
os.remove(str(local_path) + "/" + filename + ".avi")

To the questions asked above: Yes the code should also works under Python3. I adjusted it a little bit and now works for python2 and python3 (tested it on windows7 with 2.7 and 3.6, though you need to have ffmpeg installed or the executable ffmpeg.exe at least in the same directory, you can get it here: https://www.ffmpeg.org/download.html ). Of course you also need all the other libraries cv2, numpy, pyaudio, installed like herewith:
pip install opencv-python numpy pyaudio
You can now run the code directly:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
class VideoRecorder():
"Video class based on openCV"
def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
self.open = True
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
self.video_filename = name
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
# counter = 1
timer_start = time.time()
timer_current = 0
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
# print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
self.frame_counts += 1
# counter += 1
# timer_current = time.time() - timer_start
time.sleep(1/self.fps)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
def stop(self):
"Finishes the video recording therefore the thread too"
if self.open:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
def start(self):
"Launches the video recording function using a thread"
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, filename="temp_audio.wav", rate=44100, fpb=1024, channels=2):
self.open = True
self.rate = rate
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
while self.open:
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
if not self.open:
break
def stop(self):
"Finishes the audio recording therefore the thread too"
if self.open:
self.open = False
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
def start(self):
"Launches the audio recording function using a thread"
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename="test"):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder()
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename="test"):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename="test"):
global audio_thread
audio_thread = AudioRecorder()
audio_thread.start()
return filename
def stop_AVrecording(filename="test"):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
# Merging audio and video signal
if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
print("Re-encoding")
cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
subprocess.call(cmd, shell=True)
print("Muxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
else:
print("Normal recording\nMuxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
print("..")
def file_manager(filename="test"):
"Required and wanted processing of final files"
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
# if os.path.exists(str(local_path) + "/" + filename + ".avi"):
# os.remove(str(local_path) + "/" + filename + ".avi")
if __name__ == '__main__':
start_AVrecording()
time.sleep(5)
stop_AVrecording()
file_manager()

I would recommend ffmpeg. There is a python wrapper.
http://code.google.com/p/pyffmpeg/

I've been looking around for a good answer to this, and I think it is GStreamer...
The documentation for the python bindings is extremely light, and most of it seemed centered around the old 0.10 version of GStreamer instead of the new 1.X versions, but GStreamer is a extremely powerful, cross-platform multimedia framework that can stream, mux, transcode, and display just about anything.

I used JRodrigoF's script for a while on a project. However, I noticed that sometimes the threads would hang and it would cause the program to crash. Another issue is that openCV does not capture video frames at a reliable rate and ffmpeg would distort my video when re-encoding.
I came up with a new solution that records much more reliably and with much higher quality for my application. It presently only works for Windows because it uses pywinauto and the built-in Windows Camera app. The last bit of the script does some error-checking to confirm the video successfully recorded by checking the timestamp of the name of the video.
https://gist.github.com/mjdargen/956cc968864f38bfc4e20c9798c7d670
import pywinauto
import time
import subprocess
import os
import datetime
def win_record(duration):
subprocess.run('start microsoft.windows.camera:', shell=True) # open camera app
# focus window by getting handle using title and class name
# subprocess call opens camera and gets focus, but this provides alternate way
# t, c = 'Camera', 'ApplicationFrameWindow'
# handle = pywinauto.findwindows.find_windows(title=t, class_name=c)[0]
# # get app and window
# app = pywinauto.application.Application().connect(handle=handle)
# window = app.window(handle=handle)
# window.set_focus() # set focus
time.sleep(2) # have to sleep
# take control of camera window to take video
desktop = pywinauto.Desktop(backend="uia")
cam = desktop['Camera']
# cam.print_control_identifiers()
# make sure in video mode
if cam.child_window(title="Switch to Video mode", auto_id="CaptureButton_1", control_type="Button").exists():
cam.child_window(title="Switch to Video mode", auto_id="CaptureButton_1", control_type="Button").click()
time.sleep(1)
# start then stop video
cam.child_window(title="Take Video", auto_id="CaptureButton_1", control_type="Button").click()
time.sleep(duration+2)
cam.child_window(title="Stop taking Video", auto_id="CaptureButton_1", control_type="Button").click()
# retrieve vids from camera roll and sort
dir = 'C:/Users/m/Pictures/Camera Roll'
all_contents = list(os.listdir(dir))
vids = [f for f in all_contents if "_Pro.mp4" in f]
vids.sort()
vid = vids[-1] # get last vid
# compute time difference
vid_time = vid.replace('WIN_', '').replace('_Pro.mp4', '')
vid_time = datetime.datetime.strptime(vid_time, '%Y%m%d_%H_%M_%S')
now = datetime.datetime.now()
diff = now - vid_time
# time different greater than 2 minutes, assume something wrong & quit
if diff.seconds > 120:
quit()
subprocess.run('Taskkill /IM WindowsCamera.exe /F', shell=True) # close camera app
print('Recorded successfully!')
win_record(2)

If you notice misalignment between video and audio by the code above, please see my solution below
I think the most rated answer above does a great job. However, it did not work perfectly when I was using it,especially when you use a low fps rate (say 10). The main issue is with video recording. In order to properly synchronize video and audio recording with ffmpeg, one has to make sure that cv2.VideoCapture() and cv2.VideoWriter() share exact same FPS, because the recorded video time length is solely determined by fps rate and the number of frames.
Following is my suggested update:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
import ffmpeg
class VideoRecorder():
"Video class based on openCV"
def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
self.open = True
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
self.video_filename = name
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_cap.set(cv2.CAP_PROP_FPS, self.fps)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
# counter = 1
timer_start = time.time()
timer_current = 0
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
# print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
self.frame_counts += 1
# print(self.frame_counts)
# counter += 1
# timer_current = time.time() - timer_start
# time.sleep(1/self.fps)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
def stop(self):
"Finishes the video recording therefore the thread too"
if self.open:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
def start(self):
"Launches the video recording function using a thread"
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, filename="temp_audio.wav", rate=44100, fpb=1024, channels=2):
self.open = True
self.rate = rate
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
while self.open:
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
if not self.open:
break
def stop(self):
"Finishes the audio recording therefore the thread too"
if self.open:
self.open = False
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
def start(self):
"Launches the audio recording function using a thread"
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename="test"):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder()
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename="test"):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename="test"):
global audio_thread
audio_thread = AudioRecorder()
audio_thread.start()
return filename
def stop_AVrecording(filename="test"):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
video_stream = ffmpeg.input(video_thread.video_filename)
audio_stream = ffmpeg.input(audio_thread.audio_filename)
ffmpeg.output(audio_stream, video_stream, 'out.mp4').run(overwrite_output=True)
# # Merging audio and video signal
# if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
# print("Re-encoding")
# cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
# subprocess.call(cmd, shell=True)
# print("Muxing")
# cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
# subprocess.call(cmd, shell=True)
# else:
# print("Normal recording\nMuxing")
# cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
# subprocess.call(cmd, shell=True)
# print("..")
def file_manager(filename="test"):
"Required and wanted processing of final files"
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
# if os.path.exists(str(local_path) + "/" + filename + ".avi"):
# os.remove(str(local_path) + "/" + filename + ".avi")
if __name__ == '__main__':
start_AVrecording()
# try:
# while True:
# pass
# except KeyboardInterrupt:
# stop_AVrecording()
time.sleep(10)
stop_AVrecording()
print("finishing recording")
file_manager()

Using everyone's contributions and following the suggestion of Paul
I was able to come up with the following code:
Recorder.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import sys
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
import ffmpeg
REC_FOLDER = "recordings/"
class Recorder():
def __init__(self, filename):
self.filename = filename
self.video_thread = self.VideoRecorder(self, REC_FOLDER + filename)
self.audio_thread = self.AudioRecorder(self, REC_FOLDER + filename)
def startRecording(self):
self.video_thread.start()
self.audio_thread.start()
def stopRecording(self):
self.video_thread.stop()
self.audio_thread.stop()
def saveRecording(self):
#Save audio / Show video resume
self.audio_thread.saveAudio()
self.video_thread.showFramesResume()
#Merges both streams and writes
video_stream = ffmpeg.input(self.video_thread.video_filename)
audio_stream = ffmpeg.input(self.audio_thread.audio_filename)
while (not os.path.exists(self.audio_thread.audio_filename)):
print("waiting for audio file to exit...")
stream = ffmpeg.output(video_stream, audio_stream, REC_FOLDER + self.filename +".mp4")
try:
ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)
except ffmpeg.Error as e:
print(e.stdout, file=sys.stderr)
print(e.stderr, file=sys.stderr)
class VideoRecorder():
"Video class based on openCV"
def __init__(self, recorder, name, fourcc="MJPG", frameSize=(640,480), camindex=0, fps=15):
self.recorder = recorder
self.open = True
self.duration = 0
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.video_filename = name + ".avi" # video formats and sizes also depend and vary according to the camera used
self.video_cap = cv2.VideoCapture(self.device_index, cv2.CAP_DSHOW)
self.video_writer = cv2.VideoWriter_fourcc(*fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
counter = 1
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
self.frame_counts += 1
counter += 1
self.duration += 1/self.fps
if (video_frame is None): print("I WAS NONEEEEEEEEEEEEEEEEEEEEEE")
gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
cv2.imshow('video_frame', gray)
cv2.waitKey(1)
while(self.duration - self.recorder.audio_thread.duration >= 0.2 and self.recorder.audio_thread.open):
time.sleep(0.2)
else:
break
#Release Video
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
self.video_out = None
def stop(self):
"Finishes the video recording therefore the thread too"
self.open=False
def start(self):
"Launches the video recording function using a thread"
self.thread = threading.Thread(target=self.record)
self.thread.start()
def showFramesResume(self):
#Only stop of video has all frames
frame_counts = self.frame_counts
elapsed_time = time.time() - self.start_time
recorded_fps = self.frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, recorder, filename, rate=44100, fpb=1024, channels=1, audio_index=0):
self.recorder = recorder
self.open = True
self.rate = rate
self.duration = 0
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename + ".wav"
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
input_device_index=audio_index,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
t_start = time.time_ns()
while self.open:
try:
self.duration += self.frames_per_buffer / self.rate
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
except Exception as e:
print('\n' + '*'*80)
print('PyAudio read exception at %.1fms\n' % ((time.time_ns() - t_start)/10**6))
print(e)
print('*'*80 + '\n')
while(self.duration - self.recorder.video_thread.duration >= 0.5):
time.sleep(0.5)
#Closes audio stream
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
def stop(self):
"Finishes the audio recording therefore the thread too"
self.open = False
def start(self):
"Launches the audio recording function using a thread"
self.thread = threading.Thread(target=self.record)
self.thread.start()
def saveAudio(self):
#Save Audio File
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
Main.py
from recorder import Recorder
import time
recorder = Recorder("test1")
recorder.startRecording()
time.sleep(240)
recorder.stopRecording()
recorder.saveRecording()
With this solution, the camera and the audio will wait for each other.
I also tried the FFmpeg Re-encoding and Muxing and even though it was able to synchronize the audio with video, the video had a massive quality drop.

You can do offline html,js code to do video with audio recording. Using python lib python webview open that page. It should work fine.

I was randomly getting "[Errno -9999] Unanticipated host error" while using JRodrigoF's solution and found that it's due to a race condition where an audio stream can be closed just before being read for the last time inside record() of AudioRecorder class.
I modified slightly so that all the closing procedures are done after the while loop and added a function list_audio_devices() that shows the list of audio devices to select from. I also added an audio device index as a parameter to choose an audio device.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
class VideoRecorder():
"Video class based on openCV"
def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
self.open = True
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
self.video_filename = name
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
# counter = 1
timer_start = time.time()
timer_current = 0
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
# print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
self.frame_counts += 1
# counter += 1
# timer_current = time.time() - timer_start
time.sleep(1/self.fps)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
def stop(self):
"Finishes the video recording therefore the thread too"
if self.open:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
def start(self):
"Launches the video recording function using a thread"
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, filename="temp_audio.wav", rate=44100, fpb=2**12, channels=1, audio_index=0):
self.open = True
self.rate = rate
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
input_device_index=audio_index,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
t_start = time.time_ns()
while self.open:
try:
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
except Exception as e:
print('\n' + '*'*80)
print('PyAudio read exception at %.1fms\n' % ((time.time_ns() - t_start)/10**6))
print(e)
print('*'*80 + '\n')
time.sleep(0.01)
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
def stop(self):
"Finishes the audio recording therefore the thread too"
if self.open:
self.open = False
def start(self):
"Launches the audio recording function using a thread"
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename="test", audio_index=0, sample_rate=44100):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder(audio_index=audio_index, rate=sample_rate)
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename="test"):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename="test", audio_index=0, sample_rate=44100):
global audio_thread
audio_thread = AudioRecorder(audio_index=audio_index, rate=sample_rate)
audio_thread.start()
return filename
def stop_AVrecording(filename="test"):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
# Merging audio and video signal
if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
print("Re-encoding")
cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
subprocess.call(cmd, shell=True)
print("Muxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
else:
print("Normal recording\nMuxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
print("..")
def file_manager(filename="test"):
"Required and wanted processing of final files"
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
# if os.path.exists(str(local_path) + "/" + filename + ".avi"):
# os.remove(str(local_path) + "/" + filename + ".avi")
def list_audio_devices(name_filter=None):
pa = pyaudio.PyAudio()
device_index = None
sample_rate = None
for x in range(pa.get_device_count()):
info = pa.get_device_info_by_index(x)
print(pa.get_device_info_by_index(x))
if name_filter is not None and name_filter in info['name']:
device_index = info['index']
sample_rate = int(info['defaultSampleRate'])
break
return device_index, sample_rate
if __name__ == '__main__':
start_AVrecording()
time.sleep(5)
stop_AVrecording()
file_manager()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Where can I set a default audio input in Pyaudio? - python

Related

Trying to record audio with python-sounddevice but result is just static

Using Python on Raspberry Pi, I'm unable to record audio with PyAudio via Thonny/terminal

Real time continuous sounds with Pydub

Live recognition with Python and Pocketsphinx

How to capture a video (AND audio) in python, from a camera (or webcam)

Categories

Resources