PyAudio -- How to capture microphone and system sounds in a single stream? - python

I'm trying to build an app with PyAudio that records both speaker and microphone sound, but I don't know how to record them at the same time. I tried enabling Stereo Mix, but it didn't work out because I could only hear the sound from the speakers.
This code that i'm using records audio from the default microphone in a background thread using pyaudio:
import pyaudio
import wave
import threading
import time
import subprocess
# Number of frames requested per read; also passed as frames_per_buffer.
CHUNK = 1024
# Sample format handed to PyAudio when the stream is opened.
FORMAT = pyaudio.paInt16
# Number of channels requested from the input device.
CHANNELS = 2
# Sample rate passed to the stream and written to the WAV header.
RATE = 44100
# Not referenced by the recorder class that follows, which runs until stopped.
RECORD_SECONDS = 5
# Path of the WAV file written once recording stops.
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"
class recorder:
    """Record the default input device to a WAV file on a background thread.

    Call ``record()`` to start capturing and ``stop_recording()`` to finish;
    the audio is written to ``WAVE_OUTPUT_FILENAME`` when the worker stops.
    """

    def __init__(self):
        self.going = False      # True while the worker should keep reading
        self.process = None     # the background threading.Thread, if any
        self.filename = "ScreenCapture.mpg"

    def record(self, filename):
        """Start a new recording, first stopping one that is still running.

        Args:
            filename: stored on ``self.filename``; the audio itself is always
                written to ``WAVE_OUTPUT_FILENAME``.
        """
        previous = self.process
        if previous is not None and previous.is_alive():
            # Let the old worker finish (and write its file) before starting
            # a new one, so two workers never share the same stop flag.
            self.going = False
            previous.join()
        self.filename = filename
        # Raise the flag before the thread starts so that a stop_recording()
        # issued right after record() is not overwritten by the worker.
        self.going = True
        self.process = threading.Thread(target=self._record)
        self.process.start()

    def _record(self):
        """Worker body: read chunks until ``going`` is cleared, then save."""
        audio = pyaudio.PyAudio()
        frames = []
        try:
            sample_width = audio.get_sample_size(FORMAT)
            stream = audio.open(format=FORMAT,
                                channels=CHANNELS,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                print("* recording")
                while self.going:
                    frames.append(stream.read(CHUNK))
                print("* done recording")
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            # Release PortAudio even if opening or reading failed.
            audio.terminate()
            self.going = False

        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))

    def stop_recording(self):
        """Ask the worker to stop; it writes the WAV file before exiting."""
        self.going = False
I'm using Windows, and I can also use other libraries, not only PyAudio. I just need to get this to work.
Edit: I found this code to record the output from the speakers, but I couldn't make it work in my application:
import pyaudio
import wave
# Capture settings: frames per read, sample format, channels, rate, duration.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
# Host API index of the default output device.
# NOTE(review): `as_loopback` presumably requires a PyAudio build with
# loopback support - confirm against the installed package.
SPEAKERS = p.get_default_output_device_info()["hostApi"]
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_host_api_specific_stream_info=SPEAKERS,
                as_loopback=True)

print("* recording")
frames = []
# Read whole chunks until roughly RECORD_SECONDS of audio has been captured.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
# Query the sample width before PortAudio is shut down.
sample_width = p.get_sample_size(FORMAT)
p.terminate()

with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
UPDATE :
I could record both my speaker and my microphone with this:
import pyaudio
import wave
import numpy as np
CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 2
WAVE_OUTPUT_FILENAME = "tmp.wav"

p = pyaudio.PyAudio()

# Print every device index and name so the indices used below can be chosen.
for i in range(0, p.get_device_count()):
    print(i, p.get_device_info_by_index(i)['name'])

# Stream using as_loopback to get sound from the OS (stereo).
stream = p.open(
    format=FORMAT,
    channels=2,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=2,
    as_loopback=True)

# Stream using the microphone's input device (mono).
stream2 = p.open(
    format=FORMAT,
    channels=1,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=1)

frames = []
frames2 = []
# Read one chunk from each stream per iteration so both stay the same length.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    frames.append(stream.read(CHUNK))
    frames2.append(stream2.read(CHUNK))

# Stop all streams and terminate PyAudio.
stream.stop_stream()
stream.close()
stream2.stop_stream()
stream2.close()
sample_width = p.get_sample_size(FORMAT)
p.terminate()

# Decode the raw bytes: speaker data is interleaved stereo, mic data is mono.
Sdecoded = np.frombuffer(b''.join(frames), dtype=np.int16)
Mdecoded = np.frombuffer(b''.join(frames2), dtype=np.int16)

# Split the speaker channels and widen to 32 bits: adding int16 arrays wraps
# around on overflow, which would make the clip below useless.
esquerdo = Sdecoded[::2].astype(np.int32)    # left channel
direito = Sdecoded[1::2].astype(np.int32)    # right channel

# Mix everything to mono: left + right + microphone.
mix = esquerdo + direito + Mdecoded

# Clamp to the 16-bit range and encode as little-endian samples for WAV.
limits = np.iinfo(np.int16)
signal = np.clip(mix, limits.min, limits.max).astype('<i2')
encodecoded = signal.tobytes()

# Write the mixed audio as a mono WAV file.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(encodecoded)
But when I try to combine it with my own code, it doesn't work. What is going on? I think I'm close to solving it.
import numpy as np
import pyaudio
import wave
import threading
import time
import subprocess
# Capture settings shared by the recorder class that follows.
CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"

p = pyaudio.PyAudio()
# Print every device index and name so the right input indices can be picked.
for i in range(0, p.get_device_count()):
    print(i, p.get_device_info_by_index(i)['name'])
class recorder:
    """Record speaker loopback and microphone together into one mono WAV.

    A background thread reads both devices chunk by chunk until
    ``stop_recording()`` is called, then mixes them and writes
    ``WAVE_OUTPUT_FILENAME``.
    """

    def __init__(self, speaker_device_index=2, mic_device_index=1):
        """
        Args:
            speaker_device_index: PyAudio index of the device opened with
                ``as_loopback=True`` (stereo).
            mic_device_index: PyAudio index of the microphone device (mono).
        """
        self.going = False      # True while the worker should keep reading
        self.process = None     # the background threading.Thread, if any
        self.filename = "ScreenCapture.mpg"
        self.speaker_device_index = speaker_device_index
        self.mic_device_index = mic_device_index

    def record(self, filename):
        """Start a new recording, first stopping one that is still running.

        Args:
            filename: stored on ``self.filename``; the audio itself is always
                written to ``WAVE_OUTPUT_FILENAME``.
        """
        previous = self.process
        if previous is not None and previous.is_alive():
            # Let the old worker finish (and write its file) before starting
            # a new one, so two workers never share the same stop flag.
            self.going = False
            previous.join()
        self.filename = filename
        # Raise the flag before the thread starts so that a stop_recording()
        # issued right after record() is not overwritten by the worker.
        self.going = True
        self.process = threading.Thread(target=self._record)
        self.process.start()

    @staticmethod
    def _mix_to_mono(speaker_bytes, mic_bytes):
        """Mix interleaved 16-bit stereo and 16-bit mono audio to mono bytes.

        Args:
            speaker_bytes: raw int16 samples, left/right interleaved.
            mic_bytes: raw int16 mono samples.

        Returns:
            Little-endian 16-bit mono samples (left + right + mic), clamped to
            the int16 range and truncated to the shortest input.
        """
        speaker = np.frombuffer(speaker_bytes, dtype=np.int16)
        mic = np.frombuffer(mic_bytes, dtype=np.int16)
        # Widen before adding: int16 + int16 wraps around on overflow, which
        # would defeat the clip below.
        left = speaker[::2].astype(np.int32)
        right = speaker[1::2].astype(np.int32)
        length = min(len(left), len(right), len(mic))
        mixed = left[:length] + right[:length] + mic[:length]
        limits = np.iinfo(np.int16)
        return np.clip(mixed, limits.min, limits.max).astype('<i2').tobytes()

    def _record(self):
        """Worker body: capture both devices until stopped, then mix and save."""
        audio = pyaudio.PyAudio()
        speaker_frames = []
        mic_frames = []
        opened = []
        try:
            sample_width = audio.get_sample_size(FORMAT)
            # Stream using as_loopback to get sound from the OS.
            speaker_stream = audio.open(
                format=FORMAT,
                channels=2,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_device_index=self.speaker_device_index,
                as_loopback=True)
            opened.append(speaker_stream)
            # Stream using the microphone's input device.
            mic_stream = audio.open(
                format=FORMAT,
                channels=1,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_device_index=self.mic_device_index)
            opened.append(mic_stream)

            # One chunk from each device per pass keeps the two in step.
            while self.going:
                speaker_frames.append(speaker_stream.read(CHUNK))
                mic_frames.append(mic_stream.read(CHUNK))
        finally:
            # Release every stream that was opened, even after a failure.
            for active in opened:
                active.stop_stream()
                active.close()
            audio.terminate()
            self.going = False

        encoded = self._mix_to_mono(b''.join(speaker_frames),
                                    b''.join(mic_frames))
        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(encoded)

    def stop_recording(self):
        """Ask the worker to stop; it mixes and writes the WAV before exiting."""
        self.going = False
I kept the code clean and commented each part so you can understand what is going on. I added a for loop at the beginning so that PyAudio shows me which interfaces I have in my OS:
0 Mapeador de som da Microsoft - Input
1 Microfone (Realtek(R) Audio)
2 Mixagem estéreo (Realtek(R) Aud
3 Mapeador de som da Microsoft - Output
4 Alto-falantes (Realtek(R) Audio
5 Alto-falantes (Realtek(R) Audio)
6 Microfone (Realtek(R) Audio)
7 Mixagem estéreo (Realtek(R) Audio)
8 Speakers 1 (Realtek HD Audio output with SST)
9 Speakers 2 (Realtek HD Audio output with SST)
10 Alto-falante (Realtek HD Audio output with SST)
11 Microfone (Realtek HD Audio Mic input)
12 Mixagem estéreo (Realtek HD Audio Stereo input)

You can use two separate threads to record from two different devices (providing a separate device index for each) into separate WAV files.
Then mix these two files using the pydub library:
from pydub import AudioSegment
# Load the speaker and microphone recordings, in that order.
speaker_track, mic_track = (
    AudioSegment.from_file(path)
    for path in ("/path/speaker.wav", "/path/mic.wav")
)
# Overlay the microphone track on the speaker track and export the mix as WAV.
speaker_track.overlay(mic_track).export("/path/mixsound.wav", format='wav')

Related

Trigger (py)audio recording with key presses

I am trying to write a script that records audio when certain keys are pressed and stops recording when the keys are released, and saves the audio to a .wav.
The program starts and activates my microphone, but it never seems to actually record, and removing my fingers from the Control + Shift keys does not seem to have any effect. At any rate, I do not end up with a .wav file.
import keyboard
import pyaudio
import wave
import time
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)
print("* recording")
frames = []

# NOTE(review): the original indentation was lost; the read loop is assumed
# to sit inside the polling loop - confirm against the original post.
# With that nesting, each pass captures RECORD_SECONDS of audio and the
# hotkey is only checked between passes.
while True:
    if keyboard.is_pressed('ctrl+shift'):
        print('You Pressed A Key!')
        break
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

# The WAV file is only written once the polling loop has been left.
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()

Why in pyaudio paUInt8 format not working?

I am trying to record my mic with pyaudio. The problem is that when record using
FORMAT = pyaudio.paUInt8
I cannot hear any sound when the recorded file is played. But if I use the paInt16 format, I can hear the recorded voice without any problem. I am using VLC player for playback. My code is below:
import pyaudio
import wave
import threading
# FORMAT = pyaudio.paInt16  # working properly
FORMAT = pyaudio.paUInt8  # Not hearing any sound on play back
CHANNELS = 1
RATE = 8000
CHUNK = 2040
WAVE_OUTPUT_FILENAME = "file.wav"
stop_ = False  # set by the input thread to end the capture loop

audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS,
                    rate=RATE, input=True, input_device_index=0,
                    frames_per_buffer=CHUNK)


def stop():
    """Wait for an empty line on stdin, then request the capture to stop."""
    global stop_
    while True:
        if not input('Press Enter >>>'):
            print('exit')
            stop_ = True
            # Nothing left to wait for once the stop has been requested.
            break


# Keep the Thread object itself; Thread.start() returns None.
t = threading.Thread(target=stop, daemon=True)
t.start()

frames = []
# Always read at least one chunk, then stop once the flag has been raised.
while True:
    data = stream.read(CHUNK)
    frames.append(data)
    if stop_:
        break

stream.stop_stream()
stream.close()
sample_width = audio.get_sample_size(FORMAT)
audio.terminate()

with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(sample_width)
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(frames))
If I convert the file recorded with pyaudio.paInt16 to uint8 format using Audacity, it plays fine in VLC.

Python - How to record system audio(The output from the speaker)?

I have been searching for this since last week. I also tried PyAudio, and when I used a fork of it, the system audio was mixed with the microphone audio. I was not able to find any other module for this, so I am finally asking the question.
Edit:
import pyaudio
import wave
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
# The modified part: host API index of the default output device.
SPEAKERS = p.get_default_output_device_info()["hostApi"]
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_host_api_specific_stream_info=SPEAKERS,
                as_loopback=True)  # The part I have modified

print("* recording")
frames = []
# The extra iteration rounds the chunk count up so at least RECORD_SECONDS
# of audio is captured.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS) + 1):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
# Query the sample width before PortAudio is shut down.
sample_width = p.get_sample_size(FORMAT)
p.terminate()

with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
This code was taken from Stack Overflow. It records the speaker output, but the output is mixed with the microphone input.
Also the pyaudio module used was from the fork : https://github.com/intxcc/pyaudio_portaudio.
using https://github.com/intxcc/pyaudio_portaudio
This only records the audio of the device specified by "device_id"
import pyaudio
import wave
chunk = 1024  # Record in chunks of 1024 samples
sample_format = pyaudio.paInt16  # 16 bits per sample
seconds = 3
filename = "output.wav"

p = pyaudio.PyAudio()  # Create an interface to PortAudio

# Select device: list every device with its index, name and host API.
print("Available devices:\n")
for i in range(0, p.get_device_count()):
    info = p.get_device_info_by_index(i)
    host_api_name = p.get_host_api_info_by_index(info["hostApi"])["name"]
    print(str(info["index"]) + ": \t %s \n \t %s \n" % (info["name"], host_api_name))

# TODO: change to your device ID
device_id = 7
device_info = p.get_device_info_by_index(device_id)

# Use whichever channel count is larger, so output-only devices work too.
channels = max(device_info["maxInputChannels"], device_info["maxOutputChannels"])
# Record, count chunks and write the WAV header at the device's own rate;
# mixing it with a hard-coded rate would give the wrong duration and pitch.
fs = int(device_info["defaultSampleRate"])

# https://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.Stream.__init__
stream = p.open(format=sample_format,
                channels=channels,
                rate=fs,
                input=True,
                frames_per_buffer=chunk,
                input_device_index=device_info["index"],
                as_loopback=True)

frames = []  # Initialize array to store frames
print('\nRecording', device_id, '...\n')

# Store data in chunks for `seconds` seconds
for i in range(0, int(fs / chunk * seconds)):
    data = stream.read(chunk)
    frames.append(data)

# Stop and close the stream
stream.stop_stream()
stream.close()
sample_width = p.get_sample_size(sample_format)
# Terminate the PortAudio interface
p.terminate()
print('Finished recording')

# Save the recorded data as a WAV file
with wave.open(filename, 'wb') as wf:
    wf.setnchannels(channels)
    wf.setsampwidth(sample_width)
    wf.setframerate(fs)
    wf.writeframes(b''.join(frames))
P.S. check out https://github.com/intxcc/pyaudio_portaudio/tree/master/example
This can be done with soundcard. You will have to figure out which device index to use for your loopback. This code prints out the ones you will have to choose from. I found the correct one by looping over all of them and seeing which produced non zeros when speakers were playing.
pip install soundcard
import soundcard as sc
import time
# get a list of all speakers:
speakers = sc.all_speakers()
# get the current default speaker on your system:
default_speaker = sc.default_speaker()
# get a list of all microphones, including loopback devices:
mics = sc.all_microphones(include_loopback=True)

# Print the candidates first so the loopback index can be picked from them.
for i, candidate in enumerate(mics):
    try:
        print(f"{i}: {candidate.name}")
    except Exception as e:
        print(e)

# TODO: replace 0 with the index of your speaker loopback from the list above.
loopback_index = 0
default_mic = mics[loopback_index]

with default_mic.recorder(samplerate=148000) as mic, \
        default_speaker.player(samplerate=148000) as sp:
    print("Recording...")
    data = mic.record(numframes=1000000)
    print("Done...Stop your sound so you can hear playback")
    time.sleep(5)
    sp.play(data)
I installed a virtual sound card (BlackHole) on macOS to record the system audio, and it worked.
I only record system audio, without microphone audio, as I don't need it.
On Ubuntu, you can use 'pavucontrol' to change the recording source. An example of recording audio directly from the speakers (without using a microphone):
First you run a script like the one below:
import pyaudio
mic = pyaudio.PyAudio()
# Open a 16-bit mono stream at 44.1 kHz with both input and output enabled.
stream = mic.open(format=pyaudio.paInt16, channels=1, rate=44100,
                  input=True, output=True, frames_per_buffer=2048)
stream.start_stream()

if __name__ == '__main__':
    # Read forever; each pass yields 1024 frames from the selected source.
    while True:
        data = stream.read(1024)
        # Do something with sound
Then you can change the recording source (recording tab) from 'Built-in=Audio Analog Stereo' to 'Monitor of Built-in=Audio Analog Stereo'.
With this approach, you can analyze the sound from the speakers during the video call.

How to ignore background noise while recording audio using pyaudio in python?

I am using the following code to record audio in python, it works perfectly. But when I set the microphone input volume to high, the audio thus recorded using this code has a lot of noise. How to get rid of this noise??
import pyaudio
import wave
def record(filename, record_seconds=15):
    """Record mono 16 kHz audio from the default input device to a WAV file.

    Args:
        filename: path of the WAV file to write.
        record_seconds: how long to record, in seconds (default 15).
    """
    sample_format = pyaudio.paInt16
    channels = 1
    rate = 16000
    chunk = 1024
    chunk_count = int(rate / chunk * record_seconds)

    audio = pyaudio.PyAudio()
    try:
        # Take the width from the format instead of hard-coding 2 bytes, so
        # the header stays correct if the format is ever changed.
        sample_width = audio.get_sample_size(sample_format)

        # start Recording
        stream = audio.open(format=sample_format, channels=channels,
                            rate=rate, input=True,
                            frames_per_buffer=chunk)
        try:
            print("recording...")
            print('---------------------------------')
            print(chunk_count)
            print('*********************************')
            frames = [stream.read(chunk) for _ in range(chunk_count)]
            print("Recording finished. . .")
            # stop Recording
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        # Release PortAudio even if opening or reading failed.
        audio.terminate()

    with wave.open(filename, 'wb') as wave_file:
        wave_file.setnchannels(channels)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(rate)
        wave_file.writeframes(b''.join(frames))

Add recording duration with pyaudio in Python

I am writing a program in Python to create and record an audio file using PyAudio. The audio file is created successfully, but I am having trouble with the recording because it captures nothing.
import pyaudio
import wave
import time
import sys
from datetime import datetime
# Capture settings.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = 'Audio.wav'

p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)

# Nothing is read from the stream here, so this list is still empty when the
# file is written.
frames = []
print('* done recording')

stream.stop_stream()
stream.close()
p.terminate()

# Write the header and whatever was collected to the output file.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
How can I add duration in audio recording?
I got my answer, which I am sharing here
import pyaudio, wave, sys
CHUNK = 8192
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = 'Audio_.wav'

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                input_device_index=0,
                frames_per_buffer=CHUNK)

print("* recording")
frames = []
# The duration comes from how many chunks are read: RATE / CHUNK chunks make
# up one second, so this loop captures about RECORD_SECONDS of audio.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
# Query the sample width before PortAudio is shut down.
sample_width = p.get_sample_size(FORMAT)
p.terminate()

with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
Actually, you had to call stream.read to start the capture; it seems you figured it out, haha.

Categories