PyAudio: mix MP3 files with microphone - Python

I am trying to mix 2 mp3 files with microphone input using pydub and pyaudio. Here is what I have done:
from pydub import AudioSegment
from pydub.playback import play

sound1 = AudioSegment.from_file(r"ΑΓΙΑ ΣΚΕΠΗ.mp3")
sound1_channels = sound1.split_to_mono()
sound1 = sound1_channels[0].overlay(sound1_channels[1])
sound1 = sound1 - 48  # make sound1 quieter by 48 dB

sound2 = AudioSegment.from_file(r"ΑΓΙΑ ΚΥΡΙΑΚΗ.mp3")
sound2_channels = sound2.split_to_mono()
sound2 = sound2_channels[0].overlay(sound2_channels[1])
sound2 = sound2 - 48  # make sound2 quieter by 48 dB

import pyaudio

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024

p = pyaudio.PyAudio()
player = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, output=True, frames_per_buffer=CHUNK)
mic_stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

chunk_time_in_seconds = int(RATE/CHUNK)
chunk_number = 0
while True:
    mic_data = mic_stream.read(CHUNK)
    mic_sound = AudioSegment(mic_data, sample_width=2, channels=1, frame_rate=RATE)
    sound1_part = sound1[chunk_number*chunk_time_in_seconds:(chunk_number+1)*chunk_time_in_seconds]
    sound2_part = sound2[chunk_number*chunk_time_in_seconds:(chunk_number+1)*chunk_time_in_seconds]
    #player.write(mic_sound.raw_data)  # works well
    mix_sound = sound1_part.overlay(sound2_part).overlay(mic_sound)
    player.write(mix_sound.raw_data)  # low microphone quality
    chunk_number = chunk_number + 1
The above code seems to work, but the microphone input it outputs is of low quality.
Can you please help me with this?

I don't know if this approach is good for real-time audio processing, but the following code works well! The key change is that the MP3s are now sliced by the actual duration of each microphone chunk (len(mic_sound), which pydub measures in milliseconds) instead of the fixed chunk_time_in_seconds value, so the music slices stay the same length as the mic chunks.
from pydub import AudioSegment
from pydub.playback import play

sound1 = AudioSegment.from_file(r"ΑΓΙΑ ΣΚΕΠΗ.mp3")
sound1_channels = sound1.split_to_mono()
sound1 = sound1_channels[0].overlay(sound1_channels[1])
sound1 = sound1 - 30  # make sound1 quieter by 30 dB

sound2 = AudioSegment.from_file(r"ΑΓΙΑ ΚΥΡΙΑΚΗ.mp3")
sound2_channels = sound2.split_to_mono()
sound2 = sound2_channels[0].overlay(sound2_channels[1])
sound2 = sound2 - 30  # make sound2 quieter by 30 dB

import pyaudio

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024

p = pyaudio.PyAudio()
player = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, output=True, frames_per_buffer=CHUNK)
mic_stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

#chunk_time_in_seconds = int(RATE/CHUNK)
chunk_number = 0
while True:
    mic_data = mic_stream.read(CHUNK)
    mic_sound = AudioSegment(mic_data, sample_width=2, channels=1, frame_rate=RATE)
    mic_sound_duration = len(mic_sound)
    sound1_part = sound1[chunk_number*mic_sound_duration:(chunk_number+1)*mic_sound_duration]
    sound2_part = sound2[chunk_number*mic_sound_duration:(chunk_number+1)*mic_sound_duration]
    #player.write(mic_sound.raw_data)  # works well
    mix_sound = sound1_part.overlay(sound2_part).overlay(mic_sound)
    player.write(mix_sound.raw_data)
    chunk_number = chunk_number + 1

Related

Why is the paUInt8 format in pyaudio not working?

I am trying to record my mic with pyaudio. The problem is that when I record using
FORMAT = pyaudio.paUInt8
I cannot hear any sound when the recorded file is played back. But if I use the paInt16 format, I can hear the recorded voice without any problem. I am using VLC player for playback. My code is below:
import pyaudio
import wave
import threading

#FORMAT = pyaudio.paInt16 # working properly
FORMAT = pyaudio.paUInt8  # Not hearing any sound on playback
CHANNELS = 1
RATE = 8000
CHUNK = 2040
WAVE_OUTPUT_FILENAME = "file.wav"

stop_ = False
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS,
                    rate=RATE, input=True, input_device_index=0,
                    frames_per_buffer=CHUNK)

def stop():
    global stop_
    while True:
        if not input('Press Enter >>>'):
            print('exit')
            stop_ = True

t = threading.Thread(target=stop, daemon=True).start()

frames = []
while True:
    data = stream.read(CHUNK)
    frames.append(data)
    if stop_:
        break

stream.stop_stream()
stream.close()
audio.terminate()

waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
If I convert the pyaudio.paInt16 recorded file to uint8 format using Audacity, it plays fine in VLC.
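For reference, here is a minimal sketch of the same int16-to-uint8 conversion done in Python instead of Audacity, using only the standard library (the filenames are placeholders, and the audioop module is only available up to Python 3.12). Note that 8-bit WAV stores unsigned samples, hence the +128 bias:
import wave
import audioop

with wave.open("file.wav", "rb") as inp:   # the 16-bit signed recording
    params = inp.getparams()
    pcm16 = inp.readframes(inp.getnframes())

pcm8 = audioop.lin2lin(pcm16, 2, 1)        # 16-bit -> 8-bit samples (still signed)
pcm8 = audioop.bias(pcm8, 1, 128)          # shift to unsigned, as 8-bit WAV expects

with wave.open("file_uint8.wav", "wb") as out:
    out.setnchannels(params.nchannels)
    out.setsampwidth(1)
    out.setframerate(params.framerate)
    out.writeframes(pcm8)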

Using Python on Raspberry Pi, I'm unable to record audio with PyAudio via Thonny/terminal

Using the example from https://realpython.com/playing-and-recording-sound-python/#python-sounddevice_1 , I'm getting the following error when using Thonny: "Backend terminated or disconnected. Use 'Stop/Restart' to restart.". When I run the program in the terminal, I get this error instead: "OSError: [Errno -9981] Input overflowed". The example code (which isn't threaded) works in both the terminal and Thonny if I modify it not to throw an exception on overflow ("data = stream.read(chunk, exception_on_overflow = False)"), but it does not work when run in a new thread. I've also tried making the chunk size larger and smaller, to no avail. When I have the overflow exception for the threaded version, I get a different error in the terminal: "Segmentation fault". I'm running Raspbian 10 Buster and Python 3.7.3; if someone could test this and see if it works, thanks.
import time
import board
import busio
import digitalio
from adafruit_mcp230xx.mcp23017 import MCP23017
from adafruit_debouncer import Debouncer

i2c = busio.I2C(board.SCL, board.SDA)
mcp = MCP23017(i2c)

import threading
import pyaudio
import wave
import sys
import subprocess

record = False

def background_audio_recording():
    #chunk = 1024 # Record in chunks of 1024 samples
    chunk = 1024
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 2
    fs = 44100  # Record at 44100 samples per second
    #seconds = 3
    filename = "output.wav"
    p = pyaudio.PyAudio()  # Create an interface to PortAudio
    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=fs,
                    frames_per_buffer=chunk,
                    #input_device_index = 2,
                    input=True)
    frames = []  # Initialize array to store frames
    while record:
        #data = stream.read(chunk, exception_on_overflow = False)
        data = stream.read(chunk)
        frames.append(data)
    '''
    for i in range(0, int(fs / chunk * seconds)):
        data = stream.read(chunk)
        frames.append(data)
    '''
    # Stop and close the stream
    stream.stop_stream()
    stream.close()
    # Terminate the PortAudio interface
    p.terminate()
    print('Finished recording')
    # Save the recorded data as a WAV file
    wf = wave.open(filename, 'wb')
    wf.setnchannels(channels)
    wf.setsampwidth(p.get_sample_size(sample_format))
    wf.setframerate(fs)
    wf.writeframes(b''.join(frames))
    wf.close()
    print("File Saved")
    return

button1PinSetup = mcp.get_pin(0)  # GPA0
button1PinSetup.direction = digitalio.Direction.INPUT
button1PinSetup.pull = digitalio.Pull.UP
button1Pin = Debouncer(button1PinSetup)

button2PinSetup = mcp.get_pin(1)  # GPA1
button2PinSetup.direction = digitalio.Direction.INPUT
button2PinSetup.pull = digitalio.Pull.UP
button2Pin = Debouncer(button2PinSetup)

while True:
    button1Pin.update()
    button2Pin.update()
    if button1Pin.fell:
        print("Record")
        if record == True:
            print("Already Recording")
        else:
            record = True
            threading.Thread(target=background_audio_recording).start()
    if button2Pin.fell:
        print("Recording Stopped")
        record = False
It seems as though I needed to create the PyAudio object as a global, as well as within the thread (for each additional recording attempt). I got it working with the following code:
import pyaudio
import wave
import time
import threading
import board
import busio
import digitalio
from adafruit_mcp230xx.mcp23017 import MCP23017
from adafruit_debouncer import Debouncer

i2c = busio.I2C(board.SCL, board.SDA)
mcp = MCP23017(i2c)

form_1 = pyaudio.paInt16  # 16-bit resolution
chans = 2  # 2 channels
samp_rate = 44100  # 44.1kHz sampling rate
chunk = 4096  # 2^12 samples for buffer
#record_secs = 3 # seconds to record
dev_index = 2  # device index found by p.get_device_info_by_index(ii)
wav_output_filename = 'test1.wav'  # name of .wav file

audio = pyaudio.PyAudio()  # create pyaudio instantiation
record = False

def background_audio_recording():
    audio = pyaudio.PyAudio()  # create pyaudio instantiation
    # create pyaudio stream
    stream = audio.open(format=form_1, rate=samp_rate, channels=chans,
                        input_device_index=dev_index, input=True,
                        frames_per_buffer=chunk)
    print("recording")
    frames = []
    # loop through stream and append audio chunks to frame array
    #for ii in range(0,int((samp_rate/chunk)*record_secs)):
    while record == True:
        data = stream.read(chunk)
        frames.append(data)
    print("finished recording")
    # stop the stream, close it, and terminate the pyaudio instantiation
    stream.stop_stream()
    stream.close()
    audio.terminate()
    # save the audio frames as .wav file
    wavefile = wave.open(wav_output_filename, 'wb')
    wavefile.setnchannels(chans)
    wavefile.setsampwidth(audio.get_sample_size(form_1))
    wavefile.setframerate(samp_rate)
    wavefile.writeframes(b''.join(frames))
    wavefile.close()
    return

button1PinSetup = mcp.get_pin(0)  # GPA0
button1PinSetup.direction = digitalio.Direction.INPUT
button1PinSetup.pull = digitalio.Pull.UP
button1Pin = Debouncer(button1PinSetup)

button2PinSetup = mcp.get_pin(1)  # GPA1
button2PinSetup.direction = digitalio.Direction.INPUT
button2PinSetup.pull = digitalio.Pull.UP
button2Pin = Debouncer(button2PinSetup)

while True:
    button1Pin.update()
    button2Pin.update()
    if button1Pin.fell:
        print("Record")
        if record == True:
            print("Already Recording")
        else:
            record = True
            threading.Thread(target=background_audio_recording).start()
    if button2Pin.fell:
        print("Recording Stopped")
        record = False

Real time audio processing with python and AudioSegment

Well, for some reason I want to split some selected mp3 files into chunks of ~28 ms each.
I have a quality problem when slicing below 1 second.
from av import AudioFrame
from pydub import AudioSegment
import av

# open an mp3 file
sound1 = AudioSegment.from_file(r"ΑΓΙΑ ΣΚΕΠΗ.mp3")

codec = av.CodecContext.create('pcm_s16le', 'r')
codec.sample_rate = 44100
codec.channels = 2

# split the file into 10-second parts
#slices = sound1[::10000]
# split the file into 2-second parts
#slices = sound1[::2000]
# split the file into 1-second parts
#slices = sound1[::1000]  # ok quality, 1 tick every 1 second
# split the file into 10-millisecond parts
slices = sound1[::10]  # bad quality

pieces = AudioSegment.silent()
'''
for slice in slices:
    pieces = pieces + slice
pieces.export("remaked.mp3", format="mp3")
# the remade file works well
'''
for slice in slices:
    # quality loss (why?)
    packet = av.Packet(slice.raw_data)
    frame = codec.decode(packet)[0]
    # remake an AudioSegment from the av.AudioFrame
    for p in frame.planes:
        data = p.to_bytes()
        data_segment = AudioSegment(data, sample_width=2, channels=2, frame_rate=44100)
        pieces = pieces + data_segment
pieces.export("remaked.mp3", format="mp3")
How can I fix the quality problem?
Note that I use av.AudioFrame (frame = codec.decode(packet)[0]) because I want to send some real-time audio data with aiortc.
Edit:
from av import AudioFrame
from pydub import AudioSegment
import pyaudio
import av
import fractions
from aiortc.mediastreams import MediaStreamTrack

class RadioTelephoneTrack(MediaStreamTrack):
    kind = "audio"

    def __init__(self):
        super().__init__()  # don't forget this!
        self.sample_rate = 8000
        self.AUDIO_PTIME = 0.020  # 20ms audio packetization
        self.samples = int(self.AUDIO_PTIME * self.sample_rate)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 2
        self.RATE = self.sample_rate
        #self.RATE = 44100
        self.CHUNK = int(8000*0.020)
        #self.CHUNK = 1024
        self.p = pyaudio.PyAudio()
        self.mic_stream = self.p.open(format=self.FORMAT, channels=1, rate=self.RATE, input=True, frames_per_buffer=self.CHUNK)
        self.codec = av.CodecContext.create('pcm_s16le', 'r')
        self.codec.sample_rate = self.RATE
        #self.codec.sample_fmt = AV_SAMPLE_FMT_S16
        self.codec.channels = 2
        #self.codec.channel_layout = "mono";
        self.sound1 = AudioSegment.from_file(r"ΑΓΙΑ ΣΚΕΠΗ.mp3").set_frame_rate(self.sample_rate)
        print("Frame rate: " + str(self.sound1.frame_rate))
        #self.sound1_channels = self.sound1.split_to_mono()
        #self.sound1 = self.sound1_channels[0].overlay(self.sound1_channels[1])
        self.audio_samples = 0
        self.chunk_number = 0
        #self.sound1 = self.sound1 - 30 # make sound1 quieter by 30dB

    async def recv(self):
        mic_data = self.mic_stream.read(self.CHUNK)
        mic_sound = AudioSegment(mic_data, sample_width=2, channels=1, frame_rate=self.RATE)
        mic_sound = AudioSegment.from_mono_audiosegments(mic_sound, mic_sound)
        mic_sound_duration = len(mic_sound)
        #print("Mic sound duration: " + str(mic_sound_duration))
        mp3_slice_duration = mic_sound_duration
        if len(self.sound1) > (self.chunk_number+1)*mp3_slice_duration:
            sound1_part = self.sound1[self.chunk_number*mp3_slice_duration:(self.chunk_number+1)*mp3_slice_duration]
        elif len(self.sound1) > self.chunk_number*mp3_slice_duration:
            sound1_part = self.sound1[self.chunk_number*mp3_slice_duration:]
        else:
            # replay: loop the mp3 from the beginning
            times_played_1 = int(self.chunk_number*mp3_slice_duration/len(self.sound1))
            times_played_2 = int((self.chunk_number+1)*mp3_slice_duration/len(self.sound1))
            if times_played_1 == times_played_2:
                time_start = (self.chunk_number*mp3_slice_duration) - (times_played_1*len(self.sound1))
                time_end = ((self.chunk_number+1)*mp3_slice_duration) - (times_played_1*len(self.sound1))
                sound1_part = self.sound1[time_start:time_end]
            else:
                time_start_1 = (self.chunk_number*mp3_slice_duration) - (times_played_1*len(self.sound1))
                sound1_part1 = self.sound1[time_start_1:]
                time_end_1 = ((self.chunk_number+1)*mp3_slice_duration) - (times_played_2*len(self.sound1))
                sound1_part2 = self.sound1[0:time_end_1]
                sound1_part = sound1_part1.append(sound1_part2, crossfade=5)
        #sound1_part = AudioSegment.silent()
        #self.mix_sound = sound1_part.overlay(mic_sound)
        self.mix_sound = sound1_part
        packet = av.Packet(self.mix_sound.raw_data)
        frame = self.codec.decode(packet)[0]
        frame.pts = self.audio_samples
        self.audio_samples += frame.samples
        self.chunk_number = self.chunk_number + 1
        return frame
The above code works (much better now).
The main problems now are:
The sound sounds "in depth" (hollow/distant).
There is a click noise every time the sound restarts (starts again from the beginning).

PyAudio -- How to capture microphone and system sounds in a single stream?

I'm trying to build an app with pyaudio that records speaker and microphone sound, but I just don't know how to record them both. I tried enabling Stereo Mix, but it didn't work out because I could only hear the sound from the speakers.
The code I'm using records audio from the default microphone in a background thread using pyaudio:
import pyaudio
import wave
import threading
import time
import subprocess

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"

class recorder:
    def __init__(self):
        self.going = False
        self.process = None
        self.filename = "ScreenCapture.mpg"

    def record(self, filename):
        try:
            if self.process.is_alive():
                self.going = False
        except AttributeError:
            print("test")
        self.process = threading.Thread(target=self._record)
        self.process.start()
        self.filename = filename

    def _record(self):
        p = pyaudio.PyAudio()
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        print("* recording")
        frames = []
        self.going = True
        while self.going:
            data = stream.read(CHUNK)
            frames.append(data)
        print("* done recording")
        stream.stop_stream()
        stream.close()
        p.terminate()
        wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
        wf.close()

    def stop_recording(self):
        self.going = False
I'm using Windows and I can also use another library, not only PyAudio. I just need to get this to work.
Edit: I found this code to record the output from the speakers, but I couldn't make it work in my application:
import pyaudio
import wave

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
SPEAKERS = p.get_default_output_device_info()["hostApi"]
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_host_api_specific_stream_info=SPEAKERS,
                as_loopback=True)
print("* recording")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
UPDATE:
I could record both my speakers and my microphone with this:
import pyaudio
import wave
import numpy as np

CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 2
WAVE_OUTPUT_FILENAME = "tmp.wav"

p = pyaudio.PyAudio()
for i in range(0, p.get_device_count()):
    print(i, p.get_device_info_by_index(i)['name'])

# stream using as_loopback to get sound from the OS
stream = p.open(
    format=FORMAT,
    channels=2,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=2,
    as_loopback=True)

# stream using my Microphone's input device
stream2 = p.open(
    format=FORMAT,
    channels=1,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=1)
    #as_loopback=False)

frames = []
frames2 = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    data2 = stream2.read(CHUNK)
    frames.append(data)
    frames2.append(data2)

# frames = as_loopback sound data (Speakers)
frames = b''.join(frames)
# frames2 = sound data of the Microphone
frames2 = b''.join(frames2)

# decode the Speaker data
Sdecoded = np.frombuffer(frames, 'int16')
# decode the Microphone data
Mdecoded = np.frombuffer(frames2, 'int16')

# convert the Speaker data into a NumPy vector (makes it easier to pick out the audio channels)
Sdecoded = np.array(Sdecoded, dtype='int16')
# get the right channel
direito = Sdecoded[1::2]
# get the left channel
esquerdo = Sdecoded[::2]

# mix everything to mono = right channel + left channel + the Microphone data, which is already mono
mix = (direito + esquerdo + Mdecoded)
# make sure no value goes beyond the limits of a short int
signal = np.clip(mix, -32767, 32766)
# encode the data again
encodecoded = wave.struct.pack("%dh" % (len(signal)), *list(signal))

# stop all streams and terminate pyaudio
stream.stop_stream()
stream.close()
stream2.stop_stream()
stream2.close()
p.terminate()

# write the mixed audio as mono
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(encodecoded)
wf.close()
But when I try to combine it with my code, it doesn't work. What is going on? I think I'm almost there.
import numpy as np
import pyaudio
import wave
import threading
import time
import subprocess

CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"

p = pyaudio.PyAudio()
for i in range(0, p.get_device_count()):
    print(i, p.get_device_info_by_index(i)['name'])

class recorder:
    def __init__(self):
        self.going = False
        self.process = None
        self.filename = "ScreenCapture.mpg"

    def record(self, filename):
        try:
            if self.process.is_alive():
                self.going = False
        except AttributeError:
            print("test")
        self.process = threading.Thread(target=self._record)
        self.process.start()
        self.filename = filename

    def _record(self):
        p = pyaudio.PyAudio()
        # stream using as_loopback to get sound from OS
        stream = p.open(
            format=FORMAT,
            channels=2,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
            input_device_index=2,
            as_loopback=True)
        # stream using my Microphone's input device
        stream2 = p.open(
            format=FORMAT,
            channels=1,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
            input_device_index=1)
            # as_loopback=False)
        #print("* recording")
        frames = []
        frames2 = []
        self.going = True
        while self.going:
            data = stream.read(CHUNK)
            data2 = stream2.read(CHUNK)
            frames.append(data)
            frames2.append(data2)
        # frames = as_loopback sound data (Speakers)
        frames = b''.join(frames)
        # frames2 = sound data of Microphone
        frames2 = b''.join(frames2)
        # decoding Speaker data
        Sdecoded = np.frombuffer(frames, 'int16')
        # decoding the microphone data
        Mdecoded = np.frombuffer(frames2, 'int16')
        # converting Speaker data into a NumPy vector (making life easier when picking up audio channels)
        Sdecoded = np.array(Sdecoded, dtype='int16')
        # getting the data on the right side
        direito = Sdecoded[1::2]
        # getting the data on the left side
        esquerdo = Sdecoded[::2]
        # mixing everything to mono = add right side + left side + Microphone decoded data that is already mono
        mix = (direito + esquerdo + Mdecoded)
        # ensuring no value goes beyond the limits of short int
        signal = np.clip(mix, -32767, 32766)
        # encode the data again
        encodecoded = wave.struct.pack("%dh" % (len(signal)), *list(signal))
        # print("* done recording")
        stream.stop_stream()
        stream.close()
        stream2.stop_stream()
        stream2.close()
        p.terminate()
        wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(encodecoded)
        wf.close()

    def stop_recording(self):
        self.going = False
I made the code very clean and commented each part so you understand what is going on. I added a for loop at the beginning so PyAudio shows me which interfaces I have on my OS:
0 Mapeador de som da Microsoft - Input
1 Microfone (Realtek(R) Audio)
2 Mixagem estéreo (Realtek(R) Aud
3 Mapeador de som da Microsoft - Output
4 Alto-falantes (Realtek(R) Audio
5 Alto-falantes (Realtek(R) Audio)
6 Microfone (Realtek(R) Audio)
7 Mixagem estéreo (Realtek(R) Audio)
8 Speakers 1 (Realtek HD Audio output with SST)
9 Speakers 2 (Realtek HD Audio output with SST)
10 Alto-falante (Realtek HD Audio output with SST)
11 Microfone (Realtek HD Audio Mic input)
12 Mixagem estéreo (Realtek HD Audio Stereo input)
You can use 2 separate threads to record from 2 different devices (providing a separate device index to each) into separate WAV files, and then mix these 2 files using the pydub library (a rough sketch of the two-thread recording step follows after the mixing snippet below):
from pydub import AudioSegment
speakersound = AudioSegment.from_file("/path/speaker.wav")
micsound = AudioSegment.from_file("/path/mic.wav")
mixsound = speakersound.overlay(micsound)
mixsound.export("/path/mixsound.wav", format='wav')
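And here is a rough sketch of the two-thread recording step described above (not from the original answer): the device indices, channel counts, durations and file paths are placeholders, and the "speaker" device is assumed to be a Stereo Mix / loopback-style input as discussed earlier in this question.
import threading
import wave
import pyaudio

CHUNK = 1024
RATE = 44100

def record_device(device_index, channels, seconds, filename):
    # record from one input device and write the result to a WAV file
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=channels, rate=RATE,
                    input=True, input_device_index=device_index,
                    frames_per_buffer=CHUNK)
    frames = []
    for _ in range(int(RATE / CHUNK * seconds)):
        frames.append(stream.read(CHUNK))
    stream.stop_stream()
    stream.close()
    wf = wave.open(filename, 'wb')
    wf.setnchannels(channels)
    wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    p.terminate()

# hypothetical device indices: 1 = microphone, 2 = Stereo Mix / loopback
t_mic = threading.Thread(target=record_device, args=(1, 1, 5, "/path/mic.wav"))
t_spk = threading.Thread(target=record_device, args=(2, 2, 5, "/path/speaker.wav"))
t_mic.start(); t_spk.start()
t_mic.join(); t_spk.join()
# the resulting /path/mic.wav and /path/speaker.wav can then be mixed with the pydub code above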

How to ignore background noise while recording audio using pyaudio in python?

I am using the following code to record audio in Python, and it works perfectly. But when I set the microphone input volume to high, the audio recorded with this code has a lot of noise. How can I get rid of this noise?
import pyaudio
import wave

def record(filename):
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    CHUNK = 1024
    RECORD_SECONDS = 15
    WAVE_OUTPUT_FILENAME = filename

    audio = pyaudio.PyAudio()
    # start Recording
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    print("recording...")
    print('---------------------------------')
    print(int(RATE / CHUNK * RECORD_SECONDS))
    print('*********************************')
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    print("Recording finished. . .")

    # stop Recording
    stream.stop_stream()
    stream.close()
    audio.terminate()

    waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(audio.get_sample_size(FORMAT))
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(frames))
    waveFile.close()
