I want to write a very basic application that passes audio from microphone to speakers. This is very simple with pyaudio as described on https://people.csail.mit.edu/hubert/pyaudio/ .
def passthrough():
    """Feed microphone input straight back to the output.

    Opens one callback-driven stream that is both input and output, then
    blocks, polling every 0.1 s, for as long as the stream is active.
    """
    sample_width = 2
    channel_count = 1
    sample_rate = 44100

    audio = pyaudio.PyAudio()

    def echo(in_data, frame_count, time_info, status):
        # Hand the captured buffer back unchanged as the playback buffer.
        return in_data, pyaudio.paContinue

    duplex = audio.open(
        format=audio.get_format_from_width(sample_width),
        channels=channel_count,
        rate=sample_rate,
        input=True,
        output=True,
        stream_callback=echo,
    )
    duplex.start_stream()

    while duplex.is_active():
        time.sleep(0.1)

    duplex.stop_stream()
    duplex.close()
    audio.terminate()
But now I try to mix a wave file into this stream, when an event occurs. And that's where I am stuck right now. Playing a wave file seems to be easy, too.
def play_wave(wav_file):
    """Play a WAV file through a callback stream and block until it ends.

    Args:
        wav_file: Path or file object accepted by ``wave.open``.

    The WAV file, the stream and the PyAudio instance are released even if
    opening or running the stream raises.
    """
    with wave.open(wav_file, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            def callback(in_data, frame_count, time_info, status):
                # NOTE(review): per the PyAudio docs, a buffer shorter than
                # frame_count ends the stream, which is what lets the polling
                # loop below finish at end of file -- confirm for your version.
                data = wf.readframes(frame_count)
                return (data, pyaudio.paContinue)

            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=int(wf.getframerate()),
                            output=True,
                            stream_callback=callback)
            try:
                stream.start_stream()
                while stream.is_active():
                    time.sleep(0.1)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
At this time, I have two problems.
1. How do I mix the wave output into the continuous stream?
2. How can I trigger (1) on an event basis?
Hope someone can light up the dark basement I am in right now.
EDIT: Assume the wave file to have same number of channels and same rate, so no conversion necessary.
After moving the passthrough() function into a thread, it works as desired. When I tried this yesterday, I simply got the thread start wrong (I called passthrough() from __init__ instead of from the run() method).
So here the complete, working code.
import pyaudio
import wave
import threading
import time
class AudioPass(threading.Thread):
    """Background thread that echoes microphone input to the output.

    Call ``start()`` to begin; call ``stop()`` to ask the thread to close its
    stream and exit.
    """

    def __init__(self):
        super().__init__()
        # Set by stop(); polled by passthrough() so the thread can exit.
        self._stop_event = threading.Event()

    def run(self):
        """Thread entry point: run the passthrough loop."""
        self.passthrough()

    def stop(self):
        """Request that the passthrough loop end at its next poll."""
        self._stop_event.set()

    def passthrough(self):
        """Open an input+output stream and relay audio until stopped.

        The stream and the PyAudio instance are released even if the loop
        exits through an exception.
        """
        WIDTH = 2
        CHANNELS = 1
        RATE = 44100
        p = pyaudio.PyAudio()

        def callback(in_data, frame_count, time_info, status):
            # Return the captured buffer unchanged as the playback buffer.
            return (in_data, pyaudio.paContinue)

        try:
            stream = p.open(format=p.get_format_from_width(WIDTH),
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            output=True,
                            stream_callback=callback)
            try:
                stream.start_stream()
                while stream.is_active():
                    # wait() doubles as the 0.1 s poll delay and returns
                    # True as soon as stop() has been called.
                    if self._stop_event.wait(0.1):
                        break
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
def play_wave(wav_file):
    """Play a WAV file through a callback stream and block until it ends.

    Args:
        wav_file: Path or file object accepted by ``wave.open``.

    The WAV file, the stream and the PyAudio instance are released even if
    opening or running the stream raises.
    """
    with wave.open(wav_file, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            def callback(in_data, frame_count, time_info, status):
                # NOTE(review): per the PyAudio docs, a buffer shorter than
                # frame_count ends the stream, which is what lets the polling
                # loop below finish at end of file -- confirm for your version.
                data = wf.readframes(frame_count)
                return (data, pyaudio.paContinue)

            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=int(wf.getframerate()),
                            output=True,
                            stream_callback=callback)
            try:
                stream.start_stream()
                while stream.is_active():
                    time.sleep(0.1)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
# Run the microphone passthrough on its own thread so the main thread stays free.
thread = AudioPass()
thread.start()
# Play the bell from the main thread after the passthrough thread has been started.
play_wave('C:/bell.wav')
Later I will also try another way a colleague suggested today, and if it works well too, I will post it here as an alternative. Using the threaded way is nice because I can use different rates for the stream and the wav file.
A colleague provided the below solution, which is a very raw approach, but it works and is good for understanding how this pyaudio stuff works.
import time
import pyaudio
import numpy
WIDTH = 2
CHANNELS = 1
RATE = 44100
p = pyaudio.PyAudio()

SINE_WAVE_FREQUENCY = 440.0  # In Hz
SINE_WAVE_DURATION = 5.0  # In seconds
SINE_WAVE_VOLUME = 0.5
# float32 samples of the tone, one per output frame, scaled by the volume.
SINE_WAVE = (numpy.sin(2 * numpy.pi * numpy.arange(RATE * SINE_WAVE_DURATION) * SINE_WAVE_FREQUENCY / RATE)).astype(numpy.float32) * SINE_WAVE_VOLUME


def loopback(in_data, frame_count, time_info, status):
    """Stream callback: play back exactly what was just captured."""
    return (in_data, pyaudio.paContinue)


stream = p.open(format=p.get_format_from_width(WIDTH), channels=CHANNELS, rate=RATE, input=True, output=True, stream_callback=loopback)
stream.start_stream()


def playsine():
    """Play the precomputed sine wave on its own blocking output stream."""
    sinestream = p.open(format=pyaudio.paFloat32, channels=1, rate=RATE, output=True)
    try:
        # Pass raw bytes rather than the ndarray.
        # NOTE(review): PyAudio appears to derive the frame count from
        # len(frames); for an ndarray that is the sample count, not the byte
        # count, so only part of the tone would be played -- confirm.
        sinestream.write(SINE_WAVE.tobytes())
        sinestream.stop_stream()
    finally:
        sinestream.close()


try:
    while True:
        input("Press enter to play a sine wave")
        playsine()
finally:
    # Reached when the loop is interrupted (e.g. Ctrl-C); release the
    # loopback stream and PortAudio before the exception propagates.
    stream.stop_stream()
    stream.close()
    p.terminate()
Related
I'm trying to build an app with pyaudio that records both speaker and microphone sound, but I don't know how to record them both. I tried enabling Stereo Mix, but it didn't work out because I could only hear the sound from the speakers.
This code that i'm using records audio from the default microphone in a background thread using pyaudio:
import pyaudio
import wave
import threading
import time
import subprocess
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"
class recorder:
    """Record from the default input device on a background thread.

    ``record()`` starts capturing and ``stop_recording()`` ends it, after
    which the captured audio is written to WAVE_OUTPUT_FILENAME.
    """

    def __init__(self):
        self.going = False   # loop flag polled by the recording thread
        self.process = None  # the recording thread, once one has been started
        self.filename = "ScreenCapture.mpg"

    def record(self, filename):
        """Start a recording, first finishing any recording in progress.

        Args:
            filename: Stored on ``self.filename``.
                NOTE(review): the audio itself is written to
                WAVE_OUTPUT_FILENAME, not to this name -- confirm intent.
        """
        if self.process is not None and self.process.is_alive():
            # Let the previous thread finish and write its file before a new
            # one starts, so two threads never record at the same time.
            self.going = False
            self.process.join()
        self.filename = filename
        # Raise the flag before the thread starts so that a stop_recording()
        # issued right after record() cannot be overwritten by the worker.
        self.going = True
        self.process = threading.Thread(target=self._record)
        self.process.start()

    def _record(self):
        """Capture CHUNK-frame buffers while ``going`` is set, then save them."""
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
            print("* recording")
            frames = []
            try:
                while self.going:
                    frames.append(stream.read(CHUNK))
                print("* done recording")
                stream.stop_stream()
            finally:
                stream.close()
            # Query the sample size while the PyAudio instance is still live.
            sample_width = p.get_sample_size(FORMAT)
        finally:
            p.terminate()

        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))

    def stop_recording(self):
        """Ask the recording thread to stop after its current buffer."""
        self.going = False
I'm using Windows and I can also use other library, not only PyAudio. I just need to get this work.
Edit : I found this code to record the output from speaker but i couldn't make it work in my application:
import pyaudio
import wave
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
# Host API index of the default output device.
SPEAKERS = p.get_default_output_device_info()["hostApi"]
# NOTE(review): as_loopback does not look like an argument of stock PyAudio's
# open(); presumably this targets a WASAPI-loopback build -- confirm.
# NOTE(review): input_host_api_specific_stream_info receives a host API index
# here; verify that this is what the parameter expects.
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_host_api_specific_stream_info=SPEAKERS,
                as_loopback=True)
print("* recording")
frames = []
# One CHUNK-frame read per iteration, RECORD_SECONDS worth in total.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
# Query the sample size while the PyAudio instance is still live.
sample_width = p.get_sample_size(FORMAT)
p.terminate()
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
UPDATE :
I could record both my speaker and my microphone with this:
import pyaudio
import wave
import numpy as np
CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 2
WAVE_OUTPUT_FILENAME = "tmp.wav"
p = pyaudio.PyAudio()
# List the available devices so the indices used below can be checked.
for i in range(0, p.get_device_count()):
    print(i, p.get_device_info_by_index(i)['name'])
# stream using as_loopback to get sound from OS
stream = p.open(
    format=FORMAT,
    channels=2,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=2,
    as_loopback=True)
# stream using my Microphone's input device
stream2 = p.open(
    format=FORMAT,
    channels=1,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=1)
frames = []
frames2 = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    data2 = stream2.read(CHUNK)
    frames.append(data)
    frames2.append(data2)
# frames = as_loopback sound data (Speakers)
frames = b''.join(frames)
# frames2 = sound data of Microphone
frames2 = b''.join(frames2)
# Decode both recordings and widen to int32: adding three int16 signals in
# int16 wraps around before clipping could ever see the overflow.
Sdecoded = np.frombuffer(frames, 'int16').astype('int32')
Mdecoded = np.frombuffer(frames2, 'int16').astype('int32')
# Speaker data is interleaved stereo: odd samples are the right channel...
direito = Sdecoded[1::2]
# ...and even samples are the left channel.
esquerdo = Sdecoded[::2]
# mixing everything to mono = right + left + microphone (already mono)
mix = (direito + esquerdo + Mdecoded)
# Saturate to the int16 range, then narrow back to int16.
limits = np.iinfo('int16')
signal = np.clip(mix, limits.min, limits.max).astype('int16')
# encode the data again as raw 16-bit samples
encodecoded = signal.tobytes()
# stop all streams and terminate pyaudio
stream.stop_stream()
stream.close()
stream2.stop_stream()
stream2.close()
p.terminate()
# recording mixed audio in mono
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(pyaudio.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(encodecoded)
But when I try to do the same mixing inside my own code, it doesn't work. What is going on? I think I'm close to solving it.
import numpy as np
import pyaudio
import wave
import threading
import time
import subprocess
CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"
p = pyaudio.PyAudio()
for i in range(0, p.get_device_count()):
print(i, p.get_device_info_by_index(i)['name'])
class recorder:
    """Record speaker (loopback) and microphone audio on a background thread.

    ``record()`` starts capturing and ``stop_recording()`` ends it, after
    which a mono mix of both sources is written to WAVE_OUTPUT_FILENAME.
    """

    def __init__(self):
        self.going = False   # loop flag polled by the recording thread
        self.process = None  # the recording thread, once one has been started
        self.filename = "ScreenCapture.mpg"

    def record(self, filename):
        """Start a recording, first finishing any recording in progress.

        Args:
            filename: Stored on ``self.filename``.
                NOTE(review): the audio itself is written to
                WAVE_OUTPUT_FILENAME, not to this name -- confirm intent.
        """
        if self.process is not None and self.process.is_alive():
            # Let the previous thread finish and write its file before a new
            # one starts, so two threads never record at the same time.
            self.going = False
            self.process.join()
        self.filename = filename
        # Raise the flag before the thread starts so that a stop_recording()
        # issued right after record() cannot be overwritten by the worker.
        self.going = True
        self.process = threading.Thread(target=self._record)
        self.process.start()

    @staticmethod
    def _mix_to_mono(speaker_bytes, mic_bytes):
        """Mix interleaved stereo int16 audio with mono int16 audio.

        Args:
            speaker_bytes: Raw int16 samples, interleaved left/right.
            mic_bytes: Raw int16 mono samples.

        Returns:
            Raw int16 mono bytes holding left + right + mic per frame,
            saturated to the int16 range. If the inputs differ in length,
            the result is truncated to the shortest of them.
        """
        # Widen first: summing in int16 would wrap before clipping.
        speaker = np.frombuffer(speaker_bytes, dtype=np.int16).astype(np.int32)
        mic = np.frombuffer(mic_bytes, dtype=np.int16).astype(np.int32)
        left = speaker[::2]
        right = speaker[1::2]
        count = min(len(left), len(right), len(mic))
        mix = left[:count] + right[:count] + mic[:count]
        limits = np.iinfo(np.int16)
        return np.clip(mix, limits.min, limits.max).astype(np.int16).tobytes()

    def _record(self):
        """Capture both devices while ``going`` is set, then save the mix."""
        p = pyaudio.PyAudio()
        try:
            # stream using as_loopback to get sound from OS
            stream = p.open(
                format=FORMAT,
                channels=2,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_device_index=2,
                as_loopback=True)
            # stream using my Microphone's input device
            stream2 = p.open(
                format=FORMAT,
                channels=1,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                input_device_index=1)
            frames = []
            frames2 = []
            try:
                while self.going:
                    frames.append(stream.read(CHUNK))
                    frames2.append(stream2.read(CHUNK))
            finally:
                for opened in (stream, stream2):
                    opened.stop_stream()
                    opened.close()
            # Query the sample size while the PyAudio instance is still live.
            sample_width = p.get_sample_size(FORMAT)
        finally:
            p.terminate()

        encoded = self._mix_to_mono(b''.join(frames), b''.join(frames2))
        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(encoded)

    def stop_recording(self):
        """Ask the recording thread to stop after its current buffers."""
        self.going = False
I made the code very clean and commented each part so you understand what is going on.I made a for loop in the beginning for Pyaudio show me what are the interfaces I have in my OS:
0 Mapeador de som da Microsoft - Input
1 Microfone (Realtek(R) Audio)
2 Mixagem estéreo (Realtek(R) Aud
3 Mapeador de som da Microsoft - Output
4 Alto-falantes (Realtek(R) Audio
5 Alto-falantes (Realtek(R) Audio)
6 Microfone (Realtek(R) Audio)
7 Mixagem estéreo (Realtek(R) Audio)
8 Speakers 1 (Realtek HD Audio output with SST)
9 Speakers 2 (Realtek HD Audio output with SST)
10 Alto-falante (Realtek HD Audio output with SST)
11 Microfone (Realtek HD Audio Mic input)
12 Mixagem estéreo (Realtek HD Audio Stereo input)
You can use 2 separate threads to record from 2 different devices ( providing separate device Index) into separate Wav files.
Then Mix these 2 files Using the pydub library
from pydub import AudioSegment
# Load the two separately recorded tracks.
speakersound = AudioSegment.from_file("/path/speaker.wav")
micsound = AudioSegment.from_file("/path/mic.wav")
# Overlay the microphone track on the speaker track.
# NOTE(review): presumably the result keeps the length of speakersound -- verify against the pydub docs.
mixsound = speakersound.overlay(micsound)
# Write the combined audio out as a WAV file.
mixsound.export("/path/mixsound.wav", format='wav')
I am using pyaudio to record data from a two channel soundcard using this function
def record(self):
    '''
    Record two-channel audio from input device 1 into 'audiooutput.wav'.

    Buffers are collected by a stream callback so that recording can be
    stopped while the stream is running: the polling loop checks
    self.my_queue and stops the stream when a message with payload "Stop"
    arrives. The collected buffers are written to the WAV file afterwards.
    '''
    # A PortAudio *format constant* must be passed to open(); the sample
    # *size* in bytes is a different number and selects the wrong format.
    sample_format = pyaudio.paInt16
    p = pyaudio.PyAudio()
    frames = []

    # sub function checks the queue for a message to stop recording
    def check_for_recordstop():
        try:
            message = self.my_queue.get(timeout=0.1)
        except Exception:
            # NOTE(review): presumably queue.Empty on timeout -- narrow this
            # once the type of self.my_queue is confirmed.
            return
        if message is not None and message.payload == "Stop":
            self.confirm_message_recieved(message)
            stream.stop_stream()

    # subfunction callback
    def callback(in_data, frame_count, time_info, status):
        frames.append(in_data)
        flag = pyaudio.paContinue if stream.is_active() else pyaudio.paComplete
        return (in_data, flag)

    try:
        stream = p.open(format=sample_format,
                        channels=2,
                        rate=44100,
                        input=True,
                        frames_per_buffer=1024,
                        input_device_index=1,
                        stream_callback=callback)
        try:
            self.logger.info("Recording")
            stream.start_stream()  # the callback starts being invoked from here on
            while stream.is_active():  # keep this thread alive while the callback runs
                time.sleep(0.1)
                check_for_recordstop()
            print("done")
        finally:
            stream.close()
    finally:
        p.terminate()

    # Write the file only after capture, so it is always closed properly.
    with wave.open('audiooutput.wav', 'wb') as wf:
        wf.setnchannels(2)
        wf.setsampwidth(pyaudio.get_sample_size(sample_format))
        wf.setframerate(44100)
        wf.writeframes(b''.join(frames))
However when viewing this data in audacity I do not end up with 2 channel stream it looks like this
But When using a Function like this
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5

p = pyaudio.PyAudio()
stream = p.open(
    input=True,
    input_device_index=1,
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    frames_per_buffer=CHUNK,
)

print("* recording")
frames = []
# Blocking reads: one CHUNK-frame buffer per pass, RECORD_SECONDS in total.
for i in range(int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames += [data]
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

# Dump everything that was captured into a WAV file.
wf = wave.open("pyaudoutput.wav", 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
payload = b''.join(frames)
wf.writeframes(payload)
wf.close()
the audio data comes out as expected see here
I cannot seem to get the first function to produce the desired results from the second!
I need to use the callback capability of pyaudio for my usecase but cannot get the two channels to separate. Any Advice would be great!
I was opening the stream using the sample size not the sample width
format=pyaudio.get_sample_size(pyaudio.paInt16),
should be
format=pyaudio.get_format_from_width(wf.getsampwidth()),
Using the sample width from the file I am writing to solved the issue.
I have been trying to do real-time audio signal processing using the 'pyAudio' module in Python. What I did was a simple case of reading audio data from the microphone and playing it via headphones. I tried the following code (both Python and Cython versions). It works, but unfortunately it stalls and is not smooth enough. How can I improve the code so that it runs smoothly? My PC is an i7 with 8GB RAM.
Python Version
import pyaudio
import numpy as np
RATE = 16000
CHUNK = 256
DURATION_SECONDS = 20
p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
try:
    for i in range(int(DURATION_SECONDS * RATE / CHUNK)):  # run for DURATION_SECONDS
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        samples = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        # Write bytes, not the ndarray.
        # NOTE(review): PyAudio appears to derive the frame count from
        # len(frames); for an ndarray that is the sample count, not the byte
        # count, so only half of each int16 chunk would be played -- confirm.
        player.write(samples.tobytes())
finally:
    stream.stop_stream()
    stream.close()
    player.stop_stream()
    player.close()
    p.terminate()
Cython Version
import pyaudio
import numpy as np
cdef int RATE = 16000
cdef int CHUNK = 1024
cdef int i
p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True, frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
try:
    for i in range(500):  # 500 buffers of CHUNK frames (about 32 s at this RATE)
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        samples = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        # Write bytes, not the ndarray.
        # NOTE(review): PyAudio appears to derive the frame count from
        # len(frames); for an ndarray that is the sample count, not the byte
        # count, so only half of each int16 chunk would be played -- confirm.
        player.write(samples.tobytes())
finally:
    stream.stop_stream()
    stream.close()
    player.stop_stream()
    player.close()
    p.terminate()
I believe you are missing CHUNK as second argument to player.write call.
player.write(np.fromstring(stream.read(CHUNK),dtype=np.int16),CHUNK)
Also, not sure if its formatting error. But player.write needs to be tabbed into for loop
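The PyAudio examples write the loop count as RATE / CHUNK * RECORD_SECONDS. Note that * and / have the same precedence in Python and are evaluated left to right, so 20*RATE/CHUNK multiplies first while RATE/CHUNK*20 divides first; with Python 3's true division both forms give the same value.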
for i in range(int(20*RATE/CHUNK)):  # do this for 20 seconds
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # explicit CHUNK tells write() how many frames the buffer holds.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16), CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
Finally, you may want to increase rate to 44100 , CHUNK to 1024 and CHANNEL to 2 for better fidelity.
The code below will take the default input device, and output what's recorded into the default output device.
import time

import numpy as np
import pyaudio  # the module name is lowercase; "import PyAudio" fails

p = pyaudio.PyAudio()
CHANNELS = 2
RATE = 44100


def callback(in_data, frame_count, time_info, flag):
    """Stream callback: return the captured buffer unchanged for playback.

    This is the place to process the signal before handing it back.
    """
    # using Numpy to convert to array for processing
    # audio_data = np.fromstring(in_data, dtype=np.float32)
    return in_data, pyaudio.paContinue


stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                input=True,
                stream_callback=callback)
stream.start_stream()
while stream.is_active():
    # Let the callback run for 20 seconds, then stop; stopping makes
    # is_active() false, which ends this loop.
    time.sleep(20)
    stream.stop_stream()
    print("Stream is stopped")
stream.close()
p.terminate()
This will run for 20 seconds and stop. The method callback is where you can process the signal :
audio_data = np.fromstring(in_data, dtype=np.float32)
return in_data is where you send back post-processed data to the output device.
Note chunk has a default argument of 1024 as noted in the PyAudio docs:
http://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.PyAudio.open
I am working on a similar project. I modified your code and the stalls now are gone. The bigger the chunk the bigger the delay. That is why I kept it low.
import pyaudio
import numpy as np
CHUNK = 2**5  # small buffers keep the input-to-output delay low
RATE = 44100
LEN = 10
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True, frames_per_buffer=CHUNK)
try:
    for i in range(int(LEN*RATE/CHUNK)):  # go for LEN seconds
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        player.write(data, CHUNK)
finally:
    stream.stop_stream()
    stream.close()
    # The output stream was previously left open.
    player.stop_stream()
    player.close()
    p.terminate()
I want to create two threads, so when I run the program, the 1st thread activates the function "recorder" to record audio, and after 5 seconds it stops and 2nd thread activates to do some activity. After n second thread2 stops and thread1 comes back in position. This process continues until we terminate it by using keyboard exception.
I have following code. It starts with recording the audio, saves it but 2nd thread doesn't seem to work as the program finishes with exit code 0.
Can someone correct me where I have made a mistake? Thanks
from threading import Thread
import time
from time import ctime
import pyaudio
import wave
def recorder(name, delay, repeat):
    """Record three seconds of stereo audio and write it to "file.wav".

    The ``name``, ``delay`` and ``repeat`` arguments are accepted but not
    used by the body.
    """
    sample_format = pyaudio.paInt16
    channel_count = 2
    sample_rate = 44100
    chunk_frames = 1024
    seconds = 3
    target_path = "file.wav"

    audio = pyaudio.PyAudio()

    # start Recording
    stream = audio.open(
        format=sample_format,
        channels=channel_count,
        rate=sample_rate,
        input=True,
        frames_per_buffer=chunk_frames,
    )
    print("recording...")
    total_reads = int(sample_rate / chunk_frames * seconds)
    frames = [stream.read(chunk_frames) for _ in range(total_reads)]
    print("finished recording")

    # stop Recording
    stream.stop_stream()
    stream.close()
    audio.terminate()

    waveFile = wave.open(target_path, 'wb')
    waveFile.setnchannels(channel_count)
    waveFile.setsampwidth(audio.get_sample_size(sample_format))
    waveFile.setframerate(sample_rate)
    waveFile.writeframes(b''.join(frames))
    waveFile.close()
def save(name, delay, repeat):
    """Print a confirmation message; the three arguments are not used."""
    message = "Saved"
    print(message)
def Main():
    """Run one record-then-save cycle on worker threads.

    The recorder thread is joined before the save thread is started, so the
    save step runs only after the recording has been written, and
    "Main complete" is printed only after both threads have finished.
    """
    t1 = Thread(target=recorder, args=("Timer1", 1, 5))
    t2 = Thread(target=save, args=("Timer2", 5, 5))
    t1.start()
    t1.join()  # wait for the recording before moving on to the save step
    t2.start()
    t2.join()
    print("Main complete")


if __name__ == '__main__':
    Main()
I want to do a simple realtime processing to audio every 4096 samples. But this code calls the callback function every 1024 samples. I just want to change the frame_count to 4096.
import pyaudio
import time
WIDTH = 2
CHANNELS = 1
RATE = 44100

p = pyaudio.PyAudio()


def callback(in_data, frame_count, time_info, status):
    """Process each captured buffer and hand the result back for playback."""
    out = do_something(in_data)
    print(frame_count)  # 1024
    return out, pyaudio.paContinue


stream = p.open(
    format=p.get_format_from_width(WIDTH),
    channels=CHANNELS,
    rate=RATE,
    input=True,
    output=True,
    stream_callback=callback,
)
stream.start_stream()

# Keep the main thread alive while the callback is doing the work.
while stream.is_active():
    time.sleep(0.1)

stream.stop_stream()
stream.close()
p.terminate()
I haven't tested it, but from the documentation it seems that if you change the stream open line to:
# Same stream as before, but with frames_per_buffer given explicitly so that
# each callback invocation is asked to handle 4096 frames.
stream = p.open(format=p.get_format_from_width(WIDTH),
channels=CHANNELS,
rate=RATE,
input=True,
output=True,
frames_per_buffer=4096,
stream_callback=callback)
that you should get your required number of samples per block. The frames_per_buffer defaults at 1024 so that's probably why you're getting this value in your test.
Good luck!