Recording and playing audio simultaneously with PyAudio and NumPy - python

Currently I can record audio and save it as a NumPy array. What I need is this: after the audio has been recorded, I want to be able to record again while playing this NumPy array back at the same time.
import pyaudio
import numpy
# Record RECORD_SECONDS of audio and keep the samples in one NumPy array.
CHUNK = 1024          # frames requested per blocking read
WIDTH = 2             # bytes per sample; matches the int16 dtype used below
CHANNELS = 2
RATE = 44100          # frames per second
RECORD_SECONDS = 5

p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(WIDTH),
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK)
try:
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        # frombuffer replaces the deprecated binary mode of numpy.fromstring.
        frames.append(numpy.frombuffer(data, dtype=numpy.int16))
    # hstack copies the chunks into a single new 1-D array.
    numpydata = numpy.hstack(frames)
finally:
    # Release the audio device even if a read fails part-way through.
    stream.stop_stream()
    stream.close()
    p.terminate()

You can use threading. Go to the official documentation for more information here. I don't know much about recording and playing audio, so I have just created a template that should work for you.
Here is my example:
from threading import Thread


def record():
    """Placeholder for the recording code."""
    # Put your recording function here


def play():
    """Placeholder for the playback code."""
    # Put your playing function here


# Run both functions concurrently, each on its own thread.
Thread(target=record).start()
Thread(target=play).start()
#These two start the two functions at the same time. If you want to only run the play
#function after it runs the record function once, you could do something like this:
Here is the better one:
from threading import Thread


def record():
    """Placeholder for the recording code."""
    # Put your recording function here


def play():
    """Placeholder for the playback code."""
    # Put your playing function here


# First pass: record on its own and wait until that recording has finished.
first_pass = Thread(target=record)
first_pass.start()
first_pass.join()
recorded = True

# Second pass: record again while playing at the same time.
Thread(target=record).start()
Thread(target=play).start()
To repeat the last two lines in the second example, you can just add a while or for loop. Please feel free to ask questions in the comments.

Related

Python PyAudio is Processing all data into one channel

I am using pyaudio to record data from a two channel soundcard using this function
def record(self):
    '''
    Record from the configured sound card until a "Stop" message arrives.

    Audio is captured through a PyAudio stream callback so that recording can
    be stopped while the stream is running. Once the stream is no longer
    active, the collected buffers are written to 'audiooutput.wav'
    (2 channels, 16-bit samples, 44100 Hz).
    '''
    # One format constant is shared by the WAV header and the stream.
    # Passing get_sample_size(paInt16) (the byte count 2) as `format` selects
    # a different PortAudio sample format and garbles the channel layout.
    sample_format = pyaudio.paInt16

    wf = wave.open('audiooutput.wav', 'wb')
    wf.setnchannels(2)
    wf.setsampwidth(pyaudio.get_sample_size(sample_format))
    wf.setframerate(44100)
    p = pyaudio.PyAudio()
    frames = []

    # sub function checks the queue for a message to stop recording
    def check_for_recordstop():
        try:
            message = self.my_queue.get(timeout=0.1)
        except Exception:
            # Nothing usable arrived within the timeout; keep recording.
            # NOTE(review): presumably this is queue.Empty -- confirm the
            # type of self.my_queue and narrow the except clause.
            return
        if message is None:
            return
        if message.payload == "Stop":
            self.confirm_message_recieved(message)
            stream.stop_stream()

    # subfunction callback: store every buffer, finish once the stream stops
    def callback(in_data, frame_count, time_info, status):
        if stream.is_active():
            flag = pyaudio.paContinue
        else:
            flag = pyaudio.paComplete
        frames.append(in_data)
        return (in_data, flag)

    try:
        stream = p.open(format=sample_format,
                        channels=2,
                        rate=44100,
                        input=True,
                        frames_per_buffer=1024,
                        input_device_index=1,
                        stream_callback=callback)
        self.logger.info("Recording")
        stream.start_stream()  # callback is run on a new thread when start_stream() is triggered
        while stream.is_active():  # Loop to keep thread alive while callback is running
            time.sleep(0.1)
            check_for_recordstop()
        print("done")
        stream.close()
        wf.writeframes(b''.join(frames))
    finally:
        # Always close the file and release PortAudio, even on failure.
        wf.close()
        p.terminate()
However when viewing this data in audacity I do not end up with 2 channel stream it looks like this
But When using a Function like this
# Blocking-mode recording: read fixed-size chunks for RECORD_SECONDS, then
# write everything to a WAV file.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5

p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    input_device_index=1,
    frames_per_buffer=CHUNK,
)

print("* recording")
frames = []
chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
for i in range(chunk_count):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

# Dump the raw chunks into the WAV container.
recorded_bytes = b''.join(frames)
wf = wave.open("pyaudoutput.wav", 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(recorded_bytes)
wf.close()
the audio data comes out as expected see here
I cannot seem to get the first function to produce the desired results from the second!
I need to use the callback capability of pyaudio for my usecase but cannot get the two channels to separate. Any Advice would be great!
I was opening the stream with the sample size (a byte count) as the format, instead of a format constant derived from the sample width
format=pyaudio.get_sample_size(pyaudio.paInt16),
should be
format=pyaudio.get_format_from_width(wf.getsampwidth()),
Using the sample width from the file I was writing to solved the issue.

Python - How to record system audio(The output from the speaker)?

I have been searching for this since last week. I also tried pyaudio, but when I used a fork of it, the system audio was mixed with the microphone audio. I was not able to find any other module for this, so I finally asked the question.
Edit:
import pyaudio
import wave
# Records RECORD_SECONDS of audio in loopback mode and saves it as a WAV file.
# NOTE(review): `as_loopback` is presumably provided by the
# intxcc/pyaudio_portaudio fork mentioned in the surrounding text -- confirm.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

# Host API index of the default output device (the modified part).
SPEAKERS = p.get_default_output_device_info()["hostApi"]

stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_host_api_specific_stream_info=SPEAKERS,
    as_loopback=True,  # the other modified part
)

print("* recording")
frames = []
# One extra chunk on top of the computed count, as in the original loop.
chunk_count = int(RATE / CHUNK * RECORD_SECONDS) + 1
for i in range(chunk_count):
    data = stream.read(CHUNK)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

recorded_bytes = b''.join(frames)
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(recorded_bytes)
wf.close()
This code was taken from stack overflow. It records the speaker output but the output is mixed with the microphone Input.
Also the pyaudio module used was from the fork : https://github.com/intxcc/pyaudio_portaudio.
using https://github.com/intxcc/pyaudio_portaudio
This only records the audio of the device specified by "device_id"
import pyaudio
import wave
# Record `seconds` of loopback audio from one device and save it as a WAV file.
chunk = 1024  # Record in chunks of 1024 samples
sample_format = pyaudio.paInt16  # 16 bits per sample
channels = 2
fs = 44100  # Placeholder; replaced below by the selected device's own rate
seconds = 3
filename = "output.wav"

p = pyaudio.PyAudio()  # Create an interface to PortAudio

# Select Device
print("Available devices:\n")
for i in range(0, p.get_device_count()):
    info = p.get_device_info_by_index(i)
    print(str(info["index"]) + ": \t %s \n \t %s \n" % (info["name"], p.get_host_api_info_by_index(info["hostApi"])["name"]))

# ToDo change to your device ID
device_id = 7
device_info = p.get_device_info_by_index(device_id)

# Use whichever channel count (input or output) is larger for this device.
channels = max(device_info["maxInputChannels"], device_info["maxOutputChannels"])

# The stream runs at the device's default rate, so the chunk count and the
# WAV header must use that same rate; otherwise the file has the wrong
# length and plays back at the wrong speed.
fs = int(device_info["defaultSampleRate"])
sample_width = p.get_sample_size(sample_format)

# https://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.Stream.__init__
stream = p.open(format=sample_format,
                channels=channels,
                rate=fs,
                input=True,
                frames_per_buffer=chunk,
                input_device_index=device_info["index"],
                as_loopback=True
                )

frames = []  # Initialize array to store frames
print('\nRecording', device_id, '...\n')

try:
    # Store data in chunks for `seconds` seconds
    for i in range(0, int(fs / chunk * seconds)):
        data = stream.read(chunk)
        frames.append(data)
finally:
    # Stop and close the stream
    stream.stop_stream()
    stream.close()
    # Terminate the PortAudio interface
    p.terminate()

print('Finished recording')

# Save the recorded data as a WAV file
with wave.open(filename, 'wb') as wf:
    wf.setnchannels(channels)
    wf.setsampwidth(sample_width)
    wf.setframerate(fs)
    wf.writeframes(b''.join(frames))
P.S. check out https://github.com/intxcc/pyaudio_portaudio/tree/master/example
This can be done with soundcard. You will have to figure out which device index to use for your loopback. This code prints out the ones you will have to choose from. I found the correct one by looping over all of them and seeing which produced non zeros when speakers were playing.
pip install soundcard
import soundcard as sc
import time

# get a list of all speakers:
speakers = sc.all_speakers()
# get the current default speaker on your system:
default_speaker = sc.default_speaker()
# get a list of all microphones, loopback devices included:
mics = sc.all_microphones(include_loopback=True)

# List the candidates first so the right loopback index can be chosen.
for i, candidate in enumerate(mics):
    try:
        print(f"{i}: {candidate.name}")
    except Exception as e:
        print(e)

# TODO: set this to the index of your speaker loopback from the list above.
LOOPBACK_INDEX = 0
default_mic = mics[LOOPBACK_INDEX]

with default_mic.recorder(samplerate=148000) as mic, \
        default_speaker.player(samplerate=148000) as sp:
    print("Recording...")
    data = mic.record(numframes=1000000)
    print("Done...Stop your sound so you can hear playback")
    time.sleep(5)
    sp.play(data)
I installed a virtual sound card (BlackHole) on macOS to record the system audio, and it worked.
I only record system audio without microphone audio, as I don't need it
On Ubuntu, you can use 'pavucontrol' to change the recording source. An example of recording audio directly from the speakers (without using a microphone):
First you run a script like the one below:
import pyaudio
# Open one input/output stream and keep reading from it forever; the actual
# recording source is switched externally (see the pavucontrol note below).
mic = pyaudio.PyAudio()
stream = mic.open(
    format=pyaudio.paInt16,
    channels=1,
    rate=44100,
    input=True,
    output=True,
    frames_per_buffer=2048,
)
stream.start_stream()

if __name__ == '__main__':
    while True:
        data = stream.read(1024)
        # Do something with sound
Then you can change the recording source (recording tab) from 'Built-in=Audio Analog Stereo' to 'Monitor of Built-in=Audio Analog Stereo'.
With this approach, you can analyze the sound from the speakers during the video call.

Real-time audio signal processing using python

I have been trying to do real-time audio signal processing using the 'pyAudio' module in Python. What I did was a simple case of reading audio data from the microphone and playing it via headphones. I tried the following code (both Python and Cython versions). It works, but unfortunately it stalls and is not smooth enough. How can I improve the code so that it runs smoothly? My PC has an i7 and 8 GB of RAM.
Python Version
import pyaudio
import numpy as np
# Microphone -> NumPy -> headphones pass-through.
RATE = 16000
CHUNK = 256

p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)
try:
    for i in range(int(20 * RATE / CHUNK)):  # 20*RATE/CHUNK chunks -> runs for 20 seconds
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        samples = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        # Hand write() raw bytes plus an explicit frame count. Without a
        # count, write() derives it from len(), and an int16 array of CHUNK
        # elements is counted as only CHUNK/2 frames, dropping half the audio.
        player.write(samples.tobytes(), CHUNK)
finally:
    # Close both streams, not just the input one.
    stream.stop_stream()
    stream.close()
    player.stop_stream()
    player.close()
    p.terminate()
Cython Version
import pyaudio
import numpy as np
# Microphone -> NumPy -> headphones pass-through (Cython version).
cdef int RATE = 16000
cdef int CHUNK = 1024
cdef int i

p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True, frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
try:
    for i in range(500):  # 500 chunks of 1024 frames at 16 kHz -> 32 seconds
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        samples = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        # Pass raw bytes and an explicit frame count so write() does not
        # derive a halved count from the length of the int16 array.
        player.write(samples.tobytes(), CHUNK)
finally:
    # Close both streams, not just the input one.
    stream.stop_stream()
    stream.close()
    player.stop_stream()
    player.close()
    p.terminate()
I believe you are missing CHUNK as second argument to player.write call.
player.write(np.fromstring(stream.read(CHUNK),dtype=np.int16),CHUNK)
Also, not sure if its formatting error. But player.write needs to be tabbed into for loop
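The PyAudio examples write the chunk count as RATE / CHUNK * RECORD_SECONDS rather than RECORD_SECONDS * RATE / CHUNK. (Python evaluates * and / left to right with equal precedence, so both forms give the same count; following the documented form is just a convention.)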
for i in range(int(20*RATE/CHUNK)):  # 20*RATE/CHUNK chunks -> runs for 20 seconds
    # frombuffer replaces the deprecated binary mode of np.fromstring.
    samples = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
    # The explicit CHUNK tells write() how many frames the buffer holds.
    player.write(samples.tobytes(), CHUNK)
stream.stop_stream()
stream.close()
# The output stream needs closing too.
player.stop_stream()
player.close()
p.terminate()
Finally, you may want to increase rate to 44100 , CHUNK to 1024 and CHANNEL to 2 for better fidelity.
The code below will take the default input device, and output what's recorded into the default output device.
import time

import numpy as np
import pyaudio  # the module name is lowercase; `import PyAudio` fails

p = pyaudio.PyAudio()
CHANNELS = 2
RATE = 44100


def callback(in_data, frame_count, time_info, flag):
    """Return each input buffer unchanged so it is played on the output."""
    # using Numpy to convert to array for processing
    # audio_data = np.frombuffer(in_data, dtype=np.float32)
    return in_data, pyaudio.paContinue


stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                input=True,
                stream_callback=callback)

stream.start_stream()

# The callback always returns paContinue, so looping on stream.is_active()
# would never end. Sleep once for the intended 20 seconds, then stop.
time.sleep(20)

stream.stop_stream()
print("Stream is stopped")

stream.close()
p.terminate()
This will run for 20 seconds and stop. The method callback is where you can process the signal :
audio_data = np.fromstring(in_data, dtype=np.float32)
return in_data is where you send back post-processed data to the output device.
Note chunk has a default argument of 1024 as noted in the PyAudio docs:
http://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.PyAudio.open
I am working on a similar project. I modified your code and the stalls now are gone. The bigger the chunk the bigger the delay. That is why I kept it low.
import pyaudio
import numpy as np
# Low-latency microphone -> speaker pass-through using small chunks.
CHUNK = 2**5
RATE = 44100
LEN = 10

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True, frames_per_buffer=CHUNK)
try:
    for i in range(int(LEN*RATE/CHUNK)):  # go for LEN seconds
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        player.write(data.tobytes(), CHUNK)
finally:
    # Close both streams, not just the input one.
    stream.stop_stream()
    stream.close()
    player.stop_stream()
    player.close()
    p.terminate()

multiple threads not working properly

I want to create two threads, so when I run the program, the 1st thread activates the function "recorder" to record audio, and after 5 seconds it stops and 2nd thread activates to do some activity. After n second thread2 stops and thread1 comes back in position. This process continues until we terminate it by using keyboard exception.
I have following code. It starts with recording the audio, saves it but 2nd thread doesn't seem to work as the program finishes with exit code 0.
Can someone correct me where I have made a mistake? Thanks
from threading import Thread
import time
from time import ctime
import pyaudio
import wave
def recorder(name, delay, repeat):
    """Record 3 seconds of 2-channel 16-bit audio and save it to "file.wav".

    The ``name``, ``delay`` and ``repeat`` arguments are accepted but not
    used by the body.
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    CHUNK = 1024
    RECORD_SECONDS = 3
    WAVE_OUTPUT_FILENAME = "file.wav"

    audio = pyaudio.PyAudio()
    # Query the sample width while the PortAudio interface is still open.
    sample_width = audio.get_sample_size(FORMAT)

    # start Recording
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    print("recording...")
    frames = []
    try:
        for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            frames.append(stream.read(CHUNK))
        print("finished recording")
    finally:
        # stop Recording; release the device even if a read fails
        stream.stop_stream()
        stream.close()
        audio.terminate()

    # The context manager closes the file even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(sample_width)
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(frames))
def save(name, delay, repeat):
    """Print "Saved"; the three arguments are accepted but not used."""
    message = "Saved"
    print(message)
def Main():
    """Run the recorder thread to completion, then the save thread.

    Starting both threads at once lets ``save`` run while the recording is
    still in progress. Joining each thread before moving on gives the
    record-then-act order described in the question.
    """
    t1 = Thread(target=recorder, args=("Timer1", 1, 5))
    t2 = Thread(target=save, args=("Timer2", 5, 5))

    t1.start()
    t1.join()  # wait for the recording to finish

    t2.start()
    t2.join()  # wait for the follow-up work to finish

    print("Main complete")


if __name__ == '__main__':
    Main()

PyAudio Responsive Recording

I've seen the recording tutorial on the PyAudio website for making a fixed-length recording, but I was wondering how I could do the same with a non-fixed recording. Basically, I want to create buttons to start and end the recording, but I haven't found anything on the matter. Any ideas? I am not looking for an alternative library.
Best is to use the non-blocking way of recording, i.e. you provide a callback function that gets called from the moment you start the stream and keeps getting called for every block/buffer that gets processed until you stop the stream.
In that callback function you check for a boolean for example, and when it is true you write the incoming buffer to a datastructure, when it is false you ignore the incoming buffer. This boolean can be set from clicking a button for example.
EDIT: look at the example of wire audio: http://people.csail.mit.edu/hubert/pyaudio/#wire-callback-example
The stream is opened with an argument
stream_callback=my_callback
Where my_callback is a regular function declared as
def my_callback(in_data, frame_count, time_info, status)
This function will be called every time a new buffer is available. in_data contains the input, which you want to record. In this example, in_data just gets returned in a tuple together with pyaudio.paContinue. This means that the incoming buffer from the input device is put/copied back into the output buffer sent to the output device (it's the same device, so it's actually routing input to output, aka wire). See the API docs for a bit more explanation: http://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.PyAudio.open
So in this function you can do something like (this is an extract from some code I've written, which is not complete: I use some functions not depicted. Also I play a sinewave on one channel and noise on the other in 24bit format.):
# Flags flipped from elsewhere while the stream runs.
record_on = False
playback_on = False
# Input buffers captured while record_on is set.
recorded_frames = queue.Queue()


def callback_play_sine(in_data, frame_count, time_info, status):
    """Stream callback: optionally queue the input and produce output data.

    While ``record_on`` is set, each input buffer is put on
    ``recorded_frames``. While ``playback_on`` is set, the output is a sine
    block on the left channel and random noise on the right; otherwise the
    output is zeros. Returns ``(data, flag)`` where the flag is
    ``paComplete`` once ``stop_callback`` is truthy, else ``paContinue``.
    """
    # No `global` statement is needed: the queue is mutated, not rebound.
    if record_on:
        recorded_frames.put(in_data)

    if playback_on:
        left_channel_data = mysine.next_block(frame_count) * MAX_INT24 * gain
        right_channel_data = ((np.random.rand(frame_count) * 2) - 1) * MAX_INT24 * gain
        data = interleave_channels(max_nr_of_channels, (left_output_channel, left_channel_data), (right_output_channel, right_channel_data))
        data = convert_int32_to_24bit_bytestream(data)
    else:
        # tobytes() replaces the deprecated ndarray.tostring().
        # NOTE(review): np.zeros defaults to float64, so this silent buffer
        # may not match the stream's bytes-per-sample -- confirm.
        data = np.zeros(frame_count*max_nr_of_channels).tobytes()

    callback_flag = pyaudio.paComplete if stop_callback else pyaudio.paContinue
    return data, callback_flag
You can then set record_on and playback_on to True or False from another part of your code while the stream is open/running, causing recording and playback to start or stop independently without interrupting the stream.
I copy the in_data in a (threadsafe) queue, which is used by another thread to write to disk there, else the queue will get big after a while.
BTW: pyaudio is based on portaudio, which has much more documentation and helpful tips. For example (http://portaudio.com/docs/v19-doxydocs/writing_a_callback.html): the callback function has to finish before a new buffer is presented, else buffers will be lost. So writing to a file inside the callback function usually not a good idea. (though writing to a file gets buffered and I don't know if it blocks when its written to disk eventually)
import pyaudio
import wave
import pygame, sys
from pygame.locals import *
# Record until any key is pressed, then save the audio to a WAV file. The
# pygame window stays open until it is closed.
pygame.init()
scr = pygame.display.set_mode((640, 480))
recording = True

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)
print("* recording")
frames = []


def _release_audio():
    """Stop and close the input stream and terminate PortAudio."""
    stream.stop_stream()
    stream.close()
    p.terminate()


def _save_recording():
    """Write the captured frames to WAVE_OUTPUT_FILENAME."""
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


while True:
    if recording:
        data = stream.read(CHUNK)
        frames.append(data)
    else:
        # Nothing left to read: yield the CPU instead of spinning at 100%.
        pygame.time.wait(10)

    for event in pygame.event.get():
        if event.type == KEYDOWN and recording:
            print("* done recording")
            _release_audio()
            _save_recording()
            recording = False
        if event.type == QUIT:
            if recording:
                # Window closed mid-recording: release the audio device
                # before exiting.
                _release_audio()
            pygame.quit()
            sys.exit()
This is what I came up with when compiling it to an exe. Passing arguments to the exe:
parser = argparse.ArgumentParser()
parser.add_argument('-t', dest='time', action='store')
args = parser.parse_args()
time = int(args.time)

Categories