I'm trying to generate my own notes using pyaudio, but I'm running into what is surely a beginner's mistake. I can generate pure sin wav tones and play them sequentially, but if I try to concatenate them, I don't get three notes in a row, I get the original note played three times as long.
import numpy as np
import pyaudio
def play_note(note):
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
channels=1,
rate=44100,
output=True)
stream.write(np.asarray(note, dtype=np.float32))
stream.stop_stream()
stream.close()
p.terminate()
sampling_rate = 44100
seconds = 1
x = np.arange(sampling_rate * seconds)
freqs = [440,660,880]
notes = []
for freq in freqs:
note = 100*np.sin(2 * np.pi * freq * x /sampling_rate)
notes.append(note)
# This is the behavior I want
for note in notes:
play_note(note)
# I would expect this to behave similarly, but without the gaps. It doesn't.
note = np.concatenate(notes)
play_note(note)
I get the same result with pyaudio 0.2.11 (running on Mac OS 10.12.6). I was able to fix it by adding the argument frames_per_buffer=1 to p.open() and num_frames=len(note) to stream.write():
def play_note(note):
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
channels=1,
rate=44100,
output=True,
frames_per_buffer=1)
stream.write(np.asarray(note, dtype=np.float32), num_frames=len(note))
stream.stop_stream()
stream.close()
p.terminate()
I haven't investigated further to answer why the original version doesn't work or why this change fixes it. Perhaps a pyaudio guru will give a more thorough answer.
Related
We have designed a code that records two wav files:
1. Records the ambient noise
2. Records the ambient noise with voice
We are then going to use those two wav files as inputs for our third def function that will subtract the ambient noise wav file from the ambient noise with voice. The only problem is that when we run the script and call the combination() function, the resulting wav file combines the two preceding wav files. Our goal is to get an output where the ambient noise will be reduced and the voice will be the one heard louder than it. Here is our script below:
import pyaudio
import wave
import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile
import scipy.signal as sp
def ambient():
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "ambientnoise.wav"
audio = pyaudio.PyAudio()
# start Recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
print ("recording...")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print ("finished recording")
# stop Recording
stream.stop_stream()
stream.close()
audio.terminate()
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
x = scipy.io.wavfile.read('ambientnoise.wav')
n = x[1]
y = np.zeros(n.shape)
y = n.cumsum(axis=0)
times = np.linspace(0, len(n), len(n))
plt.title("Plot 261 $speech1.wav\n $Secades, M.F.\spadesuit SIGNLAB \spadesuit 6Feb2018$")
plt.xlabel("n")
plt.ylabel("$speech1.wav$")
plt.plot(times,n)
plt.show()
def voice():
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "ambientwithvoice.wav"
audio = pyaudio.PyAudio()
# start Recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
print ("recording...")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print ("finished recording")
# stop Recording
stream.stop_stream()
stream.close()
audio.terminate()
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
x = scipy.io.wavfile.read('ambientwithvoice.wav')
n = x[1]
y = np.zeros(n.shape)
y = n.cumsum(axis=0)
times = np.linspace(0, len(n), len(n))
plt.title("Plot 261 $speech1.wav\n $Secades, M.F.\spadesuit SIGNLAB \spadesuit 6Feb2018$")
plt.xlabel("n")
plt.ylabel("$speech1.wav$")
plt.plot(times,n)
plt.show()
def combination():
rate1,Data1 = scipy.io.wavfile.read('ambientnoise.wav')
rate2,Data2 = scipy.io.wavfile.read('ambientwithvoice.wav')
new_Data = [0]*len(Data1)
for i in range(0,len(Data1)):
new_Data[i] = Data2[i] + Data1[i]
new_Data = np.array(new_Data)
scipy.io.wavfile.write('filtered.wav', rate1, new_Data)
x = scipy.io.wavfile.read('ambientwithvoice.wav')
n = x[1]
y = np.zeros(n.shape)
y = n.cumsum(axis=0)
times = np.linspace(0, len(n), len(n))
plt.title("Plot 261 $speech1.wav\n $Secades, M.F.\spadesuit SIGNLAB \spadesuit 6Feb2018$")
plt.xlabel("n")
plt.ylabel("$speech1.wav$")
plt.plot(times,n)
plt.show()
We have designed a code that records two wav files: 1. Records the ambient noise 2. Records the ambient noise with voice
This means, that while the ambient noise is continuously going on in the background two different recordings are made, one after the other. The first records only the noise, the second also has speech in it.
To simplify the explanation, let's assume the speech is not present (maybe the speaker simply said nothing). This should work similarly; noise from the first recording should be used to reduce the noise in the second recording - it does not matter if there is another signal present in the second recording or not. We know we were successful if the noise is reduced.
The situation looks like this:
Now let's combine the two recordings either by adding them or by subtracting:
Apparently, neither approach reduced the noise. Looking closely, the situation got worse: the noise amplitude in the resulting signal is higher than in either of the two recordings!
In order to work, the signal we subtract must be an exact replicate of noise in the speech signal (or at least a reasonable approximation). There lies the problem: we do not know the noise signal, because every time we record it looks differently.
So what can we do?
Use a second microphone that records the noise at the same time as the speech, but does not record the speaker.
Apply domain knowledge (#1): if you know for example that the noise is in a different frequency range than the speech signal filters can reduce the noise part.
Apply domain knowledge (#2): if the noise is predictable (e.g. something periodic like a fan or an engine) create a mathematical model that predicts the noise and subtract that from the speech signal.
If the noise is "real noise" (statistically independent and broad-band) such as Gaussian white-noise, we're pretty much out of luck.
I am trying to achieve active noise reduction in python. My project is composed of two set of codes:
sound recording code
sound filtering code
What I aim for is that when you run the program, it will start recording through the microphone. After you've finished recording there will be a saved file called "file1.wav" When you play that file, it is the one that you recorded originally. After you're finished with that, you will now put "file1.wav" through a filter by calling "fltrd()". This will create a second wav file in the same folder and that second wav file is supposedly the one with less/reduced noise. Now my problem is that the second wav file is enhancing noise instead of reducing it. Can anyone please troubleshoot my code? :(
Here is my code below:
import pyaudio
import wave
import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile
import scipy.signal as sp
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "file1.wav"
audio = pyaudio.PyAudio()
# start Recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
print ("recording...")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print ("finished recording")
# stop Recording
stream.stop_stream()
stream.close()
audio.terminate()
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
x = scipy.io.wavfile.read('file1.wav')
n = x[1]
y = np.zeros(n.shape)
y = n.cumsum(axis=0)
def fltrd():
n,x = scipy.io.wavfile.read('file1.wav')
a2 = x.cumsum(axis=0)
a3 = np.asarray(a2, dtype = np.int16)
scipy.io.wavfile.write('file2.wav',n,a3)
Actual noise filtering is difficult and intense. However, an simple noise filter using high and low pass filter can be easily created using pydub library. See here for more details (install, requirements etc)
Also see here for more details on low and high pass filter using pydub.
Basic idea is to take a audio file and then pass it through both low and high pass filter such that audio above and below certain threahold will be highly attenuated (in effect demonstrating filtering).
Although, this will not affect any noise falling in pass-band for which you will need to look at other noise cancellation techniques.
from pydub import AudioSegment
from pydub import low_passfilter
from pydub import high_pass_filter
from pydub.playback import play
song = AudioSegment.from_wav('file1.wav')
#Freq in Hz ,Adjust as per your needs
new = song.low_pass_filter(5000).high_pass_filter(200)
play(new)
I am using python and pyaudio to stream a pure sine tone using a callback method, in order to later modulate the sound via user input. Everything is fine except that when i run the code, i get 1-2 seconds of a cracking-buzzing sound associated to the warning message
ALSA lib pcm.c:7339:(snd_pcm_recover) underrun occurred
After that, the sine tone is streamed correctly. Any hints about how to remove the initial popping sound?
here is the code that stream the sound for one second
import pyaudio
import time
import numpy as np
CHANNELS = 1
RATE = 44100
freq = 600
CHUNK = 1024
lastchunk = 0
def sine(current_time):
global freq,lastchunk
length = CHUNK
factor = float(freq)*2*np.pi/RATE
this_chunk = np.arange(length)+lastchunk
lastchunk = this_chunk[-1]
return np.sin(this_chunk*factor)
def get_chunk():
data = sine(time.time())
return data * 0.1
def callback(in_data, frame_count, time_info, status):
chunk = get_chunk() * 0.25
data = chunk.astype(np.float32).tostring()
return (data, pyaudio.paContinue)
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
channels=CHANNELS,
rate=RATE,
output=True,
stream_callback=callback)
stream.start_stream()
time.sleep(1)
stream.stop_stream()
stream.close()
Cheers
PortAudio (the library behind PyAudio) allows you to specify a block size, which is typically called CHUNK in the PyAudio examples. If you don't specify one, the default is 0, which in PortAudio terms means that the block size will be chosen automatically and will even change from callback to callback!
To check that, try printing frame_count (which is another name for the block size) within the callback. I suspect that PortAudio chooses a too small block size in the beginning and when that causes underruns, it increases the block size. Am I right?
To avoid this, you should specify a fixed block size from the beginning, using:
stream = p.open(..., frames_per_buffer=CHUNK, ...)
... where frames_per_buffer is yet another name for the block size.
This also makes more sense since up to now you use length = CHUNK in your code without knowing the actual block size!
If this still leads to underruns, you can try further increasing the block size to 2048.
Finally, let me take the liberty to make a shameless plug for my own PortAudio wrapper, the sounddevice module. It basically does the same as PyAudio, but it's easier to install, IMHO has a nicer API and it supports NumPy directly, without you having to do the manual conversions.
The accepted answer still didn't give perfect audio quality. Judging from what I heard (didn't measure) there are sometimes drop outs and/or phase jumps in the sine. Based on the code in the PyAudio examples and what can be found here I came to this solution :
"""PyAudio Example: Play a wave file (callback version)."""
import pyaudio
import time
import math
from itertools import count
import numpy as np
RATE = 96000
# More efficient calculation but period = int(framer... causes high granularity for higher frequencies (15kHz becoming 16kHz for instance)
# def sine_wave(frequency=1000, framerate=RATE, amplitude=0.5):
# period = int(framerate / frequency)
# amplitude = max(min(amplitude, 1), 0)
# lookup_table = [float(amplitude) * math.sin(2.0 * math.pi * float(frequency) *
# (float(i % period) / float(framerate))) for i in xrange(period)]
# return (lookup_table[i % period] for i in count(0))
def sine_wave(frequency=440.0, framerate=RATE, amplitude=0.5):
amplitude = max(min(amplitude, 1), 0)
return (float(amplitude) * math.sin(2.0*math.pi*float(frequency)*(float(i)/float(framerate))) for i in count(0))
sine = [sine_wave(150), sine_wave(1500), sine_wave(15000)]
# instantiate PyAudio (1)
p = pyaudio.PyAudio()
# define callback (2)
def callback(in_data, frame_count, time_info, status):
wave = sine[0]
data = [wave.next()]
for i in range(frame_count - 1):
data.append(wave.next())
ret_array =np.array(data).astype(np.float32).tostring()
return (ret_array, pyaudio.paContinue)
# open stream using callback (3)
stream = p.open(format=pyaudio.paFloat32,
channels=1,
rate=RATE,
frames_per_buffer=1024,
output=True,
stream_callback=callback)
# start the stream (4)
stream.start_stream()
# Insert your own solution to end the sound
time.sleep(3)
# stop stream (6)
stream.stop_stream()
stream.close()
# close PyAudio (7)
p.terminate()
This should be able to play the sine until your hardware dies or the next power outage... But I only tested for half an hour ;-)
I'm trying to write naiv low pass filter using Python.
Values of the Fourier Transformant higher than a specific frequency should be equal to 0, right?
As far as I know that should to work.
But after an inverse fourier transformation what I get is just noise.
Program1 records RECORD_SECONDS from microphone and writes information about fft in fft.bin file.
Program2 reads from this file, do ifft and plays result on speakers.
In addition, I figured out, that every, even very little change in fft causes Program2 to fail.
Where do I make mistake?
Program1:
import pickle
import pyaudio
import wave
import numpy as np
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1 #1-mono, 2-stereo
RATE = 44100
RECORD_SECONDS = 2
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
f = open("fft.bin", "wb")
Tsamp = 1./RATE
#arguments for a fft
fft_x_arg = np.fft.rfftfreq(CHUNK/2, Tsamp)
#max freq
Fmax = 4000
print("* recording")
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
#read one chunk from mic
SigString = stream.read(CHUNK)
#convert string to int
SigInt = np.fromstring(SigString, 'int')
#calculate fft
fft_Sig = np.fft.rfft(SigInt)
"""
#apply low pass filter, maximum freq = Fmax
j=0
for value in fft_x_arg:
if value > Fmax:
fft_Sig[j] = 0
j=j+1
"""
#write one chunk of data to file
pickle.dump(fft_Sig,f)
print("* done recording")
f.close()
stream.stop_stream()
stream.close()
p.terminate()
Program2:
import pyaudio
import pickle
import numpy as np
CHUNK = 1024
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,
channels=1,
rate=44100/2, #anyway, why 44100 Hz plays twice faster than normal?
output=True)
f = open("fft.bin", "rb")
#load first value from file
fft_Sig = pickle.load(f)
#calculate ifft and cast do int
SigInt = np.int16(np.fft.irfft(fft_Sig))
#convert once more - to string
SigString = np.ndarray.tostring(SigInt)
while SigString != '':
#play sound
stream.write(SigString)
fft_Sig = pickle.load(f)
SigInt = np.int16(np.fft.irfft(fft_Sig))
SigString = np.ndarray.tostring(SigInt)
f.close()
stream.stop_stream()
stream.close()
p.terminate()
FFTs operate on complex numbers. You might be able to feed them real numbers (which will get converted to complex by setting the imaginary part to 0) but their outputs will always be complex.
This is probably throwing off your sample counting by 2 among other things. It should also be trashing your output because you're not converting back to real data.
Also, you forgot to apply a 1/N scale factor to the IFFT output. And you need to keep in mind that the frequency range of an FFT is half negative, that is it's approximately the range -1/(2T) <= f < 1/(2T). BTW, 1/(2T) is known as the Nyquist frequency, and for real input data, the negative half of the FFT output will mirror the positive half (i.e. for y(f) = F{x(t)} (where F{} is the forward Fourier transform) y(f) == y(-f).
I think you need to read up a bit more on DSP algorithms using FFTs. What you're trying to do is called a brick wall filter.
Also, something that will help you a lot is matplotlib, which will help you see what the data looks like at intermediate steps. You need to look at this intermediate data to find out where things are going wrong.
Is it possible to play a certain part of a .wav file in Python?
I'd like to have a function play(file, start, length) that plays the audiofile file from start seconds and stops playing after length seconds. Is this possible, and if so, what library do I need?
this is possible and can be easy in python.
Pyaudio is a nice library and you can use to play your audio!
First do you need decode the audio file (wav, mp3, etc) this step convert audio data in numbers(short int or float32).
Do you need convert the seconds in equivalent position point to cut the signal in the position of interest, to do this multiply your frame rate by what seconds do you want !
Here one simple example for wav files:
import pyaudio
import sys
import numpy as np
import wave
import struct
File='ederwander.wav'
start = 12
length=7
chunk = 1024
spf = wave.open(File, 'rb')
signal = spf.readframes(-1)
signal = np.fromstring(signal, 'Int16')
p = pyaudio.PyAudio()
stream = p.open(format =
p.get_format_from_width(spf.getsampwidth()),
channels = spf.getnchannels(),
rate = spf.getframerate(),
output = True)
pos=spf.getframerate()*length
signal =signal[start*spf.getframerate():(start*spf.getframerate()) + pos]
sig=signal[1:chunk]
inc = 0;
data=0;
#play
while data != '':
data = struct.pack("%dh"%(len(sig)), *list(sig))
stream.write(data)
inc=inc+chunk
sig=signal[inc:inc+chunk]
stream.close()
p.terminate()
I know that this is a rather old question, but I just needed the exact same thing and for me ederwander's example seems a little bit too complicated.
Here is my shorter (and commented) solution:
import pyaudio
import wave
# set desired values
start = 7
length = 3
# open wave file
wave_file = wave.open('myWaveFile.wav', 'rb')
# initialize audio
py_audio = pyaudio.PyAudio()
stream = py_audio.open(format=py_audio.get_format_from_width(wave_file.getsampwidth()),
channels=wave_file.getnchannels(),
rate=wave_file.getframerate(),
output=True)
# skip unwanted frames
n_frames = int(start * wave_file.getframerate())
wave_file.setpos(n_frames)
# write desired frames to audio buffer
n_frames = int(length * wave_file.getframerate())
frames = wave_file.readframes(n_frames)
stream.write(frames)
# close and terminate everything properly
stream.close()
py_audio.terminate()
wave_file.close()