PyAudio - How do you group frequency ranges together?

I followed a tutorial on how to get sound data with PyAudio. It shows the amplitude of the sound at certain frequencies.
import pyaudio
import struct
import numpy as np
from scipy.fftpack import fft
CHUNK = 2**10
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
                output=True, frames_per_buffer=CHUNK)
data = struct.unpack(str(CHUNK*CHANNELS) + 'h', stream.read(CHUNK))
fft_data = fft(data)
fft_data = np.abs(fft_data[:CHUNK]) * 2 / (256 * CHUNK)
print(fft_data)
Now I want to group these together so that I can make a simple bar graph showing the amplitude of certain ranges of frequencies. When I run this code it returns the amplitude of 1024 frequencies, so I tried using this function to group them into 32 groups.
def split_freq(freq):  # splits given sound frequencies into groups of frequencies to feed into turtle
    freq_ranges = []
    for i in range(len(freq)-1):  # split the frequencies into 32 groups
        if i % abs((len(freq)//32)) == 0:  # create new group every time i is a multiple of the number of frequencies divided by 32
            if len(freq_ranges) > 0:
                freq_ranges[len(freq_ranges)-2] = freq_ranges[len(freq_ranges)-2] / (len(freq)//32)
            freq_ranges.append(0)
        freq_ranges[len(freq_ranges)-1] = freq_ranges[len(freq_ranges)-1] + freq[i]
    return [i * 400 for i in freq_ranges]
My frequency grouping method seems to work, but when I feed in a certain tone the bar graph doesn't change in any meaningful way. Does anybody know what the problem is?

First of all, you should use rfft instead of fft since the data you are transforming only contains real values. The rfft should provide a more meaningful representation of the data.
The way you are splitting the data has no real physical meaning as far as I can see. I would recommend grouping by frequency bands; you can use a list comprehension to do this, grouping the values into bins and then applying a function like sum or mean:
n = fft_data.size // 32  # 32 frequency bands
bands = [sum(fft_data[i:(i + n)]) for i in range(0, fft_data.size, n)]
or:
from statistics import mean
n = fft_data.size // 32  # 32 frequency bands
bands = [mean(fft_data[i:(i + n)]) for i in range(0, fft_data.size, n)]
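As a concrete sketch of both suggestions combined (untested, reusing data, CHUNK and RATE from the question): take the rfft, convert it to magnitudes, average the bins into 32 bands with a reshape, and use np.fft.rfftfreq to see which frequencies each band covers:
import numpy as np

N_BANDS = 32
spectrum = np.abs(np.fft.rfft(data)) * 2 / (256 * CHUNK)  # CHUNK//2 + 1 magnitude bins
freqs = np.fft.rfftfreq(CHUNK, d=1.0/RATE)                # centre frequency of each bin in Hz
per_band = (CHUNK // 2) // N_BANDS                        # 16 bins per band; the last rfft bin is dropped
bands = spectrum[:N_BANDS * per_band].reshape(N_BANDS, per_band).mean(axis=1)
band_starts = freqs[:N_BANDS * per_band:per_band]         # lower edge of each band in Hz
Each entry of bands is the average magnitude over a roughly 690 Hz wide slice of the spectrum, which is what each bar of the bar graph would show.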

Related

Apply FFT to each chunk of 1024 samples

I have 1024 samples and I want to cut them into 32 chunks of 32 samples, run an FFT on each one, and plot the results as a frequency-amplitude spectrum. Most of the code works; only the part that applies the FFT to each chunk doesn't, although I am able to apply an FFT to the whole samples array.
I tried doing something like this:
realFFT = [for chunk in chunks(amplitude,32): np.fft.fft(chunk)]
but that is the wrong syntax.
I also tried going over the chunks array by casting it to a list and then saving it all to another list, but that didn't work either.
Here is my code:
# Python example - Fourier transform using numpy.fft method
import numpy as np
import matplotlib.pyplot as plotter
from os import times
from PIL import Image
import numpy as np
from numpy.lib.type_check import real
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
# How many time points are needed i,e., Sampling Frequency
samplingFrequency = 100
# At what intervals time points are sampled
samplingInterval = 1 / samplingFrequency
# Begin time period of the signals
beginTime = 0
# End time period of the signals
endTime = 10.24
# Frequency of the signals
signal1Frequency = 4
signal2Frequency = 7
# Time points
time = np.arange(beginTime, endTime, samplingInterval)
# Create two sine waves
amplitude1 = 0.7* np.sin(2*np.pi*signal1Frequency*time)
amplitude2 = np.sin(2*np.pi*signal2Frequency*time)
# Create subplot
figure, axis = plotter.subplots(2, 1)
plotter.subplots_adjust(hspace=2)
# Time domain representation for sine wave 1
amplitude = amplitude1
axis[0].set_title('Sine wave with a frequency of 4 Hz')
axis[0].plot(time, amplitude)
axis[0].set_xlabel('Time')
axis[0].set_ylabel('Amplitude')
# Frequency domain representation
realFFT = [for chunk in chunks(amplitude,32): np.fft.fft(chunk)]
#fourierTransform = np.fft.fft(amplitude) # Normalize amplitude
fourierTransform = realFFT[range(int(len(amplitude)/2))] # Exclude sampling frequency
tpCount = len(amplitude)
values = np.arange(int(tpCount/2))
timePeriod = tpCount/samplingFrequency
frequencies = values/timePeriod
# Frequency domain representation
axis[1].set_title('Fourier transform depicting the frequency components')
#dBm = 30 + (20 * np.log10(abs(fourierTransform)))
axis[1].plot(frequencies, abs(fourierTransform))
axis[1].set_xlabel('Frequency')
axis[1].set_ylabel('Amplitude')
plotter.show()
You have the syntax slightly wrong
realFFT = [np.fft.fft(chunk) for chunk in chunks(amplitude, 32)]
The syntax of a list comprehension is a little bit different from that of a for loop.
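For instance, a minimal sketch of how the corrected line slots into the code above (assuming amplitude still holds the 1024 samples generated earlier):
realFFT = [np.fft.fft(chunk) for chunk in chunks(amplitude, 32)]  # one FFT per 32-sample chunk
realFFT = np.array(realFFT)                                       # shape (32, 32)
magnitudes = np.abs(realFFT[:, :16])                              # positive-frequency half of each chunk
Reading the comprehension left to right: the expression to evaluate (np.fft.fft(chunk)) comes first, and the for clause that supplies chunk follows it.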

Storing Values from One Array into Another Larger Array

I am trying to create a range of signals of different frequencies. I am finding it difficult to store the amplitude-vs-time data in another storage matrix for each frequency ranging from 0 to 50 Hz. For example, for a frequency of 20 Hz I want to store the amplitude vs time for that frequency, then for 21 Hz I want to store the amplitude vs time for that frequency, and so on, until I have all of them in a large matrix. I am getting confused at this point with indexing and syntax; any help welcome!
import numpy as np
max_freq = 50
s_frequency = np.arange(0,51,0.1)
fs = 200
time = np.arange(0,5-(1/fs),(1/fs))
x = np.empty((len(time)), dtype=np.float32)
i = 0
j = 0
full_array = np.empty((len(s_frequency),len(time),len(time)), dtype=np.float32)
amplitude = np.zeros(999)
for f1 in s_frequency:
    i = 0
    for t in time:
        amplitude[i] = np.sin(2*np.pi*f1*t)
        i = i + 1
    full_array[i] = ([time], [amplitude])
I have also tried the following:
import numpy as np
max_freq = 50
s_frequency = np.arange(0,50.1,0.1)
fs = 200
time = np.arange(0,5-(1-fs),(1/fs))
#full_array = np.sin(2*np.pi*np.outer(s_frequency,time))
full_array = np.empty((len(s_frequency),len(time), len(time)), dtype=np.float32)
for f1 in s_frequency:
    array = []
    for i, t in enumerate(time):
        amplitude = np.sin(2*np.pi*f1*t)
        array.insert(i, amplitude)
    full_array[i] = [time, array]
Not 100% sure what you're trying to do, but it seems like you're trying to initialize a 2-dimensional grid (i.e. a matrix) where you have a dimension for time and one for frequency. Here is what I would do:
import numpy as np
max_freq = 50
s_frequency = np.arange(0,51,0.1)
fs = 200
time = np.arange(0,5-(1/fs),(1/fs))
full_array = np.sin(2*np.pi*np.outer(s_frequency,time))
No explicit for-loops or index handling needed. np.outer() will give you a 2D grid (i.e. a matrix) of frequency versus time. What's left is to compute the sine of 2π times each grid value. Very conveniently, NumPy functions accept arrays as input, so we can simply call np.sin(2*np.pi*np.outer(s_frequency, time)).
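As a quick sanity check of the layout (a small sketch reusing the arrays defined above), row i of the result is the amplitude-vs-time signal for s_frequency[i]:
full_array = np.sin(2*np.pi*np.outer(s_frequency, time))
print(full_array.shape)   # (len(s_frequency), len(time)): one row per frequency, one column per time point
print(full_array[200])    # amplitude vs time for s_frequency[200], i.e. the 20 Hz signal from your example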
Not sure what x and j are good for in your code, or why full_array should be 3-dimensional. Would you like to include a spatial component as well?
By the way, a construct like this:
i = 0
for t in time:
    amplitude[i] = np.sin(2*np.pi*f1*t)
    i = i + 1
can easily be avoided in Python, thanks to Python's built-in enumerate() function. It would then look like this:
for i, t in enumerate(time):
    amplitude[i] = np.sin(2*np.pi*f1*t)
which does essentially the same, but you don't have to explicitly create the index i = 0 and manually increment it in every iteration with i = i + 1.

Making a wavetable synth for the first time...Can somebody point me in the right direction?

I'm trying to make a wavetable synthesizer in Python for the first time (based off an example I found here https://blamsoft.com/tutorials/expanse-creating-wavetables/) but the resultant sound I'm getting doesn't sound tonal at all. My output is just a low grainy buzz. I'm pretty new to making wavetables in Python and I was wondering if anybody might be able to tell me what I'm missing in order to write an A440 sine wavetable to the file "wavetable.wav" and have it actually produce a pure sine tone? Here's what I have at the moment:
import wave
import struct
import numpy as np
frame_count = 256
frame_size = 2048
sps = 44100
freq_hz = 440
file = "wavetable.wav" #write waveform to file
wav_file = wave.open(file, 'w')
wav_file.setparams((1, 2, sps, frame_count, 'NONE', 'not compressed'))
values = bytes(0)
for i in range(frame_count):
    for ii in range(frame_size):
        sample = np.sin((float(ii)/frame_size) * (i+128)/256 * 2 * np.pi * freq_hz/sps) * 65535
        if sample < 0:
            sample = 0
        sample -= 32768
        sample = int(sample)
        values += struct.pack('h', sample)
wav_file.writeframes(values)
wav_file.close()
print("Generated " + file)
The sine function I have inside the for loop is probably the part I understand the least because I just went by the example verbatim. I'm used to making sine functions like (y = Asin(2πfx)) but I'm not sure what the purpose is of multiplying by ((i+128)/256) and 65535 (16-bit amplitude resolution?). I'm also not sure what the purpose is of subtracting 32768 from each sample. Is anyone able to clarify what I'm missing and maybe point me in the right direction? Am I going about this the wrong way? Any help is appreciated!
If you just want to generate sound data ahead of time and then dump it all into a file, and you're also comfortable using NumPy, I'd suggest using it with a library like SoundFile. Then there's no need to delimit the data into frames.
Starting with a naïve approach (using numpy.sin, not trying to optimize things yet), one ends up with something like this:
from math import floor, tau
import numpy as np
import soundfile as sf
file_path = 'sine.flac'
sample_rate = 48_000 # hertz
duration = 1.0 # seconds
frequency = 432.0 # hertz
amplitude = 0.8 # (not in decibels!)
start_phase = 0.0 # at what phase to start
sample_count = floor(sample_rate * duration)
# cyclical frequency in sample^-1
omega = frequency * tau / sample_rate
# all phases for which we want to sample our sine
phases = np.linspace(start_phase, start_phase + omega * sample_count,
                     sample_count, endpoint=False)
# our sine wave samples, generated all at once
audio = amplitude * np.sin(phases)
# now write to file
fmt, sub = 'FLAC', 'PCM_24'
assert sf.check_format(fmt, sub) # to make sure we ask the correct thing beforehand
sf.write(file_path, audio, sample_rate, format=fmt, subtype=sub)
This will be a mono sound; you can write stereo using 2-D arrays (see NumPy's and SoundFile's docs).
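For example, a minimal stereo sketch (sine_stereo.flac is just a placeholder name of mine; SoundFile expects the data shaped as (frames, channels)):
left = audio
right = amplitude * np.sin(phases + 0.5)   # e.g. a slightly phase-shifted copy for the right channel
stereo = np.column_stack([left, right])    # shape (sample_count, 2)
sf.write('sine_stereo.flac', stereo, sample_rate, format=fmt, subtype=sub)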
But note that to make a wavetable specifically, you need to be sure it contains just a single period (or an integer number of periods) of the wave exactly, so the playback of the wavetable will be without clicks and have a correct frequency.
You can play chunked sound in real time in Python too, using something like PyAudio. (I've not yet used it myself, so treat the sketch below as untested.)
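An untested sketch of chunked playback with PyAudio's blocking API (the 1024-sample chunk size is arbitrary; audio and sample_rate are reused from above):
import pyaudio

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32, channels=1, rate=sample_rate, output=True)
for start in range(0, len(audio), 1024):
    chunk = audio[start:start + 1024].astype(np.float32)  # PyAudio expects raw bytes of the declared sample format
    stream.write(chunk.tobytes())
stream.stop_stream()
stream.close()
p.terminate()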
Finally, frankly, all of the above is unrelated to generating sound data from a wavetable: just picking a wavetable from somewhere doesn't do much for actual synthesis. Here is a simple starting algorithm for that. Assume you want to play back a chunk of sample_count samples and have a wavetable stored in wavetable, a single period that loops perfectly and is normalized. Also assume your current wave phase is start_phase, your frequency is frequency, your sample rate is sample_rate and your amplitude is amplitude. Then:
# indices for the wavetable values; this is just for `np.interp` to work
wavetable_period = float(len(wavetable))
wavetable_indices = np.linspace(0, wavetable_period,
                                len(wavetable), endpoint=False)
# frequency of the wavetable played at native resolution
wavetable_freq = sample_rate / wavetable_period
# start index into the wavetable
start_index = start_phase * wavetable_period / tau
# code above you run just once at initialization of this wavetable ↑
# code below is run for each audio chunk ↓
# samples of wavetable per output sample
shift = frequency / wavetable_freq
# fractional indices into the wavetable
indices = np.linspace(start_index, start_index + shift * sample_count,
                      sample_count, endpoint=False)
# linearly interpolated wavetable sampled at our frequency
audio = np.interp(indices, wavetable_indices, wavetable,
                  period=wavetable_period)
audio *= amplitude
# at last, update `start_index` for the next chunk
start_index += shift * sample_count
Then you output the audio. Though there are better ways to play back a wavetable, linear interpolation is at least a fine start. Frequency slides are also possible with this approach: just compute indices in another way, no longer spaced uniformly.
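For completeness, here is a small assumed setup for the snippet above: it defines the variables that code expects and builds a normalized single-period sine wavetable. Running the initialization and per-chunk code above then leaves sample_count interpolated samples in audio, ready to write out or play back:
from math import tau
import numpy as np

sample_rate = 48_000   # hertz
frequency = 440.0      # hertz
amplitude = 0.5
start_phase = 0.0
sample_count = 4800    # one 100 ms chunk

# one exact period of a sine, normalized to [-1, 1]
wavetable = np.sin(np.linspace(0, tau, 2048, endpoint=False))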

Interpolating between two fourier transforms over time

I'm a musician and I'm making a script that takes a wave file and snaps each of the frequencies from its Fourier transform to the nearest musical harmonic. Thanks to help from another question I posted here, that part works, but what I need to do now is make it tune one frequency at a time over the course of the sound, so that at the beginning the output sounds the same as the input, and by the end it sounds like an instrument.
If I just wanted them to fade into each other then that would be easy: I could crossfade in Audacity or take a weighted average of the original Fourier transform and the output one. What I want to do instead is tune one frequency at a time, meaning that 50% of the way through the output, 50% of the frequencies have been snapped to the nearest harmonic and the other 50% are untouched. How could I accomplish this without computing every single sample of the output individually?
Also, I was thinking of reducing the MAX_HARMONIC over time as well but it would have a similar problem.
This is the sample I'm testing with (rename it to missile.wav):
https://my.mixtape.moe/iltlos.wav
Here is the script so far:
import struct
import wave
import numpy as np
# import data from wave
wav_file = wave.open("missile.wav", 'r')
num_samples = wav_file.getnframes()
sampling_rate = wav_file.getframerate() / 2
data = wav_file.readframes(num_samples)
wav_file.close()
data = struct.unpack('{n}h'.format(n=num_samples), data)
data = np.array(data)
# fast fourier transform makes an array of the frequencies of sine waves that comprise the sound
data_fft = np.fft.rfft(data)
# the higher MAX_HARMONIC is, the more it sounds like the original,
# the lower it is, the more it sounds like an instrument
MAX_HARMONIC = 2
# generate list of ratios that can be used for tuning (not octave reduced)
valid_ratios = []
for i in range(1, MAX_HARMONIC + 1):
    for j in range(1, MAX_HARMONIC + 1):
        if i % 2 != 0 and j % 2 != 0:
            valid_ratios.append(i/float(j))
            valid_ratios.append(j/float(i))
# remove dupes
valid_ratios = list(set(valid_ratios))
# find all the frequencies with the valid ratios
valid_frequencies = []
multiple = 2
while(multiple < num_samples / 2):
    multiple *= 2
    for ratio in valid_ratios:
        frequency = ratio * multiple
        if frequency < num_samples / 2:
            valid_frequencies.append(frequency)
# remove dupes and sort and turn into a numpy array
valid_frequencies = np.sort(np.array(list(set(valid_frequencies))))
# bin the data_fft into the nearest valid frequency
valid_frequencies = valid_frequencies.astype(np.int64)
boundaries = np.concatenate([[0], np.round(np.sqrt(0.25 + valid_frequencies[:-1] * valid_frequencies[1:])).astype(np.int64)])
select = np.abs(data_fft) > 1
filtered_data_fft = np.zeros_like(data_fft)
filtered_data_fft[valid_frequencies] = np.add.reduceat(np.where(select, data_fft, 0), boundaries)
# do the inverse fourier transform to get a sound wave back
recovered_signal = np.fft.irfft(filtered_data_fft)
# write sound wave to wave file
comptype="NONE"
compname="not compressed"
nchannels=1
sampwidth=2
wav_file=wave.open("missile_output.wav", 'w')
wav_file.setparams((nchannels, sampwidth, int(sampling_rate), num_samples, comptype, compname))
for s in recovered_signal:
    wav_file.writeframes(struct.pack('h', s))
wav_file.close()

Filtering a wav file using python

So I recently built a system which records, plots, and plays back an audio wav file entirely with Python. Now I'm trying to add some filtering and audio mixing between the recording step and the plotting/playback step, but I have no idea where to start. Right now I want to read in the initial wav file, apply a low-pass filter, and then re-pack the newly filtered data into a new wav file. Here is the code I used to plot the initial data once I recorded it:
import matplotlib.pyplot as plt
import numpy as np
import wave
import sys
spf = wave.open('wavfile.wav','r')
#Extract Raw Audio from Wav File
signal = spf.readframes(-1)
signal = np.fromstring(signal, 'Int16')
plt.figure(1)
plt.title('Signal Wave...')
plt.plot(signal)
And here is some code I used to generate a test audio file of a single tone:
import numpy as np
import wave
import struct
freq = 440.0
data_size = 40000
fname = "High_A.wav"
frate = 11025.0
amp = 64000.0
sine_list_x = []
for x in range(data_size):
    sine_list_x.append(np.sin(2*np.pi*freq*(x/frate)))
wav_file = wave.open(fname, "w")
nchannels = 1
sampwidth = 2
framerate = int(frate)
nframes = data_size
comptype = "NONE"
compname = "not compressed"
wav_file.setparams((nchannels, sampwidth, framerate, nframes,
                    comptype, compname))
for s in sine_list_x:
    wav_file.writeframes(struct.pack('h', int(s*amp/2)))
wav_file.close()
I'm not really sure how to apply said audio filter and repack it, though. Any help and/or advice you could offer would be greatly appreciated.
First step: what kind of audio filter do you need?
Choose the filtered band
Low-pass filter: removes the highest frequencies from your audio signal
High-pass filter: removes the lowest frequencies from your audio signal
Band-pass filter: removes both the highest and lowest frequencies from your audio signal
For the following steps, I assume you need a low-pass filter.
Choose your cutoff frequency
The cutoff frequency is the frequency at which your signal is attenuated by -3 dB.
Your example signal is 440 Hz, so let's choose a cutoff frequency of 400 Hz. Your 440 Hz signal is then attenuated by more than -3 dB by the 400 Hz low-pass filter.
Choose your filter type
According to this other Stack Overflow answer:
Filter design is beyond the scope of Stack Overflow - that's a DSP problem, not a programming problem. Filter design is covered by any DSP textbook - go to your library. I like Proakis and Manolakis' Digital Signal Processing. (Ifeachor and Jervis' Digital Signal Processing isn't bad either.)
To work through a simple example, I suggest using a moving average filter (as a simple low-pass filter).
See Moving average
Mathematically, a moving average is a type of convolution and so it can be viewed as an example of a low-pass filter used in signal processing
This Moving average Low-pass Filter is a basic filter, and it is quite easy to use and to understand.
The parameter of the moving average is the window length.
The relationship between the moving average window length and the cutoff frequency needs a little bit of mathematics and is explained here.
The code will be
import math
sampleRate = 11025.0
cutOffFrequency = 400.0
freqRatio = cutOffFrequency / sampleRate
N = int(math.sqrt(0.196201 + freqRatio**2) / freqRatio)
So, in the example, the window length will be 12
Second step: coding the filter
Hand-made moving average
See the specific discussion on how to create a moving average in Python.
The solution from Alleo is:
def running_mean(x, windowSize):
    cumsum = numpy.cumsum(numpy.insert(x, 0, 0))
    return (cumsum[windowSize:] - cumsum[:-windowSize]) / windowSize

filtered = running_mean(signal, N)
Using lfilter
Alternatively, as suggested by dpwilson, we can also use lfilter
import numpy
import scipy.signal

win = numpy.ones(N)
win *= 1.0/N
filtered = scipy.signal.lfilter(win, [1], signal).astype(channels.dtype)
Third step: let's put it all together
import matplotlib.pyplot as plt
import numpy as np
import wave
import sys
import math
import contextlib
fname = 'test.wav'
outname = 'filtered.wav'
cutOffFrequency = 400.0
# from http://stackoverflow.com/questions/13728392/moving-average-or-running-mean
def running_mean(x, windowSize):
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[windowSize:] - cumsum[:-windowSize]) / windowSize
# from http://stackoverflow.com/questions/2226853/interpreting-wav-data/2227174#2227174
def interpret_wav(raw_bytes, n_frames, n_channels, sample_width, interleaved = True):
    if sample_width == 1:
        dtype = np.uint8 # unsigned char
    elif sample_width == 2:
        dtype = np.int16 # signed 2-byte short
    else:
        raise ValueError("Only supports 8 and 16 bit audio formats.")
    channels = np.fromstring(raw_bytes, dtype=dtype)
    if interleaved:
        # channels are interleaved, i.e. sample N of channel M follows sample N of channel M-1 in raw data
        channels.shape = (n_frames, n_channels)
        channels = channels.T
    else:
        # channels are not interleaved. All samples from channel M occur before all samples from channel M-1
        channels.shape = (n_channels, n_frames)
    return channels
with contextlib.closing(wave.open(fname, 'rb')) as spf:
    sampleRate = spf.getframerate()
    ampWidth = spf.getsampwidth()
    nChannels = spf.getnchannels()
    nFrames = spf.getnframes()

    # Extract Raw Audio from multi-channel Wav File
    signal = spf.readframes(nFrames*nChannels)
    spf.close()
    channels = interpret_wav(signal, nFrames, nChannels, ampWidth, True)

    # get window size
    # from http://dsp.stackexchange.com/questions/9966/what-is-the-cut-off-frequency-of-a-moving-average-filter
    freqRatio = (cutOffFrequency/sampleRate)
    N = int(math.sqrt(0.196196 + freqRatio**2)/freqRatio)

    # Use moving average (only on first channel)
    filtered = running_mean(channels[0], N).astype(channels.dtype)

    wav_file = wave.open(outname, "w")
    wav_file.setparams((1, ampWidth, sampleRate, nFrames, spf.getcomptype(), spf.getcompname()))
    wav_file.writeframes(filtered.tobytes('C'))
    wav_file.close()
The sox library can be used for static noise removal.
I found this gist which has some useful commands as examples
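As a rough illustration of the usual SoX workflow (my own sketch; the exact commands and file names in the gist may differ): build a noise profile from a clip that contains only background noise, then apply noise reduction, e.g. driven from Python with subprocess:
import subprocess

# 1. build a noise profile from a recording that contains only the background noise
subprocess.run(['sox', 'noise_only.wav', '-n', 'noiseprof', 'noise.prof'], check=True)
# 2. reduce that noise in the actual recording (0.21 is the reduction amount, between 0 and 1)
subprocess.run(['sox', 'input.wav', 'cleaned.wav', 'noisered', 'noise.prof', '0.21'], check=True)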
