Plotting fft from a wav file using python - python

I am trying to plot the frequency spectrum of a wav file, but it seems like frequency spectrum always matches the time domain signal, with the following code.
import matplotlib.pyplot as plt
import numpy as np
def plot(data):
plt.plot(data, color='steelblue')
plt.figure()
plt.show()
rate, wav_data = wavfile.read("audio_self/on/on.wav")
plot(wav_data)
plot(np.abs(np.fft.fft(wav_data)))
Am I doing something wrong?

If you want two separate a stereo track to left and right channels and then take a separate graph of each, it would be a lot more accurate of a reading unless you put the track in mono like Frank Zalkow says. This is how to separate the stereo track into left and right channels:
"""
Plot
"""
#Plots a stereo .wav file
#Decibels on the y-axis
#Frequency Hz on the x-axis
import matplotlib.pyplot as plt
import numpy as np
from pylab import*
from scipy.io import wavfile
def plot(file_name):
sampFreq, snd = wavfile.read(file_name)
snd = snd / (2.**15) #convert sound array to float pt. values
s1 = snd[:,0] #left channel
s2 = snd[:,1] #right channel
n = len(s1)
p = fft(s1) # take the fourier transform of left channel
m = len(s2)
p2 = fft(s2) # take the fourier transform of right channel
nUniquePts = ceil((n+1)/2.0)
p = p[0:nUniquePts]
p = abs(p)
mUniquePts = ceil((m+1)/2.0)
p2 = p2[0:mUniquePts]
p2 = abs(p2)
'''
Left Channel
'''
p = p / float(n) # scale by the number of points so that
# the magnitude does not depend on the length
# of the signal or on its sampling frequency
p = p**2 # square it to get the power
# multiply by two (see technical document for details)
# odd nfft excludes Nyquist point
if n % 2 > 0: # we've got odd number of points fft
p[1:len(p)] = p[1:len(p)] * 2
else:
p[1:len(p) -1] = p[1:len(p) - 1] * 2 # we've got even number of points fft
freqArray = arange(0, nUniquePts, 1.0) * (sampFreq / n);
plt.plot(freqArray/1000, 10*log10(p), color='k')
plt.xlabel('LeftChannel_Frequency (kHz)')
plt.ylabel('LeftChannel_Power (dB)')
plt.show()
'''
Right Channel
'''
p2 = p2 / float(m) # scale by the number of points so that
# the magnitude does not depend on the length
# of the signal or on its sampling frequency
p2 = p2**2 # square it to get the power
# multiply by two (see technical document for details)
# odd nfft excludes Nyquist point
if m % 2 > 0: # we've got odd number of points fft
p2[1:len(p2)] = p2[1:len(p2)] * 2
else:
p2[1:len(p2) -1] = p2[1:len(p2) - 1] * 2 # we've got even number of points fft
freqArray2 = arange(0, mUniquePts, 1.0) * (sampFreq / m);
plt.plot(freqArray2/1000, 10*log10(p2), color='k')
plt.xlabel('RightChannel_Frequency (kHz)')
plt.ylabel('RightChannel_Power (dB)')
plt.show()
I hope this helps.

Related

Output of fft.fft() for magnitude and phase (angle) not corresponding the the values set up

I set up a sine wave of a certain amplitude, frequency and phase, and tried recovering the amplitude and phase:
import numpy as np
import matplotlib.pyplot as plt
N = 1000 # Sample points
T = 1 / 800 # Spacing
t = np.linspace(0.0, N*T, N) # Time
frequency = np.fft.fftfreq(t.size, d=T) # Normalized Fourier frequencies in spectrum.
f0 = 25 # Frequency of the sampled wave
phi = np.pi/6 # Phase
A = 50 # Amplitude
s = A * np.sin(2 * np.pi * f0 * t - phi) # Signal
S = np.fft.fft(s) # Unnormalized FFT
fig, [ax1,ax2] = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
ax1.plot(t,s,'.-', label='time signal')
ax2.plot(freq[0:N//2], 2/N * np.abs(S[0:N//2]), '.', label='amplitude spectrum')
plt.show()
index, = np.where(np.isclose(frequency, f0, atol=1/(T*N))) # Getting the normalized frequency close to f0 in Hz)
magnitude = np.abs(S[index[0]]) # Magnitude
phase = np.angle(S[index[0]]) # Phase
print(magnitude)
print(phase)
phi
#21785.02149316858
#-1.2093259641890741
#0.5235987755982988
Now the amplitude should be 50, instead of 21785, and the phase pi/6=0.524, instead of -1.2.
Am I misinterpreting the output, or the answer on the post referred to in the link above?
You need to normalize the fft by 1/N with one of the two following changes (I used the 2nd one):
S = np.fft.fft(s) --> S = 1/N*np.fft.fft(s)
magnitude = np.abs(S[index[0]]) --> magnitude = 1/N*np.abs(S[index[0]])
Don't use index, = np.where(np.isclose(frequency, f0, atol=1/(T*N))), the fft is not exact and the highest magnitude may
not be at f0, use np.argmax(np.abs(S)) instead which will give
you the peak of the signal which will be very close to f0
np.angle messes up (I think its one of those pi,pi/2 arctan offset
things) just do it manually with np.arctan(np.real(x)/np.imag(x))
use more points (I made N higher) and make T smaller for higher accuracy
since a DFT (discrete fourier transform) is double sided and has peak signals in both the negative and positive frequencies, the peak in the positive side will only be half the actual magnitude. For an fft you need to multiply every frequency by two except for f=0 to acount for this. I multiplied by 2 in magnitude = np.abs(S[index])*2/N
N = 10000
T = 1/5000
...
index = np.argmax(np.abs(S))
magnitude = np.abs(S[index])*2/N
freq_max = frequency[index]
phase = np.arctan(np.imag(S[index])/np.real(S[index]))
print(f"magnitude: {magnitude}, freq_max: {freq_max}, phase: {phase}") print(phi)
Output: magnitude: 49.996693276663564, freq_max: 25.0, phase: 0.5079341239733628

How do I get the power at a particular frequency of a sound file?

I'm working on my end of the degree thesis in which I have to measure the Sound Pressure Level of underwater recordings (wav files) at a particular frequency (2000Hz). So I came up with this code:
'''
def get_value(filename, f0, NFFT=8192, plot = False):
#Load audio
data, sampling_frequency = soundfile.read(filename)
# remove stereo
if len(data.shape)> 1:
data = data[:, 0]
# remove extra length
if len(data)>sampling_frequency:
data = data[0:sampling_frequency]
# remove DC
data = data - data.mean()
# power without filtering
total_power = 10*np.log10(np.mean(data**2))
# fft
NFFT = 4096 # number of samples in the FFT
window = np.array(1) #np.hamming(len(data))
fftdata = np.fft.fft(data / NFFT, n = NFFT)
SPL = 20 * np.log10(np.abs(fftdata)) # Sound Pressure Level [dB]
freq = np.linspace(0, sampling_frequency, NFFT) # frequency axis [Hz]
# take value at desired frequency
power_at_frequency = SPL[np.argmin(np.abs(freq-f0))]
print(power_at_frequency)
'''
However, I checked the value with audacity and is completely different.
Thanks beforehand.
If you are interested in only one frequency you don't have to compute the FFT you can simply use
totalEnergy = np.sum((data - np.mean(data)) ** 2)
freqEnergy = np.abs(np.sum(data * np.exp(2j * np.pi * np.arange(len(data)) * target_freq / sampling_freq)))
And if you are using FFT and the window size is not a multiple of the wave period the frequency will leak to other frequencies. To avoid this your
import numpy as np;
import matplotlib.pyplot as plt
sampling_frequency = 48000;
target_frequency = 2000.0;
ns = 1000000;
data = np.sin(2*np.pi * np.arange(ns) * target_frequency / sampling_frequency);
# power
print('a sine wave have power 0.5 ~', np.mean(data**2), 'that will be split in two ')
## Properly scaled frequency
plt.figure(figsize=(12, 5))
plt.subplot(121);
z = np.abs(np.fft.fft(data[:8192])**2) / 8192**2
print('tuned with 8192 samples', max(z), ' some power leaked in other frequencies')
plt.semilogy(np.fft.fftfreq(len(z)) * sampling_frequency, z)
plt.ylabel('power')
plt.title('some power leaked')
plt.subplot(122);
# 6000 samples = 1/8 second is multiple of 1/2000 second
z = np.abs(np.fft.fft(data[:6000])**2) / 6000**2
print('tuned with 6000 samples', max(z))
plt.semilogy(np.fft.fftfreq(len(z)) * sampling_frequency, z)
plt.xlabel('frequency')
plt.title('all power in exact two symmetric bins')
## FFT of size not multiple of 2000
print(np.sum(np.abs(np.fft.fft(data[:8192]))**2) / 8192)

Python - performing FFT ignore DC offset from MEMS microphone

I'm trying to perform FFT of a wav file which results well, but in my plot I see huge amplitude at 0 Hz. I assumed it is a DC offset. My purpose is to ignore this DC offset in my plot or directly in code because it prevents me from seeing actual noises. In my example I record a noise around 6.1kHz, I can clearly see it, if I zoom at that point but in general view it is not observable because of the 0Hz amplitude. If you tell me how can ignore 0Hz (or DC bias) I will be glad.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
import numpy as np
from matplotlib import pyplot as plt
fs_rate, signal = wavfile.read("file.wav")
print ("Frequency sampling", fs_rate)
l_audio = len(signal.shape)
print ("Channels", l_audio)
if l_audio == 2:
signal = signal.sum(axis=1) / 2
N = signal.shape[0]
print ("Complete Samplings N", N)
secs = N / float(fs_rate)
print ("secs", secs)
Ts = 1.0/fs_rate # sampling interval in time
print ("Timestep between samples Ts", Ts)
t = scipy.arange(0, secs, Ts) # time vector as scipy arange field / numpy.ndarray
FFT = abs(scipy.fft(signal))
FFT_side = FFT[range(N/4)] # one side FFT range
freqs = scipy.fftpack.fftfreq(signal.size, t[1]-t[0])
fft_freqs = np.array(freqs)
freqs_side = freqs[range(N/4)] # one side frequency range
fft_freqs_side = np.array(freqs_side)
print (abs(FFT_side))
plt.subplot(211)
p1 = plt.plot(t, signal, "g") # plotting the signal
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.subplot(212)
p2 = plt.plot(freqs_side, abs(FFT_side), "b") # plotting the positive fft spectrum
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count single-sided')
plt.show()
A large offset is often indicative of not proper preprocessing of the signal. Common approaches include demeaning the data and dedrifting the data by using linear regression. Here is an example
from matplotlib.pyplot import *
from numpy import *
dt = 1/1000
T = 1
t = arange(0, T, dt)
n = t.size
y = sin(pi * t * 3) + 39 + 3 * t + random.rand(n)
from scipy import optimize
# subtract drift
lin = lambda x, a, b : a * x + b
coeff, _ = optimize.curve_fit(lin,t, y)
dmy= y- coeff[0] * t + coeff[0]
# compute power
fy = abs(fft.fft(y))[:n//2] ** 2
fyn= abs(fft.fft(dmy - dmy.mean()))[:n//2] ** 2 # NB demeaned
freq= linspace(0, T / dt, n//2) # get freqs
fig, ax = subplots(2, sharex = 'all')
for axi, data, label in zip(ax, [fy,fyn], 'raw processed'.split()):
axi.plot(freq, data)
axi.set(xlim = (0, 10), title = label)
axi.set_xlabel('freq')
subplots_adjust(hspace = .5)

Two dimensional FFT using python results in slightly shifted frequency

I know there have been several questions about using the Fast Fourier Transform (FFT) method in python, but unfortunately none of them could help me with my problem:
I want to use python to calculate the Fast Fourier Transform of a given two dimensional signal f, i.e. f(x,y). Pythons documentation helps a lot, solving a few issues, which the FFT brings with it, but i still end up with a slightly shifted frequency compared to the frequency i expect it to show. Here is my python code:
from scipy.fftpack import fft, fftfreq, fftshift
import matplotlib.pyplot as plt
import numpy as np
import math
fq = 3.0 # frequency of signal to be sampled
N = 100.0 # Number of sample points within interval, on which signal is considered
x = np.linspace(0, 2.0 * np.pi, N) # creating equally spaced vector from 0 to 2pi, with spacing 2pi/N
y = x
xx, yy = np.meshgrid(x, y) # create 2D meshgrid
fnc = np.sin(2 * np.pi * fq * xx) # create a signal, which is simply a sine function with frequency fq = 3.0, modulating the x(!) direction
ft = np.fft.fft2(fnc) # calculating the fft coefficients
dx = x[1] - x[0] # spacing in x (and also y) direction (real space)
sampleFrequency = 2.0 * np.pi / dx
nyquisitFrequency = sampleFrequency / 2.0
freq_x = np.fft.fftfreq(ft.shape[0], d = dx) # return the DFT sample frequencies
freq_y = np.fft.fftfreq(ft.shape[1], d = dx)
freq_x = np.fft.fftshift(freq_x) # order sample frequencies, such that 0-th frequency is at center of spectrum
freq_y = np.fft.fftshift(freq_y)
half = len(ft) / 2 + 1 # calculate half of spectrum length, in order to only show positive frequencies
plt.imshow(
2 * abs(ft[:half,:half]) / half,
aspect = 'auto',
extent = (0, freq_x.max(), 0, freq_y.max()),
origin = 'lower',
interpolation = 'nearest',
)
plt.grid()
plt.colorbar()
plt.show()
And what i get out of this when running it, is:
Now you see that the frequency in x direction is not exactly at fq = 3, but slightly shifted to the left. Why is this?
I would assume that is has to do with the fact, that FFT is an algorithm using symmetry arguments and
half = len(ft) / 2 + 1
is used to show the frequencies at the proper place. But I don't quite understand what the exact problem is and how to fix it.
Edit: I have also tried using a higher sampling frequency (N = 10000.0), which did not solve the issue, but instead shifted the frequency slightly too far to the right. So i am pretty sure that the problem is not the sampling frequency.
Note: I'm aware of the fact, that the leakage effect leads to unphysical amplitudes here, but in this post I am primarily interested in the correct frequencies.
I found a number of issues
you use 2 * np.pi twice, you should choose one of either linspace or the arg to sine as radians if you want a nice integer number of cycles
additionally np.linspace defaults to endpoint=True, giving you an extra point for 101 instead of 100
fq = 3.0 # frequency of signal to be sampled
N = 100 # Number of sample points within interval, on which signal is considered
x = np.linspace(0, 1, N, endpoint=False) # creating equally spaced vector from 0 to 2pi, with spacing 2pi/N
y = x
xx, yy = np.meshgrid(x, y) # create 2D meshgrid
fnc = np.sin(2 * np.pi * fq * xx) # create a signal, which is simply a sine function with frequency fq = 3.0, modulating the x(!) direction
you can check these issues:
len(x)
Out[228]: 100
plt.plot(fnc[0])
fixing the linspace endpoint now means you have an even number of fft bins so you drop the + 1 in the half calc
matshow() appears to have better defaults, your extent = (0, freq_x.max(), 0, freq_y.max()), in imshow appears to fubar the fft bin numbering
from scipy.fftpack import fft, fftfreq, fftshift
import matplotlib.pyplot as plt
import numpy as np
import math
fq = 3.0 # frequency of signal to be sampled
N = 100 # Number of sample points within interval, on which signal is considered
x = np.linspace(0, 1, N, endpoint=False) # creating equally spaced vector from 0 to 2pi, with spacing 2pi/N
y = x
xx, yy = np.meshgrid(x, y) # create 2D meshgrid
fnc = np.sin(2 * np.pi * fq * xx) # create a signal, which is simply a sine function with frequency fq = 3.0, modulating the x(!) direction
plt.plot(fnc[0])
ft = np.fft.fft2(fnc) # calculating the fft coefficients
#dx = x[1] - x[0] # spacing in x (and also y) direction (real space)
#sampleFrequency = 2.0 * np.pi / dx
#nyquisitFrequency = sampleFrequency / 2.0
#
#freq_x = np.fft.fftfreq(ft.shape[0], d=dx) # return the DFT sample frequencies
#freq_y = np.fft.fftfreq(ft.shape[1], d=dx)
#
#freq_x = np.fft.fftshift(freq_x) # order sample frequencies, such that 0-th frequency is at center of spectrum
#freq_y = np.fft.fftshift(freq_y)
half = len(ft) // 2 # calculate half of spectrum length, in order to only show positive frequencies
plt.matshow(
2 * abs(ft[:half, :half]) / half,
aspect='auto',
origin='lower'
)
plt.grid()
plt.colorbar()
plt.show()
zoomed the plot:

Find time shift of two signals using cross correlation

I have two signals which are related to each other and have been captured by two different measurement devices simultaneously.
Since the two measurements are not time synchronized there is a small time delay between them which I want to calculate. Additionally, I need to know which signal is the leading one.
The following can be assumed:
no or only very less noise present
speed of the algorithm is not an issue, only accuracy and robustness
signals are captured with an high sampling rate (>10 kHz) for several seconds
expected time delay is < 0.5s
I though of using-cross correlation for that purpose.
Any suggestions how to implement that in Python are very appreciated.
Please let me know if I should provide more information in order to find the most suitable algorithmn.
A popular approach: timeshift is the lag corresponding to the maximum cross-correlation coefficient. Here is how it works with an example:
import matplotlib.pyplot as plt
from scipy import signal
import numpy as np
def lag_finder(y1, y2, sr):
n = len(y1)
corr = signal.correlate(y2, y1, mode='same') / np.sqrt(signal.correlate(y1, y1, mode='same')[int(n/2)] * signal.correlate(y2, y2, mode='same')[int(n/2)])
delay_arr = np.linspace(-0.5*n/sr, 0.5*n/sr, n)
delay = delay_arr[np.argmax(corr)]
print('y2 is ' + str(delay) + ' behind y1')
plt.figure()
plt.plot(delay_arr, corr)
plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
plt.show()
# Sine sample with some noise and copy to y1 and y2 with a 1-second lag
sr = 1024
y = np.linspace(0, 2*np.pi, sr)
y = np.tile(np.sin(y), 5)
y += np.random.normal(0, 5, y.shape)
y1 = y[sr:4*sr]
y2 = y[:3*sr]
lag_finder(y1, y2, sr)
In the case of noisy signals, it is common to apply band-pass filters first. In the case of harmonic noise, they can be removed by identifying and removing frequency spikes present in the frequency spectrum.
Numpy has function correlate which suits your needs: https://docs.scipy.org/doc/numpy/reference/generated/numpy.correlate.html
To complement Reveille's answer above (I reproduce his algorithm), I would like to point out some ideas for preprocessing the input signals.
Since there seems to be no fit-for-all (duration in periods, resolution, offset, noise, signal type, ...) you may play with it.
In my example the application of a window function improves the detected phase shift (within resolution of the discretization).
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
r2d = 180.0/np.pi # conversion factor RAD-to-DEG
delta_phi_true = 50.0/r2d
def detect_phase_shift(t, x, y):
'''detect phase shift between two signals from cross correlation maximum'''
N = len(t)
L = t[-1] - t[0]
cc = signal.correlate(x, y, mode="same")
i_max = np.argmax(cc)
phi_shift = np.linspace(-0.5*L, 0.5*L , N)
delta_phi = phi_shift[i_max]
print("true delta phi = {} DEG".format(delta_phi_true*r2d))
print("detected delta phi = {} DEG".format(delta_phi*r2d))
print("error = {} DEG resolution for comparison dphi = {} DEG".format((delta_phi-delta_phi_true)*r2d, dphi*r2d))
print("ratio = {}".format(delta_phi/delta_phi_true))
return delta_phi
L = np.pi*10+2 # interval length [RAD], for generality not multiple period
N = 1001 # interval division, odd number is better (center is integer)
noise_intensity = 0.0
X = 0.5 # amplitude of first signal..
Y = 2.0 # ..and second signal
phi = np.linspace(0, L, N)
dphi = phi[1] - phi[0]
'''generate signals'''
nx = noise_intensity*np.random.randn(N)*np.sqrt(dphi)
ny = noise_intensity*np.random.randn(N)*np.sqrt(dphi)
x_raw = X*np.sin(phi) + nx
y_raw = Y*np.sin(phi+delta_phi_true) + ny
'''preprocessing signals'''
x = x_raw.copy()
y = y_raw.copy()
window = signal.windows.hann(N) # Hanning window
#x -= np.mean(x) # zero mean
#y -= np.mean(y) # zero mean
#x /= np.std(x) # scale
#y /= np.std(y) # scale
x *= window # reduce effect of finite length
y *= window # reduce effect of finite length
print(" -- using raw data -- ")
delta_phi_raw = detect_phase_shift(phi, x_raw, y_raw)
print(" -- using preprocessed data -- ")
delta_phi_preprocessed = detect_phase_shift(phi, x, y)
Without noise (to be deterministic) the output is
-- using raw data --
true delta phi = 50.0 DEG
detected delta phi = 47.864788975654 DEG
...
-- using preprocessed data --
true delta phi = 50.0 DEG
detected delta phi = 49.77938053468019 DEG
...
Numpy has a useful function, called correlation_lags for this, which uses the underlying correlate function mentioned by other answers to find the time lag. The example displayed at the bottom of that page is useful:
from scipy import signal
from numpy.random import default_rng
rng = default_rng()
x = rng.standard_normal(1000)
y = np.concatenate([rng.standard_normal(100), x])
correlation = signal.correlate(x, y, mode="full")
lags = signal.correlation_lags(x.size, y.size, mode="full")
lag = lags[np.argmax(correlation)]
Then lag would be -100

Categories