Lomb-Scargle vs FFT power spectrum: crashes with evenly spaced data - python

I am trying to create some routines to compute power spectra for both evenly and unevenly sampled data, using the Lomb-Scargle periodogram (LSP) and FFT-Power spectrum. The problem I am having is that when using the LSP implementation in scipy, I experience crashes with evenly sampled data.
The code below works, and produces near identical (and correct) output, as far as I can tell. However, I was forced to insert a kludge in the Lomb-Scargle function to add jitter to the frequencies, so they don't exactly match up to the FFT ones. When I comment out that line, I get a divide-by-zero error.
Is this an issue with the Lomb-Scargle implementation in scipy, or am I simply not supposed to use it with evenly sampled data? Thanks in advance.
import numpy as np
import scipy.signal as sp
import matplotlib.pyplot as plt

def one_sided_fft(t, x):
    full_amplitude_spectrum = np.abs(np.fft.fft(x))/x.size
    full_freqs = np.fft.fftfreq(x.size, np.mean(np.ediff1d(t)))
    oneinds = np.where(full_freqs >= 0.0)
    one_sided_freqs = full_freqs[oneinds]
    one_sided_amplitude_spectrum = 2*full_amplitude_spectrum[oneinds]
    return one_sided_freqs, one_sided_amplitude_spectrum

def power_spectrum(t, x):
    onef, oneamps = one_sided_fft(t, x)
    return onef, oneamps**2

def lomb_scargle_pspec(t, x):
    tstep = np.mean(np.ediff1d(t))
    freqs = np.fft.fftfreq(x.size, tstep)
    idxx = np.argsort(freqs)
    one_sided_freqs = freqs[idxx]
    one_sided_freqs = one_sided_freqs[one_sided_freqs > 0]
    # KLUDGE TO KEEP PERIODOGRAM FROM CRASHING
    one_sided_freqs = one_sided_freqs + 0.00001*np.random.random(one_sided_freqs.size)
    # THE FOLLOWING LINE CRASHES WITHOUT THE KLUDGE
    pgram = sp.lombscargle(t, x, one_sided_freqs*2*np.pi)
    return one_sided_freqs, (pgram/(t.size/4))

if __name__ == "__main__":
    # Sample data
    fs = 100.0
    fund_freq = 5
    ampl = 0.4
    t = np.arange(0, 10, 1/fs)
    x = ampl*np.cos(2*np.pi*fund_freq*t)

    # power spectrum calculations
    powerf, powerspec = power_spectrum(t, x)
    lsf, lspspec = lomb_scargle_pspec(t, x)

    # plotting
    fig, (ax0, ax1, ax2) = plt.subplots(nrows=3)
    fig.tight_layout()
    ax0.plot(t, x)
    ax0.set_title('Input Data, ' + str(fund_freq) + ' Hz, ' +
                  'Amplitude: ' + str(ampl) +
                  ' Fs = ' + str(fs) + ' Hz')
    ax0.set_ylabel('Volts')
    ax0.set_xlabel('Time[s]')
    ax1.plot(powerf, powerspec)
    ax1.set_title('FFT-based Power Spectrum')
    ax1.set_ylabel('Volts**2')
    ax1.set_xlabel('Freq[Hz]')
    ax2.plot(lsf, lspspec)
    ax2.set_title('Lomb-Scargle Power Spectrum')
    ax2.set_ylabel('Volts**2')
    ax2.set_xlabel('Freq[Hz]')
    plt.show()

It was a bug in lombscargle. The code contained an arctan calculation implemented as atan(2 * cs / (cc - ss)), where cc and ss depend on elements of x and freqs. For some inputs, cc - ss can be 0. The fixed code using atan2(2 * cs, cc - ss) was included in scipy 0.15.0.
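As a minimal illustration (not the actual scipy source) of why the atan form breaks down when cc equals ss while atan2 does not:

import numpy as np

cs, cc, ss = 0.3, 0.5, 0.5              # a case where cc - ss == 0
# tau = np.arctan(2 * cs / (cc - ss))   # raises ZeroDivisionError with plain floats
tau = np.arctan2(2 * cs, cc - ss)       # well defined: returns pi/2 here
print(tau)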

Related

Why is scipy's gaussian_filter1d not calculating properly the derivative?

I am trying to calculate the derivative of a function with scipy.ndimage.gaussian_filter1d using the keyword order, but the result is not correct. If instead I first apply the Gaussian filter to the function and then differentiate it by finite differences, it works.
For clarity: the function I want to differentiate twice is the position, from which I obtain the acceleration.
Code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from scipy.ndimage import gaussian_filter1d
#Initial acceleration
rng = np.random.default_rng(1)
acc = np.cumsum(rng.standard_normal(2000))
#Calculate position
pos = np.zeros(len(acc)+2)
for t in range(2, len(acc)):
    pos[t] = 2*pos[t-1] - pos[t-2] + acc[t-2]
#Gaussian windows
sigma = 2
truncate = 5
acc_gaus = gaussian_filter1d(acc, sigma = sigma, truncate = truncate, order = 0)
pos_gauss = gaussian_filter1d(pos, sigma = sigma, truncate = truncate, order = 0)
acc_new_dif = pos_gauss[2:] - 2*pos_gauss[1:-1] + pos_gauss[:-2]
acc_new_gaus = gaussian_filter1d(pos, sigma = sigma, truncate = truncate, order = 2)[1:-1]
#Values
plt.figure()
plt.plot(acc_gaus[:-100], label = 'Real acceleration', alpha = 0.5)
plt.plot(acc_new_gaus[:-100], label = 'Gaussian window 2nd order', alpha = 0.5)
plt.plot(acc_new_dif[:-100], label = 'Finite differences 2nd order', alpha = 0.5)
plt.legend()
plt.ylabel('Acceleration')
plt.xlabel('Time')
plt.savefig('fig1.png', dpi = 300)
#Errors
plt.figure()
plt.plot((acc_gaus - acc_new_gaus)[:-100], label = 'Error gaussian window 2nd order')
plt.plot((acc_gaus - acc_new_dif)[:-100], label = 'Error finite differences 2nd order')
plt.legend()
plt.ylabel('Error')
plt.xlabel('Time')
plt.savefig('fig2.png', dpi = 300)
Outputs:
Question: Why is this not working properly? In which situations does scipy.ndimage.gaussian_filter1d fail to calculate the derivative?
Possibly related to: Does gaussian_filter1d not work well in higher orders?
This comes from the fact that your Gaussian kernel is not the same size as your input. If you want a more consistent result you can increase the truncate value, which brings it closer to the result you expect; the error is cumulative.
With truncate=10 you could get results like this:
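A minimal sketch of how to re-run that comparison, reusing the arrays from the question's code (acc, pos, sigma):

# Sketch: repeat the error comparison with a wider kernel (truncate=10)
acc_gaus_10 = gaussian_filter1d(acc, sigma=sigma, truncate=10, order=0)
acc_new_gaus_10 = gaussian_filter1d(pos, sigma=sigma, truncate=10, order=2)[1:-1]

plt.figure()
plt.plot((acc_gaus_10 - acc_new_gaus_10)[:-100], label='Error gaussian window, truncate=10')
plt.legend()
plt.show()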

what is the problem in this butterworth filter

I am trying to remove baseline wander noise from an ECG signal. Baseline wander is a low-frequency artefact of around 0.5 Hz. To remove it I tried a digital Butterworth highpass filter:
code of filter
frequency response
The ECG signal used is record 100 from the MIT-BIH Arrhythmia Database (sampled at 360 samples per second). First I read the record using the wfdb package and then applied the filter to it, but the result looks something like this:
code of filtering the signal
the result
The result looks off. I want to know where the problem is.
I think the problem is that your filter does not know the time step of your dataset, particularly in your call to bilinear where you explicitly give a sampling frequency of 1.
To demonstrate, let's start with a pretty strong signal:
import numpy as np
import matplotlib.pyplot as plt
N = 1_000
T = 4.
tau = 0.25 # period of signal, frequency of 4 Hz
f = 1 / tau # frequency
omega = 2 * np.pi * f # angular frequency
t, dt = np.linspace(0, T, N, endpoint = False, retstep = True)
fs = 1 / dt # sampling frequency in Hz
noise = np.random.random(N)
sig = np.sin(t * omega)
x = 0.2 * noise + sig
plt.plot(t, x)
plt.xlabel('time')
Now let's apply the filter you've developed. My wave is at a higher frequency than your cutoff, so the highpass filter should let it through, but it doesn't.
from scipy import signal
b, a = signal.butter(4, 0.5, 'high', analog = True, output = 'ba')
z, p = signal.bilinear(b, a, 1.)
x_filt = signal.lfilter(z, p, x)
plt.figure()
plt.plot(t, x)
plt.plot(t, x_filt)
plt.xlabel('time')
Note that in your call to bilinear you are setting a sampling frequency of 1. Let's instead use our sampling frequency (defined above as fs).
z, p = signal.bilinear(b, a, fs)
x_filt = signal.lfilter(z, p, x)
plt.figure()
plt.plot(t, x)
plt.plot(t, x_filt)
plt.xlabel('time')
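As an aside, and as a hedged sketch of my own rather than part of the fix above: recent scipy versions (1.2 and later) let you design the digital filter in one call by passing the sampling frequency to signal.butter, which avoids the separate analog design and bilinear step entirely (the names b_d, a_d, x_filt_direct are my own).

# Sketch: design the 0.5 Hz highpass digitally in one step (assumes scipy >= 1.2)
b_d, a_d = signal.butter(4, 0.5, btype='high', fs=fs)
x_filt_direct = signal.lfilter(b_d, a_d, x)

plt.figure()
plt.plot(t, x)
plt.plot(t, x_filt_direct)
plt.xlabel('time')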
As a last note, you may sometimes observe a phase shift with Butterworth filters.
Again, I am not an expert and have only ever used them in cases where I was not worried about the phase, but see here for a suggestion on using a different type of filter if this is an issue; the comments there also suggest using a different filter for this reason.
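If the phase shift does matter, one common option (a hedged sketch of my own, not taken from the linked answer) is zero-phase filtering, which runs the same filter forward and then backward over the data:

# Sketch: zero-phase filtering with the coefficients computed above
# (filtfilt applies the filter twice, so the effective order doubles)
x_filt_zero_phase = signal.filtfilt(z, p, x)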

Power Spectrum Analysis for a very large set of data

I have a voltage signal that I am trying to denoise. The signal comes in very large files (524288 cells). When I take the whole file and set n equal to the length of the data set, I get two extremely large peaks, one at frequency 0 and one at the maximum frequency.
sig = np.genfromtxt(directory + '/'+ file, skip_header=3, dtype=np.float64)
n = len(sig)
freq = np.arange(n)
fhat = np.fft.fft(sig, n)
PSD = fhat * np.conj(fhat) / n
plt.plot(freq,PSD)
plt.show()
indices: bool = PSD > 100
fhat = indices * fhat
ffilt = np.fft.ifft(fhat)
plt.plot(ffilt)
plt.show()
Is there a way to analyse the whole file, or do I have to split it into smaller data sets?
You can analyze the whole file in one go. Those high peaks might be a padding issue.
In any case, I would suggest using one of the methods already implemented in Python, such as the Welch method from Scipy. It would save you time and headaches figuring out whether your implementation is correct. Here is an example adapted from the Scipy lectures:
import numpy as np
from matplotlib import pyplot as plt
from scipy import signal
# Seed the random number generator
np.random.seed(0)
time_step = .01
time_vec = np.arange(0, 70, time_step)
# A signal with a small frequency chirp
sig = np.sin(0.5 * np.pi * time_vec * (1 + .1 * time_vec))
plt.figure()
plt.plot(time_vec, sig)
# Compute the Power Spectral Density
freqs, psd = signal.welch(sig)
plt.figure()
plt.semilogx(freqs, psd)
plt.title('PSD: power spectral density')
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.tight_layout()
plt.show()
And here are the results:
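Applied to the signal from the question, a hedged sketch could look like the following; fs_sig is a hypothetical placeholder for the actual sampling frequency of the voltage recording, which the question does not state.

# Sketch: Welch PSD of the full 524288-sample signal (fs_sig is a hypothetical placeholder)
# detrend='constant' (the default) subtracts each segment's mean, which also
# suppresses the large peak at frequency 0.
freqs, psd = signal.welch(sig, fs=fs_sig, nperseg=4096)
plt.semilogy(freqs, psd)
plt.xlabel('Frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.show()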

Making a function to take the inverse fourier transform

I am trying to take the inverse Fourier transform by making my own function. This is the function that takes the Fourier transform of my time series, which appears to work fine:
def DFT(x, frequencies):
    N1 = x.size
    k = frequencies.size
    t = np.arange(N1)
    fourier = np.zeros(k)
    for i in range(0, k):
        fourier[i] = np.dot(x, (1/k)*np.exp(-2j*np.pi*frequencies[i]*t))
    return fourier
This is my original signal (just a sine wave):
N2 = 1*10**6
time = np.arange(0,N2,1000)
lam = .1*10**6
f = 1/lam
freq = np.arange(0,.05,.0001)
signal = np.sin(2*np.pi*f*time)
And the power spectrum is plotted using my DFT (fourier function):
plt.plot(freq, np.abs(DFT(signal,freq))**2)
plt.xlabel('Frequency')
plt.title('Spectrum of Sine Wave')
plt.grid()
plt.show()
but when I try to apply my function for the inverse Fourier transform, I am not getting my original sine wave back:
def IFT(fft, frequencies):
    N = fft.size
    k = frequencies.size
    n = np.arange(N)
    inverse_fourier = np.zeros(k)
    for i in range(0, k):
        inverse_fourier[i] = np.dot(fft, np.exp((-2j*np.pi*frequencies[i]*n)/N))  #[None,:]
    return inverse_fourier
What is wrong with my function? I get no errors, but the returned signal is totally wrong.
Running your code, you should get the following warning:
ComplexWarning: Casting complex values to real discards the imaginary part
fourier[i] = np.dot(x,(1/k)*np.exp(-2j*np.pi*frequencies[i]*t))
Since the resulting Fourier transform should be complex-valued, this warning should be reason for concern. To get rid of it you may initialize fourier like so:
fourier = np.zeros(k, dtype=complex)
Also, the formula for the Discrete Fourier Transform includes summations over frequencies covering the complete [0, 1) range. To get a 1000-point DFT (as you had in your code) you'd then have to use
freq = np.arange(0,1,.001)
This will result in a spectrum that includes 2 spikes: one at the expected frequency, and another symmetric one above the Nyquist frequency. It is common to discard the results above the Nyquist frequency when plotting the spectrum of real-valued signals (but pass the full spectrum to your IFT function).
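A minimal sketch of that suggestion, reusing the signal array and the DFT function from the question (and assuming fourier has been initialized as complex, as shown above):

# Sketch: 1000-point frequency grid covering [0, 1); plot only the part below Nyquist
freq_full = np.arange(0, 1, .001)
spectrum = DFT(signal, freq_full)
keep = freq_full < 0.5                 # discard the mirrored half for plotting only
plt.plot(freq_full[keep], np.abs(spectrum[keep])**2)
plt.xlabel('Frequency')
plt.show()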
Finally, as GrimTrigger pointed out:
In your inverse the exponent should be positive (2j instead of -2j) and drop the /N.
Applying that change gives the following (plots added for demonstration):
import numpy as np
import matplotlib.pyplot as plt

def DFT(x, frequencies):
    N1 = x.size
    k = frequencies.size
    t = np.arange(N1)
    fourier = np.zeros(k)
    for i in range(0, k):
        fourier[i] = np.dot(x, (1/k)*np.exp(-2j*np.pi*frequencies[i]*t))
    return fourier

def IFT(fft, frequencies):
    N = fft.size
    k = frequencies.size
    n = np.arange(N)
    inverse_fourier = np.zeros(k)
    for i in range(0, k):
        inverse_fourier[i] = np.dot(fft, np.exp((2j*np.pi*frequencies[i]*n)))  #[None,:]
    return inverse_fourier

N2 = 1*10**6
time = np.arange(0, N2, 2000)
lam = .1*10**6
f = 1/lam
freq = np.arange(0, .05, .0001)
signal = np.sin(2*np.pi*f*time)

plt.plot(time, signal)
plt.xlabel('Time')
plt.title('Sine Wave')
plt.grid()
plt.show()

dft = DFT(signal, freq)
plt.plot(freq, np.abs(dft)**2)
plt.xlabel('Frequency')
plt.title('Spectrum of Sine Wave')
plt.grid()
plt.show()

plt.plot(time, IFT(dft, freq))
plt.xlabel('Time')
plt.title('Sine Wave')
plt.grid()
plt.show()
which gives (first sine graph omitted) the spectrum plot and the reconstructed sine wave.

Scaling x-axis after IFFT-FFT

See the edit below for details.
I have a dataset on which I need to perform an IFFT, cut out the valuable part of it (by multiplying with a Gaussian curve), then FFT back.
The data starts in the angular frequency domain, so an IFFT leads to the time domain. FFT-ing back should then lead to angular frequency again, but I can't seem to find a way to get back to the original domain. Of course it's easy for the y-values:
yf = np.fft.ifft(y)
# cut out the valuable part here ...
np.fft.fft(yf)
For the x-value transforms I'm using np.fft.fftfreq the following way:
# x is in ang. frequency domain, that's the reason for the 2*np.pi division
t = np.fft.fftfreq(len(x), d=(x[1]-x[0])/(2*np.pi))
However doing
x = np.fft.fftfreq(len(t), d=2*np.pi*(t[1]-t[0]))
does not give me back the original x values at all. Is there something I'm misunderstanding?
The question can be asked more generally, for example:
import numpy as np
x = np.arange(100)
xx = np.fft.fftfreq(len(x), d = x[1]-x[0])
# how to get back the original x from xx? Is it even possible?
I've tried using a temporary variable to store the original x values, but it's not very elegant. I'm looking for some kind of inverse of fftfreq, and in general the best possible solution to this problem.
Thank you.
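For the generalized example, note that fftfreq only fixes the spacing of the output, so a hedged sketch of an "inverse" (assuming the original grid is evenly spaced and its starting value x[0] is still known) would be:

# Sketch: fftfreq's step is 1/(N*d), so the original spacing d can be recovered
# from the output; the offset x[0] cannot be recovered from xx alone.
N = len(xx)
d = 1 / (N * (xx[1] - xx[0]))
x_rec = x[0] + d * np.arange(N)        # equals np.arange(100) for the example above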
EDIT:
I will provide the code at the end.
I have a dataset which has angular frequency on the x axis and intensity on the y axis. I want to perform an IFFT to change to the time domain. Unfortunately the x values are not evenly spaced, so a (linear) interpolation is needed before the IFFT. Then in the time domain the transform looks like this:
The next step is to cut one of the symmetrical spikes with a Gaussian curve, then FFT back to the angular frequency domain (the same one we started from). My problem is that when I transform the x-axis for the IFFT (which I think is correct), I can't get back into the original angular frequency domain. Here is the code, which also includes the generator for the dataset.
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.interpolate import interp1d

C_LIGHT = 299.792

# for easier case, this is zero, so it can be ignored.
def _disp(x, GD=0, GDD=0, TOD=0, FOD=0, QOD=0):
    return x*GD + (GDD/2)*x**2 + (TOD/6)*x**3 + (FOD/24)*x**4 + (QOD/120)*x**5

# the generator to make sample datasets
def generator(start, stop, center, delay, GD=0, GDD=0, TOD=0, FOD=0, QOD=0,
              resolution=0.1, pulse_duration=15, chirp=0):
    window = (np.sqrt(1+chirp**2)*8*np.log(2))/(pulse_duration**2)
    lamend = (2*np.pi*C_LIGHT)/start
    lamstart = (2*np.pi*C_LIGHT)/stop
    lam = np.arange(lamstart, lamend+resolution, resolution)
    omega = (2*np.pi*C_LIGHT)/lam
    relom = omega-center
    i_r = np.exp(-(relom)**2/(window))
    i_s = np.exp(-(relom)**2/(window))
    i = i_r + i_s + 2*np.sqrt(i_r*i_s)*np.cos(_disp(relom, GD=GD, GDD=GDD, TOD=TOD, FOD=FOD, QOD=QOD)+delay*omega)
    # since the _disp polynomial is set to be zero, it's just cos(delay*omega)
    return omega, i

def interpol(x, y):
    ''' Simple linear interpolation '''
    xs = np.linspace(x[0], x[-1], len(x))
    intp = interp1d(x, y, kind='linear', fill_value='extrapolate')
    ys = intp(xs)
    return xs, ys

def ifft_method(initSpectrumX, initSpectrumY, interpolate=True):
    if len(initSpectrumY) > 0 and len(initSpectrumX) > 0:
        Ydata = initSpectrumY
        Xdata = initSpectrumX
    else:
        raise ValueError
    N = len(Xdata)
    if interpolate:
        Xdata, Ydata = interpol(Xdata, Ydata)
        # the (2*np.pi) division is because we have angular frequency, not frequency
        xf = np.fft.fftfreq(N, d=(Xdata[1]-Xdata[0])/(2*np.pi)) * N * Xdata[-1]/(N-1)
        yf = np.fft.ifft(Ydata)
    else:
        pass  # some irrelevant code there
    return xf, yf

def fft_method(initSpectrumX, initSpectrumY):
    if len(initSpectrumY) > 0 and len(initSpectrumX) > 0:
        Ydata = initSpectrumY
        Xdata = initSpectrumX
    else:
        raise ValueError
    yf = np.fft.fft(Ydata)
    xf = np.fft.fftfreq(len(Xdata), d=(Xdata[1]-Xdata[0])*2*np.pi)
    # the problem is there, where I transform the x values.
    xf = np.fft.ifftshift(xf)
    return xf, yf

# the generated data
x, y = generator(1, 3, 2, delay=1500, resolution=0.1)
# plt.plot(x, y)
xx, yy = ifft_method(x, y)
# if the x values are correctly scaled, the two symmetrical spikes should appear exactly at the delay value
# plt.plot(xx, np.abs(yy))
# do the cutting there, which is also irrelevant now
# the problem is there, in fft_method. The x values are not the same as before the transforms.
xxx, yyy = fft_method(xx, yy)
plt.plot(xxx, np.abs(yyy))
# and it should look like this:
# xs = np.linspace(x[0], x[-1], len(x))
# plt.plot(xs, np.abs(yyy))
plt.grid()
plt.show()
