Curve fitting a power law to double-log data - Python

I'm trying to fit a power law to data that is plotted on a double-log scale, so I've used the curve_fit(...) function from the scipy.optimize package.
To run the function I've implemented the following piece of code: COR_coef[i] = curve_fit(lambda x, m: c * x ** m, x, COR_IFG[:, i])[0][0]. To the best of my knowledge, curve_fit(...) should now correctly fit a power law (which is a straight line on a double-log scale) to my data. However, for some reason I just do not seem to get the fit right. See the attached picture for the data and its fit.
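For illustration of that point: a power law y = c * x ** m is a straight line in double-log space, so the exponent can equivalently be estimated by a linear fit to the log-transformed data. A minimal sketch of my own with made-up data, independent of the code below:
import numpy as np

x = np.arange(1, 100, dtype=float)                                 # made-up positive x values
y = 2.0 * x ** (-5 / 3) * np.exp(0.1 * np.random.randn(x.size))    # made-up power-law data with noise
m, log_c = np.polyfit(np.log(x), np.log(y), 1)                     # straight-line fit in log-log space
print(f"estimated exponent m = {m:.3f}, prefactor c = {np.exp(log_c):.3f}")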
Some more context regarding the minimum reproducible example (see below):
The code generates random noise for simulation purposes; this is done in the white_noise(...) function.
This random noise is then misaligned (in a for loop, with different fractions of misalignment according to the variable values_to_shift, so the development of the power law can be studied) and subtracted from the original noise to obtain a residual signal.
The residual signal is the signal the power law is fitted to.
The curve_fit(...) is applied in the sim_powerlaw_coefficient(...) function.
I am aware that my residual signal shows some artifacts when the misalignment gets larger; unfortunately I don't know how to get rid of these artifacts.
MINIMUM REPRODUCIBLE EXAMPLE
import matplotlib.pyplot as plt
import numpy as np
import numpy.fft as fft
import numpy.random as rnd
from scipy.optimize import curve_fit
plt.style.use('seaborn-darkgrid')
rnd.seed(100) # to select a random seed for creating the "random" noise
grad = -5 / 3. # slope to use for every function
c = 1 # base parameter for the powerlaw
ylim = [1e-7, 30] # range for the double-log plots of the power-frequency domains
values_to_shift = [0, 2**-11, 2**-10, 2**-9, 2**-8, 2**-7, 2**-6, 2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 2**0] # fractions of misalignment
def white_noise(n: int, N: int):
    """
    - Creates a data set of white noise with size n, N;
    - Filters this dataset with the corresponding slope;
      This slope is usually equal to -5/3 or -2/3
    - Makes sure the slope is equal to the requested slope in the double log scale.
    #param n: size of random array
    #param N: number of random arrays
    #param slope: slope of the gradient
    #return: white_noise, filtered white_noise and the original signal
    """
    m = grad
    x = np.linspace(1, n, n // 2)
    slope_loglog = c * x ** m
    whitenoise = rnd.randn(n // 2, N) + 1j * rnd.randn(n // 2, N)
    whitenoise[0, :] = 0  # zero-mean noise
    whitenoise_filtered = whitenoise * slope_loglog[:, np.newaxis]
    whitenoise = 2 * np.pi * np.concatenate((whitenoise, whitenoise[0:1, :], np.conj(whitenoise[-1:0:-1, :])), axis=0)
    whitenoise_filtered = 2 * np.pi * np.concatenate(
        (whitenoise_filtered, whitenoise_filtered[0:1, :], np.conj(whitenoise_filtered[-1:0:-1, :])), axis=0)
    whitenoise_signal = fft.ifft(whitenoise_filtered, axis=0)
    whitenoise_signal = np.real_if_close(whitenoise_signal)
    if np.iscomplex(whitenoise_signal).any():
        print('Warning! whitenoise_signal is complex-valued!')
    whitenoise_retransformed = fft.fft(whitenoise_signal, axis=0)
    return whitenoise, whitenoise_filtered, whitenoise_signal, whitenoise_retransformed, slope_loglog
def sim_powerlaw_coefficient(n: int, N: int, show_powerlaw=0):
    """
    #param n: Number of values in the IFG
    #param N: Number of IFG's
    #return: Returns the coefficient after subtraction of two IFG's
    """
    master = white_noise(n, N)
    slave = white_noise(n, N)
    x = np.linspace(1, n, n // 2)
    signal_IFG = master[2] - slave[2]
    noise_IFG = np.abs(fft.fft(signal_IFG, axis=0))[0:n // 2, :]
    for k in range(len(values_to_shift)):
        shift = int(np.round(values_to_shift[k] * n, 0))
        inp = signal_IFG.copy()
        # the weather model is a shifted copy of the actual signal, to better understand the errors that are introduced.
        weather_model = np.roll(inp, shift, axis=0)
        WM_IFG = np.abs(fft.fft(weather_model, axis=0)[0:n // 2, :])
        signal_corrected = signal_IFG - weather_model
        COR_IFG = np.abs(fft.fft(signal_corrected, axis=0)[0:n // 2, :])
        COR_coef = np.zeros(N)
        for i in range(N):
            COR_coef[i] = curve_fit(lambda x, m: c * x ** m, x, COR_IFG[:, i])[0][0]
        plt.figure(figsize=(15, 10))
        plt.title('Corrected IFG (combined - weather model)')
        plt.loglog(COR_IFG, label='Corrected IFG')
        plt.ylim(ylim)
        plt.xlabel('log(k)')
        plt.ylabel('log(P)')
        plt.loglog(c * x ** COR_coef.mean(), '-.', label=f'COR powerlaw coef:{COR_coef.mean()}')
        plt.legend(loc=0)
        plt.tight_layout()
sim_powerlaw_coefficient(8192, 1, show_powerlaw=1)

Related

Progressively filter/smooth a signal in Python (smoothed to a straight line on the left, no filtering on the right)

A picture is worth a thousand words (sorry for the shoddy work):
If the solution preserves the value and the slope at both ends, that is better.
If, in addition, the position and sharpness of the transition can be adjusted, it is perfect.
But I have not found any solution yet...
Thank you very much for your help
Here is a piece of code to get started:
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
import numpy as np

def round_up_to_odd(f):
    return int(np.ceil(f / 2.) * 2 + 1)

def generateRandomSignal(n=1000, seed=None):
    """
    Parameters
    ----------
    n : integer, optional
        Number of points in the signal. The default is 1000.
    Returns
    -------
    sig : numpy array
    """
    np.random.seed(seed)
    print("Seed was:", seed)
    steps = np.random.choice(a=[-1, 0, 1], size=(n-1))
    roughSig = np.concatenate([np.array([0]), steps]).cumsum(0)
    sig = savgol_filter(roughSig, round_up_to_odd(n/20), 6)
    return sig

n = 1000
t = np.linspace(0, 10, n)
seed = np.random.randint(0, high=100000)
# seed = 45136
sig = generateRandomSignal(seed=seed)
###############################
# ????
# sigFilt = adaptiveFilter(sig)
###############################
# Plot
plt.figure()
plt.plot(t, sig, label="Signal")
# plt.plot(t, sigFilt, label="Signal filtered")
plt.legend()
Simple convolution does smoothing. However, as described in the question, here we need strong smoothing at the start and no smoothing towards the end. I used a moving-average approach with a dynamically sized window. In the example below, the window size changes linearly.
def dynamic_smoothing(x, start_window_length=None, end_window_length=1):
    if start_window_length is None:
        start_window_length = len(x) // 2
    smoothed = list()
    for i in range(len(x)):
        # compute window length: shrink linearly from start_window_length to end_window_length
        a = i / len(x)
        w = int(np.round((1.0 - a) * start_window_length + a * end_window_length))
        # get the window; it must stay inside the array
        w0 = max(0, i - w)
        w1 = min(len(x), i + w)
        smoothed.append(sum(x[w0:w1]) / (w1 - w0))
    return np.array(smoothed)
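A quick usage sketch (assuming the dynamic_smoothing definition above and the sig and t from the starter code) to see the effect:
sigFilt = dynamic_smoothing(sig, start_window_length=len(sig) // 2, end_window_length=1)

plt.figure()
plt.plot(t, sig, label="Signal")
plt.plot(t, sigFilt, label="Signal filtered")
plt.legend()
plt.show()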

Why does my ifft result in the real part of the complex number being infinite?

I am writing a script that simulates random white noise to study its behavior when transformed back into its signal form.
Below is the function that I am having a problem with:
def white_noise(n: int, N: int, slope: int = grad):
    """
    - Creates a data set of white noise with size n, N;
    - Filters this dataset with the corresponding slope;
      This slope is usually equal to -5/3 or -2/3
    - Makes sure the slope is equal to the requested slope in the double log scale.
    #param n: size of random array
    #param N: number of random arrays
    #param slope: slope of the gradient
    #return: white_noise, filtered white_noise and the original signal
    """
    x = np.linspace(0, 1, n)
    slope_loglog = (10 ** (slope * np.log10(x) + 1))
    white_noise = rnd.rand(n, N) ** 2  # squaring the white noise to retrieve the power spectrum
    white_noise_filtered = []
    white_noise_signal = []
    white_noise_retransformed = []
    for k in range(N):
        white_noise_filtered.append(white_noise[:, k] * slope_loglog)
        white_noise_signal.append(fft.ifft(white_noise[:, k] * slope_loglog))
    white_noise_filtered, white_noise_signal, white_noise_retransformed = \
        np.asarray((white_noise_filtered, white_noise_signal, white_noise_retransformed))
    white_noise_filtered = white_noise_filtered.transpose()
    white_noise_signal = white_noise_signal.transpose().imag
    return white_noise, white_noise_filtered, white_noise_signal, white_noise_retransformed, slope_loglog
During the step where I apply the inverse fast Fourier transform (1D), my simulated noise generates a complex array (which is to be expected when dealing with Fourier transforms). What I do not understand is why the real part of all of the complex values equals inf. These should have a finite value, so what am I doing wrong?
(Note: the slope_loglog has to do with log-log transformed data having a gradient of -5/3.)
Since I need the ifft and thereafter the fft to recover the original noise again (or a shifted version of it), I need to understand why my script does this. Taking only the imaginary part of the array white_noise_signal does not result in the original noise.
This has nothing to do with FFT/IFFT
np.log10(0) = -np.inf
and
10 ** np.inf = np.inf
So if you use a negative slope, e.g. slope = -1,
10 ** (slope * np.log10(0)) = np.inf
Multiply that by any signal and you will get inf.
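A small check makes this visible, and one way around it (my own suggestion, not part of the original answer) is simply to skip the x = 0 sample before applying the power law:
import numpy as np

x = np.linspace(0, 1, 8)
print(10 ** (-5 / 3 * np.log10(x) + 1))        # first entry is inf because np.log10(0) = -inf

x_safe = x[1:]                                 # drop the zero sample
print(10 ** (-5 / 3 * np.log10(x_safe) + 1))   # all finite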
Also, you can simplify your code significantly if you skip using lists in between:
def white_noise(n: int, N: int, slope: float = -5/3):
    """
    - Creates a data set of white noise with size n, N;
    - Filters this dataset with the corresponding slope;
      This slope is usually equal to -5/3 or -2/3
    - Makes sure the slope is equal to the requested slope in the double log scale.
    #param n: size of random array
    #param N: number of random arrays
    #param slope: slope of the gradient
    #return: white_noise, filtered white_noise and the original signal
    """
    x = np.linspace(0, 1, n)
    slope_loglog = 10 ** (slope * np.log10(x) + 1)
    white_noise = np.random.rand(n, N) ** 2  # squaring the white noise to retrieve the power spectrum
    white_noise_filtered = white_noise * slope_loglog[:, np.newaxis]
    white_noise_signal = np.fft.ifft(white_noise_filtered, axis=0).imag
    return white_noise, white_noise_filtered, white_noise_signal, slope_loglog
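A quick sanity check on the simplified function (a usage sketch with hypothetical sizes; the inf from np.log10(0) is still present, as explained above):
import numpy as np

noise, filtered, sig, slope_loglog = white_noise(1024, 3)
print(noise.shape, filtered.shape, sig.shape)   # (1024, 3) (1024, 3) (1024, 3)
print(np.isfinite(filtered).all())              # False: the x = 0 sample still maps to inf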

How to compute nfft

I'm trying to understand how to use the nfft method of Jake Vanderplas' nfft module. The example unfortunately isn't very illustrative, as I'm trying to parametrize everything based on just an input list of samples ([(time0, signal0), (time1, signal1), ...]):
import numpy as np
from nfft import nfft
# define evaluation points
x = -0.5 + np.random.rand(1000)
# define Fourier coefficients
N = 10000
k = - N // 2 + np.arange(N)
f_k = np.random.randn(N)
# non-equispaced fast Fourier transform
f = nfft(x, f_k)
I'm trying to compute f_k in an example where the samples are about 10 ms apart with 1 or 2 ms jitter in that interval.
The implementation documentation:
def nfft(x, f_hat, sigma=3, tol=1E-8, m=None, kernel='gaussian',
         use_fft=True, truncated=True):
    """Compute the non-equispaced fast Fourier transform

    f_j = \sum_{-N/2 \le k < N/2} \hat{f}_k \exp(-2 \pi i k x_j)

    Parameters
    ----------
    x : array_like, shape=(M,)
        The locations of the data points. Each value in x should lie
        in the range [-1/2, 1/2).
    f_hat : array_like, shape=(N,)
        The amplitudes at each wave number k = range(-N/2, N/2).
Where I'm stuck:
import numpy as np
from nfft import nfft

def compute_nfft(sample_instants, sample_values):
    """
    :param sample_instants: `numpy.ndarray` of sample times in milliseconds
    :param sample_values: `numpy.ndarray` of samples values
    :return: Horizontal and vertical plot components as `numpy.ndarray`s
    """
    N = len(sample_instants)
    T = sample_instants[-1] - sample_instants[0]
    x = np.linspace(0.0, 1.0 / (2.0 * T), N // 2)
    y = 2.0 / N * np.abs(y[0:N // 2])
    y = nfft(x, y)
    return (x, y)
The example defines a variable f_k which is passed as nfft's f_hat argument.
According to the given definition
f_j = \sum_{-N/2 \le k < N/2} \hat{f}_k \exp(-2 \pi i k x_j)
f_hat represents the time-domain signal at the specified sampling instants. In your case this simply corresponds to sample_values.
The other argument, x, of nfft holds the actual time instants of those samples. You'd also need to provide those separately:
def compute_nfft(sample_instants, sample_values):
    N = len(sample_instants)
    T = sample_instants[-1] - sample_instants[0]
    x = np.linspace(0.0, 1.0 / (2.0 * T), N // 2)
    y = nfft(sample_instants, sample_values)
    y = 2.0 / N * np.abs(y[0:N // 2])
    return (x, y)
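A usage sketch of my own (not part of the answer): build samples roughly 10 ms apart with 1-2 ms jitter and, per the docstring quoted above, rescale the instants into [-1/2, 1/2) before handing them to nfft:
import numpy as np
from nfft import nfft

rng = np.random.default_rng(0)
n = 256
sample_instants = np.cumsum(10.0 + rng.uniform(-2.0, 2.0, n))   # ~10 ms spacing with jitter (ms)
sample_values = np.sin(2 * np.pi * 0.01 * sample_instants)      # hypothetical test tone, 0.01 cycles/ms

span = sample_instants[-1] - sample_instants[0]
x_scaled = (sample_instants - sample_instants[0]) / (span * 1.001) - 0.5  # keep strictly inside [-1/2, 1/2)

f = nfft(x_scaled, sample_values)   # non-equispaced transform of the samples
print(f.shape)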

Simple DFT Coefficients => Amplitude/Frequencies => Plot

I'm trying out the DFT and FFT in Python with numpy and pyplot.
My sample vector is
x = np.array([1,2,4,3])
The DFT coefficients for that vector are
K = [10+0j, -3+1j, 0+0j, -3-1j]
so basically we have 10, -3+i, 0 and -3-i as DFT coefficients.
My problem now is to get a combination of sine and cosine terms that fits all 4 points.
Let's assume we have a sample rate of 1 Hz.
This is my code :
from matplotlib import pyplot as plt
import numpy as np
x = np.array([1,2,4,3])
fft = np.fft.fft(x)
space = np.linspace(0,4,50)
values = np.array([1,2,3,4])
cos0 = fft[0].real * np.cos(0 * space)
cos1 = fft[1].real * np.cos(1/4 * np.pi * space)
sin1 = fft[1].imag * np.sin(1/4 * np.pi * space)
res = cos0 + cos1 + sin1
plt.scatter(values, x, label="original")
plt.plot(space, cos0, label="cos0")
plt.plot(space, cos1, label="cos1")
plt.plot(space, sin1, label="sin1")
plt.plot(space, res, label="combined")
plt.legend()
As a result I get the plot:
Why isn't the final curve hitting any of the points?
I would appreciate your help. Thanks!
EDIT:
N = 1000
dataPoints = np.linspace(0, np.pi, N)
function = np.sin(dataPoints)
fft = np.fft.fft(function)
F = np.zeros((N,))
for i in range(0, N):
    F[i] = (2 * np.pi * i) / N
F_sin = np.zeros((N, N))
F_cos = np.zeros((N, N))
res = 0
for i in range(0, N):
    F_sin[i] = fft[i].imag / 500 * np.sin(dataPoints * F[i])
    F_cos[i] = fft[i].real / 500 * np.cos(dataPoints * F[i])
    res = res + F_sin[i] + F_cos[i]
plt.plot(dataPoints, function)
plt.plot(dataPoints, res)
My plot looks like this:
Where do I go wrong?
Your test vector x looks a bit like a sawtooth because it rises linearly and then starts to decrease, but with that few data points it's hard to tell what signal it is. Such a signal has an infinite Fourier series, which means it has a lot of higher harmonic frequency components in it. So to describe it with DFT coefficients and get close to the original points, you would have to use
a higher sample rate, to get information about higher frequencies (you should learn about the Nyquist theorem)
more data points (samples), so you can extract more precise information about the frequencies in your signal. This means you have to have more items in your array x.
Also, you could try to fit a simpler signal first. Why not try a sine signal to start? Generate 1000 data points of a low-frequency sine (1 Hz, or one cycle per 1000 samples) and then run the DFT on it to check whether your code works, as in the sketch below.
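A minimal sketch of that check (my own example): with one cycle per 1000 samples, essentially all of the energy should end up in bin k = 1 (and its mirror at k = N - 1):
import numpy as np

N = 1000
n = np.arange(N)
sig = np.sin(2 * np.pi * n / N)            # one full cycle over 1000 samples
spec = np.fft.fft(sig)

print(np.argmax(np.abs(spec[:N // 2])))    # -> 1
print(np.round(np.abs(spec[1]), 1))        # -> 500.0, i.e. N/2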
There are a few mistakes:
The xs you assigned to the original values are off by one
The frequency you assigned to fft[1] is incorrect
The coefficients are incorrectly scaled
This one works:
from matplotlib import pyplot as plt
import numpy as np
x = np.array([1,2,4,3])
fft = np.fft.fft(x)
space = np.linspace(0,4,50)
values = np.array([0,1,2,3])
cos0 = fft[0].real * np.cos(0 * space)/4
cos1 = fft[1].real * np.cos(1/2 * np.pi * space)/2
sin1 = -fft[1].imag * np.sin(1/2 * np.pi * space)/2
res = cos0 + cos1 + sin1
plt.scatter(values, x, label="original")
plt.plot(space, cos0, label="cos0")
plt.plot(space, cos1, label="cos1")
plt.plot(space, sin1, label="sin1")
plt.plot(space, res, label="combined")
plt.legend()
plt.show()
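The same idea extends to all bins. As a generalization of my own (not part of the answer above), a real signal of length N can be reconstructed at its sample points by summing all scaled cosine and sine terms:
import numpy as np

def reconstruct(coeffs, t):
    """Evaluate sum over k of (Re(X_k)*cos(2*pi*k*t/N) - Im(X_k)*sin(2*pi*k*t/N)) / N."""
    N = len(coeffs)
    out = np.zeros_like(t, dtype=float)
    for k, X in enumerate(coeffs):
        out += (X.real * np.cos(2 * np.pi * k * t / N) - X.imag * np.sin(2 * np.pi * k * t / N)) / N
    return out

x = np.array([1, 2, 4, 3])
print(reconstruct(np.fft.fft(x), np.arange(4)))   # ≈ [1. 2. 4. 3.]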

Find time shift of two signals using cross correlation

I have two signals which are related to each other and have been captured by two different measurement devices simultaneously.
Since the two measurements are not time-synchronized, there is a small time delay between them which I want to calculate. Additionally, I need to know which signal is the leading one.
The following can be assumed:
little or no noise is present
speed of the algorithm is not an issue, only accuracy and robustness
the signals are captured with a high sampling rate (>10 kHz) for several seconds
the expected time delay is < 0.5 s
I thought of using cross-correlation for that purpose.
Any suggestions on how to implement that in Python are much appreciated.
Please let me know if I should provide more information in order to find the most suitable algorithm.
A popular approach: the time shift is the lag corresponding to the maximum of the cross-correlation coefficient. Here is how it works with an example:
import matplotlib.pyplot as plt
from scipy import signal
import numpy as np

def lag_finder(y1, y2, sr):
    n = len(y1)
    corr = signal.correlate(y2, y1, mode='same') / np.sqrt(signal.correlate(y1, y1, mode='same')[int(n/2)] * signal.correlate(y2, y2, mode='same')[int(n/2)])
    delay_arr = np.linspace(-0.5*n/sr, 0.5*n/sr, n)
    delay = delay_arr[np.argmax(corr)]
    print('y2 is ' + str(delay) + ' behind y1')
    plt.figure()
    plt.plot(delay_arr, corr)
    plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
    plt.xlabel('Lag')
    plt.ylabel('Correlation coeff')
    plt.show()

# Sine sample with some noise and copy to y1 and y2 with a 1-second lag
sr = 1024
y = np.linspace(0, 2*np.pi, sr)
y = np.tile(np.sin(y), 5)
y += np.random.normal(0, 5, y.shape)
y1 = y[sr:4*sr]
y2 = y[:3*sr]
lag_finder(y1, y2, sr)
In the case of noisy signals, it is common to apply band-pass filters first. In the case of harmonic noise, it can be removed by identifying and removing the frequency spikes present in the frequency spectrum.
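As one possible pre-processing step (a sketch of my own, not from the answer above), a zero-phase Butterworth band-pass from scipy.signal could be applied to both signals before calling lag_finder; the band edges here are arbitrary placeholders:
from scipy import signal

def bandpass(y, sr, low_hz=5.0, high_hz=50.0, order=4):
    # zero-phase Butterworth band-pass; low_hz and high_hz are assumed cut-offs
    sos = signal.butter(order, [low_hz, high_hz], btype='bandpass', fs=sr, output='sos')
    return signal.sosfiltfilt(sos, y)

# y1_f = bandpass(y1, sr)
# y2_f = bandpass(y2, sr)
# lag_finder(y1_f, y2_f, sr)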
NumPy has a function, correlate, which suits your needs: https://docs.scipy.org/doc/numpy/reference/generated/numpy.correlate.html
To complement Reveille's answer above (I reproduce his algorithm), I would like to point out some ideas for preprocessing the input signals.
Since there seems to be no one-size-fits-all setting (duration in periods, resolution, offset, noise, signal type, ...), you may have to play with it.
In my example, applying a window function improves the detected phase shift (within the resolution of the discretization).
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
r2d = 180.0/np.pi # conversion factor RAD-to-DEG
delta_phi_true = 50.0/r2d
def detect_phase_shift(t, x, y):
    '''detect phase shift between two signals from cross correlation maximum'''
    N = len(t)
    L = t[-1] - t[0]
    cc = signal.correlate(x, y, mode="same")
    i_max = np.argmax(cc)
    phi_shift = np.linspace(-0.5*L, 0.5*L, N)
    delta_phi = phi_shift[i_max]
    print("true delta phi = {} DEG".format(delta_phi_true*r2d))
    print("detected delta phi = {} DEG".format(delta_phi*r2d))
    print("error = {} DEG   resolution for comparison dphi = {} DEG".format((delta_phi-delta_phi_true)*r2d, dphi*r2d))
    print("ratio = {}".format(delta_phi/delta_phi_true))
    return delta_phi
L = np.pi*10+2 # interval length [RAD], for generality not multiple period
N = 1001 # interval division, odd number is better (center is integer)
noise_intensity = 0.0
X = 0.5 # amplitude of first signal..
Y = 2.0 # ..and second signal
phi = np.linspace(0, L, N)
dphi = phi[1] - phi[0]
'''generate signals'''
nx = noise_intensity*np.random.randn(N)*np.sqrt(dphi)
ny = noise_intensity*np.random.randn(N)*np.sqrt(dphi)
x_raw = X*np.sin(phi) + nx
y_raw = Y*np.sin(phi+delta_phi_true) + ny
'''preprocessing signals'''
x = x_raw.copy()
y = y_raw.copy()
window = signal.windows.hann(N) # Hanning window
#x -= np.mean(x) # zero mean
#y -= np.mean(y) # zero mean
#x /= np.std(x) # scale
#y /= np.std(y) # scale
x *= window # reduce effect of finite length
y *= window # reduce effect of finite length
print(" -- using raw data -- ")
delta_phi_raw = detect_phase_shift(phi, x_raw, y_raw)
print(" -- using preprocessed data -- ")
delta_phi_preprocessed = detect_phase_shift(phi, x, y)
Without noise (to be deterministic) the output is
-- using raw data --
true delta phi = 50.0 DEG
detected delta phi = 47.864788975654 DEG
...
-- using preprocessed data --
true delta phi = 50.0 DEG
detected delta phi = 49.77938053468019 DEG
...
SciPy has a useful function for this, called signal.correlation_lags, which pairs with the correlate function mentioned in other answers to find the time lag. The example displayed at the bottom of that page is useful:
import numpy as np
from scipy import signal
from numpy.random import default_rng

rng = default_rng()
x = rng.standard_normal(1000)
y = np.concatenate([rng.standard_normal(100), x])
correlation = signal.correlate(x, y, mode="full")
lags = signal.correlation_lags(x.size, y.size, mode="full")
lag = lags[np.argmax(correlation)]
Then lag would be -100
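To turn that lag (in samples) into a time delay, divide by the sampling rate; fs below is a hypothetical value in line with the >10 kHz mentioned in the question:
fs = 10_000                    # hypothetical sampling rate in Hz
time_delay = lag / fs          # seconds; the sign indicates which signal is leading
print(f"time delay: {time_delay:.4f} s")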
