power-law curve fitting scipy, numpy not working - python

I ran into a problem fitting a power-law curve to my data. I have two data sets: bins1 and bins2.
bins1 fits fine with numpy.linalg.lstsq (I then use np.exp(coefs[0])*x**coefs[1] to get the power-law equation).
bins2, on the other hand, behaves strangely and gives a bad R-squared.
Both data sets give different equations (and worse R-squared values) than what Excel shows me.
Here is the code (and data):
import numpy as np
import matplotlib.pyplot as plt
bins1 = np.array([[6.769318871738219667e-03,
1.306418618130891773e-02,
1.912138120913448383e-02,
2.545189874466026111e-02,
3.214689891729670401e-02,
4.101898933375244805e-02,
5.129862592803200588e-02,
6.636505322669797313e-02,
8.409809827572585494e-02,
1.058164348650862258e-01,
1.375849753230810046e-01,
1.830664031837437311e-01,
2.682454535427478137e-01,
3.912508246490400410e-01,
5.893271848997768680e-01,
8.480213305038615257e-01,
2.408136266017391058e+00,
3.629192766488219313e+00,
4.639246557509275171e+00,
9.901792214343277720e+00],
[8.501658465758301112e-04,
1.562697718429977012e-03,
1.902062808421856087e-04,
4.411817741488644959e-03,
3.409236963162485048e-03,
1.686099657013027898e-03,
3.643231240239608402e-03,
2.544120616413291154e-04,
2.549036204611017029e-02,
3.527340723977697573e-02,
5.038482027310990652e-02,
5.617932487522721979e-02,
1.620407270423956103e-01,
1.906538999080910068e-01,
3.180688368126549093e-01,
2.364903188268162038e-01,
3.267322385964683273e-01,
9.384571074801122403e-01,
4.419747716107813029e-01,
9.254710022316929852e+00]]).T
bins2 = np.array([[6.522512685133712192e-03,
1.300415548684437199e-02,
1.888928895701269539e-02,
2.509905819337970856e-02,
3.239654633369139919e-02,
4.130706234846069635e-02,
5.123820846515786398e-02,
6.444380072984744190e-02,
8.235238352205621892e-02,
1.070907072127811749e-01,
1.403438221033725120e-01,
1.863115065963684147e-01,
2.670209758710758163e-01,
4.003337413814173074e-01,
6.549054078382223754e-01,
1.116611087124244062e+00,
2.438604844718367914e+00,
3.480674117919704269e+00,
4.410201659398489404e+00,
6.401903059926267403e+00],
[1.793454543936148608e-03,
2.441092334386309615e-03,
2.754373929745804715e-03,
1.182752729942167062e-03,
1.357797177773524414e-03,
6.711673916715021199e-03,
1.392761674092503343e-02,
1.127957613093066511e-02,
7.928803089359596004e-03,
2.524609593305639915e-02,
5.698702885370290905e-02,
8.607729156137132465e-02,
2.453761830112021203e-01,
9.734443815196883176e-02,
1.487480479168299119e-01,
9.918002699934079791e-01,
1.121298151253063535e+00,
1.389239135742518227e+00,
4.254082922056571237e-01,
2.643453492951096440e+00]]).T
bins = bins1 #change to bins2 to see results for bins2
def fit(x,a,m): # power-law fit (based on previous studies)
    return a*(x**m)
coefs= np.linalg.lstsq(np.vstack([np.ones(len(bins[:,0])), np.log(bins[:,0]), bins[:,0]]).T, np.log(bins[:,1]))[0] # calculating fitting coefficients (a,m)
y_predict = fit(bins[:,0],np.exp(coefs[0]),coefs[1]) # prediction based of fitted model
model_plot = plt.loglog(bins[:,0],bins[:,1],'o',label="error")
fit_line = plt.plot(bins[:,0],y_predict,'r', label="fit")
plt.ylabel('Y (bins[:,1])')
plt.xlabel('X (bins[:,0])')
plt.title('model')
plt.legend(loc='best')
plt.show()
def R_sqr (y,y_predict): # calculating R squared value to measure fitting accuracy
    rsdl = y - y_predict
    ss_res = np.sum(rsdl**2)
    ss_tot = np.sum((y-np.mean(y))**2)
    R2 = 1-(ss_res/ss_tot)
    R2 = np.around(R2,decimals=4)
    return R2
R2= R_sqr(bins[:,1],y_predict)
print ('(R^2 = %s)' % (R2))
The fit formula for bins1[[x],[y]]: Python: y = 0.337*(x)^1.223 (R^2 = 0.7773), Excel: y = 0.289*(x)^1.174 (R^2 = 0.8548)
The fit formula for bins2[[x],[y]]: Python: y = 0.509*(x)^1.332 (R^2 = -1.753), Excel: y = 0.311*(x)^1.174 (R^2 = 0.9116)
These are two sample data sets out of 30; I see this fitting problem scattered throughout my data, and some sets have an R-squared around -150!
I tried scipy's curve_fit but didn't get better results; in fact they were worse.
Does anyone know how to get an Excel-like fit in Python?

You are calculating an R-squared using Y values that have not been converted to log space. The following change gives reasonable R-squared values:
R2 = R_sqr(np.log(bins[:,1]), np.log(y_predict))
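In context, that looks like the sketch below (mine, reusing the question's bins, y_predict, and R_sqr). The second half rests on an assumption of mine, not stated in the answer: Excel's power trendline is an ordinary least-squares fit of log(y) on log(x) alone, so a two-column design without the extra bins[:,0] column should land closer to Excel's coefficients:
# corrected R-squared: compare data and prediction in log space
R2 = R_sqr(np.log(bins[:,1]), np.log(y_predict))
print('(R^2 = %s)' % R2)
# hypothetical two-column log-log fit (constant + log(x)), i.e. what Excel's power trendline computes
A = np.vstack([np.ones(len(bins[:,0])), np.log(bins[:,0])]).T
c = np.linalg.lstsq(A, np.log(bins[:,1]), rcond=None)[0]
print('y = %.3f * x^%.3f' % (np.exp(c[0]), c[1]))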

Unable to fit a function onto a given set of data points in Python using the Scipy library

I have been trying to fit a function (given in the code under the name concave_func) to data points in Python, but have had very little to no success. The function has 7 parameters (C_1, C_2, alpha_one, alpha_two, I_x, nu_t, T_e) that I have to estimate, and only 6 data points. I have tried 2 methods to fit the curve and estimate the parameters:
1). scipy.optimize.minimize
2). scipy.optimize.curve_fit.
However, I'm not obtaining the desired results, i.e. the curve does not fit the data points.
I have attached my code below.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

frequency = np.array([22,45,150,408,1420,23000]) #x_values
b_temp = [2.55080863e+04, 4.90777800e+03, 2.28984753e+02, 2.10842949e+01, 3.58631166e+00, 5.68716056e-04] #y_values
#Defining the function that I want to fit
def concave_func(x, C_1, C_2, alpha_one, alpha_two, I_x, nu_t, T_e):
    one = x**(-alpha_one)
    two = (C_2/C_1)*(x**(-alpha_two))
    three = I_x*(x**-2.1)
    expo = np.exp(-1*((nu_t/x)**2.1))
    eqn_one = C_1*(one + two + three)*expo
    eqn_two = T_e*(1 - expo)
    return eqn_one + eqn_two
#Defining chi_square function
def chisq(params, xobs, yobs):
    ynew = concave_func(xobs, *params)
    #yerr = np.sum((ynew- yobs)**2)
    yerr = np.sum(((yobs- ynew)/ynew)**2)
    print(yerr)
    return yerr
result = minimize(chisq, [1,2,2,2,1,1e6,8000], args = (frequency,b_temp), method = 'Nelder-Mead', options = {'disp' : True, 'maxiter': 10000})
x = np.linspace(-300,24000,1000)
plt.yscale("log")
plt.xscale("log")
plt.plot(x,concave_func(x, *result.x))
print(result.x)
print(result)
plt.plot(frequency, b_temp, 'r*' )
plt.xlabel("log Frequency[MHz]")
plt.ylabel("log Temp[K]")
plt.title('log Temparature vs log Frequency')
plt.grid()
plt.savefig('the_plot_2060.png')
I have attached the plot that I obtained below.
The plot clearly does not fit the data, and something is definitely wrong. I would also like my parameters alpha_one and alpha_two to be constrained to lie between 2 and 3, and I do not want my parameter T_e to exceed 10,000. Any thoughts?
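One way to express those constraints (a sketch of mine, not from the original post) is to keep the chisq objective above but pass scipy.optimize.minimize a bounds sequence together with a bounded method such as L-BFGS-B; the (2, 3) pairs below are the desired alpha ranges and (None, 10000) caps T_e:
# bounds in the same order as the parameter vector (C_1, C_2, alpha_one, alpha_two, I_x, nu_t, T_e)
param_bounds = [(None, None), (None, None), (2, 3), (2, 3),
                (None, None), (None, None), (None, 10000)]
result = minimize(chisq, [1, 2, 2.5, 2.5, 1, 1e6, 8000],
                  args=(frequency, b_temp),
                  method='L-BFGS-B', bounds=param_bounds)
Whether this converges to anything meaningful is a separate issue: with 7 free parameters and only 6 data points the problem is underdetermined, so some parameters will have to be fixed or tied together regardless of the optimizer.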

How to find regression curve equation for a fitted PolynomialFeatures model

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
data=pd.DataFrame(
{"input":
[0.001,0.015,0.066,0.151,0.266,0.402,0.45,0.499,0.598,0.646,0.738,0.782,0.86,0.894,0.924,0.95],
"output":[0.5263157894736842,0.5789473684210524,0.6315789473684206,0.6842105263157897,
0.6315789473684206, 0.7894736842105263, 0.8421052631578945, 0.7894736842105263, 0.736842105263158,
0.6842105263157897, 0.736842105263158, 0.736842105263158,0.6842105263157897, 0.6842105263157897,
0.6315789473684206,0.5789473684210524]})
I have the above data, which includes input and output values, and I want to fit a curve that properly describes it. First, a plot of the input and output values is shown here:
I have written this code:
X=data.iloc[:,0].to_numpy()
X=X.reshape(-1,1)
y=data.iloc[:,1].to_numpy()
y=y.reshape(-1,1)
poly=PolynomialFeatures(degree=2)
poly.fit(X,y)
X_poly=poly.transform(X)
reg=LinearRegression().fit(X_poly,y)
plt.scatter(X,y,color="blue")
plt.plot(X,reg.predict(X_poly),color="orange",label="Polynomial Linear Regression")
plt.xlabel("Temperature")
plt.ylabel("Pressure")
plt.legend(loc="upper left")
The plot is:
But I can't find the equation of the above (orange) curve. How can I find it?
Your plot actually corresponds to your code run with
poly=PolynomialFeatures(degree=7)
and not to degree=2. Indeed, running your code with the above change, we get:
Now, your polynomial features are:
poly.get_feature_names()
# ['1', 'x0', 'x0^2', 'x0^3', 'x0^4', 'x0^5', 'x0^6', 'x0^7']
and the respective coefficients of your linear regression are:
reg.coef_
# array([[ 0. , 5.43894411, -68.14277256, 364.28508827,
# -941.70924401, 1254.89358662, -831.27091422, 216.43304954]])
plus the intercept:
reg.intercept_
# array([0.51228593])
Given the above, and setting
coef = reg.coef_[0]
since here we have a single feature in the initial data, your regression equation is:
y = reg.intercept_ + coef[0] + coef[1]*x + coef[2]*x**2 + coef[3]*x**3 + coef[4]*x**4 + coef[5]*x**5 + coef[6]*x**6 + coef[7]*x**7
For visual verification, we can plot the above function with some x data in [0, 1]
x = np.linspace(0, 1, 15)
Running the above expression for y and
plt.plot(x, y)
gives:
Using some randomly generated data x, we can verify that the results of the equation y_eq are indeed equal to the results produced by the regression model y_reg within the limits of numerical precision:
x = np.random.rand(1,10)
y_eq = reg.intercept_ + coef[0] + coef[1]*x + coef[2]*x**2 + coef[3]*x**3 + coef[4]*x**4 + coef[5]*x**5 + coef[6]*x**6 + coef[7]*x**7
y_reg = np.concatenate(reg.predict(poly.transform(x.reshape(-1,1))))
y_eq
# array([[0.72452703, 0.64106819, 0.67394222, 0.71756648, 0.71102853,
# 0.63582055, 0.54243177, 0.71104983, 0.71287962, 0.6311952 ]])
y_reg
# array([0.72452703, 0.64106819, 0.67394222, 0.71756648, 0.71102853,
# 0.63582055, 0.54243177, 0.71104983, 0.71287962, 0.6311952 ])
np.allclose(y_reg, y_eq)
# True
Irrelevant to the question, but I guess you already know that trying to fit such high-order polynomials to so few data points is not a good idea, and you should probably stick to a low degree of 2 or 3...
Not sure how you produced the plot shown in the question. When I ran your code I got the following (degree=2) polynomial fitted to the data, as expected:
Now that you have fitted the data you can see the coefficients of the model thus:
print(reg.coef_)
print(reg.intercept_)
# [[ 0. 0.85962436 -0.83796885]]
# [0.5523586]
Note that the data that was used to fit this model is equivalent to the following:
X_poly = np.concatenate([np.ones((16,1)), X, X**2], axis=1)
Therefore a single data point is a vector created as follows:
temp = 0.5
x = np.array([1, temp, temp**2]).reshape((1,3))
Your polynomial model is simply a linear model of the polynomial features:
y = A.x + B
or
y = reg.coef_.dot(x.T) + reg.intercept_
print(y) # [[0.77267856]]
Verification:
print(reg.predict(x)) # array([[0.77267856]])

Gaussian fit to noisey data using curve_fit

I am having issues fitting a Gaussian to my data. Currently the output of my code looks like
this, where orange is the data, blue is the Gaussian fit, and green is a built-in Gaussian fitter; I do not wish to use the latter because it never quite begins at zero and I do not have access to its code. I would like my output to look something like this, where the curve drawn in red is the Gaussian fit.
I have tried reading the curve_fit documentation, but at best I get a fit that looks like this, which covers all of the data. This is undesirable because I am only interested in the central peak, which is my main issue: I do not know how to get curve_fit to fit a Gaussian to just the central peak, as in the second image.
I have considered using a weights function like np.random.choice(), or finding the data file's maximum value and then looking at the second derivative on either side of the central peak to see where the inflection changes, but I am unsure how best to implement this.
How would I best go about this? I have done a lot of googling but can't quite get my head around changing curve_fit to suit my needs.
Cheers for any pointers!
This is a data file.
https://drive.google.com/file/d/1qrAkD74U6L46GoGnvMiUHdPuLEToS6Pv/view?usp=sharing
This is my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from matplotlib.pyplot import figure
plt.close('all')
fpathB4 = 'E:\.1. Work - Current Projects + Old Projects\Current Projects\PF 4MHz Laser System\.8. 1050 SYSTEM\AC traces'
fpath = fpathB4.replace('\\','/') + ('/')
filename = '300'
with open(fpath+filename) as f:
    dataraw = f.readlines()
FWHM = dataraw[8].split(':')[1].split()[0]
FWHM = float(FWHM)
print("##### For AC file -", filename, "#####")
print("Auto-co guess -", FWHM, "ps")
pulseduration = FWHM/np.sqrt(2)
pulseduration = str(pulseduration)
dataraw = dataraw[15:]
print("Pulse duration -", pulseduration, "ps" + "\n")
time = np.array([])
acf1 = np.array([]) #### DATA
fit = np.array([]) #### Gaussian fit
for k in dataraw:
    data = k.split()
    time = np.append(time, float(data[0]))
    acf1 = np.append(acf1, float(data[1]))
    fit = np.append(fit, float(data[2]))
n = len(time)
y = acf1.copy()
x = time.copy()
mean = sum(x*y)/n
sigma = sum(y*(x-mean)**2)/n
def gaus(x,a,x0,sigma):
    return a*np.exp(-(x-x0)**2/(2*sigma**2))
popt,pcov = curve_fit(gaus,x,y,p0=[1,mean,sigma])
plt.plot(x,gaus(x,*popt)/np.max(gaus(x,*popt)))
figure(num=1, figsize=(8, 3), dpi=96, facecolor='w', edgecolor='k') # figsize = (length, height)
plt.plot(time, acf1/np.max(acf1), label = 'Data - ' + filename, linewidth = 1)
plt.plot(time, fit/np.max(fit), label = '$FWHM_{{\Delta t}}$ (ps) = ' + pulseduration)
plt.autoscale(enable = True, axis = 'x', tight = True)
plt.title("Auto-Correlation Data")
plt.xlabel("Time (ps)")
plt.ylabel("Intensity (a.u.)")
plt.legend()
I think the problem might be that the data are not completely Gaussian-like. It seems you have some kind of Airy/sinc function due to the time resolution of your acquisition instrument. Still, if you are only interested in the center, you can fit it with a single Gaussian:
import numpy as np
import pandas as pd
import fitwrap as fw

df = pd.read_csv('300', skip_blank_lines=True, skiprows=13, sep=r'\s+')
def gaussian_no_offset(x, x0=2, sigma=1, amp=300):
    return amp*np.exp(-(x-x0)**2/sigma**2)
fw.fit(gaussian_no_offset, df.time, df.acf1)
x0: 2.59158 +/- 0.00828 (0.3%) initial:2
sigma: 0.373 +/- 0.0117 (3.1%) initial:1
amp: 355.02 +/- 9.65 (2.7%) initial:300
If you want to be slightly more precise I can think of a sinc squared function for the peak and a broad gaussian offset. The fit seems nicer, but it really depends on what the data actually represents...
def sinc(x, x0=2.5, amp=300, width=1, amp_g=20, sigma=3):
    return amp*(np.sinc((x-x0)/width))**2 + amp_g*np.exp(-(x-x0)**2/sigma**2)
fw.fit(sinc, df.time, df.acf1)
x0: 2.58884 +/- 0.0021 (0.1%) initial:2.5
amp: 303.84 +/- 3.7 (1.2%) initial:300
width: 0.49211 +/- 0.00565 (1.1%) initial:1
amp_g: 81.32 +/- 2.11 (2.6%) initial:20
sigma: 1.512 +/- 0.0351 (2.3%) initial:3
I'd add a constant to the Gaussian equation and limit its range via the bounds parameter of curve_fit, so that the curve isn't raised higher.
So your equation would be:
def gaus(x, y0, a, x0, sigma):  # x must come first for curve_fit; y0 is the constant offset
    return y0 + a*np.exp(-(x-x0)**2/(2*sigma**2))
and the curve_fit bounds would be something like this:
curve_fit(....., bounds=([0, a_min, x0_min, sigma_min], [0.1, a_max, x0_max, sigma_max]))
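A minimal usage sketch putting the two together (the _min/_max names are placeholders carried over from the answer, and x, y, mean, sigma come from the question's code; p0 now needs four entries because y0 is an extra fit parameter):
popt, pcov = curve_fit(gaus, x, y,
                       p0=[0, 1, mean, sigma],
                       bounds=([0, a_min, x0_min, sigma_min],
                               [0.1, a_max, x0_max, sigma_max]))
plt.plot(x, gaus(x, *popt))  # offset Gaussian restricted to the bounded parameter ranges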

How to isolate data that are 2 and 3 sigma away from the mean, and then mark them in a plot, in Python?

I am reading from a dataset which looks like the following when plotted in matplotlib; I have then taken the best-fit curve using linear regression.
A sample of the data looks like the following:
# ID X Y px py pz M R
1.04826492772e-05 1.04828050287e-05 1.048233088e-05 0.000107002791008 0.000106552433081 0.000108704469007 387.02 4.81947797625e+13
1.87380963036e-05 1.87370588085e-05 1.87372620448e-05 0.000121616280029 0.000151924707761 0.00012371156585 428.77 6.54636174067e+13
3.95579877816e-05 3.95603773653e-05 3.95610756809e-05 0.000163470663023 0.000265203868883 0.000228031803626 470.74 8.66961875758e+13
My code looks like the following:
# Regression Function
def regress(x, y):
    #Return a tuple of predicted y values and parameters for linear regression.
    p = sp.stats.linregress(x, y)
    b1, b0, r, p_val, stderr = p
    y_pred = sp.polyval([b1, b0], x)
    return y_pred, p
# plotting z
xz, yz = M, Y_z # data, non-transformed
y_pred, _ = regress(xz, np.log(yz)) # change here # transformed input
plt.semilogy(xz, yz, marker='o',color ='b', markersize=4,linestyle='None', label="l.o.s within R500")
plt.semilogy(xz, np.exp(y_pred), "b", label = 'best fit') # transformed output
However, I can see a lot of upward scatter in the data, and the best-fit curve is affected by it. So first I want to isolate the data points that are 2 and 3 sigma away from my mean data and mark them with a circle around them.
Then I want to take the best-fit curve considering only the points that fall within 1 sigma of my mean data.
Is there a good function in Python that can do this for me?
In addition, can I also isolate those points in my actual dataset? For example, if the third row in the sample input represents a 2-sigma deviation, can I get that row as output too, to save and investigate later?
Your help is most appreciated.
Here's some code that goes through the data in a given number of windows, calculates statistics in each window, and separates the data into well-behaved and misbehaved lists.
Hope this helps.
from scipy import stats
from scipy import polyval
import numpy as np
import matplotlib.pyplot as plt
num_data = 10000
fake_data_x = np.sort(12.8+np.random.random(num_data))
fake_data_y = np.exp(fake_data_x) + np.random.normal(0,scale=50000,size=num_data)
# Regression Function
def regress(x, y):
    #Return a tuple of predicted y values and parameters for linear regression.
    p = stats.linregress(x, y)
    b1, b0, r, p_val, stderr = p
    y_pred = polyval([b1, b0], x)
    return y_pred, p
# plotting z
xz, yz = fake_data_x, fake_data_y # data, non-transformed
y_pred, _ = regress(xz, np.log(yz)) # change here # transformed input
plt.figure()
plt.semilogy(xz, yz, marker='o',color ='b', markersize=4,linestyle='None', label="l.o.s within R500")
plt.semilogy(xz, np.exp(y_pred), "b", label = 'best fit') # transformed output
plt.show()
num_bin_intervals = 10 # approx number of averaging windows
window_boundaries = np.linspace(min(fake_data_x),max(fake_data_x),int(len(fake_data_x)/num_bin_intervals)) # window boundaries
y_good = [] # list to collect the "well-behaved" y-axis data
x_good = [] # list to collect the "well-behaved" x-axis data
y_outlier = []
x_outlier = []
for i in range(len(window_boundaries)-1):
    # create a boolean mask to select the data within the averaging window
    window_indices = (fake_data_x<=window_boundaries[i+1]) & (fake_data_x>window_boundaries[i])
    # separate the pieces of data in the window
    fake_data_x_slice = fake_data_x[window_indices]
    fake_data_y_slice = fake_data_y[window_indices]
    # calculate the mean y_value in the window
    y_mean = np.mean(fake_data_y_slice)
    y_std = np.std(fake_data_y_slice)
    # choose and select the outliers
    y_outliers = fake_data_y_slice[np.abs(fake_data_y_slice-y_mean)>=2*y_std]
    x_outliers = fake_data_x_slice[np.abs(fake_data_y_slice-y_mean)>=2*y_std]
    # choose and select the good ones
    y_goodies = fake_data_y_slice[np.abs(fake_data_y_slice-y_mean)<2*y_std]
    x_goodies = fake_data_x_slice[np.abs(fake_data_y_slice-y_mean)<2*y_std]
    # extend the lists with all the good and the bad
    y_good.extend(list(y_goodies))
    y_outlier.extend(list(y_outliers))
    x_good.extend(list(x_goodies))
    x_outlier.extend(list(x_outliers))
plt.figure()
plt.semilogy(x_good,y_good,'o')
plt.semilogy(x_outlier,y_outlier,'r*')
plt.show()
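To also keep the flagged points for later inspection (a small addition of mine, not in the original answer; only x/y pairs are saved here because that is all the synthetic example contains):
np.savetxt('outliers.txt', np.column_stack([x_outlier, y_outlier]))
With the real dataset, the same boolean masks (the window selection combined with the 2-sigma test) can be used to index whole rows rather than just the x and y columns.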

Detrending a time-series of a multi-dimensional array without the for loops

I have a 3D array which holds a time series of air-sea carbon flux for each grid point on the Earth's surface (model output). I want to remove the (linear) trend from the time series. I came across this code:
from matplotlib import mlab
for x in xrange(40):
    for y in xrange(182):
        cflux_detrended[:, x, y] = mlab.detrend_linear(cflux[:, x, y])
Can I speed this up by not using for loops?
Scipy has a lot of signal processing tools.
Using scipy.signal.detrend() will remove the linear trend along an axis of the data. As the comparison further down shows, the trend is fitted and subtracted separately for the time series at each grid point, matching the per-grid-point loop in the question.
import scipy.signal
cflux_detrended = scipy.signal.detrend(cflux, axis=0)
Using scipy.signal will get the same result as using the method in the original post. Using Josef's detrend_separate() function will also return the same result.
Here are two versions using numpy.linalg.lstsq; both use np.vander, so they can remove a polynomial trend of any order.
Warning: not tested except on the example.
I think something like this will be added to scikits.statsmodels, which doesn't yet have a multivariate version for detrending either. For the common-trend case, we could use scikits.statsmodels OLS, and we would also get all the result statistics for the estimation.
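As a minimal sketch of that common-trend case (mine, not part of the original answer), modern statsmodels could be used roughly like this, assuming a y_observed array shaped (time, ...) as constructed in the script below:
import numpy as np
import statsmodels.api as sm

nobs = y_observed.shape[0]
y_flat = y_observed.reshape(nobs, -1)                 # (time, n_series)
X = sm.add_constant(np.arange(nobs))                  # design: [1, t]
# pool all series and fit one common linear trend
res = sm.OLS(y_flat.ravel(), np.repeat(X, y_flat.shape[1], axis=0)).fit()
y_detrended = (y_flat - res.predict(X)[:, None]).reshape(y_observed.shape)
print(res.params)                                     # common intercept and slope
res.summary() would then give the full set of result statistics mentioned above.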
# -*- coding: utf-8 -*-
"""Detrending multivariate array
Created on Fri Dec 02 15:08:42 2011
Author: Josef Perktold
http://stackoverflow.com/questions/8355197/detrending-a-time-series-of-a-multi-dimensional-array-without-the-for-loops
I should also add the multivariate version to statsmodels
"""
import numpy as np
import matplotlib.pyplot as plt
def detrend_common(y, order=1):
    '''detrend multivariate series by common trend
    Parameters
    ----------
    y : ndarray
        data, can be 1d or nd. if ndim is greater than 1, then observations
        are along zero axis
    order : int
        degree of polynomial trend, 1 is linear, 0 is constant
    Returns
    -------
    y_detrended : ndarray
        detrended data in same shape as original
    '''
    nobs = y.shape[0]
    shape = y.shape
    y_ = y.ravel()
    nobs_ = len(y_)
    t = np.repeat(np.arange(nobs), nobs_ // nobs)
    exog = np.vander(t, order+1)
    params = np.linalg.lstsq(exog, y_)[0]
    fittedvalues = np.dot(exog, params)
    resid = (y_ - fittedvalues).reshape(*shape)
    return resid, params
def detrend_separate(y, order=1):
    '''detrend multivariate series by series specific trends
    Parameters
    ----------
    y : ndarray
        data, can be 1d or nd. if ndim is greater than 1, then observations
        are along zero axis
    order : int
        degree of polynomial trend, 1 is linear, 0 is constant
    Returns
    -------
    y_detrended : ndarray
        detrended data in same shape as original
    '''
    nobs = y.shape[0]
    shape = y.shape
    y_ = y.reshape(nobs, -1)
    kvars_ = len(y_)
    t = np.arange(nobs)
    exog = np.vander(t, order+1)
    params = np.linalg.lstsq(exog, y_)[0]
    fittedvalues = np.dot(exog, params)
    resid = (y_ - fittedvalues).reshape(*shape)
    return resid, params
nobs = 30
sige = 0.1
y0 = 0.5 * np.random.randn(nobs,4,3)
t = np.arange(nobs)
y_observed = y0 + t[:,None,None]
for detrend_func, name in zip([detrend_common, detrend_separate],
                              ['common', 'separate']):
    y_detrended, params = detrend_func(y_observed, order=1)
    print('\n\n', name)
    print('params for detrending')
    print(params)
    print('std of detrended', y_detrended.std())  #should be roughly sig=0.5 (var of y0)
    print('maxabs', np.max(np.abs(y_detrended - y0)))
    print('observed')
    print(y_observed[-1])
    print('detrended')
    print(y_detrended[-1])
    print('original "true"')
    print(y0[-1])
    plt.figure()
    for i in range(4):
        for j in range(3):
            plt.plot(y0[:,i,j], 'bo', alpha=0.75)
            plt.plot(y_detrended[:,i,j], 'ro', alpha=0.75)
    plt.title(name + ' detrending: blue - original, red - detrended')
plt.show()
Since Nicholas pointed out scipy.signal.detrend: my detrend_separate is basically the same as scipy.signal.detrend, with fewer (no axis or breakpoints arguments) or different (polynomial order) options.
>>> res = signal.detrend(y_observed, axis=0)
>>> (res - y0).var()
0.016931858083279336
>>> (y_detrended - y0).var()
0.01693185808327945
>>> (res - y_detrended).var()
8.402584948582852e-30
I think a plain old list comprehension is easiest:
cflux_detrended = np.array([[mlab.detrend_linear(t) for t in kk] for kk in cflux.T]).T  # transpose back to the original (time, x, y) ordering
