I have written code that reads in my data file, plots it, fits it, and finds the peaks. However, I have 6 peaks, and the code is currently only fitting 2 of them and isn't returning any data on them. My code is as follows:
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
data = np.genfromtxt("C:\\Users\\lenovo laptop\\practice_data_ll16ame1.dat", skip_header = 15)
x = data[: , 0]
y = data[: , 1]
plt.plot(x,y)
plt.show()
def func(x, *params):
    y = np.zeros_like(x)
    for i in range(0, len(params), 3):
        ctr = params[i]
        amp = params[i+1]
        wid = params[i+2]
        y = y + amp * np.exp(-((x - ctr)/wid)**2)
    return y

guess = [0, 60000, 80, 1000, 60000, 80]
for i in range(12):
    guess += [60+80*i, 46000, 25]
popt, pcov = curve_fit(func, x, y, p0=guess)
fit = func(x, *popt)
plt.plot(x, y)
plt.plot(x, fit , 'r-')
plt.show()
When I plotted your custom function with your initial guess, it was clear that the majority of its points lie on a more-or-less horizontal line, so it wouldn't fit your peaks well. Because there is no noise and the peaks are so prominent, you just need to pass the y values and a height threshold to the find_peaks function.
By implementing find_peaks instead of your custom function, you get the following code:
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
data = np.genfromtxt("C:\\Users\\lenovo laptop\\practice_data_ll16ame1.dat", skip_header = 15)
x = data[: , 0]
y = data[: , 1]
points = find_peaks(y, height = 100)
plt.plot(x, y)
for i in points[0]:
    plt.scatter(x[i], y[i])
plt.show()
find_peaks returns a tuple consisting of two things:
1. An array of the indices of the peaks (points[0] in the code above)
2. A dictionary of peak properties; since height was passed, it contains the height of each peak (points[1]['peak_heights'])
The code yields a plot with a marker on each of the detected peaks, which I believe is what you want.
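If you still want the Gaussian parameters for each of the six peaks, one option (a sketch, not part of the answer above) is to feed the positions and heights found by find_peaks into curve_fit as the initial guess for your original func; the width guess of 25 is an arbitrary assumption:

# Sketch: seed the multi-Gaussian fit with the peaks found by find_peaks.
# Assumes func, x, y and the imports from the code above; the width guess is arbitrary.
peak_idx, props = find_peaks(y, height=100)
guess = []
for idx, h in zip(peak_idx, props["peak_heights"]):
    guess += [x[idx], h, 25]     # centre, amplitude, width for each detected peak
popt, pcov = curve_fit(func, x, y, p0=guess)
plt.plot(x, y)
plt.plot(x, func(x, *popt), 'r-')
plt.show()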
Related
I am getting a horrible fit when I try to fit a parabola to this data.
I first make a histogram of the data (the positions of an object) and then fit a parabola to the negative log of the histogram bin counts versus position.
The code I am using is this:
time, pos = postime()
plt.plot(time, pos)
poslen = len(pos)
plt.xlabel('Time')
plt.ylabel('Positions')
plt.show()
n, bins, patches = plt.hist(pos, bins=100)
n = n.tolist()
plt.show()
l = len(bins)
s = len(n)
posx = []
i = 0
j = 0
pbin = []
sig = []
while j < (l-1):
    pbin.append((bins[j]+bins[j+1])/2)
    j = j+1
while i < s:
    if n[i] == 0:
        pbin[i] = 0
    else:
        sig.append(np.power(1/n[i], 2))
        n[i] = n[i]/poslen
        n[i] = np.log(n[i])
        n[i] = n[i]*(-1)
    i = i+1
n[:] = [y for y in n if y != 0]
pbin[:] = [y for y in pbin if y != 0]
from scipy.optimize import curve_fit
def parabola(x, a, b):
    return a * (np.power(x, 2)) + b
popt, pcov = curve_fit(parabola, pbin, n)
print(popt)
plt.plot(pbin, n)
plt.plot(pbin, parabola(pbin, *popt), 'r-')
I am not sure why you are computing the histogram... But here is a working example which does not require histogram computation.
import numpy as np
from scipy.optimize import curve_fit
from matplotlib import pyplot

# synthetic data: a parabola with some noise
time_ = np.arange(-5, 5, 0.1)
pos = time_**2 + np.random.rand(len(time_))*5

def parabola(x, a, b):
    return a * (np.power(x, 2)) + b

popt, pcov = curve_fit(parabola, time_, pos)
yfit = parabola(time_, *popt)

pyplot.plot(time_, pos, 'o')
pyplot.plot(time_, yfit)
pyplot.show()
Also, if your time_ vector is not uniformly sampled and you want the fit curve to be evaluated on a uniform grid, you can do fittime_ = np.linspace(np.min(time_), np.max(time_)) and then yfit = parabola(fittime_, *popt).
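For example, a minimal sketch reusing the names from the block above (the choice of 200 points is arbitrary):

# evaluate the fitted parabola on a uniformly spaced grid for a smooth curve
fittime_ = np.linspace(np.min(time_), np.max(time_), 200)
yfit_uniform = parabola(fittime_, *popt)
pyplot.plot(time_, pos, 'o')
pyplot.plot(fittime_, yfit_uniform)
pyplot.show()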
You can also use matrix inversion.
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-5, 5, 100)
Y = (np.power(x, 2) + np.random.normal(0, 1, x.shape)).reshape(-1, 1)

# design matrix with columns 1, x, x^2
X = np.c_[np.ones(x.shape), x, np.power(x, 2)]

# ordinary least squares via the normal equations
A = np.linalg.inv(X.transpose().dot(X)).dot(X.transpose().dot(Y))
Yp = X.dot(A)

fig = plt.figure()
ax = fig.add_subplot()
plt.plot(x, Y, 'o', alpha=0.5)
plt.plot(x, Yp)
plt.show()
The matrix form is X A = Y, whose least-squares solution is A = (X^T X)^-1 X^T Y.
There are more detailed treatments of this approach elsewhere if needed. It does not always work out numerically, since X^T X can be ill-conditioned, and you may want to apply some form of regularization.
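As an illustration, here is a minimal ridge (Tikhonov) regularization sketch reusing X, Y, x and plt from the block above; the penalty value lam is an arbitrary assumption, not something from the original answer:

lam = 1e-3                        # regularization strength (assumed value)
ident = np.eye(X.shape[1])        # identity matching the number of columns
# ridge solution: A = (X^T X + lam*I)^-1 X^T Y
A_ridge = np.linalg.inv(X.T.dot(X) + lam * ident).dot(X.T.dot(Y))
Yp_ridge = X.dot(A_ridge)
plt.plot(x, Yp_ridge)
plt.show()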
I wrote simple code that interpolates a sine function with the 'nearest' method. My question: is this code correct? It seems to me that the resulting function should consist of straight lines, but curved lines appear on the generated graph.
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import math
# Original "data set" --- 21 random numbers between 0 and 1.
x0 = np.arange(9)
y0 = [math.sin(i) for i in x0]
plt.plot(x0, y0, 'o', label='Data')
plt.grid(linestyle="-", color=(0.7, 0.8, 1.0))
x = np.linspace(0, 8, len(x0)*2)
# Available options for interp1d
options = ('linear', 'nearest')
f = interp1d(x0, y0, kind='nearest') # interpolation function
plt.plot(x, f(x), label='nearest') # plot of interpolated data
plt.legend()
plt.show()
EDIT:
I would like to implement my own interpolation algorithm; I try to divide the sum of 2 neighbouring values by 2:
lst = list(x0)
for i, val in enumerate(lst):
    lst[i] = lst[i] + lst[i+1] / 2
x0 = tuple(lst)
plt.plot(x0, y0, label='nearest')
But it's not working correctly
The problem is that the green line is drawn as a connected graph between all the points, and you have too few points. Maybe you have misunderstood how np.linspace works. If you increase the number of points (and plot only the points instead of connected lines), you will get a result that looks much more like what you probably expect:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import math
# Original "data set" --- 21 random numbers between 0 and 1.
x0 = np.arange(9)
y0 = [math.sin(i) for i in x0]
plt.plot(x0, y0, 'o', label='Data')
plt.grid(linestyle="-", color=(0.7, 0.8, 1.0))
x = np.linspace(0, 8, 1000)
# Available options for interp1d
options = ('linear', 'nearest')
f = interp1d(x0, y0, kind='nearest') # interpolation function
plt.plot(x, f(x), '.', label='nearest') # plot of interpolated data
plt.legend()
plt.show()
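Regarding the EDIT in the question: the snippet has two problems, the division binds only to lst[i+1] because of operator precedence, and the loop reads one element past the end of the list. A sketch of what was probably intended, computing midpoints of neighbouring x0 values (using the original x0 = np.arange(9) and y0 from above; pairing each midpoint with the left neighbour's y is my assumption about the intent):

# midpoints between neighbouring x0 values: (x0[i] + x0[i+1]) / 2
xm = [(x0[i] + x0[i+1]) / 2 for i in range(len(x0) - 1)]
# y values for the midpoints; taking the left neighbour's y (assumed intent)
ym = [y0[i] for i in range(len(y0) - 1)]
plt.plot(xm, ym, 'o', label='midpoints')
plt.legend()
plt.show()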
Would anything have to be changed in the answer for Gaussian fit for Python to fit data in log-log space? Specifically, for both x and y data covering several orders of magnitude and this code snippet:
from scipy.optimize import curve_fit
from scipy import asarray as ar,exp
def gaus(x,a,x0,sigma):
    return a*exp(-(x-x0)**2/(2*sigma**2))
b=np.genfromtxt('Stuff.dat', delimiter=None, filling_values=0)
x = b[:,0]
y = b[:,1]
n = len(x) #the number of data
mean = sum(x*y)/n #note this correction
sigma = sum(y*(x-mean)**2)/n #note this correction
popt,pcov = curve_fit(gaus,x,y,p0=[max(y),mean,sigma])
ax = pl.gca()
ax.plot(x, y, 'r.-')
ax.plot(x,gaus(x,*popt),'ro:')
ax.set_xscale('log')
ax.set_yscale('log')
The "fits" are horizontal lines and I am not sure whether I am missing something in my code, or if my data simply isn't fittable by a Gaussian. Any help will be appreciated!
This is what I was missing: the data needs to be transformed before doing the fitting, then transformed back to plot on log axes:
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as pl

def gaus(x, a, x0, sigma):
    return a*np.exp(-(x-x0)**2/(2*sigma**2))

b = np.genfromtxt('Stuff.dat', delimiter=None, filling_values=0)

# fit in log-log space
x = np.log(b[:, 0])
y = np.log(b[:, 1])
n = len(x)                        # the number of data points
mean = sum(x*y)/n                 # note this correction
sigma = sum(y*(x-mean)**2)/n      # note this correction
popt, pcov = curve_fit(gaus, x, y, p0=[max(y), mean, sigma])

# transform back (np.exp, since np.log was used) to plot on log axes
ax = pl.gca()
ax.plot(np.exp(x), np.exp(y), 'r.-')
ax.plot(np.exp(x), np.exp(gaus(x, *popt)), 'ro:')
ax.set_xscale('log')
ax.set_yscale('log')
pl.show()
I have been working on fitting a negatively sloped sigmoidal trendline to a set of data. I have only been working with Python for a week, so sorry for the sloppy code. I have two pieces of code which plot the data; however, I cannot get the sigmoid curve to appear as well.
from numpy import *
from matplotlib.pyplot import *
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def sigmoid(x, x0, k):
    y = 1 / (1 + np.exp(-(-k*(x-x0))))
    return y
x = [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001]
y = [0.649097038, 0.682633434, 0.705470344, 0.749350609, 0.989377822, 0.972679201]
coefficients = np.polyfit(x, y, 2)
polynomial = poly1d(coefficients)
xs = arange(0.000001, 0, 0.1)
ys = polynomial(xs)
curve_fit(sigmoid, x, y)
semilogx()
np.polyfit(x, y, 3, rcond=None, full=False, w=None, cov=False)
plot(x, y, 'o')
plot(xs, ys)
ylabel('Cell Viability')
xlabel('Concentration mM')
show()
The second piece of code:
import numpy as np
import pylab
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
def sigmoid(x, x0, k):
    y = 1 / (1 + np.exp(-(-k*(x-x0))))
    return y
xdata = np.array([0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001])
ydata = np.array([0.649097038, 0.682633434, 0.705470344, 0.749350609, 0.989377822, 0.972679201])
popt, pcov = curve_fit(sigmoid, xdata, ydata)
print(popt)
x = np.linspace(-10, 1, 50)
y = sigmoid(x, *popt)
pylab.semilogx()
pylab.plot(xdata, ydata, 'o', label='data')
pylab.plot(x,y, label='fit')
pylab.ylim(0, 1.05)
pylab.legend(loc='best')
pylab.show()
There are a number of issues with your two code pieces, some of which Ajean has hinted at. Let's carefully review what is there and what problems it causes.
1st Code Block
Discard the first two lines and use only:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
Now, instead of poly1d, you'll have to call np.poly1d; instead of semilogx() it's plt.semilogx(); plot, xlabel, ylabel and show become plt.plot, etc.
Next, your use of arange returns an empty array. Instead, try this:
np.arange(0.000001, 0.1, 0.000001)
From curve_fit you should actually store the returned values, as your second code block does:
popt, pcov = curve_fit(sigmoid, x, y)
Next, use sigmoid to generate new y-values:
ysig = sigmoid(x,*popt)
If now you include an additional plot statement at the bottom, e.g.:
plt.plot(x,ysig,'g')
the output will be something like this:
2nd Code Block
It is sufficient to import matplotlib.pyplot as plt. Now, replace the pylab. occurrences with plt.
However, the one thing that does not really work is the linspace command. If you try
x = np.arange(0.000001, 0.1, 0.000001)
instead, you'll get this output
However, both approaches indicate that your fit does not really suit the data. But that may be a different question.
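Putting those changes together, the second code block might look like this (a sketch assembled from the suggestions above, not code from the original post):

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def sigmoid(x, x0, k):
    y = 1 / (1 + np.exp(-(-k*(x-x0))))
    return y

xdata = np.array([0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001])
ydata = np.array([0.649097038, 0.682633434, 0.705470344, 0.749350609, 0.989377822, 0.972679201])

popt, pcov = curve_fit(sigmoid, xdata, ydata)
print(popt)

# arange instead of linspace, as suggested above
x = np.arange(0.000001, 0.1, 0.000001)
y = sigmoid(x, *popt)

plt.semilogx()
plt.plot(xdata, ydata, 'o', label='data')
plt.plot(x, y, label='fit')
plt.ylim(0, 1.05)
plt.legend(loc='best')
plt.show()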
This is what I have for code block 1.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def sigmoid(x, x0, k):
    y = 1 / (1 + np.exp(-(-k*(x-x0))))
    return y
x = [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001]
y = [0.649097038, 0.682633434, 0.705470344, 0.749350609, 0.989377822, 0.972679201]
coefficients = np.polyfit(x, y, 3)
polynomial = np.poly1d(coefficients)
popt, pcov = curve_fit(sigmoid, x, y)
ysig = sigmoid(x, *popt)
plt.semilogx()
np.arange(0.000001, 0.1, 0.000001)
np.polyfit(x, y, 3, rcond=None, full=False, w=None, cov=False)
plt.plot(x, y, 'o')
plt.plot(x, ysig, 'g')
plt.ylabel('Cell Viability')
plt.xlabel('Concentration mM')
plt.show()
Most pyplot examples out there use linear data, but what if data is scattered?
x = 3,7,9
y = 1,4,5
z = 20,3,7
better meshgrid for contourf
xi = np.linspace(min(x)-1, max(x)+1, 9)
yi = np.linspace(min(y)-1, max(y)+1, 9)
X, Y = np.meshgrid(xi, yi)
Now "z" data got to be interpolated onto the meshgrid.
numpy.interp does little help here, while both linear and nn interpolaton of
zi = matplotlib.mlab.griddata(x,y,z,xi,yi,interp="linear")
returns rather strange results
scipy.interpolate.griddata cubic from second answer below needs something else to return data rather than nils
With custom levels data expected be looking something like this
This is what happens:
Although contour requires gridded data, we can cast scattered data onto a grid and then use masked arrays to mask out the blank regions. I simulate this in the code below by creating a random array and using it to mask a test dataset (shown at the bottom). The bulk of the code is taken from a matplotlib demo page.
import matplotlib
import numpy as np
import numpy.ma as ma
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from numpy.random import random_sample

matplotlib.rcParams['xtick.direction'] = 'out'
matplotlib.rcParams['ytick.direction'] = 'out'

# build a regular grid and a test surface (difference of Gaussians)
# note: mlab.bivariate_normal was removed in newer matplotlib releases,
# so this example needs an older matplotlib version
delta = 0.025
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
Z = 10.0 * (Z2 - Z1)

# randomly mask out roughly 30% of the grid to simulate blank regions
J = random_sample(X.shape)
mask = J > 0.7
X = ma.masked_array(X, mask=mask)
Y = ma.masked_array(Y, mask=mask)
Z = ma.masked_array(Z, mask=mask)

plt.figure()
CS = plt.contour(X, Y, Z, 20)
plt.clabel(CS, inline=1, fontsize=10)
plt.title('Simplest default with labels')
plt.savefig('cat.png')
plt.show()
contourf will only work with a grid of data. If your data is scattered, then you'll need to create an interpolated grid matching your data, like this (note: you'll need scipy to perform the interpolation):
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt

# your data
x = [3, 7, 9]
y = [1, 4, 5]
z = [20, 3, 7]

# define grid
xi = np.linspace(0, 10, 300)
yi = np.linspace(0, 6, 300)

# grid the data
zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method='cubic')

# contour the gridded data, plotting dots at the data points
CS = plt.contour(xi, yi, zi, 15, linewidths=0.5, colors='k')
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.jet)
plt.colorbar()  # draw colorbar

# plot data points
plt.scatter(x, y, marker='o', c='b', s=5)
plt.xlim(min(x), max(x))
plt.ylim(min(y), max(y))
plt.title('griddata test (%d points)' % len(x))
plt.show()
The code is adapted from the old matplotlib griddata example.
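If the cubic interpolation leaves NaNs outside the convex hull of your three points (the missing values mentioned in the question), two options, sketched here reusing x, y, z, xi, yi and the imports from the block above, are griddata's fill_value argument or masking the NaNs before contouring:

# Option 1: fill the region outside the convex hull with a constant value
zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method='cubic', fill_value=0)

# Option 2: keep the NaNs but mask them, so contourf leaves those cells blank
zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method='cubic')
zi = np.ma.masked_invalid(zi)
CS = plt.contourf(xi, yi, zi, 15, cmap=plt.cm.jet)
plt.colorbar()
plt.show()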