I have a simple problem: fitting a straight line on a log-log scale. My code is:
data=loadtxt(filename)
xdata=data[:,0]
ydata=data[:,1]
polycoeffs = scipy.polyfit(xdata, ydata, 1)
yfit = scipy.polyval(polycoeffs, xdata)
pylab.plot(xdata, ydata, 'k.')
pylab.plot(xdata, yfit, 'r-')
Now I need to plot the fit line on a log scale, so I just change the x and y axes:
ax.set_yscale('log')
ax.set_xscale('log')
But then it does not plot the correct fit line. How can I change the fit function (to work in log scale) so that the fit line is plotted correctly on a log-log scale?
EDIT:
from scipy import polyfit
# Load the two-column data file; column 0 is used as x, column 1 as y.
data = loadtxt("data.txt")
xdata,ydata = data[:,0],data[:,1]
xdata,ydata = zip(*sorted(zip(xdata,ydata))) # sort the (x, y) pairs by x (ties broken by y)
# Fit a straight line in log10 space:
#   log10(y) = polycoef[0] * log10(x) + polycoef[1]
xd,yd = log10(xdata),log10(ydata)
polycoef = polyfit(xd, yd, 1)
# Map the fitted line back to linear space so it can be drawn on either kind of axes.
yfit = 10**( polycoef[0]*xd+polycoef[1] )
# Top panel: data and fit on linear axes.
plt.subplot(211)
plt.plot(xdata,ydata,'.k',xdata,yfit,'-r')
# Bottom panel: the same data and fit on log-log axes.
plt.subplot(212)
plt.loglog(xdata,ydata,'.k',xdata,yfit,'-r')
plt.show()
You want
log(y) = k log(x) + q, so
y = exp(k log(x) + q) = exp(k log(x)) * exp(q) = exp(log(x^k)) * exp(q) = A x^k, where A = exp(q).
As you can see, one requirement is y(0) = 0.
From the code point of view, you are plotting the fit function using only the x values of the data; it is probably better to evaluate it on additional points:
xfit = scipy.linspace(min(xdata), max(xdata), 50)
yfit = scipy.polyval(polycoeffs, xfit)
ax.plot(xfit, yfit, 'r-')
Related
I have a log-log plot for which I need to find the gradient of the line of best fit. I've tried using SciPy's curve_fit function to find the gradient, but the line of best fit clearly isn't fitted correctly. Is there anything I need to change in my code below?
fig = plt.figure(figsize=[7,7])
ax = plt.axes()
ax.set_xscale("log")
ax.set_yscale("log")
ax.errorbar(m, unumpy.nominal_values(dn_dm) , yerr= unumpy.std_devs(dn_dm), fmt='k.', label='Data')
newX = np.logspace(-.25, .5, base=10)
def myExpFunc(x, a, b):
    """Evaluate the power-law model ``a * x**b``.

    Taking logarithms gives ``log(y) = log(a) + b * log(x)``, so on log-log
    axes this model is a straight line whose slope is ``b``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the model.
    a : scalar
        Multiplicative prefactor.
    b : scalar
        Exponent applied to ``x``.

    Returns
    -------
    Result of ``a * np.power(x, b)`` (scalar or ndarray, following ``x``).
    """
    return a * np.power(x, b)
popt, pcov = curve_fit(myExpFunc, m, unumpy.nominal_values(dn_dm))
ax.plot(newX, myExpFunc(newX, *popt), 'r-')
I have a dataset for curvature and I need to find the tangent to the curve. My code is as follows, but unfortunately I am not getting the required result:
chData = efficient.get('Car.Road.y')
fittedParameters = (np.gradient(chData_m_5[:],1)) # 999 values
plt.plot(chData[1:]) # orginally 1000 values
plt.plot(fittedParameters)
plt.show()
The output is:
Edit 1:
I made the following changes to the code to get the tangent to the curve, but unfortunately the tangent lies some distance away from the curve. Could you please point out what is causing this and how to fix it? Thank you!
fig, ax1 = plt.subplots()
chData_m = efficient.get('Car.Road.y')
x_fit = chData_m.timestamps
y_fit = chData_m.samples
fittedParameters = np.polyfit(x_fit[:],y_fit[:],1)
f = plt.figure(figsize=(800/100.0, 600/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(x_fit, y_fit, 'D')
# create data for the fitted equation plot
xModel = np.linspace(min(x_fit), max(x_fit))
yModel = np.polyval(fittedParameters, xModel)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
# polynomial derivative from numpy
deriv = np.polyder(fittedParameters)
# for plotting
minX = min(x_fit)
maxX = max(x_fit)
# value of derivative (slope) at a specific X value, so
# that a straight line tangent can be plotted at the point
# you might place this code in a loop to animate
pointVal = 10.0 # example X value
y_value_at_point = np.polyval(fittedParameters, pointVal)
slope_at_point = np.polyval(deriv, pointVal)
ylow = (minX - pointVal) * slope_at_point + y_value_at_point
yhigh = (maxX - pointVal) * slope_at_point + y_value_at_point
# now the tangent as a line plot
axes.plot([minX, maxX], [ylow, yhigh])
plt.show()
plt.close('all') # clean up after using pyplot
And the output is:
Most likely just a scaling problem that we can address by creating a twin axis for the gradient that is scaled independently of the original data. To be on the safe side, we also provide the x-values to np.gradient in case they are not evenly spaced.
import matplotlib.pyplot as plt
import numpy as np
fig, ax1 = plt.subplots()
def func(x, a=0, b=100, c=1, n=3.5):
    """Evaluate the curve ``a + b / (1 + (c / x)**n)``.

    Parameters
    ----------
    x : scalar or ndarray
        Point(s) at which to evaluate; must be non-zero because of ``c / x``.
    a : scalar, default 0
        Additive offset.
    b : scalar, default 100
        Numerator of the fractional term.
    c : scalar, default 1
        Value divided by ``x`` before exponentiation.
    n : scalar, default 3.5
        Exponent applied to ``c / x``.

    Returns
    -------
    The curve value(s), with the same shape as ``x``.
    """
    return a + (b / (1 + (c / x) ** n))
# Sample the example curve at 100 evenly spaced points in [0.1, 70].
x_fit = np.linspace(0.1, 70, 100)
y_fit = func(x_fit, 1, 2, 15, 2.4)
# Numerical derivative dy/dx; x_fit is passed so np.gradient uses the
# actual sample positions rather than assuming unit spacing.
tang = np.gradient(y_fit, x_fit)
# Left-hand y-axis: the data curve.
ax1.plot(x_fit, y_fit, c="blue", label="data")
ax1.legend()
ax1.set_ylabel("data")
# Second y-axis sharing the same x-axis, so the gradient gets its own scale.
ax2 = ax1.twinx()
ax2.plot(x_fit, tang, c="red", label="gradient")
ax2.legend()
ax2.set_ylabel("gradient")
plt.show()
Sample output:
The figure if we plotted it in the same graph:
I'm currently working on a lab report for Brownian Motion using this PDF equation with the intent of evaluating D:
Brownian PDF equation
And I am trying to curve_fit it to a histogram. However, whenever I plot my curve_fits, it's a line and does not appear correctly on the histogram.
Example Histogram with bad curve_fit
And here is my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
# Variables
eta = 1e-3
ra = 0.95e-6
T = 296.5
t = 0.5
# Random data
r = np.array(np.random.rayleigh(0.5e-6, 500))
# Histogram
plt.hist(r, bins=10, density=True, label='Counts')
# Curve fit
x,y = np.histogram(r, bins=10, density=True)
x = x[2:]
y = y[2:]
bin_width = y[1] - y[2]
print(bin_width)
bin_centers = (y[1:] + y[:-1])/2
err = x*0 + 0.03
def f(r, a):
    """Model PDF fitted to the displacement histogram, with ``a`` as the free parameter.

    Reads the module-level constants ``eta``, ``ra``, ``T`` and ``t``.
    ``r`` is scaled by ``1e-6`` inside the expression.

    Parameters
    ----------
    r : scalar or ndarray
        Displacement value(s) at which to evaluate the model.
    a : scalar
        Fit parameter (the accompanying text says it stands in for the
        Boltzmann constant).

    Returns
    -------
    Model value(s) with the same shape as ``r``.
    """
    # NOTE(review): if the target is r/(2*D*t) * exp(-r**2/(4*D*t)) with
    # D = a*T/(6*pi*eta*ra), the exponent would need an extra factor 1/2.
    # The reference equation is not visible here -- confirm before changing.
    scale = a * T * t
    prefactor = (1e-6 * 3 * np.pi * r * eta * ra) / scale
    exponent = (-3 * (1e-6 * r) ** 2 * eta * ra * np.pi) / scale
    return prefactor * np.exp(exponent)
print(x) # these are flipped for some reason
print(y)
plt.plot(bin_centers, x, label='Fitting this', color='red')
popt, pcov = optimize.curve_fit(f, bin_centers, x, p0 = (1.38e-23), sigma=err, maxfev=1000)
plt.plot(y, f(y, popt), label='PDF', color='orange')
print(popt)
plt.title('Distance vs Counts')
plt.ylabel('Counts')
plt.xlabel('Distance in micrometers')
plt.legend()
Is the issue with my curve_fit? Or is there an underlying issue I'm missing?
EDIT: I broke down D to get the Boltzmann constant as a in the function, which is why there are more numbers in f than the equation above. D and Gamma.
I've tried messing with the initial conditions and plotting the function with 1.38e-23 instead of popt, but that does this (the purple line). This tells me something is wrong with the equation for f, but no issues jump out to me when I look at it. Am I missing something?
EDIT 2: I changed the function to this to simplify it and match the numpy.random.rayleigh() distribution:
def f(r, a):
    """Evaluate ``(r / a) * exp(-r**2 / (2 * a))``.

    This has the form of a Rayleigh density in which ``a`` takes the place
    of the squared scale parameter.

    Parameters
    ----------
    r : scalar or ndarray
        Point(s) at which to evaluate.
    a : scalar
        Fit parameter; must be non-zero.

    Returns
    -------
    Model value(s) with the same shape as ``r``.
    """
    return (r / a) * np.exp((-1 * r ** 2) / (2 * a))
But this doesn't resolve the issue that the curve_fit is a line with a positive slope instead of anything remotely what I'm interested in. Now I am more confused as to what the issue is.
There are a few things here. I don't think x and y were ever flipped, or at least when I assumed they weren't, everything seemed to work fine. I also cleaned up a few parts of the code, for example, I'm not sure why you call two different histograms; and I think there may have been problems handling the single element tuple of parameters. Also, for curve fitting, the initial parameter guess often needs to be in the ballpark, so I changed that too.
Here's a version that works for me:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
# Random data
r = np.array(np.random.rayleigh(0.5e-6, 500))
# Histogram
hist_values, bin_edges, patches = plt.hist(r, bins=10, density=True, label='Counts')
bin_centers = (bin_edges[1:] + bin_edges[:-1])/2
x = bin_centers[2:] # not necessary, and I'm not sure why the OP did this, but I'm doing this here because OP does
y = hist_values[2:]
def f(r, a):
    """Rayleigh probability density with scale parameter ``a``.

    Computes ``(r / a**2) * exp(-r**2 / (2 * a**2))``.

    Parameters
    ----------
    r : scalar or ndarray
        Point(s) at which to evaluate the density.
    a : scalar
        Scale parameter; must be non-zero.

    Returns
    -------
    Density value(s) with the same shape as ``r``.
    """
    scale_sq = a * a
    return (r / scale_sq) * np.exp((-1 * (r ** 2)) / (2 * scale_sq))
# Overlay the points that will be fitted on top of the histogram.
plt.plot(x, y, label='Fitting this', color='red')
# Constant uncertainty of 0.03 for every point (array with the same shape as x).
err = x*0 + 0.03
# One-parameter fit; p0 is a one-element tuple holding the initial guess for a.
popt, pcov = optimize.curve_fit(f, x, y, p0 = (1.38e-6,), sigma=err, maxfev=1000)
# Draw the fitted model evaluated at the same x values.
plt.plot(x, f(x, *popt), label='PDF', color='orange')
plt.title('Distance vs Counts')
plt.ylabel('Counts')
plt.xlabel('Distance in Meters') # Motion seems to be in micron range, but calculation and plot has been done in meters
plt.legend()
Say I have a histogram which resembles a normal distribution but is slightly asymmetric.
Say I want to fit only the peak of my distribution with a Gaussian, i.e. only the data in a small range around the peak should be taken into account. How do I do that? Here is my code so far:
def gaussian(x, mean, amplitude, standard_deviation):
    """Unnormalised Gaussian ``amplitude * exp(-(x - mean)**2 / (2 * sigma**2))``.

    The factor 1/2 in the exponent is what makes ``standard_deviation`` the
    actual standard deviation of the curve; without it the fitted width
    would be larger than the true standard deviation by a factor sqrt(2).

    Parameters
    ----------
    x : scalar or ndarray
        Point(s) at which to evaluate.
    mean : scalar
        Centre of the peak.
    amplitude : scalar
        Peak height (value at ``x == mean``).
    standard_deviation : scalar
        Width of the peak; must be non-zero.

    Returns
    -------
    Curve value(s) with the same shape as ``x``.
    """
    z = (x - mean) / standard_deviation
    return amplitude * np.exp(-0.5 * z ** 2)
#Histogram
fig, ax = plt.subplots()
y, x, _ = ax.hist(data, bins = 'auto')
#Fit
bin_centers = x[:-1] + np.diff(x) / 2
params, cov = sp.optimize.curve_fit(gaussian, bin_centers, y, p0=[x.max(), 100, 5000])
x_values = np.linspace(0, 70000, 1000)
plt.plot(x_values, gaussian(x_values, *params), label='fit')
Print the hist and bin_centers. Then only fit the part you need. I had a similar code where I fitted using a normal fit.
hist, bin_edges = np.histogram(data, bins='auto') #get hist and bin_edges
bin_centers = .5*(bin_edges[:-1] + bin_edges[1:]) #get bin_centers
A = bin_centers[m:n] #values of bin_centers of only the part you need, in my case it was index m to index n.
B = hist[m:n] #values of histogram of only the part you need
slope, c = np.polyfit(A, B, 1) #fit curve (y) = m*(x) + c
deg_fit = (slope*A + c) #calculate the fitted values of y
plt.plot(bin_centers[m:n],deg_fit)
I'm trying to fit a second order polynomial to raw data and output the results using Matplotlib. There are about a million points in the data set that I'm trying to fit. It is supposed to be simple, with many examples available around the web. However for some reason I cannot get it right.
I get the following warning message:
RankWarning: Polyfit may be poorly conditioned
This is my output:
This is output using Excel:
See below for my code. What am I missing??
xData = df['X']
yData = df['Y']
xTitle = 'X'
yTitle = 'Y'
title = ''
minX = 100
maxX = 300
minY = 500
maxY = 2200
title_font = {'fontname':'Arial', 'size':'30', 'color':'black', 'weight':'normal',
'verticalalignment':'bottom'} # Bottom vertical alignment for more space
axis_font = {'fontname':'Arial', 'size':'18'}
#Poly fit
# calculate polynomial
z = np.polyfit(xData, yData, 2)
f = np.poly1d(z)
print(f)
# calculate new x's and y's
x_new = xData
y_new = f(x_new)
#Plot
plt.scatter(xData, yData,c='#002776',edgecolors='none')
plt.plot(x_new,y_new,c='#C60C30')
plt.ylim([minY,maxY])
plt.xlim([minX,maxX])
plt.xlabel(xTitle,**axis_font)
plt.ylabel(yTitle,**axis_font)
plt.title(title,**title_font)
plt.show()
The array to plot must be sorted. Here is a comparison between plotting a sorted and an unsorted array. The plot in the unsorted case looks completely distorted; however, the fitted function is of course the same.
-3.496 x^2 + 2.18 x + 17.26
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(0)
x = (np.random.normal(size=300)+1)
fo = lambda x: -3*x**2+ 1.*x +20.
f = lambda x: fo(x) + (np.random.normal(size=len(x))-0.5)*4
y = f(x)
fig, (ax, ax2) = plt.subplots(1,2, figsize=(6,3))
ax.scatter(x,y)
ax2.scatter(x,y)
def fit(ax, x, y, sort=True):
    """Fit a quadratic to ``(x, y)`` and draw it, with the reference curve, on ``ax``.

    The fitted polynomial is printed. The fit itself does not depend on the
    ordering of ``x``; only the line plots do, because a line plot connects
    points in the order they are given.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on; its title is set to "sorted" or "unsorted".
    x, y : array-like
        Sample coordinates used for the fit.
    sort : bool, default True
        If true, evaluate and plot both curves on ``x`` sorted ascending.
    """
    coeffs = np.polyfit(x, y, 2)
    # Named ``poly`` so the local does not shadow this function's own name.
    poly = np.poly1d(coeffs)
    print(poly)
    if sort:
        x = np.sort(x)
    ax.set_title("sorted" if sort else "unsorted")
    ax.plot(x, fo(x), label="original func", color="k", alpha=0.6)
    ax.plot(x, poly(x), label="fit func", color="C3", alpha=1, lw=2.5)
    ax.legend()
fit(ax, x,y, sort=False)
fit(ax2, x,y, sort=True)
plt.show()
The problem is probably using a power basis for data that is displaced some distance from zero along the x axis. If you use the Polynomial class from numpy.polynomial it will scale and shift the data before the fit, which will help, and also keep track of the scale and shift used. Note that if you want the coefficients in the normal form you will need to convert to that form.