I'm aware that there are threads pertaining to this, but I'm confused about how to fit my data.
My data is imported and plotted like this:
import matplotlib.pyplot as plt
%matplotlib inline
import pylab as plb
import numpy as np
import scipy as sp
import csv

FreqTime1 = []
DecayCount1 = []

with open('Half_Life.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        FreqTime1.append(row[0])
        DecayCount1.append(row[3])

FreqTime1 = np.array(FreqTime1)
DecayCount1 = np.array(DecayCount1)

fig1 = plt.figure(figsize=(15, 6))
ax1 = fig1.add_subplot(111)
ax1.plot(FreqTime1, DecayCount1, ".", label='Run 1')
ax1.set_xlabel('Time (sec)')
ax1.set_ylabel('Count')
plt.legend()
The problem is that I'm having difficulty setting up a general exponential decay fit; I'm not sure how to compute the parameter values from the data set.
If possible, I'd also like the equation of the fitted decay curve to be displayed with the graph, but that can easily be added once a fit is produced.
Edit -------------------------------------------------------------
So when using the fitting function that Stanely R mentioned
def model_func(x, a, k, b):
    return a * np.exp(-k*x) + b

x = FreqTime1
y = DecayCount1

p0 = (1., 1.e-5, 1.)
opt, pcov = curve_fit(model_func, x, y, p0)
a, k, b = opt
I'm returned with this error message
TypeError: ufunc 'multiply' did not contain a loop with signature matching types dtype('S32') dtype('S32') dtype('S32')
Any idea on how to resolve this?
You have to use curve_fit from scipy.optimize: http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.optimize.curve_fit.html
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt

# define type of function to search
def model_func(x, a, k, b):
    return a * np.exp(-k*x) + b

# sample data
x = np.array([399.75, 989.25, 1578.75, 2168.25, 2757.75, 3347.25, 3936.75, 4526.25, 5115.75, 5705.25])
y = np.array([109, 62, 39, 13, 10, 4, 2, 0, 1, 2])

# curve fit
p0 = (1., 1.e-5, 1.)  # starting search coefficients
opt, pcov = curve_fit(model_func, x, y, p0)
a, k, b = opt

# test result
x2 = np.linspace(250, 6000, 250)
y2 = model_func(x2, a, k, b)
fig, ax = plt.subplots()
# note: the model is a*exp(-k*x) + b, so the label displays -k as the exponent coefficient
ax.plot(x2, y2, color='r', label='Fit. func: $f(x) = %.3f e^{%.3f x} %+.3f$' % (a, -k, b))
ax.plot(x, y, 'bo', label='data with noise')
ax.legend(loc='best')
plt.show()
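If you also need uncertainty estimates for the fitted parameters, the square roots of the diagonal of pcov give the one-sigma errors; a small optional addition:

# one-sigma uncertainties of a, k, b from the covariance matrix
perr = np.sqrt(np.diag(pcov))
print("a = %.3f +/- %.3f, k = %.5f +/- %.5f, b = %.3f +/- %.3f"
      % (a, perr[0], k, perr[1], b, perr[2]))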
"I'm returned with this error message
TypeError: ufunc 'multiply' did not contain a loop with signature matching types dtype('S32') dtype('S32') dtype('S32')
Any idea on how to resolve this?"
Your code that reads the CSV file to create FreqTime1 and DecayCount1 is creating arrays of strings. You can fix that by following the suggestion that @StanleyR made in a comment. A better idea is to replace this code:
FreqTime1 = []
DecayCount1 = []

with open('Half_Life.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        FreqTime1.append(row[0])
        DecayCount1.append(row[3])

FreqTime1 = np.array(FreqTime1)
DecayCount1 = np.array(DecayCount1)
with:
FreqTime1, DecayCount1 = np.loadtxt('Half_Life.csv', delimiter=',', usecols=(0, 3), unpack=True)
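With the two columns loaded as floats, the curve_fit call from above should work directly; here is a minimal sketch putting the pieces together (same file name, column indices, and model function as in the question):

import numpy as np
from scipy.optimize import curve_fit

# load the time and count columns directly as float arrays
FreqTime1, DecayCount1 = np.loadtxt('Half_Life.csv', delimiter=',',
                                    usecols=(0, 3), unpack=True)

def model_func(x, a, k, b):
    return a * np.exp(-k*x) + b

p0 = (1., 1.e-5, 1.)
opt, pcov = curve_fit(model_func, FreqTime1, DecayCount1, p0)
a, k, b = opt
print("a = %.3f, k = %.5f, b = %.3f" % (a, k, b))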
I'm currently trying to fit data to this function to extract "e/lambda":
To do so, I tried (for the first time) to fit the data using Python, and I rearranged the fit function a little:
import matplotlib.pyplot as plt
import scipy.optimize as optimize
import numpy as np

# data
Io = np.array([0.3, 0.5, 1.4, 2.9, 3.8])
Is = np.array([2.7, 2.7, 2.7, 2.7, 2.7])
R = Io/Is
T = np.array([0., 50, 70, 80, 85])
F = R/R[0]
plt.plot(T, F, 'ro', label="original data")

# curvefit
## a = np.exp(e/lambda)
def func(T, a):
    return a * (((np.exp((np.cos(T)-1)/(np.cos(T)))) -
                 (np.exp((1-np.cos(T))/((np.cos(T))**2)))) /
                ((np.exp((np.cos(T)-1)/(np.cos(T)))) -
                 (np.exp((1-np.cos(T))/((np.cos(T)))))))

popt, pcov = optimize.curve_fit(func, T, F, maxfev=100000)
t = np.linspace(0, 85)
plt.plot(t, func(t, *popt), label="Fitted Curve")
plt.legend(loc='upper left')
plt.show()
However, I'm getting this message: "Optimal parameters not found: Number of calls to function has reached maxfev = 100000"
This might be more of a mathematical issue, since I've successfully tried this code with another function:
def func(T, a, b, c):
    return a + np.exp(b*T - c)
Does anyone know if it is possible to fit this function using its "true" form?
Thanks!!
As part of my research project, I was working on performing linear regression with some data using matplotlib. Unfortunately, I am unable to get my line to touch the origin; matplotlib seems to cut it off at the minimum value of my dataset. How can I fix this and get my line to touch the origin? As reference, here is my code:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels import api as sm


def file_analysis(csv_file, state):
    """
    This method takes in a file object and the name of a state.
    :param csv_file: Pass in a csv file object.
    :param state: Name of the state as a string.
    :return: None.
    """
    data = pd.read_csv(csv_file)
    data = data[["Total Cases", "Total Deaths"]]
    y = data["Total Deaths"]
    x = data["Total Cases"]
    results = sm.OLS(y, x).fit()
    plt.scatter(x, y)
    yhat = results.params[0] * x
    print(results.params)
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.margins(0)
    fig = plt.plot(x, yhat, lw=4, c="orange", label="regressionline")
    plt.xlabel("Total Cases", fontsize=20)
    plt.ylabel('Total Deaths', fontsize=20)
    plt.title(state)
    plt.savefig(state + "_scatterplot" + ".png")
    plt.show()
    with open(state + "_analysis.txt", "w") as file:
        file.write(results.summary().as_text())
And here is the resulting scatter-plot after passing in the name of the state and the csv file for the state:
You should just change the x-values used for your regression line so that they include 0:
yhat = results.params[0] * np.arange(0, x.max())
fig = plt.plot(np.arange(0, x.max()), yhat, lw=4, c="orange", label="regressionline")
I think the reason your line does not touch the origin is that you are only plotting it over the extent of your data. By calculating the predicted deaths as yhat = results.params[0] * x, you restrict the line to the x values in your dataset. You can easily fix this by supplying a wider range of x values:
newX = np.arange(0, 80)
yhat = results.params[0] * newX
fig = plt.plot(newX, yhat, lw=4, c="orange", label="regressionline")
By the way, are you fitting the model without intercept on purpose?
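In case the missing intercept is not intentional, here is a minimal sketch of fitting with one via statsmodels' add_constant (variable names taken from the question):

import statsmodels.api as sm

# add a column of ones so OLS also estimates an intercept
X_const = sm.add_constant(x)
results = sm.OLS(y, X_const).fit()
intercept, slope = results.params   # order: constant first, then the slope
yhat = intercept + slope * x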
I don't have the data to try the solution I am proposing, but if I were you, I would add a 0 to the yhat values and also a 0 to the x values at the same position, so the line reaches the [0, 0] position.
Let me know if this works :)
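A minimal sketch of that suggestion, reusing the x and yhat variables from the question; because the fitted line has no intercept, the added point (0, 0) lies exactly on it:

import numpy as np

# prepend the origin to both the x values and the fitted values
x_ext = np.insert(np.asarray(x), 0, 0)
yhat_ext = np.insert(np.asarray(yhat), 0, 0)
plt.plot(x_ext, yhat_ext, lw=4, c="orange", label="regressionline")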
After a lot of searching and being unable to find an answer, I chose to post my question here.
How do I fit an exponential function of the form y = (1/A)e^(-x/A) to the shown data and plot it? I'm still getting used to fitting in Python. Help will be much appreciated!
Thank you in advance.
Looks like I figured it out.
import numpy as np
import scipy as sp
import scipy.optimize
import matplotlib
import matplotlib.pyplot as plt


def exponential_fit(x, a, c):
    """
    Exponential fit used for the MuonLab life time measurements.
    :param x:
    :param a:
    :param c:
    :return:
    """
    return (1/a)*np.exp(-x/a) + c


def logarithmic_fit_plot(x, y):  # WIP
    font = {'family': 'normal',
            'weight': 'bold',
            'size': 20}
    matplotlib.rc('font', **font)
    xdata = x
    ydata = y
    plt.rc('text', usetex=True)
    plt.plot(xdata, ydata, '.', label='sample')
    popt, pcov = sp.optimize.curve_fit(exponential_fit, xdata, ydata)
    plt.plot(xdata, exponential_fit(xdata, *popt), 'r-',
             label=r"$\frac{1}{\tau_0}e^{\frac{-x}{\tau_0}}, \tau_0=%5.3f, c=%5.3f$" % tuple(popt))
    plt.legend()
    plt.show()
Sadly it doesn't fit the data very well, but I guess that's just a math problem.
This code produces a decent fit.
first = True
lifetimes = []
counts = []

with open('Werkverkeer.txt') as w:
    next(w)
    for line in w:
        _, life, count = line.rstrip().split()
        life, count = float(life), int(count)
        if count == 0:
            continue
        lifetimes.append(life - 0.005)
        counts.append(count)

probs = [_/sum(counts) for _ in counts]
print(probs)

from scipy.optimize import leastsq
from scipy.stats import expon
from numpy import exp

def residual(params, X, data):
    model = [expon.cdf(x+0.005, scale=params[0]) - expon.cdf(x-0.005, scale=params[0]) for x in X]
    return [d-m for (d, m) in zip(data, model)]

r = leastsq(residual, [140], args=(lifetimes, probs))
estimate = r[0][0]
print(estimate)

fitted = [expon.cdf(x+0.005, scale=estimate) - expon.cdf(x-0.005, scale=estimate) for x in lifetimes]
print(fitted)

from matplotlib import pyplot as plt
plt.plot(lifetimes, probs, 'r.')
plt.plot(lifetimes, fitted, 'b-')
plt.show()
Things to note:
Rather than fitting to counts I've fitted to normalised counts, which are estimates of probabilities, because the counts are really a way of getting at an estimate of the probability density function for the lifetimes.
Because I'm using counts, I need to fit the areas under the density function, for a given value of the parameter, between the boundaries of the bins; hence the model = ... line (the corresponding formula is spelled out just after these notes).
As usual, the final line in residual returns the difference between the observed probabilities (based on counts) and the provisionally calculated probabilities.
leastsq returns a value of 0.0497646352872 for the parameter.
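For reference, the quantity computed for each bin in residual is the probability mass of the exponential distribution over that bin (each bin is 0.01 wide, centred on the value x stored in lifetimes). With the exponential CDF $F(x) = 1 - e^{-x/\tau}$, that mass is
$F(x + 0.005) - F(x - 0.005) = e^{-(x - 0.005)/\tau} - e^{-(x + 0.005)/\tau}$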
I am doing a Kernel Density Estimation in Python and getting the contours and paths as shown below. (here is my sample data: https://pastebin.com/193PUhQf).
from numpy import *
from math import *
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

x_2d = []
y_2d = []
data = {}
data['nodes'] = []

# here is the sample data:
# https://pastebin.com/193PUhQf
X = [.....]

for Picker in xrange(0, len(X)):
    x_2d.append(X[Picker][0])
    y_2d.append(X[Picker][1])

# convert to arrays
m1 = np.array([x_2d])
m2 = np.array([y_2d])

x_min = m1.min() - 30
x_max = m1.max() + 30
y_min = m2.min() - 30
y_max = m2.max() + 30

x, y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
positions = np.vstack([x.ravel(), y.ravel()])
values = np.vstack([m1, m2])
kde = stats.gaussian_kde(values)
z = np.reshape(kde(positions).T, x.shape)

fig = plt.figure(2, dpi=200)
ax = fig.add_subplot(111)
pc = ax.pcolor(x, y, z)
cb = plt.colorbar(pc)
cb.ax.set_ylabel('Probability density')
c_s = plt.contour(x, y, z, 20, linewidths=1, colors='k')
ax.plot(m1, m2, 'o', mfc='w', mec='k')
ax.set_title("My Title", fontsize='medium')
plt.savefig("kde.png", dpi=200)
plt.show()
There is a similar way to get the contours using R, which is described here:
http://bl.ocks.org/diegovalle/5166482
Question: how can I achieve the same output using my Python script, or at least as a starting point?
The desired output should be like contours_tj.json, which can be used by the leaflet.js library.
UPDATE:
My input data structure is composed of three comma-separated columns:
the first one is the X value,
the second one is the Y value,
the third one is the ID of my data; it has no numerical value, it is simply an identifier of the data point.
Update 2:
Simply put, I want the same output as in the above link, using my input file, which is in numpy array format.
Update 3:
My input data structure is of list type:
print type(X)
<type 'list'>
and here are the first few lines:
print X[0:5]
[[10.800584, 11.446064, 4478597], [10.576840,11.020229, 4644503], [11.434276,10.790881, 5570870], [11.156718,11.034633, 6500333], [11.054956,11.100243, 6513301]]
geojsoncontour is a Python library for converting matplotlib contours to GeoJSON.
geojsoncontour.contour_to_geojson requires a contour_levels argument. The levels in pyplot.contour are chosen automatically, but you can access them with c_s._levels.
So, for your example you could do:
import geojsoncontour

# your code here
c_s = plt.contour(x, y, z, 20, linewidths=1, colors='k')

# Convert matplotlib contour to geojson
geojsoncontour.contour_to_geojson(
    contour=c_s,
    geojson_filepath='out.geojson',
    contour_levels=c_s._levels,
    ndigits=3,
    unit='m'
)
I'm using a library which produces 3 plots given an object k.
I need to figure out the data points (x, y, z) that produced these plots, but the problem is that the plots come from a method of k.
The library I'm using is pyKriging and this is their github repository.
A simplified version of their example code is:
import pyKriging
from pyKriging.krige import kriging
from pyKriging.samplingplan import samplingplan
sp = samplingplan(2)
X = sp.optimallhc(20)
testfun = pyKriging.testfunctions().branin
y = testfun(X)
k = kriging(X, y, testfunction=testfun, name='simple')
k.train()
k.plot()
The full code, comments and output can be found here.
In summary, I'm trying to get the numpy arrays that produced these plots so I can create plots that follow my formatting style.
I'm not experienced with digging into library code in Python, and I'd appreciate any help!
There is no single data array that produces the plots. Instead, many arrays used for plotting are generated inside the kriging plot function.
Changing the filled contours to line contours is of course not a style option. One therefore needs to use the code from the original plotting function.
An option is to subclass kriging and implement a custom plot function (let's call it myplot). In this function, one can use contour instead of contourf. Naturally, it's also possible to change it completely to one's needs.
import pyKriging
from pyKriging.krige import kriging
from pyKriging.samplingplan import samplingplan
import numpy as np
import matplotlib.pyplot as plt


class MyKriging(kriging):
    def __init__(self, *args, **kwargs):
        kriging.__init__(self, *args, **kwargs)

    def myplot(self, labels=False, show=True, **kwargs):
        fig = plt.figure(figsize=(8, 6))
        # Create a set of data to plot
        plotgrid = 61
        x = np.linspace(self.normRange[0][0], self.normRange[0][1], num=plotgrid)
        y = np.linspace(self.normRange[1][0], self.normRange[1][1], num=plotgrid)
        X, Y = np.meshgrid(x, y)
        # Predict based on the optimized results
        zs = np.array([self.predict([xi, yi]) for xi, yi in zip(np.ravel(X), np.ravel(Y))])
        Z = zs.reshape(X.shape)
        # Calculate errors
        zse = np.array([self.predict_var([xi, yi]) for xi, yi in zip(np.ravel(X), np.ravel(Y))])
        Ze = zse.reshape(X.shape)

        spx = (self.X[:, 0] * (self.normRange[0][1] - self.normRange[0][0])) + self.normRange[0][0]
        spy = (self.X[:, 1] * (self.normRange[1][1] - self.normRange[1][0])) + self.normRange[1][0]

        contour_levels = kwargs.get("levels", 25)

        ax = fig.add_subplot(222)
        CS = plt.contour(X, Y, Ze, contour_levels)
        plt.colorbar()
        plt.plot(spx, spy, 'or')

        ax = fig.add_subplot(221)
        if self.testfunction:
            # Setup the truth function
            zt = self.testfunction(np.array(zip(np.ravel(X), np.ravel(Y))))
            ZT = zt.reshape(X.shape)
            CS = plt.contour(X, Y, ZT, contour_levels, colors='k', zorder=2, alpha=0)
        if self.testfunction:
            contour_levels = CS.levels
            delta = np.abs(contour_levels[0] - contour_levels[1])
            contour_levels = np.insert(contour_levels, 0, contour_levels[0] - delta)
            contour_levels = np.append(contour_levels, contour_levels[-1] + delta)
        CS = plt.contour(X, Y, Z, contour_levels, zorder=1)
        plt.plot(spx, spy, 'or', zorder=3)
        plt.colorbar()

        ax = fig.add_subplot(212, projection='3d')
        ax.plot_surface(X, Y, Z, rstride=3, cstride=3, alpha=0.4)
        if self.testfunction:
            ax.plot_wireframe(X, Y, ZT, rstride=3, cstride=3)
        if show:
            plt.show()


sp = samplingplan(2)
X = sp.optimallhc(20)
testfun = pyKriging.testfunctions().branin
y = testfun(X)
k = MyKriging(X, y, testfunction=testfun, name='simple')
k.train()
k.myplot()