With some help I have produced the following code. Below are some of the desired outputs for given inputs. However I am having some trouble completing the last task of this code. Looking for some help with this, any guidance or help is greatly appreciated, thanks!
flops = 0
def add(x1, x2):
global flops
flops += 1
return x1 + x2
def multiply(x1, x2):
global flops
flops += 1
return x1 * x2
def poly_horner(A, x):
global flops
flops = 0
p = A[-1]
i = len(A) - 2
while i >= 0:
p = add(multiply(p, x), A[i])
i -= 1
return p
def poly_naive(A, x):
global flops
p = 0
flops = 0
for i, a in enumerate(A):
xp = 1
for _ in range(i):
xp = multiply(xp, x)
p = add(p, multiply(xp, a))
return p
Given the following inputs, I got the following outputs:
poly_horner([1,2,3,4,5], 2)
129
print(flops)
8
poly_naive([1,2,3,4,5, 2])
129
print(flops)[![enter image description here][1]][1]
20
np.polyval([5,4,3,2,1], 2)
129
I assume you want to create a figure, though your question is quite vague...but I have a few minutes to kill while my code runs. Anyway, it seems you MIGHT be having difficulty plotting.
import numpy as np
import pylab as pl
x = np.arange(10)
y = x * np.pi
# you can calculate a line of best fit (lobf) using numpy's polyfit function
lobf1 = np.polyfit(x, y, 1) # first degree polynomial
lobf2 = np.polyfit(x, y, 2) # second degree polynomial
lobf3 = np.polyfit(x, y, 3) # third degree polynomial
# you can now use the lines of best fit to calculate the
# value anywhere within the domain using numpy's polyval function
# FIRST, create a figure and a plotting axis within the fig
fig = pl.figure(figsize=(3.25, 2.5))
ax0 = fig.add_subplot(111)
# now use polyval to calculate your y-values at every x
x = np.arange(0, 20, 0.1)
ax0.plot(x, np.polyval(lobf1, x), 'k')
ax0.plot(x, np.polyval(lobf2, x), 'b')
ax0.plot(x, np.polyval(lobf3, x), 'r')
# add a legend for niceness
ax0.legend(('Degree 1', 'Degree 2', 'Degree 3'), fontsize=8, loc=2)
# you can label the axes whatever you like
ax0.set_ylabel('My y-label', fontsize=8)
ax0.set_xlabel('My x-label', fontsize=8)
# you can show the figure on your screen
fig.show()
# and you can save the figure to your computer in different formats
# specifying bbox_inches='tight' helps eliminate unnecessary whitespace around
# the axis when saving...it just looks better this way.
pl.savefig('figName.png', dpi=500, bbox_inches='tight')
pl.savefig('figName.pdf', bbox_inches='tight')
# don't forget to close the figure
pl.close('all')
Related
I have multiple measurement datasets that I want to combine to a single dataset. While I have a working solution, it is terribly inefficient and I would be happy for some tips on how I can improve it.
Think of the measurements as multiple height maps of one object that I want to combine to a single height map. My measurements are not perfect and may have some tilt and height offset. Let's assume (for now) that we know the x-y position perfectly accurate. Here is an example:
import numpy as np
import matplotlib.pyplot as plt
def height_profile(x, y):
radius = 100
return np.sqrt(radius**2-x**2-y**2)-radius
np.random.seed(123)
datasets = {}
# DATASET 1
x = np.arange(-8, 2.01, 0.1)
y = np.arange(-3, 7.01, 0.1)
xx, yy = np.meshgrid(x, y)
# height is the actual profile + noise
zz = height_profile(xx, yy) + np.random.randn(*xx.shape)*0.001
datasets[1] = [xx, yy, zz]
plt.figure()
plt.pcolormesh(*datasets[1])
plt.colorbar()
# DATASET 2
x = np.arange(-2, 8.01, 0.1)
y = np.arange(-3, 7.01, 0.1)
xx, yy = np.meshgrid(x, y)
# height is the actual profile + noise + random offset + random tilt
zz = height_profile(xx, yy) + np.random.randn(*xx.shape)*0.001 + np.random.rand() + np.random.rand()*xx*0.1 + np.random.rand()*yy*0.1
datasets[2] = [xx, yy, zz]
plt.figure()
plt.pcolormesh(*datasets[2])
plt.colorbar()
# DATASET 3
x = np.arange(-5, 5.01, 0.1)
y = np.arange(-7, 3.01, 0.1)
xx, yy = np.meshgrid(x, y)
# height is the actual profile + noise + random offset + random tilt
zz = height_profile(xx, yy) + np.random.randn(*xx.shape)*0.001 + np.random.rand() + np.random.rand()*xx*0.1 + np.random.rand()*yy*0.1
datasets[3] = [xx, yy, zz]
plt.figure()
plt.pcolormesh(*datasets[3])
plt.colorbar()
To combine the three (or more) datasets, I have the following strategy: Find the overlap between the datasets, calculate the summed-up height difference between datasets in the overlap regions (residual_overlap) and try to minimize the height differences (residual) using lmfit. To apply the transformations on the dataset (tilt, offset, etc.) I have a dedicated function.
from lmfit import minimize, Parameters
from copy import deepcopy
from itertools import combinations
from scipy.interpolate import griddata
def data_transformation(dataset, idx, params):
dataset = deepcopy(dataset)
if 'x_offset_{}'.format(idx) in params:
x_offset = params['x_offset_{}'.format(idx)].value
else:
x_offset = 0
if 'y_offset_{}'.format(idx) in params:
y_offset = params['y_offset_{}'.format(idx)].value
else:
y_offset = 0
if 'tilt_x_{}'.format(idx) in params:
x_tilt = params['tilt_x_{}'.format(idx)].value
else:
x_tilt = 0
if 'tilt_y_{}'.format(idx) in params:
y_tilt = params['tilt_y_{}'.format(idx)].value
else:
y_tilt = 0
if 'piston_{}'.format(idx) in params:
piston = params['piston_{}'.format(idx)].value
else:
piston = 0
_x = dataset[0] - np.mean(dataset[0])
_y = dataset[1] - np.mean(dataset[1])
dataset[0] = dataset[0] + x_offset
dataset[1] = dataset[1] + y_offset
dataset[2] = dataset[2] + 2 * (x_tilt * _x + y_tilt * _y) + piston
return dataset
def residual_overlap(dataset_0, dataset_1):
xy_0 = np.stack((dataset_0[0].flatten(), dataset_0[1].flatten()), axis=1)
xy_1 = np.stack((dataset_1[0].flatten(), dataset_1[1].flatten()), axis=1)
difference = griddata(xy_0, dataset_0[2].flatten(), xy_1) - \
dataset_1[2].flatten()
return difference
def residual(params, datasets):
datasets = deepcopy(datasets)
for idx in datasets:
datasets[idx] = data_transformation(
datasets[idx], idx, params)
residuals = []
for combination in combinations(list(datasets), 2):
residuals.append(residual_overlap(
datasets[combination[0]], datasets[combination[1]]))
residuals = np.concatenate(residuals)
residuals[np.isnan(residuals)] = 0
return residuals
def minimize_datasets(params, datasets, **minimizer_kw):
minimize_fnc = lambda *args, **kwargs: residual(*args, **kwargs)
datasets = deepcopy(datasets)
min_result = minimize(minimize_fnc, params,
args=(datasets, ), **minimizer_kw)
return min_result
I run the "stitching" like this:
params = Parameters()
params.add('tilt_x_2', 0)
params.add('tilt_y_2', 0)
params.add('piston_2', 0)
params.add('tilt_x_3', 0)
params.add('tilt_y_3', 0)
params.add('piston_3', 0)
fit_result = minimize_datasets(params, datasets)
plt.figure()
plt.pcolormesh(*data_transformation(datasets[1], 1, fit_result.params), alpha=0.3, vmin=-0.5, vmax=0)
plt.pcolormesh(*data_transformation(datasets[2], 2, fit_result.params), alpha=0.3, vmin=-0.5, vmax=0)
plt.pcolormesh(*data_transformation(datasets[3], 3, fit_result.params), alpha=0.3, vmin=-0.5, vmax=0)
plt.colorbar()
As you can see, it does work, but the stitching takes about a minute for these small datasets on my computer. In reality I have more and bigger datasets.
Do you see a way to improve the stitching performance?
Edit: As suggested, I ran a profiler and it shows that 99.5% of the time is spent in the griddata function. That one is used to interpolate datapoints from dataset_0 to the locations of dataset_1. If I switch method to "nearest", the execution time drops to about a second, but then there is no interpolation happening. Any chance to improve the speed of the interpolation?
Skimming through the code, I can't really see anywhere to improve other than you are running deepcopy() over and over again.
However, I would recommend you to do profiling. If you are using pycharm, you can do profiling using the clock/run sign.
I am sure other IDEs also have such capabilities. This way you can figure out which function is taking the most time.
Whole graph:
When I zoom in to a few functions (I am showing google cloud functions):
You can see how many times they are called and how long they took etc.
Long story short, you need a profiler!
So I am trying to plot the nullclines of a system of ODEs, however I can't seem to plot them in the correct way. When I plot them, I manage to plot them according to time (t vs x and t vs y) but not at (x vs y). I'm not really sure how to explain it, and I think it would be better to just show it. I am trying to replicate this. The equations and parameters are given, however this was done in a program called XPP (I'll post these at the bottom), and there are some parameters that i don't understand what they mean.
My entire code is:
import numpy as np
from scipy import integrate
import matplotlib.pyplot as plt
# define system in terms of a Numpy array
def Sys(X, t=0):
# here X[0] = x and x[1] = y
#protien [] is represented with y, and mRNA [] is represented by x
return np.array([ (k1*S*Kd**p)/(Kd**p + X[1]**p) - kdx*X[0], ksy*X[0] - (k2*ET*X[1])/(Km + X[1])])
#variables
k1=.1
S=1
Kd=1
kdx=.1
p=2
ksy=1
k2=1
ET=1
Km=1
# generate 1000 linearly spaced numbers for x-axes
t = np.linspace(0, 50,100)
# initial values
Sys0 = np.array([1, 0])
#Solves the ODE
X, infodict = integrate.odeint(Sys, Sys0, t, full_output = 1, mxstep = 50000)
#assigns appropriate equations to x and y
x,y = X.T
#plot's the graph
fig = plt.figure(figsize=(15,5))
fig.subplots_adjust(wspace = 0.5, hspace = 0.3)
ax1 = fig.add_subplot(1,2,1)
ax1.plot(x, color="blue")
ax1.plot(y, color = 'red')
ax1.set_xlabel("Protien concentration")
ax1.set_ylabel("mRNA concentration")
ax1.set_title("Phase space")
ax1.grid()
The given equations and parameters are:
model for a simple negative feedback loop
protein (y) inhibits the synthesis of its mRNA (x)
dx/dt = k1SKd^p/(Kd^p + y^p) - kdx*x
dy/dt = ksyx - k2ET*y/(Km + y)
p k1=0.1, S=1, Kd=1, kdx=0.1, p=2
p ksy=1, k2=1, ET=1, Km=1
# XP=y, YP=x, TOTAL=100, METH=stiff, XLO=0, XHI=4, YLO=0, YHI=1.05 (I don't exactly understand what is going on here)
Again, this uses a program called XPP or WINPP.
Any help with this would be appreciated, the original paper I am trying to replicate this from is : Design principles of biochemical oscillators by Bela Novak and John J. Tyson
Essentially all I'm trying to do is produce as set of points via an IFS and use a color map to show the multiplicity of each point. In other words, if we assume a color map where high values are more yellow and lower ones are more red, then values repeatedly produced by the IFS will be more yellow.
I'm struggling to get correct results for this. Each thing I've tried has resulted in an image that looks interesting, but is clearly incorrect as it differs wildly from what you get from simply plotting the points without color mapping.
Below is the base code that I'm comfortable with, without the failed attempts at color mapping. What can I do to get a proper color map?
The basic strategy, I think, is to make a matrix 'mat' holding the point multiplicities and do something like plt.imshow(xs, ys, c=mat. cmap="..."). I've tried different approaches to this but keep coming up with incorrect results.
import numpy as np
import matplotlib.pyplot as plt
import random
def f(x, y, n):
N = np.array([[x, y]])
M = np.array([[1, 0], [0, 1]])
b = np.array([[.5], [0]])
b2 = np.array([[0], [.5]])
if n == 0:
return np.dot(M, N.T)
elif n == 1:
return np.dot(M, N.T) + b
elif n == 2:
return np.dot(M, N.T) + b2
elif n == 3:
return np.dot(M, N.T) - b
elif n == 4:
return np.dot(M, N.T) - b2
xs = [] # x coordinates
ys = [] # y coordinates
D = {} # point multiplicities
random.seed()
x = 1
y = 1
for i in range(0, 100000):
n = random.randint(1, 4)
V = f(x, y, n)
x = V.item(0)
y = V.item(1)
xs.append(x)
ys.append(y)
xi = round(x, 3)
yi = round(y, 3)
if (xi, yi) in D:
D[(xi, yi)] += 1
else:
D[(xi, yi)] = 1
plt.xlabel('x')
plt.ylabel('y')
plt.scatter(xs,ys, s=.05)
plt.autoscale(True, True, True)
plt.show()
If I understand your problem, it sounds like you want to use a 2D histogram to get the density of points,
H, x, y = np.histogram2d(xs,ys,bins=100)
X, Y = np.meshgrid(x[:-1],y[:-1],indexing='ij')
plt.pcolormesh(X,Y,H,alpha=0.8, cmap = plt.cm.YlOrRd_r)
plt.colorbar()
Which gives,
This is a transparent colormesh plotted over the scatter plot.
You could also colour your scatter plot by the value at point,
pc = some_fn_to_get_color_at_points(X, Y, H, xs, yx)
plt.scatter(xs,ys, s=.05, c=pc)
Python's curve_fit calculates the best-fit parameters for a function with a single independent variable, but is there a way, using curve_fit or something else, to fit for a function with multiple independent variables? For example:
def func(x, y, a, b, c):
return log(a) + b*log(x) + c*log(y)
where x and y are the independent variable and we would like to fit for a, b, and c.
You can pass curve_fit a multi-dimensional array for the independent variables, but then your func must accept the same thing. For example, calling this array X and unpacking it to x, y for clarity:
import numpy as np
from scipy.optimize import curve_fit
def func(X, a, b, c):
x,y = X
return np.log(a) + b*np.log(x) + c*np.log(y)
# some artificially noisy data to fit
x = np.linspace(0.1,1.1,101)
y = np.linspace(1.,2., 101)
a, b, c = 10., 4., 6.
z = func((x,y), a, b, c) * 1 + np.random.random(101) / 100
# initial guesses for a,b,c:
p0 = 8., 2., 7.
print(curve_fit(func, (x,y), z, p0))
Gives the fit:
(array([ 9.99933937, 3.99710083, 6.00875164]), array([[ 1.75295644e-03, 9.34724308e-05, -2.90150983e-04],
[ 9.34724308e-05, 5.09079478e-06, -1.53939905e-05],
[ -2.90150983e-04, -1.53939905e-05, 4.84935731e-05]]))
optimizing a function with multiple input dimensions and a variable number of parameters
This example shows how to fit a polynomial with a two dimensional input (R^2 -> R) by an increasing number of coefficients. The design is very flexible so that the callable f from curve_fit is defined once for any number of non-keyword arguments.
minimal reproducible example
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def poly2d(xy, *coefficients):
x = xy[:, 0]
y = xy[:, 1]
proj = x + y
res = 0
for order, coef in enumerate(coefficients):
res += coef * proj ** order
return res
nx = 31
ny = 21
range_x = [-1.5, 1.5]
range_y = [-1, 1]
target_coefficients = (3, 0, -19, 7)
xs = np.linspace(*range_x, nx)
ys = np.linspace(*range_y, ny)
im_x, im_y = np.meshgrid(xs, ys)
xdata = np.c_[im_x.flatten(), im_y.flatten()]
im_target = poly2d(xdata, *target_coefficients).reshape(ny, nx)
fig, axs = plt.subplots(2, 3, figsize=(29.7, 21))
axs = axs.flatten()
ax = axs[0]
ax.set_title('Unknown polynomial P(x+y)\n[secret coefficients: ' + str(target_coefficients) + ']')
sm = ax.imshow(
im_target,
cmap = plt.get_cmap('coolwarm'),
origin='lower'
)
fig.colorbar(sm, ax=ax)
for order in range(5):
ydata=im_target.flatten()
popt, pcov = curve_fit(poly2d, xdata=xdata, ydata=ydata, p0=[0]*(order+1) )
im_fit = poly2d(xdata, *popt).reshape(ny, nx)
ax = axs[1+order]
title = 'Fit O({:d}):'.format(order)
for o, p in enumerate(popt):
if o%2 == 0:
title += '\n'
if o == 0:
title += ' {:=-{w}.1f} (x+y)^{:d}'.format(p, o, w=int(np.log10(max(abs(p), 1))) + 5)
else:
title += ' {:=+{w}.1f} (x+y)^{:d}'.format(p, o, w=int(np.log10(max(abs(p), 1))) + 5)
title += '\nrms: {:.1f}'.format( np.mean((im_fit-im_target)**2)**.5 )
ax.set_title(title)
sm = ax.imshow(
im_fit,
cmap = plt.get_cmap('coolwarm'),
origin='lower'
)
fig.colorbar(sm, ax=ax)
for ax in axs.flatten():
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
P.S. The concept of this answer is identical to my other answer here, but the code example is way more clear. At the time given, I will delete the other answer.
Fitting to an unknown numer of parameters
In this example, we try to reproduce some measured data measData.
In this example measData is generated by the function measuredData(x, a=.2, b=-2, c=-.8, d=.1). I practice, we might have measured measData in a way - so we have no idea, how it is described mathematically. Hence the fit.
We fit by a polynomial, which is described by the function polynomFit(inp, *args). As we want to try out different orders of polynomials, it is important to be flexible in the number of input parameters.
The independent variables (x and y in your case) are encoded in the 'columns'/second dimension of inp.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def measuredData(inp, a=.2, b=-2, c=-.8, d=.1):
x=inp[:,0]
y=inp[:,1]
return a+b*x+c*x**2+d*x**3 +y
def polynomFit(inp, *args):
x=inp[:,0]
y=inp[:,1]
res=0
for order in range(len(args)):
print(14,order,args[order],x)
res+=args[order] * x**order
return res +y
inpData=np.linspace(0,10,20).reshape(-1,2)
inpDataStr=['({:.1f},{:.1f})'.format(a,b) for a,b in inpData]
measData=measuredData(inpData)
fig, ax = plt.subplots()
ax.plot(np.arange(inpData.shape[0]), measData, label='measuered', marker='o', linestyle='none' )
for order in range(5):
print(27,inpData)
print(28,measData)
popt, pcov = curve_fit(polynomFit, xdata=inpData, ydata=measData, p0=[0]*(order+1) )
fitData=polynomFit(inpData,*popt)
ax.plot(np.arange(inpData.shape[0]), fitData, label='polyn. fit, order '+str(order), linestyle='--' )
ax.legend( loc='upper left', bbox_to_anchor=(1.05, 1))
print(order, popt)
ax.set_xticklabels(inpDataStr, rotation=90)
Result:
Yes. We can pass multiple variables for curve_fit. I have written a piece of code:
import numpy as np
x = np.random.randn(2,100)
w = np.array([1.5,0.5]).reshape(1,2)
esp = np.random.randn(1,100)
y = np.dot(w,x)+esp
y = y.reshape(100,)
In the above code I have generated x a 2D data set in shape of (2,100) i.e, there are two variables with 100 data points. I have fit the dependent variable y with independent variables x with some noise.
def model_func(x,w1,w2,b):
w = np.array([w1,w2]).reshape(1,2)
b = np.array([b]).reshape(1,1)
y_p = np.dot(w,x)+b
return y_p.reshape(100,)
We have defined a model function that establishes relation between y & x.
Note: The shape of output of the model function or predicted y should be (length of x,)
popt, pcov = curve_fit(model_func,x,y)
The popt is an 1D numpy array containing predicted parameters. In our case there are 3 parameters.
Yes, there is: simply give curve_fit a multi-dimensional array for xData.
I am trying to deblend the emission lines of low resolution spectrum in order to get the gaussian components. This plot represents the kind of data I am using:
After searching a bit, the only option I found was the application of the gauest function from the kmpfit package (http://www.astro.rug.nl/software/kapteyn/kmpfittutorial.html#gauest). I have copied their example but I cannot make it work.
I wonder if anyone could please offer me any alternative to do this or how to correct my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
def CurveData():
x = np.array([3963.67285156, 3964.49560547, 3965.31835938, 3966.14111328, 3966.96362305,
3967.78637695, 3968.60913086, 3969.43188477, 3970.25463867, 3971.07714844,
3971.89990234, 3972.72265625, 3973.54541016, 3974.36791992, 3975.19067383])
y = np.array([1.75001533e-16, 2.15520995e-16, 2.85030769e-16, 4.10072843e-16, 7.17558032e-16,
1.27759917e-15, 1.57074192e-15, 1.40802933e-15, 1.45038722e-15, 1.55195653e-15,
1.09280316e-15, 4.96611341e-16, 2.68777266e-16, 1.87075114e-16, 1.64335999e-16])
return x, y
def FindMaxima(xval, yval):
xval = np.asarray(xval)
yval = np.asarray(yval)
sort_idx = np.argsort(xval)
yval = yval[sort_idx]
gradient = np.diff(yval)
maxima = np.diff((gradient > 0).view(np.int8))
ListIndeces = np.concatenate((([0],) if gradient[0] < 0 else ()) + (np.where(maxima == -1)[0] + 1,) + (([len(yval)-1],) if gradient[-1] > 0 else ()))
X_Maxima, Y_Maxima = [], []
for index in ListIndeces:
X_Maxima.append(xval[index])
Y_Maxima.append(yval[index])
return X_Maxima, Y_Maxima
def GaussianMixture_Model(p, x, ZeroLevel):
y = 0.0
N_Comps = int(len(p) / 3)
for i in range(N_Comps):
A, mu, sigma = p[i*3:(i+1)*3]
y += A * np.exp(-(x-mu)*(x-mu)/(2.0*sigma*sigma))
Output = y + ZeroLevel
return Output
def Residuals_GaussianMixture(p, x, y, ZeroLevel):
return GaussianMixture_Model(p, x, ZeroLevel) - y
Wave, Flux = CurveData()
Wave_Maxima, Flux_Maxima = FindMaxima(Wave, Flux)
EmLines_Number = len(Wave_Maxima)
ContinuumLevel = 1.64191e-16
# Define initial values
p_0 = []
for i in range(EmLines_Number):
p_0.append(Flux_Maxima[i])
p_0.append(Wave_Maxima[i])
p_0.append(2.0)
p1, conv = optimize.leastsq(Residuals_GaussianMixture, p_0[:],args=(Wave, Flux, ContinuumLevel))
Fig = plt.figure(figsize = (16, 10))
Axis1 = Fig.add_subplot(111)
Axis1.plot(Wave, Flux, label='Emission line')
Axis1.plot(Wave, GaussianMixture_Model(p1, Wave, ContinuumLevel), 'r', label='Fit with optimize.leastsq')
print p1
Axis1.plot(Wave, GaussianMixture_Model([p1[0],p1[1],p1[2]], Wave, ContinuumLevel), 'g:', label='Gaussian components')
Axis1.plot(Wave, GaussianMixture_Model([p1[3],p1[4],p1[5]], Wave, ContinuumLevel), 'g:')
Axis1.set_xlabel( r'Wavelength $(\AA)$',)
Axis1.set_ylabel('Flux' + r'$(erg\,cm^{-2} s^{-1} \AA^{-1})$')
plt.legend()
plt.show()
A typical simplistic way to fit:
def model(p,x):
A,x1,sig1,B,x2,sig2 = p
return A*np.exp(-(x-x1)**2/sig1**2) + B*np.exp(-(x-x2)**2/sig2**2)
def res(p,x,y):
return model(p,x) - y
from scipy import optimize
p0 = [1e-15,3968,2,1e-15,3972,2]
p1,conv = optimize.leastsq(res,p0[:],args=(x,y))
plot(x,y,'+') # data
#fitted function
plot(arange(3962,3976,0.1),model(p1,arange(3962,3976,0.1)),'-')
Where p0 is your initial guess. By the looks of things, you might want to use Lorentzian functions...
If you use full_output=True, you get all kind of info about the fitting. Also check out curve_fit and the fmin* functions in scipy.optimize. There are plenty of wrappers around these around, but often, like here, it's easier to use them directly.