ValueError: scale < 0 during normalization by using gaussian distribution function - python

I'm trying to read my text file, extract three main parameters (Temperature, Speed, Acceleration) into separate lists, and normalize each list after fitting a Gaussian distribution to it. To get a good result, I split each parameter's list into positive and negative numbers, fit a Gaussian distribution to each part, and take the mean of the negative numbers as the real minimum and the mean of the positive numbers as the real maximum. I do this instead of taking the min and max values directly from the full list, because those extreme values may occur only a few times and fall outside the desired confidence interval. I was getting a RuntimeWarning, which I have already suppressed, but I still get the error below (ValueError: scale < 0) and I have no clue how to solve it. I hope someone has a good idea for fixing the error, or a better way to apply normalization using a Gaussian distribution function. Thanks for your attention:
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 265, in main
wait=args.wait)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 258, in handle_args
debug_main(addr, name, kind, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 45, in debug_main
run_file(address, name, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 79, in run_file
run(argv, addr, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 140, in _run
_pydevd.main()
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1925, in main
debugger.connect(host, port)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1283, in run
return self._exec(is_module, entry_point_fn, module_name, file, globals, locals)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1290, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\_pydev_imps\_pydev_execfile.py", line 25, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "p:\Desktop\correctt\news.py", line 142, in <module>
plotgaussianfunction(t_p_mean, t_sigma_Positive)
File "p:\Desktop\correctt\news.py", line 58, in plotgaussianfunction
s = np.random.normal(mu, sigma,1000)
File "mtrand.pyx", line 1656, in mtrand.RandomState.normal
ValueError: scale < 0
So my code is:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import warnings
# Suppress every RuntimeWarning for the rest of the script.
warnings.filterwarnings("ignore",category =RuntimeWarning)
# The input file is read without a header row.
df = pd.read_csv('D:/me.txt', header=None)
# Rows are taken in groups of four by index modulo 4:
# 0 -> id (cast to int), 1 -> speed, 2 -> acceleration, 3 -> temperature.
id_set = df[df.index % 4 == 0].astype('int').values
speed = df[df.index % 4 == 1].values
acceleration = df[df.index % 4 == 2].values
temperature = df[df.index % 4 == 3].values
# NOTE(review): the *_results names used here are only assigned at the very end
# of the script (return values of plotboundedCI), so at this position the line
# raises NameError. It presumably belongs after those calls - confirm.
m_data={'p_Speed': s_p_results[:,0],'n_Speed': s_n_results[:,0], 'p_Acceleration': a_p_results[:,0],'n_Acceleration': a_n_results[:,0], 'p_Temperature': t_p_results[:,0],'n_Temperature': t_n_results[:,0]}
# NOTE(review): `data` is first assigned on the next line, so it is not defined
# yet here, and `m_data` built above is never used - presumably `m_data` was
# intended; confirm.
m_main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])
# One column per parameter (first column of each selection), indexed by id.
data = {'Speed': speed[:,0], 'Acceleration': acceleration[:,0], 'Temperature': temperature[:,0]}
main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])
# Infinite values are turned into NaN.
main_data = main_data.replace([np.inf, -np.inf], np.nan)
def normalize(value, min_value, max_value, min_norm, max_norm):
    """Linearly rescale ``value`` from one interval onto another.

    ``min_value``/``max_value`` bound the source interval and
    ``min_norm``/``max_norm`` bound the target interval, so ``min_value``
    maps to ``min_norm`` and ``max_value`` maps to ``max_norm``.
    """
    target_span = max_norm - min_norm
    source_span = max_value - min_value
    fraction = (value - min_value) / source_span
    return (target_span * fraction) + min_norm
def createpositiveandnegativelist(listtocreate):
    """Split the values into strictly negative and strictly positive ones.

    Returns a ``(negatives, positives)`` pair of lists in input order.
    Values that are neither below nor above zero (zero itself, NaN) are
    left out of both lists.
    """
    negatives, positives = [], []
    for item in listtocreate:
        if item < 0:
            bucket = negatives
        elif item > 0:
            bucket = positives
        else:
            continue
        bucket.append(item)
    return negatives, positives
def calculatemean(listtocalculate):
    """Return the arithmetic mean (sum divided by length) of the values."""
    total = sum(listtocalculate)
    count = len(listtocalculate)
    return total / count
def plotgaussianfunction(mu,sigma):
    """Draw 1000 samples from a normal distribution with the given parameters.

    ``mu`` is passed as the mean and ``sigma`` as the standard deviation to
    ``np.random.normal``. The deviations of the sample mean and sample
    standard deviation from ``mu`` and ``sigma`` are compared with 0.01, but
    the comparison results are not used, and nothing is plotted or returned.
    """
    samples = np.random.normal(loc=mu, scale=sigma, size=1000)
    mean_deviation = abs(mu - np.mean(samples))
    std_deviation = abs(sigma - np.std(samples, ddof=1))
    # Comparison results are intentionally left unused, as in the original.
    mean_deviation < 0.01
    std_deviation < 0.01
    return None
def plotboundedCI(s, mu, sigma, lists):
    """Plot samples with their Gaussian curve and 68% interval, then filter.

    Parameters
    ----------
    s : samples to draw as a 30-bin density histogram.
    mu, sigma : mean and standard deviation of the Gaussian curve and of the
        interval.
    lists : values to filter.

    Returns
    -------
    list
        The entries of ``lists`` lying strictly inside the 68% interval of
        the normal distribution N(mu, sigma), in their original order.
    """
    # `import scipy` alone is not guaranteed to load the `stats` submodule,
    # so import it explicitly where it is used.
    from scipy import stats

    plt.figure()
    _, bins, _ = plt.hist(s,30,density=True)
    plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)),linewidth=2, color= 'r')
    # 68% interval of the normal distribution, as a (lower, upper) pair
    lower, upper = stats.norm.interval(0.68, loc = mu, scale = sigma)
    # vertical marker line at the lower bound
    one_x12, one_y12 = [lower,lower], [0,3]
    # vertical marker line at the upper bound
    two_x12, two_y12 = [upper,upper], [0,3]
    plt.title("Gaussian 68% Confidence Interval", fontsize=12, color='black', loc='left', style='italic')
    plt.plot(one_x12, one_y12, two_x12, two_y12, marker = 'o')
    plt.show()
    # keep only the values strictly inside the interval
    return [value for value in lists if lower < value < upper]
# Split every parameter into its strictly negative and strictly positive values.
t_negative, t_positive = createpositiveandnegativelist(temperature)
a_negative, a_positive = createpositiveandnegativelist(acceleration)
s_negative, s_positive = createpositiveandnegativelist(speed)
#calculating the mean value
t_p_mean = calculatemean(t_positive)
a_p_mean = calculatemean(a_positive)
s_p_mean = calculatemean(s_positive)
t_n_mean = calculatemean(t_negative)
a_n_mean = calculatemean(a_negative)
s_n_mean = calculatemean(s_negative)
#calculating the sigma value
# Each sigma is computed from its own parameter's list; previously the
# acceleration and speed sigmas were taken from the temperature lists.
t_sigma_Negative = np.std(t_negative)
t_sigma_Positive = np.std(t_positive)
a_sigma_Negative = np.std(a_negative)
a_sigma_Positive = np.std(a_positive)
s_sigma_Negative = np.std(s_negative)
s_sigma_Positive = np.std(s_positive)
#plot the gaussian function with histograms
plotgaussianfunction(t_p_mean, t_sigma_Positive)
plotgaussianfunction(t_n_mean, t_sigma_Negative)
plotgaussianfunction(a_p_mean, a_sigma_Positive)
plotgaussianfunction(a_n_mean, a_sigma_Negative)
plotgaussianfunction(s_p_mean, s_sigma_Positive)
plotgaussianfunction(s_n_mean, s_sigma_Negative)
#draw 1000 samples per parameter and sign for the histograms below
t_p_s = np.random.normal(t_p_mean, t_sigma_Positive,1000)
t_n_s = np.random.normal(t_n_mean, t_sigma_Negative,1000)
a_p_s = np.random.normal(a_p_mean, a_sigma_Positive,1000)
a_n_s = np.random.normal(a_n_mean, a_sigma_Negative,1000)
s_p_s = np.random.normal(s_p_mean, s_sigma_Positive,1000)
s_n_s = np.random.normal(s_n_mean, s_sigma_Negative,1000)
#histograms minus the outliers
t_p_results = plotboundedCI(t_p_s, t_p_mean, t_sigma_Positive, t_positive)
t_n_results = plotboundedCI(t_n_s, t_n_mean, t_sigma_Negative, t_negative)
a_p_results = plotboundedCI(a_p_s, a_p_mean, a_sigma_Positive, a_positive)
a_n_results = plotboundedCI(a_n_s, a_n_mean, a_sigma_Negative, a_negative)
s_p_results = plotboundedCI(s_p_s, s_p_mean, s_sigma_Positive, s_positive)
s_n_results = plotboundedCI(s_n_s, s_n_mean, s_sigma_Negative, s_negative)
Note: I have some missing data (NaN or inf) in my list of values, which I have already replaced with zero. When there are no missing values in my lists of parameters, the code works!

from documentation of numpy.random.normal:
Parameters:
loc : float or array_like of floats
Mean (“centre”) of the distribution.
scale : float or array_like of floats
Standard deviation (spread or “width”) of the distribution.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. If size is None (default), a single value is returned if loc and scale are both scalars. Otherwise, np.broadcast(loc, scale).size samples are drawn.
The scale is the standard deviation of the distribution, so it cannot be negative. Hence the error you get: ValueError: scale < 0
You may want to check the sign of this parameter. Give it a try with:
s = np.random.normal(mu, np.abs(sigma),1000)

Related

How to implement multiprocessing in a for loop inside a function

I've built some code to minimize the sum of the weighted least squares of some residuals. I first read all the data from a .gz file and then process it on the code below (details are irrelevant). I want to use multiprocessing in order to speed up the "runFit" function.
My code is below:
"""
Fit 3D lines to cylinders
"""
from timeit import default_timer as timer
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from scipy.optimize import minimize
from numba import jit
from multiprocessing import Pool
def readData(filename):
    """Read comma-delimited numeric data from ``filename`` with np.loadtxt."""
    table = np.loadtxt(filename, delimiter=",")
    return table
def weightedResiduals(unknown, wire_coords, radii, d_radii, d_zcoords):
    """Calculate the sum of the squared weighted residuals of a 3D line fit.

    Parameters
    ----------
    unknown : sequence of four numbers
        ``[y_intercept, z_intercept, xy_slope, xz_slope]`` of the line, which
        passes through ``(0, y_intercept, z_intercept)`` with direction
        ``(1, xy_slope, xz_slope)``.
    wire_coords : array_like, shape (n, 3)
        One point per wire.
    radii : array_like, shape (n,)
        Radius subtracted from each point-to-line distance.
    d_radii, d_zcoords : array_like, shape (n,)
        Uncertainties; each residual is divided by
        ``sqrt(d_radii**2 + d_zcoords**2)``.

    Returns
    -------
    float
        Sum over all wires of ``((distance - radius) / uncertainty) ** 2``.
    """
    y_intercept, z_intercept = unknown[0], unknown[1]
    xy_slope, xz_slope = unknown[2], unknown[3]
    # Float literals force a float array: with integer parameters the
    # in-place division below would otherwise fail on an integer array.
    intercept_vector = np.array([0.0, y_intercept, z_intercept])
    gradient_vector = np.array([1.0, xy_slope, xz_slope])
    gradient_vector /= np.linalg.norm(gradient_vector)

    wire_coords = np.asarray(wire_coords, dtype=float).reshape(-1, 3)
    radii = np.asarray(radii, dtype=float)
    d_radii = np.asarray(d_radii, dtype=float)
    d_zcoords = np.asarray(d_zcoords, dtype=float)

    # |(p - a) x u| is the distance from point p to the line through a with
    # unit direction u; computed for all wires at once.
    offsets = wire_coords - intercept_vector
    distances = np.linalg.norm(np.cross(offsets, gradient_vector), axis=1) - radii
    # (weight * distance)**2 with weight = (d_r**2 + d_z**2)**(-1/2)
    squared_weights = 1.0 / (d_radii**2 + d_zcoords**2)
    return np.sum(squared_weights * distances**2)
def _fitSingleEvent(event, event_rows):
    """Fit both lines of one event and return the rows to store.

    Defined at module level, not inside runFit, so that multiprocessing can
    pickle it and send it to worker processes.

    Parameters
    ----------
    event : int
        Event number (1-based).
    event_rows : ndarray
        The rows of the full data set whose first column equals ``event``.

    Returns
    -------
    list of (int, list of float)
        One ``(row_index, values)`` pair per successful fit, where
        ``values`` are the eight numbers for result columns 2 to 9.
    """
    fitted = []
    for line in range(1, 3):
        #Extracting the rows of this line from the event's rows
        desired_array = event_rows[event_rows[:, 1] == line]
        if desired_array.shape[0] == 0:
            # No measurements for this line: leave its output row untouched.
            continue
        #Extracting grouped data from the desired rows
        wire_coords = desired_array[:, 2:5]
        wire_x_coords = wire_coords[:, 0]
        wire_y_coords = wire_coords[:, 1]
        wire_z_coords = wire_coords[:, 2]
        radii = desired_array[:, 5]
        d_radii, d_zcoords = desired_array[:, 6], desired_array[:, 7]
        #Estimating an initial guess for the fitted line variables from the
        #wires with the smallest and largest |x|
        x_min_index = np.argmin(np.abs(wire_x_coords))
        x_max_index = np.argmax(np.abs(wire_x_coords))
        x_span = wire_x_coords[x_max_index] - wire_x_coords[x_min_index]
        y_intercept_guess = wire_y_coords[x_min_index]
        z_intercept_guess = wire_z_coords[x_min_index]
        xy_slope_guess = (wire_y_coords[x_max_index] - wire_y_coords[x_min_index]) / x_span
        xz_slope_guess = (wire_z_coords[x_max_index] - wire_z_coords[x_min_index]) / x_span
        init = np.array([y_intercept_guess, z_intercept_guess, xy_slope_guess, xz_slope_guess])
        #Minimizing the sum of the weighted residuals
        fit_vars = minimize(weightedResiduals, init, args=(wire_coords, radii, d_radii, d_zcoords), tol=1e-5)
        if not fit_vars.success:
            continue
        y_intercept, z_intercept, xy_slope, xz_slope = fit_vars.x
        #Using the half of the inverse of the Hessian matrix as the covariance matrix to recover errors
        std_array = np.sqrt(np.diag(0.5 * fit_vars.hess_inv))
        # Column order 2..9: each variable is followed by its error.
        values = [y_intercept, std_array[0], xy_slope, std_array[2],
                  z_intercept, std_array[1], xz_slope, std_array[3]]
        fitted.append((2*event + line - 3, values))
    return fitted


def runFit(inputfilename, outputfilename):
    """
    Fit every event of the input file in parallel and save the results.

    Worker processes cannot write into an array owned by the parent process,
    so each worker returns its fitted rows and the parent fills in the
    output array.

    Parameters
    ----------
    inputfilename : string
        input data file name for fitting.
    outputfilename : string
        result storage file name.
    Returns
    -------
    counter : int
        number of successful fits; 100% would be twice the number
        of events (two lines per event).
    """
    #Reading the required data set
    fulldata = readData(inputfilename)
    #Defining the output array and filling in the first two columns
    event_no = int(fulldata[-1, 0])
    result = np.zeros((2*event_no, 10))
    result[:, 0] = np.repeat(np.arange(1, event_no+1), 2)
    line_no_array = np.empty((2*event_no,))
    line_no_array[::2] = 1
    line_no_array[1::2] = 2
    result[:, 1] = line_no_array
    #Each task carries only the rows of its own event
    tasks = [(event, fulldata[fulldata[:, 0] == event])
             for event in range(1, event_no+1)]
    with Pool() as pool:
        fits_per_event = pool.starmap(_fitSingleEvent, tasks)
    #Collecting the fitted values returned by the workers
    counter = 0
    for fits in fits_per_event:
        for row, values in fits:
            result[row, 2:10] = values
            counter += 1
    #Returning resulting array as a text file
    np.savetxt(outputfilename, result, delimiter=',')
    return counter
if __name__=='__main__':
    # Timing and reporting stay inside the guard: when worker processes are
    # started by re-importing this module, they must not run this code.
    start = timer()
    print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
    end = timer()
    print("Time: " + str(end-start) + "s")
However, I get the following traceback:
Traceback (most recent call last):
File "C:\Users\vanes\Downloads\Python Project\untitled0.py", line 113, in <module>
print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
File "C:\Users\vanes\Downloads\Python Project\untitled0.py", line 105, in runFit
pool.map(singleEventFit, [event for event in range(1, event_no+1)])
File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 771, in get
raise self._value
File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 537, in _handle_tasks
put(task)
File "C:\Users\vanes\anaconda3\lib\multiprocessing\connection.py", line 211, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "C:\Users\vanes\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'runFit.<locals>.singleEventFit'
Is there any way that I can use multiprocessing in order to speed up the for-loop?
After reviewing the internet, the recommendation was to move the inner function outside and make it global. However, this can't work since I need variables defined inside "runFit()" in order to execute the loop.

VALUE ERROR: x, y, and format string must not be None. (error while plotting hysteresis loop)

With the low cycle fatigue data, I'm trying to plot the Hysteresis loop. But I'm getting the following error:
[ -52.18297297 -45.58565338 16.9913185 ... -354.53630032 -295.50857248
-155.42088911]
[-0.01229182 -0.00891753 0.02256744 ... -0.33507242 -0.31283728
-0.24790212]
Traceback (most recent call last):
File "f:\I2M\LCF\Ep1_camp4_P4_TTH650 06-9-21 11 01 24\ep1_camp4_P4.py", line 16, in <module>
plt.plot(strain, Sigma, color = 'k')
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\pyplot.py", line 2840, in plot
return gca().plot(
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\axes\_axes.py", line 1743, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)]
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\axes\_base.py", line 273, in __call__
yield from self._plot_args(this, kwargs)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\axes\_base.py", line 379, in _plot_args
raise ValueError("x, y, and format string must not be None")
ValueError: x, y, and format string must not be None
And here is my code:
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): the 'science' and 'no-latex' styles are not built into
# matplotlib; presumably they come from the SciencePlots package - confirm.
plt.style.use(['science','no-latex'])
# x: column index 2 of the CSV, skipping the first two rows.
x = np.loadtxt('F:\\I2M\\LCF\\Ep1_camp4_P4_TTH650 06-9-21 11 01 24\\data_1.csv',unpack = True,
skiprows = 2, usecols = 2, delimiter = ',')
# y: column index 3 of the same file, skipping the first two rows.
y = np.loadtxt('F:\\I2M\\LCF\\Ep1_camp4_P4_TTH650 06-9-21 11 01 24\\data_1.csv',unpack = True,
skiprows = 2, usecols = 3, delimiter = ',')
stress = (x*1000)/28.27 #N/mm^2 = MPa
# The second assignment overwrites the first.
length = len(stress)
length = len(y)
plt.figure(figsize=(5, 5))
# NOTE(review): print() returns None, so Sigma and strain are both None here;
# the slices (every 10th sample) are printed but not kept.
Sigma = print(stress[0:length:10]) #stress
strain = print(y[0:length:10])
# Receives None for both x and y, which is what raises the ValueError.
plt.plot(strain, Sigma, color = 'k')
plt.show()
Data contains many rows. So I used some commands to access only particular values from the row
Your problem is here
Sigma = print(stress[0:length:10]) #stress
strain = print(y[0:length:10])
what you want plausibly is to sample every 10th data point, but what you get is … nothing or, from the point of view of Python: None, so that later your stack trace informs you that x, y, and format string must not be None.
Why this happens, and how you solve the problem?
When you make an assignment, the value of the expression on the right is saved and you can use the name on the left to use it later, so you save, e.g., the value returned by print(y[0:length:10]) to use it later under the name strain, but print() is used for its side effects (i.e., showing a bunch of characters on your terminal) and the value that is returned in these cases is by default None, not what was shown on your terminal.
If I have understood your intentions, you should omit the two lines above and just use
# strain (y) on the horizontal axis, stress on the vertical axis, every 10th sample
plt.plot(y[0:length:10], stress[0:length:10], color='k')
A side note, you have
length = len(stress)
length = len(y)
but you read them from the same file, one assignment should be enough…
PS
x, y = np.loadtxt('…\\data_1.csv', unpack=1, skiprows=2, usecols=[2,3], delimiter=',')

How to use xarray to group by time and then run a bin function on the groups?

I have a multidimensional 'mean direction of total ocean swell' (mdts), netCDF data set. The dimensions are time (in hours), latitude, and longitude. I simply wish to group the hourly data by day and then for each day, for each lat/lon grid, determine which of 16 predefined directional bins contains the most hours (maximum could be 24). The direction value associated with the bin with the most hours, for each lat/lon grid, would then be assigned as the direction for that particular day, for each lat/lon grid. I'm applying a custom function to the groupby command and that is where the error is occurring. I think I'm not understanding what is being passed to the function.
Note: each netCDF file represents 1979-2019 for one month. Therefore, I'm using groupby instead of resample as resample adds the 11 other months not in the file. I also first converted all the hours to 00:00 so that groupby would work for grouping by days.
Note: my actual code is set to loop through several netCDF files. I've simplified it here for one file.
My simplified code:
import numpy as np
import xarray as xr
ifile = 'mean_direction_total_swell_Nov_1979_2019_hourly.nc'
# min, max, and center values of angle direction bins
# lower edge, upper edge and centre of each of the 16 direction bins
min = [348.75, 11.25, 33.75, 56.25, 78.75, 101.25, 123.75, 146.25, 168.75, 191.25, 213.75, 236.25, 258.75, 281.25, 303.75, 326.25]
max = [ 11.25, 33.75, 56.25, 78.75, 101.25, 123.75, 146.25, 168.75, 191.25, 213.75, 236.25, 258.75, 281.25, 303.75, 326.25, 348.75]
dir = [ 0.0, 22.5, 45.0, 67.5, 90.0, 112.5, 135.0, 157.5, 180.0, 202.5, 225.0, 247.5, 270.0, 292.5, 315.0, 337.5]
def bins(x):
    """Return the centre direction of the bin that contains the angle ``x``.

    ``x`` is compared as a single value. The first (North) bin wraps around,
    so it matches ``x >= min[0]`` or ``x < max[0]``. A value that falls in no
    bin yields the first bin's centre.
    """
    # the wrap-around North bin needs 'or'
    if x >= min[0] or x < max[0]:
        return dir[0]
    # every other bin is a plain half-open interval
    for lower, upper, center in zip(min[1:], max[1:], dir[1:]):
        if x >= lower and x < upper:
            return center
    return dir[0]
# Open the netCDF file and select the 'mdts' variable.
idatanc = xr.open_dataset(ifile)
idata = idatanc['mdts']
idata.coords['time'] = idata.time.dt.floor('1D') # setting all hourly values to 0000
# bins() is called once per time group with that group's array, not with a
# single angle; its scalar `if` comparisons then raise the ValueError shown
# in the traceback.
idata_dy = idata.groupby("time").apply(bins)
What gets returned. Note: this error is based on the looping program for multiple netCDF files so it may not correspond exactly to the code above. The errors are still the same.
Traceback (most recent call last):
File "<ipython-input-216-82adffe45690>", line 9, in <module>
idata_dy = idata.groupby("time").apply(bins)
File "C:\Users\TWHawk\Anaconda3\envs\tim_python36\lib\site-packages\xarray\core\groupby.py", line 815, in apply
return self.map(func, shortcut=shortcut, args=args, **kwargs)
File "C:\Users\TWHawk\Anaconda3\envs\tim_python36\lib\site-packages\xarray\core\groupby.py", line 800, in map
return self._combine(applied, shortcut=shortcut)
File "C:\Users\TWHawk\Anaconda3\envs\tim_python36\lib\site-packages\xarray\core\groupby.py", line 819, in _combine
applied_example, applied = peek_at(applied)
File "C:\Users\TWHawk\Anaconda3\envs\tim_python36\lib\site-packages\xarray\core\utils.py", line 183, in peek_at
peek = next(gen)
File "C:\Users\TWHawk\Anaconda3\envs\tim_python36\lib\site-packages\xarray\core\groupby.py", line 799, in <genexpr>
applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped)
File "<ipython-input-215-3d060f71ca15>", line 6, in bins
if(x >= min[0] or x < max[0]): bins_n[0] = bins_n[0] + 1
File "C:\Users\TWHawk\Anaconda3\envs\tim_python36\lib\site-packages\xarray\core\common.py", line 119, in __bool__
return bool(self.values)
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I didn't check the results all the way, but I think the code below does what you need:
import numpy as np
import xarray as xr
from scipy import stats
def func(x, axis):
    """Return the most common value of ``x`` along ``axis``.

    The reduced axis is removed from the result and all other axes are kept,
    including those of length one. ``axis`` may be an int or a one-element
    tuple.
    """
    if isinstance(axis, tuple) and len(axis) == 1:
        axis = axis[0]
    # stats.mode reduces along the axis itself; unpacking the output of
    # np.apply_along_axis only gave the right values for axis=0.
    mode = np.asarray(stats.mode(x, axis=axis).mode)
    if mode.ndim == np.ndim(x):
        # SciPy versions that keep the reduced axis with length one
        mode = np.squeeze(mode, axis=axis)
    return mode
infile = 'mean_direction_total_swell_Nov_1979_2019_hourly.nc'
ds = xr.open_dataset(infile)
# make sure range is 0 <= x < 360
ds['mdts'] = np.mod(ds['mdts'], 360)
# bin the data in 16 directions (17 actually, North appears as the first and
# last bin)
step = 360 / 16
# 16 bin centres (0, 22.5, ..., 337.5) followed by a trailing 0, so that the
# last half-width bin also maps to North
centers = np.r_[np.r_[0: 360: step], 0]
# 18 edges: 0, then 11.25 to 348.75 in steps of 22.5, then 360
edges = np.r_[0, np.r_[step / 2: 360: step], 360]
# np.digitize numbers the bins from 1 for values inside the edges, hence the
# "- 1" when indexing centers below.
# NOTE(review): NaN values (e.g. missing data) fall in none of the bins;
# np.digitize presumably returns len(edges) for them, which would index past
# the end of centers - confirm how missing values should be handled.
ds['mdts_binned_idx'] = (ds['mdts'].dims, np.digitize(ds['mdts'], edges))
ds['mdts_binned'] = (ds['mdts'].dims, centers[ds['mdts_binned_idx'] - 1])
# apply stats.mode to get the modal (most common) value in each day
ds2 = xr.Dataset()
# NOTE(review): the question avoided resample because it adds the days of the
# months that are not in the file; verify the output here for those days.
ds2['mdts_mode_1d'] = ds['mdts_binned'].resample(time='1D').reduce(func)

Creating a column of random numbers from a distribution with mean and std from other columns using python

I would like to be able to basically pass a column into the np.random.normal() function.
I have the following;
# Draws z from a normal distribution with mean w * S, then updates z and
# thetaset over max_iter iterations and returns z as a float.
# NOTE(review): Python 2 code (print statements), and the indentation was
# lost, so the exact nesting of the loops below cannot be told from here.
# NOTE(review): eq6, eq7, eq9, eq10, var, lambdaw and lambdatheta are defined
# elsewhere; `mt` is presumably the math module - confirm.
def calc_z(w,S,a1,a2,yt1,yt2):
# mean of the distribution z is drawn from
mu = w * S
print 'Mu' , mu
sigma = mt.sqrt(0.5)
# np.random.normal needs plain numbers here; the traceback in the question
# is raised on this line
z = np.array(np.random.normal(mu,sigma))
u = [a1,a2,z]
yt = [yt1,yt2,1]
# one random starting value per entry of u
thetaset = np.random.rand(len(u))
m = [i for i in range(len(u))]
max_iter = 30
#Calculate E-step
for i in range(max_iter):
print 'Iteration:', i
print 'z:', z
print 'thetaset', thetaset
devLz = eq6(var,w,S,z,yt,u,thetaset,m)
dev2Lz2 = eq9(var,thetaset,u)
#Calculate M-Step
# presumably a Newton-type step (first over second derivative, judging by the
# names) - confirm
z = z - (devLz / dev2Lz2)
w = lambdaw * z
# NOTE(review): this loop reuses the name i of the outer loop
for i in range(len(thetaset)):
devLTheta = eq7(yt,u,thetaset,lambdatheta)
dev2LTheta2 = eq10(thetaset,u,lambdatheta)
thetaset = thetaset - (devLTheta / dev2LTheta2)
return float(z)
# Wrap calc_z as a Spark UDF returning a float, so that it is applied to the
# values of each row rather than to the Column objects themselves.
calc_z_udf = udf(calc_z,FloatType())
data.show()
# Column expressions must go through the UDF; calling calc_z directly passes
# Column objects into np.random.normal, which raises the ValueError.
data = data.withColumn('z', calc_z_udf(data['w'],data['Org_Depth_Diff_S'],data['proximity_rank_a1'],data['cotravel_count_a2'],data['cotravel_yt1'],data['proximity_yt2']))
But when I pass S in, the np.random.normal function doesn't like being passed a column and gives me the following error;
Traceback (most recent call last):
File "/home/taylorr2/PySparkLatent3.py", line 125, in <module>
data = data.withColumn('z', calc_z(data['w'],data['Org_Depth_Diff_S'],data['proximity_rank_a1'],data['cotravel_count_a2'],data['cotravel_yt1'],data['proximity_yt2']))
File "/home/taylorr2/PySparkLatent3.py", line 90, in calc_z
z = np.array(np.random.normal(mu,sigma))
File "mtrand.pyx", line 1282, in mtrand.RandomState.normal (numpy/random/mtrand/mtrand.c:6920)
ValueError: setting an array element with a sequence.
I am trying to think of a way to get this function to accept this value or do this a different way.
Thanks!

Scipy fmin_powell function

I'm trying to optimize the function eul with the initial guess X0 (X0 = [0.6421, -0.5046]) using fmin_powell. The function eul gets the initial conditions and calculates the velocity and temperature profile across a vertical flat plate using predictor-corrector method. I've displayed my code below:
def eul(X):
    """Integrate the profile equations with a predictor-corrector scheme.

    ``X`` holds the two initial conditions ``(f2, q1)``: the starting values
    of ``d2fdn2`` and ``dqdn``. The equations are stepped over 5000 points of
    ``n`` between 0 and 10. If ``q`` leaves [0, 1] or ``f`` leaves [0, 2],
    the last entries of ``q`` and ``dfdn`` are set to ``1 + 1/i`` and the
    integration stops.

    Returns the tuple ``(dfdn, q, n)`` of arrays.
    """
    f2, q1 = X
    N_tot = 5000
    last = N_tot - 1
    Pr = 0.72  # Prandtl Number
    n = np.linspace(0.0, 10.0, N_tot)
    f = np.zeros(N_tot)
    dfdn = np.zeros(N_tot)
    d2fdn2 = np.zeros(N_tot)
    q = np.zeros(N_tot)
    dqdn = np.zeros(N_tot)

    # Boundary Conditions
    f[0] = 0.0
    dfdn[0] = 0.0
    d2fdn2[0] = f2
    q[0] = 1.0
    dqdn[0] = q1

    for i in np.arange(last):
        Dn = n[i+1] - n[i]
        rate_d2f = -3*f[i]*d2fdn2[i]+2*(dfdn[i])**2-q[i]

        # predictor: one explicit step from point i
        f_tmp = f[i]+dfdn[i]*Dn
        dfdn_tmp = dfdn[i]+d2fdn2[i]*Dn
        d2fdn2_tmp = d2fdn2[i]+rate_d2f*Dn
        q_tmp = q[i]+dqdn[i]*Dn
        dqdn_tmp = dqdn[i]-3*Pr*f[i]*dqdn[i]*Dn
        rate_d2f_tmp = -3*f_tmp*d2fdn2_tmp+2*(dfdn_tmp)**2-q_tmp

        # corrector: average the rates at point i and at the predicted point
        f[i+1] = f[i]+0.5*Dn*(dfdn[i]+dfdn_tmp)
        dfdn[i+1] = dfdn[i]+0.5*Dn*(d2fdn2[i]+d2fdn2_tmp)
        d2fdn2[i+1] = d2fdn2[i]+0.5*Dn*(rate_d2f+rate_d2f_tmp)
        q[i+1] = q[i]+0.5*Dn*(dqdn[i]+dqdn_tmp)
        dqdn[i+1] = dqdn[i]-0.5*Dn*((3*Pr*f[i]*dqdn[i])+(3*Pr*f_tmp*dqdn_tmp))

        out_of_range = (q[i+1] > 1) or (q[i+1] < 0) or (f[i+1] > 2) or (f[i+1] < 0)
        if out_of_range:
            # mark the end values and stop integrating
            q[last] = 1+1/i
            dfdn[last] = 1+1/i
            break
    return dfdn, q, n
MAIN PROGRAM
import numpy as np
import scipy as sp
import scipy.optimize
# Initial Guess
d2fdn2_g1 = 0.6421;
dtdn_g1 = -0.5046;
# Packed in the order eul unpacks them: (f2, q1)
X0 = np.array([d2fdn2_g1, dtdn_g1])
# eul returns a tuple of arrays rather than a single number, which is what
# fmin_powell's internal comparison fails on in the traceback.
X = scipy.optimize.fmin_powell(eul, X0)
I'm getting an error message:
Traceback (most recent call last):
File "C:\Users\labuser\Desktop\Sankar\New_Euler.py", line 52, in <module>
X = scipy.optimize.fmin_powell(eul, X0)
File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1519, in fmin_powell
fval, x, direc1 = _linesearch_powell(func, x, direc1, tol=xtol*100)
File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1418, in _linesearch_powell
alpha_min, fret, iter, num = brent(myfunc, full_output=1, tol=tol)
File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1241, in brent
brent.optimize()
File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1113, in optimize
xa,xb,xc,fa,fb,fc,funcalls = self.get_bracket_info()
File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1089, in get_bracket_info
xa,xb,xc,fa,fb,fc,funcalls = bracket(func, args=args)
File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1357, in bracket
if (fa < fb): # Switch so fa > fb
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
My guess is your function eul is returning an array. fmin_powell minimizes a scalar function. Check that eul returns a single value, not an array.
(Without seeing more code, the best we can do is guess. It would help if you added the definition of eul to the question.)
Instead of sending an array to fmin_powell just define another function that computes sum of the returned array, and use it.
# Initial Guess
d2fdn2_g1 = 0.6421;
dtdn_g1 = -0.5046;
def eeul(X):
    """Scalar objective for fmin_powell built from the profiles of eul.

    Returns the sum of the squared last values of ``dfdn`` and ``q``.
    """
    dfdn, q, _ = eul(X)
    # NOTE(review): assumes the aim is for dfdn and q to vanish at the far
    # end of the grid; eul writes its out-of-range penalty into exactly these
    # two entries - confirm against the intended boundary conditions.
    # Summing all three returned arrays would also add in the grid n, which
    # does not depend on X.
    return dfdn[-1]**2 + q[-1]**2
X0 = np.array([d2fdn2_g1, dtdn_g1])
X = scipy.optimize.fmin_powell(eeul, X0)
This seems to work properly.

Categories