How to implement multiprocessing in a for loop inside a function - Python

I've built some code to minimize the weighted sum of squared residuals. I first read all the data from a .gz file and then process it with the code below (details are irrelevant). I want to use multiprocessing in order to speed up the "runFit" function.
My code is below:
"""
Fit 3D lines to cylinders
"""
from timeit import default_timer as timer
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from scipy.optimize import minimize
from numba import jit
from multiprocessing import Pool
def readData(filename):
"Read compressed data."
return np.loadtxt(filename, delimiter=",")
#jit(nopython=True)
def weightedResiduals(unknown, wire_coords, radii, d_radii, d_zcoords):
"Calculates the sum of the weighted residuals"
y_intercept = unknown[0]
z_intercept = unknown[1]
xy_slope = unknown[2]
xz_slope = unknown[3]
intercept_vector = np.array([0, y_intercept, z_intercept])
gradient_vector = np.array([1, xy_slope, xz_slope])
gradient_vector /= np.linalg.norm(gradient_vector)
result = 0
for index in range(np.shape(wire_coords)[0]):
distance = np.linalg.norm(np.cross((wire_coords[index]-intercept_vector), gradient_vector)) - radii[index]
weight = (d_radii[index]**2 + d_zcoords[index]**2)**(-1/2)
result += (weight * distance)**2
return result
def runFit(inputfilename, outputfilename):
"""
Parameters
----------
inputfilename : string
input data file name for fitting.
outputfilename : string
result storage file name.
Returns
-------
counter : int
number of successful fits; 100% would be twice the number
of events (two lines per event).
"""
counter = 0
#Reading the required data set
fulldata = readData(inputfilename)
#Defining the output array and filling in the first two columns
event_no = int(fulldata[-1,0])
result = np.zeros((2*event_no, 10))
result[:,0] = np.repeat(np.arange(1, event_no+1), 2)
line_no_array = np.empty((2*event_no,))
line_no_array[::2] = 1
line_no_array[1::2] = 2
result[:,1] = line_no_array
def singleEventFit(event):
#Using masking to obtain required rows
mask = (fulldata==event)
desired_rows = mask[:, 0]
#Calculating the fitted line variables using weighted least squares
for line in range(1,3):
#Extracting the desired rows from the full data array
desired_array = fulldata[np.logical_and(desired_rows,(fulldata==line)[:,1])]
#Extracting grouped data from the desired rows
wire_coords = desired_array[:,2:5]
wire_x_coords = wire_coords[:,0]
wire_y_coords = wire_coords[:,1]
wire_z_coords = wire_coords[:,2]
radii = desired_array[:,5]
d_radii, d_zcoords = desired_array[:,6], desired_array[:,7]
#Estimating an initial guess for the fitted line variables
x_min_index = np.argmin(np.abs(wire_x_coords))
x_max_index = np.argmax(np.abs(wire_x_coords))
y_intercept_guess = wire_y_coords[x_min_index]
z_intercept_guess = wire_z_coords[x_min_index]
xy_slope_guess = (wire_y_coords[x_max_index]-wire_y_coords[x_min_index])/(wire_x_coords[x_max_index]-wire_x_coords[x_min_index])
xz_slope_guess = (wire_z_coords[x_max_index]-wire_z_coords[x_min_index])/(wire_x_coords[x_max_index]-wire_x_coords[x_min_index])
init = np.array([y_intercept_guess, z_intercept_guess, xy_slope_guess, xz_slope_guess])
#Minimizing the sum of the weighted residuals
fit_vars = minimize(weightedResiduals, init, args=(wire_coords, radii, d_radii, d_zcoords), tol=1e-5)
if fit_vars.success == True:
y_intercept, z_intercept = fit_vars.x[0], fit_vars.x[1]
xy_slope, xz_slope = fit_vars.x[2], fit_vars.x[3]
#Using the half of the inverse of the Hessian matrix as the covariance matrix to recover errors
std_array = np.sqrt(np.diag(0.5*fit_vars.hess_inv))
#Inputting the variables and their errors on the output array
result[2*event+line-3, 2], result[2*event+line-3, 4] = y_intercept, xy_slope
result[2*event+line-3, 6], result[2*event+line-3, 8] = z_intercept, xz_slope
result[2*event+line-3, 3], result[2*event+line-3, 5] = std_array[0], std_array[2]
result[2*event+line-3, 7], result[2*event+line-3, 9] = std_array[1], std_array[3]
with Pool() as pool:
pool.map(singleEventFit, [event for event in range(1, event_no+1)])
#Returning resulting array as a text file
np.savetxt(outputfilename, result, delimiter=',')
return counter
start = timer()
if __name__=='__main__':
print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
end = timer()
print("Time: " + str(end-start) + "s")
However, I get the following traceback:
Traceback (most recent call last):
  File "C:\Users\vanes\Downloads\Python Project\untitled0.py", line 113, in <module>
    print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
  File "C:\Users\vanes\Downloads\Python Project\untitled0.py", line 105, in runFit
    pool.map(singleEventFit, [event for event in range(1, event_no+1)])
  File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 364, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 771, in get
    raise self._value
  File "C:\Users\vanes\anaconda3\lib\multiprocessing\pool.py", line 537, in _handle_tasks
    put(task)
  File "C:\Users\vanes\anaconda3\lib\multiprocessing\connection.py", line 211, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "C:\Users\vanes\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'runFit.<locals>.singleEventFit'
Is there any way that I can use multiprocessing in order to speed up the for-loop?
From searching online, the usual recommendation is to move the inner function to module level and make it global so it can be pickled. However, that can't work as-is, since singleEventFit needs variables defined inside "runFit()" (fulldata and the result array) in order to execute the loop.
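One possible restructuring, sketched below under two assumptions (that the per-event fits are independent, and that pickling fulldata with each task is acceptable): move the worker to module level, pass fulldata explicitly via functools.partial, and have each worker return its rows instead of writing into result. A plain NumPy array is not shared between processes, so writes made inside pool workers would be lost anyway. The helper names singleEventFitWorker and rows are hypothetical, and the sketch reuses readData and the fitting logic from the question.

from functools import partial
from multiprocessing import Pool
import numpy as np

def singleEventFitWorker(event, fulldata):
    #Hypothetical module-level worker: reuse the masking/minimize logic from
    #singleEventFit here, but collect the fitted values into a local (2, 8)
    #array instead of writing into the shared "result".
    rows = np.zeros((2, 8))
    # ... same fitting code as in the question ...
    return event, rows

def runFit(inputfilename, outputfilename):
    fulldata = readData(inputfilename)
    event_no = int(fulldata[-1, 0])
    result = np.zeros((2*event_no, 10))
    #(fill columns 0 and 1 as before)
    with Pool() as pool:
        #partial() pins the fulldata argument, so only "event" is mapped over;
        #a module-level function plus picklable arguments avoids the
        #"Can't pickle local object" error.
        worker = partial(singleEventFitWorker, fulldata=fulldata)
        for event, rows in pool.map(worker, range(1, event_no + 1)):
            result[2*event - 2:2*event, 2:] = rows
    np.savetxt(outputfilename, result, delimiter=',')

For a large fulldata array it may be cheaper to load it once per worker with a Pool initializer instead of pickling it with every task.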

Related

IndexError: list index out of range while using multi-valued command line arguments

I have two scripts: one preprocesses an audio file and the other generates the MFCCs. The MFCC script takes three multi-valued command-line arguments (frame length, frame step and FFT length). I have defined mfcc as:
def mfcc(audio, sample_rate, pre_emp):
    for fl in args['frame_length']:
        for fs in args['frame_step']:
            for fft in args['fft_length']:
                audio = np.pad(audio, (Paddinggg(fl, fs, sample_rate), 0), mode='reflect')
                audio = audio.astype('float32')
                #Normalization
                audio = tf.keras.utils.normalize(audio)
                #Preemphasis
                audio = Preemphasis(audio, pre_emp)
                stfts = tf.signal.stft(audio, fl, fs, fft, window_fn=tf.signal.hann_window)
                spectrograms = tf.abs(stfts)
                num_spectrogram_bins = stfts.shape[-1]
                lower_edge_hertz, upper_edge_hertz, num_mel_bins = 0.0, sample_rate/2.0, 32
                linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, upper_edge_hertz)
                mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
                mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(linear_to_mel_weight_matrix.shape[-1:]))
                # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
                log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
                return log_mel_spectrograms
The code for preprocessing uses the above function as shown below:
X = []
Y = []
preemphasis = 0.985
print("Feature Extraction Started")
for i, class_list in enumerate(data_list): #datalist = all files, class list = folder name in datalist, sample = path to the audio file in that particular class list
    for j, samples in enumerate(class_list): #samples are of the form classes_name/audio file
        if(samples.endswith('.wav')):
            sample_rate, audio = wavfile.read(os.path.join(C["dire"], samples))
            if(audio.size < sample_rate):
                audio = np.pad(audio, (sample_rate-audio.size, 0), mode="constant")
            coeff = mfccwithpaddingandcmd.mfcc(audio, sample_rate, preemphasis) # 0.985 = preemphasis
            X.append(coeff)
            #print(X)
            if(samples.split('/')[0] in classes):
                Y.append(samples.split('/')[0])
            elif(samples.split('/')[0]=='_background_noise_'):
                Y.append('silence')

# X = coefficient array and Y = name of the class
A = np.zeros((len(X), X[0].shape[0], X[0][0].shape[0]), dtype='object')
for i in range(0, len(X)):
    A[i] = np.array(X[i]) #Converting list X into array A
# print(A.shape)
Now, when I try to define A, I get the error:
Traceback (most recent call last):
  File "C:\Users\Aarti\.spyder-py3\preprocessingwithpaddingandcmd.py", line 151, in <module>
    pp()
  File "C:\Users\Aarti\.spyder-py3\preprocessingwithpaddingandcmd.py", line 104, in pp
    A = np.zeros((len(X),X[0].shape[0],X[0][0].shape[0]),dtype='object')
IndexError: list index out of range
I suspect it is because of the multi-valued command-line arguments. Am I correct? How can I solve this problem, where the multi-valued command-line arguments should produce multiple values of A?
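Two observations that a sketch may make concrete (both are assumptions from reading the code, not confirmed against the data): the IndexError means X is empty when A is built, i.e. no .wav file was actually read, and, separately, mfcc returns inside the innermost loop, so only the first (frame_length, frame_step, fft_length) combination is ever computed. The name mfcc_all and the placeholder feature shape below are hypothetical.

import numpy as np

def mfcc_all(audio, sample_rate, pre_emp, frame_lengths, frame_steps, fft_lengths):
    #Hypothetical restructuring: collect one feature array per argument
    #combination instead of returning on the first loop iteration.
    results = {}
    for fl in frame_lengths:
        for fs in frame_steps:
            for fft in fft_lengths:
                #placeholder for the question's STFT -> mel -> log pipeline
                features = np.zeros((fl, 32), dtype='float32')
                results[(fl, fs, fft)] = features
    return results

#Guard the array construction: X[0] raises IndexError when X is empty.
X = []  #filled by the preprocessing loop in the question
if not X:
    raise RuntimeError("X is empty: no .wav files were read, so X[0] fails")
A = np.zeros((len(X), X[0].shape[0], X[0][0].shape[0]), dtype='object')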

AttributeError: 'Tensor' object has no attribute 'ndim'

I was following the classification tutorial (https://www.kymat.io/gallery_1d/classif_keras.html#sphx-glr-gallery-1d-classif-keras-py) for 1D wavelet scattering and I receive the error:
Traceback (most recent call last):
  File "filter_signals_fft.py", line 145, in <module>
    x = Scattering1D(J, Q=Q)(x_in)
  File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\autograph\impl\api.py", line 692, in wrapper
    raise e.ag_error_metadata.to_exception(e)
AttributeError: Exception encountered when calling layer "scattering1d" (type Scattering1D).

in user code:

    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\frontend\keras_frontend.py", line 17, in call *
        return self.scattering(x)
    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\frontend\keras_frontend.py", line 14, in scattering *
        return self.S.scattering(x)
    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\frontend\tensorflow_frontend.py", line 53, in scattering *
        S = scattering1d(x, self.pad_fn, self.backend.unpad, self.backend, self.J, self.log2_T, self.psi1_f, self.psi2_f,
    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\core\scattering1d.py", line 76, in scattering1d *
        U_0 = pad_fn(x)
    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\frontend\base_frontend.py", line 80, in pad_fn *
        self.pad_mode)
    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\backend\tensorflow_backend.py", line 71, in pad *
        return agnostic.pad(x, pad_left, pad_right, pad_mode, axis=axis)
    File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\backend\agnostic_backend.py", line 36, in pad *
        axis_idx = axis if axis >= 0 else (x.ndim + axis)

AttributeError: 'Tensor' object has no attribute 'ndim'

Call arguments received:
  • x=tf.Tensor(shape=(None, 4194304), dtype=float32)
I'm not sure why this is the case. I am running Python 3.8.2 [MSC v.1916 64 bit (AMD64)] on win32. Unfortunately, the dataset is too big to share, but I may be able to provide the x_/y_all and subset as npy files... Below is the code I am using:
import tensorflow.compat.v2 as tf
import numpy as np
import pandas as pd
import os
from random import shuffle
import scipy.io.wavfile
from pathlib import Path
from scipy import signal
from scipy.signal import butter, sosfilt, sosfreqz
import librosa
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from kymatio.keras import Scattering1D
# from sys import getsizeof

tf.enable_v2_behavior()

# Set seed for reproducibility
SEED = 42
np.random.seed(SEED)

# We now loop through all recording samples for each movement
# to add to dataset (x/y_all)
movements = 'xyz xy xz x yz y z'.split()
movements_dict = dict(zip(movements, [7, 4, 5, 1, 6, 2, 3]))
len_max_files = 0.4
len_files = 0
for m in movements:
    files = [fle for fle in os.listdir(f'D:/rf_recordings/move_{m}') if fle.endswith('.wav') and fle.split('_')[3] == '1']
    len_files += int(len(files) * len_max_files)
    # len([fle for fle in os.listdir(f'D:/rf_recordings/') if fle.endswith('.wav') and fle.split('_')[3] == '1'])
print(len_files)

# Our sampling rate is 2MHz, so a T value of 2**22 (~4.2M samples)
# corresponds to just over 2s (our samples are pretty much
# all 2s so we could pad...)
T = 2**22
J = 6
Q = 16
log_eps = 1e-6

x_all = np.zeros((len_files, T))
y_all = np.zeros(len_files, dtype=np.uint8)
subset = np.zeros(len_files, dtype=np.uint8)

print('Reading in movement signals')
for m in movements:
    print(m)
    files = [fle for fle in os.listdir(f'D:/rf_recordings/move_{m}') if fle.endswith('.wav') and fle.split('_')[3] == '1']
    shuffle(files)
    files = files[int(len(files) * len_max_files):]
    ratio = int(len(files)*0.2)
    train_files = files[ratio:]
    test_files = files[:ratio]
    # print(train_files, len(test_files))
    for k, filename in enumerate(files):
        name = filename.split('_')
        movedist = name[3]
        speed = name[5]
        y = movements_dict[m]
        if filename in train_files:
            subset[k] = 0
        else:
            subset[k] = 1
        # Read in the sample WAV file
        fs, x = scipy.io.wavfile.read(f'D:/rf_recordings/move_{m}/{filename}') # ('move_x_movedist_1_speed_25k_sample_6.wav') #
        # y = movements_dict[m] # keep as m for now but we will have to do this with params also later.
        # We convert to mono by averaging the left and right channels.
        x = np.mean(x, axis=1)
        x = np.asarray(x, dtype='float') # np.float32)
        # Once the recording is in memory, we normalise it to +1/-1
        # x = x / np.max(np.abs(x))
        x /= np.max(np.abs(x))
        ## Pad signal to T
        x_pad = librosa.util.fix_length(x, size=T)
        # print(x.shape, x_pad.shape)
        # If it's too long, truncate it.
        if len(x) > T:
            x = x[:T]
        # If it's too short, zero-pad it.
        start = (T - len(x)) // 2
        x_all[k, start:start+len(x)] = x
        y_all[k] = y
        # ## The signal is now zero-padded with shape (4194304,)
        # Sx = scattering(x_pad)
        # meta = scattering.meta()
        # order0 = np.where(meta['order'] == 0)
        # order1 = np.where(meta['order'] == 1)
        # order2 = np.where(meta['order'] == 2)
        #
        # plt.figure(figsize=(8, 8))
        # plt.subplot(3, 1, 1)
        # plt.plot(Sx[order0][0])
        # plt.title('Zeroth-order scattering')
        # plt.subplot(3, 1, 2)
        # plt.imshow(Sx[order1], aspect='auto')
        # plt.title('First-order scattering')
        # plt.subplot(3, 1, 3)
        # plt.imshow(Sx[order2], aspect='auto')
        # plt.title('Second-order scattering')
        # plt.show()
print('Done reading!')

x_in = layers.Input(shape=(T))
x = Scattering1D(J, Q=Q)(x_in)
x = layers.Lambda(lambda x: x[..., 1:, :])(x)
# To increase discriminability, we take the logarithm of the scattering
# coefficients (after adding a small constant to make sure nothing blows up
# when scattering coefficients are close to zero). This is known as the
# log-scattering transform.
x = layers.Lambda(lambda x: tf.math.log(tf.abs(x) + log_eps))(x)
x = layers.GlobalAveragePooling1D(data_format='channels_first')(x)
x = layers.BatchNormalization(axis=1)(x)
x_out = layers.Dense(10, activation='softmax')(x)
model = tf.keras.models.Model(x_in, x_out)
model.summary()
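The failing line in kymatio's agnostic backend assumes x.ndim exists, which a symbolic TensorFlow tensor may not provide, depending on the TF version. Whether this version mismatch is the root cause here is an assumption, but a version-robust way to get the rank is sketched below; rank_of is a hypothetical helper, not a kymatio API.

import numpy as np
import tensorflow as tf

def rank_of(x):
    #Return the rank of x whether it is a NumPy array or a TF tensor.
    #np.ndarray (and eager tensors) expose .ndim; a symbolic tf.Tensor
    #always exposes its rank through x.shape.rank instead.
    ndim = getattr(x, 'ndim', None)
    return ndim if ndim is not None else x.shape.rank

print(rank_of(np.zeros((2, 3))))                  # 2
print(rank_of(tf.keras.Input(shape=(4194304,))))  # 2 (batch dim + samples)

Inside pad(), the failing line could then read axis_idx = axis if axis >= 0 else (rank_of(x) + axis); pinning kymatio and TensorFlow to versions known to work together is the other obvious route.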

ValueError: scale < 0 during normalization by using gaussian distribution function

I'm trying to read my text file, extract three main parameters (temperature, speed and acceleration), put them in separate lists, and normalize the lists after fitting a Gaussian distribution function. To get a good result, I split each parameter's list into positive and negative numbers, fit a Gaussian to each half, and take the mean of the negative numbers as the real minimum and the mean of the positive numbers as the real maximum, instead of directly taking the min and max of the main list, which could repeat a few times because they are not in the desired confidence interval. The problem is that I ran into a RuntimeWarning, which I have already suppressed, but I still get the error below and have no clue how to solve it: ValueError: scale < 0. I hope someone has a good idea for a solution, or a better way to apply normalization using a Gaussian distribution function. Thanks for your attention:
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 265, in main
wait=args.wait)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 258, in handle_args
debug_main(addr, name, kind, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 45, in debug_main
run_file(address, name, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 79, in run_file
run(argv, addr, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 140, in _run
_pydevd.main()
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1925, in main
debugger.connect(host, port)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1283, in run
return self._exec(is_module, entry_point_fn, module_name, file, globals, locals)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1290, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\_pydev_imps\_pydev_execfile.py", line 25, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "p:\Desktop\correctt\news.py", line 142, in <module>
plotgaussianfunction(t_p_mean, t_sigma_Positive)
File "p:\Desktop\correctt\news.py", line 58, in plotgaussianfunction
s = np.random.normal(mu, sigma,1000)
File "mtrand.pyx", line 1656, in mtrand.RandomState.normal
ValueError: scale < 0
So my code is:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

df = pd.read_csv('D:/me.txt', header=None)
id_set = df[df.index % 4 == 0].astype('int').values
speed = df[df.index % 4 == 1].values
acceleration = df[df.index % 4 == 2].values
temperature = df[df.index % 4 == 3].values

m_data = {'p_Speed': s_p_results[:,0], 'n_Speed': s_n_results[:,0], 'p_Acceleration': a_p_results[:,0], 'n_Acceleration': a_n_results[:,0], 'p_Temperature': t_p_results[:,0], 'n_Temperature': t_n_results[:,0]}
m_main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index=id_set[:,0])
data = {'Speed': speed[:,0], 'Acceleration': acceleration[:,0], 'Temperature': temperature[:,0]}
main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index=id_set[:,0])
main_data = main_data.replace([np.inf, -np.inf], np.nan)

def normalize(value, min_value, max_value, min_norm, max_norm):
    new_value = ((max_norm - min_norm)*((value - min_value)/(max_value - min_value))) + min_norm
    return new_value

def createpositiveandnegativelist(listtocreate):
    l_negative = []
    l_positive = []
    for value in listtocreate:
        if (value < 0):
            l_negative.append(value)
        elif (value > 0):
            l_positive.append(value)
    #print(t_negative)
    #print(t_positive)
    return l_negative, l_positive

def calculatemean(listtocalculate):
    return sum(listtocalculate)/len(listtocalculate)

def plotgaussianfunction(mu, sigma):
    s = np.random.normal(mu, sigma, 1000)
    abs(mu - np.mean(s)) < 0.01
    abs(sigma - np.std(s, ddof=1)) < 0.01
    #count, bins, ignored = plt.hist(s, 30, density=True)
    #plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)), linewidth=2, color='r')
    #plt.show()
    return

def plotboundedCI(s, mu, sigma, lists):
    plt.figure()
    count, bins, ignored = plt.hist(s, 30, density=True)
    plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)), linewidth=2, color='r')
    #confidence interval calculation
    ci = scipy.stats.norm.interval(0.68, loc=mu, scale=sigma)
    #confidence interval for left line
    one_x12, one_y12 = [ci[0], ci[0]], [0, 3]
    #confidence interval for right line
    two_x12, two_y12 = [ci[1], ci[1]], [0, 3]
    plt.title("Gaussian 68% Confidence Interval", fontsize=12, color='black', loc='left', style='italic')
    plt.plot(one_x12, one_y12, two_x12, two_y12, marker='o')
    plt.show()
    results = []
    for value in lists:
        if (ci[0] < value < ci[1]):
            results.append(value)
        else:
            #print("NOT WANTED: ", value)
            pass
    return results

t_negative, t_positive = createpositiveandnegativelist(temperature)
a_negative, a_positive = createpositiveandnegativelist(acceleration)
s_negative, s_positive = createpositiveandnegativelist(speed)

#calculating the mean value
t_p_mean = calculatemean(t_positive)
a_p_mean = calculatemean(a_positive)
s_p_mean = calculatemean(s_positive)
t_n_mean = calculatemean(t_negative)
a_n_mean = calculatemean(a_negative)
s_n_mean = calculatemean(s_negative)

#calculating the sigma value
t_sigma_Negative = np.std(t_negative)
t_sigma_Positive = np.std(t_positive)
a_sigma_Negative = np.std(t_negative)
a_sigma_Positive = np.std(t_positive)
s_sigma_Negative = np.std(t_negative)
s_sigma_Positive = np.std(t_positive)

#plot the gaussian function with histograms
plotgaussianfunction(t_p_mean, t_sigma_Positive)
plotgaussianfunction(t_n_mean, t_sigma_Negative)
plotgaussianfunction(a_p_mean, a_sigma_Positive)
plotgaussianfunction(a_n_mean, a_sigma_Negative)
plotgaussianfunction(s_p_mean, s_sigma_Positive)
plotgaussianfunction(s_n_mean, s_sigma_Negative)

#normalization
t_p_s = np.random.normal(t_p_mean, t_sigma_Positive, 1000)
t_n_s = np.random.normal(t_n_mean, t_sigma_Negative, 1000)
a_p_s = np.random.normal(a_p_mean, a_sigma_Positive, 1000)
a_n_s = np.random.normal(a_n_mean, a_sigma_Negative, 1000)
s_p_s = np.random.normal(s_p_mean, s_sigma_Positive, 1000)
s_n_s = np.random.normal(s_n_mean, s_sigma_Negative, 1000)

#histograms minus the outliers
t_p_results = plotboundedCI(t_p_s, t_p_mean, t_sigma_Positive, t_positive)
t_n_results = plotboundedCI(t_n_s, t_n_mean, t_sigma_Negative, t_negative)
a_p_results = plotboundedCI(a_p_s, a_p_mean, a_sigma_Positive, a_positive)
a_n_results = plotboundedCI(a_n_s, a_n_mean, a_sigma_Negative, a_negative)
s_p_results = plotboundedCI(s_p_s, s_p_mean, s_sigma_Positive, s_positive)
s_n_results = plotboundedCI(s_n_s, s_n_mean, s_sigma_Negative, s_negative)
Note: I have some missing data (NaN or inf) in my lists of values, which I have already replaced by zero! When there are no missing values in my parameter lists, the code works.
From the documentation of numpy.random.normal:

Parameters:
loc : float or array_like of floats
    Mean ("centre") of the distribution.
scale : float or array_like of floats
    Standard deviation (spread or "width") of the distribution.
size : int or tuple of ints, optional
    Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. If size is None (default), a single value is returned if loc and scale are both scalars. Otherwise, np.broadcast(loc, scale).size samples are drawn.

The scale is the standard deviation of the distribution, so it cannot be negative; hence the error you get: ValueError: scale < 0. You may want to check the sign of this parameter. Give it a try with:
s = np.random.normal(mu, np.abs(sigma), 1000)
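Since the question mentions NaN/inf values in the data, a hedged sketch of validating the parameters before sampling may also help (the assumption that a NaN sneaks into mu or sigma via np.std is a guess, not a confirmed diagnosis; safe_normal is a hypothetical helper):

import numpy as np

def safe_normal(mu, sigma, size=1000):
    #Sample from N(mu, sigma) only after validating the parameters:
    #np.std() of data containing NaN returns NaN, and drawing with a
    #NaN or negative scale is invalid, so fail loudly instead.
    if not np.isfinite(mu) or not np.isfinite(sigma):
        raise ValueError(f"non-finite parameters: mu={mu}, sigma={sigma}")
    if sigma < 0:
        raise ValueError(f"negative sigma: {sigma}")
    return np.random.normal(mu, sigma, size)

values = np.array([1.0, 2.5, np.nan, 3.0])
clean = values[np.isfinite(values)]  #drop NaN/inf before computing the stats
s = safe_normal(clean.mean(), clean.std())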

Scipy fmin_powell function

I'm trying to optimize the function eul with the initial guess X0 (X0 = [0.6421, -0.5046]) using fmin_powell. The function eul takes the initial conditions and calculates the velocity and temperature profile across a vertical flat plate using a predictor-corrector method. My code is displayed below:
def eul(X):
    f2, q1 = X
    N_tot = 5000;
    n = np.linspace(0.0, 10.0, N_tot)
    f = np.zeros(N_tot, dtype=float).reshape(N_tot,)
    dfdn = np.zeros(N_tot, dtype=float).reshape(N_tot,)
    d2fdn2 = np.zeros(N_tot, dtype=float).reshape(N_tot,)
    q = np.zeros(N_tot, dtype=float).reshape(N_tot,)
    dqdn = np.zeros(N_tot, dtype=float).reshape(N_tot,)
    Pr = 0.72; #Prandtl Number
    ##x0 = [d2fdn2_g1, dtdn_g1]
    # Boundary Conditions
    f[0] = 0.0;
    dfdn[0] = 0.0;
    d2fdn2[0] = f2;
    q[0] = 1.0;
    dqdn[0] = q1;
    for i in np.arange(0, N_tot-1):
        Dn = n[i+1] - n[i];
        f_tmp = f[i]+dfdn[i]*Dn;
        dfdn_tmp = dfdn[i]+d2fdn2[i]*Dn;
        d2fdn2_tmp = d2fdn2[i]+(-3*f[i]*d2fdn2[i]+2*(dfdn[i])**2-q[i])*Dn;
        q_tmp = q[i]+dqdn[i]*Dn;
        dqdn_tmp = dqdn[i]-3*Pr*f[i]*dqdn[i]*Dn;
        f[i+1] = f[i]+0.5*Dn*(dfdn[i]+dfdn_tmp);
        dfdn[i+1] = dfdn[i]+0.5*Dn*(d2fdn2[i]+d2fdn2_tmp);
        d2fdn2[i+1] = d2fdn2[i]+0.5*Dn*((-3*f[i]*d2fdn2[i]+2*(dfdn[i])**2-q[i])+(-3*f_tmp*d2fdn2_tmp+2*(dfdn_tmp)**2-q_tmp));
        q[i+1] = q[i]+0.5*Dn*(dqdn[i]+dqdn_tmp);
        dqdn[i+1] = dqdn[i]-0.5*Dn*((3*Pr*f[i]*dqdn[i])+(3*Pr*f_tmp*dqdn_tmp));
        if ((q[i+1]>1)|(q[i+1]<0)|(f[i+1]>2)|(f[i+1]<0)):
            q[N_tot-1] = 1+1/i;
            dfdn[N_tot-1] = 1+1/i;
            break
    return dfdn, q, n
MAIN PROGRAM
import numpy as np
import scipy as sp
import scipy.optimize
# Initial Guess
d2fdn2_g1 = 0.6421;
dtdn_g1 = -0.5046;
X0 = np.array([d2fdn2_g1, dtdn_g1])
X = scipy.optimize.fmin_powell(eul, X0)
I'm getting an error message:
Traceback (most recent call last):
  File "C:\Users\labuser\Desktop\Sankar\New_Euler.py", line 52, in <module>
    X = scipy.optimize.fmin_powell(eul, X0)
  File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1519, in fmin_powell
    fval, x, direc1 = _linesearch_powell(func, x, direc1, tol=xtol*100)
  File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1418, in _linesearch_powell
    alpha_min, fret, iter, num = brent(myfunc, full_output=1, tol=tol)
  File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1241, in brent
    brent.optimize()
  File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1113, in optimize
    xa,xb,xc,fa,fb,fc,funcalls = self.get_bracket_info()
  File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1089, in get_bracket_info
    xa,xb,xc,fa,fb,fc,funcalls = bracket(func, args=args)
  File "C:\Python27\lib\site-packages\scipy\optimize\optimize.py", line 1357, in bracket
    if (fa < fb): # Switch so fa > fb
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
My guess is that your function eul is returning arrays, while fmin_powell minimizes a scalar function; check that the objective returns a single value, not an array. Instead of passing eul to fmin_powell directly, define another function that computes the sum of the returned arrays and minimize that:
# Initial Guess
d2fdn2_g1 = 0.6421;
dtdn_g1 = -0.5046;

def eeul(X):
    return np.sum(eul(X))

X0 = np.array([d2fdn2_g1, dtdn_g1])
X = scipy.optimize.fmin_powell(eeul, X0)
This seems to work properly.
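If eul is meant for a shooting method, a hedged alternative to summing everything is to minimize the far-field boundary-condition mismatch. The sketch below assumes dfdn and q should both decay to zero at the outer edge of the domain (an assumption about the physics, not something stated in the question) and reuses the question's eul:

import numpy as np
import scipy.optimize

def shoot(X):
    #Hypothetical scalar objective: penalise how far the integrated profiles
    #are from the assumed far-field conditions dfdn -> 0 and q -> 0.
    dfdn, q, n = eul(X)
    return dfdn[-1]**2 + q[-1]**2

X0 = np.array([0.6421, -0.5046])
X = scipy.optimize.fmin_powell(shoot, X0)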

Errors with matplotlib plot, python

I get this horrible massive error when trying to plot using matplotlib:
Traceback (most recent call last):
  File "24oct_specanal.py", line 90, in <module>
    main()
  File "24oct_specanal.py", line 83, in main
    plt.plot(Svar,Sav)
  File "/usr/lib64/python2.6/site-packages/matplotlib/pyplot.py", line 2458, in plot
    ret = ax.plot(*args, **kwargs)
  File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 3849, in plot
    self.add_line(line)
  File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 1443, in add_line
    self._update_line_limits(line)
  File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 1451, in _update_line_limits
    p = line.get_path()
  File "/usr/lib64/python2.6/site-packages/matplotlib/lines.py", line 644, in get_path
    self.recache()
  File "/usr/lib64/python2.6/site-packages/matplotlib/lines.py", line 392, in recache
    x = np.asarray(xconv, np.float_)
  File "/usr/lib64/python2.6/site-packages/numpy/core/numeric.py", line 235, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
This is the code I am using:
import numpy as np
import numpy.linalg
import random
import matplotlib.pyplot as plt
import pylab
from scipy.optimize import curve_fit
from array import array

def makeAImatrix(n):
    A = np.zeros((n,n))
    I = np.ones((n))
    for i in range(0,n):
        for j in range(i+1,n):
            A[j,i] = random.random()
    for i in range(0,n):
        for j in range(i+1,n):
            A[i,j] = A[j,i]
    for i in range(n):
        A[i,i] = 1
    return (A, I)

def main():
    n = 5 #number of species
    t = 1 # number of matrices to check
    Aflat = []
    Aflatlist = [] #list of matrices
    Aflatav = []
    Aflatvar = []
    Aflatskew = []
    remspec = []
    Afreeze = [] #this is a LIST OF VECTORS that stores the vector corresponding to each extinct species as
                 #it is taken out. it is NOT the same as the original A matrix as it is only
                 #coherent in one direction. it is also NOT SQUARE.
    Sex = [] # (Species extinct) this is a vector that corresponds to the Afreeze matrix. if a species is
             #extinct then the value stored here will be -1.
    Sav = [] # (Species average) The average value of the A coefficients for each species
    Svar = [] # (Species variance)
    for k in range(0,t):
        allpos = 0
        A, I = makeAImatrix(n)
        while allpos != 1: #while all solutions are not positive
            x = numpy.linalg.solve(A,I)
            if any(t<0 for t in x): #if any of the solutions in x are negative
                p = np.where(x==min(x)) # find the most negative solution, p is the position
                #now store the A coefficients of the extinct species in the Afreeze list
                Afreeze.append(A[p])
                Sex.append(-1) #given -1 value as species is extinct.
                x = np.delete(x, p, 0)
                A = np.delete(A, p, 0)
                A = np.delete(A, p, 1)
                I = np.delete(I, p, 0)
            else:
                allpos = 1 #set allpos to one so loop is broken
        l = len(x)
        #now fill Afreeze and Sex with the remaining species that have survived
        for m in range(0, l):
            Afreeze.append(A[m])
            Sex.append(1) # value of 1 as this species has survived
        #now time to analyse the coefficients for each species.
        for m in range(0, len(Sex)):
            X1 = sum(Afreeze[m])/len(Afreeze[m]) # this is the mean
            X2 = 0
            for p in range(len(Afreeze[m])):
                X2 = X2 + Afreeze[m][p]
            X2 = X2/len(Afreeze[m])
            Sav.append(X1)
            Svar.append(X2 - X1*X1)
    spec = []
    for b in range(0,n):
        spec.append(b)
    plt.plot(Svar, Sav)
    plt.show()
    #plt.scatter(spec, Sav)
    #plt.show()

if __name__ == '__main__':
    main()
I cannot figure this out at all! I think it was working before but then just stopped working. Any ideas?
Your problem is in this section:

if any(t<0 for t in x): #if any of the solutions in x are negative
    p = np.where(x==min(x)) # find the most negative solution, p is the position
    #now store the A coefficients of the extinct species in the Afreeze list
    Afreeze.append(A[p])

You're indexing a 2D array, and the result is still a 2D array, so Afreeze gets a 2D array appended instead of a 1D array. Later, where you sum the separate elements of Afreeze, summing a 2D array yields a 1D array, and that gets appended to Sav and Svar. By the time you feed these variables to plt.plot(), matplotlib gets an array as one of the elements instead of a single number, which it of course can't cope with.
You probably want:

if any(t<0 for t in x):
    p = np.where(x==min(x))
    Afreeze.append(A[p][0])

but I haven't tried to follow the logic of the script very much; that's up to you.
To check that this is indeed what you want, print the value of A[p][0] on the line before it gets appended to Afreeze; a minimal demonstration of the indexing behaviour follows below.
I noted that, because of the random.random() in the matrix creation, the if statement isn't always true, so the problem doesn't always show up. A minor detail, but it could confuse people.
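A minimal sketch of why A[p] is still 2D (illustrative values only):

import numpy as np

A = np.arange(9).reshape(3, 3)
x = np.array([0.5, -1.0, 0.3])
p = np.where(x == min(x))  # -> (array([1]),): a tuple holding an index array
print(A[p].shape)          # (1, 3): indexing with the tuple keeps a 2D result
print(A[p][0].shape)       # (3,): taking element 0 yields the 1D row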
Fix your indentation?
import numpy as np
import numpy.linalg
import random
import matplotlib.pyplot as plt
import pylab
from scipy.optimize import curve_fit
from array import array

def main():
    n = 20 #number of species
    spec = np.zeros((n+1))
    for i in range(0,n):
        spec[i] = i
    t = 100 #initial number of matrices to check
    B = np.zeros((n+1)) #matrix to store the results of how big the matrices have to be
    for k in range(0,t):
        A = np.zeros((n,n))
        I = np.ones((n))
        for i in range(0,n):
            for j in range(i+1,n):
                A[j,i] = random.random()
        for i in range(0,n):
            for j in range(i+1,n):
                A[i,j] = A[j,i]
        for i in range(n):
            A[i,i] = 1
        allpos = 0
        while allpos != 1: #while all solutions are not positive
            x = numpy.linalg.solve(A,I)
            if any(t<0 for t in x): #if any of the solutions in x are negative
                p = np.where(x==min(x)) # find the most negative solution, p is the position
                x = np.delete(x, p, 0)
                A = np.delete(A, p, 0)
                A = np.delete(A, p, 1)
                I = np.delete(I, p, 0)
            else:
                allpos = 1 #set allpos to one so loop is broken
        l = len(x)
        B[l] = B[l]+1
    B = B/n
    pi = 3.14
    resfile = open("results.txt","w")
    for i in range(0, len(spec)):
        resfile.write("%d " % spec[i])
        resfile.write("%0.6f \n" % B[i])
    resfile.close()
    plt.hist(B, bins=n)
    plt.title("Histogram")
    plt.show()
    plt.plot(spec, B)
    plt.xlabel("final number of species")
    plt.ylabel("fraction of total matrices")
    plt.title("plot")
    plt.show()

if __name__ == '__main__':
    main()
Got this: [resulting plot image not included]
