The code below is supposed to run a Bayes classifier with a full-covariance Gaussian (http://courses.ee.sun.ac.za/Pattern_Recognition_813/lectures/lecture03/node2.html), but I get two runtime warnings when I run it:
RuntimeWarning: Mean of empty slice.
warnings.warn("Mean of empty slice.", RuntimeWarning)
and
RuntimeWarning: Degrees of freedom <= 0 for slice
warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
This is my code:
def modelFull(train, test):
    err_train = 0
    err_test = 0
    x_train = []
    x_test = []
    labels = []
    train_labels = []
    test_labels = []

    for i in train:
        x_train.append(i[:-1]/255)
        labels.append(i[-1])
        train_labels.append(i[-1])

    for i in test:
        x_test.append(i[:-1]/255)
        labels.append(i[-1])
        test_labels.append(i[-1])

    x_train = np.array(x_train)

    x_0 = []
    x_1 = []
    for i in train:
        if i[-1] == 0:
            x_0.append(i[:-1]/255)
        if i[-1] == 1:
            x_1.append(i[:-1]/255)
    x_0 = np.array(x_0)
    x_1 = np.array(x_1)

    p_0 = float(x_0.shape[0])/float((x_0.shape[0]+x_1.shape[0]))
    p_1 = float(x_1.shape[0])/float((x_0.shape[0]+x_1.shape[0]))

    train_x0_mean = x_0.mean(axis=0)
    train_x1_mean = x_1.mean(axis=0)

    cov_x0 = np.cov(np.transpose(x_0))
    cov_x1 = np.cov(np.transpose(x_1))
    cov_x0 = cov_x0 + np.eye(256) * .01
    cov_x1 = cov_x1 + np.eye(256) * .01

    det_x1_cov = -float(np.linalg.slogdet(cov_x1)[1])
    det_x0_cov = -float(np.linalg.slogdet(cov_x0)[1])

    train_results = []
    test_results = []

    for x in x_train:
        x0_minus_mu_T = np.transpose((x-train_x0_mean))
        x0_inverse = np.linalg.inv(cov_x0)
        x0_minus_mu = x-train_x0_mean
        x1_minus_mu_T = np.transpose((x-train_x1_mean))
        x1_inverse = np.linalg.inv(cov_x1)
        x1_minus_mu = x-train_x1_mean
        x_0_probability = det_x0_cov - (x0_minus_mu_T.dot(x0_inverse)).dot(x0_minus_mu)
        x_1_probability = det_x1_cov - (x1_minus_mu_T.dot(x1_inverse)).dot(x1_minus_mu)
        if (x_0_probability+np.log(p_0))/(x_1_probability+np.log(p_1)) < 1:
            train_results.append(1)
        else:
            train_results.append(0)

    for x in x_test:
        x0_minus_mu_T = np.transpose((x-train_x0_mean))
        x0_inverse = np.linalg.inv(cov_x0)
        x0_minus_mu = x-train_x0_mean
        x1_minus_mu_T = np.transpose((x-train_x1_mean))
        x1_inverse = np.linalg.inv(cov_x1)
        x1_minus_mu = x-train_x1_mean
        x_0_probability = det_x0_cov - (x0_minus_mu_T.dot(x0_inverse)).dot(x0_minus_mu)
        x_1_probability = det_x1_cov - (x1_minus_mu_T.dot(x1_inverse)).dot(x1_minus_mu)
        if (x_0_probability+np.log(p_0))/(x_1_probability+np.log(p_1)) < 1:
            test_results.append(1)
        else:
            test_results.append(0)

    train_correct = 0
    test_correct = 0
    for i in range(len(train_results)):
        if int(train_results[i]) == int(train_labels[i]):
            train_correct += 1
    for i in range(len(test_results)):
        if int(test_results[i]) == int(test_labels[i]):
            test_correct += 1

    err_train = 1-(float(test_correct)/ len(test_results))
    err_train = 1-(float(train_correct)/ len(train_results))

    return err_train, err_test
RuntimeWarning: Degrees of freedom <= 0 for slice
occurs when you use the wrong shape, e.g.:
import numpy as np
x = np.random.random([1000,1])
y = np.random.random([1000,1])
print(x.shape, y.shape)
# (1000, 1) (1000, 1)
t = np.cov(x, y) #RuntimeWarning
t = np.cov(x.T, y.T) #This works
An edge case: the array you calculate the covariance of contains only one element.
np.cov([0.5])
In addition to the wrong-shape case above, calling np.nanstd on an all-NaN array also produces "RuntimeWarning: Degrees of freedom <= 0 for slice", and calling np.nanmean on an all-NaN array produces "RuntimeWarning: Mean of empty slice.".
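For instance, a minimal reproduction of both warnings on an all-NaN array (assuming a recent NumPy version):

import warnings
import numpy as np

a = np.full(5, np.nan)          # an array containing only NaNs

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    np.nanmean(a)               # -> RuntimeWarning: Mean of empty slice
    np.nanstd(a)                # -> RuntimeWarning: Degrees of freedom <= 0 for slice

for w in caught:
    print(w.message)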
I have Python code that tries to calculate the sample variance as I accumulate entries in each loop iteration.
y_hat = y_df.loc[n-1]
var = []
var_sum = 0
for i in range(n):
    var_i = (g_i[i] - y_hat)**2
    var_sum += var_i
    if i == 0:
        var_avg = var_sum
        var.append(var_avg)
    else:
        var_avg = var_sum/i
        var.append(var_avg)
The output has very strange first rows (when i is 1), while the rest of the rows are fine. Can someone help, please?
Below is my entire script; essentially I am running a Monte Carlo simulation to estimate pi.
import numpy as np
import math
import matplotlib.pyplot as plt
import random
import pandas as pd
import statistics as stats

n = 1000
k = 100

# generate u r.v. with size k*n -> (100,1000)
u = []
for i in range(k):
    u_i = np.random.uniform(size = n)
    u.append(u_i)

# put into dataframe (k*n)
u_df = pd.DataFrame(u)

# calculate g_i, g_i is a df with k*n
g_i = 4*np.sqrt(1-u_df**2)

g_sum = 0
y = []
for i in range(n):
    g_sum += g_i[i]
    y_i = g_sum/(i+1)
    y.append(y_i)

# put y into df -> n*k
y_df = pd.DataFrame(y)
y_df = y_df.reset_index(drop=True)

y_hat = y_df.loc[n-1]
var = []
var_sum = 0
for i in range(n):
    var_i = (g_i[i] - y_hat)**2
    var_sum += var_i
    if i == 0:
        var_avg = var_sum
        var.append(var_avg)
    else:
        var_avg = var_sum/i
        var.append(var_avg)

var_df = pd.DataFrame(var)
var_df = var_df.reset_index(drop=True)
var_df.head()
When var.append(var_avg) runs inside the i == 0 branch, you append var_sum to var by reference, so every later change to var_sum also changes var[0]. You can fix the problem by explicitly copying var_sum when i == 0. The corrected if-else statement would be:
if i == 0:
    var_avg = var_sum
    var.append(var_avg.copy())
else:
    var_avg = var_sum/(i+1)
    var.append(var_avg)
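The same aliasing effect can be reproduced with a plain NumPy array; this small sketch (the names are made up for illustration) shows why appending a reference and then updating it in place also changes the stored element:

import numpy as np

total = np.array([1.0, 2.0])
snapshots = [total]           # stores a reference, not a copy
total += 10                   # in-place update also mutates snapshots[0]
print(snapshots[0])           # [11. 12.]

snapshots = [total.copy()]    # an explicit copy is unaffected
total += 10
print(snapshots[0])           # still [11. 12.]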
I'm trying to make an XOR gate using a network of 2 perceptrons, but for some reason the network is not learning; when I plot the change in error on a graph, the error settles at a level and oscillates in that region.
I have not added any bias to the network at the moment.
import numpy as np

def S(x):
    return 1/(1+np.exp(-x))

win = np.random.randn(2,2)
wout = np.random.randn(2,1)
eta = 0.15
# win = [[1,1], [2,2]]
# wout = [[1],[2]]
obj = [[0,0],[1,0],[0,1],[1,1]]
target = [0,1,1,0]
epoch = int(10000)
emajor = ""

for r in range(0,epoch):
    for xy in range(len(target)):
        tar = target[xy]
        fdata = obj[xy]
        fdata = S(np.dot(1,fdata))
        hnw = np.dot(fdata,win)
        hnw = S(np.dot(fdata,win))
        out = np.dot(hnw,wout)
        out = S(out)
        diff = tar-out
        E = 0.5 * np.power(diff,2)
        emajor += str(E[0]) + ",\n"
        delta_out = (out-tar)*(out*(1-out))
        nindelta_out = delta_out * eta
        wout_change = np.dot(nindelta_out[0], hnw)
        for x in range(len(wout_change)):
            change = wout_change[x]
            wout[x] -= change
        delta_in = np.dot(hnw,(1-hnw)) * np.dot(delta_out[0], wout)
        nindelta_in = eta * delta_in
        for x in range(len(nindelta_in)):
            midway = np.dot(nindelta_in[x][0], fdata)
            for y in range(len(win)):
                win[y][x] -= midway[y]

f = open('xor.csv','w')
f.write(emajor) # python will convert \n to os.linesep
f.close() # you can omit in most cases as the destructor will call it
This is how the error changes with the number of learning rounds. Is this correct? The red line is how I was expecting the error to change.
Is there anything wrong in my code? I can't seem to figure out what's causing the problem. Help is much appreciated.
Thanks in advance.
Here is a one-hidden-layer network with backpropagation which can be customized to run experiments with relu, sigmoid and other activations. After several experiments it was concluded that with relu the network performed better and reached convergence sooner, while with sigmoid the loss value fluctuated. This happens because "the gradient of sigmoids becomes increasingly small as the absolute value of x increases".
import numpy as np
import matplotlib.pyplot as plt
from operator import xor

class neuralNetwork():

    def __init__(self):
        # Define hyperparameters
        self.noOfInputLayers = 2
        self.noOfOutputLayers = 1
        self.noOfHiddenLayerNeurons = 2

        # Define weights
        self.W1 = np.random.rand(self.noOfInputLayers,self.noOfHiddenLayerNeurons)
        self.W2 = np.random.rand(self.noOfHiddenLayerNeurons,self.noOfOutputLayers)

    def relu(self,z):
        return np.maximum(0,z)

    def sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def forward(self,X):
        self.z2 = np.dot(X,self.W1)
        self.a2 = self.relu(self.z2)
        self.z3 = np.dot(self.a2,self.W2)
        yHat = self.relu(self.z3)
        return yHat

    def costFunction(self, X, y):
        # Compute cost for given X,y, use weights already stored in class.
        self.yHat = self.forward(X)
        J = 0.5*sum((y-self.yHat)**2)
        return J

    def costFunctionPrime(self,X,y):
        # Compute derivative with respect to W1 and W2
        delta3 = np.multiply(-(y-self.yHat),self.sigmoid(self.z3))
        djw2 = np.dot(self.a2.T, delta3)
        delta2 = np.dot(delta3,self.W2.T)*self.sigmoid(self.z2)
        djw1 = np.dot(X.T,delta2)
        return djw1,djw2

if __name__ == "__main__":

    EPOCHS = 6000
    SCALAR = 0.01

    nn = neuralNetwork()
    COST_LIST = []

    inputs = [ np.array([[0,0]]), np.array([[0,1]]), np.array([[1,0]]), np.array([[1,1]])]

    for epoch in xrange(1,EPOCHS):
        cost = 0
        for i in inputs:
            X = i # inputs
            y = xor(X[0][0],X[0][1])
            cost += nn.costFunction(X,y)[0]
            djw1,djw2 = nn.costFunctionPrime(X,y)
            nn.W1 = nn.W1 - SCALAR*djw1
            nn.W2 = nn.W2 - SCALAR*djw2
        COST_LIST.append(cost)

    plt.plot(np.arange(1,EPOCHS),COST_LIST)
    plt.ylim(0,1)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(str('Epochs: '+str(EPOCHS)+', Scalar: '+str(SCALAR)))
    plt.show()

    inputs = [ np.array([[0,0]]), np.array([[0,1]]), np.array([[1,0]]), np.array([[1,1]])]
    print "X\ty\ty_hat"
    for inp in inputs:
        print (inp[0][0],inp[0][1]),"\t",xor(inp[0][0],inp[0][1]),"\t",round(nn.forward(inp)[0][0],4)
End Result:
X y y_hat
(0, 0) 0 0.0
(0, 1) 1 0.9997
(1, 0) 1 0.9997
(1, 1) 0 0.0005
The weights obtained after training were:
nn.w1
[ [-0.81781753 0.71323677]
[ 0.48803631 -0.71286155] ]
nn.w2
[ [ 2.04849235]
[ 1.40170791] ]
I found the following youtube series extremely helpful for understanding neural nets: Neural networks demystified
There is only so much I know and can explain in this answer. If you want an even better understanding of neural nets, I would suggest going through the following link: cs231n: Modelling one neuron
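As a side note on the sigmoid-gradient remark above, a quick numeric sketch of how the derivative sigma(x)*(1 - sigma(x)) shrinks as |x| grows:

import numpy as np

def sigmoid(z):
    return 1/(1+np.exp(-z))

for x in [0.0, 2.0, 5.0, 10.0]:
    s = sigmoid(x)
    print(x, s*(1-s))   # the gradient falls from 0.25 towards 0 as |x| grows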
The error calculated in each epoch should be the sum of the squared errors over all targets (i.e. the error for every target):
import numpy as np

def S(x):
    return 1/(1+np.exp(-x))

win = np.random.randn(2,2)
wout = np.random.randn(2,1)
eta = 0.15
# win = [[1,1], [2,2]]
# wout = [[1],[2]]
obj = [[0,0],[1,0],[0,1],[1,1]]
target = [0,1,1,0]
epoch = int(10000)
emajor = ""

for r in range(0,epoch):
    # ***** initialize final error *****
    finalError = 0
    for xy in range(len(target)):
        tar = target[xy]
        fdata = obj[xy]
        fdata = S(np.dot(1,fdata))
        hnw = np.dot(fdata,win)
        hnw = S(np.dot(fdata,win))
        out = np.dot(hnw,wout)
        out = S(out)
        diff = tar-out
        E = 0.5 * np.power(diff,2)
        # ***** sum all errors *****
        finalError += E
        delta_out = (out-tar)*(out*(1-out))
        nindelta_out = delta_out * eta
        wout_change = np.dot(nindelta_out[0], hnw)
        for x in range(len(wout_change)):
            change = wout_change[x]
            wout[x] -= change
        delta_in = np.dot(hnw,(1-hnw)) * np.dot(delta_out[0], wout)
        nindelta_in = eta * delta_in
        for x in range(len(nindelta_in)):
            midway = np.dot(nindelta_in[x][0], fdata)
            for y in range(len(win)):
                win[y][x] -= midway[y]
    # ***** Save final error *****
    emajor += str(finalError[0]) + ",\n"

f = open('xor.csv','w')
f.write(emajor) # python will convert \n to os.linesep
f.close() # you can omit in most cases as the destructor will call it
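On the missing bias mentioned in the question: one way to wire a bias in (a sketch of my own with hypothetical shapes, not part of the answer above; a constant input of 1 is appended to the input and to the hidden activations, so win becomes 3x2 and wout becomes 3x1):

import numpy as np

def S(x):
    return 1/(1+np.exp(-x))

win = np.random.randn(3, 2)    # 2 inputs + 1 bias input -> 2 hidden units
wout = np.random.randn(3, 1)   # 2 hidden units + 1 bias -> 1 output

def forward(x):
    x = np.append(x, 1.0)            # append the bias input
    hidden = S(np.dot(x, win))
    hidden = np.append(hidden, 1.0)  # append the bias to the hidden activations
    return S(np.dot(hidden, wout))

print(forward([0, 1]))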
I am trying to reproduce, in Python, the algorithm described in the Isolation Forest paper. http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/icdm08b.pdf?q=isolation
This is my current code:
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA

def _h(i):
    return np.log(i) + 0.5772156649

def _c(n):
    if n > 2:
        h = _h(n-1)
        return 2*h - 2*(n - 1)/n
    if n == 2:
        return 1
    else:
        return 0

def _anomaly_score(dict_scores, n_samples):
    score = np.array([np.mean(dict_scores[k]) for k in dict_scores.keys()])
    score = -score/_c(n_samples)
    return 2**score

def _split_data(X):
    ''' split the data in the left and right nodes '''
    n_samples, n_columns = X.shape
    n_features = n_columns - 1
    feature_id = np.random.randint(low=0, high=n_features-1)
    feature = X[:, feature_id]
    split_value = np.random.choice(feature)
    left_X = X[feature <= split_value]
    right_X = X[feature > split_value]
    return left_X, right_X, feature_id, split_value

def iTree(X, add_index=False, max_depth = np.inf):
    ''' construct an isolation tree and return the number of steps required
    to isolate each element. A column of indices is added to the input matrix X if
    add_index=True. This column is required by the algorithm. '''

    n_split = {}

    def iterate(X, count = 0):
        n_samples, n_columns = X.shape
        n_features = n_columns - 1

        if count > max_depth:
            for index in X[:,-1]:
                n_split[index] = count
            return

        if n_samples == 1:
            index = X[0, n_columns-1]
            n_split[index] = count
            return
        else:
            lX, rX, feature_id, split_value = _split_data(X)
            # Uncomment the print to visualize a draft of
            # the construction of the tree
            #print(lX[:,-1], rX[:,-1], feature_id, split_value, n_split)
            n_samples_lX, _ = lX.shape
            n_samples_rX, _ = rX.shape
            if n_samples_lX > 0:
                iterate(lX, count+1)
            if n_samples_rX > 0:
                iterate(rX, count+1)

    if add_index:
        n_samples, _ = X.shape
        X = np.c_[X, range(n_samples)]

    iterate(X)
    return n_split

class iForest():
    ''' Class to construct the isolation forest.
    -n_estimators: the number of trees in the forest,
    -sample_size: the bootstrap parameter used during the construction
     of the forest,
    -add_index: adds a column of indices to the matrix X. This is required and
     add_index can be set to False only if the last column of X already
     contains indices.
    -max_depth: the maximum depth of each tree
    '''
    def __init__(self, n_estimators=20, sample_size=None, add_index = True,
                 max_depth = 100):
        self.n_estimators = n_estimators
        self.sample_size = sample_size
        self.add_index = add_index
        self.max_depth = max_depth
        return

    def fit(self, X):
        n_samples, n_features = X.shape
        if self.sample_size == None:
            self.sample_size = int(n_samples/2)

        if self.add_index:
            X = np.c_[X, range(n_samples)]

        trees = [iTree(X[np.random.choice(n_samples,
                                          self.sample_size,
                                          replace=False)],
                       max_depth=self.max_depth)
                 for i in range(self.n_estimators)]

        self.all_anomaly_score_ = {k:None for k in range(n_samples)}
        for k in self.all_anomaly_score_.keys():
            self.all_anomaly_score_[k] = np.array([tree[k]
                                                   for tree in trees
                                                   if k in tree])

        self.anomaly_score_ = _anomaly_score(self.all_anomaly_score_, n_samples)
        return self
The main part of the code is the iTree function, which returns a dictionary with the number of steps required to isolate each sample.
A column of indices is attached to the input matrix X to make it easier to tell which samples end up in each node.
When I compare the results obtained with my code and the ones obtained with the isolation forest available for R, I get different results.
Consider, for example, the stackloss dataset:
data = pd.read_csv("stackloss.csv")
X = data.as_matrix()[:, 1:]
max_depth = 100
itree = iTree(X, add_index=True, max_depth=max_depth) #example of isolation tree
iforest = iForest(n_estimators=1, max_depth=max_depth, sample_size=21) # isolation forest
iforest.fit(X)
sol = np.argsort(iforest.anomaly_score_)
#correct sol = [10 5 4 8 12 9 11 17 6 19 7 14 13 15 18 3 20 16 2 1 0]
sol often differs from the correct solution obtained with the R software.
https://r-forge.r-project.org/projects/iforest/
The correct solution in R has been obtained with:
> tr = IsolationTrees(stackloss,ntree = 100000,hlim = 100, rFactor = 1)
> as = AnomalyScore(stackloss, tr)
> order(as$outF)
[1] 11 6 5 9 13 10 12 18 7 20 8 15 14 16 19 4 21 17 3 2 1
> order(as$outF)-1
[1] 10 5 4 8 12 9 11 17 6 19 7 14 13 15 18 3 20 16 2 1 0
>
Where is the mistake?
I have finally been able to solve the problem.
The code is still slow because of the copy operations performed on the data at each split.
This is the working version of the algorithm.
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import pandas as pd

def _h(i):
    return np.log(i) + 0.5772156649

def _c(n):
    if n > 2:
        h = _h(n-1)
        return 2*h - 2*(n - 1)/n
    if n == 2:
        return 1
    else:
        return 0

def _anomaly_score(score, n_samples):
    score = -score/_c(n_samples)
    return 2**score

def _split_data(X):
    ''' split the data in the left and right nodes '''
    n_samples, n_columns = X.shape
    n_features = n_columns - 1
    m = M = 0
    while m == M:
        feature_id = np.random.randint(low=0, high=n_features)
        feature = X[:, feature_id]
        m = feature.min()
        M = feature.max()
        #print(m, M, feature_id, X.shape)
    split_value = np.random.uniform(m, M, 1)
    left_X = X[feature <= split_value]
    right_X = X[feature > split_value]
    return left_X, right_X, feature_id, split_value

def iTree(X, add_index=False, max_depth = np.inf):
    ''' construct an isolation tree and return the number of steps required
    to isolate each element. A column of indices is added to the input matrix X if
    add_index=True. This column is required by the algorithm. '''

    n_split = {}

    def iterate(X, count = 0):
        n_samples, n_columns = X.shape
        n_features = n_columns - 1

        if count > max_depth:
            for index in X[:,-1]:
                n_split[index] = count
            return

        if n_samples == 1:
            index = X[0, n_columns-1]
            n_split[index] = count
            return
        else:
            lX, rX, feature_id, split_value = _split_data(X)
            # Uncomment the print to visualize a draft of
            # the construction of the tree
            #print(lX[:,-1], rX[:,-1], feature_id, split_value, n_split)
            n_samples_lX, _ = lX.shape
            n_samples_rX, _ = rX.shape
            if n_samples_lX > 0:
                iterate(lX, count+1)
            if n_samples_rX > 0:
                iterate(rX, count+1)

    if add_index:
        n_samples, _ = X.shape
        X = np.c_[X, range(n_samples)]

    iterate(X)
    return n_split

class iForest():
    ''' Class to construct the isolation forest.
    -n_estimators: the number of trees in the forest,
    -sample_size: the bootstrap parameter used during the construction
     of the forest,
    -add_index: adds a column of indices to the matrix X. This is required and
     add_index can be set to False only if the last column of X already
     contains indices.
    -max_depth: the maximum depth of each tree
    '''
    def __init__(self, n_estimators=20, sample_size=None, add_index = True,
                 max_depth = 100):
        self.n_estimators = n_estimators
        self.sample_size = sample_size
        self.add_index = add_index
        self.max_depth = max_depth
        return

    def fit(self, X):
        n_samples, n_features = X.shape
        if self.sample_size == None:
            self.sample_size = int(n_samples/2)

        if self.add_index:
            X = np.c_[X, range(n_samples)]

        trees = [iTree(X[np.random.choice(n_samples,
                                          self.sample_size,
                                          replace=False)],
                       max_depth=self.max_depth)
                 for i in range(self.n_estimators)]

        self.path_length_ = {k:None for k in range(n_samples)}
        for k in self.path_length_.keys():
            self.path_length_[k] = np.array([tree[k]
                                             for tree in trees
                                             if k in tree])
        self.path_length_ = np.array([self.path_length_[k].mean() for k in
                                      self.path_length_.keys()])
        self.anomaly_score_ = _anomaly_score(self.path_length_, self.sample_size)
        return self
In your fit method you compute

self.anomaly_score_ = _anomaly_score(self.all_anomaly_score_, n_samples)

You're calculating _anomaly_score with n_samples, which is the total number of samples. However, you are building the trees with subsamples, so when you calculate the average path length _c(n) you should use sample_size instead of n_samples. I believe your code should be:

self.anomaly_score_ = _anomaly_score(self.all_anomaly_score_, self.sample_size)
There is a pull-request in scikit-learn: https://github.com/scikit-learn/scikit-learn/pull/4163
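For reference, scikit-learn has since shipped an IsolationForest estimator in sklearn.ensemble. A minimal usage sketch, assuming a reasonably recent scikit-learn version (the toy data below is made up for illustration):

import numpy as np
from sklearn.ensemble import IsolationForest

X = np.random.randn(100, 4)                    # toy data, for illustration only
clf = IsolationForest(n_estimators=100, random_state=0).fit(X)
scores = clf.score_samples(X)                  # higher means more normal
ranking = np.argsort(scores)                   # most anomalous samples first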
Donbeo, your code works pretty well with just a few minor adjustments. The main problem is that you missed one of the base cases (end conditions) of the recursive algorithm, so it hangs in a loop when that condition comes up. You need something to this effect in the _split_data function (shown in the code below), and you also need to handle this case in the iterate function (not shown).
minv = maxv = 0
inspected = Set()  # this set tracks the candidate features we have already inspected
while minv == maxv:
    # Check whether we ran out of features to try and none of them has different values;
    # if that is the case we need to break the loop, otherwise this loops forever.
    if len(inspected) == n_features:
        # If we run out of features to try and none of them has different values,
        # return -1 to signal the caller that we can't split X any further.
        return X, X, -1, None
    feature_id = np.random.randint(low=0, high=n_features)
    if feature_id not in inspected:
        inspected.add(feature_id)
        split_feature = X[:, feature_id]
        minv = split_feature.min()
        maxv = split_feature.max()
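The answer above leaves the iterate side as an exercise; one way to handle the -1 sentinel there (a sketch of my own, not the answer author's code, reusing the names n_split and count from the question's iterate function) is to terminate the branch and record the current depth for every remaining sample:

# Inside iterate(X, count), in the else-branch:
lX, rX, feature_id, split_value = _split_data(X)
if feature_id == -1:
    # X could not be split any further: record the current depth
    # for every sample left in this node and stop recursing.
    for index in X[:, -1]:
        n_split[index] = count
    return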
I am trying to implement a particle filter algorithm in Python. I am getting this error:
x_P_update[i] = 0.5*x_P[i] + 25*x_P[i]/(1 + x_P[i]**2) + 8*math.cos(1.2*(t-1)) + math.sqrt(x_N)*np.random.randn()
TypeError: 'float' object has no attribute '__getitem__'
My code:
import math
import numpy as np
import matplotlib.pyplot as plt

x = 0.1 # initial value
x_N = 1 # process noise covariance in state update
x_R = 1 # noise covariance in measurement
T = 75 # number of iterations
N = 10 # number of particles
V = 2

x_P = [None]*(N)
for i in xrange(0, N):
    x_P[i] = x + math.sqrt(V)*np.random.randn()

z_out = np.array([x**2 / 20 + math.sqrt(x_R) * np.random.randn()]) # the actual output vector for measurement values.
x_out = np.array([x]) # the actual output vector for measurement values.
x_est = np.array([x]) # time by time output of the particle filters estimate
x_est_out = np.array([x_est]) # the vector of particle filter estimates.

x_P_update = [None]*N
z_update = [None]*N
P_w = [None]*N

for t in xrange(1, T+1):
    x = 0.5*x + 25*x/(1 + x**2) + 8*math.cos(1.2*(t-1)) + math.sqrt(x_N)*np.random.randn()
    z = x**2/20 + math.sqrt(x_R)*np.random.randn()
    for i in xrange(0, N):
        # each particle is updated with process eq
        x_P_update[i] = 0.5*x_P[i] + 25*x_P[i]/(1 + x_P[i]**2) + 8*math.cos(1.2*(t-1)) + math.sqrt(x_N)*np.random.randn()
        # observations are updated for each particle
        z_update[i] = x_P_update[i]**2/20
        # generate weights
        P_w[i] = (1/math.sqrt(2*math.pi*x_R)) * math.exp(-(z - z_update[i])**2/(2*x_R))
    P_w[:] = [ k / sum(P_w) for k in P_w]
    # print(np.where(np.cumsum(P_w, axis=0) >= np.random.rand()))
    # print(index_tuple[0][1])
    # P_w_array = np.array(list(P_w))
    # indices = [i for i in range(len(P_w)) if np.cumsum(P_w_array) >= np.random.rand()]
    for i in xrange(0, N):
        index_tuple = np.where(np.random.rand() <= np.cumsum(P_w, axis=0))
        m = index_tuple[0][1]
        x_P = x_P_update[m]
    x_est = np.array([np.mean(x_P)])
    x_out = np.array([x_out, x])
    z_out = np.array([z_out, z])
    x_est_out = np.array([x_est_out, x_est])
I am using the MATLAB code from here to learn how to implement this algorithm in Python using SciPy: http://studentdavestutorials.weebly.com/particle-filter-with-matlab-code.html
I just started learning Python and can't get past this problem; kindly help.
I'm not going to go through the video tutorial and fix your algorithm, but I can show you why you're getting this error.
In this line:
x_P = x_P_update[m]
You are replacing the x_P array with a single float, which you then attempt to index as an array in the next iteration of the outer loop. Updating a single element instead will get rid of your error:
x_P[m] = x_P_update[m]
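Putting that fix in context, the resampling loop could look like the sketch below. This is only my reading of the intended algorithm (keep x_P a list of N floats and redraw particle i from the cumulative weights); the [0][0] index, rather than the question's [0][1], takes the first particle whose cumulative weight exceeds the random draw:

for i in xrange(0, N):
    index_tuple = np.where(np.random.rand() <= np.cumsum(P_w, axis=0))
    m = index_tuple[0][0]      # first index whose cumulative weight exceeds the draw
    x_P[i] = x_P_update[m]     # update one element, so x_P stays a list of floats
x_est = np.mean(x_P)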
I tried to optimize the code below, but I cannot figure out how to improve the computation speed. I tried Cython, but the performance is the same as in pure Python.
Is it possible to improve the performance without rewriting everything in C/C++?
Thanks for any help.
import numpy as np

heightSequence = 400
widthSequence = 400
nHeights = 80

DOF = np.zeros((heightSequence, widthSequence), dtype = np.float64)
contrast = np.float64(np.random.rand(heightSequence, widthSequence, nHeights))
initDOF = np.zeros([heightSequence, widthSequence], dtype = np.float64)
initContrast = np.zeros([heightSequence, widthSequence, nHeights], dtype = np.float64)
initHeight = np.float64(np.r_[0:nHeights:1.0])
initPixelContrast = np.array(([0 for ii in range(nHeights)]), dtype = np.float64)

# for each row
for row in range(heightSequence):
    # for each col
    for col in range(widthSequence):
        # initialize variables
        height = initHeight # array ndim = 1
        c = initPixelContrast # array ndim = 1

        # for each height
        for indexHeight in range(0, nHeights):
            # get contrast profile for current pixel
            tempC = contrast[:, :, indexHeight]
            c[indexHeight] = tempC[row, col]

        # save original contrast
        # originalC = c
        # originalHeight = height

        # remove profile before maximum and after minimum contrast
        idxMaxContrast = np.argmax(c)
        c = c[idxMaxContrast:]
        height = height[idxMaxContrast:]
        idxMinContrast = np.argmin(c) + 1
        c = c[0:idxMinContrast]
        height = height[0:idxMinContrast]

        # remove some refraction
        if (len(c) <= 1) | (np.max(c) <= 0):
            DOF[row, col] = 0
        else:
            # linear fitting of profile contrast
            P = np.polyfit(height, c, 1)
            m = P[0]
            q = P[1]

            # remove some refraction
            if m >= 0:
                DOF[row, col] = 0
            else:
                DOF[row, col] = -q / m

    print 'row=%i/%i' %(row, heightSequence)

# set range of DOF
DOF[DOF < 0] = 0
DOF[DOF > nHeights] = 0
By looking at the code it seems that you can get rid of the two outer loops completely, converting the code to a vectorised form. However, the np.polyfit call must then be replaced by some other expression; the coefficients for a linear fit are easy to compute in vectorised form as well. The last if-else can then be turned into an np.where call.
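A minimal sketch of that idea for a single contrast profile, assuming all we need are the slope and intercept of the line c = m*h + q (the closed-form least-squares expressions, equivalent to np.polyfit(h, c, 1); the sample data is made up and this is not a drop-in replacement for the whole loop):

import numpy as np

def linear_fit(h, c):
    # closed-form least-squares slope and intercept
    h_mean, c_mean = h.mean(), c.mean()
    m = ((h - h_mean) * (c - c_mean)).sum() / ((h - h_mean)**2).sum()
    q = c_mean - m * h_mean
    return m, q

h = np.arange(10.0)
c = -0.5*h + 3.0 + 0.01*np.random.randn(10)
m, q = linear_fit(h, c)
DOF_value = np.where(m >= 0, 0.0, -q / m)   # mirrors the final if-else with np.where

The same mean/sum expressions broadcast over an extra pixel axis, which is what lets the two outer loops disappear.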