k nearest neighbours algorithm python

k nearest neighbours algorithm python - python

this is my code for the k nearest neighbor algorithm:
import numpy as np
from EuclideanDistance import EuclideanDistance
# Raw string: keeps the Windows backslashes literal instead of relying on
# "\U", "\T", "\D", "\m" and "\R" not being recognised escape sequences
# (on Python 3 the "\U" in a normal string literal is a SyntaxError).
dataset = np.loadtxt(r'C:\Users\Toshiba\Documents\machine learning\RealEstate.csv', delimiter=',', usecols=(2,3,4,5))
# Module-level placeholder; findKnn binds its own local p1 in its loop.
p1 = ()
def normalizeToZscores(data):
    '''Normalizes the variables to z-scores.

    The mean and standard deviation are taken over every element of
    ``data`` (not per column), exactly as before, but they are now computed
    once instead of once per row.

    data -- array-like of numbers.
    Returns a numpy array with the same shape as ``data``.
    '''
    values = np.asarray(data)
    return (values - np.mean(values)) / np.std(values)
def InOutBudget(data, budget=153000):
    '''Decides whether a particular house is within or outside the budget
    and assigns values of 1 and 0 respectively.

    data   -- iterable of prices; each item may be a scalar or a
              one-element array (e.g. the rows of an (n, 1) column).
    budget -- price limit; defaults to the original $153000.

    Returns a 1-D numpy array holding 1 where price <= budget, else 0.
    '''
    return np.array([0 if price > budget else 1 for price in data])
# Labels: first loaded column, reshaped to (n, 1), then mapped to 1/0 by InOutBudget.
classes = dataset[:,0]
classes = classes.reshape((dataset.shape[0],1))
classes = InOutBudget(classes)
# Reference rows: the first 20 rows, z-scored.
# NOTE(review): column 0 (the source of the labels) is still part of these features -- confirm intended.
data = dataset[:20,:]
data = normalizeToZscores(data)
# Query rows: rows 20..399.
# NOTE(review): these are not z-scored although `data` is -- confirm intended.
p1s = dataset[20:400,:]
def measureDis(data, p1):
    '''Return a list holding EuclideanDistance(row, p1) for every row of data,
    in the same order as the rows.'''
    return [EuclideanDistance(row, p1) for row in data]
def most_common(lst):
    '''Finds the most frequently occuring element of a list.
    It will be used to predict a class based on the classification
    of the k-nearest neighbours.

    Counts every element in a single pass with collections.Counter
    instead of calling lst.count() once per distinct value.

    Raises ValueError when lst is empty (as max() did before).
    '''
    from collections import Counter

    if len(lst) == 0:
        raise ValueError('most_common() arg is an empty sequence')
    return Counter(lst).most_common(1)[0][0]
# For every query row in p1s: rank the rows of `data` by distance, take the k
# closest, and vote on their class with most_common.
# The code is kept exactly as posted (Python 2, indentation lost in the paste).
def findKnn(k):
'''K nearest neighbours algorithm'''
knns = list()
errors = list()
#for i in k:
for p1 in p1s:
# Create a list of tuples containing distance and class,
# Then sort them by shortest distance
# NOTE(review): distances come from `data` (rows 0-19) but are zipped with
# classes[20:400]; zip stops at the shorter input -- confirm which labels were meant.
tuples = zip(measureDis(data,p1), classes[20:400])
tuples = sorted(tuples)
knn = tuples[:k]
print knn
knn = [x[1] for x in knn]
knn = most_common(knn)
# NOTE: list.append works in place and returns None, so this rebinds knns to
# None; the next knns.append(...) raises the AttributeError quoted below.
# The fix is a bare `knns.append(knn)`.
knns = knns.append(knn)
print knn
# NOTE(review): subtracts the whole feature row p1 from the predicted class -- confirm.
error = np.abs(knn - p1)
# NOTE: same append-returns-None problem as for knns above.
errors = errors.append(error)
errorsNum = np.sum(errors)
return knns
But I keep getting:
Traceback (most recent call last):
File "C:\Users\Toshiba\workspace\assignment5\src\knn2.py", line 76, in <module> knn = findKnn(k)
File "C:\Users\Toshiba\workspace\assignment5\src\knn2.py", line 64, in findKnn knns = knns.append(knn)
AttributeError: 'NoneType' object has no attribute 'append'
I know the code is really amateur, but could someone please help me just solve the issue?

list.append doesn't return the list. Simply do:
knns.append(knn)
instead of:
knns = knns.append(knn)

append does not return the list, it returns None, so you are clobbering it after the first loop.

Related

Datatype definition - TypeError

import cmath
import math
import random
import time
P = []
V = []
Vin = []
def Compute_wn_win(n,V,Vin):
    """Append cos(2*pi*k/n) + i*sin(2*pi*k/n) for k = 0..n-1 to V, and the
    reciprocal of each of those values to Vin.

    Both lists are extended in place; nothing is returned.
    """
    for k in range(n):
        angle = 2*math.pi*k/n
        root = complex(math.cos(angle), math.sin(angle))
        V.append(root)
        Vin.append(1/root)
Compute_wn_win(8,V,Vin)
for i in range(0,8):
random_number = random.uniform(-1.0,1.0)
P.append(random_number)
# Recursive FFT: splits P into even- and odd-indexed halves, recurses on each
# with the squared entries of V, then combines the two half-size results.
# n is halved on every call, so presumably n must be a power of two.
# The code is kept exactly as posted (Python 2, indentation lost in the paste).
def FFT(P,V,n):
if(n==1):
# NOTE: this returns a bare number while the recursive case returns a list.
# One level up, Sole[i] / Solo[i] then index a float, which is the TypeError
# quoted below; returning [P[0]] keeps the return type consistent.
return P[0]
else:
Peven = []
Podd = []
# NOTE: n/2 is integer division only on Python 2; Python 3 needs n//2 here
# and in every other n/2 below.
for i in range(0,n/2):
Peven.append(P[2*i])
Podd.append(P[(2*i)+1])
Vsquared = []
for i in range(0,n/2):
Vsquared.append(V[i]*V[i])
Sole = FFT(Peven,Vsquared,n/2)
Solo = FFT(Podd,Vsquared,n/2)
Sol = [0 for x in range(0,n)]
for i in range(0,n/2):
Sol[i] = Sole[i]+V[i]*Solo[i]
Sol[i+n/2] = Sole[i]-V[i]*Solo[i]
return Sol
Sol = FFT(P,V,8)
I am new to Python and have the code above. I get the following error at the lines Sole = FFT(Peven,Vsquared,n/2) and Sol[i] = Sole[i]+V[i]*Solo[i]. I have defined Sole, Solo and Sol as lists, so I don't understand why the error says that a float object has no attribute __getitem__.
Exact Error is
Traceback (most recent call last):
File "/Users/globetrekker/Documents/CS5050/Assignment7/Test_py.py", line 40, in <module>
Sol = FFT(P,V,8)
File "/Users/globetrekker/Documents/CS5050/Assignment7/Test_py.py", line 33, in FFT
Sole = FFT(Peven,Vsquared,n//2)
File "/Users/globetrekker/Documents/CS5050/Assignment7/Test_py.py", line 33, in FFT
Sole = FFT(Peven,Vsquared,n//2)
File "/Users/globetrekker/Documents/CS5050/Assignment7/Test_py.py", line 37, in FFT
Sol[i] = Sole[i]+V[i]*Solo[i]
TypeError: 'float' object has no attribute '__getitem__'

Sole and Solo are the return values from recursive calls to FFT(), but FFT()'s base case (when n == 1) returns a float, not a list, so the step above the base case will fail by trying to index a float. Presumably, you want to change return P[0] in the base case to return [P[0]].

This is introducing a float:
for i in range(0,n/2):
Check out: I keep getting this error for my simple python program: "TypeError: 'float' object cannot be interpreted as an integer"

Don't see why I'm getting 'numpy.ndarray object not callable' error?

I have a code that uses numpy and hermval along with multiple functions to compute psi at the end for given parameters. But I keep getting the error numpy.ndarray object not callable, and I really don't see why this is happening. Here are the relevant parts of my code:
import numpy as np
import math
from numpy.linalg import eigh
from numpy.polynomial.hermite import hermval
def matrices(N, lam):
    """Build the two (N+1) x (N+1) matrices used by H_lam.

    H_0 is diagonal with entries n + 0.5. x_four_matrix is non-zero only on
    its main diagonal and on the diagonals at offsets +/-2 and +/-4.
    Only those five diagonals are written, instead of testing every (n, m)
    pair.

    N   -- largest index; the matrices have N + 1 rows and columns.
    lam -- accepted for signature compatibility; not used here.

    Returns the tuple (H_0, x_four_matrix).
    """
    size = N + 1
    H_0 = np.zeros([size, size])
    x_four_matrix = np.zeros([size, size])
    for n in range(size):
        H_0[n, n] = n + 0.5
        x_four_matrix[n, n] = (6.0*n**2 + 6.0*n + 3.0)/4.0
        if n + 2 < size:   # entry at column m = n + 2
            x_four_matrix[n, n + 2] = np.sqrt((n+1)*(n+2))*(n+1.5)
        if n - 2 >= 0:     # entry at column m = n - 2
            x_four_matrix[n, n - 2] = (n-0.5)*np.sqrt(n*(n-1))
        if n + 4 < size:   # entry at column m = n + 4
            x_four_matrix[n, n + 4] = np.sqrt((n+1)*(n+2)*(n+3)*(n+4))/4.0
        if n - 4 >= 0:     # entry at column m = n - 4
            x_four_matrix[n, n - 4] = np.sqrt((n-3)*(n-2)*(n-1)*n)/4.0
    return H_0, x_four_matrix
def H_lam(N, lam):
    """Return H_0 + lam * x_four_matrix for the given size N.

    matrices() is called once and both of its results are reused; it was
    previously called twice per evaluation.
    """
    H_0, x_four_matrix = matrices(N, lam)
    return H_0 + lam*x_four_matrix
# Solve for eigenvalues (energies)
def lowest_eigenvals(N, n, lam):
    """Return a list with the n smallest eigenvalues of H_lam(N, lam),
    in ascending order."""
    spectrum = eigh(H_lam(N, lam))[0]
    spectrum.sort()
    return [spectrum[k] for k in range(n)]
# Solve for eigenvectors
def lowest_eigenvectors(N, n, lam):
    """Return the eigenvectors of H_lam(N, lam) that belong to its n lowest
    eigenvalues, as an array with one eigenvector per row.

    Changes from the posted version:
    * it called ``eig``, which is never imported (only ``eigh`` is);
    * it took *rows* of the matrix returned by eigh, but eigh stores the
      eigenvector for eigenvalue w[i] in *column* v[:, i];
    * it repeated the decomposition inside two nested loops; it is now done
      once.

    eigh returns its eigenvalues in ascending order, so the first n columns
    are the wanted vectors.

    Raises IndexError when n exceeds the matrix size N + 1.
    """
    _, vectors = eigh(H_lam(N, lam))
    if n > vectors.shape[1]:
        raise IndexError('requested %d eigenvectors but the matrix only has %d'
                         % (n, vectors.shape[1]))
    count = max(n, 0)
    return np.array(vectors[:, :count].T)
def N_coeff(i):
    """Return 1 / sqrt(2**i * i! * sqrt(pi)) for a non-negative integer i."""
    norm_squared = 2**i*math.factorial(i)*np.sqrt(np.pi)
    return 1.0/np.sqrt(norm_squared)
# for E_0 (first eigenfunction):
# NOTE: the parameter `lowest_eigenvectors` shadows the module-level function
# of the same name. Inside psi the name is whatever the caller passed (an
# ndarray in the call below), so calling it raises
# "'numpy.ndarray' object is not callable".
# NOTE: N, n and lam are not defined in psi or at module level in this snippet.
def psi(x, lowest_eigenvectors, i):
herm_coeffs = [element*N_coeff(i) for element in lowest_eigenvectors(N, n, lam)[i]]
return np.exp((x**2)/2.0)*hermval(x, herm_coeffs)
print [element*N_coeff(0) for element in lowest_eigenvectors(100, 4, 0.1)[0]]
print psi(1.0, lowest_eigenvectors(100, 4, 1.0), 0) # for lambda = 1
And then with my last print statement here, I get TypeError: 'numpy.ndarray' object is not callable coming from the herm_coeffs line in my last function. But I'm not sure why this is happening, since the second to last print statement prints correctly! What is going on here?
Here is the traceback:
TypeError Traceback (most recent call last)
<ipython-input-350-04692f269a26> in <module>()
13 # print [element*N_coeff(0) for element in lowest_eigenvectors(100, 4, 0.1)[0]]
14
---> 15 print psi(1.0, lowest_eigenvectors(100, 4, 0.1), 0)
<ipython-input-350-04692f269a26> in psi(x, lowest_eigenvectors, i)
7 # for E_0 (first eigenfunction):
8 def psi(x, lowest_eigenvectors, i):
----> 9 herm_coeffs = [element*N_coeff(i) for element in lowest_eigenvectors(N, n, lam)[i]]
10 return np.exp((x**2)/2.0)*hermval(x, herm_coeffs)
11
TypeError: 'numpy.ndarray' object is not callable

lowest_eigenvectors parameter name in function psi is clashing with function lowest_eigenvectors.
EDIT: it looks like you don't need to pass psi the function lowest_eigenvectors since the psi function is within the same lexical scope as lowest_eigenvectors.

Building off what @Scott suggested, I think psi should be changed to:
def psi(x, vectors, i):
    """Evaluate exp(x**2 / 2) times the Hermite series whose coefficients are
    row i of ``vectors`` scaled by N_coeff(i).

    x       -- point (or array of points) at which to evaluate.
    vectors -- precomputed eigenvector array, e.g. the result of
               lowest_eigenvectors(...); indexed as vectors[i].
    i       -- row to use, also passed to N_coeff.
    """
    scale = N_coeff(i)
    herm_coeffs = [scale*element for element in vectors[i]]
    envelope = np.exp((x**2)/2.0)
    return envelope*hermval(x, herm_coeffs)
print psi(1.0, lowest_eigenvectors(100, 4, 1.0), 0)
In other words, you calculate vectors = lowest_eigenvectors(100, 4, 1.0), and pass that to psi. Even if you got this array vs. function naming correct, using:
lowest_eigenvectors(N, n, lam)
in psi would be a problem because N, n, lam are not defined either in the function or globally.
I wonder if that function could be further simplified with:
herm_coeffs = N_coeff(i)*vectors[i]

python why data type changed by def function?

Why are num_r1(x) and num_r2(x) of type numpy.ndarray, while num_r(t) is of type float? How can I keep the result of num_r(t) as an array?
# NOTE: both branches return during the first loop iteration, so only the
# first element of t is ever evaluated and a single value comes back.
def num_r(t):
for x in t:
if x>tx:
return num_r2(x)
else:
return num_r1(x)
Thank you!
The complete example is below
# -*- coding: utf-8 -*
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import math
from pylab import *
#### physical parameters
c = 2.998*10**10
hp = 6.626*10**-27
hb = 1.055*10**-27
kb = 1.381*10**-16
g = 6.673*10**-8
me = 9.109*10**-28
mp = 1.673*10**-24
q = 4.803*10**-10 #gausi
sigT = 6.652*10**-25
# The evolution of the characteristic frequencies
p = 2.5
E52 = 1
epsB_r = 1
epse_r = 1
D28 = 1
n1 = 1.0
nu15 = 1*10**(-5)
r014 = 1
g42 = 1
delt12 =1
g4 = g42*10**2.5
E0 = E52*10**52
eta = g4
N0 = E0/(g4*mp*c**2)
p_tx = 3**(1./3)*2**(4./3)*mp**(-1./3)*c**(-5./3)
tx = p_tx*n1**(-1./3)*eta**(-8./3)
p_num_r1 = 2**(11./2)*7**(-2)*mp**(5./2)*me**(-3)*pi**(-1./2)*q*p_tx**(-6)*2**30*3**18*10**12
p_nuc_r1 = 2**(-33./2)*3**(-4)*10**(-4)*me*mp**(-3./2)*c**(-2)*sigT**(-2)*pi**(-1./2)*q
p_Fmax_r1 = 2**(15./2)*3**(9./2)*10**30*p_tx**(-3./2)*10**(-56)*me*mp**(1./2)*c**3*sigT*q**(-1)*2**(1./2)*3**(-1)
p_num_r2 = 2**(11./2)*7**(-2)*mp**(5./2)*me**(-3)*pi**(-1./2)*q*p_tx**(54./35)*(2**5*3**3*10**2)**(-54./35)
p_nuc_r2 = 2**(-13./2)*3**2*pi**(-1./2)*me*mp**(-3./2)*c**(-2)*sigT**(-2)*q*p_tx**(-74./35)*(2**5*3**3*10**2)**(4./35)
p_Fmax_r2 = 2**(1./2)*3**(-1)*pi**(-1./2)*me*mp**(1./2)*c**3*sigT*q**(-1)*10**(-56)
# Characteristic-frequency and peak-flux expressions as functions of time t.
# The *_r1 forms are selected for t <= tx and the *_r2 forms for t > tx
# (see num_r / nuc_r / Fmax_r).
num_r1 = lambda t : p_num_r1*eta**18*((p-2)/(p-1))**2*epse_r**2*epsB_r**(1./2)*n1**(5./2)*t**6*E52**(-2)
# Fixed: this referenced `epsB`, which is never defined in the script; the
# parameter is named epsB_r, as used by nuc_r2 with the same exponent.
nuc_r1 = lambda t : p_nuc_r1*eta**(-4)*epsB_r**(-3./2)*n1**(-3./2)*t**(-2)
# NOTE(review): `N0**t**(3./2)` raises N0 to a power of t; `N0*t**(3./2)` may
# have been intended -- confirm against the source formula.
Fmax_r1 = lambda t : p_Fmax_r1*N0**t**(3./2)*n1*eta**6*E52**(-1./2)*D28**(-2)*epsB_r**(1./2)
# num_r2 does not depend on its argument t.
num_r2 = lambda t : p_num_r2*((p-2)/(p-1))**2*n1**(-74./35)*n1**(74./105)*eta**(592./105)*E52**(-74./105)
nuc_r2 = lambda t : p_nuc_r2*eta**(172./105)*t**(4./35)*n1**(-167./210)*epsB_r**(-3./2)
# NOTE(review): p_Fmax_r2 is defined above but not used here, unlike
# p_Fmax_r1 in Fmax_r1 -- confirm.
Fmax_r2 = lambda t : N0*eta**(62./105)*n1**(37./210)*epsB_r**(1./2)*t**(-34./35)*D28**(-2)
# Piecewise power law in u with breaks at num_r(t) and nuc_r(t), scaled by
# Fmax_r(t). Which of the two np.where chains is used depends on whether
# num_r(t) is below nuc_r(t).
# NOTE(review): the last exponent on the else-branch is -1.2, whereas the
# neighbouring segment uses -1./2 -- confirm it is not a typo.
def fspe(t,u):
if num_r(t)<nuc_r(t):
return np.where(u<num_r(t),(u/num_r(t))**(1./3)*Fmax_r(t),np.where(u<nuc_r(t),(u/num_r(t))**(-(p-1.)/2)*Fmax_r(t),(u/nuc_r(t))**(-p/2)*(nuc_r(t)/num_r(t))**(-(p-1.)/2)*Fmax_r(t)))
else:
return np.where(u<nuc_r(t),(u/nuc_r(t))**(1./3)*Fmax_r(t),np.where(u<num_r(t),(u/nuc_r(t))**(-1./2)*Fmax_r(t),(u/num_r(t))**(-p/2)*(num_r(t)/nuc_r(t))**(-1.2)*Fmax_r(t)))
# Selects num_r2 for elements above tx and num_r1 otherwise.
# NOTE: both branches return during the first loop iteration, so only the
# first element of t is ever evaluated and a single value comes back. This is
# why iterating over num_r(t) fails with "'float' object is not iterable".
def num_r(t):
for x in t:
if x>tx:
return num_r2(x)
else:
return num_r1(x)
# Same shape as num_r, and it also returns during the first loop iteration.
# NOTE(review): the test uses t (the whole argument) where num_r uses the
# loop element x -- likely a typo; for an array t the truth value of t>tx is
# ambiguous.
def nuc_r(t):
for x in t:
if t>tx:
return nuc_r2(x)
else:
return nuc_r1(x)
# Same shape as num_r, and it also returns during the first loop iteration.
# NOTE(review): the test uses t (the whole argument) where num_r uses the
# loop element x -- likely a typo; for an array t the truth value of t>tx is
# ambiguous.
def Fmax_r(t):
for x in t:
if t>tx:
return Fmax_r2(x)
else:
return Fmax_r1(x)
i= np.arange(-4,6,0.1)
t = 10**i
dnum = [math.log10(mmm) for mmm in num_r(t)]
dnuc = [math.log10(j) for j in nuc_r(t)]
nu_obs = [math.log(2.4*10**17,10) for a in i]
plt.figure('God Bless: Observable Limit')
plt.title(r'$\nu_{obs}$ and $\nu_c$ and $\nu_m$''\nComparation')
plt.xlabel('Time: log t')
plt.ylabel(r'log $\nu$')
plt.axvline(math.log10(tx))
plt.plot(i,nu_obs,'.',label=r'$\nu_{obs}$')
plt.plot(i,dnum,'D',label=r'$\nu_m$')
plt.plot(i,dnuc,'s',label=r'$\nu_c$')
plt.legend()
plt.grid(True)
plt.savefig("nu_obs.eps", dpi=120,bbox_inches='tight')
plt.show()
But there's an error:
TypeError Traceback (most recent call last)
<ipython-input-250-c008d4ed7571> in <module>()
95 i= np.arange(-4,6,0.1)
96 t = 10**i
---> 97 dnum = [math.log10(mmm) for mmm in num_r(t)]
TypeError: 'float' object is not iterable

You should write your function as:
def num_r_(x):
    """Scalar version: return num_r2(x) when x is above tx, otherwise num_r1(x)."""
    return num_r2(x) if x > tx else num_r1(x)
And then pass it through np.vectorize to lift it from a float -> float function to an np.array -> np.array function:
num_r = np.vectorize(num_r_)
From Efficient evaluation of a function at every cell of a NumPy array
And then when you use it in:
dnum = [math.log10(mmm) for mmm in num_r(t)]
You should rather do:
dnum = np.log10(num_r(t))
That is to say don't use the functions from the math module. Use those from the np module as they can take np.array as well as float.
As:
i = np.arange(-4,6,0.1)
t = 10**i
results in t being a np.array

So i is an array (arange); so is t (a math expression of i).
def num_r(t):
for x in t:
if x>tx:
return num_r2(x)
else:
return num_r1(x)
You iterate on t. x is an element of t. You test it and pass it through num_r2 or num_r1, and return immediately. So only the 1st element t is being processed. Thus the error - num_r returns one value, not an array.
You need to write num_r in a way that processes all the values of t, not just the first. A simple, crude way is
def num_r(t):
    """Return a list with one value per element of t: num_r2(x) where x is
    above tx, num_r1(x) otherwise."""
    result = []
    for x in t:
        branch = num_r2 if x > tx else num_r1
        result.append(branch(x))
    # result = np.array(result)
    return result
Now num_r should return a list the same length as t, and can be used in the list comprehension
[math.log10(mmm) for mmm in num_r(t)]
num_r could be written as a list comprehension:
[(num_r2(x) if x>tx else num_r1(x)) for x in t]
You could have it return an array instead of a list, but as long as you are using it in the list comprehension, there's no need. A list is just fine.
If it did return an array, then you could replace the list comprehension with a numpy log operation, e.g.
np.log10(num_r(t))
If num_r1 and num_r2 are written so they can take an array (looks off hand like they are, but I haven't tested), you could write
def num_r(t):
    """Vectorised version: evaluate num_r2 on the elements of t above tx and
    num_r1 on the rest, and return the values in an array shaped like t.

    t is converted with np.asarray so that lists are accepted, and the
    result is always floating point; zeros_like(t) alone would truncate the
    values when t holds integers.
    """
    t = np.asarray(t)
    ind = t > tx
    result = np.zeros_like(t, dtype=float)
    result[ind] = num_r2(t[ind])
    result[~ind] = num_r1(t[~ind])
    return result
The idea is to find a mask of the values in t that are >tx, and pass all those through num_r2 at once; similarly for num_r1; and collect the values in the correct slots of result. The result is an array that can be passed to np.log10. This should be quite a bit faster than iterating on t, or using np.vectorize.
There may be some errors in my suggestions, since I did not test them in an script or interpreter. But the underlying ideas should be correct and set you on the right path.

Implementing a simple gaussian naive bayes algorithm in python

So im a real amateur, trying to implement something you may call a sort of 'simplified' version of the naive bayes algorithm in python, and seem to have a lot of trouble [the reason for which is perhaps the fact that im not too sure i completely understand the way the algorithm works..]. I would appreciate any help/suggestions very much though. This is the code I have:
class GaussianNB(object):
    """Gaussian naive Bayes over named, real-valued features.

    Training rows and test vectors are indexable as ``row[0]`` (class label)
    and ``row[1]`` (an iterable of ``(feature_name, value)`` pairs).

    Relies on two helpers that are not defined in this snippet:
    ``findMeanSD(values)``, whose result is read as ``[0]`` = mean and
    ``[1]`` = standard deviation, and ``norm(value, mean, std)``, presumably
    the Gaussian density -- confirm against their definitions.

    Membership tests use ``in`` instead of ``dict.has_key`` and the key list
    is copied before sorting, so the class also runs on Python 3.
    """

    def __init__(self):
        '''
        Constructor
        '''
        # This variable will hold the gaussian distribution over your data
        # In fact, you need a distribution per class for each feature variable.
        # class label -> number of training rows seen for that class
        self.classmodels_count = {}
        # class label -> {feature name -> list of observed values}
        self.classmodels = {}
        # class label -> {feature name -> findMeanSD(observed values)}
        self.classmodelsMeanAndVariance = {}
        self.featureTokenCount = 0
        self.featureTypeCount = 0

    def train(self, trainingdata):
        """Collect the values seen per class and feature, then summarise each
        list with findMeanSD.

        Each row's second item must be iterable; a bare number there raises
        "TypeError: ... object is not iterable" at the feature loop.
        """
        for row in trainingdata:
            current_class = row[0]
            features = row[1]
            if current_class in self.classmodels:
                current_class_model = self.classmodels[current_class]
                self.classmodels_count[current_class] += 1
            else:
                current_class_model = {}
                self.classmodels_count[current_class] = 1
            for f in features:
                feature = f[0]
                value = f[1]
                current_class_model.setdefault(feature, []).append(value)
            self.classmodels[current_class] = current_class_model
        for a_class, a_class_model in self.classmodels.items():
            a_class_model_mean_and_variance = {}
            for feature, values in a_class_model.items():
                a_class_model_mean_and_variance[feature] = findMeanSD(np.array(values))
            self.classmodelsMeanAndVariance[a_class] = a_class_model_mean_and_variance

    def classify(self, testing_vecs):
        """Return, for every test vector, the class with the highest summed
        log10 likelihood over the features seen in training for that class.

        Raises IndexError for a non-empty input when no class was trained.
        """
        outputs = []
        for vec in testing_vecs:
            features = vec[1]
            # NOTE: keyed by score, so classes with an identical score
            # collapse into one entry (the last one written wins).
            class_model_output_prob = {}
            for a_class, stats in self.classmodelsMeanAndVariance.items():
                a_class_output_prob = 0.0
                for feature_value in features:
                    feature = feature_value[0]
                    value = feature_value[1]
                    #simply ignore a feature if its not seen in training
                    if feature in stats:
                        feature_mean = stats[feature][0]
                        feature_std = stats[feature][1]
                        a_class_output_prob += math.log10(norm(value, feature_mean, feature_std))
                #ignoring P(class) prior.. assuming equal priors
                class_model_output_prob[a_class_output_prob] = a_class
            probs = list(class_model_output_prob)
            print(probs)
            max_prob = sorted(probs)[-1]
            outputs.append(class_model_output_prob[max_prob])
        return outputs
When running on some data, the error I get is
Traceback (most recent call last):
File "C:\Users\Toshiba\workspace\Assignment6\src\gnb_test.py", line 34, in
gaussian = Model.train(testData)
File "C:\Users\Toshiba\workspace\Assignment6\src\gnb.py", line 91, in train
for f in features:
TypeError: 'numpy.float64' object is not iterable
And I dont really [at all] understand what it means

Your traceback suggests that the problem is that you are trying to iterate through features, but features is a float and not a list or tuple - basically, it can't be broken into individual elements. I think it is a float because the lines
for i in trainingdata:
current_class = i[0]
features = i[1]
suggest features keeps getting rewritten as a successive series of numbers, when what you seem to want is to save the numbers into an iterable type. Try
features = []
for i in trainingdata:
current_class = i[0]
features.append(i[1])

Python: TypeError: Unhashable Type: List

I have a code that basically tests which circles in a random distribution of circles in a box touch - touching circles are added to a dictionary as clusters. I keep getting a TypeError when I run this code:
leftedge = 0
rightedge = 1
result = []
color = {}
parent = {}
clusters = {}
number = 0
def bfs(vertices, neighbours, source) :
    """Breadth-first search from ``source`` that records one new cluster.

    Increments the module-level ``number``, creates ``clusters[number]`` and
    adds every vertex reached from ``source`` to it. Vertices are marked in
    the module-level ``color`` dict ('w' = unvisited, 'g' = queued,
    'b' = finished) and each reached vertex's predecessor is stored in
    ``parent``.

    vertices   -- not used; kept for signature compatibility.
    neighbours -- mapping from a vertex to an iterable of adjacent vertices.
    source     -- vertex to start from.
    """
    from collections import deque

    global number
    number += 1
    clusters[number] = set()
    color[source] = 'g'
    # deque gives O(1) pops from the left; list.pop(0) is O(n).
    queue = deque([source])
    while queue:
        v = queue.popleft()
        for v2 in neighbours[v]:
            if color[v2] == 'w':
                color[v2] = 'g'
                parent[v2] = v
                queue.append(v2)
        color[v] = 'b'
        clusters[number].add(v)
def createclusters(vertices, neighbours) :
    """Mark every vertex unvisited, then run bfs from each vertex that is
    still unvisited, so that each connected group becomes one cluster.

    One pass over the keys of ``color`` is enough: bfs finishes every vertex
    it reaches, so the former outer ``while 'w' in color.values()`` loop
    never ran a second time.
    """
    for v in vertices:
        color[v] = 'w'
        parent[v] = -1
    for v in list(color):
        if color[v] == 'w':
            bfs(vertices, neighbours, v)
def overlap(c1,c2,r) :
    """Return 1 when two circles of radius r centred at c1 and c2 touch or
    overlap (centre distance <= 2*r), otherwise 0.

    Two fixes: the distance now uses the y-difference as well as the
    x-difference (the x-difference was squared twice), and the radius comes
    from the parameter r rather than from a global named ``radius``.

    c1, c2 -- (x, y) sequences.
    """
    dx = c1[0] - c2[0]
    dy = c1[1] - c2[1]
    if (dx**2 + dy**2)**0.5 > 2*r:
        return 0
    return 1
# Builds d (centre -> list of centres for which overlap(...) is true), hands
# it to createclusters, then appends (cluster, left, right) to the
# module-level `result` for every cluster, where left/right say whether the
# cluster's extreme x coordinate comes within `radius` of leftedge/rightedge.
# The code is kept exactly as posted (indentation lost in the paste).
def findclusters(array, radius) :
d={}
for c1 in array :
# NOTE: c1 is used as a dict key, so the elements of `array` must be hashable
# (e.g. tuples); list elements raise the "unhashable type: 'list'" TypeError
# quoted below.
d[c1]=[]
for c2 in array :
if overlap(c1, c2, radius) :
d[c1].append(c2)
createclusters(array,d)
for cluster in clusters.values() :
l = [i[0] for i in cluster]
left = right = False
x = max(l)
if x + radius > rightedge :
right = True
x = min(l)
if x - radius < leftedge :
left = True
result.append((cluster,left,right))
import numpy.random as nr
# NOTE: `.tolist` without parentheses binds the method itself instead of
# calling it; `.tolist()` is needed to obtain the nested list. The inner
# points then still have to be tuples to serve as dict keys in findclusters.
array = nr.uniform(size=(10,2)).tolist
radius = 0.1
findclusters(array, radius)
print(clusters)
print(result)
When I try and run it, I get this error:
TypeError Traceback (most recent call last)
/Users/annikamonari/<ipython-input-316-be6c65f2ce89> in <module>()
----> 1 findclusters(array,0.1)
/Users/annikamonari/<ipython-input-309-32f214b46080> in findclusters(array, radius)
2 d={}
3 for c1 in array:
----> 4 d[c1]=[]
5 for c2 in array:
6 if overlap(c1,c2,radius):
TypeError: unhashable type: 'list'
For the life of me, can't figure out why. Can anyone figure it out?
Thank you!

First, you probably need to call .tolist() rather than leaving off the parens.
Also, array is a 2-dimensional array, which means that when you do...
for c1 in array:
d[c1]=[]
...c1 is a list. Lists, being mutable, are not a hashable type (because if they were, the hash could change at any time based on the contents changing, and hash-based data structures aren't designed to handle that) and thus can't be used as a dictionary key.
If you intended to use a sequence of values as a dictionary key, you need to make them non-mutable (and thus hashable) first. The easiest way to do this is by converting to a tuple:
for c1 in array:
d[tuple(c1)]=[]
However, from reading your code, it seems more like you might be wanting to just iterate over the indices of the first array, which means you probably want something like...
for index, values in enumerate(array):
d[index] = []
for c2 in values:
# ...
or similar.

As the program expects array to be a list of 2d hashable types (2d tuples), its best if you convert array to that form, before calling any function on it.
temp = nr.uniform(size=(10,2)).tolist()
array = [tuple(i) for i in temp]
This should create the input in the required format.

You don't actually call the tolist method in your code, you just pass the function itself.
Add the () to call it:
array = nr.uniform(size=(10,2)).tolist()
Notice that c2 in your code is a list of two numbers, it's not a single number.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

k nearest neighbours algorithm python - python

list.append doesn't return the list. Simply do: knns.append(knn) instead of: knns = knns.append(knn)

append does not return the list, it returns None, so you are clobbering it after the first loop.

Related

Datatype definition - TypeError

Don't see why I'm getting 'numpy.ndarray object not callable' error?

python why data type changed by def function?

Implementing a simple gaussian naive bayes algorithm in python

Python: TypeError: Unhashable Type: List

Categories

Resources