I have programmed this ID3 algorithm and for some reason the predicted value seems to always return None. I cannot figure out why the code does not enter the if statement in the predict function but have narrowed down the problem to that area.
I have tried changing the predict function multiple times and debugging but cannot find out why the issue persists and the feature value is not in tree[root_node]. Can someone please help with this?
def predict(tree, instance):
if not isinstance(tree, dict):
return tree
else:
root_node = next(iter(tree))
feat_val = instance[root_node]
if feat_val in tree[root_node]:
return predict(tree[root_node][feat_val], instance)
else:
return None
def evaluate(tree, test_data_m, label):
correct_preditct = 0
wrong_preditct = 0
for index, row in test_data_m.iterrows():#for each row in the dataset
result = predict(tree, test_data_m.loc[index])
if result == test_data_m[label][index]:
correct_predict += 1 #increase correct count
else:
wrong_predict += 1 #increase incorrect count
accuracy = correct_predict / (correct_predict + wrong_predict)
return accuracy
Im very new to programming, I wrote a simple program for a school project and wanted to make the code "prettier" by not just having the program be one giant function but instead be made up of multiple smaller functions with a singe purpose. I seemed to have messed up royally since the program now runs 13 times slower. How should I structured the program to make it run faster and just in general make programs easier to write, read and edit?
Here are the two programs:
First program (for reference values runs in ≈0:20):
import numpy as np
import matplotlib.pyplot as plt
def graf(a,b,H,p):
GM = 39.5216489684
x_0 = a + np.sqrt(a**2 - b**2)
v_0 = np.sqrt(GM*(2/x_0 - 1/a))
konstant_period = np.sqrt(a**3)*H
h = 1/H
'''starting position given by an elliptic orbit '''
stor_x_lista = [x_0]
stor_y_lista = [0]
hastighet_x = [0]
hastighet_y = [v_0]
liten_x_lista = []
liten_y_lista = []
''' a loop that approximates the points of the orbit'''
t = 0
tid_lista = []
n = 0
while n < konstant_period:
hastighet_x.append(hastighet_x[n] - h*GM* stor_x_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_x_lista.append(stor_x_lista[n] + h*hastighet_x[n])
hastighet_y.append(hastighet_y[n] - h*GM*stor_y_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_y_lista.append(stor_y_lista[n] + h*hastighet_y[n])
'''smaller list of points to run faster'''
if n % p == 0:
liten_x_lista.append(stor_x_lista[n])
liten_y_lista.append(stor_y_lista[n])
tid_lista.append(t)
n += 1
t += h
''' function that finds the angle'''
vinkel = []
siffra = 0
while siffra < len(liten_x_lista):
if liten_y_lista[siffra ] >= 0:
vinkel.append( np.arccos( liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2)))
siffra += 1
elif liten_y_lista[siffra] < 0 :
vinkel.append( np.pi + np.arccos( -liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2) ))
siffra += 1
'''get rid of line to find periodic function'''
mod_lista = []
modn = 0
while modn < len(vinkel):
mod_lista.append(vinkel[modn] - (2*np.pi*tid_lista[modn])/np.sqrt(a**3))
modn += 1
'''make all inputs have period 1'''
squeeze_tid = []
squeezen = 0
while squeezen < len(tid_lista):
squeeze_tid.append(tid_lista[squeezen]/np.sqrt(a**3))
squeezen += 1
del mod_lista[-1:]
del tid_lista[-1:]
del squeeze_tid[-1:]
plt.plot(squeeze_tid,mod_lista)
plt.title('p(t) där a = ' + str(a) + ' och b = ' + str(b))
plt.show
Second more split up program (for reference values runs in ≈4:20):
import numpy as np
import matplotlib.pyplot as plt
'''function that generates the points of the orbit'''
def punkt(a,b,H,p):
GM = 39.5216489684
x_0 = a + np.sqrt(a**2 - b**2)
v_0 = np.sqrt(GM*(2/x_0 - 1/a))
konstant_period = np.sqrt(a**3)*H
h = 1/H
'''starting position given by an elliptic orbit '''
stor_x_lista = [x_0]
stor_y_lista = [0]
hastighet_x = [0]
hastighet_y = [v_0]
liten_x_lista = []
liten_y_lista = []
''' a loop that approximates the points of the orbit'''
t = 0
tid_lista = []
n = 0
while n < konstant_period:
hastighet_x.append(hastighet_x[n] - h*GM* stor_x_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_x_lista.append(stor_x_lista[n] + h*hastighet_x[n])
hastighet_y.append(hastighet_y[n] - h*GM*stor_y_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_y_lista.append(stor_y_lista[n] + h*hastighet_y[n])
'''smaller list of points to run faster'''
if n % p == 0:
liten_x_lista.append(stor_x_lista[n])
liten_y_lista.append(stor_y_lista[n])
tid_lista.append(t)
n += 1
t += h
return (liten_x_lista,liten_y_lista,tid_lista)
''' function that finds the angle'''
def vinkel(a,b,H,p):
'''import lists'''
liten_x_lista = punkt(a,b,H,p)[0]
liten_y_lista = punkt(a,b,H,p)[1]
tid_lista = punkt(a,b,H,p)[2]
'''find the angle'''
vinkel_lista = []
siffra = 0
while siffra < len(liten_x_lista):
if liten_y_lista[siffra ] >= 0:
vinkel_lista.append( np.arccos( liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2)))
siffra += 1
elif liten_y_lista[siffra] < 0 :
vinkel_lista.append( np.pi + np.arccos( -liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2) ))
siffra += 1
return (vinkel_lista, tid_lista)
def periodisk(a,b,H,p):
'''import lists'''
tid_lista = vinkel(a,b,H,p)[1]
vinkel_lista = vinkel(a,b,H,p)[0]
'''get rid of linear line to find p(t)'''
mod_lista = []
modn = 0
while modn < len(vinkel_lista):
mod_lista.append((vinkel_lista[modn] - (2*np.pi*tid_lista[modn])/np.sqrt(a**3)))
modn += 1
'''make all inputs have period 1'''
squeeze_tid = []
squeezen = 0
while squeezen < len(tid_lista):
squeeze_tid.append(tid_lista[squeezen]/np.sqrt(a**3))
squeezen += 1
del mod_lista[-1:]
del tid_lista[-1:]
del squeeze_tid[-1:]
return (squeeze_tid,mod_lista)
'''fixa 3d-punkt av p(a,b) a är konstant b varierar??? '''
def hitta_amp(a):
x_b = []
y_b = []
n_b = 0.1
while n_b <= a:
x_b.append(n_b)
y_b.append(punkt(a,n_b,10**5,10**3))
return 0
def graf(a,b,H,p):
plt.plot(periodisk(a,b,H,p)[0],periodisk(a,b,H,p)[1])
plt.show
I would assume the thing that is going wrong is that the program is running the same, slow code multiple times instead of just running it once and then accessing the data. Is the problem that everything is done locally and nothing is stored globally or is it something else?
Just as a heads up, the only thing I know about programming is basic syntax, I have no clue how to actually write and run programs. I ran all the code in spyder if that affects anything.
plt.plot(periodisk(a,b,H,p)[0],periodisk(a,b,H,p)[1])
This code runs periodisk twice with the same arguments, thus at this point we know we run things at least 2 times slower.
You should do some_var = periodisk(a,b,H,p) and then some_var[0], some_var[1]. Or just use unpacking:
plt.plot(*periodisk(a,b,H,p))
tid_lista = vinkel(a,b,H,p)[1]
vinkel_lista = vinkel(a,b,H,p)[0]
Again doing the same thing twice (total: 4*time of (current) vinkel function). Again, smart assignment to fix this:
vinkel_lista, tid_lista = vinkel(a,b,H,p)
liten_x_lista = punkt(a,b,H,p)[0]
liten_y_lista = punkt(a,b,H,p)[1]
tid_lista = punkt(a,b,H,p)[2]
And now you repeat yourself thrice. (total: 12 * time of current punkt function)
liten_x_lista, liten_y_lista, tid_lista = punkt(a,b,H,p)
punkt function is like in original, so we arrived as total being 12 times slower - which quite matches your time estimations. :)
You are calling the functions once per returned list, you should only call them once.
When a method returns multiple variables, (e.g. punkt):
def punkt(a,b,H,p):
# Here is all your code
return (liten_x_lista,liten_y_lista,tid_lista)
You must be careful to only call the function once:
result = punkt(a,b,H,p)
liten_x_lista = result[0]
liten_y_lista = result[1]
tid_lista = result[2]
# As opposed to:
liten_x_lista = punkt(a,b,H,p)[0] # 1st call, ignoring results 2 and 3
liten_y_lista = punkt(a,b,H,p)[1] # 2nd call, ignoring results 1 and 3
tid_lista = punkt(a,b,H,p)[2] # 3rd call, ignoring results 1 and 2
Note: I would personally not return a list, but use python's unpacking:
def punkt(a,b,H,p):
# Here is all your code
return liten_x_lista, liten_y_lista, tid_lista
And you'd access it:
liten_x_lista, liten_y_lista, tid_lista = punkt(a,b,H,p)
I am facing the following problem. I want to have a function that given the number of points in each hidden layer, creates the weights for a simple NN.
def initialize_parameters(hidden):
parameters = dict({})
def W_creator(b,a,i):
return tf.get_variable("W"+str(i+1), [b,a], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
def b_creator(b,i):
return tf.get_variable('b'+str(i+1), [b,1], initializer = tf.zeros_initializer())
for l in range(len(hidden)):
parameters['W'+str(l+1)] = W_creator(hidden[l+1],hidden[l],l)
parameters['b'+str(l+1)] = b_creator(hidden[l+1],l)
return parameters
I call this function then using
tf.reset_default_graph()
with tf.Session() as sess:
parameters = initialize_parameters()
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))
to check everything is fine and an IndexError is raised.
29 for l in range(len(hidden)):
---> 30 W = W_creator(hidden[l+1],hidden[l],l)
31 parameters['W'+str(l+1)] = W
32
IndexError: list index out of range
Anyone can help with this?
As you use hidden[l+1], your loop has to stop one step earlier, to avoid "out of range" IndexError:
for l in range(len(hidden)-1):
I am trying to implement dijkstra's algorithm (on an undirected graph) to find the shortest path and my code is this.
Note: I am not using heap/priority queue or anything but an adjacency list, a dictionary to store weights and a bool list to avoid cycling in the loops/recursion forever. Also, the algorithm works for most test cases but fails for this particular one here: https://ideone.com/iBAT0q
Important : Graph can have multiple edges from v1 to v2 (or vice versa), you have to use the minimum weight.
import sys
sys.setrecursionlimit(10000)
def findMin(n):
for i in x[n]:
cost[n] = min(cost[n],cost[i]+w[(n,i)])
def dik(s):
for i in x[s]:
if done[i]:
findMin(i)
done[i] = False
dik(i)
return
q = int(input())
for _ in range(q):
n,e = map(int,input().split())
x = [[] for _ in range(n)]
done = [True]*n
w = {}
cost = [1000000000000000000]*n
for k in range(e):
i,j,c = map(int,input().split())
x[i-1].append(j-1)
x[j-1].append(i-1)
try: #Avoiding multiple edges
w[(i-1,j-1)] = min(c,w[(i-1,j-1)])
w[(j-1,i-1)] = w[(i-1,j-1)]
except:
try:
w[(i-1,j-1)] = min(c,w[(j-1,i-1)])
w[(j-1,i-1)] = w[(i-1,j-1)]
except:
w[(j-1,i-1)] = c
w[(i-1,j-1)] = c
src = int(input())-1
#for i in sorted(w.keys()):
# print(i,w[i])
done[src] = False
cost[src] = 0
dik(src) #First iteration assigns possible minimum to all nodes
done = [True]*n
dik(src) #Second iteration to ensure they are minimum
for val in cost:
if val == 1000000000000000000:
print(-1,end=' ')
continue
if val!=0:
print(val,end=' ')
print()
The optimum isn't always found in the second pass. If you add a third pass to your example, you get closer to the expected result and after the fourth iteration, you're there.
You could iterate until no more changes are made to the cost array:
done[src] = False
cost[src] = 0
dik(src)
while True:
ocost = list(cost) # copy for comparison
done = [True]*n
dik(src)
if cost == ocost:
break
I was trying to understand what is the effective difference between those two pieces of code. They are both written for an assignment I got at school, but only the first one works as it should. I've been unable to understand what goes wrong in the second one so I'd be fantastically grateful if someone could shine some light on this problem.
First code:
def classify(self, obj):
if sum([c[0].classify(obj)*c[1] for c in self.classifiers]) >0:
return 1
else: return -1
def update_weights(self, best_error, best_classifier):
w=self.data_weights
for index in range(len(self.data_weights)):
if self.standard.classify(self.data[index])==best_classifier.classify(self.data[index]):
s=-1
else: s=1
self.data_weights[index] = self.data_weights[index]*math.exp(s*error_to_alpha(best_error))
Second code:
def classify(self, obj):
score = 0
for c, alpha in self.classifiers:
score += alpha * c.classify(obj)
if score > 0:
return 1
else:
return -1
def update_weights(self, best_error, best_classifier):
alpha = error_to_alpha(best_error)
for d, w in zip(self.data, self.data_weights):
if self.standard.classify(d) == best_classifier.classify(d):
w *= w * math.exp(alpha)
else:
w *= w * math.exp(-1.0*alpha)
the second doesn't modify the weights.
in the first you explicitly modify the weights array with the line
self.data_weights[index] = ...
but in the second you are only modifying w:
w *= ...
(and you have an extra factor of w). in the second case, w is a variable that is initialised from data_weights, but it is a new variable. it is not the same thing as the array entry, and changing its value does not change the array itself.
so when you later go to look at data_weights in the second case, it will not have been updated.