Genetic Algorithm Elitism Python - python

I'm trying to do the elitism method to get the best fitness value of each of the generations I generate, keeping beyond the fitness the values ​​of X and Y to be an individual of the next generation, however, I can't apply a logic using dict that Solve the problem. It remains to get this detail right to be able to finalize the complete implementation and carry out the general revisions.
import random
def generate_population(size, x_boundaries, y_boundaries):
lower_x_boundary, upper_x_boundary = x_boundaries
lower_y_boundary, upper_y_boundary = y_boundaries
population = []
for i in range(size):
individual = {
'x': random.uniform(lower_x_boundary, upper_x_boundary),
'y': random.uniform(lower_y_boundary, upper_y_boundary),
}
population.append(individual)
return population
def fitness(individual):
x = individual['x']
y = individual['y']
return abs((-(100*(x*x - y)*(x*x - y) + (1 - x)*(1-x))))
def sort_population_by_fitness(population):
return sorted(population, key=fitness)
def choice_by_roulette(sorted_population, fitness_sum):
drawn = random.uniform(0, 1)
accumulated = 0
for individual in sorted_population:
fitnessX = fitness(individual)
probability = fitnessX / fitness_sum
accumulated += probability
if drawn <= accumulated:
return individual
def crossover(choice_a, choice_b):
xa = choice_a['x']
ya = choice_a['y']
xb = choice_b['x']
yb = choice_b['y']
#xa = xa*xb
#xa = xa**0.5
#ya = ya*yb
#ya = ya**0.5
return {'x': xa+0.01, 'y': ya+0.01}
def mutate(new_individual):
x = new_individual['x']
y = new_individual['y']
flagx = 0
flagy = 0
new_x = x*(1+random.uniform(-0.01/2, 0.01/2))
new_y = y*(1+random.uniform(-0.01/2, 0.01/2))
while flagx == 1:
if (new_x > 2) or (new_x < -2):
new_x = x*(1+random.uniform(-0.01/2, 0.01/2))
flagx = 1
else:
flagx = 0
while flagy == 1:
if (new_y > 2) or (new_y < -2):
new_y = y*(1+random.uniform(-0.01/2, 0.01/2))
flagy = 1
else:
flagy = 0
return {'x': new_x, 'y': new_y}
def eletism(x_gen, milior):
pior = sort_population_by_fitness(x_gen)
fitness(pior)
print(pior)
#for i in x_gen:
#print(teste['x'])
#x = teste['x']
#y = teste['y']
#print(milior)
return pior
def make_next_gen(population):
next_gen = []
sorted_population = sort_population_by_fitness(population)
soma_fitness = sum(fitness(individual)for individual in population)
for i in range(9):
first_choice = choice_by_roulette(sorted_population, soma_fitness)
second_choice = choice_by_roulette(sorted_population, soma_fitness)
new_individual = crossover(first_choice, second_choice)
drawn = random.randint(1,5)
if drawn == 1:
new_individual = mutate(new_individual)
next_gen.append(new_individual)
return next_gen
generations = 100
population = generate_population(size=10, x_boundaries=(-2, 2), y_boundaries=(-2, 2))
i = 0
while i!= generations:
for individual in population:
print(individual, fitness(individual))
population = make_next_gen(population)
i += 1
best_individual = sort_population_by_fitness(population)[-1]
print(best_individual, fitness(best_individual))

Related

How to solve unsupported operand type(s) for +: 'dict_items' and 'dict_items' in Python

I'm trying to do Schelling’s Segregation Model in Python which takes 6 parameters: the width and height of the city, the ratio of empty houses in the city, the similarity threshold, the number of iterations, and the number of races.
There are 4 methods within the Schelling class: populate, is_unsatisfied, update, move_to_empty, and plot.
This is the code:
class Schelling:
def __init__(self, width, height, empty_ratio, similarity_threshold, n_iterations, races = 2):
self.width = width
self.height = height
self.races = races
self.empty_ratio = empty_ratio
self.similarity_threshold = similarity_threshold
self.n_iterations = n_iterations
self.empty_houses = []
self.agents = {}
def populate(self):
self.all_houses = list(itertools.product(range(self.width),range(self.height)))
random.shuffle(self.all_houses)
self.n_empty = int( self.empty_ratio * len(self.all_houses) )
self.empty_houses = self.all_houses[:self.n_empty]
self.remaining_houses = self.all_houses[self.n_empty:]
houses_by_race = [self.remaining_houses[i::self.races] for i in range(self.races)]
for i in range(self.races):
#create agents for each race
self.agents = dict(
self.agents.items() +
dict(zip(houses_by_race[i], [i+1]*len(houses_by_race[i]))).items()
)
def is_unsatisfied(self, x, y):
race = self.agents[(x,y)]
count_similar = 0
count_different = 0
if x > 0 and y > 0 and (x-1, y-1) not in self.empty_houses:
if self.agents[(x-1, y-1)] == race:
count_similar += 1
else:
count_different += 1
if y > 0 and (x,y-1) not in self.empty_houses:
if self.agents[(x,y-1)] == race:
count_similar += 1
else:
count_different += 1
if x < (self.width-1) and y > 0 and (x+1,y-1) not in self.empty_houses:
if self.agents[(x+1,y-1)] == race:
count_similar += 1
else:
count_different += 1
if x > 0 and (x-1,y) not in self.empty_houses:
if self.agents[(x-1,y)] == race:
count_similar += 1
else:
count_different += 1
if x < (self.width-1) and (x+1,y) not in self.empty_houses:
if self.agents[(x+1,y)] == race:
count_similar += 1
else:
count_different += 1
if x > 0 and y < (self.height-1) and (x-1,y+1) not in self.empty_houses:
if self.agents[(x-1,y+1)] == race:
count_similar += 1
else:
count_different += 1
if x > 0 and y < (self.height-1) and (x,y+1) not in self.empty_houses:
if self.agents[(x,y+1)] == race:
count_similar += 1
else:
count_different += 1
if x < (self.width-1) and y < (self.height-1) and (x+1,y+1) not in self.empty_houses:
if self.agents[(x+1,y+1)] == race:
count_similar += 1
else:
count_different += 1
if (count_similar+count_different) == 0:
return False
else:
return float(count_similar)/(count_similar+count_different) < self.happy_threshold
def update(self):
for i in range(self.n_iterations):
self.old_agents = copy.deepcopy(self.agents)
n_changes = 0
for agent in self.old_agents:
if self.is_unhappy(agent[0], agent[1]):
agent_race = self.agents[agent]
empty_house = random.choice(self.empty_houses)
self.agents[empty_house] = agent_race
del self.agents[agent]
self.empty_houses.remove(empty_house)
self.empty_houses.append(agent)
n_changes += 1
print (n_changes)
if n_changes == 0:
break
def move_to_empty(self, x, y):
race = self.agents[(x,y)]
empty_house = random.choice(self.empty_houses)
self.updated_agents[empty_house] = race
del self.updated_agents[(x, y)]
self.empty_houses.remove(empty_house)
self.empty_houses.append((x, y))
def plot(self, title, file_name):
fig, ax = plt.subplots()
#If you want to run the simulation with more than 7 colors, you should set agent_colors accordingly
agent_colors = {1:'b', 2:'r', 3:'g', 4:'c', 5:'m', 6:'y', 7:'k'}
for agent in self.agents:
ax.scatter(agent[0]+0.5, agent[1]+0.5, color=agent_colors[self.agents[agent]])
ax.set_title(title, fontsize=10, fontweight='bold')
ax.set_xlim([0, self.width])
ax.set_ylim([0, self.height])
ax.set_xticks([])
ax.set_yticks([])
plt.savefig(file_name)
Now that has an implementation of the Schelling class, can run different simulations and plot the results.
Build three simulations with the following characteristics:
width = 50, and height = 50 (2500 houses)
30% of empty houses
Similarity Threshold = 30% (for Simulation 1), Similarity Threshold = 50% (for Simulation 2), and Similarity Threshold = 80% (for Simulation 3)
Maximum number of iterations = 500
Number of races = 2
Start by creating and populating the cities.
schelling_1 = Schelling(50, 50, 0.3, 0.3, 500, 2)
schelling_1.populate()
schelling_2 = Schelling(50, 50, 0.3, 0.5, 500, 2)
schelling_2.populate()
schelling_3 = Schelling(50, 50, 0.3, 0.8, 500, 2)
schelling_3.populate()
After I run schelling_1.populate(), there is an error occurs.
The error is unsupported operand type(s) for +: 'dict_items' and 'dict_items' on
self.agents = dict(
self.agents.items() +
dict(zip(houses_by_race[i], [i+1]*len(houses_by_race[i]))).items()
)
How to make this error solve ?
Thank you

k means algorithm python

Whenever k = 2, the code runs in a loop
if k > 2 it sets all, but one of the centroids location to 0,0
I've reviewed it a couple of times , and it doesn't seem like there are any errors probably some sort of logic flaw. The code starts by having a class and its methods which initiate the centroids, calculate the Euclidean distance, and reassign centroids to the average positions of the points that are in the cluster. It then runs a loop that consists of reassigning and calculating distance until a list of the assignments are equal and then plots it.
class Kmeans:
def __init__(self, K, dataset, centroids, sorting):
self.K = K
self.dataset = dataset
self.centroids = centroids
self.sorting = sorting
#sets starting position of centroids
def initializeCentroids(self):
bigX = 0
bigY = 0
self.centroids = []
for i in self.dataset:
if i[0] > bigX:
bigX = i[0]
if i[1] > bigY:
bigY = i[1]
for q in range(self.K):
self.centroids.append([random.randint(0, bigX), random.randint(0, bigY)])
plt.scatter((self.centroids[0][0], self.centroids[1][0]), (self.centroids[0][1], self.centroids[1][1]))
return self.centroids
#calculates euclidean distance
def calcDistance(self):
self.sorting = []
for w in self.dataset:
print(w)
distances = []
counter = 0
for centr in self.centroids:
distances.append(math.sqrt(abs((centr[0] - w[0] * centr[0] - w[0]) + (centr[1] - w[1] * centr[1] - w[1]))))
counter += 1
if counter > 0:
try:
if distances[0] > distances[1]:
distances.pop(0)
if distances[1] > distances[0]:
distances.pop(1)
counter -= 1
except IndexError:
pass
self.sorting.append([w, counter, distances[0]])
return self.sorting
def reassignCentroids(self):
counter3 = 1
for r in range(len(self.centroids)):
positionsX = []
positionsY = []
for t in self.sorting:
if t[1] == counter3:
positionsX.append(t[0][0])
positionsY.append(t[0][1])
population = len(positionsY)
if population == 0:
population = 1
self.centroids.append([sum(positionsX) / population, sum(positionsY) / population])
counter3 += 1
self.centroids.pop(0)
return
k = 4
dataSetSize = input("Enter the amount of tuples you want generated: ")
data_set = []
for o in range(int(dataSetSize)):
data_set.append((random.randint(0, 1000), random.randint(0, 1000)))
attempt = Kmeans(k, data_set, 0, 0)
attempt.initializeCentroids()
xvals = []
yvals = []
sortCompare = []
# plots
for p in data_set:
xvals.append(p[0])
yvals.append(p[1])
running = True
while running:
if len(sortCompare) > 1:
centroidChoice0 = []
centroidChoice1 = []
for p in sortCompare[0]:
centroidChoice0.append(p[1])
for d in sortCompare[1]:
centroidChoice1.append(d[1])
print(centroidChoice1)
print(attempt.centroids)
if centroidChoice1 == centroidChoice0:
running = False
for m in attempt.centroids:
plt.scatter((attempt.centroids[0][0], attempt.centroids[1][0]), (attempt.centroids[0][1], attempt.centroids[1][1]))
running = False
sortCompare.pop(0)
attempt.calcDistance()
sortCompare.append(attempt.sorting)
attempt.reassignCentroids()

How do I get my data in my heatmap?

I've programmed Conways Game of Life in Python and now I'm trying to display the simple data that it gives me as an output in a heat map.
This is my current code:
from Tkinter import *
import matplotlib.pyplot as plt
import time
import numpy as np
import random
size_x = 100
size_y = 10
# create the matrices
cell = [[0 for row in range(0, size_y)] for col in range(0, size_x)]
live = [[0 for row in range(0, size_y)] for col in range(0, size_x)]
temp = [[0 for row in range(0, size_y)] for col in range(0, size_x)]
# process and draw the next frame
def frame():
process()
draw()
root.after(100, frame)
# load the initial data
def load(initial=0.5):
for y in range(0, size_y):
for x in range(0, size_x):
if random.random()<initial: live[x][y] = 1
temp[x][y] = 0
# Applying rules
def process():
for y in range(0, size_y):
for x in range(0, size_x):
lives = live_neighbors(x,y)
if live[x][y] == 1:
if lives < 2 or lives > 3:
temp[x][y] = 0
else:
temp[x][y] = 1
if live[x][y] == 0:
if lives == 3:
temp[x][y] = 1
else:
temp[x][y] = 0
for y in range(0, size_y):
for x in range(0, size_x):
live[x][y] = temp[x][y]
# live = temp
# Count live neighbors
def live_neighbors(a,b):
lives = 0
if live[a][(b+1)%size_y] == 1: lives += 1
if live[a][(b-1)%size_y] == 1: lives += 1
if live[(a+1)%size_x][b] == 1: lives += 1
if live[(a+1)%size_x][(b+1)%size_y] == 1: lives += 1
if live[(a+1)%size_x][(b-1)%size_y] == 1: lives += 1
if live[(a-1)%size_x][b] == 1: lives += 1
if live[(a-1)%size_x][(b+1)%size_y] == 1: lives += 1
if live[(a-1)%size_x][(b-1)%size_y] == 1: lives += 1
return lives
# Draw all cells
def draw():
nLiving = 0
nDead = 0
for y in range(size_y):
for x in range(size_x):
if live[x][y]==0:
canvas.itemconfig(cell[x][y], fill="black")
nDead+=1
if live[x][y]==1:
canvas.itemconfig(cell[x][y], fill="white")
nLiving+=1
print nLiving,nDead
# count cells
def count():
nLiving = 0
nDead = 0
for y in range(size_y):
for x in range(size_x):
if live[x][y]==0:
nDead+=1
if live[x][y]==1:
nLiving+=1
z = nLiving / 10.0
print z,
print "%"
def one_game(initial):
load(initial)
for gen in range(1, 101):
print str(gen) + ":",
count()
process()
def many_games():
numbers = range(1,51)
for initial in numbers:
print initial/100.0
one_game(initial/100.0)
many_games()
#one_game(0.5)
The code for making a normal heat map with given input would be:
fig, ax = plt.subplots(1)
x = np.array( [[11,12,13], [21,22,23], [31,32,33]] )
p = ax.pcolormesh(x)
fig.colorbar(p)
plt.show()
How do I get my data (which in this case would be, the generations, the value which initializes the one_game() function, and nLiving) into an array?
I'm not 100% sure this is what you're intending, but it produced a pretty output heat map :)
def count():
nLiving = 0
nDead = 0
for y in range(size_y):
for x in range(size_x):
if live[x][y]==0:
nDead+=1
if live[x][y]==1:
nLiving+=1
z = nLiving / 10.0
print("nLiving over ten is: ", z,)
print("%")
return nLiving
def one_game(initial):
load(initial)
gen_array = []
for gen in range(1, 101):
print("Gen: ", str(gen) + ":",)
nLiving = count()
process()
gen_array.append(nLiving)
return gen_array
def many_games():
gen_output = []
numbers = range(1,51)
for initial in numbers:
print(initial/100.0)
gen_array = one_game(initial/100.0)
gen_output.append(gen_array)
return gen_output
gen_output = many_games()
#one_game(0.5)
fig, ax = plt.subplots(1)
x = np.array( gen_output )
p = ax.pcolormesh(x)
fig.colorbar(p)
plt.show()
That is just code modified from your count function to the end of the file. Basically you just need to return the output from the functions that you're calling into the right kind of data structures, I think...

SVM: problems with SMO algorithm

I'm currently trying to code a non linear SVM for handwritten digits recognition using the MNIST data base.
I chose to use the SMO algorithm (based on Platt's paper and other books), but I have some trouble implementing it.
When I run the code over the training set, the bias goes higher and higher, sometimes until "Inf" value, leading the SVM to "classify" every example in the same class.
Here is my code:
import numpy
import gzip
import struct
import matplotlib
from sklearn import datasets
from copy import copy
class SVM:
def __init__(self, constant, data_set, label_set):
self._N = len(data_set)
if self._N != len(label_set):
raise Exception("Data size and label size don't match.")
self._C = constant
self._epsilon = 0.001
self._tol = 0.001
self._data = [numpy.ndarray.flatten((1/255)*elt) for elt in data_set]
self._dimension = len(self._data[0])
self._label = label_set
self._alphas = numpy.zeros((1, self._N))
self._b = 0
self._errors = numpy.ndarray((2, 0))
def kernel(self, x1, x2):
x1 = x1.reshape(1,self._dimension)
result = numpy.power(numpy.dot(x1, x2), 3)
return result
def evaluate(self, x):
result = 0
i = 0
while i < self._N:
result += self._alphas[0, i]*self._label[i]*self.kernel(x, self._data[i])
i += 1
result += self._b
return result
def update(self, i1, i2, E2):
i1 = int(i1)
i2 = int(i2)
if i1 == i2:
return 0
y1 = self._label[i1]
y2 = self._label[i2]
alpha1 = self._alphas[0, i1]
alpha2 = self._alphas[0, i2]
#If alpha1 is non-bound, its error is in the cache.
#So we check its position to extract its error.
#Else, we compute it.
if alpha1 > 0 and alpha1 < self._C :
position = 0
for i, elt in enumerate(self._errors[0, :]):
if elt == i1:
position = i
E1 = self._errors[1, position]
else:
E1 = self.evaluate(self._data[i1]) - y1
s = y1*y2
H = L = 0
if y1 != y2:
L = max(0, alpha2 - alpha1)
H = min(self._C, self._C + alpha2 - alpha1)
else:
L = max(0, alpha2 + alpha1 - self._C)
H = min(self._C, alpha2 + alpha1)
if H == L:
return 0
K11 = self.kernel(self._data[i1], self._data[i1])
K12 = self.kernel(self._data[i1], self._data[i2])
K22 = self.kernel(self._data[i2], self._data[i2])
eta = K11 + K22 - 2*K12
if eta > 0:
alpha2_new = alpha2 + (y2*(E1 - E2)/eta)
if alpha2_new < L:
alpha2_new = L
elif alpha2_new > H:
alpha2_new = H
else:
f1 = y1*(E1 + self._b) - alpha1*K11 - s*alpha2*K12
f2 = y2*(E2 + self._b) - alpha2*K22 - s*alpha1*K12
L1 = alpha1 + s*(alpha2 - L)
H1 = alpha1 + s*(alpha2 - H)
FuncL = L1*f1 + L*f2 + (1/2)*numpy.square(L1)*K11 + (1/2)*numpy.square(L)*K22 + s*L1*L*K12
FuncH = H1*f1 + H*f2 + (1/2)*numpy.square(H1)*K11 + (1/2)*numpy.square(H)*K22 + s*H1*H*K12
if FuncL < FuncH - self._epsilon:
alpha2_new = L
elif FuncL > FuncH + self._epsilon:
alpha2_new = H
else:
alpha2_new = alpha2
if numpy.abs(alpha2_new - alpha2) < self._epsilon*(alpha2_new+alpha2+ self._epsilon):
return 0
alpha1_new = alpha1 + s*(alpha2 - alpha2_new)
#Update of the threshold.
b1 = E1 + y1*(alpha1_new - alpha1)*K11 + y2*(alpha2_new - alpha2)*K12 + self._b
b2 = E2 + y1*(alpha1_new - alpha1)*K12 + y2*(alpha2_new - alpha2)*K22 + self._b
if L < alpha1_new < H:
b_new = b1
elif L < alpha2_new < H:
b_new = b2
else:
b_new = (b1+b2)/2
#Update the cache error
#If alpha2 was bound and its new value is non-bound, we add its index and its error to the cache.
#If alpha2 was unbound and its new value is bound, we delete it from the cache.
if (alpha2 == 0 or alpha2 == self._C) and (alpha2_new > 0 and alpha2_new < self._C):
vector_alpha2_new = numpy.array([i2, E2])
vector_alpha2_new = vector_alpha2_new.reshape((2, 1))
self._errors = numpy.concatenate((self._errors, vector_alpha2_new), 1)
if (alpha2 > 0 and alpha2 < self._C) and (alpha2_new == 0 or alpha2_new == self._C):
l = 0
position = 0
while l < len(self._errors[0, :]):
if self._errors[0, l] == i2:
position = l
l += 1
self._errors = numpy.delete(self._errors, position, 1)
#We do the exact same thing with alpha1.
if (alpha1 == 0 or alpha1 == self._C) and (alpha1_new > 0 and alpha1_new < self._C):
vector_alpha1_new = numpy.array([i1, E1])
vector_alpha1_new = vector_alpha1_new.reshape((2, 1))
self._errors = numpy.concatenate((self._errors, vector_alpha1_new), 1)
if (alpha1 > 0 and alpha1 < self._C) and (alpha1_new == 0 or alpha1_new == self._C):
l = 0
position = 0
while l < len(self._errors[0, :]):
if self._errors[0, l] == i1:
position = l
l += 1
self._errors = numpy.delete(self._errors, position, 1)
#Then we update the error for each non bound point using the new values for alpha1 and alpha2.
for i,error in enumerate(self._errors[1, :]):
self._errors[1, i] = error + (alpha2_new - alpha2)*y2*self.kernel(self._data[i2], self._data[int(self._errors[0, i])]) + (alpha1_new - alpha1)*y1*self.kernel(self._data[i1], self._data[int(self._errors[0, i])]) - self._b + b_new
#Storing the new values of alpha1 and alpha2:
self._alphas[0, i1] = alpha1_new
self._alphas[0, i2] = alpha2_new
self._b = b_new
print(self._errors)
return 1
def examineExample(self, i2):
i2 = int(i2)
y2 = self._label[i2]
alpha2 = self._alphas[0, i2]
if alpha2 > 0 and alpha2 < self._C:
position = 0
for i, elt in enumerate(self._errors[0, :]):
if elt == i2:
position = i
E2 = self._errors[1, position]
else:
E2 = self.evaluate(self._data[i2]) - y2
r2 = E2*y2
if (r2< -self._tol and alpha2 < self._C) or (r2 > self._tol and alpha2 > 0):
n = numpy.shape(self._errors)[1]
if n > 1:
i1 = 0
if E2 > 0:
min = self._errors[1, 0]
position = 0
for l, elt in enumerate(self._errors[1, :]):
if elt < min:
min = elt
position = l
i1 = self._errors[0, position]
else:
max = self._errors[1, 0]
position = 0
for l, elt in enumerate(self._errors[1, :]):
if elt > max:
max = elt
position = l
i1 = self._errors[0, position]
if self.update(i1, i2, E2):
return 1
#loop over all non bound examples starting at a random point.
list_index = [i for i in range(n)]
numpy.random.shuffle(list_index)
for i in list_index:
i1 = self._errors[0, i]
if self.update(i1, i2, E2):
return 1
#Loop over all the training examples, starting at a random point.
list_bound = [i for i in range(self._N) if not numpy.any(self._errors[0, :] == i)]
numpy.random.shuffle(list_bound)
for i in list_bound:
i1 = i
if self.update(i1, i2, E2):
return 1
return 0
def SMO(self):
numChanged = 0
examineAll = 1
cpt = 1
while(numChanged > 0 or examineAll):
numChanged = 0
if examineAll == 1:
for i in range(self._N):
numChanged += self.examineExample(i)
else:
for i in self._errors[0, :]:
numChanged += self.examineExample(i)
if examineAll == 1:
examineAll = 0
elif numChanged == 0:
examineAll = 1
cpt += 1
def load_training_data(a, b):
train = gzip.open("train-images-idx3-ubyte.gz", "rb")
labels = gzip.open("train-labels-idx1-ubyte.gz", "rb")
train.read(4)
labels.read(4)
number_images = train.read(4)
number_images = struct.unpack(">I", number_images)[0]
rows = train.read(4)
rows = struct.unpack(">I", rows)[0]
cols = train.read(4)
cols = struct.unpack(">I", cols)[0]
number_labels = labels.read(4)
number_labels = struct.unpack(">I", number_labels)[0]
image_list = []
label_list = []
if number_images != number_labels:
raise Exception("The number of labels doesn't match with the number of images")
else:
for l in range(number_labels):
if l % 1000 == 0:
print("l:{}".format(l))
mat = numpy.zeros((rows, cols), dtype = numpy.uint8)
for i in range(rows):
for j in range(cols):
pixel = train.read(1)
pixel = struct.unpack(">B", pixel)[0]
mat[i][j] = pixel
image_list += [mat]
lab = labels.read(1)
lab = struct.unpack(">B", lab)[0]
label_list += [lab]
train.close()
labels.close()
i = 0
index_a = []
index_b = []
while i < number_labels:
if label_list[i] == a:
index_a += [i]
elif label_list[i] == b:
index_b += [i]
i += 1
image_list = [m for i,m in enumerate(image_list) if (i in index_a) | (i in index_b)]
mean = (a+b)/2
label_list = [ numpy.sign(m - mean) for l,m in enumerate(label_list) if l in index_a+index_b]
return ([image_list, label_list])
def load_test_data():
test = gzip.open("t10k-images-idx3-ubyte.gz", "rb")
labels = gzip.open("t10k-labels-idx1-ubyte.gz", "rb")
test.read(4)
labels.read(4)
number_images = test.read(4)
number_images = struct.unpack(">I", number_images)[0]
rows = test.read(4)
rows = struct.unpack(">I", rows)[0]
cols = test.read(4)
cols = struct.unpack(">I", cols)[0]
number_labels = labels.read(4)
number_labels = struct.unpack(">I", number_labels)[0]
image_list = []
label_list = []
if number_images != number_labels:
raise Exception("The number of labels doesn't match with the number of images")
else:
for l in range(number_labels):
if l % 1000 == 0:
print("l:{}".format(l))
mat = numpy.zeros((rows, cols), dtype = numpy.uint8)
for i in range(rows):
for j in range(cols):
pixel = test.read(1)
pixel = struct.unpack(">B", pixel)[0]
mat[i][j] = pixel
image_list += [mat]
lab = labels.read(1)
lab = struct.unpack(">B", lab)[0]
label_list += [lab]
test.close()
labels.close()
return ([image_list, label_list])
data = load_training_data(0, 7)
images_training = data[0]
labels_training = data[1]
svm = SVM(0.1, images_training[0:200], labels_training[0:200])
svm.SMO()
def view(image, label=""):
print("Number : {}".format(label))
pylab.imshow(image, cmap = pylab.cm.gray)
pylab.show()
First, SMO is a fairly complicated algorithm - it is not one easy to debug in this kind of format.
Second, you are starting too high up in your testing. Some advice to help you debug your problems.
1) First, switch to using the linear kernel. Its much easier for you to compute the exact linear solution with another algorithm and compare what you are getting with the exact solution. This way its only the weight vectors and bias term. If you stay in the dual space, you'll have to compare all the coefficients and make sure things stay in the same order.
2) Start with a much simpler 2D problem where you know what the general solution should look like. You can then visualize the solution, and watch as it changes at each step - this can be a visual tool to help you find where something goes wrong.
One important thing is you said this:
b1 = E1 + y1*(alpha1_new - alpha1)*K11 + y2*(alpha2_new - alpha2)*K12 + self._b
b2 = E2 + y1*(alpha1_new - alpha1)*K12 + y2*(alpha2_new - alpha2)*K22 + self._b
Basically you're just adding to b every time with this code. Your b's should look more like this:
b1 = smo.b - E1 - y1 * (a1 - alpha1) * smo.K[i1, i1] - y2 * (a2 - alpha2) * smo.K[i1, i2]
b2 = smo.b - E2 - y1 * (a1 - alpha1) * smo.K[i1, i2] - y2 * (a2 - alpha2) * smo.K[i2, i2]
This version is not perfect, but I recommend checking apex51's version on Github for pointers:
SVM-and-sequential-minimal-optimization
The mathematical basis in the notes are very strong (despite some minor discrepancies with Platt's paper) and the code is not perfect, but a good direction for you. I would also suggest looking at other, completed SMOs and trying to tweak that code to math your needs instead of writing from scratch.

python script speed improvements

for my robot I am analyzing laser range data. I need to analyze a lot of samples per second. So speed is required.
I know python is not the right language based on this - but I don't want to switch for now as I am in the prototyping phase (will see if I ever get out of it :-) ).
At the moment I am stuck on squeezing more speed out of the analyzing code I have.
I pulled out the relevant code and created a small test. It would be brilliant if someone could give me a some hints on where to improve speed in this test script.
from math import degrees, radians, sin, cos, fabs
import time
class NewRobotMap(object):
def __init__(self, sizeX, sizeY, Resolution, RobotPosX, RobotPosY, RobotTheta, ServoPos, mapMaxOcc, mapMaxFree, OccValue, EmptyValue):
self.sizeX = sizeX
self.sizeY = sizeY
self.RobotPosX = int(RobotPosX)
self.RobotPosY = int(RobotPosY)
self.mapResolution = int(Resolution)
self.StartPosX = int(RobotPosX)
self.StartPosY = int(RobotPosY)
self.RobotTheta = float(RobotTheta)
self.EmptyValue = EmptyValue
self.ServoPos = ServoPos
self.mapMaxOcc = mapMaxOcc
self.mapMaxFree = mapMaxFree
self.mapOccValue = OccValue
self.RobotPosOldX = ""
self.RobotPosOldY = ""
def clear(self):
self.RobotMap = [[self.EmptyValue for i in xrange(self.sizeY)] for j in xrange(self.sizeX)]
def updateMap(self ,x ,y , Val):
oldval = self.RobotMap[x][y]
self.RobotMap[x][y]=self.RobotMap[x][y] + Val
if self.RobotMap[x][y] > self.mapMaxOcc:
self.RobotMap[x][y] = self.mapMaxOcc
elif self.RobotMap[x][y] < self.mapMaxFree:
self.RobotMap[x][y] = self.mapMaxFree
return oldval, self.RobotMap[x][y]
def setOcc(self,x,y):
self.RobotMap[x][y] = self.mapMaxOcc
def updateRobot(self,theta,x,y):
robotThetaold=self.RobotTheta
self.RobotTheta = float(theta)
self.RobotPosX = int(round(self.StartPosX + float(int(x)/self.mapResolution), 0))
self.RobotPosY = int(round(self.StartPosY - float(int(y)/self.mapResolution),0))
if x != self.RobotPosOldX or y != self.RobotPosOldX:
self.RobotPosOldX = x
self.RobotPosOldY = y
return True
else:
self.RobotPosOldX = x
self.RobotPosOldY = y
return False
def getRobotPos(self):
return self.RobotPosX, self.RobotPosY
def display(self):
s = [[str(e) for e in row] for row in self.RobotMap]
lens = [len(max(col, key=len)) for col in zip(*s)]
fmt = '\t'.join('{{:{}}}'.format(x) for x in lens)
table = [fmt.format(*row) for row in s]
print '\n'.join(table)
def updateServoPos(self, newServoPos):
self.ServoPos = newServoPos
templateData = {
'MapWidth' : 800,
'MapHeight': 600,
'StartPosX' : 500,
'StartPosY' : 300,
'StartTheta' : 0,
'Resolution' : 5,
'mapThresholdFree' : 126,
'mapThresholdOcc' : 130, #169
'EmptyValue' : 128,
'mapMaxOcc' : 137,
'mapMaxFree' : 119,
'ServoPos' : 0,
'CurrentPosX' : 0,
'CurrentPosY' : 0,
'CurrentTheta' : 0,
'SafeZone' : 10
}
templateData["MapHeight"] = templateData["MapHeight"] / templateData["Resolution"]
templateData["MapWidth"] = templateData["MapWidth"] / templateData["Resolution"]
templateData["StartPosX"] = templateData["StartPosX"] / templateData["Resolution"]
templateData["StartPosY"] = templateData["StartPosY"] / templateData["Resolution"]
def updateSonarCalcMapVal(val):
mapThresholdFree = templateData["mapThresholdFree"]
mapThresholdOcc = templateData["mapThresholdOcc"]
#oldval
if val[0] <= mapThresholdFree:
oldval = 0
elif mapThresholdFree < val[0] < mapThresholdOcc:
oldval = 1
elif val[0] >= mapThresholdOcc:
oldval = 2
# newval
if val[1] <= mapThresholdFree:
newval = 0
elif mapThresholdFree < val[1] < mapThresholdOcc:
newval = 1
elif val[1] >= mapThresholdOcc:
newval = 2
if oldval != newval:
return newval
else:
return 'n'
def dur( op=None, clock=[time.time()] ):
if op != None:
duration = time.time() - clock[0]
print '%s finished. Duration %.6f seconds.' % (op, duration)
clock[0] = time.time()
def updateIRWrite(RobotPos, coord, updateval):
XtoUpdate=RobotPos[0] + coord[0]
YtoUpdate=RobotPos[1] - coord[1]
val = map.updateMap(XtoUpdate, YtoUpdate , updateval)
newval=updateSonarCalcMapVal(val)
########### main Script #############
map=NewRobotMap(templateData["MapWidth"],templateData["MapHeight"], templateData["Resolution"], templateData["StartPosX"],templateData["StartPosY"], templateData["StartTheta"], templateData["ServoPos"],templateData["mapMaxOcc"],templateData["mapMaxFree"],templateData["mapThresholdOcc"],templateData["EmptyValue"])
map.clear()
dur()
for x in xrange(0,10001*40):
updateIRWrite((100,100), (10,10), 1)
dur("loops")
I tried a numpy array as self.RobotMap in the NewRobotMap class/object. But this was much slower.
Few tips
Minimize too deep redirections
Your code here:
def updateMap(self ,x ,y , Val):
oldval = self.RobotMap[x][y]
self.RobotMap[x][y]=self.RobotMap[x][y] + Val
if self.RobotMap[x][y] > self.mapMaxOcc:
self.RobotMap[x][y] = self.mapMaxOcc
elif self.RobotMap[x][y] < self.mapMaxFree:
self.RobotMap[x][y] = self.mapMaxFree
return oldval, self.RobotMap[x][y]
is all the time repeating self.RobotMap[x][y] what requires 4 levels of hops to get the value (self -> RobotMap -> [x] -> [y])
This can be optimized:
In place update
old:
self.RobotMap[x][y]=self.RobotMap[x][y] + Val
new (saving diving for existing value second time)
self.RobotMap[x][y] += Val
Use local variable instead of deeply nested structure
def updateMap(self ,x ,y , Val):
oldval = self.RobotMap[x][y]
newval = oldval + Val
if newval > self.mapMaxOcc:
newval = self.mapMaxOcc
elif newval < self.mapMaxFree:
newval = self.mapMaxFree
return oldval, newval
Note, that your old return oldval, self.RobotMap[x][y] is not only returning a value, but you have already modified the self.RobotMap[x][y] anyway (as it is mutable), so if you rely on that, you could be surprised.
Using global variables instead of tempateData dictionary
Changing dictionary into global variable speeded up the run a bit as it removed one level ov indirection. I know, it looks nasty, but this may happen with optimization.
Skip returning self.RobotMap[x][y]
Consider saving returning self.RobotMap[x][y] if this not necessary, or if you have already changed that value.
Quick clear
change original:
def clear(self):
self.RobotMap = [[self.EmptyValue for i in xrange(self.sizeY)] for j in xrange(self.sizeX)]
to:
def clear(self):
self.RobotMap = self.sizeY * [self.sizeY * [self.EmptyValue]]
My test show about twice as fast execution for x = 3, y = 5, larger sizez could be even better.
Modified code - from 0.790581 to 0.479875 seconds
from math import degrees, radians, sin, cos, fabs
import time
templ_MapWidth = 800
templ_MapHeight = 600
templ_StartPosX = 500
templ_StartPosY = 300
templ_StartTheta = 0
templ_Resolution = 5
templ_mapThresholdFree = 126
templ_mapThresholdOcc = 130
templ_EmptyValue = 128
templ_mapMaxOcc = 137
templ_mapMaxFree = 119
templ_ServoPos = 0
templ_CurrentPosX = 0
templ_CurrentPosY = 0
templ_CurrentTheta = 0
templ_SafeZone = 10
templ_MapHeight = templ_MapHeight / templ_Resolution
templ_MapWidth = templ_MapWidth / templ_Resolution
templ_StartPosX = templ_StartPosX / templ_Resolution
templ_StartPosY = templ_StartPosY / templ_Resolution
class NewRobotMap(object):
def __init__(self, sizeX, sizeY, Resolution, RobotPosX, RobotPosY, RobotTheta, ServoPos, mapMaxOcc, mapMaxFree, OccValue, EmptyValue):
self.sizeX = sizeX
self.sizeY = sizeY
self.RobotPosX = int(RobotPosX)
self.RobotPosY = int(RobotPosY)
self.mapResolution = int(Resolution)
self.StartPosX = int(RobotPosX)
self.StartPosY = int(RobotPosY)
self.RobotTheta = float(RobotTheta)
self.EmptyValue = EmptyValue
self.ServoPos = ServoPos
self.mapMaxOcc = mapMaxOcc
self.mapMaxFree = mapMaxFree
self.mapOccValue = OccValue
self.RobotPosOldX = ""
self.RobotPosOldY = ""
def clear(self):
self.RobotMap = self.sizeX * [self.sizeY * [self.EmptyValue]]
def updateMap(self, x, y, Val):
oldval = self.RobotMap[x][y]
newval = oldval + Val
if newval < self.mapMaxFree:
return oldval, self.mapMaxFree
if newval > self.mapMaxOcc:
return oldval, self.mapMaxOcc
return oldval, newval
def setOcc(self, x, y):
self.RobotMap[x][y] = self.mapMaxOcc
def updateRobot(self, theta, x, y):
robotThetaold = self.RobotTheta
self.RobotTheta = float(theta)
self.RobotPosX = int(round(self.StartPosX + float(int(x)/self.mapResolution), 0))
self.RobotPosY = int(round(self.StartPosY - float(int(y)/self.mapResolution), 0))
if x != self.RobotPosOldX or y != self.RobotPosOldX:
self.RobotPosOldX = x
self.RobotPosOldY = y
return True
else:
self.RobotPosOldX = x
self.RobotPosOldY = y
return False
def getRobotPos(self):
return self.RobotPosX, self.RobotPosY
def display(self):
s = [[str(e) for e in row] for row in self.RobotMap]
lens = [len(max(col, key=len)) for col in zip(*s)]
fmt = '\t'.join('{{:{}}}'.format(x) for x in lens)
table = [fmt.format(*row) for row in s]
print '\n'.join(table)
def updateServoPos(self, newServoPos):
self.ServoPos = newServoPos
def updateSonarCalcMapVal(org, new):
mapThresholdFree = templ_mapThresholdFree
mapThresholdOcc = templ_mapThresholdOcc
#oldval
if org <= mapThresholdFree:
oldval = 0
elif mapThresholdFree < org < mapThresholdOcc:
oldval = 1
elif org >= mapThresholdOcc:
oldval = 2
# newval
if new <= mapThresholdFree:
newval = 0
elif mapThresholdFree < new < mapThresholdOcc:
newval = 1
elif new >= mapThresholdOcc:
newval = 2
if oldval != newval:
return newval
else:
return 'n'
def dur(op=None, clock=[time.time()]):
if op != None:
duration = time.time() - clock[0]
print '%s finished. Duration %.6f seconds.' % (op, duration)
clock[0] = time.time()
def updateIRWrite(RobotPos, coord, updateval):
XtoUpdate = RobotPos[0] + coord[0]
YtoUpdate = RobotPos[1] - coord[1]
newval = updateSonarCalcMapVal(*mymap.updateMap(XtoUpdate, YtoUpdate, updateval))
########### main Script #############
mymap = NewRobotMap(templ_MapWidth, templ_MapHeight, templ_Resolution, templ_StartPosX, templ_StartPosY, templ_StartTheta, templ_ServoPos, templ_mapMaxOcc, templ_mapMaxFree, templ_mapThresholdOcc, templ_EmptyValue)
mymap.clear()
dur()
for x in xrange(0, 10001*40):
updateIRWrite((100, 100), (10, 10), 1)
dur("loops")
Conclusions
The code definitely needs review for doing correct work. E.g. there are methods, which are not used at all and other calls, which never use returned value.
But some optimization could be shown. Generally, following is good to follow:
Make your code running correctly first
Clarify what is acceptable speed, do not optimize, if not necessary
Measure, profile
Start optimizing in busiest loops, there are best chances to speed things up. In them, each line of code counts.
Can you install PyPy and run your script with it instead of CPython (the default) ? It should work as a drop-in replacement of CPython.
http://pypy.org/
It is based on (tracing?) JIT and famous for its high runtime performance.
http://speed.pypy.org/

Categories