I'm having a problem calling a value from one class into a new iterative loop outside of that class. The code is shown below (data and newdata are vectors):
class A:
def __init__(self, k, tol=0.0001, max_iter=300):
self.k = k
self.tol = tol
self.max_iter = max_iter
def fit(self, data):
self.centroids = {}
for i in range(self.k):
self.centroids[i] = data[i+50]
for i in range(self.max_iter):
self.classifications = {}
for i in range(self.k):
self.classifications[i] = []
for featureset in data:
distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
classification = distances.index(min(distances))
self.classifications[classification].append(featureset)
prev_centroids = dict(self.centroids)
for classification in self.classifications:
self.centroids[classification] = np.average(self.classifications[classification], axis=0)
optimized = True
for c in self.centroids:
original_centroid = prev_centroids[c]
current_centroid = self.centroids[c]
if np.sum((current_centroid - original_centroid) / original_centroid * 100.0) > self.tol:
#print(np.sum((current_centroid - original_centroid) / original_centroid * 100.0))
optimized = False
if optimized:
break
def cluster_labels(self,data):
cluster_labels = []
for featureset in data:
distances=[np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
cluster_labels.append(distances.index(min(distances)))
return cluster_labels
def predict(self, data):
distances = [np.linalg.norm(data - self.centroids[centroid]) for centroid in self.centroids]
classification = distances.index(min(distances))
return classification
def update(self, new_data, delta):
for featureset in new_data:
distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
if min(distances) < delta:
classification = distances.index(min(distances))
self.classifications[classification].append(featureset)
self.centroids[classification] = np.average(self.classifications[classification], axis=0)
else:
self.centroids[self.k] = featureset
self.classifications[self.k] = []
self.classifications[self.k].append(featureset)
self.k = self.k + 1
k = self.k
print (k)
return k
class Recorder:
def __init__(rec):
rec.p = pyaudio.PyAudio()
rec.stream = rec.p.open(format = pyaudio.paInt16, channels = 1, rate = 44100, input = True, input_device_index = 2, frames_per_buffer = chunk)
def write():
a = A(k=3)
a.fit(data)
k=a.update(newdata,20)
for num in range(1,100):
rec.Recorder()
rec.write()
Initially, I want to set k = 3. Then the value of k should be updated with k = a.update(newdata, 20). However, on every run the value of k stays at 3. And if I set k = 3 outside of the classes, it always shows this error:
UnboundLocalError: local variable 'k' referenced before assignment
How could I solve this problem?
The issue is in this function:
def update(self, new_data, delta):
for featureset in new_data:
distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
if min(distances) < delta:
classification = distances.index(min(distances))
self.classifications[classification].append(featureset)
self.centroids[classification] = np.average(self.classifications[classification], axis=0)
else:
self.centroids[self.k] = featureset
self.classifications[self.k] = []
self.classifications[self.k].append(featureset)
self.k = self.k + 1
k = self.k
You are only setting the "k" value inside the "else" block. Leaving out anything unrelated, it looks like this:
def update(self, new_data, delta):
for featureset in new_data:
...
if min(distances) < delta:
...
else:
...
k = self.k
print (k) # <-- error here
return k # <-- error here
In the case where min(distances) >= delta, k will not be set and you will get the error you report.
You have two options:
Add a k = ... line into the if-block where min(distances) < delta
Add a k = ... line just above the if-block (still inside the for-block) to set a "default" value for k
On review, it is also possible that you simply need to return self.k instead of k.
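For example, a minimal sketch of that last option, based on your own update() method, returning self.k (which is always defined on the instance) so nothing depends on the local k:
def update(self, new_data, delta):
    for featureset in new_data:
        distances = [np.linalg.norm(featureset - self.centroids[c]) for c in self.centroids]
        if min(distances) < delta:
            # assign to the nearest existing cluster and refresh its centroid
            classification = distances.index(min(distances))
            self.classifications[classification].append(featureset)
            self.centroids[classification] = np.average(self.classifications[classification], axis=0)
        else:
            # too far from every centroid: open a new cluster
            self.centroids[self.k] = featureset
            self.classifications[self.k] = [featureset]
            self.k += 1
    print(self.k)
    return self.k  # defined no matter which branch ran (or even if new_data is empty)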
I'm trying to implement a genetic algorithm for solving the Travelling Salesman Problem (TSP).
I have 2 classes, which are City and Fitness.
I have written the code for initialization.
class City:
def __init__(self, x, y):
self.x = x
self.y = y
def distance(self, city):
xDis = abs(self.x - city.x)
yDis = abs(self.y - city.y)
distance = np.sqrt((xDis ** 2) + (yDis ** 2))
return distance
def __repr__(self):
return "(" + str(self.x) + "," + str(self.y) + ")"
class Fitness:
def __init__(self, route):
self.route = route
self.distance = None
self.fitness = None
def routeDistance(self):
if self.distance == None:
pathDistance = 0.0
for i in range(0, len(self.route)):
fromCity = self.route[i]
toCity = None
if i+1 < len(self.route):
toCity = self.route[i+1]
else:
toCity = self.route[0]
pathDistance += fromCity.distance(toCity)
self.distance = pathDistance
return self.distance
def routeFitness(self):
if self.fitness == None:
self.fitness = 1 / float(self.routeDistance())
return self.fitness
def selection(population, size=None):
if size== None:
size= len(population)
matingPool = []
fitnessResults = {}
for i in range(0, size):
fitnessResults[i] = Fitness(population[i]).routeFitness()
matingPool.append(random.choice(population))
return matingPool
The code above just randomly selects a parent in the selection method.
My question is: how do I code selecting a parent using a roulette wheel?
You could try this [1, 2]:
from numpy.random import choice
def selection(population, size=None):
if size== None:
size= len(population)
fitnessResults = []
for i in range(0, size):
fitnessResults.append(Fitness(population[i]).routeFitness())
sum_fitness = sum(fitnessResults)
probability_lst = [f/sum_fitness for f in fitnessResults]
matingPool = choice(population, size=size, p=probability_lst)
return matingPool
So basically, the higher a fitness value, the higher its chances of being chosen. That works when a higher fitness value means a better solution, but in TSP a shorter route is better, so the selection probability needs to be inversely proportional to the route distance.
Here is something I had implemented in Python, with some changes:
def choose_parent_using_RWS(genes, S):
P = random.uniform(0, S)
for x in genes:
P += get_fitness_value(x)
if P > S:
return x
return genes[-1]
where S is the sum of the inverses of the fitness values of the current population (i.e., 1/f1 + 1/f2 + 1/f3 + ...), and get_fitness_value(x) returns the inverse of the distance, just like your routeFitness() function.
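Tying it together, a rough sketch (assuming the Fitness class and the random import from the question are in scope, and choose_parent_using_RWS as defined above) of computing S once per generation and filling the mating pool with the roulette wheel:
def get_fitness_value(route):
    # inverse of the route distance, as in routeFitness()
    return 1.0 / Fitness(route).routeDistance()

def selection(population, size=None):
    if size is None:
        size = len(population)
    # S: sum of the inverse distances over the whole population
    S = sum(get_fitness_value(route) for route in population)
    return [choose_parent_using_RWS(population, S) for _ in range(size)]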
In Mathematica I can convert multivariable moments in cumulants and back using MomentConvert:
MomentConvert[Cumulant[{2, 2,1}], "Moment"] // TraditionalForm
as one can try in wolframcloud.
I would like to do exactly the same in Python. Is there any library in Python capable of this?
At least one direction I have now programmed myself:
# from http://code.activestate.com/recipes/577211-generate-the-partitions-of-a-set-by-index/
from collections import defaultdict
class Partition:
def __init__(self, S):
self.data = list(S)
self.m = len(S)
self.table = self.rgf_table()
def __getitem__(self, i):
#generates set partitions by index
if i > len(self) - 1:
raise IndexError
L = self.unrank_rgf(i)
result = self.as_set_partition(L)
return result
def __len__(self):
return self.table[self.m,0]
def as_set_partition(self, L):
# Transform a restricted growth function into a partition
n = max(L[1:]+[1])
m = self.m
data = self.data
P = [[] for _ in range(n)]
for i in range(m):
P[L[i+1]-1].append(data[i])
return P
def rgf_table(self):
# Compute the table values
m = self.m
D = defaultdict(lambda:1)
for i in range(1,m+1):
for j in range(0,m-i+1):
D[i,j] = j * D[i-1,j] + D[i-1,j+1]
return D
def unrank_rgf(self, r):
# Unrank a restricted growth function
m = self.m
L = [1 for _ in range(m+1)]
j = 1
D = self.table
for i in range(2,m+1):
v = D[m-i,j]
cr = j*v
if cr <= r:
L[i] = j + 1
r -= cr
j += 1
else:
L[i] = r // v + 1
r %= v
return L
# S = set(range(4))
# P = Partition(S)
# for x in P:
# print (x)
# using https://en.wikipedia.org/wiki/Cumulant#Joint_cumulants
import math
import numpy as np
def Cum2Mom(arr, state):
def E(op):
return qu.expect(op, state)
def Arr2str(arr,sep):
r = ''
for i,x in enumerate(arr):
r += str(x)
if i<len(arr)-1:
r += sep
return r
if isinstance( arr[0],str):
myprod = lambda x: Arr2str(x,'*')
mysum = lambda x: Arr2str(x,'+')
E=lambda x: 'E('+str(x)+')'
myfloat = str
else:
myfloat = lambda x: x
myprod = np.prod
mysum = sum
S = set(range(len(arr)))
P = Partition(S)
return mysum([
myprod([myfloat(math.factorial(len(pi)-1) * (-1)**(len(pi)-1))
,myprod([
E(myprod([
arr[i]
for i in B
]))
for B in pi])])
for pi in P])
print(Cum2Mom(['a','b','c','d'],1) )
import qutip as qu
print(Cum2Mom([qu.qeye(3) for i in range(3)],qu.qeye(3)) )
It's designed to work with qutip objects, and it also works with strings so you can verify the correct separation and prefactors.
Exponents of the variables can be represented by repeating the variable.
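For reference, the joint-cumulant identity from the Wikipedia section the code follows is

κ(X_1, ..., X_n) = Σ_π (-1)^(|π|-1) (|π|-1)! Π_{B ∈ π} E[ Π_{i ∈ B} X_i ],

where the sum runs over all partitions π of {1, ..., n} and B ranges over the blocks of π; the factor math.factorial(len(pi)-1) * (-1)**(len(pi)-1) in Cum2Mom is exactly this (-1)^(|π|-1) (|π|-1)! prefactor.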
I'm implementing the bidirectional A* algorithm in Python 2.7.12 and testing it on the map of Romania from Russell and Norvig, Chapter 3. The edges have weights and the aim is to find the shortest path between two nodes.
Here is the visualization of the testing graph:
The example where my bidirectional A* fails is the one where the starting point is 'a' and the goal is 'u'. This is the path that my implementation has found:
The length of ['a', 's', 'f', 'b', 'u'] is 535.
This is the actual shortest path from 'a' to 'u':
The length of ['a', 's', 'r', 'p', 'b', 'u'] is 503.
As we can see, my implementation failed to find the shortest path. I think that the problem may be in my stopping conditions, but I don't know.
This is the Python script with my implementation of A* (I used Euclidean distance as a heuristic) and a few other helper classes and functions:
from __future__ import division
import math
from networkx import *
import random
import pickle
import sys
import heapq
import matplotlib.pyplot as plt
class PriorityQueue():
"""Implementation of a priority queue"""
def __init__(self):
self.queue = []
self.node_finder = dict()
self.current = 0
self.REMOVED_SYMBOL = '<removed>'
def next(self):
if self.current >=len(self.queue):
self.current
raise StopIteration
out = self.queue[self.current]
self.current += 1
return out
def pop(self):
while self.queue:
node = heapq.heappop(self.queue)
nodeId = node[1]
if nodeId is not self.REMOVED_SYMBOL:
try:
del self.node_finder[nodeId]
except KeyError:
dummy=1
return node
def remove(self, nodeId):
node = self.node_finder[nodeId]
node[1] = self.REMOVED_SYMBOL
def __iter__(self):
return self
def __str__(self):
return 'PQ:[%s]'%(', '.join([str(i) for i in self.queue]))
def append(self, node):
nodeId = node[1]
nodePriority = node[0]
node = [nodePriority, nodeId]
self.node_finder[nodeId] = node
heapq.heappush(self.queue, node)
def update(self, node):
nodeId = node[1]
nodePriority = node[0]
node = [nodePriority, nodeId]
self.remove(nodeId)
self.node_finder[nodeId] = node
heapq.heappush(self.queue, node)
def getPriority(self, nodeId):
return self.node_finder[nodeId][0]
def __contains__(self, key):
self.current = 0
return key in [n for v,n in self.queue]
def __eq__(self, other):
return self == other
def size(self):
return len(self.queue)
def clear(self):
self.queue = []
def top(self):
return self.queue[0]
__next__ = next
def bidirectional_a_star(graph, start, goal):
if start == goal:
return []
pq_s = PriorityQueue()
pq_t = PriorityQueue()
closed_s = dict()
closed_t = dict()
g_s = dict()
g_t = dict()
g_s[start] = 0
g_t[goal] = 0
cameFrom1 = dict()
cameFrom2 = dict()
def euclidean_distance(graph, v, goal):
xv, yv = graph.node[v]['pos']
xg, yg = graph.node[goal]['pos']
return ((xv-xg)**2 + (yv-yg)**2)**0.5
def h1(v): # heuristic for forward search (from start to goal)
return euclidean_distance(graph, v, goal)
def h2(v): # heuristic for backward search (from goal to start)
return euclidean_distance(graph, v, start)
cameFrom1[start] = False
cameFrom2[goal] = False
pq_s.append((0+h1(start), start))
pq_t.append((0+h2(goal), goal))
done = False
i = 0
mu = 10**301 # 10**301 plays the role of infinity
connection = None
while pq_s.size() > 0 and pq_t.size() > 0 and done == False:
i = i + 1
if i % 2 == 1: # alternate between forward and backward A*
fu, u = pq_s.pop()
closed_s[u] = True
for v in graph[u]:
weight = graph[u][v]['weight']
if v in g_s:
if g_s[u] + weight < g_s[v]:
g_s[v] = g_s[u] + weight
cameFrom1[v] = u
if v in closed_s:
del closed_s[v]
if v in pq_s:
pq_s.update((g_s[v]+h1(v), v))
else:
pq_s.append((g_s[v]+h1(v), v))
else:
g_s[v] = g_s[u] + weight
cameFrom1[v] = u
pq_s.append((g_s[v]+h1(v), v))
if v in closed_t:
if g_s[u] + weight + g_t[v] < mu:
mu = g_s[u] + weight + g_t[v]
connection = v
done = True
else:
fu, u = pq_t.pop()
closed_t[u] = True
for v in graph[u]:
weight = graph[u][v]['weight']
if v in g_t:
if g_t[u] + weight < g_t[v]:
g_t[v] = g_t[u] + weight
cameFrom2[v] = u
if v in closed_t:
del closed_t[v]
if v in pq_t:
pq_t.update((g_t[v]+h2(v), v))
else:
pq_t.append((g_t[v]+h2(v), v))
else:
g_t[v] = g_t[u] + weight
cameFrom2[v] = u
pq_t.append((g_t[v]+h2(v), v))
if v in closed_s:
if g_t[u] + weight + g_s[v] < mu:
mu = g_t[u] + weight + g_s[v]
connection = v
done = True
if u in closed_s and u in closed_t:
if g_s[u] + g_t[u] < mu:
mu = g_s[u] + g_t[u]
connection = u
stopping_distance = min(min([f for (f,x) in pq_s]), min([f for (f,x) in pq_t]))
if mu <= stopping_distance:
done = True
#connection = u
continue
if connection is None:
# start and goal are not connected
return None
#print cameFrom1
#print cameFrom2
path = []
current = connection
#print current
while current != False:
#print predecessor
path = [current] + path
current = cameFrom1[current]
current = connection
successor = cameFrom2[current]
while successor != False:
path = path + [successor]
current = successor
successor = cameFrom2[current]
return path
# This function visualizes paths
def draw_graph(graph, node_positions={}, start=None, goal=None, path=[]):
explored = list(graph.get_explored_nodes())
labels ={}
for node in graph:
labels[node]=node
if not node_positions:
node_positions = networkx.spring_layout(graph)
edge_labels = networkx.get_edge_attributes(graph,'weight')
networkx.draw_networkx_nodes(graph, node_positions)
networkx.draw_networkx_edges(graph, node_positions, style='dashed')
networkx.draw_networkx_edge_labels(graph, node_positions, edge_labels=edge_labels)
networkx.draw_networkx_labels(graph,node_positions, labels)
networkx.draw_networkx_nodes(graph, node_positions, nodelist=explored, node_color='g')
if path:
edges = [(path[i], path[i+1]) for i in range(0, len(path)-1)]
networkx.draw_networkx_edges(graph, node_positions, edgelist=edges, edge_color='b')
if start:
networkx.draw_networkx_nodes(graph, node_positions, nodelist=[start], node_color='b')
if goal:
networkx.draw_networkx_nodes(graph, node_positions, nodelist=[goal], node_color='y')
plt.plot()
plt.show()
# this function calculates the length of the path
def calculate_length(graph, path):
pairs = zip(path, path[1:])
return sum([graph.get_edge_data(a, b)['weight'] for a, b in pairs])
#Romania map data from Russell and Norvig, Chapter 3.
romania = pickle.load(open('romania_graph.pickle', 'rb'))
node_positions = {n: romania.node[n]['pos'] for n in romania.node.keys()}
start = 'a'
goal = 'u'
path = bidirectional_a_star(romania, start, goal)
print "This is the path found by bidirectional A* :", path
print "Its length :", calculate_length(romania, path)
# visualize my path
draw_graph(romania, node_positions=node_positions, start=start, goal=goal, path=path)
# compare to the true shortest path between start and goal
true_path = networkx.shortest_path(romania, start, goal, weight='weight')
print "This is the actual shortest path: ", true_path
print "Its lenght: ", calculate_length(romania, true_path)
#visualize true_path
draw_graph(romania, node_positions=node_positions, start=start, goal=goal, path=true_path)
Pickle data for Romania can be downloaded from here.
I corrected some errors in PriorityQueue and bidirectional_a_star. It's working fine now.
The corrected code for the class and the function is as follows:
class PriorityQueue():
"""Implementation of a priority queue"""
def __init__(self):
self.queue = []
self.node_finder = dict()
self.current = 0
self.REMOVED_SYMBOL = '<removed>'
def next(self):
if self.current >=len(self.queue):
self.current
raise StopIteration
out = self.queue[self.current]
while out == self.REMOVED_SYMBOL:
self.current += 1
out = self.queue[self.current]
self.current += 1
return out
def pop(self):
# TODO: finish this
while self.queue:
node = heapq.heappop(self.queue)
nodeId = node[1]
if nodeId is not self.REMOVED_SYMBOL:
try:
del self.node_finder[nodeId]
except KeyError:
dummy=1
return node
#raise KeyError('pop from an empty priority queue')
def remove(self, nodeId):
node = self.node_finder[nodeId]
node[1] = self.REMOVED_SYMBOL
def __iter__(self):
return self
def __str__(self):
return 'PQ:[%s]'%(', '.join([str(i) for i in self.queue]))
def append(self, node):
# node = (priority, nodeId)
nodeId = node[1]
nodePriority = node[0]
node = [nodePriority, nodeId]
self.node_finder[nodeId] = node
heapq.heappush(self.queue, node)
def update(self, node):
nodeId = node[1]
nodePriority = node[0]
node = [nodePriority, nodeId]
self.remove(nodeId)
self.node_finder[nodeId] = node
heapq.heappush(self.queue, node)
def getPriority(self, nodeId):
return self.node_finder[nodeId][0]
def __contains__(self, key):
self.current = 0
return key in [n for v,n in self.queue]
def __eq__(self, other):
return self == other
def size(self):
return len([1 for priority, node in self.queue if node!=self.REMOVED_SYMBOL])
def clear(self):
self.queue = []
def top(self):
return self.queue[0]
__next__ = next
def bidirectional_a_star(graph, start, goal):
if start == goal:
return []
pq_s = PriorityQueue()
pq_t = PriorityQueue()
closed_s = dict()
closed_t = dict()
g_s = dict()
g_t = dict()
g_s[start] = 0
g_t[goal] = 0
cameFrom1 = dict()
cameFrom2 = dict()
def euclidean_distance(graph, v, goal):
xv, yv = graph.node[v]['pos']
xg, yg = graph.node[goal]['pos']
return ((xv-xg)**2 + (yv-yg)**2)**0.5
def h1(v): # heuristic for forward search (from start to goal)
return euclidean_distance(graph, v, goal)
def h2(v): # heuristic for backward search (from goal to start)
return euclidean_distance(graph, v, start)
cameFrom1[start] = False
cameFrom2[goal] = False
pq_s.append((0+h1(start), start))
pq_t.append((0+h2(goal), goal))
done = False
i = 0
mu = 10**301 # 10**301 plays the role of infinity
connection = None
while pq_s.size() > 0 and pq_t.size() > 0 and done == False:
i = i + 1
if i % 2 == 1: # alternate between forward and backward A*
fu, u = pq_s.pop()
closed_s[u] = True
for v in graph[u]:
weight = graph[u][v]['weight']
if v in g_s:
if g_s[u] + weight < g_s[v]:
g_s[v] = g_s[u] + weight
cameFrom1[v] = u
if v in closed_s:
del closed_s[v]
if v in pq_s:
pq_s.update((g_s[v]+h1(v), v))
else:
pq_s.append((g_s[v]+h1(v), v))
else:
g_s[v] = g_s[u] + weight
cameFrom1[v] = u
pq_s.append((g_s[v]+h1(v), v))
else:
fu, u = pq_t.pop()
closed_t[u] = True
for v in graph[u]:
weight = graph[u][v]['weight']
if v in g_t:
if g_t[u] + weight < g_t[v]:
g_t[v] = g_t[u] + weight
cameFrom2[v] = u
if v in closed_t:
del closed_t[v]
if v in pq_t:
pq_t.update((g_t[v]+h2(v), v))
else:
pq_t.append((g_t[v]+h2(v), v))
else:
g_t[v] = g_t[u] + weight
cameFrom2[v] = u
pq_t.append((g_t[v]+h2(v), v))
if u in closed_s and u in closed_t:
if g_s[u] + g_t[u] < mu:
mu = g_s[u] + g_t[u]
connection = u
try:
stopping_distance = max(min([f for (f,x) in pq_s]), min([f for (f,x) in pq_t]))
except ValueError:
continue
if mu <= stopping_distance:
done = True
connection = u
continue
if connection is None:
# start and goal are not connected
return None
#print cameFrom1
#print cameFrom2
path = []
current = connection
#print current
while current != False:
#print predecessor
path = [current] + path
current = cameFrom1[current]
current = connection
successor = cameFrom2[current]
while successor != False:
path = path + [successor]
current = successor
successor = cameFrom2[current]
return path
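Comparing the two versions, the changes that seem to matter are: size() now counts only entries not marked as removed, mu is only updated once a node has been closed in both directions, and the termination test takes the max of the two queue minima (guarded against an empty queue) instead of the min:
# question version
stopping_distance = min(min([f for (f,x) in pq_s]), min([f for (f,x) in pq_t]))

# corrected version
try:
    stopping_distance = max(min([f for (f,x) in pq_s]), min([f for (f,x) in pq_t]))
except ValueError:
    continue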
I'm writing a program that analyzes an image and finds the three most dominant colors using KMeans clustering. The code works fine, but I want to count the number of pixels in each of the three clusters.
code:
class Cluster(object):
def __init__(self):
self.pixels = []
self.centroid = None
def addPoint(self, pixel):
self.pixels.append(pixel)
def setNewCentroid(self):
R = [colour[0] for colour in self.pixels]
G = [colour[1] for colour in self.pixels]
B = [colour[2] for colour in self.pixels]
R = sum(R) / len(R)
G = sum(G) / len(G)
B = sum(B) / len(B)
self.centroid = (R, G, B)
self.pixels = []
return self.centroid
class Kmeans(object):
def __init__(self, k=2, max_iterations=5, min_distance=2.0, size=200):
self.k = k
self.max_iterations = max_iterations
self.min_distance = min_distance
self.size = (size, size)
def run(self, image):
self.image = image
self.image.thumbnail(self.size)
self.pixels = numpy.array(image.getdata(), dtype=numpy.uint8)
self.clusters = [None for i in range(self.k)]
self.oldClusters = None
randomPixels = random.sample(self.pixels, self.k)
for idx in range(self.k):
self.clusters[idx] = Cluster()
self.clusters[idx].centroid = randomPixels[idx]
iterations = 0
while self.shouldExit(iterations) is False:
self.oldClusters = [cluster.centroid for cluster in self.clusters]
print iterations
for pixel in self.pixels:
self.assignClusters(pixel)
for cluster in self.clusters:
cluster.setNewCentroid()
iterations += 1
return [cluster.centroid for cluster in self.clusters]
def assignClusters(self, pixel):
shortest = float('Inf')
for cluster in self.clusters:
distance = self.calcDistance(cluster.centroid, pixel)
if distance < shortest:
shortest = distance
nearest = cluster
nearest.addPoint(pixel)
def calcDistance(self, a, b):
result = numpy.sqrt(sum((a - b) ** 2))
return result
def shouldExit(self, iterations):
if self.oldClusters is None:
return False
for idx in range(self.k):
dist = self.calcDistance(
numpy.array(self.clusters[idx].centroid),
numpy.array(self.oldClusters[idx])
)
if dist < self.min_distance:
return True
if iterations <= self.max_iterations:
return False
return True
Is there a way I can count how many pixels are assigned to each cluster? I don't mean how many have the same value; just the total number per cluster.
You mean, you want to do
for cluster in self.clusters:
print len(cluster.pixels)
for each cluster?
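One caveat, since setNewCentroid() clears each cluster's pixel list at the end of every iteration: the lists are empty by the time run() returns. A small sketch (assuming the Kmeans class above and a PIL image) that does one extra assignment pass before counting:
km = Kmeans(k=3)
dominant_colours = km.run(image)
# the last setNewCentroid() emptied every cluster, so assign the pixels once more
for pixel in km.pixels:
    km.assignClusters(pixel)
for idx, cluster in enumerate(km.clusters):
    print idx, len(cluster.pixels)  # total number of pixels assigned to this cluster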
I have spent a week working on this branch and bound code for the knapsack problem, and I have looked at numerous articles and books on the subject. However, when I am running my code I don't get the result I expect. Input is received from a text file, such as this:
12
4 1
1 1
3 2
2 3
where the first line is the capacity and each subsequent line is a value/weight pair. The result I get from using this file is '8' instead of '10' (unless I am mistaken and not all of the items fit in the knapsack). Here is my code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import Queue
from collections import namedtuple
Item = namedtuple("Item", ['index', 'value', 'weight', 'level', 'bound', 'contains'])
class Node:
def __init__(self, level, value, weight, bound, contains):
self.level = level
self.value = value
self.weight = weight
self.bound = bound
self.contains = contains
def upper_bound(u, k, n, v, w):
if u.weight > k:
return 0
else:
bound = u.value
wt = u.weight
j = u.level + 1
while j < n and wt + w[j] <= k:
bound += v[j]
wt += w[j]
j += 1
# fill knapsack with fraction of a remaining item
if j < n:
bound += (k - wt) * (v[j] / w[j])
return bound
def knapsack(items, capacity):
item_count = len(items)
v = [0]*item_count
w = [0]*item_count
# sort items by value to weight ratio
items = sorted(items, key=lambda k: k.value/k.weight, reverse = True)
for i,item in enumerate(items, 0):
v[i] = int(item.value)
w[i] = int(item.weight)
q = Queue.Queue()
root = Node(0, 0, 0, 0.0, [])
root.bound = upper_bound(root, capacity, item_count, v, w)
q.put(root)
value = 0
taken = [0]*item_count
best = set()
while not q.empty():
c = q.get()
if c.bound > value:
level = c.level+1
# check 'left' node (if item is added to knapsack)
left = Node(c.value + v[level], c.weight + w[level], level, 0.0, c.contains[:])
left.contains.append(level)
if left.weight <= capacity and left.value > value:
value = left.value
best |= set(left.contains)
left.bound = upper_bound(left, capacity, item_count, v, w)
if left.bound > value:
q.put(left)
# check 'right' node (if items is not added to knapsack)
right = Node(c.value, c.weight, level, 0.0, c.contains[:])
right.contains.append(level)
right.bound = upper_bound(right, capacity, item_count, v, w)
if right.bound > value:
q.put(right)
for b in best:
taken[b] = 1
value = sum([i*j for (i,j) in zip(v,taken)])
return str(value)
Are my indices off? Am I not traversing the tree or calculating the bounds correctly?
def upper_bound(u, k, n, v, w):
if u.weight > k:
return 0
else:
bound = u.value
wt = u.weight
j = u.level
while j < n and wt + w[j] <= k:
bound += v[j]
wt += w[j]
j += 1
# fill knapsack with fraction of a remaining item
if j < n:
bound += (k - wt) * float(v[j])/ w[j]
return bound
def knapsack(items, capacity):
item_count = len(items)
v = [0]*item_count
w = [0]*item_count
# sort items by value to weight ratio
items = sorted(items, key=lambda k: float(k.value)/k.weight, reverse = True)
for i,item in enumerate(items, 0):
v[i] = int(item.value)
w[i] = int(item.weight)
q = Queue.Queue()
root = Node(0, 0, 0, 0.0,[])
root.bound = upper_bound(root, capacity, item_count, v, w)
q.put(root)
value = 0
taken = [0]*item_count
best = set()
while not q.empty():
c = q.get()
if c.bound > value:
level = c.level+1
# check 'left' node (if item is added to knapsack)
left = Node(level,c.value + v[level-1], c.weight + w[level-1], 0.0, c.contains[:])
left.bound = upper_bound(left, capacity, item_count, v, w)
left.contains.append(level)
if left.weight <= capacity:
if left.value > value:
value = left.value
best = set(left.contains)
if left.bound > value:
q.put(left)
# check 'right' node (if items is not added to knapsack)
right = Node(level,c.value, c.weight, 0.0, c.contains[:])
right.bound = upper_bound(right, capacity, item_count, v, w)
if right.weight <= capacity:
if right.value > value:
value = right.value
best = set(right.contains)
if right.bound > value:
q.put(right)
for b in best:
taken[b-1] = 1
value = sum([i*j for (i,j) in zip(v,taken)])
return str(value)
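In short, the main corrections relative to the question's code: Node is now built with its arguments in the declared order and the item arrays are indexed with level-1, so the first (highest-ratio) item is no longer skipped; upper_bound starts its greedy fill at j = u.level and uses float division, so the Python 2 ratio sort and fractional bound don't truncate; the level is only appended to contains on the 'take' branch; and best is replaced (not unioned) whenever a strictly better value is found, with taken[b-1] mapping the 1-based levels back to array indices. The key line is the left-child construction:
# question version: arguments land in the wrong slots (value passed as level, etc.)
left = Node(c.value + v[level], c.weight + w[level], level, 0.0, c.contains[:])

# corrected version: matches Node(level, value, weight, bound, contains)
left = Node(level, c.value + v[level-1], c.weight + w[level-1], 0.0, c.contains[:])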
I think you only want to calculate the bound if you're not taking the item. If you take the item, that means that your bound is still attainable. If you don't, you have to readjust your expectations.
import functools
class solver():
def __init__(self, Items, capacity):
self.sortedItems = list(filter(lambda x: x.value > 0, Items))
self.sortedItems = sorted(self.sortedItems, key=lambda x:float(x.weight)/float(x.value))
self.numItems = len(Items)
self.capacity = capacity
self.bestSolution = solution(0, self.capacity)
def isOptimisitcBetter(self, sol, newItemIdx):
newItem = self.sortedItems[newItemIdx]
rhs = (sol.value + (sol.capacity/newItem.weight)*newItem.value)
return rhs > self.bestSolution.value
def explore(self, sol, itemIndex):
if itemIndex < self.numItems:
if self.isOptimisitcBetter(sol, itemIndex):
self.exploreLeft(sol, itemIndex)
self.exploreRight(sol, itemIndex)
def exploreLeft(self, sol, itemIndex):
newItem = self.sortedItems[itemIndex]
thisSol = sol.copy()
if thisSol.addItem(newItem):
if thisSol.value > self.bestSolution.value:
self.bestSolution = thisSol
self.explore(thisSol, itemIndex+1)
def exploreRight(self, sol, itemIndex):
self.explore(sol, itemIndex+1)
def solveWrapper(self):
self.explore(solution(0, self.capacity), 0)
class solution():
def __init__(self, value, capacity, items=set()):
self.value, self.capacity = value, capacity
self.items = items.copy()
def copy(self):
return solution(self.value, self.capacity, self.items)
def addItem(self, newItem):
remainingCap = self.capacity-newItem.weight
if remainingCap < 0:
return False
self.items.add(newItem)
self.capacity = remainingCap
self.value+=newItem.value
return True
solver = solver(items, capacity)
solver.solveWrapper()
bestSol = solver.bestSolution
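For reference, the items and capacity that the last three lines expect could be built from the question's input file like this (the 3-field Item namedtuple is an assumption; anything exposing value and weight attributes works). With this data the solver's bestSolution ends up with value 10, since all four items fit:
from collections import namedtuple

Item = namedtuple("Item", ["index", "value", "weight"])

# capacity and the four value/weight pairs from the question's input file
capacity = 12
items = [Item(0, 4, 1), Item(1, 1, 1), Item(2, 3, 2), Item(3, 2, 3)]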