Determine which numbers in list add up to specified value - python

I have a quick (hopefully accounting problem. I just entered a new job and the books are a bit of a mess. The books have these lump sums logged, while the bank account lists each and every individual deposit. I need to determine which deposits belong to each lump sum in the books. So, I have these four lump sums:
[6884.41, 14382.14, 2988.11, 8501.60]
I then have this larger list of individual deposits (sorted):
[98.56, 98.56, 98.56, 129.44, 160.0, 242.19, 286.87, 290.0, 351.01, 665.0, 675.0, 675.0, 677.45, 677.45, 695.0, 695.0, 695.0, 695.0, 715.0, 720.0, 725.0, 730.0, 745.0, 745.0, 750.0, 750.0, 750.0, 750.0, 758.93, 758.93, 763.85, 765.0, 780.0, 781.34, 781.7, 813.79, 824.97, 827.05, 856.28, 874.08, 874.44, 1498.11, 1580.0, 1600.0, 1600.0]
In Python, how can I determine which sub-set of the longer list sums to one of the lump sum values?
(NOTE: these numbers have the additional problem that the sum of the lump sums is $732.70 more than the sum of the individual accounts. I'm hoping that this doesn't make this problem completely unsolvable)

Here's a pretty good start at a solution:
import datetime as dt
from itertools import groupby
from math import ceil
def _unique_subsets_which_sum_to(target, value_counts, max_sums, index):
value, count = value_counts[index]
if index:
# more values to be considered; solve recursively
index -= 1
rem = max_sums[index]
# find the minimum amount that this value must provide,
# and the minimum occurrences that will satisfy that value
if target <= rem:
min_k = 0
else:
min_k = (target - rem + value - 1) // value # rounded up to next int
# find the maximum occurrences of this value
# which result in <= target
max_k = min(count, target // value)
# iterate across min..max occurrences
for k in range(min_k, max_k+1):
new_target = target - k*value
if new_target:
# recurse
for solution in _unique_subsets_which_sum_to(new_target, value_counts, max_sums, index):
yield ((solution + [(value, k)]) if k else solution)
else:
# perfect solution, no need to recurse further
yield [(value, k)]
else:
# this must finish the solution
if target % value == 0:
yield [(value, target // value)]
def find_subsets_which_sum_to(target, values):
"""
Find all unique subsets of values which sum to target
target integer >= 0, total to be summed to
values sequence of integer > 0, possible components of sum
"""
# this function is basically a shell which prepares
# the input values for the recursive solution
# turn sequence into sorted list
values = sorted(values)
value_sum = sum(values)
if value_sum >= target:
# count how many times each value appears
value_counts = [(value, len(list(it))) for value,it in groupby(values)]
# running total to each position
total = 0
max_sums = [0]
for val,num in value_counts:
total += val * num
max_sums.append(total)
start = dt.datetime.utcnow()
for sol in _unique_subsets_which_sum_to(target, value_counts, max_sums, len(value_counts) - 1):
yield sol
end = dt.datetime.utcnow()
elapsed = end - start
seconds = elapsed.days * 86400 + elapsed.seconds + elapsed.microseconds * 0.000001
print(" -> took {:0.1f} seconds.".format(seconds))
# I multiplied each value by 100 so that we can operate on integers
# instead of floating-point; this will eliminate any rounding errors.
values = [
9856, 9856, 9856, 12944, 16000, 24219, 28687, 29000, 35101, 66500,
67500, 67500, 67745, 67745, 69500, 69500, 69500, 69500, 71500, 72000,
72500, 73000, 74500, 74500, 75000, 75000, 75000, 75000, 75893, 75893,
76385, 76500, 78000, 78134, 78170, 81379, 82497, 82705, 85628, 87408,
87444, 149811, 158000, 160000, 160000
]
sum_to = [
298811,
688441,
850160 #,
# 1438214
]
def main():
subset_sums_to = []
for target in sum_to:
print("\nSolutions which sum to {}".format(target))
res = list(find_subsets_which_sum_to(target, values))
print(" {} solutions found".format(len(res)))
subset_sums_to.append(res)
return subset_sums_to
if __name__=="__main__":
subsetsA, subsetsB, subsetsC = main()
which on my machine results in
Solutions which sum to 298811
-> took 0.1 seconds.
2 solutions found
Solutions which sum to 688441
-> took 89.8 seconds.
1727 solutions found
Solutions which sum to 850160
-> took 454.0 seconds.
6578 solutions found
# Solutions which sum to 1438214
# -> took 7225.2 seconds.
# 87215 solutions found
The next step is to cross-compare solution subsets and see which ones can coexist together. I think the fastest approach would be to store subsets for the smallest three lump sums, iterate through them and (for compatible combinations) find the remaining values and plug them into the solver for the last lump sum.
Continuing from where I left off (+ a few changes to the above code to grab the return lists for subsums to the first three values).
I wanted a way to easily get the remaining value-coefficients each time;
class NoNegativesDict(dict):
def __sub__(self, other):
if set(other) - set(self):
raise ValueError
else:
res = NoNegativesDict()
for key,sv in self.iteritems():
ov = other.get(key, 0)
if sv < ov:
raise ValueError
# elif sv == ov:
# pass
elif sv > ov:
res[key] = sv - ov
return res
then I apply it as
value_counts = [(value, len(list(it))) for value,it in groupby(values)]
vc = NoNegativesDict(value_counts)
nna = [NoNegativesDict(a) for a in subsetsA]
nnb = [NoNegativesDict(b) for b in subsetsB]
nnc = [NoNegativesDict(c) for c in subsetsC]
# this is kind of ugly; with some more effort
# I could probably make it a recursive call also
b_tries = 0
c_tries = 0
sol_count = 0
start = dt.datetime.utcnow()
for a in nna:
try:
res_a = vc - a
sa = str(a)
for b in nnb:
try:
res_b = res_a - b
b_tries += 1
sb = str(b)
for c in nnc:
try:
res_c = res_b - c
c_tries += 1
#unpack remaining values
res_values = [val for val,num in res_c.items() for i in range(num)]
for sol in find_subsets_which_sum_to(1438214, res_values):
sol_count += 1
print("\n================")
print("a =", sa)
print("b =", sb)
print("c =", str(c))
print("d =", str(sol))
except ValueError:
pass
except ValueError:
pass
except ValueError:
pass
print("{} solutions found in {} b-tries and {} c-tries".format(sol_count, b_tries, c_tries))
end = dt.datetime.utcnow()
elapsed = end - start
seconds = elapsed.days * 86400 + elapsed.seconds + elapsed.microseconds * 0.000001
print(" -> took {:0.1f} seconds.".format(seconds))
and the final output:
0 solutions found in 1678 b-tries and 93098 c-tries
-> took 73.0 seconds.
So the final answer is there is no solution for your given data.
Hope that helps ;-)

Related

how to find 3 Numbers with Sum closest to a given number

I'm trying to write simple code for that problem. If I get an array and number I need to find the 3 numbers that their sum are close to the number that's given.
I've thought about first to pop out the last digit (the first number)
then I'll have a new array without this digit. So now I look for the second number who needs to be less the sum target. so I take only the small numbers that it's smaller them the second=sum-first number (but I don't know how to choose it.
The last number will be third=sum-first-second
I tried to write code but it's not working and it's very basic
def f(s,target):
s=sorted(s)
print(s)
print(s[0])
closest=s[0]+s[1]+s[2]
m=s[:-1]
print(m)
for i in range(len(s)):
for j in range(len(m)):
if (closest<=target-m[0]) and s[-1] + m[j] == target:
print (m[j])
n = m[:j] + nums[j+1:]
for z in range (len(z)):
if (closest<target-n[z]) and s[-1]+ m[j]+n[z] == target:
print (n[z])
s=[4,2,12,3,4,8,14]
target=20
f(s,target)
if you have idea what to change here. Please let me know
Thank you
Here is my solution I tried to maximize the performance of the code to not repeat any combinations. Let me know if you have any questions.
Good luck.
def find_3(s,target):
to_not_rep=[] #This list will store all combinations without repetation
close_to_0=abs(target - s[0]+s[1]+s[2]) #initile
There_is_one=False #False: don't have a combination equal to the target yet
for s1,first_n in enumerate(s):
for s2,second_n in enumerate(s):
if (s1==s2) : continue #to not take the same index
for s3,third_n in enumerate(s):
if (s1==s3) or (s2==s3) : continue #to not take the same index
val=sorted([first_n,second_n,third_n]) #sorting
if val in to_not_rep :continue #to not repeat the same combination with diffrent positions
to_not_rep.append(val)#adding all the combinations without repetation
sum_=sum(val) #the sum of the three numbers
# Good one
if sum_==target:
print(f"Found a possibility: {val[0]} + {val[1]} + {val[2]} = {target}")
There_is_one = True
if There_is_one is False: #No need if we found combination equal to the target
# close to the target
# We know that (target - sum) should equal to 0 otherwise :
# We are looking for the sum of closet combinations(in abs value) to 0
pos_n=abs(target-sum_)
if pos_n < close_to_0:
closet_one=f"The closet combination to the target is: {val[0]} + {val[1]} + {val[2]} = {sum_} almost {target} "
close_to_0=pos_n
# Print the closet combination to the target in case we did not find a combination equal to the target
if There_is_one is False: print(closet_one)
so we can test it :
s =[4,2,3,8,6,4,12,16,30,20,5]
target=20
find_3(s,target)
#Found a possibility: 4 + 4 + 12 = 20
#Found a possibility: 2 + 6 + 12 = 20
#Found a possibility: 3 + 5 + 12 = 20
another test :
s =[4,2,3,8,6,4,323,23,44]
find_3(s,target)
#The closet combination to the target is: 4 + 6 + 8 = 18 almost 20
This is a simple solution that returns all possibilites.
For your case it completed in 0.002019 secs
from itertools import combinations
import numpy as np
def f(s, target):
dic = {}
for tup in combinations(s, 3):
try:
dic[np.absolute(np.sum(tup) - target)].append(str(tup))
except KeyError:
dic[np.absolute(np.sum(tup) - target)] = [tup]
print(dic[min(dic.keys())])
Use itertools.combinations to get all combinations of your numbers without replacement of a certain length (three in your case). Then take the three-tuple for which the absolute value of the difference of the sum and target is minimal. min can take a key argument to specify the ordering of the iterable passed to the function.
from typing import Sequence, Tuple
def closest_to(seq: Sequence[float], target: float, length: int = 3) -> Tuple[float]:
from itertools import combinations
combs = combinations(seq, length)
diff = lambda x: abs(sum(x) - target)
return min(combs, key=diff)
closest_to([4,2,12,3,4,8,14], 20) # (4, 2, 14)
This is not the fastest or most efficient way to do it, but it's conceptionally simple and short.
Something like this?
import math
num_find = 1448
lst_Results = []
i_Number = num_find
while i_Number > 0:
num_Exp = math.floor(math.log(i_Number) / math.log(2))
lst_Results.append(dict({num_Exp: int(math.pow(2, num_Exp))}))
i_Number = i_Number - math.pow(2, num_Exp)
print(lst_Results)
In a sequence of numbers: for example 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, etc ...
The sum of the previous numbers is never greater than the next. This gives us the possibility of combinations, for example:
The number: 1448, there is no other combination than the sum of the previous numbers: 8 + 32 + 128 + 256 + 1024
Then you find the numbers whose sum is close to the number provided

Python Filtering a Point Cloud with PhotoScan Based on a Threshold Value - basic python help needed

I'm trying to implement a filter with Python to sort out the points on a point cloud generated by Agisoft PhotoScan. PhotoScan is a photogrammetry software developed to be user friendly but also allows to use Python commands through an API.
Bellow is my code so far and I'm pretty sure there is better way to write it as I'm missing something. The code runs inside PhotoScan.
Objective:
Selecting and removing 10% of points at a time with error within defined range of 50 to 10. Also removing any points within error range less than 10% of the total, when the initial steps of selecting and removing 10% at a time are done. Immediately after every point removal an optimization procedure should be done. It should stop when no points are selectable or when selectable points counts as less than 1% of the present total points and it is not worth removing them.
Draw it for better understanding:
Actual Code Under Construction (3 updates - see bellow for details):
import PhotoScan as PS
import math
doc = PS.app.document
chunk = doc.chunk
# using float with range and that by setting i = 1 it steps 0.1 at a time
def precrange(a, b, i):
if a < b:
p = 10**i
sr = a*p
er = (b*p) + 1
p = float(p)
for n in range(sr, er):
x = n/p
yield x
else:
p = 10**i
sr = b*p
er = (a*p) + 1
p = float(p)
for n in range(sr, er):
x = n/p
yield x
"""
Determine if x is close to y:
x relates to nselected variable
y to p10 variable
math.isclose() Return True if the values a and b are close to each other and
False otherwise
var is the tolerance here setted as a relative tolerance:
rel_tol is the relative tolerance – it is the maximum allowed difference
between a and b, relative to the larger absolute value of a or b. For example,
to set a tolerance of 5%, pass rel_tol=0.05. The default tolerance is 1e-09,
which assures that the two values are the same within about 9 decimal digits.
rel_tol must be greater than zero.
"""
def test_isclose(x, y, var):
if math.isclose(x, y, rel_tol=var): # if variables are close return True
return True
else:
False
# 1. define filter limits
f_ReconstUncert = precrange(50, 10, 1)
# 2. count initial point number
tiePoints_0 = len(chunk.point_cloud.points) # storing info for later
# 3. call Filter() and init it
f = PS.PointCloud.Filter()
f.init(chunk, criterion=PS.PointCloud.Filter.ReconstructionUncertainty)
a = 0
"""
Way to restart for loop!
should_restart = True
while should_restart:
should_restart = False
for i in xrange(10):
print i
if i == 5:
should_restart = True
break
"""
restartLoop = True
while restartLoop:
restartLoop = False
for count, i in enumerate(f_ReconstUncert): # for each threshold value
# count points for every i
tiePoints = len(chunk.point_cloud.points)
p10 = int(round((10 / 100) * tiePoints, 0)) # 10% of the total
f.selectPoints(i) # selects points
nselected = len([p for p in chunk.point_cloud.points if p.selected])
percent = round(nselected * 100 / tiePoints, 2)
if nselected == 0:
print("For threshold {} there´s no selectable points".format(i))
break
elif test_isclose(nselected, p10, 0.1):
a += 1
print("Threshold found in iteration: ", count)
print("----------------------------------------------")
print("# {} Removing points from cloud ".format(a))
print("----------------------------------------------")
print("# {}. Reconstruction Uncerntainty:"
" {:.2f}".format(a, i))
print("{} - {}"
" ({:.1f} %)\n".format(tiePoints,
nselected, percent))
f.removePoints(i) # removes points
# optimization procedure needed to refine cameras positions
print("--------------Optimizing cameras-------------\n")
chunk.optimizeCameras(fit_f=True, fit_cx=True,
fit_cy=True, fit_b1=False,
fit_b2=False, fit_k1=True,
fit_k2=True, fit_k3=True,
fit_k4=False, fit_p1=True,
fit_p2=True, fit_p3=False,
fit_p4=False, adaptive_fitting=False)
# count again number of points in point cloud
tiePoints = len(chunk.point_cloud.points)
print("= {} remaining points after"
" {} removal".format(tiePoints, a))
# reassigning variable to get new 10% of remaining points
p10 = int(round((10 / 100) * tiePoints, 0))
percent = round(nselected * 100 / tiePoints, 2)
print("----------------------------------------------\n\n")
# restart loop to investigate from range start
restartLoop = True
break
else:
f.resetSelection()
continue # continue to next i
else:
f.resetSelection()
print("for loop didnt work out")
print("{} iterations done!".format(count))
tiePoints = len(chunk.point_cloud.points)
print("Tiepoints 0: ", tiePoints_0)
print("Tiepoints 1: ", tiePoints)
Problems:
A. Currently I'm stuck on an endless processing because of a loop. I know it's about my bad coding. But how do I implement my objective and get away with the infinite loops? ANSWER: Got the code less confusing and updated above.
B. How do I start over (or restart) my search for valid threshold values in the range(50, 20) after finding one of them? ANSWER: Stack Exchange: how to restart a for loop
C. How do I turn the code more pythonic?
IMPORTANT UPDATE 1: altered above
Using a better range with float solution adapted from stackoverflow: how-to-use-a-decimal-range-step-value
# using float with range and that by setting i = 1 it steps 0.1 at a time
def precrange(a, b, i):
if a < b:
p = 10**i
sr = a*p
er = (b*p) + 1
p = float(p)
return map(lambda x: x/p, range(sr, er))
else:
p = 10**i
sr = b*p
er = (a*p) + 1
p = float(p)
return map(lambda x: x/p, range(sr, er))
# some code
f_ReconstUncert = precrange(50, 20, 1)
And also using math.isclose() to determine if selected points are close to the 10% selected points instead of using a manual solution through assigning new variables. This was implemented as follows:
"""
Determine if x is close to y:
x relates to nselected variable
y to p10 variable
math.isclose() Return True if the values a and b are close to each other and
False otherwise
var is the tolerance here setted as a relative tolerance:
rel_tol is the relative tolerance – it is the maximum allowed difference
between a and b, relative to the larger absolute value of a or b. For example,
to set a tolerance of 5%, pass rel_tol=0.05. The default tolerance is 1e-09,
which assures that the two values are the same within about 9 decimal digits.
rel_tol must be greater than zero.
"""
def test_threshold(x, y, var):
if math.isclose(x, y, rel_tol=var): # if variables are close return True
return True
else:
False
# some code
if test_threshold(nselected, p10, 0.1):
# if true then a valid threshold is found
# some code
UPDATE 2: altered on code under construction
Minor fixes and got to restart de for loop from beginning by following guidance from another Stack Exchange post on the subject. Have to improve the range now or alter the isclose() to get more values.
restartLoop = True
while restartLoop:
restartLoop = False
for i in range(0, 10):
if condition:
restartLoop = True
break
UPDATE 3: Code structure to achieve listed objectives:
threshold = range(0, 11, 1)
listx = []
for i in threshold:
listx.append(i)
restart = 0
restartLoop = True
while restartLoop:
restartLoop = False
for idx, i in enumerate(listx):
print("do something as printing i:", i)
if i > 5: # if this condition restart loop
print("found value for condition: ", i)
del listx[idx]
restartLoop = True
print("RESTARTING LOOP\n")
restart += 1
break # break inner while and restart for loop
else:
# continue if the inner loop wasn't broken
continue
else:
continue
print("restart - outer while", restart)

Alerter monitoring tool : How to find if a number greater than average of the consecutive subsets of windowsize

The Alerter is a simple monitoring tool, intended to help detect
increases in response time for some process. It does that by computing
a few statistics about the process across a 'window' of a certain
number of runs, and alerting (returning true) if certain thresholds
are met.
It takes the following parameters:
inputs: A list of integer times for the process. This list may be very long
window size: how many runs long a window is, as an integer
allowedIncrease: how far over 'average' a window or value is allowed to be, as a percent.
This is represented as a decimal value based on one, so a 50%
allowable increase would be represented as 1.5
Your Alerter should return true if either of the following conditions
are met:
Any value is more than the allowed increase above the window average in ALL windows in which it appears. For example: alert({1, 2, 100, 2,
2}, 3, 1.5) should alert: the value 100 appears in three windows, and
in all cases is more than 50% over the average value alert({1, 2, 4,
2, 2}, 3, 2) should not alert: the largest outlier is 4, and that
value appears in a window with average value 2.6, less than 100% of
that average
Any window's average is more than the acceptable increase over a previous window's average value For example: alert({1,2,100,2,2}, 2,
2.5) should alert: Even though no individual value causes an alert, there is a window with average 1.5 and a later window with an average
more than 2.5 times larger
Otherwise, you should return false.
This is my solution, but it is not working.
from decimal import *
def alert(inputs, windowSize, allowedIncrease):
average = dict()
increase_average = dict()
val_list = list()
## calculating the average and appending to the dictionary
for i in range(0, len(inputs)):
val = sum(inputs[i:i + windowSize])
avg = Decimal(val) / windowSize
if i == len(inputs) - windowSize + 1:
break
else:
for j in range(0, windowSize-1):
try:
average[inputs[i + j]] = avg
except:
average[inputs[i + j]].append(avg)
increase = Decimal(allowedIncrease - 1)
##appending increase in the average
for key, values in average.items():
data = (Decimal(values) * increase) + Decimal(values)
try:
increase_average[key] = data
except:
increase_average[key].append(data)
##checking if all the average value is greater than key
for key, value in increase_average.items():
if key > value:
return True
##checking if any average value greater than incease*previous average value
for (k, v) in average.items():
val_list.append(v)
for h in range(len(val_list)):
if any(val_list >= (Decimal(x * increase) + Decimal(x)) for x in val_list[:h]):
return True
return False
if __name__ == "__main__":
inputs = [1, 2, 4, 2, 2]
windowSize = 3
allowedIncrease = 2
res = alert(inputs, windowSize, allowedIncrease)
print res
There would be a TypeError in the following line:
if any(val_list >= (Decimal(x * increase) + Decimal(x)) for x in val_list[:h]:
You need to change val_list to val_list[k]
change here, and it will start working.
for h in range(len(val_list)):
if any(val_list[h] >= (Decimal(x * increase) + Decimal(x)) for x in ..
This will work.
def alerter(l,w,inc):
dic = {}
lis = []
for i in range(0,len(l)-w+1):
avg = sum(l[i:i+w])/w
lis.append(avg)
for j in range(0,w):
if l[i+j] in dic.keys():
dic[l[i+j]].append(avg)
else:
dic[l[i+j]] = [avg]
for i in range(len(lis)-1):
if lis[i]*inc < lis[i+1]:
return True
for k,v in dic.items():
if min(v)*inc < k:
return True
return False

Speeding a numpy correlation program using the fact that lists are sorted

I am currently using python and numpy for calculations of correlations between 2 lists: data_0 and data_1. Each list contains respecively sorted times t0 and t1.
I want to calculate all the events where 0 < t1 - t0 < t_max.
for time_0 in np.nditer(data_0):
delta_time = np.subtract(data_1, np.full(data_1.size, time_0))
delta_time = delta_time[delta_time >= 0]
delta_time = delta_time[delta_time < time_max]
Doing so, as the list are sorted, I am selecting a subarray of data_1 of the form data_1[index_min: index_max].
So I need in fact to find two indexes to get what I want.
And what's interesting is that when I go to the next time_0, as data_0 is also sorted, I just need to find the new index_min / index_max such as new_index_min >= index_min / new_index_max >= index_max.
Meaning that I don't need to scann again all the data_1.
(data list from scratch).
I have implemented such a solution not using the numpy methods (just with while loop) and it gives me the same results as before but not as fast than before (15 times longer!).
I think as normally it requires less calculation, there should be a way to make it faster using numpy methods but I don't know how to do it.
Does anyone have an idea?
I am not sure if I am super clear so if you have any questions, do not hestitate.
Thank you in advance,
Paul
Here is a vectorized approach using argsort. It uses a strategy similar to your avoid-full-scan idea:
import numpy as np
def find_gt(ref, data, incl=True):
out = np.empty(len(ref) + len(data) + 1, int)
total = (data, ref) if incl else (ref, data)
out[1:] = np.argsort(np.concatenate(total), kind='mergesort')
out[0] = -1
split = (out < len(data)) if incl else (out >= len(ref))
if incl:
out[~split] -= len(data)
split[0] = False
return np.maximum.accumulate(np.where(split, -1, out))[split] + 1
def find_intervals(ref, data, span, incl=(True, True)):
index_min = find_gt(ref, data, incl[0])
index_max = len(ref) - find_gt(-ref[::-1], -span-data[::-1], incl[1])[::-1]
return index_min, index_max
ref = np.sort(np.random.randint(0,20000,(10000,)))
data = np.sort(np.random.randint(0,20000,(10000,)))
span = 2
idmn, idmx = find_intervals(ref, data, span, (True, True))
print('checking')
for d,mn,mx in zip(data, idmn, idmx):
assert mn == len(ref) or ref[mn] >= d
assert mn == 0 or ref[mn-1] < d
assert mx == len(ref) or ref[mx] > d+span
assert mx == 0 or ref[mx-1] <= d+span
print('ok')
It works by
indirectly sorting both sets together
finding for each time in one set the preceding time in the other
this is done using maximum.reduce
the preceding steps are applied twice, the second time the times in
one set are shifted by span

Traveling Salesman with GA, mutation, crossover

For school last semester, I wrote a python program to solve the traveling salesman problem. For those not familiar with what it is, the wolfram alpha explanation does a pretty good job of explaining it.
This was one of the first programs I wrote in Python, and so far I LOVE the language, coming from a C++/Java background. Anyway, I used a lot of inefficient methods/programming practices to get it working, so I wanted to go back and improve. I used a genetic algorithm with mutations and ordered crossovers. First, I create a list of random unique nodes with a given length and a given number of strategies. The GA runs through a given number of generations of these strategies, changing a random selection of strategies by using ordered crossover and an inverse mutation between two random indices. Each strategy has a given probability of a mutation and another probability of crossover. After this, the algorithm chooses two strategies at random, takes the best one, then compares it to the best solution found so far. The end goal of the program is to find the shortest distance through all the nodes.
Here is my original, inefficient, working code
Here is my newer, efficient, not working code:
import random
import math
import pprint
from matplotlib import pyplot as plt
def create_nodes(num_nodes, num_rows):
elements = range(1, num_nodes + 1)
return [random.sample(elements, num_nodes) for _ in range(num_rows)]
def mutate(table, node_table, mutate_probability, cross_probability):
for next_id, row in enumerate(table, 1):
nodes = len(row)
# print
# print "Original: ", row
#mutation
if random.random() > mutate_probability:
mini, maxi = sorted(random.sample(range(nodes),2))
row[mini:maxi+1] = row[mini:maxi+1][::-1]
# print "After mutation: ", row
# print "Between: ", mini, maxi
#crossover
if random.random() > cross_probability:
try:
next_row = table[next_id]
# print "Parent: ", next_row
except IndexError:
pass
else:
half_length = nodes//2
mini = random.randint(0, half_length)
maxi = mini + half_length - 1 + (nodes % 2)
crossed = [None] * nodes
# print "Before crossed: ", row
crossed[mini:maxi+1] = next_row[mini:maxi+1]
# print "Cross with: ", crossed
iterator = 0
for element in row:
if element in crossed:
continue
while mini <= iterator <= maxi:
iterator += 1
crossed[iterator] = element
iterator += 1
row[:] = crossed
# print "After crossed: ", row
# print "Between: ", mini, maxi
def sample_best(table, node_table):
t1, t2 = random.sample(table[1:], 2)
return distance(t1, t2, node_table)
def distance(s1, s2, node_table):
distance1 = sum_distances(s1, node_table)
distance2 = sum_distances(s2, node_table)
if distance1 < distance2:
return s1, distance1
else:
return s2, distance2
def sum_distances(strategy, node_table):
dist = 0
first_row, second_row = node_table
for idx_next_node, node1 in enumerate(strategy, 1):
try:
node2 = strategy[idx_next_node]
except IndexError:
node2 = strategy[0]
dist += math.hypot(
first_row[node2-1] - first_row[node1-1],
second_row[node2-1] - second_row[node1-1])
return dist
def draw_graph(node_table, strategy):
graphX = [node_table[0][index - 1] for index in strategy]
graphY = [node_table[1][index - 1] for index in strategy]
plt.scatter(graphX, graphY)
plt.plot(graphX, graphY)
plt.show()
def main(nodes=8, strategies=100, generations=10000, mutateP=.7, crossP=.7):
#create node locations
node_table = create_nodes(nodes, 2)
# for i in range(2):
# print node_table[i]
#create first generation
table = create_nodes(nodes, strategies)
# for i in range(strategies):
# print i
# print table[i]
print "TOP MEN are looking through:"
print strategies, "strategies in", generations, "generations with",
print nodes, "nodes in each strategy..."
best_score = None
for count in range(generations):
mutate(table, node_table, mutateP, crossP)
# crossover(table, node_table, crossP)
strategy, score = sample_best(table, node_table)
if best_score is None or score < best_score:
best_strategy = strategy
best_score = score
if count % 100 == 0:
print "Foraged", count, "berries"
print "Best we got so far:", best_score, "with: ", best_strategy
# if count % 2 == 0:
# print count
# for i in range(strategies):
# print table[i]
print "=========================================================================="
print "Best we could find: ", best_score, "for strategy", best_strategy
draw_graph(node_table, best_strategy)
main()
The new code is what I'm having trouble with. The mutation and crossover seem to be working correctly, but the algorithm isn't even coming close to finding a solution and I have no idea why. Thanks for the help in advance, I really appreciate it!

Categories