How to produce different random results with DEAP? - python

I am using the DEAP library to maximize a metric, and I noticed that whenever I restart the algorithm (which is supposed to create a random list of binary values - 1s and 0s) it is producing the same initial values.
I became suspicious and copied their basic DEAP example here and re-ran the algorithms again:
import array, random
from deap import creator, base, tools, algorithms
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", array.array, typecode='b', fitness=creator.FitnessMax)
toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, 10)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
def evalOneMax(individual):
return sum(individual),
toolbox.register("evaluate", evalOneMax)
toolbox.register("mate", tools.cxTwoPoints)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
population = toolbox.population(n=10)
NGEN=40
for gen in range(NGEN):
offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1)
fits = toolbox.map(toolbox.evaluate, offspring)
for fit, ind in zip(fits, offspring):
ind.fitness.values = fit
population = offspring
The code above is exactly their example, but with the population and individual size reduced to 10. I ran the algorithm 5 times and it produced exact copies of each other. I also added a print statement to get the below output:
>python testGA.py
[1, 0, 1, 0, 1, 0, 1, 1, 1, 1]
Starting the Evolution Algorithm...
Evaluating Individual: [0, 1, 0, 1, 0, 1, 1, 1, 1, 0]
Evaluating Individual: [1, 1, 0, 1, 0, 1, 0, 1, 0, 0]
Evaluating Individual: [0, 0, 1, 0, 0, 1, 1, 0, 0, 1]
Evaluating Individual: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Evaluating Individual: [0, 1, 1, 0, 1, 0, 1, 1, 0, 1]
Evaluating Individual: [1, 0, 1, 1, 1, 0, 0, 1, 0, 0]
Evaluating Individual: [0, 1, 0, 0, 0, 1, 0, 0, 0, 1]
Evaluating Individual: [1, 1, 0, 1, 0, 1, 0, 1, 1, 1]
Evaluating Individual: [1, 1, 1, 1, 0, 0, 1, 0, 0, 0]
Evaluating Individual: [0, 0, 1, 1, 1, 1, 0, 1, 1, 1]
This output is generated every time I call the function - In that order. They are exactly identical.
I have read that I shouldn't have to seed the random.randint function, and I tested it by writing a basic script that just prints out a list of 10 random ints ranged 0 to 1. This workd fine, it just seems to produce the same values when I feed it through DEAP.
Is this normal? How can I ensure that, when I run the algorithm, I get different 'individuals' every time?
EDIT:
Sorry for the late reply, here is the full source I am using:
import random, sys
from deap import creator, base, tools
class Max():
def __init__(self):
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
INDIVIDUAL_SIZE = 10
self.toolbox = base.Toolbox()
self.toolbox.register("attr_bool", random.randint, 0, 1)
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_bool, n=INDIVIDUAL_SIZE)
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
self.toolbox.register("mate", tools.cxTwoPoints)
self.toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
self.toolbox.register("select", tools.selTournament, tournsize=3)
self.toolbox.register("evaluate", self.evaluate)
print self.main()
def evaluate(self, individual):
# Some debug code
print 'Evaluating Individual: ' + str(individual)
return sum(individual),
def main(self):
CXPB, MUTPB, NGEN = 0.5, 0.2, 40
random.seed(64)
pop = self.toolbox.population(n=10)
print "Starting the Evolution Algorithm..."
fitnesses = list(map(self.toolbox.evaluate, pop))
for ind, fit in zip(pop, fitnesses):
ind.fitness.values = fit
# ----------------------------------------------------------
# Killing the program here - just want to see the population created
sys.exit()
print "Evaluated %i individuals" % (len(pop))
for g in range(NGEN):
print "-- Generation %i --" % (g)
# Select the next genereation individuals
offspring = self.toolbox.select(pop, len(pop))
# Clone the selected individuals
offspring = list(map(self.toolbox.clone, offspring))
# Apply crossover and mutation on the offspring
for child1, child2 in zip(offspring[::2], offspring[1::2]):
if random.random() < CXPB:
self.toolbox.mate(child1, child2)
del child1.fitness.values
del child2.fitness.values
for mutant in offspring:
if random.random() < MUTPB:
self.toolbox.mutate(mutant)
del mutant.fitness.values
# Evaluate the individuals with an invalid fitness
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
fitnesses = map(self.toolbox.evaluate, invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
print "\tEvaluated %i individuals" % (len(pop))
pop[:] = offspring
fits = [ind.fitness.values[0] for ind in pop]
length = len(pop)
mean = sum(fits) / length
sum2 = sum(x*x for x in fits)
std = abs(sum2 / length - mean**2)**0.5
print "\tMin %s" % (min(fits))
print "\tMax %s" % (max(fits))
print "\tAvg %s" % (mean)
print "\tStd %s" % (std)
class R_Test:
def __init__(self):
print str([random.randint(0, 1) for i in range(10)])
if __name__ == '__main__':
#rt = R_Test()
mx = Max()
The R_Test class is there to test the random generation in Python. I read here that the seed is dynamically called if not given in Python, and I wanted to test this.
How I have been executing the above code has been as such:
> python testGA.py
... the 10 outputs
> python testGA.py
... the exact same outputs
> python testGA.py
... the exact same outputs
> python testGA.py
... the exact same outputs
> python testGA.py
... the exact same outputs
Obviously 5 times isn't exactly a strenuous test, but the fact that all 10 values are the same 5 times in a row raised a red flag.

The problem is that you specify a seed for the random number generator in your main function. Simply comment the line : random.seed(64) and you will get different results every time you execute your program.
In DEAP example files, a specific seed is set because we also use these examples as integration tests. If after a modification in the framework base code, the output of an example is different, we want to know. It also allow us to bench the time required by each example and provide a ballpark estimate to our users. The results of these benchmarks are available online at http://deap.gel.ulaval.ca/speed/.

Related

How to solve partial Knight's Tour with special constraints

Knight's tour problem described in the image here, with diagram.
A knight was initially located in a square labeled 1. It then proceeded to make a
series of moves, never re-visiting a square, and labeled the visited squares in
order. When the knight was finished, the labeled squares in each region of connected
squares had the same sum.
A short while later, many of the labels were erased. The remaining labels can be seen
above.
Complete the grid by re-entering the missing labels. The answer to this puzzle is
the sum of the squares of the largest label in each row of the completed grid, as in
the example.
[1]: E.g. the 14 and 33 are in different regions.
The picture explains it a lot more clearly, but in summary a Knight has gone around a 10 x 10 grid. The picture shows a 10 x 10 board that shows some positions in has been in, and at what point of its journey. You do not know which position the Knight started in, or how many movements it made.
The coloured groups on the board need to all sum to the same amount.
I’ve built a python solver, but it runs for ages - uses recursion. I’ve noted that the maximum sum of a group is 197, based on there being 100 squares and the smallest group is 2 adjacent squares.
My code at this link: https://pastebin.com/UMQn1HZa
import sys, numpy as np
fixedLocationsArray = [[ 12, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 5, 0, 23, 0],
[ 0, 0, 0, 0, 0, 0, 8, 0, 0, 0],
[ 0, 0, 0, 14, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 20, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 33, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 28]]
groupsArray = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0,10, 0],
[0, 0, 0, 1, 0, 0, 0, 0,10, 0],
[0, 0, 1, 1, 1, 1, 9,10,10,10],
[2, 0, 1, 0, 0,11, 9, 9, 9, 9],
[2, 0, 0, 0,11,11,11,15,15, 9],
[2, 4, 4,14,11,12,12,15,15, 8],
[2, 3, 4,14,14,13,13,13,15, 8],
[2, 3, 5,14,16,16,16, 7, 7, 8],
[3, 3, 5, 6, 6, 6, 6, 6, 7, 8]]
'''
Solver
- Noted that the maximum sum of a group is 197 since the group of only 2 can have the 100 filled and then 97 on return
'''
class KnightsTour:
def __init__(self, width, height, fixedLocations, groupsArray):
self.w = width
self.h = height
self.fixedLocationsArray = fixedLocations
self.groupsArray = groupsArray
self.npfixedLocationsArray = np.array(fixedLocations)
self.npgroupsArray = np.array(groupsArray)
self.board = [] # Contains the solution
self.generate_board()
def generate_board(self):
"""
Creates a nested list to represent the game board
"""
for i in range(self.h):
self.board.append([0]*self.w)
def print_board(self): # Prints out the final board solution
print(" ")
print("------")
for elem in self.board:
print(elem)
print("------")
print(" ")
def generate_legal_moves(self, cur_pos, n):
"""
Generates a list of legal moves for the knight to take next
"""
possible_pos = []
move_offsets = [(1, 2), (1, -2), (-1, 2), (-1, -2),
(2, 1), (2, -1), (-2, 1), (-2, -1)]
locationOfNumberInFixed = [(ix,iy) for ix, row in enumerate(self.fixedLocationsArray) for iy, i in enumerate(row) if i == n+1]
groupsizeIsNotExcessive = self.groupsNotExcessiveSize(self.board, self.groupsArray)
for move in move_offsets:
new_x = cur_pos[0] + move[0]
new_y = cur_pos[1] + move[1]
new_pos = (new_x, new_y)
if groupsizeIsNotExcessive:
if locationOfNumberInFixed:
print(f"This number {n+1} exists in the fixed grid at {locationOfNumberInFixed[0]}")
if locationOfNumberInFixed[0] == new_pos:
print(f"Next position is {new_pos} and matches location in fixed")
possible_pos.append((new_x, new_y))
else:
continue
elif not locationOfNumberInFixed: # if the current index of move is not in table, then evaluate if it is a legal move
if (new_x >= self.h): # if it is out of height of the board, continue, don't app onto the list of possible moves
continue
elif (new_x < 0):
continue
elif (new_y >= self.w):
continue
elif (new_y < 0):
continue
else:
possible_pos.append((new_x, new_y))
else:
continue
print(f"The legal moves for index {n} are {possible_pos}")
print(f"The current board looks like:")
self.print_board()
return possible_pos
def sort_lonely_neighbors(self, to_visit, n):
"""
It is more efficient to visit the lonely neighbors first,
since these are at the edges of the chessboard and cannot
be reached easily if done later in the traversal
"""
neighbor_list = self.generate_legal_moves(to_visit, n)
empty_neighbours = []
for neighbor in neighbor_list:
np_value = self.board[neighbor[0]][neighbor[1]]
if np_value == 0:
empty_neighbours.append(neighbor)
scores = []
for empty in empty_neighbours:
score = [empty, 0]
moves = self.generate_legal_moves(empty, n)
for m in moves:
if self.board[m[0]][m[1]] == 0:
score[1] += 1
scores.append(score)
scores_sort = sorted(scores, key = lambda s: s[1])
sorted_neighbours = [s[0] for s in scores_sort]
return sorted_neighbours
def groupby_perID_and_sum(self, board, groups):
# Convert into numpy arrays
npboard = np.array(board)
npgroups = np.array(groups)
# Get argsort indices, to be used to sort a and b in the next steps
board_flattened = npboard.ravel()
groups_flattened = npgroups.ravel()
sidx = groups_flattened.argsort(kind='mergesort')
board_sorted = board_flattened[sidx]
groups_sorted = groups_flattened[sidx]
# Get the group limit indices (start, stop of groups)
cut_idx = np.flatnonzero(np.r_[True,groups_sorted[1:] != groups_sorted[:-1],True])
# Create cut indices for all unique IDs in b
n = groups_sorted[-1]+2
cut_idxe = np.full(n, cut_idx[-1], dtype=int)
insert_idx = groups_sorted[cut_idx[:-1]]
cut_idxe[insert_idx] = cut_idx[:-1]
cut_idxe = np.minimum.accumulate(cut_idxe[::-1])[::-1]
# Split input array with those start, stop ones
arrayGroups = [board_sorted[i:j] for i,j in zip(cut_idxe[:-1],cut_idxe[1:])]
arraySum = [np.sum(a) for a in arrayGroups]
sumsInListSame = arraySum.count(arraySum[0]) == len(arraySum)
return sumsInListSame
def groupsNotExcessiveSize(self, board, groups):
# Convert into numpy arrays
npboard = np.array(board)
npgroups = np.array(groups)
# Get argsort indices, to be used to sort a and b in the next steps
board_flattened = npboard.ravel()
groups_flattened = npgroups.ravel()
sidx = groups_flattened.argsort(kind='mergesort')
board_sorted = board_flattened[sidx]
groups_sorted = groups_flattened[sidx]
# Get the group limit indices (start, stop of groups)
cut_idx = np.flatnonzero(np.r_[True,groups_sorted[1:] != groups_sorted[:-1],True])
# Create cut indices for all unique IDs in b
n = groups_sorted[-1]+2
cut_idxe = np.full(n, cut_idx[-1], dtype=int)
insert_idx = groups_sorted[cut_idx[:-1]]
cut_idxe[insert_idx] = cut_idx[:-1]
cut_idxe = np.minimum.accumulate(cut_idxe[::-1])[::-1]
# Split input array with those start, stop ones
arrayGroups = [board_sorted[i:j] for i,j in zip(cut_idxe[:-1],cut_idxe[1:])]
arraySum = [np.sum(a) for a in arrayGroups]
print(arraySum)
# Check if either groups aren't too large
groupSizeNotExcessive = all(sum <= 197 for sum in arraySum)
return groupSizeNotExcessive
def tour(self, n, path, to_visit):
"""
Recursive definition of knights tour. Inputs are as follows:
n = current depth of search tree
path = current path taken
to_visit = node to visit, i.e. the coordinate
"""
self.board[to_visit[0]][to_visit[1]] = n # This writes the number on the grid
path.append(to_visit) #append the newest vertex to the current point
print(f"Added {n}")
print(f"For {n+1} visiting: ", to_visit)
if self.groupby_perID_and_sum(self.board, self.npgroupsArray): #if all areas sum
self.print_board()
print(path)
print("Done! All areas sum equal")
sys.exit(1)
else:
sorted_neighbours = self.sort_lonely_neighbors(to_visit, n)
for neighbor in sorted_neighbours:
self.tour(n+1, path, neighbor)
#If we exit this loop, all neighbours failed so we reset
self.board[to_visit[0]][to_visit[1]] = 0
try:
path.pop()
print("Going back to: ", path[-1])
except IndexError:
print("No path found")
sys.exit(1)
if __name__ == '__main__':
#Define the size of grid. We are currently solving for an 8x8 grid
kt0 = KnightsTour(10, 10, fixedLocationsArray, groupsArray)
kt0.tour(1, [], (3, 0))
# kt0.tour(1, [], (7, 0))
# kt0.tour(1, [], (7,2))
# kt0.tour(1, [], (6,3))
# kt0.tour(1, [], (4,3))
# kt0.tour(1, [], (3,2))
# startingPositions = [(3, 0), (7, 0), (7,2), (6,3), (4,3), (3,2)]
kt0.print_board()
Here are some observations that you could include to be able to stop more early in your backtracking.
First of all remember that for n steps the total sum in all areas can be computed with the formula n(n+1)/2. This number has to be divisible evenly into the groups i.e. it has to be divisible by 17 which is the amount of groups.
Furthermore if we look at the 12 we can conclude that the 11 and 13 must have been in the same area so we get a lower bound on the number for each area as 2+5+8+11+12+13=51.
And lastly we have groups of size two so the largest two step numbers must make up the total sum for one group.
Using those conditions we can calculate the remaining possible amount of steps with
# the total sum is divisible by 17:
# n*(n+1)/2 % 17 == 0
# the sum for each group is at least the predictable sum for
# the biggest given group 2+5+8+11+12+13=51:
# n*(n+1)/(2*17) >= 51
# since there are groups of two elements the sum of the biggest
# two numbers must be as least as big as the sum for each group
# n+n-1 >= n*(n+1)/(2*17)
[n for n in range(101) if n*(n+1)/2 % 17 == 0 and n*(n+1)/(2*17) >= 51 and n+n-1 >= n*(n+1)/(2*17)]
giving us
[50,51].
So the knight must have taken either 50 or 51 steps and the sum for each area must be either 75 or 78.

Simple Mutation with a Probability

As per the section of code below, I am trying to implement an automatic and random mutation process.
data = [0,1,0,0,0,0,0,1,0,0,1,1,0,0,1]
data[random.randint(0,len(data)-1)]=random.randrange(0,1)
print(data)
The code is an adaptation of some other posts I have found, although it is randomly mutating a value every time with either a 0 or 1. I require this to occur with only a certain probability (such as a 0.05 chance of mutation) rather than always being guaranteed.
Additionally, often a 0 is being replaced with a 0 and therefore there is no change to the output, so I would like to limit it in a way that a 0 will only mutate to a 1 and a 1 mutates to a 0.
I would really appreciate the assistance in resolving these two issues.
Resume
mutate any value with a choosen probability
randomly choose the position
when the position is choosen, switch between 0 and 1
def mutate(data, proba=0.05):
if random.random() < proba:
data[random.randrange(len(data))] ^= 1
if __name__ == '__main__':
data = [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1]
for i in range(10):
mutate(data)
print(data)
import random
def changeData(data):
seed = random.randint(0,1000)
# probability of 0.05 (50 / 1000)
if seed <= 50:
indexToChange = random.randint(0,len(data)-1)
# change 0 with 1 and viceversa
data[indexToChange] = 1 if data[indexToChange] == 0 else 0
if __name__== '__main__':
data = [0,1,0,0,0,0,0,1,0,0,1,1,0,0,1]
for i in range(0,100):
changeData(data)
print(data)
You can do as following:
For each element in data, mutate it (1 - val) only if a random value generated by random() function is less than the defined mutation probability.
For example:
import random
mutation_prob = 0.05
data = [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1]
mutated_data = [1 - x if random.random() < mutation_prob else x for x in data]
If the mutation should be decided regarding the data as a whole, you can do:
mutation_prob = 0.05
data = [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1]
do_mutation = random.random() < mutation_prob
mutated_data = [1 - x if do_mutation else x for x in data]

compare two arrays to make an accuracy of KNN prediction

I have two arrays from which I have to find the accuracy of my prediction.
predictions = [1, 0, 0, 1, 1, 1, 0, 1, 1, 0]
y_test = [1, 0, 0, 1, 0, 1, 0, 1, 1, 1]
so in this case, the accuracy is = (8/10)*100 = 80%
I have written a method to do this task. Here is my code, but I dont get the accuracy of 80% in this case.
def getAccuracy(y_test, predictions):
correct = 0
for x in range(len(y_test)):
if y_test[x] is predictions[x]:
correct += 1
return (correct/len(y_test)) * 100.0
Thanks for helping me.
You're code should work, if the numbers in the arrays are in a specific range that are not recreated by the python interpreter. This is because you used is which is an identity check and not an equality check. So, you are checking memory addresses, which are only equal for a specific range of numbers. So, use == instead and it will always work.
For a more Pythonic solution you can also take a look at list comprehensions:
assert len(predictions) == len(y_test), "Unequal arrays"
identity = sum([p == y for p, y in zip(predictions, y_test)]) / len(predictions) * 100
if you want to take 80.0 as result for your example, It's doing that.
Your code gives 80.0 as you wanted, however you should use == instead of is, see the reason.
def getAccuracy(y_test, predictions):
n = len(y_test)
correct = 0
for x in range(n):
if y_test[x] == predictions[x]:
correct += 1
return (correct/n) * 100.0
predictions = [1, 0, 0, 1, 1, 1, 0, 1, 1, 0]
y_test = [1, 0, 0, 1, 0, 1, 0, 1, 1, 1]
print(getAccuracy(y_test, predictions))
80.0
Here's an implementation using Numpy:
import numpy as np
n = len(y_test)
100*np.sum(np.isclose(predictions, y_test))/n
or if you convert your lists to numpy arrays, then
100*np.sum(predictions == y_test)/n

Roulette Wheel Selection for non-ordered fitness values

I need to have a fitness proportionate selection approach to a GA, however my population cant loose the structure (order), in this case while generating the probabilities, I believe the individuals get the wrong weights, the program is:
population=[[[0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1], [6], [0]],
[[0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1], [4], [1]],
[[0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0], [6], [2]],
[[1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0], [4], [3]]]
popultion_d={'0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1': 6,
'0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,1': 4,
'0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0': 6,
'1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0': 4}
def ProbabilityList(population_d):
fitness = population_d.values()
total_fit = (sum(fitness))
relative_fitness = [f/total_fit for f in fitness]
probabilities = [sum(relative_fitness[:i+1]) for i in range(len(relative_fitness))]
return (probabilities)
def FitnessProportionateSelection(population, probabilities, number):
chosen = []
for n in range(number):
r = random.random()
for (i, individual) in enumerate(population):
if r <= probabilities[i]:
chosen.append(list(individual))
break
return chosen
number=2
The population element is: [[individual],[fitness],[counter]]
The probabilities function output is: [0.42857142857142855, 0.5714285714285714, 0.8571428571428571, 1.0]
What I notice here is that the previous weight is summed up to the next one, not necessarily being in crescent order, so a think a higher weight is given to the cromosome with a lowest fitness.
I dont want to order it because I need to index the lists by position later, so I think I will have wrong matches.
Anyone knows a possible solution, package or different approach to perform a weighted the selection in this case?
p.s: I know the dictionary may be redundant here, but I had several other problems using the list itself.
Edit: I tried to use random.choices() as you can see below (using relative fitness):
def FitnessChoices(population, probabilities, number):
return random.choices(population, probabilities, number)
But I get this error: TypeError: choices() takes from 2 to 3 positional arguments but 4 were given
Thank you!
Using random.choices is certainly a good idea. You just need to understand the function call. You have to specify, whether your probabilities are marginal or cumulated. So you could use either
import random
def ProbabilityList(population_d):
fitness = population_d.values()
total_fit = sum(fitness)
relative_fitness = [f/total_fit for f in fitness]
return relative_fitness
def FitnessChoices(population, relative_fitness, number):
return random.choices(population, weights = relative_fitness, k = number)
or
import random
def ProbabilityList(population_d):
fitness = population_d.values()
total_fit = sum(fitness)
relative_fitness = [f/total_fit for f in fitness]
cum_probs = [sum(relative_fitness[:i+1]) for i in range(len(relative_fitness))]
return cum_probs
def FitnessChoices(population, cum_probs, number):
return random.choices(population, cum_weights = cum_probs, k = number)
I'd recommend you to have a look at the differences between keyword and positional arguments in python.

Calculate stationary distribution of Markov chain in Python

I've been working on a Google foobar problem for a couple of days and have all but one test passing, and I'm pretty stuck at this point. Let me know if you have any ideas! I'm using a method described here, and I have a working example up on repl.it here. Here's the problem spec:
Doomsday Fuel
Making fuel for the LAMBCHOP's reactor core is a tricky process because of the exotic matter involved. It starts as raw ore, then during processing, begins randomly changing between forms, eventually reaching a stable form. There may be multiple stable forms that a sample could ultimately reach, not all of which are useful as fuel.
Commander Lambda has tasked you to help the scientists increase fuel creation efficiency by predicting the end state of a given ore sample. You have carefully studied the different structures that the ore can take and which transitions it undergoes. It appears that, while random, the probability of each structure transforming is fixed. That is, each time the ore is in 1 state, it has the same probabilities of entering the next state (which might be the same state). You have recorded the observed transitions in a matrix. The others in the lab have hypothesized more exotic forms that the ore can become, but you haven't seen all of them.
Write a function answer(m) that takes an array of array of nonnegative ints representing how many times that state has gone to the next state and return an array of ints for each terminal state giving the exact probabilities of each terminal state, represented as the numerator for each state, then the denominator for all of them at the end and in simplest form. The matrix is at most 10 by 10. It is guaranteed that no matter which state the ore is in, there is a path from that state to a terminal state. That is, the processing will always eventually end in a stable state. The ore starts in state 0. The denominator will fit within a signed 32-bit integer during the calculation, as long as the fraction is simplified regularly.
*For example, consider the matrix m:
[
[0,1,0,0,0,1], # s0, the initial state, goes to s1 and s5 with equal probability
[4,0,0,3,2,0], # s1 can become s0, s3, or s4, but with different probabilities
[0,0,0,0,0,0], # s2 is terminal, and unreachable (never observed in practice)
[0,0,0,0,0,0], # s3 is terminal
[0,0,0,0,0,0], # s4 is terminal
[0,0,0,0,0,0], # s5 is terminal
]
So, we can consider different paths to terminal states, such as:
s0 -> s1 -> s3
s0 -> s1 -> s0 -> s1 -> s0 -> s1 -> s4
s0 -> s1 -> s0 -> s5
Tracing the probabilities of each, we find that
s2 has probability 0
s3 has probability 3/14
s4 has probability 1/7
s5 has probability 9/14
So, putting that together, and making a common denominator, gives an answer in the form of
[s2.numerator, s3.numerator, s4.numerator, s5.numerator, denominator] which is
[0, 3, 2, 9, 14].*
Test cases
Inputs:
(int) m = [[0, 2, 1, 0, 0], [0, 0, 0, 3, 4], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
Output:
(int list) [7, 6, 8, 21]
Inputs:
(int) m = [[0, 1, 0, 0, 0, 1], [4, 0, 0, 3, 2, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
Output:
(int list) [0, 3, 2, 9, 14]
Here's my code so far.
from __future__ import division
from itertools import compress
from itertools import starmap
from operator import mul
import fractions
def convertMatrix(transMatrix):
probMatrix = []
for i in range(len(transMatrix)):
row = transMatrix[i]
newRow = []
rowSum = sum(transMatrix[i])
if all([v == 0 for v in transMatrix[i]]):
for j in transMatrix[i]:
newRow.append(0)
newRow[i] = 1
probMatrix.append(newRow)
else:
for j in transMatrix[i]:
if j == 0:
newRow.append(0)
else:
newRow.append(j/rowSum)
probMatrix.append(newRow)
return probMatrix
def answer(m):
# convert matrix numbers into probabilities
probMatrix = convertMatrix(m)
# find terminal states
terminalStateFilter = []
for row in range(len(m)):
if all(x == 0 for x in m[row]):
terminalStateFilter.append(True)
else:
terminalStateFilter.append(False)
# multiply matrix by probability vector
oldFirstRow = probMatrix[0]
probVector = None
for i in range(3000):
probVector = [sum(starmap(mul, zip(oldFirstRow, col))) for col in zip(*probMatrix)]
oldFirstRow = probVector
# generate numerators
numerators = []
for i in probVector:
numerator = fractions.Fraction(i).limit_denominator().numerator
numerators.append(numerator)
# generate denominators
denominators = []
for i in probVector:
denominator = fractions.Fraction(i).limit_denominator().denominator
denominators.append(denominator)
# calculate factors to multiply numerators by
factors = [max(denominators)/x for x in denominators]
# multiply numerators by factors
numeratorsTimesFactors = [a*b for a,b in zip(numerators, factors)]
# filter numerators by terminal state booleans
terminalStateNumerators = list(compress(numeratorsTimesFactors, terminalStateFilter))
# append numerators and denominator to answer
answerlist = []
for i in terminalStateNumerators:
answerlist.append(i)
answerlist.append(max(denominators))
return list(map(int, answerlist))

Categories