I am trying to make a genetic algorithm that finds the word given in the console input. But I don't know if I succeeded to do a full genetic algorithm.
Here is the code:
main.py:
from population import Population
target = input()
maxPop = 10
mutation = 100
print("\n\n\n")
pop = Population(target, maxPop, mutation)
population.py:
import random
from ADN import genetic
class Population:
def __init__(self, target, maxPop, mut):
adn = genetic()
self.popul = []
i = 0
while i < maxPop:
self.popul.append(adn.genFirst(len(target)))
print(self.popul[i])
i+=1
#oldPop = self.popul
#adn.fitness(oldPop, target)
#"""
while target not in self.popul:
oldPop = self.popul
self.popul = adn.fitness(oldPop, target)
if target in self.popul:
return
#"""
ADN.py:
import random
class genetic:
def genFirst(self, length):
bestGenes = ""
self.letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890[],. "
word = ""
i = 0
while i < length:
word += random.choice(self.letters)
i+=1
return word
def fitness(self, oldPop, target):
newPop = []
j = 0
for word in oldPop:
newW = ""
for letter in word:
if(letter not in target):
letter = random.choice(self.letters)
else:
if(target.index(letter) != word.index(letter)):
letter = random.choice(self.letters)
newW += letter
newPop.append(newW)
print(newPop)
return newPop
If it is not a full genetic algorithm, what is missing?
No, it's not a genetic algorithm. It is not even an evolutionary algorithm. It misses the fitness function which should calculate how good is every member of the calculation. After that you should decide which code would you want to make: genetic or evolutionary. Being a beginner you should try the evolutionary algorithm, it's easier and it does not contain the crossover function (which is difficult for beginners).
Try this:
import random
genes = "abcdefghijklmnopqrsttuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-[]()1234567890;<>?/ "
target = input()
def genPar(length):
parent = []
for i in range(length):
parent.append(random.choice(genes))
return "".join(parent)
def fitness(parent):
total = 0
for i in range(len(parent)):
if(parent[i] == target[i]):
total += 1
return total
def mutate(parent):
index = random.choice(range(len(parent)))
child = []
for i in range(len(parent)):
if(i == index):
letter = random.choice(genes)
else:
letter = parent[i]
child.append(letter)
return "".join(child)
parent = genPar(len(target))
bestPar = parent
bestFitness = fitness(parent)
print(parent)
generations = 1
while True:
child = mutate(bestPar)
childFit = fitness(child)
if(childFit > bestFitness):
bestFitness = childFit
bestPar = child
print(child)
generations += 1
if(child == target):
break
print("\nGenerations: " + str(generations))
Until u see Initialization -> Fitness -> Genetic operators (mutation, crossover) -> Fitness -> Substitution cycle you can't say it is Genetic/Evolutionary algorithm :)...
for the basic genetic algorithm, you need to use some operator selection, fitness, mutation, crossover.
there different types of selection, crossover, and mutation that you can use based on your problem.
a simple example of crossover and mutation.
def single_point_crossover(parent1,parent2):
crossover_point = random.randint(1,9)
#print("crossover point", crossover_point)
child_1 = np.hstack((parent1[0:crossover_point], parent2[crossover_point:]))
child_2 = np.hstack((parent2[:crossover_point],parent1[crossover_point:]))
return child_1,child_2
def mutation(parent1,parent2):
n = len(parent1)
pos_1 = random.randint(0,n-1)
pos_2 = random.randint(0,n-1)
#print(pos_1, pos_2)
def swap(sol, posA, posB):
result = sol.copy()
elA = sol[posA]
elB = sol[posB]
result[posA] = elB
result[posB] = elA
return result
child1 = swap(parent1, pos_1, pos_2)
child2 = swap(parent2, pos_1, pos_2)
return child1,child2
Related
this is sample code of 8 puzzle game which take two matrix initial and goal state .
class Node:
def __init__(self,data,level,fval):
""" Initialize the node with the data, level of the node and the calculated fvalue """
self.data = data
self.level = level
self.fval = fval
def generate_child(self):
""" Generate child nodes from the given node by moving the blank space
either in the four directions {up,down,left,right} """
x,y = self.find(self.data,'_')
""" val_list contains position values for moving the blank space in either of
the 4 directions [up,down,left,right] respectively. """
val_list = [[x,y-1],[x,y+1],[x-1,y],[x+1,y]]
children = []
for i in val_list:
child = self.shuffle(self.data,x,y,i[0],i[1])
if child is not None:
child_node = Node(child,self.level+1,0)
children.append(child_node)
return children
def shuffle(self,puz,x1,y1,x2,y2):
""" Move the blank space in the given direction and if the position value are out
of limits the return None """
if x2 >= 0 and x2 < len(self.data) and y2 >= 0 and y2 < len(self.data):
temp_puz = []
temp_puz = self.copy(puz)
temp = temp_puz[x2][y2]
temp_puz[x2][y2] = temp_puz[x1][y1]
temp_puz[x1][y1] = temp
return temp_puz
else:
return None
def copy(self,root):
""" Copy function to create a similar matrix of the given node"""
temp = []
for i in root:
t = []
for j in i:
t.append(j)
temp.append(t)
return temp
def find(self,puz,x):
""" Specifically used to find the position of the blank space """
for i in range(0,len(self.data)):
for j in range(0,len(self.data)):
if puz[i][j] == x:
return i,j
class Puzzle:
def __init__(self,size):
""" Initialize the puzzle size by the specified size,open and closed lists to empty """
self.n = size
self.open = []
self.closed = []
def accept(self):
""" Accepts the puzzle from the user """
puz = []
for i in range(0,self.n):
temp = input().split(" ")
puz.append(temp)
return puz
def f(self,start,goal):
""" Heuristic Function to calculate hueristic value f(x) = h(x) + g(x) """
return self.h(start.data,goal)+start.level
def h(self,start,goal):
""" Calculates the different between the given puzzles """
temp = 0
for i in range(0,self.n):
for j in range(0,self.n):
if start[i][j] != goal[i][j] and start[i][j] != '_':
temp += 1
return temp
def process(self):
""" Accept Start and Goal Puzzle state"""
print("Enter the start state matrix \n")
start = self.accept()
print("Enter the goal state matrix \n")
goal = self.accept()
start = Node(start,0,0)
start.fval = self.f(start,goal)
""" Put the start node in the open list"""
self.open.append(start)
print("\n")
count=0
while True:
cur = self.open[0]
count=count+1
print("This Node number = \n", count)
print("")
print(" | ")
print(" | ")
print(" \\\'/ \n")
for i in cur.data:
for j in i:
print(j,end=" ")
print("")
""" If the difference between current and goal node is 0 we have reached the goal node"""
if(self.h(cur.data,goal) == 0):
break
for i in cur.generate_child():
i.fval = self.f(i,goal)
self.open.append(i)
self.closed.append(cur)
del self.open[0]
""" sort the opne list based on f value """
self.open.sort(key = lambda x:x.fval,reverse=False)
puz = Puzzle(3)
puz.process()
this code take initial state and goal state and start traversal and stop until
specific or reached goal state
i want to add limit of traversal to this code.so it itterate in
specific boundary either reached goal state or not
I am trying to solve Determining DNA Health challenge from Hackerrank using python. (I have to add I am somewhat new to python 3. Still learning the language)
My solution fails for test cases 7, 8 and 9 with a message reading "Wrong Answer".
When I run the following code locally, I can confirm that for these test cases my implementation produces the expected output.
I am wondering what would be the problem.
I am a bit puzzled at the moment. Is there a problem with my implementation? If so how come it produces correct answers for 28 test cases but fails on these 3? Or is it a misleading/confusing result message from Hacker Rank, as I happen to know that people find these 3 test cases (7, 8 and 9) problematic from what I learnt from reading discussions.
Any help would be highly appreciated.
Here is the code I wrote:
from bisect import bisect_left
from bisect import bisect_right
import sys
from unittest.mock import right
class TrieNode(object):
def __init__(self):
self.subnodes = {}
self.isTerminal = False
self.indexList = []
self.healthList = []
def addSubnode(self, aChar):
if (self.subnodes.get(aChar)):
return self.subnodes[aChar]
else:
newNode = TrieNode()
self.subnodes[aChar] = newNode
return newNode
def addIndexAndValue(self, index, health):
self.isTerminal = True
self.indexList.append(index)
lastHealth = 0
healthLength = len(self.healthList)
if (healthLength>0):
lastHealth = self.healthList[healthLength-1]
self.healthList.append(lastHealth + health)
def getSubnodeFor(self, aChar):
return self.subnodes.get(aChar)
def getValueForIndexes(self, startIndex, endIndex):
listSize = len(self.indexList)
if listSize < 1:
return 0
elif listSize == 1:
if startIndex <= self.indexList[0] and endIndex >= self.indexList[0]:
return self.healthList[0]
else:
return 0
else: # listSize > 1
rightInd = bisect_left(self.indexList, endIndex)
if rightInd < listSize and endIndex < self.indexList[0]:
return 0
big = 0
if rightInd >= listSize:
big = self.healthList[listSize - 1]
else:
if endIndex >= self.indexList[rightInd]:
big = self.healthList[rightInd]
else:
big = self.healthList[rightInd-1]
leftInd = bisect_left(self.indexList, startIndex)
small = 0
if leftInd >= listSize:
return 0
else:
if startIndex <= self.indexList[leftInd]:
if (leftInd > 0):
small = self.healthList[leftInd - 1]
else:
small = 0
else:
small = self.healthList[leftInd]
return big - small
class Trie(object):
def __init__(self):
self.root = TrieNode()
def getRoot(self):
return self.root
def createTrie(self, genes, healths):
for i in range(len(genes)):
node = self.root
for c in genes[i]:
node = node.addSubnode(c)
node.addIndexAndValue(i, healths[i])
def calculateHealth(trie, d, first, last):
total = 0
dLength = len(d)
for i in range(0, dLength):
node = trie.getRoot()
for j in range(i, dLength):
node = node.getSubnodeFor(d[j])
if node != None:
if node.isTerminal:
val = node.getValueForIndexes(first, last)
total = total + val
else:
break
return total
def readFromFile(aFileName):
inputArr = None
with open('../hackerRank/src/' + aFileName, encoding='utf-8') as aFile:
inputArr = aFile.read().splitlines()
return inputArr
def runFor(fileName, minimumValue, maximumValue):
inp = readFromFile(fileName)
n = inp[0]
genes = inp[1].rstrip().split()
healths = list(map(int, inp[2].rstrip().split()))
trie = Trie()
trie.createTrie(genes, healths)
s = int(inp[3])
minVal = sys.maxsize
maxVal = -1
for fItr in range(s):
line = inp[fItr+4].split()
first = int(line[0])
last = int(line[1])
d = line[2]
val = calculateHealth(trie, d, first, last)
if val < minVal:
minVal = val
if val > maxVal:
maxVal = val
print (minVal,maxVal)
assert minimumValue == minVal
assert maximumValue == maxVal
# TextX.txt 's are simple text files, which hold test data for regarding test case
# following the file name are real expected numbers for each relevant test case
# I got those from hacker rank
runFor('Test2.txt', 15806635, 20688978289)
runFor('Test7.txt', 0, 7353994)
runFor('Test8.txt', 0, 8652768)
runFor('Test9.txt', 0, 9920592)
runFor('Test33.txt', 11674463, 11674463)
One reference that might assist can be found at:
https://gist.github.com/josephmisiti/940cee03c97f031188ba7eac74d03a4f
Please read the notes he has included.
This is the input I have been using.
6
a b c aa d b
1 2 3 4 5 6
3
1 5 caaab
0 4 xyz
2 4 bcdybc
class Tape(object):
blank_symbol = " "
def __init__(self,
tape_string = ""):
self.__tape = dict((enumerate(tape_string)))
# last line is equivalent to the following three lines:
#self.__tape = {}
#for i in range(len(tape_string)):
# self.__tape[i] = input[i]
def __str__(self):
s = ""
min_used_index = min(self.__tape.keys())
max_used_index = max(self.__tape.keys())
for i in range(min_used_index, max_used_index):
s += self.__tape[i]
return s
def __getitem__(self,index):
if index in self.__tape:
return self.__tape[index]
else:
return Tape.blank_symbol
def __setitem__(self, pos, char):
self.__tape[pos] = char
class TuringMachine(object):
def __init__(self,
tape = "",
blank_symbol = " ",
initial_state = "",
final_states = None,
transition_function = None):
self.__tape = Tape(tape)
self.__head_position = 0
self.__blank_symbol = blank_symbol
self.__current_state = initial_state
if transition_function == None:
self.__transition_function = {}
else:
self.__transition_function = transition_function
if final_states == None:
self.__final_states = set()
else:
self.__final_states = set(final_states)
def get_tape(self):
return str(self.__tape)
def step(self):
char_under_head = self.__tape[self.__head_position]
x = (self.__current_state, char_under_head)
if x in self.__transition_function:
y = self.__transition_function[x]
self.__tape[self.__head_position] = y[1]
if y[2] == "R":
self.__head_position += 1
elif y[2] == "L":
self.__head_position -= 1
self.__current_state = y[0]
def final(self):
if self.__current_state in self.__final_states:
return True
else:
return False
I am trying to implement Turing automata in Python. Can you tell me why this code doesn't work? e machine operates on an infinite[4] memory tape divided into discrete cells.[5] The machine positions its head over a cell and "reads" (scans)[6] the symbol there. Then, as per the symbol and its present place in a finite table[7] of user-specified instructions, the machine (i) writes a symbol (e.g., a digit or a letter from a finite alphabet) in the cell (some models allowing symbol erasure or no writing) then (ii) either moves the tape one cell left or right (some models allow no motion, some models move the head),[9] then (iii) (as determined by the observed symbol and the machine's place in the table) either proceeds to a subsequent instruction or halts the computation.
Before adding i to list, convert it to float by float(i).
for i in lines:
print(i)
if(i != " "):
if(a % 4 == 0 and a != 1):
second.append(float(i))
tuple(tmp)
first.append(tmp)
tmp = []
else:
tmp.append(float(i))
a+=1
I am trying to understand how to implement a genetic algorithm and wrote a simple string guess. I am having trouble understanding why this solution is not working.
I believe that my problem is in my populating my new generations? The newest generations do not seem to have improved fitness values. I am also not sure if I am doing the crossover and mutation rates correctly. Any help would be really appreciated!
POP_SIZE = 300;
CROSSOVER_RATE = 0.7;
MUTATION_RATE = 0.01
GENESET = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"
target = "Hello World"
RAND_NUM = random.random()
def generateBasePopulation(population_size):
population = dict()
for _ in range(POP_SIZE):
gene = generateParent(len(target))
population[gene] = 0
return population
def generateNewPopulation(population, population_size):
newPopulation = dict()
while(len(newPopulation) <= POP_SIZE):
child_one, child_two = crossover(child_one, child_two)
child_one = mutate(child_one)
child_two = mutate(child_two)
newPopulation[child] = 0
newPopulation[child_two] = 0
return newPopulation
def assignFitness(population):
for x in population:
population[x] = getFitness(x)
def generateParent(length):
genes = list("")
for i in range(0,length):
random_gene = random.choice(GENESET)
genes.append(random_gene)
return(''.join(genes))
def getFitness(candidate):
fitness = 0
for i in range(0, len(candidate) - 1):
if target[i] == candidate[i]:
fitness += 1
return(fitness)
def mutate(parent):
gene_index_to_mutate = random.randint(0, len(parent) - 1)
mutation_value = random.choice(GENESET)
genes = list(parent)
genes[gene_index_to_mutate] = mutation_value
return(''.join(genes))
def crossover(parentA, parentB):
if(RAND_NUM < CROSSOVER_RATE):
random_index = random.randint(0, len(target))
parentASlice = parentA[:random_index]
parentBSlice = parentB[random_index:]
return (parentASlice + parentBSlice), (parentBSlice + parentASlice)
return parentA, parentB
def chooseChild(population):
fitnessSum = sum(population.values())
pick = random.uniform(0, fitnessSum)
current = 0
for pop in population:
current += population[pop]
if current >= pick:
return pop
def main():
population = generateBasePopulation(POP_SIZE)
targetNotFound = True
while(targetNotFound):
assignFitness(population)
if target in population:
print("target found!")
targetNotFound = False
if(targetNotFound):
tempPopulation = generateNewPopulation(population, POP_SIZE)
population.clear()
population = tempPopulation
There are some issues with the generateNewPopulation function.
child_one and child_two are referenced before assignment
You need two individuals from the population to perform the crossover. There are several selection algorithms, but just to give an idea you could start with a form of tournament selection:
def extractFromPopulation(population):
best = random.choice(list(population.keys()))
for _ in range(4):
gene = random.choice(list(population.keys()))
if population[gene] > population[best]:
best = gene
return best
Here the selection pressure (range(4)) is fixed. It's one of the parameters you've to tune in a real case.
Now we have:
def generateNewPopulation(population, population_size):
newPopulation = dict()
while len(newPopulation) <= POP_SIZE:
child_one = extractFromPopulation(population)
child_two = extractFromPopulation(population)
# ...
The code still doesn't work because
new individuals aren't inserted in newPopulation
Just indent the two lines:
newPopulation[child] = 0
newPopulation[child_two] = 0
(they must be part of the while loop)
The revised generateNewPopulation function follows:
def generateNewPopulation(population, population_size):
newPopulation = dict()
while len(newPopulation) <= POP_SIZE:
child_one = extractFromPopulation(population)
child_two = extractFromPopulation(population)
child_one, child_two = crossover(child_one, child_two)
child_one = mutate(child_one)
child_two = mutate(child_two)
newPopulation[child_one] = 0
newPopulation[child_two] = 0
return newPopulation
The crossover function cannot be based on a fixed RAND_NUM value
Delete the RAND_NUM = random.random() assignment and change the crossover function to use a new random value at each call:
def crossover(parentA, parentB):
if random.random() < CROSSOVER_RATE:
random_index = random.randint(0, len(target))
parentASlice = parentA[:random_index]
parentBSlice = parentB[random_index:]
return (parentASlice + parentBSlice), (parentBSlice + parentASlice)
return parentA, parentB
Also the code doesn't correctly perform single point crossover since schemata of the second parent aren't preserved.
You could change many details to improve performance but, as a starting example, it's probably enough as it is (...it works).
Average number of generations to find a solution is about 158 (average on 200 runs).
EDIT (thanks to alexis for the comment)
MUTATION_RATE is unused and a mutation always happens. The mutate function should be something like:
def mutate(parent):
if random.random() < MUTATION_RATE:
gene_index_to_mutate = random.randint(0, len(parent) - 1)
mutation_value = random.choice(GENESET)
genes = list(parent)
genes[gene_index_to_mutate] = mutation_value
return ''.join(genes)
return parent
This fix is particularly important if you keep the roulette wheel selection algorithm (chooseChild often doesn't converge without the fix).
I am solving a puzzle (Finding if there exists an input for a given automata for which no matter what the starting state is, final state would be same everytime) and have written following python code. A few testcases are written in check method in the code. For these cases program is running fairly fast. However, for testcases where 50 lists(nodes) are present, the programis taking forever to execute. I am storing intermediate results to use further as well. Can someone please review the code and give suggestions on how to increase the performance of the code?
from itertools import product
from copy import deepcopy
class Node:
def __init__(self,id):
self.id = id
self.dict = {}
def __str__(self):
return str(id) + " : " + str(self.dict)
def __repr__(self):
return str(id) + " : " + str(self.dict)
def tryDelete(nodes,_len):
for id in nodes:
y = deepcopy(nodes)
x = y[id]
del y[id]
for id,node in y.items():
for input,result in node.dict.items():
if result == x:
if x.dict[input] == x:
node.dict[input] = node
else:
node.dict[input] = x.dict[input]
pass
if pathPossible(y,_len ,False) == -1:
return x.id
return -2
target = {}
def FindTarget(node,p):
if len(p) == 1:
return node.dict[p[0]]
if node not in target or p not in target[node]:
x = FindTarget(node,p[:-1]).dict[p[-1]]
if node not in target:
target[node] = {}
target[node][p] = x
return target[node][p]
def allSatisy(nodes,p):
x = None
for key,node in nodes.items():
if x is None:
x = FindTarget(node,p)
elif FindTarget(node,p) != x:
return False
return True
def allPossiblePaths(l,n):
#x = int(((l+1)*(l+2))/2)
for i in range(1, n+1):
for p in product(range(l),repeat=i):
yield p
def pathPossible(nodes,_len ,isItereate=True):
i = 1
isFound = False
for p in allPossiblePaths(_len,len(nodes)):
if allSatisy(nodes,p):
isFound = True
break
if isFound:
return -1
elif not isItereate:
return -2
else:
return tryDelete(nodes,_len)
def answer(li):
nodes = {}
for i in range(len(li)):
nodes[i] = Node(i)
for i in range(len(li)):
for j in range(len(li[i])):
nodes[i].dict[j] = nodes[li[i][j]]
return pathPossible(nodes,len(nodes[0].dict))
def check(li,ans):
# each item in the list is a node, each item i-th in the inner list tells to what node the transition happens for input i
x = answer(li)
print(str(li) + " : " + str(ans) + " : " + str(x))
def main():
check([[2,1],[2,0],[3,1],[1,0]],-1)
check([[1,2],[1,1],[2,2]],1)
check([[1,3,0],[1,0,2],[1,1,2],[3,3,3]],-1)
if __name__ == '__main__':
main()
UPDATE: I have done few code changes, but still this needs some review from you guys. Changed code:
from itertools import product
from copy import deepcopy
class Node:
def __init__(self,id):
self.id = id
self.dict = {}
def __str__(self):
return str(self.id) + " : " + str(self.dict)
def __repr__(self):
return str(self.id) + " : " + str(self.dict)
def tryDelete(nodes,_len):
for i in range(len(nodes)):
y = nodes[:]
x = y[i]
del y[i]
tNodes = []
for node in y:
for input,result in node.dict.items():
if result == x:
node.tDict = deepcopy(node.dict)
if x.dict[input] == x.id:
node.dict[input] = node
else:
node.dict[input] = x.dict[input]
if pathPossible(y,_len ,False) == -1:
return x.id
for n in tNodes:
n.dict = n.tDict
del n.tDict
return -2
target = {}
def FindTarget(node,p):
if len(p) == 1:
return node.dict[p[0]]
if node not in target or p not in target[node]:
x = Gnodes[FindTarget(node,p[:-1])].dict[p[-1]]
if node not in target:
target[node] = {}
target[node][p] = x
return target[node][p]
def allSatisy(nodes,p):
x = None
for node in nodes:
if x is None:
x = FindTarget(node,p)
elif FindTarget(node,p) != x:
return False
return True
def allPossiblePaths(l,n):
#x = int(((l+1)*(l+2))/2)
for i in range(1, n + 1):
for p in product(range(l),repeat=i):
yield p
def pathPossible(nodes,_len ,isItereate=True):
i = 1
isFound = False
for p in allPossiblePaths(_len,len(nodes)):
if allSatisy(nodes,p):
isFound = True
break
if isFound:
return -1
elif not isItereate:
return -2
else:
return tryDelete(nodes,_len)
Gnodes = []
def answer(li):
Gnodes[:] = []
for i in range(len(li)):
Gnodes.append(Node(i))#[i] = Node(i)
for i in range(len(li)):
for j in range(len(li[i])):
Gnodes[i].dict[j] = li[i][j]
return pathPossible(Gnodes,len(Gnodes[0].dict))
def check(li,ans):
x = answer(li)
print(str(li) + " : " + str(ans) + " : " + str(x))
def main():
check([[2,1],[2,0],[3,1],[1,0]],-1)
check([[1,2],[1,1],[2,2]],1)
check([[1,3,0],[1,0,2],[1,1,2],[3,3,3]],-1)
if __name__ == '__main__':
main()
There is a wonderful graph library called NetworkX. It deals with creating graphs and path finding. You specify what edges or paths exist in your Graph and you can find paths using a plethora of algorithms like breadth first search, or A*, and many others in the algorithms section. The best way to optimize your time is code reuse.
https://networkx.github.io