Python implementation of Determining DNA Health algorithm from HackerRank

Python implementation of Determining DNA Health algorithm from HackerRank - python

I am trying to solve Determining DNA Health challenge from Hackerrank using python. (I have to add I am somewhat new to python 3. Still learning the language)
My solution fails for test cases 7, 8 and 9 with a message reading "Wrong Answer".
When I run the following code locally, I can confirm that for these test cases my implementation produces the expected output.
I am wondering what would be the problem.
I am a bit puzzled at the moment. Is there a problem with my implementation? If so how come it produces correct answers for 28 test cases but fails on these 3? Or is it a misleading/confusing result message from Hacker Rank, as I happen to know that people find these 3 test cases (7, 8 and 9) problematic from what I learnt from reading discussions.
Any help would be highly appreciated.
Here is the code I wrote:
from bisect import bisect_left
from bisect import bisect_right
import sys
from unittest.mock import right
class TrieNode(object):
def __init__(self):
self.subnodes = {}
self.isTerminal = False
self.indexList = []
self.healthList = []
def addSubnode(self, aChar):
if (self.subnodes.get(aChar)):
return self.subnodes[aChar]
else:
newNode = TrieNode()
self.subnodes[aChar] = newNode
return newNode
def addIndexAndValue(self, index, health):
self.isTerminal = True
self.indexList.append(index)
lastHealth = 0
healthLength = len(self.healthList)
if (healthLength>0):
lastHealth = self.healthList[healthLength-1]
self.healthList.append(lastHealth + health)
def getSubnodeFor(self, aChar):
return self.subnodes.get(aChar)
def getValueForIndexes(self, startIndex, endIndex):
listSize = len(self.indexList)
if listSize < 1:
return 0
elif listSize == 1:
if startIndex <= self.indexList[0] and endIndex >= self.indexList[0]:
return self.healthList[0]
else:
return 0
else: # listSize > 1
rightInd = bisect_left(self.indexList, endIndex)
if rightInd < listSize and endIndex < self.indexList[0]:
return 0
big = 0
if rightInd >= listSize:
big = self.healthList[listSize - 1]
else:
if endIndex >= self.indexList[rightInd]:
big = self.healthList[rightInd]
else:
big = self.healthList[rightInd-1]
leftInd = bisect_left(self.indexList, startIndex)
small = 0
if leftInd >= listSize:
return 0
else:
if startIndex <= self.indexList[leftInd]:
if (leftInd > 0):
small = self.healthList[leftInd - 1]
else:
small = 0
else:
small = self.healthList[leftInd]
return big - small
class Trie(object):
def __init__(self):
self.root = TrieNode()
def getRoot(self):
return self.root
def createTrie(self, genes, healths):
for i in range(len(genes)):
node = self.root
for c in genes[i]:
node = node.addSubnode(c)
node.addIndexAndValue(i, healths[i])
def calculateHealth(trie, d, first, last):
total = 0
dLength = len(d)
for i in range(0, dLength):
node = trie.getRoot()
for j in range(i, dLength):
node = node.getSubnodeFor(d[j])
if node != None:
if node.isTerminal:
val = node.getValueForIndexes(first, last)
total = total + val
else:
break
return total
def readFromFile(aFileName):
inputArr = None
with open('../hackerRank/src/' + aFileName, encoding='utf-8') as aFile:
inputArr = aFile.read().splitlines()
return inputArr
def runFor(fileName, minimumValue, maximumValue):
inp = readFromFile(fileName)
n = inp[0]
genes = inp[1].rstrip().split()
healths = list(map(int, inp[2].rstrip().split()))
trie = Trie()
trie.createTrie(genes, healths)
s = int(inp[3])
minVal = sys.maxsize
maxVal = -1
for fItr in range(s):
line = inp[fItr+4].split()
first = int(line[0])
last = int(line[1])
d = line[2]
val = calculateHealth(trie, d, first, last)
if val < minVal:
minVal = val
if val > maxVal:
maxVal = val
print (minVal,maxVal)
assert minimumValue == minVal
assert maximumValue == maxVal
# TextX.txt 's are simple text files, which hold test data for regarding test case
# following the file name are real expected numbers for each relevant test case
# I got those from hacker rank
runFor('Test2.txt', 15806635, 20688978289)
runFor('Test7.txt', 0, 7353994)
runFor('Test8.txt', 0, 8652768)
runFor('Test9.txt', 0, 9920592)
runFor('Test33.txt', 11674463, 11674463)

One reference that might assist can be found at:
https://gist.github.com/josephmisiti/940cee03c97f031188ba7eac74d03a4f
Please read the notes he has included.
This is the input I have been using.
6
a b c aa d b
1 2 3 4 5 6
3
1 5 caaab
0 4 xyz
2 4 bcdybc

Related

All solutions for N-Queens problem prints empty board

As the title states, I've implemented the N-Queens problem for all soulutions but there is one problem, it prints an empty board. I saw a solution only where we put the printboard in an if statement but without a return in printboard i fail to see how it works`
#N Queens through Backtracking with all solutions
def initialize(n):
for key in ['queen','row','col','NWtoSE','SWtoNE']:
board[key] = {}
for i in range(n):
board['queen'][i] = -1
board['row'][i] = 0
board['col'][i] = 0
for i in range (2*n-1):
#Sum of NW to SE diagonal add to constant i.e (i+j) = const
board['SWtoNE'][i] = 0
for i in range (-(n-1), n):
#Difference of SW to NE diagonal is constant i.e (j-i) = const
board['NWtoSE'][i] = 0
def isAvailable(i, j):
return (board['row'][i] == 0 and board['col'][j] == 0 and
board['NWtoSE'][j-i] == 0 and board['SWtoNE'][j+i] == 0)
def addQueen(i, j):
board['queen'][i] = j
board['row'][i] = 1
board['col'][j] = 1
board['NWtoSE'][j-i] = 1
board['SWtoNE'][j+i] = 1
def undo(i, j):
board['queen'][i] = -1
board['row'][i] = 0
board['col'][j] = 0
board['NWtoSE'][j-i] = 0
board['SWtoNE'][j+i] = 0
def printboard():
for i in board['queen'].keys():
print((i, board['queen'][i]), end = " ")
print()
def placequeen(i):
n = len(board['queen'].keys())
for j in range(n):
if(isAvailable(i, j)):
addQueen(i, j)
if i == n-1:
printboard()
else:
placequeen(i+1)
undo(i,j)
board = {}
n = int(input("Enter number of queens : "))
initialize(n)
printboard()
The following is the solution which I don't understand
if placequeen(0): printboard()

Is this a genetic algorithm?

I am trying to make a genetic algorithm that finds the word given in the console input. But I don't know if I succeeded to do a full genetic algorithm.
Here is the code:
main.py:
from population import Population
target = input()
maxPop = 10
mutation = 100
print("\n\n\n")
pop = Population(target, maxPop, mutation)
population.py:
import random
from ADN import genetic
class Population:
def __init__(self, target, maxPop, mut):
adn = genetic()
self.popul = []
i = 0
while i < maxPop:
self.popul.append(adn.genFirst(len(target)))
print(self.popul[i])
i+=1
#oldPop = self.popul
#adn.fitness(oldPop, target)
#"""
while target not in self.popul:
oldPop = self.popul
self.popul = adn.fitness(oldPop, target)
if target in self.popul:
return
#"""
ADN.py:
import random
class genetic:
def genFirst(self, length):
bestGenes = ""
self.letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890[],. "
word = ""
i = 0
while i < length:
word += random.choice(self.letters)
i+=1
return word
def fitness(self, oldPop, target):
newPop = []
j = 0
for word in oldPop:
newW = ""
for letter in word:
if(letter not in target):
letter = random.choice(self.letters)
else:
if(target.index(letter) != word.index(letter)):
letter = random.choice(self.letters)
newW += letter
newPop.append(newW)
print(newPop)
return newPop
If it is not a full genetic algorithm, what is missing?

No, it's not a genetic algorithm. It is not even an evolutionary algorithm. It misses the fitness function which should calculate how good is every member of the calculation. After that you should decide which code would you want to make: genetic or evolutionary. Being a beginner you should try the evolutionary algorithm, it's easier and it does not contain the crossover function (which is difficult for beginners).
Try this:
import random
genes = "abcdefghijklmnopqrsttuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-[]()1234567890;<>?/ "
target = input()
def genPar(length):
parent = []
for i in range(length):
parent.append(random.choice(genes))
return "".join(parent)
def fitness(parent):
total = 0
for i in range(len(parent)):
if(parent[i] == target[i]):
total += 1
return total
def mutate(parent):
index = random.choice(range(len(parent)))
child = []
for i in range(len(parent)):
if(i == index):
letter = random.choice(genes)
else:
letter = parent[i]
child.append(letter)
return "".join(child)
parent = genPar(len(target))
bestPar = parent
bestFitness = fitness(parent)
print(parent)
generations = 1
while True:
child = mutate(bestPar)
childFit = fitness(child)
if(childFit > bestFitness):
bestFitness = childFit
bestPar = child
print(child)
generations += 1
if(child == target):
break
print("\nGenerations: " + str(generations))

Until u see Initialization -> Fitness -> Genetic operators (mutation, crossover) -> Fitness -> Substitution cycle you can't say it is Genetic/Evolutionary algorithm :)...

for the basic genetic algorithm, you need to use some operator selection, fitness, mutation, crossover.
there different types of selection, crossover, and mutation that you can use based on your problem.
a simple example of crossover and mutation.
def single_point_crossover(parent1,parent2):
crossover_point = random.randint(1,9)
#print("crossover point", crossover_point)
child_1 = np.hstack((parent1[0:crossover_point], parent2[crossover_point:]))
child_2 = np.hstack((parent2[:crossover_point],parent1[crossover_point:]))
return child_1,child_2
def mutation(parent1,parent2):
n = len(parent1)
pos_1 = random.randint(0,n-1)
pos_2 = random.randint(0,n-1)
#print(pos_1, pos_2)
def swap(sol, posA, posB):
result = sol.copy()
elA = sol[posA]
elB = sol[posB]
result[posA] = elB
result[posB] = elA
return result
child1 = swap(parent1, pos_1, pos_2)
child2 = swap(parent2, pos_1, pos_2)
return child1,child2

knapsack branch and bound wrong result

I have converted the code given at this link into a python version. The code is supposed to calculate the correct value of maximum value to be filled in knapsack of weight W. I have attached the code below:
#http://www.geeksforgeeks.org/branch-and-bound-set-2-implementation-of-01-knapsack/
from queue import Queue
class Node:
def __init__(self):
self.level = None
self.profit = None
self.bound = None
self.weight = None
def __str__(self):
return "Level: %s Profit: %s Bound: %s Weight: %s" % (self.level, self.profit, self.bound, self.weight)
def bound(node, n, W, items):
if(node.weight >= W):
return 0
profit_bound = int(node.profit)
j = node.level + 1
totweight = int(node.weight)
while ((j < n) and (totweight + items[j].weight) <= W):
totweight += items[j].weight
profit_bound += items[j].value
j += 1
if(j < n):
profit_bound += (W - totweight) * items[j].value / float(items[j].weight)
return profit_bound
Q = Queue()
def KnapSackBranchNBound(weight, items, total_items):
items = sorted(items, key=lambda x: x.value/float(x.weight), reverse=True)
u = Node()
v = Node()
u.level = -1
u.profit = 0
u.weight = 0
Q.put(u)
maxProfit = 0;
while not Q.empty():
u = Q.get()
if u.level == -1:
v.level = 0
if u.level == total_items - 1:
continue
v.level = u.level + 1
v.weight = u.weight + items[v.level].weight
v.profit = u.profit + items[v.level].value
if (v.weight <= weight and v.profit > maxProfit):
maxProfit = v.profit;
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
Q.put(v)
v.weight = u.weight
v.profit = u.profit
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
# print items[v.level]
Q.put(v)
return maxProfit
if __name__ == "__main__":
from collections import namedtuple
Item = namedtuple("Item", ['index', 'value', 'weight'])
input_data = open("test.data").read()
lines = input_data.split('\n')
firstLine = lines[0].split()
item_count = int(firstLine[0])
capacity = int(firstLine[1])
print "running from main"
items = []
for i in range(1, item_count+1):
line = lines[i]
parts = line.split()
items.append(Item(i-1, int(parts[0]), float(parts[1])))
kbb = KnapSackBranchNBound(capacity, items, item_count)
print kbb
The program is supposed to calculate value of 235 for following items inside file test.data:
5 10
40 2
50 3.14
100 1.98
95 5
30 3
The first line shows number of items and knapsack weight. Lines below first line shows the value and weight of those items. Items are made using a namedtuple and sorted according to value/weight. For this problem I am getting 135 instead of 235. What am I doing wrong here?
EDIT:
I have solved the problem of finding correct items based on branch and bound. If needed, one can check it here

The problem is that you're inserting multiple references to the same Node() object into your queue. The fix is to initialize two new v objects in each iteration of the while-loop as follows:
while not Q.empty():
u = Q.get()
v = Node() # Added line
if u.level == -1:
v.level = 0
if u.level == total_items - 1:
continue
v.level = u.level + 1
v.weight = u.weight + items[v.level].weight
v.profit = u.profit + items[v.level].value
if (v.weight <= weight and v.profit > maxProfit):
maxProfit = v.profit;
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
Q.put(v)
v = Node() # Added line
v.level = u.level + 1 # Added line
v.weight = u.weight
v.profit = u.profit
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
# print(items[v.level])
Q.put(v)
Without these reinitializations, you're modifying the v object that you already inserted into the queue.
This is different from C++ where the Node objects are values that are implicitly copied into the queue to avoid aliasing problems such as these.

Python: Recursion problems

I am trying to make a sudoku solver that solves boards very quickly. At the moment my solver works on easy boards but never terminates on harder boards. I believe it has something to do with my recursion because easy boards do not require recursion and hard boards do. Any help is appreciated.
import sys
def rowno(i):
return i // 9
def colno(i):
return i % 9
def boxno(i):
return (i // 9 // 3 )*3 + (i // 3) % 3
def isNeighbor(i, j):
if rowno(i) == rowno(j) or colno(i) == colno(j) or boxno(i) == boxno(j):
return True
else:
return False
def getFileName():
if sys.platform == "win32":
filename = input("Filename? ")
else:
filename = sys.argv[-1]
return filename
solutionlist = []
class Board(object):
def __init__(self, puzzle):
self.puzzle = puzzle
self.board = [Cell(int(value), idx) for idx, value in enumerate(puzzle)]
self.change = False
def printAll(self):
print [cell.candidates for cell in self.board]
#return str(" ")
def update(self):
self.change = False
l = [cell for cell in self.board if len(cell.candidates) == 1]
for i in l:
for j in xrange(81):
if isNeighbor(i.dex, j) and i.dex != j:
old = self.board[j].candidates
self.board[j].delCandidate(i.value)
if len(old) != len(self.board[j].candidates):
self.change = True
def toString(self):
str1 = ''.join(str(e.value) for e in self.board)
return str1
def solved(self):
for cell in self.board:
if len(cell.candidates) != 1:
return False
return True
def solve(self):
self.change = True
while self.change == True:
self.update()
if self.solved():
solutionlist.append(self.board)
return
l = [cell for cell in self.board if len(cell.candidates) > 1]
for i in l:
for j in i.candidates:
newBoard = Board(self.toString())
curLen = 12
curCell = -1
for u in l:
if len(u.candidates)<curLen:
curLen=len(u.candidates)
curCell = u.dex
for c in newBoard.board[curCell].candidates:
newBoard.board[curCell].candidates = [int(c)]
newBoard.board[curCell].value = int(c)
newBoard.solve()
return
def __repr__(self):
l = [cell.value for cell in self.board]
return str(l)
class Cell(object):
def __init__(self, value, dex):
self.value = value
self.dex = dex
if value == 0:
self.candidates = [1,2,3,4,5,6,7,8,9]
else:
self.candidates = [int(value)]
def __str__(self):
return str(self.value)
def delCandidate(self, value):
# deletes value from candidate list
#return self.candidate.remove(value);
self.candidates = [x for x in self.candidates if x != value]
if len(self.candidates) == 1:
self.value = self.candidates[0]
easy = "700583006006001405052006083300200958500078060648010300060802500003150072215600030"
twosol = "000805200800000401705040009000100702040000000006430000030900000010006080000000000"
hard = "040090008000000070060000120030020000005839060080600700050170600000043000003000200"
#easy solution: 794583216836721495152496783371264958529378164648915327967832541483159672215647839
b = Board(hard)
print b
b.solve()
print "end of the line"
for i in solutionlist:
print [cell.value for cell in i]
print "\n"

One major issue is the line for i in l: in the solve method. Since you're recursing, you only need to fill in one cell - the recursion will take care of the rest. So instead of for i in l:, just recurse on the one cell that is the best candidate (curCell):
l = [cell for cell in self.board if len(cell.candidates) > 1]
if len(l) > 0:
newBoard = Board(self.toString())
curLen = 12
curCell = -1
for u in l:
if len(u.candidates)<curLen:
curLen=len(u.candidates)
curCell = u.dex
for c in newBoard.board[curCell].candidates:
newBoard.board[curCell].candidates = [int(c)]
newBoard.board[curCell].value = int(c)
newBoard.solve()

program who returns true or false whether a matrix is a magic square or not

I'm trying to make a program that returns a boolean whether a matrix is a magic square or not. I try to submit this code at my college's website, but it returns "Wrong Answer". I can't see what's wrong with it. Can you help me?
def magico(xs):
#print(soma(xs),soma2(xs),diag(xs))
if(soma(xs) == soma2(xs) == diag(xs) != -1 ):
return True
else:
return False
def soma(xs):
sant = 0
s = 0
for i in range(len(xs)):
if(s != sant):
return -1
s = 0
for j in range(len(xs)):
s = s + int(xs[i][j])
sant = s
return s
def soma2(xs):
s = 0
sant = 0
for j in range(len(xs)):
if(s != sant):
return -1
s = 0
for i in range(len(xs)):
s = s + int(xs[j][i])
sant = s
return s
def diag(xs):
s = 0
for i in range(len(xs)):
s = s + int(xs[i][i])
t = 0
for i in range(len(xs)):
t = t + int(xs[i][len(xs) - 1 - i])
if(s == t):
return s
else:
return -1

Pseudocode
magic = true
answer = sum(square)/n_rows(square)
for r in rows(square):
if sum(r) not equal to answer
magic = false
for c in columns(square):
if sum(c) not equal to answer
magic = false
d1 = principal_diagonal(square)
d2 = secondary_diagonal(square)
if sum(d1) or sum(d2) not equal to answer
magic = false
display magic

You are not checking if the matrix is square, you are not checking if the numbers in the matrix are unique. Beyond that you have a serious coding issue:
Your loops all end with
sant = s
They start with
if(s != sant):
But you have just made them equal...

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python implementation of Determining DNA Health algorithm from HackerRank - python

One reference that might assist can be found at: https://gist.github.com/josephmisiti/940cee03c97f031188ba7eac74d03a4f Please read the notes he has included. This is the input I have been using. 6 a b c aa d b 1 2 3 4 5 6 3 1 5 caaab 0 4 xyz 2 4 bcdybc

Related

All solutions for N-Queens problem prints empty board

Is this a genetic algorithm?

knapsack branch and bound wrong result

Python: Recursion problems

program who returns true or false whether a matrix is a magic square or not

Categories

Resources