I have a bug in my polynomial class - python

I am learning about class by building a class that represents polynomials. I know that theres a nice way of doing polynomials in numpy btw.
I have an add, multiply and call dunner methods defined. Polynomials are sums of terms of the form a*x**b so the class is called 'sot'.
Any instance of sot contains self.term which is a list of pears of numbers [a,b]. each per represent a*x**b.
the add method returns a new instance of sot with the concatenation of self.terms with other.terms as the argument
sot take a list of pears as its argument in its initialization
Thee problem is that when I add two instances of sot a+b a is changed, and so that if I repeat the addition a+b I get a different result.
The same does not happen with my multiplication method.
Can anyone see what's going on here?
#The class 'sot' represents a Sum Of Terms of the form a*x^b, aka a polinomeal
#On creating a new instanse, sot must be fead a list containg its terms.
#Each terms is represented by a list of two vareavles. [a,b] represesnting a*x^b
#Eg: a = sot(( [1,2], [2,1], [3,0] )) represents x^2 + 2x + 3
#Caling an instanse will return the evaluation of the polinomeal
# If the terms of sot are given as a tuple (eg sot(( (1,2) )) ) the object will not function as intended.
# Creating a sot of no terms, and empty polinomeal, must be done with squer brackets. eg sot([[0,3]])
# This vesion of the file prints out what its doing in the simplifying step, this is for debuging perposes
class sot:
def __init__(self,tupe):
#The first step un-packs the argument into a list of terms
self.term=[]
for t in tupe:
print("add " + str(t) + " to " + str(self.term))
self.term = self.term+[t]
#Simpler is a method that simplifys the polinomeal
self.simpler()
def simpler(self):
#The first step is to sort the polinnomeal by the assending powers eg c + x + x^2 ...
print("simplifying " + str(self))
self.term = sorted(self.term,key = lambda element: element[1] )
print("sorted "+str(self))
# now this while loop will inspect the list of terms to see if anny
# (a) have a zero coefichent
# in which case there removed from the list
# (b) have the same power
# in which case the terms second of ther terms will be poped form the list
# sotored in 'carry' and its constant 'carry[0]' will be added to the first term
# If the loop has not run then ther must be onlly one element and it must not have been
# checked for beeing zero
i=0
while i < len(self.term)-1:
print("checking " + str(i) + " out of " + str( len(self.term)-2))
if self.term[i][0] ==0:
self.term.remove(i)
print("removeing zero")
if self.term[i][1] ==self.term[i+1][1]:
print("adding similers")
carry = self.term.pop(i+1)
print("carry term "+str(carry))
self.term[i][0] = self.term[i][0] +carry[0]
print("new sum "+ str(self))
i=i+1
if i==0 & self.term[0][0] == 0:
self.term = []
print("the list has one term and that term is zero")
print("simplifyed compleat " + str(self))
def __call__(self,x):
# the result of a call the sum of each term, remember that each term [a,b] represents a*x**b
result = 0
for t in self:
result = result + t[0]*x**t[1]
return result
def __repr__(self):
string = []
for t in self:
string = string+ [str(t[0]) + "x**" + str(t[1])]
return " + ".join(string)
def __len__(self):
#The length of a sot is just the number of terms
return len(self.term)
def __iter__(self):
# The iteration of a sot steps throgh the list of terms starting from the zerothe
# and returning each tem as it dose so
self.i=0
return self
def __next__(self):
if self.i < len(self):
self.i=self.i+1
return self.term[self.i-1]
else:
raise StopIteration
def __add__(self,other):
# adding tow sots together is the same as haveing a new sot, so for a + b there list of terms
# are concatanated and fed into a new instanse of sot
argument = self.term+other.term
return sot(argument)
def __mul__(self,other):
# multiplication of sots is done with the distributive law:
# sum_i(x_i)*sum_j(x_j) = sum_i(x_i *sum_j(x_j)) = sum_i(sum_j(x_i*x_j))
# this is realised with two imbeded for loops
# when two terms are multiplyed, the constant parts are multiplyed and the powers are added:
#(ax**b)*(cx**d) = (a*c)x**(b+d)
# The relustant list of terms is fed into a new instanse of sot
argument = []
for s in self:
for o in other:
argument = argument + [[ s[0]*o[0] , s[1]+o[1] ]]
return sot(argument)

Related

How to merge overlapping string with unicode?

I am student of MCS and learning python and stuck in one problem. I am trying to merge all overllaping strings.
I am using following algorithm but output is not as expected?
(1) find max overlap between all possible pairs.
(2) Store all the overlaps in dictionary with key as amount of overlap and values as start, stringa, stringb
(3) pick the maximum set of overlap and merge the string. I have implemnted my algorithm using following code but out does not produce expected output.
def overlap(a, b):
overlaps = []
for i in range(len(b)):
for j in range(len(a)):
if a.endswith(b[:i + 1], j):
overlaps.append((i, j))
return max(overlaps) if overlaps else (0, -1)
def get_merged_string(lst):
overlaps = defaultdict(list)
while len(lst) > 1:
overlaps.clear()
for a in lst:
for b in lst:
if a == b:
continue
amount, start = overlap(a, b)
overlaps[amount].append((start, a, b))
maximum = max(overlaps)
if maximum == 0:
break
start, a, b = choice(overlaps[maximum]) # pick one among equals
lst.remove(a)
lst.remove(b)
lst.append(a[:start] + b)
str1 = ''.join(lst)
return (urllib.parse.unquote_plus(urllib.parse.unquote_plus(str1)))
Input:
%23%21%2Fusr%2Fbin%2Fpyth
n%2Fpython3%0A%0A%23%0A%23+
%0A%0A%23%0A%23+Python+fu
+Python+functio
unctions+start+
+start+with+def
th+def.++They+t
hey+take+parame
parameters%2C+whi
+which+are%0A%23+un
are%0A%23+un-typed%2C
n-typed%2C+as+oth
+as+other+varia
her+variables.%0A
es.%0A%0A%23+The+stri
string+at+the+s
the+start+of+th
rt+of+the+funct
function+is+for
n+is+for+docume
documentation.%0A
tation.%0Adef+prh
f+prhello%28%29%3A%0A++
%28%29%3A%0A++++%22Print+
+%22Print+hello%22%0A
hello%22%0A++++prin
+print%28%22Hello%2C+
llo%2C+World%21%22%29%0A%0A
World%21%22%29%0A%0Aprhel
%29%0A%0Aprhello%28%29%0A%0A%23
%28%29%0A%0A%23%0A%23%0Adef+prl
f+prlines%28str%2C+
ines%28str%2C+num%29%3A
num%29%3A%0A++++%22Prin
++++%22Print+num+
nt+num+lines+co
ines+consisting
onsisting+of+st
ing+of+str%2C+rep
+str%2C+repeating
epeating+str+on
r+once+more+on+
+on+each+line.%22
ine.%22%0A++++for+n
+for+n+in+range
in+range%280%2Cnum%29
num%29%3A%0A++++++++p
++++print%28str+%2A
%28str+%2A+%28n+%2B+1%29%29
+%28n+%2B+1%29%29%0A%0Aprli
+1%29%29%0A%0Aprlines%28%27
rlines%28%27z%27%2C+5%29%0A
%2C+5%29%0Aprint%28%29%0Apr
print%28%29%0Aprlines
rlines%28%27fred+%27%2C
red+%27%2C+4%29%0A
My output:
hello()
#
#
def prlines(str, num):
"Print hello"
print("Hello, World!")
prhellhe string at the start of the functions start with def. They take parameters, which are
# un-typed, as other variables.
# The s#!/usr/bin/python3
#
# Python function is for documentation.
def prhello():
"Print num lines consisting of str, repeating str once more on each line."
for n in range(0,num):
print(str * (n 1))
prlines('z', 5)
print()
prlines('fred ', 4)
Expected Output: It is after merging overlapping string.
#!/usr/bin/python3
#
# Python functions start with def. They take parameters, which are
# un-typed, as other variables.
# The string at the start of the function is for documentation.
def prhello():
"Print hello"
print("Hello, World!")
prhello()
#
#
def prlines(str, num):
"Print num lines consisting of str, repeating str once more on each line."
for n in range(0,num):
print(str * (n + 1))
prlines('z', 5)
print()
prlines('fred ', 4)
Above issue is caused by ambiguity of overlapping. How can i fix such issue?

Changing this Python program to have function def()

The following Python program flips a coin several times, then reports the longest series of heads and tails. I am trying to convert this program into a program that uses functions so it uses basically less code. I am very new to programming and my teacher requested this of us, but I have no idea how to do it. I know I'm supposed to have the function accept 2 parameters: a string or list, and a character to search for. The function should return, as the value of the function, an integer which is the longest sequence of that character in that string. The function shouldn't accept input or output from the user.
import random
print("This program flips a coin several times, \nthen reports the longest
series of heads and tails")
cointoss = int(input("Number of times to flip the coin: "))
varlist = []
i = 0
varstring = ' '
while i < cointoss:
r = random.choice('HT')
varlist.append(r)
varstring = varstring + r
i += 1
print(varstring)
print(varlist)
print("There's this many heads: ",varstring.count("H"))
print("There's this many tails: ",varstring.count("T"))
print("Processing input...")
i = 0
longest_h = 0
longest_t = 0
inarow = 0
prevIn = 0
while i < cointoss:
print(varlist[i])
if varlist[i] == 'H':
prevIn += 1
if prevIn > longest_h:
longest_h = prevIn
print("",longest_h,"")
inarow = 0
if varlist[i] == 'T':
inarow += 1
if inarow > longest_t:
longest_t = inarow
print("",longest_t,"")
prevIn = 0
i += 1
print ("The longest series of heads is: ",longest_h)
print ("The longest series of tails is: ",longest_t)
If this is asking too much, any explanatory help would be really nice instead. All I've got so far is:
def flip (a, b):
flipValue = random.randint
but it's barely anything.
import random
def Main():
numOfFlips=getFlips()
outcome=flipping(numOfFlips)
print(outcome)
def getFlips():
Flips=int(input("Enter number if flips:\n"))
return Flips
def flipping(numOfFlips):
longHeads=[]
longTails=[]
Tails=0
Heads=0
for flips in range(0,numOfFlips):
flipValue=random.randint(1,2)
print(flipValue)
if flipValue==1:
Tails+=1
longHeads.append(Heads) #recording value of Heads before resetting it
Heads=0
else:
Heads+=1
longTails.append(Tails)
Tails=0
longestHeads=max(longHeads) #chooses the greatest length from both lists
longestTails=max(longTails)
return "Longest heads:\t"+str(longestHeads)+"\nLongest tails:\t"+str(longestTails)
Main()
I did not quite understand how your code worked, so I made the code in functions that works just as well, there will probably be ways of improving my code alone but I have moved the code over to functions
First, you need a function that flips a coin x times. This would be one possible implementation, favoring random.choice over random.randint:
def flip(x):
result = []
for _ in range(x):
result.append(random.choice(("h", "t")))
return result
Of course, you could also pass from what exactly we are supposed to take a choice as a parameter.
Next, you need a function that finds the longest sequence of some value in some list:
def longest_series(some_value, some_list):
current, longest = 0, 0
for r in some_list:
if r == some_value:
current += 1
longest = max(current, longest)
else:
current = 0
return longest
And now you can call these in the right order:
# initialize the random number generator, so we get the same result
random.seed(5)
# toss a coin a hundred times
series = flip(100)
# count heads/tails
headflips = longest_series('h', series)
tailflips = longest_series('t', series)
# print the results
print("The longest series of heads is: " + str(headflips))
print("The longest series of tails is: " + str(tailflips))
Output:
>> The longest series of heads is: 8
>> The longest series of heads is: 5
edit: removed the flip implementation with yield, it made the code weird.
Counting the longest run
Let see what you have asked for
I'm supposed to have the function accept 2 parameters: a string or list,
or, generalizing just a bit, a sequence
and a character
again, we'd speak, generically, of an item
to search for. The function should return, as the value of the
function, an integer which is the longest sequence of that character
in that string.
My implementation of the function you are asking for, complete of doc
string, is
def longest_run(i, s):
'Counts the longest run of item "i" in sequence "s".'
c, m = 0, 0
for el in s:
if el==i:
c += 1
elif c:
m = m if m >= c else c
c = 0
return m
We initialize c (current run) and m (maximum run so far) to zero,
then we loop, looking at every element el of the argument sequence s.
The logic is straightforward but for elif c: whose block is executed at the end of a run (because c is greater than zero and logically True) but not when the previous item (not the current one) was not equal to i. The savings are small but are savings...
Flipping coins (and more...)
How can we simulate flipping n coins? We abstract the problem and recognize that flipping n coins corresponds to choosing from a collection of possible outcomes (for a coin, either head or tail) for n times.
As it happens, the random module of the standard library has the exact answer to this problem
In [52]: random.choices?
Signature: choices(population, weights=None, *, cum_weights=None, k=1)
Docstring:
Return a k sized list of population elements chosen with replacement.
If the relative weights or cumulative weights are not specified,
the selections are made with equal probability.
File: ~/lib/miniconda3/lib/python3.6/random.py
Type: method
Our implementation, aimed at hiding details, could be
def roll(n, l):
'''Rolls "n" times a dice/coin whose face values are listed in "l".
E.g., roll(2, range(1,21)) -> [12, 4] simulates rolling 2 icosahedron dices.
'''
from random import choices
return choices(l, k=n)
Putting this together
def longest_run(i, s):
'Counts the longest run of item "i" in sequence "s".'
c, m = 0, 0
for el in s:
if el==i:
c += 1
elif c:
m = m if m >= c else c
c = 0
return m
def roll(n, l):
'''Rolls "n" times a dice/coin whose face values are listed in "l".
E.g., roll(2, range(1,21)) -> [12, 4] simulates rolling 2 icosahedron dices.
'''
from random import choices
return choices(l, k=n)
N = 100 # n. of flipped coins
h_or_t = ['h', 't']
random_seq_of_h_or_t = flip(N, h_or_t)
max_h = longest_run('h', random_seq_of_h_or_t)
max_t = longest_run('t', random_seq_of_h_or_t)

Iterate through list and assign a value to the variable in Python

So i'm currently working on code, which solves simple differentials. For now my code looks something like that:
deff diff():
coeffs = []
#checking a rank of a function
lvl = int(raw_input("Tell me a rank of your function: "))
if lvl == 0:
print "\nIf the rank is 0, a differential of a function will always be 0"
#Asking user to write coefficients (like 4x^2 - he writes 4)
for i in range(0, lvl):
coeff = int(raw_input("Tell me a coefficient: "))
coeffs.append(coeff)
#Printing all coefficients
print "\nSo your coefficients are: "
for item in coeffs:
print item
And so what I want to do next? I have every coefficient in my coeffs[] list. So now I want to take every single one from there and assign it to a different variable, just to make use of it. And how can I do it? I suppose I will have to use loop, but I tried to do so for hours - nothing helped me. Sooo, how can I do this? It would be like : a=coeff[0], b = coeff[1], ..., x = coeff[lvl] .
Just access the coefficients directly from the list via their indices.
If you are wanting to use the values in a different context that entails making changes to the values but you want to keep the original list unchanged then copy the list to a new list,
import copy
mutableCoeffs = copy.copy(coeffs)
You do not need new variables.
You already have all you need to compute the coefficients for the derivative function.
print "Coefficients for the derivative:"
l = len(coeffs) -1
for item in coeffs[:-1]:
print l * item
l -=1
Or if you want to put them in a new list :
deriv_coeffs = []
l = len(coeffs) -1
for item in coeffs[:-1]:
deriv_coeffs.append(l * item)
l -=1
I guess from there you want to differenciate no? So you just assign the cofficient times it rank to the index-1?
deff diff():
coeffs = []
#checking a rank of a function
lvl = int(raw_input("Tell me a rank of your function: "))
if lvl == 0:
print "\nIf the rank is 0, a differential of a function will always be 0"
#Asking user to write coefficients (like 4x^2 - he writes 4)
for i in range(0, lvl):
coeff = int(raw_input("Tell me a coefficient: "))
coeffs.append(coeff)
#Printing all coefficients
print "\nSo your coefficients are: "
for item in coeffs:
print item
answer_coeff = [0]*(lvl-1)
for i in range(0,lvl-1):
answer_coeff[i] = coeff[i+1]*(i+1)
print "The derivative is:"
string_answer = "%d" % answer_coeff[0]
for i in range(1,lvl-1):
string_answer = string_answer + (" + %d * X^%d" % (answer_coeff[i], i))
print string_answer
If you REALLY want to assign a list to variables you could do so by accessing the globals() dict. For example:
for j in len(coeffs):
globals()["elm{0}".format(j)] = coeffs[j]
Then you'll have your coefficients in the global variables elm0, elm1 and so on.
Please note that this is most probably not what you really want (but only what you asked for).

Returning multiple integers as separate variables

I am trying to make a program that grabs 5 integers from the user, and then finds the average of them. I have it set up to take in the 5 numbers, but how do I return them all as separate variables so I can use them later on? Thanks!
def main():
x = 0
testScoreNumber = 1
while x < 5:
getNumber_0_100(testScoreNumber)
x += 1
testScoreNumber += 1
calcAverage(score1, score2, score3, score4, score5)
print(calculatedAverage)
def getNumber_0_100(testnumber):
test = int(input("Enter test score " + str(testnumber) + ":"))
testcount = 0
while testcount < 1:
test = int(input("Enter test score " + str(testnumber) + ":"))
if test > 0 or test < 100:
testcount += 1
return test
^Here is the problem, the everytime this function runs, I want it to return a different value to a different variable. Ex. test1, test2, test3.
def calcAverage(_score1,_score2,_score3,_score4,_score5):
total = _score1 + _score2 + _score3 + _score4 + _score5
calculatedAverage = total/5
return calculatedAverage
You need to store the result somewhere. It is usually (always?) a bad idea to dynamically create variable names (although it is possible using globals). The typical place to store the results is in a list or a dictionary -- in this case, I'd use a list.
change this portion of the code:
x = 0
testScoreNumber = 1
while x < 5:
getNumber_0_100(testScoreNumber)
x += 1
testScoreNumber += 1
to:
results = []
for x in range(5):
results.append( getNumber_0_100(x+1) )
which can be condensed even further:
results = [ getNumber_0_100(x+1) for x in range(5) ]
You can then pass that results list to your next function:
avg = get_ave(results[0],results[1],...)
print(avg)
Or, you can use the unpacking operator for shorthand:
avg = get_ave(*results)
print(avg)
It isn't the responsibility of the returning function to say what the caller does with its return value. In your case, it would be simple to let main have a list where it adds the return values. You could do this:
scores = []
for i in range(5):
scores.append(getNumber_0_100(i))
calcAverage(*scores)
Note that *scores is to pass a list as arguments to your calcAverage function. It's probably better to have calculateAverage be a general function which takes a list of values and calculates their average (i.e. doesn't just work on five numbers):
def calcAverage(numbers):
return sum(numbers) / len(numbers)
Then you'd call it with just calcAverage(scores)
A more Pythonic way to write the first part might be scores = [getNumber_0_100(i) for i in range(5)]
Python allows you to return a tuple, and you can unroll this tuple when you receive the return values. For example:
def return_multiple():
# do something to calculate test1, test2, and test3
return (test1, test2, test3)
val1, val2, val3 = return_multiple()
The limitation here though is that you need to know how many variables you're returning. If the number of inputs is variable, you're better off using lists.

How can memoization be applied to this algorithm?

After finding the difflib.SequenceMatcher class in Python's standard library to be unsuitable for my needs, a generic "diff"-ing module was written to solve a problem space. After having several months to think more about what it is doing, the recursive algorithm appears to be searching more than in needs to by re-searching the same areas in a sequence that a separate "search thread" may have also examined.
The purpose of the diff module is to compute the difference and similarities between a pair of sequences (list, tuple, string, bytes, bytearray, et cetera). The initial version was much slower than the code's current form, having seen a speed increase by a factor of ten. How can memoization be applied to the following code? What is the best way to rewrite the algorithm to further increase any possible speed?
class Slice:
__slots__ = 'prefix', 'root', 'suffix'
def __init__(self, prefix, root, suffix):
self.prefix = prefix
self.root = root
self.suffix = suffix
################################################################################
class Match:
__slots__ = 'a', 'b', 'prefix', 'suffix', 'value'
def __init__(self, a, b, prefix, suffix, value):
self.a = a
self.b = b
self.prefix = prefix
self.suffix = suffix
self.value = value
################################################################################
class Tree:
__slots__ = 'nodes', 'index', 'value'
def __init__(self, nodes, index, value):
self.nodes = nodes
self.index = index
self.value = value
################################################################################
def search(a, b):
# Initialize startup variables.
nodes, index = [], []
a_size, b_size = len(a), len(b)
# Begin to slice the sequences.
for size in range(min(a_size, b_size), 0, -1):
for a_addr in range(a_size - size + 1):
# Slice "a" at address and end.
a_term = a_addr + size
a_root = a[a_addr:a_term]
for b_addr in range(b_size - size + 1):
# Slice "b" at address and end.
b_term = b_addr + size
b_root = b[b_addr:b_term]
# Find out if slices are equal.
if a_root == b_root:
# Create prefix tree to search.
a_pref, b_pref = a[:a_addr], b[:b_addr]
p_tree = search(a_pref, b_pref)
# Create suffix tree to search.
a_suff, b_suff = a[a_term:], b[b_term:]
s_tree = search(a_suff, b_suff)
# Make completed slice objects.
a_slic = Slice(a_pref, a_root, a_suff)
b_slic = Slice(b_pref, b_root, b_suff)
# Finish the match calculation.
value = size + p_tree.value + s_tree.value
match = Match(a_slic, b_slic, p_tree, s_tree, value)
# Append results to tree lists.
nodes.append(match)
index.append(value)
# Return largest matches found.
if nodes:
return Tree(nodes, index, max(index))
# Give caller null tree object.
return Tree(nodes, index, 0)
Reference: How to optimize a recursive algorithm to not repeat itself?
As ~unutbu said, try the memoized decorator and the following changes:
#memoized
def search(a, b):
# Initialize startup variables.
nodes, index = [], []
a_size, b_size = len(a), len(b)
# Begin to slice the sequences.
for size in range(min(a_size, b_size), 0, -1):
for a_addr in range(a_size - size + 1):
# Slice "a" at address and end.
a_term = a_addr + size
a_root = list(a)[a_addr:a_term] #change to list
for b_addr in range(b_size - size + 1):
# Slice "b" at address and end.
b_term = b_addr + size
b_root = list(b)[b_addr:b_term] #change to list
# Find out if slices are equal.
if a_root == b_root:
# Create prefix tree to search.
a_pref, b_pref = list(a)[:a_addr], list(b)[:b_addr]
p_tree = search(a_pref, b_pref)
# Create suffix tree to search.
a_suff, b_suff = list(a)[a_term:], list(b)[b_term:]
s_tree = search(a_suff, b_suff)
# Make completed slice objects.
a_slic = Slice(a_pref, a_root, a_suff)
b_slic = Slice(b_pref, b_root, b_suff)
# Finish the match calculation.
value = size + p_tree.value + s_tree.value
match = Match(a_slic, b_slic, p_tree, s_tree, value)
# Append results to tree lists.
nodes.append(match)
index.append(value)
# Return largest matches found.
if nodes:
return Tree(nodes, index, max(index))
# Give caller null tree object.
return Tree(nodes, index, 0)
For memoization, dictionaries are best, but they cannot be sliced, so they have to be changed to lists as indicated in the comments above.
You could use the memoize decorator from the Python Decorator Library
and use it like this:
#memoized
def search(a, b):
The first time you call search with arguments a,b, the result is calculated and memoized (saved in a cache). The second time search is called with the same arguments, the result in returned from the cache.
Note that for the memoized decorator to work, the arguments must be hashable. If a and b are tuples of numbers, then they are hashable. If they are lists then you could convert them to tuples before passing them to search.
It doesn't look like search takes dicts as arguments, but if they were, then they would not be hashable and the memoization decorator would not be able to save the result in the cache.
It has been over 9 years since the question was asked, but the concept of internally caching results to speed up the algorithm was finally applied to the code today. The results of this application can be seen below:
#! /usr/bin/env python3
"""Compute differences and similarities between a pair of sequences.
After finding the "difflib.SequenceMatcher" class unsuitable, this module
was written and re-written several times into the polished version below."""
__author__ = 'Stephen "Zero" Chappell <Noctis.Skytower#gmail.com>'
__date__ = '3 September 2019'
__version__ = '$Revision: 4 $'
class Slice:
__slots__ = 'prefix', 'root', 'suffix'
def __init__(self, prefix, root, suffix):
self.prefix = prefix
self.root = root
self.suffix = suffix
class Match:
__slots__ = 'a', 'b', 'prefix', 'suffix', 'value'
def __init__(self, a, b, prefix, suffix, value):
self.a = a
self.b = b
self.prefix = prefix
self.suffix = suffix
self.value = value
class Tree:
__slots__ = 'nodes', 'index', 'value'
def __init__(self, nodes, index, value):
self.nodes = nodes
self.index = index
self.value = value
def search(a, b):
return _search(a, b, {})
def _search(a, b, memo):
# Initialize startup variables.
nodes, index = [], []
a_size, b_size = len(a), len(b)
# Begin to slice the sequences.
for size in range(min(a_size, b_size), 0, -1):
for a_addr in range(a_size - size + 1):
# Slice "a" at address and end.
a_term = a_addr + size
a_root = a[a_addr:a_term]
for b_addr in range(b_size - size + 1):
# Slice "b" at address and end.
b_term = b_addr + size
b_root = b[b_addr:b_term]
# Find out if slices are equal.
if a_root == b_root:
# Create prefix tree to search.
key = a_prefix, b_prefix = a[:a_addr], b[:b_addr]
if key not in memo:
memo[key] = _search(a_prefix, b_prefix, memo)
p_tree = memo[key]
# Create suffix tree to search.
key = a_suffix, b_suffix = a[a_term:], b[b_term:]
if key not in memo:
memo[key] = _search(a_suffix, b_suffix, memo)
s_tree = memo[key]
# Make completed slice objects.
a_slice = Slice(a_prefix, a_root, a_suffix)
b_slice = Slice(b_prefix, b_root, b_suffix)
# Finish the match calculation.
value = size + p_tree.value + s_tree.value
match = Match(a_slice, b_slice, p_tree, s_tree, value)
# Append results to tree lists.
nodes.append(match)
index.append(value)
# Return largest matches found.
if nodes:
return Tree(nodes, index, max(index))
# Give caller null tree object.
return Tree(nodes, index, 0)

Categories