Modify Damerau-Levenshtein algorithm to track transformations (insertions, deletions, etc) - python

I'm wondering how to modify the Damerau-Levenshtein algorithm to track the specific character transformations required to change a source string to a target string. This question has been answered for the Levenshtein distance, but I couldn't find any answers for DL distance.
I looked at the py-Levenshtein module: it provides exactly what I need, but for Levenshtein distance:
Levenshtein.editops("FBBDE", "BCDASD")
[('delete', 0, 0), ('replace', 2, 1), ('insert', 4, 3), ('insert', 4,
4), ('replace', 4, 5)]
The code for editops was difficult to decipher since it's written in C. I wonder how tracking the transformations can be done efficiently: I imagine it is possible from the distance matrix, which looks something like:
r e p u b l i c a n
0 1 2 3 4 5 6 7 8 9 10
d 1 1 2 3 4 5 6 7 8 9 10
e 2 2 1 2 3 4 5 6 7 8 9
m 3 3 2 2 3 4 5 6 7 8 9
o 4 4 3 3 3 4 5 6 7 8 9
c 5 5 4 4 4 4 5 6 6 7 8
r 6 5 5 5 5 5 5 6 7 7 8
a 7 6 6 6 6 6 6 6 7 7 8
t 8 7 7 7 7 7 7 7 7 8 8

import numpy as np
def levenshtein_distance(string1, string2):
    """Return the Levenshtein distance between ``string1`` and ``string2``.

    The distance is the bottom-right cell of the matrix produced by
    ``_levenshtein_distance_matrix`` (row ``len(string1)``, column
    ``len(string2)``).
    """
    matrix = _levenshtein_distance_matrix(string1, string2)
    return matrix[len(string1), len(string2)]
def damerau_levenshtein_distance(string1, string2):
    """Return the edit distance with adjacent transpositions allowed.

    Same lookup as ``levenshtein_distance``, but the matrix is built with the
    transposition rule switched on (third argument ``True``).
    """
    matrix = _levenshtein_distance_matrix(string1, string2, True)
    return matrix[len(string1), len(string2)]
def get_ops(string1, string2, is_damerau=False):
    """Return the edit operations that turn ``string1`` into ``string2``.

    The distance matrix is walked backwards from its bottom-right cell to the
    top-left one; the operations are returned in left-to-right order.  Each
    operation is a tuple:

    * ``('replace', i, j)``       -- ``string1[i]`` becomes ``string2[j]``
    * ``('insert', i, j)``        -- ``string2[j]`` is inserted right after
      ``string1[i]`` (``i == -1`` means "at the very front")
    * ``('delete', i, i)``        -- ``string1[i]`` is removed
    * ``('transpose', i, i - 1)`` -- ``string1[i]`` and ``string1[i - 1]``
      swap places (only produced when ``is_damerau`` is true)

    Positions where the characters already match produce no operation.

    Args:
        string1: Source string.
        string2: Target string.
        is_damerau: When true, adjacent transpositions count as one edit.

    Returns:
        A list of operation tuples as described above.
    """
    # Use the matrix computed for *these* arguments instead of a module-level
    # ``dist_matrix`` that only exists when the file is run as a script.
    dist = _levenshtein_distance_matrix(string1, string2, is_damerau)
    i, j = len(string1), len(string2)
    ops = []
    while i > 0 or j > 0:
        # On the first row/column only one move is possible.  Handling these
        # explicitly avoids indexing with -1, which NumPy would silently wrap
        # around to the last row/column.
        if i == 0:
            ops.append(('insert', i - 1, j - 1))
            j -= 1
            continue
        if j == 0:
            ops.append(('delete', i - 1, i - 1))
            i -= 1
            continue

        if (is_damerau and i > 1 and j > 1
                and string1[i - 1] == string2[j - 2]
                and string1[i - 2] == string2[j - 1]
                and dist[i - 2, j - 2] < dist[i, j]):
            ops.append(('transpose', i - 1, i - 2))
            i -= 2
            j -= 2
            continue

        # Cheapest predecessor: 0 = diagonal, 1 = left (insert), 2 = up (delete).
        index = np.argmin([dist[i - 1, j - 1], dist[i, j - 1], dist[i - 1, j]])
        if index == 0:
            # Equal cost on the diagonal means the characters matched.
            if dist[i, j] > dist[i - 1, j - 1]:
                ops.append(('replace', i - 1, j - 1))
            i -= 1
            j -= 1
        elif index == 1:
            ops.append(('insert', i - 1, j - 1))
            j -= 1
        else:
            ops.append(('delete', i - 1, i - 1))
            i -= 1

    # Operations were collected back-to-front; appending and reversing once
    # avoids repeated O(n) inserts at the head of the list.
    ops.reverse()
    return ops
def execute_ops(ops, string1, string2):
    """Replay ``ops`` on ``string1`` and return every intermediate string.

    Args:
        ops: Sequence of ``(kind, i, j)`` tuples where ``kind`` is one of
            ``'delete'``, ``'insert'``, ``'replace'`` or ``'transpose'``.
        string1: Starting string; ``i`` indexes into it.
        string2: Source of inserted/replacement characters; ``j`` indexes
            into it for ``'insert'`` and ``'replace'``.

    Returns:
        A list starting with ``string1`` followed by the string obtained
        after each operation.
    """
    chars = list(string1)
    history = [string1]
    # Net length change so far; maps indices of the original string1 onto the
    # partially edited character list.
    offset = 0
    for op in ops:
        kind, first, second = op[0], op[1], op[2]
        pos = first + offset
        if kind == 'delete':
            del chars[pos]
            offset -= 1
        elif kind == 'insert':
            chars.insert(pos + 1, string2[second])
            offset += 1
        elif kind == 'replace':
            chars[pos] = string2[second]
        elif kind == 'transpose':
            # For a transpose both indices refer to string1.
            other = second + offset
            chars[pos], chars[other] = chars[other], chars[pos]
        history.append(''.join(chars))
    return history
def _levenshtein_distance_matrix(string1, string2, is_damerau=False):
    """Build the edit-distance matrix between ``string1`` and ``string2``.

    Cell ``[i, j]`` holds the distance between the first ``i`` characters of
    ``string1`` and the first ``j`` characters of ``string2``.

    Args:
        string1: Source string (rows).
        string2: Target string (columns).
        is_damerau: When true, swapping two adjacent characters is also
            considered as a single step.

    Returns:
        An integer NumPy array of shape
        ``(len(string1) + 1, len(string2) + 1)``.
    """
    rows, cols = len(string1) + 1, len(string2) + 1
    d = np.zeros((rows, cols), dtype=int)
    # Distance from/to the empty prefix is just the prefix length.
    d[:, 0] = np.arange(rows)
    d[0, :] = np.arange(cols)

    for i, ch1 in enumerate(string1):
        for j, ch2 in enumerate(string2):
            cost = 0 if ch1 == ch2 else 1
            best = min(
                d[i, j + 1] + 1,   # step from the row above (drop ch1)
                d[i + 1, j] + 1,   # step from the column to the left (add ch2)
                d[i, j] + cost,    # diagonal step: keep or replace
            )
            if (is_damerau and i > 0 and j > 0
                    and ch1 == string2[j - 1] and string1[i - 1] == ch2):
                # The last two characters are crossed over: transposition.
                best = min(best, d[i - 1, j - 1] + cost)
            d[i + 1, j + 1] = best
    return d
if __name__ == "__main__":
    # Demo: print the matrix, the edit operations and the replayed steps for
    # one word pair, first with and then without transpositions.
    #
    # Other pairs to try:
    #   GIFTS PROFIT
    #   FBBDE BCDASD
    #   SPARTAN PART
    #   PLASMA ALTRUISM
    #   REPUBLICAN DEMOCRAT
    #   PLASMA PLASMA
    #   FISH IFSH
    #   STAES STATES
    string1 = 'FISH'
    string2 = 'IFSH'
    for is_damerau in (True, False):
        print('=== damerau_levenshtein_distance ===' if is_damerau
              else '=== levenshtein_distance ===')
        # Keep this bound at module level under this exact name: other code
        # in this file may look ``dist_matrix`` up as a global.
        dist_matrix = _levenshtein_distance_matrix(string1, string2, is_damerau=is_damerau)
        print(dist_matrix)
        ops = get_ops(string1, string2, is_damerau=is_damerau)
        print(ops)
        print(execute_ops(ops, string1, string2))
Output:
=== damerau_levenshtein_distance ===
[[0 1 2 3 4]
[1 1 1 2 3]
[2 1 1 2 3]
[3 2 2 1 2]
[4 3 3 2 1]]
[('transpose', 1, 0)]
['FISH', 'IFSH']
=== levenshtein_distance ===
[[0 1 2 3 4]
[1 1 1 2 3]
[2 1 2 2 3]
[3 2 2 2 3]
[4 3 3 3 2]]
[('replace', 0, 0), ('replace', 1, 1)]
['FISH', 'IISH', 'IFSH']

Related

How to split a number into three sets

I have a number, for example 5,
and I need to split it into 3 sets like
2
1 4
2
2 3
1
5
Or the number 8:
2
8 4
2
7 5
4
1 2 3 6
I tried this:
def partition(n):
    """Split the integers ``1..n`` into three subsets with equal sums.

    The first two subsets are filled greedily: repeatedly take the largest
    remaining number that does not exceed what is still missing from the
    subset's target sum.  Whatever is left forms the third subset.

    Args:
        n: Upper bound of the range ``1..n``.

    Returns:
        A list of three lists of integers, or ``-1`` when ``n < 5``, when
        ``1 + 2 + ... + n`` is not divisible by 3, or when the greedy fill
        cannot complete a subset.
    """
    # Imported locally so the function does not depend on a file-level import.
    from bisect import bisect_right

    total = n * (n + 1) // 2
    if n < 5 or total % 3 != 0:
        return -1
    target = total // 3

    remaining = list(range(1, n + 1))
    result = []
    for _ in range(2):
        subset, missing = [], target
        while missing > 0:
            idx = bisect_right(remaining, missing) - 1
            if idx < 0:
                # Nothing small enough is left; without this guard index -1
                # would silently pick the largest remaining number.
                return -1
            value = remaining.pop(idx)
            subset.append(value)
            missing -= value
        result.append(subset)
    result.append(remaining)
    return result
If it can't make 3 sets, it should return -1.
But it doesn't do what I want.
Please help.

How to reflect a number output in Python?

I have a program which draws a picture from numbers in a certain way:
# Draws a number picture: reads n, then prints rows assembled from padding
# strings (pp) followed by numbers counting down to 1, joined with spaces.
# NOTE(review): indentation is missing in this listing, so the loop nesting
# must be restored before the script can run.
n = int(input(" Введіть ваше число "))
m = n * 2 - 1
pp = " "
i = 0
while m != 0:
l = []
while m > n:
while i < n:
i += 1
j = n - i  # number of padding entries appended for this row
k = i  # the row's numbers start at i and count down
while j != 0:
l.append(pp)
j -= 1
while k != 0:
l.append(str(k))
k -= 1
m -= 1
a = " "
print(a.join(l))
l = []
i = 0
OUTPUT:
1
2 1
3 2 1
4 3 2 1
5 4 3 2 1
But now I get a task to draw this picture
1
1 2
1 2 3
1 2 3 4
1 2 3 4 5
Is there any hint how to reflect it without overwriting the whole code?
For the output you are expecting, a very simple way to do it is with this code :
# Row k lists the numbers 1..k separated by single spaces.
n = 5
for last in range(1, n + 1):
    print(' '.join(str(value) for value in range(1, last + 1)))
Output :
1
1 2
1 2 3
1 2 3 4
1 2 3 4 5
You can try this. It's simple.
# Row i prints 1..i with no separator between the numbers.
for i in range(1, n + 1):
    print(*range(1, i + 1), sep="")

How to use while in python?

I should only use while and print to complete the homework. I have tried a different way to deal with that but still stuck.
Expected output:
1
2 1
3 2 1
4 3 2 1
5 4 3 2 1
6 5 4 3 2 1
what I got instead:
1
1 2
1 2 3
1 2 3 4
1 2 3 4 5
1 2 3 4 5 6
Here is my code:
# Asker's attempt: loops i from 1 to 6 and prints the numbers 1..i in
# ascending order on each row, preceded by x spaces (x shrinks by 2 per row).
# NOTE(review): indentation is missing in this listing, so the loop nesting
# must be restored before the script can run.
j = 1
i = 1
t = 6  # never read in this listing
x = 10
d = 1  # never read in this listing
while i <= 6:
n = 1
space = -3
while space <= j:
print(" " * x, end="")
space += 1
break  # presumably inside the padding loop, so the pad prints once per row -- confirm
while n <= i:
print('%d '%n, end="")
n += 1
print("")
i += 1
x -= 2
# Right-aligned pyramid using only ``while`` and ``print``: each row shows the
# first ``row`` numbers in descending order after a shrinking left pad.
numbers = list(range(1, 7))
total = len(numbers)
row = 1
while row <= total:
    pad = ' ' * (total - row)
    print(pad, end="")
    print(*reversed(numbers[:row]))
    row += 1
Output
1
2 1
3 2 1
4 3 2 1
5 4 3 2 1
6 5 4 3 2 1
You are almost there. Just count backwards; i.e. change the following line
n = 1
to
n = i
and
while n <= i:
print('%d '%n, end="")
n += 1
to
while n > 0:
print('%d '%n, end="")
n -= 1
Also try the one-liner solution for fun:
>>> print("\n".join([" " * (7 - i) * 2 + " ".join([str(x) for x in reversed(range(1, i))]) for i in range(2, 8)]))
you have to print reverse order from your current one :
# Right-aligned pyramid built with ``while`` loops only; change ``n`` to get
# more or fewer rows.
n = 6
row = 1
total_cols = n * 2 - 1
while row <= n:
    # Left pad so that the row's last digit ends in the final column.
    pad_width = total_cols - row * 2 + 1
    printed = 0
    while printed < pad_width:
        print(" ", end="")
        printed += 1
    value = row
    while value >= 1:
        print(value, end="")
        if value != 1:
            print(" ", end="")
        value -= 1
    print()
    row += 1
output:
1
2 1
3 2 1
4 3 2 1
5 4 3 2 1
6 5 4 3 2 1
you can change the value of n to get upto any number

How do I iterate over string while iterating over the length of that string in Python

I am trying to reproduce this cost matrix:
cost matrix
At the moment, i am just playing with the Python code used to make the cost matrix. I am getting stuck because I want to have an elif statement that says
elif a_list[i] = b_list[i]:
matrix[i][j] = min( matrix[i - 1][j] + 1,
matrix[i][j - 1] + 1,
matrix[i - 1][j - 1])
So without adding +1 to the last term. Problem is I get this error message 'IndexError: list index out of range'
How can I fix this. At the moment, my output is:
0 1 2 3 4 5 6 7
1 1 2 3 4 5 6 7
2 2 2 3 4 5 6 7
3 3 3 3 4 5 6 7
4 4 4 4 4 5 6 7
and it should be
0 1 2 3 4 5 6 7
1 1 2 3 4 5 6 7
2 2 1 2 3 4 5 6
3 3 2 2 3 4 5 6
4 4 3 3 3 4 5 6
My whole code atm is:
# Builds a cost matrix for the strings a and b as a list of lists and prints
# it column by column.
# NOTE(review): indentation is missing in this listing, so the loop nesting
# must be restored before the script can run.
import numpy as np  # not used anywhere in this listing
a = 'harvard'
b = 'yale'
a_list = list(a)
b_list = list(b)
#print(a_list)
#print(b_list)
matrix = []
# One (initially empty) row per value of i in 0..len(a_list).
for i in range(len(a_list) + 1):
matrix.append([])
for i in range(len(a_list) + 1):
for j in range(len(b_list) + 1):
matrix[i].append(j)  # creates cell [i][j]; the branches below assign its value
if i == 0:
matrix[i][j] = j
elif j == 0:
matrix[i][j] = i
# Disabled branch: i and j run up to len(a_list) / len(b_list) inclusive, so
# a_list[i] and b_list[j] would reach one past the end of the lists.
#elif a_list[i] == b_list[j]:
# matrix[i][j] = min( matrix[i - 1][j] + 1,
# matrix[i][j - 1] + 1,
# matrix[i - 1][j - 1])
else:
matrix[i][j] = min( matrix[i - 1][j] + 1,
matrix[i][j - 1] + 1,
matrix[i - 1][j - 1] + 1)
# zip(*matrix) yields the columns of matrix, so each printed line is a column.
for mat in zip(*matrix):
print(*mat)
Because i runs up to len(a_list), a_list[i] goes one past the end of the list and throws an IndexError.
Just change the elif statement to
elif a_list[i-1] == b_list[j-1]:

Python 2D Array can't work. Help~

# Reads 8 input lines of whitespace-separated integers, stores s[2] at
# R[s[0]][s[1]], then prints every cell up to the largest indices seen.
# NOTE(review): indentation is missing in this listing, so the loop nesting
# must be restored before the script can run.
_R = [0] * 5
R = [_R] * 4  # four references to the one _R list: every "row" is the same object
num_user = 0
num_item = 0
for i in range(8):
s = input().split()
for j in range(4):
s[j] = int(s[j])  # converts the first four fields of the line in place
R[s[0]][s[1]] = s[2]
print(s[0], s[1], R[s[0]][s[1]])
num_user = max(num_user, s[0])
num_item = max(num_item, s[1])
print("=====")
for i in range(num_user + 1):
for j in range(num_item + 1):
print(i, j, R[i][j])
exit()
Probably you already understand what I am going to ask. The output confused me:
#output
1 2 3
2 4 2
1 1 5
3 2 2
2 2 1
3 3 4
1 4 3
2 1 4
=====
0 0 0
0 1 4
0 2 1
0 3 4
0 4 3
1 0 0
1 1 4
1 2 1
1 3 4
1 4 3
2 0 0
2 1 4
2 2 1
2 3 4
2 4 3
3 0 0
3 1 4
3 2 1
3 3 4
3 4 3
What am I doing wrong? The last time I coded in Python it was 2.7, and that was a long time ago. Have I forgotten some important syntax?
You're creating the list of lists the wrong way:
>>> _R = [0] * 5
>>> R = [_R] * 4
>>> [id(x) for x in R] #here all objects are acually identical
[36635392, 36635392, 36635392, 36635392]
>>> R[0][1]=1 #changing one element changes all other elements as well
>>> R
[[0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0, 0]]
better create your list this way:
>>> R=[[0]*5 for _ in range(4) ]
>>> [id(x) for x in R]
[37254008, 36635712, 38713784, 38714664]
>>>
_R = [0] * 5
R = [_R] * 4
That is a no-go. R will contain _R four times, i.e. the same list object four times.
Use this instead:
R = [[0 for col in range(5)] for row in range(4)]

Categories