How to align/match multiple strings in python - python

I need some way to turn an input such as:
['luv cats', 'lovv cots', 'lov cotts']
In the aligned/matched output:
['l','l','l']
['u','o','o']
[None,'v',None]
['v','v','v']
[' ',' ',' ']
['c','c','c']
['a','o','o']
['t','t','t']
[None,None,'t']
['s','s','s']
I'm working in Python; if there is anything pre-made that could help me with this, I'd like to know. I saw some material on DNA sequence alignment that seemed like what I want to do, but I've never done anything like that and I got quite confused by it.
Any kind of help is appreciated.
Thanks.

This is how I solved it:
# Build one row per distinct character: slot k of a row holds the character
# if it occurs in string k, otherwise None. This compares character *sets*,
# so positions and repeated characters are not taken into account.
ls = ['luv cats', 'lovv cots', 'lov cotts']
# Distinct characters of every string (iteration order of a set is arbitrary).
lets = [set(a) for a in ls]
sub = []  # final result list
for chars in lets:
    for ch in chars:
        row = [ch if ch in other else None for other in lets]
        # A character shared by several strings produces the same row each
        # time it is reached, so only the first occurrence is kept.
        if row not in sub:
            sub.append(row)
print(sub)
Result
[['u', None, None], ['l', 'l', 'l'], ['c', 'c', 'c'], ['v', 'v', 'v'], ['s', 's', 's'], [' ', ' ', ' '], ['t', 't', 't'], ['a', None, None], [None, 'o',
'o']]

Related

Go through all possibilities Python

I have two lists, let's say:
l1 = ['c', 'o', 'k', 'e']
l2 = ['a', 'b', 'c', 'd']
I would like to create a loop that checks whether each letter in l1 is in l2 (in that case I would get ".") or whether it is in l2 at the same position (in that case I would get "Y").
I started with this code (unfortunately I failed). Could you please advise what's missing?
for i in l1:
for j in range(0,4):
if l1[j] == l2[j]:
v = "Y"
elif i in l2:
v = "."
else:
v = "N"
text = "".join(v)
With the lists in the example, I would expect to get:
text = .NNN
I understand that this might be an easy question, but I'm a beginner and it's driving me crazy :)
Looking at your code, you can use zip() to iterate over the two lists simultaneously. Also use str.join after the loop:
l1 = ["c", "o", "k", "e"]
l2 = ["a", "b", "c", "d"]
out = []
# Walk both lists in lockstep: "Y" = same letter at the same position,
# "." = letter occurs somewhere in l2, "N" = letter not in l2 at all.
for a, b in zip(l1, l2):
    if a == b:
        out.append("Y")
    elif a in l2:
        out.append(".")
    else:
        out.append("N")
print("".join(out))
Prints:
.NNN
l1 = ['c', 'o', 'k', 'e']
l2 = ['a', 'b', 'c', 'd']
text = ""
# "." when the letter occurs anywhere in l2, "N" otherwise
# (this variant does not check positions, so it never produces "Y").
for i in l1:
    if i in l2:
        v = "."
    else:
        v = "N"
    text += v
print(text)
First we define text="" so that we can just append. Then we loop all the letters in l1. We then check if that letter is in l2. if it is we add '.' to text if its not we add N. And finally we print the text
You can do something like:
l1 = ['c', 'o', 'k', 'e']
l2 = ['a', 'b', 'c', 'd']
ans = ""
for i, val in enumerate(l1):
    # Guard the index so an l1 that is longer than l2 does not raise.
    if i < len(l2) and l2[i] == val:
        ans += "Y"  # same letter at the same position
    elif val in l2:
        ans += "."  # letter present, but at another position
    else:
        ans += "N"  # letter absent from l2
You don't have to loop through the second list for the operations you are performing. You can simplify your code like this.
l1 = ['c', 'o', 'k', 'e']
l2 = ['a', 'b', 'c', 'd']
text = ""
# A single pass over l1 is enough; l2 is only indexed and searched.
for i, letter in enumerate(l1):
    if letter == l2[i]:
        text += "Y"
    elif letter in l2:
        text += "."
    else:
        text += "N"
print(text)
Looping twice is not needed and therefore not the most efficient solution to your problem.
Use python sets.
Simple ./N check
l1 = ['c', 'o', 'k', 'e']
l2 = ['a', 'b', 'c', 'd']
# Hash the letters of l2 once so each membership test is a set lookup.
S = set(l2)
marks = []
for x in l1:
    marks.append('.' if x in S else 'N')
out = ''.join(marks)
output: .NNN
more complex ./Y/N check:
l1 = ['c', 'o', 'k', 'e', 'z']
l2 = ['a', 'b', 'c', 'd', 'z']
S = set(l2)
marks = []
for i, x in enumerate(l1):
    if x not in S:
        marks.append('N')   # letter absent from l2
    elif l2[i] == x:
        marks.append('Y')   # same letter at the same position
    else:
        marks.append('.')   # letter present at another position
out = ''.join(marks)
output: .NNNY
l1 = ['c', 'o', 'k', 'e']
l2 = ['a', 'b', 'c', 'd']
text = ""
# enumerate() supplies the position of the *current* letter; l1.index(char)
# would return the first occurrence and misjudge repeated letters.
for index, char in enumerate(l1):
    if char in l2:
        if index < len(l2) and char == l2[index]:
            v = "Y"
        else:
            v = "."
    else:
        v = "N"
    text += v
print(text)

python consecutive elements to swap the list items [duplicate]

This question already has answers here:
What is the simplest way to swap each pair of adjoining chars in a string with Python?
(20 answers)
Closed 3 years ago.
here my input like:
['a','b','c','d','e','f']
output:
['b','a','d','c','f','e']
I tried to swap consecutive items, but the list I get back has space strings between the items. How can I get rid of those extra entries?
s = list(input().split())
def swap(c, i, j):
c[i], c[j] = c[j], c[i]
return ' '.join(c)
result = swap(s, 0, 1)
print(list(result))
current output:- ['b', ' ', 'a', ' ', 'c', ' ', 'd', ' ', 'e', ' ', 'f']
expected output:-['b', 'a', 'c', 'd', 'e','f']
You just need to return c as a list; there is no need to convert it to a string and back into a list:
s = ['a','b','c','d','e','f']


def swap(c, i, j):
    """Swap the items at positions i and j of list c in place and return c."""
    c[i], c[j] = c[j], c[i]
    return c


result = swap(s, 0, 1)
print(result)
Output:
['b', 'a', 'c', 'd', 'e', 'f']
a simple function to swap pairs that does not change the input:
def swap_pairs(list_to_swap):
    """Return a new list with each pair of adjoining items swapped.

    The input is left untouched. With an odd number of items the last one
    stays where it is.
    """
    s = list(list_to_swap)  # copy so the original sequence is not modified
    for i in range(0, len(s) - 1, 2):
        s[i], s[i+1] = s[i+1], s[i]
    return s


s0 = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
s1 = ['a', 'b', 'c', 'd', 'e', 'f']
print(swap_pairs(s0))
print(swap_pairs(s1))
# ['b', 'a', 'd', 'c', 'f', 'e', 'g']
# ['b', 'a', 'd', 'c', 'f', 'e']
### check if s0 and s1 are untouched:
print(s0)
print(s1)
# ['a', 'b', 'c', 'd', 'e', 'f', 'g']
# ['a', 'b', 'c', 'd', 'e', 'f']
if you want to swap pairs 'in place', i.e. directly change the input, you could shorten the process to
def swap_pairs(s):
    """Swap each pair of adjoining items of list s in place.

    Returns None, like other in-place list operations; with an odd number
    of items the last one stays where it is.
    """
    for i in range(0, len(s) - 1, 2):
        s[i], s[i+1] = s[i+1], s[i]


s1 = ['a', 'b', 'c', 'd', 'e', 'f']
swap_pairs(s1)
print(s1)
# ['b', 'a', 'd', 'c', 'f', 'e']
I think it's a matter of taste if a return statement should be added here. I'd consider it to be more clear not to return something since logically not needed. Anyway, be aware of variable scope.
This is the problem: you're joining on a space. Change it to the following.
def swap(c, i, j):
    """Swap items i and j of list c in place and return the items joined.

    The items must be strings; they are concatenated without a separator,
    so no spaces are introduced into the result.
    """
    c[i], c[j] = c[j], c[i]
    return ''.join(c)
for your output you could also do the following.
l = [x for x in [your output list] if x!= ' ']
or
l = [x for x in [your output list] if len(x.strip()) > 0]
Try returning only "C" and use recursion for swapping of all elements of list Then you will get expected Output. Check below code.
Output of below code: ['b','a','d','c','f','e']
s = ['a','b','c','d','e','f']


def swap(c, i, j):
    """Swap c[i] with c[j], then c[i+2] with c[j+2], and so on, in place.

    Stops as soon as either index runs past the end of the list, so an odd
    trailing item is left where it is. Returns c.
    """
    while i < len(c) and j < len(c):
        c[i], c[j] = c[j], c[i]
        i += 2
        j += 2
    return c


result = swap(s, 0, 1)
print(list(result))
and if you want Only output= ['b','a','c','d','e','f'] then no need of recursion just return c. Check below code:
s = ['a','b','c','d','e','f']


def swap(c, i, j):
    """Swap the items at positions i and j of list c in place and return c."""
    c[i], c[j] = c[j], c[i]
    return c


result = swap(s, 0, 1)
print(list(result))

Find all possible substrings beginning with characters from capturing group

I have for example the string BANANA and want to find all possible substrings beginning with a vowel. The result I need looks like this:
"A", "A", "A", "AN", "AN", "ANA", "ANA", "ANAN", "ANANA"
I tried this: re.findall(r"([AIEOU]+\w*)", "BANANA")
but it only finds "ANANA" which seems to be the longest match.
How can I find all the other possible substrings?
s = "BANANA"
vowels = 'AIEOU'
# Every slice s[start:stop] whose first character is a vowel, sorted.
sorted(
    s[start:stop]
    for start, first in enumerate(s)
    if first in vowels
    for stop in range(start + 1, len(s) + 1)
)
This is a simple way of doing it. Sure there's an easier way though.
def subs(txt, startswith):
    """Yield every substring of txt whose first character is in startswith.

    The comparison is case-insensitive. Substrings are produced by start
    position and, for each start, from shortest to longest.
    """
    wanted = startswith.lower()
    for i in range(len(txt)):
        # The start character decides for all substrings beginning here,
        # so test it once rather than once per length.
        if txt[i].lower() not in wanted:
            continue
        for j in range(1, len(txt) - i + 1):
            yield txt[i:i + j]


s = 'BANANA'
vowels = 'AEIOU'
print(sorted(subs(s, vowels)))
A more pythonic way:
>>> def grouper(s):
... return [s[i:i+j] for j in range(1,len(s)+1) for i in range(len(s)-j+1)]
...
>>> vowels = {'A', 'I', 'O', 'U', 'E', 'a', 'i', 'o', 'u', 'e'}
>>> [t for t in grouper(s) if t[0] in vowels]
['A', 'A', 'A', 'AN', 'AN', 'ANA', 'ANA', 'ANAN', 'ANANA']
Benchmark with accepted answer:
from timeit import timeit

# Statement 1: generator expression filtered by the start character.
s1 = """
sorted(s[i:j] for i, x in enumerate(s) for j in range(i + 1, len(s) + 1) if x in vowels)
"""
# Statement 2: build all substrings first, then filter on the first character.
# The function body must be indented inside the string, otherwise the timed
# statement does not compile.
s2 = """
def grouper(s):
    return [s[i:i+j] for j in range(1,len(s)+1) for i in range(len(s)-j+1)]
[t for t in grouper(s) if t[0] in vowels]
"""
print('1st: ', timeit(stmt=s1,
                      number=1000000,
                      setup="vowels = 'AIEOU'; s = 'BANANA'"))
print('2nd : ', timeit(stmt=s2,
                       number=1000000,
                       setup="vowels = {'A', 'I', 'O', 'U', 'E', 'a', 'i', 'o', 'u', 'e'}; s = 'BANANA'"))
result :
1st: 6.08756995201
2nd : 5.25555992126
As already mentioned in the comments, Regex would not be the right way to go about this.
Try this
def get_substr(string):
    """Return all substrings of string that start with a vowel.

    Vowels are matched case-insensitively against "aeiou". Results are
    ordered by start position, shortest substring first.
    """
    holder = []
    for ix, elem in enumerate(string):
        if elem.lower() in "aeiou":
            # One substring for every possible end position after ix.
            for end in range(ix + 1, len(string) + 1):
                holder.append(string[ix:end])
    return holder


print(get_substr("BANANA"))
## ['A', 'AN', 'ANA', 'ANAN', 'ANANA', 'A', 'AN', 'ANA', 'A']

Matching two lists in python

I have two lists with different lengths.
list1=['T','P','M','M','A','R','k','M','G','C']
list2=['T','P','M','M','A','R','k','S','G','C','N']
By comparing list1 and list2: The results must be:
new_list1=['T','P','M','M','A','R','k','mis','M', 'G','C','mis']
new_list2=['T','P','M','M','A','R','k','S', 'mis','G','C','N']
The method is by matching the elements in two lists with duplicates. If there are a non-matching elements in the same position. For example in list1 there are three copies of M, in the list2 there are two copies. The results must refer to missing M from list2 in this position. The character S is missing from list1, results also must assign to missing.
Can anyone help me?
Assuming "mis" is a special value:
from itertools import zip_longest
def create_matchs(alst, blst, mis="mis"):
    """Yield aligned (a, b) pairs for two lists compared position by position.

    Equal items are yielded as they are. A position where the items differ
    is expanded into two pairs, (mis, b) and then (a, mis). The shorter
    list is padded with mis.
    """
    for a, b in zip_longest(alst, blst, fillvalue=mis):
        if a == b or mis in (a, b):
            yield a, b
        else:
            yield mis, b
            yield a, mis


list1 = ['T','P','M','M','A','R','k','M','G','C']
list2 = ['T','P','M','M','A','R','k','S','G','C','N']
# Transpose the stream of pairs back into two aligned sequences (tuples).
new_list1, new_list2 = zip(*create_matchs(list1, list2))
print(new_list1)
print(new_list2)
You can also try it. Its simple:
list1 = ['T','P','M','M','A','R','k','M','G','C']
list2 = ['T','P','M','M','A','R','k','S','G','C','N']
# Pad the shorter list with 'mis' so both lists have the same length.
if len(list1) > len(list2):
    diff = len(list1) - len(list2)
    list2.extend(['mis'] * diff)
else:
    diff = len(list2) - len(list1)
    list1.extend(['mis'] * diff)
new_list1 = []
new_list2 = []
for a, b in zip(list1, list2):
    if a == b or 'mis' in (a, b):
        # Matching items, or padding on one side: keep the pair as it is.
        new_list1.append(a)
        new_list2.append(b)
    else:
        # Mismatch: give each item its own column, paired with 'mis'.
        new_list1.extend([a, 'mis'])
        new_list2.extend(['mis', b])
print(new_list1)
print(new_list2)
Output:
['T', 'P', 'M', 'M', 'A', 'R', 'k', 'M', 'mis', 'G', 'C', 'mis']
['T', 'P', 'M', 'M', 'A', 'R', 'k', 'mis', 'S', 'G', 'C', 'N']

Finding neighbors in a list

I have a list:
l=['a','>>','b','>>','d','e','f','g','>>','i','>>','>>','j','k','l','>>','>>']
I need to extract all the neighbors of '>>' and split them into groups wherever there are elements in between that are neither '>>' nor neighbors of '>>'.
For the example list the expected outcome would be:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
I have tried quite a few things, but all the simple ones have failed one way or another. At the moment the only code that seems to work is this:
def func(L,N):
    """Group the items of L that are direct neighbours of the marker N.

    Markers themselves are dropped. A new group starts whenever an item
    that is neither a marker nor next to one is encountered.
    """
    outer=[]
    inner=[]
    last = len(L) - 1
    for i,e in enumerate(L):
        if e == N:
            continue
        # Explicit bounds checks: L[i-1] at i == 0 would wrap around to the
        # last element, and L[i+1] at the end would raise IndexError.
        if (i > 0 and L[i-1] == N) or (i < last and L[i+1] == N):
            inner.append(e)
        elif inner:
            outer.append(inner)
            inner=[]
    if inner:
        outer.append(inner)
    return outer
func(l,'>>')
Out[196]:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
Although it seems to work, i am wondering if there is a better,cleaner way to do it?
I would argue that the most pythonic and easy to read solution would be something like this:
import itertools
def neighbours(items, fill=None):
    """Yield the elements with their neighbours as (before, element, after).

    neighbours([1, 2, 3]) --> (None, 1, 2), (1, 2, 3), (2, 3, None)

    fill stands in for the missing neighbour at either end. items may be
    any iterable, including a one-shot iterator.
    """
    # Three independent views of the input, so an iterator is not exhausted
    # by the first of them.
    lagging, current, leading = itertools.tee(items, 3)
    before = itertools.chain([fill], lagging)
    after = itertools.chain(leading, [fill])
    next(after)  # advance by one so `after` runs one element ahead
    return zip(before, current, after)
def split_not_neighbour(seq, mark):
    """Yield lists of the items that sit directly next to the mark.

    The marks themselves are dropped, and a new list is started at every
    item that is neither the mark nor next to it.

    split_not_neighbour([1, 0, 2, 3, 4, 5, 0], 0) --> [1, 2], [5]
    """
    # Pad the ends with a private object so the padding can never be
    # mistaken for the mark (the default fill None could equal mark).
    edge = object()
    output = []
    for before, item, after in neighbours(seq, edge):
        if mark in (before, item, after):
            if item != mark:
                output.append(item)
        elif output:
            yield output
            output = []
    if output:
        yield output
Which we can use like so:
>>> l = ['a', '>>', 'b', '>>', 'd', 'e', 'f', 'g', '>>', 'i', '>>', '>>',
... 'j', 'k', 'l', '>>', '>>']
>>> print(list(split_not_neighbour(l, ">>")))
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
Note the neat avoidance of any direct indexing.
Edit: A more elegant version.
def split_not_neighbour(seq, mark):
    """Yield lists of the items that sit directly next to the mark.

    The marks themselves are dropped, and a new list is started at every
    item that is neither the mark nor next to it.

    split_not_neighbour([1, 0, 2, 3, 4, 5, 0], 0) --> [1, 2], [5]
    """
    # Private padding object, so the ends never compare equal to the mark.
    edge = object()
    neighboured = neighbours(seq, edge)
    for away_from_mark, items in itertools.groupby(neighboured, key=lambda x: mark not in x):
        # Runs with no mark nearby only separate the groups; yielding them
        # would mix non-neighbours into the result.
        if away_from_mark:
            continue
        group = [item for _, item, _ in items if item != mark]
        if group:
            yield group
Here is one alternative:
import itertools
def func(L, N):
    """Group the items of L that are direct neighbours of the marker N.

    Markers are dropped; runs of items that are not next to a marker
    separate the groups.
    """
    def key(i_e):
        # True for a marker and for any item directly next to one.
        i, e = i_e
        return (e == N
                or (i > 0 and L[i-1] == N)
                or (i + 1 < len(L) and L[i+1] == N))
    outer = []
    for k, g in itertools.groupby(enumerate(L), key):
        if k:
            group = [e for i, e in g if e != N]
            if group:  # a run made only of markers has nothing to report
                outer.append(group)
    return outer
Or an equivalent version with a nested list comprehension:
def func(L, N):
    """Group the items of L that are direct neighbours of the marker N.

    Markers are dropped; runs of items that are not next to a marker
    separate the groups.
    """
    def key(i_e):
        # True for a marker and for any item directly next to one.
        i, e = i_e
        return (e == N
                or (i > 0 and L[i-1] == N)
                or (i + 1 < len(L) and L[i+1] == N))
    groups = ([e for i, e in g if e != N]
              for k, g in itertools.groupby(enumerate(L), key) if k)
    # A run made only of markers leaves an empty group; leave those out.
    return [group for group in groups if group]
You can simplify it like this
# Pad both ends with '' so l[i-1] and l[i+1] exist for every real item.
l = ['']+l+['']
stack = []  # resulting groups
connected = last_connected = False
for i, item in enumerate(l):
    if item in ['','>>']:
        continue
    connected = l[i-1] == '>>' or l[i+1] == '>>'
    if connected:
        # The previous real item was not next to a marker: open a new group.
        if not last_connected:
            stack.append([])
        stack[-1].append(item)
    last_connected = connected
my naive attempt
# Relies on every item being a single character: after joining and splitting
# on '>>', each piece is a run of items between two markers.
things = (''.join(l)).split('>>')
output = []
inner = []
for i in things:
    if not i:
        continue
    if len(i) == 1:
        # A lone item between markers belongs to the current group.
        inner.append(i)
    else:
        # First item of the run closes the current group,
        # last item of the run opens the next one.
        inner.append(i[0])
        output.append(inner)
        inner = [i[-1]]
if inner:
    output.append(inner)
print(output)  # [['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
Something like this:
l=['a','>>','b','>>','d','e','f','g','>>','i','>>','>>','j','k','l','>>','>>']
# Runs of single-character items between markers, empty runs removed.
# list() keeps this working on Python 3, where filter() returns an iterator.
l = list(filter(None, "".join(l).split(">>")))
lis=[]
for x in l:
    if not lis:
        lis.append([])
    # The first item of a run always joins the current group.
    lis[-1].append(x[0])
    if len(x) > 1:
        # The last item of a longer run opens the next group.
        lis.append([x[-1]])
print(lis)
output:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
or:
l=['a','>>','b','>>','d','e','f','g','>>','i','>>','>>','j','k','l','>>','>>']
# Runs of single-character items between markers, empty runs removed.
# list() is required on Python 3: the result is iterated twice below.
l = list(filter(None, "".join(l).split(">>")))
# One group per run longer than one item, plus one.
lis = [[] for _ in range(sum(1 for x in l if len(x) > 1) + 1)]
for x in l:
    if len(x) == 1:
        # Add to the last non-empty group, or to the first group if all
        # groups are still empty.
        for y in reversed(lis):
            if y:
                y.append(x)
                break
        else:
            lis[0].append(x)
    else:
        if any(lis):
            # First item of the run closes the last non-empty group ...
            for y in reversed(lis):
                if y:
                    y.append(x[0])
                    break
            # ... and its last item opens the first empty group.
            for y in lis:
                if not y:
                    y.append(x[-1])
                    break
        else:
            lis[0].append(x[0])
            lis[1].append(x[-1])
print(lis)
output:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]
Another method, using superimposition of the original list on a shifted copy of itself
# Compares each element with its predecessor by zipping the list against a
# copy of itself that has been shifted right by one position.
# NOTE(review): `lis` is not defined in this snippet; presumably it is the
# original input list of items and '>>' markers - confirm.
# NOTE(review): the indentation of this listing is missing and the `print`
# statements use Python 2 syntax.
import copy
lis_dup = copy.deepcopy(lis)
# After this insert, lis_dup[k] is lis[k-1], and '' precedes the first item.
lis_dup.insert(0,'')
prev_in = 0
tmp=[]
res = []
# x is the current element, y the element before it.
for (x,y) in zip(lis,lis_dup):
if '>>' in (x,y):
if y!='>>' :
if y not in tmp:
tmp.append(y)
elif x!='>>':
if x not in tmp:
# NOTE(review): looks like leftover debugging output - confirm.
print 'x is ' ,x
tmp.append(x)
else:
if prev_in ==1:
res.append(tmp)
prev_in =0
tmp = []
prev_in = 1
else:
if prev_in == 1:
res.append(tmp)
prev_in =0
tmp = []
res.append(tmp)
print res

Categories