Set of string tuples in python - python

How could I find the set of tuples of strings?
For example there is a list of [('a', 'b'), ('b', 'a'), ('c','d')]
For me, ('a', 'b') is the same as ('b', 'a'). Is there any function in
Python which can identify and remove one of them?

Just sort your tuples:
unique = set(tuple(sorted(t)) for t in inputlist)
Demo:
>>> inputlist = [('a', 'b'), ('b', 'a'), ('c','d')]
>>> set(tuple(sorted(t)) for t in inputlist)
set([('a', 'b'), ('c', 'd')])
You could extend collections.MutableSet() (collections.abc.MutableSet in Python 3) to encapsulate that behaviour:
try:
# Python 3
from collections.abc import MutableSet
except ImportError:
# Python 2
from collections import MutableSet
class SortingSet(MutableSet):
    """Mutable set of tuples in which element order inside a tuple is ignored.

    Every value is stored as ``tuple(sorted(value))``, so ``('a', 'b')`` and
    ``('b', 'a')`` count as the same member. Iterating yields the sorted
    tuples, not the values as they were originally passed in.
    """

    def __init__(self, values=()):
        """Create the set, optionally filling it from the iterable ``values``."""
        self._values = set()
        for v in values:
            self.add(v)

    @staticmethod
    def _key(value):
        """Return the canonical (sorted) tuple under which ``value`` is stored."""
        return tuple(sorted(value))

    def __repr__(self):
        return '<{}({}) at {:x}>'.format(
            type(self).__name__, list(self._values), id(self))

    def __contains__(self, value):
        return self._key(value) in self._values

    def __iter__(self):
        return iter(self._values)

    def __len__(self):
        return len(self._values)

    def add(self, value):
        """Add ``value`` in its sorted form; no-op if an equivalent is present."""
        self._values.add(self._key(value))

    def discard(self, value):
        """Remove the member equivalent to ``value`` if there is one."""
        self._values.discard(self._key(value))
Demo:
>>> inputlist = [('a', 'b'), ('b', 'a'), ('c','d')]
>>> sset = SortingSet(inputlist)
>>> sset
<SortingSet([('a', 'b'), ('c', 'd')]) at 106b74c50>
>>> ('d', 'c') in sset
True

How about:
list_ = [('a', 'b'), ('b', 'a'), ('c','d')]
set_ = set(frozenset(tuple) for tuple in list_)
print(set_)
? Tested on CPython 3.4.

The answers so far do not preserve order at all, if that is important to you then use this:
>>> from collections import OrderedDict
>>> items = [('a', 'b'), ('b', 'a'), ('c','d')]
>>> OrderedDict((frozenset(x), x) for x in items).values()
[('b', 'a'), ('c', 'd')]
This keeps the order, and since you said one of the duplicates could be removed, note that it keeps the last one seen (at the position where the first one appeared).
The answers given so far also alter the elements:
>>> set(tuple(sorted(t)) for t in [('b', 'a'), ('c', 'd')])
set([('a', 'b'), ('c', 'd')])
>>> set(frozenset(tuple) for tuple in [('b', 'a'), ('c', 'd')])
set([frozenset(['a', 'b']), frozenset(['c', 'd'])])
This will keep the elements the same
>>> OrderedDict((frozenset(x), x) for x in [('b', 'a'), ('c', 'd')]).values()
[('b', 'a'), ('c', 'd')]

Related

Why does an "object has no attribute" error occur when I'm defining a local function in a Python class?

I was writing a class, in which there's a method with local args.
class GlobalAssign:
    """Hold two sequences together with the scores used to compare symbols."""

    def __init__(self, seq1, seq2, d=-5, mismatch=-5, trans=-7):
        # Each sequence is kept as a list of its elements.
        self.seq1 = list(seq1)
        self.seq2 = list(seq2)
        self.d = d
        self.mismatch = mismatch
        self.trans = trans

    def score(self, a, b):
        """Return the score for the pair ``(a, b)``.

        Gives ``self.mismatch`` or ``self.trans`` depending on which table the
        capitalized pair appears in, ``self.d`` when either argument is
        ``'-'``, and ``None`` for any other pair.
        """
        # score for any pair of bases
        pair = (str(a).capitalize(), str(b).capitalize())
        mismatch_pairs = {('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C')}
        trans_pairs = {('A', 'C'), ('C', 'A'), ('T', 'G'), ('G', 'T'),
                       ('A', 'T'), ('T', 'A'), ('C', 'G'), ('G', 'C')}
        if pair in mismatch_pairs:
            return self.mismatch
        if pair in trans_pairs:
            return self.trans
        if a == '-' or b == '-':
            return self.d
        return None
And I run following codes in the terminal:
In [62]: test = GlobalAssign('agcg','gtat')
In [63]: test.score('a','g')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-63-f387aa9ccad8> in <module>
----> 1 test.score('a','g')
~/data/needleman.py in score(self, a, b)
11 def score(self, a, b):
12 # score for any pair of bases
---> 13 pair = (str(a).capitalize(), str(b).capitalize())
14 if pair in {('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C')}:
15 return self.mismatch
AttributeError: 'GlobalAssign' object has no attribute 'a'
I think a and b are local variables, so I don't need to write self.a. But it raised this error and I'm a bit confused. I have been stuck for a while. Thanks if you could help me figure it out.
I've just run your code locally and it works; it gives me the output -5.
I have made some adjustments to your code in order to cover all the cases:
class GlobalAssign:
    """Hold two sequences together with the scores used to compare symbols."""

    def __init__(self, seq1, seq2, d=-5, mismatch=-5, trans=-7):
        # Each sequence is kept as a list of its elements.
        self.seq1 = list(seq1)
        self.seq2 = list(seq2)
        self.d = d
        self.mismatch = mismatch
        self.trans = trans

    def score(self, a, b):
        """Return the score for the pair ``(a, b)``.

        Gives ``self.mismatch`` or ``self.trans`` depending on which table the
        capitalized pair appears in, and ``self.d`` when either argument is
        ``'-'``. Any other pair prints a message and yields ``None``.
        """
        # score for any pair of bases
        pair = (str(a).capitalize(), str(b).capitalize())
        mismatch_pairs = [('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C')]
        trans_pairs = [('A', 'C'), ('C', 'A'), ('T', 'G'), ('G', 'T'),
                       ('A', 'T'), ('T', 'A'), ('C', 'G'), ('G', 'C')]
        if pair in mismatch_pairs:
            return self.mismatch
        if pair in trans_pairs:
            return self.trans
        if a == '-' or b == '-':
            return self.d
        # Nothing matched: report it and fall back to None.
        print("Value not correct.")
        return None
Moreover, I have removed the braces {} and replaced them with [] inside the score method.
Let me know if this helps you.

Python tuples regrouping

I am kind of new to python and currently struggling with returning the list of tuples in a way I want to.
If I have a list of tuples that looks like
[('a',),('b',),('c',),('d',),('e',),('f',)]
How can I change it to
[('a','b'),('c','d'),('e','f')]
or
[('a','b','c'),('d','e'),('f',)] ?
Is there an easy way to regroup tuples?
Any help would be appreciated.
For a consistent length of inner tuples, you can flatten your list of tuples via itertools.chain and then define a chunking generator:
from itertools import chain
L = [('a',),('b',),('c',),('d',),('e',),('f',)]
def chunker(L, n):
    """Yield consecutive ``n``-item tuples from the flattened contents of ``L``.

    ``L`` is an iterable of tuples; their items are concatenated in order and
    then cut into chunks of length ``n``. The final chunk is shorter when the
    total number of items is not a multiple of ``n``.

    Raises ``ValueError`` (from ``range``) on first iteration if ``n`` is 0.
    """
    T = tuple(chain.from_iterable(L))
    # Step over the flattened tuple, not ``L``: when an inner tuple holds more
    # than one item, len(L) is smaller than len(T) and items would be dropped.
    for i in range(0, len(T), n):
        yield T[i:i + n]
res_2 = list(chunker(L, 2)) # [('a', 'b'), ('c', 'd'), ('e', 'f')]
res_3 = list(chunker(L, 3)) # [('a', 'b', 'c'), ('d', 'e', 'f')]
res_4 = list(chunker(L, 4)) # [('a', 'b', 'c', 'd'), ('e', 'f')]
Otherwise, you need to first define logic to determine the size of each tuple.
You could use zip with appropriate slices:
l = [('a',),('b',),('c',),('d',),('e',),('f',)]
[x+y for x, y in zip(l[::2], l[1::2])]
# [('a', 'b'), ('c', 'd'), ('e', 'f')]
Use grouper from itertools recipe:
from itertools import chain, zip_longest
lst = [('a',),('b',),('c',),('d',),('e',),('f',)]
def grouper(n, iterable, fillvalue=None):
    """Collect items into fixed-length tuples, padding the last one.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The same iterator object is passed n times, so each output tuple pulls
    # n consecutive items from it.
    shared = iter(iterable)
    return zip_longest(*((shared,) * n), fillvalue=fillvalue)
How to use?
>>> list(grouper(2, chain.from_iterable(lst)))
[('a', 'b'), ('c', 'd'), ('e', 'f')]
>>> list(grouper(3, chain.from_iterable(lst)))
[('a', 'b', 'c'), ('d', 'e', 'f')]

How to extract colon separated values from the same line?

I am using python regular expressions. I want all colon separated values in a line.
e.g.
input = 'a:b c:d e:f'
expected_output = [('a','b'), ('c', 'd'), ('e', 'f')]
But when I do
>>> re.findall('(.*)\s?:\s?(.*)','a:b c:d')
I get
[('a:b c', 'd')]
I have also tried
>>> re.findall('(.*)\s?:\s?(.*)[\s$]','a:b c:d')
[('a', 'b')]
The following code works for me:
inpt = 'a:b c:d e:f'
re.findall('(\S+):(\S+)',inpt)
Output:
[('a', 'b'), ('c', 'd'), ('e', 'f')]
Use split instead of regex, and avoid variable names that shadow built-ins such as input:
inpt = 'a:b c:d e:f'
k= [tuple(i.split(':')) for i in inpt.split()]
print(k)
# [('a', 'b'), ('c', 'd'), ('e', 'f')]
The easiest way using list comprehension and split :
[tuple(ele.split(':')) for ele in input.split(' ')]
#driver values :
IN : input = 'a:b c:d e:f'
OUT : [('a', 'b'), ('c', 'd'), ('e', 'f')]
You may use
list(map(lambda x: tuple(x.split(':')), input.split()))
where
input.split() is
>>> input.split()
['a:b', 'c:d', 'e:f']
lambda x: tuple(x.split(':')) is function to convert string to tuple 'a:b' => (a, b)
map applies above function to all list elements and returns a map object (in Python 3) and this is converted to list using list
Result
>>> list(map(lambda x: tuple(x.split(':')), input.split()))
[('a', 'b'), ('c', 'd'), ('e', 'f')]

write the elements of list to file

Bigram is a list which looks like-
[('a', 'b'), ('b', 'b'), ('b', 'b'), ('b', 'c'), ('c', 'c'), ('c', 'c'), ('c', 'd'), ('d', 'd'), ('d', 'e')]
Now I am trying to write each element of the list as a separate line in a file with this code-
bigram = list(nltk.bigrams(s.split()))
outfile1.write("%s" % ''.join(ele) for ele in bigram)
but I am getting this error :
TypeError: write() argument must be str, not generator
I want the result as in file-
('a', 'b')
('b', 'b')
('b', 'b')
('b', 'c')
('c', 'c')
......
You're passing a generator expression to write, which needs a string.
If I understand correctly you want to write one representation of tuple per line.
You can achieve that with:
outfile1.write("".join('{}\n'.format(ele) for ele in bigram))
or
outfile1.writelines('{}\n'.format(ele) for ele in bigram)
The second version passes a generator expression to writelines, which avoids building one big string in memory before writing it (and looks more like your attempt).
it produces a file with this content:
('a', 'b')
('b', 'b')
('b', 'b')
('b', 'c')
('c', 'c')
('c', 'c')
('c', 'd')
('d', 'd')
('d', 'e')
Try this:
outfile1.writelines("{}\n".format(ele) for ele in bigram)
This is the operator precedence problem.
You want an expression like this:
("%s" % ''.join(ele)) for ele in bigram
Instead, you get it interpreted like this, where the part in the parens is indeed a generator:
"%s" % (''.join(ele) for ele in bigram)
Use the explicit parentheses.
Please note that ("%s" % ''.join(ele)) for ele in bigram is itself a generator. You need to call write on each element from it.
If you want to write each pair in a separate line, you have to add line separators explicitly. The easiest, to my mind, is an explicit loop:
for pair in bigram:
    # Wrapping the tuple in a 1-tuple makes %s format the whole pair, so each
    # line is the tuple's own text, e.g. ('a', 'b'), as the question asked.
    outfile1.write("%s\n" % (pair,))

Finding the index in a tuple Python

tuple = ('e', (('f', ('a', 'b')), ('c', 'd')))
how to get the positions: (binary tree)
[('e', '0'), ('f', '100'), ('a', '1010'), ('b', '1011' ), ('c', '110'), ('d', '111')]
is there any way to indexOf ?
arvore[0] # = e
arvore[1][0][0] # = f
arvore[1][0][1][0] # = a
arvore[1][0][1][1] # = b
arvore[1][1][0] # = c
arvore[1][1][1] # = d
You need to traverse the tuple recursively (like tree):
def traverse(t, trail=''):
    """Yield ``(leaf, path)`` pairs for every string found in the nested ``t``.

    ``path`` is ``trail`` followed by the index taken at each nesting level
    (left - 0, right - 1 for a binary tree). Leaves come out in left-to-right,
    depth-first order.
    """
    pending = [(t, trail)]
    while pending:
        node, path = pending.pop()
        if isinstance(node, str):
            yield node, path
            continue
        children = [(child, path + str(pos)) for pos, child in enumerate(node)]
        # Push in reverse so the leftmost child is popped (visited) first.
        pending.extend(reversed(children))
Usage:
>>> t = ('e', (('f', ('a', 'b')), ('c', 'd')))
>>> list(traverse(t))
[('e', '0'), ('f', '100'), ('a', '1010'), ('b', '1011'), ('c', '110'), ('d', '111')]
BTW, don't use tuple as a variable name. It shadows builtin type/function tuple.

Categories