Python: Get unique items and sublists in a list - python

I want to find a list of unique items and sublists in a list in python 2.7
Input : [['P', 'Q'], [['not', 'P'], 'R'], [['not', 'R'], ['not', 'P']], [['not', 'Q'], 'S', ['not', 'T']], 'T']
Output: ['P','Q',['not','P'],'R',['not','R'],['not','Q'],'S',['not','T'],'T']
Can anyone suggest how to do it recursively?
My code :
def rem_dup(lis):
y, s = [], set()
for t in lis:
w = tuple(sorted(t)) if isinstance(t, list) else t
if not w in s:
y.append(t)
s.add(w)
return y
def removeDuplicates(prop):
if isinstance(prop, str):
return prop
else:
out = [rem_dup(i) if isinstance(i, list) else i for i in rem_dup(prop)]
return out
I call removeDuplicates from main method and pass it input. I get below exception:
if not w in s:
TypeError: unhashable type: 'list'

Recursive solution:
def unique(lst):
s = set()
for el in lst:
if isinstance(el, str):
s.add(el)
elif el[0] == 'not'
s.add(tuple(*el))
else:
s.update(unique(el))
return s

Related

Concat method in Class

I am working through the following exercise: I am implementing a class that represents sorted lists of basic types.
Currently:
class SortedList():
def __init__(self, input_list):
self.input_list= input_list
def add(self,value):
self.input_list.append(value)
return self.input_list
def concat(self):
return
def __repr__(self):
self.input_list.sort()
return str(self.input_list)
I make the following calls:
l1= SortedList(['z','l','a'])
print(l1)
l1.add('b')
print(l1)
l2= SortedList(['q','g'])
l3= l1.cocat(l2)
print(l3)
Everything behaves as expected until the l3 definition, since unsure how to define this type of function x.function(y) within a class.
The desired output from the last print statement is ['a','b','g','l','q','z']
You can use the + operator on lists which extends a list with another list, and then return a new instance of SortedList when concat() is called.
class SortedList:
def __init__(self, input_list):
self.input_list = sorted(input_list)
def add(self, value):
self.input_list.append(value)
self.input_list.sort()
return self.input_list
def concat(self, other):
merged = self.input_list + other.input_list
return SortedList(merged)
def __repr__(self):
return str(self.input_list)
l1 = SortedList(["z", "l", "a"])
print(l1)
# ['a', 'l', 'z']
print(l1.add("b"))
# ['a', 'b', 'l', 'z']
l2 = SortedList(["q", "g"])
l3 = l1.concat(l2)
print(l3)
# ['a', 'b', 'g', 'l', 'q', 'z']

Python DFS nested dictionary

I've written a function which should be able to search a nested dictionary, using DFS, to find a specific value. The recursive element seems to be working fine, however, when the base case should return True, it simply doesn't.
obj = {'a': [{'c':'d'}, {'e':'f'}],
'b': [{'g':'h'}, {'i':'j'}]}
def obj_dfs(obj, target):
if type(obj) == dict:
for key, val in obj.items():
obj_dfs(key, target)
obj_dfs(val, target)
elif type(obj) in (list, tuple):
for elem in obj:
obj_dfs(elem, target)
else:
if obj == target:
print(f"{obj} == {target}")
return True
else:
print(f"{obj} != {target}")
return False
obj_dfs(obj, 'j')
And the results. As you can see, the standard output "i==i" shows that this element was evaluated correctly but the return True statement isn't functioning as intended. I've verified that if I call obj_dfs(obj, 'j'), that experiences the same error.
a != j
c != j
d != j
e != j
f != j
b != j
g != j
h != j
i != j
j == j
False
Why is this? And how can I fix this?
As the comments point out, you need to return the results of the recursive calls. Since you are just looking for a True/False match, you can pass the recursive calls into any() which will exit early with True if there is a match. The base case can simple be whether obj == target.
obj = {'a': [{'c':'d'}, {'e':'f'}],
'b': [{'g':'h'}, {'i':'j'}]}
def obj_dfs(obj, target):
if obj == target:
return True
if isinstance(obj, dict):
return any(obj_dfs(v, target) for v in obj.items())
elif isinstance(obj, (list, tuple)):
return any(obj_dfs(l, target) for l in obj)
return False
obj_dfs(obj, 'i'), obj_dfs(obj, 'j'), obj_dfs(obj, 'd'), obj_dfs(obj, 'x')
# (True, True, True, False)
This allows for a three simple blocks. Notice we are checking for a tuple as well as a list in the last isinstance. This allows you to simply pass in the dict item()s rather than looping over keys and values independently.
Adding a print(obj) as the first line of the function will show the order in which you are traversing the data. For example obj_dfs(obj, 'j') will print:
{'a': [{'c': 'd'}, {'e': 'f'}], 'b': [{'g': 'h'}, {'i': 'j'}]}
('a', [{'c': 'd'}, {'e': 'f'}])
a
[{'c': 'd'}, {'e': 'f'}]
{'c': 'd'}
('c', 'd')
c
d
{'e': 'f'}
('e', 'f')
e
f
('b', [{'g': 'h'}, {'i': 'j'}])
b
[{'g': 'h'}, {'i': 'j'}]
{'g': 'h'}
('g', 'h')
g
h
{'i': 'j'}
('i', 'j')
i
j
I made some edits to your code
obj = {'a': [{'c':'d'}, {'e':'f'}],
'b': [{'g':'h'}, {'i':'j'}]}
def obj_dfs(obj, target):
if type(obj) == dict:
for key, val in obj.items():
if(key==target):
return val
else:
result=obj_dfs(val, target)
if result!=None: return result
elif type(obj) in (list, tuple):
for elem in obj:
result=obj_dfs(elem, target)
if result!=None: return result
else:
if obj==target: return True
print(obj_dfs(obj, 'i'))
I don't know why you would just return true instead of the value though, so I put it that if its a dictionary key it would return the value, instead it would return true, to show that it is found
Expanding on my comment, try this, where we pass return values up the chain and always return True if a child returned True:
obj = {'a': [{'c':'d'}, {'e':'f'}],
'b': [{'g':'h'}, {'i':'j'}]}
obj2 = {'a': [{'c':'d'}, {'e':'f'}],
'b': [{'g':'h'}, {'g':'j'}]}
def obj_dfs(obj, target):
if type(obj) == dict:
for key, val in obj.items():
keyret = obj_dfs(key, target)
valueret = obj_dfs(val, target)
if keyret is True or valueret is True:
return True
else:
return False
elif type(obj) in (list, tuple):
rets = []
for elem in obj:
rets.append(obj_dfs(elem, target))
if True in rets:
return True
else:
return False
else:
if obj == target:
print(f"{obj} == {target}")
return True
else:
print(f"{obj} != {target}")
return False
print(obj_dfs(obj, 'i'))
print(obj_dfs(obj2, 'i'))
Recursion is a functional heritage and so using it with functional style yields the best results. This means decoupling concerns and pushing side effects to the fringe of your program. obj_dfs performs a depth-first traversal and mixes in search logic. And for purposes of debugging, includes a print side effect.
Decomposition results in functions that are easier to write, test, and reuse in various parts of our program. We'll start with a generic dfs for traversal -
def dfs(t, path = []):
if isinstance(t, dict):
for key, val in t.items():
yield from dfs(val, [*path, key])
elif isinstance(t, (list, tuple)):
for key, val in enumerate(t):
yield from dfs(val, [*path, key])
else:
yield path, t
obj = {'a': [{'c':'d'}, {'e':'f'}],
'b': [{'g':'h'}, {'i':'j'}]}
for path, val in dfs(obj):
print(path, val) # side effect decided by caller
['a', 0, 'c'] d
['a', 1, 'e'] f
['b', 0, 'g'] h
['b', 1, 'i'] j
The suggested solution and other answers here collapse the semantic difference between key and value, providing no differentiation on the particular match of target. Writing dfs as we did above, we can know which part of obj matched.
keys
values
['a', 0, 'c']
d
['a', 1, 'e']
f
['b', 0, 'g']
h
['b', 1, 'i']
j
has_value and has_key are easily defined in terms of dfs -
def has_value(t, target):
for path, val in dfs(t):
if val == target:
return True
return False
def has_key(t, target):
for path, val in dfs(t):
if target in path:
return True
return False
print(has_value(obj, "j")) # True
print(has_key(obj, "j")) # False
print(has_value(obj, "i")) # False
print(has_key(obj, "i")) # True

Find a Syntax error in Python DNA Sequence

Im getting a syntax error in line 5 // for k,v in // how can I solve this please?
i'm not sure what's the error here
alt_map = {'ins':'0'}
complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
def reverse_complement(seq):
for k,v in
alt_map.iteritems():
seq = seq.replace(k,v)
bases = list(seq)
bases = reversed([complement.get(base,base) for base in bases])
bases = ''.join(bases)
for k,v in
alt_map.iteritems():
bases = bases.replace(v,k) return bases
>>> seq = "TCGGinsGCCC"
>>> print "Reverse Complement:"
>>> print(reverse_complement(seq))
GGGCinsCCGA
def ReverseComplement(Pattern):
revcomp = [ ]
x = len(Pattern)
for i in Pattern:
x = x - 1
revcomp.append(Pattern[x])
return ''.join(revcomp)
Python cannot do automatic line breaks inmidst of a command.
You have to do:
for k,v in \
with \ as explicite sign that you do a linebreak inmidst of a for loop expression.
Python is a language where whitespace is partly part of the syntax.
So you have to be hyper correct in terms of whitespaces/linebreaks.
Changed your code:
alt_map = {'ins': '0'}
complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
def reverse_complement(seq):
for k, v in alt_map.items():
seq = seq.replace(k, v)
bases = list(seq)
bases = reversed([complement.get(base, base) for base in bases])
bases = ''.join(bases)
for k, v in alt_map.items():
bases = bases.replace(v, k)
return bases
if __name__ == '__main__':
seq = 'TCGGinsGCCC'
print(reverse_complement(seq))
Output:
GGGCinsCCGA

Can't stop python from iterating through string in loop

class Hat:
def __init__(self, **kwargs):
self.contents = []
for balltype in kwargs.keys():
for ballnum in range(kwargs[balltype]):
self.contents += balltype
hattrial = Hat(red = 1, blue = 2)
print(hattrial.contents)
I'm trying top create a list that contains the keys from the input argument dictionary, but instead of simply adding the string entry I get:
['r', 'e', 'd', 'b', 'l', 'u', 'e', 'b', 'l', 'u', 'e']
Instead of:
['red', 'blue', 'blue']
Where red occurs once and blue occurs twice. I've tried a few different solutions short of just manipulating the array afterwards such as the attempt below but nothing I've done has changed the output. Surely there's an elegant solution that doesn't require me sticking characters back together?
end = len(balltype)
self.contents += balltype[0:end]
self.contents += balltype
Using append
class Hat:
def __init__(self, **kwargs):
self.contents = []
for balltype in kwargs.keys():
for ballnum in range(kwargs[balltype]):
self.contents.append(balltype)
hattrial = Hat(red = 1, blue = 2)
print(hattrial.contents)
Be careful with the += operator in lists
This also works, try to understand why it appends correctly here with +=
class Hat:
def __init__(self, **kwargs):
self.contents = []
for balltype in kwargs.keys():
self.contents += kwargs[balltype] * [balltype]
hattrial = Hat(red = 1, blue = 2)
print(hattrial.contents)
Basically, the problem with your code can be reduced to the following:
a = []
a += "hello"
a
['h', 'e', 'l', 'l', 'o']

Autogrowing list in Python?

I need a list-like object that will "autogrow" whenever a slot number greater or equal to its length is accessed, filling up all the newly created slots with some pre-specified default value. E.g.:
# hypothetical DefaultList class
x = DefaultList(list('abc'), default='*')
x[6] = 'g'
print x[2], x[4], x[6], x[8] # should print 'c * g *'
Thanks!
PS. I know it is not hard to implement a class like this, but I avoid wheel-reinvention as much as possible, especially if a particularly efficient/well-designed wheel already exists.
PS2. A dict (or a collections.defaultdict) is not an acceptable implementation of the desired data structure. For why, see here: http://groups.google.com/group/comp.lang.python/msg/bcf360dfe8e868d1?hl=en
class DefaultList(list):
def __init__(self,*args,**kwargs):
list.__init__(self,*args)
self.default=kwargs.get('default',None)
def __getitem__(self,key):
# retrieving an item does not expand the list
if isinstance(key,slice):
return [self[elt] for elt in range(key.start,key.stop,key.step)]
else:
try:
return list.__getitem__(self,key)
except IndexError:
return self.default
def __setitem__(self,key,value):
# setting an item may expand the list
try:
list.__setitem__(self,key,value)
except IndexError:
self.extend([self.default]*(key-len(self)))
self.append(value)
x = DefaultList(list('abc'), default='*')
print(x)
# ['a', 'b', 'c']
x[6] = 'g'
print(x)
# ['a', 'b', 'c', '*', '*', '*', 'g']
print x[2], x[4], x[6], x[8] # should print 'c * g *'
# c * g *
print(x[2:9:2])
# ['c', '*', 'g', '*']
I would use a sparse data structure (1xn matrix).
You could always make a function that handles this:
def fillList(item, slot, myList):
length = len(myList)
if slot > length:
augmentation = [item for x in range(slot-length)]
myList.extend(augmentation)
else:
myList[slot] = item
Which while not a data structure, does accomplish what you want.
Using the idea of wheaties's solution and making a prettier interface:
You could inherit from list and overwrite the list 'getitem(index)' method which maps to [index] in your class. It should be something like this:
class GrowingList(list):
def __getitem__(self, index):
length = len(self)
# list is 0 indexed
if index >= length:
tail = [ self.default_value for x in range(index - length + 1)]
self.extend(tail)
return super(self.__class__, self).__getitem__(index)
This same code can be used if you don't extend the list, but just return some default value on invalid index
This preserves the whole list interface.
(This isn't a new answer; just a comment on unutbu's. It should really be possible to post stuff like this in comments; it isn't, so I have to post it as an answer.)
CombineListClasses and CombineListClasses2 inherit from two classes that both inherit from list. The behavior and doctests are straightforward, but break badly in the original version.
This is all standard practice in Python's data model; you almost never should be calling a base class method directly rather than via super.
class DefaultList(list):
"""
>>> x = DefaultList('abc', default='*')
>>> x
['a', 'b', 'c']
>>> x[6] = 'g'
>>> x
['a', 'b', 'c', '*', '*', '*', 'g']
>>> x[2], x[4], x[6], x[8] # should print 'c * g *'
('c', '*', 'g', '*')
>>> x[2:9:2]
['c', '*', 'g', '*']
>>> x = DefaultList()
>>> x[1] = 'a'
>>> x
[None, 'a']
>>> x = DefaultList(sequence=[1,2,3], default=5)
>>> x
[1, 2, 3]
>>> x[10]
5
"""
def __init__(self, *args, **kwargs):
if 'default' in kwargs:
self.default = kwargs['default']
del kwargs['default']
else:
self.default = None
super(DefaultList, self).__init__(*args, **kwargs)
def __getitem__(self, key):
# retrieving an item does not expand the list
if isinstance(key, slice):
return [self[elt] for elt in range(key.start, key.stop, key.step)]
else:
try:
return super(DefaultList, self).__getitem__(key)
except IndexError:
return self.default
def __setitem__(self, key, value):
# setting an item may expand the list
try:
super(DefaultList, self).__setitem__(key, value)
except IndexError:
self.extend([self.default]*(key-len(self)))
self.append(value)
# Another class that derives from list:
class AddMethodToList(list):
def __init__(self, *args, **kwargs):
self.value = kwargs['value']
del kwargs['value']
super(AddMethodToList, self).__init__(*args, **kwargs)
def new_method(self):
return self.value
# Derive from both classes.
class CombineListClasses(AddMethodToList, DefaultList):
"""
>>> a = CombineListClasses(default=10, sequence=[1,2,3], value=3)
>>> a.new_method()
3
>>> a[5] = 1
>>> a
[1, 2, 3, 10, 10, 1]
"""
pass
# Derive from both classes in reverse, reversing the call chain order.
class CombineListClasses2(DefaultList, AddMethodToList):
"""
>>> a = CombineListClasses2(default=10, sequence=[1,2,3], value=3)
>>> a.new_method()
3
>>> a[5] = 1
>>> a
[1, 2, 3, 10, 10, 1]
"""
pass
if __name__ == '__main__':
import doctest
print doctest.testmod()
Note that in Python 3, this is supported by the language directly:
class DefaultList(list):
def __init__(self, *args, default=None, **kwargs):
self.default = default
super(self).__init__(*args, **kwargs)
but that's not supported in Python 2. http://www.python.org/dev/peps/pep-3102

Categories