Elegant Python code for list decomposition

I tried to write code that decomposes a list into all possible partitions.
The code I wrote was a mess, and I need an elegant solution because I want to improve my coding style.
I wrote an initial version as follows, but its memory requirements are too large and its execution speed is too slow.
import itertools

# all non-empty subsets of an iterable
powerset = lambda iterable: itertools.chain.from_iterable(
    itertools.combinations(list(iterable), r)
    for r in range(1, len(list(iterable)) + 1))

flatten = lambda list2d: [item for sublist in list2d for item in sublist]

x = list("abcd")
# powerset of the powerset, then keep only genuine partitions of x
xxx = [val for val in powerset([val1 for val1 in powerset(x)])]
xxxx = [val for val in xxx if x == list(sorted(flatten(val)))]
xxxx is:
[(('a', 'b', 'c', 'd'),),
(('a',), ('b', 'c', 'd')),
(('b',), ('a', 'c', 'd')),
(('c',), ('a', 'b', 'd')),
(('d',), ('a', 'b', 'c')),
(('a', 'b'), ('c', 'd')),
(('a', 'c'), ('b', 'd')),
(('a', 'd'), ('b', 'c')),
(('a',), ('b',), ('c', 'd')),
(('a',), ('c',), ('b', 'd')),
(('a',), ('d',), ('b', 'c')),
(('b',), ('c',), ('a', 'd')),
(('b',), ('d',), ('a', 'c')),
(('c',), ('d',), ('a', 'b')),
(('a',), ('b',), ('c',), ('d',))]
version 2:
import itertools

powerset = lambda iterable: itertools.chain.from_iterable(
    itertools.combinations(list(iterable), r)
    for r in range(1, len(list(iterable)) + 1))

flatten = lambda list2d: [item for sublist in list2d for item in sublist]

def makelist(list_1D):
    for val in powerset(list(powerset(list_1D))):
        if list_1D == list(sorted(flatten(val))):
            yield val
            # the all-singletons partition is the last valid one, so stop there
            if val == tuple(itertools.combinations(list_1D, 1)):
                break

for d in makelist(list("abcd")):
    print(d)
output:
(('a', 'b', 'c', 'd'),)
(('a',), ('b', 'c', 'd'))
(('b',), ('a', 'c', 'd'))
(('c',), ('a', 'b', 'd'))
(('d',), ('a', 'b', 'c'))
(('a', 'b'), ('c', 'd'))
(('a', 'c'), ('b', 'd'))
(('a', 'd'), ('b', 'c'))
(('a',), ('b',), ('c', 'd'))
(('a',), ('c',), ('b', 'd'))
(('a',), ('d',), ('b', 'c'))
(('b',), ('c',), ('a', 'd'))
(('b',), ('d',), ('a', 'c'))
(('c',), ('d',), ('a', 'b'))
(('a',), ('b',), ('c',), ('d',))
version 3, from "Time Complexity of finding all partitions of a set":
def partition(collection):
    if len(collection) == 1:
        yield [collection]
        return
    first = collection[0]
    for smaller in partition(collection[1:]):
        # insert `first` into each existing subset in turn
        for n, subset in enumerate(smaller):
            yield smaller[:n] + [[first] + subset] + smaller[n + 1:]
        # or put `first` in its own subset
        yield [[first]] + smaller
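To see what the recursion produces, here is a quick usage example (my addition; the output below is what hand-tracing the generator gives):

for p in partition(list("abc")):
    print(p)
# [['a', 'b', 'c']]
# [['a'], ['b', 'c']]
# [['a', 'b'], ['c']]
# [['b'], ['a', 'c']]
# [['a'], ['b'], ['c']]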

In order to avoid memory issues, we need to maximize the use of generators/iterators and never create a list of combinations.
Here is a way to do it by breaking down the problem in layers.
First, a generator to obtain the partition sizes for a given number of elements. This is then used to fill combinations of elements corresponding to each size, except for the single-element parts. The single-element parts are done last in order to avoid duplicates: by doing them last, we always have exactly the right number of unused elements left over for them.
Partition generation
# Generator for all partition sizes forming N
def partSize(N):
    if N < 2:
        yield [1] * N
        return
    for s in range(1, N + 1):
        yield from ([s] + rest for rest in partSize(N - s))

print(*partSize(3))
# [1, 1, 1] [1, 2] [2, 1] [3]
print(*partSize(4))
# [1, 1, 1, 1] [1, 1, 2] [1, 2, 1] [1, 3] [2, 1, 1] [2, 2] [3, 1] [4]
Partition filling
# A generator that fills partitions
# with combinations of indexes contained in A
from itertools import combinations

def fillCombo(A, parts, R=None):
    if R is None:
        R = [tuple()] * len(parts)
    size = max(parts)                # fill largest partitions first
    if size < 2:                     # when only single element partitions are left,
        iA = iter(A)                 # fill them with the remaining indexes
        yield [r if p != 1 else (next(iA),) for r, p in zip(R, parts)]
        return
    i = parts.index(size)            # index of largest partition
    parts[i] = 0                     # mark that slot as filled
    for combo in combinations(A, size):  # all combinations of that size
        R[i] = combo                     # fill part and recurse
        yield from fillCombo(A.difference(combo), [*parts], [*R])
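A quick standalone check of fillCombo (my addition; note that on CPython, small-integer sets happen to iterate in numeric order, which is what makes this particular output deterministic):

for r in fillCombo({0, 1, 2}, [1, 2]):
    print(r)
# [(2,), (0, 1)]
# [(1,), (0, 2)]
# [(0,), (1, 2)]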
Mapping partition to indexed values
# for each partition pattern, fill with combinations
# using a set of indexes in fillCombo so that repeated values
# are processed as distinct
def partCombo(A):
    for parts in partSize(len(A)):
        for iParts in fillCombo({*range(len(A))}, parts):   # combine indexes
            yield [tuple(A[i] for i in t) for t in iParts]  # get actual values
output:
for pp in partCombo("abc"): print(pp)
[('a',), ('b',), ('c',)]
[('c',), ('a', 'b')]
[('b',), ('a', 'c')]
[('a',), ('b', 'c')]
[('a', 'b'), ('c',)]
[('a', 'c'), ('b',)]
[('b', 'c'), ('a',)]
[('a', 'b', 'c')]
This uses very little memory but still takes exponential time. For example:
sum(1 for _ in partCombo("abcdefghi"))  # 768,500 combinations
takes 3.8 seconds on my laptop.
Adding just one more letter increases the execution time to 43 seconds for the 8,070,046 combinations.

Related

Finding match from a list of tuples

I have a list of tuples as below.
x = [('b', 'c'),
('c',),
('a', 'c', 'b'),
('b', 'c', 'a', 'd'),
('b', 'c', 'a'),
('a', 'b'),
('a', 'b', 'c', 'd'),
('a', 'c', 'b', 'd'),
('b',),
('c', 'a'),
('a', 'b', 'c'),
('a',)]
I want to give an input like ('a',) and it should give output like:
[('a', 'c', 'b'), ('a', 'b'), ('a', 'b', 'c', 'd'), ('a', 'c', 'b', 'd'), ('a', 'b', 'c')]
# everything that starts with 'a', but not ('a',) itself.
Or for an input of ('a', 'b') it should give an output of:
[('a', 'b', 'c', 'd'), ('a', 'b', 'c')]
# everything that starts with ('a', 'b'), but not ('a', 'b') itself.
I tried the following, but with no success:
print(filter(lambda x: ("a","b") in x, x))
>>> <filter object at 0x00000214B3A545F8>
def f(lst, target):
    return [t for t in lst
            if len(t) > len(target) and all(a == b for a, b in zip(t, target))]
so that:
f(x, ('a', 'b'))
returns:
[('a', 'b', 'c', 'd'), ('a', 'b', 'c')]
Tuples are compared lexicographically in Python, meaning that their elements are compared pair by pair, regardless of their type.
You can extract the portion of each tuple of the same length as your prefix and compare with ==:
def find_prefixes(prefix, sequence):
    n = len(prefix)
    return [x for x in sequence if x[:n] == prefix and len(x) > n]
List comprehensions of this type are indeed equivalent to filter calls, so you can do
def find_prefixes(prefix, sequence):
    n = len(prefix)
    return list(filter(lambda x: x[:n] == prefix and len(x) > n, sequence))
Doing a linear search is not a very efficient way to solve this problem. The data structure known as a trie is made specifically for finding prefixes; it arranges all your data into a single tree. Here is a popular Python implementation you can use with the appropriate attribution: https://stackoverflow.com/a/11016430/2988730
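To make the trie idea concrete, here is a minimal sketch (my own illustration, not the linked implementation; TrieNode, insert and extensions are hypothetical names) that stores the tuples from x and yields strict extensions of a prefix:

class TrieNode:
    def __init__(self):
        self.children = {}
        self.value = None  # the full tuple stored at this node, if any

def insert(root, tup):
    node = root
    for elem in tup:
        node = node.children.setdefault(elem, TrieNode())
    node.value = tup

def extensions(root, prefix):
    # walk down to the prefix node, then collect everything strictly below it
    node = root
    for elem in prefix:
        if elem not in node.children:
            return
        node = node.children[elem]
    stack = list(node.children.values())  # children only, so the prefix itself is skipped
    while stack:
        n = stack.pop()
        if n.value is not None:
            yield n.value
        stack.extend(n.children.values())

root = TrieNode()
for t in x:
    insert(root, t)
print(sorted(extensions(root, ('a', 'b'))))  # [('a', 'b', 'c'), ('a', 'b', 'c', 'd')]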
Firstly, use list(filter(...)) to convert a filter object to a list. But your filter doesn't do what you want: it checks membership, not a prefix. You can check for a prefix by using a slice.
Then you just need to add a check that the match is longer than the prefix.
Also, a filter of a lambda is usually better written as a comprehension.
for sub in ('a',), ('a', 'b'):
    n = len(sub)
    out = [t for t in x if t[:n] == sub and len(t) > n]
    print(out)
Output:
[('a', 'c', 'b'), ('a', 'b'), ('a', 'b', 'c', 'd'), ('a', 'c', 'b', 'd'), ('a', 'b', 'c')]
[('a', 'b', 'c', 'd'), ('a', 'b', 'c')]
list(filter(lambda y: all(y[i] == z for i, z in enumerate(inp)) if len(y) >= len(inp) else False, x))
For
inp = ('a', 'b')
the output will be:
[('a', 'b'), ('a', 'b', 'c', 'd'), ('a', 'b', 'c')]
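Note this filter uses >=, so it also keeps ('a', 'b') itself; a strict variant matching the question's requirement (my tweak, not the original answer) is:

print(list(filter(lambda y: len(y) > len(inp) and all(y[i] == z for i, z in enumerate(inp)), x)))
# [('a', 'b', 'c', 'd'), ('a', 'b', 'c')]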

Distributed `itertools.combinations`

I am using itertools.combinations in conjunction with itertools.cycle to create a set of objects to assign to another list of objects. Sadly, if the lists are not the same length or evenly divisible, the first items in the cycle get unevenly weighted.
from itertools import combinations, cycle

cycle_of_combinations = cycle(combinations(['A', 'B', 'C', 'D'], 2))
# (('A', 'B'),
#  ('A', 'C'),
#  ('A', 'D'),
#  ('B', 'C'),
#  ('B', 'D'),
#  ('C', 'D'))

assigned_combinations = []
for _ in range(9):
    assigned_combinations.append(next(cycle_of_combinations))

# assigned_combinations = [
#     ('A', 'B'),
#     ('A', 'C'),
#     ('A', 'D'),
#     ('B', 'C'),
#     ('B', 'D'),
#     ('C', 'D'),
#     ('A', 'B'),
#     ('A', 'C'),
#     ('A', 'D')]
In the above example, combinations that contain 'A' are over-represented. Is there a quick way to distribute the combinations more evenly?
Here's an example of a set that would be more evenly distributed; in this set I've selected every 2nd item until there are 9 items:
# assigned_combinations = [
# ('A', 'B'),
# ('A', 'D'),
# ('B', 'D'),
# ('A', 'C'),
# ('B', 'C'),
# ('C', 'D'),
# ('A', 'B'),
# ('A', 'D'),
# ('B', 'D')]
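For reference, the "every 2nd item" ordering above can be reproduced by dealing the combinations into even- and odd-indexed halves before cycling; this is my own sketch of that idea (dealt_combos is a hypothetical name), not part of the question:

from itertools import combinations, cycle, islice

def dealt_combos(items, r, count):
    combos = list(combinations(items, r))
    reordered = combos[::2] + combos[1::2]  # every 2nd item, then the skipped ones
    return list(islice(cycle(reordered), count))

print(dealt_combos(['A', 'B', 'C', 'D'], 2, 9))
# [('A', 'B'), ('A', 'D'), ('B', 'D'), ('A', 'C'), ('B', 'C'), ('C', 'D'),
#  ('A', 'B'), ('A', 'D'), ('B', 'D')]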
You could generate enough full cycles to cover the needed list, then shuffle and trim to size. The returned list will be a bit different each time, and you will still get some over-representation (it's inevitable with mismatched sizes, and random.shuffle may make it more pronounced from time to time).
import math
import itertools
import random

def get_combos(somelist, length, count):
    combos = list(itertools.combinations(somelist, length))
    combos = combos * math.ceil(count / len(combos))
    random.shuffle(combos)
    return combos[:count]

for item in get_combos(['A', 'B', 'C', 'D'], 2, 9):
    print(item)

Unique Combinations in a list of k,v tuples in Python

I have a list of various combos of items in tuples
example = [(1,2), (2,1), (1,1), (1,1), (2,1), (2,3,1), (1,2,3)]
I wish to group and count by unique combinations, yielding the result:
result = [((1,2), 3), ((1,1), 2), ((2,3,1), 2)]
It is not important that the order is maintained or which permutation of a combination is preserved, but it is very important that the operation be done with a lambda function and that the output format still be a list of tuples as above, because I will be working with a Spark RDD object.
My code currently counts patterns taken from a data set using:
from operator import add

RDD = sc.parallelize(example)
result = RDD.map(lambda y: (y, 1)) \
    .reduceByKey(add) \
    .collect()
print(result)
I need another .map command that will account for the different permutations, as explained above.
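A common way to do this (a sketch of my own, assuming a running SparkContext sc and the example list above) is to normalize each tuple to its sorted form in a .map before counting, so every permutation of a combination shares one key:

from operator import add

result = (sc.parallelize(example)
            .map(lambda t: (tuple(sorted(t)), 1))  # canonical sorted form as key
            .reduceByKey(add)
            .collect())
# e.g. [((1, 2), 3), ((1, 1), 2), ((1, 2, 3), 2)], up to ordering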
How about this: maintain a set that contains the sorted form of each item you've already seen. Only add an item to the result list if you haven't seen its sorted form already.
example = [('a','b'), ('a','a','a'), ('a','a'), ('b','a'), ('c','d'), ('b','c','a'), ('a','b','c')]
result = []
seen = set()
for item in example:
    sorted_form = tuple(sorted(item))
    if sorted_form not in seen:
        result.append(item)
        seen.add(sorted_form)
print(result)
Result:
[('a', 'b'), ('a', 'a', 'a'), ('a', 'a'), ('c', 'd'), ('b', 'c', 'a')]
You can use an OrderedDict to create an ordered dictionary keyed on the sorted form of its items:
>>> from collections import OrderedDict
>>> d=OrderedDict()
>>> for i in example:
... d.setdefault(tuple(sorted(i)),i)
...
('a', 'b')
('a', 'a', 'a')
('a', 'a')
('a', 'b')
('c', 'd')
('b', 'c', 'a')
('b', 'c', 'a')
>>> d
OrderedDict([(('a', 'b'), ('a', 'b')), (('a', 'a', 'a'), ('a', 'a', 'a')), (('a', 'a'), ('a', 'a')), (('c', 'd'), ('c', 'd')), (('a', 'b', 'c'), ('b', 'c', 'a'))])
>>> d.values()
[('a', 'b'), ('a', 'a', 'a'), ('a', 'a'), ('c', 'd'), ('b', 'c', 'a')]
This is similar to the ordered-dict approach, using the sorted tuple as the key:
from itertools import groupby

ex = [(1,2,3), (3,2,1), (1,1), (2,1), (1,2), (3,2), (2,3,1)]
f = lambda x: tuple(sorted(x))  # the sorted form, used as the key
[tuple(k) for k, _ in groupby(sorted(ex, key=f), key=f)]
The nice thing is that you can also see which tuples belong to the same combination:
In [16]: example = [ ('a','b'), ('a','a','a'), ('a','a'), ('a', 'a', 'a', 'a'), ('b','a'), ('c', 'd'), ('b','c','a'), ('a','b','c') ]
In [17]: for k, grpr in groupby(sorted(example, key=lambda x: tuple(sorted(x))), key=lambda x: tuple(sorted(x))):
print k, list(grpr)
....:
('a', 'a') [('a', 'a')]
('a', 'a', 'a') [('a', 'a', 'a')]
('a', 'a', 'a', 'a') [('a', 'a', 'a', 'a')]
('a', 'b') [('a', 'b'), ('b', 'a')]
('a', 'b', 'c') [('b', 'c', 'a'), ('a', 'b', 'c')]
('c', 'd') [('c', 'd')]
What you actually seem to need, based on the comments, is map-reduce. I don't have Spark installed, but according to the docs (see transformations) this must be something like:
data.map(lambda i: (frozenset(i), i)).reduceByKey(lambda _, i: i)
This, however, will return (b, a) if your dataset has (a, b), (b, a) in that order.
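If that order-dependence matters, a deterministic variant (my tweak, not from the answer) uses the sorted tuple as both key and value, so the surviving representative no longer depends on dataset order:

data.map(lambda i: (tuple(sorted(i)), tuple(sorted(i)))).reduceByKey(lambda a, _: a)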
I solved my own problem, though it was difficult to understand what I was really looking for. I used:
from operator import add

example = [(1,2), (1,1,1), (1,1), (1,1), (2,1), (3,4), (2,3,1), (1,2,3)]
RDD = sc.parallelize(example)
result = RDD.map(lambda x: list(set(x))) \
    .filter(lambda x: len(x) > 1) \
    .map(lambda x: (tuple(x), 1)) \
    .reduceByKey(add) \
    .collect()
print(result)
which also eliminated simple repeated values such as (1,1) and (1,1,1), which was an added benefit for me.
Since you are looking for a lambda function, try the following:
lambda x, y=OrderedDict(): [a for a in x if y.setdefault(tuple(sorted(a)), a) and False] or y.values()
You can use this lambda function like so:
uniquify = lambda x, y=OrderedDict(): [a for a in x if y.setdefault(tuple(sorted(a)), a) and False] or y.values()
result = uniquify(example)
Obviously, this sacrifices readability compared to the other answers. It is basically doing the same thing as Kasramvd's answer, in a single ugly line. (Beware that the mutable default argument y persists between calls, so calling uniquify twice will accumulate results.)

Contracting elements from two different lists

I have two different lists list1 = ['A','B'] and list2 = ['C','D','E']. I would like to be able to find all possible contractions between the elements of the two lists. For the present case I would like code (preferably Python, Mathematica or MATLAB) that takes the lists above and returns:
AC,BD; AC,BE; AD,BC; AD,BE; AE,BC; AE,BD
which are all the possible contractions. I would like to be able to do this for lists of variable size (but always 2 of them). I've played a lot with Python's itertools but I can't get the hang of how it works with two lists. Any help would be much appreciated.
Here is my version:
import itertools

l1 = 'AB'
l2 = 'CDE'
n = min(len(l1), len(l2))
print('; '.join(
    ','.join(a + b for a, b in zip(s1, s2))
    for s1, s2 in itertools.product(
        itertools.permutations(l1, n),
        itertools.combinations(l2, n),
    )
))
This will output:
AC,BD; AC,BE; AD,BE; BC,AD; BC,AE; BD,AE
Note that for brevity I did not build lists of the items, but iterated the strings directly. It does not matter which of the two lists gets permutations and which gets combinations; that just changes the order of the output. permutations yields all possible orderings, while combinations yields only sorted orderings, which is why you get each contraction exactly once.
For each contraction, you get two sequences s1 and s2; the contraction is between elements of like index in each sequence. ','.join(a+b for a,b in zip(s1,s2)) makes a nice string for such a contraction.
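The same idea packaged as a reusable generator (my own restatement; contractions is a hypothetical name):

import itertools

def contractions(l1, l2):
    n = min(len(l1), len(l2))
    for s1, s2 in itertools.product(itertools.permutations(l1, n),
                                    itertools.combinations(l2, n)):
        yield [a + b for a, b in zip(s1, s2)]

print(sum(1 for _ in contractions('AB', 'CDE')))  # 6 contractions, as listed above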
listA = {"A", "B"};
listB = {"C", "D", "E"};
f[x_, y_] := If[StringMatchQ[StringTake[x, {2}], StringTake[y, {2}]],
   Sequence @@ {}, List[x, y]];
z = Outer[StringJoin, listA, listB];
Flatten[Outer[f, First@z, Last@z], 1]
In [2]: list1 = ['A','B']
In [3]: list2 = ['C','D','E']
In [4]: list(itertools.product(list1, list2))
Out[4]: [('A', 'C'), ('A', 'D'), ('A', 'E'), ('B', 'C'), ('B', 'D'), ('B', 'E')]
In [5]: [''.join(p) for p in itertools.product(list1, list2)]
Out[5]: ['AC', 'AD', 'AE', 'BC', 'BD', 'BE']
If you're asking how to build all permutations of the items contained within both lists, with no repetitions and each result of length two, you could use itertools.permutations:
import itertools

combined_list = []
for i in list1 + list2:
    if i not in combined_list:
        combined_list.append(i)

for perm in itertools.permutations(combined_list, 2):
    print(perm)
For the inputs list1 = ['a', 'b'] and list2 = ['c', 'd', 'e'], this outputs:
('a', 'b') ('a', 'c') ('a', 'd') ('a', 'e') ('b', 'a') ('b', 'c') ('b', 'd') ('b', 'e') ('c', 'a') ('c', 'b') ('c', 'd') ('c', 'e') ('d', 'a') ('d', 'b') ('d', 'c') ('d', 'e') ('e', 'a') ('e', 'b') ('e', 'c') ('e', 'd')
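As an aside, on Python 3.7+ (where dicts preserve insertion order) the order-preserving dedupe can be written in one line; this is my suggestion, not part of the answer:

combined_list = list(dict.fromkeys(list1 + list2))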

List comprehension behavior in python

I am working with Codeskulptor on a rock-collision problem. I want to check for collisions between rocks, and my rocks are stored in a list. I came up with the solution of building a list of combinations and then checking each pair for a collision.
I do not have itertools available.
My combination list was created like this:
def combinations(items):
    n_items = [(n, item) for n, item in enumerate(items)]
    return [(item, item2) for n, item in n_items for m, item2 in n_items[n:] if n != m]

letters = ['A', 'B', 'C', 'D']
print(combinations(letters))

[('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D')]
The result is ok.
I had first tried to do this as a one-liner, without the helper list:

def combinations2(items):
    return [(item, item2) for n, item in enumerate(items) for m, item2 in enumerate(items[n:]) if n != m]

letters = ['A', 'B', 'C', 'D']
print(combinations2(letters))
But the outcome is completely different and wrong:
[('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'B'), ('B', 'D'), ('C', 'C'), ('C', 'D'), ('D', 'D')]
List comprehensions are still a little black magic to me. I cannot explain this behavior and would like to understand the wrong output.
I know that my two-line solution is much faster, since enumerate is only done once and then reused. But the wrong output is inexplicable to me, especially as ('B', 'C') is missing and the doubles BB, CC, DD appear while AA does not.
Can someone help me?
The first thing to do when trying to understand a list comprehension is to expand it into a regular set of for loops. Read the loops from left to right and nest accordingly.
Working code:
def combinations(items):
    n_items = []
    for n, item in enumerate(items):
        n_items.append((n, item))
    result = []
    for n, item in n_items:
        for m, item2 in n_items[n:]:
            if n != m:
                result.append((item, item2))
    return result
and your attempt that doesn't work:
def combinations2(items):
    result = []
    for n, item in enumerate(items):
        for m, item2 in enumerate(items[n:]):
            if n != m:
                result.append((item, item2))
    return result
Perhaps this way it is easier to see what goes wrong between the two versions.
Your version slices just items, not the indices produced by enumerate(). The original version slices [(0, 'A'), (1, 'B'), (2, 'C'), (3, 'D')] down to [(1, 'B'), (2, 'C'), (3, 'D')], etc. while your version re-numbers that slice to [(0, 'B'), (1, 'C'), (2, 'D')]. This in turn leads to your erroneous output.
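A two-line demonstration of that re-numbering (my addition):

items = ['A', 'B', 'C', 'D']
print(list(enumerate(items))[1:])   # [(1, 'B'), (2, 'C'), (3, 'D')]
print(list(enumerate(items[1:])))   # [(0, 'B'), (1, 'C'), (2, 'D')]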
Start the inner loop at the higher index by adding a second argument to the enumerate() function, the index at which to start numbering:
def combinations2(items):
    result = []
    for n, item in enumerate(items):
        for m, item2 in enumerate(items[n:], n):
            if n != m:
                result.append((item, item2))
    return result
Back to a one-liner:
def combinations2(items):
    return [(item, item2) for n, item in enumerate(items) for m, item2 in enumerate(items[n:], n) if n != m]
This then works correctly:
>>> def combinations2(items):
... return [(item, item2) for n, item in enumerate(items) for m, item2 in enumerate(items[n:], n) if n != m]
...
>>> letters = ['A','B','C','D']
>>> combinations2(letters)
[('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D')]
Note that you can simplify it further; the only time n == m is True is on the first iteration of each inner loop. So slice the items list for the inner loop one element further along, start the outer enumerate() at 1, and drop both the inner enumerate() and the n != m test:
def combinations3(items):
    result = []
    for n, item in enumerate(items, 1):
        for item2 in items[n:]:
            result.append((item, item2))
    return result
or as a list comprehension:
def combinations3(items):
    return [(item, item2) for n, item in enumerate(items, 1) for item2 in items[n:]]
Just skip the clashes in the iterator.
>>> letter = ['A', 'B', 'C', 'D']
>>> list ( (x,y) for n, x in enumerate(letter) for y in letter[n+1:])
[('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D')]
Suppose you just want the list of all ordered pairs (note this keeps both (i, j) and (j, i)):

def combinations2(items):
    return list(filter(lambda p: p[0] != p[1], [(i, j) for i in items for j in items]))

letters = ['A', 'B', 'C', 'D']
print(combinations2(letters))
The output I got is:
[('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'A'), ('B', 'C'), ('B', 'D'), ('C', 'A'), ('C', 'B'), ('C', 'D'), ('D', 'A'), ('D', 'B'), ('D', 'C')]
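For the record, when itertools is available (the question rules it out in Codeskulptor), itertools.permutations produces the same ordered pairs directly:

import itertools
print(list(itertools.permutations(['A', 'B', 'C', 'D'], 2)))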
