Python list traversal with gaps - python

Hi I have a multidimensional list such as:
my_list = [[1,2,3,1,2],[1,0,3,1,2],[1,0,0,0,2],[1,0,3,0,2]]
where 0 represents a gap between two pieces of data.
What I need to do is iterate through the list and keep track of how many gaps are in each sublist and throw away the zeros. I think the best way is to break each sublist into chunks where there are zeros so I end up with smaller lists of integers and a number of gaps. Ideally, to form a new list which tells me the length of each chunk and number of gaps (i.e. chunks -1), such as:
new_list = [[[5], 0], [[1, 3], 1], [[1, 1], 1], [[1, 1, 1], 2]]
or probably better:
new_list = [[5], [1, 3], [1, 1], [1, 1, 1]]
and I will know that the number of gaps in each sublist is len(chunk) - 1.
EDIT:
However, leading and trailing zeros do not represent gaps. i.e. [0,0,1,2] represents one continuous chunk.
Any help much appreciated.

itertools.groupby() is perfect for this:
from itertools import groupby

my_list = [
    [1, 2, 3, 1, 2],
    [1, 0, 3, 1, 2],
    [1, 0, 0, 0, 2],
    [1, 0, 3, 0, 2],
]

# groupby(row, bool) splits each row into alternating runs of truthy (data)
# and falsy (gap) values; only the lengths of the data runs are kept.
new_list = [
    [sum(1 for _ in run) for is_data, run in groupby(row, key=bool) if is_data]
    for row in my_list
]
Result:
>>> new_list
[[5], [1, 3], [1, 1], [1, 1, 1]]
The result contains the length of each non-zero chunk for each sublist, so for example [1,0,3,1,2] gives [1,3], so there are two chunks (one gap). This matches your second output format.

Here is my humble code without any imports:
The algorithm is slightly long:
def toggle(n):
    """Return True when ``n`` is a data value (non-zero) and False for a gap (0)."""
    return n != 0
def chunk_counter(L):
    """
    list -> list

    Return the length of every run of consecutive non-zero values in ``L``.

    Zeros are gaps: they separate runs and are never counted, so leading and
    trailing zeros do not produce a chunk. An empty or all-zero list gives [].
    """
    chunk_list = []
    run = 0  # length of the non-zero run currently being scanned
    for value in L:
        if value != 0:
            run += 1
        elif run:
            # A zero right after data closes the current chunk.
            chunk_list.append(run)
            run = 0
    if run:
        # The list ended while a chunk was still open.
        chunk_list.append(run)
    return chunk_list
def chunks(L):
    """
    (list of lists) -> list of lists

    Apply chunk_counter() to every sublist of ``L`` and collect the results
    in the same order.
    """
    return [chunk_counter(sublist) for sublist in L]
So, you may try the function chunks() on your list:
>>> L = [[1,2,3,1,2],[1,0,3,1,2],[1,0,0,0,2],[1,0,3,0,2], [0,0,1,2]]
>>> chunks(L)
[[5], [1, 3], [1, 1], [1, 1, 1], [2]]
Here's a recursive definition (a replacement for Chunk Counter):
# Holds the chunk lengths computed by the most recent counter() call.
counter_list = []


def counter(L):
    """Compute, print and return the lengths of the non-zero runs in ``L``.

    Zeros act as separators. Consecutive, leading or trailing zeros never
    produce a zero-length chunk, and each call replaces the contents of the
    module-level ``counter_list`` instead of appending to what earlier calls
    left there.
    """
    lengths = []
    run = 0
    for value in L:
        if value != 0:
            run += 1
        elif run:
            lengths.append(run)
            run = 0
    if run:
        lengths.append(run)
    # Mutate in place so existing references to counter_list stay valid.
    counter_list[:] = lengths
    print(counter_list)
    return lengths

Related

How to select a sliding window of elements from a list of lists?

Say I have the following list of lists:
x = [[1,2,3],[4,5,6],[7,8,9,10]]
And I wish to select all 'windows' of size e.g. n=4, staggered by a distance of e.g. d=2:
[[1,2,3],[4]] # Starts at position `0`
[[3],[4,5,6]] # Starts at position `d`
[[5,6],[7,8]] # Starts at position `2d`
[[7,8,9,10]] # Starts at position `3d`
I.e. I wish to take intersecting 'slices' where the windows overlap with the sublists.
How would I go about this?
If you precompute some indices, you can reconstruct any window with a virtual one-liner:
import itertools
import operator
def window(x, start, stop, indices=None):
    """Return the slice ``[start:stop]`` of the flattened ``x``, regrouped by sublist.

    ``indices`` maps each flat position to ``(sublist index, index inside the
    sublist)``. It may be passed in (precomputed once for many windows) or
    omitted, in which case it is rebuilt from ``x`` on every call.

    Sublists that lie entirely inside the window are returned as the original
    objects (not copies); the first and last touched sublists are rebuilt from
    the covered elements only. An empty window yields [].
    """
    if indices is None:
        indices = [(i, j) for i, sublist in enumerate(x) for j in range(len(sublist))]
    span = indices[start:stop]
    if not span:
        return []
    first = span[0][0]
    last = span[-1][0]
    return [
        [x[i][j] for i, j in g] if k in (first, last) else x[k]
        for k, g in itertools.groupby(span, key=operator.itemgetter(0))
    ]
def flat_len(x):
    """Return the total number of elements across all sublists of ``x``."""
    return sum(map(len, x))
n = 4  # window size, in flattened elements
d = 2  # distance between the starts of consecutive windows
x = [[1, 2, 3], [4, 5, 6], [7, 8, 9, 10]]
# Flat position -> (sublist index, index inside that sublist).
indices = [(i, j) for i, sublist in enumerate(x) for j in range(len(sublist))]
for i in range(0, flat_len(x) - n + 1, d):
    # Call window() with its three declared arguments (x, start, stop).
    print(window(x, i, i + n))
>>> [[1, 2, 3], [4]]
>>> [[3], [4, 5, 6]]
>>> [[5, 6], [7, 8]]
>>> [[7, 8, 9, 10]]
Another approach can be to actually use the flat list, get the right window, but fix it afterwards.
I caved in and used a little numpy at the end, makes the fixing easier.
from itertools import chain

import numpy as np

# Sample data, window size (n) and stride between window starts (d).
x = [[1, 2, 3], [4, 5, 6], [7, 8, 9, 10]]
n, d = 4, 2
def custom_slider(x, n, d):
    """Return every window of ``n`` flattened elements of ``x``, ``d`` apart.

    Each window is a list of lists that keeps the original sublist boundaries;
    pieces that would be empty are dropped. Values are returned as plain
    Python objects (via ``ndarray.tolist``), not NumPy scalars.
    """
    # Cumulative end offset of every sublist inside the flattened sequence.
    boundaries = np.cumsum([len(sub) for sub in x])
    x_flat = list(chain.from_iterable(x))
    result = []
    for i in range(0, len(x_flat) - n + 1, d):
        # Boundaries relative to the window start; anything at or before the
        # start cannot split this window.
        shifted = boundaries - i
        split_pts = shifted[shifted > 0]
        # Split points past the end of the window only produce empty pieces,
        # which are filtered out below.
        pieces = np.split(np.asarray(x_flat[i:i + n]), split_pts)
        result.append([piece.tolist() for piece in pieces if piece.size])
    return result
# Evaluate the sliding windows for the sample data (a REPL echoes the returned list).
custom_slider(x, n, d)
Output:
[[[1, 2, 3], [4]], [[3], [4, 5, 6]], [[5, 6], [7, 8]], [[7, 8, 9, 10]]]
I would go for nested for loops, although it's not pretty:
# Sample nested list whose flattened form is sliced into windows below.
x = [[1, 2, 3], [4, 5, 6], [7, 8, 9, 10]]
def window(x, n, offset):
    """Return the ``n`` flattened elements of ``x`` starting at flat position ``offset``.

    The result keeps the sublist structure: one list per sublist that the
    window overlaps, holding only the covered elements. Sublists contributing
    nothing (empty sublists, or any sublist when ``n`` is 0) are left out.
    """
    res = []
    end = offset + n  # flat position one past the last element of the window
    pos = 0           # flat position of the first element of the current sublist
    for sub in x:
        if pos >= end:
            # Window is complete; later sublists cannot contribute.
            break
        # Translate the window bounds into indices local to this sublist.
        lo = max(offset - pos, 0)
        hi = min(end - pos, len(sub))
        piece = sub[lo:hi]
        if piece:
            res.append(piece)
        pos += len(sub)
    return res
def flat_len(x):
    """Return the number of elements in ``x`` once all sublists are flattened."""
    total = 0
    for sublist in x:
        total += len(sublist)
    return total
n = 4  # window size
d = 2  # stride between window starts
# One window per start offset 0, d, 2d, ... that still fits n elements.
for i in range(0, flat_len(x) - n + 1, d):
    print(window(x, n, i))

Group repeated elements of a list

I am trying to create a function that receives a list and return another list with the repeated elements.
For example for the input A = [2,2,1,1,3,2] (the list is not sorted) and the function would return result = [[1,1], [2,2,2]]. The result doesn't need to be sorted.
I already did it in Wolfram Mathematica but now I have to translate it to python3, Mathematica has some functions like Select, Map and Split that makes it very simple without using long loops with a lot of instructions.
# One group per distinct value of A that occurs more than once; A.count()
# rescans the whole list for every distinct value.
result = []
for x in set(A):
    occurrences = A.count(x)
    if occurrences > 1:
        result.append([x] * occurrences)
Simple approach:
def grpBySameConsecutiveItem(l):
    """Return every run of two or more equal, adjacent items of ``l``.

    Runs are returned in order of appearance; items that are not immediately
    repeated are dropped. A run that reaches the end of ``l`` is included.
    """
    rv = []
    run = []  # the run of equal items currently being extended
    for elem in l:
        if run and elem == run[0]:
            run.append(elem)
            continue
        # elem starts a new run: keep the finished one only if it repeated.
        if len(run) > 1:
            rv.append(run)
        run = [elem]
    # Flush the last run, which no later element will close.
    if len(run) > 1:
        rv.append(run)
    return rv
# print() with a single argument behaves the same on Python 2 and Python 3.
print(grpBySameConsecutiveItem([1, 2, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 4]))
Output:
[[1, 1, 1], [2, 2], [4, 4, 4, 4]]
You can sort the output afterwards if you want it sorted. Alternatively you can sort the input list first, but then every equal value becomes adjacent, so the result no longer reflects runs that were consecutive in the original order.
See this https://stackoverflow.com/a/4174955/7505395 for how to sort lists of lists depending on an index (just use 0) as all your inner lists are identical.
You could also use itertools - it has functions such as takewhile and groupby that make this look much neater.
This will ignore consecutive ones, and just collect them all:
def grpByValue(lis):
    """Group equal values of ``lis`` regardless of position.

    Returns one list per value that occurs at least twice, containing that
    value repeated as many times as it occurs. Groups follow the order in
    which each value first appears (on Python versions where dicts keep
    insertion order). Nothing is printed.
    """
    from collections import Counter

    counts = Counter(lis)  # single pass over the input
    return [[value] * count for value, count in counts.items() if count >= 2]
data = [1, 2, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 4]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(grpByValue(data))
Output:
[[1, 1, 1, 1], [2, 2, 2], [4, 4, 4, 4, 4]]
You could do this with a list comprehension:
A = [1, 1, 1, 2, 2, 3, 3, 3]
B = []
for n in A:
    # Build the group once; a plain loop avoids creating a throwaway list of
    # None values just to call append().
    group = [n] * A.count(n)
    if group not in B:
        B.append(group)
outputs [[1,1,1],[2,2],[3,3,3]]
Or more pythonically:
A = [1, 2, 2, 3, 4, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4]
B = []
for n in A:
    # All occurrences of n as one group, computed once per element.
    group = [n] * A.count(n)
    # Only the first occurrence of each value adds its group.
    if B.count(group) == 0:
        B.append(group)
outputs [[1,1,1],[2,2,2,2,2],[3,3,3],[4,4,4,4]]
This works with a sorted or an unsorted list; if you need the groups in sorted order, iterate with for n in sorted(A) instead.
This is a job for Counter(). Iterating over each element, x, and checking A.count(x) has a O(N^2) complexity. Counter() will count how many times each element exists in your iterable in one pass and then you can generate your result by iterating over that dictionary.
>>> from collections import Counter
>>> A = [2,2,1,1,3,2]
>>> counts = Counter(A)
>>> result = [[key] * value for key, value in counts.items() if value > 1]
>>> result
[[2, 2, 2], [1, 1]]

Finding one transposition permutation from a list in Python and C

I have the input as A = [ 2,3,4,1]
The output is simply all possible permutation from elements in A which can be done by single transposition (single flip of two neighbouring elements) operation. So the output is :
[3,2,4,1],[ 2,4,3,1],[2,3,1,4],[1,3,4,2]
How to do this in Python? and/or C?
EDIT
Circular transpositioning is allowed. Hence [2,3,4,1] ==> [1,3,4,2] is allowed and a valid output.
A = [2, 3, 4, 1]
res = []
for i in range(len(A)):
    temp = A[:]  # work on a copy so A itself is never modified
    # For i == 0 the index i - 1 is -1, i.e. the last element, which gives the
    # circular swap between the first and last positions.
    temp[i], temp[i - 1] = temp[i - 1], temp[i]
    res.append(temp)
print(res)
Results:
[[1, 3, 4, 2], [3, 2, 4, 1], [2, 4, 3, 1], [2, 3, 1, 4]]
Not an out-of-box solution but quite trivial and simple to understand
a = [2, 3, 4, 1]  # input list
o = []            # collected transpositions
n = len(a)
for i in range(n):
    x = a[:]  # fresh copy for every swap so `a` stays untouched
    # Swap position i with its right neighbour; the modulus wraps the last
    # position around to index 0, making the list circular.
    x[i], x[(i + 1) % n] = x[(i + 1) % n], x[i]
    o.append(x)
print(o)
The output is
[[3, 2, 4, 1], [2, 4, 3, 1], [2, 3, 1, 4], [1, 3, 4, 2]]
def swapped(l, p1, p2):
    """Return a copy of ``l`` with the items at positions ``p1`` and ``p2`` exchanged."""
    r = l[:]  # slice copy: the caller's list is left unchanged
    r[p1], r[p2] = r[p2], r[p1]
    return r
A = [2, 3, 4, 1]
# One swapped copy per position i, exchanging i with its circular right neighbour.
size = len(A)
transp = [swapped(A, i, (i + 1) % size) for i in range(size)]
Here's something a little different:
def swapper(a, j, k):
    """Return a new list equal to ``a`` with the items at indices ``j`` and ``k`` exchanged."""
    return [
        a[k] if i == j else a[j] if i == k else a[i]
        for i in range(len(a))
    ]
def transpositions(a):
    """Return one list per position j of ``a``, with j and its circular right neighbour swapped."""
    length = len(a)
    # (j + 1) % length wraps the last position around to index 0.
    return [swapper(a, j, (j + 1) % length) for j in range(length)]
# Print the result of transpositions() for the sample input.
print(transpositions([2, 3, 4, 1]))
Here's a more iterative solution:
from itertools import tee

try:
    from itertools import izip  # Python 2
except ImportError:
    izip = zip  # Python 3: the built-in zip is already lazy


def swaps(length):
    """Yield the index pairs of every adjacent swap in a circular list.

    Produces (0, 1), (1, 2), ..., (length - 2, length - 1) followed by the
    wrap-around pair (length - 1, 0). Nothing is yielded for ``length`` 0.
    """
    a, b = tee(range(length))
    next(b, None)  # advance the second iterator so the pairs are (i, i + 1)
    for pair in izip(a, b):
        yield pair
    if length > 0:
        yield (length - 1, 0)
def swapper(a, i, j):
    """Temporarily swap ``a[i]`` and ``a[j]`` in place and yield ``a`` once.

    The yielded object is ``a`` itself, not a copy. When the generator is
    resumed after the yield the swap is undone, so a consumer that wants to
    keep the swapped state must copy the list before advancing the generator.
    """
    a[i], a[j] = a[j], a[i]
    yield a
    a[i], a[j] = a[j], a[i]  # restore the original order
def transpositions(a):
    """Return every adjacent (circular) transposition of ``a`` as a separate list.

    swapper() yields the live list and reverts the swap when it is resumed,
    so each yielded state is copied immediately; otherwise every entry of the
    result would be the same object, restored to the original order.
    """
    return [list(r) for i, j in swaps(len(a)) for r in swapper(a, i, j)]
# Print the result of transpositions() for the sample input.
print(transpositions([2, 3, 4, 1]))

Union find implementation using Python

So here's what I want to do: I have a list that contains several equivalence relations:
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
And I want to union the sets that share one element. Here is a sample implementation:
def union(lis):
    """Merge the groups in ``lis`` that share at least one element.

    ``lis`` is an iterable of iterables of hashable items (e.g. pairs). The
    result is a list of disjoint sets, ordered by the first appearance of any
    of their members in ``lis``. Empty groups contribute nothing.

    Uses a disjoint-set forest (union by size with path compression), so the
    work is close to linear in the total number of items instead of repeating
    pairwise passes until nothing changes.
    """
    parent = {}  # item -> parent item; a root is its own parent
    size = {}    # root -> number of items in its tree
    order = []   # items in order of first appearance

    def find(item):
        root = item
        while parent[root] != root:
            root = parent[root]
        # Path compression: point every node on the walked path at the root.
        while parent[item] != root:
            parent[item], item = root, parent[item]
        return root

    for group in lis:
        anchor = None      # root of the set this group is being merged into
        have_anchor = False
        for item in group:
            if item not in parent:
                parent[item] = item
                size[item] = 1
                order.append(item)
            root = find(item)
            if not have_anchor:
                anchor, have_anchor = root, True
            elif root != anchor:
                # Attach the smaller tree under the larger one.
                if size[anchor] < size[root]:
                    anchor, root = root, anchor
                parent[root] = anchor
                size[anchor] += size[root]

    res = []
    slot = {}  # root -> index of its set in res
    for item in order:
        root = find(item)
        if root not in slot:
            slot[root] = len(res)
            res.append(set())
        res[slot[root]].add(item)
    return res
And it prints
[set([1, 2, 3, 6, 7]), set([4, 5])]
This does the right thing but is way too slow when the equivalence relations is too large. I looked up the descriptions on union-find algorithm: http://en.wikipedia.org/wiki/Disjoint-set_data_structure
but I am still having trouble coding a Python implementation.
Solution that runs in O(n) time
def indices_dict(lis):
    """Map every element to the list of indices of the pairs in ``lis`` that contain it.

    ``lis`` must be a sequence of 2-item pairs. The result is a
    ``defaultdict(list)``.
    """
    from collections import defaultdict

    d = defaultdict(list)
    for i, (a, b) in enumerate(lis):
        d[a].append(i)
        d[b].append(i)
    return d
def disjoint_indices(lis):
    """Return a list of disjoint sets of indices into ``lis``.

    Two indices end up in the same set when their pairs are connected through
    shared elements. Every element entry is popped from the index map the
    first time it is reached, so it is expanded only once.
    """
    d = indices_dict(lis)
    sets = []
    while d:
        # Seed a new component with the pairs of an arbitrary remaining element.
        que = set(d.popitem()[1])
        ind = set()
        while que:
            ind |= que
            # Next frontier: pairs reachable through any element of the
            # current frontier that has not been consumed yet.
            que = {y for i in que for x in lis[i] for y in d.pop(x, [])} - ind
        sets.append(ind)
    return sets
def disjoint_sets(lis):
    """Return the disjoint sets of elements, built from the index sets of disjoint_indices()."""
    return [{x for i in s for x in lis[i]} for s in disjoint_indices(lis)]
How it works:
>>> lis = [(1,2),(2,3),(4,5),(6,7),(1,7)]
>>> indices_dict(lis)
>>> {1: [0, 4], 2: [0, 1], 3: [1], 4: [2], 5: [2], 6: [3], 7: [3, 4]}
indices_dict gives a map from an equivalence # to an index in lis. E.g. 1 is mapped to index 0 and 4 in lis.
>>> disjoint_indices(lis)
>>> [set([0,1,3,4]), set([2])]
disjoint_indices gives a list of disjoint sets of indices. Each set corresponds to indices in an equivalence. E.g. lis[0] and lis[3] are in the same equivalence but not lis[2].
>>> disjoint_sets(lis)
>>> [set([1, 2, 3, 6, 7]), set([4, 5])]
disjoint_sets converts the disjoint indices into their proper equivalences.
Time complexity
The O(n) time complexity is difficult to see but I'll try to explain. Here I will use n = len(lis).
indices_dict certainly runs in O(n) time because only 1 for-loop
disjoint_indices is the hardest to see. It certainly runs in O(len(d)) time since the outer loop stops when d is empty and the inner loop removes an element of d each iteration. now, the len(d) <= 2n since d is a map from equivalence #'s to indices and there are at most 2n different equivalence #'s in lis. Therefore, the function runs in O(n).
disjoint_sets is difficult to see because of the 3 combined for-loops. However, you'll notice that at most i can run over all n indices in lis and x runs over the 2-tuple, so the total complexity is 2n = O(n)
I think this is an elegant solution, using the built in set functions:
#!/usr/bin/python3
def union_find(lis):
    """Merge the groups in ``lis`` that share an element; return a list of sets.

    Groups are processed one at a time. Every set collected so far that
    overlaps the incoming group is absorbed into it, and the merged set is
    appended after the untouched ones.
    """
    unions = []
    for item in map(set, lis):
        temp = []
        for s in unions:
            if not s.isdisjoint(item):
                # Absorb s; the grown item is compared against the remaining sets.
                item = s.union(item)
            else:
                temp.append(s)
        temp.append(item)
        unions = temp
    return unions
if __name__ == '__main__':
    # Sample equivalence pairs, run only when executed as a script.
    l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
    print(union_find(l))
It returns a list of sets.
Perhaps something like this?
#!/usr/local/cpython-3.3/bin/python
import copy
import pprint
import collections
def union(list_):
    """Return a mapping from every element to the set of all elements equivalent to it.

    ``list_`` is a sequence of 2-item pairs. Neighbour sets are first recorded
    symmetrically and then widened repeatedly until a full pass adds nothing.
    """
    dict_ = collections.defaultdict(set)
    for sublist in list_:
        dict_[sublist[0]].add(sublist[1])
        dict_[sublist[1]].add(sublist[0])
    change_made = True
    while change_made:
        change_made = False
        for key, values in dict_.items():
            # Iterate over a snapshot: `values` grows inside the loop.
            for value in copy.copy(values):
                missing = dict_[value] - values
                if missing:
                    values |= missing
                    change_made = True
    return dict_
# Sample equivalence pairs.
list_ = [ [1, 2], [2, 3], [4, 5], [6, 7], [1, 7] ]
# Pretty-print the mapping returned by union().
pprint.pprint(union(list_))
This works by completely exhausting one equivalence class at a time. When a pair is found to belong to the current class, it is removed from the original set and is no longer searched.
def equiv_sets(lis):
    """Return the equivalence classes described by the hashable pairs in ``lis``.

    One class is grown at a time: an arbitrary remaining pair seeds it, and the
    remaining pairs are rescanned until a full scan no longer enlarges it.
    The order of the returned sets is not specified.
    """
    s = set(lis)
    sets = []
    # Loop while there are still pairs that belong to no class.
    while s:
        s1 = set(s.pop())
        length = 0
        # Rescan until the class stops growing.
        while len(s1) != length:
            length = len(s1)
            for v in list(s):  # snapshot: s is modified inside the loop
                if v[0] in s1 or v[1] in s1:
                    s1 |= set(v)
                    s.discard(v)
        sets.append(s1)
    return sets
# print() with a single argument behaves the same on Python 2 and Python 3.
print(equiv_sets([(1, 2), (2, 3), (4, 5), (6, 7), (1, 7)]))
OUTPUT: [set([1, 2, 3, 6, 7]), set([4, 5])]

Python generator with external break condition

I need to iterate over ascending sequences x of n (= 5, f.i.) integers, finding all sequences for which a function f(*x) returns True.
Assume that if f_n(*y) is False for a particular y, then f_n(*z) is False for any z with z_i >= y_i. So f_n is monotonic in all its arguments.
This kind of generator function could be used in the following way to determine all ascending sequences of integers that have a sum of squares < 100
for sequence in generate_sequences(5):
    if sum_squares_is_at_least(sequence, 100):
        # some code to trigger the breaking of the generator loop
        pass  # placeholder so the sketch parses; the question asks what belongs here
    else:
        print(sequence)
Clarification:
The problem here is that we need to iterate over the n elements individually. Initially, we iterate from [1,1,1,1,1] to [1,1,1,1,x], and then we have to continue with [1,1,1,2,2] to [1,1,1,2,y], eventually ending with [a,b,c,d,e]. It seems that the generator should look something like this, but it needs some code to break out of the for and/or while loops when necessary (as determined externally):
def generate_sequences(length, minimum=1):
    """Yield non-decreasing integer sequences of ``length`` items, each >= ``minimum``.

    The generator is unbounded: for ``length`` >= 1 it never terminates by
    itself (the last position grows forever), so the consumer must stop it.
    A ``length`` of 0 or less yields the single empty sequence.
    """
    if length <= 0:
        yield []
    else:
        element = minimum
        while True:
            # The tail may not go below the current head, which keeps the
            # whole sequence ascending.
            for sequence in generate_sequences(length - 1, element):
                yield [element] + sequence
            element += 1
Example:
For n = 3, and sum of squares no larger than 20, the following sequences would be generated:
[1, 1, 1], [1, 1, 2], [1, 1, 3], [1, 1, 4], [1, 2, 2], [1, 2, 3], [1, 3, 3], [2, 2, 2], [2, 2, 3]
Note that in the general case, I cannot use the information that 4 is the upper bound for each element. This would also seriously impact the running time for larger examples.
Are you looking for itertools.takewhile?
>>> from itertools import takewhile
>>> def gen(): #infinite generator
... i=0
... while True:
... yield range(i,i+5)
... i = i+1
...
>>> [ x for x in takewhile( lambda x:sum(x)<20, gen() ) ]
[[0, 1, 2, 3, 4], [1, 2, 3, 4, 5]]
>>>
import itertools as it
# takewhile() keeps yielding while the predicate is true, so the stop
# condition ("sum of squares is at least 100") has to be negated.
it.takewhile(lambda x: not sum_squares_is_at_least(x, 100), generate_sequences(5))
If you are not sure about the 5 in generate_sequences, then just let it keep yielding values for as long as it is called:
def generate_sequences():
    """Endless placeholder generator: yields [0, 0], [1, 1], [2, 2], ..."""
    i = 0  # or anything
    while True:
        yield [i, i]  # or anything
        i = i + 1  # or anything
Then use it this way:
# Continue only while the stop condition is NOT yet met.
it.takewhile(lambda x: not sum_squares_is_at_least(x, 100), generate_sequences())
I would solve it with recursion by starting with a given list then appending another number (with logic to prevent going over sum of squares target)
def makegen(N):
    """Return a generator function for ascending lists whose sum of squares stays below ``N``.

    Calling the returned ``gen(l)`` yields ``l`` itself and then, depth first,
    every extension of ``l`` by further non-decreasing integers for which each
    appended value keeps the sum of squares strictly below ``N``. With no
    argument it starts from a fresh empty list.
    """
    def gen(l=None):
        if l is None:
            # A new list per call: the yielded list is handed to the caller,
            # so a shared default object could be modified from outside.
            l = []
        yield l  # the empty list is valid, with sum == 0
        # Never append less than the last item, so only combinations (not
        # permutations) are produced; start at 1 for the empty list.
        i = l[-1] if l else 1
        sumsquare = sum(v * v for v in l)  # budget already used
        while sumsquare + i * i < N:
            for x in gen(l + [i]):  # recurse with the appended list
                yield x
            i += 1
    return gen
calling our generator generator (tee hee :D) in the following fashion allows us to have any maximum sum of squares we desire
# Print every ascending list whose sum of squares is below 26.
for x in makegen(26)():
    print(x)

Categories