Split list into multiple lists based on indices [duplicate] - python

What is the best way to split a list into parts based on an arbitrary number of indexes? E.g. given the code below
indexes = [5, 12, 17]
list = range(20)
return something like this
part1 = list[:5]
part2 = list[5:12]
part3 = list[12:17]
part4 = list[17:]
If there are no indexes it should return the entire list.

This is the simplest and most pythonic solution I can think of:
def partition(alist, indices):
    """Split ``alist`` into consecutive slices cut at each absolute index.

    :param alist: sequence that supports slicing
    :param indices: iterable of cut positions, applied in the given order
    :return: list of ``len(indices) + 1`` slices; the whole sequence as a
        single slice when ``indices`` is empty
    """
    indices = list(indices)  # accept tuples/generators, not only lists
    # Pair every start with the following stop; None leaves the last slice open.
    return [alist[i:j] for i, j in zip([0] + indices, indices + [None])]
if the inputs are very large, then the iterators solution should be more convenient:
from itertools import chain, tee

try:  # Python 2
    from itertools import izip
except ImportError:  # Python 3: the builtin zip is already lazy
    izip = zip


def partition(alist, indices):
    """Lazily yield the slices of ``alist`` cut at each absolute index.

    :param alist: sequence that supports slicing
    :param indices: iterable of cut positions (may be a one-shot iterator)
    :return: generator of ``len(indices) + 1`` slices
    """
    # tee() lets the same index stream serve as both the starts and the stops.
    starts, stops = tee(indices)
    pairs = izip(chain([0], starts), chain(stops, [None]))
    return (alist[i:j] for i, j in pairs)
and of course, the very, very lazy guy solution (if you don't mind to get arrays instead of lists, but anyway you can always revert them to lists):
import numpy
# Reuse NumPy's splitter directly; the pieces come back as arrays, not lists.
partition = numpy.split

I would be interested in seeing a more Pythonic way of doing this also. But this is a crappy solution. You need to add a check for an empty index list.
Something along the lines of:
indexes = [5, 12, 17]
items = list(range(20))  # renamed from `list` so the builtin is not shadowed
output = []
prev = 0
for index in indexes:
    output.append(items[prev:index])
    prev = index
# Slice from `prev` rather than indexes[-1] so that an empty `indexes`
# yields the whole list instead of raising IndexError.
output.append(items[prev:])
print(output)
produces
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]

My solution is similar to Il-Bhima's.
def parts(list_, indices):
    """Split ``list_`` at the given absolute indices.

    :param list_: sequence that supports ``len()`` and slicing
    :param indices: iterable of cut positions, applied in the given order
    :return: list of ``len(indices) + 1`` slices
    """
    bounds = [0] + list(indices) + [len(list_)]
    # Walk adjacent boundary pairs instead of indexing back into the list.
    return [list_[start:stop] for start, stop in zip(bounds, bounds[1:])]
Alternative approach
If you're willing to slightly change the way you input indices, from absolute indices to relative (that is, from [5, 12, 17] to [5, 7, 5]), the code below will also give you the desired output without creating intermediary lists.
from itertools import chain, islice


def parts(list_, indices):
    """Split ``list_`` into chunks whose sizes are given by ``indices``.

    :param list_: any iterable
    :param indices: iterable of *relative* chunk lengths, e.g. ``[5, 7, 5]``
    :return: list of lists; a final chunk holds whatever is left over
    """
    i = iter(list_)
    # The trailing None makes the last islice() drain the rest of the iterator.
    return [list(islice(i, n)) for n in chain(indices, [None])]

def burst_seq(seq, indices):
    """Yield consecutive slices of ``seq`` cut at each absolute index.

    :param seq: sequence that supports slicing
    :param indices: iterable of cut positions, applied in the given order
    :return: generator of ``len(indices) + 1`` slices
    """
    startpos = 0
    for index in indices:
        yield seq[startpos:index]
        startpos = index
    # Whatever follows the last cut (the whole sequence if there were no cuts).
    yield seq[startpos:]
>>> list(burst_seq(range(20), [5, 12, 17]))
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
>>> list(burst_seq(range(20), []))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]
>>> list(burst_seq(range(0), [5, 12, 17]))
[[], [], [], []]
>>>
Maxima mea culpa: it uses a for statement, and it's not using whizzbang stuff like itertools, zip(), None as a sentinel, list comprehensions, ...
;-)

from functools import reduce  # a builtin on Python 2, functools-only on Python 3

indices = [5, 12, 17]
data = list(range(20))  # renamed from `input` so the builtin is not shadowed
output = []
# reduce() threads the previous cut position through the calls: append()
# returns None, so `... or y` hands the current index on as the next start.
reduce(lambda x, y: output.append(data[x:y]) or y, indices + [len(data)], 0)
print(output)

This is all that I could think of
def partition(list_, indexes):
    """Split ``list_`` at the given absolute indexes.

    A leading 0 and a trailing ``len(list_)`` are added only when missing, so
    no empty slice is produced for boundaries that are already present.

    :param list_: sequence that supports ``len()`` and slicing
    :param indexes: iterable of cut positions; may be empty
    :return: list of slices
    """
    indexes = list(indexes)
    # `not indexes` guards the empty case, which used to raise IndexError.
    if not indexes or indexes[0] != 0:
        indexes = [0] + indexes
    if indexes[-1] != len(list_):
        indexes = indexes + [len(list_)]
    return [list_[a:b] for (a, b) in zip(indexes[:-1], indexes[1:])]

Cide's makes three copies of the array: [0]+indices copies, ([0]+indices)+[] copies again, and indices[:-1] will copy a third time. Il-Bhima makes five copies. (I'm not counting the return value, of course.)
Those could be reduced (izip, islice), but here's a zero-copy version:
def iterate_pairs(lst, indexes):
    """Yield ``(start, stop)`` bounds for each part of ``lst``.

    :param lst: sequence being partitioned; only its length is used
    :param indexes: iterable of cut positions, applied in the given order
    :return: generator of ``(start, stop)`` tuples; the final ``stop`` is
        ``len(lst)`` so the pairs can be fed straight to ``range()``
    """
    prev = 0
    for i in indexes:
        yield prev, i
        prev = i
    yield prev, len(lst)
def partition(lst, indexes):
    """Yield the slices of ``lst`` delimited by ``iterate_pairs(lst, indexes)``."""
    for first, last in iterate_pairs(lst, indexes):
        yield lst[first:last]
indexes = [5, 12, 17]
lst = list(range(20))  # a real list on Python 3 too, so it can be mutated later
print(list(partition(lst, indexes)))
Of course, array copies are fairly cheap (native code) compared to interpreted Python, but this has another advantage: it's easy to reuse, to mutate the data directly:
# NOTE: lst must be a mutable list here (on Python 3: list(range(20))).
for first, last in iterate_pairs(lst, indexes):
    for i in range(first, last):
        lst[i] = first
print(lst)
# [0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 12, 12, 12, 12, 12, 17, 17, 17]
(That's why I passed indexes to iterate_pairs. If you don't care about that, you can remove that parameter and just have the final line be "yield prev, None", which is all partition() needs.)

Here's yet another answer.
def partition(l, indexes):
    """Split ``l`` at the given indexes; negative indexes are allowed.

    :param l: sequence that supports ``len()`` and slicing
    :param indexes: iterable of cut positions, applied in the given order
    :return: list of ``len(indexes) + 1`` slices
    """
    result = []
    start = 0
    # A plain loop replaces the side-effecting reduce(), which is not a
    # builtin on Python 3.
    for stop in list(indexes) + [len(l)]:
        result.append(l[start:stop])
        start = stop
    return result
It supports negative indexes and such.
>>> partition([1,2,3,4,5], [1, -1])
[[1], [2, 3, 4], [5]]
>>>

The plural of index is indices. Going for simplicity/readability.
indices = [5, 12, 17]
data = list(range(20))  # renamed from `input`; a real list so its tail can be cut
output = []
# Peel parts off the end so the earlier indices stay valid while `data` shrinks.
for i in reversed(indices):
    output.append(data[i:])
    del data[i:]
output.append(data)
# The parts were collected back to front, so popping prints them in order.
while output:
    print(output.pop())

Related

Splitting a list on non-sequential numbers

I have an ordered list of entities, numbered in a broken sequence:
[1, 2, 3, 6, 7, 11, 17, 18, 19]
I'd like to break the list where there's a gap, and collect the results in a new list:
[[1, 2, 3], [6, 7], [11], [17, 18, 19]]
I have the feeling there's a name for what I want to do and probably a nice library function for it - but I can't think of it. Can anyone shine some light before I possibly reinvent a wheel?
edit: Thanks, folks, but I was asking if there's a name for this operation and an existing algorithm, not for implementations - this is what I came up with:
def group_adjoining(elements, key=lambda x: x):
    """Return a list of lists of contiguous elements.

    :param elements: ordered sequence to split (must support slicing)
    :param key: function to get the key integer from a list element
    :return: new list of groups; a group ends wherever the next key is not
        exactly one more than the current key
    """
    if not elements:
        # Return a fresh list rather than handing back the caller's own object.
        return []
    result = [[elements[0]]]
    for a, b in zip(elements, elements[1:]):
        if key(a) + 1 == key(b):
            result[-1].append(b)
        else:
            result.append([b])
    return result
Plain itertools.groupby approach:
from itertools import groupby

lst = [1, 2, 3, 6, 7, 11, 17, 18, 19]
# Within a run of consecutive values, (position - value) stays constant,
# so it works as the grouping key.
out = [
    [v for _, v in g]
    for _, g in groupby(enumerate(lst), lambda x: x[0] - x[1])
]
print(out)
Prints:
[[1, 2, 3], [6, 7], [11], [17, 18, 19]]
Try greedy approach:
lst = [1, 2, 3, 6, 7, 11, 17, 18, 19]
res = []
tmp = []
prv = lst[0]
for item in lst:
    # A jump of more than 1 closes the current group and starts a new one.
    if item - prv > 1:
        res.append(tmp)
        tmp = []
    tmp.append(item)
    prv = item
res.append(tmp)  # flush the final group
print(res)
Output: [[1, 2, 3], [6, 7], [11], [17, 18, 19]]
I first came across more_itertools today, and I think this package is useful for this problem.
pip install more-itertools
from more_itertools import split_when
l = [1, 2, 3, 6, 7, 11, 17, 18, 19]
# Break wherever the next value is not exactly the previous one plus 1.
# NOTE(review): presumably split_when passes adjacent pairs to the predicate -
# confirm against the more-itertools documentation.
res = list(split_when(l, lambda a, b: a + 1 != b))
print(res)
You could use a simple generator.
def split(lst):
    """Yield runs of consecutive integers found in ``lst``.

    :param lst: iterable of integers, assumed to be sorted
    :return: generator of lists, each holding values that increase by 1
    """
    result = []
    for item in lst:
        if (not result) or result[-1] + 1 == item:
            result.append(item)
        else:
            yield result
            result = [item]
    # Emit the last run; nothing is yielded for an empty input.
    if result:
        yield result
foo = [1, 2, 3, 6, 7, 11, 17, 18, 19]
result = list(split(foo))  # materialise the generator
print(result) # [[1, 2, 3], [6, 7], [11], [17, 18, 19]]
This assumes a sorted homogeneous list of int.
You could always avoid the sorted assumption with for item in sorted(lst):.
It's pretty easy by using this simple function:
li = [1, 2, 3, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18]
def split(li):
    """Group a sorted list of integers into runs of consecutive values.

    :param li: list of integers, assumed to be sorted
    :return: list of lists; an empty list for empty input
    """
    if not li:
        # li[0] below would raise IndexError on an empty list.
        return []
    result = []
    temp = [li[0]]
    for value in li[1:]:
        if value - temp[-1] == 1:
            temp.append(value)
        else:
            result.append(temp)
            temp = [value]
    result.append(temp)
    return result
print(split(li))

Python islice class is not working as expected

I am writing a function that divides a list into (almost) equal 'n' distributions. I want this function to return a generator, but there appears to be an issue with yielding a generator. The function works just fine with iterables. Take a look at this snippet:
import itertools
def divide_list(array, n, gen_length=None):
    """
    :param array: some iterable that you wish to divide
    :param n: the number of lists you would like to return
    :param gen_length: The length of the generator if array is a generator. Not necessary for lists and tuples.
    :return: a generator of the divided list
    Example:
    In: list(divide_list([1, 2, 3, 4, 5, 6, 7, 8, 9], 4))
    Out: [[1, 2, 3], [4, 5], [6, 7], [8, 9]]
    """
    if isinstance(array, (list, tuple)):
        floor, rem = divmod(len(array), n)
        items_index = (0, floor)
        for _ in range(n):
            # The first `rem` chunks take one extra item each.
            prev, next_ = items_index[0], items_index[1] + 1 if rem > 0 else items_index[1]
            yield array[prev:next_]
            items_index = (next_, next_ + floor)
            rem -= 1
    else:
        floor, rem = divmod(gen_length, n)
        items_index = (0, floor)
        for _ in range(n):
            prev, next_ = items_index[0], items_index[1] + 1 if rem > 0 else items_index[1]
            # NOTE(review): islice(array, prev, next_) skips `prev` items of
            # whatever is still left in the iterator, so every chunk after the
            # first is misaligned. This is the behaviour the question asks
            # about and is intentionally left as posted.
            yield itertools.islice(array, prev, next_)
            items_index = (next_, next_ + floor)
            rem -= 1
if __name__ == '__main__':
    array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
    print('Generator:')
    print('----------')
    for value in divide_list(array_, 3, gen_length=8):
        # Each chunk is a lazy islice; consume it before asking for the next.
        print(list(value))
    print('')
    array_ = [12, 7, 9, 31, 13, 11, 7, 3]
    print('List:')
    print('-----')
    for value in divide_list(array_, 3):
        print(value)
Here is the output:
Generator:
----------
[12, 7, 9]
[7, 3]
[]
List:
-----
[12, 7, 9]
[31, 13, 11]
[7, 3]
Why is the last generator exhausted? Sometimes, it exhausts the last two generators.
The explanation as to why this isn't working is because you are using islice to skip elements when you provided it a non-zero starting point. The key issue here is that you are supposed to advance the iterator by an amount, not skipping any at each yield. This is different than the sequence case, where you give it explicit indices for each case.
However, note, you don't need to handle these cases differently. Here's a super simple approach that handles both cases - the key is to always use an iterator:
def divide(iterable, n, length=None):
    """Yield up to ``n`` nearly equal chunks of ``iterable`` as lists.

    :param iterable: any iterable; needs ``len()`` only when ``length`` is None
    :param n: number of chunks to aim for
    :param length: total number of items, required for one-shot iterators
    :return: generator of non-empty lists; the first ``length % n`` chunks
        hold one extra item
    """
    from itertools import islice  # local import keeps the snippet self-contained

    if length is None:
        length = len(iterable)
    it = iter(iterable)
    floor, rem = divmod(length, n)
    # Always pull from the same iterator; stop once it has nothing left.
    while result := list(islice(it, floor + bool(rem))):
        yield result
        rem = max(rem - 1, 0)
In the REPL:
>>> from itertools import islice
>>> def divide(iterable, n, length=None):
... if length is None:
... length = len(iterable)
... it = iter(iterable)
... floor, rem = divmod(length, n)
... while result := list(islice(it, floor + bool(rem))):
... yield result
... rem = max(rem - 1, 0)
...
>>> list(divide([1, 2, 3, 4, 5, 6, 7, 8, 9], 4))
[[1, 2, 3], [4, 5], [6, 7], [8, 9]]
>>> list(divide(iter([1, 2, 3, 4, 5, 6, 7, 8, 9]), 4, length=9))
[[1, 2, 3], [4, 5], [6, 7], [8, 9]]
the problem is you don't take into account that you already consumed the iterator
>>> import itertools
>>> array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
>>> list(itertools.islice(array_,0,3))
[12, 7, 9]
>>> list(itertools.islice(array_,3,6)) #where are 31,13 and 11? you skipped them because, see below
[7, 3]
>>> array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
>>> list(itertools.islice(array_,0,3))
[12, 7, 9]
>>> list(array_) #this is what remains in the iterator
[31, 13, 11, 7, 3]
>>>
Thanks to @KellyBundy, I was able to modify the code to get the expected results. I was not aware that islice was cutting the generator then shifting those values back to the beginning at index 0. I was treating it as if it left a null value at the indices that I cut off. Here is the modified code:
import itertools
def divide_list(array, n, gen_length=None):
    """
    :param array: some iterable that you wish to divide
    :param n: the number of lists you would like to return
    :param gen_length: The length of the generator if array is a generator. Not necessary for lists and tuples.
    :return: a generator of the divided list
    :raises TypeError: if array is not a list or tuple and gen_length is missing
    Example:
    In: list(divide_list([1, 2, 3, 4, 5, 6, 7, 8, 9], 4))
    Out: [[1, 2, 3], [4, 5], [6, 7], [8, 9]]
    """
    if isinstance(array, (list, tuple)):
        floor, rem = divmod(len(array), n)
        start = 0
        for _ in range(n):
            # The first `rem` chunks take one extra item each.
            stop = start + floor + (1 if rem > 0 else 0)
            yield array[start:stop]
            start = stop
            rem -= 1
    else:
        if gen_length is None:
            raise TypeError('gen_length is required when array is not a list or tuple')
        floor, rem = divmod(gen_length, n)
        # Take one iterator up front: calling islice() on a re-iterable object
        # (e.g. a range) would otherwise restart from the beginning each time.
        source = iter(array)
        for _ in range(n):
            # Each chunk is lazy and must be consumed before the next is requested.
            yield itertools.islice(source, floor + (1 if rem > 0 else 0))
            rem -= 1
if __name__ == '__main__':
    array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
    print('Generator:')
    print('----------')
    for value in divide_list(array_, 3, gen_length=8):
        # Each chunk is a lazy islice; consume it before asking for the next.
        print(list(value))
    print('')
    array_ = [12, 7, 9, 31, 13, 11, 7, 3]
    print('List:')
    print('-----')
    for value in divide_list(array_, 3):
        print(value)
This will output the expected result:
Generator:
----------
[12, 7, 9]
[31, 13, 11]
[7, 3]
List:
-----
[12, 7, 9]
[31, 13, 11]
[7, 3]

reverse ascending sequences in a list

Trying to figure out how to reverse multiple ascending sequences in a list.
For instance: input = [1,2,2,3] to output = [2,1,3,2].
I have used mylist.reverse() but of course it reverses to [3,2,2,1]. Not sure which approach to take?
Example in detail:
So lets say [5, 7, 10, 2, 7, 8, 1, 3] is the input - the output should be [10,7,5,8,7,2,3,1]. In this example the first 3 elements 5,7,10 are in ascending order, 2,7,8 is likewise in ascending order and 1,3 also in ascending order. The function should be able to recognize this pattern and reverse each sequence and return a new list.
All you need is to find all non-decreasing subsequences and reverse them:
l = [5, 7, 10, 2, 7, 8, 1, 3]
res = []
start_idx = 0
# Start at 1 and go through the last index: each element is compared with its
# predecessor, so index 0 has nothing to compare against (l[-1] is the *last*
# element) and the final element must not be skipped.
for idx in range(1, len(l)):
    if l[idx] >= l[idx - 1]:
        continue
    step = l[start_idx:idx]
    step.reverse()
    res.extend(step)
    start_idx = idx

# Flush the run that reaches the end of the list.
step = l[start_idx:]
step.reverse()
res.extend(step)
print(res)
# [10, 7, 5, 8, 7, 2, 3, 1]
For increasing subsequences you need to change if l[idx] >= l[idx - 1] to if l[idx] > l[idx - 1]
Walk the list making a bigger and bigger window from x to y positions. When you find a place where the next number is not ascending, or reach the end, reverse-slice the window you just covered and add it to the end of an output list:
data = [5, 7, 10, 2, 7, 8, 1, 3]
output = []
x = None  # index of the last element already emitted (None = nothing yet)
for y in range(len(data)):
    # The window ends at the last element or where the next one is not larger.
    if y == len(data) - 1 or data[y] >= data[y+1]:
        # Reverse-slice from y back to, but not including, x.
        output.extend(data[y:x:-1])
        x = y
print(output)
There is probably a more elegant way to do this, but one approach would be to use itertools.zip_longest along with enumerate to iterate over sequential element pairs in your list and keep track of each index where the sequence is no longer ascending or the list is exhausted in order to slice, reverse, and extend your output list with the sliced items.
from itertools import zip_longest

d = [5, 7, 10, 2, 7, 8, 1, 3]
results = []
stop = None  # index of the last element already emitted (None = nothing yet)
for i, (a, b) in enumerate(zip_longest(d, d[1:])):
    # zip_longest pads the final pair with None; test for that explicitly,
    # because `not b` would also fire on a legitimate 0 in the data.
    if b is None or b <= a:
        results.extend(d[i:stop:-1])
        stop = i
print(results)
# [10, 7, 5, 8, 7, 2, 3, 1]
data = [5, 7, 10, 2, 7, 8, 1, 3,2]
def func(data):
    """Reverse every non-decreasing run in ``data`` and return a new list.

    :param data: iterable of comparable items; it is not modified
    :return: list with each maximal non-decreasing run reversed in place
    """
    result = []
    run = []
    for item in data:
        # A drop ends the current run: emit it reversed and start a new one.
        if run and item < run[-1]:
            result.extend(reversed(run))
            run = []
        run.append(item)
    # Emit the final run (a no-op for empty input).
    result.extend(reversed(run))
    return result
print(func(data))
# output [10, 7, 5, 8, 7, 2, 3, 1, 2]
You could define a general handy method which returns slices of an array based on condition (predicate).
def slice_when(predicate, iterable):
    """Yield slices of a sequence, cutting wherever ``predicate`` is true.

    :param predicate: called as ``predicate(current, next)`` for each pair of
        neighbours; a true result ends the current slice after ``current``
    :param iterable: despite the name, a sequence supporting ``len()``,
        indexing and slicing
    :return: generator of slices; a single empty slice for empty input
    """
    size = len(iterable)
    x = 0  # start of the slice currently being built
    for i in range(size - 1):
        if predicate(iterable[i], iterable[i + 1]):
            yield iterable[x:i + 1]
            x = i + 1
    yield iterable[x:size]
Now, the slice has to be made when the next element is smaller than the previous one, for example:
array = [5, 7, 10, 2, 7, 8, 1, 3]
# Cut between any two neighbours where the left one is greater than the right.
slices = slice_when(lambda x,y: x > y, array)
print(list(slices))
#=> [[5, 7, 10], [2, 7, 8], [1, 3]]
So you can use it as simple as:
res = []
for e in slice_when(lambda x,y: x > y, array):
    # Append each ascending slice back to front.
    res.extend(e[::-1])
res #=> [10, 7, 5, 8, 7, 2, 3, 1]

compare elements in list to every other element in same list

I'm using Python 2.7. If I have list
a = [1,2,3,4,5]
and I want a new list that contains something like
b = [a[0]*a[1], a[0]*a[2], a[0]*a[3], a[0]*a[4], a[1]*a[0], a[1]*a[2], ... ]
where each element is multiplied by every other element but not by itself. My intuition tells me to do the following (in a very non-Pythonic way):
b = []
for i in range(0,len(a)):
    # NOTE(review): a[i+1] runs past the end of `a` on the last iteration and
    # only neighbouring pairs are multiplied; kept as posted because this is
    # the attempt the question is asking about.
    b.append(a[i]*a[i+1])
but this is only performing the task for the first element in the list
You can perform the multiplication using a list comprehension and enumerate which allows to skip self-multiplications with a filter:
# Every ordered pair (i, j) with i != j: the index check skips an element's
# product with itself while still allowing equal values at different positions.
prods = [x*y for i, x in enumerate(a) for j, y in enumerate(a) if i!=j]
print(prods)
# [2, 3, 4, 5, 2, 6, 8, 10, 3, 6, 12, 15, 4, 8, 12, 20, 5, 10, 15, 20]

Split a list into parts based on a set of indexes in Python

What is the best way to split a list into parts based on an arbitrary number of indexes? E.g. given the code below
indexes = [5, 12, 17]
list = range(20)
return something like this
part1 = list[:5]
part2 = list[5:12]
part3 = list[12:17]
part4 = list[17:]
If there are no indexes it should return the entire list.
This is the simplest and most pythonic solution I can think of:
def partition(alist, indices):
    """Split ``alist`` into consecutive slices cut at each absolute index.

    :param alist: sequence that supports slicing
    :param indices: iterable of cut positions, applied in the given order
    :return: list of ``len(indices) + 1`` slices; the whole sequence as a
        single slice when ``indices`` is empty
    """
    indices = list(indices)  # accept tuples/generators, not only lists
    # Pair every start with the following stop; None leaves the last slice open.
    return [alist[i:j] for i, j in zip([0] + indices, indices + [None])]
if the inputs are very large, then the iterators solution should be more convenient:
from itertools import chain, tee

try:  # Python 2
    from itertools import izip
except ImportError:  # Python 3: the builtin zip is already lazy
    izip = zip


def partition(alist, indices):
    """Lazily yield the slices of ``alist`` cut at each absolute index.

    :param alist: sequence that supports slicing
    :param indices: iterable of cut positions (may be a one-shot iterator)
    :return: generator of ``len(indices) + 1`` slices
    """
    # tee() lets the same index stream serve as both the starts and the stops.
    starts, stops = tee(indices)
    pairs = izip(chain([0], starts), chain(stops, [None]))
    return (alist[i:j] for i, j in pairs)
and of course, the very, very lazy guy solution (if you don't mind to get arrays instead of lists, but anyway you can always revert them to lists):
import numpy
# Reuse NumPy's splitter directly; the pieces come back as arrays, not lists.
partition = numpy.split
I would be interested in seeing a more Pythonic way of doing this also. But this is a crappy solution. You need to add a check for an empty index list.
Something along the lines of:
indexes = [5, 12, 17]
items = list(range(20))  # renamed from `list` so the builtin is not shadowed
output = []
prev = 0
for index in indexes:
    output.append(items[prev:index])
    prev = index
# Slice from `prev` rather than indexes[-1] so that an empty `indexes`
# yields the whole list instead of raising IndexError.
output.append(items[prev:])
print(output)
produces
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
My solution is similar to Il-Bhima's.
def parts(list_, indices):
    """Split ``list_`` at the given absolute indices.

    :param list_: sequence that supports ``len()`` and slicing
    :param indices: iterable of cut positions, applied in the given order
    :return: list of ``len(indices) + 1`` slices
    """
    bounds = [0] + list(indices) + [len(list_)]
    # Walk adjacent boundary pairs instead of indexing back into the list.
    return [list_[start:stop] for start, stop in zip(bounds, bounds[1:])]
Alternative approach
If you're willing to slightly change the way you input indices, from absolute indices to relative (that is, from [5, 12, 17] to [5, 7, 5]), the code below will also give you the desired output without creating intermediary lists.
from itertools import chain, islice


def parts(list_, indices):
    """Split ``list_`` into chunks whose sizes are given by ``indices``.

    :param list_: any iterable
    :param indices: iterable of *relative* chunk lengths, e.g. ``[5, 7, 5]``
    :return: list of lists; a final chunk holds whatever is left over
    """
    i = iter(list_)
    # The trailing None makes the last islice() drain the rest of the iterator.
    return [list(islice(i, n)) for n in chain(indices, [None])]
def burst_seq(seq, indices):
    """Yield consecutive slices of ``seq`` cut at each absolute index.

    :param seq: sequence that supports slicing
    :param indices: iterable of cut positions, applied in the given order
    :return: generator of ``len(indices) + 1`` slices
    """
    startpos = 0
    for index in indices:
        yield seq[startpos:index]
        startpos = index
    # Whatever follows the last cut (the whole sequence if there were no cuts).
    yield seq[startpos:]
>>> list(burst_seq(range(20), [5, 12, 17]))
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
>>> list(burst_seq(range(20), []))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]
>>> list(burst_seq(range(0), [5, 12, 17]))
[[], [], [], []]
>>>
Maxima mea culpa: it uses a for statement, and it's not using whizzbang stuff like itertools, zip(), None as a sentinel, list comprehensions, ...
;-)
from functools import reduce  # a builtin on Python 2, functools-only on Python 3

indices = [5, 12, 17]
data = list(range(20))  # renamed from `input` so the builtin is not shadowed
output = []
# reduce() threads the previous cut position through the calls: append()
# returns None, so `... or y` hands the current index on as the next start.
reduce(lambda x, y: output.append(data[x:y]) or y, indices + [len(data)], 0)
print(output)
This is all that I could think of
def partition(list_, indexes):
    """Split ``list_`` at the given absolute indexes.

    A leading 0 and a trailing ``len(list_)`` are added only when missing, so
    no empty slice is produced for boundaries that are already present.

    :param list_: sequence that supports ``len()`` and slicing
    :param indexes: iterable of cut positions; may be empty
    :return: list of slices
    """
    indexes = list(indexes)
    # `not indexes` guards the empty case, which used to raise IndexError.
    if not indexes or indexes[0] != 0:
        indexes = [0] + indexes
    if indexes[-1] != len(list_):
        indexes = indexes + [len(list_)]
    return [list_[a:b] for (a, b) in zip(indexes[:-1], indexes[1:])]
Cide's makes three copies of the array: [0]+indices copies, ([0]+indices)+[] copies again, and indices[:-1] will copy a third time. Il-Bhima makes five copies. (I'm not counting the return value, of course.)
Those could be reduced (izip, islice), but here's a zero-copy version:
def iterate_pairs(lst, indexes):
    """Yield ``(start, stop)`` bounds for each part of ``lst``.

    :param lst: sequence being partitioned; only its length is used
    :param indexes: iterable of cut positions, applied in the given order
    :return: generator of ``(start, stop)`` tuples; the final ``stop`` is
        ``len(lst)`` so the pairs can be fed straight to ``range()``
    """
    prev = 0
    for i in indexes:
        yield prev, i
        prev = i
    yield prev, len(lst)
def partition(lst, indexes):
    """Yield the slices of ``lst`` delimited by ``iterate_pairs(lst, indexes)``."""
    for first, last in iterate_pairs(lst, indexes):
        yield lst[first:last]
indexes = [5, 12, 17]
lst = list(range(20))  # a real list on Python 3 too, so it can be mutated later
print(list(partition(lst, indexes)))
Of course, array copies are fairly cheap (native code) compared to interpreted Python, but this has another advantage: it's easy to reuse, to mutate the data directly:
# NOTE: lst must be a mutable list here (on Python 3: list(range(20))).
for first, last in iterate_pairs(lst, indexes):
    for i in range(first, last):
        lst[i] = first
print(lst)
# [0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 12, 12, 12, 12, 12, 17, 17, 17]
(That's why I passed indexes to iterate_pairs. If you don't care about that, you can remove that parameter and just have the final line be "yield prev, None", which is all partition() needs.)
Here's yet another answer.
def partition(l, indexes):
    """Split ``l`` at the given indexes; negative indexes are allowed.

    :param l: sequence that supports ``len()`` and slicing
    :param indexes: iterable of cut positions, applied in the given order
    :return: list of ``len(indexes) + 1`` slices
    """
    result = []
    start = 0
    # A plain loop replaces the side-effecting reduce(), which is not a
    # builtin on Python 3.
    for stop in list(indexes) + [len(l)]:
        result.append(l[start:stop])
        start = stop
    return result
It supports negative indexes and such.
>>> partition([1,2,3,4,5], [1, -1])
[[1], [2, 3, 4], [5]]
>>>
The plural of index is indices. Going for simplicity/readability.
indices = [5, 12, 17]
data = list(range(20))  # renamed from `input`; a real list so its tail can be cut
output = []
# Peel parts off the end so the earlier indices stay valid while `data` shrinks.
for i in reversed(indices):
    output.append(data[i:])
    del data[i:]
output.append(data)
# The parts were collected back to front, so popping prints them in order.
while output:
    print(output.pop())

Categories