Splitting a list on non-sequential numbers - python

I have an ordered list of entities, numbered in a broken sequence:
[1, 2, 3, 6, 7, 11, 17, 18, 19]
I'd like to break the list where there's a gap, and collect the results in a new list:
[[1, 2, 3], [6, 7], [11], [17, 18, 19]]
I have the feeling there's a name for what I want to do and probably a nice library function for it - but I can't think of it. Can anyone shine some light before I possibly reinvent a wheel?
edit: Thanks, folks, but I was asking if there's a name for this operation and an existing algorithm, not for implementations - this is what I came up with:
def group_adjoining(elements, key=lambda x: x):
    """Group a sequence into runs of elements with consecutive keys.

    A new group starts whenever the key of an element is not exactly one
    more than the key of the element before it.

    :param elements: sequence of items, already ordered by key
    :param key: function to get the key integer from a list element
    :return: list of lists of contiguous elements; an empty input is
        returned unchanged
    """
    if not elements:
        return elements
    items = iter(elements)
    first = next(items)
    result = [[first]]
    prev_key = key(first)
    for item in items:
        item_key = key(item)  # evaluate the key only once per element
        if prev_key + 1 == item_key:
            result[-1].append(item)
        else:
            result.append([item])
        prev_key = item_key
    return result

Plain itertools.groupby approach:
from itertools import groupby

lst = [1, 2, 3, 6, 7, 11, 17, 18, 19]
# Inside a run of consecutive integers the difference (position - value) is
# constant, so grouping on that difference splits the list at every gap.
out = [
    [value for _, value in group]
    for _, group in groupby(enumerate(lst), key=lambda pair: pair[0] - pair[1])
]
print(out)
Prints:
[[1, 2, 3], [6, 7], [11], [17, 18, 19]]

Try greedy approach:
lst = [1, 2, 3, 6, 7, 11, 17, 18, 19]
res = []
tmp = []
prv = None  # no previous value yet; avoids indexing lst[0] on an empty list
for cur in lst:
    # A jump of more than 1 from the previous value closes the current run.
    if prv is not None and cur - prv > 1:
        res.append(tmp)
        tmp = []
    tmp.append(cur)
    prv = cur
if tmp:
    # Flush the run that was still open when the loop ended.
    res.append(tmp)
print(res)
Output: [[1, 2, 3], [6, 7], [11], [17, 18, 19]]

I first came across more_itertools today, and I think this package is useful for this problem.
pip install more-itertools
from more_itertools import split_when
l = [1, 2, 3, 6, 7, 11, 17, 18, 19]
# The predicate receives each pair of neighbours (a, b); a split is made
# between them whenever b is not a + 1.
res = list(split_when(l, lambda a, b: a + 1 != b))
print(res)

You could use a simple generator.
def split(lst):
    """Yield lists of consecutive integers taken from ``lst`` in order.

    A run is extended while each item is exactly one more than the last item
    of the run; any other item closes the run and starts a new one.
    Nothing is yielded for an empty input.
    """
    run = []
    for item in lst:
        if not run or run[-1] + 1 == item:
            run.append(item)
        else:
            yield run
            run = [item]
    if run:
        # Emit the run that was still open when the input ended.
        yield run


foo = [1, 2, 3, 6, 7, 11, 17, 18, 19]
result = list(split(foo))
print(result)  # [[1, 2, 3], [6, 7], [11], [17, 18, 19]]
This assumes a sorted homogeneous list of int.
You could always avoid the sorted assumption with for item in sorted(lst):.

It's pretty easy by using this simple function:
li = [1, 2, 3, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18]


def split(li):
    """Return the runs of consecutive integers in ``li`` as a list of lists.

    An empty input returns an empty list.
    """
    result = []
    for value in li:
        # Extend the current run only if this value directly follows its last
        # element; checking `result` first makes the empty case safe.
        if result and value - result[-1][-1] == 1:
            result[-1].append(value)
        else:
            result.append([value])
    return result


print(split(li))

Related

How to convert list to list of list for adjacent numbers

i have list [31, 32,33, 1,2,3,4, 11,12,13,14]
I need to put adjacent numbers (i, i+1) into the same sub-list.
Expected out [[1,2,3,4], [11,12,13,14], [31, 32,33]]
l = [31, 32,33, 1,2,3,4, 11,12,13,14]
l.sort() #sorted the items
new_l = []
for i in l:
temp_l = [] # temp list before appending to main list
if i + 1 in l: # if i + 1 is present append to temp_list
temp_l.append(i)
new_l.append(temp_l) # temp_l has to append to main list
My out is wrong : [[1], [2], [3], [], [11], [12], [13], [], [31], [32], []]
You can use itertools.groupby:
from itertools import groupby

l = [31, 32, 33, 1, 2, 3, 4, 11, 12, 13, 14]
l.sort()
# After sorting, (position - value) stays constant inside a run of
# consecutive numbers, so it works as the grouping key.
out = [
    [v for _, v in g]
    for _, g in groupby(enumerate(l), key=lambda v: v[0] - v[1])
]
print(out)
Prints:
[[1, 2, 3, 4], [11, 12, 13, 14], [31, 32, 33]]
Without itertools.groupby:
l = [31, 32, 33, 1, 2, 3, 4, 11, 12, 13, 14]
l.sort()
out = []
for value in l:
    # Extend the last run when this value directly follows it; otherwise
    # start a new run. Testing `out` first also covers an empty list.
    if out and value - out[-1][-1] == 1:
        out[-1].append(value)
    else:
        out.append([value])
print(out)
Edit: Added .sort()
You can append an empty sub-list to the output list when the difference between the current number and the last number in the last sub-list in the output list is not 1, and keep appending the current number to the last sub-list of the output list:
l = [31, 32, 33, 1, 2, 3, 4, 11, 12, 13, 14]
l.sort()
output = []
for i in l:
    # Open a new, empty sub-list at the very start and after every gap ...
    if not output or i - output[-1][-1] != 1:
        output.append([])
    # ... then the current number always goes into the last sub-list.
    output[-1].append(i)
output becomes:
[[1, 2, 3, 4], [11, 12, 13, 14], [31, 32, 33]]
Demo: https://replit.com/#blhsing/UnimportantValidTelecommunications
Use a for loop, enumerate() and sort()
l = [31, 32, 33, 1, 2, 3, 4, 11, 12, 13, 14]
# sort list
l = sorted(l)
sub_l = []
new_list = []
for i, num in enumerate(l):
    # every number belongs to the current sub-list, including the one that
    # ends a run (it used to be dropped)
    sub_l.append(num)
    # close the sub-list at the last element, or when the next element is
    # not the current + 1
    if i + 1 == len(l) or num + 1 != l[i + 1]:
        # append sub-list to parent-list and reset sub-list
        new_list.append(sub_l)
        sub_l = []
print(new_list)
# Output:
# [[1, 2, 3, 4], [11, 12, 13, 14], [31, 32, 33]]

Python islice class is not working as expected

I am writing a function that divides a list into (almost) equal 'n' distributions. I want this function to return a generator, but there appears to be an issue with yielding a generator. The function works just fine with iterables. Take a look at this snippet:
import itertools
def divide_list(array, n, gen_length=None):
"""
:param array: some iterable that you wish to divide
:param n: the number of lists you would like to return
:param gen_length: The length of the generator if array is a generator. Not necessary for lists and tuples.
:return: a generator of the divided list
Example:
In: list(divide_list([1, 2, 3, 4, 5, 6, 7, 8, 9], 4))
Out: [[1, 2, 3], [4, 5], [6, 7], [8, 9]]
"""
if isinstance(array, (list, tuple)):
floor, rem = divmod(len(array), n)
items_index = (0, floor)
for _ in range(n):
prev, next_ = items_index[0], items_index[1] + 1 if rem > 0 else items_index[1]
yield array[prev:next_]
items_index = (next_, next_ + floor)
rem -= 1
else:
floor, rem = divmod(gen_length, n)
items_index = (0, floor)
for _ in range(n):
prev, next_ = items_index[0], items_index[1] + 1 if rem > 0 else items_index[1]
yield itertools.islice(array, prev, next_)
items_index = (next_, next_ + floor)
rem -= 1
if __name__ == '__main__':
array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
print('Generator:')
print('----------')
for value in divide_list(array_, 3, gen_length=8):
print(list(value))
print('')
array_ = [12, 7, 9, 31, 13, 11, 7, 3]
print('List:')
print('-----')
for value in divide_list(array_, 3):
print(value)
Here is the output:
Generator:
----------
[12, 7, 9]
[7, 3]
[]
List:
-----
[12, 7, 9]
[31, 13, 11]
[7, 3]
Why is the last generator exhausted? Sometimes, it exhausts the last two generators.
This isn't working because you give islice a non-zero starting point, which makes it skip elements of an iterator that has already been advanced. The key issue is that each yield should simply take the next batch from the iterator, without skipping anything. This is different from the sequence case, where you give explicit indices for each slice.
However, note, you don't need to handle these cases differently. Here's a super simple approach that handles both cases - the key is to always use an iterator:
def divide(iterable, n, length=None):
    """Yield consecutive chunks of ``iterable`` as lists, split into ``n`` parts.

    The chunk size is ``length // n``; the first ``length % n`` chunks get
    one extra item. Chunks are produced until the iterator is exhausted.

    :param iterable: any iterable; sequences and one-shot iterators both work
    :param n: the number of parts to divide the items into
    :param length: total number of items; defaults to ``len(iterable)`` and
        must be given when ``iterable`` has no length
    """
    from itertools import islice  # local import keeps the snippet self-contained

    if length is None:
        length = len(iterable)
    # Always work on a single iterator so each islice continues where the
    # previous one stopped.
    it = iter(iterable)
    floor, rem = divmod(length, n)
    while result := list(islice(it, floor + bool(rem))):
        yield result
        rem = max(rem - 1, 0)
In the REPL:
>>> from itertools import islice
>>> def divide(iterable, n, length=None):
... if length is None:
... length = len(iterable)
... it = iter(iterable)
... floor, rem = divmod(length, n)
... while result := list(islice(it, floor + bool(rem))):
... yield result
... rem = max(rem - 1, 0)
...
>>> list(divide([1, 2, 3, 4, 5, 6, 7, 8, 9], 4))
[[1, 2, 3], [4, 5], [6, 7], [8, 9]]
>>> list(divide(iter([1, 2, 3, 4, 5, 6, 7, 8, 9]), 4, length=9))
[[1, 2, 3], [4, 5], [6, 7], [8, 9]]
the problem is you don't take into account that you already consumed the iterator
>>> import itertools
>>> array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
>>> list(itertools.islice(array_,0,3))
[12, 7, 9]
>>> list(itertools.islice(array_,3,6)) #where are 31,13 and 11? you skipped them because, see below
[7, 3]
>>> array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
>>> list(itertools.islice(array_,0,3))
[12, 7, 9]
>>> list(array_) #this is what remains in the iterator
[31, 13, 11, 7, 3]
>>>
Thanks to @KellyBundy, I was able to modify the code to get the expected results. I was not aware that islice consumes the items it cuts from the generator, so the remaining values effectively shift back to index 0. I was treating it as if it left a null value at the indices that I cut off. Here is the modified code:
import itertools


def divide_list(array, n, gen_length=None):
    """
    Divide ``array`` into ``n`` consecutive parts of (almost) equal size.

    The first ``length % n`` parts receive one extra item.

    :param array: some iterable that you wish to divide
    :param n: the number of lists you would like to return
    :param gen_length: The length of the generator if array is a generator. Not necessary for lists and tuples.
    :return: a generator of the divided list. List/tuple input yields slices;
        any other iterable yields ``itertools.islice`` objects that share one
        underlying iterator and therefore must be consumed in order.
    :raises TypeError: if ``array`` is not a list/tuple and ``gen_length`` is omitted

    Example:
    In: list(divide_list([1, 2, 3, 4, 5, 6, 7, 8, 9], 4))
    Out: [[1, 2, 3], [4, 5], [6, 7], [8, 9]]
    """
    is_sequence = isinstance(array, (list, tuple))
    if is_sequence:
        total = len(array)
    elif gen_length is None:
        raise TypeError('gen_length is required when array is not a list or tuple')
    else:
        total = gen_length
        # Create the iterator once; otherwise a re-iterable such as range()
        # would restart from its beginning on every islice call.
        source = iter(array)
    floor, rem = divmod(total, n)
    start = 0
    for part in range(n):
        size = floor + 1 if part < rem else floor
        if is_sequence:
            yield array[start:start + size]
            start += size
        else:
            # islice always counts from the iterator's current position, so
            # only the part size is needed, not absolute indices.
            yield itertools.islice(source, size)


if __name__ == '__main__':
    array_ = iter([12, 7, 9, 31, 13, 11, 7, 3])
    print('Generator:')
    print('----------')
    for value in divide_list(array_, 3, gen_length=8):
        print(list(value))
    print('')
    array_ = [12, 7, 9, 31, 13, 11, 7, 3]
    print('List:')
    print('-----')
    for value in divide_list(array_, 3):
        print(value)
This will output the expected result:
Generator:
----------
[12, 7, 9]
[31, 13, 11]
[7, 3]
List:
-----
[12, 7, 9]
[31, 13, 11]
[7, 3]

Sum consecutive pairs of elements in a list

I want to find sums of elements of the list by index0,index1 and index1,index2 and index2,index3, and so on.
Like:
my_list = [7, 5, 9, 4, 7, 11]
sums = [12, 14, 13, 11, 18] # 7+5, 5+9, 9+4, 4+7, 7+11
You just have to iterate through the indices:
l = [7, 5, 9, 4, 7, 11]
# Walk the indices 1 .. len(l) - 1 and add each element to its left neighbour.
res = [l[i - 1] + l[i] for i in range(1, len(l))]
print(res)
Output:
[12, 14, 13, 11, 18]
You can use zip and sum for a functional solution:
# `l` rather than `list`, so the built-in type is not shadowed
l = [7, 5, 9, 4, 7, 11]
# lazily pair every element with its successor
pairs = zip(l, l[1:])
# add up each pair and collect the totals
sums = [*map(sum, pairs)]
print(sums)  # [12, 14, 13, 11, 18]
This works fine :)
# Named `numbers` rather than `list` so the built-in type is not shadowed.
numbers = [7, 5, 9, 4, 7, 11]
# Pair each element with its successor and add the two together.
aspSum = [left + right for left, right in zip(numbers, numbers[1:])]

Split list into multiple lists based on indices [duplicate]

What is the best way to split a list into parts based on an arbitrary number of indexes? E.g. given the code below
indexes = [5, 12, 17]
list = range(20)
return something like this
part1 = list[:5]
part2 = list[5:12]
part3 = list[12:17]
part4 = list[17:]
If there are no indexes it should return the entire list.
This is the simplest and most pythonic solution I can think of:
def partition(alist, indices):
    """Split ``alist`` into parts at the given absolute ``indices``.

    :param alist: the sequence to split
    :param indices: the cut positions, in order (any iterable)
    :return: list of slices ``alist[:i0], alist[i0:i1], ..., alist[ik:]``;
        with no indices the whole sequence is the only part
    """
    cuts = list(indices)  # accept any iterable of indices, not only lists
    # Starts are 0 + cuts, stops are cuts + None (None means "to the end").
    return [alist[i:j] for i, j in zip([0] + cuts, cuts + [None])]
if the inputs are very large, then the iterators solution should be more convenient:
from itertools import chain


def partition(alist, indices):
    """Lazily split ``alist`` at the absolute ``indices``.

    :param alist: the sequence to split
    :param indices: the cut positions, in order
    :return: a generator yielding one slice of ``alist`` per part
    """
    # The built-in zip is already lazy on Python 3, where itertools.izip no
    # longer exists. None as the final stop means "slice to the end".
    pairs = zip(chain([0], indices), chain(indices, [None]))
    return (alist[i:j] for i, j in pairs)
and of course, the very, very lazy guy solution (if you don't mind to get arrays instead of lists, but anyway you can always revert them to lists):
import numpy
# numpy.split(ary, indices_or_sections) accepts the sequence and the list of
# cut indices in that order; the parts come back as NumPy arrays, not lists.
partition = numpy.split
I would be interested in seeing a more Pythonic way of doing this also. But this is a crappy solution. You need to add a checking for an empty index list.
Something along the lines of:
indexes = [5, 12, 17]
items = list(range(20))  # not named `list`, so the built-in stays usable
output = []
prev = 0
for index in indexes:
    output.append(items[prev:index])
    prev = index
# `prev` is the last cut point (or 0 when `indexes` is empty), so the tail
# slice works without indexing into a possibly empty `indexes`.
output.append(items[prev:])
print(output)
produces
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
My solution is similar to Il-Bhima's.
>>> def parts(list_, indices):
... indices = [0]+indices+[len(list_)]
... return [list_[v:indices[k+1]] for k, v in enumerate(indices[:-1])]
Alternative approach
If you're willing to slightly change the way you input indices, from absolute indices to relative ones (that is, from [5, 12, 17] to [5, 7, 5]), the code below will also give you the desired output, and it doesn't create intermediary lists.
>>> from itertools import chain, islice
>>> def parts(list_, indices):
... i = iter(list_)
... return [list(islice(i, n)) for n in chain(indices, [None])]
>>> def burst_seq(seq, indices):
... startpos = 0
... for index in indices:
... yield seq[startpos:index]
... startpos = index
... yield seq[startpos:]
...
>>> list(burst_seq(range(20), [5, 12, 17]))
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
>>> list(burst_seq(range(20), []))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]
>>> list(burst_seq(range(0), [5, 12, 17]))
[[], [], [], []]
>>>
Maxima mea culpa: it uses a for statement, and it's not using whizzbang stuff like itertools, zip(), None as a sentinel, list comprehensions, ...
;-)
from functools import reduce  # reduce is no longer a builtin on Python 3

indices = [5, 12, 17]
data = list(range(20))  # not named `input`, so the builtin stays usable
output = []
# Each step records data[x:y] and hands y on as the next start:
# list.append returns None, so `... or y` evaluates to y.
reduce(lambda x, y: output.append(data[x:y]) or y, indices + [len(data)], 0)
print(output)
This is all that I could think of
def partition(list_, indexes):
    """Split ``list_`` at ``indexes``, adding the outer boundaries if missing.

    0 is prepended unless it is already the first index, and ``len(list_)``
    is appended unless it is already the last one. The caller's ``indexes``
    is not modified.

    :param list_: the sequence to split
    :param indexes: the cut positions, in order; may be empty
    :return: list of slices between each pair of neighbouring boundaries
    """
    bounds = list(indexes)
    # `not bounds` guards the empty case, which used to raise IndexError.
    if not bounds or bounds[0] != 0:
        bounds.insert(0, 0)
    if bounds[-1] != len(list_):
        bounds.append(len(list_))
    return [list_[a:b] for a, b in zip(bounds, bounds[1:])]
Cide's makes three copies of the array: [0]+indices copies, ([0]+indices)+[] copies again, and indices[:-1] will copy a third time. Il-Bhima makes five copies. (I'm not counting the return value, of course.)
Those could be reduced (izip, islice), but here's a zero-copy version:
def iterate_pairs(lst, indexes):
    """Yield the ``(start, stop)`` bounds of each part of ``lst``.

    The first part starts at 0 and the last one stops at ``len(lst)``.
    """
    prev = 0
    for i in indexes:
        yield prev, i
        prev = i
    yield prev, len(lst)


def partition(lst, indexes):
    """Yield the slices of ``lst`` delimited by ``indexes``, one at a time."""
    for first, last in iterate_pairs(lst, indexes):
        yield lst[first:last]


indexes = [5, 12, 17]
lst = list(range(20))
print(list(partition(lst, indexes)))
Of course, array copies are fairly cheap (native code) compared to interpreted Python, but this has another advantage: it's easy to reuse, to mutate the data directly:
for first, last in iterate_pairs(lst, indexes):
    # Overwrite every element of the segment with the segment's start index.
    for i in range(first, last):
        lst[i] = first
print(lst)
# [0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 12, 12, 12, 12, 12, 17, 17, 17]
(That's why I passed indexes to iterate_pairs. If you don't care about that, you can remove that parameter and just have the final line be "yield prev, None", which is all partition() needs.)
Here's yet another answer.
def partition(l, indexes):
    """Split ``l`` at ``indexes`` and return the parts as a list.

    Indexes are passed straight to slicing, so negative indexes work.

    :param l: the sequence to split
    :param indexes: list of cut positions, in order; may be empty
    :return: list of slices of ``l``
    """
    result = []
    start = 0
    # len(l) is added as the final stop so the last part is included.
    for stop in indexes + [len(l)]:
        result.append(l[start:stop])
        start = stop
    return result
It supports negative indexes and such.
>>> partition([1,2,3,4,5], [1, -1])
[[1], [2, 3, 4], [5]]
>>>
The plural of index is indices. Going for simplicity/readability.
indices = [5, 12, 17]
data = list(range(20))  # a mutable list, not named `input` like the builtin
output = []
# Cut parts off the tail, last index first, so earlier indices stay valid.
for i in reversed(indices):
    output.append(data[i:])
    data[i:] = []
output.append(data)
# The parts were collected back to front, so popping prints them in order.
while output:
    print(output.pop())

Split a list into parts based on a set of indexes in Python

What is the best way to split a list into parts based on an arbitrary number of indexes? E.g. given the code below
indexes = [5, 12, 17]
list = range(20)
return something like this
part1 = list[:5]
part2 = list[5:12]
part3 = list[12:17]
part4 = list[17:]
If there are no indexes it should return the entire list.
This is the simplest and most pythonic solution I can think of:
def partition(alist, indices):
    """Split ``alist`` into parts at the given absolute ``indices``.

    :param alist: the sequence to split
    :param indices: the cut positions, in order (any iterable)
    :return: list of slices ``alist[:i0], alist[i0:i1], ..., alist[ik:]``;
        with no indices the whole sequence is the only part
    """
    cuts = list(indices)  # accept any iterable of indices, not only lists
    # Starts are 0 + cuts, stops are cuts + None (None means "to the end").
    return [alist[i:j] for i, j in zip([0] + cuts, cuts + [None])]
if the inputs are very large, then the iterators solution should be more convenient:
from itertools import chain


def partition(alist, indices):
    """Lazily split ``alist`` at the absolute ``indices``.

    :param alist: the sequence to split
    :param indices: the cut positions, in order
    :return: a generator yielding one slice of ``alist`` per part
    """
    # The built-in zip is already lazy on Python 3, where itertools.izip no
    # longer exists. None as the final stop means "slice to the end".
    pairs = zip(chain([0], indices), chain(indices, [None]))
    return (alist[i:j] for i, j in pairs)
and of course, the very, very lazy guy solution (if you don't mind to get arrays instead of lists, but anyway you can always revert them to lists):
import numpy
# numpy.split(ary, indices_or_sections) accepts the sequence and the list of
# cut indices in that order; the parts come back as NumPy arrays, not lists.
partition = numpy.split
I would be interested in seeing a more Pythonic way of doing this also. But this is a crappy solution. You need to add a checking for an empty index list.
Something along the lines of:
indexes = [5, 12, 17]
items = list(range(20))  # not named `list`, so the built-in stays usable
output = []
prev = 0
for index in indexes:
    output.append(items[prev:index])
    prev = index
# `prev` is the last cut point (or 0 when `indexes` is empty), so the tail
# slice works without indexing into a possibly empty `indexes`.
output.append(items[prev:])
print(output)
produces
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
My solution is similar to Il-Bhima's.
>>> def parts(list_, indices):
... indices = [0]+indices+[len(list_)]
... return [list_[v:indices[k+1]] for k, v in enumerate(indices[:-1])]
Alternative approach
If you're willing to slightly change the way you input indices, from absolute indices to relative ones (that is, from [5, 12, 17] to [5, 7, 5]), the code below will also give you the desired output, and it doesn't create intermediary lists.
>>> from itertools import chain, islice
>>> def parts(list_, indices):
... i = iter(list_)
... return [list(islice(i, n)) for n in chain(indices, [None])]
>>> def burst_seq(seq, indices):
... startpos = 0
... for index in indices:
... yield seq[startpos:index]
... startpos = index
... yield seq[startpos:]
...
>>> list(burst_seq(range(20), [5, 12, 17]))
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16], [17, 18, 19]]
>>> list(burst_seq(range(20), []))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]
>>> list(burst_seq(range(0), [5, 12, 17]))
[[], [], [], []]
>>>
Maxima mea culpa: it uses a for statement, and it's not using whizzbang stuff like itertools, zip(), None as a sentinel, list comprehensions, ...
;-)
from functools import reduce  # reduce is no longer a builtin on Python 3

indices = [5, 12, 17]
data = list(range(20))  # not named `input`, so the builtin stays usable
output = []
# Each step records data[x:y] and hands y on as the next start:
# list.append returns None, so `... or y` evaluates to y.
reduce(lambda x, y: output.append(data[x:y]) or y, indices + [len(data)], 0)
print(output)
This is all that I could think of
def partition(list_, indexes):
    """Split ``list_`` at ``indexes``, adding the outer boundaries if missing.

    0 is prepended unless it is already the first index, and ``len(list_)``
    is appended unless it is already the last one. The caller's ``indexes``
    is not modified.

    :param list_: the sequence to split
    :param indexes: the cut positions, in order; may be empty
    :return: list of slices between each pair of neighbouring boundaries
    """
    bounds = list(indexes)
    # `not bounds` guards the empty case, which used to raise IndexError.
    if not bounds or bounds[0] != 0:
        bounds.insert(0, 0)
    if bounds[-1] != len(list_):
        bounds.append(len(list_))
    return [list_[a:b] for a, b in zip(bounds, bounds[1:])]
Cide's makes three copies of the array: [0]+indices copies, ([0]+indices)+[] copies again, and indices[:-1] will copy a third time. Il-Bhima makes five copies. (I'm not counting the return value, of course.)
Those could be reduced (izip, islice), but here's a zero-copy version:
def iterate_pairs(lst, indexes):
    """Yield the ``(start, stop)`` bounds of each part of ``lst``.

    The first part starts at 0 and the last one stops at ``len(lst)``.
    """
    prev = 0
    for i in indexes:
        yield prev, i
        prev = i
    yield prev, len(lst)


def partition(lst, indexes):
    """Yield the slices of ``lst`` delimited by ``indexes``, one at a time."""
    for first, last in iterate_pairs(lst, indexes):
        yield lst[first:last]


indexes = [5, 12, 17]
lst = list(range(20))
print(list(partition(lst, indexes)))
Of course, array copies are fairly cheap (native code) compared to interpreted Python, but this has another advantage: it's easy to reuse, to mutate the data directly:
for first, last in iterate_pairs(lst, indexes):
    # Overwrite every element of the segment with the segment's start index.
    for i in range(first, last):
        lst[i] = first
print(lst)
# [0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 12, 12, 12, 12, 12, 17, 17, 17]
(That's why I passed indexes to iterate_pairs. If you don't care about that, you can remove that parameter and just have the final line be "yield prev, None", which is all partition() needs.)
Here's yet another answer.
def partition(l, indexes):
    """Split ``l`` at ``indexes`` and return the parts as a list.

    Indexes are passed straight to slicing, so negative indexes work.

    :param l: the sequence to split
    :param indexes: list of cut positions, in order; may be empty
    :return: list of slices of ``l``
    """
    result = []
    start = 0
    # len(l) is added as the final stop so the last part is included.
    for stop in indexes + [len(l)]:
        result.append(l[start:stop])
        start = stop
    return result
It supports negative indexes and such.
>>> partition([1,2,3,4,5], [1, -1])
[[1], [2, 3, 4], [5]]
>>>
The plural of index is indices. Going for simplicity/readability.
indices = [5, 12, 17]
data = list(range(20))  # a mutable list, not named `input` like the builtin
output = []
# Cut parts off the tail, last index first, so earlier indices stay valid.
for i in reversed(indices):
    output.append(data[i:])
    data[i:] = []
output.append(data)
# The parts were collected back to front, so popping prints them in order.
while output:
    print(output.pop())

Categories