Split list into groups of sequential numbers [duplicate] - python

I'd like to identify groups of consecutive numbers in a list, so that:
myfunc([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
Returns:
[(2,5), (12,17), 20]
And was wondering what the best way to do this was (particularly if there's something inbuilt into Python).
Edit: Note I originally forgot to mention that individual numbers should be returned as individual numbers, not ranges.

EDIT 2: To answer the OP new requirement
# NOTE: Python 2 only. The tuple-unpacking lambda `lambda (index, item): ...`
# is a syntax error in Python 3, map() must return an indexable list, and
# xrange() no longer exists there.
# Uses groupby, itemgetter and data as defined in the recipe snippet.
ranges = []
for key, group in groupby(enumerate(data), lambda (index, item): index - item):
group = map(itemgetter(1), group)
if len(group) > 1:
# xrange is used as a (first, last) holder; as an actual xrange its end is
# exclusive, so iterating it would stop before group[-1]
ranges.append(xrange(group[0], group[-1]))
else:
ranges.append(group[0])
Output:
[xrange(2, 5), xrange(12, 17), 20]
You can replace xrange with range or any other custom class.
Python docs have a very neat recipe for this:
from operator import itemgetter
from itertools import groupby
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
# NOTE: Python 2 only. `lambda (i,x): ...` unpacks the (index, value) pair in
# the parameter list, which is a syntax error in Python 3.
# index - value is the same for every member of a run of consecutive numbers,
# so groupby places each run in its own group.
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
print(map(itemgetter(1), g))
Output:
[2, 3, 4, 5]
[12, 13, 14, 15, 16, 17]
If you want to get the exact same output, you can do this:
# NOTE: Python 2 only (tuple-parameter lambda; map() returning an indexable list).
# Uses groupby, itemgetter and data as defined in the recipe snippet.
ranges = []
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
group = map(itemgetter(1), g)
# keep only the first and last member of each run
ranges.append((group[0], group[-1]))
output:
[(2, 5), (12, 17)]
EDIT: The example is already explained in the documentation but maybe I should explain it more:
The key to the solution is
differencing with a range so that
consecutive numbers all appear in same
group.
If the data was: [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
Then groupby(enumerate(data), lambda (i,x):i-x) is equivalent of the following:
groupby(
[(0, 2), (1, 3), (2, 4), (3, 5), (4, 12),
(5, 13), (6, 14), (7, 15), (8, 16), (9, 17)],
lambda (i,x):i-x
)
The lambda function subtracts the element value from the element index. So when you apply the lambda to each item, you'll get the following keys for groupby:
[-2, -2, -2, -2, -8, -8, -8, -8, -8, -8]
groupby groups elements by equal key value, so the first 4 elements will be grouped together and so forth.
I hope this makes it more readable.
python 3 version may be helpful for beginners
import the libraries required first
from itertools import groupby
from operator import itemgetter
ranges = []
# index - value is constant within a run of consecutive numbers, so each
# groupby group is one run of (index, value) pairs.
for k, g in groupby(enumerate(data), lambda x: x[0] - x[1]):
    group = [int(item) for _, item in g]  # drop the index, keep the values
    ranges.append((group[0], group[-1]))

more_itertools.consecutive_groups was added in version 4.0.
Demo
import more_itertools as mit
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
[list(group) for group in mit.consecutive_groups(iterable)]
# [[2, 3, 4, 5], [12, 13, 14, 15, 16, 17], [20]]
Code
Applying this tool, we make a generator function that finds ranges of consecutive numbers.
def find_ranges(iterable):
    """Yield range of consecutive numbers.

    A run of length one is yielded as the bare number, a longer run as a
    ``(first, last)`` tuple.  Grouping is delegated to
    ``mit.consecutive_groups``.
    """
    for group in mit.consecutive_groups(iterable):
        group = list(group)  # materialize so it can be measured and indexed
        if len(group) == 1:
            yield group[0]
        else:
            yield group[0], group[-1]
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
list(find_ranges(iterable))
# [(2, 5), (12, 17), 20]
The source implementation emulates a classic recipe (as demonstrated by #Nadia Alramli).
Note: more_itertools is a third-party package installable via pip install more_itertools.

The "naive" solution which I find somewhat readable at least.
x = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 22, 25, 26, 28, 51, 52, 57]
def group(L):
    """Yield ``(first, last)`` for every run of consecutive numbers in L.

    L is expected to be in ascending order.  A lone number n is reported as
    ``(n, n)``; an empty L yields nothing.
    """
    if not L:
        return  # nothing to group; L[0] below would raise IndexError
    first = last = L[0]
    for n in L[1:]:
        if n - 1 == last:  # Part of the group, bump the end
            last = n
        else:  # Not part of the group, yield current group and start a new
            yield first, last
            first = last = n
    yield first, last  # Yield the last group
>>>print list(group(x))
[(2, 5), (12, 17), (22, 22), (25, 26), (28, 28), (51, 52), (57, 57)]

Assuming your list is sorted:
>>> from itertools import groupby
def ranges(lst):
    """Yield a ``range`` for each run of consecutive integers in lst.

    lst must be a sorted, indexable sequence.  Each yielded range is
    half-open: the run 2..5 is reported as ``range(2, 6)``.
    """
    # value - index is constant within a run of consecutive integers
    offsets = (value - index for index, value in enumerate(lst))
    start = 0  # index in lst where the current run begins
    for _, run in groupby(offsets):
        length = sum(1 for _ in run)
        first = lst[start]
        start += length
        yield range(first, first + length)
>>> lst = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
>>> list(ranges(lst))
[range(2, 6), range(12, 18)]

Here it is something that should work, without any import needed:
def myfunc(lst):
    """Collapse runs of consecutive numbers in lst.

    Returns a list where each run of two or more consecutive numbers is a
    ``(first, last)`` tuple and each isolated number is kept as is.
    lst is expected to be in ascending order; an empty list gives ``[]``.
    """
    ret = []
    if not lst:
        return ret  # guard: lst[0] below would raise IndexError
    a = b = lst[0]  # a and b are range's bounds
    for el in lst[1:]:
        if el == b + 1:
            b = el  # range grows
        else:  # range ended
            ret.append(a if a == b else (a, b))  # is a single or a range?
            a = b = el  # let's start again with a single
    ret.append(a if a == b else (a, b))  # corner case for last single/range
    return ret

Please note that the code using groupby doesn't work as given in Python 3 so use this.
# index - value is constant within a run of consecutive numbers, so each
# groupby group is one run of (index, value) pairs.
for k, g in groupby(enumerate(data), lambda x: x[0] - x[1]):
    group = list(map(itemgetter(1), g))  # materialize: map() is lazy in Python 3
    ranges.append((group[0], group[-1]))

This doesn't use a standard function - it just iterates over the input, but it should work:
def myfunc(l):
    """Format runs of consecutive integers in l as a string.

    Runs are rendered as ``first-last`` and single numbers as themselves,
    joined by ``', '`` inside parentheses, e.g. ``'(2-5, 12-17, 20)'``.
    l is expected to be in ascending order.
    """
    r = []
    p = q = None  # bounds of the run being built; None until the first item

    def flush():
        # Emit the finished run.  Test against None rather than truthiness
        # so that a run starting at 0 is not silently dropped.
        if p is not None:
            r.append('%s-%s' % (p, q) if q > p else str(p))

    for x in l:
        if q is not None and x - 1 == q:
            q = x
        else:
            flush()
            p = q = x
    flush()  # the last run has no successor to trigger its output
    return '(%s)' % ', '.join(r)
Note that it requires that the input contains only positive numbers in ascending order. You should validate the input, but this code is omitted for clarity.

import numpy as np
myarray = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
# Split after every position where the step to the next value exceeds 1.
sequences = np.split(myarray, np.array(np.where(np.diff(myarray) > 1)[0]) + 1)
l = []
for s in sequences:
    if len(s) > 1:
        # int() turns NumPy scalars into plain ints so the printed result
        # does not depend on how the NumPy version renders its scalars
        l.append((int(np.min(s)), int(np.max(s))))
    else:
        l.append(int(s[0]))
print(l)
Output:
[(2, 5), (12, 17), 20]

I think this way is simpler than any of the answers I've seen here (Edit: fixed based on comment from Pleastry):
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
# Classify every value by whether its neighbours x-1 / x+1 are present.
starts, ends, singles = [], [], []
for x in data:
    has_prev = (x - 1) in data
    has_next = (x + 1) in data
    if has_next and not has_prev:
        starts.append(x)  # first value of a run
    elif has_prev and not has_next:
        ends.append(x)  # last value of a run
    elif not has_prev and not has_next:
        singles.append(x)  # isolated value
list(zip(starts, ends)) + singles
Output:
[(2, 5), (12, 17), 20]
Edited:
As #dawg notes, this is O(n**2). One option to improve performance would be to convert the original list to a set (and also the starts list to a set) i.e.
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
data_as_set = set(data)
# Iterate in sorted order: a set has no defined iteration order, and
# zip(starts, ends) only pairs correctly when both lists are ascending.
ordered = sorted(data_as_set)
starts = [x for x in ordered if x-1 not in data_as_set and x+1 in data_as_set]
startset = set(starts)
ends = [x for x in ordered if x-1 in data_as_set and x+1 not in data_as_set and x not in startset]
singles = [x for x in ordered if x-1 not in data_as_set and x+1 not in data_as_set]
print(list(zip(starts, ends)) + singles)

Using groupby and count from itertools gives us a short solution. The idea is that, in an increasing sequence, the difference between the index and the value will remain the same.
In order to keep track of the index, we can use an itertools.count, which makes the code cleaner than using enumerate:
from itertools import groupby, count
def intervals(data):
    """Return ``[first, last]`` for each run of consecutive numbers in data.

    data is expected to be in ascending order.  A lone number n is reported
    as ``[n, n]``; empty input gives ``[]``.
    """
    out = []
    counter = count()
    # Relies on groupby calling the key once per element, in order, so that
    # next(counter) is the element's index; value - index is constant
    # within a run of consecutive numbers.
    for key, group in groupby(data, key=lambda x: x - next(counter)):
        block = list(group)
        out.append([block[0], block[-1]])
    return out
Some sample output:
print(intervals([0, 1, 3, 4, 6]))
# [[0, 1], [3, 4], [6, 6]]
print(intervals([2, 3, 4, 5]))
# [[2, 5]]

This is my method in which I tried to prioritize readability. Note that it returns a tuple of the same values if there is only one value in a group. That can be fixed easily in the second snippet I'll post.
def group(values):
    """return the first and last value of each continuous set in a list of sorted values

    The input is sorted first, so any iterable of numbers is accepted.
    A lone value v is reported as ``(v, v)``; empty input yields nothing.
    """
    values = sorted(values)
    if not values:
        return  # values[0] below would raise IndexError
    first = last = values[0]
    for index in values[1:]:
        if index - last > 1:  # triggered if in a new group
            yield first, last
            first = index  # update first only if in a new group
        last = index  # update last on every iteration
    yield first, last  # this is needed to yield the last set of numbers
Here is the result of a test:
values = [0, 5, 6, 7, 12, 13, 21, 22, 23, 24, 25, 26, 30, 44, 45, 50]
result = list(group(values))
print(result)
result = [(0, 0), (5, 7), (12, 13), (21, 26), (30, 30), (44, 45), (50, 50)]
If you want to return only a single value in the case of a single value in a group, just add a conditional check to the yields:
def group(values):
    """return the first and last value of each continuous set in a list of sorted values

    The input is sorted first.  A set holding a single value is yielded as
    that bare value instead of a tuple; empty input yields nothing.
    """
    values = sorted(values)
    if not values:
        return  # values[0] below would raise IndexError
    first = last = values[0]
    for index in values[1:]:
        if index - last > 1:  # triggered if in a new group
            if first == last:
                yield first
            else:
                yield first, last
            first = index  # update first only if in a new group
        last = index  # update last on every iteration
    # the final set is never closed inside the loop
    if first == last:
        yield first
    else:
        yield first, last
result = [0, (5, 7), (12, 13), (21, 26), 30, (44, 45), 50]

Here's the answer I came up with. I'm writing the code for other people to understand, so I'm fairly verbose with variable names and comments.
First a quick helper function:
def getpreviousitem(mylist,myitem):
    '''Given a list and an item, return previous item in list

    Only the first occurrence of myitem is considered.  Returns None when
    myitem is the first element or does not occur in mylist.
    '''
    for position, item in enumerate(mylist):
        if item == myitem:
            # First item has no previous item
            if position == 0:
                return None
            # Return previous item
            return mylist[position-1]
    # myitem was not found
    return None
And then the actual code:
def getranges(cpulist):
    '''Given a sorted list of numbers, return a list of ranges

    Each range is a two-item list [first, last].  Items that have no
    adjacent neighbour are not included in the result.  The predecessor of
    each item is looked up with getpreviousitem().
    '''
    rangelist = []
    inrange = False
    for item in cpulist:
        previousitem = getpreviousitem(cpulist,item)
        if previousitem == item - 1:
            # We're in a range
            if inrange:
                # It's an existing range - change the end to the current item
                newrange[1] = item
            else:
                # We've found a new range.
                newrange = [item-1,item]
                # Update to show we are now in a range
                inrange = True
        else:
            # We were in a range but now it just ended
            if inrange:
                # Save the old range
                rangelist.append(newrange)
                # Update to show we're no longer in a range
                inrange = False
    # Add the final range found to our list
    if inrange:
        rangelist.append(newrange)
    return rangelist
Example run:
getranges([2, 3, 4, 5, 12, 13, 14, 15, 16, 17])
returns:
[[2, 5], [12, 17]]

Using numpy + comprehension lists:
With numpy's diff function, consecutive entries of the input vector whose difference is not equal to one can be identified. The start and end of the input vector need to be considered as well.
import numpy as np
data = np.array([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
# Indices i where data[i+1] - data[i] != 1, i.e. the last index of each run.
# flatnonzero always gives an integer array, so d stays usable as an index
# even when there is no break at all (an empty Python list would turn the
# hstack result into floats).
d = np.flatnonzero(np.diff(data) != 1)
d = np.hstack([-1, d, len(data)-1])  # add first and last elements
d = np.vstack([d[:-1]+1, d[1:]]).T  # rows of [start index, end index]
print(data[d])
Output:
[[ 2 5]
[12 17]
[20 20]]
Note: The request that individual numbers should be treated differently, (returned as individual, not ranges) was omitted. This can be reached by further post-processing the results. Usually this will make things more complex without gaining any benefit.

One-liner in Python 2.7 if interested:
x = [2, 3, 6, 7, 8, 14, 15, 19, 20, 21]
# Collect [end_of_run, start_of_next_run] at every break; pairing x[-1] with
# x[0] closes the final run. Prefixed with x[0], the flat list reads
# start, end, start, end, ... and zip(d, d) pairs it up.
d = iter(x[:1] + sum(([i1, i2] for i1, i2 in zip(x, x[1:] + x[:1]) if i2 != i1+1), []))
# print(...) with list(...) works on both Python 2.7 and Python 3
print(list(zip(d, d)))
>>> [(2, 3), (6, 8), (14, 15), (19, 21)]

A short solution that works without additional imports. It accepts any iterable, sorts unsorted inputs, and removes duplicate items:
def ranges(nums):
    """Return the closed ``(start, end)`` ranges of consecutive integers in nums.

    nums may be any iterable; it is de-duplicated and sorted first.  A lone
    number n is reported as ``(n, n)``; empty input gives ``[]``.
    """
    nums = sorted(set(nums))
    # every neighbouring pair with a hole between: s ends a range, e starts the next
    gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s+1 < e]
    # flatten with a comprehension; sum(gaps, []) would re-copy the list per gap
    edges = iter(nums[:1] + [edge for gap in gaps for edge in gap] + nums[-1:])
    return list(zip(edges, edges))
Example:
>>> ranges([2, 3, 4, 7, 8, 9, 15])
[(2, 4), (7, 9), (15, 15)]
>>> ranges([-1, 0, 1, 2, 3, 12, 13, 15, 100])
[(-1, 3), (12, 13), (15, 15), (100, 100)]
>>> ranges(range(100))
[(0, 99)]
>>> ranges([0])
[(0, 0)]
>>> ranges([])
[]
This is the same as #dansalmo's solution which I found amazing, albeit a bit hard to read and apply (as it's not given as a function).
Note that it could easily be modified to spit out "traditional" open ranges [start, end), by e.g. altering the return statement:
return [(s, e+1) for s, e in zip(edges, edges)]
I copied this answer over from another question that was marked as a duplicate of this one with the intention to make it easier findable (after I just now searched again for this topic, finding only the question here at first and not being satisfied with the answers given).

The versions by Mark Byers, Andrea Ambu, SilentGhost, Nadia Alramli, and truppo are simple and fast. The 'truppo' version encouraged me to write a version that retains the same nimble behavior while handling step sizes other than 1 (and lists as singletons elements that don't extend more than 1 step with a given step size). It is given here.
>>> list(ranges([1,2,3,4,3,2,1,3,5,7,11,1,2,3]))
[(1, 4, 1), (3, 1, -1), (3, 7, 2), 11, (1, 3, 1)]

Not the best approach , but here is my 2 cents
def getConsecutiveValues2(arr):
    """Split arr into lists of consecutive values.

    A new list starts wherever an element is not exactly one greater than
    its predecessor.  Isolated values become one-element lists; empty input
    gives ``[]``.
    """
    final = []
    start = 0  # index where the current run begins
    for i in range(1, len(arr)):
        if arr[i] - arr[i-1] != 1:
            # arr[i] breaks the run: close arr[start:i] and begin a new one
            final.append(arr[start:i])
            start = i
    if len(arr):
        final.append(arr[start:])  # the last run is never closed in the loop
    return final
x = [1,2,3,5,6,8,9,10,11,12]
print(getConsecutiveValues2(x))
>> [[1, 2, 3], [5, 6], [8, 9, 10, 11, 12]]

This implementation works for regular or irregular steps
I needed to achieve the same thing but with the slight difference where steps can be irregular. this is my implementation
def ranges(l):
    """Group the sorted values of l into ranges of constant step.

    Returns ``range(0, 0)`` for empty input and a single ``range`` for a
    one-element input.  Otherwise returns a list with one entry per group:
    a ``range`` when the group's step is non-zero, or a list of repeated
    values when the step is 0.
    """
    if not len(l):
        return range(0,0)
    elif len(l)==1:
        return range(l[0],l[0]+1)
    # get steps
    sl = sorted(l)
    steps = [i-j for i,j in zip(sl[1:],sl[:-1])]
    # get unique steps indexes range
    # each group is [first index, last index, step], indices referring to sl
    groups = [[0,0,steps[0]],]
    for i,s in enumerate(steps):
        if s==groups[-1][-1]:
            groups[-1][1] = i+1
        else:
            groups.append( [i+1,i+1,s] )
            g2 = groups[-2]
            # the previous group holds a single element: drop it and let the
            # new group start at that element instead
            if g2[0]==g2[1]:
                if sl[i+1]-sl[i]==s:
                    _=groups.pop(-2)
                    groups[-1][0] = i
    # create list of ranges
    return [range(sl[i],sl[j]+s,s) if s!=0 else [sl[i]]*(j+1-i) for i,j,s in groups]
Here's an example
from timeit import timeit
# for regular ranges
l = list(range(1000000))
ranges(l)
>>> [range(0, 1000000)]
l = list(range(10)) + list(range(20,25)) + [1,2,3]
ranges(l)
>>> [range(0, 2), range(1, 3), range(2, 4), range(3, 10), range(20, 25)]
sorted(l);[list(i) for i in ranges(l)]
>>> [0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24]
>>> [[0, 1], [1, 2], [2, 3], [3, 4, 5, 6, 7, 8, 9], [20, 21, 22, 23, 24]]
# for irregular steps list
l = [1, 3, 5, 7, 10, 11, 12, 100, 200, 300, 400, 60, 99, 4000,4001]
ranges(l)
>>> [range(1, 9, 2), range(10, 13), range(60, 138, 39), range(100, 500, 100), range(4000, 4002)]
## Speed test
timeit("ranges(l)","from __main__ import ranges,l", number=1000)/1000
>>> 9.303160999934334e-06

Yet another solution if you expect your input to be a set:
def group_years(years):
    """Partition years into sets of consecutive years.

    Returns a list of sets in ascending order.  Sorting first makes the
    result independent of the iteration order of years, so neighbouring
    years always end up in the same set.
    """
    consecutive_years = []
    previous = None
    for year in sorted(set(years)):
        if previous is not None and year - previous == 1:
            consecutive_years[-1].add(year)  # extends the current run
        else:
            consecutive_years.append({year})  # gap: start a new run
        previous = year
    return consecutive_years
Example:
group_years({2016, 2017, 2019, 2020, 2022})
Out[54]: [{2016, 2017}, {2019, 2020}, {2022}]

Related

(python)how can i find consecutive number set in a list that consisting of interger [duplicate]

I'd like to identify groups of consecutive numbers in a list, so that:
myfunc([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
Returns:
[(2,5), (12,17), 20]
And was wondering what the best way to do this was (particularly if there's something inbuilt into Python).
Edit: Note I originally forgot to mention that individual numbers should be returned as individual numbers, not ranges.
EDIT 2: To answer the OP new requirement
# NOTE: Python 2 only. The tuple-unpacking lambda `lambda (index, item): ...`
# is a syntax error in Python 3, map() must return an indexable list, and
# xrange() no longer exists there.
# Uses groupby, itemgetter and data as defined in the recipe snippet.
ranges = []
for key, group in groupby(enumerate(data), lambda (index, item): index - item):
group = map(itemgetter(1), group)
if len(group) > 1:
# xrange is used as a (first, last) holder; as an actual xrange its end is
# exclusive, so iterating it would stop before group[-1]
ranges.append(xrange(group[0], group[-1]))
else:
ranges.append(group[0])
Output:
[xrange(2, 5), xrange(12, 17), 20]
You can replace xrange with range or any other custom class.
Python docs have a very neat recipe for this:
from operator import itemgetter
from itertools import groupby
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
# NOTE: Python 2 only. `lambda (i,x): ...` unpacks the (index, value) pair in
# the parameter list, which is a syntax error in Python 3.
# index - value is the same for every member of a run of consecutive numbers,
# so groupby places each run in its own group.
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
print(map(itemgetter(1), g))
Output:
[2, 3, 4, 5]
[12, 13, 14, 15, 16, 17]
If you want to get the exact same output, you can do this:
# NOTE: Python 2 only (tuple-parameter lambda; map() returning an indexable list).
# Uses groupby, itemgetter and data as defined in the recipe snippet.
ranges = []
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
group = map(itemgetter(1), g)
# keep only the first and last member of each run
ranges.append((group[0], group[-1]))
output:
[(2, 5), (12, 17)]
EDIT: The example is already explained in the documentation but maybe I should explain it more:
The key to the solution is
differencing with a range so that
consecutive numbers all appear in same
group.
If the data was: [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
Then groupby(enumerate(data), lambda (i,x):i-x) is equivalent of the following:
groupby(
[(0, 2), (1, 3), (2, 4), (3, 5), (4, 12),
(5, 13), (6, 14), (7, 15), (8, 16), (9, 17)],
lambda (i,x):i-x
)
The lambda function subtracts the element value from the element index. So when you apply the lambda to each item, you'll get the following keys for groupby:
[-2, -2, -2, -2, -8, -8, -8, -8, -8, -8]
groupby groups elements by equal key value, so the first 4 elements will be grouped together and so forth.
I hope this makes it more readable.
python 3 version may be helpful for beginners
import the libraries required first
from itertools import groupby
from operator import itemgetter
ranges = []
# index - value is constant within a run of consecutive numbers, so each
# groupby group is one run of (index, value) pairs.
for k, g in groupby(enumerate(data), lambda x: x[0] - x[1]):
    group = [int(item) for _, item in g]  # drop the index, keep the values
    ranges.append((group[0], group[-1]))
more_itertools.consecutive_groups was added in version 4.0.
Demo
import more_itertools as mit
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
[list(group) for group in mit.consecutive_groups(iterable)]
# [[2, 3, 4, 5], [12, 13, 14, 15, 16, 17], [20]]
Code
Applying this tool, we make a generator function that finds ranges of consecutive numbers.
def find_ranges(iterable):
    """Yield range of consecutive numbers.

    A run of length one is yielded as the bare number, a longer run as a
    ``(first, last)`` tuple.  Grouping is delegated to
    ``mit.consecutive_groups``.
    """
    for group in mit.consecutive_groups(iterable):
        group = list(group)  # materialize so it can be measured and indexed
        if len(group) == 1:
            yield group[0]
        else:
            yield group[0], group[-1]
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
list(find_ranges(iterable))
# [(2, 5), (12, 17), 20]
The source implementation emulates a classic recipe (as demonstrated by #Nadia Alramli).
Note: more_itertools is a third-party package installable via pip install more_itertools.
The "naive" solution which I find somewhat readable at least.
x = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 22, 25, 26, 28, 51, 52, 57]
def group(L):
    """Yield ``(first, last)`` for every run of consecutive numbers in L.

    L is expected to be in ascending order.  A lone number n is reported as
    ``(n, n)``; an empty L yields nothing.
    """
    if not L:
        return  # nothing to group; L[0] below would raise IndexError
    first = last = L[0]
    for n in L[1:]:
        if n - 1 == last:  # Part of the group, bump the end
            last = n
        else:  # Not part of the group, yield current group and start a new
            yield first, last
            first = last = n
    yield first, last  # Yield the last group
>>>print list(group(x))
[(2, 5), (12, 17), (22, 22), (25, 26), (28, 28), (51, 52), (57, 57)]
Assuming your list is sorted:
>>> from itertools import groupby
def ranges(lst):
    """Yield a ``range`` for each run of consecutive integers in lst.

    lst must be a sorted, indexable sequence.  Each yielded range is
    half-open: the run 2..5 is reported as ``range(2, 6)``.
    """
    # value - index is constant within a run of consecutive integers
    offsets = (value - index for index, value in enumerate(lst))
    start = 0  # index in lst where the current run begins
    for _, run in groupby(offsets):
        length = sum(1 for _ in run)
        first = lst[start]
        start += length
        yield range(first, first + length)
>>> lst = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
>>> list(ranges(lst))
[range(2, 6), range(12, 18)]
Here it is something that should work, without any import needed:
def myfunc(lst):
    """Collapse runs of consecutive numbers in lst.

    Returns a list where each run of two or more consecutive numbers is a
    ``(first, last)`` tuple and each isolated number is kept as is.
    lst is expected to be in ascending order; an empty list gives ``[]``.
    """
    ret = []
    if not lst:
        return ret  # guard: lst[0] below would raise IndexError
    a = b = lst[0]  # a and b are range's bounds
    for el in lst[1:]:
        if el == b + 1:
            b = el  # range grows
        else:  # range ended
            ret.append(a if a == b else (a, b))  # is a single or a range?
            a = b = el  # let's start again with a single
    ret.append(a if a == b else (a, b))  # corner case for last single/range
    return ret
Please note that the code using groupby doesn't work as given in Python 3 so use this.
# index - value is constant within a run of consecutive numbers, so each
# groupby group is one run of (index, value) pairs.
for k, g in groupby(enumerate(data), lambda x: x[0] - x[1]):
    group = list(map(itemgetter(1), g))  # materialize: map() is lazy in Python 3
    ranges.append((group[0], group[-1]))
This doesn't use a standard function - it just iterates over the input, but it should work:
def myfunc(l):
    """Format runs of consecutive integers in l as a string.

    Runs are rendered as ``first-last`` and single numbers as themselves,
    joined by ``', '`` inside parentheses, e.g. ``'(2-5, 12-17, 20)'``.
    l is expected to be in ascending order.
    """
    r = []
    p = q = None  # bounds of the run being built; None until the first item

    def flush():
        # Emit the finished run.  Test against None rather than truthiness
        # so that a run starting at 0 is not silently dropped.
        if p is not None:
            r.append('%s-%s' % (p, q) if q > p else str(p))

    for x in l:
        if q is not None and x - 1 == q:
            q = x
        else:
            flush()
            p = q = x
    flush()  # the last run has no successor to trigger its output
    return '(%s)' % ', '.join(r)
Note that it requires that the input contains only positive numbers in ascending order. You should validate the input, but this code is omitted for clarity.
import numpy as np
myarray = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
# Split after every position where the step to the next value exceeds 1.
sequences = np.split(myarray, np.array(np.where(np.diff(myarray) > 1)[0]) + 1)
l = []
for s in sequences:
    if len(s) > 1:
        # int() turns NumPy scalars into plain ints so the printed result
        # does not depend on how the NumPy version renders its scalars
        l.append((int(np.min(s)), int(np.max(s))))
    else:
        l.append(int(s[0]))
print(l)
Output:
[(2, 5), (12, 17), 20]
I think this way is simpler than any of the answers I've seen here (Edit: fixed based on comment from Pleastry):
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
# Classify every value by whether its neighbours x-1 / x+1 are present.
starts, ends, singles = [], [], []
for x in data:
    has_prev = (x - 1) in data
    has_next = (x + 1) in data
    if has_next and not has_prev:
        starts.append(x)  # first value of a run
    elif has_prev and not has_next:
        ends.append(x)  # last value of a run
    elif not has_prev and not has_next:
        singles.append(x)  # isolated value
list(zip(starts, ends)) + singles
Output:
[(2, 5), (12, 17), 20]
Edited:
As #dawg notes, this is O(n**2). One option to improve performance would be to convert the original list to a set (and also the starts list to a set) i.e.
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
data_as_set = set(data)
# Iterate in sorted order: a set has no defined iteration order, and
# zip(starts, ends) only pairs correctly when both lists are ascending.
ordered = sorted(data_as_set)
starts = [x for x in ordered if x-1 not in data_as_set and x+1 in data_as_set]
startset = set(starts)
ends = [x for x in ordered if x-1 in data_as_set and x+1 not in data_as_set and x not in startset]
singles = [x for x in ordered if x-1 not in data_as_set and x+1 not in data_as_set]
print(list(zip(starts, ends)) + singles)
Using groupby and count from itertools gives us a short solution. The idea is that, in an increasing sequence, the difference between the index and the value will remain the same.
In order to keep track of the index, we can use an itertools.count, which makes the code cleaner than using enumerate:
from itertools import groupby, count
def intervals(data):
    """Return ``[first, last]`` for each run of consecutive numbers in data.

    data is expected to be in ascending order.  A lone number n is reported
    as ``[n, n]``; empty input gives ``[]``.
    """
    out = []
    counter = count()
    # Relies on groupby calling the key once per element, in order, so that
    # next(counter) is the element's index; value - index is constant
    # within a run of consecutive numbers.
    for key, group in groupby(data, key=lambda x: x - next(counter)):
        block = list(group)
        out.append([block[0], block[-1]])
    return out
Some sample output:
print(intervals([0, 1, 3, 4, 6]))
# [[0, 1], [3, 4], [6, 6]]
print(intervals([2, 3, 4, 5]))
# [[2, 5]]
This is my method in which I tried to prioritize readability. Note that it returns a tuple of the same values if there is only one value in a group. That can be fixed easily in the second snippet I'll post.
def group(values):
    """return the first and last value of each continuous set in a list of sorted values

    The input is sorted first, so any iterable of numbers is accepted.
    A lone value v is reported as ``(v, v)``; empty input yields nothing.
    """
    values = sorted(values)
    if not values:
        return  # values[0] below would raise IndexError
    first = last = values[0]
    for index in values[1:]:
        if index - last > 1:  # triggered if in a new group
            yield first, last
            first = index  # update first only if in a new group
        last = index  # update last on every iteration
    yield first, last  # this is needed to yield the last set of numbers
Here is the result of a test:
values = [0, 5, 6, 7, 12, 13, 21, 22, 23, 24, 25, 26, 30, 44, 45, 50]
result = list(group(values))
print(result)
result = [(0, 0), (5, 7), (12, 13), (21, 26), (30, 30), (44, 45), (50, 50)]
If you want to return only a single value in the case of a single value in a group, just add a conditional check to the yields:
def group(values):
    """return the first and last value of each continuous set in a list of sorted values

    The input is sorted first.  A set holding a single value is yielded as
    that bare value instead of a tuple; empty input yields nothing.
    """
    values = sorted(values)
    if not values:
        return  # values[0] below would raise IndexError
    first = last = values[0]
    for index in values[1:]:
        if index - last > 1:  # triggered if in a new group
            if first == last:
                yield first
            else:
                yield first, last
            first = index  # update first only if in a new group
        last = index  # update last on every iteration
    # the final set is never closed inside the loop
    if first == last:
        yield first
    else:
        yield first, last
result = [0, (5, 7), (12, 13), (21, 26), 30, (44, 45), 50]
Here's the answer I came up with. I'm writing the code for other people to understand, so I'm fairly verbose with variable names and comments.
First a quick helper function:
def getpreviousitem(mylist,myitem):
    '''Given a list and an item, return previous item in list

    Only the first occurrence of myitem is considered.  Returns None when
    myitem is the first element or does not occur in mylist.
    '''
    for position, item in enumerate(mylist):
        if item == myitem:
            # First item has no previous item
            if position == 0:
                return None
            # Return previous item
            return mylist[position-1]
    # myitem was not found
    return None
And then the actual code:
def getranges(cpulist):
    '''Given a sorted list of numbers, return a list of ranges

    Each range is a two-item list [first, last].  Items that have no
    adjacent neighbour are not included in the result.  The predecessor of
    each item is looked up with getpreviousitem().
    '''
    rangelist = []
    inrange = False
    for item in cpulist:
        previousitem = getpreviousitem(cpulist,item)
        if previousitem == item - 1:
            # We're in a range
            if inrange:
                # It's an existing range - change the end to the current item
                newrange[1] = item
            else:
                # We've found a new range.
                newrange = [item-1,item]
                # Update to show we are now in a range
                inrange = True
        else:
            # We were in a range but now it just ended
            if inrange:
                # Save the old range
                rangelist.append(newrange)
                # Update to show we're no longer in a range
                inrange = False
    # Add the final range found to our list
    if inrange:
        rangelist.append(newrange)
    return rangelist
Example run:
getranges([2, 3, 4, 5, 12, 13, 14, 15, 16, 17])
returns:
[[2, 5], [12, 17]]
Using numpy + comprehension lists:
With numpy's diff function, consecutive entries of the input vector whose difference is not equal to one can be identified. The start and end of the input vector need to be considered as well.
import numpy as np
data = np.array([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
# Indices i where data[i+1] - data[i] != 1, i.e. the last index of each run.
# flatnonzero always gives an integer array, so d stays usable as an index
# even when there is no break at all (an empty Python list would turn the
# hstack result into floats).
d = np.flatnonzero(np.diff(data) != 1)
d = np.hstack([-1, d, len(data)-1])  # add first and last elements
d = np.vstack([d[:-1]+1, d[1:]]).T  # rows of [start index, end index]
print(data[d])
Output:
[[ 2 5]
[12 17]
[20 20]]
Note: The request that individual numbers should be treated differently, (returned as individual, not ranges) was omitted. This can be reached by further post-processing the results. Usually this will make things more complex without gaining any benefit.
One-liner in Python 2.7 if interested:
x = [2, 3, 6, 7, 8, 14, 15, 19, 20, 21]
# Collect [end_of_run, start_of_next_run] at every break; pairing x[-1] with
# x[0] closes the final run. Prefixed with x[0], the flat list reads
# start, end, start, end, ... and zip(d, d) pairs it up.
d = iter(x[:1] + sum(([i1, i2] for i1, i2 in zip(x, x[1:] + x[:1]) if i2 != i1+1), []))
# print(...) with list(...) works on both Python 2.7 and Python 3
print(list(zip(d, d)))
>>> [(2, 3), (6, 8), (14, 15), (19, 21)]
A short solution that works without additional imports. It accepts any iterable, sorts unsorted inputs, and removes duplicate items:
def ranges(nums):
    """Return the closed ``(start, end)`` ranges of consecutive integers in nums.

    nums may be any iterable; it is de-duplicated and sorted first.  A lone
    number n is reported as ``(n, n)``; empty input gives ``[]``.
    """
    nums = sorted(set(nums))
    # every neighbouring pair with a hole between: s ends a range, e starts the next
    gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s+1 < e]
    # flatten with a comprehension; sum(gaps, []) would re-copy the list per gap
    edges = iter(nums[:1] + [edge for gap in gaps for edge in gap] + nums[-1:])
    return list(zip(edges, edges))
Example:
>>> ranges([2, 3, 4, 7, 8, 9, 15])
[(2, 4), (7, 9), (15, 15)]
>>> ranges([-1, 0, 1, 2, 3, 12, 13, 15, 100])
[(-1, 3), (12, 13), (15, 15), (100, 100)]
>>> ranges(range(100))
[(0, 99)]
>>> ranges([0])
[(0, 0)]
>>> ranges([])
[]
This is the same as #dansalmo's solution which I found amazing, albeit a bit hard to read and apply (as it's not given as a function).
Note that it could easily be modified to spit out "traditional" open ranges [start, end), by e.g. altering the return statement:
return [(s, e+1) for s, e in zip(edges, edges)]
I copied this answer over from another question that was marked as a duplicate of this one with the intention to make it easier findable (after I just now searched again for this topic, finding only the question here at first and not being satisfied with the answers given).
The versions by Mark Byers, Andrea Ambu, SilentGhost, Nadia Alramli, and truppo are simple and fast. The 'truppo' version encouraged me to write a version that retains the same nimble behavior while handling step sizes other than 1 (and lists as singletons elements that don't extend more than 1 step with a given step size). It is given here.
>>> list(ranges([1,2,3,4,3,2,1,3,5,7,11,1,2,3]))
[(1, 4, 1), (3, 1, -1), (3, 7, 2), 11, (1, 3, 1)]
Not the best approach , but here is my 2 cents
def getConsecutiveValues2(arr):
    """Split arr into lists of consecutive values.

    A new list starts wherever an element is not exactly one greater than
    its predecessor.  Isolated values become one-element lists; empty input
    gives ``[]``.
    """
    final = []
    start = 0  # index where the current run begins
    for i in range(1, len(arr)):
        if arr[i] - arr[i-1] != 1:
            # arr[i] breaks the run: close arr[start:i] and begin a new one
            final.append(arr[start:i])
            start = i
    if len(arr):
        final.append(arr[start:])  # the last run is never closed in the loop
    return final
x = [1,2,3,5,6,8,9,10,11,12]
print(getConsecutiveValues2(x))
>> [[1, 2, 3], [5, 6], [8, 9, 10, 11, 12]]
This implementation works for regular or irregular steps
I needed to achieve the same thing but with the slight difference where steps can be irregular. this is my implementation
def ranges(l):
    """Group the sorted values of l into ranges of constant step.

    Returns ``range(0, 0)`` for empty input and a single ``range`` for a
    one-element input.  Otherwise returns a list with one entry per group:
    a ``range`` when the group's step is non-zero, or a list of repeated
    values when the step is 0.
    """
    if not len(l):
        return range(0,0)
    elif len(l)==1:
        return range(l[0],l[0]+1)
    # get steps
    sl = sorted(l)
    steps = [i-j for i,j in zip(sl[1:],sl[:-1])]
    # get unique steps indexes range
    # each group is [first index, last index, step], indices referring to sl
    groups = [[0,0,steps[0]],]
    for i,s in enumerate(steps):
        if s==groups[-1][-1]:
            groups[-1][1] = i+1
        else:
            groups.append( [i+1,i+1,s] )
            g2 = groups[-2]
            # the previous group holds a single element: drop it and let the
            # new group start at that element instead
            if g2[0]==g2[1]:
                if sl[i+1]-sl[i]==s:
                    _=groups.pop(-2)
                    groups[-1][0] = i
    # create list of ranges
    return [range(sl[i],sl[j]+s,s) if s!=0 else [sl[i]]*(j+1-i) for i,j,s in groups]
Here's an example
from timeit import timeit
# for regular ranges
l = list(range(1000000))
ranges(l)
>>> [range(0, 1000000)]
l = list(range(10)) + list(range(20,25)) + [1,2,3]
ranges(l)
>>> [range(0, 2), range(1, 3), range(2, 4), range(3, 10), range(20, 25)]
sorted(l);[list(i) for i in ranges(l)]
>>> [0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24]
>>> [[0, 1], [1, 2], [2, 3], [3, 4, 5, 6, 7, 8, 9], [20, 21, 22, 23, 24]]
# for irregular steps list
l = [1, 3, 5, 7, 10, 11, 12, 100, 200, 300, 400, 60, 99, 4000,4001]
ranges(l)
>>> [range(1, 9, 2), range(10, 13), range(60, 138, 39), range(100, 500, 100), range(4000, 4002)]
## Speed test
timeit("ranges(l)","from __main__ import ranges,l", number=1000)/1000
>>> 9.303160999934334e-06
Yet another solution if you expect your input to be a set:
def group_years(years):
    """Partition years into sets of consecutive years.

    Returns a list of sets in ascending order.  Sorting first makes the
    result independent of the iteration order of years, so neighbouring
    years always end up in the same set.
    """
    consecutive_years = []
    previous = None
    for year in sorted(set(years)):
        if previous is not None and year - previous == 1:
            consecutive_years[-1].add(year)  # extends the current run
        else:
            consecutive_years.append({year})  # gap: start a new run
        previous = year
    return consecutive_years
Example:
group_years({2016, 2017, 2019, 2020, 2022})
Out[54]: [{2016, 2017}, {2019, 2020}, {2022}]

Extracting consecutive integers from a list in python [duplicate]

I'd like to identify groups of consecutive numbers in a list, so that:
myfunc([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
Returns:
[(2,5), (12,17), 20]
And was wondering what the best way to do this was (particularly if there's something inbuilt into Python).
Edit: Note I originally forgot to mention that individual numbers should be returned as individual numbers, not ranges.
EDIT 2: To answer the OP new requirement
# NOTE: Python 2 only. The tuple-unpacking lambda `lambda (index, item): ...`
# is a syntax error in Python 3, map() must return an indexable list, and
# xrange() no longer exists there.
# Uses groupby, itemgetter and data as defined in the recipe snippet.
ranges = []
for key, group in groupby(enumerate(data), lambda (index, item): index - item):
group = map(itemgetter(1), group)
if len(group) > 1:
# xrange is used as a (first, last) holder; as an actual xrange its end is
# exclusive, so iterating it would stop before group[-1]
ranges.append(xrange(group[0], group[-1]))
else:
ranges.append(group[0])
Output:
[xrange(2, 5), xrange(12, 17), 20]
You can replace xrange with range or any other custom class.
Python docs have a very neat recipe for this:
from operator import itemgetter
from itertools import groupby
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
# NOTE: Python 2 only. `lambda (i,x): ...` unpacks the (index, value) pair in
# the parameter list, which is a syntax error in Python 3.
# index - value is the same for every member of a run of consecutive numbers,
# so groupby places each run in its own group.
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
print(map(itemgetter(1), g))
Output:
[2, 3, 4, 5]
[12, 13, 14, 15, 16, 17]
If you want to get the exact same output, you can do this:
# NOTE: Python 2 only (tuple-parameter lambda; map() returning an indexable list).
# Uses groupby, itemgetter and data as defined in the recipe snippet.
ranges = []
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
group = map(itemgetter(1), g)
# keep only the first and last member of each run
ranges.append((group[0], group[-1]))
output:
[(2, 5), (12, 17)]
EDIT: The example is already explained in the documentation but maybe I should explain it more:
The key to the solution is
differencing with a range so that
consecutive numbers all appear in same
group.
If the data was: [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
Then groupby(enumerate(data), lambda (i,x):i-x) is equivalent of the following:
groupby(
[(0, 2), (1, 3), (2, 4), (3, 5), (4, 12),
(5, 13), (6, 14), (7, 15), (8, 16), (9, 17)],
lambda (i,x):i-x
)
The lambda function subtracts the element value from the element index. So when you apply the lambda to each item, you'll get the following keys for groupby:
[-2, -2, -2, -2, -8, -8, -8, -8, -8, -8]
groupby groups elements by equal key value, so the first 4 elements will be grouped together and so forth.
I hope this makes it more readable.
python 3 version may be helpful for beginners
import the libraries required first
from itertools import groupby
from operator import itemgetter
ranges = []
# index - value is constant within a run of consecutive numbers, so each
# groupby group is one run of (index, value) pairs.
for k, g in groupby(enumerate(data), lambda x: x[0] - x[1]):
    group = [int(item) for _, item in g]  # drop the index, keep the values
    ranges.append((group[0], group[-1]))
more_itertools.consecutive_groups was added in version 4.0.
Demo
import more_itertools as mit
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
[list(group) for group in mit.consecutive_groups(iterable)]
# [[2, 3, 4, 5], [12, 13, 14, 15, 16, 17], [20]]
Code
Applying this tool, we make a generator function that finds ranges of consecutive numbers.
def find_ranges(iterable):
    """Yield each run of consecutive numbers found in *iterable*.

    A run with a single member is yielded as that bare number; a longer
    run is yielded as a ``(first, last)`` tuple.
    """
    for run in mit.consecutive_groups(iterable):
        members = list(run)
        first, last = members[0], members[-1]
        yield first if len(members) == 1 else (first, last)
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
list(find_ranges(iterable))
# [(2, 5), (12, 17), 20]
The source implementation emulates a classic recipe (as demonstrated by #Nadia Alramli).
Note: more_itertools is a third-party package installable via pip install more_itertools.
The "naive" solution, which I find fairly readable at least:
x = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 22, 25, 26, 28, 51, 52, 57]
def group(L):
    """Yield ``(first, last)`` for each run of consecutive numbers in *L*.

    A value belongs to the current run when it is exactly one greater than
    the run's last value; a lone value is yielded as ``(n, n)``.

    Args:
        L: Iterable of numbers, expected in ascending order.  It may be
            empty, in which case nothing is yielded.

    Yields:
        Tuples ``(first, last)`` in input order.
    """
    values = iter(L)
    try:
        first = last = next(values)
    except StopIteration:
        return  # Empty input: there is no group to report
    for n in values:
        if n - 1 == last:  # Part of the group, bump the end
            last = n
        else:  # Not part of the group, yield current group and start a new
            yield first, last
            first = last = n
    yield first, last  # Yield the last group
>>>print list(group(x))
[(2, 5), (12, 17), (22, 22), (25, 26), (28, 28), (51, 52), (57, 57)]
Assuming your list is sorted:
>>> from itertools import groupby
>>> def ranges(lst):
pos = (j - i for i, j in enumerate(lst))
t = 0
for i, els in groupby(pos):
l = len(list(els))
el = lst[t]
t += l
yield range(el, el+l)
>>> lst = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
>>> list(ranges(lst))
[range(2, 6), range(12, 18)]
Here it is something that should work, without any import needed:
def myfunc(lst):
    """Collapse runs of consecutive numbers in *lst*.

    Args:
        lst: Iterable of numbers, expected in ascending order.  May be empty.

    Returns:
        A list in which every run of two or more consecutive numbers is a
        ``(start, end)`` tuple and every isolated number appears as itself.
        An empty input gives an empty list.
    """
    ret = []
    items = iter(lst)
    try:
        a = b = next(items)  # a and b are the current range's bounds
    except StopIteration:
        return ret  # nothing to group
    for el in items:
        if el == b + 1:
            b = el  # range grows
        else:  # range ended
            ret.append(a if a == b else (a, b))  # is a single or a range?
            a = b = el  # start again with a single
    ret.append(a if a == b else (a, b))  # flush the last single/range
    return ret
Please note that the code using groupby doesn't work as given in Python 3 so use this.
for k, g in groupby(enumerate(data), lambda x:x[0]-x[1]):
group = list(map(itemgetter(1), g))
ranges.append((group[0], group[-1]))
This doesn't use a standard function - it just iterates over the input, but it should work:
def myfunc(l):
    """Format the runs of consecutive numbers in *l* as a string.

    Runs of two or more consecutive numbers are written ``start-end``,
    isolated numbers are written on their own, and the pieces are joined
    with ``", "`` inside parentheses, e.g. ``'(2-5, 12-17, 20)'``.

    Args:
        l: Iterable of integers in ascending order.  Zero and negative
            values are handled; an empty input gives ``'()'``.

    Returns:
        The formatted string.
    """
    def label(first, last):
        return '%s-%s' % (first, last) if last > first else str(first)

    r = []
    p = q = None  # bounds of the run being built; None until a value is seen
    for x in l:
        if p is not None and x - 1 == q:
            q += 1
            continue
        # Compare against None, not truthiness, so a run starting at 0 is kept.
        if p is not None:
            r.append(label(p, q))
        p = q = x
    # Flush the final run explicitly instead of relying on a sentinel value.
    if p is not None:
        r.append(label(p, q))
    return '(%s)' % ', '.join(r)
Note that it requires that the input contains only positive numbers in ascending order. You should validate the input, but this code is omitted for clarity.
import numpy as np
myarray = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
sequences = np.split(myarray, np.array(np.where(np.diff(myarray) > 1)[0]) + 1)
l = []
for s in sequences:
if len(s) > 1:
l.append((np.min(s), np.max(s)))
else:
l.append(s[0])
print(l)
Output:
[(2, 5), (12, 17), 20]
I think this way is simpler than any of the answers I've seen here (Edit: fixed based on comment from Pleastry):
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
starts = [x for x in data if x-1 not in data and x+1 in data]
ends = [x for x in data if x-1 in data and x+1 not in data and x not in starts]
singles = [x for x in data if x-1 not in data and x+1 not in data]
list(zip(starts, ends)) + singles
Output:
[(2, 5), (12, 17), 20]
Edited:
As #dawg notes, this is O(n**2). One option to improve performance would be to convert the original list to a set (and also the starts list to a set) i.e.
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
data_as_set = set(data)  # O(1) membership tests
# Walk the values in sorted order: a set has no guaranteed iteration order,
# and zip() below pairs starts with ends purely by position.
ordered = sorted(data_as_set)
starts = [x for x in ordered if x-1 not in data_as_set and x+1 in data_as_set]
startset = set(starts)
ends = [x for x in ordered if x-1 in data_as_set and x+1 not in data_as_set and x not in startset]
singles = [x for x in ordered if x-1 not in data_as_set and x+1 not in data_as_set]
print(list(zip(starts, ends)) + singles)
Using groupby and count from itertools gives us a short solution. The idea is that, in an increasing sequence, the difference between the index and the value will remain the same.
In order to keep track of the index, we can use an itertools.count, which makes the code cleaner than using enumerate:
from itertools import groupby, count
def intervals(data):
    """Return ``[first, last]`` for every run of consecutive values in *data*.

    Within a run that rises by one per element, ``value - position`` is
    constant, so grouping on that difference separates the runs.
    """
    position = count()

    def offset(value):
        # Draws the next position each time the key is evaluated.
        return value - next(position)

    runs = (list(members) for _, members in groupby(data, key=offset))
    return [[run[0], run[-1]] for run in runs]
Some sample output:
print(intervals([0, 1, 3, 4, 6]))
# [[0, 1], [3, 4], [6, 6]]
print(intervals([2, 3, 4, 5]))
# [[2, 5]]
This is my method in which I tried to prioritize readability. Note that it returns a tuple of the same values if there is only one value in a group. That can be fixed easily in the second snippet I'll post.
def group(values):
    """Yield the first and last value of each continuous run in *values*.

    The input is sorted first, so it may be given in any order.  A run
    continues while each value is at most 1 greater than the previous one;
    a lone value is yielded as ``(n, n)``.

    Args:
        values: Iterable of numbers.  May be empty, in which case nothing
            is yielded.

    Yields:
        ``(first, last)`` tuples in ascending order.
    """
    values = sorted(values)
    if not values:
        return  # nothing to group
    first = last = values[0]
    for value in values[1:]:
        if value - last > 1:  # triggered if in a new group
            yield first, last
            first = value  # update first only if in a new group
        last = value  # update last on every iteration
    yield first, last  # this is needed to yield the last set of numbers
Here is the result of a test:
values = [0, 5, 6, 7, 12, 13, 21, 22, 23, 24, 25, 26, 30, 44, 45, 50]
result = list(group(values))
print(result)
result = [(0, 0), (5, 7), (12, 13), (21, 26), (30, 30), (44, 45), (50, 50)]
If you want to return only a single value in the case of a single value in a group, just add a conditional check to the yields:
def group(values):
    """Yield each continuous run in *values*, collapsing lone values.

    The input is sorted first, so it may be given in any order.  A run
    continues while each value is at most 1 greater than the previous one.

    Args:
        values: Iterable of numbers.  May be empty, in which case nothing
            is yielded.

    Yields:
        ``(first, last)`` for a run spanning more than one value, or the
        bare value when the run's first and last values are equal.
    """
    values = sorted(values)
    if not values:
        return  # nothing to group
    first = last = values[0]
    for value in values[1:]:
        if value - last > 1:  # triggered if in a new group
            yield first if first == last else (first, last)
            first = value  # update first only if in a new group
        last = value  # update last on every iteration
    # Emit the run that was still open when the input ended.
    yield first if first == last else (first, last)
result = [0, (5, 7), (12, 13), (21, 26), 30, (44, 45), 50]
Here's the answer I came up with. I'm writing the code for other people to understand, so I'm fairly verbose with variable names and comments.
First a quick helper function:
def getpreviousitem(mylist, myitem):
    '''Return the element just before the first occurrence of myitem.

    Returns None when myitem is the first element of mylist or does not
    occur in it at all.
    '''
    found_at = next(
        (idx for idx, candidate in enumerate(mylist) if candidate == myitem),
        None,
    )
    if found_at is None or found_at == 0:
        return None
    return mylist[found_at - 1]
And then the actual code:
def getranges(cpulist):
    '''Given a sorted list of numbers, return a list of ranges.

    A range is a two-element list [start, end] covering a run in which
    every value is exactly 1 greater than the value before it.  Values
    that are not part of such a run are left out of the result.

    The predecessor of each value is tracked during a single pass, so the
    input is walked once instead of being re-scanned for every item.  A
    value repeated back-to-back keeps the predecessor of its first
    occurrence.

    Args:
        cpulist: Sorted iterable of numbers.  May be empty.

    Returns:
        List of [start, end] lists, in input order.
    '''
    rangelist = []
    newrange = None          # range being extended, or None when outside one
    nothing_seen = object()  # marks "no item processed yet"
    lastitem = nothing_seen
    previousitem = None      # value preceding the current (distinct) value
    for item in cpulist:
        if lastitem is not nothing_seen and item != lastitem:
            previousitem = lastitem
        lastitem = item
        if previousitem is not None and previousitem == item - 1:
            if newrange is None:
                # We've found a new range.
                newrange = [item - 1, item]
            else:
                # Existing range - move its end to the current item.
                newrange[1] = item
        elif newrange is not None:
            # We were in a range but it just ended: save it.
            rangelist.append(newrange)
            newrange = None
    # Add the final range found to our list.
    if newrange is not None:
        rangelist.append(newrange)
    return rangelist
Example run:
getranges([2, 3, 4, 5, 12, 13, 14, 15, 16, 17])
returns:
[[2, 5], [12, 17]]
Using numpy + comprehension lists:
With numpy diff function, consequent input vector entries that their difference is not equal to one can be identified. The start and end of the input vector need to be considered.
import numpy as np
data = np.array([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
d = [i for i, df in enumerate(np.diff(data)) if df!= 1]
d = np.hstack([-1, d, len(data)-1]) # add first and last elements
d = np.vstack([d[:-1]+1, d[1:]]).T
print(data[d])
Output:
[[ 2 5]
[12 17]
[20 20]]
Note: The request that individual numbers should be treated differently, (returned as individual, not ranges) was omitted. This can be reached by further post-processing the results. Usually this will make things more complex without gaining any benefit.
One-liner in Python 2.7 if interested:
x = [2, 3, 6, 7, 8, 14, 15, 19, 20, 21]
d = iter(x[:1] + sum(([i1, i2] for i1, i2 in zip(x, x[1:] + x[:1]) if i2 != i1+1), []))
print zip(d, d)
>>> [(2, 3), (6, 8), (14, 15), (19, 21)]
A short solution that works without additional imports. It accepts any iterable, sorts unsorted inputs, and removes duplicate items:
def ranges(nums):
    """Return ``(start, end)`` tuples for the consecutive runs in *nums*.

    Duplicates are dropped and the values are sorted before grouping, so
    any iterable of numbers is accepted.  A lone value ``n`` is reported
    as ``(n, n)``.
    """
    ordered = sorted(set(nums))
    # Collect run boundaries in order: the first value, then both sides of
    # every gap, then the last value.
    boundaries = ordered[:1]
    for left, right in zip(ordered, ordered[1:]):
        if left + 1 < right:
            boundaries.extend((left, right))
    boundaries += ordered[-1:]
    # Boundaries alternate start, end, start, end, ...
    return list(zip(boundaries[::2], boundaries[1::2]))
Example:
>>> ranges([2, 3, 4, 7, 8, 9, 15])
[(2, 4), (7, 9), (15, 15)]
>>> ranges([-1, 0, 1, 2, 3, 12, 13, 15, 100])
[(-1, 3), (12, 13), (15, 15), (100, 100)]
>>> ranges(range(100))
[(0, 99)]
>>> ranges([0])
[(0, 0)]
>>> ranges([])
[]
This is the same as #dansalmo's solution which I found amazing, albeit a bit hard to read and apply (as it's not given as a function).
Note that it could easily be modified to spit out "traditional" open ranges [start, end), by e.g. altering the return statement:
return [(s, e+1) for s, e in zip(edges, edges)]
I copied this answer over from another question that was marked as a duplicate of this one with the intention to make it easier findable (after I just now searched again for this topic, finding only the question here at first and not being satisfied with the answers given).
The versions by Mark Byers, Andrea Ambu, SilentGhost, Nadia Alramli, and truppo are simple and fast. The 'truppo' version encouraged me to write a version that retains the same nimble behavior while handling step sizes other than 1 (and lists as singletons elements that don't extend more than 1 step with a given step size). It is given here.
>>> list(ranges([1,2,3,4,3,2,1,3,5,7,11,1,2,3]))
[(1, 4, 1), (3, 1, -1), (3, 7, 2), 11, (1, 3, 1)]
Not the best approach , but here is my 2 cents
def getConsecutiveValues2(arr):
    """Split *arr* into slices of consecutive values.

    A new slice starts wherever an element is not exactly 1 greater than
    the element before it.

    Args:
        arr: Sequence of numbers (must support ``len`` and slicing).

    Returns:
        List of slices of *arr*, one per run, in order.  A lone value forms
        a one-element slice; an empty input gives an empty list.
    """
    final = []
    start = 0  # index where the current run begins
    for i in range(1, len(arr)):
        if arr[i] - arr[i - 1] != 1:
            # The run ends just before i; slicing up to i needs no separate
            # "end" index that could go stale.
            final.append(arr[start:i])
            start = i
    if len(arr) > 0:
        final.append(arr[start:])  # the run still open at the end of input
    return final
x = [1,2,3,5,6,8,9,10,11,12]
print(getConsecutiveValues2(x))
>> [[1, 2, 3], [5, 6], [8, 9, 10, 11, 12]]
This implementation works for regular or irregular steps
I needed to achieve the same thing but with the slight difference where steps can be irregular. this is my implementation
def ranges(l):
if not len(l):
return range(0,0)
elif len(l)==1:
return range(l[0],l[0]+1)
# get steps
sl = sorted(l)
steps = [i-j for i,j in zip(sl[1:],sl[:-1])]
# get unique steps indexes range
groups = [[0,0,steps[0]],]
for i,s in enumerate(steps):
if s==groups[-1][-1]:
groups[-1][1] = i+1
else:
groups.append( [i+1,i+1,s] )
g2 = groups[-2]
if g2[0]==g2[1]:
if sl[i+1]-sl[i]==s:
_=groups.pop(-2)
groups[-1][0] = i
# create list of ranges
return [range(sl[i],sl[j]+s,s) if s!=0 else [sl[i]]*(j+1-i) for i,j,s in groups]
Here's an example
from timeit import timeit
# for regular ranges
l = list(range(1000000))
ranges(l)
>>> [range(0, 1000000)]
l = list(range(10)) + list(range(20,25)) + [1,2,3]
ranges(l)
>>> [range(0, 2), range(1, 3), range(2, 4), range(3, 10), range(20, 25)]
sorted(l);[list(i) for i in ranges(l)]
>>> [0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24]
>>> [[0, 1], [1, 2], [2, 3], [3, 4, 5, 6, 7, 8, 9], [20, 21, 22, 23, 24]]
# for irregular steps list
l = [1, 3, 5, 7, 10, 11, 12, 100, 200, 300, 400, 60, 99, 4000,4001]
ranges(l)
>>> [range(1, 9, 2), range(10, 13), range(60, 138, 39), range(100, 500, 100), range(4000, 4002)]
## Speed test
timeit("ranges(l)","from __main__ import ranges,l", number=1000)/1000
>>> 9.303160999934334e-06
Yet another solution if you expect your input to be a set:
def group_years(years):
    """Group *years* into sets of consecutive values.

    The values are de-duplicated and sorted before grouping, so the result
    does not depend on the iteration order of the input and no two groups
    overlap.

    Args:
        years: Iterable of integers (typically a set).  May be empty.

    Returns:
        List of sets in ascending order; each set holds one run of
        consecutive years, and a lone year forms a one-element set.
    """
    consecutive_years = []
    previous = None
    for year in sorted(set(years)):
        if consecutive_years and year - previous == 1:
            consecutive_years[-1].add(year)  # extends the current run
        else:
            consecutive_years.append({year})  # gap: start a new run
        previous = year
    return consecutive_years
Example:
group_years({2016, 2017, 2019, 2020, 2022})
Out[54]: [{2016, 2017}, {2019, 2020}, {2022}]

Subdividing python integer list into groups of linearly spaced items [duplicate]

In this other SO post, a Python user asked how to group continuous numbers such that any sequences could just be represented by its start/end and any stragglers would be displayed as single items. The accepted answer works brilliantly for continuous sequences.
I need to be able to adapt a similar solution but for a sequence of numbers that have potentially (not always) varying increments. Ideally, how I represent that will also include the increment (so they'll know if it was every 3, 4, 5, nth)
Referencing the original question, the user asked for the following input/output
[2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20] # input
[(2,5), (12,17), 20]
What I would like is the following (Note: I wrote a tuple as the output for clarity but xrange would be preferred using its step variable):
[2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20] # input
[(2,5,1), (12,17,1), 20] # note, the last element in the tuple would be the step value
And it could also handle the following input
[2, 4, 6, 8, 12, 13, 14, 15, 16, 17, 20] # input
[(2,8,2), (12,17,1), 20] # note, the last element in the tuple would be the increment
I know that xrange() supports a step so it may be possible to even use a variant of the other user's answer. I tried making some edits based on what they wrote in the explanation but I wasn't able to get the result I was looking for.
For anyone that doesn't want to click the original link, the code that was originally posted by Nadia Alramli is:
ranges = []
for key, group in groupby(enumerate(data), lambda (index, item): index - item):
group = map(itemgetter(1), group)
if len(group) > 1:
ranges.append(xrange(group[0], group[-1]))
else:
ranges.append(group[0])
The itertools pairwise recipe is one way to solve the problem. Applied with itertools.groupby, groups of pairs whose mathematical difference are equivalent can be created. The first and last items of each group are then selected for multi-item groups or the last item is selected for singleton groups:
from itertools import groupby, tee, izip
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = tee(iterable)
next(b, None)
return izip(a, b)
def grouper(lst):
result = []
for k, g in groupby(pairwise(lst), key=lambda x: x[1] - x[0]):
g = list(g)
if len(g) > 1:
try:
if g[0][0] == result[-1]:
del result[-1]
elif g[0][0] == result[-1][1]:
g = g[1:] # patch for duplicate start and/or end
except (IndexError, TypeError):
pass
result.append((g[0][0], g[-1][-1], k))
else:
result.append(g[0][-1]) if result else result.append(g[0])
return result
Trial: input -> grouper(lst) -> output
Input: [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
Output: [(2, 5, 1), (12, 17, 1), 20]
Input: [2, 4, 6, 8, 12, 13, 14, 15, 16, 17, 20]
Output: [(2, 8, 2), (12, 17, 1), 20]
Input: [2, 4, 6, 8, 12, 12.4, 12.9, 13, 14, 15, 16, 17, 20]
Output: [(2, 8, 2), 12, 12.4, 12.9, (13, 17, 1), 20] # 12 does not appear in the second group
Update: (patch for duplicate start and/or end values)
s1 = [i + 10 for i in xrange(0, 11, 2)]; s2 = [30]; s3 = [i + 40 for i in xrange(45)]
Input: s1+s2+s3
Output: [(10, 20, 2), (30, 40, 10), (41, 84, 1)]
# to make 30 appear as an entry instead of a group change main if condition to len(g) > 2
Input: s1+s2+s3
Output: [(10, 20, 2), 30, (41, 84, 1)]
Input: [2, 4, 6, 8, 10, 12, 13, 14, 15, 16, 17, 20]
Output: [(2, 12, 2), (13, 17, 1), 20]
You can create an iterator to help grouping and try to pull the next element from the following group which will be the end of the previous group:
def ranges(lst):
it = iter(lst)
next(it) # move to second element for comparison
grps = groupby(lst, key=lambda x: (x - next(it, -float("inf"))))
for k, v in grps:
i = next(v)
try:
step = next(v) - i # catches single element v or gives us a step
nxt = list(next(grps)[1])
yield xrange(i, nxt.pop(0), step)
# outliers or another group
if nxt:
yield nxt[0] if len(nxt) == 1 else xrange(nxt[0], next(next(grps)[1]), nxt[1] - nxt[0])
except StopIteration:
yield i # no seq
which give you:
In [2]: l1 = [2, 3, 4, 5, 8, 10, 12, 14, 13, 14, 15, 16, 17, 20, 21]
In [3]: l2 = [2, 4, 6, 8, 12, 13, 14, 15, 16, 17, 20]
In [4]: l3 = [13, 14, 15, 16, 17, 18]
In [5]: s1 = [i + 10 for i in xrange(0, 11, 2)]
In [6]: s2 = [30]
In [7]: s3 = [i + 40 for i in xrange(45)]
In [8]: l4 = s1 + s2 + s3
In [9]: l5 = [1, 2, 5, 6, 9, 10]
In [10]: l6 = {1, 2, 3, 5, 6, 9, 10, 13, 19, 21, 22, 23, 24}
In [11]:
In [11]: for l in (l1, l2, l3, l4, l5, l6):
....: print(list(ranges(l)))
....:
[xrange(2, 5), xrange(8, 14, 2), xrange(13, 17), 20, 21]
[xrange(2, 8, 2), xrange(12, 17), 20]
[xrange(13, 18)]
[xrange(10, 20, 2), 30, xrange(40, 84)]
[1, 2, 5, 6, 9, 10]
[xrange(1, 3), 5, 6, 9, 10, 13, 19, xrange(21, 24)]
When the step is 1 it is not included in the xrange output.
Here is a quickly written (and extremely ugly) answer:
def test(inArr):
arr=inArr[:] #copy, unnecessary if we use index in a smart way
result = []
while len(arr)>1: #as long as there can be an arithmetic progression
x=[arr[0],arr[1]] #take first two
arr=arr[2:] #remove from array
step=x[1]-x[0]
while len(arr)>0 and x[1]+step==arr[0]: #check if the next value in array is part of progression too
x[1]+=step #add it
arr=arr[1:]
result.append((x[0],x[1],step)) #append progression to result
if len(arr)==1:
result.append(arr[0])
return result
print test([2, 4, 6, 8, 12, 13, 14, 15, 16, 17, 20])
This returns [(2, 8, 2), (12, 17, 1), 20]
Slow, as it copies a list and removes elements from it
It only finds complete progressions, and only in sorted arrays.
In short, it is shitty, but should work ;)
There are other (cooler, more pythonic) ways to do this, for example you could convert your list to a set, keep removing two elements, calculate their arithmetic progression and intersect with the set.
You could also reuse the answer you provided to check for certain step sizes. e.g.:
ranges = []
step_size=2
for key, group in groupby(enumerate(data), lambda (index, item): step_size*index - item):
group = map(itemgetter(1), group)
if len(group) > 1:
ranges.append(xrange(group[0], group[-1]))
else:
ranges.append(group[0])
Which finds every group with step size of 2, but only those.
I came across such a case once. Here it goes.
import more_itertools as mit
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20] # input
x = [list(group) for group in mit.consecutive_groups(iterable)]
output = [(i[0],i[-1]) if len(i)>1 else i[0] for i in x]
print(output)

Python: How to range() multiple values from list or dictionary?

Im new to programming. Trying to range numbers - For example if i want to range more than one range, 1..10 20...30 50...100. Where i need to store them(list or dictionary) and how to use them one by one?
example = range(1,10)
exaple2 = range(20,30)
for b in example:
print b
Or you can use yield from (available since Python 3.3):
def ranger():
    """Yield 1..9, then 20..29, then 50..99, as one continuous stream."""
    bounds = ((1, 10), (20, 30), (50, 100))
    for low, high in bounds:
        yield from range(low, high)
for x in ranger():
print(x)
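In Python 2, the range function returns a list. If you want a list of multiple ranges, you need to concatenate these lists (in Python 3, range returns a lazy sequence, so wrap each call in list() before concatenating). For example: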
range(1, 5) + range(11, 15)
returns [1, 2, 3, 4, 11, 12, 13, 14]
The built-in range function gives you the numbers within the given bounds.
Syntax:
range(x) - returns list starting from 0 to x-1
>>> range(10)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>
range(x,y) - returns list starting from x to y-1
>>> range(10,20)
[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
>>>
range(x,y,stepsize) - returns list starting from x to y-1 with stepsize
>>> range(10,20,2)
[10, 12, 14, 16, 18]
>>>
In Python3.x you can do:
output = [*range(1, 10), *range(20, 30)]
or using itertools.chain function:
from itertools import chain
data = [range(1, 10), range(20, 30)]
output = [*chain(*data)]
or using chain.from_iterable function
from itertools import chain
data = [range(1, 10), range(20, 30)]
output = [*chain.from_iterable(data)]
output:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]

Identify groups of consecutive numbers in a list

I'd like to identify groups of consecutive numbers in a list, so that:
myfunc([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
Returns:
[(2,5), (12,17), 20]
And was wondering what the best way to do this was (particularly if there's something inbuilt into Python).
Edit: Note I originally forgot to mention that individual numbers should be returned as individual numbers, not ranges.
EDIT 2: To answer the OP new requirement
ranges = []
for key, group in groupby(enumerate(data), lambda (index, item): index - item):
group = map(itemgetter(1), group)
if len(group) > 1:
ranges.append(xrange(group[0], group[-1]))
else:
ranges.append(group[0])
Output:
[xrange(2, 5), xrange(12, 17), 20]
You can replace xrange with range or any other custom class.
Python docs have a very neat recipe for this:
from operator import itemgetter
from itertools import groupby
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
print(map(itemgetter(1), g))
Output:
[2, 3, 4, 5]
[12, 13, 14, 15, 16, 17]
If you want to get the exact same output, you can do this:
ranges = []
for k, g in groupby(enumerate(data), lambda (i,x):i-x):
group = map(itemgetter(1), g)
ranges.append((group[0], group[-1]))
output:
[(2, 5), (12, 17)]
EDIT: The example is already explained in the documentation but maybe I should explain it more:
The key to the solution is
differencing with a range so that
consecutive numbers all appear in same
group.
If the data was: [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
Then groupby(enumerate(data), lambda (i,x):i-x) is equivalent of the following:
groupby(
[(0, 2), (1, 3), (2, 4), (3, 5), (4, 12),
(5, 13), (6, 14), (7, 15), (8, 16), (9, 17)],
lambda (i,x):i-x
)
The lambda function subtracts the element index from the element value. So when you apply the lambda on each item. You'll get the following keys for groupby:
[-2, -2, -2, -2, -8, -8, -8, -8, -8, -8]
groupby groups elements by equal key value, so the first 4 elements will be grouped together and so forth.
I hope this makes it more readable.
python 3 version may be helpful for beginners
import the libraries required first
from itertools import groupby
from operator import itemgetter
ranges =[]
for k,g in groupby(enumerate(data),lambda x:x[0]-x[1]):
group = (map(itemgetter(1),g))
group = list(map(int,group))
ranges.append((group[0],group[-1]))
more_itertools.consecutive_groups was added in version 4.0.
Demo
import more_itertools as mit
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
[list(group) for group in mit.consecutive_groups(iterable)]
# [[2, 3, 4, 5], [12, 13, 14, 15, 16, 17], [20]]
Code
Applying this tool, we make a generator function that finds ranges of consecutive numbers.
def find_ranges(iterable):
    """Yield each run of consecutive numbers found in *iterable*.

    A run with a single member is yielded as that bare number; a longer
    run is yielded as a ``(first, last)`` tuple.
    """
    for run in mit.consecutive_groups(iterable):
        members = list(run)
        first, last = members[0], members[-1]
        yield first if len(members) == 1 else (first, last)
iterable = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
list(find_ranges(iterable))
# [(2, 5), (12, 17), 20]
The source implementation emulates a classic recipe (as demonstrated by #Nadia Alramli).
Note: more_itertools is a third-party package installable via pip install more_itertools.
The "naive" solution, which I find fairly readable at least:
x = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 22, 25, 26, 28, 51, 52, 57]
def group(L):
    """Yield ``(first, last)`` for each run of consecutive numbers in *L*.

    A value belongs to the current run when it is exactly one greater than
    the run's last value; a lone value is yielded as ``(n, n)``.

    Args:
        L: Iterable of numbers, expected in ascending order.  It may be
            empty, in which case nothing is yielded.

    Yields:
        Tuples ``(first, last)`` in input order.
    """
    values = iter(L)
    try:
        first = last = next(values)
    except StopIteration:
        return  # Empty input: there is no group to report
    for n in values:
        if n - 1 == last:  # Part of the group, bump the end
            last = n
        else:  # Not part of the group, yield current group and start a new
            yield first, last
            first = last = n
    yield first, last  # Yield the last group
>>>print list(group(x))
[(2, 5), (12, 17), (22, 22), (25, 26), (28, 28), (51, 52), (57, 57)]
Assuming your list is sorted:
>>> from itertools import groupby
>>> def ranges(lst):
pos = (j - i for i, j in enumerate(lst))
t = 0
for i, els in groupby(pos):
l = len(list(els))
el = lst[t]
t += l
yield range(el, el+l)
>>> lst = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17]
>>> list(ranges(lst))
[range(2, 6), range(12, 18)]
Here it is something that should work, without any import needed:
def myfunc(lst):
    """Collapse runs of consecutive numbers in *lst*.

    Args:
        lst: Iterable of numbers, expected in ascending order.  May be empty.

    Returns:
        A list in which every run of two or more consecutive numbers is a
        ``(start, end)`` tuple and every isolated number appears as itself.
        An empty input gives an empty list.
    """
    ret = []
    items = iter(lst)
    try:
        a = b = next(items)  # a and b are the current range's bounds
    except StopIteration:
        return ret  # nothing to group
    for el in items:
        if el == b + 1:
            b = el  # range grows
        else:  # range ended
            ret.append(a if a == b else (a, b))  # is a single or a range?
            a = b = el  # start again with a single
    ret.append(a if a == b else (a, b))  # flush the last single/range
    return ret
Please note that the code using groupby doesn't work as given in Python 3 so use this.
for k, g in groupby(enumerate(data), lambda x:x[0]-x[1]):
group = list(map(itemgetter(1), g))
ranges.append((group[0], group[-1]))
This doesn't use a standard function - it just iterates over the input, but it should work:
def myfunc(l):
    """Format the runs of consecutive numbers in *l* as a string.

    Runs of two or more consecutive numbers are written ``start-end``,
    isolated numbers are written on their own, and the pieces are joined
    with ``", "`` inside parentheses, e.g. ``'(2-5, 12-17, 20)'``.

    Args:
        l: Iterable of integers in ascending order.  Zero and negative
            values are handled; an empty input gives ``'()'``.

    Returns:
        The formatted string.
    """
    def label(first, last):
        return '%s-%s' % (first, last) if last > first else str(first)

    r = []
    p = q = None  # bounds of the run being built; None until a value is seen
    for x in l:
        if p is not None and x - 1 == q:
            q += 1
            continue
        # Compare against None, not truthiness, so a run starting at 0 is kept.
        if p is not None:
            r.append(label(p, q))
        p = q = x
    # Flush the final run explicitly instead of relying on a sentinel value.
    if p is not None:
        r.append(label(p, q))
    return '(%s)' % ', '.join(r)
Note that it requires that the input contains only positive numbers in ascending order. You should validate the input, but this code is omitted for clarity.
import numpy as np
myarray = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
sequences = np.split(myarray, np.array(np.where(np.diff(myarray) > 1)[0]) + 1)
l = []
for s in sequences:
if len(s) > 1:
l.append((np.min(s), np.max(s)))
else:
l.append(s[0])
print(l)
Output:
[(2, 5), (12, 17), 20]
I think this way is simpler than any of the answers I've seen here (Edit: fixed based on comment from Pleastry):
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
starts = [x for x in data if x-1 not in data and x+1 in data]
ends = [x for x in data if x-1 in data and x+1 not in data and x not in starts]
singles = [x for x in data if x-1 not in data and x+1 not in data]
list(zip(starts, ends)) + singles
Output:
[(2, 5), (12, 17), 20]
Edited:
As #dawg notes, this is O(n**2). One option to improve performance would be to convert the original list to a set (and also the starts list to a set) i.e.
data = [2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20]
data_as_set = set(data)  # O(1) membership tests
# Walk the values in sorted order: a set has no guaranteed iteration order,
# and zip() below pairs starts with ends purely by position.
ordered = sorted(data_as_set)
starts = [x for x in ordered if x-1 not in data_as_set and x+1 in data_as_set]
startset = set(starts)
ends = [x for x in ordered if x-1 in data_as_set and x+1 not in data_as_set and x not in startset]
singles = [x for x in ordered if x-1 not in data_as_set and x+1 not in data_as_set]
print(list(zip(starts, ends)) + singles)
Using groupby and count from itertools gives us a short solution. The idea is that, in an increasing sequence, the difference between the index and the value will remain the same.
In order to keep track of the index, we can use an itertools.count, which makes the code cleaner than using enumerate:
from itertools import groupby, count
def intervals(data):
    """Return ``[first, last]`` for every run of consecutive values in *data*.

    Within a run that rises by one per element, ``value - position`` is
    constant, so grouping on that difference separates the runs.
    """
    position = count()

    def offset(value):
        # Draws the next position each time the key is evaluated.
        return value - next(position)

    runs = (list(members) for _, members in groupby(data, key=offset))
    return [[run[0], run[-1]] for run in runs]
Some sample output:
print(intervals([0, 1, 3, 4, 6]))
# [[0, 1], [3, 4], [6, 6]]
print(intervals([2, 3, 4, 5]))
# [[2, 5]]
This is my method in which I tried to prioritize readability. Note that it returns a tuple of the same values if there is only one value in a group. That can be fixed easily in the second snippet I'll post.
def group(values):
    """Yield the first and last value of each continuous run in *values*.

    The input is sorted first, so it may be given in any order.  A run
    continues while each value is at most 1 greater than the previous one;
    a lone value is yielded as ``(n, n)``.

    Args:
        values: Iterable of numbers.  May be empty, in which case nothing
            is yielded.

    Yields:
        ``(first, last)`` tuples in ascending order.
    """
    values = sorted(values)
    if not values:
        return  # nothing to group
    first = last = values[0]
    for value in values[1:]:
        if value - last > 1:  # triggered if in a new group
            yield first, last
            first = value  # update first only if in a new group
        last = value  # update last on every iteration
    yield first, last  # this is needed to yield the last set of numbers
Here is the result of a test:
values = [0, 5, 6, 7, 12, 13, 21, 22, 23, 24, 25, 26, 30, 44, 45, 50]
result = list(group(values))
print(result)
result = [(0, 0), (5, 7), (12, 13), (21, 26), (30, 30), (44, 45), (50, 50)]
If you want to return only a single value in the case of a single value in a group, just add a conditional check to the yields:
def group(values):
    """Yield each continuous run in *values*, collapsing lone values.

    The input is sorted first, so it may be given in any order.  A run
    continues while each value is at most 1 greater than the previous one.

    Args:
        values: Iterable of numbers.  May be empty, in which case nothing
            is yielded.

    Yields:
        ``(first, last)`` for a run spanning more than one value, or the
        bare value when the run's first and last values are equal.
    """
    values = sorted(values)
    if not values:
        return  # nothing to group
    first = last = values[0]
    for value in values[1:]:
        if value - last > 1:  # triggered if in a new group
            yield first if first == last else (first, last)
            first = value  # update first only if in a new group
        last = value  # update last on every iteration
    # Emit the run that was still open when the input ended.
    yield first if first == last else (first, last)
result = [0, (5, 7), (12, 13), (21, 26), 30, (44, 45), 50]
Here's the answer I came up with. I'm writing the code for other people to understand, so I'm fairly verbose with variable names and comments.
First a quick helper function:
def getpreviousitem(mylist, myitem):
    '''Return the element just before the first occurrence of myitem.

    Returns None when myitem is the first element of mylist or does not
    occur in it at all.
    '''
    found_at = next(
        (idx for idx, candidate in enumerate(mylist) if candidate == myitem),
        None,
    )
    if found_at is None or found_at == 0:
        return None
    return mylist[found_at - 1]
And then the actual code:
def getranges(cpulist):
    '''Given a sorted list of numbers, return a list of ranges.

    A range is a two-element list [start, end] covering a run in which
    every value is exactly 1 greater than the value before it.  Values
    that are not part of such a run are left out of the result.

    The predecessor of each value is tracked during a single pass, so the
    input is walked once instead of being re-scanned for every item.  A
    value repeated back-to-back keeps the predecessor of its first
    occurrence.

    Args:
        cpulist: Sorted iterable of numbers.  May be empty.

    Returns:
        List of [start, end] lists, in input order.
    '''
    rangelist = []
    newrange = None          # range being extended, or None when outside one
    nothing_seen = object()  # marks "no item processed yet"
    lastitem = nothing_seen
    previousitem = None      # value preceding the current (distinct) value
    for item in cpulist:
        if lastitem is not nothing_seen and item != lastitem:
            previousitem = lastitem
        lastitem = item
        if previousitem is not None and previousitem == item - 1:
            if newrange is None:
                # We've found a new range.
                newrange = [item - 1, item]
            else:
                # Existing range - move its end to the current item.
                newrange[1] = item
        elif newrange is not None:
            # We were in a range but it just ended: save it.
            rangelist.append(newrange)
            newrange = None
    # Add the final range found to our list.
    if newrange is not None:
        rangelist.append(newrange)
    return rangelist
Example run:
getranges([2, 3, 4, 5, 12, 13, 14, 15, 16, 17])
returns:
[[2, 5], [12, 17]]
Using numpy + comprehension lists:
With numpy diff function, consequent input vector entries that their difference is not equal to one can be identified. The start and end of the input vector need to be considered.
import numpy as np
data = np.array([2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 20])
d = [i for i, df in enumerate(np.diff(data)) if df!= 1]
d = np.hstack([-1, d, len(data)-1]) # add first and last elements
d = np.vstack([d[:-1]+1, d[1:]]).T
print(data[d])
Output:
[[ 2 5]
[12 17]
[20 20]]
Note: The request that individual numbers should be treated differently, (returned as individual, not ranges) was omitted. This can be reached by further post-processing the results. Usually this will make things more complex without gaining any benefit.
One-liner in Python 2.7 if interested:
x = [2, 3, 6, 7, 8, 14, 15, 19, 20, 21]
d = iter(x[:1] + sum(([i1, i2] for i1, i2 in zip(x, x[1:] + x[:1]) if i2 != i1+1), []))
print zip(d, d)
>>> [(2, 3), (6, 8), (14, 15), (19, 21)]
A short solution that works without additional imports. It accepts any iterable, sorts unsorted inputs, and removes duplicate items:
def ranges(nums):
    """Return ``(start, end)`` tuples for the consecutive runs in *nums*.

    Duplicates are dropped and the values are sorted before grouping, so
    any iterable of numbers is accepted.  A lone value ``n`` is reported
    as ``(n, n)``.
    """
    ordered = sorted(set(nums))
    # Collect run boundaries in order: the first value, then both sides of
    # every gap, then the last value.
    boundaries = ordered[:1]
    for left, right in zip(ordered, ordered[1:]):
        if left + 1 < right:
            boundaries.extend((left, right))
    boundaries += ordered[-1:]
    # Boundaries alternate start, end, start, end, ...
    return list(zip(boundaries[::2], boundaries[1::2]))
Example:
>>> ranges([2, 3, 4, 7, 8, 9, 15])
[(2, 4), (7, 9), (15, 15)]
>>> ranges([-1, 0, 1, 2, 3, 12, 13, 15, 100])
[(-1, 3), (12, 13), (15, 15), (100, 100)]
>>> ranges(range(100))
[(0, 99)]
>>> ranges([0])
[(0, 0)]
>>> ranges([])
[]
This is the same as #dansalmo's solution which I found amazing, albeit a bit hard to read and apply (as it's not given as a function).
Note that it could easily be modified to spit out "traditional" open ranges [start, end), by e.g. altering the return statement:
return [(s, e+1) for s, e in zip(edges, edges)]
I copied this answer over from another question that was marked as a duplicate of this one with the intention to make it easier findable (after I just now searched again for this topic, finding only the question here at first and not being satisfied with the answers given).
The versions by Mark Byers, Andrea Ambu, SilentGhost, Nadia Alramli, and truppo are simple and fast. The 'truppo' version encouraged me to write a version that retains the same nimble behavior while handling step sizes other than 1 (and lists as singletons elements that don't extend more than 1 step with a given step size). It is given here.
>>> list(ranges([1,2,3,4,3,2,1,3,5,7,11,1,2,3]))
[(1, 4, 1), (3, 1, -1), (3, 7, 2), 11, (1, 3, 1)]
Not the best approach , but here is my 2 cents
def getConsecutiveValues2(arr):
    """Split ``arr`` into sub-sequences of consecutive values.

    A new group starts wherever an element is not exactly one greater than
    its predecessor, so an isolated value comes back as a one-element group.

    Args:
        arr: A sliceable sequence of numbers (e.g. a list), in the order in
            which they should be grouped.

    Returns:
        A list of slices of ``arr``, one per run, in their original order.
        Empty input gives an empty list.
    """
    final = []
    start = 0  # index at which the current run begins
    for i in range(1, len(arr)):
        if arr[i] - arr[i - 1] != 1:
            # arr[i] breaks the run: close it just before i and start a new
            # one at i.  Slicing up to i (rather than a separately tracked
            # end index) keeps isolated values from turning into empty groups.
            final.append(arr[start:i])
            start = i
    if len(arr) > 0:
        # The last run is never closed inside the loop, so emit it here; this
        # also covers single-element input, where the loop does not execute.
        final.append(arr[start:])
    return final
# Example input containing three runs of consecutive integers: 1-3, 5-6 and 8-12.
x = [1,2,3,5,6,8,9,10,11,12]
print(getConsecutiveValues2(x))
>> [[1, 2, 3], [5, 6], [8, 9, 10, 11, 12]]
This implementation works for regular or irregular steps
I needed to achieve the same thing, but with the slight difference that the steps can be irregular. This is my implementation:
def ranges(l):
    """Group the sorted values of ``l`` into runs with a constant step.

    Args:
        l: A sized collection of integers (``len(l)`` is used). It is sorted
            internally and duplicates are kept.

    Returns:
        A list with one entry per run, in ascending order: a ``range`` for a
        run with a non-zero step, or a list of repeated values for a run of
        duplicates (step 0).  Empty input gives ``[]`` and a single value
        gives a list holding one single-element ``range``, so the result can
        always be iterated as a collection of runs.
    """
    if not len(l):
        return []
    sl = sorted(l)
    if len(sl) == 1:
        return [range(sl[0], sl[0] + 1)]
    # Differences between neighbouring sorted values.
    steps = [b - a for a, b in zip(sl, sl[1:])]
    # Each group is [first_index, last_index, step], indexing into sl.
    groups = [[0, 0, steps[0]]]
    for i, s in enumerate(steps):
        prev = groups[-1]
        if s == prev[2]:
            # Same step as the current run: extend it to cover sl[i + 1].
            prev[1] = i + 1
        elif prev[0] == prev[1]:
            # The previous group is a lone element that was opened by the last
            # step change; let it start the new run instead of standing alone.
            groups[-1] = [i, i + 1, s]
        else:
            # The step changed after a real run: open a new group at sl[i + 1].
            groups.append([i + 1, i + 1, s])
    # A zero step cannot be expressed as a range, so duplicates are returned
    # as a plain list of the repeated value.
    return [
        range(sl[first], sl[last] + step, step) if step != 0
        else [sl[first]] * (last + 1 - first)
        for first, last, step in groups
    ]
Here's an example
from timeit import timeit
# for regular ranges
l = list(range(1000000))
ranges(l)
>>> [range(0, 1000000)]
l = list(range(10)) + list(range(20,25)) + [1,2,3]
ranges(l)
>>> [range(0, 2), range(1, 3), range(2, 4), range(3, 10), range(20, 25)]
sorted(l);[list(i) for i in ranges(l)]
>>> [0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24]
>>> [[0, 1], [1, 2], [2, 3], [3, 4, 5, 6, 7, 8, 9], [20, 21, 22, 23, 24]]
# for irregular steps list
l = [1, 3, 5, 7, 10, 11, 12, 100, 200, 300, 400, 60, 99, 4000,4001]
ranges(l)
>>> [range(1, 9, 2), range(10, 13), range(60, 138, 39), range(100, 500, 100), range(4000, 4002)]
## Speed test
timeit("ranges(l)","from __main__ import ranges,l", number=1000)/1000
>>> 9.303160999934334e-06
Yet another solution if you expect your input to be a set:
def group_years(years):
    """Group years into sets of consecutive values.

    Args:
        years: An iterable of integer years (typically a set); duplicates
            are ignored.

    Returns:
        A list of sets in ascending order, one per run of consecutive years.
        A year with no neighbour forms a one-element set; empty input gives
        an empty list.
    """
    consecutive_years = []
    previous = None
    # Walking the years in sorted order makes the grouping independent of the
    # input's iteration order (sets are unordered) and needs only one pass.
    for year in sorted(set(years)):
        if previous is not None and year - previous == 1:
            consecutive_years[-1].add(year)
        else:
            consecutive_years.append({year})
        previous = year
    return consecutive_years
Example:
group_years({2016, 2017, 2019, 2020, 2022})
Out[54]: [{2016, 2017}, {2019, 2020}, {2022}]

Categories