python : group elements together in list - python

I'm currently working with itertools to create and return a list whose elements are lists that contain the consecutive runs of equal elements of the original list.
import itertools
it = [1, 1, 5, 5, 5, 'test', 'test', 5]
new = len(it)
for a in range(new):
return [list(k) for a, k in itertools.groupby(it)]
For the above example the result is:
[[1, 1], [5, 5, 5], ['test', 'test'], [5]]
Can I achieve this without using itertools?

You can pair adjacent items by zipping the list with itself but with a padding of float('nan') since it can't be equal to any object, and then iterate through the zipped pairs to append items to last sub-list of the output list, and add a new sub-list when the adjacent items are different:
output = []
for a, b in zip([float('nan')] + it, it):
if a != b:
output.append([])
output[-1].append(b)
output becomes:
[[1, 1], [5, 5, 5], ['test', 'test'], [5]]

To be honest a simple for loop could make this work, you don't even have to import itertools.
The simplest way to do this is by using this:
it = [1, 1, 5, 5, 5, 'test', 'test', 5]
result = []
for (i, x) in enumerate(it):
if i < 1 or type(x) != type(it[i - 1]) or x != it[i - 1]:
result.append([x])
else:
result[-1].append(x)
print(result)
Or, in function form:
def type_chunk(it):
result = []
for (i, x) in enumerate(it):
if i < 1 or type(x) != type(it[i - 1]) or x != it[i - 1]:
result.append([x])
else:
result[-1].append(x)
return result
You would then use the function like this:
print(type_chunk([1, 1, 5, 5, 5, 'test', 'test', 5]))
You could even skip the type checking and only look for equal values:
def type_chunk(it):
result = []
for (i, x) in enumerate(it):
if i < 1 or x != it[i - 1]:
result.append([x])
else:
result[-1].append(x)
return result
Good luck.

You could have a look at the function in itertools to see how they are doing it.
Here is one way which shows the logic clearly (can be further reduced):
def i_am_itertool():
it = [1, 1, 5, 5, 5, 'test', 'test', 5]
ret = []
temp = []
last = it[0]
for e in it:
if e == last:
temp.append(e)
else:
ret.append(temp) # Add previous group
temp = [e] # Start next group
last = e
ret.append(temp) # Add final group
return ret
print(i_am_itertool())
Output:
[[1, 1], [5, 5, 5], ['test', 'test'], [5]]

Related

Finding next max value from index of nested lists in python?

I'm trying to find the next maximum value of nested lists, I already have a nested list sorted by bubblesort, I need to take the largest element of each nested list and insert it into the solution vector, until the solution vector is sorted.
P.S: I can't delete the element from the initial nested list, only find the next maximum value.
See the image at the bottom as an example:
Nested_list = [[1, 7, 9], [4, 5, 6], [2, 3, 8], [0]]
The way I devised deleted the largest vector from the original list, which was quite time consuming, I believe that just moving the index to the next largest value will consume less time:
def bubbleSort(array):
n = len(array)-1
for i in range(n):
for j in range(0, n-i):
if array[j] > array[j+1]:
array[j], array[j+1] = array[j+1], array[j]
else:
continue
return array
def ordena_lista(output):
for sublista in output:
bubbleSort(sublista)
def maior_valor_lista(output):
return list(el[-1] for el in output)
def nested_remove(L, x):
if x in L:
L.remove(x)
else:
for element in L:
if type(element) is list:
nested_remove(element, x)
b = list(random.sample(range(10), 10))
n= m.floor(m.sqrt(len(b)))
output=list([b[i:i + n] for i in range(0, len(b), n)])
ordena_lista(b)
while output:
valores_maximo = maior_valor_lista(output)
var = max(valores_maximo, key=int)
final = [var] + final
nested_remove(output, var)
output = list(filter(None, output))
The simplest solution would be the following,
from functools import reduce
from operator import add
def sort_nested_list(nested_list):
return sorted(reduce(add, nested_list))
but, without knowing the exact implementation details of python's sorted, I can't tell you if it takes advantage of your pre-sorting.
If we know the sublists are sorted, and we are allowed to copy the list, and we know how many elements there are in total, we can write the following,
import math
from copy import deepcopy
def get_max_and_pop(nested_list):
""" find the maximum element of a sublist of nested_list, remove it from the sublist, and return it """
print(f"get_max_and_pop says: {nested_list}")
return max(nested_list, key=lambda x: x[-1:]).pop()
def sort_nested_list_whose_sublists_are_sorted(nested_list, n_elements):
nested_list_copy = deepcopy(nested_list)
return [get_max_and_pop(nested_list=nested_list_copy) for _ in range(n_elements)][::-1]
edit: without knowledge of the number of elements, we can write,
from copy import deepcopy
def sort_nested_list_whose_sublists_are_sorted_iter(nested_list):
nested_list_copy = deepcopy(nested_list)
while any(nested_list_copy):
yield max(nested_list_copy, key=lambda x: x[-1:]).pop()
This amounts to a bizarre, woefully inefficient and completely unnecessary sorting algorithm but here goes anyway:
Nested_list = [[9, 7, 1], [4, 5, 6], [2, 3, 8], [0]]
for e in Nested_list:
e.sort()
Output_list = []
Nested_list_copy = [[e_ for e_ in e] for e in Nested_list]
element_count = sum(len(e) for e in Nested_list)
for _ in range(element_count):
m = None
for i, e in enumerate(Nested_list_copy):
if e:
tm = e[-1]
if m is None or tm > m:
m = tm
k = i
Output_list.insert(0, Nested_list_copy[k].pop())
print(Nested_list)
print(Output_list)
Output:
[[1, 7, 9], [4, 5, 6], [2, 3, 8], [0]]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

how to combine same matching items in a 1d list and make it as 2d list [duplicate]

From this list:
N = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
I'm trying to create:
L = [[1],[2,2],[3,3,3],[4,4,4,4],[5,5,5,5,5]]
Any value which is found to be the same is grouped into it's own sublist.
Here is my attempt so far, I'm thinking I should use a while loop?
global n
n = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5] #Sorted list
l = [] #Empty list to append values to
def compare(val):
""" This function receives index values
from the n list (n[0] etc) """
global valin
valin = val
global count
count = 0
for i in xrange(len(n)):
if valin == n[count]: # If the input value i.e. n[x] == n[iteration]
temp = valin, n[count]
l.append(temp) #append the values to a new list
count +=1
else:
count +=1
for x in xrange (len(n)):
compare(n[x]) #pass the n[x] to compare function
Use itertools.groupby:
from itertools import groupby
N = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
print([list(j) for i, j in groupby(N)])
Output:
[[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5]]
Side note: Prevent from using global variable when you don't need to.
Someone mentions for N=[1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 1] it will get [[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5], [1]]
In other words, when numbers of the list isn't in order or it is a mess list, it's not available.
So I have better answer to solve this problem.
from collections import Counter
N = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
C = Counter(N)
print [ [k,]*v for k,v in C.items()]
You can use itertools.groupby along with a list comprehension
>>> l = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
>>> [list(v) for k,v in itertools.groupby(l)]
[[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5]]
This can be assigned to the variable L as in
L = [list(v) for k,v in itertools.groupby(l)]
You're overcomplicating this.
What you want to do is: for each value, if it's the same as the last value, just append it to the list of last values; otherwise, create a new list. You can translate that English directly to Python:
new_list = []
for value in old_list:
if new_list and new_list[-1][0] == value:
new_list[-1].append(value)
else:
new_list.append([value])
There are even simpler ways to do this if you're willing to get a bit more abstract, e.g., by using the grouping functions in itertools. But this should be easy to understand.
If you really need to do this with a while loop, you can translate any for loop into a while loop like this:
for value in iterable:
do_stuff(value)
iterator = iter(iterable)
while True:
try:
value = next(iterator)
except StopIteration:
break
do_stuff(value)
Or, if you know the iterable is a sequence, you can use a slightly simpler while loop:
index = 0
while index < len(sequence):
value = sequence[index]
do_stuff(value)
index += 1
But both of these make your code less readable, less Pythonic, more complicated, less efficient, easier to get wrong, etc.
You can do that using numpy too:
import numpy as np
N = np.array([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5])
counter = np.arange(1, np.alen(N))
L = np.split(N, counter[N[1:]!=N[:-1]])
The advantage of this method is when you have another list which is related to N and you want to split it in the same way.
Another slightly different solution that doesn't rely on itertools:
#!/usr/bin/env python
def group(items):
"""
groups a sorted list of integers into sublists based on the integer key
"""
if len(items) == 0:
return []
grouped_items = []
prev_item, rest_items = items[0], items[1:]
subgroup = [prev_item]
for item in rest_items:
if item != prev_item:
grouped_items.append(subgroup)
subgroup = []
subgroup.append(item)
prev_item = item
grouped_items.append(subgroup)
return grouped_items
print group([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5])
# [[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5]]

Remove sublist from list

I want to do the following in Python:
A = [1, 2, 3, 4, 5, 6, 7, 7, 7]
C = A - [3, 4] # Should be [1, 2, 5, 6, 7, 7, 7]
C = A - [4, 3] # Should not be removing anything, because sequence 4, 3 is not found
So, I simply want to remove the first appearance of a sublist (as a sequence) from another list. How can I do that?
Edit: I am talking about lists, not sets. Which implies that ordering (sequence) of items matter (both in A and B), as well as duplicates.
Use sets:
C = list(set(A) - set(B))
In case you want to mantain duplicates and/or oder:
filter_set = set(B)
C = [x for x in A if x not in filter_set]
If you want to remove exact sequences, here is one way:
Find the bad indices by checking to see if the sublist matches the desired sequence:
bad_ind = [range(i,i+len(B)) for i,x in enumerate(A) if A[i:i+len(B)] == B]
print(bad_ind)
#[[2, 3]]
Since this returns a list of lists, flatten it and turn it into a set:
bad_ind_set = set([item for sublist in bad_ind for item in sublist])
print(bad_ind_set)
#set([2, 3])
Now use this set to filter your original list, by index:
C = [x for i,x in enumerate(A) if i not in bad_ind_set]
print(C)
#[1, 2, 5, 6, 7, 7, 7]
The above bad_ind_set will remove all matches of the sequence. If you only want to remove the first match, it's even simpler. You just need the first element of bad_ind (no need to flatten the list):
bad_ind_set = set(bad_ind[0])
Update: Here is a way to find and remove the first matching sub-sequence using a short circuiting for loop. This will be faster because it will break out once the first match is found.
start_ind = None
for i in range(len(A)):
if A[i:i+len(B)] == B:
start_ind = i
break
C = [x for i, x in enumerate(A)
if start_ind is None or not(start_ind <= i < (start_ind + len(B)))]
print(C)
#[1, 2, 5, 6, 7, 7, 7]
I considered this question was like one substring search, so KMP, BM etc sub-string search algorithm could be applied at here. Even you'd like support multiple patterns, there are some multiple pattern algorithms like Aho-Corasick, Wu-Manber etc.
Below is KMP algorithm implemented by Python which is from GitHub Gist.
PS: the author is not me. I just want to share my idea.
class KMP:
def partial(self, pattern):
""" Calculate partial match table: String -> [Int]"""
ret = [0]
for i in range(1, len(pattern)):
j = ret[i - 1]
while j > 0 and pattern[j] != pattern[i]:
j = ret[j - 1]
ret.append(j + 1 if pattern[j] == pattern[i] else j)
return ret
def search(self, T, P):
"""
KMP search main algorithm: String -> String -> [Int]
Return all the matching position of pattern string P in S
"""
partial, ret, j = self.partial(P), [], 0
for i in range(len(T)):
while j > 0 and T[i] != P[j]:
j = partial[j - 1]
if T[i] == P[j]: j += 1
if j == len(P):
ret.append(i - (j - 1))
j = 0
return ret
Then use it to calcuate out the matched position, finally remove the match:
A = [1, 2, 3, 4, 5, 6, 7, 7, 7, 3, 4]
B = [3, 4]
result = KMP().search(A, B)
print(result)
#assuming at least one match is found
print(A[:result[0]:] + A[result[0]+len(B):])
Output:
[2, 9]
[1, 2, 5, 6, 7, 7, 7, 3, 4]
[Finished in 0.201s]
PS: You can try other algorithms also. And #Pault 's answers is good enough unless you care about the performance a lot.
Here is another approach:
# Returns that starting and ending point (index) of the sublist, if it exists, otherwise 'None'.
def findSublist(subList, inList):
subListLength = len(subList)
for i in range(len(inList)-subListLength):
if subList == inList[i:i+subListLength]:
return (i, i+subListLength)
return None
# Removes the sublist, if it exists and returns a new list, otherwise returns the old list.
def removeSublistFromList(subList, inList):
indices = findSublist(subList, inList)
if not indices is None:
return inList[0:indices[0]] + inList[indices[1]:]
else:
return inList
A = [1, 2, 3, 4, 5, 6, 7, 7, 7]
s1 = [3,4]
B = removeSublistFromList(s1, A)
print(B)
s2 = [4,3]
C = removeSublistFromList(s2, A)
print(C)

How to to split a list at a certain value

given a unique valued list such as [5,4,9,2,1,7,'dog',9] is there a way to split it a a certain value? ie
[5,4,9,2,7,'dog'].split(4)
= [5,4],[9,2,7,'dog']
[5,4,9,2,7,'dog'].split(2)
= [5,4,9,2], [7,'dog']
?
>>> mylist = [5,4,9,2,7,'dog']
>>> def list_split(l, element):
... if l[-1] == element:
... return l, []
... delimiter = l.index(element)+1
... return l[:delimiter], l[delimiter:]
...
>>> list_split(mylist, 4)
([5, 4], [9, 2, 7, 'dog'])
>>> list_split(mylist, 2)
([5, 4, 9, 2], [7, 'dog'])
>>> list_split(mylist, 'dog')
([5, 4, 9, 2, 7, 'dog'], [])
Here is a function that will split a list into an arbitrary number of sub-lists and return a list of them, much like a string split.
def split_list(input_list,seperator):
outer = []
inner = []
for elem in input_list:
if elem == separator:
if inner:
outer.append(inner)
inner = []
else:
inner.append(elem)
if inner:
outer.append(inner)
return outer
>>> split_list([1,2,3,4,1,2,3,4,4,3,4,3],3)
[[1, 2], [4, 1, 2], [4, 4], [4]]
If the separator element is to be retained on the sub-list to the left, as specifically posed in the question, then this variation will do it:
def split_list(input_list,seperator):
outer = []
inner = []
for elem in input_list:
if elem == separator:
inner.append(elem)
outer.append(inner)
inner = []
else:
inner.append(elem)
if inner:
outer.append(inner)
return outer
>>> splitlist.split_list([1,2,3,4,1,2,3,4,4,3,4,3],3)
[[1, 2, 3], [4, 1, 2, 3], [4, 4, 3], [4, 3]]
def listsplit(l, e):
try:
i = l.index(e) + 1
return l[:i], l[i:]
except ValueError:
return l
In the case of the element being at the first or last index in the list, one of the returned lists will be empty, but I'm not entirely sure if this is a problem. In the case of ValueError from the list.index call, you'll have only the list itself returned, but if another behavior is desired, it's pretty simple to change.
Not as such, but you can pretty easily slice the list you're making using the index method and get the results you show in your example output.
>>> l1 = [5, 4, 9, 2, 7, 'dog']
>>> l1[:l1.index(4) + 1], l1[l1.index(4) + 1:]
([5, 4], [9, 2, 7, 'dog'])
The '+1' is there because you wanted the needle (4) to appear in the first list.
Note that this splits on the first occurrence, which is pretty typical. If you really want a split method as in your question, you can subclass list and write your own using something like this.
On one line:
[x[k+1:x[k+1:len(x)].index(4)+len(x[:k+1])] for k in [c for c in [-1]+[i for i, a in enumerate(x) if a == 4]][:-1] ] + [x[(len(x) - 1) - x[::-1].index(4)+1:len(x)]]
The function:
def split_list(x, value):
return [x[k+1:x[k+1:len(x)].index(value)+len(x[:k+1])] for k in [c for c in [-1]+[i for i, a in enumerate(x) if a == value]][:-1] ] + [x[(len(x) - 1) - x[::-1].index(value)+1:len(x)]]
It will work in case the separator exists several times, and in cas it exists at thy end or the beginning of the lis.
Details:
Part 1: [c for c in [-1]+[i for i, a in enumerate(x) if a == value]][:-1]
Get the indexes of occurences of value, append "-1" in the beginning and remove the index of the last occurence.
x[k+1:x[k+1:len(x)].index(value)+len(x[:k+1])]
Make a slice (let's callet Slice A) of the list starting the first time from 0 (that's why I appended [-1] -1+1=0) and then starting from occurence index +1 (to make the current occurence unreachable) with each occurence index got from Part 1, then I take another slice starting from the curent occurence index +1 ending at thy end of the list, then I look for the first occurence in this slice, I get it; because it was a slice, I add the current offset (the part from beginning + current occurence index +1) to it, at this point I get the position to stop in, this is the end of the slice A. I finally obtain a split section each time.
I removed the index of the last occurence, coz it is not relevant to calculate anything after that index, so I just apend them at thy end: + [x[(len(x) - 1) - x[::-1].index(value)+1:len(x)]]
Use slicing:
def split_list(li, item):
if item not in li:
return li
else:
return [li[0:li.index(item)+1], li[li.index(item)+1:]]
Testing:
for item in (5,4,2,'dog'):
print item, split_list([5,4,9,2,7,'dog'], item)
Prints:
5 [[5], [4, 9, 2, 7, 'dog']]
4 [[5, 4], [9, 2, 7, 'dog']]
2 [[5, 4, 9, 2], [7, 'dog']]
dog [[5, 4, 9, 2, 7, 'dog'], []]

Union find implementation using Python

So here's what I want to do: I have a list that contains several equivalence relations:
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
And I want to union the sets that share one element. Here is a sample implementation:
def union(lis):
lis = [set(e) for e in lis]
res = []
while True:
for i in range(len(lis)):
a = lis[i]
if res == []:
res.append(a)
else:
pointer = 0
while pointer < len(res):
if a & res[pointer] != set([]) :
res[pointer] = res[pointer].union(a)
break
pointer +=1
if pointer == len(res):
res.append(a)
if res == lis:
break
lis,res = res,[]
return res
And it prints
[set([1, 2, 3, 6, 7]), set([4, 5])]
This does the right thing but is way too slow when the equivalence relations is too large. I looked up the descriptions on union-find algorithm: http://en.wikipedia.org/wiki/Disjoint-set_data_structure
but I still having problem coding a Python implementation.
Solution that runs in O(n) time
def indices_dict(lis):
d = defaultdict(list)
for i,(a,b) in enumerate(lis):
d[a].append(i)
d[b].append(i)
return d
def disjoint_indices(lis):
d = indices_dict(lis)
sets = []
while len(d):
que = set(d.popitem()[1])
ind = set()
while len(que):
ind |= que
que = set([y for i in que
for x in lis[i]
for y in d.pop(x, [])]) - ind
sets += [ind]
return sets
def disjoint_sets(lis):
return [set([x for i in s for x in lis[i]]) for s in disjoint_indices(lis)]
How it works:
>>> lis = [(1,2),(2,3),(4,5),(6,7),(1,7)]
>>> indices_dict(lis)
>>> {1: [0, 4], 2: [0, 1], 3: [1], 4: [2], 5: [2], 6: [3], 7: [3, 4]})
indices_dict gives a map from an equivalence # to an index in lis. E.g. 1 is mapped to index 0 and 4 in lis.
>>> disjoint_indices(lis)
>>> [set([0,1,3,4], set([2])]
disjoint_indices gives a list of disjoint sets of indices. Each set corresponds to indices in an equivalence. E.g. lis[0] and lis[3] are in the same equivalence but not lis[2].
>>> disjoint_set(lis)
>>> [set([1, 2, 3, 6, 7]), set([4, 5])]
disjoint_set converts disjoint indices into into their proper equivalences.
Time complexity
The O(n) time complexity is difficult to see but I'll try to explain. Here I will use n = len(lis).
indices_dict certainly runs in O(n) time because only 1 for-loop
disjoint_indices is the hardest to see. It certainly runs in O(len(d)) time since the outer loop stops when d is empty and the inner loop removes an element of d each iteration. now, the len(d) <= 2n since d is a map from equivalence #'s to indices and there are at most 2n different equivalence #'s in lis. Therefore, the function runs in O(n).
disjoint_sets is difficult to see because of the 3 combined for-loops. However, you'll notice that at most i can run over all n indices in lis and x runs over the 2-tuple, so the total complexity is 2n = O(n)
I think this is an elegant solution, using the built in set functions:
#!/usr/bin/python3
def union_find(lis):
lis = map(set, lis)
unions = []
for item in lis:
temp = []
for s in unions:
if not s.isdisjoint(item):
item = s.union(item)
else:
temp.append(s)
temp.append(item)
unions = temp
return unions
if __name__ == '__main__':
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
print(union_find(l))
It returns a list of sets.
Perhaps something like this?
#!/usr/local/cpython-3.3/bin/python
import copy
import pprint
import collections
def union(list_):
dict_ = collections.defaultdict(set)
for sublist in list_:
dict_[sublist[0]].add(sublist[1])
dict_[sublist[1]].add(sublist[0])
change_made = True
while change_made:
change_made = False
for key, values in dict_.items():
for value in copy.copy(values):
for element in dict_[value]:
if element not in dict_[key]:
dict_[key].add(element)
change_made = True
return dict_
list_ = [ [1, 2], [2, 3], [4, 5], [6, 7], [1, 7] ]
pprint.pprint(union(list_))
This works by completely exhausting one equivalence at a time. When an element finds it's equivalence it is removed from the original set and no longer searched.
def equiv_sets(lis):
s = set(lis)
sets = []
#loop while there are still items in original set
while len(s):
s1 = set(s.pop())
length = 0
#loop while there are still equivalences to s1
while( len(s1) != length):
length = len(s1)
for v in list(s):
if v[0] in s1 or v[1] in s1:
s1 |= set(v)
s -= set([v])
sets += [s1]
return sets
print equiv_sets([(1,2),(2,3),(4,5),(6,7),(1,7)])
OUTPUT: [set([1, 2, 3, 6, 7]), set([4, 5])]

Categories