Union find implementation using Python - python

So here's what I want to do: I have a list that contains several equivalence relations:
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
And I want to union the sets that share one element. Here is a sample implementation:
def union(lis):
lis = [set(e) for e in lis]
res = []
while True:
for i in range(len(lis)):
a = lis[i]
if res == []:
res.append(a)
else:
pointer = 0
while pointer < len(res):
if a & res[pointer] != set([]) :
res[pointer] = res[pointer].union(a)
break
pointer +=1
if pointer == len(res):
res.append(a)
if res == lis:
break
lis,res = res,[]
return res
And it prints
[set([1, 2, 3, 6, 7]), set([4, 5])]
This does the right thing but is way too slow when the equivalence relations is too large. I looked up the descriptions on union-find algorithm: http://en.wikipedia.org/wiki/Disjoint-set_data_structure
but I still having problem coding a Python implementation.

Solution that runs in O(n) time
def indices_dict(lis):
d = defaultdict(list)
for i,(a,b) in enumerate(lis):
d[a].append(i)
d[b].append(i)
return d
def disjoint_indices(lis):
d = indices_dict(lis)
sets = []
while len(d):
que = set(d.popitem()[1])
ind = set()
while len(que):
ind |= que
que = set([y for i in que
for x in lis[i]
for y in d.pop(x, [])]) - ind
sets += [ind]
return sets
def disjoint_sets(lis):
return [set([x for i in s for x in lis[i]]) for s in disjoint_indices(lis)]
How it works:
>>> lis = [(1,2),(2,3),(4,5),(6,7),(1,7)]
>>> indices_dict(lis)
>>> {1: [0, 4], 2: [0, 1], 3: [1], 4: [2], 5: [2], 6: [3], 7: [3, 4]})
indices_dict gives a map from an equivalence # to an index in lis. E.g. 1 is mapped to index 0 and 4 in lis.
>>> disjoint_indices(lis)
>>> [set([0,1,3,4], set([2])]
disjoint_indices gives a list of disjoint sets of indices. Each set corresponds to indices in an equivalence. E.g. lis[0] and lis[3] are in the same equivalence but not lis[2].
>>> disjoint_set(lis)
>>> [set([1, 2, 3, 6, 7]), set([4, 5])]
disjoint_set converts disjoint indices into into their proper equivalences.
Time complexity
The O(n) time complexity is difficult to see but I'll try to explain. Here I will use n = len(lis).
indices_dict certainly runs in O(n) time because only 1 for-loop
disjoint_indices is the hardest to see. It certainly runs in O(len(d)) time since the outer loop stops when d is empty and the inner loop removes an element of d each iteration. now, the len(d) <= 2n since d is a map from equivalence #'s to indices and there are at most 2n different equivalence #'s in lis. Therefore, the function runs in O(n).
disjoint_sets is difficult to see because of the 3 combined for-loops. However, you'll notice that at most i can run over all n indices in lis and x runs over the 2-tuple, so the total complexity is 2n = O(n)

I think this is an elegant solution, using the built in set functions:
#!/usr/bin/python3
def union_find(lis):
lis = map(set, lis)
unions = []
for item in lis:
temp = []
for s in unions:
if not s.isdisjoint(item):
item = s.union(item)
else:
temp.append(s)
temp.append(item)
unions = temp
return unions
if __name__ == '__main__':
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
print(union_find(l))
It returns a list of sets.

Perhaps something like this?
#!/usr/local/cpython-3.3/bin/python
import copy
import pprint
import collections
def union(list_):
dict_ = collections.defaultdict(set)
for sublist in list_:
dict_[sublist[0]].add(sublist[1])
dict_[sublist[1]].add(sublist[0])
change_made = True
while change_made:
change_made = False
for key, values in dict_.items():
for value in copy.copy(values):
for element in dict_[value]:
if element not in dict_[key]:
dict_[key].add(element)
change_made = True
return dict_
list_ = [ [1, 2], [2, 3], [4, 5], [6, 7], [1, 7] ]
pprint.pprint(union(list_))

This works by completely exhausting one equivalence at a time. When an element finds it's equivalence it is removed from the original set and no longer searched.
def equiv_sets(lis):
s = set(lis)
sets = []
#loop while there are still items in original set
while len(s):
s1 = set(s.pop())
length = 0
#loop while there are still equivalences to s1
while( len(s1) != length):
length = len(s1)
for v in list(s):
if v[0] in s1 or v[1] in s1:
s1 |= set(v)
s -= set([v])
sets += [s1]
return sets
print equiv_sets([(1,2),(2,3),(4,5),(6,7),(1,7)])
OUTPUT: [set([1, 2, 3, 6, 7]), set([4, 5])]

Related

Finding next max value from index of nested lists in python?

I'm trying to find the next maximum value of nested lists, I already have a nested list sorted by bubblesort, I need to take the largest element of each nested list and insert it into the solution vector, until the solution vector is sorted.
P.S: I can't delete the element from the initial nested list, only find the next maximum value.
See the image at the bottom as an example:
Nested_list = [[1, 7, 9], [4, 5, 6], [2, 3, 8], [0]]
The way I devised deleted the largest vector from the original list, which was quite time consuming, I believe that just moving the index to the next largest value will consume less time:
def bubbleSort(array):
n = len(array)-1
for i in range(n):
for j in range(0, n-i):
if array[j] > array[j+1]:
array[j], array[j+1] = array[j+1], array[j]
else:
continue
return array
def ordena_lista(output):
for sublista in output:
bubbleSort(sublista)
def maior_valor_lista(output):
return list(el[-1] for el in output)
def nested_remove(L, x):
if x in L:
L.remove(x)
else:
for element in L:
if type(element) is list:
nested_remove(element, x)
b = list(random.sample(range(10), 10))
n= m.floor(m.sqrt(len(b)))
output=list([b[i:i + n] for i in range(0, len(b), n)])
ordena_lista(b)
while output:
valores_maximo = maior_valor_lista(output)
var = max(valores_maximo, key=int)
final = [var] + final
nested_remove(output, var)
output = list(filter(None, output))
The simplest solution would be the following,
from functools import reduce
from operator import add
def sort_nested_list(nested_list):
return sorted(reduce(add, nested_list))
but, without knowing the exact implementation details of python's sorted, I can't tell you if it takes advantage of your pre-sorting.
If we know the sublists are sorted, and we are allowed to copy the list, and we know how many elements there are in total, we can write the following,
import math
from copy import deepcopy
def get_max_and_pop(nested_list):
""" find the maximum element of a sublist of nested_list, remove it from the sublist, and return it """
print(f"get_max_and_pop says: {nested_list}")
return max(nested_list, key=lambda x: x[-1:]).pop()
def sort_nested_list_whose_sublists_are_sorted(nested_list, n_elements):
nested_list_copy = deepcopy(nested_list)
return [get_max_and_pop(nested_list=nested_list_copy) for _ in range(n_elements)][::-1]
edit: without knowledge of the number of elements, we can write,
from copy import deepcopy
def sort_nested_list_whose_sublists_are_sorted_iter(nested_list):
nested_list_copy = deepcopy(nested_list)
while any(nested_list_copy):
yield max(nested_list_copy, key=lambda x: x[-1:]).pop()
This amounts to a bizarre, woefully inefficient and completely unnecessary sorting algorithm but here goes anyway:
Nested_list = [[9, 7, 1], [4, 5, 6], [2, 3, 8], [0]]
for e in Nested_list:
e.sort()
Output_list = []
Nested_list_copy = [[e_ for e_ in e] for e in Nested_list]
element_count = sum(len(e) for e in Nested_list)
for _ in range(element_count):
m = None
for i, e in enumerate(Nested_list_copy):
if e:
tm = e[-1]
if m is None or tm > m:
m = tm
k = i
Output_list.insert(0, Nested_list_copy[k].pop())
print(Nested_list)
print(Output_list)
Output:
[[1, 7, 9], [4, 5, 6], [2, 3, 8], [0]]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

how to combine same matching items in a 1d list and make it as 2d list [duplicate]

From this list:
N = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
I'm trying to create:
L = [[1],[2,2],[3,3,3],[4,4,4,4],[5,5,5,5,5]]
Any value which is found to be the same is grouped into it's own sublist.
Here is my attempt so far, I'm thinking I should use a while loop?
global n
n = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5] #Sorted list
l = [] #Empty list to append values to
def compare(val):
""" This function receives index values
from the n list (n[0] etc) """
global valin
valin = val
global count
count = 0
for i in xrange(len(n)):
if valin == n[count]: # If the input value i.e. n[x] == n[iteration]
temp = valin, n[count]
l.append(temp) #append the values to a new list
count +=1
else:
count +=1
for x in xrange (len(n)):
compare(n[x]) #pass the n[x] to compare function
Use itertools.groupby:
from itertools import groupby
N = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
print([list(j) for i, j in groupby(N)])
Output:
[[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5]]
Side note: Prevent from using global variable when you don't need to.
Someone mentions for N=[1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 1] it will get [[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5], [1]]
In other words, when numbers of the list isn't in order or it is a mess list, it's not available.
So I have better answer to solve this problem.
from collections import Counter
N = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
C = Counter(N)
print [ [k,]*v for k,v in C.items()]
You can use itertools.groupby along with a list comprehension
>>> l = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
>>> [list(v) for k,v in itertools.groupby(l)]
[[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5]]
This can be assigned to the variable L as in
L = [list(v) for k,v in itertools.groupby(l)]
You're overcomplicating this.
What you want to do is: for each value, if it's the same as the last value, just append it to the list of last values; otherwise, create a new list. You can translate that English directly to Python:
new_list = []
for value in old_list:
if new_list and new_list[-1][0] == value:
new_list[-1].append(value)
else:
new_list.append([value])
There are even simpler ways to do this if you're willing to get a bit more abstract, e.g., by using the grouping functions in itertools. But this should be easy to understand.
If you really need to do this with a while loop, you can translate any for loop into a while loop like this:
for value in iterable:
do_stuff(value)
iterator = iter(iterable)
while True:
try:
value = next(iterator)
except StopIteration:
break
do_stuff(value)
Or, if you know the iterable is a sequence, you can use a slightly simpler while loop:
index = 0
while index < len(sequence):
value = sequence[index]
do_stuff(value)
index += 1
But both of these make your code less readable, less Pythonic, more complicated, less efficient, easier to get wrong, etc.
You can do that using numpy too:
import numpy as np
N = np.array([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5])
counter = np.arange(1, np.alen(N))
L = np.split(N, counter[N[1:]!=N[:-1]])
The advantage of this method is when you have another list which is related to N and you want to split it in the same way.
Another slightly different solution that doesn't rely on itertools:
#!/usr/bin/env python
def group(items):
"""
groups a sorted list of integers into sublists based on the integer key
"""
if len(items) == 0:
return []
grouped_items = []
prev_item, rest_items = items[0], items[1:]
subgroup = [prev_item]
for item in rest_items:
if item != prev_item:
grouped_items.append(subgroup)
subgroup = []
subgroup.append(item)
prev_item = item
grouped_items.append(subgroup)
return grouped_items
print group([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5])
# [[1], [2, 2], [3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5, 5]]

Group repeated elements of a list

I am trying to create a function that receives a list and return another list with the repeated elements.
For example for the input A = [2,2,1,1,3,2] (the list is not sorted) and the function would return result = [[1,1], [2,2,2]]. The result doesn't need to be sorted.
I already did it in Wolfram Mathematica but now I have to translate it to python3, Mathematica has some functions like Select, Map and Split that makes it very simple without using long loops with a lot of instructions.
result = [[x] * A.count(x) for x in set(A) if A.count(x) > 1]
Simple approach:
def grpBySameConsecutiveItem(l):
rv= []
last = None
for elem in l:
if last == None:
last = [elem]
continue
if elem == last[0]:
last.append(elem)
continue
if len(last) > 1:
rv.append(last)
last = [elem]
return rv
print grpBySameConsecutiveItem([1,2,1,1,1,2,2,3,4,4,4,4,5,4])
Output:
[[1, 1, 1], [2, 2], [4, 4, 4, 4]]
You can sort your output afterwards if you want to have it sorted or sort your inputlist , then you wouldnt get consecutive identical numbers any longer though.
See this https://stackoverflow.com/a/4174955/7505395 for how to sort lists of lists depending on an index (just use 0) as all your inner lists are identical.
You could also use itertools - it hast things like TakeWhile - that looks much smarter if used
This will ignore consecutive ones, and just collect them all:
def grpByValue(lis):
d = {}
for key in lis:
if key in d:
d[key] += 1
else:
d[key] = 1
print(d)
rv = []
for k in d:
if (d[k]<2):
continue
rv.append([])
for n in range(0,d[k]):
rv[-1].append(k)
return rv
data = [1,2,1,1,1,2,2,3,4,4,4,4,5,4]
print grpByValue(data)
Output:
[[1, 1, 1, 1], [2, 2, 2], [4, 4, 4, 4, 4]]
You could do this with a list comprehension:
A = [1,1,1,2,2,3,3,3]
B = []
[B.append([n]*A.count(n)) for n in A if B.count([n]*A.count(n)) == 0]
outputs [[1,1,1],[2,2],[3,3,3]]
Or more pythonically:
A = [1,2,2,3,4,1,1,2,2,2,3,3,4,4,4]
B = []
for n in A:
if B.count([n]*A.count(n)) == 0:
B.append([n]*A.count(n))
outputs [[1,1,1],[2,2,2,2,2],[3,3,3],[4,4,4,4]]
Works with sorted or unsorted list, if you need to sort the list before hand you can do for n in sorted(A)
This is a job for Counter(). Iterating over each element, x, and checking A.count(x) has a O(N^2) complexity. Counter() will count how many times each element exists in your iterable in one pass and then you can generate your result by iterating over that dictionary.
>>> from collections import Counter
>>> A = [2,2,1,1,3,2]
>>> counts = Counter(A)
>>> result = [[key] * value for key, value in counts.items() if value > 1]
>>> result
[[2, 2, 2], [[1, 1]]

How to find all partial solutions of a list recursively

I have a box with a maximum weight capacity. I am given a list of weights of the items that I am supposed to fit in the box. I need all solutions that get to, or as close as possible to, the maximum capacity. What I mean is that I should not be able to add another item to any of the partial solutions without going over the maximum capacity.
If I am given the following list of weights:
[1,2,3,4,5]
If the maximum capacity is 9, the solutions should be (I may have missed one, but you get the point):
[[4,3,2], [5,3,1], [5,4], [3,2,1], [4,2,1], [4,3,1], [5,3]]
Here's my recursive algorithm, I think I am getting close but I can't figure out how to fix it.
def findSubset(alist, maxim):
if maxim <= 0:
return [[]]
if len(alist) == 0:
return []
alist2 = alist[1:]
include = findSubset(alist2, maxim-alist[0])
for s in include:
s.append(alist[0])
return include + findSubset(alist2, maxim)
Current output is:
[[4, 3, 2, 1], [5, 3, 2, 1], [5, 4, 2, 1], [5, 4, 3, 1], [5, 3, 1], [5, 4, 1], [4, 3, 2], [5, 3, 2], [5, 4, 2], [5, 4, 3], [5, 4]]
My advice would be to iterate on all element of the list, while building a list of possible solutions
if the element is < maxim, recurse on a sublist on following elements for maxim - elt, and for each element of the result, append the element to it and append all to the resulting list
if the element is == maxim, add the singleton list containing the element to the resulting list
Code is:
def findSubset(alist, maxim):
res = []
if maxim <= 0:
return [[]]
if len(alist) == 0:
return []
for i, elt in enumerate(alist):
if elt < maxim:
res.extend([ [elt] + l for l in findSubset(alist[i+1:], maxim-elt)])
elif elt == maxim:
res.append([elt])
return res
It gives
>>> findSubset(lst, 9)
[[1, 3, 5], [2, 3, 4], [4, 5]]
Above code only gives exact solutions. In case where there are no exact solution, it should be extended to give best approaching solutions:
def findApproachSubset(alist, maxim):
for i in range(maxim, 0, -1):
res = findSubset(alist, i)
if len(res) > 0:
return res
return []
For example:
>>> findSubset([1, 4, 5, 6], 8)
[]
>>> findApproachSubset([1, 4, 5, 6], 8)
[[1, 6]]
because here the best solution is for 7 instead of 8.
This is written using Haskell. As it seems that this is homework, there is no point giving you the complete answer.
We use three separate functions, the function f can be made recursive as it is just map
xs = [1, 2, 3, 4, 5]
perms [] = []
perms xs = xs : perms (tail xs)
All you need to know is that : means cons i.e 1:[2] = [1,2]
In python, we could write it as:
def perms(xs):
if xs:
return [xs] + perms(xs[1:])
else:
return []
so perms xs is just [[1,2,3,4,5],[2,3,4,5],[3,4,5],[4,5],[5]]
takeWhileLessThan _ [] = []
takeWhileLessThan n (x:xs) =
if n-x < 0 then [] else (x : takeWhileLessThan (n-x) xs)
and takeWhileLessThan uses recursion. It operates on a single list, keeping track of the current sum.
f n xs = map (takeWhileLessThan n) (perms xs)
> [[1,2,3],[2,3,4],[3,4],[4,5],[5]]
The final function maps the recursive function over the list of lists. If you then wanted all values and wanted it to be recursive, just write another function...
if you are allowed for loops, then this method would work well. The only recursion is the main function.
def f(n, xs, solutions):
if xs:
m = n
ys = []
for x in xs:
if (m-x) < 0:
break
else:
ys.append(x)
m = m-x
solutions.append(ys)
return f(n, xs[1:], solutions)
else:
return solutions
which gives:
>>> f(9, [1,2,3,4,5], [])
[[1, 2, 3], [2, 3, 4], [3, 4], [4, 5], [5]]
def recursive_function(current_node: int, nodes_left: list, maximum: int, trace: list) -> list:
sum_so_far = sum(trace)
if current_node is not None:
trace.append(current_node)
else:
current_node = 0
if sum_so_far + current_node > maximum:
# That means, that current trace is already to big
# no need to search further this path
return False
elif sum_so_far + current_node == maximum:
# Bingo! The perfect set of nodes!
# No need to look further this path
return {tuple(sorted(trace))}
else:
# We still haven't reached maximum value
# let's check if we can pack some more nodes to trace
results = set()
for node in nodes_left:
nodes_copy = nodes_left.copy()
nodes_copy.remove(node)
traces = recursive_function(node, nodes_copy, maximum, trace.copy())
if traces:
# This path gave us some legitimate results, we collect them
results.update(traces)
if results:
# At least one possible path gave us results, we pass them on
return results
# There was nothing left in nodes_left that we could add and not
# cross the maximum limit. The path leading to this moment fits
# our requirements.
return {tuple(sorted(trace))}
def findSubset(alist, maxim) -> list:
results = recursive_function(None, alist, maxim, [])
return results
Why do I do this {tuple(sorted(trace))}? So I don't keep different permutations of same results in my memory.
Time cost: n!
Memory cost: n

Python list traversal with gaps

Hi I have a multidimensional list such as:
my_list = [[1,2,3,1,2],[1,0,3,1,2],[1,0,0,0,2],[1,0,3,0,2]]
where 0 represents a gap between two pieces of data.
What I need to do is iterate through the list and keep track of how many gaps are in each sublist and throw away the zeros. I think the best way is to break each sublist into chunks where there are zeros so I end up with smaller lists of integers and a number of gaps. Ideally, to form a new list which tells me the length of each chunk and number of gaps (i.e. chunks -1), such as:
new_list = [[5, 0], [[1, 3], 1], [[1, 1], 1], [[1, 1, 1], 2]]
or probably better:
new_list = [[5], [1, 3], [1, 1], [1, 1, 1]]
and I will know that the gaps are equal to len(chunk).
EDIT:
However, leading and trailing zeros do not represent gaps. i.e. [0,0,1,2] represents one continuous chunk.
Any help much appreciated.
itertools.groupby() is perfect for this:
from itertools import groupby
my_list = [[1,2,3,1,2],[1,0,3,1,2],[1,0,0,0,2],[1,0,3,0,2]]
new_list = [[len(list(g)) for k, g in groupby(inner, bool) if k] for inner in my_list]
Result:
>>> new_list
[[5], [1, 3], [1, 1], [1, 1, 1]]
The result contains the length of each non-zero chunk for each sublist, so for example [1,0,3,1,2] gives [1,3], so there are two chunks (one gap). This matches your second output format.
Here is my humble code without any imports:
The algorithm is slightly long:
def toggle(n):
return n != 0
def chunk_counter(L):
"""
list -> list
"""
chunk_list = []
pivots = []
for j in range(len(L)):
if j == 0 and toggle(L[0]):
pivots.append(j)
elif toggle(L[j]) and toggle(L[j]) != toggle(L[j-1]):
pivots.append(j)
for m in range(len(pivots)):
k = 0
if m == len(pivots)-1:
bound = len(L)
else:
bound = pivots[m+1]
p = 0
while p in range(bound - pivots[m]):
if toggle(L[pivots[m] + p]):
k += 1
p += 1
else:
p += 1
chunk_list.append(k)
return chunk_list
def chunks(L):
"""
(list of lists) -> list of lists
"""
new_list = []
for i in range(len(L)):
new_list.append(chunk_counter(L[i]))
return new_list
So, you may try the function chunks() on your list:
>>> L = [[1,2,3,1,2],[1,0,3,1,2],[1,0,0,0,2],[1,0,3,0,2], [0,0,1,2]]
>>> chunks(L)
[[5], [1, 3], [1, 1], [1, 1, 1], [2]]
Here's a recursive definition (a replacement for Chunk Counter):
counter_list = []
def counter(L):
k = 0
while(k < len(L) and L[k] != 0):
k +=1
counter_list.append(k)
if k == len(L):
print counter_list
else:
counter(L[k+1:])

Categories