Tracking depth in a breadth first search of a directed tree - python

I'm trying to find the distance between the root and each node that is being traversed (i.e. the node's depth). For example, if I had the following adjacency list representing the tree { 1: [2, 3], 2: [4], 3: [5]}, an associated list like the following would be created: [0, 1, 1, 2, 2], denoting the level of each node.
I have the following code and can't see where I'm meant to add counting functionality etc, ideally this would deal with cross and back edges as well
def bfs(graph, root):
    """Print the breadth-first visit order of ``graph`` starting at ``root``.

    Args:
        graph: Adjacency mapping ``{node: [neighbour, ...]}``. Nodes that have
            no entry (e.g. leaves) are treated as having no neighbours.
        root: Node at which the traversal starts.

    Returns:
        None. The visit order is printed as a list.
    """
    seen, queue = {root}, collections.deque([root])
    visit_order = []
    while queue:
        vertex = queue.popleft()
        visit_order.append(vertex)
        # .get() instead of [] so that leaf nodes missing from the mapping
        # (4 and 5 in the example adjacency list) do not raise KeyError.
        for node in graph.get(vertex, []):
            if node not in seen:
                seen.add(node)
                queue.append(node)
    print(visit_order)

Instead of queuing just the nodes, you can queue the nodes and their levels as tuples, and when you enqueue a node it's always coupled with the current level plus one, so that when you dequeue a node and append the node to visit_order you also get the level of the node from the tuple:
import collections
def bfs(graph, root):
    """Print the BFS visit order and, on a second line, each node's level.

    The queue holds ``(node, level)`` pairs, so the level of a node is known
    the moment it is dequeued. Nodes missing from ``graph`` are treated as
    having no neighbours.
    """
    discovered = {root}
    pending = collections.deque()
    pending.append((root, 0))
    visit_order, levels = [], []
    while pending:
        current, depth = pending.popleft()
        visit_order.append(current)
        levels.append(depth)
        for child in graph.get(current, []):
            if child in discovered:
                continue
            discovered.add(child)
            # A child always sits exactly one level below its parent.
            pending.append((child, depth + 1))
    print(visit_order)
    print(levels)
so that:
bfs({ 1: [2, 3], 2: [4], 3: [5]}, 1)
would output:
[1, 2, 3, 4, 5]
[0, 1, 1, 2, 2]

You can use a dictionary to keep track of the current depths:
from collections import deque
d = {1: [2, 3], 2: [4], 3: [5]}


def bfs(graph, root=1):
    """Yield one ``(visit_order, depths)`` pair for a BFS from ``root``.

    Args:
        graph: Adjacency mapping ``{node: [neighbour, ...]}``; nodes without
            an entry are treated as having no neighbours.
        root: Start node (depth 0).

    Yields:
        A single tuple ``(visit_order, depths)`` where ``visit_order`` is the
        list of nodes in BFS order and ``depths`` maps each reached node to
        its distance from ``root``.
    """
    queue, seen, depths = deque([root]), [], {root: 0}
    while queue:
        val = queue.popleft()
        seen.append(val)
        for child in graph.get(val, []):
            # Only the first discovery fixes a node's depth. This keeps cross
            # and back edges from overwriting an existing (shorter) depth and
            # from enqueuing the same node twice. ``depths`` doubles as the
            # O(1) "already discovered" set.
            if child not in depths:
                depths[child] = depths[val] + 1
                queue.append(child)
    yield seen, depths


[(_all, _depths)] = bfs(d)
print([_depths[i] for i in _all])
Output:
[0, 1, 1, 2, 2]
The logic is simpler, however, when using a class, as a depth-first traversal can be applied:
class Tree:
    """Recursive tree built from the module-level adjacency mapping ``d``.

    Each instance stores its own value in ``head`` and its child subtrees in
    ``data``.
    """

    def __init__(self, _start):
        children = [Tree(child) for child in d.get(_start, [])]
        self.__dict__ = {'head': _start, 'data': children}

    def __contains__(self, _val):
        """Return True if ``_val`` is this node or lies in any subtree."""
        if self.head == _val:
            return True
        if not self.data:
            return False
        return any(_val in subtree for subtree in self.data)

    def get_depth(self, _val):
        """Return the number of edges from this node down to ``_val``.

        Raises IndexError when ``_val`` is not in the tree, because no
        subtree contains it.
        """
        if self.head == _val:
            return 0
        holders = [subtree for subtree in self.data if _val in subtree]
        return 1 + holders[0].get_depth(_val)


t = Tree(1)
print([t.get_depth(i) for i in {i for a, b in d.items() for i in [a, *b]}])
Output:
[0, 1, 1, 2, 2]

Related

Python: Generic method that can return different object type in Pythonic style?

I want to build two trees; the build approach is exactly the same, but the tree nodes are of different types. I have an implementation, but I am not sure whether it is "Pythonic". Is there a more Pythonic approach?
The method I implement like this:
import collections
from typing import TypeVar
T = TypeVar('T')


# Use a breadth-first approach to build a tree; the input is a LeetCode-style
# level-order list such as [1, 2, 3, 4, 5, None, 7].
def build_binary_tree(ints: list[int], root: T) -> T:
    """Fill ``root`` with a binary tree described in level order.

    Args:
        ints: Level-order values; ``None`` marks a missing child.
        root: A pre-constructed node. Its type is reused (``type(root)(value)``)
            to create every other node, so the type must accept a value as
            its single constructor argument and expose ``val``, ``left`` and
            ``right`` attributes.

    Returns:
        The same ``root`` object, now populated. For an empty ``ints`` the
        root is returned untouched.
    """
    if not ints:
        return root
    root.val = ints[0]
    node_cls = type(root)
    que = collections.deque([root])
    length = len(ints)
    i = 1
    while que and i < length:
        node = que.popleft()
        # Compare against None explicitly: a plain truthiness test would
        # silently drop nodes whose value is 0.
        if ints[i] is not None:
            node.left = node_cls(ints[i])
            que.append(node.left)
        i += 1
        if i < length and ints[i] is not None:
            node.right = node_cls(ints[i])
            que.append(node.right)
        i += 1
    return root
# Example usage of build_binary_tree. The node classes are not defined in this
# snippet; each is instantiated once with no arguments to supply the root.
# Tree whose nodes are of type "Node":
root = build_binary_tree([1, 2, 3, 4, 5, None, 6, 7, None, None, None, None, 8], Node())
# Tree whose nodes are of type "TreeNode" (note: `root` is rebound here):
root = build_binary_tree([5, 1, 5, 5, 5, None, 5], TreeNode())

Creating a BinarySortTree method that returns items in a sorted fashion

def items(self) -> List:
"""Return all of the items in the BST in sorted order.
>>> BinarySearchTree(None).items() # An empty BST
[]
>>> bst = BinarySearchTree(7)
>>> left = BinarySearchTree(3)
>>> left._left = BinarySearchTree(2)
>>> left._right = BinarySearchTree(5)
>>> right = BinarySearchTree(11)
>>> right._left = BinarySearchTree(9)
>>> right._right = BinarySearchTree(13)
>>> bst._left = left
>>> bst._right = right
>>> bst.items()
[2, 3, 5, 7, 9, 11, 13]
"""
# NOTE(review): this is the poster's non-working attempt, kept as posted
# (indentation was lost in extraction). The comments below explain why it
# yields [3, 7, 11] for the docstring example.
if self.is_empty():
return []
# NOTE(review): a falsy root value (e.g. 0) would not take this branch.
elif self._root:
lst = [self._root]
# NOTE(review): builds a fresh empty tree for every comparison and relies on
# BinarySearchTree equality, which is not visible here - confirm.
if self._left != BinarySearchTree(None):
# lst holds only the root here, so index(...) - 1 is -1; insert(-1, x) on a
# one-element list places x before the root.
lst.insert(lst.index(self._root) - 1, self._left._root)
# The list returned by the recursive call is discarded, so nothing below the
# immediate left child ever reaches lst.
self._left.items()
if self._right != BinarySearchTree(None):
lst.insert(lst.index(self._root) + 1, self._right._root)
# Same problem on the right side: the recursive result is thrown away.
self._right.items()
return lst
What's wrong with my code? I'm trying recursion and it manages to half-pass the docstring example and returns [3, 7, 11]. I can't seem to implement the actual recursion part of the code, so that it returns more.
Note: I'm trying to complete this method without dumping the numbers and then sorting the list.
Assuming that BinarySearchTree is defined as the owner of the whole tree and that its root property is a proper TreeNode with a left and right hierarchy of TreeNodes (i.e. not another BinarySearchTree, which wouldn't make any sense),
You could make that a recursive generator:
def items(self):
    """Return the tree's nodes as a list in in-order (sorted) sequence.

    An empty tree yields ``[]``. Note that the node objects themselves are
    collected, not their values.
    """
    if self.is_empty():
        return []

    def walk(node):
        # In-order: everything smaller, then this node, then everything larger.
        if node.left:
            yield from walk(node.left)
        yield node
        if node.right:
            yield from walk(node.right)

    return list(walk(self.root))

Find first and second order contacts of each node in a network

I have a graph having 602647 nodes and 982982 edges. I wanted to find the first and second order contacts (i.e. 1-hop contacts and 2-hops contacts) for each node in the graph in Networkx.
I built the following code, which worked fine for smaller graphs but never finished running for larger graphs (such as the one above):
# Poster's original (slow) attempt; `G` is a NetworkX graph and `nx` is
# networkx, both defined outside this snippet. Indentation was lost in
# extraction, so it is unclear whether the two update() calls run inside the
# loop.
hop_1 = {}
hop_2 = {}
row_1 = {}
row_2 = {}
for u, g in G.nodes(data=True):
# Two separate bounded shortest-path searches per node. setdefault() stores
# each result only if u has no entry yet.
row_1.setdefault(u, nx.single_source_shortest_path_length(G, u, cutoff=1))
row_2.setdefault(u, nx.single_source_shortest_path_length(G, u, cutoff=2))
# NOTE(review): if these run once per node, each call re-copies every entry
# collected so far, which makes the whole loop quadratic - confirm placement.
hop_1.update(row_1)
hop_2.update(row_2)
some notes:
results are stored first in a dict (hop_1 and hop_2)
row_1 and row_2 are temporary holding variables
hop-1 will include nodes after one jump
hop-2 will include nodes that are located at both one jump and two jumps
Is there a way to optimize/improve this code so that it finishes running?
To find first and second-order neighbors you can use functions all_neighbors() and node_boundary():
# Collect first- and second-order contacts for every node of `G` (a NetworkX
# graph defined outside this snippet).
hop1 = {}
hop2 = {}
for n in G.nodes():
# Direct neighbours of n.
neighbs1 = list(nx.all_neighbors(G, n))
hop1[n] = neighbs1
# Boundary of {n} plus its neighbours, followed by the direct neighbours.
# Presumably node_boundary returns the nodes just outside the given set
# (i.e. the exactly-two-hop contacts) - verify against the NetworkX docs.
hop2[n] = list(nx.node_boundary(G, neighbs1 + [n])) + neighbs1
print(hop1)
# {0: [1, 2, 3], 1: [0, 2, 3], 2: [0, 1, 3, 4], 3: [0, 1, 2, 4], 4: [2, 3]}
print(hop2)
# {0: [4, 1, 2, 3], 1: [4, 0, 2, 3], 2: [0, 1, 3, 4], 3: [0, 1, 2, 4], 4: [0, 1, 2, 3]}
I don't know networkx; but, by definition, a node that is reachable one hop is also reachable in <=2 hops, which is what the docs (and source) of single_source_shortest_path_length is giving you. you can therefore remove the first call to single_source_shortest_path_length.
second, your uses of dictionaries are very strange! why are you using setdefault rather than just setting elements? also you're copying things a lot with update which doesn't do anything useful and just wastes time.
I'd do something like:
# Exactly-one-hop and exactly-two-hop contacts for every node of `G`.
hop_1 = {}
hop_2 = {}
for u in G.nodes():
    # One bounded search per node; entries at any other distance (such as the
    # node itself at distance 0) are ignored.
    lengths = nx.single_source_shortest_path_length(G, u, cutoff=2)
    d1 = [v for v, n in lengths.items() if n == 1]
    d2 = [v for v, n in lengths.items() if n == 2]
    hop_1[u] = d1
    hop_2[u] = d2
which takes about a minute on my laptop with a G_nm graph as generated by:
import networkx as nx
G = nx.gnm_random_graph(602647, 982982)
note that tqdm is nice for showing progress of long running loops, just import tqdm and change the outer for loop to be:
for u in tqdm(G.nodes()):
...
and you'll get a nice bar reporting progress

How to stop depth-first search when the target vertex of the graph is found?

Here is the graph:
g = {
0: [2, 5, 7],
1: [7],
2: [0, 6],
3: [5, 4],
4: [3, 6, 7],
5: [3, 4, 0],
6: [2, 4],
7: [0, 1, 4]
}
I have the following function in Python:
def dfs(graph, start, target, visited=None):
    """Depth-first traversal returning the set of all vertices reached.

    ``target`` is accepted but never consulted, so the traversal always
    covers everything reachable from ``start``. When ``visited`` is supplied
    it is updated in place and returned.
    """
    visited = set() if visited is None else visited
    visited.add(start)
    # Snapshot of the neighbours that were unvisited when this call began.
    unexplored = set(graph[start]).difference(visited)
    for neighbour in unexplored:
        dfs(graph, neighbour, target, visited)
    return visited
But it returns all the vertices that exist in the graph, and I want it to return just the target vertex if it's present in the graph.
Could someone help me?
You want to test if you reached your target and, if you did, return True. This can be accomplished by making the following edits to your code:
Small edit for efficiency
# NOTE(review): first version from the answer, which its author marks as
# mistaken in the edit that follows. Kept as posted (indentation lost).
def dfs(graph, start, target, visited=None):
if visited is None:
visited = set()
visited.add(start)
for n in (set( graph[start] ) - visited):
if n == target:
return True
# NOTE(review): presumably inside the loop, in which case the function
# returns after exploring only the first neighbour, so a target reachable
# through a later neighbour is missed.
return dfs(graph, n, target, visited)
return False
Edit: I made a mistake in my algorithm, fixed version is below:
def dfs(graph, start, target, visited=None):
    """Return True if ``target`` is reachable from ``start``, else False.

    Args:
        graph: Adjacency mapping ``{vertex: [neighbour, ...]}``.
        start: Vertex the search starts from.
        target: Vertex being looked for.
        visited: Set of already-explored vertices, shared across the
            recursion; callers normally leave it as None.

    The search stops as soon as the target is found.
    """
    if start == target:
        return True
    if visited is None:
        visited = set()
    visited.add(start)
    for n in graph[start]:
        # Re-check on every iteration: a sibling's recursion may already
        # have explored this neighbour.
        if n in visited:
            continue
        # Return immediately on success. Storing the result and continuing
        # the loop would let a later, unsuccessful branch overwrite it.
        if dfs(graph, n, target, visited):
            return True
    return False

Topological sort python

I coded a solution for DFS non-recursive, but i can't modify it to make a topological sort:
def dfs(graph, start):
    """Iterative depth-first traversal; returns vertices in visit order.

    Args:
        graph: Adjacency mapping ``{vertex: [neighbour, ...]}``; every
            reachable vertex needs an entry.
        start: Vertex the traversal starts from.

    Returns:
        List of vertices in the order they were visited.
    """
    path = []
    stack = [start]
    # Every vertex that has ever been pushed. A vertex is pushed at most
    # once, so this set always equals "in path or in stack" and replaces the
    # two linear list scans with an O(1) lookup.
    discovered = {start}
    while stack:
        v = stack.pop()
        path.append(v)
        # Push in reverse so the first-listed neighbour is popped first.
        for w in reversed(graph[v]):
            if w not in discovered:
                discovered.add(w)
                stack.append(w)
    return path
Any ideas how to modify it?
With the recursive version i can easy have the sorting:
def dfs_rec(graph, start, path):
    """Recursive depth-first traversal that prints vertices in post-order.

    Args:
        graph: Adjacency mapping ``{vertex: [neighbour, ...]}``.
        start: Vertex to explore from.
        path: Vertices visited so far (pass ``[]`` for the initial call); it
            is not mutated, a new list is built instead.

    Returns:
        The list of visited vertices in pre-order. As a side effect each
        vertex is printed once all of its descendants are done.
    """
    path = path + [start]
    for edge in graph[start]:
        if edge not in path:
            path = dfs_rec(graph, edge, path)
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the bare `print start` statement is a SyntaxError on Python 3.
    print(start)
    return path
Input:
>>> graph = {
1: [2, 3],
2: [4, 5, 6],
3: [4,6],
4: [5,6],
5: [6],
6: []
}
>>> dfs_rec(graph,1,[])
6
5
4
2
3
1
[1, 2, 4, 5, 6, 3]
>>> dfs(graph,1)
[1, 2, 4, 5, 6, 3]
>>> graph = {
1: [3],
3: [5,6],
5: [4],
4: [7],
7: [],
6: []
}
>>> print dfs_rec(graph,1,[])
7
4
5
6
3
1
[1, 3, 5, 4, 7, 6]
>>> print dfs(graph,1)
[1, 3, 5, 4, 7, 6]
so i need to get this ordering in the non-recursive also.
Non-recursive solution:
I think that this could also be a solution; correct me if I am wrong.
def dfs(graph, start):
    """Iterative DFS that also assigns a descending label to each vertex.

    Labels start at ``len(graph)`` and decrease by one each time a vertex is
    first seen on the stack.

    Returns:
        ``(path, labelling)`` where ``path`` is the visit order and
        ``labelling`` maps label -> vertex.
    """
    path = []
    stack = [start]
    label = len(graph)
    labels = {}
    while stack:
        # Label every vertex currently on the stack that has no label yet.
        for pending in stack:
            if pending in labels:
                continue
            labels[pending] = label
            label -= 1
        v = stack.pop()
        if v not in path:
            path.append(v)
        for w in reversed(graph[v]):
            if w in path or w in stack:
                continue
            stack.append(w)
    # Invert the mapping from vertex -> label to label -> vertex.
    labelling = {lab: vertex for vertex, lab in labels.items()}
    return path, labelling
Input:
graph = { 1: [3], 3:[5,6] , 5:[4] , 4:[7], 7:[],6:[]}
print dfs(graph,1)
Output:
([1, 3, 5, 4, 7, 6], {1: 7, 2: 4, 3: 5, 4: 6, 5: 3, 6: 1})
1
/
3
/\
5 6
/
4
/
7
FWIW, here is some code I worked up for a non-recursive topological sort.
from collections import defaultdict, namedtuple
from itertools import islice
Results = namedtuple('Results', ['sorted', 'cyclic'])


def topological_sort(dependency_pairs):
    'Sort values subject to dependency constraints'
    # Each pair is (head, tail), meaning head must come before tail.
    indegree = defaultdict(int)     # number of arrows pointing at a node
    successors = defaultdict(list)  # arrows leaving a node
    first_seen = []                 # distinct heads, in first-seen order
    for head, tail in dependency_pairs:
        indegree[tail] += 1
        if head not in successors:
            first_seen.append(head)
        successors[head].append(tail)

    # Start from the heads that nothing points at, then keep releasing nodes
    # whose last incoming arrow has been consumed. ``ordered`` grows while it
    # is being walked, so an explicit cursor is used.
    ordered = [h for h in first_seen if h not in indegree]
    position = 0
    while position < len(ordered):
        for tail in successors[ordered[position]]:
            indegree[tail] -= 1
            if indegree[tail] == 0:
                ordered.append(tail)
        position += 1

    # Anything with arrows still pointing at it was never released.
    cyclic = [node for node, remaining in indegree.items() if remaining]
    return Results(ordered, cyclic)


if __name__ == '__main__':
    print(topological_sort('aa'.split()))
    print(topological_sort('ah bg cf ch di ed fb fg hd he ib'.split()))
from collections import defaultdict, deque
class Graph:
    """Adjacency-list graph (directed or undirected) with a DFS topological sort.

    Args:
        directed: When False, every edge is stored in both directions.
        nodes: Optional iterable of nodes to add up front.
        edges: Optional iterable of ``(source, target)`` pairs to add up front.
    """

    def __init__(self, directed=False, nodes=None, edges=None):
        self.graph = defaultdict(list)
        self.directed = directed
        self.add_nodes(nodes)
        self.add_edges(edges)

    # Must be a real property: the other methods read ``self.nodes`` as an
    # attribute (``node not in self.nodes``, ``dict.fromkeys(self.nodes, ...)``).
    @property
    def nodes(self):
        """List of all nodes, in first-seen order.

        For a directed graph this includes nodes that appear only as the
        target of an edge and therefore have no key of their own.
        """
        found = dict.fromkeys(self.graph)
        if self.directed:
            for neighbors in self.graph.values():
                found.update(dict.fromkeys(neighbors))
        return list(found)

    def add_node(self, node):
        """Register ``node`` with no outgoing edges unless it already exists."""
        self.graph.setdefault(node, [])

    def add_nodes(self, nodes):
        """Add every node in ``nodes``; ``None`` is accepted and ignored."""
        if nodes is None:
            return None
        for node in nodes:
            self.add_node(node)

    @property
    def edges(self):
        """List of ``(source, target)`` pairs for every stored edge."""
        return [
            (source, neighbor)
            for source, neighbors in self.graph.items()
            for neighbor in neighbors
        ]

    def add_edge(self, edge):
        """Add ``edge`` (a ``(node1, node2)`` pair); mirrored when undirected."""
        node1, node2 = edge
        self.graph[node1].append(node2)
        if not self.directed:
            self.graph[node2].append(node1)

    def add_edges(self, edges):
        """Add every edge in ``edges``; ``None`` is accepted and ignored."""
        if edges is None:
            return None
        for edge in edges:
            self.add_edge(edge)

    def topological_util(self, node, visited, label):
        """Recursive DFS helper: prepend ``node`` once its successors are done."""
        visited[node] = True
        # .get() so that reading a sink node does not insert an empty entry
        # into the defaultdict as a side effect.
        for edge in self.graph.get(node, ()):
            if not visited[edge]:
                self.topological_util(edge, visited, label)
        label.appendleft(node)

    def topological_sort(self):
        """Return a deque of all nodes in topological order.

        No cycle detection is performed; the result is only a valid
        topological order when the graph is a DAG.
        """
        visited = dict.fromkeys(self.nodes, False)
        # store all nodes in topological order, the index is the position
        label = deque()
        for node in self.nodes:
            if not visited[node]:
                self.topological_util(node, visited, label)
        return label
# This snippet illustrates the logic of topological sorting. To run it, supply
# an adjacency matrix of your choice; vertices are numbered from 0 upwards.
# NOTE(review): indentation was lost in extraction, so the nesting of the
# statements inside the function cannot be confirmed from this text.
# a[i][j] == 1 is tested below as "there is an edge from i to j".
a=[[0,0,1,0,0,0],[0,0,1,0,0,0],[0,0,0,1,1,0],[0,0,0,0,1,0],[0,0,0,0,0,0],[0,0,1,0,0,0]]
# One visited flag per row of the matrix, all initially 0.
vis=[0 for i in range(0,len(a))]
# Shared list used as a stack of vertices (append / pop at the end).
s=[]
orderstack=[]#stores the reverse order of topological sorted elements
# Marks vertex i as visited, scans row i for unvisited successors and recurses
# into them. It reads and mutates the module-level lists `s` and `orderstack`.
def dfs_for_topological_sorting(a,vis,i):
vis[i]=1
# x records whether an unvisited successor of i was found.
x=0
for j in range(0,len(a[0])):
if(a[i][j]==1 and vis[j]==0):
x=1
s.append(j)
#print(s)
dfs_for_topological_sorting(a,vis,j)
# No unvisited successor found and the stack is non-empty: record the vertex
# on top of the stack.
if(x==0 and len(s)!=0):
orderstack.append(s[len(s)-1])
if(len(s)>0):
dfs_for_topological_sorting(a,vis,s.pop())
# Start a traversal from every vertex that has not been recorded yet.
for i in range(0,len(a)):
if(i not in orderstack):
s.append(i)
dfs_for_topological_sorting(a,vis,i)
# Print orderstack back to front (the slice walks it in reverse).
print(orderstack[len(orderstack)-1::-1])
from collections import defaultdict # importing defaultdict
def topological_sort(graph, b, a):
    """Return the lexicographically smallest topological order of vertices 1..a.

    Kahn's algorithm: repeatedly emit the smallest vertex that has no
    remaining incoming edges.

    Args:
        graph: Adjacency structure indexable by vertex number, where
            ``graph[v]`` is the list of successors of ``v``.
        b: Number of edges. Unused; kept so existing callers keep working.
        a: Number of vertices; vertices are numbered 1..a.

    Returns:
        List of vertices in topological order. Vertices that lie on a cycle
        never reach in-degree zero and are therefore left out.
    """
    import heapq  # local import so the block does not depend on file-level imports

    # Index 0 is unused because vertices are 1-based.
    in_degree = [0] * (a + 1)
    for i in range(1, a + 1):
        for j in graph[i]:
            in_degree[j] += 1

    # Min-heap of vertices that are ready to be emitted. This replaces the
    # linear min()/index()/pop() scan on a plain list.
    ready = [i for i in range(1, a + 1) if in_degree[i] == 0]
    heapq.heapify(ready)

    order = []
    while ready:
        vertex = heapq.heappop(ready)
        order.append(vertex)
        for j in graph[vertex]:
            # A vertex reaches zero exactly once, when its last incoming edge
            # is consumed, so no separate "visited" flag is needed.
            in_degree[j] -= 1
            if in_degree[j] == 0:
                heapq.heappush(ready, j)
    return order
# Driver: reads a graph from standard input and prints its topological order.
# First line: "<vertices> <edges>"; then one "p q" line per edge p -> q.
graph=defaultdict(list)
a,b=list(map(int,input().split())) #a=no. of vertices
for i in range(b): #b=no. of edges
p,q=list(map(int,input().split()))
graph[p].append(q) # we take input in graph as DAG
ss=topological_sort(graph,b,a) # calling function
# Print the vertices on one line, separated by spaces.
for i in ss:
print(i,end=" ")
'''Input
5 6
1 2
1 3
2 3
2 4
3 4
3 5
Your Code's Output
1 2 3 4 5
Expected Correct Output
1 2 3 4 5 '''

Categories