This question deals with the A* algorithm and three variations thereof:
1. max-depth (only search within a fixed depth away from the starting vertex)
2. multi-target (search for multiple targets rather than a single goal)
3. multi-path (search for the first n paths - which may share some vertices - from the start to the target)
To address my question I shall use Python (v3+) as it lends itself to readability. There is a host of graph data structures (adjacency matrix, adjacency list, DOK, CRS, etc.). As A* only performs operations on the vertex set (under the assumption that the vertex set has local knowledge of its predecessors and successors), I will use an adjacency list... or, more specifically, an adjacency dictionary (hash), e.g.:
vertex_set = {
"a": {"b", "c"},
"b": {"c"},
...
}
To provide a launching point, an implementation of plain A* is provided below:
A*
helpers
from math import inf
def a_star_distance_between(start, goal):
# cost_estimate may go here
    # but I set it to a constant zero, transforming A* into a greedy best-first
    # search, as this estimate will vary by your use case
cost_estimate = 0
return cost_estimate
def reconstruct_path(came_from, current):
total_path = [current]
    while current in came_from:
current = came_from[current]
total_path.append(current)
total_path.reverse()
return total_path
def a_star_heuristic_between(a, b):
# your heuristic goes here
# dummy heuristic just for functionality
heuristic = len(vertex_set[a]) + len(vertex_set[b])
return 1 / heuristic
def a_star_lowest_f_score(f_scores, nodes_currently_discovered):
f_min_val = inf
f_min_key = ""
for node in nodes_currently_discovered:
val = f_scores[node]
if val < f_min_val:
f_min_key = node
f_min_val = val
return f_min_key
A* normal
def a_star(start, stop):
nodes_already_evaluated = set() # a.k.a. closed set
nodes_currently_discovered = {start} # a.k.a. open set
came_from = dict()
# for each node, cost of getting from start node to that node
g_score = {v: inf for v in list(vertex_set.keys())}
g_score[start] = 0
# for each node, cost of getting from the start node to the goal by passing through that node
f_score = {v: inf for v in list(vertex_set.keys())}
f_score[start] = 1 # normally a_star_heuristic_between(start, stop), 1 here because of hard coded value above
while nodes_currently_discovered:
current = a_star_lowest_f_score(f_score, nodes_currently_discovered)
if current == stop:
return reconstruct_path(came_from, current)
nodes_currently_discovered.remove(current)
nodes_already_evaluated.add(current)
for neighbor in vertex_set[current]:
if neighbor in nodes_already_evaluated:
continue
if neighbor not in nodes_currently_discovered:
nodes_currently_discovered.add(neighbor)
tentative_g_score = g_score[current] + a_star_distance_between(current, neighbor)
if tentative_g_score >= g_score[neighbor]:
continue # not a better path
# best path until now
came_from[neighbor] = current
g_score[neighbor] = tentative_g_score
f_score[neighbor] = g_score[neighbor] + a_star_heuristic_between(neighbor, stop)
Of the three variants addressed above, the easiest to derive from the above implementation is (2) multi-target:
A* multi-target
from statistics import mean
def multi_target_a_star(start, stops):
# CHANGE
stops_paths = {stop: None for stop in stops}
nodes_already_evaluated = set() # a.k.a. closed set
nodes_currently_discovered = {start} # a.k.a. open set
came_from = dict()
# for each node, cost of getting from start node to that node
g_score = {v: inf for v in list(vertex_set.keys())}
g_score[start] = 0
# for each node, cost of getting from the start node to the goal by passing through that node
f_score = {v: inf for v in list(vertex_set.keys())}
f_score[start] = 1 # normally a_star_heuristic_between(start, stop), 1 here because of hard coded value
while nodes_currently_discovered:
current = a_star_lowest_f_score(f_score, nodes_currently_discovered)
        if current in stops_paths and stops_paths[current] is None:
            # CHANGE
            stops_paths[current] = reconstruct_path(came_from, current)
            if all(v is not None for v in stops_paths.values()):
                return stops_paths
nodes_currently_discovered.remove(current)
nodes_already_evaluated.add(current)
for neighbor in vertex_set[current]:
if neighbor in nodes_already_evaluated:
continue
if neighbor not in nodes_currently_discovered:
nodes_currently_discovered.add(neighbor)
tentative_g_score = g_score[current] + a_star_distance_between(current, neighbor)
if tentative_g_score >= g_score[neighbor]:
continue # not a better path
# best path until now
came_from[neighbor] = current
g_score[neighbor] = tentative_g_score
# CHANGE
f_score[neighbor] = g_score[neighbor] + mean([a_star_heuristic_between(neighbor, stop) for stop in stops])
A* depth lock
def a_star(start, stop, max_depth=inf):
nodes_already_evaluated = set() # a.k.a. closed set
nodes_currently_discovered = {start} # a.k.a. open set
came_from = dict()
# for each node, cost of getting from start node to that node
g_score = {v: inf for v in list(vertex_set.keys())}
g_score[start] = 0
# for each node, cost of getting from the start node to the goal by passing through that node
f_score = {v: inf for v in list(vertex_set.keys())}
f_score[start] = 1 # normally a_star_heuristic_between(start, stop), 1 here because of hard coded value above
    # keep track of depth. This is not the most efficient way to do so, but it lets us avoid modifying our distance and heuristic functions.
d_score = {v: inf for v in list(vertex_set.keys())}
d_score[start] = 0
while nodes_currently_discovered:
current = a_star_lowest_f_score(f_score, nodes_currently_discovered)
if current == stop:
return reconstruct_path(came_from, current)
nodes_currently_discovered.remove(current)
nodes_already_evaluated.add(current)
# CHANGE: test for depth
if d_score[current] + 1 > max_depth:
            # NOTE: at this point current will NOT be re-evaluated even if there is another
            # path to current of length <= max_depth. This stems from current being placed in
            # nodes_already_evaluated, and from the node and its distance not being kept together
            # in a tuple, e.g. (node, dist) - which would require updating a couple of functions.
continue
for neighbor in vertex_set[current]:
if neighbor in nodes_already_evaluated:
continue
if neighbor not in nodes_currently_discovered:
nodes_currently_discovered.add(neighbor)
# CHANGE
d_score[neighbor] = d_score[current] + 1
tentative_g_score = g_score[current] + a_star_distance_between(current, neighbor)
if tentative_g_score >= g_score[neighbor]:
continue # not a better path
# best path until now
came_from[neighbor] = current
g_score[neighbor] = tentative_g_score
f_score[neighbor] = g_score[neighbor] + a_star_heuristic_between(neighbor, stop)
Question
My question is how to implement a version** (see note) of A* that can be called as follows:
a_star(start, stops, max_depth, num_paths, max_duration)
** NOTE: depending on the changes made, the resultant path search may no longer be comparable to A* as it may lose completeness. I use "version" loosely, simply to denote that in this case it was based on A*.
where:
start: starting vertex
stops: a list of vertices to which to find a path from start (e.g. start -> stops[0], start -> stops[1], ..., start -> stops[-1] )
max_depth: an integer representing the maximum path length to explore, e.g. max_depth=3 allows start -(1)-> a -(2)-> b -(3)-> stop
num_paths: the maximum number of paths to find from start to each stop, e.g. if len(stops) == 2 and num_paths == 3 then there should be at most 6 paths
max_duration: the maximum run time allowed before the function terminates and returns what has been found so far
I know that each of these variants (excluding max-depth) is a major topic in path-finding research. Multi-path has received more attention, with various underlying premises. Some approaches send out multiple agents, such as Bee-Sensor. A more simplistic approach simply removes the vertices of the first found path from the graph and then re-runs the algorithm on the subgraph (which I find very unsatisfactory). If a --> b --> c --> e and a --> b --> d --> e are both paths, I would want both to be found, rather than discarding the vertices b and c from evaluation.
One of the most interesting recent contributions comes from Yin and Yang, 2015. However, similar to the aforementioned naive approach of removing vertices of found paths, this method finds paths with greater variance (great for their use case, not for mine).
I have implemented my stated goal for DFS and BFS. However, the framework of A* makes it very difficult for me to see how to then get the second, third, etc. best path.
Current thoughts
For multi-path I initially thought storing a simple list of found paths and checking its length against the desired amount would be sufficient. However, this does not work because visited nodes are tracked in two sets: the 'closed' set (nodes_already_evaluated) and the 'open' set (nodes_currently_discovered). If the path A* returns is a --> b --> c, then at that point the closed set contains at least {a, b, c}, and we do not wish to remove b from evaluation, as a --> b --> d --> c might be the second best path. However, putting both b and the goal (c) back in the open set just results in making the same path twice and recomputing already determined values - especially if this multi-path approach is mixed with multi-target.
A valid question might be "why would you want a multi-path, multi-target algorithm when you could just launch multi-path single-target searches in parallel?" In the case where the distance function is constant or unknown, A* becomes a greedy best-first search, hence the explored frontier is the same regardless of the final target. The multi-target implementation given above just keeps expanding the frontier until all targets are reached, which is more efficient.
I would greatly appreciate any guidance toward a path-finding function with the previously listed arguments (start, stops, max_depth, num_paths, max_duration) that at least incorporates a heuristic function and has competitive runtime.
For testing purposes I provide the following small graph:
vertex_set = {
1: {3, 4, 13, 21},
2: {3, 20},
3: {1, 2, 4, 5, 23},
4: {1, 3, 6},
5: {3, 7},
6: {4, 8, 16, 23},
7: {5, 9},
8: {6, 10, 11},
9: {7, 12},
10: {13, 14, 15},
11: {15, 16},
12: {9, 20},
13: {1, 10, 17},
14: {10, 17, 18},
15: {10, 11},
16: {6, 11, 18, 19, 20},
17: {13, 14},
18: {14, 16, 21},
19: {16, 22},
20: {2, 12, 16},
21: {1, 18},
22: {19},
23: {3, 6, 22}
}
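For a quick smoke test, the functions above can be exercised on this graph along the following lines (a hypothetical driver; exact paths depend on the dummy heuristic, and note that if both a_star definitions live in one module the depth-locked one shadows the plain one, behaving identically while max_depth stays at inf):

if __name__ == "__main__":
    # plain / depth-locked A*: a single path from 1 to 9
    print(a_star(1, 9))
    # depth-locked A*: only explore paths of at most 4 hops
    print(a_star(1, 9, max_depth=4))
    # multi-target A*: one path each from 1 to 9 and from 1 to 22
    print(multi_target_a_star(1, [9, 22]))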
Notes:
@tobias_k has pointed out several optimizations to the above code, such as using a priority heap for finding the lowest-f-score vertex.
In addition, I wish to point out that the above code is not intended for any serious production purpose. Rather, as stated initially, it is an implementation in a more readable language to help the discussion of how to modify the core concepts of the A* algorithm.
There are other optimizations. For example, for the depth-lock variant (searching for paths of at most a fixed length), it would save both memory and time not to store all distances in d_score; rather, nodes_already_evaluated and nodes_currently_discovered (the closed and open sets respectively) could store tuples of (vertex, distance_when_discovered). This would allow a vertex that is reachable from two different vertices at different depths to be added to nodes_currently_discovered (the open set) more than once - which is putatively a desirable feature depending on use case.
Along these lines, @tobias_k also suggests storing the f_score of a vertex in the tuple for the priority heap.
I do appreciate insights into optimizations. However, that is not the primary purpose of the question.
Related
Recently I took a test in the theory of algorithms. I had a normal best-first search algorithm (code below).
from queue import PriorityQueue
# Fill the adjacency list with empty lists
vertices = 14
graph = [[] for i in range(vertices)]
# Function for adding edges to graph
def add_edge(x, y, cost):
graph[x].append((y, cost))
graph[y].append((x, cost))
# Function For Implementing Best First Search
# Gives output path having the lowest cost
def best_first_search(source, target, vertices):
visited = [0] * vertices
pq = PriorityQueue()
pq.put((0, source))
print("Path: ")
while not pq.empty():
u = pq.get()[1]
# Displaying the path having the lowest cost
print(u, end=" ")
if u == target:
break
for v, c in graph[u]:
if not visited[v]:
visited[v] = True
pq.put((c, v))
print()
if __name__ == '__main__':
    # The nodes shown in the example above (as letters) are
    # implemented using integers: add_edge(x, y, cost)
add_edge(0, 1, 1)
add_edge(0, 2, 8)
add_edge(1, 2, 12)
add_edge(1, 4, 13)
add_edge(2, 3, 6)
add_edge(4, 3, 3)
source = 0
target = 2
best_first_search(source, target, vertices)
It prints Path: 0 1 0 2 (path sum: 8), which is correct.
My teacher suggested that I remake the code so that it looks for the local minimum path, i.e. Path: 0 1 2 (path sum: 13).
I need to greedily take the shortest edge from the current node to an unvisited node, and I don't really understand how to do it right.
Since this is homework, I won't spell out the entire code for you.
For best-first search, you don't need a priority queue. You just need to track which nodes you have visited, and which node you are currently at. While your current node is not the target node, find the shortest edge that leads to an unvisited node, and set your current node to the node at the other end of that edge.
So I have followed Wikipedia's pseudocode for Dijkstra's algorithm as well as Brilliant's: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Pseudocode https://brilliant.org/wiki/dijkstras-short-path-finder/. Here is my code, which doesn't work. Can anyone point out the flaw in my code?
# Uses python3
from queue import Queue
n, m = map(int, input().split())
adj = [[] for i in range(n)]
for i in range(m):
u, v, w = map(int, input().split())
adj[u-1].append([v, w])
adj[v-1].append([u, w])
x, y = map(int, input().split())
x, y = x-1, y-1
q = [i for i in range(n, 0, -1)]
#visited = set()
# visited.add(x+1)
dist = [float('inf') for i in range(len(adj))]
dist[x] = 0
# print(adj[visiting])
while len(q) != 0:
visiting = q.pop()-1
for i in adj[visiting]:
u, v = i
dist[u-1] = dist[visiting]+v if dist[visiting] + \
v < dist[u-1] else dist[u-1]
# print(dist)
if dist[y] != float('inf'):
print(dist[y])
else:
print(-1)
Your algorithm is not implementing Dijkstra's algorithm correctly. You are just iterating over all nodes in their input order and updating the distance to the neighbors based on the node's current distance. But that latter distance is not guaranteed to be the shortest distance, because you process some nodes before their "turn". Dijkstra's algorithm specifies a particular order of processing nodes, which is not necessarily the input order.
The main ingredient that is missing from your algorithm is a priority queue. You did import from queue, but never use it. Also, it lacks the marking of nodes as visited, a concept which you seem to have implemented a bit, but which you commented out.
The outline of the algorithm on Wikipedia explains the use of this priority queue in the last step of each iteration:
Otherwise, select the unvisited node that is marked with the smallest tentative distance, set it as the new "current node", and go back to step 3.
There is currently no mechanism in your code that selects the unvisited node with the smallest distance. Instead it picks the next node based on the order in the input.
To correct your code, please consult the pseudo code that is available on that same Wikipedia page, and I would advise to go for the variant with priority queue.
In Python you can use heapq for performing the actions on the priority queue (heappush, heappop).
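For illustration, here is a minimal sketch of that priority-queue variant (the names are mine, and it assumes a 0-based adjacency list of (neighbor, weight) pairs rather than the question's exact input format):

import heapq

def dijkstra(adj, source):
    # adj[u] is a list of (v, w) pairs for each node u
    dist = [float('inf')] * len(adj)
    dist[source] = 0
    visited = [False] * len(adj)
    pq = [(0, source)]  # entries are (tentative distance, node)
    while pq:
        d, u = heapq.heappop(pq)
        if visited[u]:
            continue  # stale queue entry; u was already finalized
        visited[u] = True
        for v, w in adj[u]:
            if d + w < dist[v]:
                dist[v] = d + w
                heapq.heappush(pq, (dist[v], v))
    return dist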
This is for my research in protein folding (so I guess technically a school project).
Summary:
I have the edges of a weighted undirected graph. Each vertex of the graph has anywhere from 1 to 20-ish edges. I would like to trim this graph down such that no vertex has more than 6 edges, while retaining as much connectivity as possible (keeping the total degree as high as possible).
Background:
I have a Delaunay tessellation of the atoms (essentially a point cloud) in a protein, built using the scipy library. I use this to create a list of all pairs of residues that are in contact with each other, storing the distance between them. This list contains every pair (twice) and the distance between the pairs. (A residue contains many atoms, so I use their average position to get the position of the residue.)
pairs
[(ALA 1, GLU 2, 2.7432), (ALA 1, GLU 2, 2.7432), (ALA 4, ASP 27, 4.8938), (ALA 4, ASP 27, 4.8938) ... ]
What I have tried (which works but isn't exactly what I want) is to only store the six closest contacts. (I sort the residue names so I can use collections later)
for contact in residue.contacts[:6]:
pairs.append( tuple( sorted([residue.name, contact.name], key=lambda r: r.name) + [residue.dist[contact]] ) )
And then remove any contacts that are not reciprocated (I guess technically, add the contacts that are):
new_pairs = []
counter=collections.Counter(pairs)
for key, val in counter.items():
if val == 2:
new_pairs.append(key)
This works, but I lose some information that I would like to keep. I phrased the question as a graph theory problem because I feel this problem has already been solved in that field.
I was thinking that a greedy algorithm might work:
while run_greedy:
# find the residue with the maximum number of neighbors
# find that residues pair with the maximum number of neighbors but only if the pair exists in pairs
# remove that pair from pairs
# if maximum_degree <= 6: run_greedy = False
Does the greedy algorithm work? Are there known algorithms that do this well? Is there a library that can do this (I am more than willing to change the format of the data to fit the library)?
I hope this is enough information, Thanks in advance for the help.
EDIT: this is a variant of the knapsack problem: you add edges one by one, and want to maximize the number of edges while the graph built does not exceed a given degree.
The following solution uses dynamic programming.
Let m[i, d] be the maximum subset of edges among e_0, ..., e_{i-1} creating a subgraph of maximum degree <= d:
m[i, 0] = {}
m[0, d] = {}
m[i, d] = m[i-1, d] + {e_i} if adding e_i keeps the maximum degree <= d
m[i, d] = whichever of m[i-1, d-1] + {e_i} and m[i-1, d] has more edges, otherwise
Hence the algorithm (not tested):
for i in 0..N:
m[i][0] = {}
for d in 1..K:
m[0][d] = {}
for d in 1..K:
for i in 1..N:
        G1 = m[i-1][d] + {e_i}
        if D(G1) <= d: # e_i can be added while keeping the maximum degree <= d
            m[i][d] = G1
else:
m[i][d] = max(m[i-1][d-1] + {e_i}, m[i-1][d]) # key=cardinal
The solution is m[N-1][K-1]. Time complexity is O(K N^2) (two nested loops, K by N, times a degree check that is O(N) or less).
Previous answer
TLDR; I don't know how to find an optimal solution, but a greedy algorithm might give you acceptable result.
The problem
Let me rephrase the problem, based on your question and your code: you want to remove a minimum number of edges from your graph in order to reduce the maximum degree of the graph to 6. That is, to get the maximal subgraph G' from G with d(u) <= 6 for all u in G'.
The closest idea I found is the K-core of a graph, but that's not exactly the same problem.
Your method
Your method is clearly not optimal, since you keep at most 6 edges of every vertex and recreate the graph with those edges. Take the triangle graph A-B-C, with each vertex's neighbors listed in order of increasing distance:
A -> 1. B, 2. C
B -> 1. C, 2. A
C -> 1. A, 2. B
If you try to reduce the maximum degree of this graph to 1 using your method, the first pass will remove A-C (C is the 2nd neighbor of A), B-A (A is the 2nd neighbor of B) and C-B (B is the 2nd neighbor of C):
A -> 1. B
B -> 1. C
C -> 1. A
The second pass, to ensure that the graph is undirected, will remove all the remaining edges (and vertices).
An optimal reduction would be:
A -> 1. B
B -> 1. A
Or any other pair of vertices in A, B, C.
Some strategy
Let:
k = 6
D(u) = max(d(u)-k, 0): the number of neighbors above k, or 0
w(u-v) (resp. s(u-v)) = the weak (resp. strong) endpoint of the edge: the one having the lowest (resp. highest) degree
m(u-v) = min(D(u), D(v))
M(u-v) = max(D(u), D(v))
Let S = sum(D(u) for u in G). The goal is to make S = 0 while removing a minimum number of edges. If you remove:
(1) a floating edge: m(u-v) > 0, then S is decreased by 2 (both endpoints lose 1 degree)
(2) a sinking edge: m(u-v) = 0 and M(u-v) > 0, then S is decreased by 1 (the degree of the weak endpoint is already <= 6)
(3) a sunk edge: M(u-v) = 0, then S is unchanged
Note that a floating edge may become a sinking edge if: 1. its weak endpoint has a degree of k+1; and 2. you remove another edge connected to this endpoint. Similarly, a sinking edge can become sunk.
You have to remove floating edges while avoiding the creation of sinking edges, because removing a floating edge is more efficient at reducing S. Let K be the number of floating edges removed, and L the number of sinking edges removed (we don't remove sunk edges), to make S = 0. We need 2*K + L >= S. Obviously, the idea is to make L as small as possible, because we want a small total number of edges removed (K + L).
I doubt you'll find an optimal greedy algorithm, because everything depends on the order of removing and the remote consequences of the current removing are hard to predict.
But you can use a general strategy to limit the creation of sinking edges:
do not remove edges with m(u-v) = 1 unless you have no choice.
if you have to remove an edge with m(u-v) = 1, choose the one whose weak endpoint has the fewest floating edges (they will become sinking edges).
An algorithm
Here's a greedy algorithm that implements this strategy:
while {u-v in G | m(u-v) > 0} is not empty: // remove floating edges first
    remove the edge u-v with:
        1. the maximum m(u-v)
        2. w(u-v) having the minimum number of neighbors t with D(t) > 0
        3. s(u-v) having the minimum number of neighbors t with D(t) > 0
remove all edges in {u-v in G | M(u-v) > 0} // clean up the sinking edges
clean orphan vertices
Termination: the algorithm terminates because we remove an edge on each iteration, thus {u-v in G | m(u-v) > 0} will become empty at some point.
Note: you can use a heap and update m(u-v) after each removal.
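A minimal Python sketch of this greedy loop (my own transcription, under assumptions: vertices are orderable, adj maps each vertex to a set of its neighbors, tie-break 3 is omitted, and the final clean-up only removes edges until every degree is <= k rather than removing every sinking edge):

def reduce_max_degree(adj, k=6):
    def D(u):
        return max(len(adj[u]) - k, 0)

    def m(u, v):
        return min(D(u), D(v))

    while True:
        # floating edges: both endpoints still above degree k
        floating = [(u, v) for u in adj for v in adj[u] if u < v and m(u, v) > 0]
        if not floating:
            break
        def key(edge):
            u, v = edge
            weak = min(u, v, key=lambda x: len(adj[x]))  # weak endpoint: lowest degree
            at_risk = sum(1 for t in adj[weak] if D(t) > 0)
            return (-m(u, v), at_risk)  # 1. max m(u-v); 2. fewest at-risk neighbors of w(u-v)
        u, v = min(floating, key=key)
        adj[u].discard(v)
        adj[v].discard(u)
    # clean up: drop edges at vertices still above k (sinking edges)
    for u in list(adj):
        while len(adj[u]) > k:
            v = next(iter(adj[u]))
            adj[u].discard(v)
            adj[v].discard(u)
    return adj

This recomputes the floating-edge set on every iteration; the heap suggested above would avoid that.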
I am learning about topological sort, and graphs in general. I implemented a version below using DFS, but I am having trouble understanding why the Wikipedia page says this is O(|V|+|E|), how to analyze its time complexity, and the difference between |V|+|E| and n^2 in general.
Firstly, I have two nested loops, so logic says it would be O(n^2). But isn't it also true that in any DAG (or tree) there are n-1 edges and n vertices? How is this any different from n^2 if we can drop the "-1" as insignificant?
graph = {
1:[4, 5, 7],
2:[3,5,6],
3:[4],
4:[5],
5:[6,7],
6:[7],
7:[]
}
from collections import defaultdict
def topological_sort(graph):
ordered, marked = [], defaultdict(int)
while len(ordered) < len(graph):
for vertex in graph:
if marked[vertex]==0:
visit(graph, vertex, ordered, marked)
return ordered
def visit(graph, n, ordered, marked):
    if marked[n] == 1:
        raise ValueError('Not a DAG')  # raising a plain string is invalid in Python 3
marked[n] = 1
for neighbor in graph.get(n):
if marked[neighbor]!=2:
visit(graph, neighbor, ordered, marked)
marked[n] = 2
ordered.insert(0, n)
def main():
print(topological_sort(graph))
main()
The proper implementation works in O(|V| + |E|) time because it goes through every edge and every vertex at most once. That is the same as O(|V|^2) for a complete (or almost complete) graph; however, it's much better when the graph is sparse.
Your implementation is O(|V|^2), not O(|V| + |E|). These two nested loops:
while len(ordered) < len(graph):
for vertex in graph:
if marked[vertex]==0:
visit(graph, vertex, ordered, marked)
do 1 + 2 + ... + |V| = O(|V|^2) iterations in the worst case (for instance, for an empty graph). You can easily fix this by getting rid of the outer loop (it's that simple: just remove the while loop; you don't need it).
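In other words, the fixed version (a sketch reusing the question's visit helper) is just:

def topological_sort(graph):
    ordered, marked = [], defaultdict(int)
    # one pass suffices: visit() already walks every reachable,
    # unfinished vertex exactly once
    for vertex in graph:
        if marked[vertex] == 0:
            visit(graph, vertex, ordered, marked)
    return ordered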
I have a directed graph in which I want to efficiently find a list of all K-th order neighbors of a node. K-th order neighbors are defined as all nodes which can be reached from the node in question in exactly K hops.
I looked at networkx and the only relevant function was neighbors. However, this just returns the order-1 neighbors. For higher orders, we need to iterate to determine the full set. I believe there should be a more efficient way of accessing K-th order neighbors in networkx.
Is there a function which efficiently returns the K-th order neighbors, without incrementally building the set?
EDIT: In case there exist other graph libraries in Python which might be useful here, please do mention those.
You can use:
nx.single_source_shortest_path_length(G, node, cutoff=K)
where G is your graph object.
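Note that this returns a dict mapping each node within distance K to its distance, so for neighbors at exactly K hops you can filter it, e.g. (a small usage sketch with an arbitrary node and K):

import networkx as nx

G = nx.gnp_random_graph(50, 0.1, directed=True)
lengths = nx.single_source_shortest_path_length(G, 0, cutoff=3)
print([n for n, d in lengths.items() if d == 3])  # nodes exactly 3 hops away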
For NetworkX the best method is probably to build the set of neighbors at each k. You didn't post your code, but it seems you have probably already done something like this:
import networkx as nx
def knbrs(G, start, k):
nbrs = set([start])
for l in range(k):
nbrs = set((nbr for n in nbrs for nbr in G[n]))
return nbrs
if __name__ == '__main__':
G = nx.gnp_random_graph(50,0.1,directed=True)
print(knbrs(G, 0, 3))
Yes, you can get a k-order ego graph of a node:
subgraph = nx.ego_graph(G, node, radius=k)
The neighbors are then the nodes of the subgraph:
neighbors = list(subgraph.nodes())
Note that ego_graph includes all nodes within distance k of the node (the node itself included), not only those at exactly k hops.
I had a similar problem, except that I had a digraph and needed to maintain the edge-attribute dictionary. This mutual-recursion solution keeps the edge-attribute dictionary if you need that.
def neighbors_n(G, root, n):
    E = nx.DiGraph()
    def n_tree(tree, n_remain):
        neighbors_dict = G[tree]
        # copy each outgoing edge, keeping its attribute
        for neighbor, relations in neighbors_dict.items():
            E.add_edge(tree, neighbor, rel=relations['rel'])
        n_forest(list(neighbors_dict.keys()), n_remain=(n_remain - 1))
    def n_forest(forest, n_remain):
        if n_remain <= 0:
            return
        for tree in forest:
            n_tree(tree, n_remain=n_remain)
    n_forest([root], n)
    return E
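A small usage sketch (the 'rel' edge attribute and the toy graph are assumptions to match the function above):

import networkx as nx

G = nx.DiGraph()
G.add_edge('a', 'b', rel='knows')
G.add_edge('b', 'c', rel='knows')
G.add_edge('c', 'd', rel='knows')

E = neighbors_n(G, 'a', 2)
print(E.edges(data=True))  # edges within 2 hops of 'a', attributes kept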
You can solve your problem using a modified BFS algorithm. When you store a node in the queue, store its level (distance from the root) as well. Then, when a node is first discovered and colored, it is added to the list of nodes for its level. Here is an example based on this simple implementation:
#!/usr/bin/python
# -*- coding: utf-8 -*-
from collections import defaultdict
from collections import deque
kth_step = defaultdict(list)
class BFS:
def __init__(self, node,edges, source):
self.node = node
self.edges = edges
self.source = source
        self.color = ['W' for i in range(0, node)]  # W for White (unvisited)
        self.graph = [[False for i in range(0, node)] for j in range(0, node)]
self.queue = deque()
# Start BFS algorithm
self.construct_graph()
self.bfs_traversal()
def construct_graph(self):
for u,v in self.edges:
self.graph[u][v], self.graph[v][u] = True, True
def bfs_traversal(self):
self.queue.append((self.source, 1))
self.color[self.source] = 'B' # B for Black
kth_step[0].append(self.source)
while len(self.queue):
u, level = self.queue.popleft()
if level > 5: # limit searching there
return
for v in range(0, self.node):
if self.graph[u][v] == True and self.color[v]=='W':
self.color[v]='B'
kth_step[level].append(v)
self.queue.append((v, level+1))
'''
0 -- 1---7
| |
| |
2----3---5---6
|
|
4
'''
node = 8 # 8 nodes from 0 to 7
edges =[(0,1),(1,7),(0,2),(1,3),(2,3),(3,5),(5,6),(2,4)] # bi-directional edge
source = 0 # set fist node (0) as source
bfs = BFS(node, edges, source)
for key, value in kth_step.items():
    print(key, value)
Output:
$ python test.py
0 [0]
1 [1, 2]
2 [3, 7, 4]
3 [5]
4 [6]
I don't know networkx, nor did I find a ready-to-use algorithm in Graph Tool. I believe such a problem isn't common enough to have its own function. Also, I think it would be overcomplicated, inefficient and redundant to store lists of k-th neighbours for every node in a graph instance, so such a function would probably have to iterate over the nodes anyway.
As proposed previously, the following solution gives you all secondary neighbors (neighbors of neighbors) and lists all neighbors once (the solution is based on BFS):
{n: path for n, path in nx.single_source_shortest_path(G, 'a', cutoff=2).items() if len(path)==3}
Another solution, which is slightly faster (6.68 µs ± 191 ns vs. 13.3 µs ± 32.1 ns, measured with timeit), accounts for the fact that in undirected graphs the neighbor of a neighbor can be the source again:
def k_neighbors(G, source, cutoff):
neighbors = {}
neighbors[0] = {source}
for k in range(1, cutoff+1):
neighbors[k] = set()
        for node in neighbors[k-1]:
            neighbors[k].update(G.neighbors(node))
return neighbors
k_neighbors(B, 'a', 2) #dict keyed with level until `cutoff`, in this case 2
Both solutions give you the source itself as the 0th-order neighbor, so it depends on your context which one to prefer.