Generating graph from a batch of adjacency matrices - python

I am trying to train a network for generating adjacency matrix for graphs. In the training process, for a single graph I use
import networkx as nx
import numpy as np
# Adjacency matrix of a single 4-node graph, one row per node
adj = np.array(
    [
        [0, 1, 0, 0],
        [1, 0, 1, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 0],
    ]
)
# Turn the matrix into a NetworkX graph
G = nx.from_numpy_matrix(adj)
for transforming adjacency to graph. However, while training the network, I need to do this with a batch of matrices and it seems that networkx cannot do this. Is there a package that can handle the following:
import networkx as nx
import numpy as np
# Two 4x4 adjacency matrices stacked into one 3-D array
adjs = np.asarray(
    [
        np.asarray([[0, 1, 0, 0], [1, 0, 1, 0], [0, 0, 0, 1], [0, 0, 1, 0]]),
        np.asarray([[0, 1, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 1, 0]]),
    ]
)
# The whole stack is handed to the converter in a single call;
# this is the usage the question reports as unsupported.
G = nx.from_numpy_matrix(adjs)

You can add a map over the nx.from_numpy_matrix function to apply it over all the adjacency matrices in the adjs list. Something like this
import networkx as nx
import numpy as np
# Two 4x4 adjacency matrices stacked into one (2, 4, 4) array
adjs = np.asarray(
    [
        np.asarray([[0, 1, 0, 0], [1, 0, 1, 0], [0, 0, 0, 1], [0, 0, 1, 0]]),
        np.asarray([[0, 1, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 1, 0]]),
    ]
)
# Iterating over the stack yields one 2-D matrix at a time, so mapping the
# converter over it produces one graph per adjacency matrix.
graph_list = list(map(nx.from_numpy_matrix, adjs))
Now, graph_list is simply a list of NetworkX graphs.
# enumerate() supplies the position that the message reports; the original
# loop referenced an undefined name `idx`.
for idx, graph in enumerate(graph_list):
    print("Printing information for Graph at index:", idx)
    print(graph.nodes())
    print(graph.edges())
# Output:
# Printing information for Graph at index: 0
# [0, 1, 2, 3]
# [(0, 1), (1, 2), (2, 3)]
# Printing information for Graph at index: 1
# [0, 1, 2, 3]
# [(0, 1), (0, 3), (2, 3)]
You can view the code here as well.
Reference:
Python Map Tutorial

Related

How to retain node ordering when converting graph from networkx to pytorch geometric?

Question: How to retain the node ordering/labels when converting a graph from networkx to pytorch geometric?
Code: (to be run in Google Colab)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
from torch.nn import Linear
import torch.nn.functional as F
torch.__version__
# install pytorch geometric
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
from torch_geometric.nn import GCNConv
from torch_geometric.utils.convert import to_networkx, from_networkx
# Start from an empty undirected graph
G = nx.Graph()
# One node per car maker, each carrying a class label `y` and a display name
G.add_nodes_from(
    [
        ('Ford', {'y': 0, 'Name': 'Ford'}),
        ('Lexus', {'y': 1, 'Name': 'Lexus'}),
        ('Peugot', {'y': 2, 'Name': 'Peugot'}),
        ('Mitsubushi', {'y': 3, 'Name': 'Mitsubishi'}),
        ('Mazda', {'y': 4, 'Name': 'Mazda'}),
    ]
)
# Map every maker name to its position in the node iteration order
remapping = {name: position for position, name in enumerate(G.nodes())}
# Swap the names for integer ids, relabelling the existing graph in place (copy=False)
G = nx.relabel_nodes(G, remapping, copy=False)
# Edges, written from both endpoints; intended adjacency matrix:
# A = [(0, 1, 0, 1, 1), (1, 0, 1, 1, 0), (0, 1, 0, 0, 1), (1, 1, 0, 0, 0), (1, 0, 1, 0, 0)]
G.add_edges_from(
    [
        (0, 1), (0, 3), (0, 4),
        (1, 2), (1, 3),
        (2, 1), (2, 4),
        (3, 0), (3, 1),
        (4, 0), (4, 2),
    ]
)
# Hand the graph over to PyTorch Geometric and inspect its edge index
pyg_graph = from_networkx(G)
pyg_graph.edge_index
When I print the edge indices in the last line of the code, I get different answers each time I run it. Most importantly, I am looking to consistently get the same (correct) answer whereby each node numbering is retained from networkx:
tensor([[0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4],
[4, 2, 4, 2, 3, 0, 1, 1, 4, 0, 1, 3]])
The form of this edge index tensor is:
the first list contains the node ids of the source node
the second list contains the node ids of the target node
For the node ids to be retained, we would expect node 0 to appear three times in the first (source) list instead of just twice.
Is there any way for me to force PyTorch Geometric to copy over the node ids?
Thanks
[EDIT] One possible work-around I have is using the following bit of code which is able to produce edge index and weight tensors for PyTorch geometric
# Create a dictionary of the mappings from company --> node id
mapping_dict = {node: i for i, node in enumerate(G.nodes())}
# Get the number of nodes
num_nodes = len(mapping_dict)
# Now create a source, target, and edge list for PyTorch geometric graph
edge_source_list = []
edge_target_list = []
edge_weight_list = []
# iterate through all the edges
for source, target in G.edges():
    # first element of the edge tuple goes to the source list
    edge_source_list.append(mapping_dict[source])
    # last element of the edge tuple goes to the target list
    edge_target_list.append(mapping_dict[target])
    # every edge is given a weight of 1
    edge_weight_list.append(1)
# now create full edge lists for pytorch geometric - undirected edges need to be defined in both directions
full_source_list = edge_source_list + edge_target_list  # full source list
full_target_list = edge_target_list + edge_source_list  # full target list
full_weight_list = edge_weight_list + edge_weight_list  # full edge weight list
print(len(edge_source_list), len(edge_target_list), len(full_source_list))
# now convert these to torch tensors: row 0 holds the sources, row 1 the targets
edge_index_tensor = torch.LongTensor(
    np.concatenate([[np.array(full_source_list)], [np.array(full_target_list)]])
)
edge_weight_tensor = torch.FloatTensor(np.array(full_weight_list))
It seems this issue was resolved in the comments (the solution proposed by @Sparky05 is to use copy=True, which is the default for nx.relabel_nodes), but below is the explanation for why the node order is changed.
When copy=False is passed, nx.relabel_nodes will re-add the nodes to the graph in the order they appear in the set of keys of remapping dict. The relevant lines in the code are here:
def _relabel_inplace(G, mapping):
old_labels = set(mapping.keys())
new_labels = set(mapping.values())
if len(old_labels & new_labels) > 0:
# skip codes for labels sets that overlap
else:
# non-overlapping label sets
nodes = old_labels
# skip lines
for old in nodes: # this is now in the set order
By using set the order of nodes is modified, so to preserve the order the non-overlapping label sets should be treated as:
else:
# non-overlapping label sets
nodes = mapping.keys()
A related PR is submitted here.

How to assign graph label for graph in pytorch geometric?

Question: How can we assign a graph-level label to a graph made in PyTorch geometric?
Example: Let us say we create an undirected graph in PyTorch geometric and now we want to label that graph according to its class (can use a numerical value). How could we now assign a class label for the whole graph, such that it can be used for graph classification tasks? Furthermore, how could we collect a bunch of graphs with labels to form our dataset?
Code: (to be run in Google Colab)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
from torch.nn import Linear
import torch.nn.functional as F
torch.__version__
# install pytorch geometric
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
from torch_geometric.nn import GCNConv
from torch_geometric.utils.convert import to_networkx, from_networkx
# Start from an empty undirected graph
G = nx.Graph()
# One node per car maker, each carrying a class label `y` and a display name
G.add_nodes_from(
    [
        ('Ford', {'y': 0, 'Name': 'Ford'}),
        ('Lexus', {'y': 1, 'Name': 'Lexus'}),
        ('Peugot', {'y': 2, 'Name': 'Peugot'}),
        ('Mitsubushi', {'y': 3, 'Name': 'Mitsubishi'}),
        ('Mazda', {'y': 4, 'Name': 'Mazda'}),
    ]
)
# Pair every maker name with its position in the node iteration order
remapping = dict(zip(G.nodes(), range(G.number_of_nodes())))
# Build a relabelled copy (copy=True) whose nodes are the integer ids
G = nx.relabel_nodes(G, remapping, copy=True)
# Edges, written from both endpoints; intended adjacency matrix:
# A = [(0, 1, 0, 1, 1), (1, 0, 1, 1, 0), (0, 1, 0, 0, 1), (1, 1, 0, 0, 0), (1, 0, 1, 0, 0)]
G.add_edges_from(
    [
        (0, 1), (0, 3), (0, 4),
        (1, 2), (1, 3),
        (2, 1), (2, 4),
        (3, 0), (3, 1),
        (4, 0), (4, 2),
    ]
)
# Hand the graph over to PyTorch Geometric
pyg_graph = from_networkx(G)
Now how could we give this graph a label = 0 (for class e.g. cars)? Then if we did that for lots of graphs, how could we bunch them together to form a dataset?
Thanks
The pyg_graph object has type torch_geometric.data.Data.
Inspecting the source code of Data class, you can see that it defines the dunder methods __setattr__ and __setitem__.
Thanks to __setattr__, you can assign the label with the line
pyg_graph.label = 0
or you can instead use __setitem__ doing
pyg_graph["label"] = 0
The two notations perform the same action internally, so they can be used interchangeably.
To create a batch of graphs and labels, you can simply do
batch = torch_geometric.data.Batch.from_data_list([pyg_graph, pyg_graph])
>>> batch.label
tensor([0, 0])
and PyG takes care of the batching of all attributes automatically.

Calculate the Laplacian matrix of a graph object in NetworkX

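I am writing my own function that calculates the Laplacian matrix for any directed graph, and am struggling with filling the diagonal entries of the resulting matrix. The following equation is what I use to calculate entries of the Laplacian matrix, where e_ij represents (the weight of) an edge from node i to node j, taken as 0 when there is no such edge:
$$L(G)_{ij} = \begin{cases} e_{ij}, & i \neq j \\ -\sum_{k \neq i} e_{ki}, & i = j \end{cases}$$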
I am creating graph objects with NetworkX (https://networkx.org/). I know NetworkX has its own Laplacian function for directed graphs, but I want to be 100% sure I am using a function that carries out the correct computation for my purposes. The code I have developed thus far is shown below, for the following example graph:
# Create a simple example of a directed weighted graph
G = nx.DiGraph()
G.add_nodes_from([1, 2, 3])
G.add_weighted_edges_from([(1, 2, 1), (1, 3, 1), (2, 1, 1), (2, 3, 1), (3, 1, 1), (3, 2, 1)])
# Put node, edge, and weight information into Python lists
node_list = list(G.nodes())
edge_list = []
weight_list = []
for source, target in G.edges():
    weight_list.append(G.get_edge_data(source, target)['weight'])
    # Node labels start at 1; shift them down so they can index a matrix from 0
    edge_list.append((source - 1, target - 1))
print(edge_list)
> [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
# Fill in the non-diagonal entries of the Laplacian
num_nodes = len(node_list)
num_edges = len(edge_list)
J = np.zeros(shape=(num_nodes, num_nodes))
# edge_list and weight_list are parallel: the k-th weight belongs to the k-th (i, j) pair
for (i, j), weight in zip(edge_list, weight_list):
    J[i, j] = weight
I am struggling to figure out how to fill in the diagonal entries. edge_list is a list of tuples. To perform the computation in the above equation for L(G), I need to loop through the second entries of each tuple, store the first entry into a temporary list, sum over all the elements of that temporary list, and finally store the negative of the sum in the correct diagonal entry of L(G).
Any suggestions would be greatly appreciated, especially if there are steps above that can be done more efficiently or elegantly.
I adjusted networkx.laplacian_matrix function for undirected graphs a little bit
import networkx as nx
import scipy.sparse
# Example directed graph: three nodes, every ordered pair joined by a weight-1 edge
G = nx.DiGraph()
G.add_nodes_from([1, 2, 3])
G.add_weighted_edges_from([(1, 2, 1), (1, 3, 1), (2, 1, 1), (2, 3, 1), (3, 1, 1), (3, 2, 1)])
# Fix the row/column order of the matrix to the graph's node iteration order
nodelist = list(G)
# Weighted adjacency matrix in CSR format
# NOTE(review): to_scipy_sparse_matrix was removed in NetworkX 3.0 (replaced by
# to_scipy_sparse_array) - confirm against the installed version before reuse
A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight="weight", format="csr")
n, m = A.shape
# Sum along axis 0, i.e. one total per column
diags = A.sum(axis=0) # 1 = outdegree, 0 = indegree
# Put those totals on the main diagonal (offset 0) of a sparse matrix
D = scipy.sparse.spdiags(diags.flatten(), [0], m, n, format="csr")
# Off-diagonal entries keep the edge weights; the diagonal becomes minus the column total
print((A - D).todense())
# [[-2 1 1]
# [ 1 -2 1]
# [ 1 1 -2]]
I will deviate a little from your method, since I prefer to work with Numpy if possible :P.
In the following snippet, I generate test data for a network of n=10 nodes; that is, I generate an array of tuples V to populate with random nodes, and also a (n,n) array A with the values of the edges between nodes. Hopefully the code is somewhat self-explanatory and is correct (let me know otherwise):
from random import sample
import numpy as np
# Number and list of nodes
n = 10
nodes = list(np.arange(n))  # random.sample needs list
# Test array of linked nodes
# V[i] is the list of all nodes the i-node connects to.
V = np.zeros(n, dtype=tuple)
for i in range(n):
    nv = np.random.randint(5)  # Random number of edges from node i
    # To avoid self-loops (do not know if it is your case - comment out if necessary):
    # redraw the neighbours until node i is not among them
    itself = True
    while itself:
        cnodes = sample(nodes, nv)  # samples nv elements from the nodes list w/o repetition
        itself = i in cnodes
    V[i] = cnodes
# Test matrix of weighted edges (from i-node to j-node)
A = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        if j in V[i]:
            A[i, j] = np.random.random() * 5
# Laplacian of network
J = np.copy(A)  # This already sets the non-diagonal elements
for i in range(n):
    # Diagonal entry: minus the sum of column i, i.e. of the weights of edges pointing at node i
    J[i, i] = - np.sum(A[:, i]) - A[i, i]
Thank you all for your suggestions! I agree that numpy is the way to go. As a rudimentary solution that I will optimize later, this is what I came up with:
def Laplacian_all(edge_list, weight_list, num_nodes, num_edges):
    """Build the Laplacian-style matrix of a directed weighted graph.

    Off-diagonal entry ``J[i, j]`` is the weight of the edge from node ``i``
    to node ``j``. Diagonal entry ``J[i, i]`` is minus the total weight of
    the edges whose target is node ``i``.

    Parameters
    ----------
    edge_list : sequence of (source, target) index pairs, 0-based.
    weight_list : sequence of weights, parallel to ``edge_list``.
    num_nodes : number of nodes; the result is ``num_nodes x num_nodes``.
    num_edges : number of leading entries of ``edge_list`` to process.

    Returns
    -------
    numpy.ndarray of shape ``(num_nodes, num_nodes)``.
    """
    J = np.zeros(shape=(num_nodes, num_nodes))
    # Running total of incoming weight per node, gathered in the same pass
    # that writes the off-diagonal entries (instead of rescanning every edge
    # once per node).
    incoming = [0] * num_nodes
    for x in range(num_edges):
        i, j = edge_list[x][0], edge_list[x][1]
        weight = weight_list[x]
        J[i, j] = weight
        # Only non-negative targets count towards a diagonal entry; a negative
        # index would wrap around and credit the wrong node.
        if j >= 0:
            incoming[j] += weight
    # Written last so that a self-loop weight stored at J[i, i] is replaced
    for i in range(num_nodes):
        J[i, i] = -1 * incoming[i]
    return J
I have yet to test this on different graphs, but this was what I was hoping to figure out for my immediate purposes.

Graph isomorphism with constraints on the edges using networkx

I would like to define my own isomorphism of two graphs. I want to check if two graphs are isomorphic given that each edge has some attribute --- basically the order of placing each edge. I wonder if one can use the method:
networkx.is_isomorphic(G1,G2, edge_match=some_callable)
somehow by defining function some_callable().
For example, the following graphs are isomorphic, because you can relabel the nodes to obtain one from another.
Namely, relabel [2<->3].
But, the following graphs are not isomorphic.
There is no way to obtain one from another by re-labeling the nodes.
Here you go. This is exactly what the edge_match option is for. I'll create 3 graphs: the first two are isomorphic (even though the weights have different names --- I've set the comparison function to account for that), and the third is not isomorphic to the first.
import networkx as nx
# Three graphs with edges 0-1, 0-2 and 0-3. The third item of every triple is
# the edge value, stored under the attribute name given by `weight`.
G1, G2, G3 = nx.Graph(), nx.Graph(), nx.Graph()
G1.add_weighted_edges_from(
    [(0, 1, 0), (0, 2, 1), (0, 3, 2)],
    weight='aardvark',
)
G2.add_weighted_edges_from(
    [(0, 1, 0), (0, 2, 2), (0, 3, 1)],
    weight='baboon',
)
G3.add_weighted_edges_from(
    [(0, 1, 0), (0, 2, 2), (0, 3, 2)],
    weight='baboon',
)
def comparison(D1, D2):
    """Return True when two edge-attribute dicts carry the same value.

    D1 is the attribute dict of an edge u,v in the first graph and D2 that of
    an edge x,y in the second graph. The value is read from 'aardvark' in D1
    and from 'baboon' in D2, so the two graphs may name the attribute
    differently.
    """
    return D1['aardvark'] == D2['baboon']
nx.is_isomorphic(G1, G2, edge_match = comparison)
> True
nx.is_isomorphic(G1, G3, edge_match = comparison)
> False
Here is an answer that addresses the specific problem in the question, with the very same graphs. Note that I'm using networkx.MultiGraph and storing an 'ordering' attribute on each edge that records the order in which the edges are placed.
import networkx as nx
# Four multigraphs; the third item of every triple is stored on the edge as 'ordering'
G1 = nx.MultiGraph()
G2 = nx.MultiGraph()
G3 = nx.MultiGraph()
G4 = nx.MultiGraph()
# G1 and G2: edges 0-1, 0-2 and 0-3 with different ordering values
G1.add_weighted_edges_from([(0, 1, 0), (0, 2, 1), (0, 3, 2)], weight='ordering')
G2.add_weighted_edges_from([(0, 1, 0), (0, 3, 1), (0, 2, 2)], weight='ordering')
# G3 and G4: two parallel 0-1 edges plus one 2-3 edge
G3.add_weighted_edges_from([(0, 1, 0), (0, 1, 1), (2, 3, 2)], weight='ordering')
G4.add_weighted_edges_from([(0, 1, 0), (2, 3, 1), (0, 1, 2)], weight='ordering')
def comparison(D1, D2):
    """Return True when the 'ordering' values stored under key 0 match.

    D1 and D2 are each indexed first by 0 and then by 'ordering'.
    NOTE(review): presumably these are the edge-key -> attribute dicts that
    NetworkX hands to edge_match for multigraphs, in which case only the edge
    with key 0 is compared and further parallel edges are ignored - confirm.
    """
    return D1[0]['ordering'] == D2[0]['ordering']
nx.is_isomorphic(G1,G2, edge_match=comparison)
>True
nx.is_isomorphic(G3,G4, edge_match=comparison)
>False

NetworkX convert MultiGraph to simple Graph by averaging parallel length

One solution for converting a MultiGraph to a simple Graph uses Maehler's code
import networkx as nx
# Multigraph on nodes 1-3 in which 1-2 and 2-3 each appear twice
G = nx.MultiGraph()
G.add_nodes_from([1, 2, 3])
G.add_edges_from(
    [
        (1, 2), (1, 2),
        (1, 3),
        (2, 3), (2, 3),
    ]
)
# Construct a plain Graph from the multigraph
G2 = nx.Graph(G)
and another one using Aslak and Aric's code by summing the weight
import networkx as nx
# weighted MultiGraph
M = nx.MultiGraph()
M.add_edge(1, 2, weight=7)
M.add_edge(1, 2, weight=19)
M.add_edge(2, 3, weight=42)
# create weighted graph from M
G = nx.Graph()
# edges(data=True) yields (u, v, attribute dict) once per parallel edge;
# it replaces edges_iter(), which was removed in NetworkX 2.0.
for u, v, data in M.edges(data=True):
    # an edge without a weight counts as 1.0
    w = data.get('weight', 1.0)
    if G.has_edge(u, v):
        # further parallel edge: add its weight to the running total
        G[u][v]['weight'] += w
    else:
        G.add_edge(u, v, weight=w)
# list() so the output is a plain list of (u, v, attributes) tuples
print(list(G.edges(data=True)))
# [(1, 2, {'weight': 26}), (2, 3, {'weight': 42})]
Wondering how to average the parallel edges' weight?
Here is an approach similar to those that uses the statistics package to compute the mean of the edge weight.
import networkx as nx
from statistics import mean
# weighted MultiGraph
M = nx.MultiGraph()
M.add_edge(1, 2, weight=7)
M.add_edge(1, 2, weight=20)
M.add_edge(2, 3, weight=42)
M.add_edge(2, 3)
# create weighted graph G from M
G = nx.Graph()
for u, v in M.edges():
    # M.edges() repeats (u, v) for every parallel edge; handle each pair once
    if not G.has_edge(u, v):
        # average over all parallel edges between u and v,
        # setting the weight to 1 if no weight is given for an edge in M
        weight = mean(d.get('weight', 1) for d in M.get_edge_data(u, v).values())
        G.add_edge(u, v, weight=weight)
print(G.edges(data=True))
OUTPUT (networkx-2.0-dev)
EdgeView([(1, 2, {'weight': 13.5}), (2, 3, {'weight': 21.5})])

Categories