I'm using Python and Graphviz to draw some cluster graph consist of nodes.
I want to assign different colors to each node, dependent on an attribute, e.g. its x-coordinate.
Here's how I produce graph:
def add_nodes(graph, nodes):
for n in nodes:
if isinstance(n, tuple):
graph.node(n[0], **n[1])
else:
graph.node(n)
return graph
A = [[517, 1, [409], 10, 6],
[534, 1, [584], 10, 12],
[614, 1, [247], 11, 5],
[679, 1, [228], 13, 7],
[778, 1, [13], 14, 14]]
nodesgv = []
for node in A:
nodesgv.append((str(node[0]),{'label': str(node[0]), 'color': ???, 'style': 'filled'}))
graph = functools.partial(gv.Graph, format='svg', engine='neato')
add_nodes(graph(), nodesgv).render(('img/test'))
And now I want to assign a color to each node with the ordering of the first value of each node.
More specifically what I want is:
a red node (517)
a yellow node (534)
a green node (614)
a blue node (679)
and a purple node (778)
I know how to assign colors to the graph, but what I'm looking for is something similar to the c=x part when using matplotlib.
Problem is I'm not able to know the number of nodes (clusters) beforehand, so for example if I've got 7 nodes, I still want a graph with 7 nodes that start from a red one, and end with a purple one.
plt.scatter(x, y, c=x, s=node_sizes)
So is there any attribute in Graphviz that can do this?
Or can anyone tell me how does the colormap in matplotlib work?
Sorry for the lack of clarity. T^T
Oh I figured out a way to get what I want.
Just for recording and for someone else may have a same problem(?)
Can just rescale a color map and assign the corresponding index (of color) to the nodes.
def add_nodes(graph, nodes):
for n in nodes:
if isinstance(n, tuple):
graph.node(n[0], **n[1])
else:
graph.node(n)
return graph
A = [[517, 1, [409], 10, 6],
[534, 1, [584], 10, 12],
[614, 1, [247], 11, 5],
[679, 1, [228], 13, 7],
[778, 1, [13], 14, 14]]
nodesgv = []
Arange = [ a[0] for a in A]
norm = mpl.colors.Normalize(vmin = min(Arange), vmax = max(Arange))
cmap = cm.jet
for index, i in enumerate(A):
x = i[0]
m = cm.ScalarMappable(norm = norm, cmap = cmap)
mm = m.to_rgba(x)
M = colorsys.rgb_to_hsv(mm[0], mm[1], mm[2])
nodesgv.append((str(i[0]),{'label': str((i[1])), 'color': "%f, %f, %f" % (M[0], M[1], M[2]), 'style': 'filled'}))
graph = functools.partial(gv.Graph, format='svg', engine='neato')
add_nodes(graph(), nodesgv).render(('img/test'))
Related
Given a connected graph and a list of N-assigned vertexes, I want to find an efficient way to create N subgraphs, each containing one of the assigned vertexes.
To achieve that, we can prune the edges. However, we should prune less edge weight as possible.
For example, let's start with the following graph. We want to obtain three subgraphs containing one of the three red vertexes
The result should look like the following:
Right now, I'm using a heuristic, but it is not working well in some edge cases and has n^2 complexity on the number of vertexes. The idea is to calculate the shortest path between two vertex and remove the lightest edge and repeat until the vertex are disconnected.
Here is my code:
import pandas as pd
import igraph as ig
from collections import Counter
ucg_df = pd.DataFrame(
[
[0, 1, 100],
[0, 2, 110],
[2, 3, 70],
[3, 4, 100],
[3, 1, 90],
[0, 3, 85],
[5, 7, 90],
[0, 8, 100],
[3, 6, 10],
[2, 5, 60],
],
columns=["nodeA", "nodeB", "weight"],
)
ucg_graph = ig.Graph.DataFrame(ucg_df, directed=False)
ig.plot(
ucg_graph,
target='stack1.pdf',
edge_label=ucg_graph.es["weight"],
vertex_color=['red']*3 + ['green']*(len(ucg_df)-3),
vertex_label = ucg_graph.vs.indices
)
def generate_subgraphs_from_vertexes(g, vertex_list):
for i, vertex in enumerate(vertex_list):
for j in range(i + 1, len(vertex_list)):
while True:
path = g.get_shortest_paths(vertex_list[i], vertex_list[j], mode='ALL', output='epath',
weights='weight')[0]
if len(path) == 0:
break
edge_2_drop = min(g.es[path], key=lambda x: x['weight'])
edge_2_drop.delete()
return g
graph = generate_subgraphs_from_vertexes(ucg_graph, ucg_graph.vs[0,1,2])
ig.plot(
graph,
target='stack2.pdf',
edge_label=graph.es["weight"],
vertex_color=['red']*3 + ['green']*(len(ucg_df)-3),
vertex_label = graph.vs.indices
)
what kind of algorithm could I use to better solve this problem?
I am not familiar with igraph in Python, but below is my attempt in R. Hope you can get some hint here.
I think your problem can be reformulated into an assignment problem, since the key part is assigning "red" to associated "green" vertices to maximize the cost
library(igraph)
library(lpSolve)
# red vertices
vred <- V(g)[V(g)$color == "red"]
# subgraph that contains vred
sg <- induced.subgraph(
g,
unique(unlist(ego(g, 1, vred)))
)
# green vertices in sg
vgreen <- V(sg)[V(sg)$color == "green"]
# cost matrix
cost.mat <- get.adjacency(sg, attr = "label", sparse = FALSE)[vred, ][, vgreen]
p <- lp.assign(cost.mat, "max")
idx <- which(p$solution > 0, arr.ind = TRUE)
# edge list for max assignment
el1 <- cbind(names(vred[idx[, 1]]), names(vgreen[idx[, 2]]))
# all edges associated with vred
el <- get.edgelist(g)
el2 <- el[rowSums(matrix(el %in% names(vred), ncol = 2)) > 0, ]
# remove edges that are not obtained for the max assignment
rmEls <- do.call(
paste,
c(
data.frame(
el2[!apply(el2, 1, function(x) toString(sort(x))) %in% apply(el1, 1, function(x) toString(sort(x))), ]
),
sep = "|"
)
)
out <- g %>%
delete.edges(rmEls)
When running plot(out, layout = layout_nicely(g)), you will see
Data
df <- data.frame(
from = c(0, 0, 2, 3, 3, 0, 5, 0, 3, 2),
to = c(1, 2, 3, 4, 1, 3, 7, 8, 6, 5),
weight = c(100, 110, 70, 100, 90, 85, 90, 100, 10, 60)
)
# original graph object
g <- df %>%
graph_from_data_frame(directed = FALSE) %>%
set_edge_attr(name = "label", value = df$weight) %>%
set_vertex_attr(name = "color", value = ifelse(names(V(.)) %in% c("0", "1", "2"), "red", "green"))
Inspired by
Find rows of matrix which contain rows of another matrix,
I found, assuming the graph is undirected:
mtch <- matrix(match(el2, el1), ncol = 2)
idx <- which(abs(mtch[,1] - mtch[,2]) == nrow(el1))
rmEls <- get.edge.ids(g, t(el2[-idx,]))
rmEls
## [1] 1 2 3 6
The sample data is as follows:
unique_list = ['home0', 'page_a0', 'page_b0', 'page_a1', 'page_b1',
'page_c1', 'page_b2', 'page_a2', 'page_c2', 'page_c3']
sources = [0, 0, 1, 2, 2, 3, 3, 4, 4, 7, 6]
targets = [3, 4, 4, 3, 5, 6, 8, 7, 8, 9, 9]
values = [2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2]
Using the sample code from the documentation
fig = go.Figure(data=[go.Sankey(
node = dict(
pad = 15,
thickness = 20,
line = dict(color = "black", width = 0.5),
label = unique_list,
color = "blue"
),
link = dict(
source = sources,
target = targets,
value = values
))])
fig.show()
This outputs the following sankey diagram
However, I would like to get all the values which end in the same number in the same vertical column, just like how the leftmost column has all of it's nodes ending with a 0. I see in the docs that it is possible to move the node positions, however I was wondering if there was a cleaner way to do it other than manually inputting x and y values. Any help appreciated.
In go.Sankey() set arrangement='snap' and adjust x and y positions in x=<list> and y=<list>. The following setup will place your nodes as requested.
Plot:
Please note that the y-values are not explicitly set in this example. As soon as there are more than one node for a common x-value, the y-values will be adjusted automatically for all nodes to be displayed in the same vertical position. If you do want to set all positions explicitly, just set arrangement='fixed'
Edit:
I've added a custom function nodify() that assigns identical x-positions to label names that have a common ending such as '0' in ['home0', 'page_a0', 'page_b0']. Now, if you as an example change page_c1 to page_c2 you'll get this:
Complete code:
import plotly.graph_objects as go
unique_list = ['home0', 'page_a0', 'page_b0', 'page_a1', 'page_b1',
'page_c1', 'page_b2', 'page_a2', 'page_c2', 'page_c3']
sources = [0, 0, 1, 2, 2, 3, 3, 4, 4, 7, 6]
targets = [3, 4, 4, 3, 5, 6, 8, 7, 8, 9, 9]
values = [2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2]
def nodify(node_names):
node_names = unique_list
# uniqe name endings
ends = sorted(list(set([e[-1] for e in node_names])))
# intervals
steps = 1/len(ends)
# x-values for each unique name ending
# for input as node position
nodes_x = {}
xVal = 0
for e in ends:
nodes_x[str(e)] = xVal
xVal += steps
# x and y values in list form
x_values = [nodes_x[n[-1]] for n in node_names]
y_values = [0.1]*len(x_values)
return x_values, y_values
nodified = nodify(node_names=unique_list)
# plotly setup
fig = go.Figure(data=[go.Sankey(
arrangement='snap',
node = dict(
pad = 15,
thickness = 20,
line = dict(color = "black", width = 0.5),
label = unique_list,
color = "blue",
x=nodified[0],
y=nodified[1]
),
link = dict(
source = sources,
target = targets,
value = values
))])
fig.show()
I am trying to visualize some data regarding the time at which the process was running or alive and the time it was idle. For each process, I have a_x_axis the time at which process started running and a_live_for is the time it was alive after it woke up. I have two data points in for each process. I am trying to connect these two dots by a line by connecting 1st green dot with the first red dot and second green dot with the second red dot and so on, so I can see alive and idle time for each process in the large data set. I looked into scatter plot examples but could not find any way to solve this issue.
import matplotlib.pyplot as plt
a_x_axis = [32, 30, 40, 50, 60, 78]
a_live = [1, 3, 2, 1, 2, 4]
a_alive_for = [a + b for a, b in zip(a_x_axis, a_live)]
b_x_axis = [22, 25, 45, 55, 60, 72]
b_live = [1, 3, 2, 1, 2, 4]
b_alive_for = [a + b for a, b in zip(b_x_axis, b_live)]
a_y_axis = []
b_y_axis = []
for i in range(0, len(a_x_axis)):
a_y_axis.append('process-1')
b_y_axis.append('process-2')
print("size of a: %s" % len(a_x_axis))
print("size of a: %s" % len(a_y_axis))
plt.xlabel('time (s)')
plt.scatter(a_x_axis, [1]*len(a_x_axis))
plt.scatter(a_alive_for, [1]*len(a_x_axis))
plt.scatter(b_x_axis, [2]*len(b_x_axis))
plt.scatter(b_alive_for, [2]*len(b_x_axis))
plt.show()
You need:
import matplotlib.pyplot as plt
a_x_axis = [32, 30, 40, 50, 60, 78]
a_live = [1, 3, 2, 1, 2, 4]
a_alive_for = [a + b for a, b in zip(a_x_axis, a_live)]
b_x_axis = [22, 25, 45, 55, 60, 72]
b_live = [1, 3, 2, 1, 2, 4]
b_alive_for = [a + b for a, b in zip(b_x_axis, b_live)]
a_y_axis = []
b_y_axis = []
for i in range(0, len(a_x_axis)):
a_y_axis.append('process-1')
b_y_axis.append('process-2')
print("size of a: %s" % len(a_x_axis))
print("size of a: %s" % len(a_y_axis))
plt.xlabel('time (s)')
plt.scatter(a_x_axis, [1]*len(a_x_axis))
plt.scatter(a_alive_for, [1]*len(a_x_axis))
plt.scatter(b_x_axis, [2]*len(b_x_axis))
plt.scatter(b_alive_for, [2]*len(b_x_axis))
for i in range(0, len(a_x_axis)):
plt.plot([a_x_axis[i],a_alive_for[i]], [1,1], 'green')
for i in range(0, len(b_x_axis)):
plt.plot([b_x_axis[i],b_alive_for[i]], [2,2], 'green')
plt.show()
Output:
scatter is just not the tool for plotting lines, it's plot. And it accepts 2D-arrays of x- and y-coordinates, so you don't have to manually iterate over lists. So you would need sth like
plt.plot([a_x_axis, a_alive_for], [[1]*n,[1]*n], 'green')
with n = len(a_x_axis).
However, you could structure your data much better in numpy arrays or pandas dataframes where you can set titles for columns, too. (Is it that, what you wanted to achieve by appending 'process-x' to your data lists...?)
Also, the colors of your markers seem to me not chosen by purpose; if you want to have them the same like the lines you could even leave scatter completely away.
I am starting to work with IDE Jupyter && Python 3.6 and a question has arisen.
I have to draw through the IDE, a Hamiltonian path in the Petersen subgraph, but I do not know how to do it.
I show information about said graph:
Graph of Petersen: https://en.wikipedia.org/wiki/Petersen_graph
Hypohamiltonian graph: https://en.wikipedia.org/wiki/Hypohamiltonian_graph
Any idea of how you can make the comments?
Thank you very much.
To compute the Hamiltonian graph in Petersen graph we can use the solution from this answer
petersen = {1: [2,5,6], 2: [3,1,7], 3: [4,2,8], 4: [5,3,9], 5: [1,4,10],
6: [1,8,9], 7:[2,9,10], 8: [3,10,6], 9: [4,6,7], 10: [5,7,8]}
I've forgotten whether or not Petersen graphs are isomorphic to any of their vertex permutations so I will assume they are not. Therefore, instead of searching for pairs of vertices which form the ends of the path we will add two new vertices connected to every vertex of the original graph. So if a Hamiltonian path exists in the original graph, it will exist in this extended graph -- just cut off the two extra vertices (-1) and (-2).
# Add two new vertices (-1) and (-2)
for k in petersen:
petersen[k].append(-1)
petersen[k].append(-2)
petersen[-1] = list(range(1,11))
petersen[-2] = list(range(1,11))
Now we can apply the algorithm from the post:
def find_all_paths(graph, start, end, path=[]):
path = path + [start]
if start == end:
return [path]
if not start in graph:
return []
paths = []
for node in graph[start]:
if node not in path:
newpaths = find_all_paths(graph, node, end, path)
for newpath in newpaths:
paths.append(newpath)
return paths
for path in find_all_paths(petersen, -1, -2):
if len(path) == len(petersen):
print(path[1:-1])
[1, 2, 3, 4, 5, 10, 7, 9, 6, 8]
[1, 2, 3, 4, 5, 10, 8, 6, 9, 7]
[1, 2, 3, 8, 6, 9, 4, 5, 10, 7]
[1, 2, 3, 8, 6, 9, 7, 10, 5, 4]
[1, 2, 7, 9, 6, 8, 3, 4, 5, 10]
[1, 2, 7, 9, 6, 8, 10, 5, 4, 3]
...
Since this algorithm returns list of ALL paths between given vertices we will filter them only to Hamiltonian paths and cut off the extra vertices.
Surely, this can be more efficient, but I leave the optimizations to either you or someone else. For such a small graph as Petersen it works quickly enough in my opinion.
DRAWING
We randomly choose one path and store it in ham_path variable.
import random
ham_paths = [path[1:-1] for path in find_all_paths(petersen, -1, -2)
if len(path) == len(petersen)]
ham_path = random.choice(ham_paths)
Then we will use the networkx package to draw the graph and the chosen path.
import networkx
g = networkx.Graph()
for k, vs in petersen.items():
for v in vs:
if v in [-1, -2] or k in [-1, -2]:
continue
if abs(ham_path.index(k) - ham_path.index(v)) == 1:
g.add_edge(k,v, color='red', width=1.5)
else:
g.add_edge(k,v, color='black', width=0.5)
We create a networkx graph, and each edge that is in Hamiltonian path will be colored red and bold. On the other hand, every other edge will be thinner and black. We also do not want the extra vertices in our drawing.
pos = networkx.circular_layout(g)
edges = g.edges()
colors = [g[u][v]['color'] for u,v in edges]
widths = [g[u][v]['width'] for u,v in edges]
networkx.draw(g, pos, edges=edges, edge_color=colors, width=widths)
I am looking for a Python library which would support mesh queries. For now, I have looked at openmesh, but I am a bit afraid that would be an overkill for my small master thesis project. The features which I need is:
to iterate over vertices around a given vertex
iterate over all edges, faces, vertices
easily associate function values with each vertex, face, edge (I picture that these geometric entities are indexed)
And if I am really successful, I might need also to:
change the topology of the mesh, like adding or removing a vertex
Is it possible to do this with numpy so I could keep my depedency list small? For now I plan that the initial mesh will be generated with distmesh (pydistmesh). Does it have parts which could be useful for my mesh queries?
Theese kinds of queries became quite easy and effiecient with improved face based data structure which is used by CGAL. Here I have implemented code to valk around one specific vertex:
# The demonstration of improved face based data structure
from numpy import array
triangles = array([[ 5, 7, 10],
[ 7, 5, 6],
[ 4, 0, 3],
[ 0, 4, 6],
[ 4, 7, 6],
[ 4, 9, 10],
[ 7, 4, 10],
[ 0, 2, 1],
[ 2, 0, 6],
[ 2, 5, 1],
[ 5, 2, 6],
[ 8, 4, 3],
[ 4, 11, 9],
[ 8, 11, 4],
[ 9, 11, 3],
[11, 8, 3]], dtype=int)
points = array([[ 0.95448092, 0.45655774],
[ 0.86370317, 0.02141752],
[ 0.53821089, 0.16915935],
[ 0.97218064, 0.72769053],
[ 0.55030382, 0.70878147],
[ 0.34692982, 0.08765148],
[ 0.46289581, 0.29827649],
[ 0.21159925, 0.39472549],
[ 0.61679844, 0.79488884],
[ 0.4272861 , 0.93375762],
[ 0.12451604, 0.54267654],
[ 0.45974728, 0.91139648]])
import pylab as plt
fig = plt.figure()
pylab.triplot(points[:,0],points[:,1],triangles)
for i,tri in enumerate(triangles):
v1,v2,v3 = points[tri]
vavg = (v1 + v2 + v3)/3
plt.text(vavg[0],vavg[1],i)
#plt.show()
## constructing improved face based data structure
def edge_search(v1,v2,skip):
"""
Which triangle has edge with verticies i and j and aren't triangle <skip>?
"""
neigh = -1
for i,tri in enumerate(triangles):
if (v1 in tri) and (v2 in tri):
if i is skip:
continue
else:
neigh = i
break
return(neigh)
def triangle_search(i):
"""
For given vertex with index i return any triangle from neigberhood
"""
for i,tri in enumerate(triangles):
if i in tri:
return(i)
neighberhood = []
for i,tri in enumerate(triangles):
v1, v2, v3 = tri
t3 = edge_search(v1,v2,i)
t1 = edge_search(v2,v3,i)
t2 = edge_search(v3,v1,i)
neighberhood.append([t1,t2,t3])
neighberhood = array(neighberhood,dtype=int)
faces = []
for vi,_ in enumerate(points):
faces.append(triangle_search(vi))
## Now walking over first ring can be implemented
def triangle_ring(vertex):
tri_start = faces[vertex]
tri = tri_start
## with asumption that vertex is not on the boundary
for i in range(10):
yield tri
boolindx = triangles[tri]==vertex
# permutating to next and previous vertex
w = boolindx[[0,1,2]]
cw = boolindx[[2,0,1]]
ccw = boolindx[[1,2,0]]
ct = neighberhood[tri][cw][0]
if ct==tri_start:
break
else:
tri=ct
for i in triangle_ring(6):
print(i)
## Using it for drawing lines on plot
vertex = 6
ring_points = []
for i in triangle_ring(vertex):
vi = triangles[i]
cw = (vi==vertex)[[2,0,1]]
print("v={}".format(vi[cw][0]))
ring_points.append(vi[cw][0])
data = array([points[i] for i in ring_points])
plt.plot(data[:,0],data[:,1],"ro")
#plt.savefig("topology.png")
plt.show()
input("Press Enter to continue...")
plt.close("all")