Related
I have tried to view the field lines of a vector field defined on an incomplete regular grid, first with PyVista streamlines and then with Plotly, without success. However, I do get good results with other, 2D streamplots:
2d streamplot of the data
Could someone help me with this ? I found no answer... Here is my data : https://wetransfer.com/downloads/7f3c4ae01e5922e753ea708134f956e720230214141330/bf11ab
import pandas as pd
import numpy as np
import pyvista as pv
import plotly.graph_objects as go
# Load the gridded vector field: one row per point, holding its coordinates
# (X, Y, Z) and the vector components at that point (Vx, Vy, Vz).
df = pd.read_csv("mix_griddata.csv")
X, Y, Z = (df[name] for name in ('X', 'Y', 'Z'))
Vx, Vy, Vz = (df[name] for name in ('Vx', 'Vy', 'Vz'))

# Seed points for the tubes: 1 % of each coordinate column.
# NOTE: the three columns are sampled independently of each other.
starts = dict(
    x=X.sample(frac=0.01, replace=False),
    y=Y.sample(frac=0.01, replace=False),
    z=Z.sample(frac=0.01, replace=False),
)

streamtube = go.Streamtube(
    x=X, y=Y, z=Z,
    u=Vx, v=Vy, w=Vz,
    starts=starts,
    sizeref=1,
    colorscale='Portland',
    showscale=False,
    maxdisplayed=30000000,
)
fig = go.Figure(data=streamtube)

# Cubic scene with thin margins around the plot.
fig.update_layout(
    scene=dict(aspectratio=dict(x=1, y=1, z=1)),
    margin=dict(t=10, b=10, l=10, r=10),
)
fig.show(renderer="browser")
# Streamlines
# Load the 4-D array; the last axis holds the three vector components.
mix_FD_grid = np.load("C:/Users/hd377/OneDrive - ensam.eu/0-Thesis/Fibres_Direction_in_allvolume/mix/mix_FD_grid.npy")
origin = (0, 0, 0)
grid_shape = mix_FD_grid[:, :, :, 0].shape
mesh = pv.UniformGrid(dimensions=grid_shape, spacing=(1, 1, 1), origin=origin)

# One row per mesh point, filled component by component (default C-order flatten).
vectors = np.empty((mesh.n_points, 3))
for component in range(3):
    vectors[:, component] = mix_FD_grid[:, :, :, component].flatten()
mesh['vectors'] = vectors

# Seed 200 points inside a sphere of radius 25 centred at (15, 0, 30).
seeding = dict(n_points=200, source_radius=25, source_center=(15, 0, 30))
stream, src = mesh.streamlines(
    'vectors', return_source=True, max_steps=20000, **seeding
)

p = pv.Plotter()
p.add_mesh(mesh.outline(), color="k")
p.add_mesh(stream.tube(radius=0.1))
p.camera_position = [(182.0, 177.0, 50), (139, 105, 19), (-0.2, -0.2, 1)]
p.show()
The Plotly window does appear in my browser, but no tubes are visible at all and the axis values are wrong.
PyVista does show something, but in the wrong direction, and clearly not what is expected (a longitudinal flux going around a central cone).
I'll only be tackling PyVista. It's hard to say for sure and I'm only guessing, but your data is probably laid out in the wrong order.
For starters, your data is inconsistent to begin with: your CSV has 1274117 rows whereas your multidimensional array has shape (37, 364, 100, 3), for a total of 1346800 vectors. And your question title says "unstructured", but your PyVista attempt uses a uniform grid.
Secondly, your CSV doesn't correspond to a regular grid in the first place, e.g. at the end of the file you have 15 rows starting with 368.693,36.971999999999994, then 8 rows starting with 369.71999999999997,36.971999999999994, then a single row starting with 370.74699999999996,36.971999999999994. In a regular grid you'd get the same number of items in each block.
Thirdly, your CSV has an unusual (MATLAB-smelling) layout in that the order of axes is z-x-y (rather than either x-y-z or z-y-x). This is a strong clue that your data is mangled due to memory layout issues when flattened. But the previous two points mean that I can't verify how your 4d array was created, so I have to take it for granted that it's correct.
Just plotting your raw data makes it obvious that the data is mangled in your original version (with some style cleanup):
import numpy as np
import pyvista as pv
# Load the array and build a unit-spaced uniform grid matching its first three axes.
mix_FD_grid = np.load("mix_FD_grid.npy")
origin = (0, 0, 0)
grid_dims = mix_FD_grid.shape[:-1]
mesh = pv.UniformGrid(dimensions=grid_dims, spacing=(1, 1, 1), origin=origin)

# Same shape and dtype as the mesh points; filled with the default (C-order) ravel.
vectors = np.empty_like(mesh.points)
for component in range(3):
    vectors[:, component] = mix_FD_grid[..., component].ravel()
mesh.point_data['vectors'] = vectors
mesh.plot()
The fragmented pattern you can see is a hallmark of data mangling due to mistaken memory layout.
If we assume the layout is more or less sane, trying column-major layout ("F" for "Fortran", also used by MATLAB) seems to make a lot more sense:
# Refill the point vectors, this time flattening each component in
# column-major ("F", Fortran) order.
for component in range(3):
    vectors[:, component] = mix_FD_grid[..., component].ravel('F')
mesh.point_data['vectors'] = vectors
mesh.plot()
So we can try using streamlines using that:
# Seed 200 points inside a sphere of radius 25 centred at (15, 0, 30) and
# integrate the 'vectors' field from them.
seeding = dict(n_points=200, source_radius=25, source_center=(15, 0, 30))
stream, src = mesh.streamlines(
    'vectors', return_source=True, max_steps=20000, **seeding
)

# Draw the grid outline plus the streamlines rendered as thin tubes.
p = pv.Plotter()
p.add_mesh(mesh.outline(), color="k")
p.add_mesh(stream.tube(radius=0.1))
p.show()
It doesn't look great:
So, you said that the streamlines should be longitudinal, but here they are clearly transversal. Can it be that the x and y field components are swapped? I can't tell, so let's try!
import numpy as np
import pyvista as pv
mix_FD_grid = np.load("mix_FD_grid.npy")
origin = (0, 0, 0)
grid_dims = mix_FD_grid.shape[:-1]
mesh = pv.UniformGrid(dimensions=grid_dims, spacing=(1, 1, 1), origin=origin)

# Column k of `vectors` is taken from component source_of[k] of the array:
# components 0 and 1 are swapped, component 2 is kept. Each component is
# flattened in column-major ("F") order.
source_of = (1, 0, 2)
vectors = np.empty_like(mesh.points)
for target, source in enumerate(source_of):
    vectors[:, target] = mix_FD_grid[..., source].ravel('F')
mesh.point_data['vectors'] = vectors

seeding = dict(n_points=200, source_radius=25, source_center=(15, 0, 30))
stream, src = mesh.streamlines(
    'vectors', return_source=True, max_steps=20000, **seeding
)

p = pv.Plotter()
p.add_mesh(mesh.outline(), color="k")
p.add_mesh(stream.tube(radius=0.1))
p.show()
Now we're talking!
Bonus: y field component on a volumetric plot:
# Volume-render column 1 of the point vectors, without a scalar bar.
y_component = vectors[:, 1]
mesh.plot(volume=True, scalars=y_component, show_scalar_bar=False)
I have a community list as the following list_community.
How do I edit the code below to make the community visible?
# Import under the `ig` alias: the plotting call below uses `ig.plot`,
# which a bare `from igraph import *` does not define.
import igraph as ig

# Communities to highlight (note that 'G' belongs to two of them).
list_community = [['A', 'B', 'C', 'D'], ['E', 'F', 'G'], ['G', 'H', 'I', 'J']]
list_nodes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
tuple_edges = [
    ('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D'), ('C', 'E'),
    ('E', 'F'), ('E', 'G'), ('F', 'G'), ('G', 'H'),
    ('G', 'I'), ('G', 'J'), ('H', 'I'), ('H', 'J'), ('I', 'J'),
]

# Make a graph
g_test = ig.Graph()
g_test.add_vertices(list_nodes)
g_test.add_edges(tuple_edges)

# Plot
layout = g_test.layout("kk")
# The graph object is `g_test`; the original referred to an undefined `g`.
g_test.vs["name"] = list_nodes
visual_style = {
    "vertex_label": g_test.vs["name"],
    "layout": layout,
}
ig.plot(g_test, **visual_style)
I would like a plot that visualizes the community as shown below.
I can also do this by using a module other than igraph.
Thank you.
In igraph you can use the VertexCover to draw polygons around clusters (as also suggested by Szabolcs in his comment). You have to supply the option mark_groups when plotting the cover, possibly with some additional palette if you want. See some more detail in the documentation here.
In order to construct the VertexCover, you first have to make sure you get integer indices for each node in the graph you created. You can do that using g_test.vs.find.
# Translate every community from vertex names to integer vertex indices,
# then wrap the index lists in a VertexCover of the graph.
clusters = []
for community in list_community:
    member_ids = [g_test.vs.find(name=vertex_name).index for vertex_name in community]
    clusters.append(member_ids)
cover = ig.VertexCover(g_test, clusters)
After that, you can simply draw the cover like
# Plot the cover with its groups marked, using a 3-colour rainbow palette.
group_palette = ig.RainbowPalette(3)
ig.plot(cover, mark_groups=True, palette=group_palette)
resulting in the following picture
Here is a script that somewhat achieves what you're looking for. I had to handle the cases of single-, and two-nodes communities separately, but for greater than two nodes this draws a polygon within the nodes.
I had some trouble with matplotlib not accounting for overlapping edges and faces of polygons which meant the choice was between (1) not having the polygon surround the nodes or (2) having an extra outline just inside the edge of the polygon due to matplotlib overlapping the widened edge with the fill of the polygon. I left a comment on how to change the code from option (2) to option (1).
I also blatantly borrowed a convenience function from this post to handle correctly sorting the nodes in the polygon for appropriate filling by matplotlib's plt.fill().
Option 1:
Option 2:
Full code:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
def sort_xy(x, y):
    """Order points by increasing polar angle around their centroid.

    The angle of each point is measured from the positive x direction about
    the mean of ``x`` and ``y`` and mapped into ``(0, 2*pi]``, so the returned
    points run counter-clockwise. This is the vertex order ``plt.fill`` needs
    to draw a simple polygon.

    Parameters
    ----------
    x, y : numpy.ndarray
        1-D arrays of equal length holding the point coordinates.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_sorted, y_sorted)``: the input coordinates reordered by angle.
    """
    dx = x - np.mean(x)
    dy = y - np.mean(y)
    # arctan2 is well defined even for a point sitting exactly on the
    # centroid, where the arccos(dx / r) formulation divides by zero.
    angles = np.arctan2(dy, dx)
    # Shift non-positive angles by a full turn so the range is (0, 2*pi];
    # a point directly to the right of the centroid therefore sorts last.
    angles = np.where(angles > 0, angles, angles + 2 * np.pi)
    mask = np.argsort(angles)
    return x[mask], y[mask]
# Draw the karate-club graph with a fixed spring layout.
G = nx.karate_club_graph()
pos = nx.spring_layout(G, seed=42)
fig, ax = plt.subplots(figsize=(8, 10))
nx.draw(G, pos=pos, with_labels=True)

# Detect communities and shade each one in its own colour.
communities = nx.community.louvain_communities(G)
alpha = 0.5
edge_padding = 10
# pyplot.get_cmap is used instead of matplotlib.cm.get_cmap, which newer
# Matplotlib releases no longer provide; the second argument resamples the
# colormap to one colour per community.
colors = plt.get_cmap('viridis', len(communities))

for i, comm in enumerate(communities):
    if len(comm) == 1:
        # Single node: draw a disc around it. Unpack instead of pop() so the
        # community set is not emptied as a side effect.
        (node,) = comm
        cir = plt.Circle(pos[node], edge_padding / 100, alpha=alpha, color=colors(i))
        ax.add_patch(cir)
    elif len(comm) == 2:
        # Two nodes: a thick line segment between them.
        comm_pos = {k: pos[k] for k in comm}
        coords = list(zip(*comm_pos.values()))
        x, y = coords[0], coords[1]
        plt.plot(x, y, linewidth=edge_padding, linestyle="-", alpha=alpha, color=colors(i))
    else:
        # Three or more nodes: a filled polygon with vertices in angular order.
        comm_pos = {k: pos[k] for k in comm}
        coords = list(zip(*comm_pos.values()))
        x, y = sort_xy(np.array(coords[0]), np.array(coords[1]))
        plt.fill(x, y, alpha=alpha, facecolor=colors(i),
                 edgecolor=colors(i),  # set to None to remove edge padding
                 linewidth=edge_padding)
Consider the tree below.
import matplotlib.pyplot as plt
import networkx as nx
import pydot
from networkx.drawing.nx_pydot import graphviz_layout
# Balanced binary tree of height 5; print its adjacency list line by line.
T = nx.balanced_tree(2, 5)
for adjacency_line in nx.generate_adjlist(T):
    print(adjacency_line)

# Lay the tree out with Graphviz "dot" and draw it.
pos = graphviz_layout(T, prog="dot")
draw_options = dict(node_color="y", edge_color='#909090', node_size=200, with_labels=True)
nx.draw(T, pos, **draw_options)
plt.show()
How can I draw this left to right so that the whole image is rotated by 90 degrees with the root on the right?
If you want to have fine-grained control over node positions (which includes rotating the whole graph) you can actually set each node's position explicitly. Here's a way to do that that produces a 'centred' hierarchy, left to right.
import itertools
import matplotlib.pyplot as plt
import networkx as nx
# Create a 12 x 8 figure for the drawing.
plt.figure(figsize=(12,8))
# Number of nodes in each successive layer of the graph.
subset_sizes = [1, 2, 4, 8, 16, 32]
def multilayered_graph(*subset_sizes):
    """Build a layered graph in which consecutive layers are fully connected.

    Nodes are numbered consecutively from 0. Layer ``i`` receives
    ``subset_sizes[i]`` nodes, each tagged with the node attribute
    ``layer=i``, and every node of a layer is joined to every node of the
    next layer.

    Returns the resulting ``networkx.Graph``.
    """
    # Running totals give the first and one-past-last node id of each layer.
    boundaries = list(itertools.accumulate((0,) + subset_sizes))
    layers = [range(lo, hi) for lo, hi in zip(boundaries, boundaries[1:])]

    G = nx.Graph()
    for index, members in enumerate(layers):
        G.add_nodes_from(members, layer=index)
    for upper, lower in zip(layers, layers[1:]):
        G.add_edges_from(itertools.product(upper, lower))
    return G
# Instantiate the graph
G = multilayered_graph(*subset_sizes)
# use the multipartite layout (replaced by the hand-made positions below)
pos = nx.multipartite_layout(G, subset_key="layer")

# Candidate y positions: a zero-centred list of `base` integer slots.
base = 128
thisList = list(range(-int(base/2), int(base/2), 1))

# For each layer: (x coordinate, index of first slot, spacing between slots).
layer_slots = [
    (10, int(base/2), int(base/2)),
    (40, int(base/4), int(base/2)),
    (60, int(base/8), int(base/4)),
    (80, int(base/16), int(base/8)),
    (100, int(base/32), int(base/16)),
    (120, int(base/64), int(base/32)),
]

# Assign each node of a layer the next free slot of that layer.
pos = dict()
for layer, (x_coord, first_slot, spacing) in enumerate(layer_slots):
    members = set([n for n in G.nodes if G.nodes[n]['layer'] == layer])
    slots = thisList[first_slot::spacing]
    pos.update((n, (x_coord, slots[i])) for i, n in enumerate(members))

nx.draw(G, pos, node_color='y', edge_color='grey', with_labels=True)
plt.show()
By using a position list, you can easily transform this graph into any number of alignments or rotations.
Notes
add nodes with a layer key and use multipartite_layout to make the graph layered
setup a "position list" based on the number of nodes in your widest layer (to make the layout centre-aligned, use a zero-centred list)
To assign positions in each layer use basic Python list slice/skip notation to grab the right number of positions, spaced the appropriate amount apart, starting at the right position for the alignment you want
You can do this with the rankdir attribute from graphviz, which can be set on a networkx graph by:
# Graph-level Graphviz attributes; rankdir "RL" requests a right-to-left layout.
T.graph["graph"] = {"rankdir": "RL"}
networkx issue #3547 gives some more info about setting graph attributes.
I have been trying to apply SOM on my dataframe, my dataframe has 25 columns where each column represents a house, each house has a values for power consumption for two years, and I want to cluster the data with number of clusters = 3.
I have done the following:
import sys
sys.path.insert(0, '../')
%load_ext autoreload
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import plot,axis,show,pcolor,colorbar,bone
from matplotlib.patches import Patch
%matplotlib inline
from minisom import MiniSom
from sklearn.preprocessing import minmax_scale, scale
%autoreload 2
data1 = pd.read_excel(r"C:\Users\user\Desktop\Thesis\Tarek\Consumption.xlsx")

# Columns h1..h25 hold ';'-separated strings: keep the third field as a float.
for house in range(1, 26):
    column = f'h{house}'
    data1[column] = data1[column].str.split(';').str[2].astype('float')

data1.fillna(0, inplace=True)
data1 = data1.round(decimals=2)
X = data1.values

# Train a 3x3 self-organising map on the 25-dimensional rows.
som = MiniSom(x=3, y=3, input_len=25, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_batch(data=X, num_iteration=1000, verbose=True)

# Distance map as background.
bone()
pcolor(som.distance_map().T)
colorbar()

markers = ['o', 's', 'v']
colors = ['r', 'g', 'y']
# NOTE: markers/colors hold 3 entries but are indexed by the row number i,
# so this loop raises IndexError as soon as i reaches 3.
for i, x in enumerate(X):
    w = som.winner(x)
    cell_x = w[0] + 0.5
    cell_y = w[1] + 0.5
    plot(cell_x,
         cell_y,
         markers[i],
         markeredgecolor=colors[i],
         markerfacecolor='None',
         markersize=10,
         markeredgewidth=2)
show()
when I am running the code, I am getting this error:
IndexError: list index out of range
please any tips to add the markers and colors in the right way without having any problems, and I would be glad if any one can help, I am a bit new to Python and tried to find a solution but I couldn`t find any.
The problem seems to be that the length of your X=data1.values is around 25 but the length of your markers and colors is only 3. So in the following for loop, when i is 3, you are trying to access markers[3] and colors[3] which throws an IndexError because both markers and colors goes up to index 2 (indexing starts from 0 in python)
for i, x in enumerate(X):
One solution is to define custom list of 25 markers and 25 colors. While you might want to define your own markers, you can leave the colors out and let the code choose automatic colors for the markeredgecolor
I have the following code to perform hierarchical clustering on data:
# Hierarchical clustering of `data` with the 'weighted' linkage method.
Z = linkage(data, method='weighted')
plt.subplot(2, 1, 1)
dendro = dendrogram(Z)
# Leaf order as returned by dendrogram().
leaves = dendro['leaves']
# print() call form: valid on Python 3 and prints the same text on Python 2.
print(leaves)
plt.show()
However, in the dendrogram all the clusters have the same color (blue). Is there a way to use different colors with respect to the similarity between clusters?
Look at the documentation, Looks like you could pass the link_color_func keyword or color_threshold keyword to have different colors.
Edit:
The default behavior of the dendrogram coloring scheme is, given a color_threshold = 0.7*max(Z[:,2]) to color all the descendent links below a cluster node k the same color if k is the first node below the cut threshold; otherwise, all links connecting nodes with distances greater than or equal to the threshold are colored blue [from the docs].
What the hell does this mean? Well, if you look at a dendrogram, different clusters linked together. The "distance" between two clusters is the height of the link between them. The color_threshold is the height below which new clusters will be different colors. If all your clusters are blue, then you need to raise your color_threshold. For example,
In [48]: mat = np.random.rand(10, 10)
In [49]: z = linkage(mat, method="weighted")
In [52]: d = dendrogram(z)
In [53]: d['color_list']
Out[53]: ['g', 'g', 'b', 'r', 'c', 'c', 'c', 'b', 'b']
In [54]: plt.show()
I can check what the default color_threshold is by
In [56]: 0.7*np.max(z[:,2])
Out[56]: 1.0278719020096947
If I lower the color_threshold, I get more blue because more links have distances greater than the new color_threshold. You can see this visually because all the links above 0.9 are now blue:
In [64]: d = dendrogram(z, color_threshold=.9)
In [65]: d['color_list']
Out[65]: ['g', 'b', 'b', 'r', 'b', 'b', 'b', 'b', 'b']
In [66]: plt.show()
If I increase the color_threshold to 1.2, the links below 1.2 will no longer be blue. Additionally, the cyan and red links will merge into a single color because their parent link is below 1.2:
The following code will produce a dendrogram with a different color for each leaf. If in the process of merging clusters it encounters two clusters with different colors, then it selects the default one dflt_col = tab:blue.
Note: the link_matrix function is a plain-copy of the one from the AgglomerativeClustering example in scikit-learn.
Explaining everything it does would be really time-consuming, so simply print the intermediate result of any step that is unclear.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import squareform, pdist
from matplotlib.pyplot import cm
from sklearn.cluster import AgglomerativeClustering
import matplotlib.colors as clrs
def link_matrix(model, **kwargs):
    """Build a linkage matrix from a fitted clustering model.

    Reads ``model.children_`` (one pair of child ids per merge),
    ``model.distances_`` and ``model.labels_``. A child id smaller than
    ``len(model.labels_)`` is counted as a single leaf; any other id refers
    to an earlier merge, whose sample count is reused.

    Returns a float array with one row per merge:
    ``[child_a, child_b, distance, n_samples_under_merge]``.
    ``**kwargs`` is accepted but not used.
    """
    children = model.children_
    n_leaves = len(model.labels_)
    counts = np.zeros(children.shape[0])
    for row, pair in enumerate(children):
        # A leaf contributes 1, an earlier merge contributes its stored size.
        counts[row] = sum(
            1 if node < n_leaves else counts[node - n_leaves] for node in pair
        )
    return np.column_stack([children, model.distances_, counts]).astype(float)
def assign_link_colors(model):
    """Choose a colour for every link of ``model.Z``.

    Each leaf gets the "jet" colour of its cluster label. A link takes the
    colour shared by its two children; if they differ it takes the colour of
    the lower-numbered child when that child is a leaf, and "tab:blue"
    otherwise.

    Side effects: sets ``model.dict_idx_name_cl`` (leaf index ->
    ``[feature name, label]``, ordered by label) and ``model.dict_colors``
    (leaf index -> hex colour).

    Returns a dict mapping link id (``row + 1 + len(model.Z)``) to colour.
    """
    n_links = len(model.Z)

    # One hex colour per distinct label, taken from the "jet" colormap.
    rgba = mpl.cm.ScalarMappable(cmap="jet").to_rgba(np.unique(model.labels_), norm=True)
    palette = [clrs.to_hex(colour) for colour in rgba]

    names = model.feature_names_in_
    leaf_info = {
        idx: [name, label]
        for idx, name, label in zip(np.arange(len(names), dtype=int), names, model.labels_)
    }
    model.dict_idx_name_cl = dict(sorted(leaf_info.items(), key=lambda item: item[1][1]))
    model.dict_colors = {
        leaf: palette[info[1]] for leaf, info in model.dict_idx_name_cl.items()
    }

    fallback = "tab:blue"  # colour for links joining differently coloured sub-trees
    link_cols = {}
    for row, pair in enumerate(model.Z[:, :2].astype(int)):  # the two child ids of each merge
        # Ids above n_links are earlier links; the rest are leaves.
        left, right = (
            link_cols[node] if node > n_links else model.dict_colors[node]
            for node in pair
        )
        lowest = min(pair)
        if left == right:
            chosen = left
        elif lowest <= n_links:  # lower-numbered child is a leaf: use its colour
            chosen = model.dict_colors[lowest]
        else:
            chosen = fallback
        link_cols[row + 1 + n_links] = chosen
    return link_cols
def mod_2_dendrogram(model, **kwargs):
    """Plot the dendrogram of ``model.Z`` and label the merges above the threshold.

    Parameters
    ----------
    model
        Object providing ``Z`` (linkage matrix), ``dist_thr`` (distance
        threshold), ``feature_names_in_`` and ``n_clusters_``.
    **kwargs
        Forwarded unchanged to ``scipy.cluster.hierarchy.dendrogram``.

    Returns
    -------
    dict
        The dictionary returned by ``dendrogram``.
    """
    # The seaborn style sheets were renamed to 'seaborn-v0_8-*' in newer
    # Matplotlib; try the new name first and fall back to the old one.
    try:
        plt.style.use('seaborn-v0_8-whitegrid')
    except OSError:
        plt.style.use('seaborn-whitegrid')
    plt.figure(figsize=(int(.5 * len(model.feature_names_in_)), 7))
    print(f'-0.7*max(Z[:,2]): {0.7*max(model.Z[:,2])}',)

    # Plot the corresponding dendrogram
    ddata = dendrogram(model.Z,  # count_sort = "descending",
                       **kwargs)

    # Read the threshold from the model rather than from a module-level
    # global, so the function does not depend on a variable defined elsewhere.
    dist_thr = model.dist_thr

    # Mark and label every merge whose height lies above the threshold.
    for i, d, c in zip(ddata['icoord'], ddata['dcoord'], ddata['color_list']):
        x = sum(i[1:3]) / 2  # midpoint of the link's horizontal bar
        y = d[1]             # height of the link's horizontal bar
        if y > dist_thr:
            plt.plot(x, y, 'o', c=c, markeredgewidth=0)
            plt.annotate(np.round(y, 2), (x, y), xytext=(0, -5),
                         textcoords='offset points',
                         va='top', ha='center', fontsize=9)

    plt.axhline(y=dist_thr, color='orange', alpha=0.7, linestyle='--',
                label=f"threshold: {int(model.dist_thr)}")
    plt.title(f'Agglomerative Dendrogram with n_clust: {model.n_clusters_}')
    plt.xlabel('Clusters')
    plt.ylabel('Distance')
    plt.legend()
    return ddata
Now, the running example:
import string
import pandas as pd
# Reproducible random 10x10 "distance" table with a zero diagonal,
# whose columns are labelled 'a'..'j'.
np.random.seed(0)
dist = np.random.randint(1e4, size=(10, 10))
np.fill_diagonal(dist, 0)
column_names = list(string.ascii_lowercase)[:dist.shape[0]]
dist = pd.DataFrame(dist, columns=column_names)

# Single-linkage clustering on the precomputed distances, cut at dist_thr.
dist_thr = 1.5e3
model = AgglomerativeClustering(
    distance_threshold=dist_thr,
    n_clusters=None,
    linkage="single",
    metric="precomputed",
)
model.dist_thr = dist_thr
model = model.fit(dist)
model.Z = link_matrix(model)

# Colour each link according to the clusters of the leaves beneath it.
link_cols = assign_link_colors(model)
_ = mod_2_dendrogram(
    model,
    labels=dist.columns,
    link_color_func=lambda link_id: link_cols[link_id],
)