I have many 2D sequences of variable length, i.e. a list of lists where each sublist is a sequence. I want to project these sequences/lines/sublists into a 3D visualisation, adding the time step as another dimension. So far I am failing to plot all the 3D lines using plotly.express.
import plotly.express as px
import pandas as pd  # `pd` is used below but was not imported in this snippet

# Build one long-format table from the per-sequence data:
#   X     -> 1-based time step within each sequence
#   Y, Z  -> first and second feature of each point
#   color -> per-point label, passed to px.line_3d as the `color` column
t = [[ii + 1 for ii in range(len(features[i]))] for i in range(len(labels))]
x0 = [[x[0] for x in features[i]] for i in range(len(labels))]
x1 = [[x[1] for x in features[i]] for i in range(len(labels))]
df = pd.DataFrame(dict(
    X=[tii for ti in t for tii in ti],
    Y=[xii for xi in x0 for xii in xi],
    Z=[xii for xi in x1 for xii in xi],
    color=[aa for a in labels for aa in a],
))
fig = px.line_3d(df, x="X", y="Y", z="Z", color="color")
# `show` has to be called; a bare `fig.show` only references the method and
# never renders the figure.
fig.show()
This is what I get, which is not really what I want. It treats all the cases/sublists that share a label as one single sequence, so at the end of each line it goes back to where it started. I have looked into plotting these iteratively in a for-loop (as one would with matplotlib), basically creating a new pandas DataFrame at each iteration and plotting it, but with no success. Does anyone have any experience with this? Much appreciated!
An MCVE is below:
import plotly.express as px
import numpy as np
import pandas as pd
# Five random sequences of 2-D points, each with its own number of steps.
sequence_lengths = (4, 5, 6, 5, 9)
features = [np.random.rand(n_steps, 2).tolist() for n_steps in sequence_lengths]
# One label per point; every point of a sequence carries the same label.
labels = [
    [1, 1, 1, 1],
    [1, 1, 1, 1, 1],
    [2, 2, 2, 2, 2, 2],
    [2, 2, 2, 2, 2],
    [0, 0, 0, 0, 0, 0, 0, 0, 0],
]

# Per-sequence time steps (1-based) and the two feature columns.
t = [list(range(1, len(sequence) + 1)) for sequence in features]
x0 = [[point[0] for point in sequence] for sequence in features]
x1 = [[point[1] for point in sequence] for sequence in features]

# Flatten everything into one long-format table for plotly express.
df2 = pd.DataFrame({
    "X": [step for steps in t for step in steps],
    "Y": [value for column in x0 for value in column],
    "Z": [value for column in x1 for value in column],
    "color": [label for sequence_labels in labels for label in sequence_labels],
})

fig1 = px.line_3d(df2, x="X", y="Y", z="Z", color="color")
fig1.show()
You see basically 3 lines instead of 5.
Your problem is that you are using the same label for different traces. Here is a workaround with a loop:
import numpy as np
import plotly.graph_objs as go
# Five random sequences of 2-D points, each with its own number of steps.
sequence_lengths = (4, 5, 6, 5, 9)
features = [np.random.rand(n_steps, 2).tolist() for n_steps in sequence_lengths]
labels = [[1] * 4, [1] * 5, [2] * 6, [2] * 5, [0] * 9]

fig = go.Figure()
# One trace per sequence, so sequences sharing a label are never joined.
for sequence, sequence_labels in zip(features, labels):
    points = np.asarray(sequence)
    trace = go.Scatter3d(
        x=np.arange(points.shape[0]),  # time step along x
        y=points[:, 0],
        z=points[:, 1],
        mode='lines',
        hovertext=sequence_labels,
    )
    fig.add_trace(trace)
fig.show()
You might need to play with trace names.
Update
Hopefully it's not too overcomplicated, but it is meant to be as generic as possible.
import numpy as np
import plotly.graph_objs as go
from itertools import cycle
def plotly_color_map(names):
    """Return a dict assigning each entry of *names* a hex color.

    Colors are handed out in iteration order from a fixed ten-color palette;
    after the tenth name the palette starts over from its first color.
    """
    # Palette from https://stackoverflow.com/a/44727682
    palette = (
        '#1f77b4',  # muted blue
        '#ff7f0e',  # safety orange
        '#2ca02c',  # cooked asparagus green
        '#d62728',  # brick red
        '#9467bd',  # muted purple
        '#8c564b',  # chestnut brown
        '#e377c2',  # raspberry yogurt pink
        '#7f7f7f',  # middle gray
        '#bcbd22',  # curry yellow-green
        '#17becf',  # blue-teal
    )
    color_source = cycle(palette)
    return {name: next(color_source) for name in names}
# Five random sequences of 2-D points, each with its own number of steps.
sequence_lengths = (4, 5, 6, 5, 9)
features = [np.random.rand(n_steps, 2).tolist() for n_steps in sequence_lengths]
labels = [[1] * 4, [1] * 5, [2] * 6, [2] * 5, [0] * 9]

# Each sequence belongs to the legend group named by its first label.
legend_groups = [sequence_labels[0] for sequence_labels in labels]
# Only the first trace of every group gets a legend entry.
traces = [group not in legend_groups[:position]
          for position, group in enumerate(legend_groups)]
cm = plotly_color_map(set(legend_groups))

fig = go.Figure()
for sequence, sequence_labels, group, first_of_group in zip(
        features, labels, legend_groups, traces):
    points = np.asarray(sequence)
    trace = go.Scatter3d(
        x=np.arange(points.shape[0]),
        y=points[:, 0],
        z=points[:, 1],
        mode='lines',
        line={"color": cm[group]},  # same color for the whole group
        legendgroup=group,
        hovertext=sequence_labels,
        showlegend=first_of_group,
        name="label_{}".format(group),
    )
    fig.add_trace(trace)
fig.show()
Related
I have the following code
import plotly.express as px
import pandas as pd
import numpy as np
# Small example table: `y` has gaps (missing values), `completed` flags
# the rows that carry a value.
df = pd.DataFrame(
    {
        "y": [1, None, None, 4, 6, None],
        "x": [1, 2, 3, 4, 5, 6],
        "completed": [1, 0, 0, 1, 1, 0],
    }
)
fig = px.line(df, x="x", y="y", markers=True, color="completed")
fig.show()
That results in the following plot
But I have to highlight the cases where the dataframe has a NaN value (change the line color to red and add a dot marker), as in the following plot
Is there any way to do that easily? I have been looking for it but I'm not able to find a suitable solution.
Thanks in advance!
Found a solution using this https://community.plotly.com/t/change-color-of-continuous-line-based-on-value/68938/2
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import itertools as it
df = pd.DataFrame(
    {
        "y": [1, None, None, 4, 6, None, 2, 1],
        "x": [1, 2, 3, 4, 5, 6, 7, 8],
        "completed": [1, 0, 0, 1, 1, 0, 1, 1],
    }
)
fig = go.Figure()

# Pick a style per row from whether its original y value was missing:
# missing -> red dashed, present -> black solid.
was_missing = df["y"].isna()
df["line_color"] = was_missing.map({True: "red", False: "black"})
df["line_type"] = was_missing.map({True: "dash", False: "solid"})

# Fill the gaps so that every segment has two finite end points.
df["y"] = df["y"].interpolate(method="index")

# Consecutive (start, end) coordinate pairs, one pair per line segment.
x_pairs = it.pairwise(df["x"])
y_pairs = it.pairwise(df["y"])

# Each segment becomes its own trace, styled by the row it starts from.
segments = zip(x_pairs, y_pairs, df["line_color"].values, df["line_type"].values)
for x, y, line_color, line_type in segments:
    segment_trace = go.Scatter(
        x=x,
        y=y,
        mode="lines",
        line=dict(color=line_color, dash=line_type),
    )
    fig.add_trace(segment_trace)
fig.show()
This is the new output for the plot.
I'm having hard time to draw this... Can someone help me please
Make linspaces of grid_resolution points in xlim and grid_resolution points in ylim. e.g. For xlim=(-1, 1), ylim=(0, 2) and grid_resolution=3, make the linspace (-1, 0, 1) of x coordinates and the linspace (0, 1, 2) of y coordinates.
Use np.tile() to repeat the x grid points grid_resolution times (e.g. (-1, 0, 1, -1, 0, 1, -1, 0, 1)) and np.repeat() to repeat each of the y grid points grid_resolution times (e.g. (0, 0, 0, 1, 1, 1, 2, 2, 2)).
Use np.stack() to combine the x grid points and y grid points into a 2D array of size grid_resolution² x 2. (e.g. [[-1, 0], [0, 0], [1, 0], [-1, 1], [0, 1], [1, 1], [-1, 2], [0, 2], [1, 2]] )
Make a dictionary keyed by -1 and 1 with values 'pink' and 'lightskyblue'.
Use clf.predict() on the 2D array of points to get predicted y values.
For each y in {-1, 1}, use plt.plot() to plot those points in your 2D array with that predicted y value in the color specified by your dictionary.
above is the requirements
def plot_decision_boundary(clf, xlim, ylim, grid_resolution):
    """Display how clf classifies each point in the space specified by xlim and ylim.

    - clf is a classifier (already fit to data) whose predict() returns
      labels in {-1, 1}.
    - xlim and ylim are each 2-tuples of the form (low, high).
    - grid_resolution specifies the number of points into which the xlim is
      divided and the number into which the ylim interval is divided. The
      function plots grid_resolution * grid_resolution points.

    Points predicted as -1 are drawn in 'pink' and points predicted as 1 in
    'lightskyblue' on the current matplotlib axes. Nothing is returned.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    # Evenly spaced coordinates along each axis, end points included.
    x_coords = np.linspace(xlim[0], xlim[1], grid_resolution)
    y_coords = np.linspace(ylim[0], ylim[1], grid_resolution)

    # Cartesian product of the two axes: x cycles fastest (tile) while each
    # y value is held for one full sweep of x (repeat).
    grid_x = np.tile(x_coords, grid_resolution)
    grid_y = np.repeat(y_coords, grid_resolution)
    points = np.stack((grid_x, grid_y), axis=1)  # (grid_resolution**2, 2)

    colors = {-1: 'pink', 1: 'lightskyblue'}
    predicted = clf.predict(points)

    for label, color in colors.items():
        selected = predicted == label
        # '.' draws unconnected point markers instead of a line through them.
        plt.plot(points[selected, 0], points[selected, 1], '.', color=color)
below are the test code
data_string = """
x0, x1, y
0, 0, -1
-1, 1, -1
1, -1, -1
0, 1, 1
1, 1, 1
1, 0, 1
"""
# Raw string: the separator is a regular expression, and `\s` is not a valid
# Python string escape (newer Python versions warn about it in a plain string).
df = pd.read_csv(StringIO(data_string), sep=r'\s*,\s+', engine='python')
clf = svm.SVC(kernel="linear", C=1000)
clf.fit(df[['x0', 'x1']], df['y'])
# Call student's function.
plot_decision_boundary(clf=clf, xlim=(-4, 4), ylim=(-4, 4), grid_resolution=100)
# Add training examples to plot.
colors = {-1: 'red', 1: 'blue'}
for y in (-1, 1):
    plt.plot(df.x0[df.y == y], df.x1[df.y == y], '.', color=colors[y])
I am trying to show displacement on a 3D truss example, but I am running into an error. I have simplified my code below. I am able to show displacement for a 2D problem, but not for a 3D one. I am also trying to show the node number at each node. I managed to plot the nodes (green color), but the numbers are not showing even after I used the "plt.annotate" command. Can someone help me get the displacement and node numbers to show? Thank you in advance.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import sys
np.set_printoptions(threshold=sys.maxsize)
def plot_truss(nodes, elements, areas, forces):
    """Draw a truss in a new 3D figure.

    Parameters
    ----------
    nodes : dict
        Maps node id -> [x, y, z] coordinates.
    elements : dict
        Maps element id -> [start node id, end node id].
    areas : dict
        Maps element id -> a number; each element is drawn with a line
        width of 5 * areas[element id].
    forces : dict
        Accepted for interface compatibility; not used for drawing.

    Nodes are drawn as yellow markers labelled with their node id, elements
    as blue lines. The figure is not shown; call plt.show() afterwards.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # plot nodes in 3d
    x = [coords[0] for coords in nodes.values()]
    y = [coords[1] for coords in nodes.values()]
    z = [coords[2] for coords in nodes.values()]
    size = 400
    ax.scatter(x, y, z, c='y', s=size, zorder=5)

    # plt.annotate only understands 2D data coordinates, so its labels do not
    # land on the 3D node positions; Axes3D.text places text at (x, y, z).
    for node_id, coords in nodes.items():
        ax.text(coords[0], coords[1], coords[2], str(node_id),
                ha='center', va='center', zorder=10)

    # plot elements in 3d
    for element_id, (start_id, end_id) in elements.items():
        start = nodes[start_id]
        end = nodes[end_id]
        ax.plot([start[0], end[0]], [start[1], end[1]], [start[2], end[2]],
                c='b', linestyle='-', linewidth=5 * areas[element_id], zorder=1)
# Node id -> [x, y, z] coordinates.
nodes = {1: [0, 10, 0], 2: [0, 0, 0], 3: [10, 5, 0], 4: [0, 10, 10]}
# Element id -> value used to scale the drawn line width of that element.
areas = {1: 1.0, 2: 2.0, 3: 2.0}
# Element id -> [start node id, end node id].
elements = {1: [1, 3], 2: [2, 3], 3: [4, 3]}
# Keyed by node id; presumably the [fx, fy, fz] load on each node -- TODO confirm.
forces = {1: [0, 0, 0], 2: [0, 0, 0], 3: [0, -200, 0], 4: [0, 0, 0]}
# Keyed by node id; presumably the [dx, dy, dz] displacement of each node -- TODO confirm.
disps = {1: [0, 0, 0], 2: [0, 0, 0], 3: [ 3, -2, 4], 4: [0, 0, 0]}
def plt_displacement(nodes, elements, disps, color="red"):
    """Overlay the displaced shape of a truss on a 3D plot.

    Parameters
    ----------
    nodes : dict
        Maps node id -> [x, y, z] undeformed coordinates.
    elements : dict
        Maps element id -> [start node id, end node id].
    disps : dict
        Maps node id -> [dx, dy, dz]; added to the node's coordinates.
    color : str
        Color used for the displaced nodes and elements.

    The drawing goes onto the first 3D axes of the current figure; if the
    current figure has none, a new 3D subplot is added to it.
    """
    # The inputs are dicts keyed by node id, so add coordinates per node
    # rather than slicing them like 2D arrays.
    displaced = {
        node_id: np.asarray(coords, dtype=float)
        + np.asarray(disps[node_id], dtype=float)
        for node_id, coords in nodes.items()
    }

    fig = plt.gcf()
    ax = next((a for a in fig.axes if getattr(a, "name", "") == "3d"), None)
    if ax is None:
        ax = fig.add_subplot(111, projection="3d")

    # reshape keeps the array 2D even when there are no nodes at all.
    points = np.array(list(displaced.values()), dtype=float).reshape(-1, 3)
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], color=color)

    for start_id, end_id in elements.values():
        start = displaced[start_id]
        end = displaced[end_id]
        ax.plot([start[0], end[0]], [start[1], end[1]], [start[2], end[2]],
                color=color)


# Draw the undeformed truss first: plot_truss opens a new figure, and the
# displaced shape is then overlaid on that figure's 3D axes.
plot_truss(nodes, elements, areas, forces)
plt_displacement(nodes, elements, disps)
plt.show()
when i run the code I am getting the error below;
<ipython-input-47-758895b259be> in plt_displacement(elements, nodes, disp, color)
31 def plt_displacement(elements, nodes, disp, color="red"):
32 nodes_disp = np.copy(nodes)
---> 33 nodes_disp[:, 0] += disp[::2, 0]
34 nodes_disp[:, 1] += disp[1::2, 0]
35 plt.scatter(nodes_disp[:, 0], nodes_disp[:, 1], color=color)
IndexError: too many indices for array
It looks like you may have switched “nodes” and “elements” in your call to plt_displacement() (3rd and 12th to last lines) vs your definition.
plt_displacement(nodes,elements,disps)
def plt_displacement(elements, nodes, disp, color="red"):
I’m not sure exactly what plt_displacement is supposed to do. But looking at nodes_disp it is an array of no shape, so slicing won’t work.
>>> nodes_disp = np.copy(nodes)
>>> nodes_disp
array({1: [0, 10, 0], 2: [0, 0, 0], 3: [10, 5, 0], 4: [0, 10, 10]}, dtype=object)
>>> nodes_disp.shape
()
You can change the values to an array and slice it like this:
>>> nodes_disp = np.copy(list(nodes.values()))
>>> nodes_disp
array([[ 0, 10, 0],
[ 0, 0, 0],
[10, 5, 0],
[ 0, 10, 10]])
But I’m not sure if that’s your intent.
Likewise, you'd have to convert disp to an array in order to slice it, as it is a dictionary.
This is an example dataframe:
import pandas as pd
import numpy as np
# Row and column labels of the example table.
indexes = ('a', 'b', 'c', 'd')
columns = ('ab', 'bc', 'cd', 'de', 'ef', 'fg')

# One row per index label, one column per column label.
values = np.array(
    [
        [0, 1, 2, 0, 0, 4],
        [1, 0, 0, 1, 1, 0],
        [0, 4, 0, 0, 2, 1],
        [2, 0, 2, 0, 4, 0],
    ]
)

df = pd.DataFrame(values, index=indexes, columns=columns)
print(df)
from this dataframe I need to create a series of pie charts, one for every column, shown on the same figure, where the slices dimension is fixed (equal to 100/len(indexes)) and the color of the slices depends on the value of the index, in particular: white if 0, green if 1, yellow if 2, red if 4.
What suggestions can you give me?
I found that:
# Pie charts drawn as subplots of one figure; the figure is 2 units wide per
# entry of `columns` and 2 units high.
df.plot(kind='pie', subplots=True, figsize=(len(columns)*2, 2))
it creates a series, but I can't control the input values...
I've created a pie for a column, but then I wasn't able to link the color to the value of index:
labels = indexes
# Every wedge gets the same share (in percent) of the pie.
equal_share = 100 / len(labels)
sizes = np.linspace(equal_share, equal_share, num=len(labels))

fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels)
ax1.axis('equal')  # keep the pie circular
plt.show()
ImportanceOfBeingErnest answer has helped me giving to the piechart the wanted look:
# Value -> wedge color.
coldic = {0: "w", 1: "g", 2: "y", 4: "r"}

labels = indexes
# Every wedge gets the same share (in percent) of the pie.
equal_share = 100 / len(labels)
sizes = np.linspace(equal_share, equal_share, num=len(labels))

# Color each wedge by the value in the first column of `values`.
first_column = values[:, 0]
colors = [coldic[value] for value in first_column]

fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, colors=colors, counterclock=False, startangle=90)
ax1.axis('equal')  # keep the pie circular
plt.show()
Now the colors are linked to the values, and the dimensions of the slices are fixed. I just need to have the same pie chart for all the columns, all in the same image.
The importance of these charts is given by the colors, not the dimensions of the slices, which I want to be always equal.
Thanks for your time!
Instead of relying on pandas' internal plotting functions (which are of course limited), one can use matplotlib's pie function to plot the diagrams.
The colors can be set as a list, which is generated from the values according to some mapping dictionary.
import numpy as np
import matplotlib.pyplot as plt
# Value -> wedge color.
coldic = {0: "w", 1: "g", 2: "y", 4: "r"}

values = np.array(
    [
        [0, 1, 2, 0, 0, 4],
        [1, 0, 0, 1, 1, 0],
        [0, 4, 0, 0, 2, 1],
        [2, 0, 2, 0, 4, 0],
    ]
)
labels = ['a', 'b', 'c', 'd']

# One pie per column of `values`, side by side.
n_columns = values.shape[1]
fig1, axes = plt.subplots(ncols=n_columns)
for ax, column in zip(axes, values.T):
    colors = [coldic[value] for value in column]
    # Only label wedges that are actually visible (value > 0).
    labs = [label if value > 0 else "" for label, value in zip(labels, column)]
    ax.pie(column, labels=labs, colors=colors)
    ax.set_aspect("equal")
plt.show()
For fixed wedge sizes you just use a fixed array to supply to pie.
import numpy as np
import matplotlib.pyplot as plt
# Value -> wedge color.
coldic = {0: "w", 1: "g", 2: "y", 4: "r"}

values = np.array(
    [
        [0, 1, 2, 0, 0, 4],
        [1, 0, 0, 1, 1, 0],
        [0, 4, 0, 0, 2, 1],
        [2, 0, 2, 0, 4, 0],
    ]
)
labels = ['a', 'b', 'c', 'd']

n_rows, n_columns = values.shape
# Same size for every wedge; only the colors carry information.
equal_wedges = np.ones(n_rows)
outline = dict(linewidth=1, edgecolor="k")

fig1, axes = plt.subplots(ncols=n_columns)
for ax, column in zip(axes, values.T):
    colors = [coldic[value] for value in column]
    ax.pie(equal_wedges, labels=labels, colors=colors, wedgeprops=outline)
    ax.set_aspect("equal")
    # Title is the column's values written as one string.
    ax.set_title("".join(str(value) for value in column))
plt.show()
This paper has a nice way of visualizing clusters of a dataset with binary features by plotting a 2D matrix and sorting the values according to a cluster.
In this case, there are three clusters, as indicated by the black dividing lines; the rows are sorted, and show which examples are in each cluster, and the columns are the features of each example.
Given a vector of cluster assignments and a pandas DataFrame, how can I replicate this using a Python library (e.g. seaborn)? Plotting a DataFrame using seaborn isn't difficult, nor is sorting the rows of the DataFrame to align with the cluster assignments. What I am most interested in is how to display those black dividing lines which delineate each cluster.
Dummy data:
"""
col1 col2
x1_c0 0 1
x2_c0 0 1
================= I want a line drawn here
x3_c1 1 0
================= and here
x4_c2 1 0
"""
import pandas as pd
import seaborn as sns
# Row names encode the example id and its cluster (e.g. x3_c1).
row_names = ['x1_c0', 'x2_c0', 'x3_c1', 'x4_c2']
binary_features = {'col1': [0, 0, 1, 1], 'col2': [1, 1, 0, 0]}
df = pd.DataFrame(binary_features, index=row_names)
clus = [0, 0, 1, 2]  # This is the cluster assignment
sns.heatmap(df)
The link that mwaskom posted in a comment is good starting place. The trick is figuring out what the coordinates are for the vertical and horizontal lines.
To illustrate what the code is actually doing, it's worthwhile to just plot all of the lines individually
%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt  # `plt` is used below but was never imported

df = pd.DataFrame(data={'col1': [0, 0, 1, 1], 'col2': [1, 1, 0, 0]},
                  index=['x1_c0', 'x2_c0', 'x3_c1', 'x4_c2'])
f, ax = plt.subplots(figsize=(8, 6))
# Draw into the axes created above explicitly rather than relying on it
# being the current axes.
sns.heatmap(df, ax=ax)
# One white line on every cell boundary: axvline(x, ymin, ymax) and
# axhline(y, xmin, xmax).
ax.axvline(1, 0, 2, linewidth=3, c='w')
ax.axhline(1, 0, 1, linewidth=3, c='w')
ax.axhline(2, 0, 1, linewidth=3, c='w')
ax.axhline(3, 0, 1, linewidth=3, c='w')
f.tight_layout()
The way the axvline method works is that the first argument is the x location of the line, followed by the lower bound and upper bound of the line (in this case 1, 0, 2). The horizontal line takes the y location and then the x start and x stop of the line. The defaults will create the line for the entire plot, so you can typically leave those out.
This code above creates a line for every value in the dataframe. If you want to create groups for the heatmap, you will want to create an index in your data frame, or some other list of values to loop through. For instance with a more complicated example using code from this example:
row_ids = ['x1_c0', 'x2_c0', 'x3_c1', 'x4_c2', 'x5_c2']
df = pd.DataFrame({'col1': [0, 0, 1, 1, 1.5], 'col2': [1, 1, 0, 0, 2]},
                  index=row_ids)
# Move the row ids into a column, attach a group per row, then index the
# frame by (group, id_).
df = df.assign(id_=df.index, group=[1, 2, 2, 3, 3]).set_index(['group', 'id_'])
df
col1 col2
group id_
1 x1_c0 0.0 1
2 x2_c0 0.0 1
x3_c1 1.0 0
3 x4_c2 1.0 0
x5_c2 1.5 2
Then plot the heatmap with the groups:
f, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(df, ax=ax)
groups = df.index.get_level_values(0)
n_rows = len(groups)
# Draw a separator above every row whose group differs from the previous row.
for i in range(1, n_rows):
    if groups[i] != groups[i - 1]:
        # Where row i starts depends on how the heatmap was drawn: with an
        # inverted y-axis (what current seaborn versions do) row i begins at
        # y == i; when the data was flipped instead, it begins at n_rows - i.
        y = i if ax.yaxis_inverted() else n_rows - i
        ax.axhline(y, c="w", linewidth=3)
ax.axvline(1, c="w", linewidth=3)
f.tight_layout()
Because your heatmap is not symmetric you may need to use a separate for loop for the columns