Related
I am trying to visualize a UMAP embedding and already have the following code and plot. My goal is to use two different markers for the two classes in my dataset and also a separate color for each group in my dataset (the groups are VP XXX, see the colorbar in the image); the coloring has actually already worked out somehow.
The issue is that the markers aren't the ones I am trying to get, and the colorbar isn't very accurate in telling me which color is which group.
import matplotlib.pyplot as plt
import numpy as np  # needed for np.array / np.arange below
import pandas as pd
import seaborn as sns

# Reproduction script for the question: scatter a 2-D embedding with one
# color per VP group and one marker per label, plus a colorbar for the groups.

#### prepare lists ####
VP_with_col = []
m = []
col = []
# Must be a NumPy array: the column slices embedding[:, 0] / embedding[:, 1]
# used further down are not supported by a plain nested list.
embedding = np.array([[0, 0.8], [0.5, 0.5], [0.9, 0.5], [0.2, 0.9], [0.4, 0.4], [0.6, 0.5], [0.77, 0.59], [0.8, 0.1]])
EXAMPLE_VP = ["VP124", "VP124", "VP125", "VP125", "VP203", "VP203", "VP258", "VP258"]
EXAMPLE_LABELS = [0, 1, 0, 1, 0, 1, 0, 1]
dataframe = pd.DataFrame({"VP": EXAMPLE_VP, "label": EXAMPLE_LABELS})
VP_list = dataframe.VP.unique()

# add color/value to each unique VP
for idx, vp in enumerate(VP_list):
    VP_with_col.append([1 + idx, vp])  # somehow this gives me a different color for each group which is great

# create color array of length len(dataframe.VP) with a color for each group
for idx, vp in enumerate(dataframe.VP):
    for vp_col in VP_with_col:
        if vp_col[1] == vp:
            col.append(vp_col[0])

#### create marker list ####
for elem in dataframe.label:
    if elem == 0:
        m.append("o")
    else:
        m.append("^")

########################## relevant part for question ############################
#### create plot dataframe from lists and UMAP embedding ####
plot_df = pd.DataFrame(data={"x": embedding[:, 0], "y": embedding[:, 1], "color": col, "marker": m})
plt.style.use("seaborn")
plt.figure()

#### Plot ####
# The marker/color handling below is intentionally left as asked about in the question.
ax = sns.scatterplot(data=plot_df, x="x", y="y", style="marker", c=col, cmap='Spectral', s=5)
ax.set(xlabel=None, ylabel=None)
plt.gca().set_aspect('equal', 'datalim')

#### Colorbar ####
norm = plt.Normalize(min(col), max(col))
sm = plt.cm.ScalarMappable(cmap="Spectral", norm=norm)
sm.set_array([])
# Remove the legend (marker legend), add colorbar
ax.get_legend().remove()
cb = ax.figure.colorbar(sm)
cb.set_ticks(np.arange(len(VP_list)))
cb.set_ticklabels(VP_list)

##### save
plt.title('UMAP projection of feature space', fontsize=12)
plt.savefig("./umap_plot", dpi=1200)
getting me this plot with standard marker and 'x' marker. In style = "marker" the marker column of the dataframe is something like ["^", "o","^","^","^","o"...]:
Is it also possible to make it clearer which color belongs to which class in the colorbar ?
You're doing a lot of manipulations that would be needed for matplotlib without Seaborn. With Seaborn, most of this happens automatically. Here is how it could look with your test data:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Test data: one (x, y) row per observation, with its VP group and class label.
embedding = np.array([[0, 0.8], [0.5, 0.5], [0.9, 0.5], [0.2, 0.9], [0.4, 0.4], [0.6, 0.5], [0.77, 0.59], [0.8, 0.1]])
EXAMPLE_VP = ["VP124", "VP124", "VP125", "VP125", "VP203", "VP203", "VP258", "VP258"]
EXAMPLE_LABELS = [0, 1, 0, 1, 0, 1, 0, 1]

# Long-form frame: the coordinates plus the two categorical columns that
# seaborn maps to color (hue) and marker (style).
plot_df = pd.DataFrame(
    {
        "x": embedding[:, 0],
        "y": embedding[:, 1],
        "VP": EXAMPLE_VP,
        "label": EXAMPLE_LABELS,
    }
)

plt.figure()
plt.style.use("seaborn")
# Color by VP group, marker by label; seaborn builds the legend itself.
scatter_kwargs = dict(hue='VP', palette='Spectral', style="label", markers=['^', 'o'], s=100)
ax = sns.scatterplot(data=plot_df, x="x", y="y", **scatter_kwargs)
ax.set(xlabel=None, ylabel=None)
ax.set_aspect('equal', 'datalim')
# sns.move_legend(ax, bbox_to_anchor=(1.01, 1.01), loc='upper left')
plt.tight_layout()
plt.show()
Note that the 'Spectral' colormap assigns a light yellow color to 'VP203' which is difficult to see with the default background. You might want to use e.g. palette='Set2' for the colors.
I'm plotting a large number of data points with errors using Matplotlib (version 2.2.5), and I'm rasterizing the data because there are a few thousand data points. I've found that when I rasterize the data and save as a PDF, however, the error bars produce an ugly white outline that isn't acceptable for publication. I've constructed a MWE that shows the problem:
import numpy as np
import random as rand
import matplotlib.pyplot as plt
# Fixed seed so the same point cloud is produced on every run.
rand.seed(10)
seeds = range(0, 1000)
# One (x, y) row per seed; x is drawn before y for every point.
data = np.array([[rand.gauss(1, 0.01), rand.gauss(1, 0.01)] for _ in seeds])
xs, ys = data[:, 0], data[:, 1]

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
# Both artists are rasterized; the error bars are drawn above the points.
ax.scatter(xs, ys, s=10, facecolors="k", rasterized=True, zorder=1)
ax.errorbar(xs, ys, xerr=0.01, yerr=0.01, color="k", fmt="none", rasterized=True, zorder=2)
fig.savefig("Test.pdf", dpi=250)
This looks fine in the Jupyter Notebook output, and also as a saved PNG file. The output PDF file, however, looks like this:
How do I get rid of that white fuzz caused by the error bars? If I don't rasterize, the problem vanishes, but then the file takes annoyingly long to load in my paper, and the last thing I want to do is annoy my reader.
I found the solution thanks to an older question: I needed to add ax.set_rasterization_zorder(0) to the code and change the zorder of the plotted points to be below 0. This produced a perfect graph that has no ugly outlines of the data and retains a vectorized axis, exactly what I wanted. The working code is:
import numpy as np
import random as rand
import matplotlib.pyplot as plt
# Fixed seed so the same point cloud is produced on every run.
rand.seed(10)
seeds = range(0, 1000)
# One (x, y) row per seed; x is drawn before y for every point.
data = np.array([[rand.gauss(1, 0.01), rand.gauss(1, 0.01)] for _ in seeds])
xs, ys = data[:, 0], data[:, 1]

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
# The fix: give the data artists a zorder below 0 and set the axes'
# rasterization zorder to 0, so the data sits below that threshold.
ax.scatter(xs, ys, s=10, facecolors="k", rasterized=True, zorder=-2)
ax.errorbar(xs, ys, xerr=0.01, yerr=0.01, color="k", fmt="none", rasterized=True, zorder=-1)
ax.set_rasterization_zorder(0)
fig.savefig("Test.pdf", dpi=250)
and the output is:
I plotted two normal distribution curves on the graph as seen below:
Using the below code:
import random

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats  # stats.norm.pdf is used for both curves below

# Two samples of 100 random integers each, drawn from [0, 100) and [0, 50).
d1 = np.array([random.randrange(0, 100) for i in range(100)])
d2 = np.array([random.randrange(0, 50) for i in range(100)])
# Sort so each curve is drawn left to right as a single line.
d1, d2 = np.sort(d1), np.sort(d2)
d1_mean, d1_std = np.mean(d1), np.std(d1)
d2_mean, d2_std = np.mean(d2), np.std(d2)
# y = np.repeat(15,100)
plt.figure(figsize=(8, 6))
# Normal pdf of each sample at its own mean/std, scaled by 1000 for display.
plt.plot(d1, stats.norm.pdf(d1, d1_mean, d1_std) * 1000, 'r')
plt.plot(d2, stats.norm.pdf(d2, d2_mean, d2_std) * 1000, 'b')
# plt.hlines(y,d1.min(),d1.max(),'g')
plt.title("Flattening the curve")
plt.show()
What I want to achieve is this:
The second graph is easier to plot because both the mean for both the normal distribution curves for the above graph were the same. However, I am unable to achieve the same with the graph at the very top. When I set the mean to be similar, this happens:
Any tips on how to achieve this? Any help is much appreciated. Thank you for reading.
I think the easiest way to generate the two Gaussian curves would be to plug x-values in the range [-20, 20] into the Gaussian function with two different values of sigma. matplotlib will then make the boundaries of your plot [-20, 20], and it will be centered around 0.
import random
import numpy as np
import matplotlib.pyplot as plt
def gaussian(x, mu, sig):
    """Evaluate the Gaussian density with mean ``mu`` and width ``sig`` at ``x``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    standardized = (x - mu) / sig
    prefactor = 1. / (np.sqrt(2. * np.pi) * sig)
    return prefactor * np.exp(-np.power(standardized, 2.) / 2)
plt.figure(figsize=(8, 6))
x_values = np.linspace(-20, 20, 200)
# generate gaussian curves with mu = 0, sigma = 5, 10
for sigma, curve_color in ((5, 'blue'), (10, 'red')):
    plt.plot(x_values, gaussian(x_values, 0, sigma), color=curve_color)
plt.title("Flattening the curve")
plt.show()
I want to create a dist plot (preferably using seaborn) with different colors for different ranges of values.
I have the vector:
[3,1,2,3,5,6,8,0,0,5,7,0,1, 0.2]
And I want to create a distplot such that all the parts with range 0 to 1 will be red and all the other will be blue.
What is the best way to do so?
I don't know if there is an easy way in seaborn to do this but doing the plot yourself is probably much easier. First you need to get equally sized bins (if you want that) such that the plot looks homogenous (np.histogram). Afterwards it's just a single numpy filter on your observations and the plot.
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
x = np.array([3,1,2,3,5,6,8,0,0,5,7,0,1, 0.2])
# Compute one set of equal-width bin edges over the full range so both
# subsets are drawn on the same grid; change `bins` to adapt the bin size.
counts, bins = np.histogram(x, bins=10)
# Split the observations by the color rule: values in [0, 1] vs. everything else.
in_unit_range = (x >= 0) & (x <= 1)
x1 = x[in_unit_range]
x2 = x[~in_unit_range]
# Draw each subset with its own color on the shared bins.
f, ax = plt.subplots()
for subset, shade in ((x1, "tab:red"), (x2, "tab:blue")):
    ax.hist(subset, bins=bins, color=shade)
ax.set(xlabel="Measurement", ylabel="Counts", title="histogram with 2 colors")
sns.despine()
Gives you:
I think you need a scatter plot. In that case, you can try the following solution. Here you first create a column of colors based on your condition and then assign those colors to the scatter plot.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = np.array([3, 1, 2, 3, 5, 6, 8, 0, 0, 5, 7, 0,1, 0.2])
# reset_index() turns the positional index into an 'index' column, used as x below.
df = pd.DataFrame({'data': data}).reset_index()
# Color rule: values below 1 are red, all others blue.
below_one = data < 1
df['colors'] = np.where(below_one, 'red', 'blue')
plt.scatter(x=df['index'], y=df['data'], c=df['colors'])
Alternative would be to plot directly using DataFrame
data = np.array([3, 1, 2, 3, 5, 6, 8, 0, 0, 5, 7, 0,1, 0.2])
# reset_index() turns the positional index into an 'index' column, used as x below.
df = pd.DataFrame({'data': data}).reset_index()
# Color rule: values below 1 are red, all others blue.
colors = np.where(data < 1, 'red', 'blue')
df.plot.scatter(x='index', y='data', c=colors)
I am exploring random stackings of cubes.
I started with 2D and can generate random packings within a rectangle like this:
Now I have the code to generalize the stacking to 3D, but I am struggling to generalize the visualization. An example data set is, filling a 3x3x3 cube with 1x1x1 and 2x2x2 cubes,
# Coordinates of one corner vertex of each of the 19 1x1x1 cubes
# (x1[i], y1[i], z1[i] together describe cube i).
x1 = [1, 0, 2, 0, 0, 0, 2, 1, 0, 1, 2, 2, 0, 0, 0, 2, 0, 1, 1]
y1 = [1, 1, 0, 2, 0, 0, 2, 2, 2, 0, 1, 0, 1, 2, 1, 0, 0, 0, 0]
z1 = [2, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2]
# Coordinates of one corner vertex of the single 2x2x2 cube.
x2 = [1]
y2 = [1]
z2 = [0]
# I believe the random filling is working because
# the total volumes are equal: 19 + 2**3 = 3**3
# I would like to start with these lists: entry k of X, Y and Z holds the
# corner coordinates of all cubes whose side length is sizes[k].
X = [x1,x2]
Y = [y1,y2]
Z = [z1,z2]
sizes = [1,2]
# ...because I want to generalize the visualization to n sizes.
#because I want to generalize the visualization to n sizes
So far, all I have the knowledge to do is plot a 3D scatter of the data
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
# One scatter call per cube size, plotting that size's corner vertices.
for ii, _size in enumerate(sizes):
    ax.scatter(X[ii], Y[ii], Z[ii])
plt.show()
I would like to make a plot more like this, except with variable sizes.
Any help would be greatly appreciated! I have a lot to learn about matplotlib/pyplot and so on.
I have made a little bit of progress:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, PathPatch
from mpl_toolkits.mplot3d import Axes3D
import mpl_toolkits.mplot3d.art3d as art3d
def cube(a, b, c, l):
    """Draw a cube of side ``l`` with one corner vertex at ``(a, b, c)``.

    Each of the six faces is a 2D ``Rectangle`` added to the module-level 3D
    axes ``ax`` and lifted into 3D with ``art3d.pathpatch_2d_to_3d``.
    """
    # For every axis direction: the face's origin within its own plane and
    # the cube's starting coordinate along that direction. Using (a, b) and c
    # for all three directions is only correct when a == b == c.
    faces = (
        ("x", (b, c), a),
        ("y", (a, c), b),
        ("z", (a, b), c),
    )
    for direction, origin, base in faces:
        # Two parallel faces per direction: at the corner and one side length away.
        for offset in (base, base + l):
            side = Rectangle(origin, l, l)
            ax.add_patch(side)
            art3d.pathpatch_2d_to_3d(side, z=offset, zdir=direction)
fig = plt.figure()
# Create the 3D axes explicitly: passing keyword arguments such as
# projection= to Figure.gca() is not supported by newer Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
# Draw one unit cube with its corner at the origin.
cube(0, 0, 0, 1)
ax.set_xlim3d(-2, 2)
ax.set_ylim3d(-2, 2)
ax.set_zlim3d(-2, 2)
plt.show()
This plots a single cube.
EDIT:
More progress, I am now very close
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, PathPatch
from mpl_toolkits.mplot3d import Axes3D
import mpl_toolkits.mplot3d.art3d as art3d
import numpy as np  # np.linspace is used below but numpy was not imported in this snippet

cmap = plt.get_cmap('spring')  # define the colors of the plot
# NOTE(review): `n` is not defined in this snippet; presumably it is the number
# of distinct cube sizes -- confirm against the full script.
colors = [cmap(i) for i in np.linspace(0.1, 0.9, n+1)]
def cube(a, b, c, l):
    """Plot a cube of side ``l`` with one corner vertex at ``(a, b, c)``.

    Each of the six faces is a 2D ``Rectangle`` added to the module-level 3D
    axes ``ax`` and lifted into 3D with ``art3d.pathpatch_2d_to_3d``. The face
    color is taken from the module-level ``colors`` list at the position of
    ``l`` in the module-level ``sizes`` sequence.

    Raises:
        ValueError: if ``l`` is not one of the entries of ``sizes``.
    """
    # Per axis direction: the face's origin within its own plane (xdire, ydire)
    # and the cube's starting coordinate along that direction (zdire).
    dire = ["x", "y", "z"]
    xdire = [b, a, a]
    ydire = [c, c, b]
    zdire = [a, b, c]
    # Look the size up by position; comparing a plain list to a number with
    # `sizes == l` yields a single False and never selects a color.
    face_color = colors[list(sizes).index(l)]
    for ll in [0, l]:  # the face at the corner, then the opposite face
        for i in range(3):
            # Rectangle needs an explicit width and height (both the cube side).
            side = Rectangle((xdire[i], ydire[i]), l, l, facecolor=face_color, edgecolor='black')
            ax.add_patch(side)
            art3d.pathpatch_2d_to_3d(side, z=zdire[i] + ll, zdir=dire[i])
def plotter3D(X, Y, Z, sizes):
    """Draw every cube in the data set by calling ``cube`` once per corner vertex.

    ``X[k]``, ``Y[k]`` and ``Z[k]`` hold the corner coordinates of the cubes
    whose side length is ``sizes[k]``.
    """
    for group, xs in enumerate(X):
        ys = Y[group]
        zs = Z[group]
        for k, x0 in enumerate(xs):
            cube(x0, ys[k], zs[k], sizes[group])
fig = plt.figure()  # open a figure
# Create the 3D axes explicitly: passing keyword arguments such as
# projection= to Figure.gca() is not supported by newer Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')  # make it 3d
plotter3D(X, Y, Z, sizes)  # generate the cubes from the data set
# NOTE(review): length, width and height are not defined in this snippet;
# presumably they are the dimensions of the container being filled -- confirm.
ax.set_xlim3d(0, length)  # set the plot ranges
ax.set_ylim3d(0, width)
ax.set_zlim3d(0, height)
plt.show()
This generates the desired output, although it seems to be see-through in some places when viewed from certain angles. You can see this in the small cube-- dead center at coordinates (1.5,2,3) Any idea how to fix this?
Another edit:
The solution outlined above has two problems: (1) I can't get equal aspect ratios for the three axes, and (2) the cubes are see-through from certain angles. Here's what the output looks like for a larger system