I would like to plot the path based on x y z location data. Below is a reproducible example, all the lines keep starting from 0 instead of following one after each other.
import seaborn as sns
# loading sample data and replicating my scenario
data = sns.load_dataset("iris")
# giving it a numeric value to replicate my scenario
cat_lbl = {'setosa': 1, 'versicolor': 2,'virginica' : 3}
data['cat_lbl'] = data['species'].map(cat_lbl)
#plot headings
species = ['setosa', 'versicolor', 'virginica']
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
sepal_length = data.loc[:,['sepal_length','cat_lbl']]
sepal_width = data.loc[:,['sepal_width','cat_lbl']]
petal_length = data.loc[:,['petal_length','cat_lbl']]
fig = plt.figure(figsize=([20,15]))
for lbl in range(3):
lbl=lbl+1
x=sepal_length[(sepal_length.cat_lbl == lbl)].values
y=sepal_width[(sepal_width.cat_lbl == lbl)].values
z=petal_length[(petal_length.cat_lbl == lbl)].values
ax=fig.add_subplot(3,3,lbl, projection='3d')
ax.plot(x.flatten(),y.flatten(),z.flatten())
ax.set_title(species[lbl-1])
plt.show()
Your problem is that
x=sepal_length[(sepal_length.cat_lbl == lbl)].values
y=sepal_width[(sepal_width.cat_lbl == lbl)].values
z=petal_length[(petal_length.cat_lbl == lbl)].values
are actually 2D arrays that contain the category index (1,2,3). So when you flatten x.flatten(), you alternate between the coordinate and the category index (you can see that the lines actually loop back to (1,1) on the first graph, (2,2) on the second and (3,3) on the third)
Here is how I would write your code:
import seaborn as sns
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
data = sns.load_dataset("iris")
species = ['setosa', 'versicolor', 'virginica']
fig,axs = plt.subplots(1,3,subplot_kw=dict(projection='3d'),figsize=(9,3))
for sp,ax in zip(species, axs.flat):
temp = data.loc[data['species']==sp]
x=temp['sepal_length'].values
y=temp['sepal_width'].values
z=temp['petal_length'].values
ax.plot(x,y,z)
ax.set_title(sp)
plt.show()
Try ax.plot3D(...) instead of ax.plot(...) as indicated in this tutorial for 3D plotting:
ax = plt.axes(projection='3d')
# Data for a three-dimensional line
zline = np.linspace(0, 15, 1000)
xline = np.sin(zline)
yline = np.cos(zline)
ax.plot3D(xline, yline, zline, 'gray')
# Data for three-dimensional scattered points
zdata = 15 * np.random.random(100)
xdata = np.sin(zdata) + 0.1 * np.random.randn(100)
ydata = np.cos(zdata) + 0.1 * np.random.randn(100)
ax.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Greens');
Related
I'm trying to scale the y-axis so my errorbars can be seen.
Any help would be appreciated! :)
Here is my current code.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline
x = ntermsList
y = allPmuCycleCountAverages
xerr = 0
yerr = allPmuCycleCountStandardDeviations
fig, ax = plt.subplots()
ax.errorbar(x, y, xerr=xerr, yerr=yerr,fmt='-o')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Line plot with error bars')
ax.set_xticks(ntermsList)
ax.set_xticklabels(ntermsList)
ax.set_yticks(allPmuCycleCountAverages)
ax.yaxis.grid(True)
plt.show()
I've tried these solutions, but no joy:
plt.ylim(-1, 1)
plt.rcParams["figure.figsize"] = [7.50, 3.50]
plt.rcParams["figure.autolayout"] = True
plt.yticks(np.arange(min(y), max(y)+0.5, 0.01))
I was expecting the y-axis scale to zoom close enough to the points so my errorbars could be seen
Try autoscalling based in y ticks. Here I'm adding some logic that just rescales the y-axis based on the data that is in the visible x-region. As I don't have your data I took random data.
import numpy as np
import random
ntermsList = np.random.randint(low=0, high=10, size=(555,))
allPmuCycleCountAverages = np.random.randint(low=0, high=10, size=(555,))
allPmuCycleCountStandardDeviations = np.random.randint(low=0, high=10, size=(555,))
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline
x = ntermsList
y = allPmuCycleCountAverages
xerr = 0
yerr = allPmuCycleCountStandardDeviations
fig, ax = plt.subplots()
ax.errorbar(x, y, xerr=xerr, yerr=yerr,fmt='-o')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Line plot with error bars')
ax.set_xticks(ntermsList)
ax.set_xticklabels(ntermsList)
ax.set_yticks(allPmuCycleCountAverages)
#plt.setp(ax.get_yticklabels(), rotation=90, horizontalalignment='right')
ax.yaxis.grid(True)
margin =0.1
def get_bottom_top(line):
xd = line.get_xdata()
yd = line.get_ydata()
lo,hi = ax.get_xlim()
y_displayed = yd[((xd>lo) & (xd<hi))]
h = np.max(y_displayed) - np.min(y_displayed)
bot = np.min(y_displayed)-margin*h
top = np.max(y_displayed)+margin*h
return bot,top
lines = ax.get_lines()
bot,top = np.inf, -np.inf
for line in lines:
new_bot, new_top = get_bottom_top(line)
if new_bot < bot: bot = new_bot
if new_top > top: top = new_top
ax.set_ylim(bot,top)
plt.show()
Before Rescalling
After rescalling
I have a 3-dimensional plot and I am able to plot it with the code written below.
Considering that my point distribution is represented by a 100x100 matrix, is it possible to plot a confidence interval on my data? In the code below, my data are called "result", while the upper bound and lower bound that I want to show are called "upper_bound" and "lower_bound".
For example, I am asking if exist something like this, but in 3 dimension (instead of 2 dimension like the picture below)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
interval = np.random.normal(0, 1, size=(100, 100))
x = np.arange(0.1,1.1,0.01)
y = np.linspace(-np.pi,np.pi,100)
X,Y = np.meshgrid(x,y)
result = []
for i,j in zip(X,Y):
result.append(np.log(i)+np.sin(j))
upper_bound = np.array(result)+interval
lower_bound = np.array(result)-interval
fig = plt.figure()
fig.set_figwidth(20)
fig.set_figheight(6)
ax = fig.gca(projection='3d')
surf = ax.plot_surface(X, Y, np.array(result))
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()
Check out this 3d surface plot using plotly graph objects:
import plotly.graph_objects as go
import numpy as np
x = np.arange(0.1,1.1,0.01)
y = np.linspace(-np.pi,np.pi,100)
X,Y = np.meshgrid(x,y)
result = []
for i,j in zip(X,Y):
result.append(np.log(i)+np.sin(j))
upper_bound = np.array(result)+1
lower_bound = np.array(result)-1
fig = go.Figure(data=[
go.Surface(z=result),
go.Surface(z=upper_bound, showscale=False, opacity=0.3,colorscale='purp'),
go.Surface(z=lower_bound, showscale=False, opacity=0.3,colorscale='purp'),
])
fig.show()
This plots 3 surfaces, the one for your results and the 2 bounds. However if you'd like something that looks more like a filled volume you'd have to add volume graphs with scaling opacity.
I have a set of Cartesian coordinates pairs, along with a binary variable for each of the pairs. I am plotting a heatmap, where in each bin, I compute the fraction of coordinates falling into this bin where the binary variable is 1.
My problem is with the axis. As can be seen in the picture below, the resulting axis are strings, that stand for bin boundaries. I would like the axis to be Cartesian coordinates. Is there a simple way to change this?
import numpy as np
import pandas as pd
import seaborn as sb
np.random.seed(0)
x = np.random.uniform(0,100, size=200)
y = np.random.uniform(0,100, size=200)
z = np.random.choice([True, False], size=200, p=[0.3, 0.7])
df = pd.DataFrame({"x" : x, "y" : y, "z":z})
binsx = 8
binsy = 5
res = df.groupby([pd.cut(df.y, binsy),pd.cut(df.x,binsx)])['z'].mean().unstack()
ax = sb.heatmap(res)
ax.axis('equal')
ax.invert_yaxis()
The following creates a scale by using the bins for histogramming as the extents of the image.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
np.random.seed(0)
x = np.random.uniform(0,100, size=200)
y = np.random.uniform(0,100, size=200)
z = np.random.choice([True, False], size=200, p=[0.3, 0.7])
df = pd.DataFrame({"x" : x, "y" : y, "z":z})
binsx = np.arange(0,112.5,12.5)
binsy = np.arange(0,120,20)
res = df.groupby([pd.cut(df.y, binsy),pd.cut(df.x,binsx)])['z'].mean().unstack()
plt.imshow(res, cmap=plt.cm.Reds,
extent=[binsx.min(), binsx.max(),binsy.min(),binsy.max()])
plt.xticks(binsx)
plt.yticks(binsy)
plt.colorbar()
plt.grid(False)
plt.show()
I am trying to plot the comun distribution of two normal distributed variables.
The code below plots one normal distributed variable. What would the code be for plotting two normal distributed variables?
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(-3, 3, 100)
plt.plot(x,mlab.normpdf(x, mu, sigma))
plt.show()
It sounds like what you're looking for is a Multivariate Normal Distribution. This is implemented in scipy as scipy.stats.multivariate_normal. It's important to remember that you are passing a covariance matrix to the function. So to keep things simple keep the off diagonal elements as zero:
[X variance , 0 ]
[ 0 ,Y Variance]
Here is an example using this function and generating a 3D plot of the resulting distribution. I add the colormap to make seeing the curves easier but feel free to remove it.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
from mpl_toolkits.mplot3d import Axes3D
#Parameters to set
mu_x = 0
variance_x = 3
mu_y = 0
variance_y = 15
#Create grid and multivariate normal
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X, Y = np.meshgrid(x,y)
pos = np.empty(X.shape + (2,))
pos[:, :, 0] = X; pos[:, :, 1] = Y
rv = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]])
#Make a 3D plot
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, rv.pdf(pos),cmap='viridis',linewidth=0)
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
plt.show()
Giving you this plot:
Edit the method used below was deprecated in Matplotlib v2.2 and removed in v3.1
A simpler version is available through matplotlib.mlab.bivariate_normal
It takes the following arguments so you don't need to worry about matrices
matplotlib.mlab.bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0, sigmaxy=0.0)
Here X, and Y are again the result of a meshgrid so using this to recreate the above plot:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.mlab import bivariate_normal
from mpl_toolkits.mplot3d import Axes3D
#Parameters to set
mu_x = 0
sigma_x = np.sqrt(3)
mu_y = 0
sigma_y = np.sqrt(15)
#Create grid and multivariate normal
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X, Y = np.meshgrid(x,y)
Z = bivariate_normal(X,Y,sigma_x,sigma_y,mu_x,mu_y)
#Make a 3D plot
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z,cmap='viridis',linewidth=0)
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
plt.show()
Giving:
The following adaption to #Ianhi's code above returns a contour plot version of the 3D plot above.
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
import numpy as np
from scipy.stats import multivariate_normal
#Parameters to set
mu_x = 0
variance_x = 3
mu_y = 0
variance_y = 15
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X,Y = np.meshgrid(x,y)
pos = np.array([X.flatten(),Y.flatten()]).T
rv = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]])
fig = plt.figure(figsize=(10,10))
ax0 = fig.add_subplot(111)
ax0.contour(X, Y, rv.pdf(pos).reshape(500,500))
plt.show()
While the other answers are great, I wanted to achieve similar results while also illustrating the distribution with a scatter plot of the sample.
More details can be found here: Python 3d plot of multivariate gaussian distribution
The results looks like:
And is generated using the following code:
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from scipy.stats import multivariate_normal
# Sample parameters
mu = np.array([0, 0])
sigma = np.array([[0.7, 0.2], [0.2, 0.3]])
rv = multivariate_normal(mu, sigma)
sample = rv.rvs(500)
# Bounds parameters
x_abs = 2.5
y_abs = 2.5
x_grid, y_grid = np.mgrid[-x_abs:x_abs:.02, -y_abs:y_abs:.02]
pos = np.empty(x_grid.shape + (2,))
pos[:, :, 0] = x_grid
pos[:, :, 1] = y_grid
levels = np.linspace(0, 1, 40)
fig = plt.figure()
ax = fig.gca(projection='3d')
# Removes the grey panes in 3d plots
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
# The heatmap
ax.contourf(x_grid, y_grid, 0.1 * rv.pdf(pos),
zdir='z', levels=0.1 * levels, alpha=0.9)
# The wireframe
ax.plot_wireframe(x_grid, y_grid, rv.pdf(
pos), rstride=10, cstride=10, color='k')
# The scatter. Note that the altitude is defined based on the pdf of the
# random variable
ax.scatter(sample[:, 0], sample[:, 1], 1.05 * rv.pdf(sample), c='k')
ax.legend()
ax.set_title("Gaussian sample and pdf")
ax.set_xlim3d(-x_abs, x_abs)
ax.set_ylim3d(-y_abs, y_abs)
ax.set_zlim3d(0, 1)
plt.show()
I want to draw distributions like shown in figure below -- tail of distributions. I have tried following but not quite getting there:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mean1 = 0
variance1 = 1
sigma1 = math.sqrt(variance1)
x = np.linspace(-3,3.5,100, endpoint=True)
plt.plot(x,mlab.normpdf(x,mean1,sigma1))
mean2 = 0.4
variance2 = 2
sigma2 = math.sqrt(variance2)
y = np.linspace(-4,3.5,100, endpoint=False)
plt.plot(x,mlab.normpdf(y,mean2,sigma2))
##plt.axis('off')
plt.yticks([])
plt.xticks([])
plt.show()
Any suggestions would be appreciative?
You want fill_between:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math
mean1 = 0
variance1 = 1
sigma1 = math.sqrt(variance1)
x = np.linspace(-3,3.5,100, endpoint=True)
y1 = mlab.normpdf(x,mean1,sigma1)
fig, ax = plt.subplots()
ax.plot(x,y1)
mean2 = 0.4
variance2 = 2
sigma2 = math.sqrt(variance2)
y = np.linspace(-4,3.5,100, endpoint=False)
y2 = mlab.normpdf(y,mean2,sigma2)
ax.plot(x,y2)
ax.fill_between(x[:30], y1[:30], color='blue')
ax.fill_between(x[:30], y2[:30], color='green')
ax.fill_between(x[-30:], y1[-30:], y2[-30:], color='red', alpha=0.5)
ax.set_yticks([])
ax.set_xticks([])
plt.savefig('fill_norms.png')
plt.show()
This is a crazy simple example -- see the cookbook examples and look at the where clause; your fill-between highlights can adapt to changes in the lines you're plotting (e.g., an automatic red fill_between everywhere BADTHING exceeds GOODTHING, without your having to figure out the index (30 or -30 in this example)).