If I have a Python MatPlotLib figure (for example, a matplotlib.axes._subplots.AxesSubplot object), is there a way to extract from it positions of the points and the error bars? I.e. I want to get arrays containing x,y-coordinates and y-errors.
Example:
import numpy as np
import seaborn as sb
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
p = sb.regplot(x=x, y=y, x_bins=10, fit_reg=None)
How can I extract from 'p' positions of the points and the error bars?
Thank you for your help!
The errorbar data are stored in p.lines, since seaborn plots them using plt.plot.
You can access their positions using line.get_xdata() and line.get_ydata().
The point data are stored in p.collections, since they are plotted internally in seaborn using plt.scatter.
Getting at the point positions from the PathCollection object takes one extra step, as shown in this answer: Get positions of points in PathCollection created by scatter(): i.e. you have to set the offset_position first, before accessing the offsets.
Here's an example to get both the point data and the errorbar data from the
matplotlib Axes object, p.
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt

x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
p = sb.regplot(x=x, y=y, x_bins=10, fit_reg=None)

# First, get the positions of the points (the first collection on the Axes):
coll = p.collections[0]
# Older matplotlib releases wanted the offsets switched to data coordinates
# before reading them. The setter was deprecated and later removed, so only
# call it when the installed matplotlib still provides it.
if hasattr(coll, 'set_offset_position'):
    coll.set_offset_position('data')
points_xy = coll.get_offsets()
print(points_xy)
#[[-1.65295679 3.05723876]
# [-1.29981986 1.60258005]
# [-0.94417279 0.8999881 ]
# [-0.56964819 0.38035406]
# [-0.20253243 0.0774201 ]
# [ 0.15535504 0.024336 ]
# [ 0.5362322 0.30849082]
# [ 0.90482003 0.85788122]
# [ 1.26136841 1.66294418]
# [ 1.63048127 3.02934186]]

# Next, get the positions of the errorbars
xerr = []
yerr = []
for line in p.lines:
    # every errorbar is one line; its first x value is its x position and
    # its y data holds the two end points
    xerr.append(line.get_xdata()[0])
    yerr.append(line.get_ydata().tolist())
print(xerr)
# [-1.6529567859649865, -1.2998198636006264, -0.94417278886439027, -0.56964818931133276, -0.20253243328132031, 0.15535504153419355, 0.53623219583456194, 0.90482002911787607, 1.2613684083224488, 1.6304812696399549]
print(yerr)
# [[2.908807029542707, 3.200571530218434], [1.4449980200239572, 1.751504207194087], [0.7633753040974505, 1.029774999216172], [0.26593411110949544, 0.4753543268237353], [-0.0030674495857816496, 0.15582564460187567], [-0.052610243112427575, 0.09899773706322114], [0.21019700161329888, 0.41120457637300634], [0.7328000635837721, 0.9826379405190817], [1.508513523393156, 1.8184617796582343], [2.885113765027557, 3.1670479251950376]]
plt.show()
Here points_xy is an array of (x,y) coordinates of the points, xerr holds the x-coordinate of each errorbar (which is, of course, the same as the x-coordinates in points_xy), and yerr is a list of pairs of y-coordinates: the bottom and top of each errorbar.
If you know the points are in the center of the error bars (it looks like they are, for this example), then this should do it:
import numpy as np
import seaborn as sb
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
p = sb.regplot(x=x, y=y, x_bins=10, fit_reg=None)
def get_data(p):
    """Recover point positions and error-bar sizes from the lines of an Axes.

    Each entry of ``p.lines`` is read as one error bar: its first x value is
    the bar's x position and its first two y values are the bar's two ends.
    The point is taken to sit at the midpoint of the bar.

    Parameters
    ----------
    p : object with a ``lines`` attribute
        Typically a matplotlib Axes. Every item in ``p.lines`` must provide
        ``get_xdata()`` and ``get_ydata()``.

    Returns
    -------
    x, y, y_error : numpy.ndarray
        x positions, bar midpoints, and the distance from each midpoint to
        the second end point of its bar (half the bar length).
    """
    x_list = []
    lower_list = []
    upper_list = []
    for line in p.lines:
        # fetch the y data once instead of once per end point
        ydata = line.get_ydata()
        x_list.append(line.get_xdata()[0])
        lower_list.append(ydata[0])
        upper_list.append(ydata[1])
    lower = np.asarray(lower_list)
    upper = np.asarray(upper_list)
    # assumes the points are centred on their error bars
    y = 0.5 * (lower + upper)
    y_error = upper - y
    x = np.asarray(x_list)
    return x, y, y_error
get_data(p)
Here the returned y_error will be the magnitude of the error bars.
Related
I'm trying to animate multiple dots moving along the circumference of their own circle using matplotlib.
I've been able to animate a single dot moving along a circle, and here's the code to do that:
import numpy as np
import argparse
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# To make the waving flag, we need N dots moving on a circle
# Each subsequent dot is going to be delayed by a slight time, and the last dot should be the same timing as the first dot
# radius shared by every circle
r = 3


def circle(phi, phi_off, offset_x, offset_y):
    """Return the point at angle ``phi + phi_off`` on a circle of radius ``r``.

    The circle is centred on ``(offset_x, offset_y)``. The result is a
    length-2 array ``[x, y]``.
    """
    return np.array([r*np.cos(phi+phi_off), r*np.sin(phi+phi_off)]) + np.array([offset_x, offset_y])
plt.rcParams["figure.figsize"] = 8,6
# create a figure with an axes
fig, ax = plt.subplots()
# set the axes limits
ax.axis([-30,30,-30,30])
# set equal aspect such that the circle is not shown as ellipse
ax.set_aspect("equal")
# create a point in the axes
point, = ax.plot(0,1, marker="o")
def update(phi, phi_off, offset_x,offset_y):
    """Move the single point to its position for animation angle ``phi``.

    ``phi`` is the current frame value; the remaining arguments are passed
    straight through to ``circle``. Returns a 1-tuple holding the artist.
    """
    # obtain point coordinates
    x, y = circle(phi, phi_off, offset_x, offset_y)
    # set point coordinates
    point.set_data([x], [y])
    return point,
ani = animation.FuncAnimation(fig,update,fargs=(0,8*i,0, ), interval = 2, frames=np.linspace(0,2*np.pi,360, endpoint=False))
It looks like this :
In order to have multiple dots, I tried to do ani.append in a loop, i.e. have it do something like this:
i=0
for i in range(3):
ani.append(animation.FuncAnimation(fig,update,fargs=(0,8*i,0, ), interval = 2, frames=np.linspace(0,2*np.pi,360, endpoint=False)))
Here's what it looks like:
Any ideas on how to have multiple dots each moving smoothly on their own circle?
You should only define one update function, which is updating all points:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# radius shared by every circle
r = 3


def circle(phi, phi_off, offset_x, offset_y):
    """Return the point at angle ``phi + phi_off`` on a circle of radius ``r``.

    The circle is centred on ``(offset_x, offset_y)``. The result is a
    length-2 array ``[x, y]``.
    """
    return np.array([r*np.cos(phi+phi_off), r*np.sin(phi+phi_off)]) + np.array([offset_x, offset_y])
plt.rcParams["figure.figsize"] = 8,6
fig, ax = plt.subplots()
ax.axis([-30,30,-30,30])
ax.set_aspect("equal")
# create initial conditions
phi_offs = [0, np.pi/2, np.pi]
offset_xs = [0, 0, 0]
offset_ys = [0, 0, 0]
# amount of points
N = len(phi_offs)
# create a point in the axes
# one line artist per dot, each placed at its starting position (phi = 0)
points = []
for start_phi, start_x, start_y in zip(phi_offs, offset_xs, offset_ys):
    x, y = circle(0, start_phi, start_x, start_y)
    # ax.plot returns a list of artists; keep the single one it created
    points.append(ax.plot(x, y, marker="o")[0])
def update(phi, phi_off, offset_x,offset_y):
    """Move every point to its position for animation angle ``phi``.

    ``phi_off``, ``offset_x`` and ``offset_y`` are sequences with one entry
    per point, in the same order as ``points``. Returns the list of updated
    artists.
    """
    # set point coordinates
    for artist, off_phi, off_x, off_y in zip(points, phi_off, offset_x, offset_y):
        x, y = circle(phi, off_phi, off_x, off_y)
        artist.set_data([x], [y])
    return points
ani = animation.FuncAnimation(fig,update,
fargs=(phi_offs, offset_xs, offset_ys),
interval = 2,
frames=np.linspace(0,2*np.pi,360, endpoint=False),
blit=True)
plt.show()
I also added the blit=True argument to make the animation smoother and faster (only the necessary artists will be updated) but be careful, you might have to omit this feature in more complex animations.
I am doing a Kernel Density Estimation in Python and getting the contours and paths as shown below. (here is my sample data: https://pastebin.com/193PUhQf).
from numpy import *
from math import *
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
x_2d = []
y_2d = []
data = {}
data['nodes'] = []
# here is the sample data:
# https://pastebin.com/193PUhQf
X = [.....]
# split the first two columns of every record into separate x / y lists
# (the third column, the identifier, is not needed here)
for record in X:
    x_2d.append(record[0])
    y_2d.append(record[1])
# convert to arrays
m1 = np.array([x_2d])
m2 = np.array([y_2d])
x_min = m1.min() - 30
x_max = m1.max() + 30
y_min = m2.min() - 30
y_max = m2.max() + 30
x, y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
positions = np.vstack([x.ravel(), y.ravel()])
values = np.vstack([m1, m2])
kde = stats.gaussian_kde(values)
z = np.reshape(kde(positions).T, x.shape)
fig = plt.figure(2, dpi=200)
ax = fig.add_subplot(111)
pc = ax.pcolor(x, y, z)
cb = plt.colorbar(pc)
cb.ax.set_ylabel('Probability density')
c_s = plt.contour(x, y, z, 20, linewidths=1, colors='k')
ax.plot(m1, m2, 'o', mfc='w', mec='k')
ax.set_title("My Title", fontsize='medium')
plt.savefig("kde.png", dpi=200)
plt.show()
There is a similar way to get the contours using R, which is described here:
http://bl.ocks.org/diegovalle/5166482
Question: how can I achieve the same output using my python script or as a start point?
the desired output should be like contours_tj.json which can be used by leaflet.js lib.
UPDATE:
My input data structure is composed of three columns, comma separated:
first one is the X value
second one is the Y value
third one is the ID of my data, it has no numerical value, it is simply an identifier of the data point.
Update 2:
Question, if simply put, is that I want the same output as in the above link using my input file which is in numpy array format.
update 3:
my input data structure is of list type:
print type(X)
<type 'list'>
and here are the first few lines:
print X[0:5]
[[10.800584, 11.446064, 4478597], [10.576840,11.020229, 4644503], [11.434276,10.790881, 5570870], [11.156718,11.034633, 6500333], [11.054956,11.100243, 6513301]]
geojsoncontour is a python library to convert matplotlib contours to geojson
geojsoncontour.contour_to_geojson requires a contour_levels argument. The levels in pyplot.contour are chosen automatically, but you can access them with c_s._levels
So, for your example you could do:
import geojsoncontour
# your code here
c_s = plt.contour(x, y, z, 20, linewidths=1, colors='k')
# Convert matplotlib contour to geojson
# NOTE(review): `_levels` is a private attribute of the contour set; the
# public `c_s.levels` presumably holds the same values for line contours.
# Confirm before relying on the private name.
geojsoncontour.contour_to_geojson(
contour=c_s,
geojson_filepath='out.geojson',
contour_levels=c_s._levels,
ndigits=3,
unit='m'
)
I am trying to plot this function: y(x) = e^(-a*x) * cos(x),
for x values spanning the interval (0, 4*pi) and 'a' ranging from 0 to 1 in steps of 0.25. I have managed to successfully plot this for a=0.
However, I am wondering if I can write some code that will allow me to plot it for the range of 'a' values, instead of having to rewrite the code for each different 'a' value.
Here is the code I have: (Note y = cos(x) as this is for a = 0)
from numpy import *
import pylab as p
# function to plot ( a = 0 )
x = linspace(0, 4*pi, 100)
y = cos(x)
#plot the function
p.plot(x,y,'o')
# axis annotation
p.xlabel('x-variable')
p.ylabel('y=(e**-ax)cosx')
# graph title
p.title('Plot of function')
#set axis range
p.axis([-0.5, 4*pi, -1.3, 1.3])
# turn grid on
p.grid(True)
p.show()
Thank you!
assuming that you want everything on the same plot...
from numpy import linspace, pi, cos, exp
import pylab as p
# x does not depend on a, so build it once
x = linspace(0, 4*pi, 100)
# pylab.frange has been removed from matplotlib; linspace gives the same
# five values a = 0, 0.25, 0.5, 0.75, 1
for a in linspace(0, 1, 5):
    y = exp(-a*x)*cos(x)
    p.plot(x,y,'o')
# axis annotation
p.xlabel('x-variable')
p.ylabel('y=(e**-ax)cosx')
# graph title
p.title('Plot of function')
#set axis range
p.axis([-0.5, 4*pi, -1.3, 1.3])
# turn grid on
p.grid(True)
p.show()
I'm using the example dendrogram from this post in my work but would also like to keep track of which row / column is from which piece of data.
I've edited the code to record the names of the data in a list called names, as shown below, and would like to print the names at the bottom and to the right of the distance matrix visualization. I've tried adding labels=names in the call to dendrogram, but this didn't help.
Does anyone know how to add labels to this?
import scipy
import pylab
import scipy.cluster.hierarchy as sch
# Generate random features and distance matrix.
# scipy.rand / scipy.zeros are deprecated aliases of the NumPy functions,
# so use NumPy directly.
import numpy as np

x = np.random.rand(40)
D = np.zeros([40, 40])
for i in range(40):
    for j in range(40):
        # pairwise absolute difference between features
        D[i, j] = abs(x[i] - x[j])
### new code
names = []
for i in range(40):
    names.append('str%i' % (i))
    print(names[-1])
### end new code
# Compute and plot first dendrogram.
fig = pylab.figure(figsize=(8,8))
ax1 = fig.add_axes([0.09,0.1,0.2,0.6])
Y = sch.linkage(D, method='centroid')
Z1 = sch.dendrogram(Y, orientation='right')
ax1.set_xticks([])
ax1.set_yticks([])
# Compute and plot second dendrogram.
ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
Y = sch.linkage(D, method='single')
Z2 = sch.dendrogram(Y)
ax2.set_xticks([])
ax2.set_yticks([])
# Plot distance matrix.
axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
idx1 = Z1['leaves']
idx2 = Z2['leaves']
D = D[idx1,:]
D = D[:,idx2]
im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
axmatrix.set_xticks([])
axmatrix.set_yticks([])
# Plot colorbar.
#axcolor = fig.add_axes([0.91,0.1,0.02,0.6])
#pylab.colorbar(im, cax=axcolor)
fig.show()
fig.savefig('dendrogram.png')
The python package heatmapcluster (available on PyPI) that I wrote accepts (in fact, requires) labels.
Here's a simplified version of your script using heatmapcluster:
import numpy as np
import matplotlib.pyplot as plt
from heatmapcluster import heatmapcluster
# Generate random features and distance matrix.
x = np.random.rand(40)
D = np.abs(np.subtract.outer(x, x))
names = ['str%i' % i for i in range(len(x))]
h = heatmapcluster(D, names, names,
num_row_clusters=3, num_col_clusters=3,
label_fontsize=8,
xlabel_rotation=-75,
cmap=plt.cm.coolwarm,
show_colorbar=True,
top_dendrogram=True)
plt.show()
And here is the plot it generates:
(Note that, for a symmetric array like D, there is really no point in clustering both axes. By symmetry, they will generate the same dendrogram.)
I'm trying to visualize a sorted table (sorted on a column). My ideal result should be something like
visualization of a sorted table
Any suggestion on how to reach this goal with matplotlib?
I have already tried the suggestions given here and here, but I'm looking for something fancier, like the attached image.
Thanks in advance,
Matplotlib does not support this directly, but it is fairly easy to replicate the plot that you have linked to.
The function below does something similar given a 2d array of data. It can be sorted or not, the function doesn't really care.
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
def sorted_table_plot(data, labels, categories, cmap=None, ax=None):
    """Draw a 2d array as a grid of labelled circles, linking equal values.

    Every cell of ``data`` is drawn as a circle containing its value and
    coloured through ``cmap``. All cells holding the same value are joined
    by a line of the same colour.

    Parameters
    ----------
    data : 2d numpy array
        Values to draw; rows run along the y axis, columns along the x axis.
    labels : iterable of str
        Tick labels for the rows (y axis).
    categories : iterable of str
        Tick labels for the columns (x axis).
    cmap : colormap, optional
        Colormap used for the circles and the lines. Defaults to
        ``plt.cm.jet``.
    ax : Axes, optional
        Axes to draw into. Defaults to the current axes.
    """
    # check if an axes was supplied
    if ax is None:
        ax = plt.gca()
    # check if a colormap was supplied
    if cmap is None:
        cmap = plt.cm.jet
    # generate the grid arrays with the coordinates for the annotations
    yy, xx = np.mgrid[:data.shape[0], :data.shape[1]]
    x = xx.flatten()
    y = yy.flatten()
    d = data.flatten()
    # a norm object mapping the data range onto the colormap
    norm = plt.Normalize(d.min(), d.max())
    # iterate over the data points and draw the labels
    for di, xi, yi in zip(d, x, y):
        color = cmap(norm(di))
        hsv = mcolors.rgb_to_hsv(color[:3])
        # white text on dark circles, black text on bright ones
        fc = 'w' if hsv[2] < 0.7 else 'k'
        ax.annotate(str(di), xy=(xi, yi), xycoords="data",
                    va="center", ha="center", color=fc,
                    bbox=dict(boxstyle="circle", fc=color))
    # iterate over all the appearing values and draw the lines
    for value in np.unique(d):
        mask = d == value
        xi, yi = x[mask], y[mask]
        idx = np.argsort(xi)
        # draw into the requested axes with the requested colormap so the
        # lines match the circles
        ax.plot(xi[idx], yi[idx], color=cmap(norm(value)), lw=2)
    # add the axes labels (list() so one-shot iterables such as map() work)
    ax.set_xticks(xx[0, :])
    ax.set_xticklabels(list(categories))
    ax.set_yticks(yy[:, 0])
    ax.set_yticklabels(list(labels))
    # adjust the axes ranges; the y limits are reversed so row 0 is on top
    ax.set_xlim(xx[0, 0] - 0.5, xx[-1, -1] + 0.5)
    ax.set_ylim(yy[-1, -1] + 0.5, yy[0, 0] - 0.5)
Now, you can simply call it on a data array. In the following I created a random array, since you didn't care to supply an example data set.
# fix the seed for reproducibility
np.random.seed(2)
# create random data
data = np.tile(np.arange(1,8), (3,1)).T
# build real lists: on Python 3 map() returns a one-shot iterator
labels = ['label ' + str(value) for value in data[:,1]]
categories = ['cat ' + str(number) for number in np.arange(data.shape[1])+1]
for i in range(1,data.shape[1]):
    # shuffle all but the first column
    np.random.shuffle(data[:,i])
# call the function and show the plot
sorted_table_plot(data, labels, categories)
plt.show()
Result: