Get data array from object in Python - python

I'm using a library which produces 3 plots given an object k.
I need to figure the data points (x,y,z) that produced these plot, but the problem is that the plots comes from a function from k.
The library I'm using is pyKriging and this is their github repository.
A simplified version of their example code is:
import pyKriging
from pyKriging.krige import kriging
from pyKriging.samplingplan import samplingplan
sp = samplingplan(2)
X = sp.optimallhc(20)
testfun = pyKriging.testfunctions().branin
y = testfun(X)
k = kriging(X, y, testfunction=testfun, name='simple')
k.train()
k.plot()
The full code, comments and output can be found here.
In summary, I'm trying to get the numpy array that produced these plots so I can create plots that follows my formatting styles.
I'm not knowledgeable about going into library codes in Python and I appreciate any help!

There is no single data array that produces the plot. Instead many arrays used for plotting are generated inside the kriging plot function.
Changing the filled contours to line contours is of course not a style option. One therefore needs to use the code from the original plotting function.
An option is to subclass kriging and implement a custom plot function (let's call it myplot). In this function, one can use contour instead of contourf. Naturally, it's also possible to change it completely to one's needs.
import pyKriging
from pyKriging.krige import kriging
from pyKriging.samplingplan import samplingplan
import numpy as np
import matplotlib.pyplot as plt
class MyKriging(kriging):
def __init__(self,*args,**kwargs):
kriging.__init__(self,*args,**kwargs)
def myplot(self,labels=False, show=True, **kwargs):
fig = plt.figure(figsize=(8,6))
# Create a set of data to plot
plotgrid = 61
x = np.linspace(self.normRange[0][0], self.normRange[0][1], num=plotgrid)
y = np.linspace(self.normRange[1][0], self.normRange[1][1], num=plotgrid)
X, Y = np.meshgrid(x, y)
# Predict based on the optimized results
zs = np.array([self.predict([xi,yi]) for xi,yi in zip(np.ravel(X), np.ravel(Y))])
Z = zs.reshape(X.shape)
#Calculate errors
zse = np.array([self.predict_var([xi,yi]) for xi,yi in zip(np.ravel(X), np.ravel(Y))])
Ze = zse.reshape(X.shape)
spx = (self.X[:,0] * (self.normRange[0][1] - self.normRange[0][0])) + self.normRange[0][0]
spy = (self.X[:,1] * (self.normRange[1][1] - self.normRange[1][0])) + self.normRange[1][0]
contour_levels = kwargs.get("levels", 25)
ax = fig.add_subplot(222)
CS = plt.contour(X,Y,Ze, contour_levels)
plt.colorbar()
plt.plot(spx, spy,'or')
ax = fig.add_subplot(221)
if self.testfunction:
# Setup the truth function
zt = self.testfunction( np.array(zip(np.ravel(X), np.ravel(Y))) )
ZT = zt.reshape(X.shape)
CS = plt.contour(X,Y,ZT,contour_levels ,colors='k',zorder=2, alpha=0)
if self.testfunction:
contour_levels = CS.levels
delta = np.abs(contour_levels[0]-contour_levels[1])
contour_levels = np.insert(contour_levels, 0, contour_levels[0]-delta)
contour_levels = np.append(contour_levels, contour_levels[-1]+delta)
CS = plt.contour(X,Y,Z,contour_levels,zorder=1)
plt.plot(spx, spy,'or', zorder=3)
plt.colorbar()
ax = fig.add_subplot(212, projection='3d')
ax.plot_surface(X, Y, Z, rstride=3, cstride=3, alpha=0.4)
if self.testfunction:
ax.plot_wireframe(X, Y, ZT, rstride=3, cstride=3)
if show:
plt.show()
sp = samplingplan(2)
X = sp.optimallhc(20)
testfun = pyKriging.testfunctions().branin
y = testfun(X)
k = MyKriging(X, y, testfunction=testfun, name='simple')
k.train()
k.myplot()

Related

How to remove marker from plot and make it smooth

I have been trying to plot a smooth graph, and here is my code
import matplotlib.pyplot as plt
#fig,axes= plt.subplots(nrows=6, ncols=1, squeeze=False)
x = df["DOY"]
y = df["By"]
z = df["Bz"]
a = df["Vsw"]
b = df["Nsw"]
c = df["magnetopause_distance"]
d = df["reconnection_rate"]
And after that, I used the following logic to plot the same
#create a figure
fig=plt.figure()
#define subplots and define their position
plt1=fig.add_subplot(611)
plt2=fig.add_subplot(612)
plt3=fig.add_subplot(613)
plt4=fig.add_subplot(614)
plt5=fig.add_subplot(615)
plt6=fig.add_subplot(616)
plt1.plot(x,y,'black',linewidth=0.5,marker=None)
plt1.set_ylabel("By")
plt1.set_title("3-6 July 2003")
plt2.plot(x,z,'black',linewidth=0.5)
plt2.set_ylabel("Bz")
plt3.plot(x,a,'black',linewidth=0.5)
plt3.set_ylabel("Vsw")
plt4.plot(x,b,'black',linewidth=0.5)
plt4.set_ylabel("Nsw")
plt5.plot(x,c,'black',linewidth=0.5)
plt5.set_ylabel("MD")
plt6.plot(x,d,'black',linewidth=0.5)
plt6.set_ylabel("MRR")
plt.subplots_adjust(hspace = 2,wspace = 2)
#saving plot in .jpg format
plt.savefig('myplot01.jpg', format='jpeg',dpi=500, bbox_inches='tight')
Finally, I am getting a plot like this:
What I want is something like this:
Sorry for the typos. Thanks for your time :)
Use:
from scipy.interpolate import UnivariateSpline
import numpy as np
list_x_new = np.linspace(min(x), max(x), 1000)
list_y_smooth = UnivariateSpline(x, y, list_x_new)
plt.plot(list_x_new, list_y_smooth)
plt.show()
This is for one of the graphs, you can substitute the values in list_y_smooth in place of y according to the values you want to plot.

python KDE get contours and paths into specific json format leaflet-friendly

I am doing a Kernel Density Estimation in Python and getting the contours and paths as shown below. (here is my sample data: https://pastebin.com/193PUhQf).
from numpy import *
from math import *
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
x_2d = []
y_2d = []
data = {}
data['nodes'] = []
# here is the sample data:
# https://pastebin.com/193PUhQf
X = [.....]
for Picker in xrange(0, len(X)):
x_2d.append(X[Picker][0])
y_2d.append(X[Picker][1])
# convert to arrays
m1 = np.array([x_2d])
m2 = np.array([y_2d])
x_min = m1.min() - 30
x_max = m1.max() + 30
y_min = m2.min() - 30
y_max = m2.max() + 30
x, y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
positions = np.vstack([x.ravel(), y.ravel()])
values = np.vstack([m1, m2])
kde = stats.gaussian_kde(values)
z = np.reshape(kde(positions).T, x.shape)
fig = plt.figure(2, dpi=200)
ax = fig.add_subplot(111)
pc = ax.pcolor(x, y, z)
cb = plt.colorbar(pc)
cb.ax.set_ylabel('Probability density')
c_s = plt.contour(x, y, z, 20, linewidths=1, colors='k')
ax.plot(m1, m2, 'o', mfc='w', mec='k')
ax.set_title("My Title", fontsize='medium')
plt.savefig("kde.png", dpi=200)
plt.show()
There is a similar way to get the contours using R, which is described here:
http://bl.ocks.org/diegovalle/5166482
Question: how can I achieve the same output using my python script or as a start point?
the desired output should be like contours_tj.json which can be used by leaflet.js lib.
UPDATE:
My input data structure is composed of three columns, comma separated:
first one is the X value
second one is the Y value
third one is the ID of my data, it has no numerical value, it is simply an identifier of the data point.
Update 2:
Question, if simply put, is that I want the same output as in the above link using my input file which is in numpy array format.
update 3:
my input data structure is of list type:
print type(X)
<type 'list'>
and here are the first few lines:
print X[0:5]
[[10.800584, 11.446064, 4478597], [10.576840,11.020229, 4644503], [11.434276,10.790881, 5570870], [11.156718,11.034633, 6500333], [11.054956,11.100243, 6513301]]
geojsoncontour is a python library to convert matplotlib contours to geojson
geojsoncontour.contour_to_geojson requires a contour_levels argument. The levels in pyplot.contour are chosen automatically, but you can access them with c_s._levels
So, for your example you could do:
import geojsoncontour
# your code here
c_s = plt.contour(x, y, z, 20, linewidths=1, colors='k')
# Convert matplotlib contour to geojson
geojsoncontour.contour_to_geojson(
contour=c_s,
geojson_filepath='out.geojson',
contour_levels=c_s._levels,
ndigits=3,
unit='m'
)

Linear Regression: Extending line past data and adding a legend

I have a code:
import math
import numpy as np
import pylab as plt1
from matplotlib import pyplot as plt
uH2 = 1.90866638
uHe = 3.60187307
eH2 = 213.38
eHe = 31.96
R = float(uH2*eH2)/(uHe*eHe)
C_Values = []
Delta = []
kHeST = []
J_f21 = []
data = np.genfromtxt("Lamda_HeHCL.txt", unpack=True);
J_i1=data[1];
J_f1=data[2];
kHe=data[7]
data = np.genfromtxt("Basecol_Basic_New_1.txt", unpack=True);
J_i2=data[0];
J_f2=data[1];
kH2=data[5]
print kHe
print kH2
kHe = map(float, kHe)
kH2 = map(float, kH2)
kHe = np.array(kHe)
kH2= np.array(kH2)
g = len(kH2)
for n in range(0,g):
if J_f2[n] == 1:
Jf21 = J_f2[n]
J_f21.append(Jf21)
ratio = kHe[n]/kH2[n]
C = (((math.log(float(kH2[n]),10)))-(math.log(float(kHe[n]),10)))/math.log(R,10)
C_Values.append(C)
St = abs(J_f1[n] - J_i1[n])
Delta.append(St)
print C_Values
print Delta
print J_f21
fig, ax = plt.subplots()
ax.scatter(Delta,C_Values)
for i, txt in enumerate(J_f21):
ax.annotate(txt, (Delta[i],C_Values[i]))
plt.plot(np.unique(Delta), np.poly1d(np.polyfit(Delta, C_Values, 1))(np.unique(Delta)))
plt.plot(Delta, C_Values)
fit = np.polyfit(Delta,C_Values,1)
fit_fn = np.poly1d(fit)
# fit_fn is now a function which takes in x and returns an estimate for y
plt.scatter(Delta,C_Values, Delta, fit_fn(Delta))
plt.xlim(0, 12)
plt.ylim(-3, 3)
In this code, I am trying to plot a linear regression that extends past the data and touches the x-axis. I am also trying to add a legend to the plot that shows the slope of the plot. Using the code, I was able to plot this graph.
Here is some trash data I have been using to try and extend the line and add a legend to my code.
x =[5,7,9,15,20]
y =[10,9,8,7,6]
I would also like it to be a scatter except for the linear regression line.
Given that you don't provide the data you're loading from files I was unable to test this, but off the top of my head:
To extend the line past the plot, you could turn this line
plt.plot(np.unique(Delta), np.poly1d(np.polyfit(Delta, C_Values, 1))(np.unique(Delta)))
Into something like
x = np.linspace(0, 12, 50) # both 0 and 12 are from visually inspecting the plot
plt.plot(x, np.poly1d(np.polyfit(Delta, C_Values, 1))(x))
But if you want the line extended to the x-axis,
polynomial = np.polyfit(Delta, C_Values, 1)
x = np.linspace(0, *np.roots(polynomial))
plt.plot(x, np.poly1d(polynomial)(x))
As for the scatter plot thing, it seems to me you could just remove this line:
plt.plot(Delta, C_Values)
Oh right, as for the legend, add a label to the plots you make, like this:
plt.plot(x, np.poly1d(polynomial)(x), label='Linear regression')
and add a call to plt.legend() just before plt.show().

Python for-loop always plots the same line when 3D (using matplotlib)

I'm plotting multiple lines on the same graph using matplotlib in Python by using a for-loop to add each line to the axis.
When plotted in 2D with each line on top of the other this works fine.
When plotting in 3D however, python displays the same graphed data each time I run through the for-loop, even though the data is significantly different.
Edit: I don't believe that this question is a duplicate of "How can I tell if NumPy creates a view or a copy?" as it highlights one particular instance of unexpected behaviour.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
###### Unimportant maths not relevant to the question ######
def rossler(x_n, y_n, z_n, h, a, b, c):
#defining the rossler function
x_n1=x_n+h*(-y_n-z_n)
y_n1=y_n+h*(x_n+a*y_n)
z_n1=z_n+h*(b+z_n*(x_n-c))
return x_n1,y_n1,z_n1
#defining a, b, and c
a = 1.0/5.0
b = 1.0/5.0
c = 5
#defining time limits and steps
t_0 = 0
t_f = 50*np.pi
h = 0.01
steps = int((t_f-t_0)/h)
#create plotting values
t = np.linspace(t_0,t_f,steps)
x = np.zeros(steps)
y = np.zeros(steps)
z = np.zeros(steps)
##### Relevant to the question again #####
init_condition_array = [[0,0,0],[0.1,0,0],[0.2,0,0],[0.3,0,0]]
color_array = ["red","orange","green","blue"]
color_counter = 0
zs_array = [0, 0.1, 0.2, 0.3]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for row in init_condition_array:
x[0] = row[0]
y[0] = row[1]
z[0] = row[2]
for i in range(x.size-1):
#re-evaluate the values of the x-arrays depending on the initial conditions
[x[i+1],y[i+1],z[i+1]]=rossler(x[i],y[i],z[i],t[i+1]-t[i],a,b,c)
plt.plot(t,x,zs=zs_array[color_counter],zdir="z",color=color_array[color_counter])
color_counter += 1
ax.set_xlabel('t')
ax.set_ylabel('x(t)')
plt.show()
As you can see, the graphs should look incredibly different;
this is a 2D image of the graphs on the same axis with a few alterations to the code (shown below):
Whilst this is the graph produced by the 3D plot:
.
The 2D plot was created by making these small alterations to the code; nothing above the first line was changed:
init_condition_array = [[0,0,0],[0.1,0,0],[0.2,0,0],[0.3,0,0]]
color_array = ["red","orange","green","blue"]
color_counter = 0
fig = plt.figure()
ax = fig.add_subplot(111)
for row in init_condition_array:
x[0] = row[0]
y[0] = row[1]
z[0] = row[2]
for i in range(x.size-1):
#re-evaluate the values of the x-arrays depending on the initial conditions
[x[i+1],y[i+1],z[i+1]]=rossler(x[i],y[i],z[i],t[i+1]-t[i],a,b,c)
plt.plot(t,x,color=color_array[color_counter],lw=1)
color_counter += 1
ax.set_xlabel('t')
ax.set_ylabel('x(t)')
plt.show()
Moving x = np.zeros(steps) inside the for row in init_condition_array loop fixes/avoids the problem. x is stored inside the Line3D objects returned by plt.plot, and mutating x affects the values stored in the other Line3Ds.
If you trace through the source code for Line3D you'll find
that the data that you pass to plt.plot ends up in a Line3D's _verts3d
attribute. The data is not copied; the _verts3d tuple holds references to the
exact same arrays.
And this _verts3d attribute is directly accessed later when rendering:
def draw(self, renderer):
xs3d, ys3d, zs3d = self._verts3d
Thus mutating the data -- even after calling plt.plot -- mutates self._verts3d.
This simple example demonstrates the problem:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
t = np.linspace(0, 1, 5)
x = np.sin(t)
line, = plt.plot(t, x, 0)
Here we have the original values of x:
print(line._verts3d[1])
# [ 0. 0.24740396 0.47942554 0.68163876 0.84147098]
And this shows that mutating x modifies line._verts3d:
x[:] = 1
print(line._verts3d[1])
# [ 1. 1. 1. 1. 1.]
# The result is a straight line, not a sine wave.
plt.show()
This surprising pitfall does not happen when making 2D line plots because there the Line2D._xy attribute which holds the data used for rendering stores a copy of the original data.
This problem could be fixed in the source code by changing this line in art3d.Line3D.set_3d_properties from
self._verts3d = art3d.juggle_axes(xs, ys, zs, zdir)
to
import copy
self._verts3d = copy.deepcopy(art3d.juggle_axes(xs, ys, zs, zdir))

Animation of pcolormesh() with mpl_toolkit.basemap giving attribute error

I am trying to animate some density data on a basemap map. Following an approach as was done in [this SO question][1], I get the following error:
/usr/local/lib/python2.7/dist-packages/matplotlib/collections.pyc in update_scalarmappable(self)
627 if self._A is None:
628 return
--> 629 if self._A.ndim > 1:
630 raise ValueError('Collections can only map rank 1 arrays')
631 if not self.check_update("array"):
AttributeError: 'list' object has no attribute 'ndim'
If I instead set the data in init() with null values by self.quad.set_array(self.z.ravel()), I end up with two plotted maps with no data being animated.
Any light that anybody could shed on what I'm doing wrong would be greatly appreciated. Thanks!
example code:
def plot_pcolor(lons,lats):
class UpdateQuad(object):
def __init__(self,ax, map_object, lons, lats):
self.ax = ax
self.m = map_object
self.lons = lons
self.lats = lats
self.ydim, self.xdim = lons.shape
self.z = np.zeros((self.ydim-1,self.xdim-1))
x, y = self.m(lons, lats)
self.quad = ax.pcolormesh(x, y, self.z, cmap=plt.cm.Reds)
def init(self):
print 'update init'
self.quad.set_array([])
return self.quad
def __call__(self,i):
data = np.zeros((self.ydim-1,self.xdim-1))
for i in range(self.ydim-1):
for j in range(self.xdim-1):
data[i,j]=random.random()+4
self.quad.set_array(data.ravel())
return self.quad
fig = plt.figure()
ax = fig.add_axes([0.1,0.1,0.8,0.8])
m = Basemap(width=2000000,height=2000000,
resolution='l', projection='laea',\
lat_ts=10.,\
lat_0=64.,lon_0=10., ax=ax)
m.fillcontinents()
ud = UpdateQuad(ax, m, lons, lats)
anim = animation.FuncAnimation(fig, ud, init_func=ud.init,
frames=20, blit=False)
plt.show()
if __name__ == '__main__':
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.basemap import Basemap
import numpy as np
import random
lons = np.linspace(-5.,25., num = 25)[:50]
lats = np.linspace(56., 71., num = 25)[:50]
lons,lats = np.meshgrid(lons,lats)
plot_pcolor(lons,lats)
It looks like the set_data method should require an ndarray (not sure why the example I had followed was working correctly).
So in the init() function, you should use quad.set_array(np.array([])) rather than quad.set_array([]).
Other problems to be aware of:
As mentioned before, you also want set blit=False in your FuncAnimation() call.
I was also experiencing problems when I set the quad artist attribute animated to True. Leave that be (i.e. quad.set_animated(False), which is the default anyway).
If you do not specify the bounds via norm in your first pcolormesh() call, it will set them according to the data you pass (in my case null), which resulting in my getting blank animations. Setting them according to the data you will animate later in the initial call prevented this problem in my case.
pcolormesh() takes the bounding positions to the data field, which should be +1 in the y and x dimension of the data array. If the data array is equal (or greater than) the dimensions of the position data, pcolormesh() will omit any data outside of this boundary requirement. I thought that my data would just appear offset by one grid cell, but everything was all whacky before I passed the correct boundary positions. See another question of mine for calculating these HERE.
Older versions of matplotlib do not have very good error reporting. I recommend upgrading to the latest version if that is an option for you.
Some random trouble-shooting:
After updating matplotlib and basemap and attempting to implement this in my existing plotting routine, I received the following error:
ValueError: All values in the dash list must be positive
I first thought it had to do with my pcolormesh() objects, but it took me way too long to discover that it was due to my previous setting of the dash attribute in my m.drawmeridians() call to dashes=[1,0] for a solid meridian. In the new version of matplotlib the handling of dashes was changed to give this error. The new prefered method for setting a solid line for the dash attribute is dashes=(None,None), which I don't like.
Resulting animation:
Code example for above output:
def plot_pcolor(lons,lats):
class UpdateQuad(object):
def __init__(self,ax, map_object, lons, lats):
self.ax = ax
self.m = map_object
self.lons = lons
self.lats = lats
vmin = 0
vmax = 1
self.ydim, self.xdim = lons.shape
self.z = np.zeros((self.ydim-1,self.xdim-1))
levels = MaxNLocator(nbins=15).tick_values(vmin,vmax)
cmap = plt.cm.cool
norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
x, y = self.m(lons, lats)
self.quad = self.ax.pcolormesh(x, y, self.z, alpha=0.9,
norm=norm, cmap=cmap,
vmin=vmin, vmax=vmax)
def init(self):
print 'update init'
self.quad.set_array(np.asarray([]))
return self.quad
def __call__(self,i):
for i in range(self.ydim-1):
for j in range(self.xdim-1):
self.z[i,j]=random.random()
self.quad.set_array(self.z.ravel())
return self.quad
fig, ax = plt.subplots()
m = Basemap(width=2000000,height=2000000,
resolution='l', projection='laea',\
lat_ts=10.,\
lat_0=64.,lon_0=10., ax=ax)
m.fillcontinents()
ud = UpdateQuad(ax, m, lons, lats)
anim = animation.FuncAnimation(fig, ud, init_func=ud.init,
frames=20, blit=False)
fig.tight_layout()
plt.show()
return ud.quad
if __name__ == '__main__':
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.basemap import Basemap
import numpy as np
import random
from matplotlib.colors import BoundaryNorm
from matplotlib.ticker import MaxNLocator
lons = np.linspace(-5.,25., num = 25)[:50]
lats = np.linspace(56., 71., num = 25)[:50]
lons,lats = np.meshgrid(lons,lats)
quad = plot_pcolor(lons,lats)

Categories