Guess fitting Voigt curve to data - Python script behaves erratically - python

Let me describe what I'm attempting to do. This requires the eyes of somebody more knowledgeable about Python than I am.
I have a set of data (actually sediment diameter vs. percentage in a sample) that shows a distinctive spectrum when plotted. I'm assuming that there are "modes" hidden within the data, and am trying to force-fit Voigt, Gaussian, or Lorentzian curves to draw out some information. The framework of this script came from a person doing a similar thing with XRD data. I'm not quite proficient enough to really understand how the script achieves its goals, so I'm having trouble isolating a few strange behaviors. Let me outline the weirdness first, then I'll share the code.
If I run the code over and over again with the same data, the results are not always the same. Not only that, but maybe 25% of the time, I get an error that I can't figure out. Why does this error happen, and why is it only happening some of the time?
TypeError: unsupported operand type(s) for -: 'tuple' and 'float'
When I define "spec" at the beginning of the code, I have to specify model types. By chance, I tried VoigtModel first, and again, it works most of the time. However, if I specify the type as GaussianModel or LorentzianModel, the script doesn't run at all:
TypeError: can't multiply sequence by non-int of type 'float'
In the script, I ask it to print some information about the curves it fits, specifically the x and y values at each detected peak. However, when I run it on subsequent occasions it may fit different curves, yet the print() output doesn't change. Like, what?
If anybody could give the code a try and perhaps offer some insight as to what's wonky about this code, I'd be hugely grateful.
edit I've discovered that if I add more {'type': 'VoigtModel'} to spec = , the frequency of script failure decreases. If I remove some (leave one or two) then it fails at a much greater percentage. Still could use some help understanding the connection.
The code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import random
from scipy import signal
from lmfit import models
x = 0, 0.09326263, 0.186541806, 0.279826296, 0.373096863, 0.466372043, 0.559644359, 0.652910952, 0.746190193, 0.839463682, 0.932734784, 1.026014714, 1.119288717, 1.212558343, 1.305836463, 1.399111865, 1.492381488, 1.585657384, 1.678931325, 1.772207061, 1.865478378, 1.958752334, 2.05202538, 2.145299504, 2.238574433, 2.331847735, 2.425123471, 2.518395825, 2.611671451, 2.704945386, 2.798218396, 2.891491964, 2.984766114, 3.078040106, 3.171314505, 3.264585057, 3.357863555, 3.451137678, 3.544409886, 3.637684839, 3.730956661, 3.824229504, 3.917507936, 4.010781777, 4.104055591, 4.197326, 4.290603266, 4.383874926, 4.477149297, 4.57042345, 4.663698494, 4.756972396, 4.850245469, 4.943519232, 5.036793499, 5.13006734, 5.223340556, 5.316615186, 5.409888929, 5.503163537, 5.596438512, 5.689708905, 5.782986369, 5.876257098, 5.969532028, 6.062807987, 6.156078156, 6.249352461, 6.342627453, 6.43590194, 6.529177933, 6.622450379, 6.715725752, 6.808997914, 6.902272777, 6.995546352, 7.088819796, 7.18209372, 7.275367937, 7.36864248, 7.461916216, 7.555189618, 7.648464489, 7.741737739, 7.835015624, 7.928288902, 8.021559911, 8.114833257, 8.208110415, 8.301378965, 8.394658258, 8.487929146, 8.581205011, 8.674478952, 8.767749555, 8.861024001, 8.954299075, 9.047574353, 9.140848269, 9.234120373, 9.327394253, 9.420668151, 9.513942544, 9.607217038, 9.700491238, 9.793764758, 9.887039268, 9.980313168, 10.0735868, 10.16686092, 10.26013875, 10.35340805, 10.44668356, 10.53995856, 10.63323182, 10.72650553
y = 0.001352, 0.001721, 0.002661, 0.00523, 0.010879, 0.020142, 0.030427, 0.039188, 0.046922, 0.055438, 0.065352, 0.076432, 0.089913, 0.107888, 0.132296, 0.164797, 0.208043, 0.266067, 0.343688, 0.443698, 0.565158, 0.704086, 0.854979, 1.01437, 1.17932, 1.34739, 1.51366, 1.67215, 1.81638, 1.94147, 2.0432, 2.11934, 2.16792, 2.19005, 2.18907, 2.17172, 2.14565, 2.11866, 2.09749, 2.08736, 2.09102, 2.1084, 2.13739, 2.17478, 2.21729, 2.26139, 2.30342, 2.33966, 2.36671, 2.38045, 2.37413, 2.33769, 2.26088, 2.13908, 1.9769, 1.78619, 1.57832, 1.35944, 1.13483, 0.919488, 0.743312, 0.637312, 0.615423, 0.665356, 0.744581, 0.78791, 0.743882, 0.617121, 0.46602, 0.356204, 0.320677, 0.361725, 0.45788, 0.566712, 0.650727, 0.701846, 0.739237, 0.788714, 0.863346, 0.956347, 1.04314, 1.09353, 1.0874, 1.02493, 0.925497, 0.815472, 0.721377, 0.658056, 0.628985, 0.623906, 0.617012, 0.578717, 0.487132, 0.346259, 0.185964, 0.066494, 0.011942, 0.000815, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#xlog = [math.log(xval) for xval in x]
spec = {
    'x': x,
    'y': y,
    'model': [
        {'type': 'VoigtModel'},
        {'type': 'VoigtModel'},
        {'type': 'VoigtModel'},
        {'type': 'VoigtModel'},
    ]}
plt.plot(spec['x'], spec['y'])
plt.show()
def update_spec_from_peaks(spec, model_indicies, peak_widths=(1, 50), **kwargs):
    x = spec['x']
    y = spec['y']
    x_range = np.max(x) - np.min(x)
    peak_indicies = signal.find_peaks_cwt(y, peak_widths)
    np.random.shuffle(peak_indicies)
    for peak_indicie, model_indicie in zip(peak_indicies.tolist(), model_indicies):
        model = spec['model'][model_indicie]
        if model['type'] in ['GaussianModel', 'LorentzianModel', 'VoigtModel']:
            params = {
                'height': y[peak_indicie],
                'sigma': x_range / len(x) * np.min(peak_widths),
                'center': x[peak_indicie]
            }
            if 'params' in model:
                model.update(params)
            else:
                model['params'] = params
    return peak_indicies
#
peaks_found = update_spec_from_peaks(spec, [0], peak_widths=(5,))
print(peaks_found)
for i in peaks_found:
    print(x[i], y[i])
def generate_model(spec):
    composite_model = None
    params = None
    x = spec['x']
    y = spec['y']
    x_min = np.min(x)
    x_max = np.max(x)
    x_range = x_max - x_min
    y_max = np.max(y)
    for i, basis_func in enumerate(spec['model']):
        prefix = f'm{i}_'
        model = getattr(models, basis_func['type'])(prefix=prefix)
        if basis_func['type'] in ['GaussianModel', 'LorentzianModel', 'VoigtModel']:  # for now VoigtModel has gamma constrained to sigma
            model.set_param_hint('sigma', min=1e-6, max=x_range)
            model.set_param_hint('center', min=x_min, max=x_max)
            model.set_param_hint('height', min=1e-6, max=1.1*y_max)
            model.set_param_hint('amplitude', min=1e-6)
            # default guess is horrible!! do not use guess()
            default_params = {
                prefix+'center': x_min + x_range * random.random(),
                prefix+'height': y_max * random.random(),
                prefix+'sigma': x_range * random.random()
            }
        else:
            raise NotImplementedError(f'model {basis_func["type"]} not implemented yet')
        if 'help' in basis_func:  # allow override of settings in parameter
            for param, options in basis_func['help'].items():
                model.set_param_hint(param, **options)
        model_params = model.make_params(**default_params, **basis_func.get('params', {}))
        if params is None:
            params = model_params
        else:
            params.update(model_params)
        if composite_model is None:
            composite_model = model
        else:
            composite_model = composite_model + model
    return composite_model, params
model, params = generate_model(spec)
output = model.fit(spec['y'], params, x=spec['x'])
fig, ax = plt.subplots()
ax.scatter(spec['x'], spec['y'], s=4)
components = output.eval_components(x=spec['x'])
print(len(spec['model']))
for i, model in enumerate(spec['model']):
    ax.plot(spec['x'], components[f'm{i}_'])

It should be sort of obvious that any code will run exactly the same every single time when given the same inputs.
The fitting appears to behave erratically because you are deliberately giving it erratic inputs: you are telling it to randomize the initial starting values. You are also setting bounds programmatically without checking how close those initial values are to the bounds. So ask yourself: why are you doing these things?
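One thing worth checking, not spelled out in the question (this is my reading, suggested by the fact that the rewrite below wraps the data in np.array): x and y above are plain Python tuples, and tuple arithmetic produces exactly the two TypeError messages quoted. A minimal demonstration:
import numpy as np
xt = (1.0, 2.0, 3.0)       # a tuple, like the x and y defined in the question
# xt - 0.5                 # TypeError: unsupported operand type(s) for -: 'tuple' and 'float'
# xt * 0.5                 # TypeError: can't multiply sequence by non-int of type 'float'
xa = np.array(xt)          # converting to an array makes both operations element-wise
print(xa - 0.5, xa * 0.5)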
Your code seems quite complicated, possibly so much so that you don't understand it. Start by getting rid of all the junk. Maybe make a model that is a sum of Gaussians, something like this (this code will run and gives a decent fit):
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from lmfit import models
x = np.array([0, 0.09326263, 0.186541806, 0.279826296, 0.373096863, 0.466372043, 0.559644359, 0.652910952, 0.746190193, 0.839463682, 0.932734784, 1.026014714, 1.119288717, 1.212558343, 1.305836463, 1.399111865, 1.492381488, 1.585657384, 1.678931325, 1.772207061, 1.865478378, 1.958752334, 2.05202538, 2.145299504, 2.238574433, 2.331847735, 2.425123471, 2.518395825, 2.611671451, 2.704945386, 2.798218396, 2.891491964, 2.984766114, 3.078040106, 3.171314505, 3.264585057, 3.357863555, 3.451137678, 3.544409886, 3.637684839, 3.730956661, 3.824229504, 3.917507936, 4.010781777, 4.104055591, 4.197326, 4.290603266, 4.383874926, 4.477149297, 4.57042345, 4.663698494, 4.756972396, 4.850245469, 4.943519232, 5.036793499, 5.13006734, 5.223340556, 5.316615186, 5.409888929, 5.503163537, 5.596438512, 5.689708905, 5.782986369, 5.876257098, 5.969532028, 6.062807987, 6.156078156, 6.249352461, 6.342627453, 6.43590194, 6.529177933, 6.622450379, 6.715725752, 6.808997914, 6.902272777, 6.995546352, 7.088819796, 7.18209372, 7.275367937, 7.36864248, 7.461916216, 7.555189618, 7.648464489, 7.741737739, 7.835015624, 7.928288902, 8.021559911, 8.114833257, 8.208110415, 8.301378965, 8.394658258, 8.487929146, 8.581205011, 8.674478952, 8.767749555, 8.861024001, 8.954299075, 9.047574353, 9.140848269, 9.234120373, 9.327394253, 9.420668151, 9.513942544, 9.607217038, 9.700491238, 9.793764758, 9.887039268, 9.980313168, 10.0735868, 10.16686092, 10.26013875, 10.35340805, 10.44668356, 10.53995856, 10.63323182, 10.72650553])
y = np.array([0.001352, 0.001721, 0.002661, 0.00523, 0.010879, 0.020142, 0.030427, 0.039188, 0.046922, 0.055438, 0.065352, 0.076432, 0.089913, 0.107888, 0.132296, 0.164797, 0.208043, 0.266067, 0.343688, 0.443698, 0.565158, 0.704086, 0.854979, 1.01437, 1.17932, 1.34739, 1.51366, 1.67215, 1.81638, 1.94147, 2.0432, 2.11934, 2.16792, 2.19005, 2.18907, 2.17172, 2.14565, 2.11866, 2.09749, 2.08736, 2.09102, 2.1084, 2.13739, 2.17478, 2.21729, 2.26139, 2.30342, 2.33966, 2.36671, 2.38045, 2.37413, 2.33769, 2.26088, 2.13908, 1.9769, 1.78619, 1.57832, 1.35944, 1.13483, 0.919488, 0.743312, 0.637312, 0.615423, 0.665356, 0.744581, 0.78791, 0.743882, 0.617121, 0.46602, 0.356204, 0.320677, 0.361725, 0.45788, 0.566712, 0.650727, 0.701846, 0.739237, 0.788714, 0.863346, 0.956347, 1.04314, 1.09353, 1.0874, 1.02493, 0.925497, 0.815472, 0.721377, 0.658056, 0.628985, 0.623906, 0.617012, 0.578717, 0.487132, 0.346259, 0.185964, 0.066494, 0.011942, 0.000815, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
peaks = signal.find_peaks_cwt(y, (1.5, 25))
xstep = x.ptp() / len(x)
model, params = None, None
for i, peak_index in enumerate(peaks):
    this_model = models.GaussianModel(prefix=f'p{1+i:d}_')
    this_params = this_model.make_params(amplitude=y[peak_index], center=x[peak_index], sigma=2*xstep)
    if model is None:
        model = this_model
        params = this_params
    else:
        model += this_model
        params.update(this_params)
result = model.fit(y, params, x=x)
print(result.fit_report())
plt.plot(x, y, label='data')
plt.plot(x, result.best_fit, label='fit')
plt.legend()
plt.show()
Does it need to be a lot more complicated than that? Hm, maybe not. This gives a decent fit, though it might be missing a subtle shoulder peak at around x=7.
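If that shoulder matters, one option (my addition, not part of the answer's fit above) is to append one more Gaussian component by hand with rough guesses near x = 7 and refit:
extra = models.GaussianModel(prefix='p_extra_')
params.update(extra.make_params(amplitude=0.3, center=7.0, sigma=0.2))  # rough guesses; values are assumptions
model = model + extra
result = model.fit(y, params, x=x)
print(result.fit_report())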
Start simple. Keep it simple for as long as possible. Add complexity only when it simplifies something else.

Related

Display issue of fitted curve: cannot solve coarseness

Despite having a working script for curve fitting using the lmfit library, I am not able to solve a display issue. Indeed, having only 5 dependent values, the resulting graph is rather coarse.
Before switching to lmfit, I was using curve_fit and could solve the display issue simply by using np.linspace and plotting the optimized values resulting from the fit procedure, while displaying the "real" values through plt.errorbar. With lmfit, the same approach fails: it recognizes the "fake" independent variables and raises a shape-mismatch error.
My full script is the following:
import lmfit as lf
from lmfit import Model, Parameters
import numpy as np
import matplotlib.pyplot as plt
from math import atan
def on_res(omega_eff, thetas, R2avg=5, k_ex=0.1, phi_ex=500):
    return R2avg*(np.sin(thetas))**2 + ((np.sin(thetas))**2)*(phi_ex*k_ex/(k_ex**2 + omega_eff**2))
model = Model(on_res,independent_vars=['omega_eff','thetas'])
params = model.make_params(R2avg=5, k_ex=0.01, phi_ex=1500)
carrier = 6146.53
O_1 = 5846
spin_locks = (1000, 2000, 3000, 4000, 5000)
delta_omega = (O_1 - carrier)
omega_eff1 = ((delta_omega**2) + (spin_locks[0]**2))**0.5
omega_eff2 = ((delta_omega**2) + (spin_locks[1]**2))**0.5
omega_eff3 = ((delta_omega**2) + (spin_locks[2]**2))**0.5
omega_eff4 = ((delta_omega**2) + (spin_locks[3]**2))**0.5
omega_eff5 = ((delta_omega**2) + (spin_locks[4]**2))**0.5
theta_rad1 = atan(spin_locks[0]/delta_omega)
theta_rad2 = atan(spin_locks[1]/delta_omega)
theta_rad3 = atan(spin_locks[2]/delta_omega)
theta_rad4 = atan(spin_locks[3]/delta_omega)
theta_rad5 = atan(spin_locks[4]/delta_omega)
x = (omega_eff1/1000, omega_eff2/1000, omega_eff3/1000, omega_eff4/1000, omega_eff5/1000)# , omega_eff6/1000)# , omega_eff7/1000)
theta = (theta_rad1, theta_rad2, theta_rad3, theta_rad4, theta_rad5)
R1rho_vals = (7.9328, 6.2642, 6.0005, 5.9972, 5.988)
e = (0.2, 0.2, 0.2, 0.2, 0.2)
new_x = np.linspace(0, 6, 1000)
omega_eff = np.array(x, dtype=float)
thetas = np.array(theta, dtype=float)
R1rho_vals = np.array(R1rho_vals, dtype=float)
error = np.array(e, dtype=float)
R2avg = []
k_ex = []
phi_ex = []
result = model.fit(R1rho_vals, params, weights=1/error, thetas=thetas, omega_eff=omega_eff, method = "emcee", steps = 1000)
print(result.fit_report())
plt.errorbar(x, R1rho_vals, yerr = error, fmt = ".k", markersize = 8, capsize = 3)
plt.plot(new_x, result.best_fit)
plt.show()
As you can see by running it, it raises the shape-mismatch error. Changing the plt.plot line to plt.plot(x, result.best_fit) produces the graph correctly, but displays a very coarse profile (as one would expect, having only 5 points on the x-axis).
Are you aware of any way to solve this? Checking the documentation, I noticed that the examples provided all plot the results via the actual independent-variable values, since they have enough experimental points.
You need to re-evaluate the ModelResult with your new values for the independent variables:
plt.plot(new_x, result.eval(omega_eff=new_x/1000., thetas=thetas))
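If the goal is a smooth curve over many points, result.eval needs both independent variables on that denser grid. One possible way to build it (a sketch based on how omega_eff and theta are computed from spin_locks above; the grid range is my assumption) is to sweep the spin-lock strength:
spin_lock_grid = np.linspace(500, 5500, 200)                          # hypothetical dense sweep of spin-lock values
omega_eff_grid = np.sqrt(delta_omega**2 + spin_lock_grid**2) / 1000   # same scaling as x above
theta_grid = np.arctan(spin_lock_grid / delta_omega)
plt.plot(omega_eff_grid, result.eval(omega_eff=omega_eff_grid, thetas=theta_grid))
plt.show()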

Robust Linear Model - No exogenous var, just constants

I'm doing a robust linear regression on only a constant (a column of 1s) and no exogenous variable. I'm able to calculate the model just fine by inputting a list of 1's equal to the size of the 'xi_list' from the code snippet below.
def sigma_and_miu(gvkey, statevar_dict):
    statevar_list = statevar_dict[gvkey]
    xi_list = [np.log(statevar_list[i]) - np.log(statevar_list[i-1]) for i in range(1, len(statevar_list))]
    x = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    y = np.array(xi_list)
    rlm_model = sm.RLM(y, x, M=sm.robust.norms.HuberT())
    rlm_results = rlm_model.fit()
    sigma = np.std(rlm_results.resid * rlm_results.weights)
    miudelta = rlm_results.params[0] + (0.5 * sigma ** 2)
    return miudelta, sigma
This function is run with the following inputs.
dict = {1004:[1796.6, 1938.6, 2085.4, 2009.4, 1906.1, 2002.2, 2164.9, 2478.8, 2357.4, 2662.1, 2911.2, 2400.4, 2535.9, 2812.3, 2873.1, 2775.5, 3374.2, 3345.5, 3466.3, 2409.4]}
key = 1004
miu, sigma = sigma_and_miu(key,dict)
However, I'm looking for a more scalable approach. I was thinking that one solution could be to include a loop that appends as many 1's as the length of xi_list, but this does not seem very efficient.
I know there is sm.add_constant() and I tried to add this constant to my 'y' variable and leaving 'x' blank in the sm.RLM() function. This results in not being able to run the model.
So my question is, whether there is a better way to create the list of 1s or should I just go for the loop?
Use basic numpy vectorized computation, e.g.:
statevar = np.asarray(statevar_list)
y = np.log(statevar[1:]) - np.log(statevar[:-1])
x = np.ones(len(y))
Aside: The rlm_results should have the robust estimate of the standard deviation that is used in the estimation as a scale attribute.
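Folding that back into the question's function gives something like this sketch (np.diff of the logs is my substitution for the list comprehension; it computes the same first differences):
import numpy as np
import statsmodels.api as sm
def sigma_and_miu(gvkey, statevar_dict):
    statevar = np.asarray(statevar_dict[gvkey], dtype=float)
    y = np.diff(np.log(statevar))               # log(s[i]) - log(s[i-1]), vectorized
    x = np.ones(len(y))                         # constant-only design, always the right length
    rlm_results = sm.RLM(y, x, M=sm.robust.norms.HuberT()).fit()
    sigma = np.std(rlm_results.resid * rlm_results.weights)
    miudelta = rlm_results.params[0] + 0.5 * sigma ** 2
    return miudelta, sigma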

Python Rbf gives singular matrix error with no duplicate coordinates, why?

Very similar to RBF interpolation fails: LinAlgError: singular matrix but I think the problem is different, as I have no duplicated coordinates.
Toy example:
import numpy as np
import scipy.interpolate as interp
coords = (np.array([-1, 0, 1]), np.array([-2, 0, 2]), np.array([-1, 0, 1]))
coords_mesh = np.meshgrid(*coords, indexing="ij")
fn_value = np.power(coords_mesh[0], 2) + coords_mesh[1]*coords_mesh[2] # F(x, y, z)
coords_array = np.vstack([x.flatten() for x in coords_mesh]).T # Columns are x, y, z
unique_coords_array = np.vstack({tuple(row) for row in coords_array})
unique_coords_array.shape == coords_array.shape # True, i.e. no duplicate coords
my_grid_interp = interp.RegularGridInterpolator(points=coords, values=fn_value)
my_grid_interp(np.array([0, 0, 0])) # Runs without error
my_rbf_interp = interp.Rbf(*[x.flatten() for x in coords_mesh], d=fn_value.flatten())
## Error: numpy.linalg.linalg.LinAlgError: singular matrix -- why?
What am I missing? The example above uses the function F(x, y, z) = x^2 + y*z. I'd like to use Rbf to approximate that function. As far as I can tell there are no duplicate coordinates: compare unique_coords_array to coords_array.
I believe the problem is your input:
my_rbf_interp = interp.Rbf(*[x.flatten() for x in coords_mesh],d=fn_value.flatten())
If you change it to:
x,y,z = [x.flatten() for x in coords_mesh]
my_rbf_interp = interp.Rbf(x,y,z,fn_value.flatten())
it should work. I think that in your original formulation, because d is passed as a keyword, Rbf takes the last positional array (the flattened z coordinates) as the data values and keeps only x and y as coordinates; those 2-D points contain repeats, so the matrix that goes to the solver has duplicate rows and you get essentially the same problem as with duplicate coordinates (i.e. a singular matrix).
Alternatively, if you do:
d = fn_value.flatten()
my_rbf_interp = interp.Rbf(*(x,y,z,d))
It should work also.
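As a quick check of the fix (my addition): an Rbf interpolant with no smoothing should reproduce the sampled values exactly at the nodes, e.g. F(1, 2, 1) = 1**2 + 2*1 = 3:
x, y, z = [c.flatten() for c in coords_mesh]
my_rbf_interp = interp.Rbf(x, y, z, fn_value.flatten())
print(my_rbf_interp(1, 2, 1))        # ~3.0, the exact nodal value
print(my_rbf_interp(0.5, 0.5, 0.5))  # off-grid point, roughly approximates F(0.5, 0.5, 0.5) = 0.5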

python: finding the value of a random variable for a cdf

I apologize in advance if this is poorly worded.
If I have a stdDev = 1, mean = 0, scipy.stats.cdf(-1, loc = 0, scale = 1) will give me the probability that a normally distributed random variable will be <= -1, and that is 0.15865525393145707.
Given 0.15865..., how do I find the value that gives me -1?
i.e. value(cdf = 0.15865, loc = 0, scale = 1)
Thanks for the help.
edit: you actually need to import norm from scipy.stats (from scipy.stats import norm).
I found the answer. You need to use ppf in scipy.stats which stands for "percent point function".
So let's say you have a normal distribution with stdDev = 1, and mean = 0 and you want to find the value at which the random variables will be below ~15% of the time. Just use:
value = norm.ppf(0.15, loc = 0, scale = 1)
This will return ~ -1, likewise if you do:
cdf = norm.cdf(-1, loc = 0, scale = 1)
This will return ~ 0.15 or 15%.
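Putting the two directions together as a small runnable check (a minimal sketch):
from scipy.stats import norm
p = norm.cdf(-1, loc=0, scale=1)   # ~0.15865
x = norm.ppf(p, loc=0, scale=1)    # recovers ~ -1.0
print(p, x)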
Cool beans.

[scikit learn]: Anomaly Detection - Alternative for OneClassSVM

I have implemented LinearSVC and SVC from the sklearn-framework for text classification.
I am using TfidfVectorizer to get a sparse representation of the input data, which consists of two different classes (benign data and malicious data). This part is working pretty well, but now I wanted to implement some kind of anomaly detection by using the OneClassSVM classifier and training a model with only one class (outlier detection...). Unfortunately it does not work with sparse data. Some developers are working on a patch (https://github.com/scikit-learn/scikit-learn/pull/1586), but there are some bugs, so there is no solution yet for using the OneClassSVM implementation.
Are there any other methods in the sklearn framework for doing something like that? I have looked over the examples but nothing seems to fit.
Thanks!
A bit late, but in case anyone else is looking for information on this... There's a third-party anomaly detection module for sklearn here: http://www.cit.mak.ac.ug/staff/jquinn/software/lsanomaly.html, based on least-squares methods. It should be a plug-in replacement for OneClassSVM.
Unfortunately, scikit-learn currently implements only one-class SVM and a robust covariance estimator for outlier detection.
You can try a comparison of these methods (as provided in the docs) by examining the differences on 2-D data:
import numpy as np
import pylab as pl
import matplotlib.font_manager
from scipy import stats
from sklearn import svm
from sklearn.covariance import EllipticEnvelope
# Example settings
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0, 1, 2]
# define two outlier detection tools to be compared
classifiers = {
    "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                                     kernel="rbf", gamma=0.1),
    "robust covariance estimator": EllipticEnvelope(contamination=.1)}
# Compare given classifiers under given settings
xx, yy = np.meshgrid(np.linspace(-7, 7, 500), np.linspace(-7, 7, 500))
n_inliers = int((1. - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)
ground_truth = np.ones(n_samples, dtype=int)
ground_truth[-n_outliers:] = 0
# Fit the problem with varying cluster separation
for i, offset in enumerate(clusters_separation):
    np.random.seed(42)
    # Data generation
    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
    X = np.r_[X1, X2]
    # Add outliers
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]
    # Fit the model with the One-Class SVM
    pl.figure(figsize=(10, 5))
    for i, (clf_name, clf) in enumerate(classifiers.items()):
        # fit the data and tag outliers
        clf.fit(X)
        y_pred = clf.decision_function(X).ravel()
        threshold = stats.scoreatpercentile(y_pred,
                                            100 * outliers_fraction)
        y_pred = y_pred > threshold
        n_errors = (y_pred != ground_truth).sum()
        # plot the levels lines and the points
        Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        subplot = pl.subplot(1, 2, i + 1)
        subplot.set_title("Outlier detection")
        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                         cmap=pl.cm.Blues_r)
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')
        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                         colors='orange')
        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white')
        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black')
        subplot.axis('tight')
        subplot.legend(
            [a.collections[0], b, c],
            ['learned decision function', 'true inliers', 'true outliers'],
            prop=matplotlib.font_manager.FontProperties(size=11))
        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))
    pl.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
pl.show()
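If OneClassSVM itself is acceptable and the sparse input is the only blocker, one workaround (my suggestion, not from the answers above, and only practical for modest vocabularies) is to densify the TF-IDF matrix before fitting:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
docs = ["benign sample one", "benign sample two", "benign sample three"]  # placeholder corpus
X = TfidfVectorizer().fit_transform(docs).toarray()   # dense copy of the sparse TF-IDF matrix
oc_svm = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
oc_svm.fit(X)                                          # train on the single (benign) class only
print(oc_svm.predict(X))                               # +1 = inlier, -1 = outlier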
