Plot basic example of neural network - python

I am studying about neural network tutorial and made simple perceptron code like this below
The purpose is
Spliting 20 points into two groups.
perceptron.py
import numpy as np
from pprint import pprint
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tensorflow.contrib.learn.python.learn.tests.dataframe.mocks import Mock2x2Transform
plt.style.use('ggplot')
font = {'family' : 'meiryo'}
matplotlib.rc('font', **font)
rng = np.random.RandomState(123)
d = 2 #dimension
N = 10 # each group items
mean = 5
x1 = rng.randn(N,d) + np.array([0,0]) # group 0
x2 = rng.randn(N,d) + np.array([mean,mean]) $group 1
x = np.concatenate((x1,x2),axis = 0)
##### Plot points
allDf = pd.DataFrame(columns=['x','y'])
k = 0
for i in x:
print(i[0])
temp = pd.DataFrame({'x' : i[0],
'y' : i[1]},index=[k])
k = k + 1
allDf = pd.concat([allDf,temp])
pprint(allDf)
allDf.plot(kind='scatter',x = 'x',y='y')
#########
#initialize w b
w = np.zeros(d)
b = 0
def y(x):
return step(np.dot(w,x) + b)
def step(x):
return 1 * (x > 0)
def t(i):
if i < N:
return 0
else:
return 1
while True:
classified = True
for i in range(N * 2):
delta_w = (t(i) - y(x[i])) * x[i]
delta_b = (t(i) - y(x[i]))
w += delta_w
b += delta_b
classified *= all(delta_w == 0 ) * (delta_b == 0)
if classified:
print("Final answer")
pprint(w)
pprint(b) # I get the answer here but how can I plot this w and b
X = np.linspace(-2,6,100) # it's wrong!!
Y = (w[0] * X + w[1] * X) - b # it's wrong!!
plt.plot(X,Y)
plt.show()
break
This source code gives me
the final answer like this
w = array([ 2.14037745, 1.2763927 ])
b = -9
But how can I plot this??
I want to make line between two groups.
The final graph(line) is supposed to be like this

Related

Adding Additional Rows To Tensors

I want to simulate a data generating process via tensor methods. In the end, the data will be exported to a csv file such that each row corresponds to a time period and each column corresponds to a unit. The following code
import numpy as np
import pandas as pd
import random
import tensorly as tl
from itertools import product
import matplotlib.pyplot as plt
import tensorly.decomposition
np.random.seed(1812)
# Data Generation
L = 0.05
H = 0.05
dx = 0.0025
dy = 0.0025
tmax = 60
dt = 0.01
epsilon = 0.0001
alpha = 0.5e-5+np.random.random()*1e-5
SimulateData = []
SimulateDataNoNoise = []
r_x = alpha*dt/dx**2
r_y = alpha*dt/dy**2
fo = r_x + r_y
if fo > 0.5:
msg = f'Current Fo = {fo}, which is numerically unstable (>0.5)'
raise ValueError(msg)
# x, y meshgrid based on dx, dy
nx = int(L/dx + 1)
ny = int(H/dy + 1)
X, Y = np.meshgrid(np.linspace(0, L, nx), np.linspace(0, H, ny))
# center point of the domain
ic = int((nx-1)/2)
jc = int((ny-1)/2)
# initial and boundary conditions
S = np.zeros((ny, nx))
def enforceBdy(S):
''' Enforces the boundary conditions on S, the temperature values on the domain's grid points'''
S[:, 0] = 1
S[:, -1] = 1
S[0, :] = 1
S[-1, :] = 1
return S
S = enforceBdy(S)
def Laplace(T):
'''Computes the Laplacian operator, del-squared on the data'''
tmp_x, tmp_y = np.gradient(T, dx, dy)
tmp_x, _ = np.gradient(tmp_x, dx)
_, tmp_y = np.gradient(tmp_y, dy)
return tmp_x+tmp_y
# iteration
nmax = int(tmax/dt)
for n in range(nmax):
dSdt = alpha*Laplace(S)
S = S + dSdt*dt
S = enforceBdy(S)
if n % 100 == 0:
noise = np.random.normal(size=S.shape)*.1
SimulateData.append(S.copy()+noise)
SimulateDataNoNoise.append(S.copy())
# check for convergence
err = np.abs(dSdt*dt).max()
if err <= epsilon:
break
#
# Creates Tensor
X = np.stack(SimulateData, 2)
nx,ny,nt = X.shape
# CP Decomposition
err = []
for i in range(1,11):
CP_Heat = tl.decomposition.parafac(X,i)
reconstructed = tl.kruskal_to_tensor(CP_Heat)
err.append(((X-reconstructed)**2).sum())
AIC1 = [2*e + 2*(i+1) for i,e in enumerate(err)]
AIC2 = [2*e + (i+1)*nx+(i+1)*ny+(i+1)*nt for i,e in enumerate(err)]
AIC = AIC2
idxmin = np.argmin(AIC)
R = idxmin+1
min_AIC = AIC[idxmin]
Y = np.zeros((21,40))
beta = np.random.randint(low=-0,high=15,size=21).reshape(-1,1)
for i in range(40):
RHS = 15 + X[:,:,i]#beta + np.random.normal(size=21).reshape(-1,1)
Y[:,i] = RHS.ravel()
Y
np.savetxt("Sim1.csv", Y, delimiter=",")
Returns a CSV file of 21 rows and 40 columns. Suppose, however, I wanted 40 or 70 rows in the final file with 40 columns. How would I do this? When I try with the number 22
import numpy as np
import pandas as pd
import random
import tensorly as tl
from itertools import product
import matplotlib.pyplot as plt
import tensorly.decomposition
np.random.seed(1812)
# Data Generation
L = 0.05
H = 0.05
dx = 0.0025
dy = 0.0025
tmax = 60
dt = 0.01
epsilon = 0.0001
alpha = 0.5e-5+np.random.random()*1e-5
SimulateData = []
SimulateDataNoNoise = []
r_x = alpha*dt/dx**2
r_y = alpha*dt/dy**2
fo = r_x + r_y
if fo > 0.5:
msg = f'Current Fo = {fo}, which is numerically unstable (>0.5)'
raise ValueError(msg)
# x, y meshgrid based on dx, dy
nx = int(L/dx + 1)
ny = int(H/dy + 1)
X, Y = np.meshgrid(np.linspace(0, L, nx), np.linspace(0, H, ny))
# center point of the domain
ic = int((nx-1)/2)
jc = int((ny-1)/2)
# initial and boundary conditions
S = np.zeros((ny, nx))
def enforceBdy(S):
''' Enforces the boundary conditions on S, the temperature values on the domain's grid points'''
S[:, 0] = 1
S[:, -1] = 1
S[0, :] = 1
S[-1, :] = 1
return S
S = enforceBdy(S)
def Laplace(T):
'''Computes the Laplacian operator, del-squared on the data'''
tmp_x, tmp_y = np.gradient(T, dx, dy)
tmp_x, _ = np.gradient(tmp_x, dx)
_, tmp_y = np.gradient(tmp_y, dy)
return tmp_x+tmp_y
# iteration
nmax = int(tmax/dt)
for n in range(nmax):
dSdt = alpha*Laplace(S)
S = S + dSdt*dt
S = enforceBdy(S)
if n % 100 == 0:
noise = np.random.normal(size=S.shape)*.1
SimulateData.append(S.copy()+noise)
SimulateDataNoNoise.append(S.copy())
# check for convergence
err = np.abs(dSdt*dt).max()
if err <= epsilon:
break
#
# Creates Tensor
X = np.stack(SimulateData, 2)
nx,ny,nt = X.shape
# CP Decomposition
err = []
for i in range(1,11):
CP_Heat = tl.decomposition.parafac(X,i)
reconstructed = tl.kruskal_to_tensor(CP_Heat)
err.append(((X-reconstructed)**2).sum())
AIC1 = [2*e + 2*(i+1) for i,e in enumerate(err)]
AIC2 = [2*e + (i+1)*nx+(i+1)*ny+(i+1)*nt for i,e in enumerate(err)]
AIC = AIC2
idxmin = np.argmin(AIC)
R = idxmin+1
min_AIC = AIC[idxmin]
Y = np.zeros((22,40))
beta = np.random.randint(low=-0,high=15,size=22).reshape(-1,1)
for i in range(40):
RHS = 15 + X[:,:,i]#beta + np.random.normal(size=22).reshape(-1,1)
Y[:,i] = RHS.ravel()
Y
np.savetxt("Sim1.csv", Y, delimiter=",")
Python throws an exception saying "(size 22 is different from 21)", but I'm unclear on where the 21 comes from when I do not specify the number 21 anywhere in my code.

How to animate this optimization model correctly

I have implemented a simple randomized, population-based optimization method - Grey Wolf optimizer. I am having some trouble with properly capturing the Matplotlib plots at each iteration using the camera package.
I am running GWO for the objective function f(x,y) = x^2 + y^2. I can only see the candidate solutions converging to the minima, but the contour plot doesn't show up.
Do you have any suggestions, how can I display the contour plot in the background?
GWO Algorithm implementation
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from celluloid import Camera
import ffmpeg
import pillow
# X : Position vector of the initial population
# n : Initial population size
def gwo(f,max_iterations,LB,UB):
fig = plt.figure()
camera = Camera(fig)
def random_population_uniform(m,a,b):
dims = len(a)
x = [list(a + np.multiply(np.random.rand(dims),b - a)) for i in range(m)]
return np.array(x)
def search_agent_fitness(fitness):
alpha = 0
if fitness[1] < fitness[alpha]:
alpha, beta = 1, alpha
else:
beta = 1
if fitness[2] > fitness[alpha] and fitness[2] < fitness[beta]:
beta, delta = 2, beta
elif fitness[2] < fitness[alpha]:
alpha,beta,delta = 2,alpha,beta
else:
delta = 2
for i in range(3,len(fitness)):
if fitness[i] <= fitness[alpha]:
alpha, beta,delta = i, alpha, beta
elif fitness[i] > fitness[alpha] and fitness[i]<= fitness[beta]:
beta,delta = i,beta
elif fitness[i] > fitness[beta] and fitness[i]<= fitness[delta]:
delta = i
return alpha, beta, delta
def plot_search_agent_positions(f,X,alpha,beta,delta,a,b):
# Plot the positions of search agents
x = X[:,0]
y = X[:,1]
s = plt.scatter(x,y,c='gray',zorder=1)
s = plt.scatter(x[alpha],y[alpha],c='red',zorder=1)
s = plt.scatter(x[beta],y[beta],c='blue',zorder=1)
s = plt.scatter(x[delta],y[delta],c='green',zorder=1)
camera.snap()
# Initialize the position of the search agents
X = random_population_uniform(50,np.array(LB),np.array(UB))
n = len(X)
l = 1
# Plot the first image on screen
x = np.linspace(LB[0],LB[1],1000)
y = np.linspace(LB[0],UB[1],1000)
X1,X2 = np.meshgrid(x,y)
Z = f(X1,X2)
cont = plt.contour(X1,X2,Z,20,linewidths=0.75)
while (l < max_iterations):
# Take the x,y coordinates of the initial population
x = X[:,0]
y = X[:,1]
# Calculate the objective function for each search agent
fitness = list(map(f,x,y))
# Update alpha, beta and delta
alpha,beta,delta = search_agent_fitness(fitness)
# Plot search agent positions
plot_search_agent_positions(f,X,alpha,beta,delta,LB,UB)
# a decreases linearly from 2 to 0
a = 2 - l *(2 / max_iterations)
# Update the position of search agents including the Omegas
for i in range(n):
x_prey = X[alpha]
r1 = np.random.rand(2) #r1 is a random vector in [0,1] x [0,1]
r2 = np.random.rand(2) #r2 is a random vector in [0,1] x [0,1]
A1 = 2*a*r1 - a
C1 = 2*r2
D_alpha = np.abs(C1 * x_prey - X[i])
X_1 = x_prey - A1*D_alpha
x_prey = X[beta]
r1 = np.random.rand(2)
r2 = np.random.rand(2)
A2 = 2*a*r1 - a
C2 = 2*r2
D_beta = np.abs(C2 * x_prey - X[i])
X_2 = x_prey - A2*D_beta
x_prey = X[delta]
r1 = np.random.rand(2)
r2 = np.random.rand(2)
A3 = 2*a*r1 - a
C3 = 2*r2
D_delta = np.abs(C3 * x_prey - X[i])
X_3 = x_prey - A3*D_delta
X[i] = (X_1 + X_2 + X_3)/3
l = l + 1
return X[alpha],camera
Function call
# define the objective function
def f(x,y):
return x**2 + y**2
minimizer,camera = gwo(f,7,[-10,-10],[10,10])
animation = camera.animate(interval = 1000, repeat = True,
repeat_delay = 500)
Is it possible that the line x = np.linspace(LB[0],LB[1],1000) should be x = np.linspace(LB[0],UB[1],1000) instead? With your current definition of x, x is an array only filled with the value -10 which means that you are unlikely to find a contour.
Another thing that you might want to do is to move the cont = plt.contour(X1,X2,Z,20,linewidths=0.75) line inside of your plot_search_agent_positions function to ensure that the contour is plotted at each iteration of the animation.
Once you make those changes, the code looks like that:
import matplotlib.pyplot as plt
import numpy as np
from celluloid import Camera
import ffmpeg
import PIL
from matplotlib import animation, rc
from IPython.display import HTML, Image # For GIF
from scipy.interpolate import griddata
rc('animation', html='html5')
# X : Position vector of the initial population
# n : Initial population size
def gwo(f,max_iterations,LB,UB):
fig = plt.figure()
fig.gca(aspect='equal')
camera = Camera(fig)
def random_population_uniform(m,a,b):
dims = len(a)
x = [list(a + np.multiply(np.random.rand(dims),b - a)) for i in range(m)]
return np.array(x)
def search_agent_fitness(fitness):
alpha = 0
if fitness[1] < fitness[alpha]:
alpha, beta = 1, alpha
else:
beta = 1
if fitness[2] > fitness[alpha] and fitness[2] < fitness[beta]:
beta, delta = 2, beta
elif fitness[2] < fitness[alpha]:
alpha,beta,delta = 2,alpha,beta
else:
delta = 2
for i in range(3,len(fitness)):
if fitness[i] <= fitness[alpha]:
alpha, beta,delta = i, alpha, beta
elif fitness[i] > fitness[alpha] and fitness[i]<= fitness[beta]:
beta,delta = i,beta
elif fitness[i] > fitness[beta] and fitness[i]<= fitness[delta]:
delta = i
return alpha, beta, delta
def plot_search_agent_positions(f,X,alpha,beta,delta,a,b,X1,X2,Z):
# Plot the positions of search agents
x = X[:,0]
y = X[:,1]
s = plt.scatter(x,y,c='gray',zorder=1)
s = plt.scatter(x[alpha],y[alpha],c='red',zorder=1)
s = plt.scatter(x[beta],y[beta],c='blue',zorder=1)
s = plt.scatter(x[delta],y[delta],c='green',zorder=1)
Z=f(X1,X2)
cont=plt.contour(X1,X2,Z,levels=20,colors='k',norm=True)
plt.clabel(cont, cont.levels, inline=True, fontsize=10)
camera.snap()
# Initialize the position of the search agents
X = random_population_uniform(50,np.array(LB),np.array(UB))
n = len(X)
l = 1
# Plot the first image on screen
x = np.linspace(LB[0],UB[1],1000)
y = np.linspace(LB[0],UB[1],1000)
X1,X2 = np.meshgrid(x,y)
Z=f(X1,X2)
while (l < max_iterations):
# Take the x,y coordinates of the initial population
x = X[:,0]
y = X[:,1]
# Calculate the objective function for each search agent
fitness = list(map(f,x,y))
# Update alpha, beta and delta
alpha,beta,delta = search_agent_fitness(fitness)
# Plot search agent positions
plot_search_agent_positions(f,X,alpha,beta,delta,LB,UB,X1,X2,Z)
# a decreases linearly from 2 to 0
a = 2 - l *(2 / max_iterations)
# Update the position of search agents including the Omegas
for i in range(n):
x_prey = X[alpha]
r1 = np.random.rand(2) #r1 is a random vector in [0,1] x [0,1]
r2 = np.random.rand(2) #r2 is a random vector in [0,1] x [0,1]
A1 = 2*a*r1 - a
C1 = 2*r2
D_alpha = np.abs(C1 * x_prey - X[i])
X_1 = x_prey - A1*D_alpha
x_prey = X[beta]
r1 = np.random.rand(2)
r2 = np.random.rand(2)
A2 = 2*a*r1 - a
C2 = 2*r2
D_beta = np.abs(C2 * x_prey - X[i])
X_2 = x_prey - A2*D_beta
x_prey = X[delta]
r1 = np.random.rand(2)
r2 = np.random.rand(2)
A3 = 2*a*r1 - a
C3 = 2*r2
D_delta = np.abs(C3 * x_prey - X[i])
X_3 = x_prey - A3*D_delta
X[i] = (X_1 + X_2 + X_3)/3
l = l + 1
return X[alpha],camera
# define the objective function
def f(x,y):
return x**2 + y**2
minimizer,camera = gwo(f,7,[-10,-10],[10,10])
animation = camera.animate(interval = 1000, repeat = True,repeat_delay = 500)
And the output gives:

How to Fix Index Error in Differential Equation?

I am trying to create a program that solves the mass-spring-damper system using backward differentiating, the only problem is that I am running into an index error that I am not sure how to solve:
import numpy as np
import matplotlib.pyplot as plt
def MSD_Solver(m,b,K):
#input: m = mass, b = damping ratio, K = spring constant
#output: (t,x) time vs position
tinitial = 0
tfinal = 15
step = .005
t = np.linspace(tinitial,tfinal,step)
x = np.zeros_like(t)
x[0]=0
x[1]=0
for k in range (len(t)-1): # extra element so subtract by 1
x[k] = (t**2)/((m+b)*t+(t**2)*k) + (x[k-2](-m))/((m+b)*t+(t**2)*k) + (x[k-1]((2*m)+(b*t)))/((m+b)*t+(t**2)*k)
return plt.plot(t,x)
print(MSD_Solver(1,.5,5)),MSD_Solver(1,1,5),MSD_Solver(1,2,5)
plt.show()
The linspace doc shows that the third argument is the number of items, not the step. Your step value got truncated to 0, so the returned array for t was empty. As a result, x has no elements, and x[0] is out of range.
Try this:
tinitial = 0
tfinal = 15
step = .005
num = (tfinal - tinitial) / step + 1
t = np.linspace(tinitial,tfinal,num)
This will get you to the semantic errors in your complex computation.
You want, probably(?), use first and second order difference quotients to discretize
m*x''(t) + b*x'(t) + K*x(t) = 1
to
m*(x[j+1]-2*x[j]+x[j-1]) + 0.5*dt*b*(x[j+1]-x[j-1]) + dt^2*K*x[j] = dt**2
so that
x[j+1] = ( dt**2 + (2*m-K*dt**2)*x[j] - (m-0.5*dt*b)*x[j-1] ) / (m+0.5*dt*b)
In code
def MSD_Solver(m,b,K):
#input: m = mass, b = damping ratio, K = spring constant
#output: (t,x) time vs position
tinitial = 0
tfinal = 15
step = .005
t = np.arange(tinitial,tfinal,step)
x = np.zeros_like(t)
dt = t[1]-t[0] # use the actual time step
x[0:2] = [ 0, 0]
for j in range(1,len(t)-1):
x[j+1] = ( dt**2 + (2*m-K*dt**2)*x[j] - (m-0.5*dt*b)*x[j-1] ) / (m+0.5*dt*b)
return t,x
t,x = MSD_Solver(1,.5,5)
plt.plot(t,x); plt.show();

pattern recognition 1d data

I want to find pattern in some spectra.
Spectrum image
Pattern should look like 2 in gray circles on picture, all data looks similarly. Light blue line is the original data, dotted dark blue line - average over 6 points. I was trying to do window with some size and scan data and check whether the y-flux value drops/rise below 60 ish % but that seems to find other regions and the one that I want, or only this I don't want.
The width of pattern is not always the same in spectra that I have. There is a picture of spectrum with pattern black dashed line but my program didn't found it.
not found picture
I tried changing size of window but it doesn't help. Can I use some pattern recognition algorithm to find this patterns? Could somebody point me in some direction? Or explain in easy way since I'm kinda lost in this, please?
That's my code:
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import ascii
import glob
def reading(file_name):
data = ascii.read(file_name)
lam = data['col0'][1:-1]
#data offset *10**17 + 5
flux = data['col1'][1:-1]*10**17 + 5
return lam, flux
def percentChange(startPoint,currentPoint):
return abs(((currentPoint-startPoint)/startPoint))*100.00
def window(data, size):
n = len(data)
out = []
wind = data[0 : size]
i = size
while i + size/2 < n:
wind = data[i - size/2 : i + size/2]
tmp = percentChange(wind[0], wind[-1])
if tmp > 50.:
out.append([tmp, i - size/2, i + size/2])
i = i + size
return out
def window2(data, size):
n = len(data)
out = []
wind = data[0 : size]
i = size
while i + size/2 < n:
wind = data[i - size/2 : i + size/2]
tmp = percentChange(wind[0], wind[len(wind)/2])
if tmp > 50.:
out.append([tmp, i - size/2, i + size/2])
i = i + size
return out
def plotting(lamb, flux):
plt.rcParams['font.family'] = 'freeserif'
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 15
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['figure.titlesize'] = 12
plt.rcParams['xtick.minor.visible'] = True
plt.rcParams['ytick.minor.visible'] = True
plt.plot(lamb, flux)
plt.xlabel("wavelenght [A]")
plt.ylabel("flux [erg/cm^2/s/A]")
def averaging(lamb, flux, param):
end = 1480
bin_flux_1 = [np.mean(flux[i : i + param]) for i in range(0, end, param)]
bin_lam_1 = [np.mean(lamb[i : i + param]) for i in range(0, end, param)]
return bin_lam_1, bin_flux_1
def main():
param = 6
stack = 6
for name in glob.glob('TRAIN/*.dat'):
print name
lamb, flux = reading(name)
lamb_a, flux_a = averaging(lamb, flux, param)
plotting(lamb, flux)
plotting(lamb_a, flux_a)
change = window(flux_a, stack)
change2 = window2(flux_a, stack)
minim = flux_a.index(min(flux_a))
for i in range(len(change)):
plt.axvline(lamb_a[change[i][1]], color='r', linestyle='--',linewidth=1)
plt.axvline(lamb_a[change[i][2]], color='r', linestyle='--',linewidth=1)
for i in range(len(change2)):
plt.axvline(lamb_a[change2[i][1]], color='y', linestyle='-',linewidth=1)
plt.axvline(lamb_a[change2[i][2]], color='y', linestyle='-',linewidth=1)
plt.axvline(lamb_a[minim], color='k', linestyle='--',linewidth=1)
plt.show()
if __name__ == "__main__":
main()
You can do it by using Knuth–Morris–Pratt algorithm in linear O(n + m) time complexity where n and m are the lengths of text and pattern.
KMP algorithm is basically a pattern matching algorithm (finding the starting position of a needle in haystack) which works on character string.
def kmp_matcher(t, d):
n=len(t)
m=len(d)
pi = compute_prefix_function(d)
q = 0
i = 0
while i < n:
if d[q]==t[i]:
q=q+1
i = i + 1
else:
if q != 0:
q = pi[q-1]
else:
i = i + 1
if q == m:
print "pattern occurs with shift "+str(i-q)
q = pi[q-1]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
k=1
l = 0
while k < m:
if p[k] <= p[l]:
l = l + 1
pi[k] = l
k = k + 1
else:
if l != 0:
l = pi[l-1]
else:
pi[k] = 0
k = k + 1
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t, p)

Scikit-learn: How to run KMeans on a one-dimensional array?

I have an array of 13.876(13,876) values between 0 and 1. I would like to apply sklearn.cluster.KMeans to only this vector to find the different clusters in which the values are grouped. However, it seems KMeans works with a multidimensional array and not with one-dimensional ones. I guess there is a trick to make it work but I don't know how. I saw that KMeans.fit() accepts "X : array-like or sparse matrix, shape=(n_samples, n_features)", but it wants the n_samples to be bigger than one
I tried putting my array on a np.zeros() matrix and run KMeans, but then is putting all the non-null values on class 1 and the rest on class 0.
Can anyone help in running this algorithm on a one-dimensional array?
You have many samples of 1 feature, so you can reshape the array to (13,876, 1) using numpy's reshape:
from sklearn.cluster import KMeans
import numpy as np
x = np.random.random(13876)
km = KMeans()
km.fit(x.reshape(-1,1)) # -1 will be calculated to be 13876 here
Read about Jenks Natural Breaks. Function in Python found the link from the article:
def get_jenks_breaks(data_list, number_class):
data_list.sort()
mat1 = []
for i in range(len(data_list) + 1):
temp = []
for j in range(number_class + 1):
temp.append(0)
mat1.append(temp)
mat2 = []
for i in range(len(data_list) + 1):
temp = []
for j in range(number_class + 1):
temp.append(0)
mat2.append(temp)
for i in range(1, number_class + 1):
mat1[1][i] = 1
mat2[1][i] = 0
for j in range(2, len(data_list) + 1):
mat2[j][i] = float('inf')
v = 0.0
for l in range(2, len(data_list) + 1):
s1 = 0.0
s2 = 0.0
w = 0.0
for m in range(1, l + 1):
i3 = l - m + 1
val = float(data_list[i3 - 1])
s2 += val * val
s1 += val
w += 1
v = s2 - (s1 * s1) / w
i4 = i3 - 1
if i4 != 0:
for j in range(2, number_class + 1):
if mat2[l][j] >= (v + mat2[i4][j - 1]):
mat1[l][j] = i3
mat2[l][j] = v + mat2[i4][j - 1]
mat1[l][1] = 1
mat2[l][1] = v
k = len(data_list)
kclass = []
for i in range(number_class + 1):
kclass.append(min(data_list))
kclass[number_class] = float(data_list[len(data_list) - 1])
count_num = number_class
while count_num >= 2: # print "rank = " + str(mat1[k][count_num])
idx = int((mat1[k][count_num]) - 2)
# print "val = " + str(data_list[idx])
kclass[count_num - 1] = data_list[idx]
k = int((mat1[k][count_num] - 1))
count_num -= 1
return kclass
Use and visualization:
import numpy as np
import matplotlib.pyplot as plt
def get_jenks_breaks(...):...
x = np.random.random(30)
breaks = get_jenks_breaks(x, 5)
for line in breaks:
plt.plot([line for _ in range(len(x))], 'k--')
plt.plot(x)
plt.grid(True)
plt.show()
Result:

Categories