I have multi class target, and I'm trying to implement mixup technique in Keras sequence.
Without it, I get above 70% accuracy, but below 5% when I include the code below.
Could someone let me know what I am doing wrong?
Thanks!
def __getitem__(self, index):
    ...
    x_batch, y_batch = self.mixup(x_batch, y_batch)
    ...
    return x_batch, y_batch

def mixup(self, x, y):
    n = x.shape[0]
    l = np.random.beta(self.alpha, self.alpha, n)
    x_l = l.reshape(n, 1, 1, 1)
    y_l = l.reshape(n, 1)

    x1 = x
    x2 = np.flip(x, axis=0)
    x = x1 * x_l + x2 * (1 - x_l)

    y1 = y
    y2 = np.flip(y, axis=0)
    y = y1 * y_l + y2 * (1 - y_l)

    return x, y
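For reference, here is a standalone NumPy sanity check of the same mixing step on dummy data (the shapes, alpha value and one-hot labels are made up purely for illustration; I am assuming y_batch is one-hot encoded):

import numpy as np

alpha = 0.2
x = np.random.rand(4, 32, 32, 3)             # dummy image batch
y = np.eye(5)[np.random.randint(0, 5, 4)]    # dummy one-hot labels, 5 classes

n = x.shape[0]
l = np.random.beta(alpha, alpha, n)
x_l = l.reshape(n, 1, 1, 1)
y_l = l.reshape(n, 1)

# mix each sample with the batch reversed along the batch axis
x_mix = x * x_l + np.flip(x, axis=0) * (1 - x_l)
y_mix = y * y_l + np.flip(y, axis=0) * (1 - y_l)

print(y_mix.sum(axis=1))  # each mixed label still sums to 1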
I have been trying to write Python code for logistic regression, but the results show an unexpectedly high value of the cost function. I have created random variables X and Y and added a noise term to Y that flips elements based on the probability theta. This is my code:
import numpy as np
from scipy.stats import bernoulli
rg = np.random.default_rng(100)
def data_generate(n, m, theta):
    X_0 = np.ones((n, 1))
    X = np.random.normal(loc=0.0, scale=1.0, size=(n, m))
    X = np.concatenate((X_0, X), axis=1)
    beta = rg.random((m+1, 1))
    Y = np.zeros((n, 1))
    P = 1.0/(1.0 + np.exp(-np.dot(X, beta)))
    for i in range(len(P)):
        if P[i] >= 0.5:
            Y[i] = 1
        else:
            Y[i] = 0
    # Noise addition
    noise = bernoulli.rvs(size=(n, 1), p=theta)
    for j in range(len(noise)):
        if noise[i] == 1:
            Y[i] = int(not(Y[i]))
        else:
            pass
    return X, Y, beta

def Gradient_Descent(X, Y, k, tollerence, learning_rate):
    n, m = np.shape(X)
    beta = rg.random((m, 1))
    costs = []
    initial_cost = 0.0
    for i in range(k):
        Y_pred = 1.0/(1.0 + np.exp(-np.dot(X, beta)))
        cost = np.mean(np.dot(Y.T, np.log(Y_pred)) + np.dot((1-Y).T, np.log(1-Y_pred)))
        if (abs(cost - initial_cost) <= tollerence):
            break
        else:
            beta = beta - learning_rate*(np.mean(np.dot(X.T, (Y_pred - Y))))
            initial_cost = cost
            costs.append(cost)
    return cost, beta, i
X = data_generate(200, 3, 0.1)[0]
Y = data_generate(200, 3, 0.1)[1]
Gradient_Descent(X, Y, 10000, 1e-6, 0.01)
# Output of code:
(-154.7689765716959,
 array([[-0.02218003],
        [-0.1182535 ],
        [ 0.1169462 ],
        [ 0.58610747]]),
 14)
Please tell me what the problem with this code is.
I am writing neural network code from scratch using NumPy. But even after training my network for many epochs, the predictions for each class are random and stay the same irrespective of the input.
I have checked my understanding against Andrew Ng's Coursera ML course and a towardsdatascience.com post. I think I am making some very basic conceptual mistake that I cannot figure out.
Here is my code:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(y):
    return y * (1 - y)

class NeuralNetwork:
    def __init__(self, shape):
        self.n_layers = len(shape)
        self.shape = shape
        self.weight = []
        self.bias = []
        i = 0
        while i < self.n_layers - 1:
            self.weight.append(np.random.normal(loc=0.0, scale=0.5,
                                                size=(self.shape[i + 1], self.shape[i])))
            self.bias.append(np.random.normal(loc=0.0, scale=0.3,
                                              size=(self.shape[i + 1], 1)))
            i += 1

    def predict(self, X):
        z = self.weight[0] @ X + self.bias[0]
        a = sigmoid(z)
        i = 1
        while i < self.n_layers - 1:
            z = self.weight[i] @ a + self.bias[i]
            a = sigmoid(z)
            i += 1
        return a

    def predictVerbose(self, X):
        layers = [X]
        z = self.weight[0] @ X + self.bias[0]
        a = sigmoid(z)
        layers.append(a)
        i = 1
        while i < self.n_layers - 1:
            z = self.weight[i] @ a + self.bias[i]
            a = sigmoid(z)
            layers.append(a)
            i += 1
        return layers

    def gradOne(self, X, y):
        layers = self.predictVerbose(X)
        h = layers[-1]
        delta_b = [(h - y) * dsigmoid(h)]
        delta_w = [delta_b[0] @ layers[-2].T]
        i = 1
        while i < self.n_layers - 1:
            buff = delta_b[-1]
            delta_b.append((self.weight[-i].T @ buff) * dsigmoid(layers[-(i + 1)]))
            delta_w.append(delta_b[-1] @ layers[-(i + 2)].T)
            i += 1
        return delta_b[::-1], delta_w[::-1]

    def grad(self, data, l_reg=0):
        # data: x1, x2, x3, ..., xm, y=(0, 1, 2,...)
        m = len(data)
        delta_b = []
        delta_w = []
        i = 0
        while i < self.n_layers - 1:
            delta_b.append(np.zeros((self.shape[i + 1], 1)))
            delta_w.append(np.zeros((self.shape[i + 1], self.shape[i])))
            i += 1
        for row in data:
            X = np.array(row[:-1])[np.newaxis].T
            y = np.zeros((self.shape[-1], 1))
            # print(row)
            y[row[-1], 0] = 1
            buff1, buff2 = self.gradOne(X, y)
            i = 0
            while i < len(delta_b):
                delta_b[i] += buff1[i] / m
                delta_w[i] += buff2[i] / m
                i += 1
        return delta_b, delta_w

    def train(self, data, batch_size, epoch, alpha, l_reg=0):
        m = len(data)
        for i in range(epoch):
            j = 0
            while j < m:
                delta_b, delta_w = self.grad(data[i: (i + batch_size + 1)])
                i = 0
                while i < len(self.weight):
                    self.weight[i] -= alpha * delta_w[i]
                    self.bias[i] -= alpha * delta_b[i]
                    i += 1
                j += batch_size

if __name__ == "__main__":
    x = NeuralNetwork([2, 2, 2])
    # for y in x.gradOne(np.array([[1], [2], [3]]), np.array([[0], [1]])):
    #     print(y.shape)
    data = [
        [1, 1, 0],
        [0, 0, 0],
        [1, 0, 1],
        [0, 1, 1]
    ]
    x.train(data, 4, 1000, 0.1)
    print(x.predict(np.array([[1], [0]])))
    print(x.predict(np.array([[1], [1]])))
Please point out where I am going wrong.
Unfortunately I don't have enough reputation to comment on your post, but here's a link to a NumPy-only neural network that I've made (tested on blob data from sklearn and MNIST):
https://github.com/jaymody/backpropagation/blob/master/old/NeuralNetwork.py
Are you still interested in this problem? As I understand it, you are trying to build an XOR perceptron with direct and inverse outputs?
It looks like:
1. You need to change the expression
delta_b, delta_w = self.grad(data[i: (i + batch_size + 1)])
to
delta_b, delta_w = self.grad(data[::])
in the train function.
2. Some of the random values used to initialize the synaptic weights and biases need many more training cycles at alpha=0.1. Try playing with alpha (I set it to 2) and the number of epochs (I tried up to 20000).
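Putting both changes together with the data from the question, the training call would look something like this (a rough sketch; exact outputs will vary with the random initialization):

# after changing grad(data[i: (i + batch_size + 1)]) to grad(data[::]) in train()
x = NeuralNetwork([2, 2, 2])
x.train(data, 4, 20000, 2)                # alpha = 2, 20000 epochs
print(x.predict(np.array([[1], [0]])))    # should move toward [[0], [1]] (XOR = 1)
print(x.predict(np.array([[1], [1]])))    # should move toward [[1], [0]] (XOR = 0)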
Also, your code does not work with 1-layer networks. I tried to train 1-layer AND and OR perceptrons and got very strange results (or maybe they just need many more cycles). But in the 2-layer case it works fine.
The code below is trying to do linear interpolation, similar to numpy.interp(). But it is quite slow, and I think the reason is that some operations in the code do not have GPU implementations, though I don't know which ones. Could anyone tell me and suggest some solutions?
import tensorflow as tf

def tf_interp(b, x, y):
    xaxis_pad = tf.concat([[tf.minimum(b, tf.gather(x, 0))], x, [tf.maximum(b, tf.gather(x, x.get_shape()[0] - 1))]],
                          axis=0)
    yaxis_pad = tf.concat([[0.0], y, [0.0]], axis=0)

    cmp = tf.cast(b >= xaxis_pad, dtype=tf.float32)
    diff = cmp[1:] - cmp[:-1]
    idx = tf.argmin(diff)

    # Interpolate
    alpha = (b - xaxis_pad[idx]) / (xaxis_pad[idx + 1] - xaxis_pad[idx])
    res = alpha * yaxis_pad[idx + 1] + (1 - alpha) * yaxis_pad[idx]

    def f1(): return 0.0
    def f2(): return alpha * yaxis_pad[idx + 1] + (1 - alpha) * yaxis_pad[idx]
    res = tf.cond(pred=tf.is_nan(res), true_fn=f1, false_fn=f2)

    return res

def tf_interpolation(t, x, y):
    t = tf.cast(t, tf.float32)
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)

    t1 = tf.reshape(t, [-1, ])
    t_return = tf.map_fn(lambda b: tf_interp(b, x, y), t1)
    t_return = tf.reshape(t_return, [t.get_shape()[0], t.get_shape()[1]])
    return t_return
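For reference, the NumPy behaviour I am trying to reproduce is just this (dummy values, purely for illustration):

import numpy as np

xp = np.array([0.0, 1.0, 2.0, 3.0])      # sample points, increasing
fp = np.array([0.0, 10.0, 20.0, 30.0])   # sample values
x = np.array([0.5, 1.5, 2.5])            # query points

print(np.interp(x, xp, fp))  # -> [ 5. 15. 25.]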
I've taken the Perlin noise algorithm from Wikipedia and implemented it in Python; here is the code:
import random
import math
from PIL import Image
from decimal import Decimal

IMAGE_SIZE = 200
PERLIN_RESOLUTION = 10

GRADIENT = []

for x in range(PERLIN_RESOLUTION + 1):
    GRADIENT.append([])
    for y in range(PERLIN_RESOLUTION + 1):
        angle = random.random() * 2 * math.pi
        vector = (
            Decimal(math.cos(angle)),
            Decimal(math.sin(angle))
        )
        GRADIENT[x].append(vector)

def lerp(a0, a1, w):
    return (1 - w)*a0 + w*a1

def dotGridGradient(ix, iy, x, y):
    dx = x - Decimal(ix)
    dy = y - Decimal(iy)
    return (dx*GRADIENT[iy][ix][0] + dy*GRADIENT[iy][ix][1])

def perlin(x, y):
    if x > 0.0:
        x0 = int(x)
    else:
        x0 = int(x) - 1
    x1 = x0 + 1
    if y > 0.0:
        y0 = int(y)
    else:
        y0 = int(y) - 1
    y1 = y0 + 1

    sx = x - Decimal(x0)
    sy = y - Decimal(y0)

    n0 = dotGridGradient(x0, y0, x, y)
    n1 = dotGridGradient(x1, y0, x, y)
    ix0 = lerp(n0, n1, sx)

    n0 = dotGridGradient(x0, y1, x, y)
    n1 = dotGridGradient(x1, y1, x, y)
    ix1 = lerp(n0, n1, sx)

    value = lerp(ix0, ix1, sy)
    return value

image = Image.new('RGB', (IMAGE_SIZE, IMAGE_SIZE))
pixels = image.load()

for i in range(IMAGE_SIZE):
    x = Decimal(i) / IMAGE_SIZE
    for j in range(IMAGE_SIZE):
        y = Decimal(j) / IMAGE_SIZE
        value = perlin(x * 10, y * 10)
        greyscale = (value + 1) * 255 / 2
        pixels[i, j] = (greyscale, greyscale, greyscale)

image.save('artifacts.png', 'PNG')
Here is the resulting image created by the script: you can very clearly see the vertices of the grid. I must be missing something here. Can anyone let me know what is going wrong?
You need to use smoothstep instead of linear interpolation.
def smoothstep(a0, a1, w):
    value = w*w*w*(w*(w*6 - 15) + 10)
    return a0 + value*(a1 - a0)
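In the perlin() function this means swapping the lerp calls for smoothstep, roughly like this:

n0 = dotGridGradient(x0, y0, x, y)
n1 = dotGridGradient(x1, y0, x, y)
ix0 = smoothstep(n0, n1, sx)

n0 = dotGridGradient(x0, y1, x, y)
n1 = dotGridGradient(x1, y1, x, y)
ix1 = smoothstep(n0, n1, sx)

value = smoothstep(ix0, ix1, sy)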
I have the following code snippet:
def func1(self, X, y):
    # X.shape = (455, 13)
    # y.shape = (455)
    num_examples, num_features = np.shape(X)
    self.weights = np.random.uniform(-1 / (2 * num_examples), 1 / (2 * num_examples), num_features)
    while condition:
        new_weights = np.zeros(num_features)
        K = (np.dot(X, self.weights) - y)
        for j in range(num_features):
            summ = 0
            for i in range(num_examples):
                summ += K[i] * X[i][j]
            new_weights[j] = self.weights[j] - ((self.alpha / num_examples) * summ)
        self.weights = new_weights
This code runs too slowly. Is there any optimization I can do?
You can use np.einsum() efficiently here. See a test version below:
def func2(X, y):
    num_examples, num_features = np.shape(X)
    weights = np.random.uniform(-1./(2*num_examples), 1./(2*num_examples), num_features)
    K = (np.dot(X, weights) - y)
    return weights - alpha/num_examples*np.einsum('i,ij->j', K, X)
You can get new_weights directly using matrix multiplication with np.dot, like so:
new_weights = self.weights - ((self.alpha / num_examples) * np.dot(K, X))
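A quick way to check either vectorized form against the original double loop on a small random problem (hypothetical shapes and alpha, just for the comparison):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(455, 13))
y = rng.normal(size=455)
weights = rng.normal(size=13)
alpha = 0.01

num_examples, num_features = X.shape
K = np.dot(X, weights) - y

# original double loop
new_loop = np.zeros(num_features)
for j in range(num_features):
    summ = sum(K[i] * X[i][j] for i in range(num_examples))
    new_loop[j] = weights[j] - (alpha / num_examples) * summ

# vectorized versions
new_einsum = weights - alpha / num_examples * np.einsum('i,ij->j', K, X)
new_dot = weights - (alpha / num_examples) * np.dot(K, X)

print(np.allclose(new_loop, new_einsum), np.allclose(new_loop, new_dot))  # True True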