I'm making a CNN for a binary classification problem distinguishing images of bees and ants. The images are 500x500 with 3 channels.
Here is my code.
Dataloader:
def load_data(path):
    data = []
    ant = 0
    bee = 0
    for folder in os.listdir(path):
        print(folder)
        curfolder = os.path.join(path, folder)
        for file in os.listdir(curfolder):
            image = plt.imread(curfolder + '/' + file)
            image = cv2.resize(image, (500, 500))
            if folder == 'ants':
                ant += 1
                data.append([np.array(image), np.eye(2)[0]])
            elif folder == 'bees':
                bee += 1
                data.append([np.array(image), np.eye(2)[1]])
    np.random.shuffle(data)
    np.save('train.npy', data)
    print('ants : ', ant)
    print('bees : ', bee)

training_data = np.load("train.npy", allow_pickle=True)
print(len(training_data))
CNN class
class Net(nn.Module):
    def __init__(self):
        super().__init__()  # just run the init of the parent class (nn.Module)
        self.conv1 = nn.Conv2d(3, 32, 5)   # input is a 3-channel image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(32, 64, 5)  # input is 32, bc the first layer outputs 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(3, 500, 500).view(-1, 3, 500, 500)
        self._to_linear = None
        self.convs(x)
        print(self._to_linear)

        self.fc1 = nn.Linear(self._to_linear, 512)  # flattening.
        self.fc2 = nn.Linear(512, 2)  # 512 in, 2 out bc we're doing 2 classes (ant vs bee).

    def convs(self, x):
        # max pooling over 2x2
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape ... this flattens x before the fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # bc this is our output layer. No activation here.
        return F.softmax(x, dim=1)

net = Net()
print(net)
net = Net()
print(net)
loss and optimizer
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
Data Flat
train_X = torch.Tensor([i[0] for i in training_data]).view(-1,3,500,500)
train_X = train_X/255.0
train_y = torch.Tensor([i[1] for i in training_data])
training the model
device = torch.device("cuda:0")
net = Net().to(device)
print(len(train_X))
epochs = 10
BATCH_SIZE = 1
for epoch in range(epochs):
    for i in range(0, len(train_X), BATCH_SIZE):  # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
        # print(f"{i}:{i+BATCH_SIZE}")
        batch_X = train_X[i:i+BATCH_SIZE]
        batch_y = train_y[i:i+BATCH_SIZE]
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()  # Does the update

    print(f"Epoch : {epoch}. Loss: {loss}")
The loss does not change from one epoch to the next. I have tried changing the learning rate, but the problem remains.
Epoch : 0. Loss: 0.23345321416854858
Epoch : 1. Loss: 0.23345321416854858
Epoch : 2. Loss: 0.23345321416854858
Epoch : 3. Loss: 0.23345321416854858
Epoch : 4. Loss: 0.23345321416854858
Epoch : 5. Loss: 0.23345321416854858
Epoch : 6. Loss: 0.23345321416854858
Epoch : 7. Loss: 0.23345321416854858
Epoch : 8. Loss: 0.23345321416854858
Epoch : 9. Loss: 0.23345321416854858
Thank you in advance.
In your training loop, you should call optimizer.zero_grad() instead of net.zero_grad(). Also, you are using MSELoss() for a classification problem; you need something like nn.BCELoss(), nn.CrossEntropyLoss(), or nn.NLLLoss().
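A minimal sketch of that change, assuming the one-hot targets from np.eye(2) are converted to integer class labels so nn.CrossEntropyLoss can be applied to the raw logits (i.e. with the final softmax removed from forward); the optimizer is also built from the net instance that is actually trained:

import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = Net().to(device)

loss_function = nn.CrossEntropyLoss()               # classification loss: raw logits + integer labels
optimizer = optim.Adam(net.parameters(), lr=0.001)  # created after net is on the device it trains on

train_y_idx = torch.argmax(train_y, dim=1).long()   # one-hot rows -> class indices (0 = ant, 1 = bee)

for epoch in range(epochs):
    for i in range(0, len(train_X), BATCH_SIZE):
        batch_X = train_X[i:i + BATCH_SIZE].to(device)
        batch_y = train_y_idx[i:i + BATCH_SIZE].to(device)

        optimizer.zero_grad()                       # clear gradients tracked by the optimizer
        logits = net(batch_X)                       # forward pass (fc2 output, no softmax)
        loss = loss_function(logits, batch_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch : {epoch}. Loss: {loss.item()}")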
Related
I am in the process of training my first CNN to solve a multi-class classification problem. I am feeding in images of animals corresponding to one of 182 classes; however, I have run into some issues. Firstly, my code appears to get stuck on optimiser.step(); it has been calculating this for roughly 30 minutes. Secondly, my training loss is increasing:
EPOCH: 0 BATCH: 1999 LOSS: 1.5790680234357715
EPOCH: 0 BATCH: 3999 LOSS: 2.9340945997834207
If anyone could provide some guidance, that would be greatly appreciated. Below is my code.
# loading data
train_data = dataset.get_subset(
    "train",
    transform=transforms.Compose(
        [transforms.Resize((448, 448)), transforms.ToTensor()]
    ),
)
train_loader = get_train_loader("standard", train_data, batch_size=16)
# defining model
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 3, 1)
        self.conv2 = nn.Conv2d(6, 16, 3, 3)
        self.fc1 = nn.Linear(37*37*16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 182)

    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X, 2, 2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2, 2)
        X = torch.flatten(X, 1)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim=1)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modell.parameters(), lr=0.001)
import time
start_time = time.time()
#VARIABLES (TRACKER)
epochs = 2
train_losses = []
test_losses = []
train_correct = []
test_correct = []
# FOR LOOP EPOCH
for i in range(epochs):
    trn_corr = 0
    tst_corr = 0
    running_loss = 0.0

    # TRAIN
    for b, (X_train, Y_train, meta) in enumerate(train_loader):
        b += 1  # batch starts at 1
        # zero parameter gradients
        optimizer.zero_grad()
        # pass training data to the model as float (later compute loss)
        output = modell(X_train.float())
        # calculate the loss of outputs with respect to ground truth values
        loss = criterion(output, Y_train)
        # backpropagate the loss through the network
        loss.backward()
        # perform parameter update based on the current gradient
        optimizer.step()

        predicted = torch.max(output.data, 1)[1]
        batch_corr = (predicted == Y_train).sum()  # True (1) or False (0)
        trn_corr += batch_corr

        running_loss += loss.item()
        if b % 2000 == 1999:
            print(f"EPOCH: {i} BATCH: {b} LOSS: {running_loss/2000}")
            running_loss = 0.0

    train_losses.append(loss)
    train_correct.append(trn_corr)
As for the loss, it may be due to the model. The model has some room for improvement: only 2 convolution layers, expanding to only 16 channels, is probably not sufficient for your data. Use more convolution layers with more channels, for example 5 conv layers with 16, 32, 32, 64, and 64 channels (a rough sketch is shown after this answer). Experiment with different numbers of layers and channels to see which works best. Also, a good learning rate for Adam is 3e-4. To track the model's progress more easily, I'd recommend decreasing the interval at which the loss is printed.
About the data: are there enough instances of each class? Is it normalized between 0 and 1?
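A rough sketch of that suggestion (not tested on this dataset; the 448x448 input size and the 182 classes are taken from the question, and global average pooling is used so the flattened size does not have to be computed by hand):

import torch
import torch.nn as nn
import torch.nn.functional as F

class DeeperConvNet(nn.Module):
    # 5 conv layers with 16, 32, 32, 64, 64 channels, each followed by ReLU + 2x2 max pooling
    def __init__(self, num_classes=182):
        super().__init__()
        self.convs = nn.ModuleList([
            nn.Conv2d(3, 16, 3, padding=1),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.Conv2d(64, 64, 3, padding=1),
        ])
        self.pool = nn.MaxPool2d(2, 2)
        self.gap = nn.AdaptiveAvgPool2d(1)   # 448x448 input -> 14x14 feature map -> 1x1
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        for conv in self.convs:
            x = self.pool(F.relu(conv(x)))
        x = self.gap(x).flatten(1)
        return F.log_softmax(self.fc(x), dim=1)

modell = DeeperConvNet()
optimizer = torch.optim.Adam(modell.parameters(), lr=3e-4)  # learning rate suggested above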
I'm trying to upgrade this awesome implementation of a Gumbel-Softmax VAE found here. However, I keep getting
TypeError: Cannot convert a symbolic Keras input/output to a numpy array.
I am stumped; I have tried many, many things. Interestingly, some searches turn up other implementations of VAEs. I believe the error is somewhere in the "KL" term calculation of the loss.
Here is the almost working code:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
batch_size = 10
data_dim = 784
M = 10 # classes
N = 30 # how many distributions
nb_epoch = 100
epsilon_std = 0.01
anneal_rate = 0.0003
min_temperature = 0.5
tau = tf.Variable(5.0, dtype=tf.float32)
class Sampling(keras.layers.Layer):
    def call(self, logits_y):
        u = tf.random.uniform(tf.shape(logits_y), 0, 1)
        y = logits_y - tf.math.log(
            -tf.math.log(u + 1e-20) + 1e-20
        )  # logits + gumbel noise
        y = tf.nn.softmax(tf.reshape(y, (-1, N, M)) / tau)
        y = tf.reshape(y, (-1, N * M))
        return y
encoder_inputs = keras.Input(shape=(data_dim))
x = keras.layers.Dense(512, activation="relu")(encoder_inputs)
x = keras.layers.Dense(256, activation="relu")(x)
logits_y = keras.layers.Dense(M * N, name="logits_y")(x)
z = Sampling()(logits_y)
encoder = keras.Model(encoder_inputs, z, name="encoder")
encoder.build(encoder_inputs)
print(encoder.summary())
decoder_inputs = keras.Input(shape=(N * M))
x = keras.layers.Dense(256, activation="relu")(decoder_inputs)
x = keras.layers.Dense(512, activation="relu")(x)
decoder_outputs = keras.layers.Dense(data_dim, activation="sigmoid")(x)
decoder = keras.Model(decoder_inputs, decoder_outputs, name="decoder")
decoder.build(decoder_inputs)
print(decoder.summary())
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.bce = tf.keras.losses.BinaryCrossentropy()
        self.loss_tracker = keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        return [self.loss_tracker]

    def call(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat

    @tf.function
    def gumbel_loss(self, y_true, y_pred, logits_y):
        q_y = tf.reshape(logits_y, (-1, N, M))
        q_y = tf.nn.softmax(q_y)
        log_q_y = tf.math.log(q_y + 1e-20)
        kl_tmp = q_y * (log_q_y - tf.math.log(1.0 / M))
        kl = tf.math.reduce_sum(kl_tmp, axis=(1, 2))
        kl = tf.squeeze(kl, axis=0)
        elbo = data_dim * self.bce(y_true, y_pred) - kl
        return elbo

    def train_step(self, data):
        x = data
        with tf.GradientTape(persistent=True) as tape:
            z = self.encoder(x, training=True)
            x_hat = self.decoder(z, training=True)
            x = tf.cast(x, dtype=tf.float32)
            x_hat = tf.cast(x_hat, dtype=tf.float32)
            logits_y = self.encoder.get_layer('logits_y').output
            loss = self.gumbel_loss(x, x_hat, logits_y)
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}
def main():
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(
        path="mnist.npz"
    )
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0
    x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
    x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

    vae = VAE(encoder, decoder, name="vae-model")
    vae_inputs = (None, data_dim)
    vae.build(vae_inputs)
    vae.compile(optimizer="adam", loss=None)
    vae.fit(
        x_train,
        shuffle=True,
        epochs=1,
        batch_size=batch_size
    )

if __name__ == "__main__":
    main()
I think the main issue occurs when you try to get the output of the logits_y layer; AFAIK, you can't do that there. Instead, you need to build your encoder model with two outputs, something like this:
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        # self.encoder = encoder
        self.encoder = tf.keras.Model(inputs=encoder.input,
                                      outputs=[encoder.get_layer(name='logits_y').output,
                                               encoder.output])
        whatever...
So, in the training loop, this self.encoder will produce two outputs; one of them is the output of the logits_y layer, which you need for the loss function. Lastly, change a few pieces of code in other places, as follows:
def call(self, x):
    _, z = self.encoder(x)
    x_hat = self.decoder(z)
    return x_hat

@tf.function
def gumbel_loss(self, y_true, y_pred, logits_y):
    q_y = tf.reshape(logits_y, (-1, N, M))
    q_y = tf.nn.softmax(q_y)
    log_q_y = tf.math.log(q_y + 1e-20)
    kl_tmp = q_y * (log_q_y - tf.math.log(1.0 / M))
    kl = tf.math.reduce_sum(kl_tmp, axis=(1, 2))
    elbo = data_dim * self.bce(y_true, y_pred) - kl
    return elbo
And lastly, the train_step function; note that the corresponding variables are already tf.float32, so there is no need to convert them.
def train_step(self, data):
    x = data
    with tf.GradientTape(persistent=True) as tape:
        logits_y, z = self.encoder(x, training=True)
        x_hat = self.decoder(z, training=True)
        loss = self.gumbel_loss(x, x_hat, logits_y)
    grads = tape.gradient(loss, self.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
    self.loss_tracker.update_state(loss)
    return {"loss": self.loss_tracker.result()}
You don't need to change anything else in the above code. Here are some training logs (running on CPU, TF 2.5).
Epoch 1/5
6000/6000 [==============================] - 60s 10ms/step - loss: 54.4604
Epoch 2/5
6000/6000 [==============================] - 60s 10ms/step - loss: 18.8960
Epoch 3/5
6000/6000 [==============================] - 59s 10ms/step - loss: 12.1036
Epoch 4/5
6000/6000 [==============================] - 59s 10ms/step - loss: 8.5804
Epoch 5/5
6000/6000 [==============================] - 59s 10ms/step - loss: 6.3916
I'm new to neural networks, so I hope you will forgive me if this is really basic stuff.
So far I have managed to use view once in my first, very simple project, and now I am trying to understand something more complex.
I found this code on GitHub and I'm trying to understand it.
This is how the network is built.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = x.view(-1, 64 * 12 * 12)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
I'm trying to get the images of the digits the network failed to recognize, yet I don't really know how to use view here.
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    mistakes = 0
    images, labels = next(iter(test_loader))
    img = images[0].view(1, 784)
    with torch.no_grad():
        logps = model(img)
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
I couldn't find useful examples, so I would be grateful for any kind of hint.
Every time I modify it I get one of these two errors.
builtins.RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 3, 3], but got 2-dimensional input of size [1, 784] instead
builtins.RuntimeError: shape '[1, 784, 3, 3]' is invalid for input of size 784
Thanks in advance for any kind of help.
Go with this. Since your dataloader already provides the data in 4D, there is no need to change it with .view(), and your CNN expects 4D input anyway.
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    mistakes = 0
    """ NO NEED OF THIS
    images, labels = next(iter(test_loader))
    img = images[0].view(1, 784)  # This makes your image 1D
    """
    with torch.no_grad():
        # logps = model(img)
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
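If the goal is also to collect the images the network failed to recognize (as asked above), here is a small sketch of how that could be added inside the same loop; the mistakes list and the plotting at the end are illustrative assumptions, not part of the original code:

import matplotlib.pyplot as plt
import torch

mistakes = []  # (image, predicted digit, true digit)

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        pred = output.argmax(dim=1)          # predicted class per image
        wrong = pred != target               # boolean mask of misclassified samples
        for img, p, t in zip(data[wrong], pred[wrong], target[wrong]):
            mistakes.append((img.cpu(), p.item(), t.item()))

# e.g. display the first misclassified image (grayscale MNIST, channel dim squeezed out)
if mistakes:
    img, p, t = mistakes[0]
    plt.imshow(img.squeeze(0), cmap="gray")
    plt.title(f"predicted {p}, actual {t}")
    plt.show()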
I'm trying to implement the Large Margin Cosine Loss in Tensorflow. I've found the following class that does it:
import math
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Layer
from tensorflow.keras.initializers import Constant
from tensorflow.python.keras.utils import tf_utils
def _resolve_training(layer, training):
    if training is None:
        training = K.learning_phase()
    if isinstance(training, int):
        training = bool(training)
    if not layer.trainable:
        # When the layer is not trainable, override the value
        training = False
    return tf_utils.constant_value(training)
class CosFace(keras.layers.Layer):
    """
    Implementation of CosFace layer. Reference: https://arxiv.org/abs/1801.09414

    Arguments:
      num_classes: number of classes to classify
      s: scale factor
      m: margin
      regularizer: weights regularizer
    """
    def __init__(self,
                 num_classes,
                 s=30.0,
                 m=0.35,
                 regularizer=None,
                 name='cosface',
                 **kwargs):
        super().__init__(name=name, **kwargs)
        self._n_classes = num_classes
        self._s = float(s)
        self._m = float(m)
        self._regularizer = regularizer

    def build(self, input_shape):
        embedding_shape, label_shape = input_shape
        self._w = self.add_weight(shape=(embedding_shape[-1], self._n_classes),
                                  initializer='glorot_uniform',
                                  trainable=True,
                                  regularizer=self._regularizer)

    def call(self, inputs, training=None):
        """
        During training, requires 2 inputs: embedding (after backbone+pool+dense),
        and ground truth labels. The labels should be sparse (and use
        sparse_categorical_crossentropy as loss).
        """
        print('calling CosFace Layer...')
        embedding, label = inputs

        # Squeezing is necessary for Keras. It expands the dimension to (n, 1)
        label = tf.reshape(int(label), [-1], name='label_shape_correction')

        # Normalize features and weights and compute dot product
        x = tf.nn.l2_normalize(embedding, axis=1, name='normalize_prelogits')
        w = tf.nn.l2_normalize(self._w, axis=0, name='normalize_weights')
        cosine_sim = tf.matmul(x, w, name='cosine_similarity')

        training = _resolve_training(self, training)
        if not training:
            # We don't have labels if we're not in training mode
            return self._s * cosine_sim
        else:
            one_hot_labels = tf.one_hot(label,
                                        depth=self._n_classes,
                                        name='one_hot_labels')
            theta = tf.math.acos(K.clip(
                cosine_sim, -1.0 + K.epsilon(), 1.0 - K.epsilon()))
            final_theta = tf.where(tf.cast(one_hot_labels, dtype=tf.bool),
                                   tf.math.cos(theta) - self._m,
                                   tf.math.cos(theta),
                                   name='final_theta')
            print(final_theta)
            output = tf.math.cos(final_theta, name='cosine_sim_with_margin')
            return self._s * output
I'm testing it on a simple CNN trained on the MNIST dataset. However, training doesn't work as expected. Here is the network architecture:
label = keras.layers.Input((), name="input/labels")
input = keras.layers.Input(shape=[28,28,1], name="input/image")
margin = CosFace(num_classes=10, dtype='float32')
x = keras.layers.Conv2D(64, (3,3), padding="same")(input)
x = keras.layers.Activation("relu")(x)
x = keras.layers.MaxPooling2D((2,2))(x)
x = keras.layers.Conv2D(32, (3,3), padding="same")(x)
x = keras.layers.Activation("relu")(x)
x = keras.layers.MaxPooling2D(pool_size=(2,2))(x)
x = keras.layers.Conv2D(16, (3,3), padding="same")(x)
x = keras.layers.Activation("relu")(x)
x = keras.layers.MaxPooling2D(pool_size=(2,2))(x)
x = keras.layers.Dropout(0.25)(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(128)(x)
x = keras.layers.Activation("relu", name="dense")(x)
x = keras.layers.Dropout(0.25)(x)
x = margin([x, label])
output = keras.layers.Activation("softmax")(x)
model_cos = keras.Model(inputs=[input, label], outputs=output)
model_cos.compile(loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
H_cos = model_cos.fit((X_train, y_train), y_train, batch_size=64, epochs=3, verbose=1)
And this is the output:
Epoch 1/3
calling CosFace Layer...
Tensor("functional_11/cosface/final_theta:0", shape=(None, 10), dtype=float32)
calling CosFace Layer...
Tensor("functional_11/cosface/final_theta:0", shape=(None, 10), dtype=float32)
860/860 [==============================] - 7s 8ms/step - loss: 0.3194 - accuracy: 0.9751
Epoch 2/3
860/860 [==============================] - 6s 7ms/step - loss: 0.0545 - accuracy: 1.0000
Epoch 3/3
860/860 [==============================] - 6s 7ms/step - loss: 0.0368 - accuracy: 1.0000
I don't understand what's going on. First of all, the real accuracy isn't 1; second, it looks like the CosFace layer is not called anymore after the second epoch.
Do you have any idea on how to fix this?
I can't figure out what I'm doing wrong with this XOR neural network. Maybe I'm not computing the loss correctly? The loss improves slightly at the beginning, and then the accuracy converges to 50% very quickly. Could someone please point out what I'm doing wrong?
Here's a minimal self contained example:
import numpy as np
import tensorflow as tf
n_inputs = 2
n_hidden = 3
n_outputs = 1
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.float32, shape=(None), name='y')
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z
with tf.name_scope('nn'):
    hidden = neuron_layer(X, n_hidden, name='hidden', activation=tf.nn.sigmoid)
    prediction_probabilities = neuron_layer(hidden, n_outputs, name='outputs', activation=tf.nn.sigmoid)

with tf.name_scope('loss'):
    mse_loss = tf.reduce_mean(tf.squared_difference(y, prediction_probabilities), name='loss')

learning_rate = 0.1

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(mse_loss)

with tf.name_scope('eval'):
    correct = tf.equal(tf.greater_equal(prediction_probabilities, 0.5), tf.cast(y, tf.bool))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
X_train = [
(0, 0),
(0, 1),
(1, 0),
(1, 1)
]
y_train = [0,1,1,0]
with tf.Session() as sess:
    init.run()
    for epoch in range(500):
        _, mse, acc = sess.run([training_op, mse_loss, accuracy],
                               feed_dict={X: np.array(X_train), y: np.array(y_train)})
        print("mse: %.4f, accuracy: %.2f" % (mse, acc))
Your code is perfectly fine. The problem is with your input: you need to pass a 2D list, not a 1D one, so that each inner list is a single training example; that's how TensorFlow will parse it. With the 1D list, y has shape (4,) while the predictions have shape (4, 1), so squared_difference broadcasts them to a 4x4 matrix and the loss compares the wrong pairs.
y_train = [[0],[1],[1],[0]]
Your code works nicely after that change.
...
mse: 0.0002, accuracy: 1.00
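Equivalently (a small sketch, not from the original answer), the existing 1D label list can be reshaped into a column vector with NumPy before being fed in:

import numpy as np

y_train = np.array([0, 1, 1, 0], dtype=np.float32).reshape(-1, 1)  # shape (4, 1), matching the (None, 1) predictions

# then feed it exactly as before:
# feed_dict={X: np.array(X_train), y: y_train}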