I modified the code from here. What I'm trying to do is combine the two matrices to predict the output matrix. The output matrix is built from the two input matrices. The problem seems to be associated with:
self.Combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
self.Combined_dense_2 = tf.keras.layers.Dense(units=16, activation="softmax")
The linked Medium tutorial only predicts a single number based on the combined mixed input. I, however, am trying to predict a whole matrix, but I don't know how to structure the combined layers (if that is even the problem).
The error: "ValueError: Shape mismatch: The shape of labels (received (40,)) should equal the shape of logits except for the last dimension (received (10, 16))."
The code:
import warnings
import sys
if not sys.warnoptions:
warnings.simplefilter("ignore")
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow import keras
from IPython.display import clear_output
class model(keras.Model):
def __init__(self):
super().__init__()
# The layers to process our image
self.Conv2D_1 = tf.keras.layers.Conv2D(filters=32,
kernel_size=(1, 1),
strides=(1, 1)
)
self.Conv2D_2 = tf.keras.layers.Conv2D(filters=32,
kernel_size=(3, 3),
strides=(1, 1)
)
# our combined layers
self.Combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
self.Combined_dense_2 = tf.keras.layers.Dense(units=16, activation="softmax")
def call(self, input_image_one, input_image_two):
# Image model
I = self.Conv2D_1(input_image_one)
I = self.Conv2D_2(I)
# Flatten I so we can merge our data.
I = tf.keras.layers.Flatten()(I)
N = self.Conv2D_1(input_image_two)
N = self.Conv2D_2(N)
N = tf.keras.layers.Flatten()(N)
# Combined model
x = tf.concat([N, I], 1) # Concatenate through axis #1
x = self.Combined_dense_1(x)
x = self.Combined_dense_2(x)
return x
network = model()
optimizer = tf.keras.optimizers.Adam()
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
def train_step(model, optimizer, loss_function,
images_one_batch, images_two_batch,
labels):
with tf.GradientTape() as tape:
model_output = model(images_one_batch, images_two_batch)
print(model_output)
loss = loss_function(labels, model_output) # our labels vs our predictions
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
return loss
def train(model, optimizer, loss_function, epochs,
images_one_batch, images_two_batch,
labels):
loss_array = []
for epoch in range(epochs):
loss = train_step(model, optimizer, loss_function, images_one_batch, images_two_batch, labels)
loss_array.append(loss)
if ((epoch + 1) % 20 == 0):
# Calculating accuracy
network_output = network(images_one_batch, images_two_batch)
preds = np.argmax(network_output, axis=1)
acc = 0
for i in range(len(images_one_batch)):
if (preds[i] == labels[i]):
acc += 1
print(" loss:", loss, " Accuracy: ", acc / len(images_one_batch) * 100, "%")
clear_output(wait=True)
NumberofVars = 2
width = NumberofVars; height = NumberofVars
NumberOfComputationSets = 10
CM_MatrixArr1 = []
CM_MatrixArr2 = []
for j in range(NumberOfComputationSets):
Theta1 = list(np.reshape(np.random.randint(2, size=4), (1,4))[0])
Theta1 = list(np.float_(Theta1))
CM_MatrixArr1.append(Theta1)
Theta2 = list(np.reshape(np.random.randint(2, size=4), (1,4))[0])
Theta2 = list(np.float_(Theta2))
CM_MatrixArr2.append(Theta2)
combinedCM_MatrixArr = []
combinedCM_toIntArr = []
for x,y in zip(CM_MatrixArr1, CM_MatrixArr2):
combinedCM = []
combinedCM_toInt = 0
for a,b in zip(x,y):
LogVal = (a == b)
combinedCM.append(float(LogVal == True))
combinedCM_MatrixArr.append(combinedCM)
combinedCM_MatrixArr = np.array(combinedCM_MatrixArr)
combinedCM_MatrixArr = combinedCM_MatrixArr.reshape(NumberOfComputationSets,2,2)
CM_MatrixArr1 = np.array(CM_MatrixArr1)
CM_MatrixArr1 = CM_MatrixArr1.reshape(NumberOfComputationSets,2,2)
CM_MatrixArr1 = CM_MatrixArr1.reshape(NumberOfComputationSets, 2,2,1)
CM_MatrixArr2 = np.array(CM_MatrixArr2)
CM_MatrixArr2 = CM_MatrixArr2.reshape(NumberOfComputationSets,2,2)
CM_MatrixArr2 = CM_MatrixArr2.reshape(NumberOfComputationSets, 2,2,1)
train(network,optimizer,loss_function,300,CM_MatrixArr1,CM_MatrixArr2,combinedCM_MatrixArr)
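One way the shapes can be made to line up, assuming the goal really is to predict the whole 2x2 binary output matrix, is to flatten the target to shape (batch, 4) and let the final Dense layer emit 4 sigmoid units trained with binary cross-entropy instead of 16 softmax units with sparse categorical cross-entropy. A rough sketch of the changed pieces (MatrixModel and labels_flat are my names; it reuses the variables defined in the question, and I switched both convolutions to 1x1 kernels because a 3x3 'valid' convolution does not fit a 2x2 input):

import tensorflow as tf

class MatrixModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        # 1x1 kernels: the inputs are only 2x2, so a 3x3 'valid' conv would not fit
        self.conv_1 = tf.keras.layers.Conv2D(filters=32, kernel_size=(1, 1))
        self.conv_2 = tf.keras.layers.Conv2D(filters=32, kernel_size=(1, 1))
        self.flatten = tf.keras.layers.Flatten()
        self.combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
        # one sigmoid output per cell of the flattened 2x2 target matrix
        self.combined_dense_out = tf.keras.layers.Dense(units=4, activation="sigmoid")

    def call(self, input_image_one, input_image_two):
        i = self.flatten(self.conv_2(self.conv_1(input_image_one)))
        n = self.flatten(self.conv_2(self.conv_1(input_image_two)))
        x = tf.concat([n, i], axis=1)
        return self.combined_dense_out(self.combined_dense_1(x))

network = MatrixModel()
optimizer = tf.keras.optimizers.Adam()
loss_function = tf.keras.losses.BinaryCrossentropy()
# flatten the 2x2 targets so labels and predictions are both (batch, 4)
labels_flat = combinedCM_MatrixArr.reshape(NumberOfComputationSets, 4)
# train(network, optimizer, loss_function, 300, CM_MatrixArr1, CM_MatrixArr2, labels_flat)

Note that the accuracy check inside train() would also need rewording, because it currently assumes a single class index per sample (np.argmax over 16 classes) rather than a 4-value binary vector.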
Related
I am new to PyTorch and I have compiled the code below from different articles and code snippets. The code basically takes in a sequence of products and then predicts the next product in the sequence.
I am trying to find the accuracy of this model but am not sure how to do it. Any help or suggestion would be appreciated.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.manual_seed(50)
prod_list = ['AA105045091',
'C2106264154',
'B2106691381',
'AA105045091',
'B2106691381',
'X3106692282',
'V2106350393',
'C2106264154',
'V6104504285',
'A2106329636',
'M6M100936257',
'N2101433968',
'X2M200042701',
'V3M200052002',
'K5101434063',
'B1106334744',
'P1103790575',
'K1106031596',
'E3D227124S6',
'D1105834415',
'M4102794084',
'B4101250283',
'C2102794082',
'D1106816721',
'B5106788450',
'A3106805351',
'C2106788452',
'C2106805373',
'B2106788454',
'A1104146375']
prod_list
sequences = []
for i in range(3, len(prod_list)):
words = prod_list[i-3:i+1]
sequences.append(words)
# split the sequence to input list and output list
X = []
y= []
for i in sequences:
X.append(i[0:3])
y.append(i[3])
# create integer-to-token mapping
int2token = {}
cnt = 0
for w in set(" ".join(prod_list).split()):
int2token[cnt] = w
cnt+= 1
# create token-to-integer mapping
token2int = {t: i for i, t in int2token.items()}
def get_integer_seq_train(seq):
new_list = []
for i in seq:
new_list.append(token2int[i])
return new_list
# convert text sequences to integer sequences
x_int = [get_integer_seq_train(i) for i in X]
# convert lists to numpy arrays
x_int = np.array(x_int)
vocab_size = len(int2token)
vocab_size
def get_integer_seq_test(seq):
return [token2int[w] for w in seq.split()]
# convert text sequences to integer sequences
y_int = [get_integer_seq_test(i) for i in y]
# convert lists to numpy arrays
y_int = np.array(y_int)
def get_batches(arr_x, arr_y, batch_size):
# iterate through the arrays
prv = 0
for n in range(batch_size, arr_x.shape[0], batch_size):
x = arr_x[prv:n,:]
y = arr_y[prv:n,:]
prv = n
yield x, y
class WordLSTM(nn.Module):
def __init__(self, n_hidden=256, n_layers=4, drop_prob=0.3, lr=0.001):
super().__init__()
self.drop_prob = drop_prob
self.n_layers = n_layers
self.n_hidden = n_hidden
self.lr = lr
self.emb_layer = nn.Embedding(vocab_size, 200)
## define the LSTM
self.lstm = nn.LSTM(200, n_hidden, n_layers,
dropout=drop_prob, batch_first=True)
## define a dropout layer
self.dropout = nn.Dropout(drop_prob)
## define the fully-connected layer
self.fc = nn.Linear(3*n_hidden, vocab_size)
torch.manual_seed(50)
def forward(self, x, hidden):
''' Forward pass through the network.
These inputs are x, and the hidden/cell state `hidden`. '''
## pass input through embedding layer
embedded = self.emb_layer(x)
## Get the outputs and the new hidden state from the lstm
lstm_output, hidden = self.lstm(embedded, hidden)
## pass through a dropout layer
out = self.dropout(lstm_output)
#out = out.contiguous().view(-1, self.n_hidden)
out = out.reshape(x.shape[0], -1)
## put "out" through the fully-connected layer
out = self.fc(out)
# return the final output and the hidden state
return out, hidden
def init_hidden(self, batch_size):
''' initializes hidden state '''
# Create two new tensors with sizes n_layers x batch_size x n_hidden,
# initialized to zero, for hidden state and cell state of LSTM
weight = next(self.parameters()).data
# if GPU is available
if (torch.cuda.is_available()):
hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
# if GPU is not available
else:
hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
return hidden
net = WordLSTM()
def train(net, epochs=10, batch_size=32, lr=0.001, clip=1, print_every=32):
torch.manual_seed(50)
# optimizer
opt = torch.optim.Adam(net.parameters(), lr=lr)
# loss
criterion = nn.CrossEntropyLoss()
counter = 0
net.train()
for e in range(epochs):
# initialize hidden state
h = net.init_hidden(batch_size)
for x, y in get_batches(x_int, y_int, batch_size):
counter+= 1
# convert numpy arrays to PyTorch arrays
inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
# detach hidden states
h = tuple([each.data for each in h])
# zero accumulated gradients
net.zero_grad()
# get the output from the model
output, h = net(inputs, h)
# calculate the loss and perform backprop
loss = criterion(output, targets.view(-1))
# back-propagate error
loss.backward()
# `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
nn.utils.clip_grad_norm_(net.parameters(), clip)
# update weights
opt.step()
print("Epoch: {}/{}...".format(e+1, epochs),
"Step: {}...".format(counter),
"Loss: {}...".format(loss))
train(net, batch_size = 32, epochs=20, print_every=256)
def predict(net, tkn, h=None):
# tensor inputs
new_inp = []
for t1 in tkn:
x = np.array([token2int[t1]])
new_inp.append(x)
new_inp = np.asarray(new_inp).reshape(1,-1)
inputs = torch.from_numpy(new_inp)
# detach hidden state from history
h = tuple([each.data for each in h])
# get the output of the model
out, h = net(inputs, h)
# get the token probabilities
p = F.softmax(out, dim=1).data
p = p.cpu()
p = p.numpy()
p = p.reshape(p.shape[1],)
# get the index of the highest-probability token
top_n_idx = p.argsort()[-1:][::-1]
# take that top index (random sampling among several top indices is commented out)
#sampled_token_index = top_n_idx[random.sample([0],1)[0]]
sampled_token_index = top_n_idx[0]
# return the product token for the predicted index
return int2token[sampled_token_index]
# function to generate text
def sample(net, prime):
net.eval()
# batch size is 1
h = net.init_hidden(1)
token = predict(net, prime, h)
return token
sample(net, prime=['AA105045091', 'C2106264154', 'B2106691381'])
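One straightforward way to measure accuracy here, given that y_int holds the integer index of the true next product, is to compare the argmax of the network's logits with that index for every sequence. This is just a rough sketch I added (evaluate_accuracy is my own helper name, not part of the original code):

import torch

def evaluate_accuracy(net, arr_x, arr_y):
    # fraction of sequences whose predicted next product matches the target
    net.eval()
    with torch.no_grad():
        inputs = torch.from_numpy(arr_x)            # (num_seqs, 3) token indices
        targets = torch.from_numpy(arr_y).view(-1)  # (num_seqs,)
        h = net.init_hidden(inputs.shape[0])
        out, _ = net(inputs, h)                     # (num_seqs, vocab_size) logits
        preds = out.argmax(dim=1)
        accuracy = (preds == targets).float().mean().item()
    net.train()
    return accuracy

print("accuracy on the training sequences:", evaluate_accuracy(net, x_int, y_int))

Since all sequences here come from the same product list, this measures training accuracy; for a more honest number you would hold out some sequences and run the same function on them.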
I recently shifted to PyTorch from Keras and I am still trying to understand how all of this works. Below is the code I have implemented to classify the MNIST dataset using a simple MLP. Just as I used to do in Keras, I have flattened each 28x28 image into a vector of 784, and I have also created a one-hot representation of my labels.
In the model I was hoping that, given a vector of 784, the model would output a one-hot vector of probabilities, but as soon as my code reaches the loss computation I get the following error:
RuntimeError: 1D target tensor expected, multi-target not supported
Below is my code:
import numpy as np
import matplotlib.pyplot as plt
import torch
import time
from torch import nn, optim
from keras.datasets import mnist
from torch.utils.data import Dataset, DataLoader
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
# ----------------------------------------------------
class MnistDataset(Dataset):
def __init__(self, data_size=0):
(x, y), (_, _) = mnist.load_data()
x = [i.flatten() for i in x]
x = np.array(x, dtype=np.float32)
if data_size < 0 or data_size > len(y):
assert ("Data size should be between 0 to number of files in the dataset")
if data_size == 0:
data_size = len(y)
self.data_size = data_size
# picking 'data_size' random samples
self.x = x[:data_size]
self.y = y[:data_size]
# scaling between 0-1
self.x = (self.x / 255)
# Creating one-hot representation of target
y_encoded = []
for label in y:
encoded = np.zeros(10)
encoded[label] = 1
y_encoded.append(encoded)
self.y = np.array(y_encoded)
def __len__(self):
return self.data_size
def __getitem__(self, index):
x_sample = self.x[index]
label = self.y[index]
return x_sample, label
# ----------------------------------------------------
num_train_samples = 10000
num_test_samples = 2000
# Each generator returns a single
# sample & its label on each iteration.
mnist_train = MnistDataset(data_size=num_train_samples)
mnist_test = MnistDataset(data_size=num_test_samples)
# Each generator returns a batch of samples on each iteration.
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True) # 79 batches
test_loader = DataLoader(mnist_test, batch_size=128, shuffle=True) # 16 batches
# ----------------------------------------------------
# Defining the Model Architecture
class MLP(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(28 * 28, 100)
self.act1 = nn.ReLU()
self.fc2 = nn.Linear(100, 50)
self.act2 = nn.ReLU()
self.fc3 = nn.Linear(50, 10)
self.act3 = nn.Sigmoid()
def forward(self, x):
x = self.act1(self.fc1(x))
x = self.act2(self.fc2(x))
output = self.act3(self.fc3(x))
return output
# ----------------------------------------------------
model = MLP()
# Defining optimizer and loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# ----------------------------------------------------
# Training the model
epochs = 10
print("Training Started...")
for epoch in range(epochs):
for batch_index, (inputs, targets) in enumerate(train_loader):
optimizer.zero_grad() # Zero the gradients
outputs = model(inputs) # Forward pass
loss = criterion(outputs, targets) # Compute the Loss
loss.backward() # Compute the Gradients
optimizer.step() # Update the parameters
# Evaluating the model
total = 0
correct = 0
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(test_loader):
outputs = model(inputs)
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()
print('Epoch : {} Test Acc : {}'.format(epoch, (100. * correct / total)))
print("Training Completed Sucessfully")
# ----------------------------------------------------
I also read some other posts related to the same problem, and most of them said that for CrossEntropy loss the target has to be a single number, which goes completely over my head. Can someone please explain a solution? Thank you.
For nn.CrossEntropyLoss you don't need a one-hot representation of the label; you just need to pass the prediction logits, whose shape is (batch_size, n_class), and a target vector of shape (batch_size,).
So just pass in the label index vector y instead of the one-hot vector.
Fixed version of your code:
class MnistDataset(Dataset):
def __init__(self, data_size=0):
(x, y), (_, _) = mnist.load_data()
x = [i.flatten() for i in x]
x = np.array(x, dtype=np.float32)
if data_size < 0 or data_size > len(y):
assert ("Data size should be between 0 to number of files in the dataset")
if data_size == 0:
data_size = len(y)
self.data_size = data_size
# picking 'data_size' random samples
self.x = x[:data_size]
self.y = y[:data_size]
# scaling between 0-1
self.x = (self.x / 255)
self.y = y[:data_size] # <-- keep the integer class labels (no one-hot encoding)
def __len__(self):
return self.data_size
def __getitem__(self, index):
x_sample = self.x[index]
label = self.y[index]
return x_sample, label
Take a look at the PyTorch documentation for more detail:
https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
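For reference, a tiny standalone example of the shapes this loss expects (logits of shape (batch_size, n_class) and integer class indices of shape (batch_size,)); the numbers are arbitrary:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)            # (batch_size=4, n_class=10) raw scores, no softmax needed
targets = torch.tensor([3, 0, 9, 1])   # class indices, shape (4,), dtype long
loss = criterion(logits, targets)
print(loss.item())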
I get an error. I think it might be because of the time steps.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pandas_datareader.data as web
import datetime as dt
import numpy as np
from tensorflow.keras import Model
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from tensorflow.keras.activations import relu
start = dt.datetime(2018,1,1)
end = dt.datetime(2019,1,1)
df = web.DataReader(name=['IBM', 'MSFT', 'NKE'],
data_source='yahoo',
start=start,
end=end).reset_index()['Close']
values = df.values
average_3_day = df.NKE.rolling(3).mean().values
previous_1_day = df.NKE.shift(-1).values
naive_3_day = tf.keras.metrics.mean_absolute_error(df['NKE'].values[2:], average_3_day[2:]).numpy()
naive_1_day = tf.keras.metrics.mean_absolute_error(df['NKE'].values[:-1], previous_1_day[:-1]).numpy()
print('The benchmark score of 3 day moving average is {:.4f}.'.format(naive_3_day))
print('The benchmark score of the previous day is {:.4f}.'.format(naive_1_day))
for val, fut in zip(df['NKE'].values[:10], previous_1_day[:10]):
print(f'Value: {val:>6.3f} Future: {fut:>6.3f}')
MEAN = np.mean(values[:200, :], axis=0)
STD = np.std(values[:200, :], axis=0)
data = (values - MEAN)/STD
def multivariate_data(dataset, target, start_index, end_index, history_size,
target_size, step, single_step=False):
data, labels = [], []
start_index = start_index + history_size
if end_index is None:
end_index = len(dataset) - target_size
for i in range(start_index, end_index):
indices = range(i-history_size, i, step)
data.append(dataset[indices])
if single_step:
labels.append(target[i+target_size])
else:
labels.append(target[i:i+target_size])
return np.array(data), np.array(labels)
PAST_HISTORY = 5
FUTURE_TARGET = 3
STEP = 5
x_train, y_train = multivariate_data(dataset=data,
target=data[:, -1],
start_index=0,
end_index=200,
history_size=PAST_HISTORY,
target_size=FUTURE_TARGET,
step=STEP)
x_test, y_test = multivariate_data(dataset=data,
target=data[:, -1],
start_index=200,
end_index=None,
history_size=PAST_HISTORY,
target_size=FUTURE_TARGET,
step=STEP)
train_data = tf.data.Dataset.from_tensors((x_train, y_train)).shuffle(len(x_train)).take(-1)
test_data = tf.data.Dataset.from_tensors((x_test, y_test)).shuffle(len(x_test)).take(-1)
print(next(iter(train_data))[0].shape)
print(next(iter(train_data))[1].shape)
class BiDirectionalLSTM(Model):
def __init__(self):
super(BiDirectionalLSTM, self).__init__()
self.bidr = Bidirectional(LSTM(32, activation=None, return_sequences=True))
self.dense = Dense(3)
def call(self, inputs, training=None, mask=None):
x = self.bidr(relu(inputs, alpha=2e-1))
x = self.dense(x)
return x
bidirec = BiDirectionalLSTM()
bidirec(next(iter(train_data)))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [1,192,2,3] != values[1].shape = [1,192,3] [Op:Pack] name: feat
First of all, as I can see, your x_train.shape is (195, 1, 3) and y_train.shape is (195, 3).
So your targets are 2-d, but you're setting return_sequences=True in your BiLSTM layer, which produces a 3-d output.
ref: https://keras.io/layers/recurrent/
So, just fix this first.
class BiDirectionalLSTM(Model):
def __init__(self):
super(BiDirectionalLSTM, self).__init__()
self.bidr = Bidirectional(LSTM(32, activation=None, return_sequences=False))
self.dense = Dense(3)
def call(self, inputs, training=None, mask=None):
x = self.bidr(relu(inputs, alpha=2e-1))
x = self.dense(x)
return x
Secondly, I see you're passing next(iter(train_data)), but the Model object doesn't expect that.
You can write bidirec(x_train), which will run fine, but train_data has two elements: x_train and y_train (the labels). The Model is not designed to take both x_train and y_train.
print(next(iter(train_data))[0].shape)
print(next(iter(train_data))[1].shape)
As you can see, each element has a different shape. But you can do the following and the code will run fine:
bidirec(next(iter(train_data))[0]) # only the training input data, not labels
From this, the model actually gives you predictions.
To train your model, you can just do the following,
bidirec.compile('adam', 'mse')
bidirec.fit(x_train, y_train)
This question already has answers here:
How do I create a variable-length input LSTM in Keras?
(4 answers)
Closed 5 years ago.
Despite going through multiple examples, I still don't understand how to classify sequences of varying length using Keras, similar to this question. I can train a network that detects the frequency of sinusoids of varying length by using masking:
from keras import models
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Masking
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import matplotlib.pyplot as plt  # needed for the plotting code at the end of the question
def gen_noise(noise_len, mag):
return np.random.uniform(size=noise_len) * mag
def gen_sin(t_val, freq):
return 2 * np.sin(2 * np.pi * t_val * freq)
def train_rnn(x_train, y_train, max_len, mask, number_of_categories):
epochs = 3
batch_size = 500
# three hidden layers of 256 each
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(number_of_categories, input_shape=(number_of_categories,),
activation='softmax', name='output'))
model.compile(loss=categorical_crossentropy, optimizer=RMSprop())
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_sig_cls_pair(freqs, t_stops, num_examples, noise_magnitude):
x = []
y = []
num_cat = len(freqs)
dt = 0.01
max_t = int(np.max(t_stops) / dt)
for f_i, f in enumerate(freqs):
for t_stop in t_stops:
t_range = np.arange(0, t_stop, dt)
t_len = t_range.size
for _ in range(num_examples):
sig = gen_sin(f, t_range) + gen_noise(t_len, noise_magnitude)
x.append(sig)
one_hot = np.zeros(num_cat, dtype=np.bool)
one_hot[f_i] = 1
y.append(one_hot)
pad_kwargs = dict(padding='post', maxlen=max_t, value=np.NaN, dtype=np.float32)
return pad_sequences(x, **pad_kwargs), np.array(y)
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
frequencies = (5, 7, 10)
signal_lengths = (0.8, 0.9, 1)
x_in, y_in = gen_sig_cls_pair(frequencies, signal_lengths, 50, noise_mag)
mod = train_rnn(x_in[:, :, None], y_in, 100, mask_val, len(frequencies))
However, I don't understand how I'm supposed to tell Keras about the other sequences. I thought I could mask them too, but when I try, they just output NaN.
testing_dat, expected = gen_sig_cls_pair(frequencies, signal_lengths, 1, 0)
res = mod.predict(testing_dat[:, :, None])
fig, axes = plt.subplots(3)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(np.argmax(res, axis=1), "ro", label="result", alpha=0.2)
axes[1].plot(np.argmax(expected, axis=1), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
axes[2].plot(res)
plt.show()
How do I make a network that can evaluate inputs of varying lengths?
You can pad the input sequences (usually with zeros) or you can use batches of size 1 with varying input size, as outlined in fchollet's answer on the Keras github:
for seq, label in zip(sequences, y):
model.train(np.array([seq]), [label])
Alternatively, if your type of problem allows it, you can extract subsequences of the original time series with a length shorter than the shortest sequence. This third option also lets you add redundancy to the dataset if you have few samples, and reduces the chance of overfitting. A rough sketch follows below.
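To illustrate that third option, here is a windowing helper (extract_subsequences is my own name, not code from the question); it assumes you still have the raw, unpadded signals and that window_len is no longer than the shortest one:

import numpy as np

def extract_subsequences(signals, labels, window_len, stride=1):
    # cut every signal into overlapping windows of window_len samples,
    # each window inheriting the label of the signal it came from
    windows, window_labels = [], []
    for sig, lab in zip(signals, labels):
        for start in range(0, len(sig) - window_len + 1, stride):
            windows.append(sig[start:start + window_len])
            window_labels.append(lab)
    return np.array(windows), np.array(window_labels)

All resulting windows have the same length, so they can be fed to the LSTM without any masking or padding.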
EDIT:
Seanny123 (OP) pointed out that fchollet's lines above contain model.train, which is not valid code.
He solved the problem using batches of size 1 and the following code:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
x_val[one_indices, 0] = 1
y_val = np.zeros(num_samples, dtype=np.bool)
y_val[one_indices] = 1
return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_test, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
mean_tr_acc = []
mean_tr_loss = []
for seq_idx in range(X_train.shape[0]):
start_val = X_train[seq_idx, 0]
assert y_train[seq_idx] == start_val
assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_train[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
x_in = np.array([[[X_train[seq_idx][j]]]])
tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
mean_tr_acc.append(tr_acc)
mean_tr_loss.append(tr_loss)
model.reset_states()
print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
print('loss training = {}'.format(np.mean(mean_tr_loss)))
print('___________________________________')
mean_te_acc = []
mean_te_loss = []
for seq_idx in range(X_test.shape[0]):
start_val = X_test[seq_idx, 0]
assert y_test[seq_idx] == start_val
assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_test[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
mean_te_acc.append(te_acc)
mean_te_loss.append(te_loss)
model.reset_states()
print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
print('loss testing = {}'.format(np.mean(mean_te_loss)))
print('___________________________________')
I am attempting to build a Conditional GAN model based on jacobgil's keras-dcgan code (https://github.com/jacobgil/keras-dcgan).
The model architecture I assumed follows the figure in the original paper: http://cs231n.stanford.edu/reports/2015/pdfs/jgauthie_final_report.pdf
For the generator, I insert the condition (a set of one-hot vectors in this case) by first concatenating it with the noise, then feeding the concatenation through the generator.
For the discriminator, I insert the condition by concatenating it with a flattened layer in the middle of the model.
My code runs, but it generates random-looking images instead of the requested digits. Which step is wrong? Did I not insert the condition appropriately?
My result after running approximately 5500 iterations:
Code:
import warnings
warnings.filterwarnings('ignore')
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Input, merge
from keras.layers import Reshape, concatenate
from keras.layers.core import Activation
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy as np
import tensorflow as tf
from PIL import Image
import argparse
import math
K.set_image_dim_ordering('th')
# based on the labels below, we create a flattened array with 10 one-hot-vectors, and call it y_prime
labels = np.array([0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9])
def dense_to_one_hot(labels_dense, num_classes=10):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
# y_dim is the number of labels in one-hot vector form, hence it is 10
# y_p is a 100x10 matrix, so len(y_p) = 100. Note that len(y_p) must equal batch_size for the matrices to be concatenated properly
# Also y_dim = 10, which is the size of any one-hot vector
y_p = dense_to_one_hot(labels)
y_size = len(y_p)
y_dim = len(y_p[0])
#g_inputs is the input for generator
#auxiliary_input is the condition
#d_inputs is the input for discriminator
g_inputs = (Input(shape=(100,), dtype='float32'))
auxiliary_input = (Input(shape=(y_dim,), dtype='float32'))
d_inputs = (Input(shape=(1,28,28), dtype='float32'))
def generator_model():
T = concatenate([g_inputs,auxiliary_input])
T = (Dense(1024))(T)
T = (Dense(128*7*7))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (Reshape((128, 7, 7), input_shape=(128*7*7,)))(T)
T = (UpSampling2D(size=(2, 2)))(T)
T = (Convolution2D(64, 5, 5, border_mode='same'))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (UpSampling2D(size=(2, 2)))(T)
T = (Convolution2D(1, 5, 5, border_mode='same'))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
model = Model(input=[g_inputs,auxiliary_input], output=T)
return model
def discriminator_model():
T = (Convolution2D(filters= 64, kernel_size= (5,5), padding='same'))(d_inputs)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (MaxPooling2D(pool_size=(2, 2)))(T)
T = (Convolution2D(128, 5, 5))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (MaxPooling2D(pool_size=(2, 2)))(T)
T = (Flatten())(T)
T = concatenate([T, auxiliary_input])
T = (Dense(1024))(T)
T = (Activation('tanh'))(T)
T = (Dense(1))(T)
T = (Activation('sigmoid'))(T)
model = Model(input=[d_inputs,auxiliary_input], output=T)
return model
def generator_containing_discriminator(generator, discriminator):
T1 = generator([g_inputs, auxiliary_input])
discriminator.trainable = False
T2 = discriminator([T1,auxiliary_input])
model = Model(input=[g_inputs, auxiliary_input], output=T2)
return model
def combine_images(generated_images):
num = generated_images.shape[0]
width = int(math.sqrt(num))
height = int(math.ceil(float(num)/width))
shape = generated_images.shape[2:]
image = np.zeros((height*shape[0], width*shape[1]), dtype=generated_images.dtype)
for index, img in enumerate(generated_images):
i = int(index/width)
j = index % width
image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = img[0, :, :]
return image
def train(BATCH_SIZE,y_prime):
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (X_train.astype(np.float32) - 127.5)/127.5
X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
discriminator = discriminator_model()
generator = generator_model()
discriminator_on_generator = generator_containing_discriminator(generator, discriminator)
d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
generator.compile(loss='binary_crossentropy', optimizer="SGD")
discriminator_on_generator.compile(loss='binary_crossentropy', optimizer=g_optim)
discriminator.trainable = True
discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
noise = np.zeros((BATCH_SIZE, 100))
for epoch in range(100):
print("Epoch is", epoch)
print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))
for index in range(int(X_train.shape[0]/BATCH_SIZE)):
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
y_batch = dense_to_one_hot(y_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE])
y_batch = np.concatenate((y_batch , y_prime))
generated_images = generator.predict([noise,y_prime], verbose=0)
if index % 20 == 0:
image = combine_images(generated_images)
image = image*127.5+127.5
Image.fromarray(image.astype(np.uint8)).save(str(epoch)+"_"+str(index)+".png")
X = np.concatenate((image_batch, generated_images))
y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
d_loss = discriminator.train_on_batch([X,y_batch], y)
print("batch %d d_loss : %f" % (index, d_loss))
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
discriminator.trainable = False
g_loss = discriminator_on_generator.train_on_batch([noise,y_prime], [1] * BATCH_SIZE)
discriminator.trainable = True
print("batch %d g_loss : %f" % (index, g_loss))
if index % 10 == 9:
generator.save_weights('generator', True)
discriminator.save_weights('discriminator', True)
train(100,y_p)
Here is my code for building Conditional GAN (CGAN) with Keras: https://github.com/hklchung/GAN-GenerativeAdversarialNetwork/tree/master/CGAN
After 5 epochs on MNIST I get this:
MNIST CGAN output
and after 50 epochs on the CelebA dataset:
CelebA CGAN output
My experience is that if you don't see any good results after 20 epochs, something is wrong with your model and training it any longer won't improve your image quality.
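Independent of that repository, the conditioning idea itself can be sketched with the current Keras functional API roughly like this (a minimal illustration I wrote, not the linked code; the layer sizes are arbitrary): concatenate the one-hot label with the noise going into the generator, and with the flattened convolutional features inside the discriminator.

import tensorflow as tf
from tensorflow.keras import layers, Model

LATENT_DIM, N_CLASSES = 100, 10

# Generator: the condition enters by concatenation with the noise vector.
noise_in = layers.Input(shape=(LATENT_DIM,))
label_in = layers.Input(shape=(N_CLASSES,))  # one-hot condition
g = layers.Concatenate()([noise_in, label_in])
g = layers.Dense(128 * 7 * 7, activation="relu")(g)
g = layers.Reshape((7, 7, 128))(g)
g = layers.Conv2DTranspose(64, 5, strides=2, padding="same", activation="relu")(g)
g_out = layers.Conv2DTranspose(1, 5, strides=2, padding="same", activation="tanh")(g)
generator = Model([noise_in, label_in], g_out)

# Discriminator: the condition enters after the convolutional features are flattened.
img_in = layers.Input(shape=(28, 28, 1))
d_label_in = layers.Input(shape=(N_CLASSES,))
d = layers.Conv2D(64, 5, strides=2, padding="same", activation="relu")(img_in)
d = layers.Flatten()(d)
d = layers.Concatenate()([d, d_label_in])
d = layers.Dense(128, activation="relu")(d)
d_out = layers.Dense(1, activation="sigmoid")(d)
discriminator = Model([img_in, d_label_in], d_out)

If the generated images look like unconditioned noise, the first things to check are that the same label batch is fed to both the generator and the discriminator for a given batch, and that the labels are not shuffled independently of the images.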