This question already has answers here:
How do I create a variable-length input LSTM in Keras?
(4 answers)
Closed 5 years ago.
Despite going through multiple examples, I still don't understand how to classify sequences of varying length using Keras, similar to this question. I can train a network that detects the frequency of sinusoids of varying length by using masking:
from keras import models
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Masking
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.preprocessing.sequence import pad_sequences
import numpy as np
def gen_noise(noise_len, mag):
    return np.random.uniform(size=noise_len) * mag
def gen_sin(t_val, freq):
    return 2 * np.sin(2 * np.pi * t_val * freq)
def train_rnn(x_train, y_train, max_len, mask, number_of_categories):
    epochs = 3
    batch_size = 500
    # one hidden LSTM layer of 256 units
    vec_dims = 1
    hidden_units = 256
    in_shape = (max_len, vec_dims)
    model = models.Sequential()
    model.add(Masking(mask, name="in_layer", input_shape=in_shape))
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dense(number_of_categories, input_shape=(number_of_categories,),
                    activation='softmax', name='output'))
    model.compile(loss=categorical_crossentropy, optimizer=RMSprop())
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_split=0.05)
    return model
def gen_sig_cls_pair(freqs, t_stops, num_examples, noise_magnitude):
    x = []
    y = []
    num_cat = len(freqs)
    dt = 0.01
    max_t = int(np.max(t_stops) / dt)
    for f_i, f in enumerate(freqs):
        for t_stop in t_stops:
            t_range = np.arange(0, t_stop, dt)
            t_len = t_range.size
            for _ in range(num_examples):
                sig = gen_sin(t_range, f) + gen_noise(t_len, noise_magnitude)
                x.append(sig)
                one_hot = np.zeros(num_cat, dtype=bool)
                one_hot[f_i] = 1
                y.append(one_hot)
    pad_kwargs = dict(padding='post', maxlen=max_t, value=np.nan, dtype=np.float32)
    return pad_sequences(x, **pad_kwargs), np.array(y)
if __name__ == '__main__':
    noise_mag = 0.01
    mask_val = -10
    frequencies = (5, 7, 10)
    signal_lengths = (0.8, 0.9, 1)
    x_in, y_in = gen_sig_cls_pair(frequencies, signal_lengths, 50, noise_mag)
    mod = train_rnn(x_in[:, :, None], y_in, 100, mask_val, len(frequencies))
However, I don't understand how I'm supposed to tell Keras about the other, shorter sequences. I thought I could mask them too, but when I try, the network just outputs NaN.
import matplotlib.pyplot as plt
testing_dat, expected = gen_sig_cls_pair(frequencies, signal_lengths, 1, 0)
res = mod.predict(testing_dat[:, :, None])
fig, axes = plt.subplots(3)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(np.argmax(res, axis=1), "ro", label="result", alpha=0.2)
axes[1].plot(np.argmax(expected, axis=1), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
axes[2].plot(res)
plt.show()
How do I make a network that can evaluate inputs of varying lengths?
You can pad the input sequences (usually with zeros), or you can use batches of size 1 with varying input size, as outlined in fchollet's answer on the Keras GitHub:
for seq, label in zip(sequences, y):
    model.train(np.array([seq]), [label])
Alternatively, if your type of problem allows it, you can extract subsequences of the original time series with length less than that of the shortest sequence. This third option also lets you add redundancy to the dataset if you have few samples, reducing the chances of overfitting.
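If you go the padding route, the key detail (given the NaN issue above) is to pad with the same sentinel value that the Masking layer is told to skip. A minimal sketch, assuming a list x_list of 1-D float sequences and n_classes categories (both hypothetical names):

import numpy as np
from keras.models import Sequential
from keras.layers import Masking, LSTM, Dense
from keras.preprocessing.sequence import pad_sequences

mask_val = -10.  # sentinel that never occurs in the real signals
# Pad with the mask value itself, not NaN (NaN propagates through the network)
x_padded = pad_sequences(x_list, padding='post', value=mask_val,
                         dtype=np.float32)[:, :, None]

model = Sequential()
model.add(Masking(mask_val, input_shape=(x_padded.shape[1], 1)))
model.add(LSTM(32))
model.add(Dense(n_classes, activation='softmax'))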
EDIT:
Seanny123 (OP) pointed out that fchollet's lines above contain model.train, which is not valid code.
He solved the problem using batches of size 1 and the following code:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
    one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
    x_val = np.zeros((num_samples, seq_len), dtype=bool)
    x_val[one_indices, 0] = 1
    y_val = np.zeros(num_samples, dtype=bool)
    y_val[one_indices] = 1
    return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_test, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
    mean_tr_acc = []
    mean_tr_loss = []
    for seq_idx in range(X_train.shape[0]):
        start_val = X_train[seq_idx, 0]
        assert y_train[seq_idx] == start_val
        assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
        y_in = np.array([y_train[seq_idx]], dtype=bool)
        for j in range(np.random.choice(a=np.arange(5, recall_len + 1))):
            x_in = np.array([[[X_train[seq_idx][j]]]])
            tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
            mean_tr_acc.append(tr_acc)
            mean_tr_loss.append(tr_loss)
        model.reset_states()
    print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
    print('loss training = {}'.format(np.mean(mean_tr_loss)))
    print('___________________________________')
    mean_te_acc = []
    mean_te_loss = []
    for seq_idx in range(X_test.shape[0]):
        start_val = X_test[seq_idx, 0]
        assert y_test[seq_idx] == start_val
        assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
        y_in = np.array([y_test[seq_idx]], dtype=bool)
        for j in range(np.random.choice(a=np.arange(5, recall_len + 1))):
            te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=bool), y_in)
            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
        model.reset_states()
    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))
    print('___________________________________')
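At prediction time the same trick applies: since the model is stateful with batch_input_shape=(1, 1, 1), a sequence of any length can be fed one timestep at a time, resetting the state between sequences. A minimal sketch (my addition, not part of the original answer):

def predict_seq(model, seq):
    # Feed one timestep at a time; the LSTM state carries across calls
    for step in seq:
        prob = model.predict_on_batch(np.array([[[step]]]))
    model.reset_states()  # clear the state before the next sequence
    return prob  # prediction after the final timestep

print(predict_seq(model, X_test[0]))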
Related
I modified the code from here. What I'm trying to do is combine the two matrices to predict the output matrix, which is built from the two input matrices. The problem seems to be associated with these lines:
self.Combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
self.Combined_dense_2 = tf.keras.layers.Dense(units=16, activation="softmax")
The linked Medium tutorial only predicts a single number based on the combined mixed input. I, however, am trying to predict a whole matrix, but I don't know how to structure the combined layer (if that is even the problem).
The error: "ValueError: Shape mismatch: The shape of labels (received (40,)) should equal the shape of logits except for the last dimension (received (10, 16))."
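For reference, SparseCategoricalCrossentropy expects integer class labels of shape (batch,) against logits of shape (batch, num_classes); the error says the labels arrive as 40 flattened values while the model emits 16-way outputs for a batch of 10. A minimal shape check (illustrative values only):

import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
labels = tf.constant([3, 1, 0])     # shape (batch,) = (3,), integer class ids
logits = tf.random.normal((3, 16))  # shape (batch, num_classes) = (3, 16)
print(loss_fn(labels, logits))      # scalar loss, no shape mismatch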
The code:
import warnings
import sys
if not sys.warnoptions:
    warnings.simplefilter("ignore")
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow import keras
from IPython.display import clear_output
class model(keras.Model):
    def __init__(self):
        super().__init__()
        # The layers to process our image
        self.Conv2D_1 = tf.keras.layers.Conv2D(filters=32,
                                               kernel_size=(1, 1),
                                               strides=(1, 1))
        self.Conv2D_2 = tf.keras.layers.Conv2D(filters=32,
                                               kernel_size=(3, 3),
                                               strides=(1, 1))
        # our combined layers
        self.Combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
        self.Combined_dense_2 = tf.keras.layers.Dense(units=16, activation="softmax")
    def call(self, input_image_one, input_image_two):
        # Image model
        I = self.Conv2D_1(input_image_one)
        I = self.Conv2D_2(I)
        # Flatten I so we can merge our data.
        I = tf.keras.layers.Flatten()(I)
        N = self.Conv2D_1(input_image_two)
        N = self.Conv2D_2(N)
        N = tf.keras.layers.Flatten()(N)
        # Combined model
        x = tf.concat([N, I], 1)  # Concatenate through axis #1
        x = self.Combined_dense_1(x)
        x = self.Combined_dense_2(x)
        return x
network = model()
optimizer = tf.keras.optimizers.Adam()
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
def train_step(model, optimizer, loss_function,
               images_one_batch, images_two_batch,
               labels):
    with tf.GradientTape() as tape:
        model_output = model(images_one_batch, images_two_batch)
        print(model_output)
        loss = loss_function(labels, model_output)  # our labels vs our predictions
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss
def train(model, optimizer, loss_function, epochs,
          images_one_batch, images_two_batch,
          labels):
    loss_array = []
    for epoch in range(epochs):
        loss = train_step(model, optimizer, loss_function, images_one_batch, images_two_batch, labels)
        loss_array.append(loss)
        if (epoch + 1) % 20 == 0:
            # Calculating accuracy
            network_output = network(images_one_batch, images_two_batch)
            preds = np.argmax(network_output, axis=1)
            acc = 0
            for i in range(len(images_one_batch)):
                if preds[i] == labels[i]:
                    acc += 1
            print(" loss:", loss, " Accuracy: ", acc / len(images_one_batch) * 100, "%")
            clear_output(wait=True)
NumberofVars = 2
width = NumberofVars
height = NumberofVars
NumberOfComputationSets = 10
CM_MatrixArr1 = []
CM_MatrixArr2 = []
for j in range(NumberOfComputationSets):
    Theta1 = list(np.reshape(np.random.randint(2, size=4), (1, 4))[0])
    Theta1 = list(np.float_(Theta1))
    CM_MatrixArr1.append(Theta1)
    Theta2 = list(np.reshape(np.random.randint(2, size=4), (1, 4))[0])
    Theta2 = list(np.float_(Theta2))
    CM_MatrixArr2.append(Theta2)
combinedCM_MatrixArr = []
combinedCM_toIntArr = []
for x, y in zip(CM_MatrixArr1, CM_MatrixArr2):
    combinedCM = []
    combinedCM_toInt = 0
    for a, b in zip(x, y):
        LogVal = (a == b)
        combinedCM.append(float(LogVal == True))
    combinedCM_MatrixArr.append(combinedCM)
combinedCM_MatrixArr = np.array(combinedCM_MatrixArr)
combinedCM_MatrixArr = combinedCM_MatrixArr.reshape(NumberOfComputationSets, 2, 2)
CM_MatrixArr1 = np.array(CM_MatrixArr1)
CM_MatrixArr1 = CM_MatrixArr1.reshape(NumberOfComputationSets, 2, 2)
CM_MatrixArr1 = CM_MatrixArr1.reshape(NumberOfComputationSets, 2, 2, 1)
CM_MatrixArr2 = np.array(CM_MatrixArr2)
CM_MatrixArr2 = CM_MatrixArr2.reshape(NumberOfComputationSets, 2, 2)
CM_MatrixArr2 = CM_MatrixArr2.reshape(NumberOfComputationSets, 2, 2, 1)
train(network, optimizer, loss_function, 300, CM_MatrixArr1, CM_MatrixArr2, combinedCM_MatrixArr)
I'm currently using an LSTM model to make time-series predictions with TensorFlow 2.2.0.
I've been using a large dataset and everything works nicely.
However, the dataset creation takes a lot of RAM, so I wanted to use a tensorflow.keras.utils.Sequence to solve the issue. My problem is the following:
When using a Sequence, my model doesn't learn anymore (it predicts the average of the real signal over the whole dataset).
My dataset is created from two Python lists, x_train_flights and y_train_flights, each containing pandas DataFrames. For each (x_train_flight, y_train_flight) pair in these lists:
x_train_flight has shape (-1, features) and contains the feature signals
y_train_flight has shape (-1, 1) and contains one signal aligned in time with those from x_train_flights
The system looks as follows (I am not allowed to share the real data, so I've recreated the graph using pseudo-random signals instead):
Here, features=2 (the blue and orange lines), and look_back=5. That is to say, the 10 points (from x_train_flights) in the rectangle are used to predict the golden point (which is compared to the corresponding point in y_train_flights during the training phase). The gray points are previous predictions.
To create my dataset, I've been using these functions:
def lstm_shapify(sequence, look_back, features):
    res = np.empty((look_back, len(sequence), features), dtype=np.float32)
    for i in range(look_back):
        res[i] = np.roll(sequence, -i * features)
    return np.transpose(res, axes=(1, 0, 2))[:-look_back + 1]
def make_dataset(x_flights, y_flights, look_back, features):
    x = np.empty((0, look_back, features), dtype=np.float32)
    y = np.empty((0, 1), dtype=np.float32)
    for i in range(len(x_flights)):
        x_sample = x_flights[i].values
        y_sample = y_flights[i].values[look_back - 1:]
        x = np.concatenate([x, lstm_shapify(x_sample, look_back, features)])
        y = np.concatenate([y, y_sample])
    return x, y
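To make the windowing concrete, here is a small sanity check of what lstm_shapify returns (my own toy example, not from the original post):

# 6 timesteps, 2 features -> 4 overlapping windows of length 3
seq = np.arange(12, dtype=np.float32).reshape(6, 2)
windows = lstm_shapify(seq, 3, 2)
print(windows.shape)  # (4, 3, 2)
print(windows[0])     # rows 0..2 of seq
print(windows[1])     # rows 1..3 of seq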
And I fit my network with the following:
model.fit(
    x_train,
    y_train,
    epochs=7,
    batch_size=batch_size
)
So, I've created this custom Sequence:
from typing import List
from pandas import DataFrame
from tensorflow.keras.utils import Sequence

class LSTMGenerator(Sequence):
    def __init__(
        self,
        x_flights: List[DataFrame],
        y_flights: List[DataFrame],
        look_back: int,
        batch_size: int,
        features: int
    ):
        self.x_flights = x_flights
        self.y_flights = []
        self.look_back = look_back
        self.batch_size = batch_size
        self.features = features
        self.length = 0
        for y_flight in y_flights:
            y = y_flight.iloc[look_back - 1:].to_numpy()
            self.y_flights.append(y)
            self.length += len(y) // batch_size
    def __getitem__(self, index):
        flight_index = 0
        while True:
            n = len(self.y_flights[flight_index]) // self.batch_size
            if index < n:
                break
            flight_index += 1
            index = index - n
        start_index = index * self.batch_size
        x_batch = lstm_shapify(
            self.x_flights[flight_index]
            .iloc[start_index:start_index + self.batch_size + self.look_back - 1]
            .to_numpy(),
            self.look_back,
            self.features
        )
        y_batch = self.y_flights[flight_index][start_index:start_index + self.batch_size]
        return x_batch, y_batch
    def __len__(self):
        return self.length
Each tuple (x, y) it returns consists of two NumPy arrays of shape (batch_size, look_back, features) and (batch_size, 1), respectively.
And now I'm trying to fit it with:
model.fit(
    LSTMGenerator(x_train_flights, y_train_flights, look_back, batch_size, features),
    epochs=epochs
)
Here is my model:
model = Sequential()
model.add(LSTM(
    100,
    input_shape=(look_back, features),
    kernel_regularizer=regularizers.l2(1e-3),
    bias_regularizer=regularizers.l2(1e-4)
))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1, activation='tanh'))
model.compile(optimizer='adam', loss='mse')
Hope you can help me
EDIT: more details about the datasets
I solved it by taking a break and looking at the code once again (I realized it was a silly mistake): the issue with my Sequence came from the samples in each batch being consecutive in time, whereas the batches of my compute-everything dataset were nicely shuffled.
My Sequence was problematic because each batch was selected at a random index from a random dataset, but the samples within it stayed consecutive. Now I select each sample at a random index from a random dataset to build a single batch.
Here is a working example:
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.utils import *
import numpy as np
import tensorflow as tf
np.random.seed(1234)
tf.random.set_seed(1234)
features = 3
lookback = 7
model = Sequential()
model.add(LSTM(500, input_shape=(lookback, features)))
model.add(Dense(1, activation='tanh'))
XS = np.random.randn(200, features)
YS = np.random.randn(200)
class LookbackSeq(Sequence):
    def __init__(self, XS, YS, batch_size, lookback):
        self.XS = XS
        self.YS = YS
        self.batch_size = batch_size
        self.lookback = lookback
    def __len__(self):
        n_windows = self.XS.shape[0] - self.lookback
        return int(np.ceil(n_windows / self.batch_size))
    def __getitem__(self, i):
        base = i * self.batch_size
        n_windows = self.XS.shape[0] - self.lookback
        batch_size = min(n_windows - base, self.batch_size)
        X = np.zeros((batch_size, self.lookback, self.XS.shape[1]))
        Y = np.zeros((batch_size, 1))
        for i in range(batch_size):
            for j in range(self.lookback):
                X[i, j] = self.XS[base + i + j]
            Y[i] = self.YS[base + i + self.lookback]
        return X, Y
model.compile(optimizer='adam', loss='mse')
# ALL SAMPLES IN MEMORY
X, Y = [], []
for i in range(len(XS) - lookback):
    X.append(XS[i:i + lookback])
    Y.append(YS[i + lookback])
X, Y = np.array(X), np.array(Y)
model.fit(X, Y, epochs=10, batch_size=4, shuffle=False)
# GENERATED ON THE FLY
# gen = LookbackSeq(XS, YS, 4, lookback)
# model.fit(x=gen,
#           steps_per_epoch=len(gen),
#           shuffle=False,
#           epochs=10)
I'm assuming your input data has the shape X = (n_points, n_features) and Y = (n_points,). LookbackSeq does the batching and windowing (lookback) for you.
You can comment and uncomment the relevant lines to either train with samples generated on the fly or with them all stored in memory. You should get identical results.
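Since the earlier fix hinged on shuffling individual samples rather than whole batches, here is one way to add that to LookbackSeq (my own variant, not the author's code): draw each batch from a pool of window starts that is reshuffled every epoch via on_epoch_end:

class ShuffledLookbackSeq(LookbackSeq):
    def __init__(self, XS, YS, batch_size, lookback):
        super().__init__(XS, YS, batch_size, lookback)
        self.starts = np.arange(self.XS.shape[0] - self.lookback)
        np.random.shuffle(self.starts)
    def on_epoch_end(self):
        # Reshuffle the window start indices between epochs
        np.random.shuffle(self.starts)
    def __getitem__(self, i):
        starts = self.starts[i * self.batch_size:(i + 1) * self.batch_size]
        X = np.stack([self.XS[s:s + self.lookback] for s in starts])
        Y = self.YS[starts + self.lookback].reshape(-1, 1)
        return X, Y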
So, there is the universal approximation theorem, which says that a neural network can approximate any continuous function, provided it has at least one hidden layer and uses a non-linear activation there.
So my doubt is as follows: "How do I approximate a function using neural networks with my input being other functions?"
Let's say I want to approximate y = x + 1 and I have z_1 = 2x, z_2 = 3x + 3 and z_3 = 4x + 1, with x being time-variant. What I want my model to learn is the relationship between z_1, z_2, z_3 and y, since I may write y = -6*z_1 - 1*z_2 + 4*z_3 (indeed, -6(2x) - (3x + 3) + 4(4x + 1) = x + 1); I want my network to learn this relationship.
From time 0 to T, I have the values of all functions and can do supervised learning, but from time (T + 1) onward, I will only have z_1, z_2 and z_3, so I would use the network to approximate the future values of y based on these z functions (z_1, z_2, z_3).
How do I implement that in Python using Keras? I used the following code but didn't get any decent results.
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
n = 10000
def z_1(x):
    x_0 = []
    for i in x:
        x_0.append(2 * i)
    return x_0
def z_2(x):
    x_0 = []
    for i in x:
        x_0.append(3 * i + 3)
    return x_0
def z_3(x):
    x_0 = []
    for i in x:
        x_0.append(4 * i + 1)
    return x_0
def z_0(x):
    x_0 = []
    for i in x:
        x_0.append(i + 1)
    return x_0
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=3))
model.add(Dense(500, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
np.random.seed(seed = 2000)
input = np.random.random(n) * 10
dataset = z_0(input)
input_1 = z_1(input)
input_2 = z_2(input)
input_3 = z_3(input)
x_train = np.array([input_1[0:int(0.8*n)], input_2[0:int(0.8*n)], input_3[0:int(0.8*n)]])
y_train = np.array([dataset[0:int(0.8*n)]])
x_train = x_train.reshape(int(0.8*n), 3)
y_train = y_train.reshape(int(0.8*n),1)
es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=0,
                                   verbose=0, mode='auto')
model.fit(x_train, y_train, epochs=100, batch_size=128, callbacks = [es])
x_test = np.array([input_1[int(n-100):n], input_2[int(n-100):n], input_3[int(n-100):n]])
x_test = x_test.reshape(int(100), 3)
classes = model.predict(x_test, batch_size=128)
y_test = np.array([dataset[int(n-100):n]]).reshape(int(100),1)
plt.plot(y_test,c='b', label = 'test data')
plt.plot(classes,c='r', label = 'test result')
plt.legend()
plt.show()
You can't do this with a feedforward neural network. You need to do this with recurrent neural networks. Look up LSTM or GRU cells in Keras.
https://keras.io/layers/recurrent/
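To illustrate the suggestion (this is my sketch, not the answerer's code): the z signals would be windowed into sequences of shape (samples, timesteps, 3) and fed to a recurrent regressor. The window length and layer sizes here are hypothetical choices, and the arrays are dummy data standing in for the real windows:

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

timesteps = 10  # hypothetical window length
model = Sequential()
model.add(LSTM(32, input_shape=(timesteps, 3)))  # 3 features: z_1, z_2, z_3
model.add(Dense(1, activation='linear'))         # regression head for y
model.compile(loss='mean_squared_error', optimizer='adam')

# x: windows of [z_1, z_2, z_3]; y: the matching target values
x = np.random.random((100, timesteps, 3))
y = np.random.random((100, 1))
model.fit(x, y, epochs=2, batch_size=16)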
It looks like a simple CAE is not working for the Carvana dataset.
I'm trying a simple CAE on the Carvana dataset. You can download it here.
My code is the following:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.transform import downscale_local_mean
from skimage.color import rgb2grey
from os.path import join, isfile
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Input, concatenate
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import keras.backend as K
from scipy.ndimage.filters import gaussian_filter
from keras.optimizers import Adam
from random import randint
import hickle as hkl
import dill
class Data(object):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
input_folder = join('..', 'input')
print('Path:',input_folder)
data_file_name = 'datafile.pkl'
df_mask = pd.read_csv(join(input_folder, 'train_masks.csv'), usecols=['img'])
load_img = lambda im, idx: imread(join(input_folder, 'train', '{}_{:02d}.jpg'.format(im, idx)))
load_mask = lambda im, idx: imread(join(input_folder, 'train_masks', '{}_{:02d}_mask.gif'.format(im, idx)))
ids_train = df_mask['img'].map(lambda s: s.split('_')[0]).unique()
imgs_idx = list(range(1, 17))
resize = lambda im: downscale_local_mean(im, (4,4) if im.ndim==2 else (4,4,1))
mask_image = lambda im, mask: (im * np.expand_dims(mask, 2))
num_train = 48  # len(ids_train)
if isfile(data_file_name):
    # with open(data_file_name, 'rb') as f:
    data = hkl.load(data_file_name)
    X = data.X
    y = data.y
else:
    X = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    y = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    with tqdm_notebook(total=num_train) as bar:
        idx = 1  # Rotation index
        for i, img_id in enumerate(ids_train[:num_train]):
            imgs_id = [resize(load_img(img_id, j)) for j in imgs_idx]
            greyscale = rgb2grey(imgs_id[idx - 1]) / 255
            greyscale = np.expand_dims(greyscale, 2)
            X[i] = greyscale
            y_processed = resize(np.expand_dims(load_mask(img_id, idx), 2)) / 255.
            y[i] = y_processed
            del imgs_id  # Free memory
            bar.update()
    # data = Data(X, y)
    # with open(data_file_name, 'w+') as f:
    #     hkl.dump(data, data_file_name)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=43)
y_train_mean = y_train.mean(axis=0)
y_train_std = y_train.std(axis=0)
y_train_min = y_train.min(axis=0)
y_features = np.concatenate([y_train_mean, y_train_std, y_train_min], axis=2)
inp = Input((320, 480, 1))
conv1 = Conv2D(64, 3, activation='relu', padding='same')(inp)
max1 = MaxPooling2D(2)(conv1)
conv2 = Conv2D(48, 5, activation='relu', padding='same')(max1)
max2 = MaxPooling2D(2)(conv2)
conv3 = Conv2D(32, 7, activation='relu', padding='same')(max2)
deconv3 = Conv2DTranspose(32, 7, strides=4, activation='relu', padding='same')(conv3)
deconv2 = Conv2DTranspose(48, 5, strides=2, activation='relu', padding='same')(conv2)
deconvs = concatenate([conv1, deconv2, deconv3])
out = Conv2D(1, 7, activation='sigmoid', padding='same')(deconvs)
model = Model(inp, out)
model.summary()
smooth = 1.
# From here: https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py
def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
def bce_dice_loss(y_true, y_pred):
    return 0.5 * K.binary_crossentropy(y_true, y_pred) - dice_coef(y_true, y_pred)
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
model.load_weights(cae_filepath)
filepath="weights-improvement2_lre-5-{epoch:02d}-{val_acc:.5f}-{val_dice_coef:.5f}.hdf5"
mcp = ModelCheckpoint(filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
history = model.fit(X_train, y_train, epochs=1000, validation_data=(X_val, y_val), batch_size=22, verbose=2, callbacks=[mcp])
idxs = [0, X_val.shape[0] / 2, randint(1, X_val.shape[0] - 1)]
for idx in idxs:
    print('Index:', idx)
    x = X_val[idx]
    fig, ax = plt.subplots(3, 3, figsize=(16, 16))
    ax = ax.ravel()
    cmaps = ['Reds', 'Greens', 'Blues']
    for i in range(x.shape[-1]):
        ax[i].imshow(x[..., i], cmap='gray')  # cmaps[i % 3])
        ax[i].set_title('channel {}'.format(i))
    ax[-8].imshow(y_val[idx, ..., 0], cmap='gray')
    ax[-8].set_title('y')
    y_pred = model.predict(x[None]).squeeze()
    ax[-7].imshow(y_pred, cmap='gray')
    ax[-7].set_title('y_pred')
    ax[-6].imshow(gaussian_filter(y_pred, 1) > 0.5, cmap='gray')
    ax[-6].set_title('1')
    ax[-5].imshow(gaussian_filter(y_pred, 2) > 0.5, cmap='gray')
    ax[-5].set_title('2')
    ax[-4].imshow(gaussian_filter(y_pred, 3) > 0.5, cmap='gray')
    ax[-4].set_title('3')
    ax[-3].imshow(gaussian_filter(y_pred, 4) > 0.5, cmap='gray')
    ax[-3].set_title('4')
    ax[-2].imshow(gaussian_filter(y_pred, 5) > 0.5, cmap='gray')
    ax[-2].set_title('5')
    ax[-1].imshow(gaussian_filter(y_pred, 6) > 0.5, cmap='gray')
    ax[-1].set_title('6')
It's working fine without pre-training; you can check this by commenting out these lines:
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
model.load_weights(cae_filepath)
However, when I tried pre-training the autoencoder to reconstruct the original images, I got no accuracy improvement, only dice coefficient improvement:
Moreover, when I then used the pre-trained autoencoder for training to make predictions on the training data, I got a different result: accuracy stuck at 0.8374 and the dice coefficient degraded from 0.11864 initially down to 7.5781e-04:
Pre-training a model with an autoencoder should increase its accuracy; in my experience it improves accuracy to 99.62% on the full MNIST dataset with a simple CAE.
Also, I looked into the data to make sure it has the same nature in both cases (you can check this via the temporary debug variables in the code).
In the second case, my idea is that the problem may be caused by the fact that we load not only the encoder's but also the decoder's weights, which can potentially cause an issue during training.
After resetting the decoder's weights, I had almost the same picture for some time:
But after 49 iterations the process reached a crucial moment, and training became efficient:
However, I have no clue why accuracy does not increase during autoencoder training despite the dice coefficient improving; probably something is wrong with my code or the frameworks I'm using.
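For what it's worth, one way this can happen: Keras' binary accuracy thresholds predictions at 0.5, so reconstructions can move steadily closer to the target, improving the smoothed Dice, without any pixel crossing the threshold. A toy numpy sketch of that effect (my addition, not from the original post):

import numpy as np

def np_dice(y_true, y_pred, smooth=1.):
    # Same smoothed Dice as dice_coef above, in plain numpy
    intersection = np.sum(y_true * y_pred)
    return (2. * intersection + smooth) / (np.sum(y_true) + np.sum(y_pred) + smooth)

y_true = np.ones(100)        # toy all-ones target
before = np.full(100, 0.10)  # far from the target, all pixels below 0.5
after = np.full(100, 0.40)   # much closer, but still all below 0.5

print(np_dice(y_true, before), np_dice(y_true, after))  # Dice improves
print(np.mean((before > 0.5) == y_true),                # thresholded accuracy
      np.mean((after > 0.5) == y_true))                 # stays at 0.0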
Additional info:
My environment:
Ubuntu 16.04
Python 2.7
Theano 0.10
Keras 2.0.8
Structure:
Any suggestions will be appreciated
I am attempting to build a conditional GAN model based on Jacob's keras-dcgan code (https://github.com/jacobgil/keras-dcgan).
The model architecture I assumed is shown in the following picture:
original paper:
http://cs231n.stanford.edu/reports/2015/pdfs/jgauthie_final_report.pdf
For the generator, I insert the condition (a bunch of one-hot vectors in this case) by first concatenating it with the noise, then feeding the concatenation through the generator.
For the discriminator, I insert the condition by concatenating it with a flattened layer in the middle of the model.
My code runs, but it generates random-looking images instead of specific numbers. Which step is wrong? Did I not insert the condition appropriately?
My result after running approximately 5500 iterations:
Code:
import warnings
warnings.filterwarnings('ignore')
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Input, merge
from keras.layers import Reshape, concatenate
from keras.layers.core import Activation
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy as np
import tensorflow as tf
from PIL import Image
import argparse
import math
K.set_image_dim_ordering('th')
# based on the labels below, we create a flattened array with 10 one-hot-vectors, and call it y_prime
labels = np.array([0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9])
def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot
# y_dim is the number of labels in one hot vector form, hence its 10
# y_prime is a 100*10 matrix, and len(y_p) = 100. Note that len(y_prime) must equate to batch_size for the matrices to be properly concatenated
# Also y_dim=10, which is the size of any one-hot vector
y_p = dense_to_one_hot(labels)
y_size = len(y_p)
y_dim = len(y_p[0])
#g_inputs is the input for generator
#auxiliary_input is the condition
#d_inputs is the input for discriminator
g_inputs = (Input(shape=(100,), dtype='float32'))
auxiliary_input = (Input(shape=(y_dim,), dtype='float32'))
d_inputs = (Input(shape=(1,28,28), dtype='float32'))
def generator_model():
    T = concatenate([g_inputs, auxiliary_input])
    T = Dense(1024)(T)
    T = Dense(128 * 7 * 7)(T)
    T = BatchNormalization()(T)
    T = Activation('tanh')(T)
    T = Reshape((128, 7, 7), input_shape=(128 * 7 * 7,))(T)
    T = UpSampling2D(size=(2, 2))(T)
    T = Convolution2D(64, 5, 5, border_mode='same')(T)
    T = BatchNormalization()(T)
    T = Activation('tanh')(T)
    T = UpSampling2D(size=(2, 2))(T)
    T = Convolution2D(1, 5, 5, border_mode='same')(T)
    T = BatchNormalization()(T)
    T = Activation('tanh')(T)
    model = Model(input=[g_inputs, auxiliary_input], output=T)
    return model
def discriminator_model():
    T = Convolution2D(filters=64, kernel_size=(5, 5), padding='same')(d_inputs)
    T = BatchNormalization()(T)
    T = Activation('tanh')(T)
    T = MaxPooling2D(pool_size=(2, 2))(T)
    T = Convolution2D(128, 5, 5)(T)
    T = BatchNormalization()(T)
    T = Activation('tanh')(T)
    T = MaxPooling2D(pool_size=(2, 2))(T)
    T = Flatten()(T)
    T = concatenate([T, auxiliary_input])
    T = Dense(1024)(T)
    T = Activation('tanh')(T)
    T = Dense(1)(T)
    T = Activation('sigmoid')(T)
    model = Model(input=[d_inputs, auxiliary_input], output=T)
    return model
def generator_containing_discriminator(generator, discriminator):
    T1 = generator([g_inputs, auxiliary_input])
    discriminator.trainable = False
    T2 = discriminator([T1, auxiliary_input])
    model = Model(input=[g_inputs, auxiliary_input], output=T2)
    return model
def combine_images(generated_images):
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num) / width))
    shape = generated_images.shape[2:]
    image = np.zeros((height * shape[0], width * shape[1]), dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index / width)
        j = index % width
        image[i * shape[0]:(i + 1) * shape[0], j * shape[1]:(j + 1) * shape[1]] = img[0, :, :]
    return image
def train(BATCH_SIZE, y_prime):
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
    discriminator = discriminator_model()
    generator = generator_model()
    discriminator_on_generator = generator_containing_discriminator(generator, discriminator)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    generator.compile(loss='binary_crossentropy', optimizer="SGD")
    discriminator_on_generator.compile(loss='binary_crossentropy', optimizer=g_optim)
    discriminator.trainable = True
    discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
    noise = np.zeros((BATCH_SIZE, 100))
    for epoch in range(100):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0] / BATCH_SIZE))
        for index in range(int(X_train.shape[0] / BATCH_SIZE)):
            for i in range(BATCH_SIZE):
                noise[i, :] = np.random.uniform(-1, 1, 100)
            image_batch = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]
            y_batch = dense_to_one_hot(y_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE])
            y_batch = np.concatenate((y_batch, y_prime))
            generated_images = generator.predict([noise, y_prime], verbose=0)
            if index % 20 == 0:
                image = combine_images(generated_images)
                image = image * 127.5 + 127.5
                Image.fromarray(image.astype(np.uint8)).save(str(epoch) + "_" + str(index) + ".png")
            X = np.concatenate((image_batch, generated_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
            d_loss = discriminator.train_on_batch([X, y_batch], y)
            print("batch %d d_loss : %f" % (index, d_loss))
            for i in range(BATCH_SIZE):
                noise[i, :] = np.random.uniform(-1, 1, 100)
            discriminator.trainable = False
            g_loss = discriminator_on_generator.train_on_batch([noise, y_prime], [1] * BATCH_SIZE)
            discriminator.trainable = True
            print("batch %d g_loss : %f" % (index, g_loss))
            if index % 10 == 9:
                generator.save_weights('generator', True)
                discriminator.save_weights('discriminator', True)
train(100, y_p)
Here is my code for building Conditional GAN (CGAN) with Keras: https://github.com/hklchung/GAN-GenerativeAdversarialNetwork/tree/master/CGAN
After 5 epochs on MNIST I get this:
MNIST CGAN output
and after 50 epochs on the CelebA dataset:
CelebA CGAN output
My experience is that if you don't see any good results after 20 epochs, something is wrong with your model and training it any longer won't improve your image quality.