A simple CAE does not seem to work for the Carvana dataset
I’m trying a simple convolutional autoencoder (CAE) on the Carvana dataset. You can download it here.
My code is the following:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.transform import downscale_local_mean
from skimage.color import rgb2grey
from os.path import join, isfile
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Input, concatenate
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import keras.backend as K
from scipy.ndimage.filters import gaussian_filter
from keras.optimizers import Adam
from random import randint
import hickle as hkl
import dill
class Data(object):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
input_folder = join('..', 'input')
print('Path:',input_folder)
data_file_name = 'datafile.pkl'
df_mask = pd.read_csv(join(input_folder, 'train_masks.csv'), usecols=['img'])
load_img = lambda im, idx: imread(join(input_folder, 'train', '{}_{:02d}.jpg'.format(im, idx)))
load_mask = lambda im, idx: imread(join(input_folder, 'train_masks', '{}_{:02d}_mask.gif'.format(im, idx)))
ids_train = df_mask['img'].map(lambda s: s.split('_')[0]).unique()
imgs_idx = list(range(1, 17))
resize = lambda im: downscale_local_mean(im, (4,4) if im.ndim==2 else (4,4,1))
mask_image = lambda im, mask: (im * np.expand_dims(mask, 2))
num_train = 48  # len(ids_train)
if isfile(data_file_name):
    # with open(data_file_name, 'rb') as f:
    data = hkl.load(data_file_name)
    X = data.X
    y = data.Y
else:
    X = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    y = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    with tqdm_notebook(total=num_train) as bar:
        idx = 1  # Rotation index
        for i, img_id in enumerate(ids_train[:num_train]):
            imgs_id = [resize(load_img(img_id, j)) for j in imgs_idx]
            greyscale = rgb2grey(imgs_id[idx - 1]) / 255
            greyscale = np.expand_dims(greyscale, 2)
            X[i] = greyscale
            y_processed = resize(np.expand_dims(load_mask(img_id, idx), 2)) / 255.
            y[i] = y_processed
            del imgs_id  # Free memory
            bar.update()
    # data = Data(X, y)
    # with open(data_file_name, 'w+') as f:
    #     hkl.dump(data, data_file_name)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=43)
y_train_mean = y_train.mean(axis=0)
y_train_std = y_train.std(axis=0)
y_train_min = y_train.min(axis=0)
y_features = np.concatenate([y_train_mean, y_train_std, y_train_min], axis=2)
inp = Input((320, 480, 1))
conv1 = Conv2D(64, 3, activation='relu', padding='same')(inp)
max1 = MaxPooling2D(2)(conv1)
conv2 = Conv2D(48, 5, activation='relu', padding='same')(max1)
max2 = MaxPooling2D(2)(conv2)
conv3 = Conv2D(32, 7, activation='relu', padding='same')(max2)
deconv3 = Conv2DTranspose(32, 7, strides=4, activation='relu', padding='same')(conv3)
deconv2 = Conv2DTranspose(48, 5, strides=2, activation='relu', padding='same')(conv2)
deconvs = concatenate([conv1, deconv2, deconv3])
out = Conv2D(1, 7, activation='sigmoid', padding='same')(deconvs)
model = Model(inp, out)
model.summary()
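For reference, these layer definitions produce the following tensor shapes (derived from the code above; batch dimension omitted):

inp      (320, 480, 1)
conv1    (320, 480, 64)
max1     (160, 240, 64)
conv2    (160, 240, 48)
max2     (80, 120, 48)
conv3    (80, 120, 32)
deconv3  (320, 480, 32)   # strides=4 upsamples back to the input size
deconv2  (320, 480, 48)   # strides=2 upsamples back to the input size
deconvs  (320, 480, 144)  # concatenation of conv1, deconv2, deconv3
out      (320, 480, 1)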
smooth = 1.
# From here: https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py
def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def bce_dice_loss(y_true, y_pred):
    return 0.5 * K.binary_crossentropy(y_true, y_pred) - dice_coef(y_true, y_pred)
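For intuition, here is a quick NumPy check (not part of the original code) of what this dice coefficient evaluates to on a tiny hand-made example:

import numpy as np

smooth = 1.
y_true = np.array([1., 1., 0., 0.])
y_pred = np.array([0.9, 0.1, 0.2, 0.0])
intersection = np.sum(y_true * y_pred)  # = 1.0
dice = (2. * intersection + smooth) / (np.sum(y_true) + np.sum(y_pred) + smooth)
print(dice)  # (2 * 1.0 + 1) / (2.0 + 1.2 + 1) = 3.0 / 4.2 ~ 0.714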
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
model.load_weights(cae_filepath)
filepath="weights-improvement2_lre-5-{epoch:02d}-{val_acc:.5f}-{val_dice_coef:.5f}.hdf5"
mcp = ModelCheckpoint(filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
history = model.fit(X_train, y_train, epochs=1000, validation_data=(X_val, y_val), batch_size=22, verbose=2, callbacks=[mcp])
idxs = [0, X_val.shape[0] // 2, randint(1, X_val.shape[0] - 1)]
for idx in idxs:
    print('Index:', idx)
    x = X_val[idx]
    fig, ax = plt.subplots(3, 3, figsize=(16, 16))
    ax = ax.ravel()
    cmaps = ['Reds', 'Greens', 'Blues']
    for i in range(x.shape[-1]):
        ax[i].imshow(x[..., i], cmap='gray')  # cmaps[i % 3]
        ax[i].set_title('channel {}'.format(i))
    ax[-8].imshow(y_val[idx, ..., 0], cmap='gray')
    ax[-8].set_title('y')
    y_pred = model.predict(x[None]).squeeze()
    ax[-7].imshow(y_pred, cmap='gray')
    ax[-7].set_title('y_pred')
    # Thresholded predictions after Gaussian smoothing with sigma = 1..6
    for k, sigma in enumerate(range(1, 7)):
        ax[-6 + k].imshow(gaussian_filter(y_pred, sigma) > 0.5, cmap='gray')
        ax[-6 + k].set_title(str(sigma))
It works fine without pre-training; you can check this by commenting out these lines:
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
model.load_weights(cae_filepath)
However, when I pre-train the autoencoder to reconstruct the original images, I see no accuracy improvement, only a dice coefficient improvement:
Moreover, when I then use the pre-trained autoencoder for the segmentation training, I get a different result: accuracy is stuck at 0.8374 and the dice coefficient degrades from an initial 0.11864 down to 7.5781e-04:
Pre-training a model as an autoencoder should increase accuracy. In my experience it improves accuracy to 99.62% on the full MNIST dataset with a simple CAE.
Also, I inspected the data to make sure it has the same nature in both cases (you can see the temporary debug variables in the code).
In the second case, my idea is that it may be caused by the fact that we load not only the encoder’s but also the decoder’s weights, which could potentially cause an issue during training.
After resetting the decoder’s weights, I saw almost the same picture for some time:
But after epoch 49 the process reached a turning point and training became effective:
However, I have no clue why accuracy does not increase during autoencoder training despite the dice coefficient improving; probably something is wrong with my code or the frameworks I’m using.
Additional info:
My environment:
Ubuntu 16.04
Python 2.7
Theano 0.10
Keras 2.0.8
Structure: (model.summary() output omitted; the layer shapes are listed above)
Any suggestions would be appreciated.
Related
There is the universal approximation theorem, which says that a neural network can approximate any continuous function, provided it has at least one hidden layer with a non-linear activation.
My question is: how do I approximate a function using neural networks when my inputs are other functions?
Let's say I want to approximate y = x + 1 and I have z_1 = 2x, z_2 = 3x + 3 and z_3 = 4x + 1, with x being time-variant. What I want my model to learn is the relationship between z_1, z_2, z_3 and y, since I can write y = -6*z_1 - 1*z_2 + 4*z_3 (check: -6(2x) - (3x + 3) + 4(4x + 1) = x + 1). I want my network to learn this relationship.
From time 0 to T I have the values of all the functions and can do supervised learning, but from time T + 1 onward I will only have z_1, z_2 and z_3, so I would use the network to approximate the future values of y based on these z functions.
How do I implement that in Python using Keras? I used the following code but didn't get any decent results.
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
n = 10000
def z_1(x):
    x_0 = []
    for i in x:
        x_0.append(2 * i)
    return x_0

def z_2(x):
    x_0 = []
    for i in x:
        x_0.append(3 * i + 3)
    return x_0

def z_3(x):
    x_0 = []
    for i in x:
        x_0.append(4 * i + 1)
    return x_0

def z_0(x):
    x_0 = []
    for i in x:
        x_0.append(i + 1)
    return x_0
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=3))
model.add(Dense(500, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
np.random.seed(seed = 2000)
input = np.random.random(n) * 10
dataset = z_0(input)
input_1 = z_1(input)
input_2 = z_2(input)
input_3 = z_3(input)
x_train = np.array([input_1[0:int(0.8*n)], input_2[0:int(0.8*n)], input_3[0:int(0.8*n)]])
y_train = np.array([dataset[0:int(0.8*n)]])
x_train = x_train.reshape(int(0.8*n), 3)
y_train = y_train.reshape(int(0.8*n),1)
es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=0,
                                   verbose=0, mode='auto')
model.fit(x_train, y_train, epochs=100, batch_size=128, callbacks = [es])
x_test = np.array([input_1[int(n-100):n], input_2[int(n-100):n], input_3[int(n-100):n]])
x_test = x_test.reshape(int(100), 3)
classes = model.predict(x_test, batch_size=128)
y_test = np.array([dataset[int(n-100):n]]).reshape(int(100),1)
plt.plot(y_test,c='b', label = 'test data')
plt.plot(classes,c='r', label = 'test result')
plt.legend()
plt.show()
You can't do this with a plain feedforward neural network; you need a recurrent neural network. Look up LSTM or GRU cells in Keras.
https://keras.io/layers/recurrent/
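A minimal sketch of the kind of recurrent model this suggests, assuming you reshape the data into windows of the three z signals (window_len is a hypothetical choice, not from the question):

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

window_len = 10  # hypothetical window of past timesteps
model = Sequential()
# input shape: (timesteps, features) = (window_len, 3) for (z_1, z_2, z_3)
model.add(LSTM(32, input_shape=(window_len, 3)))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam')
# model.fit(x_windows, y_targets, epochs=10, batch_size=128)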
I've just started experimenting a bit with TensorFlow, but I feel like I have a hard time grasping the concepts. I'm currently focusing on the MNIST dataset, using only 8000 images for training and 2000 for testing. The little code snippet I currently have is:
from keras.layers import Input, Dense, initializers
from keras.models import Model
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras import optimizers, losses
import tensorflow as tf
import keras.backend as K
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
low_dim = 32
def autoencoder():
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    input = Input(shape=(num_features,))
    encoded = Dense(low_dim, activation='relu', kernel_initializer=w)(input)
    decoded = Dense(num_features, activation='sigmoid', kernel_initializer=w)(encoded)
    autoencoder = Model(input, decoded)
    adam = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
    autoencoder.compile(optimizer=adam, loss='binary_crossentropy')
    autoencoder.fit(d.X_train, d.X_train,
                    epochs=50,
                    batch_size=64,
                    shuffle=True,
                    )
    encoded_imgs = autoencoder.predict(d.X_test)
    decoded_imgs = autoencoder.predict(encoded_imgs)
    # sess = tf.InteractiveSession()
    # error = losses.mean_absolute_error(decoded_imgs[0], d.X_train[0])
    # print(error.eval())
    # print(decoded_imgs.shape)
    # sess.close()
    n = 20  # how many digits we will display
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display original
        # sess = tf.InteractiveSession()
        error = losses.mean_absolute_error(decoded_imgs[n], d.X_test[n])
        # print(error.eval())
        # print(decoded_imgs.shape)
        # sess.close()
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(d.X_test[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # display reconstruction
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(decoded_imgs[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    # print(error)
    plt.show()
    return error
What I want to do is store the errors in a list which I can later print or plot in a graph, but how do you do this efficiently with TensorFlow/Keras? Thanks in advance.
You can store the errors in a CSV file by using the CSVLogger callback. Here is a code snippet for this task:
from keras.callbacks import CSVLogger
# define callbacks
callbacks = [CSVLogger(path_csv_logger, separator=';', append=True)]

# pass callbacks to model.fit() or model.fit_generator()
model.fit_generator(
    train_batch, train_steps, epochs=10, callbacks=callbacks,
    validation_data=validation_batch, validation_steps=val_steps)
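To get the logged values back for printing or plotting, you can read the CSV afterwards (a sketch; path_csv_logger is the placeholder above, and the column names depend on the metrics you log):

import pandas as pd

log = pd.read_csv(path_csv_logger, sep=';')
print(log['loss'])  # one value per epoch; add 'val_loss' etc. if logged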
EDIT: For storing the errors in a list you can use something like this:
# source: https://keras.io/callbacks/
import keras

class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
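A minimal usage sketch (assuming the model and data names from the question): pass an instance to fit() and read .losses afterwards.

history = LossHistory()
model.fit(x_train, y_train, epochs=10, batch_size=64, callbacks=[history])
print(history.losses)  # one loss value per batch, ready to plot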
I have programmed a GAN model using Keras, but the training didn't go well. The generator always returns a bare noise image (28x28) instead of something similar to the MNIST digits. This doesn't give me any error though; when it comes to training, the discriminator model becomes trainable=False, which is not what I want.
If this implementation is bad, please let me know. Can anyone help?
import os
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization
from keras.optimizers import SGD, Adam, RMSprop
from keras.datasets import mnist
from keras.regularizers import l1_l2
def plot_generated(noise, Generator):
    image_fake = Generator.predict(noise)
    plt.figure(figsize=(10, 8))
    plt.show()
    plt.close()

def plot_metircs(metrics, epoch=None):
    plt.figure(figsize=(10, 8))
    plt.plot(metrics['d'], label='discriminative loss', color='b')
    plt.legend()
    plt.show()
    plt.close()
    plt.figure(figsize=(10, 8))
    plt.plot(metrics['g'], label='generative loss', color='r')
    plt.legend()
    plt.show()
    plt.close()
def Generator():
    model = Sequential()
    LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
    model.add(Dense(input_dim=100, units=128, activation=LeakyReLU, name='g_input'))
    model.add(Dense(input_dim=128, units=784, activation='tanh', name='g_output'))
    return model

def Discriminator():
    model = Sequential()
    LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
    model.add(Dense(input_dim=784, units=128, activation=LeakyReLU, name='d_input'))
    model.add(Dense(input_dim=128, units=1, activation='sigmoid', name='d_output'))
    model.compile(loss='binary_crossentropy', optimizer='Adam')
    return model

def Generative_Adversarial_Network(Generator, Discriminator):
    model = Sequential()
    model.add(Generator)
    model.add(Discriminator)
    # train only the generator in the combined GAN:
    # freeze the discriminator before compiling the stacked model
    Discriminator.trainable = False
    model.compile(loss='binary_crossentropy', optimizer='Adam')
    return model
def Training(z_input_size, Generator, Discriminator, GAN, loss_dict, X_train, epoch, batch, smooth):
    for e in range(epoch):
        # z: noise, used as input of G to generate a fake image; it's like a seed
        noise = np.random.uniform(-1, 1, size=[batch, z_input_size])
        image_fake = Generator.predict_on_batch(noise)
        # sample real images from the dataset
        rand_train_index = np.random.randint(0, X_train.shape[0], size=batch)
        image_real = X_train[rand_train_index, :]
        # concatenate real and fake images
        """
        X = [
            image_real => label : 1 (we can multiply by a smoothing factor)
            image_fake => label : 0
        ]
        """
        X = np.vstack((image_real, image_fake))
        y = np.zeros(len(X))
        # putting label "1" to image_real
        y[len(image_real):] = 1 * (1 - smooth)
        y = y.astype(int)
        # train only the discriminator
        d_loss = Discriminator.train_on_batch(x=X, y=y)
        # NOTE: remember?? we set the discriminator OFF during the training of the GAN!
        # So we can safely train only the generator; the discriminator weights stay fixed!
        g_loss = GAN.train_on_batch(x=noise, y=y[len(noise):])
        loss_dict['d'].append(d_loss)
        loss_dict['g'].append(g_loss)
        if e % 1000 == 0:
            plt.imshow(image_fake)
            plt.show()
            plot_generated(noise, Generator)
            plot_metircs(loss_dict)
    return "done!"
Gen = Generator()
Dis = Discriminator()
GAN = Generative_Adversarial_Network(Gen, Dis)
GAN.summary()
Gen.summary()
Dis.summary()
gan_losses = {"d":[], "g":[], "f":[]}
epoch = 30000
batch = 1000
smooth = 0.9
z_input_size = 100
row, col = 28, 28
examples = 9  # number of preview noise vectors; inferred from the reshape below
z_group_matrix = np.random.uniform(0, 1, examples*z_input_size)
z_group_matrix = z_group_matrix.reshape([9, z_input_size])
print(z_group_matrix.shape)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_test = X_train.reshape(X_train.shape[0], row*col), X_test.reshape(X_test.shape[0], row*col)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train, X_test = X_train/255, X_test/255
print('X_train shape: ', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Training(z_input_size, Gen, Dis, GAN, loss_dict=gan_losses, X_train=X_train, epoch=epoch, batch=batch, smooth=smooth)
The model itself is correct.
I would suggest a few minor changes:
smooth = 0.9 is too much; make it closer to 0.1.
The leak factor you have is 0.2; usually it's a very small decimal close to 0, around 0.01 or 0.02.
Batch size around 400.
Epochs around 2000.
And finally, early stopping with a somewhat large threshold.
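A sketch of how those suggestions map onto the question's code (the values are the ones proposed above, not verified settings):

import keras

smooth = 0.1  # milder label smoothing, per the suggestion above
LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.02)  # smaller leak factor
batch = 400
epoch = 2000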
This question already has answers here:
How do I create a variable-length input LSTM in Keras?
(4 answers)
Closed 5 years ago.
Despite going through multiple examples, I still don't understand how to classify sequences of varying length using Keras, similar to this question. I can train a network that detects the frequencies of sinusoids of varying length by using masking:
from keras import models
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Masking
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
import numpy as np
def gen_noise(noise_len, mag):
    return np.random.uniform(size=noise_len) * mag

def gen_sin(t_val, freq):
    return 2 * np.sin(2 * np.pi * t_val * freq)

def train_rnn(x_train, y_train, max_len, mask, number_of_categories):
    epochs = 3
    batch_size = 500
    # three hidden layers of 256 each
    vec_dims = 1
    hidden_units = 256
    in_shape = (max_len, vec_dims)
    model = models.Sequential()
    model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dense(number_of_categories, input_shape=(number_of_categories,),
                    activation='softmax', name='output'))
    model.compile(loss=categorical_crossentropy, optimizer=RMSprop())
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_split=0.05)
    return model
def gen_sig_cls_pair(freqs, t_stops, num_examples, noise_magnitude):
    x = []
    y = []
    num_cat = len(freqs)
    dt = 0.01
    max_t = int(np.max(t_stops) / dt)
    for f_i, f in enumerate(freqs):
        for t_stop in t_stops:
            t_range = np.arange(0, t_stop, dt)
            t_len = t_range.size
            for _ in range(num_examples):
                sig = gen_sin(f, t_range) + gen_noise(t_len, noise_magnitude)
                x.append(sig)
                one_hot = np.zeros(num_cat, dtype=np.bool)
                one_hot[f_i] = 1
                y.append(one_hot)
    pad_kwargs = dict(padding='post', maxlen=max_t, value=np.NaN, dtype=np.float32)
    return pad_sequences(x, **pad_kwargs), np.array(y)
if __name__ == '__main__':
    noise_mag = 0.01
    mask_val = -10
    frequencies = (5, 7, 10)
    signal_lengths = (0.8, 0.9, 1)
    x_in, y_in = gen_sig_cls_pair(frequencies, signal_lengths, 50, noise_mag)
    mod = train_rnn(x_in[:, :, None], y_in, 100, mask_val, len(frequencies))
However, I don't understand how I'm supposed to tell Keras about the other sequences. I thought I could mask them too, but when I try, the network just outputs NaN.
testing_dat, expected = gen_sig_cls_pair(frequencies, signal_lengths, 1, 0)
res = mod.predict(testing_dat[:, :, None])
fig, axes = plt.subplots(3)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(np.argmax(res, axis=1), "ro", label="result", alpha=0.2)
axes[1].plot(np.argmax(expected, axis=1), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
axes[2].plot(res)
plt.show()
How do I make a network that can evaluate inputs of varying lengths?
You can pad the input sequences (usually with zeros), or you can use batches of size 1 with a varying input size, as outlined in fchollet's answer on the Keras GitHub:
for seq, label in zip(sequences, y):
    model.train(np.array([seq]), [label])
Alternatively, if your type of problem allows it, you can extract subsequences of the original time series with a length less than that of the shortest sequence. This third option also lets you add redundancy to the dataset if you have few samples, and reduces the chance of overfitting; see the sketch below.
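A minimal sketch of that subsequence idea (a hypothetical helper, not from the original answer):

import numpy as np

def extract_subsequences(seq, window):
    # all contiguous windows of length `window` from a 1-D sequence
    return np.array([seq[i:i + window] for i in range(len(seq) - window + 1)])

# with window <= the length of your shortest series,
# every series yields fixed-length training samples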
EDIT:
Seanny123 (OP) pointed out that fchollet's lines above contain model.train, which is not valid code.
He solved the problem using batches of size 1 and the following code:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
    one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
    x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
    x_val[one_indices, 0] = 1
    y_val = np.zeros(num_samples, dtype=np.bool)
    y_val[one_indices] = 1
    return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_test, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
    mean_tr_acc = []
    mean_tr_loss = []
    for seq_idx in range(X_train.shape[0]):
        start_val = X_train[seq_idx, 0]
        assert y_train[seq_idx] == start_val
        assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
        y_in = np.array([y_train[seq_idx]], dtype=np.bool)
        for j in range(np.random.choice(a=np.arange(5, recall_len + 1))):
            x_in = np.array([[[X_train[seq_idx][j]]]])
            tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
            mean_tr_acc.append(tr_acc)
            mean_tr_loss.append(tr_loss)
        model.reset_states()

    print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
    print('loss training = {}'.format(np.mean(mean_tr_loss)))
    print('___________________________________')

    mean_te_acc = []
    mean_te_loss = []
    for seq_idx in range(X_test.shape[0]):
        start_val = X_test[seq_idx, 0]
        assert y_test[seq_idx] == start_val
        assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
        y_in = np.array([y_test[seq_idx]], dtype=np.bool)
        for j in range(np.random.choice(a=np.arange(5, recall_len + 1))):
            te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
        model.reset_states()

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))
    print('___________________________________')
I am trying to create two sequential models (each trained on a different set of images). Then I would like to take the average of their outputs and add a softmax layer, to give me a single classification output based on the two sequential models. My code is below, but I get an AttributeError that says 'Sequential' object has no attribute 'get_shape'.
The full error code is:
Traceback (most recent call last):
  File "Mergedmodels.py", line 135, in <module>
    merged = average([modelo, modelN1])
  File "G:\Anaconda\lib\site-packages\keras\layers\merge.py", line 481, in average
    return Average(**kwargs)(inputs)
  File "G:\Anaconda\lib\site-packages\keras\engine\topology.py", line 542, in __call__
    input_shapes.append(K.int_shape(x_elem))
  File "G:\Anaconda\lib\site-packages\keras\backend\tensorflow_backend.py", line 411, in int_shape
    shape = x.get_shape()
AttributeError: 'Sequential' object has no attribute 'get_shape'
Any idea on how to fix it?
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import merge
from keras.layers import average
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import mnist
import pandas as pd
from numpy import array
from PIL import Image
import matplotlib.pyplot as plt
from keras import backend as K
import glob
import os
K.set_image_dim_ordering('th')
np.random.seed(123) #set for reproducibility
size = 48, 48
#IMPORTING TRAINING IMAGES FOR FIRST MODEL (ORIGINAL)
folder = 'images'
read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
ims = [read(os.path.join(folder, filename)) for filename in os.listdir(folder)]
X_train = np.array([read(os.path.join(folder, filename)) for filename in os.listdir(folder)], dtype='uint8')
#CHECK print (X_train.shape)
X_train = X_train.reshape(X_train.shape[0],3,48,48)
#X_test = X_test.reshape(X_test.shape[0],1,28,28)
X_train = X_train.astype ('float32')
#X_test = X_test.astype ('float32')
X_train /= 255
#X_test /= 255
#IMPORTING TRAINING IMAGES FOR SECOND MODEL (NORMALIZED)
folder = 'images2'
read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
ims = [read(os.path.join(folder, filename)) for filename in os.listdir(folder)]
X_training = np.array([read(os.path.join(folder, filename)) for filename in os.listdir(folder)], dtype='uint8')
#CHECK print (X_train.shape)
X_training = X_training.reshape(X_train.shape[0],3,48,48)
#X_test = X_test.reshape(X_test.shape[0],1,28,28)
X_training = X_training.astype ('float32')
#X_test = X_test.astype ('float32')
X_training /= 255
#X_test /= 255
#IMPORTING LABELS FOR 10K TRAINING IMAGES
saved_column = pd.read_csv('labels4.csv')
y_labels = array(saved_column)
Y_train = np_utils.to_categorical(y_labels,501)
#y_train = np.array ([0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1])
#(X_train, y_train),(X_test, y_test) = mnist.load_data()
#COPYING LABELS FOR SECOND MODEL TRAINING IMAGES
#Y_training = Y_train
#IMPORTING TEST IMAGES
folder2 = 'test'
read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
ims = [read(os.path.join(folder2, filename)) for filename in os.listdir(folder2)]
X_test = np.array([read(os.path.join(folder2, filename)) for filename in os.listdir(folder2)], dtype='uint8')
X_test = X_test.reshape(X_test.shape[0],3,48,48)
X_test = X_test.astype ('float32')
X_test /= 255
#IMPORTING LABELS FOR TEST IMAGES
another_column = pd.read_csv('labelstest4.csv')
test_labels = array(another_column)
Y_test = np_utils.to_categorical(test_labels,501)
#train_labels = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
#Y_train = np_utils.to_categorical(y_train, 2)
#Y_test = np_utils.to_categorical(y_test,10)
#BUILDING FIRST NN FOR ORIGINAL IMAGES
modelo = Sequential()
modelo.add(Convolution2D(32,3,3, activation='relu', input_shape=(3,48,48), dim_ordering='th'))
modelo.add(Convolution2D(32,3,3, activation = 'relu'))
modelo.add(MaxPooling2D(pool_size=(2,2)))
modelo.add(Dropout(0.25))
modelo.add(Flatten())
modelo.add(Dense(128,activation='relu'))
modelo.add(Dropout(0.5))
modelo.add(Dense(501, activation = 'sigmoid'))
modelo.compile(loss='categorical_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])
modelo.fit(X_train, Y_train,
           batch_size=5, nb_epoch=5, verbose=1)
score = modelo.evaluate(X_test, Y_test, verbose=0)
#BUILDING SECOND NN FOR NORMALIZED IMAGES
modelN1 = Sequential()
modelN1.add(Convolution2D(32,3,3, activation='relu', input_shape=(3,48,48), dim_ordering='th'))
modelN1.add(Convolution2D(32,3,3, activation = 'relu'))
modelN1.add(MaxPooling2D(pool_size=(2,2)))
modelN1.add(Dropout(0.25))
modelN1.add(Flatten())
modelN1.add(Dense(128,activation='relu'))
modelN1.add(Dropout(0.5))
modelN1.add(Dense(501, activation = 'sigmoid'))
modelN1.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
modelN1.fit(X_training, Y_train,
            batch_size=5, nb_epoch=1, verbose=1)
score = modelN1.evaluate(X_test, Y_test, verbose=0)
#MERGING MODELS
merged = average([modelo, modelN1])
finalmodel = Sequential ()
finalmodel.add(merged)
finalmodel.add(Dense(501, activation = 'softmax'))
finalmodel.compile(loss='categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
Y_madeuplabels = np.array([0, 1, 52, 20])
Y_training = np_utils.to_categorical(Y_madeuplabels, 501)
finalmodel.fit([X_train], Y_training,
               batch_size=5, nb_epoch=1, verbose=1)
score = finalmodel.evaluate(X_test, Y_test, verbose=0)
print ("the code ran")
This way of combining sequential models doesn't work in Keras 2.0,
since average works over tensors, not layers. That is why the error message says the Sequential model has no get_shape() method; get_shape() exists only on tensors.
Here is an example that replicates the error:
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Dense, average

mod1 = Sequential()
mod1.add(Dense(1, input_shape=(10,)))

mod2 = Sequential()
mod2.add(Dense(1, input_shape=(10,)))

avg = average([mod1, mod2])  # throws AttributeError
A hacky way to get around this is to use the functional API to combine the outputs of the two models and then add the final classification layer on top. As an example:
X1 = np.random.rand(10, 10)
X2 = np.random.rand(10, 10)
Y = np.random.choice(2, 10)
mod1 = Sequential()
mod1.add(Dense(16, input_shape=(10,)))
mod2 = Sequential()
mod2.add(Dense(16, input_shape=(10,)))
# use the outputs of the two models to do the averaging;
# this way we average over tensors, __not__ models.
avg = average([mod1.output, mod2.output])
dense = Dense(1, activation="sigmoid")(avg)
# the two inputs are the inputs to the sequential models
# and the output is the dense layer
mod3 = Model(inputs=[mod1.input, mod2.input], outputs=[dense])
mod3.compile(loss='binary_crossentropy', optimizer='sgd')
mod3.fit([X1, X2], Y)
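Applied to the question's own models, the same pattern would look roughly like this (a sketch reusing the question's variable names; not verified against the full script):

from keras.models import Model
from keras.layers import Dense, average

# average the two CNNs' outputs, then add the 501-way softmax on top
avg = average([modelo.output, modelN1.output])
out = Dense(501, activation='softmax')(avg)
finalmodel = Model(inputs=[modelo.input, modelN1.input], outputs=[out])
finalmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# note: fit() now needs both input arrays, e.g.
# finalmodel.fit([X_train, X_training], Y_train, batch_size=5, nb_epoch=1, verbose=1)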