I followed this tutorial to create a custom generator for my Keras model. Here is an MWE that shows the issues I'm facing:
import sys, keras
import numpy as np
import tensorflow as tf
import pandas as pd
from keras.models import Model
from keras.layers import Dense, Input
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
class DataGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, list_IDs, batch_size, shuffle=False):
'Initialization'
self.batch_size = batch_size
self.list_IDs = list_IDs
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
return int(np.floor(len(self.list_IDs) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch
#print('self.batch_size: ', self.batch_size)
print('index: ', index)
sys.exit()
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.list_IDs))
print('self.indexes: ', self.indexes)
if self.shuffle == True:
np.random.shuffle(self.indexes)
def __data_generation(self, list_IDs_temp):
'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
X1 = np.empty((self.batch_size, 10), dtype=float)
X2 = np.empty((self.batch_size, 12), dtype=int)
#Generate data
for i, ID in enumerate(list_IDs_temp):
print('i is: ', i, 'ID is: ', ID)
#Preprocess this sample (omitted)
X1[i,] = np.repeat(1, X1.shape[1])
X2[i,] = np.repeat(2, X2.shape[1])
Y = X1[:,:-1]
return X1, X2, Y
if __name__=='__main__':
train_ids_to_use = list(np.arange(1, 321)) #1, 2, ...,320
valid_ids_to_use = list(np.arange(321, 481)) #321, 322, ..., 480
params = {'batch_size': 32}
train_generator = DataGenerator(train_ids_to_use, **params)
valid_generator = DataGenerator(valid_ids_to_use, **params)
#Build a toy model
input_1 = Input(shape=(3, 10))
input_2 = Input(shape=(3, 12))
y_input = Input(shape=(3, 10))
concat_1 = keras.layers.concatenate([input_1, input_2])
concat_2 = keras.layers.concatenate([concat_1, y_input])
dense_1 = Dense(10, activation='relu')(concat_2)
output_1 = Dense(10, activation='sigmoid')(dense_1)
model = Model([input_1, input_2, y_input], output_1)
print(model.summary())
#Compile and fit_generator
model.compile(optimizer=Adam(lr=0.001), loss=binary_crossentropy)
model.fit_generator(generator=train_generator, validation_data = valid_generator, epochs=2, verbose=2)
I don't want to shuffle my input data. I thought that was getting handled, but in my code, when I print out index in __get_item__, I get random numbers. I would like consecutive numbers. Notice I'm trying to kill the process using sys.exit inside __getitem__ to see what's going on.
My questions:
Why does index not go consecutively? How can I fix this?
When I run this in the terminal using screen, why doesn't it respond to Ctrl+C?
You can use shuffle argument of fit_generator method to generate batches consecutively. From fit_generator() documentation:
shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances of Sequence (keras.utils.Sequence). Has no effect when steps_per_epoch is not None.
Just pass shuffle=False to fit_generator:
model.fit_generator(generator=train_generator, shuffle=False, ...)
Related
The model I want to train is a stacked model of three different models.
average = tf.keras.layers.Average()([vgg.output, densenet_1.output, densenet_2.output])
dense = tf.keras.layers.Dense(1, activation='sigmoid')(average)
ensemble = tf.keras.Model([vgg.input, densenet_1.input, densenet_2.input], dense)
I could train the model ensemble with numpy array dataset like shown below
ensemble_history = ensemble.fit([X_train, X_train, X_train],y_train_enc,
epochs=100,
validation_data=([X_valid_original, X_valid_original, X_valid_original], y_valid_enc),
batch_size = 16)
But I want to know how to train this ensemble model with an object of ImageDataGenerator class of tensorflow..keras.preprocessing.image. I have written a custom DataGenerator but I am not able to make my way out. My custom Datagenerator look like this
class DataGenerator(tf.keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, paths, labels, batch_size=16, dim=(224, 224), n_channels=3,
n_classes=2, shuffle=True):
'Initialization'
self.dim = dim
self.batch_size = batch_size
self.labels = labels
self.paths = paths
self.n_channels = n_channels
self.n_classes = n_classes
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
return int(len(self.paths) / self.batch_size)
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
list_paths = [self.paths[k] for k in indexes]
y = [self.labels[k] for k in indexes]
y = np.array(y)
# Generate data
X = self.__data_generation(list_paths)
return X,X,X, y
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.paths))
if self.shuffle == True:
np.random.shuffle(self.indexes)
def __data_generation(self, list_paths):
'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
# Initialization
X = np.empty((self.batch_size, *self.dim, self.n_channels))
# Generate data
for i, path in enumerate(list_paths):
image = tf.keras.preprocessing.image.load_img(path)
image_arr = tf.keras.preprocessing.image.img_to_array(image)
image_arr = tf.image.resize(image_arr,(self.dim[0], self.dim[1])).numpy()
X[i,] = image_arr/255
return X
This generator outputs three same image(for each model) and the corresponding label as required by model, but it still doesn't work.
train_gen = DataGenerator(x_train, labels)
x1, x2, x3, y = train_gen[0]
print(x1.shape)
print(x2.shape)
print(x3.shape)
print(y.shape)
#(16, 224, 224, 3)
#(16, 224, 224, 3)
#(16, 224, 224, 3)
#(16,)
This question already has answers here:
PyTorch NotImplementedError in forward
(3 answers)
Closed 1 year ago.
i'm having problems traing to practice the Logistic Regression in pytorch.
I want to use the CIFAR10 dataset but i cant make the training loop because when i can excecute the Linnear function y recived an NotImplementedError
I probably have more than one error that I am not seeing because as I said I am learning.
I leave my code here.
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
from tqdm import tqdm
import torch.nn as nn
#IMPORTING DATA
datatest = mnist_train = datasets.CIFAR10(root="./datasets",
train=True,
transform=transforms.ToTensor(),
download=True)
datatrain = datasets.CIFAR10(root="./datasets",
train=False,
transform=transforms.ToTensor(),
download=True)
print (f'Number of CIFAR test examples {len(datatest)}')
print (f'Number of CIFAR train examples {len(datatest)}')
train_loader = torch.utils.data.DataLoader(datatrain, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(datatest, batch_size=100, shuffle=False)
data_train_iter = iter(train_loader)
images, labels = data_train_iter.next()
print("Shape of the minibatch of images: {}".format(images.shape))
print("Shape of the minibatch of labels: {}".format(labels.shape))
#n_samples, n_features = images.shape, labels.shape
#print(n_samples, n_features)
#MODEL
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.linear = nn.Linear(3072, 10)
def foward(self, x):
return self.linear(x)
#Inicializate model
model = Model()
#Criterion
criterion= nn.CrossEntropyLoss()
#Optimizer
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(),
lr=learning_rate)
# Iterate through train set minibatchs
for images, labels in tqdm(train_loader):
# Zero out the gradients
optimizer.zero_grad()
# Forward pass
x = images.view(-1, 32*32*3)
y = model(x)
loss = criterion(y, labels)
loss.backward()
optimizer.step()
## Testing
correct = 0
total = len(datatest)
with torch.no_grad():
# Iterate through test set minibatchs
for images, labels in tqdm(test_loader):
# Forward pass
x = images.view(-1, 32*32*3)
y = model(x)
predictions = torch.argmax(y, dim=1)
correct += torch.sum((predictions == labels).float())
print('Test accuracy: {}'.format(correct/total))
Thanks!
It is due to a spelling error of forward in your Model class. You have written it as foward. Please correct the spelling in
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.linear = nn.Linear(3072, 10)
def forward(self, x): # You have written it as `foward`
return self.linear(x)
I am attempting to implement a CNN-LSTM that classifies mel-spectrogram images representing the speech of people with Parkinson's Disease/Healthy Controls. I am trying to implement a pre-existing model (DenseNet-169) with an LSTM model, however I am running into the following error: ValueError: Input 0 of layer zero_padding2d is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: [None, 216, 1]. Can anyone advise where I'm going wrong?
import librosa
import os
import glob
import IPython.display as ipd
from pathlib import Path
import timeit
import time, sys
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import numpy as np
import cv2
import seaborn as sns
%tensorflow_version 1.x #version 1 works without problems
import tensorflow
from tensorflow.keras import models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import TimeDistributed
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense, BatchNormalization, Activation, GaussianNoise, LSTM
from sklearn.metrics import accuracy_score
DATA_DIR = Path('/content/drive/MyDrive/PhD_Project_Experiments/Spontaneous_Dialogue_PD_Dataset')
diagnosis = [x.name for x in DATA_DIR.glob('*') if x.is_dir()]
diagnosis
def create_paths_ds(paths: Path, label: str) -> list:
EXTENSION_TYPE = '.wav'
return [(x, label) for x in paths.glob('*' + EXTENSION_TYPE)]
from collections import Counter
categories_to_use = [
'Parkinsons_Disease',
'Healthy_Control',
]
NUM_CLASSES = len(categories_to_use)
print(f'Number of classes: {NUM_CLASSES}')
paths_all_labels = []
for cat in categories_to_use:
paths_all_labels += create_paths_ds(DATA_DIR / cat, cat)
X_train, X_test = train_test_split(paths_all_labels,test_size=0.1, stratify = [paths_all_labels[y][1] for y in range(len(paths_all_labels))] ) #fix stratified sampling for test data
X_train, X_val = train_test_split(X_train, test_size=0.2, stratify = [X_train[y][1] for y in range(len(X_train))] )
for i in categories_to_use:
print('Number of train samples for '+i+': '+ str([X_train[y][1] for y in range(len(X_train))].count(i))) #checks whether train samples are equally divided
print('Number of test samples for '+i+': '+ str([X_test[y][1] for y in range(len(X_test))].count(i))) #checks whether test samples are equally divided
print('Number of validation samples for '+i+': '+ str([X_val[y][1] for y in range(len(X_val))].count(i))) #checks whether val samples are equally divided
print(f'Train length: {len(X_train)}')
print(f'Validation length: {len(X_val)}')
print(f'Test length: {len(X_test)}')
def load_and_preprocess_lstm(dataset, SAMPLE_SIZE = 30):
IMG_SIZE = (216,128)
progress=0
data = []
labels = []
for (path, label) in dataset:
audio, sr = librosa.load(path)
dur = librosa.get_duration(audio, sr = sr)
sampleNum = int(dur / SAMPLE_SIZE)
offset = (dur % SAMPLE_SIZE) / 2
for i in range(sampleNum):
audio, sr = librosa.load(path, offset= offset+i, duration=SAMPLE_SIZE)
sample = librosa.feature.melspectrogram(audio, sr=sr)
# print(sample.shape)
sample = cv2.resize(sample, dsize=IMG_SIZE)
sample = np.expand_dims(sample,-1)
print(sample.shape)
data += [(sample, label)]
labels += [label]
progress +=1
print('\r Progress: '+str(round(100*progress/len(dataset))) + '%', end='')
return data, labels
def retrieve_samples(sample_size, model_type):
if model_type == 'cnn':
print("\nLoading train samples")
X_train_samples, train_labels = load_and_preprocess_cnn(X_train,sample_size)
print("\nLoading test samples")
X_test_samples, test_labels = load_and_preprocess_cnn(X_test,sample_size)
print("\nLoading val samples")
X_val_samples, val_labels = load_and_preprocess_cnn(X_val,sample_size)
print('\n')
elif model_type == 'lstm':
print("\nLoading train samples")
X_train_samples, train_labels = load_and_preprocess_lstm(X_train,sample_size)
print("\nLoading test samples")
X_test_samples, test_labels = load_and_preprocess_lstm(X_test,sample_size)
print("\nLoading val samples")
X_val_samples, val_labels = load_and_preprocess_lstm(X_val,sample_size)
print('\n')
elif model_type == "cnnlstm":
print("\nLoading train samples")
X_train_samples, train_labels = load_and_preprocess_lstm(X_train,sample_size)
print("\nLoading test samples")
X_test_samples, test_labels = load_and_preprocess_lstm(X_test,sample_size)
print("\nLoading val samples")
X_val_samples, val_labels = load_and_preprocess_lstm(X_val,sample_size)
print('\n')
print("shape: " + str(X_train_samples[0][0].shape))
print("number of training samples: "+ str(len(X_train_samples)))
print("number of validation samples: "+ str(len(X_val_samples)))
print("number of test samples: "+ str(len(X_test_samples)))
return X_train_samples, X_test_samples, X_val_samples
def create_cnn_lstm_model(input_shape):
model = Sequential()
cnn = tensorflow.keras.applications.DenseNet169(include_top=True, weights=None, input_tensor=None, input_shape=input_shape, pooling=None, classes=2)
# define LSTM model
model.add(tensorflow.keras.layers.TimeDistributed(cnn, input_shape=input_shape))
model.add(LSTM(units = 512, dropout=0.5, recurrent_dropout=0.3, return_sequences = True, input_shape = input_shape))
model.add(LSTM(units = 512, dropout=0.5, recurrent_dropout=0.3, return_sequences = False))
model.add(Dense(units=NUM_CLASSES, activation='sigmoid'))#Compile
model.compile(loss=tensorflow.keras.losses.binary_crossentropy, optimizer='adam', metrics=['accuracy'])
print(model.summary())
return model
def create_model_data_and_labels(X_train_samples, X_val_samples, X_test_samples):
#Prepare samples to work for training the model
labelizer = LabelEncoder()
#prepare training data and labels
x_train = np.array([x[0] for x in X_train_samples])
y_train = np.array([x[1] for x in X_train_samples])
y_train = labelizer.fit_transform(y_train)
y_train = to_categorical(y_train)
#prepare validation data and labels
x_val = np.array([x[0] for x in X_val_samples])
y_val = np.array([x[1] for x in X_val_samples])
y_val = labelizer.transform(y_val)
y_val = to_categorical(y_val)
#prepare test data and labels
x_test = np.array([x[0] for x in X_test_samples])
y_test = np.array([x[1] for x in X_test_samples])
y_test = labelizer.transform(y_test)
y_test = to_categorical(y_test)
return x_train, y_train, x_val, y_val, x_test, y_test, labelizer
#Main loop for testing multiple sample sizes
#choose model type: 'cnn' or 'lstm'
model_type = 'cnnlstm'
n_epochs = 20
patience= 20
es = EarlyStopping(patience=20)
fragment_sizes = [5,10]
start = timeit.default_timer()
ModelData = pd.DataFrame(columns = ['Model Type','Fragment size (s)', 'Time to Compute (s)', 'Early Stopping epoch', 'Training accuracy', 'Validation accuracy', 'Test Accuracy']) #create a DataFrame for storing the results
conf_matrix_data = []
for i in fragment_sizes:
start_per_size = timeit.default_timer()
print(f'\n---------- Model trained on fragments of size: {i} seconds ----------------')
X_train_samples, X_test_samples, X_val_samples = retrieve_samples(i,model_type)
x_train, y_train, x_val, y_val, x_test, y_test, labelizer = create_model_data_and_labels(X_train_samples, X_val_samples, X_test_samples)
if model_type == 'cnn':
model = create_cnn_model(X_train_samples[0][0].shape)
elif model_type == 'lstm':
model = create_lstm_model(X_train_samples[0][0].shape)
elif model_type == 'cnnlstm':
model = create_cnn_lstm_model(X_train_samples[0][0].shape)
history = model.fit(x_train, y_train,
batch_size = 8,
epochs=n_epochs,
verbose=1,
callbacks=[es],
validation_data=(x_val, y_val))
print('Finished training')
early_stopping_epoch = len(history.history['accuracy'])
training_accuracy = history.history['accuracy'][early_stopping_epoch-1-patience]
validation_accuracy = history.history['val_accuracy'][early_stopping_epoch-1-patience]
plot_data(history, i)
predictions = model.predict(x_test)
score = accuracy_score(labelizer.inverse_transform(y_test.argmax(axis=1)), labelizer.inverse_transform(predictions.argmax(axis=1)))
print('Fragment size = ' + str(i) + ' seconds')
print('Accuracy on test samples: ' + str(score))
conf_matrix_data += [(predictions, y_test, i)]
stop_per_size = timeit.default_timer()
time_to_compute = round(stop_per_size - start_per_size)
print ('Time to compute: '+str(time_to_compute))
ModelData.loc[len(ModelData)] = [model_type, i, time_to_compute, early_stopping_epoch, training_accuracy, validation_accuracy, score] #store particular settings configuration, early stoppping epoch and accuracies in dataframe
stop = timeit.default_timer()
print ('\ntime to compute: '+str(stop-start))
I believe the input_shape is (128, 216, 1)
The issue here is that you don't have a time-axis to time distribute your CNN (DenseNet169) layer over.
In this step -
tensorflow.keras.layers.TimeDistributed(cnn, input_shape=(128,216,1)))
You are passing the 128 dimension axis as a time-axis. That means each of the CNN (DenseNet169) is left with a input shape of (216,1), which is not an image and therefore throws an error because it's expecting 3D tensors (images) and not 2D tensors.
Your input shape needs to be a 4D tensor something like - (10, 128, 216, 1), so that the 10 becomes the time axis (for time distributing), and (128, 216, 1) becomes an image input for the CNN (DenseNet169).
A solution with ragged tensors and time-distributed layer
IIUC, your data contains n audio files, each file containing a variable number of mel-spectrogram images.
You need to use tf.raggedtensors to be able to work with variable tensor shapes as inputs to the model
This requires an explicit definition of an Input layer where you set ragged=True
This allows you to pass each audio file as a single sample, with variable images, each of which will be time distributed.
You will have to use None as the time distributed axis shape while defining the model
1. Creating a dummy dataset
Let's start with a sample dataset -
import tensorflow as tf
from tensorflow.keras import layers, Model, utils, applications
#Assuming there are 5 audio files
num_audio = 5
data = []
#Create a random number of mel-spectrograms for each audio file
for i in range(num_audio):
n_images = np.random.randint(4,10)
data.append(np.random.random((n_images,128,216,1)))
print([i.shape for i in data])
[(5, 128, 216, 1),
(5, 128, 216, 1),
(9, 128, 216, 1),
(6, 128, 216, 1),
(4, 128, 216, 1)]
So, your data should be looking something like this. Here, I have a dummy dataset with 5 audio files, first one has 5 images of shape (128,216,1), while the last one has 4 images of the same shape.
2. Converting them to ragged-tensors
Next, let's convert and store these are ragged tensors. Ragged tensors allow variable-length objects to be stored, in this case, a variable number of images. Read more about them here.
#Convert each set of images (for each audio) to tensors and then a ragged tensor
tensors = [tensorflow.convert_to_tensor(i) for i in data]
X_train = tensorflow.ragged.stack(tensors).to_tensor()
#Creating dummy y_train, one for each audio files
y_train = tensorflow.convert_to_tensor(np.random.randint(0,2,(5,2)))
3. Create a model
I am using a functional API since I find it more readable and works better with an explicit input layer, but you can use input layers in Sequential API as well. Feel free to convert it to your preference.
Notice that I am using (None,128,216,1) as input shape. This creates 5 channels (first implicit one for batches) as - (Batch, audio_files, h, w, channels)
I have a dummy LSTM layer to showcase how the architecture works, feel free to stack more layers. Also, do note, that your DenseNet169 is only returning 2 features. And therefore your TimeDistributed layers is returning (None, None, 2) shaped tensor, where first None is the number of audio files, and the second None is the number of images (time axis). Therefore, do choose your next layers accordingly as 512 LSTM cells may be too much :)
#Create model
inp = layers.Input((None,128,216,1), ragged=True)
cnn = tensorflow.keras.applications.DenseNet169(include_top=True,
weights=None,
input_tensor=None,
input_shape=(128,216,1), #<----- input shape for cnn is just the image
pooling=None, classes=2)
#Feel free to modify these layers!
x = layers.TimeDistributed(cnn)(inp)
x = layers.LSTM(8)(x)
out = layers.Dense(2)(x)
model = Model(inp, out)
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics='accuracy')
utils.plot_model(model, show_shapes=True, show_layer_names=False)
4. Train!
The next step is simply to train. Feel free to add your own parameters.
model.fit(X_train, y_train, epochs=2)
Epoch 1/2
WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_train_function.<locals>.train_function at 0x7f8e55b4fe50> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
1/1 [==============================] - 37s 37s/step - loss: 3.4057 - accuracy: 0.4000
Epoch 2/2
1/1 [==============================] - 16s 16s/step - loss: 3.3544 - accuracy: 0.4000
Hope that helps.
I get an error. I think it might be because of the time steps.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pandas_datareader.data as web
import datetime as dt
import numpy as np
from tensorflow.keras import Model
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from tensorflow.keras.activations import relu
start = dt.datetime(2018,1,1)
end = dt.datetime(2019,1,1)
df = web.DataReader(name=['IBM', 'MSFT', 'NKE'],
data_source='yahoo',
start=start,
end=end).reset_index()['Close']
values = df.values
average_3_day = df.NKE.rolling(3).mean().values
previous_1_day = df.NKE.shift(-1).values
naive_3_day = tf.keras.metrics.mean_absolute_error(df['NKE'].values[2:], ma_3_day[2:]).numpy()
naive_1_day = tf.keras.metrics.mean_absolute_error(df['NKE'].values[:-1], previous_1_day[:-1]).numpy()
print('The benchmark score of 3 day moving average is {:.4f}.'.format(naive_3_day))
print('The benchmark score of the previous day is {:.4f}.'.format(naive_1_day))
for val, fut in zip(df['NKE'].values[:10], previous_1_day[:10]):
print(f'Value: {val:>6.3f} Future: {fut:>6.3f}')
MEAN = np.mean(values[:200, :], axis=0)
STD = np.std(values[:200, :], axis=0)
data = (values - MEAN)/STD
def multivariate_data(dataset, target, start_index, end_index, history_size,
target_size, step, single_step=False):
data, labels = [], []
start_index = start_index + history_size
if end_index is None:
end_index = len(dataset) - target_size
for i in range(start_index, end_index):
indices = range(i-history_size, i, step)
data.append(dataset[indices])
if single_step:
labels.append(target[i+target_size])
else:
labels.append(target[i:i+target_size])
return np.array(data), np.array(labels)
PAST_HISTORY = 5
FUTURE_TARGET = 3
STEP = 5
x_train, y_train = multivariate_data(dataset=data,
target=data[:, -1],
start_index=0,
end_index=200,
history_size=PAST_HISTORY,
target_size=FUTURE_TARGET,
step=STEP)
x_test, y_test = multivariate_data(dataset=data,
target=data[:, -1],
start_index=200,
end_index=None,
history_size=PAST_HISTORY,
target_size=FUTURE_TARGET,
step=STEP)
train_data = tf.data.Dataset.from_tensors((x_train, y_train)).shuffle(len(x_train)).take(-1)
test_data = tf.data.Dataset.from_tensors((x_test, y_test)).shuffle(len(x_test)).take(-1)
print(next(iter(train_data))[0].shape)
print(next(iter(train_data))[1].shape)
class BiDirectionalLSTM(Model):
def __init__(self):
super(BiDirectionalLSTM, self).__init__()
self.bidr = Bidirectional(LSTM(32, activation=None, return_sequences=True))
self.dense = Dense(3)
def call(self, inputs, training=None, mask=None):
x = self.bidr(relu(inputs, alpha=2e-1))
x = self.dense(x)
return x
bidirec = BiDirectionalLSTM()
bidirec(next(iter(train_data)))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [1,192,2,3] != values[1].shape = [1,192,3] [Op:Pack] name: feat
First of all, as I can see your x_train.shape is (195, 1, 3) and `y_train.shape is (195, 3).
So, your output is 2-d, but you're setting return_sequences=True in your BiLSTM layer, this will produce 3-d output.
ref: https://keras.io/layers/recurrent/
So, just fix this first.
class BiDirectionalLSTM(Model):
def __init__(self):
super(BiDirectionalLSTM, self).__init__()
self.bidr = Bidirectional(LSTM(32, activation=None, return_sequences=False))
self.dense = Dense(3)
def call(self, inputs, training=None, mask=None):
x = self.bidr(relu(inputs, alpha=2e-1))
x = self.dense(x)
return x
Secondly, I see you're passing next(iter(train_data)), but the Model object doesn't expect that.
You can write bidirec(x_train) which will run fine, but train_data has two elements the x_train and y_train (the labels). The Model is not designed to take both x_train and y_train.
print(next(iter(train_data))[0].shape)
print(next(iter(train_data))[1].shape)
As you can see here, each has different dimensions. But you can do this and the code will run fine.
bidirec(next(iter(train_data))[0]) # only the training input data, not labels
From this the model actually gives you the prediction.
To train your model, you can just do the following,
bidirec.compile('adam', 'mse')
bidirec.fit(x_train, y_train)
I am trying to use the functional api to create an autoencoder in keras. Everything works fine, however, when I try to load the saved model it throws an error that relates to the Model subclassing api. It also throws an error related to autograph that I believe is not relevant to the model loading issue.
I am using tensorflow 2.1 from anaconda on windows 10 and running the code in Spyder 4.
My code with dummy data:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import load_model
import numpy as np
import h5py
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import time
import pickle
def build_autoencoder(width, height, depth, filters=[32], latentDim=64):
# initialize the input shape to be "channels last" along with
# the channels dimension itself
inputShape = (height, width, depth)
chanDim = -1
# define the input to the encoder
inputs = Input(shape=inputShape)
x = inputs
# loop over the number of filters
for f in filters:
# apply a CONV => RELU => BN operation
x = Conv2D(f, (3, 3), strides=2, padding="same")(x)
x = LeakyReLU(alpha=0.2)(x)
x = BatchNormalization(axis=chanDim)(x)
# flatten the network and then construct our latent vector
volumeSize = K.int_shape(x)
x = Flatten()(x)
latent = Dense(latentDim)(x)
# build the encoder model
encoder = Model(inputs, latent, name="encoder")
# start building the decoder model which will accept the
# output of the encoder as its inputs
latentInputs = Input(shape=(latentDim,))
x = Dense(np.prod(volumeSize[1:]))(latentInputs)
x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)
# loop over our number of filters again, but this time in
# reverse order
for f in filters[::-1]:
# apply a CONV_TRANSPOSE => RELU => BN operation
x = Conv2DTranspose(f, (3, 3), strides=2,
padding="same")(x)
x = LeakyReLU(alpha=0.2)(x)
x = BatchNormalization(axis=chanDim)(x)
# apply a single CONV_TRANSPOSE layer used to recover the
# original depth of the image
x = Conv2DTranspose(depth, (3, 3), padding="same")(x)
outputs = Activation("sigmoid")(x)
# build the decoder model
decoder = Model(latentInputs, outputs, name="decoder")
# our autoencoder is the encoder + decoder
autoencoder = Model(inputs, decoder(encoder(inputs)),
name="autoencoder")
# return a 3-tuple of the encoder, decoder, and autoencoder
return (encoder, decoder, autoencoder)
class DataGenerator(Sequence):
"""Generates data for Keras
Sequence based data generator. Suitable for building
data generator for training and prediction.
"""
def __init__(self, indexes, data_path, dataset_name,
to_fit=True, batch_size=16, dim=(256, 256),
n_channels=3, shuffle=True):
"""Initialization
:param num_samples: number of samples in dataset
:param data_path: path to data file location
:param dataset_name: name of datset in datafile
:param to_fit: True to return X and y, False to return X only
:param batch_size: batch size at each iteration
:param dim: tuple indicating image dimension
:param n_channels: number of image channels
:param shuffle: True to shuffle label indexes after every epoch
"""
self.indexes = np.sort(indexes)
self.data_path = data_path
self.dataset_name = dataset_name
self.to_fit = to_fit
self.batch_size = batch_size
self.dim = dim
self.n_channels = n_channels
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
"""Denotes the number of batches per epoch
:return: number of batches per epoch
"""
return int(np.floor(len(self.indexes) / self.batch_size))
def __getitem__(self, index):
"""Generate one batch of data
:param index: index of the batch
:return: X and y when fitting. X only when predicting
"""
# Generate indexes of the batch
indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
# Generate data
X = self._generate_X(indexes)
# normalise images
X = np.divide(X, 255.0)
if self.to_fit:
return X, X
else:
return X
def on_epoch_end(self):
"""Updates indexes after each epoch
"""
if self.shuffle == True:
np.random.shuffle(self.indexes)
def _generate_X(self, indexes):
"""Generates data containing batch_size images
:param list_IDs_temp: list of label ids to load
:return: batch of images
"""
# Generate data
with h5py.File(self.data_path, 'r') as f:
indexes = np.sort(indexes)
X = f[self.dataset_name][indexes, :, :, :]
return X
DATA_PATH = "simulation_data.hdf5"
DATA_NAME = "visual_obs"
EPOCHS = 3
BATCH = 16
DIM = [256, 256]
CHANNELS = 3
NUM_SAMPLES = 100
# Dummy data
with h5py.File('simulation_data.hdf5', 'w') as f:
vis_data = f.create_dataset('visual_obs', (NUM_SAMPLES, 256, 256, 3))
vis_data[:, :, :, :] = np.random.rand(NUM_SAMPLES, 256, 256, 3)
# construct training data generator and validation generator
number_train_samples = int(np.floor(NUM_SAMPLES*0.7))
number_val_samples = int(np.floor(NUM_SAMPLES*0.2))
indexes = np.arange(NUM_SAMPLES)
np.random.shuffle(indexes)
train_indexes = indexes[:number_train_samples]
val_indexes = indexes[number_train_samples:number_train_samples+number_val_samples]
test_indexes = indexes[number_train_samples+number_val_samples:]
train_generator = DataGenerator(train_indexes, DATA_PATH, DATA_NAME,
to_fit=True, batch_size=BATCH, dim=DIM,
n_channels=CHANNELS, shuffle=True)
val_generator = DataGenerator(val_indexes, DATA_PATH, DATA_NAME,
to_fit=True, batch_size=BATCH, dim=DIM,
n_channels=CHANNELS, shuffle=True)
# construct our convolutional autoencoder
(encoder, decoder, autoencoder) = build_autoencoder(*DIM, CHANNELS)
opt = Adam(lr=1e-3)
autoencoder.compile(loss="mse", optimizer=opt)
# train the convolutional autoencoder
H = autoencoder.fit(train_generator,
epochs=EPOCHS, validation_data = val_generator,
workers=4, use_multiprocessing=False)
ts = time.time()
autoencoder.save("model_test", save_format= "tf")
loaded_model = load_model("model_test")
The error:
WARNING: AutoGraph could not transform <function canonicalize_signatures.<locals>.signature_wrapper at 0x00000138CDBF2948> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause:
INFO:tensorflow:Assets written to: model_test\assets
Traceback (most recent call last):
File "C:\Users\seano\Thesis\test.py", line 190, in <module>
loaded_model = load_model("model_test")
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\save.py", line 150, in load_model
return saved_model_load.load(filepath, compile)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\saved_model\load.py", line 89, in load
model = tf_load.load_internal(path, loader_cls=KerasObjectLoader)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\saved_model\load.py", line 552, in load_internal
export_dir)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\saved_model\load.py", line 119, in __init__
self._finalize()
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\saved_model\load.py", line 157, in _finalize
created_layers={layer.name: layer for layer in node.layers})
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\network.py", line 1903, in reconstruct_from_config
process_node(layer, node_data)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\network.py", line 1851, in process_node
output_tensors = layer(input_tensors, **kwargs)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py", line 773, in __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\network.py", line 712, in call
raise NotImplementedError('When subclassing the `Model` class, you should'
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I think the root-cause of the issue is using DataGenerator(called as custom_objects) in your model.
When you have custom_objects in your model, then loading of the model is little different. You need to add custom_objects to the load_model as shown below
loaded_model = load_model('model_test',custom_objects={'DataGenerator':DataGenerator})