I am attempting to build a Conditional GAN model based on jacobgil's keras-dcgan code (https://github.com/jacobgil/keras-dcgan).
The model architecture I assume is the one pictured in the original paper:
http://cs231n.stanford.edu/reports/2015/pdfs/jgauthie_final_report.pdf
For the generator, I insert the condition (a batch of one-hot vectors in this case) by first concatenating it with the noise, then feeding the concatenation through the generator.
For the discriminator, I insert the condition by concatenating it with a flattened layer in the middle of the model.
My code runs, but it generates random-looking images instead of the specific digits I condition on. Which step is wrong? Did I not insert the condition appropriately?
My result after running approximately 5500 iterations:
Code:
import warnings
warnings.filterwarnings('ignore')
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.layers import Reshape, concatenate
from keras.layers.core import Activation
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy as np
import tensorflow as tf
from PIL import Image
import argparse
import math
K.set_image_dim_ordering('th')
# based on the labels below, we create an array of 100 one-hot vectors (10 per digit class) and call it y_p
labels = np.array([0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9])  # equivalent to np.repeat(np.arange(10), 10)
def dense_to_one_hot(labels_dense, num_classes=10):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes))
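# index_offset + labels_dense turns (row, class) pairs into flat row-major positions, so row i gets a 1 in column labels_dense[i]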
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
# y_dim is the length of a single one-hot label vector, hence it's 10
# y_p is a 100x10 matrix, so len(y_p) = 100; it is passed to train() as y_prime
# Note that len(y_p) must equal batch_size for the matrices to concatenate properly
y_p = dense_to_one_hot(labels)
y_size = len(y_p)
y_dim = len(y_p[0])
#g_inputs is the input for generator
#auxiliary_input is the condition
#d_inputs is the input for discriminator
g_inputs = (Input(shape=(100,), dtype='float32'))
auxiliary_input = (Input(shape=(y_dim,), dtype='float32'))
d_inputs = (Input(shape=(1,28,28), dtype='float32'))
def generator_model():
T = concatenate([g_inputs,auxiliary_input])
T = (Dense(1024))(T)
T = (Dense(128*7*7))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (Reshape((128, 7, 7)))(T)
T = (UpSampling2D(size=(2, 2)))(T)
T = (Convolution2D(64, (5, 5), padding='same'))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (UpSampling2D(size=(2, 2)))(T)
T = (Convolution2D(1, (5, 5), padding='same'))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
model = Model(inputs=[g_inputs, auxiliary_input], outputs=T)
return model
def discriminator_model():
T = (Convolution2D(filters= 64, kernel_size= (5,5), padding='same'))(d_inputs)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (MaxPooling2D(pool_size=(2, 2)))(T)
T = (Convolution2D(128, (5, 5)))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (MaxPooling2D(pool_size=(2, 2)))(T)
T = (Flatten())(T)
T = concatenate([T, auxiliary_input])
T = (Dense(1024))(T)
T = (Activation('tanh'))(T)
T = (Dense(1))(T)
T = (Activation('sigmoid'))(T)
model = Model(inputs=[d_inputs, auxiliary_input], outputs=T)
return model
def generator_containing_discriminator(generator, discriminator):
T1 = generator([g_inputs, auxiliary_input])
discriminator.trainable = False
T2 = discriminator([T1,auxiliary_input])
model = Model(inputs=[g_inputs, auxiliary_input], outputs=T2)
return model
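# tile a batch of generated images into a single grid image for saving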
def combine_images(generated_images):
num = generated_images.shape[0]
width = int(math.sqrt(num))
height = int(math.ceil(float(num)/width))
shape = generated_images.shape[2:]
image = np.zeros((height*shape[0], width*shape[1]), dtype=generated_images.dtype)
for index, img in enumerate(generated_images):
i = int(index/width)
j = index % width
image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = img[0, :, :]
return image
def train(BATCH_SIZE,y_prime):
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (X_train.astype(np.float32) - 127.5)/127.5
X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
discriminator = discriminator_model()
generator = generator_model()
discriminator_on_generator = generator_containing_discriminator(generator, discriminator)
d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
generator.compile(loss='binary_crossentropy', optimizer="SGD")
discriminator_on_generator.compile(loss='binary_crossentropy', optimizer=g_optim)
discriminator.trainable = True
discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
noise = np.zeros((BATCH_SIZE, 100))
for epoch in range(100):
print("Epoch is", epoch)
print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))
for index in range(int(X_train.shape[0]/BATCH_SIZE)):
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
y_batch = dense_to_one_hot(y_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE])
y_batch = np.concatenate((y_batch , y_prime))
generated_images = generator.predict([noise,y_prime], verbose=0)
if index % 20 == 0:
image = combine_images(generated_images)
image = image*127.5+127.5
Image.fromarray(image.astype(np.uint8)).save(str(epoch)+"_"+str(index)+".png")
X = np.concatenate((image_batch, generated_images))
y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
d_loss = discriminator.train_on_batch([X,y_batch], y)
print("batch %d d_loss : %f" % (index, d_loss))
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
discriminator.trainable = False
g_loss = discriminator_on_generator.train_on_batch([noise,y_prime], [1] * BATCH_SIZE)
discriminator.trainable = True
print("batch %d g_loss : %f" % (index, g_loss))
if index % 10 == 9:
generator.save_weights('generator', True)
discriminator.save_weights('discriminator', True)
train(100,y_p)
Here is my code for building a Conditional GAN (CGAN) with Keras: https://github.com/hklchung/GAN-GenerativeAdversarialNetwork/tree/master/CGAN
After 5 epochs on MNIST I get this:
MNIST CGAN output
and after 50 epochs on the CelebA dataset:
CelebA CGAN output
My experience is that if you don't see any good results after 20 epochs, something is wrong with your model, and training it longer won't improve your image quality.
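For reference, the conditioning itself can be as simple as concatenating the one-hot label with the noise input; here is a minimal sketch in the Keras functional API (layer sizes are illustrative, not the exact code from the repo above):
from keras.layers import Input, Dense, Reshape, Concatenate
from keras.models import Model
latent_dim, n_classes = 100, 10
noise = Input(shape=(latent_dim,))
label = Input(shape=(n_classes,))          # the one-hot condition
h = Concatenate()([noise, label])          # the condition enters by concatenation
h = Dense(128, activation='relu')(h)
img = Dense(28 * 28, activation='tanh')(h)
img = Reshape((28, 28, 1))(img)
generator = Model([noise, label], img)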
Related
I am working with graph convolutional neural networks for a node classification problem, following this article: https://towardsdatascience.com/graph-convolutional-networks-on-node-classification-2b6bbec1d042. I understand the code well, but I get the following error:
ValueError: Input 1 of layer "model" is incompatible with the layer: expected shape=(None, 2708), found shape=(None, 1000)
I understand this means the model expects an input of shape (None, 2708) but receives data of shape (None, 1000). The article used TensorFlow 2.2.0, while the latest is now 2.11.0. Downgrading to 2.2.0 is not possible (it can no longer be installed), and downgrading to 2.8.0 or 2.10.0 doesn't help; I get the same error. So I suspect a version issue, but I have no other ideas, and that theory didn't work out. The code from the article is below.
import numpy as np
import os
import networkx as nx
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.metrics import classification_report
from spektral.layers import GCNConv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from collections import Counter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
all_data = []
all_edges = []
for root, dirs, files in os.walk('./cora'):
for file in files:
if '.content' in file:
with open(os.path.join(root, file), 'r') as f:
all_data.extend(f.read().splitlines())
elif 'cites' in file:
with open(os.path.join(root, file), 'r') as f:
all_edges.extend(f.read().splitlines())
# Shuffle the data because the raw data is ordered based on the label
random_state = 77
all_data = shuffle(all_data, random_state=random_state)
#parse the data
labels = []
nodes = []
X = []
for i,data in enumerate(all_data):
elements = data.split('\t')
labels.append(elements[-1])
X.append(elements[1:-1])
nodes.append(elements[0])
X = np.array(X,dtype=int)
N = X.shape[0] #the number of nodes
F = X.shape[1] #the size of node features
print('X shape: ', X.shape)
#parse the edge
edge_list=[]
for edge in all_edges:
e = edge.split('\t')
edge_list.append((e[0],e[1]))
print('\nNumber of nodes (N): ', N)
print('\nNumber of features (F) of each node: ', F)
print('\nCategories: ', set(labels))
num_classes = len(set(labels))
print('\nNumber of classes: ', num_classes)
def limit_data(labels, limit=20, val_num=500, test_num=1000):
'''
Get the index of train, validation, and test data
'''
label_counter = dict((l, 0) for l in labels)
train_idx = []
for i in range(len(labels)):
label = labels[i]
if label_counter[label] < limit:
# add the example to the training data
train_idx.append(i)
label_counter[label] += 1
# exit the loop once we found 20 examples for each class
if all(count == limit for count in label_counter.values()):
break
# get the indices that do not go to traning data
rest_idx = [x for x in range(len(labels)) if x not in train_idx]
val_idx = rest_idx[:val_num]
test_idx = rest_idx[val_num:(val_num + test_num)]
return train_idx, val_idx, test_idx
train_idx, val_idx, test_idx = limit_data(labels)
# set the mask
train_mask = np.zeros((N,), dtype=bool)
train_mask[train_idx] = True
val_mask = np.zeros((N,), dtype=bool)
val_mask[val_idx] = True
test_mask = np.zeros((N,), dtype=bool)
test_mask[test_idx] = True
#build the graph
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edge_list)
#obtain the adjacency matrix (A)
A = nx.adjacency_matrix(G)
print('Graph info: ', nx.info(G))
def encode_label(labels):
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
labels = to_categorical(labels)
return labels, label_encoder.classes_
labels_encoded, classes = encode_label(labels)
# Parameters
channels = 16 # Number of channels in the first layer
dropout = 0.5 # Dropout rate for the features
l2_reg = 5e-4 # L2 regularization rate
learning_rate = 1e-2 # Learning rate
epochs = 200 # Number of training epochs
es_patience = 10 # Patience for early stopping
# Preprocessing operations
A = GCNConv.preprocess(A).astype('f4')
# Model definition
X_in = Input(shape=(F, ))
fltr_in = Input((N, ), sparse=True)
dropout_1 = Dropout(dropout)(X_in)
graph_conv_1 = GCNConv(channels,
activation='relu',
kernel_regularizer=l2(l2_reg),
use_bias=False)([dropout_1, fltr_in])
dropout_2 = Dropout(dropout)(graph_conv_1)
graph_conv_2 = GCNConv(num_classes,
activation='softmax',
use_bias=False)([dropout_2, fltr_in])
# Build model
model = Model(inputs=[X_in, fltr_in], outputs=graph_conv_2)
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer,
loss='categorical_crossentropy',
weighted_metrics=['acc'])
model.summary()
tbCallBack_GCN = tf.keras.callbacks.TensorBoard(
log_dir='./Tensorboard_GCN_cora',
)
callback_GCN = [tbCallBack_GCN]
# Train model
validation_data = ([X, A], labels_encoded, val_mask)
model.fit([X, A],
labels_encoded,
sample_weight=train_mask,
epochs=epochs,
batch_size=N,
validation_data=validation_data,
shuffle=False,
callbacks=[
EarlyStopping(patience=es_patience, restore_best_weights=True),
tbCallBack_GCN
])
# Evaluate model
X_te = X[test_mask]
A_te = A[test_mask,:][:,test_mask]
y_te = labels_encoded[test_mask]
y_pred = model.predict([X_te, A_te], batch_size=N)
report = classification_report(np.argmax(y_te,axis=1), np.argmax(y_pred,axis=1), target_names=classes)
print('GCN Classification Report: \n {}'.format(report))
# Get the hidden layer representation after the first GCN layer
layer_outputs = [layer.output for layer in model.layers]
activation_model = Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict([X, A], batch_size=N)
# Get t-SNE Representation
x_tsne = TSNE(n_components=2).fit_transform(activations[3])
def plot_tSNE(labels_encoded, x_tsne):
color_map = np.argmax(labels_encoded, axis=1)
plt.figure(figsize=(10, 10))
for cl in range(num_classes):
indices = np.where(color_map == cl)
indices = indices[0]
plt.scatter(x_tsne[indices, 0], x_tsne[indices, 1], label=cl)
plt.legend()
plt.show()
plot_tSNE(labels_encoded, x_tsne)
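A quick shape check (my own diagnostic lines, not from the article) confirms where the expectation comes from: the second model input is built with N = 2708 columns, while predict() receives the 1000x1000 test slice:
print(model.inputs[1].shape)  # (None, 2708) -- fixed when the model was built
print(A_te.shape)             # (1000, 1000) -- what predict() actually receives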
I would be thankful if anyone could solve this, because the code otherwise seems okay. Thanks a lot!
I modified the code from here. What I'm trying to do is combine two matrices to predict an output matrix, where the output matrix is built from the two input matrices. The problem seems to be associated with:
self.Combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
self.Combined_dense_2 = tf.keras.layers.Dense(units=16, activation="softmax")
The linked Medium tutorial only predicts a single number from the combined mixed input. I, however, am trying to predict a whole matrix, but I don't know how to structure the combined layers (if that is even the problem).
The error: "ValueError: Shape mismatch: The shape of labels (received (40,)) should equal the shape of logits except for the last dimension (received (10, 16))."
The code:
import warnings
import sys
if not sys.warnoptions:
warnings.simplefilter("ignore")
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow import keras
from IPython.display import clear_output
class model(keras.Model):
def __init__(self):
super().__init__()
# The layers to process our image
self.Conv2D_1 = tf.keras.layers.Conv2D(filters=32,
kernel_size=(1, 1),
strides=(1, 1)
)
self.Conv2D_2 = tf.keras.layers.Conv2D(filters=32,
kernel_size=(3, 3),
strides=(1, 1)
)
# our combined layers
self.Combined_dense_1 = tf.keras.layers.Dense(units=32, activation="relu")
self.Combined_dense_2 = tf.keras.layers.Dense(units=16, activation="softmax")
def call(self, input_image_one, input_image_two):
# Image model
I = self.Conv2D_1(input_image_one)
I = self.Conv2D_2(I)
# Flatten I so we can merge our data.
I = tf.keras.layers.Flatten()(I)
N = self.Conv2D_1(input_image_two)
N = self.Conv2D_2(N)
N = tf.keras.layers.Flatten()(N)
# Combined model
x = tf.concat([N, I], 1) # Concatenate through axis #1
x = self.Combined_dense_1(x)
x = self.Combined_dense_2(x)
return x
network = model()
optimizer = tf.keras.optimizers.Adam()
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
def train_step(model, optimizer, loss_function,
images_one_batch, images_two_batch,
labels):
with tf.GradientTape() as tape:
model_output = model(images_one_batch, images_two_batch)
print(model_output)
loss = loss_function(labels, model_output) # our labels vs our predictions
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
return loss
def train(model, optimizer, loss_function, epochs,
images_one_batch, images_two_batch,
labels):
loss_array = []
for epoch in range(epochs):
loss = train_step(model, optimizer, loss_function, images_one_batch, images_two_batch, labels)
loss_array.append(loss)
if ((epoch + 1) % 20 == 0):
# Calculating accuracy
network_output = network(images_one_batch, images_two_batch)
preds = np.argmax(network_output, axis=1)
acc = 0
for i in range(len(images_one_batch)):
if (preds[i] == labels[i]):
acc += 1
print(" loss:", loss, " Accuracy: ", acc / len(images_one_batch) * 100, "%")
clear_output(wait=True)
NumberofVars = 2
width = NumberofVars
height = NumberofVars
NumberOfComputationSets = 10
CM_MatrixArr1 = []
CM_MatrixArr2 = []
for j in range(NumberOfComputationSets):
Theta1 = list(np.reshape(np.random.randint(2, size=4), (1,4))[0])
Theta1 = list(np.float_(Theta1))
CM_MatrixArr1.append(Theta1)
Theta2 = list(np.reshape(np.random.randint(2, size=4), (1,4))[0])
Theta2 = list(np.float_(Theta2))
CM_MatrixArr2.append(Theta2)
combinedCM_MatrixArr = []
combinedCM_toIntArr = []
for x,y in zip(CM_MatrixArr1, CM_MatrixArr2):
combinedCM = []
combinedCM_toInt = 0
for a,b in zip(x,y):
LogVal = (a == b)
combinedCM.append(float(LogVal == True))
combinedCM_MatrixArr.append(combinedCM)
combinedCM_MatrixArr = np.array(combinedCM_MatrixArr)
combinedCM_MatrixArr = combinedCM_MatrixArr.reshape(NumberOfComputationSets,2,2)
CM_MatrixArr1 = np.array(CM_MatrixArr1)
CM_MatrixArr1 = CM_MatrixArr1.reshape(NumberOfComputationSets,2,2)
CM_MatrixArr1 = CM_MatrixArr1.reshape(NumberOfComputationSets, 2,2,1)
CM_MatrixArr2 = np.array(CM_MatrixArr2)
CM_MatrixArr2 = CM_MatrixArr2.reshape(NumberOfComputationSets,2,2)
CM_MatrixArr2 = CM_MatrixArr2.reshape(NumberOfComputationSets, 2,2,1)
train(network,optimizer,loss_function,300,CM_MatrixArr1,CM_MatrixArr2,combinedCM_MatrixArr)
I get an error. I think it might be because of the time steps.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pandas_datareader.data as web
import datetime as dt
import numpy as np
from tensorflow.keras import Model
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from tensorflow.keras.activations import relu
start = dt.datetime(2018,1,1)
end = dt.datetime(2019,1,1)
df = web.DataReader(name=['IBM', 'MSFT', 'NKE'],
data_source='yahoo',
start=start,
end=end).reset_index()['Close']
values = df.values
average_3_day = df.NKE.rolling(3).mean().values
previous_1_day = df.NKE.shift(-1).values
naive_3_day = tf.keras.metrics.mean_absolute_error(df['NKE'].values[2:], average_3_day[2:]).numpy()
naive_1_day = tf.keras.metrics.mean_absolute_error(df['NKE'].values[:-1], previous_1_day[:-1]).numpy()
print('The benchmark score of 3 day moving average is {:.4f}.'.format(naive_3_day))
print('The benchmark score of the previous day is {:.4f}.'.format(naive_1_day))
for val, fut in zip(df['NKE'].values[:10], previous_1_day[:10]):
print(f'Value: {val:>6.3f} Future: {fut:>6.3f}')
MEAN = np.mean(values[:200, :], axis=0)
STD = np.std(values[:200, :], axis=0)
data = (values - MEAN)/STD
def multivariate_data(dataset, target, start_index, end_index, history_size,
target_size, step, single_step=False):
data, labels = [], []
start_index = start_index + history_size
if end_index is None:
end_index = len(dataset) - target_size
for i in range(start_index, end_index):
indices = range(i-history_size, i, step)
data.append(dataset[indices])
if single_step:
labels.append(target[i+target_size])
else:
labels.append(target[i:i+target_size])
return np.array(data), np.array(labels)
PAST_HISTORY = 5
FUTURE_TARGET = 3
STEP = 5
x_train, y_train = multivariate_data(dataset=data,
target=data[:, -1],
start_index=0,
end_index=200,
history_size=PAST_HISTORY,
target_size=FUTURE_TARGET,
step=STEP)
x_test, y_test = multivariate_data(dataset=data,
target=data[:, -1],
start_index=200,
end_index=None,
history_size=PAST_HISTORY,
target_size=FUTURE_TARGET,
step=STEP)
train_data = tf.data.Dataset.from_tensors((x_train, y_train)).shuffle(len(x_train)).take(-1)
test_data = tf.data.Dataset.from_tensors((x_test, y_test)).shuffle(len(x_test)).take(-1)
print(next(iter(train_data))[0].shape)
print(next(iter(train_data))[1].shape)
class BiDirectionalLSTM(Model):
def __init__(self):
super(BiDirectionalLSTM, self).__init__()
self.bidr = Bidirectional(LSTM(32, activation=None, return_sequences=True))
self.dense = Dense(3)
def call(self, inputs, training=None, mask=None):
x = self.bidr(relu(inputs, alpha=2e-1))
x = self.dense(x)
return x
bidirec = BiDirectionalLSTM()
bidirec(next(iter(train_data)))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [1,192,2,3] != values[1].shape = [1,192,3] [Op:Pack] name: feat
First of all, as far as I can see, your x_train.shape is (195, 1, 3) and your y_train.shape is (195, 3).
So your output is 2-D, but you're setting return_sequences=True in your BiLSTM layer, which produces a 3-D output.
ref: https://keras.io/layers/recurrent/
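For illustration, a standalone sketch of the shape difference (dimensions taken from your data):
import tensorflow as tf
x = tf.random.normal((195, 1, 3))  # (batch, timesteps, features)
print(tf.keras.layers.LSTM(32, return_sequences=True)(x).shape)   # (195, 1, 32): 3-D
print(tf.keras.layers.LSTM(32, return_sequences=False)(x).shape)  # (195, 32): 2-D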
So, just fix this first.
class BiDirectionalLSTM(Model):
def __init__(self):
super(BiDirectionalLSTM, self).__init__()
self.bidr = Bidirectional(LSTM(32, activation=None, return_sequences=False))
self.dense = Dense(3)
def call(self, inputs, training=None, mask=None):
x = self.bidr(relu(inputs, alpha=2e-1))
x = self.dense(x)
return x
Secondly, I see you're passing next(iter(train_data)), but the Model object doesn't expect that.
You can write bidirec(x_train), which will run fine, but train_data yields two elements, x_train and y_train (the labels), and the Model is not designed to take both.
print(next(iter(train_data))[0].shape)
print(next(iter(train_data))[1].shape)
As you can see here, the two elements have different shapes. But you can do the following, and the code will run fine:
bidirec(next(iter(train_data))[0]) # only the training input data, not labels
That call gives you the model's predictions.
To train your model, you can simply do the following:
bidirec.compile('adam', 'mse')
bidirec.fit(x_train, y_train)
I am trying to make a VAE to encode movie names and then train it on a machine with 8 GPUs. The model compiles and fits as expected on a single GPU, but breaks when I try to run it on multiple. Here is the basic code of the autoencoder:
from keras.layers import Input, GRU, RepeatVector, Conv1D, Dense, TimeDistributed, Dropout, MaxPooling1D
from keras.models import Model
from keras.utils import to_categorical, plot_model
from keras.callbacks import ModelCheckpoint
import numpy as np
from keras import backend as K
from keras import metrics
from keras.layers import Lambda, Flatten, Layer
from keras import losses
import tensorflow as tf
import random
# Open file with 20k movie names from imdb
movies = open('/home/ubuntu/MovieNames/data/movies.dat')
data = []
# read data
for line in movies:
data += [line.split("\t")]
names = [x[1] for x in data]
# get rid of the header
movie_names = names[1:]
chars = list('abcdefghijklmnopqrstuvwxyz ') + ['<END>', '<NULL>']
indices_for_chars = {c: i for i, c in enumerate(chars)}
NAME_MAX_LEN = 35 # include the <END> char
def name_to_vec(name, maxlen=NAME_MAX_LEN):
name_lowercase = name.lower()
v = np.zeros(maxlen, dtype=int)
null_idx = indices_for_chars['<NULL>']
v.fill(null_idx)
# ignore cases
for i, c in enumerate(name_lowercase):
if i >= maxlen: break
n = indices_for_chars.get(c, null_idx)
v[i] = n
v[min(len(name_lowercase), maxlen-1)] = indices_for_chars['<END>']
return v
# convert to Keras-compatible form
names = np.array([to_categorical(name_to_vec(name),num_classes=len(chars)) for name in movie_names])
# Global parameters
NAME_LENGTH = names.shape[1]
ALPHABET = names.shape[2]
latent_dim = 10 * 8
intermediate_dim = 24 * 8
batch_size = 100 * 8
epochs = 20
epsilon_std = 0.01
i = Input(shape=(NAME_LENGTH, ALPHABET))
x = Conv1D(256, 9)(i)
x = Dropout(0.2)(x) # o
x = Conv1D(256, 7)(x)
x = MaxPooling1D(pool_size=3)(x)
x = Dropout(0.2)(x)
x = Conv1D(256, 3)(x)
x = Dropout(0.2)(x)
x = Flatten()(x)
x = Dense(intermediate_dim, activation='relu')(x)
x = Dropout(0.2)(x)
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)
def sampling(args):
z_mean, z_log_var = args
epsilon = K.random_normal(shape=(batch_size, latent_dim),
mean=0., stddev=epsilon_std)
return z_mean + K.exp(z_log_var) * epsilon
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
h = Dense(intermediate_dim, activation='relu')(z)
h = RepeatVector(NAME_LENGTH)(h)
h = GRU(256, return_sequences=True)(h)
h = Dropout(0.2)(h)
h = GRU(256, return_sequences=True)(h)
h = TimeDistributed(Dense(ALPHABET, activation='softmax'), name='decoded_mean')(h)
autoencoder = Model(i, h)
def vae_objective(y_true, y_pred):
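# recon: per-timestep categorical cross-entropy summed over the sequence;
# kl: closed-form KL divergence between the diagonal Gaussian q(z|x) and the unit Gaussian prior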
recon = K.sum(K.categorical_crossentropy(y_pred,y_true),axis=1)
kl = 0.5 * K.sum(K.exp(z_log_var) + K.square(z_mean) - 1. - z_log_var,axis=1)
return recon + kl
I then use the Keras multi-GPU tool to parallelize the code:
from keras import backend as K
from keras.models import Model
from keras.layers import Input
from keras.layers.core import Lambda
from keras.layers.merge import Concatenate
def slice_batch(x, n_gpus, part):
"""
Divide the input batch into [n_gpus] slices, and obtain slice no. [part].
i.e. if len(x)=10, then slice_batch(x, 2, 1) will return x[5:].
"""
sh = K.shape(x)
L = sh[0] // n_gpus  # integer division so the slice indices are ints
if part == n_gpus - 1:
return x[part*L:]
return x[part*L:(part+1)*L]
def to_multi_gpu(model, n_gpus=2):
"""Given a keras [model], return an equivalent model which parallelizes
the computation over [n_gpus] GPUs.
Each GPU gets a slice of the input batch, applies the model on that slice
and later the outputs of the models are concatenated to a single tensor,
hence the user sees a model that behaves the same as the original.
"""
with tf.device('/cpu:0'):
x = Input(model.input_shape[1:], name=model.input_names[0])
towers = []
for g in range(n_gpus):
with tf.device('/gpu:' + str(g)):
slice_g = Lambda(slice_batch, lambda shape: shape, arguments={'n_gpus':n_gpus, 'part':g})(x)
towers.append(model(slice_g))
with tf.device('/cpu:0'):
merged = Concatenate(axis=0)(towers)
return Model(inputs=[x], outputs=[merged])
When it's time to fit it is when I run into the issue:
model = to_multi_gpu(autoencoder, n_gpus=8)
model.compile(loss=vae_objective, optimizer='adam', metrics=["accuracy"])
model.fit(names[:8000], names[:8000], batch_size=batch_size)
gives me the following error:
InvalidArgumentError: You must feed a value for placeholder tensor 'input_4' with dtype float
[[Node: input_4 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Note that all of the parameters are evenly divisible by the number of GPUs, so I don't expect that to be the problem.
Use
model = to_multi_gpu(autoencoder, n_gpus=8)
model.compile(loss=vae_objective, optimizer='adam', metrics=["accuracy"])
model.fit(names[:8000], names[:8000], batch_size=batch_size*8)
i.e. build the VAE with batch_size, but run fit with batch_size * n_gpus.
Make sure the number of samples is divisible by batch_size * n_gpus.
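A minimal sketch of that divisibility guard, assuming n_gpus = 8 and the batch_size defined in the question:
n_gpus = 8
per_step = batch_size * n_gpus                 # batch handed to fit() on each step
n_fit = (len(names) // per_step) * per_step    # largest multiple of per_step
model.fit(names[:n_fit], names[:n_fit], batch_size=per_step)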
This question already has answers here:
How do I create a variable-length input LSTM in Keras?
(4 answers)
Closed 5 years ago.
Despite going through multiple examples, I still don't understand how to classify sequences of varying length using Keras, similar to this question. I can train a network that detects the frequencies of sinusoids of varying length by using masking:
from keras import models
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Masking
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.preprocessing.sequence import pad_sequences
import numpy as np
def gen_noise(noise_len, mag):
return np.random.uniform(size=noise_len) * mag
def gen_sin(t_val, freq):
return 2 * np.sin(2 * np.pi * t_val * freq)
def train_rnn(x_train, y_train, max_len, mask, number_of_categories):
epochs = 3
batch_size = 500
# three hidden layers of 256 each
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(number_of_categories, input_shape=(number_of_categories,),
activation='softmax', name='output'))
model.compile(loss=categorical_crossentropy, optimizer=RMSprop())
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_sig_cls_pair(freqs, t_stops, num_examples, noise_magnitude):
x = []
y = []
num_cat = len(freqs)
dt = 0.01
max_t = int(np.max(t_stops) / dt)
for f_i, f in enumerate(freqs):
for t_stop in t_stops:
t_range = np.arange(0, t_stop, dt)
t_len = t_range.size
for _ in range(num_examples):
sig = gen_sin(f, t_range) + gen_noise(t_len, noise_magnitude)
x.append(sig)
one_hot = np.zeros(num_cat, dtype=np.bool)
one_hot[f_i] = 1
y.append(one_hot)
pad_kwargs = dict(padding='post', maxlen=max_t, value=np.NaN, dtype=np.float32)
return pad_sequences(x, **pad_kwargs), np.array(y)
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
frequencies = (5, 7, 10)
signal_lengths = (0.8, 0.9, 1)
x_in, y_in = gen_sig_cls_pair(frequencies, signal_lengths, 50, noise_mag)
mod = train_rnn(x_in[:, :, None], y_in, 100, mask_val, len(frequencies))
However, I don't understand how I'm supposed to tell Keras about the other sequences. I thought I could mask them too, but when I try, they just output NaN.
testing_dat, expected = gen_sig_cls_pair(frequencies, signal_lengths, 1, 0)
res = mod.predict(testing_dat[:, :, None])
fig, axes = plt.subplots(3)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(np.argmax(res, axis=1), "ro", label="result", alpha=0.2)
axes[1].plot(np.argmax(expected, axis=1), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
axes[2].plot(res)
plt.show()
How do I make a network that can evaluate inputs of varying lengths?
You can pad the input sequences (usually with zeros) or you can use batches of size 1 with varying input size, as outlined in fchollet's answer on the Keras github:
for seq, label in zip(sequences, y):
model.train(np.array([seq]), [label])
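For the first (padding) option, a minimal sketch with masking (the mask value, layer sizes, and the names sequences / num_classes are assumptions, not from the question):
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Masking, LSTM, Dense
# Pad every sequence to a common length with a value that never occurs in the
# data, then let Masking tell the LSTM to skip the padded timesteps.
x_pad = pad_sequences(sequences, padding='post', value=0., dtype='float32')
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(x_pad.shape[1], 1)))
model.add(LSTM(32))
model.add(Dense(num_classes, activation='softmax'))
# reshape to (n, maxlen, 1) before fitting, e.g. model.fit(x_pad[:, :, None], y)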
Alternatively, if your type of problem allows it, you can extract subsequences of the original time series with a length shorter than the shortest sequence, as in the sketch below. This third option also lets you add redundancy to the dataset if you have few samples, and reduces the chances of overfitting.
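A minimal sketch of that subsequence extraction (the window size is an assumption you would tune; overlapping windows are what add the redundancy):
import numpy as np
def extract_subsequences(sequences, window):
    # Slide a fixed-size window over each series; every window becomes
    # a training example of identical length.
    out = []
    for seq in sequences:
        for start in range(len(seq) - window + 1):
            out.append(seq[start:start + window])
    return np.array(out)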
EDIT:
Seanny123 (OP) pointed out that fchollet's lines above contain model.train, which is not valid code.
He solved the problem using batches of size 1 and the following code:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
x_val[one_indices, 0] = 1
y_val = np.zeros(num_samples, dtype=np.bool)
y_val[one_indices] = 1
return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_test, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
mean_tr_acc = []
mean_tr_loss = []
for seq_idx in range(X_train.shape[0]):
start_val = X_train[seq_idx, 0]
assert y_train[seq_idx] == start_val
assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_train[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
x_in = np.array([[[X_train[seq_idx][j]]]])
tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
mean_tr_acc.append(tr_acc)
mean_tr_loss.append(tr_loss)
model.reset_states()
print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
print('loss training = {}'.format(np.mean(mean_tr_loss)))
print('___________________________________')
mean_te_acc = []
mean_te_loss = []
for seq_idx in range(X_test.shape[0]):
start_val = X_test[seq_idx, 0]
assert y_test[seq_idx] == start_val
assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_test[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
mean_te_acc.append(te_acc)
mean_te_loss.append(te_loss)
model.reset_states()
print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
print('loss testing = {}'.format(np.mean(mean_te_loss)))
print('___________________________________')