I am trying to create two sequential models (each trained on different sets of data - different images). Then I would like to take the average of their outputs, and add a softmax layer to give me a single classification output based on the two sequential models. My code is below, but I get an Attribute Error that says 'Sequential' object has no attribute 'get_shape'.
The full error code is:
Traceback (most recent call last):
File "Mergedmodels.pyu", line 135, in <module>
merged = average ([modelo, modelN1])
File "G:\Anaconda\lib\site-packages\keras\layers\merge.py", line 481, in average
return Average(**kwargs)(inputs)
File "G:\Anaconda\lib\site-packages\keras\engine\topology.py", line 542, in _ call_input_shapes.append(K.int_sshape(x_elem))
File "G:\Anaconda\lib\site-packages\keras\backend\tensorflow_backend.py", line 411, in int_shape
shape = x.get_shape()
AttributeError: 'Sequential' object has no attribute 'get_shape'
Any idea on how to fix it?
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import merge
from keras.layers import average
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import mnist
import pandas as pd
from numpy import array
from PIL import Image
import matplotlib.pyplot as plt
from keras import backend as K
import glob
import os
K.set_image_dim_ordering('th')
np.random.seed(123) #set for reproducibility
size = 48, 48
#IMPORTING TRAINING IMAGES FOR FIRST MODEL (ORIGINAL)
folder = 'images'
read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
ims = [read(os.path.join(folder, filename)) for filename in os.listdir(folder)]
X_train = np.array([read(os.path.join(folder, filename)) for filename in os.listdir(folder)], dtype='uint8')
#CHECK print (X_train.shape)
X_train = X_train.reshape(X_train.shape[0],3,48,48)
#X_test = X_test.reshape(X_test.shape[0],1,28,28)
X_train = X_train.astype ('float32')
#X_test = X_test.astype ('float32')
X_train /= 255
#X_test /= 255
#IMPORTING TRAINING IMAGES FOR SECOND MODEL (NORMALIZED)
folder = 'images2'
read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
ims = [read(os.path.join(folder, filename)) for filename in os.listdir(folder)]
X_training = np.array([read(os.path.join(folder, filename)) for filename in os.listdir(folder)], dtype='uint8')
#CHECK print (X_train.shape)
X_training = X_training.reshape(X_train.shape[0],3,48,48)
#X_test = X_test.reshape(X_test.shape[0],1,28,28)
X_training = X_training.astype ('float32')
#X_test = X_test.astype ('float32')
X_training /= 255
#X_test /= 255
#IMPORTING LABELS FOR 10K TRAINING IMAGES
saved_column = pd.read_csv('labels4.csv')
y_labels = array(saved_column)
Y_train = np_utils.to_categorical(y_labels,501)
#y_train = np.array ([0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1])
#(X_train, y_train),(X_test, y_test) = mnist.load_data()
#COPYING LABELS FOR SECOND MODEL TRAINING IMAGES
#Y_training = Y_train
#IMPORTING TEST IMAGES
folder2 = 'test'
read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
ims = [read(os.path.join(folder2, filename)) for filename in os.listdir(folder2)]
X_test = np.array([read(os.path.join(folder2, filename)) for filename in os.listdir(folder2)], dtype='uint8')
X_test = X_test.reshape(X_test.shape[0],3,48,48)
X_test = X_test.astype ('float32')
X_test /= 255
#IMPORTING LABELS FOR TEST IMAGES
another_column = pd.read_csv('labelstest4.csv')
test_labels = array(another_column)
Y_test = np_utils.to_categorical(test_labels,501)
#train_labels = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
#Y_train = np_utils.to_categorical(y_train, 2)
#Y_test = np_utils.to_categorical(y_test,10)
#BUILDING FIRST NN FOR ORIGINAL IMAGES
modelo = Sequential()
modelo.add(Convolution2D(32,3,3, activation='relu', input_shape=(3,48,48), dim_ordering='th'))
modelo.add(Convolution2D(32,3,3, activation = 'relu'))
modelo.add(MaxPooling2D(pool_size=(2,2)))
modelo.add(Dropout(0.25))
modelo.add(Flatten())
modelo.add(Dense(128,activation='relu'))
modelo.add(Dropout(0.5))
modelo.add(Dense(501, activation = 'sigmoid'))
modelo.compile(loss='categorical_crossentropy',
optimizer = 'adam',
metrics = ['accuracy'])
modelo.fit(X_train, Y_train,
batch_size = 5, nb_epoch= 5, verbose = 1)
score = modelo.evaluate(X_test, Y_test, verbose=0)
#BUILDING SECOND NN FOR NORMALIZED IMAGES
modelN1 = Sequential()
modelN1.add(Convolution2D(32,3,3, activation='relu', input_shape=(3,48,48), dim_ordering='th'))
modelN1.add(Convolution2D(32,3,3, activation = 'relu'))
modelN1.add(MaxPooling2D(pool_size=(2,2)))
modelN1.add(Dropout(0.25))
modelN1.add(Flatten())
modelN1.add(Dense(128,activation='relu'))
modelN1.add(Dropout(0.5))
modelN1.add(Dense(501, activation = 'sigmoid'))
modelN1.compile(loss='categorical_crossentropy',
optimizer = 'adam',
metrics = ['accuracy'])
modelN1.fit(X_training, Y_train,
batch_size = 5, nb_epoch= 1, verbose = 1)
score = modelN1.evaluate(X_test, Y_test, verbose=0)
#MERGING MODELS
merged = average([modelo, modelN1])
finalmodel = Sequential ()
finalmodel.add(merged)
finalmodel.add(Dense(501, activation = 'softmax'))
finalmodel.compile(loss='categorical_crossentropy',
optimizer = 'adam',
metrics = ['accuracy'])
Y_madeuplabels = np.array ([0, 1, 52, 20])
Y_training = np_utils.to_categorical(Y_madeuplabels, 501)
finalmodel.fit([X_train], Y_training,
batch_size = 5, nb_epoch= 1, verbose = 1)
score = finalmodel.evaluate(X_test, Y_test, verbose=0)
print ("the code ran")
This way of combining sequential models doesn't seem to work in Keras 2.0
since average works over tensors and not layers. That is the reason the error message which is saying that the Sequential model has noget_shape() methods; get_shape() exists only on Tensors.
Here is an example that replicates the error:
mod1 = Sequential()
mod1.add(Dense(1, input_shape=(10,)))
mod2 = Sequential()
mod2.add(Dense(1, input_shape=(10,)))
avg = average([mod1, mod2]) # throws AttributeError
A hacky way to get around this is to use the functional API to combine
the outputs of the two models and then do the softmax layer. As an example:
X1 = np.random.rand(10, 10)
X2 = np.random.rand(10, 10)
Y = np.random.choice(2, 10)
mod1 = Sequential()
mod1.add(Dense(16, input_shape=(10,)))
mod2 = Sequential()
mod2.add(Dense(16, input_shape=(10,)))
# so use the outputs of the models to do the average over
# this way we do averaging over tensor __not__ models.
avg = average([mod1.output, mod2.output])
dense = Dense(1, activation="sigmoid")(avg)
# the two inputs are the inputs to the sequential models
# and the output is the dense layer
mod3 = Model(inputs=[mod1.input, mod2.input], outputs=[dense])
mod3.compile(loss='binary_crossentropy', optimizer='sgd')
mod3.fit([X1, X2], Y)
Related
New to tf/python and have created a model that classifies text with a toxicity level (obscene, toxic, threat, etc). This is what I have so far and it does produce the summary, so I know it is loading correctly. How do I pass text to the model to return a prediction? Any help would be much appreciated.
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
checkpoint_path = "tf_model/the_model/saved_model.pb"
checkpoint_dir = os.path.dirname(checkpoint_path)
new_model = tf.keras.models.load_model(checkpoint_dir)
# Check its architecture
new_model.summary()
inputs = [
"tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment "
]
predictions = new_model.predict(inputs)
print(predictions)
I get many error messages, some of the long winded ones are as follows:
WARNING:tensorflow:Model was constructed with shape (None, 150) for input KerasTensor(type_spec=TensorSpec(shape=(None, 150), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'"), but it was called on an input with incompatible shape (None, 1).
ValueError: Negative dimension size caused by subtracting 3 from 1 for '{{node model/conv1d/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](model/conv1d/conv1d/ExpandDims, model/conv1d/conv1d/ExpandDims_1)' with input shapes: [?,1,1,256], [1,3,256,64].
This is the py code used to create and test it/prediction which works perfectly:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
TRAIN_DATA = "datasets/train.csv"
GLOVE_EMBEDDING = "embedding/glove.6B.100d.txt"
train = pd.read_csv(TRAIN_DATA)
train["comment_text"].fillna("fillna")
x_train = train["comment_text"].str.lower()
y_train = train[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values
max_words = 100000
max_len = 150
embed_size = 100
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(x_train)
x_train = tokenizer.texts_to_sequences(x_train)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_len)
embeddings_index = {}
with open(GLOVE_EMBEDDING, encoding='utf8') as f:
for line in f:
values = line.rstrip().rsplit(' ')
word = values[0]
embed = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = embed
word_index = tokenizer.word_index
num_words = min(max_words, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, embed_size), dtype='float32')
for word, i in word_index.items():
if i >= max_words:
continue
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
embedding_matrix[i] = embedding_vector
input = tf.keras.layers.Input(shape=(max_len,))
x = tf.keras.layers.Embedding(max_words, embed_size, weights=[embedding_matrix], trainable=False)(input)
x = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(128, return_sequences=True, dropout=0.1,
recurrent_dropout=0.1))(x)
x = tf.keras.layers.Conv1D(64, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
avg_pool = tf.keras.layers.GlobalAveragePooling1D()(x)
max_pool = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([avg_pool, max_pool])
preds = tf.keras.layers.Dense(6, activation="sigmoid")(x)
model = tf.keras.Model(input, preds)
model.summary()
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(lr=1e-3), metrics=['accuracy'])
batch_size = 128
checkpoint_path = "tf_model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
save_weights_only=True,
verbose=1)
callbacks = [
tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss'),
tf.keras.callbacks.TensorBoard(log_dir='logs'),
cp_callback
]
model.fit(x_train, y_train, validation_split=0.2, batch_size=batch_size,
epochs=1, callbacks=callbacks, verbose=1)
latest = tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest)
# Save the entire model as a SavedModel.
model.save('tf_model/the_model')
predictions = model.predict(np.expand_dims(x_train[42], 0))
print(tokenizer.sequences_to_texts([x_train[42]]))
print(y_train[42])
print(predictions)
Final solution:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
checkpoint_path = "tf_model/the_model/saved_model.pb"
checkpoint_dir = os.path.dirname(checkpoint_path)
new_model = tf.keras.models.load_model(checkpoint_dir)
max_words = 100000
max_len = 150
# Check its architecture
# new_model.summary()
inputs = ["tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment."]
# use same tokenizer used to build model
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(inputs)
# pass string to tokenizer and that 'array' is passed to predict
sequence = tokenizer.texts_to_sequences(inputs) # same tokenizer which is used on train data.
sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen = max_len)
predictions = new_model.predict(sequence)
print(predictions)
# [[0.0365479 0.01275077 0.02102855 0.00647011 0.02302513 0.00406089]]
It needs to be processed in the same way. This can be done with:
inputs = [
"tenserflow seems like it fits the bill but there are zero tutorials that outline
how to reuse a model in a production environment"]
sequence = tokenizer.texts_to_sequences(inputs) # same tokenizer which is used on train data.
sequence = pad_sequences(sequence, maxlen = max_len)
predictions = new_model.predict(sequence)
I have the following Convolutional Neural Network (CNN) in Keras, but keep having the prediction on the test images as class "1", provided that the training data is balanced. Any ideas on how I can solve this issue? Thanks.
from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import cv2
import numpy as np
import os
train_directory = '/train'
validation_directory = '/valid'
test_directory = '/test'
results_directory = '/results'
correct_classification = 0
number_of_test_images = 0
labels = []
prediction_probabilities = []
model = models.Sequential()
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(512,512,3)))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(128,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(256,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(512,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Flatten())
model.add(layers.Dense(1024,activation='relu'))
model.add(layers.Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['acc'])
train_data = ImageDataGenerator(rescale=1.0/255)
validation_data = ImageDataGenerator(rescale=1.0/255)
train_generator = train_data.flow_from_directory(train_directory,target_size=(512,512),batch_size=20,class_mode='binary')
validation_generator = validation_data.flow_from_directory(validation_directory,target_size=(512,512),batch_size=20,class_mode='binary')
history = model.fit_generator(train_generator,
steps_per_epoch=10,
epochs=10,
validation_data=validation_generator,
validation_steps=5)
model.save('my_model.h5')
for root, dirs, files in os.walk(test_directory):
for file in files:
img = cv2.imread(root + '/' + file)
img = cv2.resize(img,(512,512),interpolation=cv2.INTER_AREA)
img = np.expand_dims(img, axis=0)
img = img/255.0
if os.path.basename(root) == 'nevus':
label = 1
elif os.path.basename(root) == 'melanoma':
label = 0
labels.append(label)
img_class = model.predict_classes(img)
img_class_probability = model.predict(img)
prediction_probability = img_class_probability[0]
prediction_probabilities.append(prediction_probability)
prediction = img_class[0]
if prediction == label:
correct_classification = correct_classification + 1
The output of your network is the cause of constant prediction of "1". You need to have an two output units in your final layer. A similar question is asked here, and I have quoted the explanation from Matias below for convenience.
Softmax normalizes by the sum of exponential of each output. Since there is one output, the only possible output is 1.0.
For a binary classifier you can either use a sigmoid activation with the "binary_crossentropy" loss, or put two output units at the last layer, keep using softmax and change the loss to categorical_crossentropy.
I am training a neuron network with two types of input: image and BR (blue over red, it's kind of a non-image feature like height, weight...). To do that, i use fit function in keras, and convert image to list for input. But I don't know why the image list, which have 4 dimensions shape became 2 dimemsions when going into fit, and I got the error as below:
Error when checking input: expected dense_1_input to have 3
dimensions, but got array with shape (1630, 1)
When I converted the image list to array, I had checked the shape of image_array and it has exactly 4 dimensions (particularly its shape is 1630, 60, 60, 3). Even right before the fit function, it still has the same shape. So I really don't know why the shape became (1630,1). Could anyone explain for me?
Here is my code:
from keras.utils.np_utils import to_categorical
import pandas as pd
import numpy as np
import os
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Input, Activation, Dropout, Flatten, Dense,Concatenate, concatenate,Reshape, BatchNormalization, Merge
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.optimizers import Adagrad
from sklearn import preprocessing
from scipy.misc import imread
import time
from PIL import Image
import cv2
img_width, img_height = 60, 60
img_list = []
BR_list = []
label_list = []
data_num = 1630
folder1 = "cut2/train/sugi/"
folder2 = "cut2/train/hinoki/"
def imgConvert(file_path):
img = imread(file_path,flatten = True)
img = np.arange(1*3*60*60).reshape((60,60,3))
img = np.array(img).reshape(60,60,3)
img = img.astype("float32")
return img
def B_and_R(img_path):
img = cv2.imread(img_path)
B = 0
R = 0
for i in range(25,35):
#print(i)
for j in range(25,35):
B = B+img[i,j,0]
R = R+img[i,j,2]
#(j)
#(img[i,j])
ave_B = B/100
ave_R = R/100
BR = ave_B/ave_R
return BR
def getData(path,pollen):
for the_file in os.listdir(path):
#print(the_file)
file_path = os.path.join(path, the_file)
B_over_R = B_and_R(file_path)
img_arr = imgConvert(file_path)
#writer.writerow([img_arr,B_over_R,"sugi"])
img_list.append(img_arr)
BR_list.append(B_over_R)
lb = np.zeros(2)
if pollen == "sugi":
lb[0] +=1
else:
lb[1] +=1
label_list.append(lb)
if __name__ == '__main__':
getData(folder1,"sugi")
getData(folder2,"hinoki")
img_arr = np.array(img_list)
print(img_arr.shape)
#.reshape(img_list[0],1,img_width,img_height)
img_arr.astype("float32")
img_arr /= 255
print(img_arr.shape)
img_array = np.expand_dims(img_arr, axis = 0)
img_array = img_array[0,:,:,:,:]
print(img_array.shape)
"""
datagen = ImageDataGenerator(
featurewise_center=True,
featurewise_std_normalization=True,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True)
datagen.fit(img_array)
"""
#img_array = img_array.reshape(img_array[0],1,img_width,img_height)
print(img_array.shape)
label_arr = np.array(label_list)
print(label_arr.shape)
#label_array = np.expand_dims(label_arr, axis = 0)
#label_array = label_array[0,:,:,:,:]
BR_arr = np.array(BR_list)
print(BR_arr.shape)
#BR_array = np.expand_dims(BR_arr, axis = 0)
#BR_array = BR_array[0,:,:,:,:]
#print(len([img_arr,BR_arr]))
input_tensor = Input(shape=(img_width, img_height,3))
vgg16 = VGG16(include_top=False, weights='imagenet', input_tensor=input_tensor)
# FC層の作成
top_model = Sequential()
top_model.add(Flatten(input_shape=vgg16.output_shape[1:]))
#print(top_model.summary())
# VGG16とFC層を結合してモデルを作成
branch1 = Model(input=vgg16.input, output=top_model(vgg16.output))
#model.summary()
print(branch1.summary())
branch2 = Sequential()
branch2.add(Dense(1, input_shape=(data_num,1), activation='sigmoid'))
#branch1.add(Reshape(BR.shape, input_shape = BR.shape))
branch2.add(BatchNormalization())
branch2.add(Flatten())
print(branch2.summary())
merged = Merge([branch1, branch2], mode = "concat")
model = Sequential()
model.add(merged)
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
#last_model = Model(input = [branch1.input,branch2.input],output=model())
print(model.summary())
model.compile(loss='categorical_crossentropy',
optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
metrics=['accuracy'])
print(img_array.shape)
model.fit([img_array,BR_arr], label_arr,
epochs=5, batch_size=100, verbose=1)
Ok, then the problem is the input shape.
While your data for branch 2 is 2D (batch, 1), your model should also have a 2D input: input_shape = (1,). (Batch sizes are ignored in input_shape)
I am attempting to build a Conditional GAN model based on jacob's code on keras-dcgan (https://github.com/jacobgil/keras-dcgan).
The model architecture I assumed is the following picture:
original paper:
http://cs231n.stanford.edu/reports/2015/pdfs/jgauthie_final_report.pdf
For generator, I insert the condition (the condition is a bunch of one-hot vectors in this case) by first concatenating it with noise, then feed the concatenation through the generator.
For discriminator, I insert the condition by concatenating with a flattened layer in the middle of the model.
My code runs, but it generates some random graph instead of specific numbers. Which step is wrong? Did I not insert the condition appropriately?
My result after running approximately 5500 iterations:
Code:
import warnings
warnings.filterwarnings('ignore')
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Input, merge
from keras.layers import Reshape, concatenate
from keras.layers.core import Activation
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy as np
import tensorflow as tf
from PIL import Image
import argparse
import math
K.set_image_dim_ordering('th')
# based on the labels below, we create a flattened array with 10 one-hot-vectors, and call it y_prime
labels = np.array([0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9])
def dense_to_one_hot(labels_dense, num_classes=10):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
# y_dim is the number of labels in one hot vector form, hence its 10
# y_prime is a 100*10 matrix, and len(y_p) = 100. Note that len(y_prime) must equate to batch_size for the matrices to be properly concatenated
# Also y_dim=10, which is the size of any one-hot vector
y_p = dense_to_one_hot(labels)
y_size = len(y_p)
y_dim = len(y_p[0])
#g_inputs is the input for generator
#auxiliary_input is the condition
#d_inputs is the input for discriminator
g_inputs = (Input(shape=(100,), dtype='float32'))
auxiliary_input = (Input(shape=(y_dim,), dtype='float32'))
d_inputs = (Input(shape=(1,28,28), dtype='float32'))
def generator_model():
T = concatenate([g_inputs,auxiliary_input])
T = (Dense(1024))(T)
T = (Dense(128*7*7))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (Reshape((128, 7, 7), input_shape=(128*7*7,)))(T)
T = (UpSampling2D(size=(2, 2)))(T)
T = (Convolution2D(64, 5, 5, border_mode='same'))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (UpSampling2D(size=(2, 2)))(T)
T = (Convolution2D(1, 5, 5, border_mode='same'))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
model = Model(input=[g_inputs,auxiliary_input], output=T)
return model
def discriminator_model():
T = (Convolution2D(filters= 64, kernel_size= (5,5), padding='same'))(d_inputs)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (MaxPooling2D(pool_size=(2, 2)))(T)
T = (Convolution2D(128, 5, 5))(T)
T = (BatchNormalization())(T)
T = (Activation('tanh'))(T)
T = (MaxPooling2D(pool_size=(2, 2)))(T)
T = (Flatten())(T)
T = concatenate([T, auxiliary_input])
T = (Dense(1024))(T)
T = (Activation('tanh'))(T)
T = (Dense(1))(T)
T = (Activation('sigmoid'))(T)
model = Model(input=[d_inputs,auxiliary_input], output=T)
return model
def generator_containing_discriminator(generator, discriminator):
T1 = generator([g_inputs, auxiliary_input])
discriminator.trainable = False
T2 = discriminator([T1,auxiliary_input])
model = Model(input=[g_inputs, auxiliary_input], output=T2)
return model
def combine_images(generated_images):
num = generated_images.shape[0]
width = int(math.sqrt(num))
height = int(math.ceil(float(num)/width))
shape = generated_images.shape[2:]
image = np.zeros((height*shape[0], width*shape[1]), dtype=generated_images.dtype)
for index, img in enumerate(generated_images):
i = int(index/width)
j = index % width
image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = img[0, :, :]
return image
def train(BATCH_SIZE,y_prime):
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (X_train.astype(np.float32) - 127.5)/127.5
X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
discriminator = discriminator_model()
generator = generator_model()
discriminator_on_generator = generator_containing_discriminator(generator, discriminator)
d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
generator.compile(loss='binary_crossentropy', optimizer="SGD")
discriminator_on_generator.compile(loss='binary_crossentropy', optimizer=g_optim)
discriminator.trainable = True
discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
noise = np.zeros((BATCH_SIZE, 100))
for epoch in range(100):
print("Epoch is", epoch)
print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))
for index in range(int(X_train.shape[0]/BATCH_SIZE)):
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
y_batch = dense_to_one_hot(y_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE])
y_batch = np.concatenate((y_batch , y_prime))
generated_images = generator.predict([noise,y_prime], verbose=0)
if index % 20 == 0:
image = combine_images(generated_images)
image = image*127.5+127.5
Image.fromarray(image.astype(np.uint8)).save(str(epoch)+"_"+str(index)+".png")
X = np.concatenate((image_batch, generated_images))
y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
d_loss = discriminator.train_on_batch([X,y_batch], y)
print("batch %d d_loss : %f" % (index, d_loss))
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
discriminator.trainable = False
g_loss = discriminator_on_generator.train_on_batch([noise,y_prime], [1] * BATCH_SIZE)
discriminator.trainable = True
print("batch %d g_loss : %f" % (index, g_loss))
if index % 10 == 9:
generator.save_weights('generator', True)
discriminator.save_weights('discriminator', True)
train(100,y_p)
Here is my code for building Conditional GAN (CGAN) with Keras: https://github.com/hklchung/GAN-GenerativeAdversarialNetwork/tree/master/CGAN
After 5 epochs on MNIST I get this:
MNIST CGAN output
and after 50 epochs on the CelebsA dataset:
CelebA CGAN output
My experience is that if you don't see any good results after 20 epochs, something is wrong with your model and training it any longer won't improve your image quality.
I get an error when trying to apply the below code onto the MNIST sample dataset for both training and testing. Please helpe
The following is my code:
import pandas
import numpy
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# Read in the TRAINING dataset
f = open("C:/Users/USER/Desktop/mnist/mnist_train_100.csv", 'r')
a = f.readlines() # place everythig in a lsit called 'a'
#print(a)
f.close()
# go through the list a and split by comma
output_nodes = 10
for record in a: #go through the big list "a"
all_values = record.split(',')
X_train = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
y_train = numpy.zeros(output_nodes) + 0.01
y_train[int(all_values[0])] = 0.99
# Read in the TEST data set and then split
f = open("C:/Users/USER/Desktop/mnist/mnist_test_10.csv", 'r')
a = f.readlines() # place everythig in a lsit called 'a'
#print(a)
f.close()
# go through the list a and split by comma
for record in a: #go through the big list "a"
all_values = record.split(',')
X_test = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
y_test = numpy.zeros(output_nodes) + 0.01
y_test[int(all_values[0])] = 0.99
num_pixels = len(X_train)
# define baseline model
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu'))
model.add(Dense(output_nodes, init='normal', activation='softmax'))
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
## build the model
#model = baseline_model()
## Fit the model
#model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200,verbose=2)
I get the following error:
Exception: Error when checking model input: expected dense_input_6 to have shape (None, 784) but got array with shape (784L, 1L)
I assume you are working with this tutorial.
I would recommend using pandas to read your format:
import pandas as pd
import numpy as np
data = pd.read_csv('mnist_train_100.csv', header=None)
# numpy array of shape (100, 784), type float32
X_train = data.ix[:, 1:].values.astype(np.float32)
# numpy array of shape (100,), type int64
y_train = data.ix[:, 0].values