I am trying to implement a simple multi-task learning with the following network:
# Split the two target columns of the label frames into separate series.
y_train_target1 = Y_train.iloc[:, 0]
y_test_target1 = Y_test.iloc[:, 0]
y_train_target2 = Y_train.iloc[:, 1]
y_test_target2 = Y_test.iloc[:, 1]
input_dim_train=X_train.shape[1]
#shape of X_train is: (30000,126)
# Symbolic input; note that none of the layers below is ever called on it.
inputs = Input(shape=X_train.shape[1], name='main_input')
# Trunk that both task heads are meant to share.
main_model = Sequential()
main_model.add(Dense(200, input_dim=input_dim_train, activation='relu'))
main_model.add(Dense(50, input_dim=input_dim_train, activation='relu'))
main_model.add(BatchNormalization())
main_model.add(Dropout(0.4))
# NOTE(review): softmax over a single unit yields 1.0 for every sample.
main_model.add(Dense(1, activation='softmax'))
# One Sequential wrapper per task; both embed the same `main_model` object.
model_target1 = Sequential()
model_target2 = Sequential()
model_target1.add(main_model)
model_target2.add(main_model)
model_target1.add(Dense(1, activation='softmax', name='target1_output'))
model_target2.add(Dense(1, activation='softmax', name='target2_output'))
# `outputs` receives the Sequential objects themselves rather than tensors
# obtained by calling layers on `inputs`; this is the line reported to raise
# the ValueError.
model_share = Model(inputs = inputs,outputs = [model_target1, model_target2])
model_share.summary()
But I face the following error, when I run Model(inputs = inputs,outputs = [model_target1, model_target2]) line:
ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.engine.sequential.Sequential object at 0x00000214980FE5B0>
Any idea to handle this issue?
If the objective is to share the layers between the two outputs then you can write your code as shown below, see also this answer.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
# Seed both NumPy and TensorFlow so the example is repeatable.
np.random.seed(0)
tf.random.set_seed(0)

# Features: 100 samples with 10 standard-normal columns each.
x = np.random.normal(loc=0, scale=1, size=(100, 10))

# Targets: the row-wise mean and the row-wise median of the features.
y1 = x.mean(axis=1)
y2 = np.median(x, axis=1)

# Layer objects are instantiated once here so that every head built by
# `nn` runs through the same instances.
d1 = Dense(units=200, activation='relu')
d2 = Dense(units=50, activation='relu')
d3 = BatchNormalization()
d4 = Dropout(rate=0.4)
# create a function for applying the shared layers
def nn(x, name):
    """Run `x` through the shared layers and attach a task-specific output.

    Args:
        x: Input tensor fed to the shared layer stack.
        name: Name assigned to the task's final ``Dense(1)`` layer.

    Returns:
        The output tensor of the task-specific ``Dense(1)`` layer.
    """
    # d1..d4 are module-level layer instances, so each call reuses them;
    # only the final Dense layer is created anew per call.
    y = d1(x)
    y = d2(y)
    y = d3(y)
    y = d4(y)
    return Dense(1, name=name)(y)
# create the model
# `shape` is passed as a tuple (the documented form); a bare integer is not
# accepted by every Keras version.
inputs = Input(shape=(x.shape[1],), name='common_input')
# Both outputs are built from the same input through the shared layers.
output1 = nn(inputs, name='target_1')
output2 = nn(inputs, name='target_2')
model = Model(inputs=inputs, outputs=[output1, output2])
# A single loss string is applied to each of the two outputs.
model.compile(optimizer='adam', loss='mse')
# train the model: targets are given in the same order as the outputs
model.fit(x, [y1, y2], epochs=5)
# Epoch 1/5
# 4/4 [==============================] - 1s 1ms/step - loss: 5.3587 - target_1_loss: 2.7805 - target_2_loss: 2.5782
# Epoch 2/5
# 4/4 [==============================] - 0s 1ms/step - loss: 3.8924 - target_1_loss: 1.8996 - target_2_loss: 1.9927
# Epoch 3/5
# 4/4 [==============================] - 0s 970us/step - loss: 2.8755 - target_1_loss: 1.4582 - target_2_loss: 1.4173
# Epoch 4/5
# 4/4 [==============================] - 0s 943us/step - loss: 2.6111 - target_1_loss: 1.2023 - target_2_loss: 1.4088
# Epoch 5/5
# 4/4 [==============================] - 0s 910us/step - loss: 2.6412 - target_1_loss: 1.1902 - target_2_loss: 1.4510
# generate the model predictions; the result is unpacked into one array per
# output, in the order the outputs were passed to Model(...)
y1_pred, y2_pred = model.predict(x)
print(y1_pred)
# [[0.3716803 ]
# [0.22038066]
# [0.2840684 ]
# [0.09253158]
# [0.21785215]
# ...
print(y2_pred)
# [[ 0.17823327]
# [ 0.10360342]
# [ 0.12475234]
# [-0.04125798]
# [-0.25730723]
# ...
Related
I have trained a custom text classifier in Tensorflow with python for classifying sentences into questions/sentences containing information using this code:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Read the whole file in one call rather than concatenating line by line,
# which re-copies the growing string on every iteration.
with open("/content/train_new.txt") as source:
    text = source.read()
print("text: " + text)

# Records are separated by "<n>"; each record is "sentence<t>label".
sentences = []
labels = []
for item in text.split("<n>"):
    parts = item.split("<t>")
    print(parts)
    # A fragment without "<t>" (for example the tail after a trailing "<n>")
    # has no label; skip it instead of failing on parts[1].
    if len(parts) < 2:
        continue
    sentences.append(parts[0])
    labels.append(parts[1])
print(sentences)
print(labels)
print("----")
# Hold out the last 20% of the examples for testing.
train_test_split_percentage = 80
training_size = round((len(sentences)/100)*train_test_split_percentage)
print("training size: {} of {}".format(training_size, len(labels)))
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

# Tokeniser limited to `vocab_size` words; unseen words map to "<OOV>".
vocab_size = 100
max_length = 10
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index

# Every sequence is padded or truncated at the end to `max_length` tokens.
pad_options = dict(maxlen=max_length, padding="post", truncating="post")
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, **pad_options)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **pad_options)

# convert training & testing data into numpy array
# Need this block to get it to work with TensorFlow 2.x
import numpy as np
training_padded = np.array(training_padded)
testing_padded = np.array(testing_padded)
# Labels become float32 column vectors of shape (n, 1).
training_labels = np.asarray(training_labels).astype('float32').reshape(-1, 1)
testing_labels = np.asarray(testing_labels).astype('float32').reshape(-1, 1)
# defining the model
# Embedding -> average over the sequence axis -> small dense head.
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, 24, input_length=max_length),
tf.keras.layers.GlobalAveragePooling1D(),
tf.keras.layers.Dense(24, activation='relu'),
# NOTE(review): softmax normalises across the units of the layer, so with a
# single unit the output is 1.0 for every input.
tf.keras.layers.Dense(1, activation='softmax')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
# training the model
# The held-out split is used as validation data on every epoch.
num_epochs = 1000
history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=2)
However, while training, it prints weird accuracy and loss values like this:
Epoch 972/1000
9/9 - 0s - loss: -8.2316e+03 - accuracy: 0.7345 - val_loss: -2.7299e+04 - val_accuracy: 0.0000e+00
Epoch 973/1000
9/9 - 0s - loss: -8.2452e+03 - accuracy: 0.7345 - val_loss: -2.7351e+04 - val_accuracy: 0.0000e+00
Epoch 974/1000
9/9 - 0s - loss: -8.2571e+03 - accuracy: 0.7345 - val_loss: -2.7363e+04 - val_accuracy: 0.0000e+00
Epoch 975/1000
9/9 - 0s - loss: -8.2703e+03 - accuracy: 0.7345 - val_loss: -2.7416e+04 - val_accuracy: 0.0000e+00
The train_new.txt file contains data in the form of text<t>class_num<n>
When trying to predict using the model.predict() function, it always outputs [[1.]]
What's the issue with my code?
tf.keras.layers.Dense(1, activation='sigmoid')
You should use sigmoid as the activation when doing binary classification. Alternatively,
tf.keras.layers.Dense(2, activation='softmax')
will also be correct in terms of probability.
The outputs of a softmax always sum to one, so with a single output unit the result is always 1. That's why you get 1 as the output every time.
I want to use CNN in python to get values from dartboard (or the value of the field where dart landed) using pictures.
I took 208 photos of a dartboard; in each one the dart is in a specific location. I want to predict whether the dart in the next image is in a specific field. The 208 pictures represent 4 classes (52 each); the single, double and triple of the same field represent the same number or, in our case, the same class.
sample dart in a field
Then i use similar picture to test model.
When I try to fit model I get something like this
208/208 [==============================] - 3s 15ms/sample - loss: 0.0010 - accuracy: 1.0000 - val_loss: 8.1726 - val_accuracy: 0.2500
Epoch 29/100
208/208 [==============================] - 3s 15ms/sample - loss: 9.8222e-04 - accuracy: 1.0000 - val_loss: 8.6713 - val_accuracy: 0.2500
Epoch 30/100
208/208 [==============================] - 3s 15ms/sample - loss: 8.5902e-04 - accuracy: 1.0000 - val_loss: 9.2214 - val_accuracy: 0.2500
Epoch 31/100
208/208 [==============================] - 3s 15ms/sample - loss: 7.9463e-04 - accuracy: 1.0000 - val_loss: 9.6584 - val_accuracy: 0.2500
As the accuracy hits 1 the val_accuracy stays the same, some previous model got me a little better result, but it was little better than this.
As I am new in the field I need some advice to get my model or whole program better.
Here is my current model_
# Four convolution/pooling stages followed by a dense classifier whose
# softmax layer has one unit per class (4).
model = Sequential([
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(640, 480, 3)),
    MaxPooling2D(2, 2),
    BatchNormalization(),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(256, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu', kernel_initializer='he_uniform'),
    Dense(4, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# The separate test set doubles as the validation set during training.
history = model.fit(
    X,
    y,
    batch_size=16,
    epochs=100,
    validation_data=(Xtest, ytest),
)
AND MY SAMPLE PROGRAM
# Filled by create_training_data() with [image_array, class_index] pairs.
training_data = []
DATADIR = 'C:/PikadaNew'
# Class folders ordered by name length; a folder's position in this list is
# used as its class index.
# NOTE(review): `dir` shadows the builtin of the same name.
dir = sorted(os.listdir(DATADIR), key=len)
def create_training_data():
    """Load every image under DATADIR into the global `training_data` list.

    Each folder named in `dir` is one class and its position in `dir` is the
    class index. One ``[image_array, class_index]`` pair is appended per
    readable image; unreadable files are skipped.
    """
    for class_num, category in enumerate(dir):
        path = os.path.join(DATADIR, category)
        for img in tqdm(os.listdir(path)):
            img_array = cv2.imread(os.path.join(path, img))
            # cv2.imread reports an unreadable file by returning None rather
            # than raising, so a try/except around it filters nothing.
            if img_array is None:
                continue
            training_data.append([img_array, class_num])
# Populate `training_data` from DATADIR.
create_training_data()
DATATESTDIR = 'C:/PikadaNewTest'
# Test class folders, ordered by name length like the training folders.
# NOTE(review): train and test class indices agree only if both directories
# yield their folders in the same order — confirm.
dir1 = sorted(os.listdir(DATATESTDIR), key=len)
# Filled by create_test_data() with [image_array, class_index] pairs.
test_data = []
def create_test_data():
    """Load every image under DATATESTDIR into the global `test_data` list.

    Each folder named in `dir1` is one class and its position in `dir1` is
    the class index. One ``[image_array, class_index]`` pair is appended per
    readable image; unreadable files are skipped.
    """
    for class_num, category in enumerate(dir1):
        path = os.path.join(DATATESTDIR, category)
        for img in tqdm(os.listdir(path)):
            img_array = cv2.imread(os.path.join(path, img))  # convert to array
            # cv2.imread reports an unreadable file by returning None rather
            # than raising, so a try/except around it filters nothing.
            if img_array is None:
                continue
            test_data.append([img_array, class_num])
# Populate `test_data` from DATATESTDIR.
create_test_data()

# Separate each [image, label] pair into parallel feature and label lists.
X = [features for features, _ in training_data]
y = [label for _, label in training_data]
Xtest = [features for features, _ in test_data]
ytest = [label for _, label in test_data]

# Stack the images into 4-D arrays and scale pixel values to [0, 1].
# NOTE(review): cv2.imread yields (rows, columns, channels) and reshape only
# relabels the axes — confirm the photos really are 640 rows by 480 columns.
X = np.array(X).reshape(-1, 640, 480, 3) / 255.0
Xtest = np.array(Xtest).reshape(-1, 640, 480, 3) / 255.0

# One-hot encode the integer class indices.
y = to_categorical(np.array(y))
ytest = to_categorical(np.array(ytest))

# Shuffle images and labels together so the pairs stay aligned.
X, y = shuffle(X, y)
Xtest, ytest = shuffle(Xtest, ytest)
Thanks and sorry for mistakes, i hope its understandable what i wanna to achieve
Every advice is much appreciated
Samo
You are facing an overfitting problem because your dataset is very small and the model is more complex than needed. You can try the following:
Add more data if you can.
Try to simplify the model by removing some layers.
Add dropout to the model and use regularizers.
Use a smaller number of epochs.
I have the following Keras code that tries to train a combination of an image and a vector of real number features:
from keras.layers import (Conv2D, Flatten, Lambda, Dense, concatenate,
Dropout, Input )
from keras.models import Model
import pandas as pd
import numpy as np
import cv2
import os
def label_img(img):
    """Map an image file name to its binary class label.

    The label letter is the third-from-last dot-separated field of the name,
    for example the ``r`` in ``"sample.r.01.png"``.

    Args:
        img: File name to inspect.

    Returns:
        1 when the field is ``'r'``, 0 when it is ``'i'``, otherwise None.

    Raises:
        IndexError: If the name has fewer than three dot-separated fields.
    """
    word_label = img.split('.')[-3]
    if word_label == 'r':
        return 1
    if word_label == 'i':
        return 0
    # Any other letter has no label; the fall-through is made explicit.
    return None
train_directory = '/train'
images = []
y = []

# Auxiliary numeric features: two columns per sample.
dataset = pd.read_csv('features.csv')
dataset = dataset[['first_value', 'second_value']]
features = dataset.iloc[:, 0:2].values

# Load every file below the training directory as a 512x512 grayscale image
# and derive its label from the file name.
# NOTE(review): the rows of `features` are assumed to be in the same order as
# the files visited by os.walk — confirm.
for root, dirs, files in os.walk(train_directory):
    for file in files:
        image = cv2.imread(os.path.join(root, file))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (512, 512), interpolation=cv2.INTER_AREA)
        images.append(image)
        y.append(label_img(file))

# Add the trailing channel axis expected by the image input.
images = np.asarray(images).reshape((-1, 512, 512, 1))
# Hand the targets to Keras as an array rather than a Python list.
y = np.asarray(y)
# Two inputs: a 512x512 single-channel image and a 2-value feature vector.
image_input = Input(shape=(512,512,1))
aux_input = Input(shape=(2,))
# Convolutional stack over the image. `strides` is the Keras 2 name of the
# legacy `subsample` argument.
input_layer = Conv2D(32,(5,5), activation='relu')(image_input)
cov1 = Conv2D(24,(5,5), activation='relu', strides=(2,2))(input_layer)
cov2 = Conv2D(36,(5,5), activation='relu', strides=(2,2))(cov1)
cov3 = Conv2D(48,(5,5), activation='relu', strides=(2,2))(cov2)
cov4 = Conv2D(64,(5,5), activation='relu')(cov3)
cov5 = Conv2D(64,(3,3), activation='relu')(cov4)
dropout = Dropout(0.5)(cov5)
flatten = Flatten()(dropout)
# Here we add in the feature vectors
merge = concatenate([flatten, aux_input])
d1 = Dense(100, activation='elu')(merge)
d2 = Dense(50, activation='elu')(d1)
d3 = Dense(10, activation='elu')(d2)
# NOTE(review): this output layer has no activation, so its values are not
# confined to [0, 1] although the loss below is binary cross-entropy.
out = Dense(1)(d3)
model = Model(inputs=[image_input, aux_input], outputs=[out])
model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['acc'])
model.fit([images, features], y, epochs=50)
However, I'm getting something like the following:
Epoch 2/50
5/5 [==============================] - 0s - loss: nan - acc: 0.0000e+00
Epoch 3/50
5/5 [==============================] - 0s - loss: nan - acc: 0.0000e+00
Epoch 4/50
5/5 [==============================] - 0s - loss: nan - acc: 0.0000e+00
Epoch 5/50
...
...
...
Why is that? How can I solve the issue?
I would like to mention that my images are black background images with foreground showing an edge of the original image. I'm not sure if that's an issue at all, as there is not much data in the image?
Thanks.
To be able to use crossentropy correctly, you need to ensure, that the outputs of your network are between 0 and 1. Therefore you should use sigmoid or softmax activation in the last layer.
ELU or linear activations allow negative values, which cause an infinite loss when using cross-entropy. Likewise, if the output values are larger than one, the same problem occurs and we cannot ensure that the cross-entropy loss stays positive, so minimization no longer makes sense at that point.
This is caused by the definition of the cross-entropy:
-(y_true*log(y_pred)+(1-y_true)*log(1-y_pred))
The logarithm is not defined for values less than or equal to zero.
I am facing a problem and have difficulty understanding why I get this behaviour.
I am trying to use a pre-trained resnet 50 (keras) model for a binary image classification, I also built a simple cnn. I have about 8k balanced RGB images of size 200x200 and I divided this set into three sub-sets (train 70%, validation 15%, test 15%).
I built a generator to feed data to my models based on keras.utils.Sequence.
The problem is that my models tend to learn on the training set, but on the validation set I get poor results with both the pre-trained ResNet50 and the simple CNN.
I tried several things to solve this problem, but saw no improvement at all:
With and without Data augmentation on training set (rotation)
Images are normalised between [0,1]
With and without Regularizers
Variation of the learning rate
This is an example of results obtained:
Epoch 1/200
716/716 [==============================] - 320s 447ms/step - loss: 8.6096 - acc: 0.4728 - val_loss: 8.6140 - val_acc: 0.5335
Epoch 00001: val_loss improved from inf to 8.61396, saving model to ../models_saved/resnet_adam_best.h5
Epoch 2/200
716/716 [==============================] - 287s 401ms/step - loss: 8.1217 - acc: 0.5906 - val_loss: 10.9314 - val_acc: 0.4632
Epoch 00002: val_loss did not improve from 8.61396
Epoch 3/200
716/716 [==============================] - 249s 348ms/step - loss: 7.5357 - acc: 0.6695 - val_loss: 11.1432 - val_acc: 0.4657
Epoch 00003: val_loss did not improve from 8.61396
Epoch 4/200
716/716 [==============================] - 284s 397ms/step - loss: 7.5092 - acc: 0.6828 - val_loss: 10.0665 - val_acc: 0.5351
Epoch 00004: val_loss did not improve from 8.61396
Epoch 5/200
716/716 [==============================] - 261s 365ms/step - loss: 7.0679 - acc: 0.7102 - val_loss: 4.2205 - val_acc: 0.5351
Epoch 00005: val_loss improved from 8.61396 to 4.22050, saving model to ../models_saved/resnet_adam_best.h5
Epoch 6/200
716/716 [==============================] - 285s 398ms/step - loss: 6.9945 - acc: 0.7161 - val_loss: 10.2276 - val_acc: 0.5335
....
This is classes used to load data into my models.
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence yielding batches of processed RGB images and one-hot labels.

    In training mode each sample is drawn at random (with replacement) and
    about half of the images are randomly rotated. In validation mode the
    samples are served in their stored order without augmentation.
    """

    def __init__(self, inputs,
                 labels, img_size,
                 input_shape,
                 batch_size, num_classes,
                 validation=False):
        super().__init__()
        self.inputs = inputs
        self.labels = labels
        self.img_size = img_size
        self.input_shape = input_shape
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.validation = validation
        self.indexes = np.arange(len(self.inputs))
        # Kept for backward compatibility; validation batches are addressed
        # by `index` and no longer depend on this counter.
        self.inc = 0

    def __getitem__(self, index):
        """Generate one batch of data

        Parameters
        ----------
        index : the position of the batch within the epoch

        Returns
        -------
        out : a tuple that contains (inputs and labels associated)
        """
        batch_inputs = np.zeros((self.batch_size, *self.input_shape))
        batch_labels = np.zeros((self.batch_size, self.num_classes))
        # First sample of this batch when samples are served in order.
        start = index * self.batch_size
        # Generate data
        for i in range(self.batch_size):
            if self.validation:
                # Derive the sample from `index` so the same batch is returned
                # however often, and in whatever order, it is requested. A
                # running counter drifts across epochs whenever the dataset
                # size is not a multiple of the batch size.
                sample = self.indexes[start + i]
            else:
                # choose random index in features
                sample = random.randint(0, len(self.inputs) - 1)
            batch_inputs[i] = self.rgb_processing(self.inputs[sample])
            batch_labels[i] = to_categorical(self.labels[sample], num_classes=self.num_classes)
        return batch_inputs, batch_labels

    def __len__(self):
        """Denotes the number of batches per epoch

        Returns
        -------
        out : number of batches per epochs
        """
        # Only full batches are counted; a trailing partial batch is dropped.
        return int(np.floor(len(self.inputs) / self.batch_size))

    def rgb_processing(self, path):
        """Load the image at `path` and scale it by its maximum value.

        Parameters
        ----------
        path : location of the image, passed to `load_img`

        Returns
        -------
        out : the RGB array divided by its maximum (unchanged if all zero)
        """
        img = load_img(path)
        rgb = img.get_rgb_array()
        if not self.validation:
            # Augment roughly every second training image.
            if random.choice([True, False]):
                rgb = random_rotation(rgb)
        peak = np.max(rgb)
        # An all-zero image would otherwise divide by zero and turn into NaNs.
        if not peak:
            return rgb
        return rgb/peak
class Models:
    """Builds the compiled classifiers used in the experiments."""

    def __init__(self, input_shape, classes):
        """Store the image input shape and the collection of class labels."""
        self.input_shape = input_shape
        self.classes = classes

    def simpleCNN(self, optimizer):
        """Build and compile a small two-convolution CNN classifier.

        Args:
            optimizer: Optimizer instance passed to ``model.compile``.

        Returns:
            The compiled Sequential model; its softmax output has one unit
            per entry of ``self.classes``.
        """
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3),
                         activation='relu',
                         input_shape=self.input_shape))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(len(self.classes), activation='softmax'))
        # A softmax over one unit per class is paired with categorical
        # cross-entropy, which also stays correct for more than two classes.
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=optimizer,
                      metrics=['accuracy'])
        return model

    def resnet50(self, optimizer):
        """Build and compile a classifier on an ImageNet-pretrained ResNet50.

        Every layer of the pretrained network is frozen; only the new softmax
        layer added on top is left trainable.

        Args:
            optimizer: Optimizer instance passed to ``model.compile``.

        Returns:
            The compiled functional model.
        """
        model = keras.applications.resnet50.ResNet50(include_top=False,
                                                     input_shape=self.input_shape,
                                                     weights='imagenet')
        model.summary()
        # NOTE(review): the head below is attached to `model.output`, so
        # popping from `model.layers` does not appear to change the graph
        # that is used — confirm the pop is still wanted.
        model.layers.pop()
        model.summary()
        for layer in model.layers:
            layer.trainable = False
        output = Flatten()(model.output)
        #I also tried to add dropout layers here with batch normalization but it does not change results
        output = Dense(len(self.classes), activation='softmax')(output)
        finetuned_model = Model(inputs=model.input,
                                outputs=output)
        # Same pairing as in simpleCNN: softmax head with categorical loss.
        finetuned_model.compile(optimizer=optimizer,
                                loss=keras.losses.categorical_crossentropy,
                                metrics=['accuracy'])
        return finetuned_model
This is how these functions are called:
# Training generator: random sampling with augmentation (validation=False).
train_batches = DataGenerator(inputs=train.X.values,
                              labels=train.y.values,
                              img_size=img_size,
                              input_shape=input_shape,
                              batch_size=batch_size,
                              num_classes=len(CLASSES))
# Validation generator: ordered samples, no augmentation.
validate_batches = DataGenerator(inputs=validate.X.values,
                                 labels=validate.y.values,
                                 img_size=img_size,
                                 input_shape=input_shape,
                                 batch_size=batch_size,
                                 num_classes=len(CLASSES),
                                 validation=True)
if model_name == "cnn":
    model = models.simpleCNN(optimizer=Adam(lr=0.0001))
elif model_name == "resnet":
    model = models.resnet50(optimizer=Adam(lr=0.0001))
else:
    # Fail here with a clear message instead of a NameError on `model` below.
    raise ValueError("unknown model_name: %r" % (model_name,))
# Stop after 15 epochs without improvement and keep only the best checkpoint.
early_stopping = EarlyStopping(patience=15)
checkpointer = ModelCheckpoint(output_name + '_best.h5', verbose=1, save_best_only=True)
history = model.fit_generator(train_batches, steps_per_epoch=num_train_steps, epochs=epochs,
                              callbacks=[early_stopping, checkpointer], validation_data=validate_batches,
                              validation_steps=num_valid_steps)
I finally found the main cause of this over-fitting. Since I use a pre-trained model, I had set its layers as non-trainable. I tried making them trainable instead, and that seems to solve the problem. The original freezing loop was:
# NOTE(review): this appears to be the freezing loop from resnet50(); as
# written it marks every layer of the model as non-trainable.
for layer in model.layers:
    layer.trainable = False
My hypothesis is that my images are too far away from data used to train the model.
I also added some dropouts and batch normalization at the end of the resnet model.
I'm very new to Keras and I was following this doc to produce a multi-input and multi-output model. However, after each epoch, the results remain the same. Could someone point out where I went wrong?
My code is something like
# Main branch: character sequence -> embedding -> two stacked LSTMs.
main_input = Input(shape=(maxlen,), name="main_input")
x = Embedding(94, 64)(main_input)  # dic length = 94
lstm_out0 = LSTM(
    256,
    activation="relu",
    dropout=0.1,
    recurrent_dropout=0.2,
    return_sequences=True,
)(x)
lstm_out = LSTM(
    256,
    activation="relu",
    dropout=0.1,
    recurrent_dropout=0.2,
)(lstm_out0)

# Auxiliary branch: second character sequence -> embedding -> one LSTM.
auxiliary_input = Input(shape=(maxlen,), dtype="int32", name='aux_input')
aux_embed = Embedding(94, 64)(auxiliary_input)
aux_lstm_out = LSTM(
    256,
    activation="relu",
    dropout=0.2,
    recurrent_dropout=0.2,
)(aux_embed)

# The 10-way auxiliary output is taken from the main branch's LSTM state.
auxiliary_output = Dense(10, activation="softmax", name="aux_output")(lstm_out)

# The binary main output is computed from both branches concatenated.
x = keras.layers.concatenate([aux_lstm_out, lstm_out])
x = Dense(64, activation='relu')(x)
main_output = Dense(1, activation='sigmoid', name='main_output')(x)

model = Model(
    inputs=[main_input, auxiliary_input],
    outputs=[main_output, auxiliary_output],
)
# One loss per named output.
output_losses = {
    'main_output': 'binary_crossentropy',
    'aux_output': 'categorical_crossentropy',
}
model.compile(optimizer='rmsprop', loss=output_losses, metrics=['accuracy'])
model.fit(
    [X_train, X_aux_train],
    [train_label, aux_train_label],
    validation_data=[[X_dev, X_aux_dev], [dev_label, aux_dev_label]],
    epochs=10,
    batch_size=batch_size,
)
The main input is a sequence of chars while the main output is a binary value. The aux input is also a sequence of chars while the aux output is a categorical label.
The output is something like
Train on 200000 samples, validate on 20000 samples
Epoch 1/10
200000/200000 [==============================] - 892s - loss: 7.3824 - main_output_loss: 5.8560 - aux_output_loss: 1.5264 - main_output_acc: 0.5186 - aux_output_acc: 0.5371 - val_loss: 9.5776 - val_main_output_loss: 8.0590 - val_aux_output_loss: 1.5186 - val_main_output_acc: 0.5000 - val_aux_output_acc: 0.5362
Epoch 2/10
200000/200000 [==============================] - 894s - loss: 9.5818 - main_output_loss: 8.0586 - aux_output_loss: 1.5233 - main_output_acc: 0.5000 - aux_output_acc: 0.5372 - val_loss: 9.5771 - val_main_output_loss: 8.0590 - val_aux_output_loss: 1.5181 - val_main_output_acc: 0.5000 - val_aux_output_acc: 0.5362
I ran more than 5 epochs and the results are almost all the same. The input data is prepared as follows: features with sequence.pad_sequences; labels with to_categorical (for the multiclass output).