I'm working with the Iris dataset using TensorFlow 2. After fitting my model I get this error message:
ValueError: A target array with shape (135, 4, 8) was passed for an output of shape (None, 3) while using as loss `categorical_crossentropy`. This loss expects targets to have the same shape as the output.
I'm importing, splitting, and one-hot encoding the data with:
from sklearn import datasets, model_selection

iris_data = datasets.load_iris()

def read_in_and_split_data(iris_data):
    return model_selection.train_test_split(iris_data["data"], iris_data["data"], test_size=0.1)

train_data, test_data, train_targets, test_targets = read_in_and_split_data(iris_data)
train_data shape is (135, 4)
train_targets shape is (135, 4)
import numpy as np
import tensorflow as tf

train_targets = tf.keras.utils.to_categorical(np.array(train_targets))
test_targets = tf.keras.utils.to_categorical(np.array(test_targets))
loss = "categorical_crossentropy"
def get_model(input_shape):
    model = Sequential([
        Dense(64, activation="relu", kernel_initializer='he_uniform', bias_initializer='ones', input_shape=input_shape),
        Dense(128, activation="relu"),
        Dense(128, activation="relu"),
        Dense(128, activation="relu"),
        Dense(128, activation="relu"),
        Dense(64, activation="relu"),
        Dense(64, activation="relu"),
        Dense(64, activation="relu"),
        Dense(64, activation="relu"),
        Dense(3, activation="softmax"),
    ])
    return model
model = get_model(train_data[0].shape)
def train_model(model, train_data, train_targets, epochs):
    # epochs must be passed by keyword; the third positional argument of fit() is batch_size
    return model.fit(train_data, train_targets, epochs=epochs)
history = train_model(model, train_data, train_targets, epochs=800)
Thanks for the help!

Solved it. In read_in_and_split_data I was loading "data" twice instead of loading "data" and then "target". That is also why the targets came out as (135, 4, 8): to_categorical was applied to the 135×4 feature matrix, whose values go up to 7.9, so it produced 8 one-hot classes per feature. Below is the corrected code:
def read_in_and_split_data(iris_data):
    return model_selection.train_test_split(iris_data["data"], iris_data["target"], test_size=0.1)
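For reference, a minimal sketch of the corrected end-to-end pipeline; the compile settings are assumed, since the original post never shows the compile call:

from sklearn import datasets, model_selection
import numpy as np
import tensorflow as tf

iris_data = datasets.load_iris()
train_data, test_data, train_targets, test_targets = model_selection.train_test_split(
    iris_data["data"], iris_data["target"], test_size=0.1)

train_targets = tf.keras.utils.to_categorical(np.array(train_targets))  # now (135, 3)
test_targets = tf.keras.utils.to_categorical(np.array(test_targets))    # now (15, 3)

model = get_model(train_data[0].shape)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])  # assumed settings
history = model.fit(train_data, train_targets, epochs=800)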
Related
I have started training a basic MLP model on MNIST data taken from here. Below is my code for implementing the model.
import numpy as np
import pandas as pd
from matplotlib.pyplot import imread  # assumed source of imread; the original post does not show its import

train = pd.read_csv(r"train.csv")
test = pd.read_csv(r"test.csv")

train_img_path = "./Images/train/"
test_img_path = "./Images/test/"

train_img = []
for img in train['filename']:
    img_path = train_img_path + img
    image = imread(img_path)
    image = image / 255
    train_img.append(image)

train_img = np.array(train_img)
batch_size = 64
y_train = train['label']
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

#y_train = to_categorical(y_train)
model = Sequential()
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_img, y_train, epochs=20, batch_size=batch_size)
While trying to fit my model on this data I get the error InvalidArgumentError: logits and labels must have the same first dimension, got logits shape [50176,10] and labels shape [64] with loss='sparse_categorical_crossentropy'.
There were suggestions to try loss='categorical_crossentropy' with one-hot encoded labels, but that also gives an error: ValueError: Shapes (None, 10) and (None, 28, 28, 10) are incompatible.
I am confused about how I am getting the shape [50176, 10] in the error (the number of examples is 49000).
I guess I am missing something about shapes. Can someone point out where I am going wrong and how to solve this?
Edit: I have modified my code as below to load the data with Keras flow_from_dataframe, but I still get the same error.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_data = train_datagen.flow_from_dataframe(
    dataframe=train,
    directory='./Images/train',
    x_col='filename',
    y_col='label',
    weight_col=None,
    target_size=(28,28),
    color_mode='grayscale',
    class_mode='categorical',
    batch_size=64
)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.summary()
model.fit(train_data, epochs=20)
The main problem is in your model-building code:
model = Sequential()
model.add(Dense(10, activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(10, activation = 'softmax'))
You are feeding whole images into a stack of Dense layers, which only operate on the last axis, so the softmax is applied per pixel rather than per image. That is where the logits shape [50176, 10] comes from: the batch of 64 images of 28 × 28 pixels flattens to 64 × 28 × 28 = 50176 rows. Your model also never specifies an input shape.
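You can see the per-pixel behaviour directly (a hypothetical check, not from the original post):

import numpy as np
from tensorflow.keras import layers

batch = np.zeros((64, 28, 28, 1))   # one batch of grayscale images
logits = layers.Dense(10)(batch)    # Dense acts only on the last axis
print(logits.shape)                 # (64, 28, 28, 10) -> 50176 rows once flattened by the loss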
For images, a CNN should be used instead of a plain dense network:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
# kernel sizes added (Conv2D requires them) and input set to one channel to match the grayscale generator above
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(10, activation='softmax'))
If you have one-hot encoded your labels, use categorical_crossentropy. If your labels are integer class indices, use sparse_categorical_crossentropy.
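A small sketch of the two label formats (assuming 10 classes, as above):

import numpy as np
from tensorflow.keras.utils import to_categorical

y_sparse = np.array([3, 0, 7])                        # integer labels -> sparse_categorical_crossentropy
y_onehot = to_categorical(y_sparse, num_classes=10)   # shape (3, 10)  -> categorical_crossentropy
print(y_onehot[0])  # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]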
I created a model and then loaded it in another script to perform a prediction; however, I cannot understand why the shape being passed to predict is reported as incorrect.
This is how the model is created:
batch_size = 1232
epochs = 5
IMG_HEIGHT = 400
IMG_WIDTH = 400
model1 = np.load("training_data.npy", allow_pickle=True)
model2 = np.load("training_data_1.npy", allow_pickle=True)
data = np.asarray(np.concatenate((model1, model2), axis=0)) # 1232
train_data = data[:-100]
X_train = np.asarray(np.array([i[0] for i in train_data]))
Y_train = np.asarray([i[1] for i in train_data])
validation_data = data[-100:]
X_val = np.asarray(np.array([i[0] for i in validation_data]))
Y_val = np.asarray([i[1] for i in validation_data])
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu',
           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(X_train, Y_train, steps_per_epoch=batch_size, epochs=epochs,
                    validation_data=(X_val, Y_val), validation_steps=batch_size)
model.save("test")
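As an aside, with in-memory NumPy arrays the fit call is usually written with batch_size rather than steps_per_epoch; a sketch, not the original author's code:

history = model.fit(X_train, Y_train, batch_size=32, epochs=epochs,
                    validation_data=(X_val, Y_val))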
And this is how I'm trying to make a prediction:
batch_size = 1232
epochs = 5
IMG_HEIGHT = 400
IMG_WIDTH = 400
model = tf.keras.models.load_model('test')
test_1 = cv2.imread('./Data/Images/test_no.jpg')
test_1 = cv2.resize(test_1, (IMG_HEIGHT, IMG_WIDTH))
prediction = model.predict([test_1])[0]
print(prediction)
When printing the shape of the test image the output is: (400, 400, 3)
I also tried using numpy's reshape on the test image before passing it to predict. However, the error is always:
ValueError: Input 0 of layer sequential is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: [None, 400, 3]
Add an extra batch dimension to your input so it has shape [n_items, 400, 400, 3]:

import tensorflow as tf
test_1 = tf.expand_dims(test_1, axis=0)  # (400, 400, 3) -> (1, 400, 400, 3)
prediction = model.predict(test_1)[0]
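The NumPy equivalent works just as well (a sketch under the same assumptions):

import numpy as np
test_1 = np.expand_dims(test_1, axis=0)  # also (1, 400, 400, 3)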
I have a small image data set with 4 classes, so I've used an auto-encoder to extract features automatically.
These features are fed to a Support Vector Machine classifier in order to do the classification task.
After training the auto-encoder for 10 epochs and training the SVM model on the extracted features I got these confusion matrices:
My concern is that the model may not be general enough to be applicable to new data.
My question is: is there any method to know if the model is overfitting other than testing it on some other samples?
EDIT:
This is the code for the auto-encoder:
encoder_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name='original_img')
x = Conv2D(filters=32, kernel_size=3, padding="same", input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))(encoder_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(filters=64, kernel_size=3, padding="same", activation='relu')(x)
encoder_output = MaxPooling2D((2, 2))(x)
encoder = Model(encoder_input, encoder_output, name='encoder')
encoder.summary()
decoder_input = Input(shape=(32, 35, 64), name='encoded_img')  # (IMG_HEIGHT/4, IMG_WIDTH/4, 64) after the two 2x2 poolings
x = Conv2D(filters=64, kernel_size=3, padding="same", activation='relu')(decoder_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(filters=32, kernel_size=3, padding="same", activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoder_output = Conv2D(filters=3, kernel_size=3, padding='same', activation='relu')(x)
decoder = Model(decoder_input, decoder_output, name='decoder')
decoder.summary()
autoencoder_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name='img')
encoded_img = encoder(autoencoder_input)
decoded_img = decoder(encoded_img)
Autoencoder = Model(autoencoder_input, decoded_img, name='autoencoder')
Autoencoder.summary()
total_train = 80
total_val = 40
Autoencoder.compile(optimizer='adam',
                    loss=tf.keras.losses.MeanSquaredError())

history = Autoencoder.fit_generator(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size
)
The code for the SVM and confusion matrices:
samples, y = next(train_data_gen_svm)
samples_val, y_val = next(val_data_gen_svm)

# Get features
X = encoder.predict(samples)
X_val = encoder.predict(samples_val)

# Reshape for the SVM input
X_reshape = X.reshape(80, 32*35*64)
X_val_reshape = X_val.reshape(40, 32*35*64)

clf = SVC(kernel='poly', degree=3)
print('X shape:', X_reshape.shape)
print('X_val shape:', X_val_reshape.shape)
clf.fit(X_reshape, y)
y_pred_val = clf.predict(X_val_reshape)
y_pred_train = clf.predict(X_reshape)
cnf_matrix_val = confusion_matrix(y_val, y_pred_val, normalize='true')
cnf_matrix_train = confusion_matrix(y, y_pred_train, normalize='true')
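One common way to answer the overfitting question without a separate test set is to compare the training and validation loss curves; a minimal sketch using the history object from the fit above (matplotlib import assumed):

import matplotlib.pyplot as plt

# If training loss keeps dropping while validation loss stalls or rises,
# the auto-encoder is starting to overfit.
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.legend()
plt.show()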
Code:

def define_model():
    # channel 1
    inputs1 = Input(shape=(32,1))
    conv1 = Conv1D(filters=256, kernel_size=2, activation='relu')(inputs1)
    #bat1 = BatchNormalization(momentum=0.9)(conv1)
    pool1 = MaxPooling1D(pool_size=2)(conv1)
    flat1 = Flatten()(pool1)
    # channel 2
    inputs2 = Input(shape=(32,1))
    conv2 = Conv1D(filters=256, kernel_size=4, activation='relu')(inputs2)
    pool2 = MaxPooling1D(pool_size=2)(conv2)
    flat2 = Flatten()(pool2)
    # channel 3
    inputs3 = Input(shape=(32,1))
    conv3 = Conv1D(filters=256, kernel_size=4, activation='relu')(inputs3)
    pool3 = MaxPooling1D(pool_size=2)(conv3)
    flat3 = Flatten()(pool3)
    # channel 4
    inputs4 = Input(shape=(32,1))
    conv4 = Conv1D(filters=256, kernel_size=6, activation='relu')(inputs4)
    pool4 = MaxPooling1D(pool_size=2)(conv4)
    flat4 = Flatten()(pool4)
    # merge
    merged = concatenate([flat1, flat2, flat3, flat4])
    # interpretation
    dense1 = Dense(128, activation='relu')(merged)
    dense2 = Dense(96, activation='relu')(dense1)
    outputs = Dense(10, activation='softmax')(dense2)
    model = Model(inputs=[inputs1, inputs2, inputs3, inputs4], outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[categorical_accuracy])
    plot_model(model, show_shapes=True, to_file='/content/q.png')
    return model
model_concat = define_model()
# fit model
red_lr = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=2, factor=0.001, min_delta=0.01)
check = ModelCheckpoint(filepath=r'/content/drive/My Drive/Colab Notebooks/gen/concatcnn.hdf5', verbose=1, save_best_only=True)
History = model_concat.fit([X_train, X_train, X_train, X_train], y_train, epochs=20, verbose=1,
                           validation_data=([X_test, X_test, X_test, X_test], y_test),
                           callbacks=[check, red_lr], batch_size=32)
model_concat.summary()  # summary is a method; the original was missing the parentheses
Unfortunately, I used binary crossentropy as the loss and 'accuracy' as the metric, and I got above 90% val_accuracy.
Then I found this link: Keras binary_crossentropy vs categorical_crossentropy performance?.
After reading the first answer, I kept binary crossentropy as the loss and switched to categorical accuracy as the metric (as in the code above).
Even though I changed this, the val_acc is not improving; it stays around 62%. I also reduced the model complexity, but the accuracy is still not improving. Am I missing anything?
Data set shapes: x_train is (800, 32), x_test is (200, 32), y_train is (800, 10) and y_test is (200, 10). Before feeding the data into the network I applied a standard scaler to x and reshaped x_train and x_test to (800, 32, 1) and (200, 32, 1).
Thanks
I am trying to predict 2 features. This is what my model looks like:

Defining the model:
def my_model():
    input_x = Input(batch_shape=(batch_size, look_back, x_train.shape[2]), name='input')
    drop = Dropout(0.5)
    lstm_1 = LSTM(100, return_sequences=True, batch_input_shape=(batch_size, look_back, x_train.shape[2]), name='3dLSTM', stateful=True)(input_x)
    lstm_1_drop = drop(lstm_1)
    lstm_2 = LSTM(100, batch_input_shape=(batch_size, look_back, x_train.shape[2]), name='2dLSTM', stateful=True)(lstm_1_drop)
    lstm_2_drop = drop(lstm_2)
    y1 = Dense(1, activation='relu', name='op1')(lstm_2_drop)
    y2 = Dense(1, activation='relu', name='op2')(lstm_2_drop)
    model = Model(inputs=input_x, outputs=[y1, y2])
    optimizer = Adam(lr=0.001, decay=0.00001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
    model.summary()
    return model
model = my_model()

for j in range(50):
    start = time.time()
    history = model.fit(x_train, [y_11_train, y_22_train], epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
    model.reset_states()
    print("Epoch", j, time.time() - start, "s")

p = model.predict(x_test, batch_size=batch_size)
My data set has 9 features:
x_train (31251, 6, 9)
y_11_train (31251,)
y_22_train (31251,)
x_test (13399, 6, 9)
y_11_test (13399,)
y_22_test (13399,)
I am trying to predict the first (y_11) and second (y_22) features of my dataset, but I am getting a prediction for only the first feature, not the second one. Any help on how I can get both predictions instead of one?
First of all, you should remove the duplicated input specification: (batch_size, look_back, x_train.shape[2]) only needs to be given once, on the Input layer. Also, try to concatenate your outputs inside your model like this:
def my_model():
    from keras.layers import concatenate
    input_x = Input(batch_shape=(batch_size, look_back, x_train.shape[2]), name='input')
    drop = Dropout(0.5)
    lstm_1 = LSTM(100, return_sequences=True, name='3dLSTM', stateful=True)(input_x)
    lstm_1_drop = drop(lstm_1)
    lstm_2 = LSTM(100, name='2dLSTM', stateful=True)(lstm_1_drop)
    lstm_2_drop = drop(lstm_2)
    y1 = Dense(1, activation='linear', name='op1')(lstm_2_drop)
    y2 = Dense(1, activation='linear', name='op2')(lstm_2_drop)
    y = concatenate([y1, y2])
    model = Model(inputs=input_x, outputs=y)
    optimizer = Adam(lr=0.001, decay=0.00001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
    model.summary()
    return model
EDIT
I think you should fit like this:
y_11_train = y_11_train.reshape(y_11_train.shape[0], 1)
y_22_train = y_22_train.reshape(y_22_train.shape[0], 1)

model = my_model()
model.fit(x_train, np.concatenate((y_11_train, y_22_train), axis=1), ...)
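With the concatenated output, predict then returns a single (n_samples, 2) array, so both targets come back together (a sketch under the same assumptions as above):

p = model.predict(x_test, batch_size=batch_size)  # shape (len(x_test), 2)
y_11_pred = p[:, 0]  # first target
y_22_pred = p[:, 1]  # second target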