I have a problem training a deep learning model with BERT in TensorFlow on a text dataset. I want to fit() the model, but I get an error during training. I think it happens because data_train doesn't have the labels. From my research it looks like the same problem as this SO question: Same problem. Since that one didn't get an answer, is this a bug? The error is:
ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.CategoricalCrossentropy object at 0x7fa707d96fd0>, and therefore expects target data to be provided in `fit()`.
My code looks like this:
X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))
def generate_training_data(df, ids, masks, tokenizer):
    for i, text in tqdm(enumerate(df['text'])):
        tokenized_text = tokenizer.encode_plus(
            text,
            max_length=256,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_tensors='tf'
        )
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks
X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)
labels = np.zeros((len(df), 3))
labels[np.arange(len(df)), df['label'].values] = 1
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    },
dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)
p = 0.8
train_size = int((len(df)//16)*p)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)
model = TFBertModel.from_pretrained('cahya/bert-base-indonesian-522M')
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')
bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(3, activation='softmax', name='output_layer')(intermediate_layer) # softmax -> calcs probs of classes
sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
sentiment_model.summary()
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])
hist = sentiment_model.fit(
train_dataset,
validation_data=val_dataset,
epochs=2
)
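For reference, here is a minimal sketch (not the original code, just one possible fix under the assumption that the map function should keep the labels) of a pipeline that yields (features, target) pairs, so fit() receives target data:
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    # Return a (features, labels) pair instead of only the feature dict,
    # so Keras can find the targets during fit().
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    }, labels

# Rebuild the pipeline with the labels kept (same variables as above).
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))
dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)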
I spent a bit of time finding something we can update, and I downloaded the model from the website.
[ Sample ]:
import tensorflow as tf
import tensorflow_text as text # Registers the ops.
import tensorflow_hub as hub
import os
from os.path import exists
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def generate_training_data(train_labels):
    input_ids = []
    attn_masks = []
    labels = []
    for item in train_labels:
        input_ids.append(str(item))
        attn_masks.append(int(1))
        labels.append(item)
    attn_masks = tf.constant(attn_masks, shape=(1, len(attn_masks), 1), dtype=tf.float32)
    labels = tf.constant(labels, shape=(1, len(labels), 1), dtype=tf.int64)
    input_ids = tf.constant(input_ids, shape=(1, len(input_ids), 1), dtype=tf.string)
    return input_ids, attn_masks, labels
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks,
        'labels': labels
    },
def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.1)(net)
    net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
    return tf.keras.Model(text_input, net)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
X_input_ids, X_attn_masks, labels = generate_training_data(train_labels)
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks))
options = tf.saved_model.LoadOptions(
allow_partial_checkpoint=False,
experimental_io_device="/physical_device:GPU:0",
experimental_skip_checkpoint=True
)
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='sentences')
preprocessor = hub.KerasLayer(export_dir)
encoder_inputs = preprocessor(text_input)
encoder = hub.KerasLayer( export_dir_2, trainable=False, load_options=options)
outputs = encoder(encoder_inputs)
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(outputs['default'])
output_layer = tf.keras.layers.Dense(1, activation='softmax', name='output_layer')(intermediate_layer)
sentiment_model = tf.keras.Model(inputs=[text_input], outputs=output_layer)
sentiment_model.summary()
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])
hist = sentiment_model.fit(
dataset,
validation_data=dataset,
epochs=2
)
[ Output ]:
outputs: KerasTensor(type_spec=TensorSpec(shape=(None, 512), dtype=tf.float32, name=None), name='keras_layer_1/StatefulPartitionedCall:0', description="created by layer 'keras_layer_1'")
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
sentences (InputLayer) [(None,)] 0 []
keras_layer (KerasLayer) {'input_mask': (Non 0 ['sentences[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_1 (KerasLayer) {'default': (None, 28763649 ['keras_layer[0][0]',
512), 'keras_layer[0][1]',
'encoder_outputs': 'keras_layer[0][2]']
[(None, 128, 512),
(None, 128, 512),
(None, 128, 512),
(None, 128, 512)],
'sequence_output':
(None, 128, 512),
'pooled_output': (
None, 512)}
intermediate_layer (Dense) (None, 512) 262656 ['keras_layer_1[0][0]']
output_layer (Dense) (None, 1) 513 ['intermediate_layer[0][0]']
==================================================================================================
Total params: 29,026,818
Trainable params: 263,169
Non-trainable params: 28,763,649
__________________________________________________________________________________________________
Epoch 1/2
I had a problem building a sequential model. I hope you all can help me this time.
def decode_csv(csv_row):
    record_defaults = ["path", "flower"]
    filename, label_string = tf.io.decode_csv(csv_row, record_defaults)
    img = read_and_decode(filename, [IMG_HEIGHT, IMG_WIDTH])
    return img, label_string
train_dataset = (tf.data.TextLineDataset("/home/pi/Downloads/ml_code/train_set.csv").map(decode_csv)).take(500)
eval_dataset = (tf.data.TextLineDataset("/home/pi/Downloads/ml_code/eval_set.csv").map(decode_csv)).take(50)
model =tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)),
tf.keras.layers.Dense(len(CLASS_NAME), activation = "softmax")
])
model.compile(optimizer="adam",
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
metrics = ['accuracy'])
model.summary()
tf.keras.utils.plot_model(model, show_shapes = True, show_layer_names = False, to_file = "model.jpg")
history = model.fit(train_dataset, validation_data = eval_dataset, epochs = 10)
model.save("first")
json.dump(history.history, open("First_History", "w"))
Try something like this:
import pandas as pd
# Create dummy data
tf.keras.utils.save_img('image1.png', tf.random.normal((64, 64, 3)))
tf.keras.utils.save_img('image2.png', tf.random.normal((64, 64, 3)))
tf.keras.utils.save_img('image3.png', tf.random.normal((64, 64, 3)))
tf.keras.utils.save_img('image4.png', tf.random.normal((64, 64, 3)))
tf.keras.utils.save_img('image5.png', tf.random.normal((64, 64, 3)))
df = pd.DataFrame(data= {'path': ['/content/image1.png', '/content/image2.png', '/content/image3.png', '/content/image4.png', '/content/image5.png'],
'label': ['0', '1', '2', '3', '2']})
df.to_csv('data.csv', index=False)
Preprocess data and train:
import tensorflow as tf
def decode_csv(csv_row):
    record_defaults = ["path", "label"]
    filename, label_string = tf.io.decode_csv(csv_row, record_defaults)
    img = tf.io.decode_png(tf.io.read_file(filename), channels=3)
    return img, tf.strings.to_number(label_string, out_type=tf.int32)
# Skip header row.
train_dataset = tf.data.TextLineDataset("/content/data.csv").skip(1).map(decode_csv).batch(2)
model =tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape = (64, 64, 3)),
tf.keras.layers.Dense(4, activation = "softmax")
])
model.compile(optimizer="adam",
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
metrics = ['accuracy'])
model.summary()
tf.keras.utils.plot_model(model, show_shapes = True, show_layer_names = False, to_file = "model.jpg")
history = model.fit(train_dataset, epochs = 2)
model.save("first")
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_2 (Flatten) (None, 12288) 0
dense_2 (Dense) (None, 4) 49156
=================================================================
Total params: 49,156
Trainable params: 49,156
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
3/3 [==============================] - 1s 62ms/step - loss: 623.7551 - accuracy: 0.4000
Epoch 2/2
3/3 [==============================] - 0s 7ms/step - loss: 1710.6586 - accuracy: 0.2000
INFO:tensorflow:Assets written to: first/assets
My final fixed code is here:
def read_and_decode(filename, reshape_dims):
    img = tf.io.read_file(filename)
    # Range [0, 255]
    img = tf.image.decode_jpeg(img, channels=3)
    # Convert into range [0, 1] for ML flexibility
    img = tf.image.convert_image_dtype(img, tf.float32)
    return img

def decode_csv(csv_row):
    record_defaults = ["path", "flower"]
    filenames, label_string = tf.io.decode_csv(csv_row, record_defaults, field_delim=",")
    img = read_and_decode(filenames, [IMG_HEIGHT, IMG_WIDTH])
    return img, tf.argmax(tf.cast(label_string == CLASS_NAME, tf.int32))
train_dataset = tf.data.TextLineDataset("/home/pi/Downloads/ml_code/train_set.csv").skip(1).map(decode_csv).batch(2)
eval_dataset = tf.data.TextLineDataset("/home/pi/Downloads/ml_code/eval_set.csv").skip(1).map(decode_csv).batch(2)
model =tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)),
tf.keras.layers.Dense(len(CLASS_NAME), activation = "softmax")
])
model.compile(optimizer="adam",
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
metrics = ['accuracy'])
model.summary()
tf.keras.utils.plot_model(model, show_shapes = True, show_layer_names = False, to_file = "model.jpg")
history = model.fit(train_dataset, validation_data = eval_dataset, epochs = 2)
model.save("first")
json.dump(history.history, open("First_History", "w"))
All credit should go to AloneTogether. Thanks for your help ^_^.
Here is my training code:
OUTPUT_UNITS = 38
NUM_UNITS = [256]
LOSS = "sparse_categorical_crossentropy"
LEARNING_RATE = 0.001
EPOCHS = 50
BATCH_SIZE = 64
SAVE_MODEL_PATH = "model.h5"
def build_model(output_units, num_units, loss, learning_rate):
    input = keras.layers.Input(shape=(None, output_units))
    x = keras.layers.LSTM(num_units[0])(input)
    x = keras.layers.Dropout(0.2)(x)
    output = keras.layers.Dense(output_units, activation="softmax")(x)
    model = keras.Model(input, output)
    model.compile(loss=loss,
                  optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])
    model.summary()
    return model
and this is the error I got:
ValueError: Input 0 is incompatible with layer model: expected shape=(None, None, 38), found shape=(None, 64, 23)
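Not the original code, but a minimal sketch of one way to make the shapes consistent, under the assumption that 23 is the feature dimension of the training data and 38 is the intended number of output classes (the extra input_dim argument is hypothetical, not part of the posted build_model):
def build_model(output_units, num_units, loss, learning_rate, input_dim):
    # Input feature size is taken from the data, not from output_units.
    input = keras.layers.Input(shape=(None, input_dim))
    x = keras.layers.LSTM(num_units[0])(input)
    x = keras.layers.Dropout(0.2)(x)
    output = keras.layers.Dense(output_units, activation="softmax")(x)
    model = keras.Model(input, output)
    model.compile(loss=loss,
                  optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])
    return model

# e.g. build_model(OUTPUT_UNITS, NUM_UNITS, LOSS, LEARNING_RATE, input_dim=23)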
I am building a prediction model for sequence data using the Conv1D layer provided by Keras. This is how I did it:
def autoencoder():
    # autoencoder = Model(inputs=input_layer, outputs=decoder)
    input_dim = x_train_scaled.shape[1]
    input_layer = Input(shape=(input_dim,))
    conv1 = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
    batch1 = BatchNormalization()(conv1)
    maxp1 = MaxPooling1D(pool_size=2)(batch1)
    dropout1 = Dropout(0.2)(maxp1)
    conv2 = Conv1D(filters=16, kernel_size=3, activation='relu')(dropout1)
    batch2 = BatchNormalization()(conv2)
    maxp2 = MaxPooling1D(2)(batch2)
    dropout2 = Dropout(0.2)(maxp2)
    conv3 = Conv1D(filters=8, kernel_size=3, activation='relu')(dropout2)
    batch3 = BatchNormalization()(conv3)
    maxp3 = MaxPooling1D(2)(batch3)
    dropout3 = Dropout(0.2)(maxp3)
    # decoder layers
    conv4 = Conv1D(filters=8, kernel_size=3, activation='relu')(dropout3)
    batch4 = BatchNormalization()(conv4)
    dropout4 = Dropout(0.2)(batch4)
    conv5 = Conv1D(filters=16, kernel_size=3, activation='relu')(dropout4)
    batch5 = BatchNormalization()(conv5)
    unsamp5 = UpSampling1D(2)(batch5)
    dropout5 = Dropout(0.2)(unsamp5)
    conv6 = Conv1D(filters=32, kernel_size=3, activation='relu')(dropout5)
    batch6 = BatchNormalization()(conv6)
    unsamp6 = UpSampling1D(2)(batch6)
    dropout6 = Dropout(0.2)(unsamp6)
    decoder = Conv1D(filters=1, kernel_size=3, activation='sigmoid')(dropout6)
    return Model(input_layer, decoder)
I train the model to reduce the data dimension using the autoencoder:
model.compile(loss='categorical_crossentropy', optimizer='adam',
metrics=['accuracy'])
model.fit(x_train_scaled, x_train_scaled, epochs=15, batch_size=32,
verbose=verbose, shuffle=True)
However, the debugging information shows:
ValueError: Input 0 of layer conv1d is incompatible with the layer: :
expected min_ndim=3, found ndim=2. Full shape received: (None, 19)
The training and validation data shapes are as follows:
x_train_scaled shape (125973, 19)
The dataset used to train the model is NSL-KDD (https://www.unb.ca/cic/datasets/nsl.html).
Conv1D expects a 3+D tensor with shape batch_shape + (steps, input_dim). Add an extra dimension to your input.
Working sample code:
# The inputs are 128-length vectors with 10 timesteps, and the batch size
# is 4.
input_shape = (4, 10, 128)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv1D(
32, 3, activation='relu',input_shape=input_shape[1:])(x)
print(y.shape)
Output
(4, 8, 32)
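Applied to the shapes from the question, a minimal sketch (under the assumption that x_train_scaled is a NumPy array of shape (125973, 19); dummy data is used here) of adding that extra dimension could look like this:
import numpy as np

# Assumption: x_train_scaled has shape (125973, 19); random dummy data stands in for it.
x_train_scaled = np.random.rand(125973, 19).astype("float32")

# Add a channel axis so Conv1D receives (batch, steps, input_dim).
x_train_3d = np.expand_dims(x_train_scaled, axis=-1)
print(x_train_3d.shape)  # (125973, 19, 1)

# The autoencoder's Input layer would then need shape=(19, 1) instead of shape=(19,).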
I am getting a ValueError: logits and labels must have the same shape ((None, 1) vs ()) when doing a model evaluate. The model trains fine, but the problem shows up when I evaluate. I used tf.expand_dims for the logits, but I'm wondering if this needs to be applied to the labels as well?
Here is my code:
import tensorflow as tf
import tensorflow_datasets as tfds
dataset, info = tfds.load('imdb_reviews', with_info=True,
as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']
BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(1)
VOCAB_SIZE, EMBED_SIZE, NUM_OOV_BUCKETS = 10000, 128, 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))
class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
        self.query_layer = tf.keras.layers.Conv1D(
            filters=100,
            kernel_size=4,
            padding='same'
        )
        self.value_layer = tf.keras.layers.Conv1D(
            filters=100,
            kernel_size=4,
            padding='same'
        )
        self.attention_layer = tf.keras.layers.Attention()

    def call(self, inputs):
        query = self.query_layer(inputs)
        value = self.value_layer(inputs)
        attention = self.attention_layer([query, value])
        return tf.keras.layers.concatenate([query, attention])
attention_layer = AttentionLayer()
model1 = tf.keras.models.Sequential([
tf.keras.Input(shape=(),batch_size=1, dtype=tf.string, name='InputLayer'),
encoder,
tf.keras.layers.Embedding(VOCAB_SIZE + NUM_OOV_BUCKETS, EMBED_SIZE, mask_zero=True, name='Embedding_Layer'),
attention_layer,
tf.keras.layers.Conv1D(filters=32, kernel_size=4, padding = 'same', activation = 'relu', name='Conv1DLayer'),
tf.keras.layers.MaxPooling1D(pool_size=2, name='MaxPoolLayer'),
tf.keras.layers.LSTM(64, dropout = 0.2, name='DropoutLayer'),
tf.keras.layers.Dense(250, activation = 'relu', name='DenseLayer'),
tf.keras.layers.Dense(1, activation='sigmoid', name='Output_Layer')
])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
def preprocess_y(x, y):
    return x, tf.expand_dims(y, -1)
history1 = model1.fit(
train_dataset.map(preprocess_y),
batch_size=BATCH_SIZE,
epochs=1)
model1.evaluate(test_dataset)
ValueError: logits and labels must have the same shape ((None, 1) vs ())
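One detail visible in the code above is that test_dataset is never batched or mapped through preprocess_y, so each label arrives as a scalar. A minimal sketch (an assumption, not a confirmed fix) of evaluating with the same preparation as training:
# Sketch: prepare the test split the same way as the training split.
test_dataset_prepared = (test_dataset
                         .batch(BATCH_SIZE)
                         .map(preprocess_y)
                         .prefetch(1))
model1.evaluate(test_dataset_prepared)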
I am getting this error when I am trying to train my model:
ValueError: Input 0 of layer dense_encoder is incompatible with the layer: expected axis -1 of input shape to have value 2048 but received input with shape [446, 98, 1024]
My model architecture is:
input1 = Input(shape=(2048), name='Image_1')
dense1 = Dense(256, kernel_initializer=tf.keras.initializers.glorot_uniform(seed = 56), name='dense_encoder')(input1)
input2 = Input(shape=(153), name='Text_Input')
emb_layer = Embedding(input_dim = vocab_size, output_dim = 300, input_length=153, mask_zero=True, trainable=False,
weights=[embedding_matrix], name="Embedding_layer")
emb = emb_layer(input2)
LSTM1 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
bias_initializer=tf.keras.initializers.zeros(), return_sequences=True, name="LSTM1")(emb)
#LSTM1_output = LSTM1(emb)
LSTM2 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
bias_initializer=tf.keras.initializers.zeros(), name="LSTM2")
LSTM2_output = LSTM2(LSTM1)
dropout1 = Dropout(0.5, name='dropout1')(LSTM2_output)
dec = tf.keras.layers.Add()([dense1, dropout1])
fc1 = Dense(256, activation='relu', kernel_initializer=tf.keras.initializers.he_normal(seed = 63), name='fc1')
fc1_output = fc1(dec)
dropout2 = Dropout(0.4, name='dropout2')(fc1_output)
output_layer = Dense(vocab_size, activation='softmax', name='Output_layer')
output = output_layer(dropout2)
encoder_decoder = Model(inputs = [input1, input2], outputs = output)
encoder_decoder.summary()
Here's my code for training the model:
for epoch in range(20):
    print('EPOCH : ', epoch+1)
    start = time.time()
    batch_loss_tr = 0
    batch_loss_vl = 0
    for img, report in train_generator:
        r1 = bytes_to_string(report.numpy())
        img_input, rep_input, output_word = convert(img.numpy(), r1)
        rep_input = pad_sequences(rep_input, maxlen=153, padding='post')
        results = encoder_decoder.train_on_batch([img_input, rep_input], output_word)
        batch_loss_tr += results
    train_loss = batch_loss_tr/(X_train.shape[0]//14)
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss, step=epoch)
    for img, report in cv_generator:
        r1 = bytes_to_string(report.numpy())
        img_input, rep_input, output_word = convert(img.numpy(), r1)
        rep_input = pad_sequences(rep_input, maxlen=153, padding='post')
        results = encoder_decoder.test_on_batch([img_input, rep_input], output_word)
        batch_loss_vl += results
The img_input shape is (417, 98, 1024), and I am getting the error for the Image_1 layer.
What could be the reason? Any help would be appreciated.
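Not a definitive answer, but one hedged sketch of how the shapes could be reconciled, assuming the per-image features really arrive as (98, 1024) rather than a flat 2048-dim vector (GlobalAveragePooling1D is an illustrative choice here, not part of the original design):
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D

# Assumption: image features have shape (batch, 98, 1024) instead of (batch, 2048).
input1 = Input(shape=(98, 1024), name='Image_1')
pooled = GlobalAveragePooling1D(name='image_pooling')(input1)  # -> (batch, 1024)
dense1 = Dense(256,
               kernel_initializer=tf.keras.initializers.glorot_uniform(seed=56),
               name='dense_encoder')(pooled)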