I'm implementing Keras text classification from scratch on an airline dataset of reviews, but I'm getting the error that logits and labels must have the same shape, (None, 1) vs ().
The code below is a bit long, but it includes the tensor preprocessing, and the model uses the functional API:
max_features = 200
sequence_length = None
embedding_dim = 128
from tensorflow.keras.layers import TextVectorization
dtrain_lab = data_train[['airline_sentiment','negativereason']].to_numpy()
display(dtrain_lab)
tlist_txt = data_train['negativereason'].tolist()
tlist_sent = data_train['airline_sentiment'].tolist()
rac = 0
for k in tlist_txt:
    rap = tlist_txt[rac]
    if pd.isnull(rap) == True:
        tlist_txt[rac] = 'empty'
    rac += 1
#p-prueba
p_list = []
for i in tlist_sent:
    if i == 'positive':
        p_list.append(1)
    if i == 'negative' or i == 'neutral':
        p_list.append(0)
train_sent = np.array(p_list)
val_txt = data_val['negativereason'].tolist()
val_sent = data_val['airline_sentiment'].tolist()
l_val = []
for j in val_sent:
    if j == 'positive':
        l_val.append(1)
    if j == 'negative' or j == 'neutral':
        l_val.append(0)
sent_val_na = np.array(l_val)
dac = 0
for k in val_txt:
    hap = val_txt[dac]
    if pd.isnull(hap) == True:
        val_txt[dac] = 'empty'
    dac += 1
tftrain_db = tf.data.Dataset.from_tensor_slices((tlist_txt, train_sent))
tfval_db = tf.data.Dataset.from_tensor_slices((val_txt, sent_val_na))
vectorize_layer = TextVectorization(
    standardize='lower_and_strip_punctuation',
    split="whitespace",
    max_tokens=200,
    output_mode="int",
    output_sequence_length=30,
)
def vectorize_text(text, label):
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label
text_ds = tftrain_db.map(lambda x, y: x)
vectorize_layer.adapt(text_ds)
v_dbtrain = tftrain_db.map(vectorize_text)
v_dbval = tfval_db.map(vectorize_text)
from tensorflow.keras import layers
inputs = tf.keras.Input(shape=(None,), dtype="int64")
x = layers.Embedding(max_features, embedding_dim)(inputs)
x = layers.Dropout(0.5)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation="relu", input_shape=(None,))(x)
x = layers.Dropout(0.5)(x)
predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)
model = tf.keras.Model(inputs, predictions)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
epochs = 3
model.fit(v_dbtrain, validation_data= tfval_db, epochs=epochs)
Error is:
ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs ()).
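As a side note (this is an assumption about the likely cause, not something stated above): with binary_crossentropy and a Dense(1) sigmoid output, Keras expects batched labels of shape (batch_size, 1), while an unbatched tf.data.Dataset yields one scalar label of shape () per element. A minimal sketch of batching the raw datasets before mapping, in the spirit of the Keras text-classification-from-scratch tutorial, could look like this (batch_size is an illustrative value, not taken from the code above):

batch_size = 32  # illustrative; not from the original code
tftrain_db = tf.data.Dataset.from_tensor_slices((tlist_txt, train_sent)).batch(batch_size)
tfval_db = tf.data.Dataset.from_tensor_slices((val_txt, sent_val_na)).batch(batch_size)
v_dbtrain = tftrain_db.map(vectorize_text)  # text -> (batch, 30), labels -> (batch,)
v_dbval = tfval_db.map(vectorize_text)
model.fit(v_dbtrain, validation_data=v_dbval, epochs=epochs)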
I have a problem: I want to get the confidence/probability for my prediction. How could I get the confidence? I looked at "How can I implement confidence level in a CNN with tensorflow?", but I do not understand how I could get the prediction.
class CNN_Text:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def forward(self):
        filter_sizes = [1, 2, 3, 5]
        num_filters = 32
        inp = Input(shape=(maxlen, ))
        x = Embedding(embedding_matrix.shape[0], 300, weights=[embedding_matrix], trainable=False)(inp)
        x = SpatialDropout1D(0.4)(x)
        x = Reshape((maxlen, embed_size, 1))(x)
        conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embed_size), kernel_initializer='normal',
                        activation='elu')(x)
        conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], embed_size), kernel_initializer='normal',
                        activation='elu')(x)
        conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], embed_size), kernel_initializer='normal',
                        activation='elu')(x)
        conv_3 = Conv2D(num_filters, kernel_size=(filter_sizes[3], embed_size), kernel_initializer='normal',
                        activation='elu')(x)
        maxpool_0 = MaxPool2D(pool_size=(maxlen - filter_sizes[0] + 1, 1))(conv_0)
        maxpool_1 = MaxPool2D(pool_size=(maxlen - filter_sizes[1] + 1, 1))(conv_1)
        maxpool_2 = MaxPool2D(pool_size=(maxlen - filter_sizes[2] + 1, 1))(conv_2)
        maxpool_3 = MaxPool2D(pool_size=(maxlen - filter_sizes[3] + 1, 1))(conv_3)
        z = Concatenate(axis=1)([maxpool_0, maxpool_1, maxpool_2, maxpool_3])
        # z = Dropout(0.3)(z)
        z = Flatten()(z)
        z = Dropout(0.3)(z)
        outp = Dense(53, activation="softmax")(z)
        model = Model(inputs=inp, outputs=outp)
        model.summary()
        return model
p1 = CNN_Text(...)
model = p1.forward()
model.compile(...)
history = model.fit(...)
pred = model.predict(...)
This is how I predict a class:
x = x.lower()
x = remove_URL(x)
x = remove_punct(x)
x = remove_stopwords(x)
x = tokenizer.texts_to_sequences([x])
x = pad_sequences(x, maxlen=maxlen)
pred = model.predict(x)
pred = pred.argmax(axis=1)
pred = le.classes_[pred]
return pred[0]
The Softmax activation function normalises the output of the network, giving you the predicted probability of each of the 53 classes for a given sample.
pred = pred.argmax(axis=1)
This line gives you the index of the node with the highest predicted probability.
pred = pred.max(axis=1)
This will give you the corresponding probability instead (provided you don't overwrite pred with the argmax result first).
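Putting the two together, here is a small sketch (assuming the model, tokenizer, le, maxlen, and preprocessing helpers from the question) that returns the predicted class together with its confidence:

def predict_with_confidence(x):
    # same preprocessing pipeline as in the question
    x = remove_stopwords(remove_punct(remove_URL(x.lower())))
    x = pad_sequences(tokenizer.texts_to_sequences([x]), maxlen=maxlen)
    probs = model.predict(x)             # shape (1, 53): softmax probabilities
    class_idx = probs.argmax(axis=1)[0]  # index of the most likely class
    confidence = probs.max(axis=1)[0]    # probability of that class
    return le.classes_[class_idx], confidence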
I have a dataset of Tweets labelled with sentiments. I have pre-processed the data and done part-of-speech tagging (all via NLTK in Python). After preprocessing, the data looks like this:
[Image: pre-processed tweets]
After preprocessing, the training data is prepared with the following code:
full_text = list(train['content'].values) + list(test['content'].values)
tokenizer = Tokenizer(num_words=20000,lower = True, filters = '')
tokenizer.fit_on_texts(full_text)
train_tokenized = tokenizer.texts_to_sequences(train['content'])
test_tokenized = tokenizer.texts_to_sequences(test['content'])
max_len = 50
X_train = pad_sequences(train_tokenized, maxlen = max_len)
X_test = pad_sequences(test_tokenized, maxlen = max_len)
embed_size = 300
max_features = 20000
def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')

def get_embed_mat(embedding_path):
    embedding_index = dict(get_coefs(*o.strip().split(" ")) for o in open(embedding_path, encoding="utf8"))
    word_index = tokenizer.word_index
    nb_words = min(max_features, len(word_index))
    print(nb_words)
    embedding_matrix = np.zeros((nb_words + 1, embed_size))
    for word, i in word_index.items():
        if i >= max_features:
            continue
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
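For reference, the matrix would then be built once from a pretrained word-vector file; the path below is purely illustrative and not part of the original code:

embedding_matrix = get_embed_mat('glove.840B.300d.txt')  # any 300-dimensional embedding file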
A deep learning model is built with Word embeddings as a layer. The code for model building is given below:
def build_model1(lr=0.0, lr_d=0.0, units=0, dr=0.0):
    inp = Input(shape=(max_len,))
    x = Embedding(20001, embed_size, weights=[embedding_matrix], trainable=False)(inp)
    x1 = SpatialDropout1D(dr)(x)
    x_lstm = Bidirectional(LSTM(units, return_sequences=True))(x1)
    x1 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm1 = GlobalAveragePooling1D()(x1)
    max_pool1_lstm1 = GlobalMaxPooling1D()(x1)
    x_lstm = Bidirectional(LSTM(units, return_sequences=True))(x1)
    x1 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x1)
    max_pool1_lstm = GlobalMaxPooling1D()(x1)
    x = concatenate([avg_pool1_lstm1, max_pool1_lstm1,
                     avg_pool1_lstm, max_pool1_lstm])
    # x = BatchNormalization()(x)
    x = Dropout(0.1)(Dense(128, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(0.1)(Dense(64, activation='relu')(x))
    x = Dense(8, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy"])
    history = model.fit(X_train, y_one_hot, batch_size=128, epochs=20, validation_split=0.1,
                        verbose=1, callbacks=[check_point, early_stop])
    model = load_model(file_path)
    return model
I want to use LIME to explain the predictions of this model (as shown in the image below), but it is not working.
[Image: LIME text explanation of the model]
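For what it's worth, LimeTextExplainer expects a classifier function that maps a list of raw strings to an array of class scores, so the tokenizer/padding step has to be wrapped. A hedged sketch, assuming model is the network returned by build_model1 and the class names below are placeholders:

from lime.lime_text import LimeTextExplainer

def predict_proba(texts):
    # reproduce the training-time preprocessing for a list of raw strings
    seqs = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(seqs, maxlen=max_len)
    return model.predict(padded)  # shape (n_samples, 8)

explainer = LimeTextExplainer(class_names=[str(i) for i in range(8)])
explanation = explainer.explain_instance(train['content'].iloc[0], predict_proba,
                                         num_features=10, top_labels=1)
explanation.show_in_notebook()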
I trained a network to perform semantic segmentation with dropout, and it is my understanding that as you vary the dropout keep_prob value, the output prediction changes. However, after saving the model using the tensorflow-serving method, loading it using tf.saved_model.loader.load, and varying the dropout value, I get the same output prediction value (dice score).
I followed the suggestions in this SO post, but I still get the same prediction results even if I enter 0.0.
I didn't know whether it was a TensorFlow issue or a bug in my code, so I tried downgrading from v1.15 to v1.10 to see if it was the former, and I still got the same results. I am now sure it is a bug in my code, but I am not sure where it is. A minimum working example is shown below. Could someone help me? Thank you!
This is a snippet of my training script:
#===============
def run_iteration(self, feed_dict, op_list, summaries):
    output_args = self.sess.run(op_list, feed_dict=feed_dict)
    return output_args

#===============
def run_epoch_train(self, curr_epoch):
    print('Training over all batches')
    num_total_batches = self.num_total_batches_train
    curr_batch_counter = 0
    # for each batch in training images
    for batch in self.batch_iterator_train:
        # dropout is included
        if self.dropout_training_Flag == 1:
            _, loss, dice = self.run_iteration(
                feed_dict={
                    self.placeholders['images']: batch['images'],
                    self.placeholders['labels']: batch['labels'],
                    self.placeholders['is_training']: True,
                    self.placeholders['dropout_prob']: self.dropout_prob_training,
                },
                op_list=[
                    self.fitting_op,
                    self.losses[self.active_loss],
                    #self.outputs['sigmoid'],
                    self.outputs['dice'],
                ],
                summaries=[],
            )
        curr_batch_counter = curr_batch_counter + 1
        if (self.iteration % 5) == 0:
            print('Saving model in training session')
            self.saver.save(curr_epoch + 1)
This is a snippet of my testing script:
#===============
path_to_model = self.root_path_to_models + '/' + '25'
print(path_to_model)
model = tf.saved_model.loader.load( #tf.saved_model.loader.load(
sess,
[tf.saved_model.tag_constants.SERVING],
path_to_model
)
inputImage_name = model.signature_def['prediction'].inputs['images'].name
x_inp = tf.get_default_graph().get_tensor_by_name(inputImage_name)
isTraining_name = model.signature_def['prediction'].inputs['is_training'].name
tflag_op = tf.get_default_graph().get_tensor_by_name(isTraining_name)
outputs_name = model.signature_def['prediction'].outputs['sigmoid'].name
y_op = tf.get_default_graph().get_tensor_by_name(outputs_name)
if self.dropout_training_Flag == 1:
    dropoutProb_name = model.signature_def['prediction'].inputs['dropout_prob'].name
    dropout_prob_op = tf.get_default_graph().get_tensor_by_name(dropoutProb_name)
    print(dropout_prob_op)
# iterate over batches of images
# iterate over motion category
for moCat in self.motion_categories:
    # get datasets in motion category
    datasets_in_moCat = d_ffn_images_labels[moCat]
    dataset_name = list(datasets_in_moCat.keys())[-1]
    #print(dataset_name)
    loss_for_each_image = []
    final_vol = np.zeros((self.original_input_image_width, self.original_input_image_height, self.num_vol_slices), dtype = np.uint8)
    # get images
    curr_dataset_images = datasets_in_moCat[dataset_name][0][0]
    # get labels
    curr_dataset_labels = datasets_in_moCat[dataset_name][0][1]
    #current dataset label numbers
    curr_dataset_label_numbers = d_bfnumber_images_labels[moCat][dataset_name]
    #print('curr_dataset_label_numbers',curr_dataset_label_numbers)
    # number of images/labels in current dataset, for current category
    num_images = len(curr_dataset_images)
    num_labels = len(curr_dataset_labels)
    # check if num-images/labels are the same
    assert(num_images == num_labels)
    # load each image
    for elem_idx in range(num_images):
        img_path = curr_dataset_images[elem_idx]
        lab_path = curr_dataset_labels[elem_idx]
        xn = nib.load(img_path)
        x = np.array(xn.dataobj)
        labn = nib.load(lab_path)
        lab = np.array(labn.dataobj)
        data_affine_tform = xn.affine
        # resize
        xr = cv2.resize(x, (self.network_input_image_width, self.network_input_image_height), interpolation = cv2.INTER_LANCZOS4)
        # standardize
        y = standardize_zeroMeanUnitVar_image(copy.deepcopy(xr), self.network_input_image_width, self.network_input_image_height, self.network_input_channels)
        #y = cv2.normalize(copy.deepcopy(xr), None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        # match network input -- [height, width, channels]
        y = np.reshape(y, newshape=(self.network_input_image_height, self.network_input_image_width, self.network_input_channels))
        # append to match network input -- [batch, height, width, channels]
        input_list = []
        input_list.append(y)
        input_list = np.asarray(input_list).astype(np.float32)
        # ======================
        # MODIFY DROPOUT HERE FROM JSON FILE
        # CHANGED VALUES FROM 0.0, 0.5, 1.0 -- same prediction score
        # ======================
        # run and get output
        if self.dropout_training_Flag == 1:
            output = sess.run(y_op, feed_dict={x_inp: input_list, tflag_op: True, dropout_prob_op: self.dropout_prob_testing})
        else:
            output = sess.run(y_op, feed_dict={x_inp: input_list, tflag_op: False})
        tmpOut = cv2.resize(output[0,:,:,0], (self.original_input_image_width, self.original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
        prediction = np.asarray((tmpOut > 0.5))
        labels = np.asarray((lab > 0))
        EPS = 0.0000001
        #output_original = cv2.resize(output[0,:,:,0], (original_input_image_width, original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
        loss = 2.0 * np.sum(labels * prediction, axis=(0, 1)) / (np.sum(labels ** 2 + prediction ** 2, axis=(0, 1)) + EPS)
        loss_for_each_image.append(loss)
        #place slice in final_vol
        #print(curr_dataset_label_numbers[elem_idx][1])
        #print(type(curr_dataset_label_numbers[elem_idx][1]))
        final_vol[:,:,curr_dataset_label_numbers[elem_idx][1] - 1] = np.asarray(prediction*255.0).astype(np.uint8)
    # dice mean over dataset
    dice_mean_for_dataset = np.mean(loss_for_each_image)
    print(dataset_name, dice_mean_for_dataset)
    self.diceScore_for_each_dataset.append(dice_mean_for_dataset)
    self.list_dataset_name.append(dataset_name)
This is the code for the inputs/outputs:
#===============
def create_placeholders(self):
    self.placeholders['images'] = tf.placeholder(
        shape=[None] + self.network_input_size + [self.network_input_channels],
        name='images',
        dtype=tf.float32
    )
    self.placeholders['labels'] = tf.placeholder(
        shape=[None] + self.network_input_size + [self.network_output_channels],
        name='labels',
        dtype=tf.float32
    )
    self.placeholders['is_training'] = tf.placeholder(
        shape=[],
        name='is_training',
        dtype=tf.bool
    )
    # dropout is included
    if self.dropout_training_Flag == 1:
        self.placeholders['dropout_prob'] = tf.placeholder(
            shape=[],
            name='dropout_prob',
            dtype=tf.float32
        )
#===============
def create_outputs(self):
    if self.network_name == 'UNet':
        print('\n')
        print('Training UNet')
        # dropout is included
        if self.dropout_training_Flag == 1:
            # train with dropout
            unet_output = unet_dropout(
                self.placeholders['images'],
                self.placeholders['is_training'],
                self.placeholders['dropout_prob'],
                self.network_output_channels
            )
        if self.network_output_channels == 1:
            self.outputs['sigmoid'] = unet_output
        else:
            self.outputs['sigmoid'] = unet_output
This is the code for my model:
#===============
def batch_norm_relu(inputs, is_training):
    net = slim.batch_norm(inputs, is_training=is_training)
    net = tf.nn.relu(net)
    return net

#===============
def dropout(input, keep_prob, is_training):
    if is_training == True:
        dropout = tf.nn.dropout(input, keep_prob)
    else:
        dropout = input
    return dropout
#===============
def model(inputs, is_training, keep_prob, num_classes):
    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        base_num_kernels = 64
        # =================================
        # encoder
        # 256
        x = conv2d_fixed_padding(inputs=inputs, filters=base_num_kernels, kernel_size=3, stride=1)
        x = batch_norm_relu(x, is_training)
        x = conv2d_fixed_padding(inputs=x, filters=base_num_kernels, kernel_size=3, stride=1)
        x = batch_norm_relu(x, is_training)
        output_b1 = x
        output_list_b1 = [x]
        output_b1 = dropout(output_b1, keep_prob, is_training)
        output_b1 = tf.layers.max_pooling2d(inputs=output_b1, pool_size=2, strides=2, padding='SAME')
        # =================================
        # 128
        x = conv2d_fixed_padding(inputs=output_b1, filters=2*base_num_kernels, kernel_size=3, stride=1)
        x = batch_norm_relu(x, is_training)
        x = conv2d_fixed_padding(inputs=x, filters=2*base_num_kernels, kernel_size=3, stride=1)
        x = batch_norm_relu(x, is_training)
        output_b2 = x
        output_list_b2 = [x]
        output_b2 = dropout(output_b2, keep_prob, is_training)
        # =================================
        # decoder
        # 128 -> 256
        output_b3 = conv2d_transpose(output_b2, kernel_size=2, output_channels=base_num_kernels)
        output_b4 = tf.concat([output_b3, x], axis=3)
        # =================================
        # 256
        conv_final = conv2d_fixed_padding(inputs=output_b4, filters=base_num_kernels, kernel_size=3, stride=1)
        conv_final = batch_norm_relu(conv_final, is_training)
        conv_final = conv2d_fixed_padding(inputs=conv_final, filters=base_num_kernels, kernel_size=3, stride=1)
        conv_final = batch_norm_relu(conv_final, is_training)
        # =================================
        # output
        outputs = conv2d_fixed_padding(inputs=conv_final, filters=num_classes, kernel_size=3, stride=1)
        if num_classes == 1:
            outputs = tf.nn.sigmoid(outputs)
        else:
            h = outputs.get_shape().as_list()[1]
            w = outputs.get_shape().as_list()[2]
            outputs_reshaped = tf.reshape(outputs, np.asarray([-1, num_classes]))
            outputs_final = tf.nn.softmax(outputs_reshaped)
            outputs = tf.reshape(outputs_final, np.asarray([-1, h, w, num_classes]))
        return outputs
This is the way that I save the network weights:
#===============
def __create_summary_manager(self):
    self.saver = Saver(
        self.sess,
        self.placeholders,
        self.outputs,
        self.savepath
    )
#===============
import tensorflow as tf

class Saver(object):
    def __init__(self, sess, input_dict, output_dict, path):
        self.sess = sess
        self.input_dict = input_dict
        self.output_dict = output_dict
        self.path = path
        self.iteration = 0
        self.input_dict_info = {}
        self.output_dict_info = {}
        for key in input_dict.keys():
            self.input_dict_info[key] = \
                tf.saved_model.utils.build_tensor_info(
                    self.input_dict[key]
                )
        for key in output_dict.keys():
            self.output_dict_info[key] = \
                tf.saved_model.utils.build_tensor_info(
                    self.output_dict[key]
                )
        self.prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs=self.input_dict_info,
                outputs=self.output_dict_info)
        )

    def save(self, iteration_val):
        self.iteration += 1
        export_path = os.path.join(
            tf.compat.as_bytes(self.path),
            tf.compat.as_bytes(str(iteration_val))
        )
        self.builder = tf.saved_model.builder.SavedModelBuilder(export_path)
        self.builder.add_meta_graph_and_variables(
            self.sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                'prediction': self.prediction_signature,
            }
        )
        self.builder.save()
I used keras.Model to build the model, but with a custom loss function and a custom training process: I wrote the iteration loop and the sess.run calls myself. Now I want to get the weight l2 loss inside that iterative process. How can I do it?
Suppose the model is as follows:
def model():
    x = Input(shape=(None, None, 3))
    y = Conv2D(10, 3, strides=1, kernel_initializer=tf.glorot_uniform_initializer(), kernel_regularizer=regularizers.l2(0.0005))(x)
    y = Conv2D(16, 3, strides=1, kernel_initializer=tf.glorot_uniform_initializer(), kernel_regularizer=regularizers.l2(0.0005))(y)
    y = Conv2D(32, 3, strides=1, kernel_initializer=tf.glorot_uniform_initializer(), kernel_regularizer=regularizers.l2(0.0005))(y)
    y = Conv2D(16, 3, strides=1, kernel_initializer=tf.glorot_uniform_initializer(), kernel_regularizer=regularizers.l2(0.0005))(y)
    y = Conv2D(1, 3, strides=1, kernel_initializer=tf.glorot_uniform_initializer(), kernel_regularizer=regularizers.l2(0.0005))(y)
    return Model(inputs=[x], outputs=[y])

def loss(y_true, y_pred):
    return tf.softmax_loss(.....)
train code:
def train():
    dataset = tf.TFRecordDataset(tfrecords).make_one_shot_iterator().get_next()
    input_image = tf.placeholder(...)
    label = tf.placeholder(...)
    net = model()
    pred = model(input_image)
    loss_op = loss(label, pred)
    while True:
        imgs, loss = sess.run([dataset, loss_op])
With the above code I don't think the weight (regularization) loss is included anywhere. How can I get it? I tried l2_loss_op = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)), but the value is 0.
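As an aside (this is my assumption, not something confirmed in the question): layers built through a keras Model collect their kernel_regularizer penalties in the model's losses property rather than in the graph collection, so they can be summed and added to the objective by hand, roughly like this:

net = model()
pred = net(input_image)                     # reuse the same instance that holds the regularizers
data_loss_op = loss(label, pred)
reg_loss_op = tf.add_n(net.losses)          # sum of the per-layer l2(0.0005) penalties
total_loss_op = data_loss_op + reg_loss_op  # fetch this (and reg_loss_op) in the sess.run loop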
Update: cleaner and more complete implementation here.
I wrote a custom function to return the l1, l2, and l1_l2 losses from all layers, including recurrent ones, summed up; it does not include activity_regularizer losses, which aren't weight losses:
def l1l2_weight_loss(model):
    l1l2_loss = 0
    for layer in model.layers:
        if 'layer' in layer.__dict__ or 'cell' in layer.__dict__:
            l1l2_loss += _l1l2_rnn_loss(layer)
            continue
        if 'kernel_regularizer' in layer.__dict__ or \
           'bias_regularizer' in layer.__dict__:
            l1l2_lambda_k, l1l2_lambda_b = [0,0], [0,0]  # defaults
            if layer.__dict__['kernel_regularizer'] is not None:
                l1l2_lambda_k = list(layer.kernel_regularizer.__dict__.values())
            if layer.__dict__['bias_regularizer'] is not None:
                l1l2_lambda_b = list(layer.bias_regularizer.__dict__.values())
            if any([(_lambda != 0) for _lambda in (l1l2_lambda_k + l1l2_lambda_b)]):
                W = layer.get_weights()
                for idx, _lambda in enumerate(l1l2_lambda_k + l1l2_lambda_b):
                    if _lambda != 0:
                        _pow = 2**(idx % 2)  # 1 if idx is even (l1), 2 if odd (l2)
                        l1l2_loss += _lambda*np.sum(np.abs(W[idx//2])**_pow)
    return l1l2_loss

def _l1l2_rnn_loss(layer):
    l1l2_loss = 0
    if 'backward_layer' in layer.__dict__:
        bidirectional = True
        _layer = layer.layer
    else:
        _layer = layer
        bidirectional = False
    ldict = _layer.cell.__dict__
    if 'kernel_regularizer' in ldict or \
       'recurrent_regularizer' in ldict or \
       'bias_regularizer' in ldict:
        l1l2_lambda_k, l1l2_lambda_r, l1l2_lambda_b = [0,0], [0,0], [0,0]
        if ldict['kernel_regularizer'] is not None:
            l1l2_lambda_k = list(_layer.kernel_regularizer.__dict__.values())
        if ldict['recurrent_regularizer'] is not None:
            l1l2_lambda_r = list(_layer.recurrent_regularizer.__dict__.values())
        if ldict['bias_regularizer'] is not None:
            l1l2_lambda_b = list(_layer.bias_regularizer.__dict__.values())
        all_lambda = l1l2_lambda_k + l1l2_lambda_r + l1l2_lambda_b
        if any([(_lambda != 0) for _lambda in all_lambda]):
            W = layer.get_weights()
            idx_incr = len(W)//2  # accounts for 'use_bias'
            for idx, _lambda in enumerate(all_lambda):
                if _lambda != 0:
                    _pow = 2**(idx % 2)  # 1 if idx is even (l1), 2 if odd (l2)
                    l1l2_loss += _lambda*np.sum(np.abs(W[idx//2])**_pow)
                    if bidirectional:
                        l1l2_loss += _lambda*np.sum(
                            np.abs(W[idx//2 + idx_incr])**_pow)
    return l1l2_loss
TESTING IMPLEMENTATION:
from keras.layers import Input, Dense, LSTM, GRU, Bidirectional
from keras.models import Model
from keras.regularizers import l1, l2, l1_l2
import numpy as np
ipt = Input(shape=(1200,16))
x = LSTM(60, activation='relu', return_sequences=True,
         recurrent_regularizer=l2(1e-3))(ipt)
x = Bidirectional(GRU(60, activation='relu', bias_regularizer=l1(1e-4)))(x)
out = Dense(1, activation='sigmoid', kernel_regularizer=l1_l2(2e-4))(x)
model = Model(ipt,out)
model.compile(loss='binary_crossentropy', optimizer='adam')
X = np.random.rand(10,1200,16) # (batch_size, timesteps, input_dim)
Y = np.random.randint(0,2,(10,1))
keras_loss = model.evaluate(X,Y)
custom_loss = binary_crossentropy(Y, model.predict(X))
custom_loss += l1l2_weight_loss(model)
print('%.6f'%keras_loss + ' -- keras_loss')
print('%.6f'%custom_loss + ' -- custom_loss')
0.763822 -- keras_loss
0.763822 -- custom_loss
(See my answer for binary_crossentropy implementation)
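For completeness, a NumPy sketch consistent with Keras' binary crossentropy (assuming the default backend epsilon of 1e-7 for clipping; this is an illustration, not the linked answer):

def binary_crossentropy(y_true, y_pred, eps=1e-7):
    # clip to avoid log(0), as the Keras backend does, then average over samples
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return np.mean(-(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)))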
I'm trying to train a Tensorflow Convolutional Neural Network on my RaspberryPi 3B. I get the following error:
2018-08-19 18:57:07.720022: F./tensorflow/core/util/bcast.h:111] Check failed:
vec.size() == NDIMS (1 vs. 2)
Aborted
Not sure what's going wrong, but here's the code:
import tensorflow as tf
import numpy as np
import csv
import pandas as pd
import os
image_height = 60
image_width = 1
image1_height = 15
image2_height = 1
model_name = "TensorflowCNN"
#Training Data Configuration
train_data = np.asarray(pd.read_csv("/media/pi/DISK_IMG/TrainingInput.csv", usecols=[1]))
lis = train_data.tolist()
lis = lis[0:60]
lis = [x[0].strip('[]\n,') for x in lis]
nlis = []
for i in lis:
    nlis.append(i.split())
for i in range(len(nlis)):
    nlis[i] = [float(x) for x in nlis[i] if x != "...,"]
nlis = [np.mean(x) for x in nlis]
train_data = np.asarray(nlis)
#Training Labels Configuration
train_labels = np.asarray(pd.read_csv("/media/pi/DISK_IMG/TrainingInput.csv", usecols=[2]))
mylist = train_labels.tolist()
mylist = mylist[0:60]
mylist = [x[0] for x in mylist]
index = 0
while index < len(mylist):
    if mylist[index] == "GravelTraining":
        mylist[index] = 1
    elif mylist[index] == "WaterTraining":
        mylist[index] = 2
    else:
        mylist[index] = 3
    index = index + 1
train_labels = np.asarray(mylist)
#Validation Data Configuration
eval_data = np.asarray(pd.read_csv("/media/pi/DISK_IMG/TestingInput.csv", usecols=[1]))
List = eval_data.tolist()
List = List[0:15]
eval_data = np.asarray(List)
#Validation Labels Configuration
eval_labels = np.asarray(pd.read_csv("/media/pi/DISK_IMG/TestingInput.csv", usecols=[2]))
myList = eval_labels.tolist()
myList = myList[0:15]
index = 0
while index < len(myList):
    if myList[index] == "GravelTesting":
        myList[index] = 1
    elif myList[index] == "WaterTesting":
        myList[index] = 2
    else:
        myList[index] = 3
    index = index + 1
eval_labels = np.asarray(myList)
category_names = list(map(str, range(3)))
#Processing and reshaping data
train_data = np.reshape(train_data, (-1, image_height, image_width, 1))
train_labels = np.reshape(train_labels, (-1, image_height, image_width, 1))
eval_data = np.reshape(eval_data, (-1, image1_height, image2_height, 1))
eval_labels = np.reshape(eval_labels, (-1, image1_height, image2_height, 1))
#CLASS FOR THE CONVOLUTIONAL NEURAL NETWORK
class ConvNet:
    def __init__(self, image_height, Image_width, num_classes, chan):
        self.input_layer = tf.placeholder(dtype=tf.float32, shape=[1, image_height, Image_width, chan], name="inputs")
        conv_layer_1 = tf.layers.conv2d(self.input_layer, filters=32, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
        pooling_layer_1 = tf.layers.max_pooling2d(conv_layer_1, pool_size=[2, 1], strides=1)
        flattened_pooling = tf.layers.flatten(pooling_layer_1)
        dense_layer = tf.layers.dense(flattened_pooling, 60, activation=tf.nn.relu)
        dropout = tf.layers.dropout(dense_layer, rate=0.4, training=True)
        output_dense_layer = tf.layers.dense(dropout, num_classes)
        self.choice = tf.argmax(output_dense_layer, axis=1)
        self.probabilities = tf.nn.softmax(output_dense_layer)
        self.labels = tf.placeholder(dtype=tf.float32, name="labels")
        self.accuracy, self.accuracy_op = tf.metrics.accuracy(self.labels, self.choice)
        one_hot_labels = tf.one_hot(indices=tf.cast(self.labels, dtype=tf.int32), depth=num_classes)
        self.loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=output_dense_layer)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2)
        self.train_operation = optimizer.minimize(loss=self.loss, global_step=tf.train.get_global_step())
#Training process:variables
training_steps = 20000
batch_size = 60
path = "./" + model_name + "-cnn/"
load_checkpoint = False
tf.reset_default_graph()
dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
dataset = dataset.shuffle(buffer_size=train_labels.shape[0])
dataset = dataset.batch(batch_size)
dataset = dataset.repeat()
dataset_iterator = dataset.make_initializable_iterator()
next_element = dataset_iterator.get_next()
#Final initialization of Neural Network and Training Process
cnn = ConvNet(image_height, image_width, 1, 1)
print("milestone1")
saver = tf.train.Saver(max_to_keep=2)
print('milestone2')
if not os.path.exists(path):
    os.makedirs(path)
print('milestone3')
#Training Loop For neural network
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('milestone4')
    sess.run(tf.local_variables_initializer())
    sess.run(dataset_iterator.initializer)
    for step in range(training_steps):
        current_batch = sess.run(next_element)
        batch_inputs = current_batch[0]
        batch_labels = current_batch[1]
        print("milestone5")
        sess.run((cnn.train_operation, cnn.accuracy_op), feed_dict={cnn.input_layer: batch_inputs, cnn.labels: batch_labels})
        if step % 1 == 0 and step > 0:
            current_acc = sess.run(cnn.accuracy)
            print("Accuracy at step " + str(step) + ":" + str(current_acc))
            saver.save(sess, path + model_name, step)
    print("Saving final checkpoint for training session.")
    saver.save(sess, path + model_name, step)
I know it's long, but I would appreciate it if someone could let me know what's wrong with my program. There is no traceback. I'm getting the notification from Stack Overflow that my post is mostly code, but there's really not much more to say about the problem. Also, I have looked at many questions about this error and have not found a usable answer in any of them. Thanks in advance!