I'm trying to define the following (toy) custom loss function in Keras:
import pandas as pd
from tensorflow.keras.losses import categorical_crossentropy

def flexed_distance_loss(y_true, y_pred):
    y_true_df = pd.DataFrame(y_true, columns=my_columns)
    # do something with y_true_df
    return categorical_crossentropy(y_true_df.values, y_pred)
I'm running this model on GPU with tf.distribute.MirroredStrategy().
Compiling the model generates no error, but when running model.fit(), the following error happens:
>>> y_true_df = pd.DataFrame(y_true, columns=my_columns)
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed:
AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
It seems that Pandas is trying to iterate over the tensor y_true, which is forbidden in graph mode (the preferred mode when training on GPU).
Should I conclude that it is simply not possible to use Pandas within a loss function when training on GPU?
What would be some plausible alternatives, other than doing all the manipulations directly in TensorFlow itself? I'm doing quite a lot of heavy re-indexing and merging, and I can't begin to imagine the pain of doing all this in native TensorFlow code.
Note:
For reference, this is the kind of manipulation I'm trying to make:
def flexed_distance_loss(y_true, y_pred):
y_true_df = pd.DataFrame(y_true, columns=my_columns)
y_true_custom = y_true_df.idxmax(axis=1).to_frame(name='my_name')
y_true_df = pd.concat([y_true_custom, y_true_df], axis=1)
y_true_df = y_true_df.where(y_true_df != 0, np.NaN)
y_true_df = y_true_df.reset_index().set_index('my_name')
nearby = y_true_df.fillna(pivoted_df.reindex(y_true_df.index)) \
.fillna(0) \
.set_index('index').sort_index()
nearby = np.expm1(nearby).div(np.sum(np.expm1(nearby), axis=1), axis=0)
y_true_flexed = nearby.values
return categorical_crossentropy(y_true_flexed, y_pred)
Actually, I realised that all I'm doing within the custom loss function is transforming y_true. In the real case I transform it based on a random number (if random.random() > 0.1, then apply the transformation).
The most appropriate place to do this is not in a loss function, but in the batch generator instead.
import math
import random
import tensorflow as tf

class BatchGenerator(tf.keras.utils.Sequence):
    def __init__(self, indices, batch_size, mode):
        self.indices = indices
        self.batch_size = batch_size
        self.mode = mode

    def __len__(self):
        # Number of batches per epoch.
        return math.ceil(len(self.indices) / self.batch_size)

    def __getitem__(self, idx):
        batch = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        # X and y are the full feature/label arrays, defined elsewhere.
        X_batch = X[batch, :]
        y_batch = y[batch, :]
        if self.mode == 'train' and random.random() > 0.3:
            # pick y from the regular batch
            return X_batch, y_batch
        else:
            # apply flex-distancing to y
            return X_batch, flex_distance_batch(y_batch)
batch_size = 512*4
train_generator = BatchGenerator(range(0, test_cutoff), batch_size, 'train')
test_generator = BatchGenerator(range(test_cutoff, len(y_df)), batch_size, 'test')
This way the transformations are applied directly in the batch generator, and Pandas is perfectly allowed here, as we're dealing only with NumPy arrays on the CPU.
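For reference, a sketch of what flex_distance_batch could look like: it is essentially the transformation from the loss-function attempt above, applied to a plain NumPy batch (my_columns and pivoted_df are the same objects referenced earlier in the question and are not defined here):
import numpy as np
import pandas as pd

def flex_distance_batch(y_batch):
    # y_batch is a plain NumPy array here, so Pandas can operate on it freely.
    y_df = pd.DataFrame(y_batch, columns=my_columns)
    y_custom = y_df.idxmax(axis=1).to_frame(name='my_name')
    y_df = pd.concat([y_custom, y_df], axis=1)
    y_df = y_df.where(y_df != 0, np.nan)
    y_df = y_df.reset_index().set_index('my_name')
    nearby = (y_df.fillna(pivoted_df.reindex(y_df.index))
                  .fillna(0)
                  .set_index('index').sort_index())
    nearby = np.expm1(nearby).div(np.sum(np.expm1(nearby), axis=1), axis=0)
    return nearby.values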
Related
I need to build a custom loss method based on BLEU. I'm passing my LabelEncoder in the constructor to reverse labels and predictions and calculate the bleu distance.
Here is my Loss class
import numpy as np
import tensorflow as tf
from nltk.translate.bleu_score import sentence_bleu  # sentence_bleu presumably from NLTK
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.losses import Loss

class CIMCodeSuccessiveLoss(Loss):
    def __init__(self, labelEncoder: LabelEncoder):
        super().__init__()
        self.le = labelEncoder
def bleu_score(self, true_label, pred_label):
cim_true_label = self.le.inverse_transform(true_label.numpy())
cim_pred_label = self.le.inverse_transform(pred_label.numpy())
bleu_scores = [sentence_bleu(list(one_true_label),
list(one_pred_label),
weights=(0.5, 0.25, 0.125, 0.125)) for one_true_label, one_pred_label in
zip(cim_true_label, cim_pred_label)]
return np.float32(bleu_scores)
def call(self, y_true, y_pred):
labeled_y_pred = tf.cast(tf.argmax(y_pred, axis=-1), tf.int32)
bleu = tf.py_function(self.bleu_score, (tf.reshape(y_true, [-1]), labeled_y_pred), tf.float32)
return tf.reduce_sum(tf.square(1 - bleu))
The bleu_score method calculates the correct scores and returns a NumPy array.
When I try to return the squared sum, I get this error:
raise ValueError(f"No gradients provided for any variable: {variable}.
I'm also providing the model:
inputs = tf.keras.Input(shape=(1,), dtype=tf.string)
x = vectorize_layer(inputs)
x = Embedding(vocab_size, embedding_dim, name="embedding")(x)
x = LSTM(units=32, name="lstm")(x)
outputs = Dense(classes_number, name="classification")(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs, name="first_cim_classifier")
model.summary()
# we add early stopping for our model.
early_stopping = EarlyStopping(monitor='loss', patience=2)
model.compile(
loss=CIMCodeSuccessiveLoss(le),
optimizer=tf.keras.optimizers.Adam(),
metrics=["accuracy", "crossentropy"],
run_eagerly=True)
trained_model = model.fit(np.array(x_train), np.array(y_train), batch_size=64, epochs=10,
validation_data=(np.array(x_val), np.array(y_val)),
callbacks=[early_stopping])
Any help is appreciated. Thanks in advance.
To calculate the loss you use tf.argmax(y_pred, axis=-1). argmax is not differentiable, so automatic differentiation cannot compute the gradients through it; you have to remove this method. For example (depending on your data), you can change the output layer to softmax and the labels to one-hot.
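For illustration, a minimal sketch of that suggestion (this is not the asker's exact code; it reuses x, inputs, classes_number and y_train from the question's model):
# Softmax output plus one-hot labels, so no argmax is needed inside the loss.
outputs = tf.keras.layers.Dense(classes_number, activation="softmax",
                                name="classification")(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# One-hot encode the integer class ids produced by the LabelEncoder.
y_train_one_hot = tf.one_hot(np.array(y_train), depth=classes_number)

model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(),
              metrics=["accuracy"])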
The issue is that the argmax function is not differentiable, which is problematic when including it in a loss function:
labeled_y_pred = tf.cast(tf.argmax(y_pred, axis=-1), tf.int32)
One way to work around this is to use a differentiable approximation of the argmax function, similar to the smooth maximum function:
soft-argmax(x) = Σᵢ i · exp(β·xᵢ) / Σⱼ exp(β·xⱼ)
As β approaches infinity, this approaches the true argmax. For your purposes, β=10 or β=100 should accomplish your goals.
In Tensorflow, this could be accomplished as follows:
def differentiable_argmax_approx(x, beta=10, axis=None):
    # Softmax-weighted average of the (1-based) positions, shifted back to 0-based;
    # this approaches argmax(x) as beta grows.
    weights = tf.exp(beta * x) / tf.reduce_sum(tf.exp(beta * x), axis=axis)
    return tf.reduce_sum(tf.cumsum(tf.ones_like(x)) * weights, axis=axis) - 1
Then changing the original line to:
labeled_y_pred = tf.cast(differentiable_argmax_approx(y_pred, axis=-1), tf.int32)
We can verify the functionality with a simple test case:
beta = 10
x = np.array([1, 2, 3, 10, 4, 5], dtype=np.float64)  # np.float is removed in recent NumPy
y = differentiable_argmax_approx(x, beta)
assert x.argmax() == y
One caveat to this approach: if the maximum value is not unique along the axis that we're applying the function to, the result will be the arithmetic mean of the indices. Providing another test case to illustrate:
beta = 10
x = np.array([1, 2, 10, 3, 10], dtype=np.float64)
y = differentiable_argmax_approx(x, beta)
assert y == 3
The result is 3 here, because we have two occurrences of the maximum value (10): one at index 2, and the other at index 4. In contrast, the regular argmax function returns the first index of the maximum argument.
Another improvement would be moving more of the computation into TensorFlow functions. To start, instead of using sklearn's LabelEncoder to apply a mapping in the loss function, you could use a tf.lookup.StaticHashTable to accomplish the same objective with the TensorFlow API. To convert from a LabelEncoder to a tf.lookup.StaticHashTable, you can use the following function:
def convert_label_encoder_to_static_hash_table(le: LabelEncoder,
default_value: int = -1) -> tf.lookup.StaticHashTable:
static_hash_table = tf.lookup.StaticHashTable(
tf.lookup.KeyValueTensorInitializer(
tf.convert_to_tensor(le.classes_),
tf.convert_to_tensor(le.transform(le.classes_))), default_value=default_value)
return static_hash_table
Or, for your purposes, since you're applying the inverse mapping (to go from integers to strings), you may want to swap the keys and the values:
def convert_label_encoder_to_static_hash_table(le: LabelEncoder,
                                               default_value: str = "") -> tf.lookup.StaticHashTable:
    # Keys are the integer ids, values are the original string labels.
    static_hash_table = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(
            tf.convert_to_tensor(le.transform(le.classes_)),
            tf.convert_to_tensor(le.classes_)),
        default_value=default_value)
    return static_hash_table
and, in the initializer:
def __init__(self, labelEncoder: LabelEncoder):
super().__init__()
self.table = convert_label_encoder_to_static_hash_table(labelEncoder)
By operating on tf.Tensor objects, you can use tf.map_fn instead of a for-loop plus conversion to NumPy arrays/lists; your loss function would become:
def bleu_score(self, true_label, pred_label):
    # Look up the original string labels from the inverse table.
    cim_true_label = self.table.lookup(true_label)
    cim_pred_label = self.table.lookup(pred_label)
    # Map over (truth, prediction) pairs; note that fn_output_signature is an
    # argument of tf.map_fn, not of tf.stack, and sentence_bleu returns a float.
    bleu_scores = tf.map_fn(
        lambda x: sentence_bleu([str(x[0])], [str(x[1])],
                                weights=(0.5, 0.25, 0.125, 0.125)),
        elems=tf.stack([cim_true_label, cim_pred_label], axis=1),
        fn_output_signature=tf.float32)
    return bleu_scores
This should also reduce the need for tf.py_function in the loss computation, since the lookup and the iteration in bleu_score are now expressed with TensorFlow operations rather than by first converting to native Python objects.
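As a quick sanity check of the inverse table (a sketch, assuming le is the fitted LabelEncoder from the question):
# The key dtype produced by le.transform() is typically int64, so look up int64 ids.
table = convert_label_encoder_to_static_hash_table(le)
ids = tf.constant([0, 2, 1], dtype=tf.int64)
print(table.lookup(ids))  # tf.Tensor of the corresponding original string labels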
In an attempt to yield more metrics during the training of my model (written in TensorFlow version 2.1.0), like the Character Error Rate (CER) and Word Error Rate (WER), I created a callback to pass to the fit function of my model. It is able to generate the CER and WER at the end of an epoch.
It's my second choice as I wanted to create a custom metric for this, but you can only use keras backend functionality for custom metrics. Does anyone have any advice on how to convert the callback below into a Custom Metric (which can then be calculated during training on the validation and/or training data)?
Some roadblocks I encountered are:
Failure to convert the K.ctc_decode result to a sparse tensor
How can you calculate a distance like edit-distance using the Keras backend?
import editdistance
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

class Metrics(tf.keras.callbacks.Callback):
def __init__(self, valid_data, steps):
"""
valid_data is a TFRecordDataset with batches of 100 elements per batch, shuffled and repeated infinitely.
steps define the amount of batches per epoch
"""
super(Metrics, self).__init__()
self.valid_data = valid_data
self.steps = steps
def on_train_begin(self, logs={}):
self.cer = []
self.wer = []
def on_epoch_end(self, epoch, logs={}):
imgs = []
labels = []
for idx, (img, label) in enumerate(self.valid_data.as_numpy_iterator()):
if idx >= self.steps:
break
imgs.append(img)
labels.extend(label)
imgs = np.array(imgs)
labels = np.array(labels)
out = self.model.predict((batch for batch in imgs))
input_length = len(max(out, key=len))
out = np.asarray(out)
out_len = np.asarray([input_length for _ in range(len(out))])
decode, log = K.ctc_decode(out,
out_len,
greedy=True)
decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode][0]
for (pred, lab) in zip(decode, labels):
dist = editdistance.eval(pred, lab)
self.cer.append(dist / (max(len(pred), len(lab))))
self.wer.append(not np.array_equal(pred, lab))
print("Mean CER: {}".format(np.mean([self.cer], axis=1)[0]))
print("Mean WER: {}".format(np.mean([self.wer], axis=1)[0]))
Solved in TF 2.3.1, but should apply for previous versions of 2.x as well.
Some remarks:
Information on how to properly implement a TensorFlow custom metric is scarce. The question implied the use of a callback to implement the metric. This has longer epochs as a consequence (due to the explicit extra calculation of the metric on_epoch_end), or so I believe. Implementing it as a subclass of tensorflow.keras.metrics.Metric seems the right way, and yields results while the epoch is ongoing (if verbose is set correctly).
Calculating the edit distance for the CER is quite easily done using tf.edit_distance (on sparse tensors); this can subsequently be used to calculate the WER with some TF logic. A minimal standalone sketch of that building block follows these remarks.
Alas, I have yet to find out how to implement both the CER and WER in one metric (as that involves quite some duplicate code); if anyone knows how to do so, please contact me.
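As referenced above, a minimal standalone sketch of tf.edit_distance on its own (toy values, not the question's data):
import tensorflow as tf

# Both arguments must be SparseTensors; zeros act as padding and are dropped
# by tf.sparse.from_dense.
pred = tf.sparse.from_dense(tf.constant([[1, 2, 3, 0]], dtype=tf.int64))
truth = tf.sparse.from_dense(tf.constant([[1, 2, 4, 0]], dtype=tf.int64))
cer = tf.edit_distance(pred, truth, normalize=True)  # normalized by the truth length
print(cer)  # ~[0.333]: one substitution out of three ground-truth tokens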
Custom metrics can simply be added into the compilation of your TF model:
self.model.compile(optimizer=opt, loss=loss, metrics=[CERMetric(), WERMetric()])
import tensorflow as tf
from tensorflow.keras import backend as K

class CERMetric(tf.keras.metrics.Metric):
"""
A custom Keras metric to compute the Character Error Rate
"""
def __init__(self, name='CER_metric', **kwargs):
super(CERMetric, self).__init__(name=name, **kwargs)
self.cer_accumulator = self.add_weight(name="total_cer", initializer="zeros")
self.counter = self.add_weight(name="cer_count", initializer="zeros")
def update_state(self, y_true, y_pred, sample_weight=None):
input_shape = K.shape(y_pred)
input_length = tf.ones(shape=input_shape[0]) * K.cast(input_shape[1], 'float32')
decode, log = K.ctc_decode(y_pred,
input_length,
greedy=True)
decode = K.ctc_label_dense_to_sparse(decode[0], K.cast(input_length, 'int32'))
y_true_sparse = K.ctc_label_dense_to_sparse(y_true, K.cast(input_length, 'int32'))
decode = tf.sparse.retain(decode, tf.not_equal(decode.values, -1))
distance = tf.edit_distance(decode, y_true_sparse, normalize=True)
self.cer_accumulator.assign_add(tf.reduce_sum(distance))
self.counter.assign_add(len(y_true))
def result(self):
return tf.math.divide_no_nan(self.cer_accumulator, self.counter)
def reset_states(self):
self.cer_accumulator.assign(0.0)
self.counter.assign(0.0)
class WERMetric(tf.keras.metrics.Metric):
"""
A custom Keras metric to compute the Word Error Rate
"""
def __init__(self, name='WER_metric', **kwargs):
super(WERMetric, self).__init__(name=name, **kwargs)
self.wer_accumulator = self.add_weight(name="total_wer", initializer="zeros")
self.counter = self.add_weight(name="wer_count", initializer="zeros")
def update_state(self, y_true, y_pred, sample_weight=None):
input_shape = K.shape(y_pred)
input_length = tf.ones(shape=input_shape[0]) * K.cast(input_shape[1], 'float32')
decode, log = K.ctc_decode(y_pred,
input_length,
greedy=True)
decode = K.ctc_label_dense_to_sparse(decode[0], K.cast(input_length, 'int32'))
y_true_sparse = K.ctc_label_dense_to_sparse(y_true, K.cast(input_length, 'int32'))
decode = tf.sparse.retain(decode, tf.not_equal(decode.values, -1))
distance = tf.edit_distance(decode, y_true_sparse, normalize=True)
        # distance != 0 marks sequences with at least one error, so this sum is
        # actually the number of erroneous words (despite the variable name).
        correct_words_amount = tf.reduce_sum(tf.cast(tf.not_equal(distance, 0), tf.float32))
        self.wer_accumulator.assign_add(correct_words_amount)
self.counter.assign_add(len(y_true))
def result(self):
return tf.math.divide_no_nan(self.wer_accumulator, self.counter)
def reset_states(self):
self.wer_accumulator.assign(0.0)
self.counter.assign(0.0)
Alas, I am yet to find out how to implement both the CER and WER in one metric (as it has quite some duplicate code), if anyone knows how to do so, please contact me.
This solution really helped me a lot. As of now TensorFlow 2.10 has been released, so for this version I wrote a combined WER and CER metric; here is the final working code:
import tensorflow as tf
class CWERMetric(tf.keras.metrics.Metric):
""" A custom TensorFlow metric to compute the Character Error Rate
"""
def __init__(self, name='CWER', **kwargs):
super(CWERMetric, self).__init__(name=name, **kwargs)
self.cer_accumulator = tf.Variable(0.0, name="cer_accumulator", dtype=tf.float32)
self.wer_accumulator = tf.Variable(0.0, name="wer_accumulator", dtype=tf.float32)
self.counter = tf.Variable(0, name="counter", dtype=tf.int32)
def update_state(self, y_true, y_pred, sample_weight=None):
input_shape = tf.keras.backend.shape(y_pred)
input_length = tf.ones(shape=input_shape[0], dtype='int32') * tf.cast(input_shape[1], 'int32')
decode, log = tf.keras.backend.ctc_decode(y_pred, input_length, greedy=True)
decode = tf.keras.backend.ctc_label_dense_to_sparse(decode[0], input_length)
y_true_sparse = tf.cast(tf.keras.backend.ctc_label_dense_to_sparse(y_true, input_length), "int64")
decode = tf.sparse.retain(decode, tf.not_equal(decode.values, -1))
distance = tf.edit_distance(decode, y_true_sparse, normalize=True)
        # distance != 0 marks sequences with at least one error, i.e. the word-error count.
        correct_words_amount = tf.reduce_sum(tf.cast(tf.not_equal(distance, 0), tf.float32))
        self.wer_accumulator.assign_add(correct_words_amount)
self.cer_accumulator.assign_add(tf.reduce_sum(distance))
self.counter.assign_add(len(y_true))
def result(self):
return {
"CER": tf.math.divide_no_nan(self.cer_accumulator, tf.cast(self.counter, tf.float32)),
"WER": tf.math.divide_no_nan(self.wer_accumulator, tf.cast(self.counter, tf.float32))
}
I still need to check whether it calculates the CER and WER correctly; if I find out that something is missing, I'll update this.
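For completeness, a usage sketch (model, opt and loss are placeholders for your own training setup). Since result() returns a dict, the training logs should show CER and WER as separate entries, as the author reports for TF 2.10:
# The combined metric plugs into compile() just like the separate ones above.
model.compile(optimizer=opt, loss=loss, metrics=[CWERMetric()])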
I'm using a deep CNN+LSTM network to perform a classification on a dataset of 1D signals. I'm using Keras 2.2.4 backed by TensorFlow 1.12.0. Since I have a large dataset and limited resources, I'm using a generator to load the data into memory during the training phase. First, I tried this generator:
def data_generator(batch_size, preproc, type, x, y):
num_examples = len(x)
examples = zip(x, y)
examples = sorted(examples, key = lambda x: x[0].shape[0])
end = num_examples - batch_size + 1
batches = [examples[i:i + batch_size] for i in range(0, end, batch_size)]
random.shuffle(batches)
while True:
for batch in batches:
x, y = zip(*batch)
yield preproc.process(x, y)
Using the above method, I'm able to launch training with a mini-batch size of up to 30 samples at a time. However, this kind of method does not guarantee that the network will only train once on each sample per epoch. Considering this comment from Keras's website:
Sequence is a safer way to do multiprocessing. This structure guarantees that the network will only train once on each sample per epoch which is not the case with generators.
I've tried another way of loading data using the following class:
class Data_Gen(Sequence):
def __init__(self, batch_size, preproc, type, x_set, y_set):
self.x, self.y = np.array(x_set), np.array(y_set)
self.batch_size = batch_size
self.indices = np.arange(self.x.shape[0])
np.random.shuffle(self.indices)
self.type = type
self.preproc = preproc
def __len__(self):
# print(self.type + ' - len : ' + str(int(np.ceil(self.x.shape[0] / self.batch_size))))
return int(np.ceil(self.x.shape[0] / self.batch_size))
def __getitem__(self, idx):
inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
batch_x = self.x[inds]
batch_y = self.y[inds]
return self.preproc.process(batch_x, batch_y)
def on_epoch_end(self):
np.random.shuffle(self.indices)
I can confirm that using this method the network trains once on each sample per epoch, but this time when I put more than 7 samples in the mini-batch, I get an out-of-memory error:
OP_REQUIRES failed at random_op.cc:202 : Resource exhausted: OOM when allocating tensor with shape...............
I can confirm that I'm using the same model architecture, configuration, and machine for this test. I'm wondering why there would be a difference between these two ways of loading data.
Please don't hesitate to ask for more details in case needed.
Thanks in advance.
EDITED:
Here is the code I'm using to fit the model:
reduce_lr = keras.callbacks.ReduceLROnPlateau(
factor=0.1,
patience=2,
min_lr=params["learning_rate"])
checkpointer = keras.callbacks.ModelCheckpoint(
filepath=str(get_filename_for_saving(save_dir)),
save_best_only=False)
batch_size = params.get("batch_size", 32)
path = './logs/run-{0}'.format(datetime.now().strftime("%b %d %Y %H:%M:%S"))
tensorboard = keras.callbacks.TensorBoard(log_dir=path, histogram_freq=0,
write_graph=True, write_images=False)
if index == 0:
print(model.summary())
print("Model memory needed for batchsize {0} : {1} Gb".format(batch_size, get_model_memory_usage(batch_size, model)))
if params.get("generator", False):
train_gen = load.data_generator(batch_size, preproc, 'Train', *train)
dev_gen = load.data_generator(batch_size, preproc, 'Dev', *dev)
valid_metrics = Metrics(dev_gen, len(dev[0]) // batch_size, batch_size)
model.fit_generator(
train_gen,
steps_per_epoch=len(train[0]) / batch_size + 1 if len(train[0]) % batch_size != 0 else len(train[0]) // batch_size,
epochs=MAX_EPOCHS,
validation_data=dev_gen,
validation_steps=len(dev[0]) / batch_size + 1 if len(dev[0]) % batch_size != 0 else len(dev[0]) // batch_size,
callbacks=[valid_metrics, MyCallback(), checkpointer, reduce_lr, tensorboard])
# train_gen = load.Data_Gen(batch_size, preproc, 'Train', *train)
# dev_gen = load.Data_Gen(batch_size, preproc, 'Dev', *dev)
# model.fit_generator(
# train_gen,
# epochs=MAX_EPOCHS,
# validation_data=dev_gen,
# callbacks=[valid_metrics, MyCallback(), checkpointer, reduce_lr, tensorboard])
Those methods are roughly the same. It is correct to subclass Sequence when your dataset doesn't fit in memory. But you shouldn't run any preprocessing in any of the class's methods, because that will be re-executed once per epoch, wasting lots of computing resources.
It is probably also easier to shuffle the samples rather than their indices, like this:
import numpy as np
from random import shuffle
from keras.utils import Sequence

class DataGen(Sequence):
    def __init__(self, batch_size, preproc, type, x_set, y_set):
        # Keep (x, y) pairs together so a single shuffle keeps them aligned.
        self.samples = list(zip(x_set, y_set))
        self.batch_size = batch_size
        shuffle(self.samples)
        self.type = type
        self.preproc = preproc

    def __len__(self):
        return int(np.ceil(len(self.samples) / self.batch_size))

    def __getitem__(self, i):
        batch = self.samples[i * self.batch_size:(i + 1) * self.batch_size]
        # Unzip the pairs back into (x_batch, y_batch) before preprocessing.
        return self.preproc.process(*zip(*batch))

    def on_epoch_end(self):
        shuffle(self.samples)
I think it is impossible to say why you run out of memory without knowing more about your data. My guess would be that your preproc function is doing something wrong. You can debug it by running:
for e in DataGen(batch_size, preproc, 'Train', *train):
    print(e)
for e in DataGen(batch_size, preproc, 'Dev', *dev):
    print(e)
You will most likely run out of memory.
I'm trying to calculate the mean_iou and update a confusion matrix for each batch, but after 30 steps I get a SIGKILL event. The images I use in my generator have a resolution of 2048x1024, which is why my batch_size is 2. It seems that I can't release the memory after one step is finished. I tested the generator on its own by iterating over all the images, and everything works well.
I'm using Keras 2.1.2 with TensorFlow 1.4.1 as backend on a GTX 1080. It would be really nice if someone had some advice.
from keras import backend as K
from keras.utils.generic_utils import Progbar

def calculate_iou_tf(model, generator, steps, num_classes):
conf_m = K.tf.zeros((num_classes, num_classes), dtype=K.tf.float64)
generator.reset()
pb = Progbar(steps)
for i in range(0, steps):
x, y_true = generator.next()
y_pred = model.predict_on_batch(x)
# num_classes = K.int_shape(y_pred)[-1]
y_pred = K.flatten(K.argmax(y_pred, axis=-1))
y_true = K.reshape(y_true, (-1,))
mask = K.less_equal(y_true, num_classes - 1)
y_true = K.tf.to_int32(K.tf.boolean_mask(y_true, mask))
y_pred = K.tf.to_int32(K.tf.boolean_mask(y_pred, mask))
mIoU, up_op = K.tf.contrib.metrics.streaming_mean_iou(y_pred, y_true, num_classes, updates_collections=[conf_m])
K.get_session().run(K.tf.local_variables_initializer())
with K.tf.control_dependencies([up_op]):
score = K.eval(mIoU)
print(score)
pb.update(i + 1)
conf_m = K.eval(conf_m)
return conf_m, K.eval(mIoU)
The problem lay in using keras.backend functions instead of NumPy ones. Every time a function was called, a new tensor was created. Unfortunately, with the current implementation of TF, there is no systematic garbage collection of tensors, so this led to a memory-full error. Switching to NumPy solved the problem.
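For illustration, here is a minimal NumPy-only sketch of that kind of accumulation (a hypothetical rewrite, not the author's final code), assuming predictions and labels arrive as dense class-index arrays so nothing is added to the TF graph per batch:
import numpy as np

def update_confusion_matrix(conf_m, y_true, y_pred, num_classes):
    # Flatten, drop ignore labels (>= num_classes), accumulate counts with bincount.
    y_true = y_true.reshape(-1)
    y_pred = y_pred.reshape(-1)
    mask = y_true <= num_classes - 1
    y_true, y_pred = y_true[mask].astype(np.int64), y_pred[mask].astype(np.int64)
    idx = y_true * num_classes + y_pred
    conf_m += np.bincount(idx, minlength=num_classes ** 2).reshape(num_classes, num_classes)
    return conf_m

def mean_iou(conf_m):
    intersection = np.diag(conf_m)
    union = conf_m.sum(axis=0) + conf_m.sum(axis=1) - intersection
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = intersection / union
    return np.nanmean(iou)  # classes absent from both truth and prediction are skipped

# Usage per batch, with conf_m = np.zeros((num_classes, num_classes), dtype=np.int64):
#   y_pred = model.predict_on_batch(x).argmax(axis=-1)
#   conf_m = update_confusion_matrix(conf_m, y_true, y_pred, num_classes)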
I want to train, evaluate the accuracy and eventually predict with my model. This is my first time using high level APIs such as tf.estimator.
I'm getting a value error from estimator.train(train_input_fn):
'ValueError: features should be a dictionary of `Tensor's. Given type: '
I'm not sure what is going on here. My model is taking 3 inputs and producing a binary output from one neuron.
Before this error I was getting an error about the requested shape not being equal to the actual shape, or something along those lines. I fixed it by reducing the batchSize down to 1, instead of 100. I'm sure this isn't going to do so well when it comes to training, though.
Any ideas? Here's my code:
import tensorflow as tf
import numpy as np
import sys
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Testing/')
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Training/')
#other files
from TestDataNormaliser import *
from TrainDataNormaliser import *
learning_rate = 0.01
trainingIteration = 15
batchSize = 1
displayStep = 2
#Layers using tf.layers
def get_logits(features):
l1 = tf.layers.dense(features, 3, activation=tf.nn.relu)
l2 = tf.layers.dense(l1, 4, activation=tf.nn.relu)
l3 = tf.layers.dense(l2, 1, activation=None)
a = l3
return a
#cost function
def get_loss(a, labels):
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(a)))
return tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=labels)
#cross_entropy = tf.reduce_mean((l3 - y)**2)
#cross_entropy = -tf.reduce_sum(y*tf.log(a))-tf.reduce_sum((1-y)*tf.log(1-a))
#optimizer
def get_train_op(loss):
learning_rate = 1e-3
optimizer = tf.train.RMSPropOptimizer(learning_rate)
return optimizer.minimize(loss, global_step=tf.train.get_global_step())
#training
####
def get_inputs(feature_data, label_data, batch_size, n_epochs=None, shuffle=True):
dataset = tf.data.Dataset.from_tensor_slices(
(feature_data, label_data))
dataset = dataset.repeat(n_epochs)
if shuffle:
dataset = dataset.shuffle(len(feature_data))
dataset = dataset.batch(batch_size)
features, labels = dataset.make_one_shot_iterator().get_next()
return features, labels
def model_fn(features, labels, mode):
a = get_logits(features)
loss = get_loss(a, labels)
train_op = get_train_op(loss)
predictions = tf.greater(a, 0)
accuracy = tf.metrics.accuracy(labels, predictions)
return tf.estimator.EstimatorSpec(
mode=mode,
loss=loss,
train_op=train_op,
eval_metric_ops={'Accuracy': accuracy},
predictions=predictions
)
def train_input_fn():
return get_inputs(
trainArrayValues,
trainArrayLabels,
batchSize
)
def eval_input_fn():
return get_inputs(
testArrayValues,
testArrayLabels,
batchSize,
n_epochs=1,
shuffle=False
)
model_dir = './savedModel'
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
#estimator.train(train_input_fn, max_steps=1)
estimator.train(train_input_fn)
estimator.evaluate(eval_input_fn)
Your problem is this line:
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
You need to set the feature_columns argument to an array of feature columns. A feature column tells the estimator about the data you're feeding it.
It looks like all your input data is numeric, so I'd call tf.feature_column.numeric_column to create your feature column(s). The documentation is here. For example, the following code creates a numeric feature column containing x-coordinates:
xcol = tf.feature_column.numeric_column('x')
If all your estimator needs are x-coordinates, then you could create the estimator with the following code:
estimator = tf.estimator.LinearRegressor(feature_columns=[xcol])
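The error message also suggests that the features handed to the estimator should arrive as a dictionary keyed by the feature column names. A minimal sketch of an input_fn shaped that way (the column name 'x' and the toy values are illustrative only, not the asker's data):
import numpy as np
import tensorflow as tf

def toy_train_input_fn():
    # Three toy examples, each with a single numeric feature named 'x',
    # matching tf.feature_column.numeric_column('x') above.
    features = {'x': np.array([0.1, 0.2, 0.3], dtype=np.float32)}
    labels = np.array([1.0, 0.0, 1.0], dtype=np.float32)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.shuffle(3).batch(1)

# estimator.train(toy_train_input_fn)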