I use TensorFlow 2.2.0. In my data pipeline, I use multiple datasets to train a neural net. Something like:
# these are all tf.data.Dataset objects:
paired_data = get_dataset(id=0, repeat=False, shuffle=True)
unpaired_images = get_dataset(id=1, repeat=True, shuffle=True)
unpaired_masks = get_dataset(id=2, repeat=True, shuffle=True)
In the training loop, I want to iterate over paired_data to define one epoch. But I also want to iterate over unpaired_images and unpaired_masks to optimize other objectives (classic semi-supervised learning for semantic segmentation, with a mask discriminator).
In order to do this, my current code looks like:
def train_one_epoch(self, writer, step, paired_data, unpaired_images, unpaired_masks):
    unpaired_images = unpaired_images.as_numpy_iterator()
    unpaired_masks = unpaired_masks.as_numpy_iterator()

    for images, labels in paired_data:
        with tf.GradientTape() as sup_tape, \
                tf.GradientTape() as gen_tape, \
                tf.GradientTape() as disc_tape:

            # paired data (supervised cost):
            predictions = segmentor(images, training=True)
            sup_loss = weighted_cross_entropy(predictions, labels)

            # unpaired data (adversarial cost):
            pred_real = discriminator(next(unpaired_masks), training=True)
            pred_fake = discriminator(segmentor(next(unpaired_images), training=True), training=True)
            gen_loss = generator_loss(pred_fake)
            disc_loss = discriminator_loss(pred_real, pred_fake)

        gradients = sup_tape.gradient(sup_loss, self.segmentor.trainable_variables)
        generator_optimizer.apply_gradients(zip(gradients, self.segmentor.trainable_variables))

        gradients = gen_tape.gradient(gen_loss, self.segmentor.trainable_variables)
        generator_optimizer.apply_gradients(zip(gradients, self.segmentor.trainable_variables))

        gradients = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)
        discriminator_optimizer.apply_gradients(zip(gradients, self.discriminator.trainable_variables))
However, this results in the error:
main.py:275 train_one_epoch *
unpaired_images = unpaired_images.as_numpy_iterator()
/home/venvs/conda/miniconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py:476 as_numpy_iterator **
raise RuntimeError("as_numpy_iterator() is not supported while tracing "
RuntimeError: as_numpy_iterator() is not supported while tracing functions
Any idea what is wrong with this? Is this the correct way of optimizing over multiple losses/datasets in TensorFlow 2?
I added my current solution to the problem in the comments. Any suggestion for a more optimized way is more than welcome! :)
My current solution:
def train_one_epoch(self, writer, step, paired_data, unpaired_images, unpaired_masks):
    # create a new dataset zipping the three original dataset objects
    dataset = tf.data.Dataset.zip((paired_data, unpaired_images, unpaired_masks))

    for (images, labels), unpaired_images, unpaired_masks in dataset:
        # go ahead and train:
        with tf.GradientTape() as tape:
            # [...]
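For context, a rough sketch of an alternative that avoids the zip: the error comes from as_numpy_iterator(), which only works eagerly, while plain iter()/next() on a tf.data.Dataset yields tensors and is generally supported while tf.function traces the loop (assuming train_one_epoch is wrapped in tf.function, which the traceback suggests; not verified on every TF version):

def train_one_epoch(self, writer, step, paired_data, unpaired_images, unpaired_masks):
    # iter() returns an iterator of tensors and, unlike as_numpy_iterator(),
    # can be created inside a traced function
    unpaired_image_iter = iter(unpaired_images)
    unpaired_mask_iter = iter(unpaired_masks)

    for images, labels in paired_data:
        real_masks = next(unpaired_mask_iter)     # one batch of unpaired masks
        extra_images = next(unpaired_image_iter)  # one batch of unpaired images
        # ... the same GradientTape / optimizer logic as in the original loop ...

Since the unpaired datasets repeat, next() never runs out during the epoch; the iterators could also be created outside the traced function and passed in as arguments.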
I am trying to implement MNIST digit classification using PyTorch Lightning.
The train function looks like this:
def train(epochs, train_loader, test_loader, model):
    early_stopping = EarlyStopping('train_loss', mode='min', patience=5)
    model_checkpoint = ModelCheckpoint(dirpath=model_path/'mnist_{epoch}-{train_loss:.2f}', monitor='train_loss', mode='min', save_top_k=3)
    trainer = pl.Trainer(max_epochs=epochs, profiler=False, callbacks=[model_checkpoint], default_root_dir=model_path)
    trainer.fit(model, train_dataloader=train_loader)
    trainer.test(test_dataloaders=test_loader, ckpt_path=None)
The test_step function looks like this:
def test_step(self, test_batch):
    x, y = test_batch
    logits = self.forward(x)
    loss = self.mean_squared_error_loss(logits.squeeze(-1), y.float())
    # I want to calculate R2, MAPE, etc., save them in a pandas df, and
    # return them to the train function
    self.log('test_loss', loss)
    return {'test_loss': loss}
I can calculate R2, MAPE, etc. using TorchMetrics, but I am not sure how (or whether it is possible) to save them in a pandas df (or maybe in a list) for the whole test dataset. I have gone through this post but am not sure how I should try!
Any suggestions are appreciated.
You can aggregate test results in test_epoch_end:
def test_step(self, test_batch):
    x, y = test_batch
    logits = self.forward(x)
    loss = self.mean_squared_error_loss(logits.squeeze(-1), y.float())
    self.log('test_loss', loss)
    return {'test_loss': loss, "logits": logits, "labels": y}

def test_epoch_end(self, outputs):
    all_preds, all_labels = [], []
    for output in outputs:
        probs = list(output['logits'].cpu().detach().numpy())  # predicted values
        labels = list(output['labels'].flatten().cpu().detach().numpy())
        all_preds.extend(probs)
        all_labels.extend(labels)
    # you can calculate R2 here or save results as a file
    r2 = ...
Note that this only works on a single GPU. If you are using multiple GPUs, you need some function to gather results from different GPUs.
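For illustration only, a rough sketch of one way to gather across processes, assuming a Lightning version that exposes self.all_gather on LightningModule and that torchmetrics is installed (check the names against your versions):

import torch
import torchmetrics

def test_epoch_end(self, outputs):
    # concatenate this process's batches, then gather across all processes
    preds = torch.cat([o['logits'].squeeze(-1) for o in outputs])
    labels = torch.cat([o['labels'] for o in outputs])
    preds = self.all_gather(preds).reshape(-1)    # (world_size, N) -> flat
    labels = self.all_gather(labels).reshape(-1)
    if self.trainer.is_global_zero:
        r2 = torchmetrics.functional.r2_score(preds, labels)
        print('test R2:', r2.item())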
To get model predictions, you need to add a predict_step() in the model class.
def predict_step(self, test_batch):
    x, y = test_batch
    logits = self.forward(x)
    return {'logits': logits, 'labels': y}
And run:
outputs = trainer.predict(model, test_loader, return_predictions=True)
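From there, a small sketch of collecting those predictions into a pandas DataFrame (the column names are illustrative; this assumes predict_step returns the dict shown above):

import pandas as pd
import torch

outputs = trainer.predict(model, test_loader, return_predictions=True)

# each element of `outputs` is the dict returned by predict_step for one batch
preds = torch.cat([o['logits'].squeeze(-1) for o in outputs]).cpu().numpy()
labels = torch.cat([o['labels'] for o in outputs]).cpu().numpy()

df = pd.DataFrame({'prediction': preds, 'label': labels})
df.to_csv('test_predictions.csv', index=False)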
I recently switched from TensorFlow 1.14 and the Estimator API to TensorFlow 2.0 and the Keras API. I am working on an image segmentation problem, so the inputs/outputs/labels are all images. When I used Estimator, things were pretty straightforward: in model_fn, where the arguments were (features, labels, mode, params), I could just pick the features and labels, do the necessary processing, pass them to tf.summary.image(), and everything worked like a charm. Now, using the Keras API, although it provides greater ease of use, it makes simple handling of data during training hard, which becomes even harder when it is combined with the dataset API. Example:
Tensorflow 1.14/Estimator:
def model_fn(features, labels, mode, params):
    loss, train_op = None, None
    eval_metric_ops, training_hooks, evaluation_hooks = None, None, None
    output = model(input=features)
    predictions = tf.argmax(output, axis=-1)
    predictions_dict = {'predicted': predictions}
    dice_score = tf.contrib.metrics.f1_score(labels=labels, predictions=predictions[:, :, :, 1])

    if mode in (estimator.ModeKeys.TRAIN, estimator.ModeKeys.EVAL):
        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(params['lr'], global_step=global_step,
                                                   decay_steps=params['decay_steps'],
                                                   decay_rate=params['decay_rate'], staircase=False)
        loss = loss_fn(outputs=predictions, labels=labels)
        summary.image('Input_Image', features)
        summary.image('Label', tf.expand_dims(tf.cast(labels, dtype=tf.float32), axis=-1))
        summary.image('Prediction', tf.expand_dims(tf.cast(predictions, dtype=tf.float32), axis=-1))

    if mode == estimator.ModeKeys.TRAIN:
        with tf.name_scope('Metrics'):
            summary.scalar('Dice_Coefficient', dice_score[1])
            summary.scalar('Learning_Rate', learning_rate)
        summary.merge_all()
        train_logs_hook = tf.estimator.LoggingTensorHook({'Dice_Coefficient': dice_score[1]},
                                                          every_n_iter=params['train_log_every_n_steps'])
        training_hooks = [train_logs_hook]
        train_op = Adam(learning_rate=learning_rate, epsilon=params['epsilon']).minimize(loss=loss, global_step=global_step)

    if mode == estimator.ModeKeys.EVAL:
        eval_metric_ops = {'Metrics/Dice_Coefficient': dice_score}
        eval_summary_hook = tf.estimator.SummarySaverHook(output_dir=params['eval_metrics_path'],
                                                          summary_op=summary.merge_all(),
                                                          save_steps=params['eval_steps_per_summary_save'])
        evaluation_hooks = [eval_summary_hook]

    return estimator.EstimatorSpec(mode,
                                   predictions=predictions_dict,
                                   loss=loss,
                                   train_op=train_op,
                                   eval_metric_ops=eval_metric_ops,
                                   training_hooks=training_hooks,
                                   evaluation_hooks=evaluation_hooks)
Using Keras with TensorFlow 2.0, AFAIK, I can't have this kind of access to the input/output tensors during training or evaluation (notice that even though during evaluation the estimator doesn't get the image summaries, you can still preview the results by using a tf.estimator.SummarySaverHook). Below is my failed attempt:
def train_data(params):  # Similar is the eval_data
    def standardization_summaries(image, label, step, writer):
        # Some processing to images
        with writer.as_default():
            tf.summary.image('Input_dataset', image, step=step, max_outputs=1)
            tf.summary.image('label_dataset', label, step=step, max_outputs=1)
        return image, label

    data_set = tf.data.Dataset.from_generator(generator=lambda: data_generator(params),
                                              output_types=(tf.float32, tf.int64),
                                              output_shapes=(tf.TensorShape([None, None]), tf.TensorShape([None, None])))
    data_set = data_set.map(lambda x, y: standardization_summaries(image=x, label=y, step=params['global_step'], writer=params['writer']))
    data_set = data_set.batch(params['batch_size'])
    data_set = data_set.prefetch(buffer_size=-1)
    return data_set
model = tf.keras.models.load_model(saved_model)
summary_writer = tf.summary.create_file_writer(save_model_path)
step = tf.Variable(0, trainable=False, dtype=tf.int64)

tensorboard = tf.keras.callbacks.TensorBoard(log_dir=save_model_path, histogram_freq=1, write_graph=True,
                                             write_images=False)
early_stop = tf.keras.callbacks.EarlyStopping(patience=args.early_stop)
callbacks = [tensorboard, early_stop]

params = {'batch_size': args.batch_size,
          'global_step': step,
          'writer': summary_writer}

model.fit(x=train_data(params), epochs=args.epochs, initial_epoch=args.initial_epoch,
          validation_data=val_data(params), steps_per_epoch=2, callbacks=callbacks)
Getting the input images from the dataset API came from here, but this just produces tons of images every time the dataset fetches data from the generator. Also, with the step variable being constant and never advancing (I can't figure out how to make it increment), everything ends up under step 0, and I can't think of any viable way to connect these outputs with the predicted output, even if I found a way to print them.
So, the question is: is there anything I am still missing about the synergy between the Keras API and TensorBoard image summaries? Is there a way to save image summaries, let's say, every half epoch during training and once at the end of evaluation, or should I just let the model train, get the training outputs through model.predict() at the end of training, and then inspect whether something went wrong (which is not efficient)?
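For context, one pattern that is often suggested for this (a sketch only, not the asker's code) is to move the image logging out of the dataset map and into a custom Keras callback, which has access to both the epoch number and the model; the helper class below and its arguments are hypothetical:

import tensorflow as tf

class ImageSummaryCallback(tf.keras.callbacks.Callback):
    """Logs a fixed batch of inputs and predictions every `freq` epochs (hypothetical helper)."""
    def __init__(self, sample_images, log_dir, freq=1):
        super().__init__()
        self.sample_images = sample_images          # e.g. one batch taken from the validation dataset
        self.writer = tf.summary.create_file_writer(log_dir)
        self.freq = freq

    def on_epoch_end(self, epoch, logs=None):
        if epoch % self.freq != 0:
            return
        preds = self.model.predict(self.sample_images)
        with self.writer.as_default():
            tf.summary.image('Input_Image', self.sample_images, step=epoch, max_outputs=1)
            tf.summary.image('Prediction',
                             tf.cast(tf.argmax(preds, axis=-1)[..., tf.newaxis], tf.float32),
                             step=epoch, max_outputs=1)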
I'm creating a neural network in Python. My problem is a really simple classification problem, and I'm using a Leaky ReLU network whose main goal is to overfit a set of data points. Before, I was working with a small number of data points, and my network was really fast if I gave it all the data at once. Now my computer is not able to take it all at once, so I have decided to feed it mini-batches of the whole data set, and it works. Now I have several questions:
1. If I used a loss function before, should I use the same one? (In my case I'm using BCEWithLogitsLoss.) I ask because I get really weird loss values for each batch: before I switched to batches, the loss was really small, like 0.005; now it is really big.
2. My input data is roughly 310,000 x 3 points. Are mini-batches the correct approach, or is there another trick to tackle this problem?
3. Now that I have trained the model, do I have to evaluate it in the same way (i.e., in batches, so I don't run out of memory on my GPU)?
Here is my code:
class CustomDataset(Dataset):
    def __init__(self, x_tensor, y_tensor):
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.x)


class LabelData(Dataset):
    def __init__(self, y_tensor):
        self.y = y_tensor

    def __getitem__(self, index):
        return (self.y[index])

    def __len__(self):
        return len(self.y)


#X, order = common.loadplanes()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Samples, Ocupancy = common.load_samples()

x_test = torch.from_numpy(Samples.astype(np.float32)).to(device)
y_test = torch.from_numpy(Ocupancy.astype(np.float32)).to(device)

train_data = CustomDataset(x_test, y_test)
train_loader = DataLoader(dataset=train_data, batch_size=1000, shuffle=False)  # Make bigger batches

phi = common.MLP(3, 1).to(device)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(phi.parameters(), lr=0.01)
epoch = 512

fit_start_time = time.time()
for epoch in range(epoch):
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        y_pred = phi(x_batch)
        # Compute loss
        loss = criterion(y_pred.squeeze(), y_batch.squeeze())
        print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
        # Backward pass
        loss.backward()
        optimizer.step()
fit_end_time = time.time()

print("Total time = %f" % (fit_end_time - fit_start_time))

X, Y, Z = np.mgrid[-3000:300:80, -3000:3000:80, -3000:3000:80]
xyz = torch.from_numpy(np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).transpose().astype(np.float32))

# feed the network bit by bit?
xyz = LabelData(xyz)
labels = phi(xyz).to(device)
visualization_iso(X, Y, Z, labels)
I'm still running my code as I write this, but I can imagine that there is going to be an error after the model is trained saying that I've run out of memory (that is why I'm asking the third question), or that I'm evaluating my network incorrectly. Edit: it seems I am evaluating the model wrongly; you can see my error below. Please let me know if I'm not using any common practices or if you spot any major flaws in my code.
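On the third question above, a minimal sketch of evaluating the grid in chunks under torch.no_grad(), continuing the script above and using the raw xyz tensor before it is wrapped in LabelData (the chunk size of 10000 is arbitrary):

phi.eval()                                  # switch the network to evaluation mode
chunks = []
with torch.no_grad():                       # no autograd graph is kept, so memory stays bounded
    for start in range(0, xyz.shape[0], 10000):
        batch = xyz[start:start + 10000].to(device)
        chunks.append(phi(batch).squeeze().cpu())   # move results off the GPU right away
labels = torch.cat(chunks)
visualization_iso(X, Y, Z, labels)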
I want to add regularization into my optimizer like this:
tf.train.AdadeltaOptimizer(learning_rate=1).minimize(loss)
But I don't know how to design the loss function in the code below.
The website I saw is:
https://blog.csdn.net/marsjhao/article/details/72630147
The modified code originally came from the Google machine Learning course:
https://colab.research.google.com/notebooks/mlcc/improving_neural_net_performance.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=improvingneuralnets-colab&hl=zh-tw#scrollTo=P8BLQ7T71JWd
Can someone give me some advice or discuss with me?
def train_nn_classifier_model_new(
        my_optimizer,
        steps,
        batch_size,
        hidden_units,
        training_examples,
        training_targets,
        validation_examples,
        validation_targets):

    periods = 10
    steps_per_period = steps / periods

    # Create a DNNClassifier object.
    my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
    dnn_classifier = tf.estimator.DNNClassifier(
        feature_columns=construct_feature_columns(training_examples),
        hidden_units=hidden_units,
        optimizer=my_optimizer
    )

    # Create input functions.
    training_input_fn = lambda: my_input_fn(training_examples,
                                            training_targets["deal_or_not"],
                                            batch_size=batch_size)
    predict_training_input_fn = lambda: my_input_fn(training_examples,
                                                    training_targets["deal_or_not"],
                                                    num_epochs=1,
                                                    shuffle=False)
    predict_validation_input_fn = lambda: my_input_fn(validation_examples,
                                                      validation_targets["deal_or_not"],
                                                      num_epochs=1,
                                                      shuffle=False)

    # Train the model, but do so inside a loop so that we can periodically assess
    # loss metrics.
    print("Training model...")
    print("LogLoss (on training data):")
    training_log_losses = []
    validation_log_losses = []
    for period in range(0, periods):
        # Train the model, starting from the prior state.
        dnn_classifier.train(
            input_fn=training_input_fn,
            steps=steps_per_period
        )
        # Take a break and compute predictions.
        training_probabilities = dnn_classifier.predict(input_fn=predict_training_input_fn)
        training_probabilities = np.array([item['probabilities'] for item in training_probabilities])
        print(training_probabilities)

        validation_probabilities = dnn_classifier.predict(input_fn=predict_validation_input_fn)
        validation_probabilities = np.array([item['probabilities'] for item in validation_probabilities])

        training_log_loss = metrics.log_loss(training_targets, training_probabilities)
        validation_log_loss = metrics.log_loss(validation_targets, validation_probabilities)
        # Occasionally print the current loss.
        print("  period %02d : %0.2f" % (period, training_log_loss))
        # Add the loss metrics from this period to our list.
        training_log_losses.append(training_log_loss)
        validation_log_losses.append(validation_log_loss)
    print("Model training finished.")

    # Output a graph of loss metrics over periods.
    plt.ylabel("LogLoss")
    plt.xlabel("Periods")
    plt.title("LogLoss vs. Periods")
    plt.tight_layout()
    plt.plot(training_log_losses, label="training")
    plt.plot(validation_log_losses, label="validation")
    plt.legend()

    return dnn_classifier


result = train_nn_classifier_model_new(
    my_optimizer=tf.train.AdadeltaOptimizer(learning_rate=1),
    steps=30000,
    batch_size=250,
    hidden_units=[150, 150, 150, 150],
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets
)
Regularization is added to the loss function. Your optimizer, AdadeltaOptimizer, does not support regularization parameters. If you want to add regularization through the optimizer, you should use tf.train.ProximalAdagradOptimizer, as it has l2_regularization_strength and l1_regularization_strength parameters where you can set the values. These parameters were part of the original algorithm.
Otherwise, you have to apply regularization in your own loss function, but DNNClassifier does not allow a custom loss function; you would have to build your network manually for that.
For how to add regularization, check here.
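As a rough sketch of that optimizer swap in the call from the question (the learning rate and regularization strengths are placeholder values to tune):

result = train_nn_classifier_model_new(
    my_optimizer=tf.train.ProximalAdagradOptimizer(
        learning_rate=0.1,                   # tune for your problem
        l1_regularization_strength=0.001,    # L1 penalty strength
        l2_regularization_strength=0.001),   # L2 penalty strength
    steps=30000,
    batch_size=250,
    hidden_units=[150, 150, 150, 150],
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets
)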
Suppose you have a network that has worked with feed_dict so far to inject data into a graph. Every few epochs, I evaluated the training and test loss by feeding a batch from either dataset to my graph.
Now, for performance reasons, I decided to use an input pipeline. Take a look at this dummy example:
import tensorflow as tf
import numpy as np

dataset_size = 200
batch_size = 5
dimension = 4

# create some training dataset
dataset = tf.data.Dataset.\
    from_tensor_slices(np.random.normal(2.0, size=(dataset_size, dimension)).
                       astype(np.float32))
dataset = dataset.batch(batch_size)  # take batches

iterator = dataset.make_initializable_iterator()
x = tf.cast(iterator.get_next(), tf.float32)
w = tf.Variable(np.random.normal(size=(1, dimension)).astype(np.float32))

loss_func = lambda x, w: tf.reduce_mean(tf.square(x - w))  # notice that the loss function is a mean!
loss = loss_func(x, w)  # this is the loss that will be minimized
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train one epoch
    sess.run(iterator.initializer)
    for i in range(dataset_size // batch_size):
        # the training step will update the weights based on ONE batch of examples each step
        loss1, _ = sess.run([loss, train_op])
        print('train step {:d}. batch loss {:f}.'.format(i, loss1))
    # I want to print the loss from another dataset (test set) here
Printing the loss of the training data is no problem, but how do I do this for another dataset? When using feed_dict, I simply got a batch from said set and fed it as the value of x.
There are several things you can do for that. One simple option could be having two datasets and iterators and using tf.cond to switch between them. However, the more powerful way of doing it is to use an iterator that supports this directly. See the guide on how to create iterators for a description of the various iterator types. For example, using a reinitializable iterator you could have something like this:
import tensorflow as tf
import numpy as np

dataset_size = 200
dataset_test_size = 20
batch_size = 5
dimension = 4

# create some training dataset
dataset = tf.data.Dataset.\
    from_tensor_slices(np.random.normal(2.0, size=(dataset_size, dimension)).
                       astype(np.float32))
dataset = dataset.batch(batch_size)  # take batches

# create some test dataset
dataset_test = tf.data.Dataset.\
    from_tensor_slices(np.random.normal(2.0, size=(dataset_test_size, dimension)).
                       astype(np.float32))
dataset_test = dataset_test.batch(batch_size)  # take batches

iterator = tf.data.Iterator.from_structure(dataset.output_types,
                                           dataset.output_shapes)
dataset_init_op = iterator.make_initializer(dataset)
dataset_test_init_op = iterator.make_initializer(dataset_test)

x = tf.cast(iterator.get_next(), tf.float32)
w = tf.Variable(np.random.normal(size=(1, dimension)).astype(np.float32))

loss_func = lambda x, w: tf.reduce_mean(tf.square(x - w))  # notice that the loss function is a mean!
loss = loss_func(x, w)  # this is the loss that will be minimized
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train one epoch
    sess.run(dataset_init_op)
    for i in range(dataset_size // batch_size):
        # the training step will update the weights based on ONE batch of examples each step
        loss1, _ = sess.run([loss, train_op])
        print('train step {:d}. batch loss {:f}.'.format(i, loss1))
    # print test loss
    sess.run(dataset_test_init_op)
    for i in range(dataset_test_size // batch_size):
        loss1 = sess.run(loss)
        print('test step {:d}. batch loss {:f}.'.format(i, loss1))
You can do something similar with a feedable iterator, depending on what you find more convenient, and I suppose even with an initializable iterator, for example by making a boolean dataset that you then map to some data with tf.cond, although that would not be a very natural way to do it.
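For completeness, a rough sketch of the feedable-iterator variant (only the data-switching plumbing differs; the batched dataset and dataset_test, and the dimension value, are the ones defined in the snippet above):

import tensorflow as tf
import numpy as np

# a string-handle placeholder selects which concrete iterator feeds the graph
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle, dataset.output_types,
                                               dataset.output_shapes)
x = tf.cast(iterator.get_next(), tf.float32)
w = tf.Variable(np.random.normal(size=(1, dimension)).astype(np.float32))
loss = tf.reduce_mean(tf.square(x - w))

train_iterator = dataset.make_one_shot_iterator()
test_iterator = dataset_test.make_one_shot_iterator()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    # the handle fed at run time decides which dataset produces the next batch
    print('train batch loss:', sess.run(loss, feed_dict={handle: train_handle}))
    print('test batch loss:', sess.run(loss, feed_dict={handle: test_handle}))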
EDIT:
Here is how you can do it with an initializable iterator, actually in a cleaner way than what I was initially thinking, so maybe you actually like this more:
import tensorflow as tf
import numpy as np

dataset_size = 200
dataset_test_size = 20
batch_size = 5
dimension = 4

# create data
data = tf.constant(np.random.normal(2.0, size=(dataset_size, dimension)), tf.float32)
data_test = tf.constant(np.random.normal(2.0, size=(dataset_test_size, dimension)), tf.float32)

# choose data
testing = tf.placeholder_with_default(False, ())
current_data = tf.cond(testing, lambda: data_test, lambda: data)

# create dataset
dataset = tf.data.Dataset.from_tensor_slices(current_data)
dataset = dataset.batch(batch_size)

# create iterator
iterator = dataset.make_initializable_iterator()
x = tf.cast(iterator.get_next(), tf.float32)
w = tf.Variable(np.random.normal(size=(1, dimension)).astype(np.float32))

loss_func = lambda x, w: tf.reduce_mean(tf.square(x - w))  # notice that the loss function is a mean!
loss = loss_func(x, w)  # this is the loss that will be minimized
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train one epoch
    sess.run(iterator.initializer)
    for i in range(dataset_size // batch_size):
        # the training step will update the weights based on ONE batch of examples each step
        loss1, _ = sess.run([loss, train_op])
        print('train step {:d}. batch loss {:f}.'.format(i, loss1))
    # print test loss
    sess.run(iterator.initializer, feed_dict={testing: True})
    for i in range(dataset_test_size // batch_size):
        loss1 = sess.run(loss)
        print('test step {:d}. batch loss {:f}.'.format(i, loss1))