A newbie TensorFlow question here. I am doing a project using the Google QuickDraw dataset, and I used the code provided by Google to train a model on the data:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import functools
import sys
import tensorflow as tf
tf.logging.set_verbosity('INFO')
def get_num_classes():
with open(FLAGS.classes_path) as label_class:
classes = label_class.readlines()
classes = [x.strip() for x in classes]
return len(classes)
def get_input_fn(mode, tfrecord_pattern, batch_size):
"""Creates an input_fn that stores all the data in memory.
Args:
mode: one of tf.estimator.ModeKeys.{TRAIN, PREDICT, EVAL}
tfrecord_pattern: path to a TF record file created using create_dataset.py.
batch_size: the batch size to output.
Returns:
A valid input_fn for the model estimator.
"""
def _parse_tfexample_fn(example_proto, mode):
"""Parse a single record which is expected to be a tensorflow.Example."""
feature_to_type = {
"drawing": tf.VarLenFeature(dtype=tf.float32),
"shape": tf.FixedLenFeature([2], dtype=tf.int64)
}
if mode != tf.estimator.ModeKeys.PREDICT:
# The labels won't be available at inference time, so don't add them
# to the list of feature_columns to be read.
feature_to_type["class_index"] = tf.FixedLenFeature([1], dtype=tf.int64)
parsed_features = tf.parse_single_example(example_proto, feature_to_type)
labels = None
if mode != tf.estimator.ModeKeys.PREDICT:
labels = parsed_features["class_index"]
parsed_features["drawing"] = tf.sparse_tensor_to_dense(parsed_features["drawing"])
return parsed_features, labels
def _input_fn():
"""Estimator `input_fn`.
Returns:
A tuple of:
- Dictionary of string feature name to `Tensor`.
- `Tensor` of target labels.
"""
dataset = tf.data.TFRecordDataset.list_files(tfrecord_pattern)
if mode == tf.estimator.ModeKeys.TRAIN:
dataset = dataset.shuffle(buffer_size=10)
dataset = dataset.repeat()
# Preprocesses 10 files concurrently and interleaves records from each file.
dataset = dataset.interleave(
tf.data.TFRecordDataset,
cycle_length=10,
block_length=1)
dataset = dataset.map(
functools.partial(_parse_tfexample_fn, mode=mode),
num_parallel_calls=10)
dataset = dataset.prefetch(10000)
if mode == tf.estimator.ModeKeys.TRAIN:
dataset = dataset.shuffle(buffer_size=1000000)
# Our inputs are variable length, so pad them.
dataset = dataset.padded_batch(
batch_size, padded_shapes=dataset.output_shapes)
features, labels = dataset.make_one_shot_iterator().get_next()
return features, labels
return _input_fn
def model_fn(features, labels, mode, params):
"""Model function for RNN classifier.
This function sets up a neural network which applies convolutional layers (as
configured with params.num_conv and params.conv_len) to the input.
The output of the convolutional layers is given to LSTM layers (as configured
with params.num_layers and params.num_nodes).
The final states of all LSTM layers are concatenated and fed to a fully
connected layer to obtain the final classification scores.
Args:
features: dictionary with keys: inks, lengths.
labels: one hot encoded classes
mode: one of tf.estimator.ModeKeys.{TRAIN, PREDICT, EVAL}
params: a parameter dictionary with the following keys: num_layers,
num_nodes, batch_size, num_conv, conv_len, num_classes, learning_rate.
Returns:
ModelFnOps for Estimator API.
"""
def _get_input_tensors(features, labels):
"""Converts the input dict into inks, lengths, and labels tensors."""
# features["drawing"] is a sparse tensor of [batch_size, batch_maxlen, 3].
# inks will be a dense tensor of [batch_size, maxlen, 3].
# shapes is [batch_size, 2].
shapes = features["shape"]
# lengths will be [batch_size]
lengths = tf.squeeze(
tf.slice(shapes, begin=[0, 0], size=[params.batch_size, 1]))
inks = tf.reshape(features["drawing"], [params.batch_size, -1, 3])
if labels is not None:
labels = tf.squeeze(labels)
return inks, lengths, labels
def _add_conv_layers(inks, lengths):
"""Adds convolution layers."""
convolved = inks
for i in range(len(params.num_conv)):
convolved_input = convolved
if params.batch_norm:
convolved_input = tf.layers.batch_normalization(
convolved_input,
training=(mode == tf.estimator.ModeKeys.TRAIN))
# Add dropout layer if enabled and not first convolution layer.
if i > 0 and params.dropout:
convolved_input = tf.layers.dropout(
convolved_input,
rate=params.dropout,
training=(mode == tf.estimator.ModeKeys.TRAIN))
convolved = tf.layers.conv1d(
convolved_input,
filters=params.num_conv[i],
kernel_size=params.conv_len[i],
activation=None,
strides=1,
padding="same",
name="conv1d_%d" % i)
return convolved, lengths
def _add_regular_rnn_layers(convolved, lengths):
"""Adds RNN layers."""
if params.cell_type == "lstm":
cell = tf.nn.rnn_cell.BasicLSTMCell
elif params.cell_type == "block_lstm":
cell = tf.contrib.rnn.LSTMBlockCell
cells_fw = [cell(params.num_nodes) for _ in range(params.num_layers)]
cells_bw = [cell(params.num_nodes) for _ in range(params.num_layers)]
if params.dropout > 0.0:
cells_fw = [tf.contrib.rnn.DropoutWrapper(cell) for cell in cells_fw]
cells_bw = [tf.contrib.rnn.DropoutWrapper(cell) for cell in cells_bw]
outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
cells_fw=cells_fw,
cells_bw=cells_bw,
inputs=convolved,
sequence_length=lengths,
dtype=tf.float32,
scope="rnn_classification")
return outputs
def _add_cudnn_rnn_layers(convolved):
"""Adds CUDNN LSTM layers."""
# Convolutions output [B, L, Ch], while CudnnLSTM is time-major.
convolved = tf.transpose(convolved, [1, 0, 2])
lstm = tf.contrib.cudnn_rnn.CudnnLSTM(
num_layers=params.num_layers,
num_units=params.num_nodes,
dropout=params.dropout if mode == tf.estimator.ModeKeys.TRAIN else 0.0,
direction="bidirectional")
outputs, _ = lstm(convolved)
# Convert back from time-major outputs to batch-major outputs.
outputs = tf.transpose(outputs, [1, 0, 2])
return outputs
def _add_rnn_layers(convolved, lengths):
"""Adds recurrent neural network layers depending on the cell type."""
if params.cell_type != "cudnn_lstm":
outputs = _add_regular_rnn_layers(convolved, lengths)
else:
outputs = _add_cudnn_rnn_layers(convolved)
# outputs is [batch_size, L, N] where L is the maximal sequence length and N
# the number of nodes in the last layer.
mask = tf.tile(
tf.expand_dims(tf.sequence_mask(lengths, tf.shape(outputs)[1]), 2),
[1, 1, tf.shape(outputs)[2]])
zero_outside = tf.where(mask, outputs, tf.zeros_like(outputs))
outputs = tf.reduce_sum(zero_outside, axis=1)
return outputs
def _add_fc_layers(final_state):
"""Adds a fully connected layer."""
return tf.layers.dense(final_state, params.num_classes)
# Build the model.
inks, lengths, labels = _get_input_tensors(features, labels)
convolved, lengths = _add_conv_layers(inks, lengths)
final_state = _add_rnn_layers(convolved, lengths)
logits = _add_fc_layers(final_state)
# Add the loss.
cross_entropy = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=labels, logits=logits))
# Add the optimizer.
train_op = tf.contrib.layers.optimize_loss(
loss=cross_entropy,
global_step=tf.train.get_global_step(),
learning_rate=params.learning_rate,
optimizer="Adam",
# some gradient clipping stabilizes training in the beginning.
clip_gradients=params.gradient_clipping_norm,
summaries=["learning_rate", "loss", "gradients", "gradient_norm"])
# Compute current predictions.
predictions = tf.argmax(logits, axis=1)
return tf.estimator.EstimatorSpec(
mode=mode,
predictions={"logits": logits, "predictions": predictions},
loss=cross_entropy,
train_op=train_op,
eval_metric_ops={"accuracy": tf.metrics.accuracy(labels, predictions)})
def create_estimator_and_specs(run_config):
"""Creates an Experiment configuration based on the estimator and input fn."""
model_params = tf.contrib.training.HParams(
num_layers=FLAGS.num_layers,
num_nodes=FLAGS.num_nodes,
batch_size=FLAGS.batch_size,
num_conv=ast.literal_eval(FLAGS.num_conv),
conv_len=ast.literal_eval(FLAGS.conv_len),
num_classes=get_num_classes(),
learning_rate=FLAGS.learning_rate,
gradient_clipping_norm=FLAGS.gradient_clipping_norm,
cell_type=FLAGS.cell_type,
batch_norm=FLAGS.batch_norm,
dropout=FLAGS.dropout)
estimator = tf.estimator.Estimator(
model_fn=model_fn,
config=run_config,
params=model_params)
train_spec = tf.estimator.TrainSpec(input_fn=get_input_fn(
mode=tf.estimator.ModeKeys.TRAIN,
tfrecord_pattern=FLAGS.training_data,
batch_size=FLAGS.batch_size), max_steps=FLAGS.steps)
eval_spec = tf.estimator.EvalSpec(input_fn=get_input_fn(
mode=tf.estimator.ModeKeys.EVAL,
tfrecord_pattern=FLAGS.eval_data,
batch_size=FLAGS.batch_size))
return estimator, train_spec, eval_spec
def main(unused_args):
estimator, train_spec, eval_spec = create_estimator_and_specs(
run_config=tf.estimator.RunConfig(
model_dir=FLAGS.model_dir,
save_checkpoints_secs=300,
save_summary_steps=100))
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.register("type", "bool", lambda v: v.lower() == "true")
parser.add_argument(
"--training_data",
type=str,
default="/Train",
help="Path to training data (tf.Example in TFRecord format)")
parser.add_argument(
"--eval_data",
type=str,
default="/Eval",
help="Path to evaluation data (tf.Example in TFRecord format)")
parser.add_argument(
"--classes_file",
type=str,
default="",
help="Path to a file with the classes - one class per line")
parser.add_argument(
"--num_layers",
type=int,
default=3,
help="Number of recurrent neural network layers.")
parser.add_argument(
"--num_nodes",
type=int,
default=128,
help="Number of node per recurrent network layer.")
parser.add_argument(
"--num_conv",
type=str,
default="[48, 64, 96]",
help="Number of conv layers along with number of filters per layer.")
parser.add_argument(
"--conv_len",
type=str,
default="[5, 5, 3]",
help="Length of the convolution filters.")
parser.add_argument(
"--cell_type",
type=str,
default="lstm",
help="Cell type used for rnn layers: cudnn_lstm, lstm or block_lstm.")
parser.add_argument(
"--batch_norm",
type="bool",
default="False",
help="Whether to enable batch normalization or not.")
parser.add_argument(
"--learning_rate",
type=float,
default=0.0001,
help="Learning rate used for training.")
parser.add_argument(
"--gradient_clipping_norm",
type=float,
default=9.0,
help="Gradient clipping norm used during training.")
parser.add_argument(
"--dropout",
type=float,
default=0.3,
help="Dropout used for convolutions and bidi lstm layers.")
parser.add_argument(
"--steps",
type=int,
default=100000,
help="Number of training steps.")
parser.add_argument(
"--batch_size",
type=int,
default=8,
help="Batch size to use for training/evaluation.")
parser.add_argument(
"--model_dir",
type=str,
default="A:\Code\Machine Learning\Software Engineering project\Quick Draw\Model Checkpoints",
help="Path for storing the model checkpoints.")
parser.add_argument(
"--self_test",
type="bool",
default="False",
help="Whether to enable batch normalization or not.")
parser.add_argument(
"--classes_path",
type=str,
default="A:\Code\Machine Learning\Software Engineering project\Quick Draw\quickdraw-dataset-master\categories.txt",
help="Path of the text file which contains name of classes"
)
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
I have the data in TFRecord format. After running the code, it creates an eval folder with events.out.tfevents.* files (named after my system), some more events.out files in the main folder, a graph.pbtxt file, model.ckpt-0.data-00000-of-00001, model.ckpt-0.index, model.ckpt-0.meta, and a file named checkpoint. What are all these files, and how do I use them for prediction on test data?
Also, why is there just one model.ckpt data file when the data consists of 9 training and 9 eval files?
Another question: when I ran the code, it finished quite quickly, while most deep learning models take a long time to train. There are 3,450,000 examples for training and 345,000 for eval, yet everything was done in about a minute. I am new to TensorFlow, so please keep that in mind; I don't know much about it yet.
Update: After adding the line tf.logging.set_verbosity('INFO') to the code, I am getting the following output:
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from A:\Code\Machine Learning\Software Engineering project\Quick Draw\Model Checkpoints\model.ckpt-0
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into A:\Code\Machine Learning\Software Engineering project\Quick Draw\Model Checkpoints\model.ckpt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-23-22:28:00
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from A:\Code\Machine Learning\Software Engineering project\Quick Draw\Model Checkpoints\model.ckpt-0
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-23-22:28:02
INFO:tensorflow:Saving dict for global step 0: accuracy = 0.0, global_step = 0, loss = 0.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 0: A:\Code\Machine Learning\Software Engineering project\Quick Draw\Model Checkpoints\model.ckpt-0
INFO:tensorflow:Loss for final step: None.
Why are the accuracy and the loss both 0.0? I think there is some problem here, but I don't know exactly what. I suspect the training is not happening on all examples.
The most obvious reason: your model doesn't receive any training data. As far as I can tell, your get_input_fn contains unnecessary functionality and probably does not behave the way you need it to.
In particular, it checks mode == TRAIN twice:
if mode == tf.estimator.ModeKeys.TRAIN:
dataset = dataset.shuffle(buffer_size=10)
dataset = dataset.repeat()
# Preprocesses 10 files concurrently and interleaves records from each file.
dataset = dataset.interleave(
tf.data.TFRecordDataset,
cycle_length=10,
block_length=1)
dataset = dataset.map(
functools.partial(_parse_tfexample_fn, mode=mode),
num_parallel_calls=10)
dataset = dataset.prefetch(10000)
if mode == tf.estimator.ModeKeys.TRAIN:
dataset = dataset.shuffle(buffer_size=1000000)
And it returns an iterator, which shouldn't be used with the Estimator API:
features, labels = dataset.make_one_shot_iterator().get_next()
return features, labels
To debug, try a simple input pipeline first and add functionality back step by step:
dataset = tf.data.TFRecordDataset.list_files(tfrecord_pattern)
dataset = dataset.shuffle(buffer_size=10)
dataset = dataset.repeat()
dataset = dataset.interleave(tf.data.TFRecordDataset, cycle_length=10, block_length=1)
dataset = dataset.map(_parse_tfexample_fn, num_parallel_calls=10)
dataset = dataset.prefetch(1)
dataset = dataset.padded_batch(batch_size, padded_shapes=dataset.output_shapes)
return dataset
And try using estimator.train first.
P.S. This is not a ready-to-use function; you should adapt it to your input data.
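For example, a driver along these lines (a minimal sketch only; it assumes `_parse_tfexample_fn` has been hoisted to module level, and it reuses the flags and the estimator created in the question's code):
def simple_input_fn():
    # Return the tf.data.Dataset itself instead of an iterator.
    dataset = tf.data.TFRecordDataset.list_files(FLAGS.training_data)
    dataset = dataset.interleave(tf.data.TFRecordDataset, cycle_length=10)
    dataset = dataset.map(
        functools.partial(_parse_tfexample_fn, mode=tf.estimator.ModeKeys.TRAIN))
    dataset = dataset.repeat()
    dataset = dataset.padded_batch(FLAGS.batch_size,
                                   padded_shapes=dataset.output_shapes)
    return dataset

# Drive a few steps directly to verify that data actually flows.
estimator.train(input_fn=simple_input_fn, steps=100)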
I am new to TensorFlow but experienced in analytics. I am attempting to reproduce in TensorFlow the performance I obtain from a legacy neural-network trainer: a single hidden layer, trained with back-prop using momentum updates and a mean-squared-error objective function.
Unfortunately, all my attempts have failed so far. The TensorFlow models range from being "only" 10-15% worse than my home-brewed trainer when I use the canned tf.estimator.DNNClassifier, to much, much worse when I construct the net myself.
Can someone identify the mistakes in how I am calling the tool in the code below? Using a command-line input, I toggle between the "canned_dnn" and my "custom" setup.
def get_classifier( model_name, cur_model_dir, feature_columns ):
if model_name == 'canned_dnn':
return tf.estimator.DNNClassifier( feature_columns=feature_columns,
model_dir=cur_model_dir,
hidden_units=[18],
activation_fn=tf.nn.relu )
elif model_name == 'custom':
return tf.estimator.Estimator( model_fn=my_model,
model_dir=cur_model_dir,
params={
'feature_columns': feature_columns,
'n_classes': 2
})
else:
raise RuntimeError()
def my_model(features, labels, mode, params):
# input layer
net = tf.feature_column.input_layer( features, params['feature_columns'] )
# hidden layer 1
net = tf.layers.dense(net, units=18, activation=tf.nn.relu, use_bias=True, bias_initializer=tf.zeros_initializer(), name='LEARN_1')
# output layer computes logits
logits = tf.layers.dense(net, params['n_classes'], activation=tf.nn.sigmoid, name='OUTPUT')
# compute predictions
predicted_classes = tf.argmax(logits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'class_ids': predicted_classes[:, tf.newaxis],
'probabilities': tf.nn.softmax(logits),
'logits': logits,
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
# mean squared error
predicted_classes = tf.argmax(logits, 1)
loss = tf.losses.mean_squared_error(labels=tf.one_hot(labels, 2), predictions=logits)
# metrics
# loss report
tf.summary.scalar('loss', loss )
# accuracy
accuracy = tf.metrics.accuracy(labels=labels,
predictions=predicted_classes,
name='acc_op')
metrics = {'accuracy': accuracy}
tf.summary.scalar('accuracy', accuracy[1])
# EVALUATION MODE
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(
mode, loss=loss, eval_metric_ops=metrics)
# TRAINING MODE
assert mode == tf.estimator.ModeKeys.TRAIN
with tf.name_scope( 'OPTIMIZE' ):
optimizer=tf.train.MomentumOptimizer(learning_rate=.02,momentum=.5)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
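One thing worth comparing (an editorial sketch, not a verified fix for the 10-15% gap): a canned classifier like DNNClassifier keeps the output layer linear and lets the loss apply the softmax, whereas the code above applies a sigmoid to the "logits" and then stacks a softmax/MSE on top. The conventional pairing looks like this:
# Sketch of the conventional logits/loss pairing in a custom model_fn.
# No activation on the output layer; the cross-entropy op applies the
# softmax itself, numerically stably.
logits = tf.layers.dense(net, params['n_classes'], activation=None,
                         name='OUTPUT')
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)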
I'm having a problem making the cell initial_state configurable so that I can use different batch sizes for training and prediction. Essentially, at training time I feed fixed-size mini-batches, while at prediction time I predict one input at a time and feed the output back into the model to get the next one.
However, I am unable to create a graph in which the first dimension of the cell initial_state is configurable. Here is a simple model_fn that models character input:
def model_fn(features, labels, mode, params):
inputs = tf.one_hot(features, params["VOCAB_SIZE"], 1.0, 0.0)
cell = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.GRUCell(params["INTERNAL_SIZE"]) for _ in range(params["NUM_LAYERS"])
], state_is_tuple=False)
pkeep = params["DROPOUT_PKEEP"] if mode == tf.estimator.ModeKeys.TRAIN else 1.0
cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=pkeep)
initial_state = tf.get_variable(
"initial_state",
dtype=tf.float32,
initializer=cell.zero_state(params["BATCH_SIZE"], dtype=tf.float32),
)
if mode == tf.estimator.ModeKeys.EVAL:
initial_state = cell.zero_state(1, dtype=tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state)
if mode != tf.estimator.ModeKeys.EVAL:
tf.assign(initial_state, final_state)
logits = ...
if mode == tf.estimator.ModeKeys.PREDICT:
logits = tf.reshape(logits, [-1, 1, 98])
else:
logits = tf.reshape(logits, [-1, features.shape[1], 98])
probabilities = tf.nn.softmax(logits)
predictions = tf.argmax(probabilities, 2)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = { "predictions": predictions, "probabilities": probabilities }
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
loss = ...
if mode == tf.estimator.ModeKeys.EVAL:
accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)
return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops={
"accuracy": accuracy,
})
optimizer = tf.train.AdamOptimizer(learning_rate=params["LEARNING_RATE"])
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
The problem is that this requires me to pass BATCH_SIZE in params to define the initial_state, and at train time that is a value like 200. At test time, however, with a single-example batch, it raises an error saying that a [1, 384] tensor cannot be assigned to a [200, 384] variable. How do I make the first dimension of the initial_state configurable based on the mode?
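One common workaround (an editorial sketch, not from the original thread) is to avoid baking the batch size into the graph and derive it from the input tensor at runtime. Note that this drops the tf.get_variable that persists state between calls, so it only addresses the shape clash:
# Sketch: derive the batch size dynamically instead of from params.
# This sidesteps the [1, 384] vs [200, 384] mismatch, but does not
# persist state across Estimator calls the way the variable did.
batch_size = tf.shape(inputs)[0]
initial_state = cell.zero_state(batch_size, dtype=tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs,
                                         initial_state=initial_state)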
I am following different tutorials on PyTorch, and I'm trying to use the GPU, but I'm seeing a big gap between the execution time announced on the website and what I actually get!
https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
The code from the end of that page is reproduced below; the tutorial says the program finishes in about 1 min 30 s, versus 6-7 min on my computer! Could you try it and tell me how long it takes for you? I am very confused to see such a big difference when using the same code as the website!
This will allow me to figure out whether the problem comes from my GPU or not :)
My config:
GTX 1080Ti
Windows10
Cuda 9.1
Pytorch 0.4.0
The code:
if __name__ == "__main__":
plt.ion() # interactive mode
######################################################################
# Load Data
# ---------
#
# We will use torchvision and torch.utils.data packages for loading the
# data.
#
# The problem we're going to solve today is to train a model to classify
# **ants** and **bees**. We have about 120 training images each for ants and bees.
# There are 75 validation images for each class. Usually, this is a very
# small dataset to generalize upon, if trained from scratch. Since we
# are using transfer learning, we should be able to generalize reasonably
# well.
#
# This dataset is a very small subset of imagenet.
#
# .. Note ::
# Download the data from
# `here <https://download.pytorch.org/tutorial/hymenoptera_data.zip>`_
# and extract it to the current directory.
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = 'hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
######################################################################
# Visualize a few images
# ^^^^^^^^^^^^^^^^^^^^^^
# Let's visualize a few training images so as to understand the data
# augmentations.
def imshow(inp, title=None):
"""Imshow for Tensor."""
inp = inp.numpy().transpose((1, 2, 0))
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
inp = std * inp + mean
inp = np.clip(inp, 0, 1)
plt.imshow(inp)
if title is not None:
plt.title(title)
plt.pause(0.001) # pause a bit so that plots are updated
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
# Make a grid from batch
out = torchvision.utils.make_grid(inputs)
imshow(out, title=[class_names[x] for x in classes])
######################################################################
# Training the model
# ------------------
#
# Now, let's write a general function to train a model. Here, we will
# illustrate:
#
# - Scheduling the learning rate
# - Saving the best model
#
# In the following, parameter ``scheduler`` is an LR scheduler object from
# ``torch.optim.lr_scheduler``.
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
######################################################################
# Visualizing the model predictions
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Generic function to display predictions for a few images
#
def visualize_model(model, num_images=6):
was_training = model.training
model.eval()
images_so_far = 0
fig = plt.figure()
with torch.no_grad():
for i, (inputs, labels) in enumerate(dataloaders['val']):
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
for j in range(inputs.size()[0]):
images_so_far += 1
ax = plt.subplot(num_images//2, 2, images_so_far)
ax.axis('off')
ax.set_title('predicted: {}'.format(class_names[preds[j]]))
imshow(inputs.cpu().data[j])
if images_so_far == num_images:
model.train(mode=was_training)
return
model.train(mode=was_training)
######################################################################
# Finetuning the convnet
# ----------------------
#
# Load a pretrained model and reset final fully connected layer.
#
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
######################################################################
# Train and evaluate
# ^^^^^^^^^^^^^^^^^^
#
# It should take around 15-25 min on CPU. On GPU though, it takes less than a
# minute.
#
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=25)
######################################################################
#
visualize_model(model_ft)
######################################################################
# ConvNet as fixed feature extractor
# ----------------------------------
#
# Here, we need to freeze all the network except the final layer. We need
# to set ``requires_grad == False`` to freeze the parameters so that the
# gradients are not computed in ``backward()``.
#
# You can read more about this in the documentation
# `here <http://pytorch.org/docs/notes/autograd.html#excluding-subgraphs-from-backward>`__.
#
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
param.requires_grad = False
# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)
model_conv = model_conv.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)
######################################################################
# Train and evaluate
# ^^^^^^^^^^^^^^^^^^
#
# On CPU this will take about half the time compared to previous scenario.
# This is expected as gradients don't need to be computed for most of the
# network. However, forward does need to be computed.
#
model_conv = train_model(model_conv, criterion, optimizer_conv,
exp_lr_scheduler, num_epochs=25)
######################################################################
#
visualize_model(model_conv)
plt.ioff()
plt.show()
Thanks everybody!
A bit late I know, but it might be that you are missing
model = model.to(device)
where device is the torch.device("cuda:0" ...) defined earlier in the script.
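To double-check that the model really lives on the GPU, a quick sanity check (illustrative only, using the tutorial's variable names):
# Illustrative checks; model_ft and device come from the tutorial code.
print(torch.cuda.is_available())           # expect True on a working CUDA setup
print(torch.cuda.get_device_name(0))       # e.g. the GTX 1080 Ti
print(next(model_ft.parameters()).device)  # expect cuda:0 after model_ft.to(device)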
I want to set the same weight for part of the positive samples. However, tf.nn.weighted_cross_entropy_with_logits can, as far as I can tell, only set one weight for all positive samples.
For example, in CTR prediction I want to set a weight of 10 for the order samples, while the weight of the click samples and the unclick samples stays 1.
Here is my unweighted code:
def my_model(features, labels, mode, params):
net = tf.feature_column.input_layer(features, params['feature_columns'])
for units in params['hidden_units']:
net = tf.layers.dense(net, units=units, activation=params["activation"])
logits = tf.layers.dense(net, params['n_classes'], activation=None)
predicted_classes = tf.argmax(logits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'class_ids': predicted_classes, #predicted_classes[:, tf.newaxis],
'probabilities': tf.nn.softmax(logits),
'logits': logits,
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
metrics = {'auc': tf.metrics.auc(labels=labels, predictions=tf.nn.softmax(logits)[:,1])}
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
assert mode == tf.estimator.ModeKeys.TRAIN
optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
Train
train_input_fn = tf.estimator.inputs.pandas_input_fn(x=data_train, y=data_train_click, batch_size = 1024, num_epochs=1, shuffle=False)
classifier.train(input_fn=train_input_fn)
Here data_train_click is a Series in which the click samples are 1 and the unclicked samples are 0. I also have a Series named data_train_order, in which the order samples are 1 and the others are 0.
The easiest way to do this is by using Keras:
https://keras.io/models/model/
The fit function has a sample_weight parameter.
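For instance, something along these lines (a minimal sketch; model, x_train, and y_train are placeholder names, and the 10/1 weighting mirrors the question's setup):
import numpy as np

# Per-sample weights: 10 for order samples, 1 otherwise.
sample_weights = np.where(data_train_order.values == 1, 10.0, 1.0)

model.fit(x_train, y_train,
          sample_weight=sample_weights,  # Keras scales each sample's loss
          batch_size=1024)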
You can weigh each sample differently by passing a weights argument to the loss function: a tensor of shape [batch_size] containing the corresponding weight for each sample.
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits, weights=weights)
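For the weighting described in the question, the weights tensor could be derived from the order indicator, assuming you feed it in alongside the features (the feature key "order" below is a hypothetical name; adapt it to however you pass data_train_order in):
# Sketch: weight order samples 10x, click/unclick samples 1x.
# "order" is a hypothetical feature key carrying the order indicator.
order = tf.cast(features["order"], tf.float32)
weights = 1.0 + 9.0 * order  # 10.0 where order == 1, otherwise 1.0
loss = tf.losses.sparse_softmax_cross_entropy(
    labels=labels, logits=logits, weights=weights)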