Specifically, when using TensorFlow to build my model in OOP style, where should I build the graph? Where should I start a session to run the graph? What's the best practice for this case?
In TensorFlow Mechanics 101, the MNIST example simply defines the inference, loss, and training functions in the module mnist.py and builds the graph in fully_connected_feed.py. But in my opinion, the graph is actually part of the model and should be built inside the model, maybe in its __init__ method.
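For example, something like this (a minimal sketch of what I mean; the layer and optimizer choices are just placeholders):

import tensorflow as tf

class MNISTModel:
    def __init__(self):
        # Build the whole graph when the model object is created.
        self.images = tf.placeholder(tf.float32, [None, 784])
        self.labels = tf.placeholder(tf.int64, [None])
        weights = tf.Variable(tf.zeros([784, 10]))
        bias = tf.Variable(tf.zeros([10]))
        logits = tf.matmul(self.images, weights) + bias
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labels, logits=logits))
        self.train_op = tf.train.GradientDescentOptimizer(0.5).minimize(self.loss)

model = MNISTModel()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # sess.run(model.train_op, feed_dict={model.images: ..., model.labels: ...})

But should the session also live inside the model, or outside it as above?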
I have seen many other models in the TensorFlow model zoo, and each has its own practice, so I am a little confused here. Is there a best practice or any recommended programming paradigm when using TensorFlow?
Also check out a nice article about this topic:
https://danijar.com/structuring-your-tensorflow-models/
In this article, Danijar Hafner introduces the lazy property pattern:
class Model:
def __init__(self, data, target):
self.data = data
self.target = target
self.prediction
self.optimize
self.error
@lazy_property
def prediction(self):
data_size = int(self.data.get_shape()[1])
target_size = int(self.target.get_shape()[1])
weight = tf.Variable(tf.truncated_normal([data_size, target_size]))
bias = tf.Variable(tf.constant(0.1, shape=[target_size]))
incoming = tf.matmul(self.data, weight) + bias
return tf.nn.softmax(incoming)
@lazy_property
def optimize(self):
cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
optimizer = tf.train.RMSPropOptimizer(0.03)
return optimizer.minimize(cross_entropy)
@lazy_property
def error(self):
mistakes = tf.not_equal(
tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
return tf.reduce_mean(tf.cast(mistakes, tf.float32))
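For reference, the lazy_property decorator used above caches each graph-building property, so the corresponding ops are only added to the graph once. The article defines it roughly like this:

import functools

def lazy_property(function):
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        # Build the ops the first time the property is accessed, then reuse them.
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return decorator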
See more in the article.
I usually build my graphs in __init__, but I sometimes create a separate compile function. I use a unique variable scope for the entire class, and the class provides save, restore, and init functions for its variables. I also provide functions to train and predict. I don't think there is really any standard practice, but this makes sense to me. Here is an example of how I build a generative model with image pyramids.
class PyramidGenerator:
def __init__(self,
session,
log2_input_size,
log2_output_size,
num_features,
convs_per_cell,
filter_size,
conv_activation,
num_attributes,
name = 'pyrgen'):
self.session = session
self.log2_input_size = log2_input_size
self.log2_output_size = log2_output_size
self.num_attributes = num_attributes
if not hasattr(num_features, '__iter__'):
num_features = [num_features] * (log2_output_size - log2_input_size)
if not hasattr(convs_per_cell, '__iter__'):
convs_per_cell = [convs_per_cell] * (log2_output_size - log2_input_size)
if not hasattr(filter_size, '__iter__'):
filter_size = [filter_size] * (log2_output_size - log2_input_size)
with tf.variable_scope(name) as scope:
self.training_images = tf.placeholder(tf.float32, (None, 2 ** log2_output_size, 2 ** log2_output_size, 3), 'training_images')
if num_attributes:
self.image_attributes = tf.placeholder(tf.float32, (None, num_attributes))
self.seed_images = tf.placeholder(tf.float32, (None, 2 ** log2_input_size, 2 ** log2_input_size, 3), 'seed_images')
self.learning_rate = tf.placeholder(tf.float32, (), 'learning_rate')
self.scope_name = scope.name
self.cost = 0
def _augment(img):
img = tf.image.random_flip_left_right(img)
return img
augmented = tf.map_fn(_augment, self.training_images)
training_scales = {s:tf.image.resize_area(augmented, (2 ** s, 2 ** s)) for s in range(log2_input_size, log2_output_size + 1)}
x_gen = self.seed_images
x_train = None
if num_attributes:
h_gen = h_train = tf.tile(tf.reshape(self.image_attributes, (-1, 1, 1, num_attributes)), (1, 2 ** log2_input_size, 2 ** log2_input_size, 1))
else:
h_gen = h_train = None
self.generator_outputs = []
for n_features, conv_size, n_convs, log2_size in zip(num_features, filter_size, convs_per_cell, range(log2_input_size, log2_output_size)):
size = 2 ** log2_size
with tf.variable_scope('level_%d' % size) as level_scope:
y_train = training_scales[log2_size + 1]
x_train = training_scales[log2_size]
x_train, h_train = ops.sharpen_cell(x_train, h_train, 2, n_features, conv_size, n_convs, conv_activation, 'upsampler')
self.cost += tf.reduce_mean((x_train - y_train) ** 2)
level_scope.reuse_variables()
x_gen, h_gen = ops.sharpen_cell(x_gen, h_gen, 2, n_features, conv_size, n_convs, conv_activation, 'upsampler')
self.generator_outputs.append(tf.clip_by_value(x_gen, -1, 1))
with tf.variable_scope('training'):
opt = tf.train.AdamOptimizer(self.learning_rate)
grads = opt.compute_gradients(self.cost)
grads = [(tf.clip_by_value(g, -1.0, 1.0), v) for g, v in grads]
self.train_step = opt.apply_gradients(grads)
self.variables = tf.get_collection(tf.GraphKeys.VARIABLES, self.scope_name)
self.init_vars = tf.initialize_variables(self.variables)
self.saver = tf.train.Saver(self.variables)
def save(self, fn):
self.saver.save(self.session, fn)
def restore(self, fn):
self.saver.restore(self.session, fn)
def initialize(self):
self.session.run(self.init_vars)
def train(self, training_images, validation_images = [], learning_rate = 1e-3, batch_size = 32):
with ThreadPoolExecutor(max(os.cpu_count(), batch_size)) as exc:
def _loadImage(fn):
img = cv2.imread(fn, cv2.IMREAD_COLOR)
img = cv2.resize(img, (2 ** self.log2_output_size, 2 ** self.log2_output_size))
return np.float32(img / 128.0 - 1.0)
def _loadBatch(b):
if self.num_attributes:
imgs, attrs = zip(*b)
else:
imgs = b
attrs = None
imgs = list(exc.map(_loadImage, imgs))
return imgs, attrs
total_cost = 0
batches = list(_batch(training_images, batch_size, False))
loader = exc.submit(_loadBatch, batches[0])
for i in range(len(batches)):
imgs, attrs = loader.result()
if i < len(batches) - 1:
loader = exc.submit(_loadBatch, batches[i + 1])
feed_dict = {self.training_images: imgs, self.learning_rate: learning_rate}
if self.num_attributes:
feed_dict.update({self.image_attributes: attrs})
total_cost += self.session.run((self.cost, self.train_step), feed_dict)[0]
print('Training Batch(%d/%d) Cost(%e)' % (i + 1, len(batches), total_cost / (i + 1)), end = '\r')
print()
return total_cost / (i + 1)
def generate_random(self):
img = np.clip(np.random.randn(1, 2 ** self.log2_input_size, 2 ** self.log2_input_size, 3), -1, 1)
if self.num_attributes:
attrs = np.random.choice((1.0, -1.0), size = (1, self.num_attributes))
feed = {self.seed_images: img, self.image_attributes: attrs}
else:
feed = {self.seed_images: img}
y = self.session.run(self.generator_outputs, feed)
return [img] + y
def generate_from(self, seed_image):
if self.num_attributes:
img, attrs = seed_image
else:
img = seed_image
img = cv2.imread(img, cv2.IMREAD_COLOR)
img = cv2.resize(img, (2 ** self.log2_input_size, 2 ** self.log2_input_size))
img = np.expand_dims(np.float32(img / 128.0 - 1.0), 0)
if self.num_attributes:
feed = {self.seed_images: img, self.image_attributes: [attrs]}
else:
feed = {self.seed_images: img}
y = self.session.run(self.generator_outputs, feed)
return [img] + y
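For completeness, here is roughly how I use it (a sketch; the glob pattern, sizes, and hyperparameters are placeholders, ops.sharpen_cell is my own helper, and _batch in the train method above is another small helper, not shown, that splits the file list into minibatches):

import glob

training_image_filenames = glob.glob('images/*.jpg')  # placeholder path

with tf.Session() as session:
    model = PyramidGenerator(session,
                             log2_input_size=3,   # 8x8 seed images
                             log2_output_size=6,  # 64x64 outputs
                             num_features=64,
                             convs_per_cell=2,
                             filter_size=3,
                             conv_activation=tf.nn.elu,
                             num_attributes=0)
    model.initialize()
    for epoch in range(10):
        cost = model.train(training_image_filenames, learning_rate=1e-3, batch_size=32)
        print('Epoch %d cost %e' % (epoch + 1, cost))
    model.save('pyrgen.ckpt')
    samples = model.generate_random()  # list of progressively upsampled images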
I'm training an MRI super-resolution model on about 40k volumes with Tensorflow/Keras and a custom training loop:
Pseudo Python/TF code:
for e in epochs:
for s in the steps:
batch = data_handler.get_batch(files[s])
model.train_on_batch(batch['lr'], batch['hr'])
def get_batch(volume_path):  # online data augmentation and image degradation
vol_hr = np.load(volume_path) # 200x200x200 array
vol_hr = augment(vol_hr) # flips / rotation
vol_lr = degrade(vol_hr) # blur, add noise, downsample (100x100x200)
batch = crop_patches(vol_lr, vol_hr)
# batch['lr'].shape == batch_size, 32, 32, 3
# batch['hr'].shape == batch_size, 64, 64, 3
return batch
This runs terribly slowly, so I'm looking into optimizing the training through (i) implementing a tf.data input pipeline (tf.data.Dataset) and (ii) distributed training. This post is about optimizing the data pipeline.
My plan is to implement a tf.data input pipeline (TF 2.4.1). The generator looks something like this at the moment:
import tensorflow as tf
import numpy as np
import os
import time
from skimage.transform import downscale_local_mean
def make_rand_vols(n, size=(200, 200, 200), tmp_path='./tmp/tmp_vol'):
if not os.path.isdir(tmp_path):
os.makedirs(tmp_path)
rand_arr_fnames = []
for f in range(n):
rand_arr = np.random.rand(size[0], size[1], size[2])
fpath = os.path.join(tmp_path, f"{f}.npy")
np.save(fpath, rand_arr)
rand_arr_fnames.append(fpath)
return rand_arr_fnames
def benchmark(dataset, num_epochs=2, steps=10):
start_time = time.perf_counter()
for epoch_num in range(num_epochs):
for s, sample in enumerate(dataset):
# Performing a training step
print(s)
time.sleep(0.01)
if s == steps - 1:
break
print("Execution time:", time.perf_counter() - start_time)
class CustomGenerator(tf.keras.utils.Sequence):
def __init__(self, fnames, batch_size=3,
scale=2, shuffle=True, patch_size=32, n_channels=3):
self.fnames = fnames
self.shuffle = shuffle
self.batch_size = batch_size
self.on_epoch_end()
self.scale = scale
self.lr_patch_size = patch_size
self.hr_patch_size = patch_size * scale
self.n_channels = n_channels
def __len__(self):
return len(self.fnames)
def on_epoch_end(self):
# Update indexes after each epoch
if self.shuffle == True:
np.random.shuffle(self.fnames)
def __getitem__(self, idx):
vol_hr = np.load(self.fnames[idx]) # array of size e.g. (300, 300, 300)
vol_hr = self.augment(vol_hr)
vol_lr = self.degrade(vol_hr, self.scale) # array of size e.g. (150, 150, 300)
batch = self.get_vol_crops(vol_lr, vol_hr) # array of size (batch_size, width / scale, length / scale, depth)
# batch -> (batch_lr, batch_hr) -> (arr [batch_size, patch_lr, patch_lr, n_channels], arr [batch_size, patch_hr, patch_hr, n_channels])
return batch
def augment(self, vol):
if np.random.randint(0, 2):
vol = vol[:, ::-1, :] # simple for illustration
return vol
def degrade(self, vol_hr, downscale_factor):
vol_lr = downscale_local_mean(vol_hr,
(downscale_factor, downscale_factor, 1),
cval=0)
return vol_lr
def get_vol_crops(self, vol_lr, vol_hr):
pad = 2
rows_lr, cols_lr, n_slices = vol_lr.shape
lr_y_list = np.random.randint(0, rows_lr - self.lr_patch_size - 1, self.batch_size)  # top left row lr
lr_x_list = np.random.randint(0, cols_lr - self.lr_patch_size - 1, self.batch_size)  # top left col lr
z_list = np.random.randint(pad, n_slices - pad, self.batch_size)
lr_patches, hr_patches = [], []
for y_lr, x_lr, z in zip(lr_y_list, lr_x_list, z_list):
slice_idxs = list(range(z - self.n_channels // 2, z + self.n_channels // 2 + 1))
# LR patch
patch_lr = vol_lr[y_lr: y_lr + self.lr_patch_size,
x_lr: x_lr + self.lr_patch_size,
slice_idxs]
lr_patches.append(patch_lr)
# HR patch
y_hr, x_hr = self.scale * y_lr, self.scale * x_lr
patch_hr = vol_hr[y_hr: y_hr + self.hr_patch_size,
x_hr: x_hr + self.hr_patch_size,
slice_idxs]
#print(patch_lr.shape, patch_hr.shape)
#preview_patches(patch_lr, patch_hr)
hr_patches.append(patch_hr)
return np.array(lr_patches), np.array(hr_patches)
test_vol_fnames = make_rand_vols(50)
# Make tf.data.Dataset from keras Generator
batch_size = 24
patch_size = 32
scale = 2
n_channels = 3
gen = CustomGenerator(test_vol_fnames, patch_size=patch_size, batch_size=batch_size)
benchmark(gen, steps=50)
# Execution time: 9.38716044393368
dataset = tf.data.Dataset.from_generator(
lambda: gen,
output_types=(tf.float32, tf.float32),
output_shapes = ([batch_size, patch_size, patch_size, n_channels],
[batch_size, patch_size*scale, patch_size*scale, n_channels])
)
benchmark(dataset, steps=50)
# Execution time: 9.54540992085822
benchmark(dataset.prefetch(tf.data.AUTOTUNE), steps=50)
# Execution time: 8.615464322036132
benchmark(
tf.data.Dataset.range(2)
.interleave(
lambda _: dataset,
num_parallel_calls=tf.data.AUTOTUNE),
steps=50
)
# Execution time: 5.204080824041739
def mapped_generator(g):
return tf.py_function(lambda f: g,
inp=[f],
Tout=[tf.float32, tf.float32])
dataset_parallel = tf.data.Dataset.from_generator(
lambda: mapped_generator(gen),
output_types=(tf.float32, tf.float32),
output_shapes=([batch_size, patch_size, patch_size, n_channels],
[batch_size, patch_size*scale, patch_size*scale, n_channels])
)
benchmark(
dataset_parallel,
steps=50
)
# NameError: name 'f' is not defined
Is there anything I can do to make this more efficient? How can I wrap the generator inside tf.py_function?
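What I think I want is something like the following (a sketch only, I haven't verified it against the exact shapes above, and load_and_crop / tf_load_and_crop are just names I made up): skip from_generator entirely, map over the file names, and wrap the per-volume NumPy work in tf.py_function so tf.data can parallelize it with num_parallel_calls:

def load_and_crop(fname):
    # Runs as plain Python/NumPy inside tf.py_function.
    vol_hr = np.load(fname.numpy().decode())
    vol_hr = gen.augment(vol_hr)
    vol_lr = gen.degrade(vol_hr, gen.scale)
    lr, hr = gen.get_vol_crops(vol_lr, vol_hr)
    return lr.astype(np.float32), hr.astype(np.float32)

def tf_load_and_crop(fname):
    lr, hr = tf.py_function(load_and_crop, inp=[fname], Tout=[tf.float32, tf.float32])
    # py_function loses static shape information, so restore it here.
    lr.set_shape([batch_size, patch_size, patch_size, n_channels])
    hr.set_shape([batch_size, patch_size * scale, patch_size * scale, n_channels])
    return lr, hr

dataset_parallel = (tf.data.Dataset.from_tensor_slices(test_vol_fnames)
                    .shuffle(len(test_vol_fnames))
                    .map(tf_load_and_crop, num_parallel_calls=tf.data.AUTOTUNE)
                    .prefetch(tf.data.AUTOTUNE))

Would that be the idiomatic way to do it, or is there a better pattern?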
I am new to Tensorboard.
I am using fairly simple code running an experiment, and this is the output:
I don't remember asking for a hp_metric graph, yet here it is.
What is it and how do I get rid of it?
Full code to reproduce, using Pytorch Lightning (not that I think anyone should have to reproduce this to answer):
Please note that the ONLY line referencing TensorBoard directly is
self.logger.experiment.add_scalars("losses", {"train_loss": loss}, global_step=self.current_epoch)
import torch
from torch import nn
import torch.nn.functional as F
from typing import List, Optional
from pytorch_lightning.core.lightning import LightningModule
from Testing.Research.toy_datasets.ClustersDataset import ClustersDataset
from torch.utils.data import DataLoader
from Testing.Research.config.ConfigProvider import ConfigProvider
from pytorch_lightning import Trainer, seed_everything
from torch import optim
import os
from pytorch_lightning.loggers import TensorBoardLogger
class VAEFC(LightningModule):
# see https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73
# for possible upgrades, see https://arxiv.org/pdf/1602.02282.pdf
# https://stats.stackexchange.com/questions/332179/how-to-weight-kld-loss-vs-reconstruction-loss-in-variational-auto-encoder
def __init__(self, encoder_layer_sizes: List, decoder_layer_sizes: List, config):
super(VAEFC, self).__init__()
self._config = config
self.logger: Optional[TensorBoardLogger] = None
assert len(encoder_layer_sizes) >= 3, "must have at least 3 layers (2 hidden)"
# encoder layers
self._encoder_layers = nn.ModuleList()
for i in range(1, len(encoder_layer_sizes) - 1):
enc_layer = nn.Linear(encoder_layer_sizes[i - 1], encoder_layer_sizes[i])
self._encoder_layers.append(enc_layer)
# predict mean and covariance vectors
self._mean_layer = nn.Linear(encoder_layer_sizes[
len(encoder_layer_sizes) - 2],
encoder_layer_sizes[len(encoder_layer_sizes) - 1])
self._logvar_layer = nn.Linear(encoder_layer_sizes[
len(encoder_layer_sizes) - 2],
encoder_layer_sizes[len(encoder_layer_sizes) - 1])
# decoder layers
self._decoder_layers = nn.ModuleList()
for i in range(1, len(decoder_layer_sizes)):
dec_layer = nn.Linear(decoder_layer_sizes[i - 1], decoder_layer_sizes[i])
self._decoder_layers.append(dec_layer)
self._recon_function = nn.MSELoss(reduction='mean')
def _encode(self, x):
for i in range(len(self._encoder_layers)):
layer = self._encoder_layers[i]
x = F.relu(layer(x))
mean_output = self._mean_layer(x)
logvar_output = self._logvar_layer(x)
return mean_output, logvar_output
def _reparametrize(self, mu, logvar):
if not self.training:
return mu
std = logvar.mul(0.5).exp_()
if std.is_cuda:
eps = torch.cuda.FloatTensor(std.size()).normal_()
else:
eps = torch.FloatTensor(std.size()).normal_()
reparameterized = eps.mul(std).add_(mu)
return reparameterized
def _decode(self, z):
for i in range(len(self._decoder_layers) - 1):
layer = self._decoder_layers[i]
z = F.relu((layer(z)))
decoded = self._decoder_layers[len(self._decoder_layers) - 1](z)
# decoded = F.sigmoid(self._decoder_layers[len(self._decoder_layers)-1](z))
return decoded
def _loss_function(self, recon_x, x, mu, logvar, reconstruction_function):
"""
recon_x: generating images
x: origin images
mu: latent mean
logvar: latent log variance
"""
binary_cross_entropy = reconstruction_function(recon_x, x) # mse loss TODO see if mse or cross entropy
# loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
kld_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
kld = torch.sum(kld_element).mul_(-0.5)
# KL divergence Kullback–Leibler divergence, regularization term for VAE
# It is a measure of how different two probability distributions are from each other.
# We are trying to force the distributions closer while keeping the reconstruction loss low.
# see https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73
# read on weighting the regularization term here:
# https://stats.stackexchange.com/questions/332179/how-to-weight-kld-loss-vs-reconstruction-loss-in-variational
# -auto-encoder
return binary_cross_entropy + kld * self._config.regularization_factor
def training_step(self, batch, batch_index):
orig_batch, noisy_batch, _ = batch
noisy_batch = noisy_batch.view(noisy_batch.size(0), -1)
recon_batch, mu, logvar = self.forward(noisy_batch)
loss = self._loss_function(
recon_batch,
orig_batch, mu, logvar,
reconstruction_function=self._recon_function
)
# self.logger.experiment.add_scalars("losses", {"train_loss": loss})
self.logger.experiment.add_scalars("losses", {"train_loss": loss}, global_step=self.current_epoch)
# self.logger.experiment.add_scalar("train_loss", loss, self.current_epoch)
self.logger.experiment.flush()
return loss
def train_dataloader(self):
default_dataset, train_dataset, test_dataset = ClustersDataset.clusters_dataset_by_config()
train_dataloader = DataLoader(train_dataset, batch_size=self._config.batch_size, shuffle=True)
return train_dataloader
def test_dataloader(self):
default_dataset, train_dataset, test_dataset = ClustersDataset.clusters_dataset_by_config()
test_dataloader = DataLoader(test_dataset, batch_size=self._config.batch_size, shuffle=True)
return test_dataloader
def configure_optimizers(self):
optimizer = optim.Adam(self.parameters(), lr=self._config.learning_rate)  # use self.parameters(), not the global model
return optimizer
def forward(self, x):
mu, logvar = self._encode(x)
z = self._reparametrize(mu, logvar)
decoded = self._decode(z)
return decoded, mu, logvar
if __name__ == "__main__":
config = ConfigProvider.get_config()
seed_everything(config.random_seed)
latent_dim = config.latent_dim
enc_layer_sizes = config.enc_layer_sizes + [latent_dim]
dec_layer_sizes = [latent_dim] + config.dec_layer_sizes
model = VAEFC(config=config, encoder_layer_sizes=enc_layer_sizes, decoder_layer_sizes=dec_layer_sizes)
logger = TensorBoardLogger(save_dir='tb_logs', name='VAEFC')
logger.hparams = config # TODO only put here relevant stuff
# trainer = Trainer(gpus=1)
trainer = Trainer(deterministic=config.is_deterministic,
#auto_lr_find=config.auto_lr_find,
#log_gpu_memory='all',
# min_epochs=99999,
max_epochs=config.num_epochs,
default_root_dir=os.getcwd(),
logger=logger
)
# trainer.tune(model)
trainer.fit(model)
print("done training vae with lightning")
ClustersDataset.py
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import torch
import numpy as np
from Testing.Research.config.ConfigProvider import ConfigProvider
class ClustersDataset(Dataset):
__default_dataset = None
__default_dataset_train = None
__default_dataset_test = None
def __init__(self, cluster_size: int, noise_factor: float = 0, transform=None, n_clusters=2, centers_radius=4.0):
super(ClustersDataset, self).__init__()
self._cluster_size = cluster_size
self._noise_factor = noise_factor
self._n_clusters = n_clusters
self._centers_radius = centers_radius
# self._transform = transform
self._size = self._cluster_size * self._n_clusters
self._create_data_clusters()
self._combine_clusters_to_array()
self._normalize_data()
self._add_noise()
# self._plot()
pass
@staticmethod
def clusters_dataset_by_config():
if ClustersDataset.__default_dataset is not None:
return \
ClustersDataset.__default_dataset, \
ClustersDataset.__default_dataset_train, \
ClustersDataset.__default_dataset_test
config = ConfigProvider.get_config()
default_dataset = ClustersDataset(
cluster_size=config.cluster_size,
noise_factor=config.noise_factor,
transform=None,
n_clusters=config.n_clusters,
centers_radius=config.centers_radius
)
train_size = int(config.train_size * len(default_dataset))
test_size = len(default_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(default_dataset, [train_size, test_size])
ClustersDataset.__default_dataset = default_dataset
ClustersDataset.__default_dataset_train = train_dataset
ClustersDataset.__default_dataset_test = test_dataset
return default_dataset, train_dataset, test_dataset
def _create_data_clusters(self):
self._clusters = [torch.zeros((self._cluster_size, 2)) for _ in range(self._n_clusters)]
centers_radius = self._centers_radius
for i, c in enumerate(self._clusters):
r, x, y = 3.0, centers_radius * np.cos(i * np.pi * 2 / self._n_clusters), centers_radius * np.sin(
i * np.pi * 2 / self._n_clusters)
cluster_length = 1.1
cluster_start = i * 2 * np.pi / self._n_clusters
cluster_end = cluster_length * (i + 1) * 2 * np.pi / self._n_clusters
cluster_inds = torch.linspace(start=cluster_start, end=cluster_end, steps=self._cluster_size,
dtype=torch.float)
c[:, 0] = r * torch.sin(cluster_inds) + y
c[:, 1] = r * torch.cos(cluster_inds) + x
def _plot(self):
plt.figure()
plt.scatter(self._noisy_values[:, 0], self._noisy_values[:, 1], s=1, color='b', label="noisy_values")
plt.scatter(self._values[:, 0], self._values[:, 1], s=1, color='r', label="values")
plt.legend(loc="upper left")
plt.show()
def _combine_clusters_to_array(self):
size = self._size
self._values = torch.zeros(size, 2)
self._labels = torch.zeros(size, dtype=torch.long)
for i, c in enumerate(self._clusters):
self._values[i * self._cluster_size: (i + 1) * self._cluster_size, :] = self._clusters[i]
self._labels[i * self._cluster_size: (i + 1) * self._cluster_size] = i
def _add_noise(self):
size = self._size
mean = torch.zeros(size, 2)
std = torch.ones(size, 2)
noise = torch.normal(mean, std)
self._noisy_values = torch.zeros(size, 2)
self._noisy_values[:] = self._values
self._noisy_values = self._noisy_values + noise * self._noise_factor
def _normalize_data(self):
values_min, values_max = torch.min(self._values), torch.max(self._values)
self._values = (self._values - values_min) / (values_max - values_min)
self._values = self._values * 2 - 1
def __len__(self):
return self._size # number of samples in the dataset
def __getitem__(self, index):
item = self._values[index, :]
noisy_item = self._noisy_values[index, :]
# if self._transform is not None:
# noisy_item = self._transform(item)
return item, noisy_item, self._labels[index]
@property
def values(self):
return self._values
@property
def noisy_values(self):
return self._noisy_values
Config values (ConfigProvider just returns those as an object)
num_epochs: 15
batch_size: 128
learning_rate: 0.0001
auto_lr_find: False
noise_factor: 0.1
regularization_factor: 0.0
cluster_size: 5000
n_clusters: 5
centers_radius: 4.0
train_size: 0.8
latent_dim: 8
enc_layer_sizes: [2, 200, 200, 200]
dec_layer_sizes: [200, 200, 200, 2]
retrain_vae: False
random_seed: 11
is_deterministic: True
It's the default setting of the TensorBoard logger in PyTorch Lightning. You can set default_hp_metric to False to get rid of this metric.
TensorBoardLogger(save_dir='tb_logs', name='VAEFC', default_hp_metric=False)
The hp_metric helps you track model performance across different hyperparameters. You can check it under HPARAMS in your TensorBoard.
hp_metric (hyperparameter metric) is there to help you tune your hyperparameters.
You can set this metric to whatever you like, as documented in the official PyTorch docs.
Then you can look through your hyperparameters and see which come out best according to whichever metric you choose.
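For example, with the TensorBoardLogger you can turn off the default and report your own value via log_hyperparams (a sketch; the hparams dict and best_val_loss are placeholders for your own values):

logger = TensorBoardLogger(save_dir='tb_logs', name='VAEFC', default_hp_metric=False)
# ... after training / validation ...
logger.log_hyperparams({'learning_rate': 0.0001, 'latent_dim': 8},
                       metrics={'hp_metric': best_val_loss})  # best_val_loss: your own metric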
Alternatively, if you don't want it, you can disable it as suggested in @joe32140's answer:
You can set default_hp_metric to false to get rid of this metric.
TensorBoardLogger(save_dir='tb_logs', name='VAEFC', default_hp_metric=False)
I am trying to run some code on deep embedding clustering on MNIST with the help of Keras; however, I get the following error.
from keras.datasets import mnist
import numpy as np
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
def autoencoder(dims, act='relu', init='glorot_uniform'):
n_stacks = len(dims) - 1
# input
input_img = Input(shape=(dims[0],), name='input')
x = input_img
# internal layers in encoder
for i in range(n_stacks-1):
x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
# hidden layer
encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
x = encoded
# internal layers in decoder
for i in range(n_stacks-1, 0, -1):
x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
# output
x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
decoded = x
return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)
n_clusters = len(np.unique(y))
kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=4)
y_pred_kmeans = kmeans.fit_predict(x)
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in',
distribution='uniform')
pretrain_optimizer = SGD(lr=1, momentum=0.9)
pretrain_epochs = 300
batch_size = 256
save_dir = './results'
autoencoder, encoder = autoencoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs) #, callbacks=cb)
autoencoder.save_weights(save_dir + '/ae_weights.h5')
class ClusteringLayer(Layer):
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')
y_pred_last = np.copy(y_pred_kmeans)
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
# computing an auxiliary target distribution
def target_distribution(q):
weight = q ** 2 / q.sum(0)
return (weight.T / weight.sum(1)).T
loss = 0
index = 0
maxiter = 8000
update_interval = 140
index_array = np.arange(x.shape[0])
tol = 0.001 # tolerance threshold to stop training
for ite in range(int(maxiter)):
if ite % update_interval == 0:
q = model.predict(x, verbose=2 )
p = target_distribution(q) # update the auxiliary target distribution p
# evaluate the clustering performance
y_pred = q.argmax(1)
if y is not None:
acc = np.round(metrics.acc(y, y_pred), 5)
nmi = np.round(metrics.nmi(y, y_pred), 5)
ari = np.round(metrics.ari(y, y_pred), 5)
loss = np.round(loss, 5)
print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)
# check stop criterion - model convergence
delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
y_pred_last = np.copy(y_pred)
if ite > 0 and delta_label < tol:
print('delta_label ', delta_label, '< tol ', tol)
print('Reached tolerance threshold. Stopping training.')
break
idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])]
loss = model.train_on_batch(x=x[idx], y=p[idx])
index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0
model.save_weights(save_dir + '/DEC_model_final.h5')
model.load_weights(save_dir + '/DEC_model_final.h5')
the error:
with session.graph.as_default():
AttributeError: 'NoneType' object has no attribute 'graph'
(The problem might be in saving the model, but I can't figure out where I am going wrong.) My code runs perfectly in a Jupyter notebook, but I can't run it in an editor like PyCharm. Please help.
I'm really new to tfjs and Keras, so I need to ask a question. I found a Keras model for OCR on the internet. I then modified the code a little because it used save_weights; I used save instead so that the model topology is preserved as well. I converted the file using the TensorFlow.js converter and used it in a hybrid framework (Ionic 3).
Now I get an error with the Lambda layer. How can I solve it?
To make things clear, here is the Python model:
'''This example uses a convolutional stack followed by a recurrent stack
and a CTC logloss function to perform optical character recognition
of generated text images. I have no evidence of whether it actually
learns general shapes of text, or just is able to recognize all
the different fonts thrown at it...the purpose is more to demonstrate CTC
inside of Keras. Note that the font list may need to be updated
for the particular OS in use.
This starts off with 4 letter words. For the first 12 epochs, the
difficulty is gradually increased using the TextImageGenerator class
which is both a generator class for test/train data and a Keras
callback class. After 20 epochs, longer sequences are thrown at it
by recompiling the model to handle a wider image and rebuilding
the word list to include two words separated by a space.
The table below shows normalized edit distance values. Theano uses
a slightly different CTC implementation, hence the different results.
Norm. ED
Epoch | TF | TH
------------------------
10 0.027 0.064
15 0.038 0.035
20 0.043 0.045
25 0.014 0.019
This requires cairo and editdistance packages:
pip install cairocffi
pip install editdistance
Created by Mike Henry
https://github.com/mbhenry/
'''
import os
import itertools
import codecs
import re
import datetime
import cairocffi as cairo
import editdistance
import numpy as np
from scipy import ndimage
import pylab
from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import GRU
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
from keras.preprocessing import image
import keras.callbacks
OUTPUT_DIR = 'image_ocr'
# character classes and matching regex filter
regex = r'^[a-z ]+$'
alphabet = u'abcdefghijklmnopqrstuvwxyz '
np.random.seed(55)
# this creates larger "blotches" of noise which look
# more realistic than just adding gaussian noise
# assumes greyscale with pixels ranging from 0 to 1
def speckle(img):
severity = np.random.uniform(0, 0.6)
blur = ndimage.gaussian_filter(np.random.randn(*img.shape) * severity, 1)
img_speck = (img + blur)
img_speck[img_speck > 1] = 1
img_speck[img_speck <= 0] = 0
return img_speck
# paints the string in a random location within the bounding box
# also uses a random font, a slight random rotation,
# and a random amount of speckle noise
def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False):
surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
with cairo.Context(surface) as context:
context.set_source_rgb(1, 1, 1) # White
context.paint()
# this font list works in CentOS 7
if multi_fonts:
fonts = [
'Century Schoolbook', 'Courier', 'STIX',
'URW Chancery L', 'FreeMono']
context.select_font_face(
np.random.choice(fonts),
cairo.FONT_SLANT_NORMAL,
np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
else:
context.select_font_face('Courier',
cairo.FONT_SLANT_NORMAL,
cairo.FONT_WEIGHT_BOLD)
context.set_font_size(25)
box = context.text_extents(text)
border_w_h = (4, 4)
if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]):
raise IOError(('Could not fit string into image.'
'Max char count is too large for given image width.'))
# teach the RNN translational invariance by
# fitting text box randomly on canvas, with some room to rotate
max_shift_x = w - box[2] - border_w_h[0]
max_shift_y = h - box[3] - border_w_h[1]
top_left_x = np.random.randint(0, int(max_shift_x))
if ud:
top_left_y = np.random.randint(0, int(max_shift_y))
else:
top_left_y = h // 2
context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
context.set_source_rgb(0, 0, 0)
context.show_text(text)
buf = surface.get_data()
a = np.frombuffer(buf, np.uint8)
a.shape = (h, w, 4)
a = a[:, :, 0] # grab single channel
a = a.astype(np.float32) / 255
a = np.expand_dims(a, 0)
if rotate:
a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
a = speckle(a)
return a
def shuffle_mats_or_lists(matrix_list, stop_ind=None):
ret = []
assert all([len(i) == len(matrix_list[0]) for i in matrix_list])
len_val = len(matrix_list[0])
if stop_ind is None:
stop_ind = len_val
assert stop_ind <= len_val
a = list(range(stop_ind))
np.random.shuffle(a)
a += list(range(stop_ind, len_val))
for mat in matrix_list:
if isinstance(mat, np.ndarray):
ret.append(mat[a])
elif isinstance(mat, list):
ret.append([mat[i] for i in a])
else:
raise TypeError('`shuffle_mats_or_lists` only supports '
'numpy.array and list objects.')
return ret
# Translation of characters to unique integer values
def text_to_labels(text):
ret = []
for char in text:
ret.append(alphabet.find(char))
return ret
# Reverse translation of numerical classes back to characters
def labels_to_text(labels):
ret = []
for c in labels:
if c == len(alphabet): # CTC Blank
ret.append("")
else:
ret.append(alphabet[c])
return "".join(ret)
# only a-z and space..probably not too difficult
# to expand to uppercase and symbols
def is_valid_str(in_str):
search = re.compile(regex, re.UNICODE).search
return bool(search(in_str))
# Uses generator functions to supply train/test with
# data. Image renderings of the text are created on the fly
# each time with random perturbations
class TextImageGenerator(keras.callbacks.Callback):
def __init__(self, monogram_file, bigram_file, minibatch_size,
img_w, img_h, downsample_factor, val_split,
absolute_max_string_len=16):
self.minibatch_size = minibatch_size
self.img_w = img_w
self.img_h = img_h
self.monogram_file = monogram_file
self.bigram_file = bigram_file
self.downsample_factor = downsample_factor
self.val_split = val_split
self.blank_label = self.get_output_size() - 1
self.absolute_max_string_len = absolute_max_string_len
def get_output_size(self):
return len(alphabet) + 1
# num_words can be independent of the epoch size due to the use of generators
# as max_string_len grows, num_words can grow
def build_word_list(self, num_words, max_string_len=None, mono_fraction=0.5):
assert max_string_len <= self.absolute_max_string_len
assert num_words % self.minibatch_size == 0
assert (self.val_split * num_words) % self.minibatch_size == 0
self.num_words = num_words
self.string_list = [''] * self.num_words
tmp_string_list = []
self.max_string_len = max_string_len
self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
self.X_text = []
self.Y_len = [0] * self.num_words
def _is_length_of_word_valid(word):
return (max_string_len == -1 or
max_string_len is None or
len(word) <= max_string_len)
# monogram file is sorted by frequency in english speech
with codecs.open(self.monogram_file, mode='r', encoding='utf-8') as f:
for line in f:
if len(tmp_string_list) == int(self.num_words * mono_fraction):
break
word = line.rstrip()
if _is_length_of_word_valid(word):
tmp_string_list.append(word)
# bigram file contains common word pairings in english speech
with codecs.open(self.bigram_file, mode='r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
if len(tmp_string_list) == self.num_words:
break
columns = line.lower().split()
word = columns[0] + ' ' + columns[1]
if is_valid_str(word) and _is_length_of_word_valid(word):
tmp_string_list.append(word)
if len(tmp_string_list) != self.num_words:
raise IOError('Could not pull enough words'
'from supplied monogram and bigram files.')
# interlace to mix up the easy and hard words
self.string_list[::2] = tmp_string_list[:self.num_words // 2]
self.string_list[1::2] = tmp_string_list[self.num_words // 2:]
for i, word in enumerate(self.string_list):
self.Y_len[i] = len(word)
self.Y_data[i, 0:len(word)] = text_to_labels(word)
self.X_text.append(word)
self.Y_len = np.expand_dims(np.array(self.Y_len), 1)
self.cur_val_index = self.val_split
self.cur_train_index = 0
# each time an image is requested from train/val/test, a new random
# painting of the text is performed
def get_batch(self, index, size, train):
# width and height are backwards from typical Keras convention
# because width is the time dimension when it gets fed into the RNN
if K.image_data_format() == 'channels_first':
X_data = np.ones([size, 1, self.img_w, self.img_h])
else:
X_data = np.ones([size, self.img_w, self.img_h, 1])
labels = np.ones([size, self.absolute_max_string_len])
input_length = np.zeros([size, 1])
label_length = np.zeros([size, 1])
source_str = []
for i in range(size):
# Mix in some blank inputs. This seems to be important for
# achieving translational invariance
if train and i > size - 4:
if K.image_data_format() == 'channels_first':
X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[0, :, :].T
else:
X_data[i, 0:self.img_w, :, 0] = self.paint_func('',)[0, :, :].T
labels[i, 0] = self.blank_label
input_length[i] = self.img_w // self.downsample_factor - 2
label_length[i] = 1
source_str.append('')
else:
if K.image_data_format() == 'channels_first':
X_data[i, 0, 0:self.img_w, :] = (
self.paint_func(self.X_text[index + i])[0, :, :].T)
else:
X_data[i, 0:self.img_w, :, 0] = (
self.paint_func(self.X_text[index + i])[0, :, :].T)
labels[i, :] = self.Y_data[index + i]
input_length[i] = self.img_w // self.downsample_factor - 2
label_length[i] = self.Y_len[index + i]
source_str.append(self.X_text[index + i])
inputs = {'the_input': X_data,
'the_labels': labels,
'input_length': input_length,
'label_length': label_length,
'source_str': source_str # used for visualization only
}
outputs = {'ctc': np.zeros([size])} # dummy data for dummy loss function
return (inputs, outputs)
def next_train(self):
while 1:
ret = self.get_batch(self.cur_train_index,
self.minibatch_size, train=True)
self.cur_train_index += self.minibatch_size
if self.cur_train_index >= self.val_split:
self.cur_train_index = self.cur_train_index % 32
(self.X_text, self.Y_data, self.Y_len) = shuffle_mats_or_lists(
[self.X_text, self.Y_data, self.Y_len], self.val_split)
yield ret
def next_val(self):
while 1:
ret = self.get_batch(self.cur_val_index,
self.minibatch_size, train=False)
self.cur_val_index += self.minibatch_size
if self.cur_val_index >= self.num_words:
self.cur_val_index = self.val_split + self.cur_val_index % 32
yield ret
def on_train_begin(self, logs={}):
self.build_word_list(16000, 4, 1)
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=False, ud=False, multi_fonts=False)
def on_epoch_begin(self, epoch, logs={}):
# rebind the paint function to implement curriculum learning
if 3 <= epoch < 6:
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=False, ud=True, multi_fonts=False)
elif 6 <= epoch < 9:
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=False, ud=True, multi_fonts=True)
elif epoch >= 9:
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=True, ud=True, multi_fonts=True)
if epoch >= 21 and self.max_string_len < 12:
self.build_word_list(32000, 12, 0.5)
# the actual loss calc occurs here despite it not being
# an internal Keras loss function
def ctc_lambda_func(args):
y_pred, labels, input_length, label_length = args
# the 2 is critical here since the first couple outputs of the RNN
# tend to be garbage:
y_pred = y_pred[:, 2:, :]
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
# For a real OCR application, this should be beam search with a dictionary
# and language model. For this example, best path is sufficient.
def decode_batch(test_func, word_batch):
out = test_func([word_batch])[0]
ret = []
for j in range(out.shape[0]):
out_best = list(np.argmax(out[j, 2:], 1))
out_best = [k for k, g in itertools.groupby(out_best)]
outstr = labels_to_text(out_best)
ret.append(outstr)
return ret
class VizCallback(keras.callbacks.Callback):
def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
self.test_func = test_func
self.output_dir = os.path.join(
OUTPUT_DIR, run_name)
self.text_img_gen = text_img_gen
self.num_display_words = num_display_words
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
def show_edit_distance(self, num):
num_left = num
mean_norm_ed = 0.0
mean_ed = 0.0
while num_left > 0:
word_batch = next(self.text_img_gen)[0]
num_proc = min(word_batch['the_input'].shape[0], num_left)
decoded_res = decode_batch(self.test_func,
word_batch['the_input'][0:num_proc])
for j in range(num_proc):
edit_dist = editdistance.eval(decoded_res[j],
word_batch['source_str'][j])
mean_ed += float(edit_dist)
mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
num_left -= num_proc
mean_norm_ed = mean_norm_ed / num
mean_ed = mean_ed / num
print('\nOut of %d samples: Mean edit distance:'
'%.3f Mean normalized edit distance: %0.3f'
% (num, mean_ed, mean_norm_ed))
def on_epoch_end(self, epoch, logs={}):
self.model.save(
os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
self.show_edit_distance(256)
word_batch = next(self.text_img_gen)[0]
res = decode_batch(self.test_func,
word_batch['the_input'][0:self.num_display_words])
if word_batch['the_input'][0].shape[0] < 256:
cols = 2
else:
cols = 1
for i in range(self.num_display_words):
pylab.subplot(self.num_display_words // cols, cols, i + 1)
if K.image_data_format() == 'channels_first':
the_input = word_batch['the_input'][i, 0, :, :]
else:
the_input = word_batch['the_input'][i, :, :, 0]
pylab.imshow(the_input.T, cmap='Greys_r')
pylab.xlabel(
'Truth = \'%s\'\nDecoded = \'%s\'' %
(word_batch['source_str'][i], res[i]))
fig = pylab.gcf()
fig.set_size_inches(10, 13)
pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
pylab.close()
def train(run_name, start_epoch, stop_epoch, img_w):
# Input Parameters
img_h = 64
words_per_epoch = 16000
val_split = 0.2
val_words = int(words_per_epoch * (val_split))
# Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
minibatch_size = 32
if K.image_data_format() == 'channels_first':
input_shape = (1, img_w, img_h)
else:
input_shape = (img_w, img_h, 1)
fdir = os.path.dirname(
get_file('wordlists.tgz',
origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
untar=True))
img_gen = TextImageGenerator(
monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
minibatch_size=minibatch_size,
img_w=img_w,
img_h=img_h,
downsample_factor=(pool_size ** 2),
val_split=words_per_epoch - val_words)
act = 'relu'
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
inner = Conv2D(conv_filters, kernel_size, padding='same',
activation=act, kernel_initializer='he_normal',
name='conv1')(input_data)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
inner = Conv2D(conv_filters, kernel_size, padding='same',
activation=act, kernel_initializer='he_normal',
name='conv2')(inner)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
conv_to_rnn_dims = (img_w // (pool_size ** 2),
(img_h // (pool_size ** 2)) * conv_filters)
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
# cuts down input size going into RNN:
inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
gru_1 = GRU(rnn_size, return_sequences=True,
kernel_initializer='he_normal', name='gru1')(inner)
gru_1b = GRU(rnn_size, return_sequences=True,
go_backwards=True, kernel_initializer='he_normal',
name='gru1_b')(inner)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True,
kernel_initializer='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
# transforms RNN output to character activations:
inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
name='dense2')(concatenate([gru_2, gru_2b]))
y_pred = Activation('softmax', name='softmax')(inner)
Model(inputs=input_data, outputs=y_pred).summary()
labels = Input(name='the_labels',
shape=[img_gen.absolute_max_string_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# Keras doesn't currently support loss funcs with extra parameters
# so CTC loss is implemented in a lambda layer
loss_out = Lambda(
ctc_lambda_func, output_shape=(1,),
name='ctc')([y_pred, labels, input_length, label_length])
# clipnorm seems to speed up convergence
sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
model = Model(inputs=[input_data, labels, input_length, label_length],
outputs=loss_out)
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
if start_epoch > 0:
weight_file = os.path.join(
OUTPUT_DIR,
os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
model.load_weights(weight_file)
# captures output of softmax so we can decode the output during visualization
test_func = K.function([input_data], [y_pred])
viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
model.fit_generator(
generator=img_gen.next_train(),
steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
epochs=stop_epoch,
validation_data=img_gen.next_val(),
validation_steps=val_words // minibatch_size,
callbacks=[viz_cb, img_gen],
initial_epoch=start_epoch)
if __name__ == '__main__':
run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
train(run_name, 0, 20, 128)
# increase to wider images and start at epoch 20.
# The learned weights are reloaded
train(run_name, 20, 25, 512)
That's the error I get in the browser:
Error: Uncaught (in promise): Error: Unknown layer: Lambda
Error: Unknown layer: Lambda
at new ValueError (http://localhost:8100/build/vendor.js:27661:28)
at Object.deserializeKerasObject (http://localhost:8100/build/vendor.js:27536:19)
at Object.deserialize (http://localhost:8100/build/vendor.js:67301:28)
at processLayer (http://localhost:8100/build/vendor.js:32601:41)
at Container.fromConfig (http://localhost:8100/build/vendor.js:32617:13)
at Object.deserializeKerasObject (http://localhost:8100/build/vendor.js:27555:29)
at Object.deserialize (http://localhost:8100/build/vendor.js:67301:28)
at http://localhost:8100/build/vendor.js:95907:45
at step (http://localhost:8100/build/vendor.js:95882:23)
at Object.next (http://localhost:8100/build/vendor.js:95863:53)
at c (http://localhost:8100/build/polyfills.js:3:19752)
at http://localhost:8100/build/polyfills.js:3:19174
at rejected (http://localhost:8100/build/main.js:59:89)
at t.invoke (http://localhost:8100/build/polyfills.js:3:14976)
at Object.onInvoke (http://localhost:8100/build/vendor.js:5124:33)
at t.invoke (http://localhost:8100/build/polyfills.js:3:14916)
at r.run (http://localhost:8100/build/polyfills.js:3:10143)
at http://localhost:8100/build/polyfills.js:3:20242
at t.invokeTask (http://localhost:8100/build/polyfills.js:3:15660)
at Object.onInvokeTask (http://localhost:8100/build/vendor.js:5115:33)
I am trying to modify the code provided by neural-networks-and-deep-learning on GitHub for network3.py. This code basically constructs a convolutional neural network and trains it on the MNIST data set.
What I am trying to do is add the concept of back propagation and sparsity to this code. The part of the code which I added is outlined between the two lines of #. I get a TypeError: make node requires 4D tensor of kernels.
I understand that the size should be 4D (1, 1, 28, 28), but I am not sure where and how to make this modification.
class ConvPoolLayer(object):
def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
activation_fn=sigmoid):
self.filter_shape = filter_shape
self.image_shape = image_shape
self.poolsize = poolsize
self.activation_fn=activation_fn
# initialize weights and biases
n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
self.w = theano.shared(
np.asarray(
np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
dtype=theano.config.floatX),
borrow=True)
#print self.w.eval()
self.b = theano.shared(
np.asarray(
np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
dtype=theano.config.floatX),
borrow=True)
#print filter_shape[0]
#print self.b.eval()
self.params = [self.w, self.b]
def sigmoid(self, x):
return (1 / (1 + T.exp(-x)))
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
self.inpt = inpt.reshape(self.image_shape)
############################################################
learning_rate = 0.0001
learning_rate_s = 0.0001
gamma = 1
alpha = 1 - learning_rate
v1 = T.dot(self.w, self.inpt) + self.b
y1 = self.sigmoid(v1)
diff1 = self.inpt - T.dot(T.transpose(self.w), y1)
d1 = T.dot(self.w, diff1)
d1 = T.dot(d1, (1.0 - T.dot(v1,v1)))
delta_w1_bp = learning_rate * T.dot(d1 , T.transpose(self.inpt))
delta_b1_bp = T.sum(learning_rate * d1, axis=1)
delta_w1_s = learning_rate_s * T.dot(self.sigmoid(y1),T.transpose(self.inpt))
delta_b1_s = T.sum(learning_rate_s * self.sigmoid(y1), axis=1)
total_w1 = gamma * delta_w1_bp + (1 - gamma) * delta_w1_s
total_b1 = gamma * delta_b1_bp + (1 - gamma) * delta_b1_s
self.w = (alpha * self.w) + total_w1
self.b = (alpha * self.b) + total_b1
##################################################################
conv_out = conv.conv2d(
input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
image_shape=self.image_shape)
pooled_out = downsample.max_pool_2d(
input=conv_out, ds=self.poolsize, ignore_border=True)
self.output = self.activation_fn(
pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
self.output_dropout = self.output # no dropout in the convolution layers
Does anyone know how to fix this?
The main code I run to call the above script is
import network3
from network3 import Network
from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
training_data, validation_data, test_data = network3.load_data_shared()
mini_batch_size = 10
net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2)),
FullyConnectedLayer(n_in=20*12*12, n_out=100),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
net.SGD(training_data, 60, mini_batch_size, 0.1,
validation_data, test_data)