I'm really new to tfjs and keras so I need to ask you a question. I've found a keras model for OCR on the internet. I then modified the code a little bit because it used save_weights and I used only save to have the model topography correct. I converted the file using tensorflow_converter and used in an hybrid framework like ionic 3.
Now I've an error with the lambda layer. How can I solve?
To make things clear, there is the python model
'''This example uses a convolutional stack followed by a recurrent stack
and a CTC logloss function to perform optical character recognition
of generated text images. I have no evidence of whether it actually
learns general shapes of text, or just is able to recognize all
the different fonts thrown at it...the purpose is more to demonstrate CTC
inside of Keras. Note that the font list may need to be updated
for the particular OS in use.
This starts off with 4 letter words. For the first 12 epochs, the
difficulty is gradually increased using the TextImageGenerator class
which is both a generator class for test/train data and a Keras
callback class. After 20 epochs, longer sequences are thrown at it
by recompiling the model to handle a wider image and rebuilding
the word list to include two words separated by a space.
The table below shows normalized edit distance values. Theano uses
a slightly different CTC implementation, hence the different results.
Norm. ED
Epoch | TF | TH
------------------------
10 0.027 0.064
15 0.038 0.035
20 0.043 0.045
25 0.014 0.019
This requires cairo and editdistance packages:
pip install cairocffi
pip install editdistance
Created by Mike Henry
https://github.com/mbhenry/
'''
import os
import itertools
import codecs
import re
import datetime
import cairocffi as cairo
import editdistance
import numpy as np
from scipy import ndimage
import pylab
from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import GRU
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
from keras.preprocessing import image
import keras.callbacks
OUTPUT_DIR = 'image_ocr'
# character classes and matching regex filter
regex = r'^[a-z ]+$'
alphabet = u'abcdefghijklmnopqrstuvwxyz '
np.random.seed(55)
# this creates larger "blotches" of noise which look
# more realistic than just adding gaussian noise
# assumes greyscale with pixels ranging from 0 to 1
def speckle(img):
severity = np.random.uniform(0, 0.6)
blur = ndimage.gaussian_filter(np.random.randn(*img.shape) * severity, 1)
img_speck = (img + blur)
img_speck[img_speck > 1] = 1
img_speck[img_speck <= 0] = 0
return img_speck
# paints the string in a random location the bounding box
# also uses a random font, a slight random rotation,
# and a random amount of speckle noise
def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False):
surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
with cairo.Context(surface) as context:
context.set_source_rgb(1, 1, 1) # White
context.paint()
# this font list works in CentOS 7
if multi_fonts:
fonts = [
'Century Schoolbook', 'Courier', 'STIX',
'URW Chancery L', 'FreeMono']
context.select_font_face(
np.random.choice(fonts),
cairo.FONT_SLANT_NORMAL,
np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
else:
context.select_font_face('Courier',
cairo.FONT_SLANT_NORMAL,
cairo.FONT_WEIGHT_BOLD)
context.set_font_size(25)
box = context.text_extents(text)
border_w_h = (4, 4)
if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]):
raise IOError(('Could not fit string into image.'
'Max char count is too large for given image width.'))
# teach the RNN translational invariance by
# fitting text box randomly on canvas, with some room to rotate
max_shift_x = w - box[2] - border_w_h[0]
max_shift_y = h - box[3] - border_w_h[1]
top_left_x = np.random.randint(0, int(max_shift_x))
if ud:
top_left_y = np.random.randint(0, int(max_shift_y))
else:
top_left_y = h // 2
context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
context.set_source_rgb(0, 0, 0)
context.show_text(text)
buf = surface.get_data()
a = np.frombuffer(buf, np.uint8)
a.shape = (h, w, 4)
a = a[:, :, 0] # grab single channel
a = a.astype(np.float32) / 255
a = np.expand_dims(a, 0)
if rotate:
a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
a = speckle(a)
return a
def shuffle_mats_or_lists(matrix_list, stop_ind=None):
ret = []
assert all([len(i) == len(matrix_list[0]) for i in matrix_list])
len_val = len(matrix_list[0])
if stop_ind is None:
stop_ind = len_val
assert stop_ind <= len_val
a = list(range(stop_ind))
np.random.shuffle(a)
a += list(range(stop_ind, len_val))
for mat in matrix_list:
if isinstance(mat, np.ndarray):
ret.append(mat[a])
elif isinstance(mat, list):
ret.append([mat[i] for i in a])
else:
raise TypeError('`shuffle_mats_or_lists` only supports '
'numpy.array and list objects.')
return ret
# Translation of characters to unique integer values
def text_to_labels(text):
ret = []
for char in text:
ret.append(alphabet.find(char))
return ret
# Reverse translation of numerical classes back to characters
def labels_to_text(labels):
ret = []
for c in labels:
if c == len(alphabet): # CTC Blank
ret.append("")
else:
ret.append(alphabet[c])
return "".join(ret)
# only a-z and space..probably not to difficult
# to expand to uppercase and symbols
def is_valid_str(in_str):
search = re.compile(regex, re.UNICODE).search
return bool(search(in_str))
# Uses generator functions to supply train/test with
# data. Image renderings are text are created on the fly
# each time with random perturbations
class TextImageGenerator(keras.callbacks.Callback):
def __init__(self, monogram_file, bigram_file, minibatch_size,
img_w, img_h, downsample_factor, val_split,
absolute_max_string_len=16):
self.minibatch_size = minibatch_size
self.img_w = img_w
self.img_h = img_h
self.monogram_file = monogram_file
self.bigram_file = bigram_file
self.downsample_factor = downsample_factor
self.val_split = val_split
self.blank_label = self.get_output_size() - 1
self.absolute_max_string_len = absolute_max_string_len
def get_output_size(self):
return len(alphabet) + 1
# num_words can be independent of the epoch size due to the use of generators
# as max_string_len grows, num_words can grow
def build_word_list(self, num_words, max_string_len=None, mono_fraction=0.5):
assert max_string_len <= self.absolute_max_string_len
assert num_words % self.minibatch_size == 0
assert (self.val_split * num_words) % self.minibatch_size == 0
self.num_words = num_words
self.string_list = [''] * self.num_words
tmp_string_list = []
self.max_string_len = max_string_len
self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
self.X_text = []
self.Y_len = [0] * self.num_words
def _is_length_of_word_valid(word):
return (max_string_len == -1 or
max_string_len is None or
len(word) <= max_string_len)
# monogram file is sorted by frequency in english speech
with codecs.open(self.monogram_file, mode='r', encoding='utf-8') as f:
for line in f:
if len(tmp_string_list) == int(self.num_words * mono_fraction):
break
word = line.rstrip()
if _is_length_of_word_valid(word):
tmp_string_list.append(word)
# bigram file contains common word pairings in english speech
with codecs.open(self.bigram_file, mode='r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
if len(tmp_string_list) == self.num_words:
break
columns = line.lower().split()
word = columns[0] + ' ' + columns[1]
if is_valid_str(word) and _is_length_of_word_valid(word):
tmp_string_list.append(word)
if len(tmp_string_list) != self.num_words:
raise IOError('Could not pull enough words'
'from supplied monogram and bigram files.')
# interlace to mix up the easy and hard words
self.string_list[::2] = tmp_string_list[:self.num_words // 2]
self.string_list[1::2] = tmp_string_list[self.num_words // 2:]
for i, word in enumerate(self.string_list):
self.Y_len[i] = len(word)
self.Y_data[i, 0:len(word)] = text_to_labels(word)
self.X_text.append(word)
self.Y_len = np.expand_dims(np.array(self.Y_len), 1)
self.cur_val_index = self.val_split
self.cur_train_index = 0
# each time an image is requested from train/val/test, a new random
# painting of the text is performed
def get_batch(self, index, size, train):
# width and height are backwards from typical Keras convention
# because width is the time dimension when it gets fed into the RNN
if K.image_data_format() == 'channels_first':
X_data = np.ones([size, 1, self.img_w, self.img_h])
else:
X_data = np.ones([size, self.img_w, self.img_h, 1])
labels = np.ones([size, self.absolute_max_string_len])
input_length = np.zeros([size, 1])
label_length = np.zeros([size, 1])
source_str = []
for i in range(size):
# Mix in some blank inputs. This seems to be important for
# achieving translational invariance
if train and i > size - 4:
if K.image_data_format() == 'channels_first':
X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[0, :, :].T
else:
X_data[i, 0:self.img_w, :, 0] = self.paint_func('',)[0, :, :].T
labels[i, 0] = self.blank_label
input_length[i] = self.img_w // self.downsample_factor - 2
label_length[i] = 1
source_str.append('')
else:
if K.image_data_format() == 'channels_first':
X_data[i, 0, 0:self.img_w, :] = (
self.paint_func(self.X_text[index + i])[0, :, :].T)
else:
X_data[i, 0:self.img_w, :, 0] = (
self.paint_func(self.X_text[index + i])[0, :, :].T)
labels[i, :] = self.Y_data[index + i]
input_length[i] = self.img_w // self.downsample_factor - 2
label_length[i] = self.Y_len[index + i]
source_str.append(self.X_text[index + i])
inputs = {'the_input': X_data,
'the_labels': labels,
'input_length': input_length,
'label_length': label_length,
'source_str': source_str # used for visualization only
}
outputs = {'ctc': np.zeros([size])} # dummy data for dummy loss function
return (inputs, outputs)
def next_train(self):
while 1:
ret = self.get_batch(self.cur_train_index,
self.minibatch_size, train=True)
self.cur_train_index += self.minibatch_size
if self.cur_train_index >= self.val_split:
self.cur_train_index = self.cur_train_index % 32
(self.X_text, self.Y_data, self.Y_len) = shuffle_mats_or_lists(
[self.X_text, self.Y_data, self.Y_len], self.val_split)
yield ret
def next_val(self):
while 1:
ret = self.get_batch(self.cur_val_index,
self.minibatch_size, train=False)
self.cur_val_index += self.minibatch_size
if self.cur_val_index >= self.num_words:
self.cur_val_index = self.val_split + self.cur_val_index % 32
yield ret
def on_train_begin(self, logs={}):
self.build_word_list(16000, 4, 1)
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=False, ud=False, multi_fonts=False)
def on_epoch_begin(self, epoch, logs={}):
# rebind the paint function to implement curriculum learning
if 3 <= epoch < 6:
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=False, ud=True, multi_fonts=False)
elif 6 <= epoch < 9:
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=False, ud=True, multi_fonts=True)
elif epoch >= 9:
self.paint_func = lambda text: paint_text(
text, self.img_w, self.img_h,
rotate=True, ud=True, multi_fonts=True)
if epoch >= 21 and self.max_string_len < 12:
self.build_word_list(32000, 12, 0.5)
# the actual loss calc occurs here despite it not being
# an internal Keras loss function
def ctc_lambda_func(args):
y_pred, labels, input_length, label_length = args
# the 2 is critical here since the first couple outputs of the RNN
# tend to be garbage:
y_pred = y_pred[:, 2:, :]
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
# For a real OCR application, this should be beam search with a dictionary
# and language model. For this example, best path is sufficient.
def decode_batch(test_func, word_batch):
out = test_func([word_batch])[0]
ret = []
for j in range(out.shape[0]):
out_best = list(np.argmax(out[j, 2:], 1))
out_best = [k for k, g in itertools.groupby(out_best)]
outstr = labels_to_text(out_best)
ret.append(outstr)
return ret
class VizCallback(keras.callbacks.Callback):
def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
self.test_func = test_func
self.output_dir = os.path.join(
OUTPUT_DIR, run_name)
self.text_img_gen = text_img_gen
self.num_display_words = num_display_words
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
def show_edit_distance(self, num):
num_left = num
mean_norm_ed = 0.0
mean_ed = 0.0
while num_left > 0:
word_batch = next(self.text_img_gen)[0]
num_proc = min(word_batch['the_input'].shape[0], num_left)
decoded_res = decode_batch(self.test_func,
word_batch['the_input'][0:num_proc])
for j in range(num_proc):
edit_dist = editdistance.eval(decoded_res[j],
word_batch['source_str'][j])
mean_ed += float(edit_dist)
mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
num_left -= num_proc
mean_norm_ed = mean_norm_ed / num
mean_ed = mean_ed / num
print('\nOut of %d samples: Mean edit distance:'
'%.3f Mean normalized edit distance: %0.3f'
% (num, mean_ed, mean_norm_ed))
def on_epoch_end(self, epoch, logs={}):
self.model.save(
os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
self.show_edit_distance(256)
word_batch = next(self.text_img_gen)[0]
res = decode_batch(self.test_func,
word_batch['the_input'][0:self.num_display_words])
if word_batch['the_input'][0].shape[0] < 256:
cols = 2
else:
cols = 1
for i in range(self.num_display_words):
pylab.subplot(self.num_display_words // cols, cols, i + 1)
if K.image_data_format() == 'channels_first':
the_input = word_batch['the_input'][i, 0, :, :]
else:
the_input = word_batch['the_input'][i, :, :, 0]
pylab.imshow(the_input.T, cmap='Greys_r')
pylab.xlabel(
'Truth = \'%s\'\nDecoded = \'%s\'' %
(word_batch['source_str'][i], res[i]))
fig = pylab.gcf()
fig.set_size_inches(10, 13)
pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
pylab.close()
def train(run_name, start_epoch, stop_epoch, img_w):
# Input Parameters
img_h = 64
words_per_epoch = 16000
val_split = 0.2
val_words = int(words_per_epoch * (val_split))
# Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
minibatch_size = 32
if K.image_data_format() == 'channels_first':
input_shape = (1, img_w, img_h)
else:
input_shape = (img_w, img_h, 1)
fdir = os.path.dirname(
get_file('wordlists.tgz',
origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
untar=True))
img_gen = TextImageGenerator(
monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
minibatch_size=minibatch_size,
img_w=img_w,
img_h=img_h,
downsample_factor=(pool_size ** 2),
val_split=words_per_epoch - val_words)
act = 'relu'
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
inner = Conv2D(conv_filters, kernel_size, padding='same',
activation=act, kernel_initializer='he_normal',
name='conv1')(input_data)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
inner = Conv2D(conv_filters, kernel_size, padding='same',
activation=act, kernel_initializer='he_normal',
name='conv2')(inner)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
conv_to_rnn_dims = (img_w // (pool_size ** 2),
(img_h // (pool_size ** 2)) * conv_filters)
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
# cuts down input size going into RNN:
inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
gru_1 = GRU(rnn_size, return_sequences=True,
kernel_initializer='he_normal', name='gru1')(inner)
gru_1b = GRU(rnn_size, return_sequences=True,
go_backwards=True, kernel_initializer='he_normal',
name='gru1_b')(inner)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True,
kernel_initializer='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
# transforms RNN output to character activations:
inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
name='dense2')(concatenate([gru_2, gru_2b]))
y_pred = Activation('softmax', name='softmax')(inner)
Model(inputs=input_data, outputs=y_pred).summary()
labels = Input(name='the_labels',
shape=[img_gen.absolute_max_string_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# Keras doesn't currently support loss funcs with extra parameters
# so CTC loss is implemented in a lambda layer
loss_out = Lambda(
ctc_lambda_func, output_shape=(1,),
name='ctc')([y_pred, labels, input_length, label_length])
# clipnorm seems to speeds up convergence
sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
model = Model(inputs=[input_data, labels, input_length, label_length],
outputs=loss_out)
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
if start_epoch > 0:
weight_file = os.path.join(
OUTPUT_DIR,
os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
model.load_weights(weight_file)
# captures output of softmax so we can decode the output during visualization
test_func = K.function([input_data], [y_pred])
viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
model.fit_generator(
generator=img_gen.next_train(),
steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
epochs=stop_epoch,
validation_data=img_gen.next_val(),
validation_steps=val_words // minibatch_size,
callbacks=[viz_cb, img_gen],
initial_epoch=start_epoch)
if __name__ == '__main__':
run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
train(run_name, 0, 20, 128)
# increase to wider images and start at epoch 20.
# The learned weights are reloaded
train(run_name, 20, 25, 512)
That's the error I've in browser
Error: Uncaught (in promise): Error: Unknown layer: Lambda
Error: Unknown layer: Lambda
at new ValueError (http://localhost:8100/build/vendor.js:27661:28)
at Object.deserializeKerasObject (http://localhost:8100/build/vendor.js:27536:19)
at Object.deserialize (http://localhost:8100/build/vendor.js:67301:28)
at processLayer (http://localhost:8100/build/vendor.js:32601:41)
at Container.fromConfig (http://localhost:8100/build/vendor.js:32617:13)
at Object.deserializeKerasObject (http://localhost:8100/build/vendor.js:27555:29)
at Object.deserialize (http://localhost:8100/build/vendor.js:67301:28)
at http://localhost:8100/build/vendor.js:95907:45
at step (http://localhost:8100/build/vendor.js:95882:23)
at Object.next (http://localhost:8100/build/vendor.js:95863:53)
at c (http://localhost:8100/build/polyfills.js:3:19752)
at http://localhost:8100/build/polyfills.js:3:19174
at rejected (http://localhost:8100/build/main.js:59:89)
at t.invoke (http://localhost:8100/build/polyfills.js:3:14976)
at Object.onInvoke (http://localhost:8100/build/vendor.js:5124:33)
at t.invoke (http://localhost:8100/build/polyfills.js:3:14916)
at r.run (http://localhost:8100/build/polyfills.js:3:10143)
at http://localhost:8100/build/polyfills.js:3:20242
at t.invokeTask (http://localhost:8100/build/polyfills.js:3:15660)
at Object.onInvokeTask (http://localhost:8100/build/vendor.js:5115:33)
Related
I am working on clinical EHR.
I am currently referring to this blog and github link here.
https://sparklerussell.com/post/using-electronic-health-records-to-predict-future-diagnosis-codes-with-gated-recurrent-units/
https://github.com/sparalic/Electronic-Health-Records-GRUs
I have generated the dataset and processed it as per the instructions in the notebooks present in the repository. I am facing an issue trying to train the model.
using : jupytor notebook (with google colab)
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import numpy as np
import itertools
import pickle
import sys, random
Load Data :
def load_data(sequences, labels):
dataSize = len(labels)
idx = np.random.permutation(dataSize)
nTest = int(np.ceil(0.15 * dataSize))
nValid = int(np.ceil(0.10 * dataSize))
test_idx = idx[:nTest]
valid_idx = idx[nTest:nTest+nValid]
train_idx = idx[nTest+nValid:]
train_x = sequences[train_idx]
train_y = labels[train_idx]
test_x = sequences[test_idx]
test_y = labels[test_idx]
valid_x = sequences[valid_idx]
valid_y = labels[valid_idx]
train_x = [sorted(seq) for seq in train_x]
train_y = [sorted(seq) for seq in train_y]
valid_x = [sorted(seq) for seq in valid_x]
valid_y = [sorted(seq) for seq in valid_y]
test_x = [sorted(seq) for seq in test_x]
test_y = [sorted(seq) for seq in test_y]
train = (train_x, train_y)
test = (test_x, test_y)
valid = (valid_x, valid_y)
return (train, test, valid)
Padding the input :
def padding(seqs, labels, vocab, n_classes):
lengths = np.array([len(seq) for seq in seqs]) - 1 # remove the last list in each patient's sequences for labels
n_samples = len(lengths)
maxlen = np.max(lengths)
x = torch.zeros(maxlen, n_samples, vocab) # maxlen = number of visits, n_samples = samples
y = torch.zeros(maxlen, n_samples, n_classes)
mask = torch.zeros(maxlen, n_samples)
for idx, (seq,label) in enumerate(zip(seqs,labels)):
for xvec, subseq in zip(x[:,idx,:], seq[:-1]):
xvec[subseq] = 1.
for yvec, subseq in zip(y[:,idx,:], label[1:]):
yvec[subseq] = 1.
mask[:lengths[idx], idx] = 1.
return x, y, lengths, mask
GRU Class :
torch.manual_seed(1)
class EHRNN(nn.Module):
def __init__(self, inputDimSize, hiddenDimSize,embSize, batchSize, numClass):
super(EHRNN, self).__init__()
self.hiddenDimSize = hiddenDimSize
self.inputDimSize = inputDimSize
self.embSize = embSize
self.numClass = numClass
self.batchSize = batchSize
#Initialize random weights
self.W_z = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
self.W_r = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
self.W_h = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
self.U_z = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
self.U_r = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
self.U_h = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
self.b_z = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
self.b_r = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
self.b_h = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
self.params = [self.W_z, self.W_r, self.W_h,
self.U_z, self.U_r, self.U_h,
self.b_z, self.b_r, self.b_h]
def forward(self,emb,h):
z = torch.sigmoid(torch.matmul(emb, self.W_z) + torch.matmul(h, self.U_z) + self.b_z)
r = torch.sigmoid(torch.matmul(emb, self.W_r) + torch.matmul(h, self.U_r) + self.b_r)
h_tilde = torch.tanh(torch.matmul(emb, self.W_h) + torch.matmul(r * h, self.U_h) + self.b_h)
h = z * h + ((1. - z) * h_tilde)
return h
def init_hidden(self):
return Variable(torch.zeros(self.batchSize,self.hiddenDimSize))
Custom Layer for handling two layer GRU :
torch.manual_seed(1)
class build_EHRNN(nn.Module):
def __init__(self, inputDimSize=4894, hiddenDimSize=[200,200], batchSize=100, embSize=200,numClass=4894, dropout=0.5,logEps=1e-8):
super(build_EHRNN, self).__init__()
self.inputDimSize = inputDimSize
self.hiddenDimSize = hiddenDimSize
self.numClass = numClass
self.embSize = embSize
self.batchSize = batchSize
self.dropout = nn.Dropout(p=0.5)
self.logEps = logEps
# Embedding inputs
self.W_emb = nn.Parameter(torch.randn(self.inputDimSize, self.embSize).cuda())
self.b_emb = nn.Parameter(torch.zeros(self.embSize).cuda())
self.W_out = nn.Parameter(torch.randn(self.hiddenDimSize, self.numClass).cuda())
self.b_out = nn.Parameter(torch.zeros(self.numClass).cuda())
self.params = [self.W_emb, self.W_out,
self.b_emb, self.b_out]
def forward(self,x, y, lengths, mask):
self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
input_values = self.emb
self.outputs = [input_values]
for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]): # iterate over layers
rnn = EHRNN(self.inputDimSize,hiddenSize,self.embSize,self.batchSize,self.numClass) # calculate hidden states
hidden_state = []
h = self.init_hidden().cuda()
for i,seq in enumerate(input_values): # loop over sequences in each batch
h = rnn(seq, h)
hidden_state.append(h)
hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
input_values = hidden_state
y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out # fully connected layer
yhat = F.softmax(y_linear, dim=1) # yhat
yhat = yhat*mask[:,:,None] # apply mask
# Loss calculation
cross_entropy = -(y * torch.log(yhat + self.logEps) + (1. - y) * torch.log(1. - yhat + self.logEps))
last_step = -torch.mean(y[-1] * torch.log(yhat[-1] + self.logEps) + (1. - y[-1]) * torch.log(1. - yhat[-1] + self.logEps))
prediction_loss = torch.sum(torch.sum(cross_entropy, dim=0),dim=1)/ torch.cuda.FloatTensor(lengths)
cost = torch.mean(prediction_loss) + 0.000001 * (self.W_out ** 2).sum() # regularize
return (yhat, hidden_state, cost)
def init_hidden(self):
return torch.zeros(self.batchSize, self.hiddenDimSize) # initial state
Load data :
train, test, valid = load_data(sequences, labels)
Batch Size:
batchSize = 100
n_batches = int(np.ceil(float(len(train[0])) / float(batchSize)))-1
n_batches_valid = int(np.ceil(float(len(valid[0])) / float(batchSize)))-1
Model:
model = build_EHRNN(inputDimSize=4894, hiddenDimSize=200, batchSize=20, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8)
model = model.to(device)
optimizer = torch.optim.Adadelta(model.parameters(), lr = 0.01, rho=0.90)
max_epochs = 5
loss_all = []
iteration = 0
for e in range(max_epochs):
for index in random.sample(range(n_batches), n_batches):
batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
optimizer.zero_grad()
x, y, lengths, mask = padding(batchX, batchY, 4894, 4894)
if torch.cuda.is_available():
x, y, lenghts, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
outputs, hidden, cost = model(x,y, h, lengths, mask)
if torch.cuda.is_available():
cost.cuda()
cost.backward()
nn.utils.clip_grad_norm_(model.parameters(), 5)
optimizer.step()
loss_all.append(cost.item())
iteration +=1
if iteration % 10 == 0:
# Calculate Accuracy
losses = []
model.eval()
val_loss = []
for index in random.sample(range(n_batches_valid), n_batches_valid):
validX = valid[0][:n_batches_valid*batchSize][index*batchSize:(index+1)*batchSize]
validY = valid[1][:n_batches_valid*batchSize][index*batchSize:(index+1)*batchSize]
x, y, lengths, mask = padding(validX, validY, 4894, 4894)
if torch.cuda.is_available():
x, y, lenghts, mask = x.cuda(), y.cuda(), lenghts, mask.cuda()
outputs, hidden_val, cost_val = model(x,y, h, lengths, mask)
losses.append(cost_val)
model.train()
print("Epoch: {}/{}...".format(e+1, max_epochs),
"Step: {}...".format(iteration),
"Training Loss: {:.4f}...".format(np.mean(loss_all)),
"Val Loss: {:.4f}".format(torch.mean(torch.tensor(losses))))
ERROR :
ValueError Traceback (most recent call last)
<ipython-input-76-9ca4916456a9> in <module>()
8
9 for e in range(max_epochs):
---> 10 for index in random.sample(range(n_batches), n_batches):
11 batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
12 batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
/usr/lib/python3.7/random.py in sample(self, population, k)
319 n = len(population)
320 if not 0 <= k <= n:
--> 321 raise ValueError("Sample larger than population or is negative")
322 result = [None] * k
323 setsize = 21 # size of a small set minus size of an empty list
ValueError: Sample larger than population or is negative
I tried many things but I couldn't solve the problem.
I'm training an MRI super-resolution model on about 40k volumes with Tensorflow/Keras and a custom training loop:
Pseudo python/tf code :
for e in epochs:
for s in the steps:
batch = data_handler.get_batch(files[s])
model.train_on_batch(batch['lr'], batch['hr'])
def get_batch(volume_path) # online data augmentation and image degradation
vol_hr = np.load(volume_path) # 200x200x200 array
vol_hr = augment(vol_hr) # flips / rotation
vol_lr = degrade(vol_hr) # blur, add noise, downsample (100x100x200)
batch = crop_patches(vol_lr, vol_hr)
# batch['lr'].shape == batch_size, 32, 32, 3
# batch['hr'].shape == batch_size, 64, 64, 3
return batch
This runs terribly slow so I'm looking into optimizing the training through (i) implementing tf data pipeline (tf.data.Dataset) and (ii) Distributed training. This post is about optimizing the data pipeline.
My plan is to implement a tf data input pipeline (tf 2.4.1). The generator looks something like this at the moment:
import tensorflow as tf
import numpy as np
import os
import time
from skimage.transform import downscale_local_mean
def make_rand_vols(n, size=(200, 200, 200), tmp_path='./tmp/tmp_vol'):
if not os.path.isdir(tmp_path):
os.makedirs(tmp_path)
rand_arr_fnames = []
for f in range(n):
rand_arr = np.random.rand(size[0], size[1], size[2])
fpath = os.path.join(tmp_path, f"{f}.npy")
np.save(fpath, rand_arr)
rand_arr_fnames.append(fpath)
return rand_arr_fnames
def benchmark(dataset, num_epochs=2, steps=10):
start_time = time.perf_counter()
for epoch_num in range(num_epochs):
for s, sample in enumerate(dataset):
# Performing a training step
print(s)
time.sleep(0.01)
if s == steps - 1:
break
print("Execution time:", time.perf_counter() - start_time)
class CustomGenerator(tf.keras.utils.Sequence):
def __init__(self, fnames, batch_size=3,
scale=2, shuffle=True, patch_size=32, n_channels=3):
self.fnames = fnames
self.shuffle = shuffle
self.batch_size = batch_size
self.on_epoch_end()
self.scale = scale
self.lr_patch_size = patch_size
self.hr_patch_size = patch_size * scale
self.n_channels = n_channels
def __len__(self):
return len(self.fnames)
def on_epoch_end(self):
#'Updates indexes after each epoch'
if self.shuffle == True:
np.random.shuffle(self.fnames)
def __getitem__(self, idx):
vol_hr = np.load(self.fnames[idx]) # array of size e.g. (300, 300, 300)
vol_hr = self.augment(vol_hr)
vol_lr = self.degrade(vol_hr, self.scale) # array of size e.g. (150, 150, 300)
batch = self.get_vol_crops(vol_lr, vol_hr) # array of size (batch_size, width / scale, length / scale, depth)
# batch -> (batch_lr, batch_hr) -> (arr [batch_size, patch_lr, patch_lr, n_channels], arr [batch_size, patch_hr, patch_hr, n_channels])
return batch
def augment(self, vol):
if np.random.randint(0, 2):
vol = vol[:, ::-1, :] # simple for illustration
return vol
def degrade(self, vol_hr, downscale_factor):
vol_lr = downscale_local_mean(vol_hr,
(downscale_factor, downscale_factor, 1),
cval=0)
return vol_lr
def get_vol_crops(self, vol_lr, vol_hr):
pad = 2
rows_lr, cols_lr, n_slices = vol_lr.shape
lr_y_list = np.random.randint(0, rows_lr - self.lr_patch_size - 1, batch_size) #top left row lr
lr_x_list = np.random.randint(0, cols_lr - self.lr_patch_size - 1, batch_size) # top left col lr
z_list = np.random.randint(pad, n_slices - pad, self.batch_size)
lr_patches, hr_patches = [], []
for y_lr, x_lr, z in zip(lr_y_list, lr_x_list, z_list):
slice_idxs = [z for z in range(z - n_channels // 2, z + n_channels//2 + 1)]
# LR patch
patch_lr = vol_lr[y_lr: y_lr + self.lr_patch_size,
x_lr: x_lr + self.lr_patch_size,
slice_idxs]
print()
lr_patches.append(patch_lr)
# HR patch
y_hr, x_hr = scale * y_lr, scale * x_lr
patch_hr = vol_hr[y_hr: y_hr + self.hr_patch_size,
x_hr: x_hr + self.hr_patch_size,
slice_idxs]
#print(patch_lr.shape, patch_hr.shape)
#preview_patches(patch_lr, patch_hr)
hr_patches.append(patch_hr)
return np.array(lr_patches), np.array(hr_patches)
test_vol_fnames = make_rand_vols(50)
# Make tf.data.Dataset from keras Generator
batch_size = 24
patch_size = 32
scale = 2
n_channels = 3
gen = CustomGenerator(test_vol_fnames, patch_size=patch_size, batch_size=batch_size)
benchmark(gen, steps=50)
# Execution time: 9.38716044393368
dataset = tf.data.Dataset.from_generator(
lambda: gen,
output_types=(tf.float32, tf.float32),
output_shapes = ([batch_size, patch_size, patch_size, n_channels],
[batch_size, patch_size*scale, patch_size*scale, n_channels])
)
benchmark(dataset, steps=50)
# Execution time: 9.54540992085822
benchmark(dataset.prefetch(tf.data.AUTOTUNE), steps=50)
# Execution time: 8.615464322036132
benchmark(
tf.data.Dataset.range(2)
.interleave(
lambda _: dataset,
num_parallel_calls=tf.data.AUTOTUNE),
steps=50
)
# Execution time: 5.204080824041739
def mapped_generator(g):
return tf.py_function(lambda f: g,
inp=[f],
Tout=[tf.float32, tf.float32])
dataset_parallel = tf.data.Dataset.from_generator(
lambda: mapped_generator(gen),
output_types=(tf.float32, tf.float32),
output_shapes=([batch_size, patch_size, patch_size, n_channels],
[batch_size, patch_size*scale, patch_size*scale, n_channels])
)
benchmark(
dataset_parallel,
steps=50
)
# NameError: name 'f' is not defined
Is there anything I can do to make this more efficient? How can I wrap the generator inside tf.py_function?
I trained a network to perform semantic segmentation with dropout, and it is my understanding that as you vary the dropout keep_prob value, the output prediction changes. However, after saving the model using the tensorflow-serving method, loading it using tf.saved_model.loader.load, and varying the dropout value, I get the same output prediction value (dice score).
I followed the suggestions in this SO post, but I still get the same prediction results even if I enter 0.0.
Didn't know if it was a tensorflow issue or a bug in my code, so I tried downgrading from v1.15 to v1.10 to see if it was the former and still got the same results. I am sure it is a bug in my code now, but I am not sure where it is. A minimum working example is shown below. Could someone help me? Thank you!
This is a snippet of my training script:
#===============
def run_iteration(self, feed_dict, op_list, summaries):
output_args = self.sess.run(op_list, feed_dict=feed_dict)
return output_args
#===============
def run_epoch_train(self, curr_epoch):
print('Training over all batches')
num_total_batches = self.num_total_batches_train
curr_batch_counter = 0
# for each batch in training images
for batch in self.batch_iterator_train:
# dropout is included
if self.dropout_training_Flag == 1:
_, loss, dice = self.run_iteration(
feed_dict={
self.placeholders['images']: batch['images'],
self.placeholders['labels']: batch['labels'],
self.placeholders['is_training']: True,
self.placeholders['dropout_prob']: self.dropout_prob_training,
},
op_list=[
self.fitting_op,
self.losses[self.active_loss],
#self.outputs['sigmoid'],
self.outputs['dice'],
],
summaries=[],
)
curr_batch_counter = curr_batch_counter + 1
if (self.iteration % 5) == 0:
print('Saving model in training session')
self.saver.save(curr_epoch + 1)
This is a snippet of my testing script:
#===============
path_to_model = self.root_path_to_models + '/' + '25'
print(path_to_model)
model = tf.saved_model.loader.load( #tf.saved_model.loader.load(
sess,
[tf.saved_model.tag_constants.SERVING],
path_to_model
)
inputImage_name = model.signature_def['prediction'].inputs['images'].name
x_inp = tf.get_default_graph().get_tensor_by_name(inputImage_name)
isTraining_name = model.signature_def['prediction'].inputs['is_training'].name
tflag_op = tf.get_default_graph().get_tensor_by_name(isTraining_name)
outputs_name = model.signature_def['prediction'].outputs['sigmoid'].name
y_op = tf.get_default_graph().get_tensor_by_name(outputs_name)
if self.dropout_training_Flag == 1:
dropoutProb_name = model.signature_def['prediction'].inputs['dropout_prob'].name
dropout_prob_op = tf.get_default_graph().get_tensor_by_name(dropoutProb_name)
print(dropout_prob_op)
# iterate over batches of images
# iterate over motion category
for moCat in self.motion_categories:
# get datasets in motion category
datasets_in_moCat = d_ffn_images_labels[moCat]
dataset_name = list(datasets_in_moCat.keys())[-1]
#print(dataset_name)
loss_for_each_image = []
final_vol = np.zeros((self.original_input_image_width, self.original_input_image_height, self.num_vol_slices), dtype = np.uint8)
# get images
curr_dataset_images = datasets_in_moCat[dataset_name][0][0]
# get labels
curr_dataset_labels = datasets_in_moCat[dataset_name][0][1]
#current dataset label numbers
curr_dataset_label_numbers = d_bfnumber_images_labels[moCat][dataset_name]
#print('curr_dataset_label_numbers',curr_dataset_label_numbers)
# number of images/labels in current dataset, for current category
num_images = len(curr_dataset_images)
num_labels = len(curr_dataset_labels)
# check if num-images/labels are the same
assert(num_images == num_labels)
# load each image
for elem_idx in range(num_images):
img_path = curr_dataset_images[elem_idx]
lab_path = curr_dataset_labels[elem_idx]
xn = nib.load(img_path)
x = np.array(xn.dataobj)
labn = nib.load(lab_path)
lab = np.array(labn.dataobj)
data_affine_tform = xn.affine
# resize
xr = cv2.resize(x, (self.network_input_image_width, self.network_input_image_height), interpolation = cv2.INTER_LANCZOS4)
# standardize
y = standardize_zeroMeanUnitVar_image(copy.deepcopy(xr), self.network_input_image_width, self.network_input_image_height, self.network_input_channels)
#y = cv2.normalize(copy.deepcopy(xr), None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
# match network input -- [height, width, channels]
y = np.reshape(y, newshape=(self.network_input_image_height, self.network_input_image_width, self.network_input_channels))
# append to match network input -- [batch, height, width, channels]
input_list = []
input_list.append(y)
input_list = np.asarray(input_list).astype(np.float32)
# ======================
# MODIFY DROPOUT HERE FROM JSON FILE
# CHANGED VALUES FROM 0.0, 0.5, 1.0 -- same prediction score
# ======================
# run and get output
if self.dropout_training_Flag == 1:
output = sess.run(y_op, feed_dict={x_inp: input_list, tflag_op: True, dropout_prob_op: self.dropout_prob_testing})
else:
output = sess.run(y_op, feed_dict={x_inp: input_list, tflag_op: False})
tmpOut = cv2.resize(output[0,:,:,0], (self.original_input_image_width, self.original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
prediction = np.asarray((tmpOut > 0.5))
labels = np.asarray((lab > 0))
EPS = 0.0000001
#output_original = cv2.resize(output[0,:,:,0], (original_input_image_width, original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
loss = 2.0 * np.sum(labels * prediction, axis=(0, 1)) / (np.sum(labels ** 2 + prediction ** 2, axis=(0, 1)) + EPS)
loss_for_each_image.append(loss)
#place slice in final_vol
#print(curr_dataset_label_numbers[elem_idx][1])
#print(type(curr_dataset_label_numbers[elem_idx][1]))
final_vol[:,:,curr_dataset_label_numbers[elem_idx][1] - 1] = np.asarray(prediction*255.0).astype(np.uint8)
# dice mean over dataset
dice_mean_for_dataset = np.mean(loss_for_each_image)
print(dataset_name, dice_mean_for_dataset)
self.diceScore_for_each_dataset.append(dice_mean_for_dataset)
self.list_dataset_name.append(dataset_name)
This is the code for the inputs/outputs:
#===============
def create_placeholders(self):
self.placeholders['images'] = tf.placeholder(
shape=[None] + self.network_input_size + [self.network_input_channels],
name='images',
dtype=tf.float32
)
self.placeholders['labels'] = tf.placeholder(
shape=[None] + self.network_input_size + [self.network_output_channels],
name='labels',
dtype=tf.float32
)
self.placeholders['is_training'] = tf.placeholder(
shape=[],
name='is_training',
dtype=tf.bool
)
# dropout is included
if self.dropout_training_Flag == 1:
self.placeholders['dropout_prob'] = tf.placeholder(
shape=[],
name='dropout_prob',
dtype=tf.float32
)
#===============
def create_outputs(self):
if self.network_name == 'UNet':
print('\n')
print('Training UNet')
# dropout is included
if self.dropout_training_Flag == 1:
# train with dropout
unet_output = unet_dropout(
self.placeholders['images'],
self.placeholders['is_training'],
self.placeholders['dropout_prob'],
self.network_output_channels
)
if self.network_output_channels == 1:
self.outputs['sigmoid'] = unet_output
else:
self.outputs['sigmoid'] = unet_output
This is the code for my model:
#===============
def batch_norm_relu(inputs, is_training):
net = slim.batch_norm(inputs, is_training=is_training)
net = tf.nn.relu(net)
return net
#===============
def dropout (input, keep_prob, is_training):
if is_training == True:
dropout = tf.nn.dropout(input, keep_prob)
else:
dropout = input
return dropout
#===============
def model(inputs, is_training, keep_prob, num_classes):
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
base_num_kernels = 64
# =================================
# encoder
# 256
x = conv2d_fixed_padding(inputs=inputs, filters=base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
x = conv2d_fixed_padding(inputs=x, filters=base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
output_b1 = x
output_list_b1 = [x]
output_b1 = dropout(output_b1, keep_prob, is_training)
output_b1 = tf.layers.max_pooling2d(inputs=output_b1, pool_size=2, strides=2, padding='SAME')
# =================================
# 128
x = conv2d_fixed_padding(inputs=output_b1, filters=2*base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
x = conv2d_fixed_padding(inputs=x, filters=2*base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
output_b2 = x
output_list_b2 = [x]
output_b2 = dropout(output_b2, keep_prob, is_training)
# =================================
# decoder
# 128 -> 256
output_b3 = conv2d_transpose(output_b2, kernel_size=2, output_channels=base_num_kernels)
output_b4 = tf.concat([output_b3, x], axis=3)
# =================================
# 256
conv_final = conv2d_fixed_padding(inputs=output_b4, filters=base_num_kernels, kernel_size=3, stride=1)
conv_final = batch_norm_relu(conv_final, is_training)
conv_final = conv2d_fixed_padding(inputs=conv_final, filters=base_num_kernels, kernel_size=3, stride=1)
conv_final = batch_norm_relu(conv_final, is_training)
# =================================
# output
outputs = conv2d_fixed_padding(inputs=conv_final, filters=num_classes, kernel_size=3, stride=1)
if num_classes == 1:
outputs = tf.nn.sigmoid(outputs)
else:
h = outputs.get_shape().as_list()[1]
w = outputs.get_shape().as_list()[2]
outputs_reshaped = tf.reshape(outputs, np.asarray([-1, num_classes]))
outputs_final = tf.nn.softmax(outputs_reshaped)
outputs = tf.reshape(outputs_final, np.asarray([-1, h, w, num_classes]))
return outputs
This is the way that I save the network weights:
#===============
def __create_summary_manager(self):
self.saver = Saver(
self.sess,
self.placeholders,
self.outputs,
self.savepath
)
#===============
import tensorflow as tf
class Saver(object):
def __init__(self, sess, input_dict, output_dict, path):
self.sess = sess
self.input_dict = input_dict
self.output_dict = output_dict
self.path = path
self.iteration = 0
self.input_dict_info = {}
self.output_dict_info = {}
for key in input_dict.keys():
self.input_dict_info[key] = \
tf.saved_model.utils.build_tensor_info(
self.input_dict[key]
)
for key in output_dict.keys():
self.output_dict_info[key] = \
tf.saved_model.utils.build_tensor_info(
self.output_dict[key]
)
self.prediction_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs=self.input_dict_info,
outputs=self.output_dict_info)
)
def save(self, iteration_val):
self.iteration += 1
export_path = os.path.join(
tf.compat.as_bytes(self.path),
tf.compat.as_bytes(str(iteration_val))
)
self.builder = tf.saved_model.builder.SavedModelBuilder(export_path)
self.builder.add_meta_graph_and_variables(
self.sess, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
'prediction': self.prediction_signature,
}
)
self.builder.save()
I am trying to run some code on Deep embedding clustering on mnist with the help of Keras , however, I get the following error
from keras.datasets import mnist
import numpy as np
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
def autoencoder(dims, act='relu', init='glorot_uniform'):
n_stacks = len(dims) - 1
# input
input_img = Input(shape=(dims[0],), name='input')
x = input_img
# internal layers in encoder
for i in range(n_stacks-1):
x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
# hidden layer
encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
x = encoded
# internal layers in decoder
for i in range(n_stacks-1, 0, -1):
x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
# output
x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
decoded = x
return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)
n_clusters = len(np.unique(y))
kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=4)
y_pred_kmeans = kmeans.fit_predict(x)
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in',
distribution='uniform')
pretrain_optimizer = SGD(lr=1, momentum=0.9)
pretrain_epochs = 300
batch_size = 256
save_dir = './results'
autoencoder, encoder = autoencoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs) #, callbacks=cb)
autoencoder.save_weights(save_dir + '/ae_weights.h5')
class ClusteringLayer(Layer):
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')
y_pred_last = np.copy(y_pred_kmeans)
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
# computing an auxiliary target distribution
def target_distribution(q):
weight = q ** 2 / q.sum(0)
return (weight.T / weight.sum(1)).T
loss = 0
index = 0
maxiter = 8000
update_interval = 140
index_array = np.arange(x.shape[0])
tol = 0.001 # tolerance threshold to stop training
for ite in range(int(maxiter)):
if ite % update_interval == 0:
q = model.predict(x, verbose=2 )
p = target_distribution(q) # update the auxiliary target distribution p
# evaluate the clustering performance
y_pred = q.argmax(1)
if y is not None:
acc = np.round(metrics.acc(y, y_pred), 5)
nmi = np.round(metrics.nmi(y, y_pred), 5)
ari = np.round(metrics.ari(y, y_pred), 5)
loss = np.round(loss, 5)
print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)
# check stop criterion - model convergence
delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
y_pred_last = np.copy(y_pred)
if ite > 0 and delta_label < tol:
print('delta_label ', delta_label, '< tol ', tol)
print('Reached tolerance threshold. Stopping training.')
break
idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])]
loss = model.train_on_batch(x=x[idx], y=p[idx])
index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0
model.save_weights(save_dir + '/DEC_model_final.h5')
model.load_weights(save_dir + '/DEC_model_final.h5')
the error:
with session.graph.as_default():
AttributeError: 'NoneType' object has no attribute 'graph'
(the problem might be in saving the model but I can't figure out why I am wrong.) my code runs perfectly in jupyter notebook but I can't run it in an editor like pycharm .please help.
This VGGNet was implemented using Tensorflow framework, from scratch, where all of the layers are defined in the code.
The main problem I am facing here is that the training accuracy, not to mention validation accuracy, goes up even though I wait it out for a decent amount of time. There are few problems that I suspect is causing this problem right now. First, I think the network is too deep and wide for cifar-10 dataset. Second, extracting data batch out of the whole dataset is not exhaustive, i.e. Batch selection is used over and over again over the whole dataset without eliminating those examples that were selected in the ongoing epoch.
However, still I could not get this code to work after many hours and days of experiments.
I wish I could extract the problematic code section to ask a question, but since I cannot pinpoint the exact section here, let me upload my whole code.
import os
import sys
import tensorflow as tf
import numpy as np
import scipy as sci
import math
import matplotlib.pyplot as plt
import time
import random
import imageio
import pickle
import cv2
import json
from pycocotools.coco import COCO
class SVGG:
def __init__(self, num_output_classes):
self.input_layer_size = 0
self.num_output_classes = num_output_classes
# Data
self.X = []
self.Y = []
self.working_x = []
self.working_y = []
self.testX = []
self.testY = []
# hard coded for now. Have to change.
self.input_data_size = 32 # 32 X 32
self.input_data_size_flat = 3072 # 32 X 32 X 3 == 3072
self.num_of_channels = 3 # 3 for colour image
self.input_data_size = 32 # 32 X 32
self.input_data_size_flat = self.input_data_size * self.input_data_size # 32 X 32 X 3 == 3072
self.num_of_channels = 3 # 3 for colour image
self.convolution_layers = []
self.convolution_weights = []
self.fully_connected_layers = []
self.fully_connected_weights = []
def feed_examples(self, input_X, input_Y):
"""
Feed examples to be learned
:param input_X: Training dataset X
:param input_Y: Traning dataset label
:return:
"""
# Take first input and calculate its size
# hard code size
self.X = input_X
self.Y = input_Y
self.input_data_size_flat = len(self.X[0]) * len(self.X[0][0]) * len(self.X[0][0][0])
def feed_test_data(self, test_X, test_Y):
self.testX = test_X
self.testY = test_Y
def run(self):
x = tf.placeholder(tf.float32, [None, self.input_data_size_flat], name='x')
x_data = tf.reshape(x, [-1, self.input_data_size, self.input_data_size, 3])
y_true = tf.placeholder(tf.float32, [None, self.num_output_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)
"""
VGG layers
"""
# Create layers
######################################## Input Layer ########################################
input_layer, input_weight = self.create_convolution_layer(x_data, num_input_channels=3, filter_size=3, num_filters=64,
use_pooling=True) # False
######################################## Convolutional Layer ########################################
############### Conv Layer 1 #################
conv_1_1, w_1_1 = self.create_convolution_layer(input=input_layer, num_input_channels=64, filter_size=3, num_filters=64, use_pooling=False)
conv_1_2, w_1_2 = self.create_convolution_layer(input=conv_1_1, num_input_channels=64, filter_size=3, num_filters=128, use_pooling=True)
############### Conv Layer 2 #################
conv_2_1, w_2_1 = self.create_convolution_layer(input=conv_1_2, num_input_channels=128, filter_size=3, num_filters=128, use_pooling=False)
conv_2_2, w_2_2 = self.create_convolution_layer(input=conv_2_1, num_input_channels=128, filter_size=3, num_filters=256, use_pooling=True)
############### Conv Layer 3 #################
conv_3_1, w_3_1 = self.create_convolution_layer(input=conv_2_2, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
conv_3_2, w_3_2 = self.create_convolution_layer(input=conv_3_1, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
conv_3_3, w_3_3 = self.create_convolution_layer(input=conv_3_2, num_input_channels=256, filter_size=3, num_filters=512, use_pooling=True)
############### Conv Layer 4 #################
conv_4_1, w_4_1 = self.create_convolution_layer(input=conv_3_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_4_2, w_4_2 = self.create_convolution_layer(input=conv_4_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_4_3, w_4_3 = self.create_convolution_layer(input=conv_4_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)
############### Conv Layer 5 #################
conv_5_1, w_5_1 = self.create_convolution_layer(input=conv_4_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_5_2, w_5_2 = self.create_convolution_layer(input=conv_5_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
conv_5_3, w_5_3 = self.create_convolution_layer(input=conv_5_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)
layer_flat, num_features = self.flatten_layer(conv_5_3)
######################################## Fully Connected Layer ########################################
fc_1 = self.create_fully_connected_layer(input=layer_flat, num_inputs=num_features, num_outputs=4096)
fc_2 = self.create_fully_connected_layer(input=fc_1, num_inputs=4096, num_outputs=4096)
fc_3 = self.create_fully_connected_layer(input=fc_2, num_inputs=4096, num_outputs=self.num_output_classes, use_dropout=False)
# Normalize prediction
y_prediction = tf.nn.softmax(fc_3)
# The class-number is the index of the largest element
y_prediction_class = tf.argmax(y_prediction, axis=1)
# Cost-Fuction to be optimized
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc_3, labels=y_true)
# => Now we have a measure of how well the model performs on each image individually. But in order to use the
# Cross entropy to guide the optimization of the model's variable swe need a single value, so we simply take the
# Average of the cross-entropy for all the image classifications
cost = tf.reduce_mean(cross_entropy)
# Optimizer
optimizer_adam = tf.train.AdamOptimizer(learning_rate=0.002).minimize(cost)
# Performance measure
correct_prediction = tf.equal(y_prediction_class, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
total_iterations = 0
num_iterations = 100000
start_time = time.time()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(num_iterations):
x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=0, high=40000, batch_size=128)
feed_dict_train = {x: x_batch, y_true: y_true_batch}
sess.run(optimizer_adam, feed_dict_train)
if i % 100 == 99:
# Calculate the accuracy on the training-set.
x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=40000, high=50000, batch_size=1000)
feed_dict_validate = {x: x_batch, y_true: y_true_batch}
acc = sess.run(accuracy, feed_dict=feed_dict_validate)
# Message for printing.
msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
# print(sess.run(y_prediction, feed_dict=feed_dict_train))
# print(sess.run(y_prediction_class, feed_dict=feed_dict_train))
print(msg.format(i + 1, acc))
if i % 10000 == 9999:
oSaver = tf.train.Saver()
oSess = sess
path = "./model/_" + "iteration_" + str(i) + ".ckpt"
oSaver.save(oSess, path)
if i == num_iterations - 1:
x_batch, y_true_batch, _ = self.get_batch(X=self.testX, Y=self.testY, low=0, high=10000, batch_size=10000)
feed_dict_test = {x: x_batch, y_true: y_true_batch}
test_accuracy = sess.run(accuracy, feed_dict=feed_dict_test)
msg = "Test Accuracy: {0:>6.1%}"
print(msg.format(test_accuracy))
def get_batch(self, X, Y, low=0, high=50000, batch_size=128):
x_batch = []
y_batch = np.ndarray(shape=(batch_size, self.num_output_classes))
index = np.random.randint(low=low, high=high, size=batch_size)
counter = 0
for idx in index:
x_batch.append(X[idx].flatten())
y_batch[counter] = one_hot_encoded(Y[idx], self.num_output_classes)
y_batch_cls = Y[idx]
counter += 1
return x_batch, y_batch, y_batch_cls
def generate_new_weights(self, shape):
w = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
return w
def generate_new_biases(self, shape):
b = tf.Variable(tf.constant(0.05, shape=[shape]))
return b
def create_convolution_layer(self, input, num_input_channels, filter_size, num_filters, use_pooling):
"""
:param input: The previous layer
:param num_input_channels: Number of channels in previous layer
:param filter_size: W and H of each filter
:param num_filters: Number of filters
:return:
"""
shape = [filter_size, filter_size, num_input_channels, num_filters]
weights = self.generate_new_weights(shape)
biases = self.generate_new_biases(num_filters)
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
layer += biases
# Max Pooling
if use_pooling:
layer = tf.nn.max_pool(layer, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
# ReLu. Using elu for better performance
layer = tf.nn.elu(layer)
return layer, weights
def create_fully_connected_layer(self, input, num_inputs, num_outputs, use_dropout=True):
weights = self.generate_new_weights(shape=[num_inputs, num_outputs])
biases = self.generate_new_biases(shape=num_outputs)
layer = tf.matmul(input, weights) + biases
layer = tf.nn.elu(layer)
if use_dropout:
keep_prob = tf.placeholder(tf.float32)
keep_prob = 0.5
layer = tf.nn.dropout(layer, keep_prob)
return layer
def flatten_layer(self, layer):
"""
Flattens dimension that is output by a convolution layer.
Flattening is need to feed into a fully-connected-layer.
:param layer:
:return:
"""
# shape [num_images, img_height, img_width, num_channels]
layer_shape = layer.get_shape()
# Number of features h x w x channels
num_features = layer_shape[1: 4].num_elements()
# Reshape
layer_flat = tf.reshape(layer, [-1, num_features])
# Shape is now [num_images, img_height * img_width * num_channels]
return layer_flat, num_features
def unpickle(file):
with open(file, 'rb') as file:
dict = pickle.load(file, encoding='bytes')
return dict
def convert_to_individual_image(flat):
img_R = flat[0:1024].reshape((32, 32))
img_G = flat[1024:2048].reshape((32, 32))
img_B = flat[2048:3072].reshape((32, 32))
#B G R
mean = [125.3, 123.0, 113.9]
img = np.dstack((img_R - mean[0], img_G - mean[1], img_B - mean[2]))
img = np.array(img)
# img = cv2.resize(img, (224, 224), img)
return img
def read_coco_data(img_path, annotation_path):
coco = COCO(annotation_path)
ids = list(coco.imgs.keys())
ann_keys = list(coco.anns.keys())
print(coco.imgs[ids[0]])
print(coco.anns[ann_keys[0]])
def one_hot_encoded(class_numbers, num_classes=None):
if num_classes is None:
num_classes = np.max(class_numbers) + 1
return np.eye(num_classes, dtype=float)[class_numbers]
if __name__ == '__main__':
data = []
labels = []
val_data = []
val_label = []
# cifar-10
counter = 0
for i in range(1, 6):
unpacked = unpickle("./cifar10/data_batch_" + str(i))
tmp_data = unpacked[b'data']
tmp_label = unpacked[b'labels']
inner_counter = 0
for flat in tmp_data:
converted = convert_to_individual_image(flat)
data.append(converted)
labels.append(tmp_label[inner_counter])
counter += 1
inner_counter += 1
cv2.imwrite("./img/" + str(counter) + ".jpg", converted)
# Test data
unpacked = unpickle("./cifar10/test_batch")
test_data = []
test_data_flat = unpacked[b'data']
test_label = unpacked[b'labels']
for flat in test_data_flat:
test_data.append(convert_to_individual_image(flat))
svgg = SVGG(10)
svgg.feed_examples(input_X=data, input_Y=labels)
svgg.feed_test_data(test_X=test_data, test_Y=test_label)
svgg.run()