TensorFlow session.run() crashes Python - python

TensorFlow: tensorflow-gpu 0.12
Anaconda: Anaconda 4.2.9 (Python 3.5)
GPU: NVIDIA 940M (notebook, 2 GB)
OS: Windows 7 64-bit SP1
CUDA: 8.0
cuDNN: 5.0
IDE: PyCharm
The MNIST test (CNNs) runs fine on the GPU, but when it comes to my own project, Python crashes. I debugged my code and found that the call to **session.run()** triggers the problem. The error is:
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:586] Could not identify NUMA node of /job:localhost/replica:0/task:0/gpu:0, defaulting to 0. Your kernel may not have been built with NUMA support.
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_event.cc:49] Error polling for event status: failed to query event: CUDA_ERROR_LAUNCH_FAILED
F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_event_mgr.cc:198] Unexpected Event status: 1
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:385] could not create cudnn handle: **CUDNN_STATUS_INTERNAL_ERROR**
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:352] could not destroy cudnn handle: **CUDNN_STATUS_BAD_PARAM**
F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\kernels\conv_ops.cc:532] **Check failed: stream->parent()->GetConvolveAlgorithms(&algorithms)**
Since MNIST runs correctly, there should be no defect in my GPU driver, CUDA, or cuDNN. I really don't know where this problem comes from.
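Before digging into the model code, one common culprit for CUDNN_STATUS_INTERNAL_ERROR on a 2 GB card is cuDNN failing to allocate memory for its handle. Below is a minimal sketch (my own illustration, not the eventual fix further down) of capping TensorFlow's up-front GPU allocation; the 0.7 fraction is an arbitrary example:
import tensorflow as tf

# Illustrative only: cap TensorFlow's up-front GPU allocation so cuDNN has headroom on a 2 GB card.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True                    # allocate incrementally instead of grabbing everything
config.gpu_options.per_process_gpu_memory_fraction = 0.7  # example cap; tune for the available memory
sess = tf.Session(config=config)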
This is my code:
import cv2
import os
import tensorflow as tf
import data_trans as dt
with tf.variable_scope('weights'):
weights={
# 60*60*3->60*60*32->30*30*32
'conv1':tf.get_variable('conv1',[5,5,3,32],initializer=tf.contrib.layers.xavier_initializer_conv2d()),
# 30*30*32->30*30*64->15*15*64
'conv2':tf.get_variable('conv2',[5,5,32,64],initializer=tf.contrib.layers.xavier_initializer_conv2d()),
# 15*15*64->12*12*128->6*6*128
'conv3':tf.get_variable('conv3',[4,4,64,128],initializer=tf.contrib.layers.xavier_initializer_conv2d()),
# 6*6*128->256
'fc1':tf.get_variable('fc1',[6*6*128,256],initializer=tf.contrib.layers.xavier_initializer()),
# 256->2
'fc2':tf.get_variable('fc2',[256,2],initializer=tf.contrib.layers.xavier_initializer())
}
with tf.variable_scope('biases'):
biases = {
'conv1':tf.get_variable('conv1',[32,],initializer=tf.constant_initializer(value=0.0,dtype=tf.float32)),
'conv2':tf.get_variable('conv2',[64,],initializer=tf.constant_initializer(value=0.0,dtype=tf.float32)),
'conv3':tf.get_variable('conv3',[128,],initializer=tf.constant_initializer(value=0.0,dtype=tf.float32)),
'fc1':tf.get_variable('fc1',[256,],initializer=tf.constant_initializer(value=0.0,dtype=tf.float32)),
'fc2':tf.get_variable('fc2',[2,],initializer=tf.constant_initializer(value=0.0,dtype=tf.float32))
}
def inference(images):
images = (tf.cast(images,tf.float32)/255)
conv1 = tf.nn.bias_add(tf.nn.conv2d(images,weights['conv1'],strides=[1,1,1,1],padding='SAME'),biases['conv1'])
relu1 = tf.nn.relu(conv1)
pool1 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
conv2 = tf.nn.bias_add(tf.nn.conv2d(pool1,weights['conv2'],strides=[1,1,1,1],padding='SAME'),biases['conv2'])
relu1 = tf.nn.relu(conv2)
pool2 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
conv3 = tf.nn.bias_add(tf.nn.conv2d(pool2,weights['conv3'],strides=[1,1,1,1],padding='VALID'),biases['conv3'])
relu3 = tf.nn.relu(conv3)
pool3 = tf.nn.max_pool(relu3,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
flatten = tf.reshape(pool3,[-1,weights['fc1'].get_shape().as_list()[0]])
drop = tf.nn.dropout(flatten,0.5)
fc1 = tf.matmul(drop,weights['fc1']) + biases['fc1']
fc_relu1 = tf.nn.relu(fc1)
fc2 = tf.matmul(fc_relu1,weights['fc2']) + biases['fc2']
return fc2
def train():
dt.encode_to_tfrecords('../train_data/train.txt','../train_data','data.tfrecords',(60,60))
image,label = dt.decode_from_tfrecords('../train_data/data.tfrecords')
batch_image,batch_label = dt.get_batch(image,label,batch_size=10,crop_size=60)
inf = inference(batch_image)
predicts = tf.nn.softmax(inf)
cross_entropy = -tf.reduce_mean(batch_label * tf.log(predicts))
train_step = tf.train.GradientDescentOptimizer(1e-2).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(predicts, 1), tf.argmax(batch_label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
#if os.path.exists(os.path.join('model','model.ckpt')) is True:
# tf.train.Saver(max_to_keep=None).restore(sess,os.path.join('model','model.ckpt'))
for epcho in range(8):
print(sess.run(accuracy))
print('here!')
coord.request_stop()
coord.join(threads)
train()
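A side note on the loss above: -tf.reduce_mean(batch_label * tf.log(predicts)) can produce NaN/Inf once a softmax output hits zero. Here is a hedged sketch of a numerically safer variant over the same tensors (inf and batch_label from train()); it is a suggested improvement, not part of the original code:
# Sketch: numerically stable loss for the graph built in train() above.
# 'inf' holds the raw logits from inference(), 'batch_label' the one-hot labels.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=inf, labels=batch_label))
train_step = tf.train.GradientDescentOptimizer(1e-2).minimize(cross_entropy)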
data_trans.py contains three functions used to transform images into TFRecords:
import cv2
import tensorflow as tf
def encode_to_tfrecords(label_file,data_root,new_name='data.tfrecords',resize=None):
writer = tf.python_io.TFRecordWriter(data_root + '/' + new_name)
num_example = 0
with open(label_file,'r') as f:
for l in f.readlines():
l = l.split()
path = data_root+'/'+l[0]
image = cv2.imread(path)
if resize is not None:
image = cv2.resize(image,resize)
height,width,nchannel = image.shape
label = int(l[1])
example = tf.train.Example(features=tf.train.Features(feature={
'height':tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
'width':tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
'nchannel':tf.train.Feature(int64_list=tf.train.Int64List(value=[nchannel])),
'image':tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tobytes()])),
'label':tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
}))
serialized = example.SerializeToString()
writer.write(serialized)
num_example += 1
print(label_file,'Sample_Num:',num_example)
writer.close()
#encode_to_tfrecords('../train_data/train.txt','../train_data')
def decode_from_tfrecords(filename,num_epoch=None):
filename_queue = tf.train.string_input_producer([filename],num_epoch)
reader = tf.TFRecordReader()
_,serialized = reader.read(filename_queue)
example = tf.parse_single_example(serialized,features={
'height':tf.FixedLenFeature([],tf.int64),
'width':tf.FixedLenFeature([],tf.int64),
'nchannel':tf.FixedLenFeature([],tf.int64),
'image':tf.FixedLenFeature([],tf.string),
'label':tf.FixedLenFeature([],tf.int64)
})
label = tf.cast(example['label'],tf.int32)
image = tf.decode_raw(example['image'],tf.uint8)
image = tf.reshape(image,tf.stack([
tf.cast(example['height'],tf.int32),
tf.cast(example['width'],tf.int32),
tf.cast(example['nchannel'],tf.int32)
]))
return image, label
#encode_to_tfrecords("../train_data/train.txt","../train_data",'data.tfrecords')
#image,label=decode_from_tfrecords('../train_data/data.tfrecords')
#print image[0]
def get_batch(image,label,batch_size,crop_size):
distorted_image = tf.random_crop(image,[crop_size, crop_size, 3])
distorted_image = tf.image.random_flip_up_down(distorted_image)
images,label_batch = tf.train.shuffle_batch([distorted_image,label],batch_size=batch_size,capacity=130,min_after_dequeue=100)
return images,tf.one_hot(tf.reshape(label_batch,[batch_size]), 2)

Thanks all, I have solved this problem. It seems to be a bug in tensorflow-gpu on Windows (7/10): tf.one_hot() does not execute correctly under Windows 7 (perhaps only with tensorflow-gpu 0.12 on Windows 7), so we must explicitly place this op on the CPU, like this:
with tf.device('/cpu:0'):
    labels = tf.one_hot(...)
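For completeness, here is a sketch of how that fix slots into get_batch from data_trans.py above; only the device pinning is new:
def get_batch(image, label, batch_size, crop_size):
    distorted_image = tf.random_crop(image, [crop_size, crop_size, 3])
    distorted_image = tf.image.random_flip_up_down(distorted_image)
    images, label_batch = tf.train.shuffle_batch([distorted_image, label], batch_size=batch_size,
                                                 capacity=130, min_after_dequeue=100)
    # Pin one_hot to the CPU to work around the Windows tensorflow-gpu issue described above.
    with tf.device('/cpu:0'):
        one_hot_labels = tf.one_hot(tf.reshape(label_batch, [batch_size]), 2)
    return images, one_hot_labels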

Related

Running Python (PyTorch) training script on remote machine causes SSH session crash with no error message

I am trying to train a custom UNet model for multiclass segmentation with 25,000 training images. I am running the code on a remote Ubuntu machine over a PuTTY SSH connection. The script starts running, and after some epochs the entire PuTTY session crashes, making it impossible to get the actual error that caused the crash.
I believe the problem is not in my main training script, but maybe in the script that creates the Datasets and DataLoaders. It looks like this:
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from datetime import datetime
import torch
import torch.nn as nn
from torch.nn import ConvTranspose2d
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ModuleList
from torch.nn import ReLU
from torch.nn import functional as F
from torchvision.transforms import CenterCrop
import torchvision.transforms as T
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import time
from tqdm import tqdm
import random
from PIL import Image
to_tensor = T.ToTensor()
# determine the device to be used for training and evaluation
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# determine if we will be pinning memory during data loading
PIN_MEMORY = True if DEVICE == "cuda" else False
BATCH_SIZE = 64
# # Data importing
df_train = pd.read_csv("../../datasets/unet_cropped/df_train.csv")
df_val = pd.read_csv("../../datasets/unet_cropped/df_val.csv")
# Dictionaries
d_train = {}
for row, data in df_train.iterrows():
d_train[data['image_path']] = [data['class_1_path'], data['class_3_path'], data['class_4_path']]
d_val = {}
for row, data in df_val.iterrows():
d_val[data['image_path']] = [data['class_1_path'], data['class_3_path'], data['class_4_path']]
def image_to_gray(image_path):
# Open image
img = Image.open(image_path)
# Apply transformation and convert to Pytorch tensor
img_tensor = to_tensor(img)
# Convert into Gray-scale
img_torch_gray = T.functional.rgb_to_grayscale(img_tensor, num_output_channels= 1)
return img_torch_gray
def show(images, folder_path, title, subtitles = True):
""" images : list of images
"""
fig,axes = plt.subplots(nrows = len(images), ncols = 1, figsize=(6,7), constrained_layout=True)
fig.suptitle(title, fontsize=10)
if len(images) >1:
for i, image_id in enumerate(images):
image_path = folder_path + image_id
im_for_plot = image_to_gray(image_path)
if subtitles:
axes[i-1].set_title(image_id, fontsize=8)
axes[i-1].imshow(im_for_plot, cmap = "gray")
else:
image_id = images[0]
image_path = folder_path + image_id
im_for_plot = image_to_gray(image_path)
if subtitles:
axes.set_title(image_id, fontsize=8)
axes.imshow(im_for_plot.permute((1,2,0)), cmap = "gray")
plt.show()
def mask_to_gray4(mask_path):
""" For given list of mask_paths create 3-dim tensor.
Example:
mask_path = [mask_path_class_1, NaN, NaN]
final_mask = [ gray_scale_mask_class_1, torch_zeros(1, W, H), torch_zeros(1, W, H) ]
"""
final_mask = torch.zeros((3, 256, 256))
for i, sample in enumerate(mask_path):
# NaN != NaN, so skip missing (NaN) mask paths
if sample!=sample:
continue
# Open image
img = Image.open(sample)
# Apply transformation and convert to Pytorch tensor
img_tensor = to_tensor(img)
# Convert into Gray-scale
img_torch_gray = T.functional.rgb_to_grayscale(img_tensor, num_output_channels= 1)
img_torch_gray[img_torch_gray>0] = 1
final_mask[i, :, :] = img_torch_gray
return final_mask
image_train_paths = []
mask_train_paths = []
for key, value in d_train.items():
image_train_paths.append(key)
mask_train_paths.append(value)
image_val_paths = []
mask_val_paths = []
for key, value in d_val.items():
image_val_paths.append(key)
mask_val_paths.append(value)
del d_train, d_val
# main function
class SegmentationDataset(Dataset):
def __init__(self, image_paths, mask_paths):
# store the image and mask filepaths
self.image_paths = image_paths
self.mask_paths = mask_paths
def __len__(self):
# return the number of total samples contained in the dataset
return len(self.image_paths)
def __getitem__(self, idx):
""" Loads and returns a sample from the dataset at the given index idx. """
# grab the image path from the current index
image_path = self.image_paths[idx]
image = image_to_gray(image_path)
# grab the mask path from the current index
mask_path = self.mask_paths[idx]
mask = mask_to_gray4(mask_path)
return (image, mask)
# # Data Loading
trainDS = SegmentationDataset(image_train_paths, mask_train_paths)
trainLoader = DataLoader(trainDS, shuffle = True, batch_size = BATCH_SIZE,
pin_memory = PIN_MEMORY)
valDS = SegmentationDataset(image_val_paths, mask_val_paths)
valLoader = DataLoader(valDS, shuffle = True, batch_size = BATCH_SIZE,
pin_memory = PIN_MEMORY)
# calculate steps per epoch for training and validation set
trainSteps = len(trainDS) // BATCH_SIZE
valSteps = len(valDS) // BATCH_SIZE
Maybe the problem has to do with some sort of memory leak or RAM constantly increasing during training, but I can't spot where exactly that could happen in this script.
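To test the memory-leak hypothesis, here is a small sketch that logs this process's resident memory once per epoch; psutil is an extra dependency and the helper is purely illustrative:
import os
import psutil  # extra dependency, used only for monitoring

_process = psutil.Process(os.getpid())

def log_memory(epoch):
    # Resident set size of this Python process in MiB; a steady climb across epochs hints at a host-RAM leak.
    rss_mib = _process.memory_info().rss / (1024 ** 2)
    print(f"epoch {epoch}: RSS = {rss_mib:.1f} MiB", flush=True)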
Training script looks like this:
#!/usr/bin/env python
# coding: utf-8
# Import libraries
import pandas as pd
import numpy as np
import os
from datetime import datetime
import time
import random
from PIL import Image
import torch
import torch.nn as nn
from torch.nn import ConvTranspose2d
from torch.nn import Conv2d
from torch.nn import MaxPool2d, BatchNorm2d
from torch.nn import Module
from torch.nn import ModuleList
from torch.nn import ReLU
from torch.nn import functional as F
from torchvision.transforms import CenterCrop
import torchvision.transforms as T
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
torch.manual_seed(1)
from dataset import (
trainLoader,
valLoader,
trainSteps,
valSteps
)
from model import UNet
# determine the device to be used for training and evaluation
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# determine if we will be pinning memory during data loading
PIN_MEMORY = True if DEVICE == "cuda" else False
#----------------------------------------------------------------------------
NUM_CHANNELS = 1 # number of channels in the input - grayscale image
NUM_CLASSES = 3 # number of classes
NUM_LEVELS = 3 # number of levels in the U-Net model
# initialize learning rate, number of epochs to train for, and the batch size
INIT_LR = 1e-3
NUM_EPOCHS = 100
BATCH_SIZE = 64
threshold = 0.4 ######## --------------------------
dt_string = time.ctime()
model_name = "models/tuesday_25/unet" + "_th_" + str(threshold) + "_" + dt_string.replace(" ", "_")
path_model = model_name + '.pth'
path_param = model_name + '_model_param.txt'
eval_txt = model_name + "_eval.txt"
FILEPRINT = True
if FILEPRINT:
EVAL_FILE = open(eval_txt, "a+")
# print("[INFO] follow training and validation loss in last 'n' epochs by running:")
# print(f" >watch tail -n {eval_txt}")
def fileprint(*args):
if FILEPRINT:
print(*args, file = EVAL_FILE)
EVAL_FILE.flush()
else:
print(*args)
# # Training UNet model
class EarlyStopping():
# https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch
def __init__(self, tolerance=50, min_delta=0):
self.tolerance = tolerance
self.min_delta = min_delta
self.counter = 0
self.early_stop = False
def __call__(self, train_loss, validation_loss):
if (validation_loss - train_loss) > self.min_delta:
# if self.counter == 0:
# print("Validation loss increase detected")
self.counter +=1
if self.counter >= self.tolerance:
self.early_stop = True
# def bce_dice_loss(predicted, truth, threshold):
# batch_size = len(truth)
# # BCE
# bce_loss = lossFunc(predicted, truth)
# # DICE
# predicted = torch.sigmoid(predicted.detach())
# predicted[predicted > threshold] = 1
# predicted[predicted <= threshold] = 0
# predicted = predicted.view(batch_size, -1)
# truth = truth.view(batch_size, -1)
# assert(predicted.shape == truth.shape)
# tp = (predicted * truth).sum(-1)
# fp = ((truth == 0.0).float() * predicted).sum(-1)
# fn = ((truth >= 1.0).float() * (predicted == 0.0).float()).sum(-1)
# dice_score = 2*tp / (2*tp + fp + fn)
# # BCE DICE
# bce_dice = 0.75 * bce_loss + 0.25 * (1 - dice_score)
# batch_bce_dice_loss = torch.nanmean(bce_dice)
# return batch_bce_dice_loss, (torch.nanmean(dice_score)).item()
# ---------------------------- Initialize UNet model -------------------------
model = UNet(nbClasses = NUM_CLASSES).to(DEVICE)
lossFunc = nn.BCEWithLogitsLoss()
opt = torch.optim.RAdam(model.parameters(), lr=INIT_LR)
scaler = torch.cuda.amp.GradScaler()
torch.autograd.set_detect_anomaly(True)
early_stopping = EarlyStopping(tolerance=500, min_delta=1e-5)
# initialize a dictionary to store training history
H = {"train_loss": [], "val_loss": [], "dice_score": []}
# ----------------------------- Training UNet model ----------------------------
# print("[INFO] training the network...")
startTime = time.time()
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
# print(f"Num of params: {params}")
# for e in tqdm(range(NUM_EPOCHS)):
for e in range(NUM_EPOCHS):
model.train() # set the model in training mode
totalTrainLoss = 0 # initialize the total training and validation loss
totalValLoss = 0
# totalValDiceScore = 0
# totalTrainDiceScore = 0
# loop over the training set
for (i, (x, y)) in enumerate(trainLoader):
opt.zero_grad() # first, zero out any previously accumulated gradients,
(x, y) = (x.to(DEVICE), y.to(DEVICE)) # send the input to the device
pred = model(x) # perform a forward pass
# print("pred :", pred.size())
# print("y:" , y.size())
# loss, dice_score = bce_dice_loss(pred, y, threshold)
loss = lossFunc(pred, y.float()) # calculate the training loss
scaler.scale(loss).backward() # perform backpropagation
scaler.step(opt)
scaler.update() # update model parameters
totalTrainLoss += loss.item() # add the loss to the total training loss
# totalTrainDiceScore += float(dice_score)
# switch off autograd
with torch.no_grad():
model.eval() # set the model in evaluation mode
# loop over the validation set
for (x, y) in valLoader:
(x, y) = (x.to(DEVICE), y.to(DEVICE))
pred = model(x)
loss = lossFunc(pred, y.float())
# loss, dice_score = bce_dice_loss(pred, y, threshold)
totalValLoss += loss.item()
# totalValDiceScore += float(dice_score)
# calculate the average training and validation loss
avgTrainLoss = totalTrainLoss / trainSteps
# avgTrainDiceScore = totalTrainDiceScore/ trainSteps
avgValLoss = totalValLoss / valSteps
# avgValDiceScore = totalValDiceScore/ valSteps
# early_stopping(avgTrainLoss, avgValLoss)
# if early_stopping.early_stop:
if (e>0) & ((e%5) == 0):
# print("We are at epoch: ", e)
path_model = model_name + ".pth"
path_param = model_name + '_model_param.txt'
torch.save(model.state_dict(), path_model)
with open(path_param, 'wt') as f:
f.write(f"Batch size used: {BATCH_SIZE}")
f.write(f"\nNumber of epochs: {NUM_EPOCHS}")
f.write(f"\nINIT_LR: {INIT_LR}")
f.write("\nModel parameters: \n")
f.write(str(model.eval()))
# break
text1 = "[INFO] EPOCH: {}/{}\n".format(e + 1, NUM_EPOCHS)
# text2 = "Train loss: {:.4f}, Train dice score: {:.4f}, Val loss: {:.4f}, Val dice score: {:.4f}\n".format(avgTrainLoss, avgTrainDiceScore, avgValLoss, avgValDiceScore)
text2 = "Train loss: {:.4f}, Val loss: {:.4f}\n".format(avgTrainLoss, avgValLoss)
fileprint(text1)
fileprint(text2)
# display the total time needed to perform the training
endTime = time.time()
# print("[INFO] total time taken to train the model: {:.2f}s".format(endTime - startTime))
# ------------------------------- Saving UNet model --------------------------------------------------
path_model = model_name + '.pth'
path_param = model_name + '_model_param.txt'
torch.save(model.state_dict(), path_model)
with open(path_param, 'wt') as f:  # write the text summary to the .txt file, not over the saved .pth model
f.write(f"Batch size used: {BATCH_SIZE}")
f.write(f"\nNumber of epochs: {NUM_EPOCHS}")
f.write(f"\nINIT_LR: {INIT_LR}")
f.write("\nModel parameters: \n")
f.write(str(model.eval()))
Does anyone have any ideas on how to solve this?
Thank you a lot.
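Since the PuTTY session dies before the traceback can be read, one illustrative way to keep the error is to log to a file and wrap the training entry point so the traceback is written out even if the terminal disappears. This sketch uses only the standard library; main() and train() are hypothetical names standing in for the training loop above:
import logging
import traceback

logging.basicConfig(filename="train.log", level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s")

def main():
    try:
        train()  # hypothetical entry point wrapping the training loop above
    except Exception:
        # The traceback survives in train.log even after the SSH session is gone.
        logging.error("Training crashed:\n%s", traceback.format_exc())
        raise

if __name__ == "__main__":
    main()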

Latency issue with Tensorflow cuDNN model execution

I am having problems with a cuDNN RNN model I am trying to train on a set of natural language explanations and embeddings for the semantic parsing of texts. Here is what my RNN model architecture looks like on a simplified level:
class Cudnn_RNN:
def __init__(self, num_layers, num_units, mode="lstm", keep_prob=1.0, is_train=None, scope="cudnn_rnn"):
self.num_layers = num_layers
self.rnns = []
self.mode = mode
if mode == "gru":
rnn = tf.contrib.cudnn_rnn.CudnnGRU
elif mode == "lstm":
rnn = tf.contrib.cudnn_rnn.CudnnLSTM
else:
raise Exception("Unknown mode for rnn")
for layer in range(num_layers):
rnn_fw = rnn(1, num_units)
rnn_bw = rnn(1, num_units)
self.rnns.append((rnn_fw, rnn_bw, ))
def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
outputs = [tf.transpose(inputs, [1, 0, 2])]
for layer in range(self.num_layers):
rnn_fw, rnn_bw = self.rnns[layer]
output = dropout(outputs[-1], keep_prob=keep_prob, is_train=is_train)
with tf.variable_scope("fw_{}".format(layer)):
out_fw, state_fw = rnn_fw(output)
with tf.variable_scope("bw_{}".format(layer)):
inputs_bw = tf.reverse_sequence(output, seq_lengths=seq_len, seq_axis=0, batch_axis=1)
out_bw, state_bw = rnn_bw(inputs_bw)
out_bw = tf.reverse_sequence(out_bw, seq_lengths=seq_len, seq_axis=0, batch_axis=1)
outputs.append(tf.concat([out_fw, out_bw], axis=2))
if concat_layers is True:
res = tf.concat(outputs[1:], axis=2)
else:
res = outputs[-1]
res = tf.transpose(res, [1, 0, 2])
state_fw = tf.squeeze(state_fw[0], [0])
state_bw = tf.squeeze(state_bw[0], [0])
state = tf.concat([state_fw, state_bw], axis=1)
return res, state
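For reference, here is a minimal sketch of how I read this class being driven: batch-major inputs of shape [batch, time, dim] plus per-example lengths, returning concatenated bidirectional outputs. The shapes, parameter values, and the dropout stand-in are my assumptions, not taken from the original code:
import tensorflow as tf

def dropout(x, keep_prob=1.0, is_train=None):
    # Simplified stand-in for the dropout helper used inside __call__ (assumption).
    return x if keep_prob >= 1.0 else tf.nn.dropout(x, keep_prob)

time_steps, dim, units = 20, 50, 64
inputs = tf.placeholder(tf.float32, [None, time_steps, dim])  # batch-major [batch, time, dim]
seq_len = tf.placeholder(tf.int32, [None])                    # per-example sequence lengths

rnn = Cudnn_RNN(num_layers=2, num_units=units, mode="lstm")
res, state = rnn(inputs, seq_len, keep_prob=1.0, is_train=None)
print(res.shape)  # expected (?, time_steps, 2 * units * num_layers) with concat_layers=True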
The model is set up such that after the data is loaded, it goes through pretraining, training, and then evaluation. For some reason the data loads with no issues, but as soon as the model starts running it gets stuck, not even making it to the pretraining phase. Here is the data loading and execution code (the model executes up until just before print('---Pretrain-----')):
def pseudo_labeling(config, data):
word2idx_dict, fixed_emb, traiable_emb, train_data, dev_data, test_data,pretrain_data,pretrain_data2 = data
pretrain_test_data = (pretrain_data[0][:config.pretrain_test_size],pretrain_data[1][:config.pretrain_test_size],pretrain_data[2][:config.pretrain_test_size,:])
pretrain_data = (pretrain_data[0][config.pretrain_test_size:config.pretrain_test_size+config.pretrain_train_size],pretrain_data[1][config.pretrain_test_size:config.pretrain_test_size+config.pretrain_train_size],pretrain_data[2][config.pretrain_test_size:config.pretrain_test_size+config.pretrain_train_size,:])
lfs = get_lfs(config, word2idx_dict)
identifier = "_{}".format(config.tag)
with tf.variable_scope("models", reuse=tf.AUTO_REUSE):
regex = Pat_Match(config)
match = Soft_Match(config,lfs['lfs'],np.array(lfs['rels'],np.float32),lfs['keywords'],lfs['keywords_rels'], lfs['raw_keywords'],mat=((fixed_emb, traiable_emb, )), word2idx_dict=word2idx_dict, pseudo=True)
sess_config = tf.ConfigProto(allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
if os.path.exists('labeled_data.pkl'):
with open('labeled_data.pkl', 'rb') as f:
labeled_data = pickle.load(f)
with open('unlabeled_data.pkl', 'rb') as f:
unlabeled_data = pickle.load(f)
with open('weights.pkl', 'rb') as f:
lfs["weights"] = pickle.load(f)
else:
with open('exp2pat.json','r') as f:
exp2pat = json.load(f)
exp2pat = {int(key):val for key,val in exp2pat.items()}
lab_d = []
unlab_d = []
tacred_labeled = []
tacred_unlabeled = []
labeled_data = []
unlabeled_data = []
idxx = -1
idx2rel = {val:key for key,val in constant.LABEL_TO_ID.items()}
for x in tqdm(train_data):
idxx+=1
batch = [x["phrase"]]
res, pred = regex.match(batch)
lfs["weights"] += res[0]
new_dict = {}
if np.amax(res) > 0:
x["rel"] = pred.tolist()[0]
x["logic_form"] = np.argmax(res, axis=1).tolist()[0]
new_dict['tokens'] = x['phrase'].token
new_dict['start'] = min(x['phrase'].subj_posi,x['phrase'].obj_posi)+1
new_dict['end'] = max(x['phrase'].subj_posi,x['phrase'].obj_posi)-1
new_dict['rel'] = pred.tolist()[0]
try:
new_dict['pat'] = exp2pat[np.argmax(res, axis=1).tolist()[0]]
lab_d.append(new_dict)
except:
new_dict['pat'] = -1
unlab_d.append(new_dict)
tacred_labeled.append((idxx,idx2rel[x['rel']]))
labeled_data.append(x)
else:
tacred_unlabeled.append(idxx)
new_dict['tokens'] = x['phrase'].token
new_dict['start'] = min(x['phrase'].subj_posi,x['phrase'].obj_posi)+1
new_dict['end'] = max(x['phrase'].subj_posi,x['phrase'].obj_posi)-1
new_dict['rel'] = pred.tolist()[0]
new_dict['pat']=-1
x["rel"] = 0
unlab_d.append(new_dict)
unlabeled_data.append(x)
new_weight = np.array([elem for i, elem in enumerate(list(lfs['weights'])) if i in exp2pat],np.float32)
new_weight = new_weight/np.sum(new_weight)
lfs["weights"] = lfs["weights"] / np.sum(lfs["weights"])
with open('tacred_labeled.json','w') as f:
json.dump(tacred_labeled,f)
with open('tacred_unlabeled.json','w') as f:
json.dump(tacred_unlabeled,f)
with open('labeled_data.pkl','wb') as f:
pickle.dump(labeled_data,f)
with open('unlabeled_data.pkl','wb') as f:
pickle.dump(unlabeled_data,f)
with open('weights.pkl', 'wb') as f:
pickle.dump(lfs["weights"], f)
with open('lab_d.pkl','wb') as f:
pickle.dump(lab_d,f)
with open('unlab_d.pkl','wb') as f:
pickle.dump(unlab_d,f)
with open('weights_d.pkl','wb') as f:
pickle.dump(new_weight,f)
random.shuffle(unlabeled_data)
print('unlabdel data:',str(len(unlabeled_data)),'labeled data:',str(len(labeled_data)))
dev_history, test_history = [], []
dev_history2, test_history2 = [], []
with tf.Session(config=sess_config) as sess:
lr = float(config.init_lr)
writer = tf.summary.FileWriter(config.log_dir + identifier)
sess.run(tf.global_variables_initializer())
print('---Pretrain-----')
for epoch in range(config.pretrain_epoch):
loss_list,pretrain_loss_lis,sim_loss_lis = [],[],[]
for batch in get_pretrain_batch(config, pretrain_data, word2idx_dict):
pretrain_loss_prt,sim_loss_prt,loss,_ = sess.run([match.pretrain_loss,match.sim_loss,match.pretrain_loss_v2,match.pre_train_op],feed_dict={match.pretrain_sents: batch['sents'], match.pretrain_pats: batch['pats'],match.pretrain_labels: batch['labels'],match.is_train:True})
loss_list.append(loss)
pretrain_loss_lis.append(pretrain_loss_prt)
sim_loss_lis.append(sim_loss_prt)
print("{} epoch:".format(str(epoch)))
print("loss:{} pretrain_loss:{} sim_loss:{}".format(str(np.mean(loss_list)),str(np.mean(pretrain_loss_lis)),str(np.mean(sim_loss_lis))))
pred_labels = []
goldens = []
prt_id = 0
for batch in get_pretrain_batch(config,pretrain_data2,word2idx_dict,shuffle=False):
prt_id+=1
pp,ppp,pred_label = sess.run([match.prt_loss,match.prt_pred,match.pretrain_pred_labels],feed_dict={match.pretrain_sents: batch['sents'], match.pretrain_pats: batch['pats'],match.is_train:False,match.pretrain_labels: batch['labels']})
pred_label = list(pred_label)
golden = list(np.reshape(batch['labels'],[-1]))
assert len(golden)==len(pred_label)
pred_labels.extend(pred_label)
goldens.extend(golden)
p,r,f = f_score(pred_labels,goldens)
print('PRF:',(p,r,f))
if p>0.9 and r>0.9:
break
print('\n')
Here are my system specifications:
Tensorflow version: 1.14.0 (w/ GPU support)
Operating System: Linux 9.12
OS Distribution: Debian
OS Architecture: x86_64
Python version: 3.7.6
NLTK version: 3.4.5
CUDA version: 10.0
cuDNN version: 7.4.2
NVIDIA graphics card: Tesla T4
NVIDIA driver version: 410.104
Compiler version: GCC 6.3.0
If anyone would like to share their thoughts on why my model is unable to properly execute to the pretraining phase and beyond, I would greatly appreciate it. Thank you.
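One illustrative way to narrow down a hang like this in TF 1.x is to give the session an operation timeout and log device placement, so the first sess.run that stalls raises instead of blocking forever. This is a sketch adapting the sess_config already built in pseudo_labeling; the 60-second timeout is an arbitrary example:
# Sketch: debug settings for the existing sess_config in pseudo_labeling().
sess_config = tf.ConfigProto(allow_soft_placement=True,
                             log_device_placement=True)  # print which device each op lands on
sess_config.gpu_options.allow_growth = True
sess_config.operation_timeout_in_ms = 60000              # raise DeadlineExceededError instead of hanging forever

with tf.Session(config=sess_config) as sess:
    sess.run(tf.global_variables_initializer())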

Tensorflow: strided_slice slicing error with while loop

I've created a rather complex seq2seq type model (based on "A Neural Transducer"), and in the latest version of Tensorflow, the following code returns the error:
Cannot use 'transducer_training/while/rnn/strided_slice' as input to 'gradients/transducer_training/while/rnn/while/Select_1_grad/Select/f_acc' because 'transducer_training/while/rnn/strided_slice' is in a while loop
The code worked before; it only stopped working with the latest versions:
numpy 1.14.0
protobuf 3.5.1
tensorflow 1.5.0
tensorflow-gpu 1.3.0
tensorflow-tensorboard 1.5.1
Ubuntu 16.04.3 LTS (Xenial Xerus)
Code (To get the error just copy, paste and run it):
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple
from tensorflow.python.layers import core as layers_core
# NOTE: Time major
# ---------------- Constants Manager ----------------------------
class ConstantsManager(object):
def __init__(self, input_dimensions, input_embedding_size, inputs_embedded, encoder_hidden_units,
transducer_hidden_units, vocab_ids, input_block_size, beam_width):
assert transducer_hidden_units == encoder_hidden_units, 'Encoder and transducer have to have the same amount ' \
'of hidden units'
self.input_dimensions = input_dimensions
self.vocab_ids = vocab_ids
self.E_SYMBOL = len(self.vocab_ids)
self.vocab_ids.append('E_SYMBOL')
self.GO_SYMBOL = len(self.vocab_ids)
self.vocab_ids.append('GO_SYMBOL')
self.vocab_size = len(self.vocab_ids)
self.input_embedding_size = input_embedding_size
self.inputs_embedded = inputs_embedded
self.encoder_hidden_units = encoder_hidden_units
self.transducer_hidden_units = transducer_hidden_units
self.input_block_size = input_block_size
self.beam_width = beam_width
self.batch_size = 1 # Cannot be increased, see paper
self.log_prob_init_value = 0
# ----------------- Model ---------------------------------------
class Model(object):
def __init__(self, cons_manager):
self.var_list = []
self.cons_manager = cons_manager
self.max_blocks, self.inputs_full_raw, self.transducer_list_outputs, self.start_block, self.encoder_hidden_init,\
self.trans_hidden_init, self.logits, self.encoder_hidden_state_new, \
self.transducer_hidden_state_new, self.train_saver = self.build_full_transducer()
self.targets, self.train_op, self.loss = self.build_training_step()
def build_full_transducer(self):
with tf.variable_scope('transducer_training'):
embeddings = tf.Variable(tf.random_uniform([self.cons_manager.vocab_size,
self.cons_manager.input_embedding_size], -1.0, 1.0),
dtype=tf.float32,
name='embedding')
# Inputs
max_blocks = tf.placeholder(dtype=tf.int32, name='max_blocks') # total amount of blocks to go through
if self.cons_manager.inputs_embedded is True:
input_type = tf.float32
else:
input_type = tf.int32
inputs_full_raw = tf.placeholder(shape=(None, self.cons_manager.batch_size,
self.cons_manager.input_dimensions), dtype=input_type,
name='inputs_full_raw') # shape [max_time, 1, input_dims]
transducer_list_outputs = tf.placeholder(shape=(None,), dtype=tf.int32,
name='transducer_list_outputs') # amount to output per block
start_block = tf.placeholder(dtype=tf.int32, name='transducer_start_block') # where to start the input
encoder_hidden_init = tf.placeholder(shape=(2, 1, self.cons_manager.encoder_hidden_units), dtype=tf.float32,
name='encoder_hidden_init')
trans_hidden_init = tf.placeholder(shape=(2, 1, self.cons_manager.transducer_hidden_units), dtype=tf.float32,
name='trans_hidden_init')
# Temporary constants, maybe changed during inference
end_symbol = tf.get_variable(name='end_symbol',
initializer=tf.constant_initializer(self.cons_manager.vocab_size),
shape=(), dtype=tf.int32)
# Turn inputs into tensor which is easily readable#
inputs_full = tf.reshape(inputs_full_raw, shape=[-1, self.cons_manager.input_block_size,
self.cons_manager.batch_size,
self.cons_manager.input_dimensions])
# Outputs
outputs_ta = tf.TensorArray(dtype=tf.float32, size=max_blocks)
init_state = (start_block, outputs_ta, encoder_hidden_init, trans_hidden_init)
# Initiate cells, NOTE: if there is a future error, put these back inside the body function
encoder_cell = tf.contrib.rnn.LSTMCell(num_units=self.cons_manager.encoder_hidden_units)
transducer_cell = tf.contrib.rnn.LSTMCell(self.cons_manager.transducer_hidden_units)
def cond(current_block, outputs_int, encoder_hidden, trans_hidden):
return current_block < start_block + max_blocks
def body(current_block, outputs_int, encoder_hidden, trans_hidden):
# --------------------- ENCODER ----------------------------------------------------------------------
encoder_inputs = inputs_full[current_block]
encoder_inputs_length = [tf.shape(encoder_inputs)[0]]
encoder_hidden_state = encoder_hidden
if self.cons_manager.inputs_embedded is True:
encoder_inputs_embedded = encoder_inputs
else:
encoder_inputs = tf.reshape(encoder_inputs, shape=[-1, self.cons_manager.batch_size])
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
# Build model
# Build previous state
encoder_hidden_c, encoder_hidden_h = tf.split(encoder_hidden_state, num_or_size_splits=2, axis=0)
encoder_hidden_c = tf.reshape(encoder_hidden_c, shape=[-1, self.cons_manager.encoder_hidden_units])
encoder_hidden_h = tf.reshape(encoder_hidden_h, shape=[-1, self.cons_manager.encoder_hidden_units])
encoder_hidden_state_t = LSTMStateTuple(encoder_hidden_c, encoder_hidden_h)
# encoder_outputs: [max_time, batch_size, num_units]
encoder_outputs, encoder_hidden_state_new = tf.nn.dynamic_rnn(
encoder_cell, encoder_inputs_embedded,
sequence_length=encoder_inputs_length, time_major=True,
dtype=tf.float32, initial_state=encoder_hidden_state_t)
# Modify output of encoder_hidden_state_new so that it can be fed back in again without problems.
encoder_hidden_state_new = tf.concat([encoder_hidden_state_new.c, encoder_hidden_state_new.h], axis=0)
encoder_hidden_state_new = tf.reshape(encoder_hidden_state_new,
shape=[2, -1, self.cons_manager.encoder_hidden_units])
# --------------------- TRANSDUCER --------------------------------------------------------------------
encoder_raw_outputs = encoder_outputs
# Save/load the state as one tensor, use encoder state as init if this is the first block
trans_hidden_state = tf.cond(current_block > 0, lambda: trans_hidden, lambda: encoder_hidden_state_new)
transducer_amount_outputs = transducer_list_outputs[current_block - start_block]
# Model building
helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
embedding=embeddings,
start_tokens=tf.tile([self.cons_manager.GO_SYMBOL],
[self.cons_manager.batch_size]), # TODO: check if this looks good
end_token=end_symbol) # vocab size, so that it doesn't prematurely end the decoding
attention_states = tf.transpose(encoder_raw_outputs,
[1, 0, 2]) # attention_states: [batch_size, max_time, num_units]
attention_mechanism = tf.contrib.seq2seq.LuongAttention(
self.cons_manager.encoder_hidden_units, attention_states)
decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
transducer_cell,
attention_mechanism,
attention_layer_size=self.cons_manager.transducer_hidden_units)
projection_layer = layers_core.Dense(self.cons_manager.vocab_size, use_bias=False)
# Build previous state
trans_hidden_c, trans_hidden_h = tf.split(trans_hidden_state, num_or_size_splits=2, axis=0)
trans_hidden_c = tf.reshape(trans_hidden_c, shape=[-1, self.cons_manager.transducer_hidden_units])
trans_hidden_h = tf.reshape(trans_hidden_h, shape=[-1, self.cons_manager.transducer_hidden_units])
trans_hidden_state_t = LSTMStateTuple(trans_hidden_c, trans_hidden_h)
decoder = tf.contrib.seq2seq.BasicDecoder(
decoder_cell, helper,
decoder_cell.zero_state(1, tf.float32).clone(cell_state=trans_hidden_state_t),
output_layer=projection_layer)
outputs, transducer_hidden_state_new, _ = tf.contrib.seq2seq.dynamic_decode(decoder,
output_time_major=True,
maximum_iterations=transducer_amount_outputs)
logits = outputs.rnn_output # logits of shape [max_time,batch_size,vocab_size]
decoder_prediction = outputs.sample_id # For debugging
# Modify output of transducer_hidden_state_new so that it can be fed back in again without problems.
transducer_hidden_state_new = tf.concat(
[transducer_hidden_state_new[0].c, transducer_hidden_state_new[0].h],
axis=0)
transducer_hidden_state_new = tf.reshape(transducer_hidden_state_new,
shape=[2, -1, self.cons_manager.transducer_hidden_units])
# Note the outputs
outputs_int = outputs_int.write(current_block - start_block, logits)
return current_block + 1, outputs_int, encoder_hidden_state_new, transducer_hidden_state_new
_, outputs_final, encoder_hidden_state_new, transducer_hidden_state_new = \
tf.while_loop(cond, body, init_state, parallel_iterations=1)
# Process outputs
outputs = outputs_final.concat()
logits = tf.reshape(
outputs,
shape=(-1, 1, self.cons_manager.vocab_size)) # And now its [max_output_time, batch_size, vocab]
# For loading the model later on
logits = tf.identity(logits, name='logits')
encoder_hidden_state_new = tf.identity(encoder_hidden_state_new, name='encoder_hidden_state_new')
transducer_hidden_state_new = tf.identity(transducer_hidden_state_new, name='transducer_hidden_state_new')
train_saver = tf.train.Saver() # For now save everything
return max_blocks, inputs_full_raw, transducer_list_outputs, start_block, encoder_hidden_init,\
trans_hidden_init, logits, encoder_hidden_state_new, transducer_hidden_state_new, train_saver
def build_training_step(self):
targets = tf.placeholder(shape=(None,), dtype=tf.int32, name='targets')
targets_one_hot = tf.one_hot(targets, depth=self.cons_manager.vocab_size, dtype=tf.float32)
targets_one_hot = tf.Print(targets_one_hot, [targets], message='Targets: ', summarize=10)
targets_one_hot = tf.Print(targets_one_hot, [tf.argmax(self.logits, axis=2)], message='Argmax: ', summarize=10)
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=targets_one_hot,
logits=self.logits)
loss = tf.reduce_mean(stepwise_cross_entropy)
train_op = tf.train.AdamOptimizer().minimize(loss)
return targets, train_op, loss
constants_manager = ConstantsManager(input_dimensions=1, input_embedding_size=11, inputs_embedded=False,
encoder_hidden_units=100, transducer_hidden_units=100, vocab_ids=[0, 1, 2],
input_block_size=1, beam_width=5)
model = Model(cons_manager=constants_manager)
I encountered a similar problem recently when I put a dynamic_rnn inside a scan (i.e. a while loop). It seems that the bug was introduced in TensorFlow 1.5. You can try downgrading your TensorFlow version to 1.4 or upgrading to 1.6; both should work.
In this particular case, the error seems to be raised incorrectly (see github issue in comments). In general, however, such errors mean the following:
The usage pattern that the error message is complaining about was always illegal. Earlier versions of TensorFlow just did not have good checks for it.
The core of the problem is that in TensorFlow's execution model, you cannot use a tensor that you create inside a while loop, outside of it. For a simple illustration of this, take a look at this test case.
You can just disable the check by immediately returning from here, but your computation graph will be malformed, which can lead to undefined behavior.
The correct fix is to add all the tensors that you want to access outside of the while loop (outside of cond and body functions) to the loop_vars and use them as returned from the tf.while_loop.
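A minimal sketch of that pattern (illustrative, not taken from the question's code): a tensor created inside body is threaded out through loop_vars, and only the value returned by tf.while_loop is used afterwards.
import tensorflow as tf

def cond(i, acc):
    return i < 5

def body(i, acc):
    inner = tf.cast(i, tf.float32) * 2.0  # tensor created inside the loop body
    return i + 1, acc + inner             # threaded out via the loop variables

i0 = tf.constant(0)
acc0 = tf.constant(0.0)
_, acc_final = tf.while_loop(cond, body, [i0, acc0])  # use acc_final outside the loop, never 'inner' directly

with tf.Session() as sess:
    print(sess.run(acc_final))  # 20.0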

Tensorflow uses all memory for a single example

Here is my code for reading a TFRecord batch and passing it through a network.
import os, sys
import tensorflow as tf
def read_and_decode_single_example(filename_queue):
# Unlike the TFRecordWriter, the TFRecordReader is symbolic
reader = tf.TFRecordReader()
# One can read a single serialized example from a filename
# serialized_example is a Tensor of type string.
_, serialized_example = reader.read(filename_queue)
# The serialized example is converted back to actual values.
# One needs to describe the format of the objects to be returned
features = tf.parse_single_example(
serialized_example,
features={
# We know the length of both fields. If not the
# tf.VarLenFeature could be used
'click': tf.FixedLenFeature([], tf.int64),
'title': tf.FixedLenFeature([25], tf.int64)
# maybe others eg data1:tf.FixLenFeature([],tf.float64)
})
# now return the converted data
lbl = features['click']
ttl = features['title']
return lbl, ttl
def read_batch_data(files, b_s):
min_after_dequeue = 8
num_threads = 2
batch_size = b_s
capacity = min_after_dequeue + (num_threads + 2) * batch_size
filename_queue = tf.train.string_input_producer(files, num_epochs=1)
c_n_c, tit = read_and_decode_single_example(filename_queue)
label_batch, title_batch = tf.train.shuffle_batch([c_n_c, tit], batch_size=batch_size, capacity=capacity,num_threads=num_threads, min_after_dequeue=min_after_dequeue)
return label_batch, title_batch
And the network code:
import math
import os,sys
import subprocess
import pickle
import load_data_labels
import numpy as np
import tensorflow as tf
import shutil
LOG_DIR = './log_dir'
def init_weights(shape, name):
return tf.Variable(tf.random_normal(shape,stddev=0.01,dtype=tf.float64), name=name)
def init_biases(shape, name):
return tf.Variable(tf.random_normal(shape,dtype=tf.float64),name=name)
def model(titles, w_h, w_h2, w_o, vocab_size,embd_layer):
# Add layer name scopes for better graph visualization
# Embedding layer
with tf.device('/cpu:0'), tf.name_scope("embedding"):
W_em = tf.Variable(embd_layer,name="word_embeddings")
embed_l = tf.nn.embedding_lookup(W_em, titles)
# can be reduce sum
embedding = tf.reduce_mean(embed_l, [1])
with tf.name_scope("layer1"):
h = tf.nn.relu(tf.add(tf.matmul(embedding, w_h), b_h))
with tf.name_scope("layer2"):
h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2), b_h2))
with tf.name_scope("layer3"):
return tf.add(tf.matmul(h2, w_o), b_o)
def init_word_embedding_with_w2v(w2v_dict, word_map, emb_dim, voc_len):
initW = np.random.uniform(-1.0,1.0,(voc_len+1, emb_dim))
for word in word_map:
vec = w2v_dict.get(word)
idx = word_map[word]
if vec is not None:
initW[idx,:] = vec
return initW
with open('./data/word_map.pickle', 'rb') as word_map_file:
word_map = pickle.load(word_map_file)
with open('./data/word_2_vec_dict.pickle', 'rb') as w2vec_file:
w2vec = pickle.load(w2vec_file)
dataset_file= "./data/file000000000000_1000lines.tfrecords"
batch_size=4
trY,trX = load_data_labels.read_batch_data([dataset_file],batch_size)
trY=tf.one_hot(trY,depth=2,axis = -1)
trY=tf.reshape(trY,[4,2])
print trY.get_shape()
print trX.get_shape()
w_h = init_weights([300, 625], "w_h")
w_h2 = init_weights([625, 625], "w_h2")
w_o = init_weights([625, 2], "w_o")
vocabulary_length=len(w2vec)
any_vector_in_dict = w2vec.itervalues().next()
emb_dim = len(any_vector_in_dict)
embd_layer=init_word_embedding_with_w2v(w2vec,word_map,emb_dim,vocabulary_length)
b_h = init_biases([625], "b_h")
b_h2 = init_biases([625], "b_h2")
b_o = init_biases([2],"b_o")
tf.summary.histogram("w_h_summar", w_h)
tf.summary.histogram("w_h2_summar", w_h2)
tf.summary.histogram("w_o_summar", w_o)
tf.summary.histogram("embedding_layer", embd_layer)
py_x = model(trX, w_h, w_h2, w_o, vocabulary_length,embd_layer)
with tf.name_scope("cost"):
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=trY, logits=py_x))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
correct_pred = tf.equal(tf.argmax(trY, 1), tf.argmax(py_x, 1))
acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))
tf.summary.scalar("accuracy", acc_op)
with tf.Session() as sess:
writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
merged = tf.summary.merge_all()
tf.global_variables_initializer().run()
for i in range(10):
sess.run(train_op)
summary, acc = sess.run([merged, acc_op])
writer.add_summary(summary, i) # Write summary
The problem is that the program crashes because it fills all of the RAM. The trouble starts at the merge_all statement of the network: it hangs in global_variables_initializer, which never returns, while memory fills up gradually. Maybe a queue was left open? I can't find anything relevant and specific, and TensorFlow's docs are not much help here. I've been searching for more than a week and I'm starting to get extremely tired. Could anyone help?
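On the open-queue suspicion: with string_input_producer(..., num_epochs=1) and shuffle_batch, a TF 1.x session also needs local-variable initialization and explicitly started queue runners, otherwise sess.run blocks while the queues sit empty. Here is a sketch of what that session block could look like; train_op, merged, acc_op and LOG_DIR refer to the code above:
with tf.Session() as sess:
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    # num_epochs=1 creates a local variable, so initialize local variables as well.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    # Start the queue runners that feed shuffle_batch; without them sess.run waits forever.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for i in range(10):
            _, summary, acc = sess.run([train_op, merged, acc_op])
            writer.add_summary(summary, i)
    finally:
        coord.request_stop()
        coord.join(threads)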

Keras/Tensorflow: Training successful on MBP 13" with Theano, but throws ResourceExhaustedError on a powerful computer with TensorFlow

I have successfully trained a Keras model on a 13" MacBook Pro with Theano, albeit at a slow speed, but when I train the exact same model with the same data on a more powerful computer (32 GB RAM, 8 GB Nvidia Quadro GPU, 8 CPU cores) with TensorFlow on Ubuntu, it fails with a ResourceExhaustedError.
Here is the script that I use:
from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import Nadam
from keras.preprocessing.image import random_rotation, random_shift, random_shear, random_zoom
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from random import shuffle
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")
import cv2
import h5py
import json
import os
import sys
import numpy as np
class OpticalSpeechRecognizer(object):
def __init__(self, rows, columns, frames_per_sequence, samples_generated_per_sample, config_file, training_save_fn, osr_save_fn):
self.rows = rows
self.columns = columns
self.frames_per_sequence = frames_per_sequence
self.samples_generated_per_sample = samples_generated_per_sample
self.config_file = config_file
self.training_save_fn = training_save_fn
self.osr_save_fn = osr_save_fn
self.osr = None
def save_osr_model(self):
""" Save the OSR model to an HDF5 file
"""
# delete file if it already exists
try:
print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(self.osr_save_fn)
os.remove(self.osr_save_fn)
except OSError:
pass
print "Saving OSR model to \"{0}\"".format(self.osr_save_fn)
self.osr.save(self.osr_save_fn)
def load_osr_model(self):
""" Load the OSR model from an HDF5 file
"""
print "Loading OSR model from \"{0}\"".format(self.osr_save_fn)
self.osr = load_model(self.osr_save_fn)
def train_osr_model(self):
""" Train the optical speech recognizer
"""
print "\nTraining OSR"
validation_ratio = 0.3
batch_size = 25
training_sequence_generator = self.generate_training_sequences(batch_size=batch_size)
validation_sequence_generator = self.generate_training_sequences(batch_size=batch_size, validation_ratio=validation_ratio)
with h5py.File(self.training_save_fn, "r") as training_save_file:
sample_count = training_save_file.attrs["sample_count"]
pbi = ProgressDisplay()
self.osr.fit_generator(generator=training_sequence_generator,
validation_data=validation_sequence_generator,
samples_per_epoch=sample_count,
nb_val_samples=int(round(validation_ratio*sample_count)),
nb_epoch=10,
max_q_size=1,
verbose=2,
callbacks=[pbi],
class_weight=None,
nb_worker=1)
def generate_training_sequences(self, batch_size, validation_ratio=0):
""" Generates training sequences from HDF5 file on demand
"""
while True:
with h5py.File(self.training_save_fn, "r") as training_save_file:
sample_count = int(training_save_file.attrs["sample_count"])
sample_idxs = range(0, sample_count)
shuffle(sample_idxs)
training_sample_idxs = sample_idxs[0:int((1-validation_ratio)*sample_count)]
validation_sample_idxs = sample_idxs[int((1-validation_ratio)*sample_count):]
# generate sequences for validation
if validation_ratio:
validation_sample_count = len(validation_sample_idxs)
batches = int(validation_sample_count/batch_size)
remainder_samples = validation_sample_count%batch_size
# generate batches of samples
for idx in xrange(0, batches):
X = training_save_file["X"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
Y = training_save_file["Y"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
yield (X, Y)
# send remainder samples as one batch, if there are any
if remainder_samples:
X = training_save_file["X"][validation_sample_idxs[-remainder_samples:]]
Y = training_save_file["Y"][validation_sample_idxs[-remainder_samples:]]
yield (X, Y)
# generate sequences for training
else:
training_sample_count = len(training_sample_idxs)
batches = int(training_sample_count/batch_size)
remainder_samples = training_sample_count%batch_size
# generate batches of samples
for idx in xrange(0, batches):
X = training_save_file["X"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
Y = training_save_file["Y"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
yield (X, Y)
# send remainder samples as one batch, if there are any
if remainder_samples:
X = training_save_file["X"][training_sample_idxs[-remainder_samples:]]
Y = training_save_file["Y"][training_sample_idxs[-remainder_samples:]]
yield (X, Y)
def print_osr_summary(self):
""" Prints a summary representation of the OSR model
"""
print "\n*** MODEL SUMMARY ***"
self.osr.summary()
def generate_osr_model(self):
""" Builds the optical speech recognizer model
"""
print "".join(["\nGenerating OSR model\n",
"-"*40])
with h5py.File(self.training_save_fn, "r") as training_save_file:
class_count = len(training_save_file.attrs["training_classes"].split(","))
osr = Sequential()
print " - Adding convolution layers"
osr.add(Convolution3D(nb_filter=32,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
activation="relu"))
osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
osr.add(Convolution3D(nb_filter=64,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
osr.add(Convolution3D(nb_filter=128,
kernel_dim1=3,
kernel_dim2=3,
kernel_dim3=3,
border_mode="same",
activation="relu"))
osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
osr.add(Dropout(0.2))
osr.add(Flatten())
print " - Adding fully connected layers"
osr.add(Dense(output_dim=128,
init="normal",
activation="relu"))
osr.add(Dense(output_dim=128,
init="normal",
activation="relu"))
osr.add(Dense(output_dim=128,
init="normal",
activation="relu"))
osr.add(Dropout(0.2))
osr.add(Dense(output_dim=class_count,
init="normal",
activation="softmax"))
print " - Compiling model"
optimizer = Nadam(lr=0.002,
beta_1=0.9,
beta_2=0.999,
epsilon=1e-08,
schedule_decay=0.004)
osr.compile(loss="categorical_crossentropy",
optimizer=optimizer,
metrics=["categorical_accuracy"])
self.osr = osr
print " * OSR MODEL GENERATED * "
def process_training_data(self):
""" Preprocesses training data and saves them into an HDF5 file
"""
# load training metadata from config file
training_metadata = {}
training_classes = []
with open(self.config_file) as training_config:
training_metadata = json.load(training_config)
training_classes = sorted(list(training_metadata.keys()))
print "".join(["\n",
"Found {0} training classes!\n".format(len(training_classes)),
"-"*40])
for class_label, training_class in enumerate(training_classes):
print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
print ""
# count number of samples
sample_count = 0
sample_count_by_class = [0]*len(training_classes)
for class_label, training_class in enumerate(training_classes):
# get training class sequeunce paths
training_class_data_path = training_metadata[training_class]
training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
for file_name in os.listdir(training_class_data_path)
if (os.path.isfile(os.path.join(training_class_data_path, file_name))
and ".mov" in file_name)]
# update sample count
sample_count += len(training_class_sequence_paths)
sample_count_by_class[class_label] = len(training_class_sequence_paths)
print "".join(["\n",
"Found {0} training samples!\n".format(sample_count),
"-"*40])
for class_label, training_class in enumerate(training_classes):
print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
print ""
# initialize HDF5 save file, but clear older duplicate first if it exists
try:
print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(self.training_save_fn)
os.remove(self.training_save_fn)
except OSError:
pass
# process and save training data into HDF5 file
print "Generating {0} samples from {1} samples via data augmentation\n".format(sample_count*self.samples_generated_per_sample,
sample_count)
sample_count = sample_count*self.samples_generated_per_sample
with h5py.File(self.training_save_fn, "w") as training_save_file:
training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
training_save_file.attrs["sample_count"] = sample_count
x_training_dataset = training_save_file.create_dataset("X",
shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
dtype="f")
y_training_dataset = training_save_file.create_dataset("Y",
shape=(sample_count, len(training_classes)),
dtype="i")
# iterate through each class data
sample_idx = 0
for class_label, training_class in enumerate(training_classes):
# get training class sequeunce paths
training_class_data_path = training_metadata[training_class]
training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
for file_name in os.listdir(training_class_data_path)
if (os.path.isfile(os.path.join(training_class_data_path, file_name))
and ".mov" in file_name)]
# iterate through each sequence
for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
.format(training_class, idx+1, len(training_class_sequence_paths)))
sys.stdout.flush()
# accumulate samples and labels
samples_batch = self.process_frames(training_class_sequence_path)
label = [0]*len(training_classes)
label[class_label] = 1
for sample in samples_batch:
x_training_dataset[sample_idx] = sample
y_training_dataset[sample_idx] = label
# update sample index
sample_idx += 1
print "\n"
training_save_file.close()
print "Training data processed and saved to {0}".format(self.training_save_fn)
def process_frames(self, video_file_path):
""" Preprocesses sequence frames
"""
# haar cascades for localizing oral region
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
mouth_cascade = cv2.CascadeClassifier('haarcascade_mcs_mouth.xml')
video = cv2.VideoCapture(video_file_path)
success, frame = video.read()
frames = []
success = True
# convert to grayscale, localize oral region, equalize frame dimensions, and accumulate valid frames
while success:
success, frame = video.read()
if success:
# convert to grayscale
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# localize single facial region
faces_coords = face_cascade.detectMultiScale(frame, 1.3, 5)
if len(faces_coords) == 1:
face_x, face_y, face_w, face_h = faces_coords[0]
frame = frame[face_y:face_y + face_h, face_x:face_x + face_w]
# localize oral region
mouth_coords = mouth_cascade.detectMultiScale(frame, 1.3, 5)
threshold = 0
for (mouth_x, mouth_y, mouth_w, mouth_h) in mouth_coords:
if (mouth_y > threshold):
threshold = mouth_y
valid_mouth_coords = (mouth_x, mouth_y, mouth_w, mouth_h)
else:
pass
mouth_x, mouth_y, mouth_w, mouth_h = valid_mouth_coords
frame = frame[mouth_y:mouth_y + mouth_h, mouth_x:mouth_x + mouth_w]
# equalize frame dimensions
frame = cv2.resize(frame, (self.columns, self.rows)).astype('float32')
# accumulate frames
frames.append(frame)
# ignore multiple facial region detections
else:
pass
# equalize sequence lengths
if len(frames) < self.frames_per_sequence:
frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
frames = np.asarray(frames[0:self.frames_per_sequence])
# pixel normalizer
pix_norm = lambda frame: frame / 255.0
samples_batch = [[map(pix_norm, frames)]]
# random transformations for data augmentation
for _ in xrange(0, self.samples_generated_per_sample-1):
rotated_frames = random_rotation(frames, rg=45)
shifted_frames = random_shift(rotated_frames, wrg=0.25, hrg=0.25)
sheared_frames = random_shear(shifted_frames, intensity=0.79)
zoomed_frames = random_zoom(sheared_frames, zoom_range=(1.25, 1.25))
samples_batch.append([map(pix_norm, zoomed_frames)])
return samples_batch
class ProgressDisplay(Callback):
""" Progress display callback
"""
def on_batch_end(self, epoch, logs={}):
print " Batch {0:<4d} => Accuracy: {1:>8.4f} | Loss: {2:>8.4f} | Size: {3:>4d}".format(int(logs["batch"])+1,
float(logs["categorical_accuracy"]),
float(logs["loss"]),
int(logs["size"]))
if __name__ == "__main__":
# Example usage
osr = OpticalSpeechRecognizer(rows=100,
columns=150,
frames_per_sequence=45,
samples_generated_per_sample=10,
config_file="training_config.json",
training_save_fn="training_data.h5",
osr_save_fn="osr_model.h5")
osr.process_training_data()
osr.generate_osr_model()
osr.print_osr_summary()
osr.train_osr_model()
osr.save_osr_model()
osr.load_osr_model()
As of this writing, the problem seems to be a TensorFlow issue. For me, the solution was to switch the backend to Theano. To switch the Keras backend, perform the following steps:
Find the Keras configuration file
~/.keras/keras.json
Change the value of the backend field and, optionally, the ordering field
{
"image_dim_ordering": "th",
"epsilon": 1e-07,
"floatx": "float32",
"backend": "theano"
}
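An alternative that keeps the TensorFlow backend (a sketch of my own, not part of the original answer) is to let TensorFlow allocate GPU memory on demand before building the model; whether that is enough to avoid the ResourceExhaustedError depends on the actual model and batch size:
# Sketch: allow GPU memory growth while staying on the TensorFlow backend.
import tensorflow as tf
from keras import backend as K

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
K.set_session(tf.Session(config=config))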
