Missing nodes in Tensorflow graph partitions - python

I have a Tensorflow graph that I managed to partition into two subgraphs, using the following code.
# graph_split.py
import copy
import six
import tensorflow as tf
from tensorflow.core.framework import graph_pb2
from tensorflow.python.framework.graph_util_impl import _extract_graph_summary, _assert_nodes_are_present, _bfs_for_reachable_nodes

def extract_sub_graph(graph_def, dest_nodes):
    if not isinstance(graph_def, graph_pb2.GraphDef):
        raise TypeError("graph_def must be a graph_pb2.GraphDef proto.")
    if isinstance(dest_nodes, six.string_types):
        raise TypeError("dest_nodes must be a list.")
    name_to_input_name, name_to_node, name_to_seq_num = _extract_graph_summary(graph_def)
    _assert_nodes_are_present(name_to_node, dest_nodes)
    nodes_to_keep = _bfs_for_reachable_nodes(dest_nodes, name_to_input_name)
    # Keep only the nodes explicitly listed in dest_nodes; drop the rest
    # of the reachable closure found by the BFS.
    nodes_to_keep_copy = copy.deepcopy(nodes_to_keep)
    for node in nodes_to_keep_copy:
        if node not in dest_nodes:
            nodes_to_keep.remove(node)
    nodes_to_keep_list = sorted(
        list(nodes_to_keep), key=lambda n: name_to_seq_num[n])
    # Now construct the output GraphDef.
    out = graph_pb2.GraphDef()
    for n in nodes_to_keep_list:
        out.node.extend([copy.deepcopy(name_to_node[n])])
    out.library.CopyFrom(graph_def.library)
    out.versions.CopyFrom(graph_def.versions)
    return out

def split_model(graph_def):
    node_names = [n.name for n in graph_def.node]
    middle_node_index = len(node_names) // 2
    # First half of the node names goes to subgraph 1, second half to subgraph 2.
    subgraph_1_nodes = node_names[:middle_node_index]
    subgraph_2_nodes = node_names[middle_node_index:]
    subgraph_1 = extract_sub_graph(graph_def, subgraph_1_nodes)
    subgraph_2 = extract_sub_graph(graph_def, subgraph_2_nodes)
    return [subgraph_1, subgraph_2]

if __name__ == "__main__":
    weights_path = "model.pb"
    pbtxt_path = "protobuf_text.pbtxt"
    with tf.gfile.FastGFile(weights_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='')
    subgraphs = split_model(graph_def)
Now, I have to convert the frozen inference graphs into SavedModel objects and export them.
# frozen_to_saved.py
import os
import shutil
import tensorflow as tf
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants

def frozen_to_saved(graph_def, export_folder="saved"):
    export_dir = os.path.join(os.getcwd(), export_folder)
    if os.path.exists(export_dir):
        shutil.rmtree(export_dir)
    os.mkdir(export_dir)
    # Under TF 2.x the graph-mode APIs live in tf.compat.v1; under TF 1.x
    # they are top-level. Alias them once instead of duplicating the code.
    tf_v1 = tf.compat.v1 if int(tf.__version__.split('.')[0]) == 2 else tf
    builder = tf_v1.saved_model.builder.SavedModelBuilder(export_dir)
    sigs = {}
    with tf_v1.Session(graph=tf_v1.Graph()) as sess:
        # name="" is important to ensure we don't get spurious prefixing.
        tf_v1.import_graph_def(graph_def, name="")
        g = tf_v1.get_default_graph()
        # Treat the first node as the input and the last node as the output.
        inp = g.get_tensor_by_name(graph_def.node[0].name + ":0")
        out = g.get_tensor_by_name(graph_def.node[-1].name + ":0")
        sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \
            tf_v1.saved_model.signature_def_utils.predict_signature_def(
                {"in": inp}, {"out": out})
        builder.add_meta_graph_and_variables(sess,
                                             [tag_constants.SERVING],
                                             signature_def_map=sigs)
    builder.save()
The problem occurs when tf.import_graph_def(graph_def, name="") in frozen_to_saved.py is called.
Traceback (most recent call last):
  File "model_split.py", line 142, in <module>
    save_graph(subgraphs)
  File "model_split.py", line 92, in save_graph
    frozen_to_saved.frozen_to_saved(graph, export_folder="subgraph{}_saved".format(idx))
  File "F:\model_split\frozen_to_saved.py", line 50, in frozen_to_saved
    tf.import_graph_def(graph_def, name="")
  File "C:\Users\Deployment\.conda\envs\sw_2021\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\Deployment\.conda\envs\sw_2021\lib\site-packages\tensorflow\python\framework\importer.py", line 431, in import_graph_def
    raise ValueError(str(e))
ValueError: Node 'BatchMultiClassNonMaxSuppression/map/while/Merge': Unknown input node 'BatchMultiClassNonMaxSuppression/map/while/NextIteration'
Apparently, when the node names are partitioned, some nodes end up missing from the splits, even though I have verified that the node names are divided equally between them.
I also noticed that the nodes missing from one partition are present in the other, and vice versa.
My suspicion is that the problem lies in how I delete nodes and reconstruct the graphs in extract_sub_graph().
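A quick way to see the dangling references (a minimal diagnostic sketch; the helper name check_missing_inputs is my own, and it assumes the graph_def and split_model from above) is to walk each subgraph and collect inputs whose producing node is not part of that subgraph. Because extract_sub_graph() drops every node that is not in dest_nodes, any edge that crosses the split point, such as the while-loop back edge from NextIteration to Merge in the error above, is left dangling:

def check_missing_inputs(subgraph):
    # Names of the nodes that survived extraction.
    present = set(node.name for node in subgraph.node)
    missing = set()
    for node in subgraph.node:
        for inp in node.input:
            # Strip the control-dependency marker ("^name") and the port
            # suffix ("name:0") to recover the producing node's name.
            producer = inp.lstrip('^').split(':')[0]
            if producer not in present:
                missing.add(producer)
    return missing

for idx, subgraph in enumerate(split_model(graph_def)):
    dangling = check_missing_inputs(subgraph)
    print("subgraph {}: {} dangling input(s)".format(idx, len(dangling)))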

Related

RuntimeError: Unknown type bool encountered in graph lowering. This type is not supported in ONNX export

I am trying to convert the Self-Correction-Human-Parsing model to Core ML.
The issue that I am facing is also open at https://github.com/pytorch/pytorch/issues/52889
and https://github.com/apple/coremltools/issues/1085.
The evaluate.py file (for reference) looks like this, after some additions to convert the model to Core ML:
import coremltools as ct
import os
import argparse
import numpy as np
import torch
import torchvision
from torch.utils import data
from tqdm import tqdm
from PIL import Image as PILImage
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn
import networks
from datasets.datasets import LIPDataValSet
from utils.miou import compute_mean_ioU
from utils.transforms import BGR2RGB_transform
from utils.transforms import transform_parsing
import onnxruntime
import onnx

def get_arguments():
    """Parse all the arguments provided from the CLI.
    Returns:
        A list of parsed arguments.
    """
    parser = argparse.ArgumentParser(description="Self Correction for Human Parsing")
    # Network Structure
    parser.add_argument("--arch", type=str, default='resnet101')
    # Data Preference
    parser.add_argument("--data-dir", type=str, default='./data/LIP')
    parser.add_argument("--batch-size", type=int, default=1)
    parser.add_argument("--input-size", type=str, default='473,473')
    parser.add_argument("--num-classes", type=int, default=20)
    parser.add_argument("--ignore-label", type=int, default=255)
    parser.add_argument("--random-mirror", action="store_true")
    parser.add_argument("--random-scale", action="store_true")
    # Evaluation Preference
    parser.add_argument("--log-dir", type=str, default='./log')
    parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar')
    parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.")
    parser.add_argument("--save-results", action="store_true", help="whether to save the results.")
    parser.add_argument("--flip", action="store_true", help="random flip during the test.")
    parser.add_argument("--multi-scales", type=str, default='1', help="multiple scales during the test")
    return parser.parse_args()

def get_palette(num_cls):
    """Returns the color map for visualizing the segmentation mask.
    Args:
        num_cls: Number of classes
    Returns:
        The color map
    """
    n = num_cls
    palette = [0] * (n * 3)
    for j in range(0, n):
        lab = j
        palette[j * 3 + 0] = 0
        palette[j * 3 + 1] = 0
        palette[j * 3 + 2] = 0
        i = 0
        while lab:
            palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
            palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
            palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
            i += 1
            lab >>= 3
    return palette

def multi_scale_testing(model, batch_input_im, crop_size=[473, 473], flip=True, multi_scales=[1]):
    flipped_idx = (15, 14, 17, 16, 19, 18)
    if len(batch_input_im.shape) > 4:
        batch_input_im = batch_input_im.squeeze()
    if len(batch_input_im.shape) == 3:
        batch_input_im = batch_input_im.unsqueeze(0)
    interp = torch.nn.Upsample(size=crop_size, mode='bilinear', align_corners=True)
    ms_outputs = []
    for s in multi_scales:
        interp_im = torch.nn.Upsample(scale_factor=s, mode='bilinear', align_corners=True)
        scaled_im = interp_im(batch_input_im)
        print("Scaled_im:", type(scaled_im), scaled_im.shape)
        # traced_model = torch.jit.trace(model, scaled_im.to("cuda:0"))
        # parsing_output = model(scaled_im)
        scripted_model = torch.jit.script(model)
        print("Trying coreml part")
        core_model = ct.convert(scripted_model, inputs=[ct.TensorType(shape=scaled_im.shape)])
        core_model.save("human_parsing.mlmodel")
        print("human parsing model saved!")
        # torch.onnx.export(model, scaled_im.to("cuda:0"), "human_parsing.onnx", opset_version=11)
        # print("Loading onnx model...")
        # onnx_model = onnx.load("human_parsing.onnx")
        # onnx.checker.check_model(onnx_model)
        # ort_session = onnxruntime.InferenceSession("human_parsing.onnx")
        # def to_numpy(tensor):
        #     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
        # # x = torch.rand(1, 3, 473, 473)
        # # ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
        # ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(scaled_im)}
        # parsing_output = ort_session.run(None, ort_inputs)
        # print("Parsing output 1:", len(parsing_output))
        parsing_output = parsing_output[0][-1]
        parsing_output = np.expand_dims(parsing_output, axis=0)
        parsing_output = torch.from_numpy(parsing_output)
        print("Parsing output 2:", parsing_output.shape)
        output = parsing_output[0]
        if flip:
            flipped_output = parsing_output[1]
            flipped_output[14:20, :, :] = flipped_output[flipped_idx, :, :]
            output += flipped_output.flip(dims=[-1])
            output *= 0.5
        print("output:", output)
        print("output:", output.shape)
        print("output Type:", type(output))
        output = interp(output.unsqueeze(0))
        print("output unsqueezed:", output.shape)
        ms_outputs.append(output[0])
    ms_fused_parsing_output = torch.stack(ms_outputs)
    ms_fused_parsing_output = ms_fused_parsing_output.mean(0)
    ms_fused_parsing_output = ms_fused_parsing_output.permute(1, 2, 0)  # HWC
    parsing = torch.argmax(ms_fused_parsing_output, dim=2)
    parsing = parsing.data.cpu().numpy()
    ms_fused_parsing_output = ms_fused_parsing_output.data.cpu().numpy()
    return parsing, ms_fused_parsing_output

def main():
    """Create the model and start the evaluation process."""
    args = get_arguments()
    multi_scales = [float(i) for i in args.multi_scales.split(',')]
    gpus = [int(i) for i in args.gpu.split(',')]
    assert len(gpus) == 1
    if not args.gpu == 'None':
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    cudnn.benchmark = True
    cudnn.enabled = True
    h, w = map(int, args.input_size.split(','))
    input_size = [h, w]
    model = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=None)
    IMAGE_MEAN = model.mean
    IMAGE_STD = model.std
    INPUT_SPACE = model.input_space
    print('image mean: {}'.format(IMAGE_MEAN))
    print('image std: {}'.format(IMAGE_STD))
    print('input space:{}'.format(INPUT_SPACE))
    if INPUT_SPACE == 'BGR':
        print('BGR Transformation')
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGE_MEAN, std=IMAGE_STD),
        ])
    if INPUT_SPACE == 'RGB':
        print('RGB Transformation')
        transform = transforms.Compose([
            transforms.ToTensor(),
            BGR2RGB_transform(),
            transforms.Normalize(mean=IMAGE_MEAN, std=IMAGE_STD),
        ])
    # Data loader
    lip_test_dataset = LIPDataValSet(args.data_dir, 'val', crop_size=input_size, transform=transform, flip=args.flip)
    num_samples = len(lip_test_dataset)
    print('Total testing sample numbers: {}'.format(num_samples))
    testloader = data.DataLoader(lip_test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True)
    # num_samples = 1
    # Load model weight
    state_dict = torch.load(args.model_restore)['state_dict']
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    model.cuda()
    model.eval()
    sp_results_dir = os.path.join(args.log_dir, 'sp_results')
    if not os.path.exists(sp_results_dir):
        os.makedirs(sp_results_dir)
    palette = get_palette(20)
    parsing_preds = []
    scales = np.zeros((num_samples, 2), dtype=np.float32)
    centers = np.zeros((num_samples, 2), dtype=np.int32)
    with torch.no_grad():
        for idx, batch in enumerate(tqdm(testloader)):
            image, meta = batch
            if (len(image.shape) > 4):
                image = image.squeeze()
            im_name = meta['name'][0]
            c = meta['center'].numpy()[0]
            s = meta['scale'].numpy()[0]
            w = meta['width'].numpy()[0]
            h = meta['height'].numpy()[0]
            scales[idx, :] = s
            centers[idx, :] = c
            parsing, logits = multi_scale_testing(model, image.cuda(), crop_size=input_size, flip=args.flip,
                                                  multi_scales=multi_scales)
            print("Parsing:", parsing.shape)
            print("Logits:", logits.shape)
            # if args.save_results:
            if True:
                print("Inside Save_results")
                parsing_result = transform_parsing(parsing, c, s, w, h, input_size)
                parsing_result_path = os.path.join(sp_results_dir, im_name + '.png')
                # print("Parsing Result Path:", parsing_result_path)
                output_im = PILImage.fromarray(np.asarray(parsing_result, dtype=np.uint8))
                output_im.putpalette(palette)
                output_im.save(parsing_result_path)
            parsing_preds.append(parsing)
    assert len(parsing_preds) == num_samples
    mIoU = compute_mean_ioU(parsing_preds, scales, centers, args.num_classes, args.data_dir, input_size)
    print(mIoU)
    return

if __name__ == '__main__':
    main()
I have successfully exported the model to ONNX, but I am running into issues while converting it to Core ML.
Traceback (most recent call last):
  File "evaluate.py", line 262, in <module>
    main()
  File "evaluate.py", line 240, in main
    parsing, logits = multi_scale_testing(model, image.cuda(), crop_size=input_size, flip=args.flip,
  File "evaluate.py", line 102, in multi_scale_testing
    core_model = ct.convert(scripted_model,inputs=[ct.TensorType(shape=scaled_im.shape)])
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/_converters_entry.py", line 176, in convert
    mlmodel = mil_convert(
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 128, in mil_convert
    proto = mil_convert_to_proto(model, convert_from, convert_to,
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 171, in mil_convert_to_proto
    prog = frontend_converter(model, **kwargs)
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 85, in __call__
    return load(*args, **kwargs)
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/load.py", line 72, in load
    converter = TorchConverter(torchscript, inputs, outputs, cut_at_symbols)
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/converter.py", line 142, in __init__
    raw_graph, params_dict = self._expand_and_optimize_ir(self.torchscript)
  File "/anaconda/envs/schp/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/converter.py", line 250, in _expand_and_optimize_ir
    graph, params = _torch._C._jit_pass_lower_graph(
RuntimeError: Unknown type bool encountered in graph lowering. This type is not supported in ONNX export.
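One workaround worth trying (a hedged sketch, not a confirmed fix; the linked PyTorch and coremltools issues suggest the lowering pass chokes on bool-typed values in scripted graphs) is to convert a traced model instead of a scripted one, since tracing specializes away the Python-level control flow that carries those bools. This assumes the forward pass has no data-dependent branching that must be preserved:

# Sketch: trace instead of script before converting. `model` and
# `scaled_im` are as in multi_scale_testing() above.
model.eval()
with torch.no_grad():
    traced_model = torch.jit.trace(model, scaled_im.to("cuda:0"))
core_model = ct.convert(traced_model, inputs=[ct.TensorType(shape=scaled_im.shape)])
core_model.save("human_parsing.mlmodel")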

Why am I receiving 'FileNotFoundError: [Errno 2] No such file or directory:' in the terminal when I run `python ./train.py`?

I'm following the steps provided by the author of a research paper on training an outdoor image classifier.
(GitHub: https://github.com/yuxiaoz/SGSN)
However, this is the error that I'm getting in my Ubuntu terminal:
Traceback (most recent call last):
  File "./train.py", line 165, in <module>
    main()
  File "./train.py", line 63, in main
    x_datalists = get_data_lists(args.x_data_txt_path) # a list of x images
  File "./train.py", line 47, in get_data_lists
    f = open(data_path, 'r')
FileNotFoundError: [Errno 2] No such file or directory: './datasets/x_traindata.txt'
Here is the training Python code provided by the author:
import argparse
from datetime import datetime
from random import shuffle
import os
import sys
import time
import math
import tensorflow as tf
import numpy as np
from utils import *
from train_image_reader import *
from net import *

parser = argparse.ArgumentParser(description='')
parser.add_argument("--snapshot_dir", default='./snapshots', help="path of snapshots")
parser.add_argument("--image_size", type=int, default=256, help="load image size")
parser.add_argument("--x_data_txt_path", default='./datasets/x_traindata.txt', help="txt of x images")
parser.add_argument("--y_data_txt_path", default='./datasets/y_traindata.txt', help="txt of y images")
parser.add_argument("--random_seed", type=int, default=1234, help="random seed")
parser.add_argument('--base_lr', type=float, default=0.0002, help='initial learning rate for adam')
parser.add_argument('--epoch', dest='epoch', type=int, default=50, help='# of epoch')
parser.add_argument('--epoch_step', dest='epoch_step', type=int, default=20, help='# of epoch to decay lr')
parser.add_argument("--lamda", type=float, default=10.0, help="L1 lamda")
parser.add_argument('--beta1', dest='beta1', type=float, default=0.5, help='momentum term of adam')
parser.add_argument("--summary_pred_every", type=int, default=200, help="times to summary.")
parser.add_argument("--save_pred_every", type=int, default=8000, help="times to save.")
parser.add_argument("--x_image_forpath", default='./datasets/train/X/images/', help="forpath of x training datas.")
parser.add_argument("--x_label_forpath", default='./datasets/train/X/labels/', help="forpath of x training labels.")
parser.add_argument("--y_image_forpath", default='./datasets/train/Y/images/', help="forpath of y training datas.")
parser.add_argument("--y_label_forpath", default='./datasets/train/Y/labels/', help="forpath of y training labels.")
args = parser.parse_args()

def save(saver, sess, logdir, step):
    model_name = 'model'
    checkpoint_path = os.path.join(logdir, model_name)
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    saver.save(sess, checkpoint_path, global_step=step)
    print('The checkpoint has been created.')

def get_data_lists(data_path):
    f = open(data_path, 'r')
    datas = []
    for line in f:
        data = line.strip("\n")
        datas.append(data)
    return datas

def l1_loss(src, dst):
    return tf.reduce_mean(tf.abs(src - dst))

def gan_loss(src, dst):
    return tf.reduce_mean((src - dst)**2)

def main():
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
    x_datalists = get_data_lists(args.x_data_txt_path)  # a list of x images
    y_datalists = get_data_lists(args.y_data_txt_path)  # a list of y images
    tf.set_random_seed(args.random_seed)
    x_img = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='x_img')
    x_label = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='x_label')
    y_img = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='y_img')
    y_label = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='y_label')
    fake_y = generator(image=x_img, reuse=False, name='generator_x2y')  # G
    fake_x_ = generator(image=fake_y, reuse=False, name='generator_y2x')  # S
    fake_x = generator(image=y_img, reuse=True, name='generator_y2x')  # G'
    fake_y_ = generator(image=fake_x, reuse=True, name='generator_x2y')  # S'
    dy_fake = discriminator(image=fake_y, gen_label=x_label, reuse=False, name='discriminator_y')  # D
    dx_fake = discriminator(image=fake_x, gen_label=y_label, reuse=False, name='discriminator_x')  # D'
    dy_real = discriminator(image=y_img, gen_label=y_label, reuse=True, name='discriminator_y')  # D
    dx_real = discriminator(image=x_img, gen_label=x_label, reuse=True, name='discriminator_x')  # D'
    final_loss = gan_loss(dy_fake, tf.ones_like(dy_fake)) + gan_loss(dx_fake, tf.ones_like(dx_fake)) + args.lamda*l1_loss(x_label, fake_x_) + args.lamda*l1_loss(y_label, fake_y_)  # final objective function
    dy_loss_real = gan_loss(dy_real, tf.ones_like(dy_real))
    dy_loss_fake = gan_loss(dy_fake, tf.zeros_like(dy_fake))
    dy_loss = (dy_loss_real + dy_loss_fake) / 2
    dx_loss_real = gan_loss(dx_real, tf.ones_like(dx_real))
    dx_loss_fake = gan_loss(dx_fake, tf.zeros_like(dx_fake))
    dx_loss = (dx_loss_real + dx_loss_fake) / 2
    dis_loss = dy_loss + dx_loss  # discriminator loss
    final_loss_sum = tf.summary.scalar("final_objective", final_loss)
    dx_loss_sum = tf.summary.scalar("dx_loss", dx_loss)
    dy_loss_sum = tf.summary.scalar("dy_loss", dy_loss)
    dis_loss_sum = tf.summary.scalar("dis_loss", dis_loss)
    discriminator_sum = tf.summary.merge([dx_loss_sum, dy_loss_sum, dis_loss_sum])
    x_images_summary = tf.py_func(cv_inv_proc, [x_img], tf.float32)  # (1, 256, 256, 3) float32
    y_fake_cv2inv_images_summary = tf.py_func(cv_inv_proc, [fake_y], tf.float32)  # (1, 256, 256, 3) float32
    x_label_summary = tf.py_func(label_proc, [x_label], tf.float32)  # (1, 256, 256, 3) float32
    x_gen_label_summary = tf.py_func(label_inv_proc, [fake_x_], tf.float32)  # (1, 256, 256, 3) float32
    image_summary = tf.summary.image('images', tf.concat(axis=2, values=[x_images_summary, y_fake_cv2inv_images_summary, x_label_summary, x_gen_label_summary]), max_outputs=3)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph())
    g_vars = [v for v in tf.trainable_variables() if 'generator' in v.name]
    d_vars = [v for v in tf.trainable_variables() if 'discriminator' in v.name]
    lr = tf.placeholder(tf.float32, None, name='learning_rate')
    d_optim = tf.train.AdamOptimizer(lr, beta1=args.beta1)
    g_optim = tf.train.AdamOptimizer(lr, beta1=args.beta1)
    d_grads_and_vars = d_optim.compute_gradients(dis_loss, var_list=d_vars)
    d_train = d_optim.apply_gradients(d_grads_and_vars)  # update weights of D and D'
    g_grads_and_vars = g_optim.compute_gradients(final_loss, var_list=g_vars)
    g_train = g_optim.apply_gradients(g_grads_and_vars)  # update weights of G, G', S and S'
    train_op = tf.group(d_train, g_train)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=50)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    counter = 0  # training step
    for epoch in range(args.epoch):
        shuffle(x_datalists)  # change the order of x images
        shuffle(y_datalists)  # change the order of y images
        lrate = args.base_lr if epoch < args.epoch_step else args.base_lr*(args.epoch-epoch)/(args.epoch-args.epoch_step)
        for step in range(len(x_datalists)):
            counter += 1
            x_image_resize, x_label_resize, y_image_resize, y_label_resize = TrainImageReader(args.x_image_forpath, args.x_label_forpath, args.y_image_forpath, args.y_label_forpath, x_datalists, y_datalists, step, args.image_size)
            batch_x_image = np.expand_dims(np.array(x_image_resize).astype(np.float32), axis=0)
            batch_x_label = np.expand_dims(np.array(x_label_resize).astype(np.float32), axis=0)
            batch_y_image = np.expand_dims(np.array(y_image_resize).astype(np.float32), axis=0)
            batch_y_label = np.expand_dims(np.array(y_label_resize).astype(np.float32), axis=0)
            start_time = time.time()
            feed_dict = {lr: lrate, x_img: batch_x_image, x_label: batch_x_label, y_img: batch_y_image, y_label: batch_y_label}
            if counter % args.save_pred_every == 0:
                final_loss_value, dis_loss_value, _ = sess.run([final_loss, dis_loss, train_op], feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, counter)
            elif counter % args.summary_pred_every == 0:
                final_loss_value, dis_loss_value, final_loss_sum_value, discriminator_sum_value, image_summary_value, _ = \
                    sess.run([final_loss, dis_loss, final_loss_sum, discriminator_sum, image_summary, train_op], feed_dict=feed_dict)
                summary_writer.add_summary(final_loss_sum_value, counter)
                summary_writer.add_summary(discriminator_sum_value, counter)
                summary_writer.add_summary(image_summary_value, counter)
            else:
                final_loss_value, dis_loss_value, _ = \
                    sess.run([final_loss, dis_loss, train_op], feed_dict=feed_dict)
            print('epoch {:d} step {:d} \t final_loss = {:.3f}, dis_loss = {:.3f}'.format(epoch, step, final_loss_value, dis_loss_value))
    coord.request_stop()
    coord.join(threads)

if __name__ == '__main__':
    main()
Note:
I stumbled upon a somewhat similar problem and a proposed solution here (IOError: [Errno 2] No such file or directory (when it really exist) Python), but I'm not quite sure how or where to implement it in the author's code.
Your call is correct: as I can see from the log, the file being run is ./train.py.
Now, in the repository I can't see any x_traindata.txt file. The only things inside the datasets folder are three Python files, so unless you have created that file yourself, the error is justified: there are no txt files in it. You will need to generate the lists yourself, as in the sketch below.
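If the txt files are simply meant to list the training image names one per line, a minimal sketch for generating them could look like the following. The exact expected format is an assumption on my part; check the repository's train_image_reader.py for what each line should contain:

import os

def write_datalist(image_dir, txt_path):
    # Write one image filename per line, matching the forpath layout
    # that train.py's arguments point at.
    names = sorted(f for f in os.listdir(image_dir)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg')))
    with open(txt_path, 'w') as out:
        for name in names:
            out.write(name + '\n')

write_datalist('./datasets/train/X/images/', './datasets/x_traindata.txt')
write_datalist('./datasets/train/Y/images/', './datasets/y_traindata.txt')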

tensorflow: two models initialized in two separate graphs, but both end up in the last graph

import tensorflow as tf
import numpy as np

class SimpleModel():
    pass

def declare_placeholders(self):
    self.input_batch = tf.placeholder(dtype=tf.int32, shape=[None, None], name='input_batch')
SimpleModel.__declare_placeholders = classmethod(declare_placeholders)

def init_model(self):
    self.__declare_placeholders()
SimpleModel.__init__ = classmethod(init_model)

g_1 = tf.Graph()
with g_1.as_default():
    model1 = SimpleModel()

g_2 = tf.Graph()
with g_2.as_default():
    model2 = SimpleModel()
I don't expect an AssertionError in the following situation:
assert model1.input_batch.graph is g_1
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-5-1ffc6a2e45a2> in <module>
----> 1 assert model1.input_batch.graph is g_1
AssertionError:
But instead, the opposite assertion passes:
assert model1.input_batch.graph is g_2 (????)
But I initialized model1 inside g_1!
How can I modify SimpleModel to solve this problem?
I did something like this to load two different models in two separate graphs.
Here PATH_TO_MODEL_CKPT is the path of the saved model file; alternatively, you can construct the model inside the graph.
import tensorflow as tf
from tensorflow.python.platform import gfile
import os

class Graph1(object):
    def __init__(self, PATH_TO_MODEL_CKPT):
        self.graph1 = tf.Graph()
        with self.graph1.as_default():
            model_exp = os.path.expanduser(PATH_TO_MODEL_CKPT)
            print('Model filename: %s' % model_exp)
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(model_exp, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
        self.sess1 = tf.Session(graph=self.graph1)

class Graph2(object):
    def __init__(self, PATH_TO_MODEL_CKPT):
        self.graph2 = tf.Graph()
        with self.graph2.as_default():
            model_exp = os.path.expanduser(PATH_TO_MODEL_CKPT)
            print('Model filename: %s' % model_exp)
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(model_exp, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
        self.sess2 = tf.Session(graph=self.graph2)
I've solved the problem: use a plain class definition, without pass and classmethod.
import tensorflow as tf
import numpy as np

class SimpleModel():
    def declare_placeholders(self):
        self.input_batch = tf.placeholder(dtype=tf.int32, shape=[None, None], name='input_batch')

    def __init__(self):
        self.declare_placeholders()

g_1 = tf.Graph()
with g_1.as_default():
    model1 = SimpleModel()

g_2 = tf.Graph()
with g_2.as_default():
    model2 = SimpleModel()
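This works because the original version's classmethod wrappers bound self to the class itself, so self.input_batch assigned a single class attribute that both instances shared; constructing model2 inside g_2 overwrote it, which is why model1.input_batch.graph appeared to be g_2. With ordinary instance methods, each model gets its own placeholder, created in whichever graph is the default at construction time. A quick sanity check:

# Each placeholder now lives in the graph that was active when its
# model was constructed.
assert model1.input_batch.graph is g_1
assert model2.input_batch.graph is g_2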

Problem converting Keras model to Tensorflow-Lite using TocoConverter.from_keras_model_file

I'm facing an issue using TOCO to convert a Keras model to TfLite.
I followed the guide at: https://www.tensorflow.org/api_docs/python/tf/contrib/lite/TocoConverter
How I use TOCO:
def create_lite_model(keras_model_file):
    lite_model_name = 'lite_model_file.tflite'
    tf_lite_graph = os.path.join(WEIGHTS_DIRECTORY, lite_model_name)
    converter = tf.contrib.lite.TocoConverter.from_keras_model_file(keras_model_file)
    tf_lite_model = converter.convert()
    open(tf_lite_graph, "wb").write(tf_lite_model)
Getting the following error:
File "/tensorflow/contrib/lite/python/lite.py", line 356, in from_keras_model_file
keras_model = _keras.models.load_model(model_file)
File "/tensorflow/python/keras/engine/saving.py", line 251, in load_model
training_config['weighted_metrics'])
KeyError: 'weighted_metrics'
Does anybody have a solution for this problem?
So far I haven't found a solution, but I'm using a workaround:
converting the Keras model to a TensorFlow graph, storing it with a SavedModelBuilder, and finally using TocoConverter.from_saved_model(...).
import os
import tensorflow as tf
from keras import backend as K
from keras.models import load_model

K.set_learning_phase(False)

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = tf.graph_util.convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph

def create_lite_model_from_saved_model(saved_model_dir, tf_lite_path):
    converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir)
    tf_lite_model = converter.convert()
    open(tf_lite_path, "wb").write(tf_lite_model)

def save_model(keras_model, session, pb_model_path):
    x = keras_model.input
    y = keras_model.output
    prediction_signature = tf.saved_model.signature_def_utils.predict_signature_def({"inputs": x}, {"prediction": y})
    builder = tf.saved_model.builder.SavedModelBuilder(pb_model_path)
    legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
    signature = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: prediction_signature, }
    builder.add_meta_graph_and_variables(session, [tf.saved_model.tag_constants.SERVING], signature_def_map=signature, legacy_init_op=legacy_init_op)
    builder.save()

def run():
    sess = K.get_session()
    keras_model_name = 'keras_model.h5'
    lite_model_name = 'lite_model_file.tflite'
    keras_model_file_path = os.path.join('./weights', keras_model_name)
    lite_model_file_path = os.path.join('./weights', lite_model_name)
    pb_model_path = os.path.join('./weights', 'saveBuilder')
    model = load_model(keras_model_file_path)
    output_names = [node.op.name for node in model.outputs]
    _ = freeze_session(sess, output_names=output_names)
    save_model(keras_model=model, session=sess, pb_model_path=pb_model_path)
    create_lite_model_from_saved_model(saved_model_dir=pb_model_path, tf_lite_path=lite_model_file_path)

if __name__ == "__main__":
    run()
Maybe it's helpful for someone.
The freeze_session(...) function I used is from: How to export Keras .h5 to tensorflow .pb?
If this is a model you've written, make sure you define weighted_metrics in the model compilation:
model.compile(loss='binary_crossentropy', optimizer=<some_optimizer>, metrics=['accuracy'], weighted_metrics=['accuracy'])
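For completeness, a minimal sketch of the full round trip under that suggestion (the optimizer choice and file names are placeholders, and `model` is assumed to be your Keras model):

# Sketch: recompile with weighted_metrics before saving, then convert.
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'], weighted_metrics=['accuracy'])
model.save('keras_model.h5')
converter = tf.contrib.lite.TocoConverter.from_keras_model_file('keras_model.h5')
open('lite_model_file.tflite', 'wb').write(converter.convert())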

sess.run() Memory Error

I'm modifying code from this GitHub repository (https://github.com/hehefan/Video-Classification) so it can accept my own input. When I run it one video at a time for 5 epochs, it runs with no problem, but when I try to run it on multiple videos in succession it hits a MemoryError at the sess.run() line.
Code:
import sys
import os
import numpy as np
import tensorflow as tf
import gzip
# import cPickle
import _pickle as cPickle
import random

from config import FLAGS
from models import DynamicRNN
from models import AveragePooling

filename = ['D2N2Sur', 'H2N2A', 'H2N2C', 'H2N2D', 'H2N2S', 'N2A', 'N2C', 'N2D', 'N2H', 'N2S', 'N2Sur', 'S2N2H']
# TRAINING LABEL
batch_label = list(range(12))
# DATA PROCESSING
data = []
batch_length = []
for name in filename:
    # READ DATA
    counter = 0
    frame = 0
    video = []
    l = ""
    f = open("Train1/" + name + ".txt", "r")
    for line in f:
        l = l + line[:-1]
        counter += 1
        if (counter == 365):
            l = list(l)
            video.append(l)
            l = ""
            counter = 0
            frame += 1
    # MAKE SURE ALL VIDEO HAVE SAME LENGTH
    # PAD BY 0
    frame = FLAGS.max_video_length - frame
    for number in range(frame):
        video.append([0] * FLAGS.feature_size)
    # APPEND VIDEO TO DATA
    data.append(video)
    batch_length.append(FLAGS.max_video_length)

training_steps_per_epoch = len(data) // FLAGS.batch_size
if not os.path.exists(FLAGS.checkpoint_dir):
    os.makedirs(FLAGS.checkpoint_dir)

model = AveragePooling(feature_size=FLAGS.feature_size, max_video_length=FLAGS.max_video_length,
                       num_classes=FLAGS.num_classes, cell_size=FLAGS.size, use_lstm=FLAGS.use_lstm,
                       learning_rate=FLAGS.learning_rate, learning_rate_decay_factor=FLAGS.learning_rate_decay_factor,
                       min_learning_rate=FLAGS.min_learning_rate, training_steps_per_epoch=training_steps_per_epoch,
                       max_gradient_norm=FLAGS.max_gradient_norm, keep_prob=FLAGS.keep_prob, is_training=True)

with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        step = int(ckpt.model_checkpoint_path.split('-')[1])
    else:
        sess.run(tf.global_variables_initializer())
        step = 0
    for epoch in range(1, FLAGS.num_epochs + 1):
        random.shuffle(data)
        batch_feature = []
        batch_feature.append(data)
        feed_dict = {model.frame_feature_ph: batch_feature, model.video_length_ph: batch_length, model.video_label_ph: batch_label}
        loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
        step += 1
        if step % FLAGS.steps_per_checkpoint == 0:
            checkpoint_path = os.path.join(FLAGS.checkpoint_dir, "ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)
            print("%5d: %3d, %.3f" % (step, epoch, loss))
            sys.stdout.flush()
Error:
Traceback (most recent call last):
  File "/root/Documents/EmotionRecognition/masstrain.py", line 114, in <module>
    loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 895, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1093, in _run
    np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
  File "/usr/local/lib/python3.5/dist-packages/numpy/core/numeric.py", line 482, in asarray
    return array(a, dtype, copy=False, order=order)
MemoryError
Process finished with exit code 1
Does anyone have a clue about this?
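One thing that stands out (a hedged observation, not a verified fix): the feed dict hands sess.run() a single nested Python list containing every video at once, built from per-character strings, and the traceback dies exactly where session.py calls np.asarray on that feed value. A sketch of converting the data once to a compact dtype and feeding mini-batches instead, assuming model.frame_feature_ph accepts a [batch, max_video_length, feature_size] float tensor:

# Sketch: materialize the data once as float32 and feed mini-batches,
# instead of one huge nested list of single-character strings.
data_arr = np.asarray(data, dtype=np.float32)        # [num_videos, max_len, feature_size]
length_arr = np.asarray(batch_length, dtype=np.int32)
label_arr = np.asarray(batch_label, dtype=np.int32)

for epoch in range(1, FLAGS.num_epochs + 1):
    # Shuffle indices so labels and lengths stay aligned with the videos.
    perm = np.random.permutation(len(data_arr))
    for start in range(0, len(data_arr), FLAGS.batch_size):
        idx = perm[start:start + FLAGS.batch_size]
        feed_dict = {model.frame_feature_ph: data_arr[idx],
                     model.video_length_ph: length_arr[idx],
                     model.video_label_ph: label_arr[idx]}
        loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)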
