Running threads in parallel takes more time than sequential execution in Python

I have two ONNX deep learning models.
I want to run both models in parallel. I am using threads from Python, but surprisingly it takes more time than running both models sequentially.
Task to be done:
make a class for the models,
load both models in the __init__ of that class,
run both models in parallel for inference on the given input.
Is this normal behavior, and is there a workaround for it?
import os
import json
import queue
import threading

import onnxruntime
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.text import tokenizer_from_json

import tokenization  # project-specific BERT-style wordpiece tokenizer module
# `trim` is a project-specific helper that truncates a token list to a maximum length.


class ModelImp:
    def __init__(self):
        print('loading model...')
        # Load your model here
        curr_dir = os.getcwd()
        model_path = os.path.join(curr_dir, "model", "hatev5.onnx")
        self.hate_sess = onnxruntime.InferenceSession(model_path)
        self.hate_input_name = self.hate_sess.get_inputs()[0].name
        self.hate_seq_len = 15
        self.corona_seq_len = 16
        print('Hate model loaded.')
        model_path = os.path.join(curr_dir, "model", "corona.onnx")
        self.corona_sess = onnxruntime.InferenceSession(model_path)
        self.corona_input_name = self.corona_sess.get_inputs()[0].name
        print('Corona model loaded.')
        print("MODEL.py: loading tokenizers")
        vocab_path = os.path.join(curr_dir, "model", "vocab.txt")
        self.wordpiece_tokenizer = tokenization.FullTokenizer(vocab_path, do_lower_case=True)
        tokenizer_path = os.path.join(curr_dir, "model", "hate_tokenizer.json")
        with open(tokenizer_path) as f:
            data = json.load(f)
            self.hate_tokenizer = tokenizer_from_json(data)
        print("Hate tokenizer loaded.")
        tokenizer_path = os.path.join(curr_dir, "model", "corona_tokenizer.json")
        with open(tokenizer_path) as f:
            data = json.load(f)
            self.corona_tokenizer = tokenizer_from_json(data)
        print("Corona tokenizer loaded.")

    # string version of Eval; `data` is a string
    def thread_eval(self, data, q):
        # Corona model: tokenize, pad, run inference, and hand the score back via the queue.
        corona_lines = []
        corona_line = ' '.join(trim(self.wordpiece_tokenizer.tokenize(data.strip()), self.corona_seq_len))
        corona_lines.append(corona_line)
        corona_line_1 = self.corona_tokenizer.texts_to_sequences(corona_lines)
        corona_line_2 = sequence.pad_sequences(corona_line_1, padding='post', maxlen=self.corona_seq_len)
        corona_pred = self.corona_sess.run(None, {self.corona_input_name: corona_line_2})
        corona_prob = corona_pred[0][0][1]
        q.put(corona_prob)

    def Eval(self, data):
        try:
            d = json.loads(data)
            out_json = {}
            if not (("query" in d) or ("Query" in d)):
                score = -2 * 10000  # new_change
                output = {"Output": [[score]]}  # {"score": score, "Succ": False}
                output_str = json.dumps(output)
                return output_str
            if "query" in d:
                query = d["query"][0]  # new_change
            elif "Query" in d:
                query = d["Query"][0]  # new_change
            if len(query.strip()) == 0:
                query = "good"
            # Run the corona model in a worker thread while the hate model runs on the main thread.
            que = queue.Queue()
            x = threading.Thread(target=self.thread_eval, args=(query, que), daemon=True)
            x.start()
            # HATE model input preprocess
            hate_lines = []
            hate_line = ' '.join(trim(self.wordpiece_tokenizer.tokenize(query.strip()), self.hate_seq_len))
            hate_lines.append(hate_line)
            hate_line_1 = self.hate_tokenizer.texts_to_sequences(hate_lines)
            hate_line_2 = sequence.pad_sequences(hate_line_1, padding='post', maxlen=self.hate_seq_len)
            hate_pred = self.hate_sess.run(None, {self.hate_input_name: hate_line_2})
            hate_prob = hate_pred[0][0][1]
            x.join()
            corona_prob = que.get()
            output_prob = max(corona_prob, hate_prob)
            output_score = int(output_prob * 10000)
            output = {"Output": [[output_score]]}  # {"score": score, "Succ": True}
            output_str = json.dumps(output)
            return output_str
        except Exception as e:
            print("Exception: ", data)
            score = -3 * 10000  # new_change
            output = {"Output": [[score]]}  # {"score": score, "Succ": False}
            output_str = json.dumps(output)
            print(e)
            return output_str
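A possible explanation for the slowdown is that a single onnxruntime session already parallelises one inference across all CPU cores, so two sessions launched from threads end up contending for the same cores. Below is a minimal timing sketch (not the code above; the model paths, input shapes and dtypes are placeholders) that caps each session's intra-op thread pool and compares the sequential and threaded variants:

import time
import threading
import numpy as np
import onnxruntime

def make_session(path, n_threads=1):
    so = onnxruntime.SessionOptions()
    so.intra_op_num_threads = n_threads  # keep each model on its own slice of cores
    return onnxruntime.InferenceSession(path, sess_options=so)

# Hypothetical model paths and dummy inputs; replace with the real ones.
sess_a = make_session("model/hatev5.onnx")
sess_b = make_session("model/corona.onnx")
x_a = np.zeros((1, 15), dtype=np.int32)
x_b = np.zeros((1, 16), dtype=np.int32)

def run(sess, x):
    sess.run(None, {sess.get_inputs()[0].name: x})

# Sequential baseline.
t0 = time.time()
run(sess_a, x_a)
run(sess_b, x_b)
print("sequential:", time.time() - t0)

# Threaded: InferenceSession.run releases the GIL, so the two runs can overlap.
t0 = time.time()
t = threading.Thread(target=run, args=(sess_b, x_b))
t.start()
run(sess_a, x_a)
t.join()
print("threaded:", time.time() - t0)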

Related

Coco_eval - Average Precision and Recall

I trained my model with Mask R-CNN and now I need to test it. How can I extract AP and AR and plot them? I know how to plot with matplotlib, but I need to plot a precision-recall curve, and for that I don't know how to access the AP and AR values. Where are they saved?
I'm using this coco_eval script, and in the summarize function I see print("IoU metric: {}".format(iou_type)); that line shows up in my output with the AP and AR results underneath, but I can't find the calculation itself in the code. Where is it done?
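As a pointer (a hedged sketch based on pycocotools' documented behavior, not on this script alone): summarize() delegates to pycocotools' COCOeval, which stores the twelve printed numbers in .stats, and accumulate() stores the full precision array in .eval['precision']. Assuming coco_evaluator is the object returned by the evaluate function further down, the values can be read like this:

import matplotlib.pyplot as plt

coco_eval_bbox = coco_evaluator.coco_eval["bbox"]   # the wrapped pycocotools COCOeval

# summarize() fills .stats with the 12 printed numbers:
# stats[0] = AP@[.50:.95], stats[1] = AP@.50, stats[8] = AR@maxDets=100, ...
ap, ap50, ar100 = coco_eval_bbox.stats[0], coco_eval_bbox.stats[1], coco_eval_bbox.stats[8]
print("AP:", ap, "AP50:", ap50, "AR@100:", ar100)

# accumulate() fills .eval['precision'] with shape [T, R, K, A, M]:
# T IoU thresholds, R recall thresholds, K categories, A area ranges, M maxDets.
precision = coco_eval_bbox.eval["precision"]
recall_grid = coco_eval_bbox.params.recThrs          # the 101 recall thresholds
pr = precision[0, :, 0, 0, -1]                       # IoU=0.5, category 0, area 'all', maxDets=100
# (entries of -1 mean no detections for that setting)

plt.plot(recall_grid, pr)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("PR curve (IoU=0.5, category 0)")
plt.show()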
coco_eval.py
import json
import tempfile
import numpy as np
import copy
import time
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util
from collections import defaultdict
import utils
class CocoEvaluator(object):
def __init__(self, coco_gt, iou_types):
assert isinstance(iou_types, (list, tuple))
coco_gt = copy.deepcopy(coco_gt)
self.coco_gt = coco_gt
self.iou_types = iou_types
self.coco_eval = {}
for iou_type in iou_types:
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
self.img_ids = []
self.eval_imgs = {k: [] for k in iou_types}
def update(self, predictions):
img_ids = list(np.unique(list(predictions.keys())))
self.img_ids.extend(img_ids)
for iou_type in self.iou_types:
results = self.prepare(predictions, iou_type)
coco_dt = loadRes(self.coco_gt, results) if results else COCO()
coco_eval = self.coco_eval[iou_type]
coco_eval.cocoDt = coco_dt
coco_eval.params.imgIds = list(img_ids)
img_ids, eval_imgs = evaluate(coco_eval)
self.eval_imgs[iou_type].append(eval_imgs)
def synchronize_between_processes(self):
for iou_type in self.iou_types:
self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
def accumulate(self):
for coco_eval in self.coco_eval.values():
coco_eval.accumulate()
def summarize(self):
for iou_type, coco_eval in self.coco_eval.items():
print("IoU metric: {}".format(iou_type))
coco_eval.summarize()
def prepare(self, predictions, iou_type):
if iou_type == "bbox":
return self.prepare_for_coco_detection(predictions)
elif iou_type == "segm":
return self.prepare_for_coco_segmentation(predictions)
elif iou_type == "keypoints":
return self.prepare_for_coco_keypoint(predictions)
else:
raise ValueError("Unknown iou type {}".format(iou_type))
def prepare_for_coco_detection(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"bbox": box,
"score": scores[k],
}
for k, box in enumerate(boxes)
]
)
return coco_results
def prepare_for_coco_segmentation(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
scores = prediction["scores"]
labels = prediction["labels"]
masks = prediction["masks"]
masks = masks > 0.5
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
rles = [
mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
for mask in masks
]
for rle in rles:
rle["counts"] = rle["counts"].decode("utf-8")
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"segmentation": rle,
"score": scores[k],
}
for k, rle in enumerate(rles)
]
)
return coco_results
def prepare_for_coco_keypoint(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
keypoints = prediction["keypoints"]
keypoints = keypoints.flatten(start_dim=1).tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
'keypoints': keypoint,
"score": scores[k],
}
for k, keypoint in enumerate(keypoints)
]
)
return coco_results
def convert_to_xywh(boxes):
xmin, ymin, xmax, ymax = boxes.unbind(1)
return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
def merge(img_ids, eval_imgs):
all_img_ids = utils.all_gather(img_ids)
all_eval_imgs = utils.all_gather(eval_imgs)
merged_img_ids = []
for p in all_img_ids:
merged_img_ids.extend(p)
merged_eval_imgs = []
for p in all_eval_imgs:
merged_eval_imgs.append(p)
merged_img_ids = np.array(merged_img_ids)
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
# keep only unique (and in sorted order) images
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
merged_eval_imgs = merged_eval_imgs[..., idx]
return merged_img_ids, merged_eval_imgs
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
img_ids, eval_imgs = merge(img_ids, eval_imgs)
img_ids = list(img_ids)
eval_imgs = list(eval_imgs.flatten())
coco_eval.evalImgs = eval_imgs
coco_eval.params.imgIds = img_ids
coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################
# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions
def createIndex(self):
# create index
# print('creating index...')
anns, cats, imgs = {}, {}, {}
imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)
anns[ann['id']] = ann
if 'images' in self.dataset:
for img in self.dataset['images']:
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
catToImgs[ann['category_id']].append(ann['image_id'])
# print('index created!')
# create class members
self.anns = anns
self.imgToAnns = imgToAnns
self.catToImgs = catToImgs
self.imgs = imgs
self.cats = cats
maskUtils = mask_util
def loadRes(self, resFile):
"""
Load result file and return a result api object.
Args:
self (obj): coco object with ground truth annotations
resFile (str): file name of result file
Returns:
res (obj): result api object
"""
res = COCO()
res.dataset['images'] = [img for img in self.dataset['images']]
# print('Loading and preparing results...')
# tic = time.time()
if isinstance(resFile, torch._six.string_classes):
anns = json.load(open(resFile))
elif type(resFile) == np.ndarray:
anns = self.loadNumpyAnnotations(resFile)
else:
anns = resFile
assert type(anns) == list, 'results in not an array of objects'
annsImgIds = [ann['image_id'] for ann in anns]
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if 'caption' in anns[0]:
imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
for id, ann in enumerate(anns):
ann['id'] = id + 1
elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
bb = ann['bbox']
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
if 'segmentation' not in ann:
ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
ann['area'] = bb[2] * bb[3]
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'segmentation' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
# now only support compressed RLE format as segmentation results
ann['area'] = maskUtils.area(ann['segmentation'])
if 'bbox' not in ann:
ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'keypoints' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
s = ann['keypoints']
x = s[0::3]
y = s[1::3]
x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
ann['area'] = (x2 - x1) * (y2 - y1)
ann['id'] = id + 1
ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]
# print('DONE (t={:0.2f}s)'.format(time.time()- tic))
res.dataset['annotations'] = anns
createIndex(res)
return res
def evaluate(self):
'''
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
'''
# tic = time.time()
# print('Running per image evaluation...')
p = self.params
# add backward compatibility if useSegm is specified in params
if p.useSegm is not None:
p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
# print('Evaluate annotation type *{}*'.format(p.iouType))
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType == 'segm' or p.iouType == 'bbox':
computeIoU = self.computeIoU
elif p.iouType == 'keypoints':
computeIoU = self.computeOks
self.ious = {
(imgId, catId): computeIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
evalImgs = [
evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
# this is NOT in the pycocotools code, but could be done outside
evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
self._paramsEval = copy.deepcopy(self.params)
# toc = time.time()
# print('DONE (t={:0.2f}s).'.format(toc-tic))
return p.imgIds, evalImgs
#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
And this is my code for evaluation:
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)
    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time
        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
These are the results I got:

Retrieving session crashed for unknown reason

I have the following code that tries to profile the per-layer execution time of a sequential model consisting of three layers: conv, pooling and dense. Running the code below causes the session to crash. I believe this could be due to a RAM issue; I tried increasing the RAM and running the code on a GPU, but I still hit the same crash.
import time
import numpy as np
import tensorflow as tf


def profiler(model, test_input):
    data_input = test_input
    running_times = []
    for layer in model.layers:
        im_input = tf.keras.layers.Input(batch_shape=model.get_layer(layer.name).get_input_shape_at(0))
        im_out = layer(im_input)
        new_model = tf.keras.models.Model(inputs=im_input, outputs=im_out)
        start = time.time()
        data_input = new_model.predict(data_input)
        end = time.time() - start
        milliseconds = end * 1000
        running_times.append(milliseconds)
        del new_model
    return running_times


def build_model(input_dim, input_channels, conv_kernel, conv_strides, conv_filters, pool_size, pool_strides, dense_size):
    x = tf.keras.layers.Input((input_dim, input_dim, input_channels))
    conv = tf.keras.layers.Conv2D(kernel_size=(conv_kernel, conv_kernel), strides=(conv_strides, conv_strides),
                                  filters=conv_filters, padding="same")(x)
    pool = tf.keras.layers.MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_strides, pool_strides),
                                        padding="same")(conv)
    flatten = tf.keras.layers.Flatten()(pool)
    dense = tf.keras.layers.Dense(dense_size)(flatten)
    model = tf.keras.models.Model(inputs=x, outputs=dense)
    return model, flatten.shape.as_list()[-1]


input_dim = 224
input_channels = 3
conv_kernel = "3"
conv_strides = "1,2"
conv_filters = "64,128,256,512"
pool_size = "2"
pool_strides = "2"
dense_size = "1000"


def process_config(config):
    # "3" -> [3]; "1,2" -> [1, 2]; "2-4" -> [2, 3, 4]
    tokens = config.split(",")
    values = []
    for token in tokens:
        token = token.strip()
        if token.find("-") == -1:
            token = int(token)
            values.append(token)
        else:
            start, end = token.split("-")
            start = int(start.strip())
            end = int(end.strip())
            values = values + list(range(start, end + 1))
    return values


conv_kernel_range = process_config(conv_kernel)
conv_strides_range = process_config(conv_strides)
conv_filters_range = process_config(conv_filters)
pool_size_range = process_config(pool_size)
pool_strides_range = process_config(pool_strides)
dense_size_range = process_config(dense_size)


def evaluate_models(conv_kernel_range, conv_strides_range, conv_filters_range, pool_size_range, pool_strides_range, dense_size_range):
    for conv_kernel in conv_kernel_range:
        for conv_strides in conv_strides_range:
            for conv_filters in conv_filters_range:
                for pool_size in pool_size_range:
                    for pool_strides in pool_strides_range:
                        for dense_size in dense_size_range:
                            to_write = open("data.csv", "a+")
                            try:
                                # build_model also expects input_dim and input_channels
                                model, dense_input_shape = build_model(input_dim, input_channels, conv_kernel, conv_strides,
                                                                       conv_filters, pool_size, pool_strides, dense_size)
                            except:
                                continue
                            random_input = np.random.randn(1, 224, 224, 3)
                            running_times = profiler(model, random_input)
                            conv_running_time = running_times[1]
                            pool_running_time = running_times[2]
                            dense_running_time = running_times[-1]
                            to_write.write("%d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n" % (
                                conv_kernel, conv_strides, conv_filters, pool_size, pool_strides,
                                dense_input_shape, dense_size, conv_running_time, pool_running_time, dense_running_time))
                            to_write.close()
                            del model
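One common cause of this kind of crash is that each configuration builds a fresh Keras model per layer without ever clearing TensorFlow's global graph/session state, so memory grows with every iteration. A hedged sketch of one mitigation (not a confirmed fix for this exact setup; it reuses build_model and profiler from above) is to clear the backend session and force garbage collection after profiling each model:

import gc
import tensorflow as tf

def profile_one_config(config_args, test_input):
    model, dense_input_shape = build_model(*config_args)
    try:
        running_times = profiler(model, test_input)
    finally:
        # Drop references and reset Keras' global state so each configuration
        # starts from an empty graph instead of accumulating layers and models.
        del model
        tf.keras.backend.clear_session()
        gc.collect()
    return dense_input_shape, running_times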

How to create an efficient sliding window generator for timeseries event log with caseIDs

The solution I have now works, but the downside is that it takes over 1 hour to fit a simple model. Most of the training time is lost in the Python generator, in the code below called WindowGenerator.gen.
If we post-padded all the data at once, I would expect it to hurt the model's performance, since most of the data would then be zeros.
I have an event log with data of shape approximately (600,000, 500).
The first feature is the group/caseID and the three last ones are labels. Each case has on average 6 events, but it ranges from 4 all the way to 100. In TensorFlow v1 there was a generator that seemed like what I wanted: tf.contrib.training.batch_sequences_with_states.
I want to load batches that get post-padded; my solution was to create an empty 3D array of the desired shape, (batchsize, max case length, num_features).
The idea is to create a sliding window, where we predict the target columns at an offset/shift. An example is shown next, followed by a small padding sketch.
Example:
batch_1 = pd.DataFrame([[1, 1, 0.1, 11],
                        [1, 2, 0.2, 12],
                        [1, 3, 0.3, 13],
                        [1, 4, 0.4, 14],
                        [1, 4, 0.4, 14],
                        [2, 5, 0.5, 15],
                        [2, 6, 0.6, 16],
                        [2, 7, 0.7, 17],
                        [3, 8, 0.8, 18],
                        [3, 9, 0.9, 19],
                        [3, 10, 0.7, 20]], columns=["id", "x1", "x2", "target"])

# we want to be on this form before sliding (each case post-padded with zero rows):
[ [[1, 1, 0.1, 11],
   [1, 2, 0.2, 12],
   [1, 3, 0.3, 13],
   [1, 4, 0.4, 14],
   [1, 4, 0.4, 14]],

  [[2, 5, 0.5, 15],
   [2, 6, 0.6, 16],
   [2, 7, 0.7, 17],
   [2, 7, 0.7, 17],
   [2, 0, 0, 0],
   [2, 0, 0, 0]],

  [[3, 8, 0.8, 18],
   [3, 9, 0.9, 19],
   [3, 10, 0.7, 20],
   [3, 0, 0, 0],
   [3, 0, 0, 0]] ]
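For the "before sliding" shape above, here is a minimal sketch (not the WindowGenerator below; it uses the toy batch_1 frame and, unlike the illustration, drops the id column from the padded rows) of building the post-padded per-case tensor with pandas groupby and Keras pad_sequences:

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def postpad_by_case(df, id_col="id"):
    # One (case_length, num_features) array per caseID, in order of appearance.
    groups = [g.drop(columns=[id_col]).to_numpy("float64")
              for _, g in df.groupby(id_col, sort=False)]
    # pad_sequences post-pads every case with zero rows up to the longest case.
    return pad_sequences(groups, padding="post", dtype="float64")

padded = postpad_by_case(batch_1)      # shape (3, 5, 3) for the toy example
inputs = padded[:, :-1, :-1]           # all but the last timestep, all but the target column
targets = padded[:, 1:, -1]            # target column shifted by one timestep
print(padded.shape, inputs.shape, targets.shape)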
The splitting of data:
unique_caseids = eventvise_training_data.caseid.unique()
np.random.shuffle(unique_caseids)
n = len(unique_caseids)
train_ids = unique_caseids[0:int(n*0.7)]
val_ids = unique_caseids[int(n*0.7):int(n*0.9)]
test_ids = unique_caseids[int(n*0.9):]
train_df = eventvise_training_data[eventvise_training_data.caseid.isin(train_ids)]
val_df = eventvise_training_data[eventvise_training_data.caseid.isin(val_ids)]
test_df = eventvise_training_data[eventvise_training_data.caseid.isin(test_ids)]
The WindowGenerator object
class WindowGenerator(object):
    def __init__(self, input_width, label_width, shift,
                 train_df=train_df, val_df=val_df, test_df=test_df, id_column=0,
                 label_columns=None, batchsize=32, target_neg_ind=-3, unique_ids=None):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        self.batchsize = batchsize
        self.id_column = id_column
        self.id_name = id_column
        self.unique_ids = unique_ids
        self.target_neg_ind = target_neg_ind
        self.generated_train_counter = 0
        self.generated_val_counter = 0
        self.generated_test_counter = 0
        if self.unique_ids is None:
            if type(id_column) == int:
                self.unique_train_ids = train_df[train_df.columns[id_column]].unique()
                self.unique_val_ids = val_df[val_df.columns[id_column]].unique()
                self.unique_test_ids = test_df[test_df.columns[id_column]].unique()
                self.id_name = train_df.columns[id_column]
            elif type(id_column) == str:
                self.unique_train_ids = train_df[id_column].unique()
                self.unique_val_ids = val_df[id_column].unique()
                self.unique_test_ids = test_df[id_column].unique()
        # need the length of unique ids
        self.num_unique_train = len(self.unique_train_ids)
        self.num_unique_val = len(self.unique_val_ids)
        self.num_unique_test = len(self.unique_test_ids)
        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}
        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
        self.label_name = list(self.label_columns_indices.keys())

    def split_window(self, data, index_start_of_target=-3, type_of_data="train", seq_sliding=0, features=None):
        seq_sliding = 0
        if features is None:
            features = self.generate_split_by_id(data, type_of_data=type_of_data)
        if features is None:
            return None, None, None, True
        max_sliding = features.shape[1] - self.total_window_size
        if seq_sliding > max_sliding:
            return inputs, labels, features, False
        if seq_sliding < 1:
            input_slice = self.input_slice
            output_slice = self.labels_slice
        elif seq_sliding >= 1:
            input_slice = slice(0 + seq_sliding, self.input_width + seq_sliding)
            output_slice = slice(0 + seq_sliding, None)
        inputs = features[:, input_slice, :index_start_of_target]
        labels = features[:, output_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(  # -1 since we have removed the id column
                [labels[:, seq_sliding + self.label_start:seq_sliding + self.total_window_size, self.column_indices[name] - 1]
                 for name in self.label_columns],
                axis=-1)
        # Slicing doesn't preserve static shape information, so set the shapes manually.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, len(self.label_columns)])
        return inputs, labels, features, False

    def generate_split_by_id(self, data, type_of_data):
        # to get the appropriate data for continuing on generating new batches
        counter, num_unique, unique_ids = self.get_data_info(type_of_data=type_of_data)
        start = counter
        end = counter + self.batchsize
        id_num = []
        if end > num_unique:  # stop the batch collection before we run out of caseIDs
            end = num_unique
            print("§§Finished training on all the data -- resetting counter§§")
            flag = 1
            counter = 0
            self.set_data_info(type_of_data=type_of_data, counter=counter)
            return
        for num in range(start, end):
            id_num.append(unique_ids[num])
            counter += 1
        self.set_data_info(type_of_data=type_of_data, counter=counter)
        stacking_blocks = []
        max_timesteps = 0
        for ids in id_num[:]:
            temp = data[data[self.id_name] == ids].drop(columns=[self.id_name]).to_numpy("float64")
            if temp.shape[0] > max_timesteps:
                max_timesteps = temp.shape[0]
            stacking_blocks.append(temp)
        # will create a post-padded 3d tensor
        fill_array = np.zeros((len(id_num), max_timesteps, temp.shape[1]))
        for sample_idx, sample in enumerate(stacking_blocks):
            for time_step_idx, time_step in enumerate(sample):
                fill_array[sample_idx, time_step_idx] = time_step
        return tf.stack(fill_array)

    def gen(self, data, type_of_data):
        while 1:
            # reset the sliding
            sliding = 0
            features = None
            while 1:
                input_data, output_data, features, stop_flag = self.split_window(
                    data, index_start_of_target=self.target_neg_ind,
                    type_of_data=type_of_data, seq_sliding=sliding)
                sliding += 1
                # break when we run out of batches
                if input_data is None:
                    break
                yield input_data, output_data
                if stop_flag:
                    break

    def get_data_info(self, type_of_data=None):
        if type_of_data == "train":
            counter = self.generated_train_counter
            num_unique = self.num_unique_train
            unique_ids = self.unique_train_ids
        elif type_of_data == "val":
            counter = self.generated_val_counter
            num_unique = self.num_unique_val
            unique_ids = self.unique_val_ids
        elif type_of_data == "test":
            counter = self.generated_test_counter
            num_unique = self.num_unique_test
            unique_ids = self.unique_test_ids
        return counter, num_unique, unique_ids

    def set_data_info(self, type_of_data=None, counter=0):
        if type_of_data == "train":
            self.generated_train_counter = counter
        elif type_of_data == "val":
            self.generated_val_counter = counter
        elif type_of_data == "test":
            self.generated_test_counter = counter
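Since most of the time is reportedly lost in the Python generator, one option (a hedged sketch, assuming TF 2.3+ and the gen signature above; the label column name "target" and the use of the global train_df are assumptions) is to wrap gen in tf.data.Dataset.from_generator so batch preparation can overlap with training via prefetching:

import tensorflow as tf

wg = WindowGenerator(input_width=4, label_width=1, shift=1,
                     label_columns=["target"], batchsize=32)

output_signature = (
    tf.TensorSpec(shape=(None, wg.input_width, None), dtype=tf.float64),                    # inputs
    tf.TensorSpec(shape=(None, wg.label_width, len(wg.label_columns)), dtype=tf.float64),   # labels
)

train_ds = tf.data.Dataset.from_generator(
    lambda: wg.gen(train_df, "train"),
    output_signature=output_signature,
).prefetch(tf.data.AUTOTUNE)

# model.fit(train_ds, epochs=..., steps_per_epoch=...)  # steps must be given: the generator is unbounded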

Latency issue with Tensorflow cuDNN model execution

I am having problems with a cuDNN RNN model I am trying to train on a set of natural language explanations and embeddings for the semantic parsing of texts. Here is what my RNN model architecture looks like on a simplified level:
class Cudnn_RNN:

    def __init__(self, num_layers, num_units, mode="lstm", keep_prob=1.0, is_train=None, scope="cudnn_rnn"):
        self.num_layers = num_layers
        self.rnns = []
        self.mode = mode
        if mode == "gru":
            rnn = tf.contrib.cudnn_rnn.CudnnGRU
        elif mode == "lstm":
            rnn = tf.contrib.cudnn_rnn.CudnnLSTM
        else:
            raise Exception("Unknown mode for rnn")
        for layer in range(num_layers):
            rnn_fw = rnn(1, num_units)
            rnn_bw = rnn(1, num_units)
            self.rnns.append((rnn_fw, rnn_bw, ))

    def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
        outputs = [tf.transpose(inputs, [1, 0, 2])]
        for layer in range(self.num_layers):
            rnn_fw, rnn_bw = self.rnns[layer]
            output = dropout(outputs[-1], keep_prob=keep_prob, is_train=is_train)
            with tf.variable_scope("fw_{}".format(layer)):
                out_fw, state_fw = rnn_fw(output)
            with tf.variable_scope("bw_{}".format(layer)):
                inputs_bw = tf.reverse_sequence(output, seq_lengths=seq_len, seq_axis=0, batch_axis=1)
                out_bw, state_bw = rnn_bw(inputs_bw)
                out_bw = tf.reverse_sequence(out_bw, seq_lengths=seq_len, seq_axis=0, batch_axis=1)
            outputs.append(tf.concat([out_fw, out_bw], axis=2))
        if concat_layers is True:
            res = tf.concat(outputs[1:], axis=2)
        else:
            res = outputs[-1]
        res = tf.transpose(res, [1, 0, 2])
        state_fw = tf.squeeze(state_fw[0], [0])
        state_bw = tf.squeeze(state_bw[0], [0])
        state = tf.concat([state_fw, state_bw], axis=1)
        return res, state
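As a quick way to separate the cuDNN kernels from the rest of the pipeline, here is a minimal standalone smoke test (a hedged sketch for TF 1.x on a GPU machine; the project's dropout helper is replaced by an identity stand-in and all shapes are made up) that builds the class above on random data and times a few forward passes:

import time
import numpy as np
import tensorflow as tf

def dropout(x, keep_prob=1.0, is_train=None):
    # stand-in for the project's dropout helper
    return x

batch, steps, dim, units = 32, 50, 300, 128
inputs = tf.placeholder(tf.float32, [None, steps, dim])
seq_len = tf.placeholder(tf.int32, [None])

rnn = Cudnn_RNN(num_layers=2, num_units=units)
res, state = rnn(inputs, seq_len)

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.randn(batch, steps, dim).astype(np.float32)
    lens = np.full(batch, steps, dtype=np.int32)
    t0 = time.time()
    for _ in range(10):
        sess.run(res, feed_dict={inputs: x, seq_len: lens})
    print("10 forward passes:", time.time() - t0, "s")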
The model is set up so that after the data is loaded, it goes through pretraining, training, and then evaluation. The data is loaded with no issues, but as soon as the model starts running it gets stuck, not even making it to the pretraining phase. Here is the data loading and execution code (the model executes up until just before print('---Pretrain-----')):
def pseudo_labeling(config, data):
word2idx_dict, fixed_emb, traiable_emb, train_data, dev_data, test_data,pretrain_data,pretrain_data2 = data
pretrain_test_data = (pretrain_data[0][:config.pretrain_test_size],pretrain_data[1][:config.pretrain_test_size],pretrain_data[2][:config.pretrain_test_size,:])
pretrain_data = (pretrain_data[0][config.pretrain_test_size:config.pretrain_test_size+config.pretrain_train_size],pretrain_data[1][config.pretrain_test_size:config.pretrain_test_size+config.pretrain_train_size],pretrain_data[2][config.pretrain_test_size:config.pretrain_test_size+config.pretrain_train_size,:])
lfs = get_lfs(config, word2idx_dict)
identifier = "_{}".format(config.tag)
with tf.variable_scope("models", reuse=tf.AUTO_REUSE):
regex = Pat_Match(config)
match = Soft_Match(config,lfs['lfs'],np.array(lfs['rels'],np.float32),lfs['keywords'],lfs['keywords_rels'], lfs['raw_keywords'],mat=((fixed_emb, traiable_emb, )), word2idx_dict=word2idx_dict, pseudo=True)
sess_config = tf.ConfigProto(allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
if os.path.exists('labeled_data.pkl'):
with open('labeled_data.pkl', 'rb') as f:
labeled_data = pickle.load(f)
with open('unlabeled_data.pkl', 'rb') as f:
unlabeled_data = pickle.load(f)
with open('weights.pkl', 'rb') as f:
lfs["weights"] = pickle.load(f)
else:
with open('exp2pat.json','r') as f:
exp2pat = json.load(f)
exp2pat = {int(key):val for key,val in exp2pat.items()}
lab_d = []
unlab_d = []
tacred_labeled = []
tacred_unlabeled = []
labeled_data = []
unlabeled_data = []
idxx = -1
idx2rel = {val:key for key,val in constant.LABEL_TO_ID.items()}
for x in tqdm(train_data):
idxx+=1
batch = [x["phrase"]]
res, pred = regex.match(batch)
lfs["weights"] += res[0]
new_dict = {}
if np.amax(res) > 0:
x["rel"] = pred.tolist()[0]
x["logic_form"] = np.argmax(res, axis=1).tolist()[0]
new_dict['tokens'] = x['phrase'].token
new_dict['start'] = min(x['phrase'].subj_posi,x['phrase'].obj_posi)+1
new_dict['end'] = max(x['phrase'].subj_posi,x['phrase'].obj_posi)-1
new_dict['rel'] = pred.tolist()[0]
try:
new_dict['pat'] = exp2pat[np.argmax(res, axis=1).tolist()[0]]
lab_d.append(new_dict)
except:
new_dict['pat'] = -1
unlab_d.append(new_dict)
tacred_labeled.append((idxx,idx2rel[x['rel']]))
labeled_data.append(x)
else:
tacred_unlabeled.append(idxx)
new_dict['tokens'] = x['phrase'].token
new_dict['start'] = min(x['phrase'].subj_posi,x['phrase'].obj_posi)+1
new_dict['end'] = max(x['phrase'].subj_posi,x['phrase'].obj_posi)-1
new_dict['rel'] = pred.tolist()[0]
new_dict['pat']=-1
x["rel"] = 0
unlab_d.append(new_dict)
unlabeled_data.append(x)
new_weight = np.array([elem for i, elem in enumerate(list(lfs['weights'])) if i in exp2pat],np.float32)
new_weight = new_weight/np.sum(new_weight)
lfs["weights"] = lfs["weights"] / np.sum(lfs["weights"])
with open('tacred_labeled.json','w') as f:
json.dump(tacred_labeled,f)
with open('tacred_unlabeled.json','w') as f:
json.dump(tacred_unlabeled,f)
with open('labeled_data.pkl','wb') as f:
pickle.dump(labeled_data,f)
with open('unlabeled_data.pkl','wb') as f:
pickle.dump(unlabeled_data,f)
with open('weights.pkl', 'wb') as f:
pickle.dump(lfs["weights"], f)
with open('lab_d.pkl','wb') as f:
pickle.dump(lab_d,f)
with open('unlab_d.pkl','wb') as f:
pickle.dump(unlab_d,f)
with open('weights_d.pkl','wb') as f:
pickle.dump(new_weight,f)
random.shuffle(unlabeled_data)
print('unlabdel data:',str(len(unlabeled_data)),'labeled data:',str(len(labeled_data)))
dev_history, test_history = [], []
dev_history2, test_history2 = [], []
with tf.Session(config=sess_config) as sess:
lr = float(config.init_lr)
writer = tf.summary.FileWriter(config.log_dir + identifier)
sess.run(tf.global_variables_initializer())
print('---Pretrain-----')
for epoch in range(config.pretrain_epoch):
loss_list,pretrain_loss_lis,sim_loss_lis = [],[],[]
for batch in get_pretrain_batch(config, pretrain_data, word2idx_dict):
pretrain_loss_prt,sim_loss_prt,loss,_ = sess.run([match.pretrain_loss,match.sim_loss,match.pretrain_loss_v2,match.pre_train_op],feed_dict={match.pretrain_sents: batch['sents'], match.pretrain_pats: batch['pats'],match.pretrain_labels: batch['labels'],match.is_train:True})
loss_list.append(loss)
pretrain_loss_lis.append(pretrain_loss_prt)
sim_loss_lis.append(sim_loss_prt)
print("{} epoch:".format(str(epoch)))
print("loss:{} pretrain_loss:{} sim_loss:{}".format(str(np.mean(loss_list)),str(np.mean(pretrain_loss_lis)),str(np.mean(sim_loss_lis))))
pred_labels = []
goldens = []
prt_id = 0
for batch in get_pretrain_batch(config,pretrain_data2,word2idx_dict,shuffle=False):
prt_id+=1
pp,ppp,pred_label = sess.run([match.prt_loss,match.prt_pred,match.pretrain_pred_labels],feed_dict={match.pretrain_sents: batch['sents'], match.pretrain_pats: batch['pats'],match.is_train:False,match.pretrain_labels: batch['labels']})
pred_label = list(pred_label)
golden = list(np.reshape(batch['labels'],[-1]))
assert len(golden)==len(pred_label)
pred_labels.extend(pred_label)
goldens.extend(golden)
p,r,f = f_score(pred_labels,goldens)
print('PRF:',(p,r,f))
if p>0.9 and r>0.9:
break
print('\n')
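To find out where execution actually stalls before (or inside) the pretraining loop, one option is TF 1.x's standard run tracing. This is a hedged sketch meant to replace one sess.run call inside the loop above (it reuses the feed names from that code); it writes a Chrome timeline that shows per-op timings and device placement:

import tensorflow as tf
from tensorflow.python.client import timeline

run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()

loss = sess.run(match.pretrain_loss,
                feed_dict={match.pretrain_sents: batch['sents'],
                           match.pretrain_pats: batch['pats'],
                           match.pretrain_labels: batch['labels'],
                           match.is_train: True},
                options=run_options,
                run_metadata=run_metadata)

# Open the resulting file in chrome://tracing to inspect where the step spends its time.
tl = timeline.Timeline(run_metadata.step_stats)
with open("pretrain_step_timeline.json", "w") as f:
    f.write(tl.generate_chrome_trace_format())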
Here are my system specifications:
Tensorflow version: 1.14.0 (w/ GPU support)
Operating System: Linux 9.12
OS Distribution: Debian
OS Architecture: x86_64
Python version: 3.7.6
NLTK version: 3.4.5
CUDA version: 10.0
cuDNN version: 7.4.2
NVIDIA graphics card: Tesla T4
NVIDIA driver version: 410.104
Compiler version: GCC 6.3.0
If anyone would like to share their thoughts on why my model is unable to properly execute to the pretraining phase and beyond, I would greatly appreciate it. Thank you.

How to load images in batches in tensorflow?

I have been using PyTorch and am new to TensorFlow.
In PyTorch we create a dataset class that returns a single (image, label) tuple per index, then pass an instance of that class to torch.utils.data.DataLoader and set the batch size and number of workers.
How do we do the same in TensorFlow?
import glob
import os

import numpy as np
import pandas as pd
from PIL import Image
from torch.utils import data

# `DataDownload` and `transforms` are assumed to be defined elsewhere in the project.


class Dataset_Tuple_Loader(data.Dataset):

    def __init__(self, img_root, csv_label_address, transforms, img_source='local'):
        add = img_root
        if img_source == 'network':
            add, csv_label_address, status = DataDownload(img_root, csv_label_address)
        self.imgs_address_list = glob.glob(add + '/*')
        labels_csv = pd.read_csv(csv_label_address)
        labels_csv.set_index('image_name', inplace=True)
        self.labels_csv = labels_csv
        self.transforms = transforms

    def __len__(self):
        return len(self.imgs_address_list)

    def get_filename(self, filename):
        base = os.path.basename(filename)
        return os.path.splitext(base)[0]

    def __getitem__(self, index):
        img_add = self.imgs_address_list[index]
        img = Image.open(img_add)
        img = self.transforms(np.asarray(img))
        filename = self.get_filename(img_add)
        label = int(self.labels_csv.loc[filename][0])
        return img, label


params = {'batch_size': 2,
          'shuffle': True,
          'num_workers': 1}

csv = 'label_address'
image_root = 'img_add'
just_set = Dataset_Tuple_Loader(image_root, csv, transforms, img_source='local')
generator = data.DataLoader(just_set, **params)

for tup in generator:
    img = tup[0]
    lb = tup[1]
    print(img.shape, ' ', lb)
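For comparison, here is a minimal tf.data sketch that mirrors the loader above (a hedged example assuming TF 2.x; the CSV layout, the image_name index and the placeholder paths come from the PyTorch snippet, while the decode/resize choices are assumptions):

import glob
import os
import pandas as pd
import tensorflow as tf

image_root = 'img_add'              # same placeholder paths as the PyTorch snippet
csv_label_address = 'label_address'

paths = sorted(glob.glob(image_root + '/*'))
labels_csv = pd.read_csv(csv_label_address).set_index('image_name')
labels = [int(labels_csv.loc[os.path.splitext(os.path.basename(p))[0]][0]) for p in paths]

def load_example(path, label):
    # read and decode one image; the resize/normalisation is an assumption, adjust to your model
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, (224, 224)) / 255.0
    return img, label

dataset = (tf.data.Dataset.from_tensor_slices((paths, labels))
           .shuffle(len(paths))                                        # like shuffle=True
           .map(load_example, num_parallel_calls=tf.data.AUTOTUNE)     # like num_workers
           .batch(2)                                                   # like batch_size
           .prefetch(tf.data.AUTOTUNE))

for img, lb in dataset:
    print(img.shape, ' ', lb)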
