Latency issue with TensorFlow cuDNN model execution - Python

I am having problems with a cuDNN RNN model I am trying to train on a set of natural language explanations and embeddings for the semantic parsing of texts. Here is what my RNN model architecture looks like on a simplified level:
class Cudnn_RNN:

    def __init__(self, num_layers, num_units, mode="lstm", keep_prob=1.0, is_train=None, scope="cudnn_rnn"):
        self.num_layers = num_layers
        self.rnns = []
        self.mode = mode
        if mode == "gru":
            rnn = tf.contrib.cudnn_rnn.CudnnGRU
        elif mode == "lstm":
            rnn = tf.contrib.cudnn_rnn.CudnnLSTM
        else:
            raise Exception("Unknown mode for rnn")
        for layer in range(num_layers):
            rnn_fw = rnn(1, num_units)
            rnn_bw = rnn(1, num_units)
            self.rnns.append((rnn_fw, rnn_bw, ))

    def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
        outputs = [tf.transpose(inputs, [1, 0, 2])]
        for layer in range(self.num_layers):
            rnn_fw, rnn_bw = self.rnns[layer]
            output = dropout(outputs[-1], keep_prob=keep_prob, is_train=is_train)
            with tf.variable_scope("fw_{}".format(layer)):
                out_fw, state_fw = rnn_fw(output)
            with tf.variable_scope("bw_{}".format(layer)):
                inputs_bw = tf.reverse_sequence(output, seq_lengths=seq_len, seq_axis=0, batch_axis=1)
                out_bw, state_bw = rnn_bw(inputs_bw)
                out_bw = tf.reverse_sequence(out_bw, seq_lengths=seq_len, seq_axis=0, batch_axis=1)
            outputs.append(tf.concat([out_fw, out_bw], axis=2))
        if concat_layers is True:
            res = tf.concat(outputs[1:], axis=2)
        else:
            res = outputs[-1]
        res = tf.transpose(res, [1, 0, 2])
        state_fw = tf.squeeze(state_fw[0], [0])
        state_bw = tf.squeeze(state_bw[0], [0])
        state = tf.concat([state_fw, state_bw], axis=1)
        return res, state
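For context, this is roughly how the class gets called elsewhere in the project (the shapes and the dropout helper are assumptions on my part, not shown above):

# inputs: [batch, time, dim] float32 tensor; seq_len: [batch] int32
# vector of true sequence lengths (both assumed).
encoder = Cudnn_RNN(num_layers=2, num_units=100, mode="lstm")
res, state = encoder(inputs, seq_len, keep_prob=0.7, is_train=is_train)
# res: [batch, time, num_layers * 2 * num_units] when concat_layers=True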
The model is set up so that after the data is loaded, it goes through pretraining, training, and then evaluation. The data loads with no issues, but as soon as the model starts running it gets stuck, never even reaching the pretraining phase. Here is the data loading and execution code (the model executes up until just before print('---Pretrain-----')):
def pseudo_labeling(config, data):
    word2idx_dict, fixed_emb, trainable_emb, train_data, dev_data, test_data, pretrain_data, pretrain_data2 = data
    pretrain_test_data = (pretrain_data[0][:config.pretrain_test_size],
                          pretrain_data[1][:config.pretrain_test_size],
                          pretrain_data[2][:config.pretrain_test_size, :])
    pretrain_data = (pretrain_data[0][config.pretrain_test_size:config.pretrain_test_size + config.pretrain_train_size],
                     pretrain_data[1][config.pretrain_test_size:config.pretrain_test_size + config.pretrain_train_size],
                     pretrain_data[2][config.pretrain_test_size:config.pretrain_test_size + config.pretrain_train_size, :])
    lfs = get_lfs(config, word2idx_dict)
    identifier = "_{}".format(config.tag)
    with tf.variable_scope("models", reuse=tf.AUTO_REUSE):
        regex = Pat_Match(config)
        match = Soft_Match(config, lfs['lfs'], np.array(lfs['rels'], np.float32), lfs['keywords'],
                           lfs['keywords_rels'], lfs['raw_keywords'], mat=(fixed_emb, trainable_emb),
                           word2idx_dict=word2idx_dict, pseudo=True)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    if os.path.exists('labeled_data.pkl'):
        with open('labeled_data.pkl', 'rb') as f:
            labeled_data = pickle.load(f)
        with open('unlabeled_data.pkl', 'rb') as f:
            unlabeled_data = pickle.load(f)
        with open('weights.pkl', 'rb') as f:
            lfs["weights"] = pickle.load(f)
    else:
        with open('exp2pat.json', 'r') as f:
            exp2pat = json.load(f)
        exp2pat = {int(key): val for key, val in exp2pat.items()}
        lab_d = []
        unlab_d = []
        tacred_labeled = []
        tacred_unlabeled = []
        labeled_data = []
        unlabeled_data = []
        idxx = -1
        idx2rel = {val: key for key, val in constant.LABEL_TO_ID.items()}
        for x in tqdm(train_data):
            idxx += 1
            batch = [x["phrase"]]
            res, pred = regex.match(batch)
            lfs["weights"] += res[0]
            new_dict = {}
            if np.amax(res) > 0:
                x["rel"] = pred.tolist()[0]
                x["logic_form"] = np.argmax(res, axis=1).tolist()[0]
                new_dict['tokens'] = x['phrase'].token
                new_dict['start'] = min(x['phrase'].subj_posi, x['phrase'].obj_posi) + 1
                new_dict['end'] = max(x['phrase'].subj_posi, x['phrase'].obj_posi) - 1
                new_dict['rel'] = pred.tolist()[0]
                try:
                    new_dict['pat'] = exp2pat[np.argmax(res, axis=1).tolist()[0]]
                    lab_d.append(new_dict)
                except KeyError:
                    new_dict['pat'] = -1
                    unlab_d.append(new_dict)
                tacred_labeled.append((idxx, idx2rel[x['rel']]))
                labeled_data.append(x)
            else:
                tacred_unlabeled.append(idxx)
                new_dict['tokens'] = x['phrase'].token
                new_dict['start'] = min(x['phrase'].subj_posi, x['phrase'].obj_posi) + 1
                new_dict['end'] = max(x['phrase'].subj_posi, x['phrase'].obj_posi) - 1
                new_dict['rel'] = pred.tolist()[0]
                new_dict['pat'] = -1
                x["rel"] = 0
                unlab_d.append(new_dict)
                unlabeled_data.append(x)
        new_weight = np.array([elem for i, elem in enumerate(list(lfs['weights'])) if i in exp2pat], np.float32)
        new_weight = new_weight / np.sum(new_weight)
        lfs["weights"] = lfs["weights"] / np.sum(lfs["weights"])
        with open('tacred_labeled.json', 'w') as f:
            json.dump(tacred_labeled, f)
        with open('tacred_unlabeled.json', 'w') as f:
            json.dump(tacred_unlabeled, f)
        with open('labeled_data.pkl', 'wb') as f:
            pickle.dump(labeled_data, f)
        with open('unlabeled_data.pkl', 'wb') as f:
            pickle.dump(unlabeled_data, f)
        with open('weights.pkl', 'wb') as f:
            pickle.dump(lfs["weights"], f)
        with open('lab_d.pkl', 'wb') as f:
            pickle.dump(lab_d, f)
        with open('unlab_d.pkl', 'wb') as f:
            pickle.dump(unlab_d, f)
        with open('weights_d.pkl', 'wb') as f:
            pickle.dump(new_weight, f)
    random.shuffle(unlabeled_data)
    print('unlabeled data:', str(len(unlabeled_data)), 'labeled data:', str(len(labeled_data)))
    dev_history, test_history = [], []
    dev_history2, test_history2 = [], []
    with tf.Session(config=sess_config) as sess:
        lr = float(config.init_lr)
        writer = tf.summary.FileWriter(config.log_dir + identifier)
        sess.run(tf.global_variables_initializer())
        print('---Pretrain-----')
        for epoch in range(config.pretrain_epoch):
            loss_list, pretrain_loss_lis, sim_loss_lis = [], [], []
            for batch in get_pretrain_batch(config, pretrain_data, word2idx_dict):
                pretrain_loss_prt, sim_loss_prt, loss, _ = sess.run(
                    [match.pretrain_loss, match.sim_loss, match.pretrain_loss_v2, match.pre_train_op],
                    feed_dict={match.pretrain_sents: batch['sents'], match.pretrain_pats: batch['pats'],
                               match.pretrain_labels: batch['labels'], match.is_train: True})
                loss_list.append(loss)
                pretrain_loss_lis.append(pretrain_loss_prt)
                sim_loss_lis.append(sim_loss_prt)
            print("{} epoch:".format(str(epoch)))
            print("loss:{} pretrain_loss:{} sim_loss:{}".format(
                str(np.mean(loss_list)), str(np.mean(pretrain_loss_lis)), str(np.mean(sim_loss_lis))))
            pred_labels = []
            goldens = []
            prt_id = 0
            for batch in get_pretrain_batch(config, pretrain_data2, word2idx_dict, shuffle=False):
                prt_id += 1
                pp, ppp, pred_label = sess.run(
                    [match.prt_loss, match.prt_pred, match.pretrain_pred_labels],
                    feed_dict={match.pretrain_sents: batch['sents'], match.pretrain_pats: batch['pats'],
                               match.is_train: False, match.pretrain_labels: batch['labels']})
                pred_label = list(pred_label)
                golden = list(np.reshape(batch['labels'], [-1]))
                assert len(golden) == len(pred_label)
                pred_labels.extend(pred_label)
                goldens.extend(golden)
            p, r, f = f_score(pred_labels, goldens)
            print('PRF:', (p, r, f))
            if p > 0.9 and r > 0.9:
                break
            print('\n')
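To narrow down where it hangs, this is the kind of instrumentation I would add around the session setup (log_device_placement is a standard ConfigProto flag; the timing print is my own sketch, not part of the code above):

import time

# Sketch: log op placement and time the variable initializer, to see
# whether the hang is in graph placement or in the first GPU kernels.
sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
    t0 = time.time()
    sess.run(tf.global_variables_initializer())
    print('variable init took %.1f s' % (time.time() - t0))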
Here are my system specifications:
TensorFlow version: 1.14.0 (with GPU support)
Operating system: Debian 9.12 (Linux)
OS Architecture: x86_64
Python version: 3.7.6
NLTK version: 3.4.5
CUDA version: 10.0
cuDNN version: 7.4.2
NVIDIA graphics card: Tesla T4
NVIDIA driver version: 410.104
Compiler version: GCC 6.3.0
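A quick way to confirm TensorFlow actually sees the GPU with this setup (standard TF 1.x APIs; this is a sanity check, not part of the model code):

import tensorflow as tf
from tensorflow.python.client import device_lib

# Lists the CPU/GPU devices TensorFlow can see; a working CUDA + cuDNN
# install should show a /device:GPU:0 entry for the Tesla T4.
print(device_lib.list_local_devices())
print(tf.test.is_gpu_available(cuda_only=False))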
If anyone can share their thoughts on why my model never properly reaches the pretraining phase and beyond, I would greatly appreciate it. Thank you.

Related

Retrieving session crashed for unknown reason

I have the following code that tries to profile the per-layer execution time of a sequential model consisting of three layers: conv, pooling, and dense. Running the code below causes the system to crash. I believe this could be a RAM issue; I tried increasing the RAM and running the code on a GPU, but I still hit the same problem.
def profiler(model, test_input):
    data_input = test_input
    running_times = []
    for layer in model.layers:
        # rebuild each layer as a standalone one-layer model and time it
        im_input = tf.keras.layers.Input(batch_shape=model.get_layer(layer.name).get_input_shape_at(0))
        im_out = layer(im_input)
        new_model = tf.keras.models.Model(inputs=im_input, outputs=im_out)
        start = time.time()
        data_input = new_model.predict(data_input)
        end = time.time() - start
        milliseconds = end * 1000
        running_times.append(milliseconds)
        del new_model
    return running_times
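One caveat I would flag here (my own assumption, not part of the original code): the first predict() on a freshly built model includes graph-construction overhead, so the timing is inflated. A warm-up call before the timed one gives cleaner numbers:

# Sketch: inside the loop, warm up each single-layer model once
# before timing it.
_ = new_model.predict(data_input)        # warm-up, not timed
start = time.time()
data_input = new_model.predict(data_input)
running_times.append((time.time() - start) * 1000)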
def build_model(input_dim, input_channels, conv_kernel, conv_strides, conv_filters, pool_size, pool_strides, dense_size):
    x = tf.keras.layers.Input((input_dim, input_dim, input_channels))
    conv = tf.keras.layers.Conv2D(kernel_size=(conv_kernel, conv_kernel), strides=(conv_strides, conv_strides),
                                  filters=conv_filters, padding="same")(x)
    pool = tf.keras.layers.MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_strides, pool_strides),
                                        padding="same")(conv)
    flatten = tf.keras.layers.Flatten()(pool)
    dense = tf.keras.layers.Dense(dense_size)(flatten)
    model = tf.keras.models.Model(inputs=x, outputs=dense)
    return model, flatten.shape.as_list()[-1]
input_dim = 224
input_channels = 3
conv_kernel = "3"
conv_strides = "1,2"
conv_filters = "64,128,256,512"
pool_size = "2"
pool_strides = "2"
dense_size = "1000"
def process_config(config):
    tokens = config.split(",")
    values = []
    for token in tokens:
        token = token.strip()
        if token.find("-") == -1:
            token = int(token)
            values.append(token)
        else:
            start, end = token.split("-")
            start = int(start.strip())
            end = int(end.strip())
            values = values + list(range(start, end + 1))
    return values
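For example, the parser expands comma-separated values and dash ranges:

print(process_config("3"))      # [3]
print(process_config("1,2"))    # [1, 2]
print(process_config("2-4,7"))  # [2, 3, 4, 7]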
conv_kernel_range = process_config(conv_kernel)
conv_strides_range = process_config(conv_strides)
conv_filters_range = process_config(conv_filters)
pool_size_range = process_config(pool_size)
pool_strides_range = process_config(pool_strides)
dense_size_range = process_config(dense_size)
def evaluate_models(conv_kernel_range, conv_strides_range, conv_filters_range, pool_size_range, pool_strides_range, dense_size_range):
    for conv_kernel in conv_kernel_range:
        for conv_strides in conv_strides_range:
            for conv_filters in conv_filters_range:
                for pool_size in pool_size_range:
                    for pool_strides in pool_strides_range:
                        for dense_size in dense_size_range:
                            to_write = open("data.csv", "a+")
                            try:
                                # input_dim and input_channels are the module-level
                                # values defined above; build_model expects them as
                                # its first two arguments
                                model, dense_input_shape = build_model(input_dim, input_channels,
                                                                       conv_kernel, conv_strides,
                                                                       conv_filters, pool_size,
                                                                       pool_strides, dense_size)
                            except Exception:
                                continue
                            random_input = np.random.randn(1, 224, 224, 3)
                            running_times = profiler(model, random_input)
                            conv_running_time = running_times[1]
                            pool_running_time = running_times[2]
                            dense_running_time = running_times[-1]
                            to_write.write("%d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n" % (
                                conv_kernel, conv_strides, conv_filters, pool_size, pool_strides,
                                dense_input_shape, dense_size, conv_running_time,
                                pool_running_time, dense_running_time))
                            to_write.close()
                            del model
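If the crash really is memory exhaustion, one likely contributor is that every build_model/profiler call adds nodes to the same default graph, so memory grows monotonically across the nested loops. A minimal mitigation (tf.keras.backend.clear_session is a standard API; putting it in the loop is my suggestion, not something the code above does):

# Sketch: drop accumulated graph state once per configuration,
# right after `del model` at the end of the innermost loop body.
tf.keras.backend.clear_session()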

Running threads in parallel takes more time than sequential execution in Python

I have two ONNX deep learning models and I want to run both of them in parallel. I am using threads from Python, but surprisingly it takes more time than running both models sequentially.

The task:
make a class for the model
load both models in the __init__ of that class
run both models in parallel for inference on a given input

Is this normal behavior, and what workaround would you suggest? (A minimal sketch of the pattern I mean follows; my actual class is below it.)
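Reduced to a minimal sketch, the pattern looks like this (the paths are mine, but the dummy zero inputs and their shapes/dtypes are placeholder assumptions, not my real preprocessing):

import numpy as np
import onnxruntime
from concurrent.futures import ThreadPoolExecutor

sess_a = onnxruntime.InferenceSession("model/hatev5.onnx")
sess_b = onnxruntime.InferenceSession("model/corona.onnx")

def infer(sess, feed):
    # onnxruntime hands the work to its own C++ thread pool here
    return sess.run(None, feed)

with ThreadPoolExecutor(max_workers=2) as pool:
    fut_a = pool.submit(infer, sess_a, {sess_a.get_inputs()[0].name: np.zeros((1, 15), np.int32)})
    fut_b = pool.submit(infer, sess_b, {sess_b.get_inputs()[0].name: np.zeros((1, 16), np.int32)})
    out_a, out_b = fut_a.result(), fut_b.result()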
class ModelImp:

    def __init__(self):
        print('loading model...')
        curr_dir = os.getcwd()
        # hate model
        model_path = os.path.join(curr_dir, "model", "hatev5.onnx")
        self.hate_sess = onnxruntime.InferenceSession(model_path)
        self.hate_input_name = self.hate_sess.get_inputs()[0].name
        self.hate_seq_len = 15
        self.corona_seq_len = 16
        print('*** Hate model loaded. ***')
        # corona model
        model_path = os.path.join(curr_dir, "model", "corona.onnx")
        self.corona_sess = onnxruntime.InferenceSession(model_path)
        self.corona_input_name = self.corona_sess.get_inputs()[0].name
        print('*** Corona model loaded. ***')
        # tokenizers
        print('*** MODEL.py: loading tokenizers ***')
        vocab_path = os.path.join(curr_dir, "model", "vocab.txt")
        self.wordpiece_tokenizer = tokenization.FullTokenizer(vocab_path, do_lower_case=True)
        tokenizer_path = os.path.join(curr_dir, "model", "hate_tokenizer.json")
        with open(tokenizer_path) as f:
            data = json.load(f)
            self.hate_tokenizer = tokenizer_from_json(data)
        print('*** HATE MODEL.py: tokenizer loaded ***')
        tokenizer_path = os.path.join(curr_dir, "model", "corona_tokenizer.json")
        with open(tokenizer_path) as f:
            data = json.load(f)
            self.corona_tokenizer = tokenizer_from_json(data)
        print('*** CORONA MODEL.py: tokenizer loaded ***')

    # string version of Eval; data is a string
    def thread_eval(self, data, q):
        # corona model: tokenize, pad, run, and hand the probability back
        corona_lines = []
        corona_line = ' '.join(trim(self.wordpiece_tokenizer.tokenize(data.strip()), self.corona_seq_len))
        corona_lines.append(corona_line)
        corona_line_1 = self.corona_tokenizer.texts_to_sequences(corona_lines)
        corona_line_2 = sequence.pad_sequences(corona_line_1, padding='post', maxlen=self.corona_seq_len)
        corona_pred = self.corona_sess.run(None, {self.corona_input_name: corona_line_2})
        corona_prob = corona_pred[0][0][1]
        q.put(corona_prob)

    def Eval(self, data):
        try:
            d = json.loads(data)
            if not (("query" in d) or ("Query" in d)):
                score = -2 * 10000  # new_change
                output = {"Output": [[score]]}  # {"score": score, "Succ": False}
                return json.dumps(output)
            if "query" in d:
                query = d["query"][0]  # new_change
            elif "Query" in d:
                query = d["Query"][0]  # new_change
            if len(query.strip()) == 0:
                query = "good"
            # run the corona model in a background thread
            que = queue.Queue()
            x = threading.Thread(target=self.thread_eval, args=(query, que), daemon=True)
            x.start()
            # hate model input preprocess, then inference on this thread
            hate_lines = []
            hate_line = ' '.join(trim(self.wordpiece_tokenizer.tokenize(query.strip()), self.hate_seq_len))
            hate_lines.append(hate_line)
            hate_line_1 = self.hate_tokenizer.texts_to_sequences(hate_lines)
            hate_line_2 = sequence.pad_sequences(hate_line_1, padding='post', maxlen=self.hate_seq_len)
            hate_pred = self.hate_sess.run(None, {self.hate_input_name: hate_line_2})
            hate_prob = hate_pred[0][0][1]
            # collect the corona result
            x.join()
            corona_prob = que.get()
            output_prob = max(corona_prob, hate_prob)
            output_score = int(output_prob * 10000)
            output = {"Output": [[output_score]]}  # {"score": score, "Succ": True}
            return json.dumps(output)
        except Exception as e:
            print("Exception: ", data)
            score = -3 * 10000  # new_change
            output = {"Output": [[score]]}  # {"score": score, "Succ": False}
            print(e)
            return json.dumps(output)
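One explanation I have come across (flagging this as an assumption, not something verified above): each InferenceSession spins up its own intra-op thread pool, so two sessions running at once can oversubscribe the CPU cores and end up slower than back-to-back execution. onnxruntime exposes a knob to cap this:

import onnxruntime

# Sketch: give each session a share of the cores instead of letting
# both claim all of them (the value 2 assumes a 4-core machine).
opts = onnxruntime.SessionOptions()
opts.intra_op_num_threads = 2
hate_sess = onnxruntime.InferenceSession("model/hatev5.onnx", sess_options=opts)
corona_sess = onnxruntime.InferenceSession("model/corona.onnx", sess_options=opts)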

neural network of 3 layers from scratch (debugging)

I followed https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/?unapproved=573495&moderation-hash=e897cca43ec874103aed4efd605b10aa#comment-573495 and made a neural network, but every time I run it an error with the lists occurs. Please help me debug it. My code is on Google Colab: https://colab.research.google.com/drive/1Skfq3A1u7Mwdo72YBRWOm4x0SCp8mIFn?usp=sharing
"""ml_nolibs.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Skfq3A1u7Mwdo72YBRWOm4x0SCp8mIFn
"""
from random import seed, random
import numpy as np

def make_nn(n_inps, n_midd, n_outs):
    network_weights = []
    weights_input_hidden = [{'weights': [random() for i in range(n_inps + 1)]} for i in range(n_midd)]
    weights_hidden_output = [{'weights': [random() for i in range(n_midd + 1)]} for i in range(n_outs)]
    network_weights.append(weights_input_hidden)
    network_weights.append(weights_hidden_output)
    return network_weights

print(make_nn(1, 2, 2))
def sigmoid(n):
    return 1 / (1 + np.exp(-n))

def activate(weights, inputs):
    if (len(weights) - 1) != len(inputs):
        raise ValueError("activate: weights/inputs length mismatch")
    ret = weights[-1]  # bias term
    for i in range(len(inputs)):
        ret += inputs[i] * weights[i]
    return sigmoid(ret)

def forward_propagate(network, inputs):
    print(inputs)
    outs = []
    for layer_of_connections in network:
        outs = []
        for neuron in layer_of_connections:
            neuron['output'] = activate(neuron['weights'], inputs)
            outs.append(neuron['output'])
        inputs = outs
    return outs

print(forward_propagate(
    [[{'weights': [0.7160471173051909, 0.5215147566159989]},
      {'weights': [0.604197405116923, 0.4628263091169783]}],
     [{'weights': [0.4638546941280478, 0.5191037591740162, 0.8253877642443779]},
      {'weights': [0.4635745950402146, 0.6078498529022434, 0.0074536694308950935]}]],
    [1]))
"""testing"""
network = make_nn(2,4,3)
print(forward_propagate(network,[6.9,9.6]))
"""assigning blame"""
def transfer_d(output):
    # derivative of the sigmoid, expressed in terms of its output
    return output * (1 - output)

def backpropagate(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = []
        if i == (len(network) - 1):
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        else:
            for j in range(len(layer)):
                err = 0
                for neuron in network[i + 1]:
                    err += neuron['weights'][j] * neuron['delta']
                errors.append(err)
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_d(neuron['output'])
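As a quick sanity check of the delta rule above (output delta = (target - output) * output * (1 - output)), here is a one-neuron example I worked by hand:

# One output neuron with output 0.8 and target 1.0:
# delta = (1.0 - 0.8) * 0.8 * (1 - 0.8) = 0.032
net = [[{'weights': [0.5, 0.1], 'output': 0.8}]]
backpropagate(net, [1.0])
print(net[0][0]['delta'])  # 0.032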
"""# TRAINING TIME!"""
def update_weights(network, inps, l_rate=.1):
    # inps is the feature vector only (the label is stripped by the caller)
    processed_inps = inps
    for i in range(len(network)):
        if i != 0:
            processed_inps = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(processed_inps)):
                neuron['weights'][j] += l_rate * neuron['delta'] * processed_inps[j]
            neuron['weights'][-1] += l_rate * neuron['delta']

def choose_ele(l):
    return l[int(random() * (len(l) - 1))]

def train_netw(network, data, n_outputs, l_rate=.1, n_epoch=10000):
    # n_outputs is used for one-hot encoding the class label
    for epoch in range(n_epoch):
        sum_error = 0
        row = choose_ele(data)
        nn_outs = forward_propagate(network, row[:-1])
        expected = [0 for i in range(n_outputs)]
        expected[row[-1]] = 1
        sum_error = sum([(expected[i] - nn_outs[i]) ** 2 for i in range(len(expected))])
        backpropagate(network, expected)
        update_weights(network, row[:-1])
        if epoch % 100 == 0:
            print('epoch = %d err = %.3f' % (epoch, sum_error))
data = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]
n_inps = len(data[0]) - 1
n_outs = len(set([d[-1] for d in data]))
netw = make_nn(n_inps, 2, n_outs)
train_netw(netw, data, n_outs, .5, 200)
print(netw)

TensorFlow session.run() crashes Python

TensorFlow: tensorflow-gpu 0.12
Anaconda: Anaconda 4.2.9 (Python 3.5)
GPU: NVIDIA 940M (notebook, 2 GB)
OS: Win7 64-bit SP1
CUDA: 8.0
cuDNN: 5.0
IDE: PyCharm
The MNIST test runs OK on the GPU (CNNs), but when it comes to my own project, Python crashes. I debugged my code and found that the call to **session.run()** leads to this problem. The error is:
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:586] Could not identify NUMA node of /job:localhost/replica:0/task:0/gpu:0, defaulting to 0. Your kernel may not have been built with NUMA support.
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_event.cc:49] Error polling for event status: failed to query event: CUDA_ERROR_LAUNCH_FAILED
F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_event_mgr.cc:198] Unexpected Event status: 1
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:385] could not create cudnn handle: **CUDNN_STATUS_INTERNAL_ERROR**
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:352] could not destroy cudnn handle: **CUDNN_STATUS_BAD_PARAM**
F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\kernels\conv_ops.cc:532] **Check failed: stream->parent()->GetConvolveAlgorithms(&algorithms)**
Since MNIST runs correctly, there should be no defect in my GPU driver, CUDA, or cuDNN. I really don't know where this problem comes from.
This is my code:
import cv2
import os
import tensorflow as tf
import data_trans as dt

with tf.variable_scope('weights'):
    weights = {
        # 60*60*3 -> 60*60*32 -> 30*30*32
        'conv1': tf.get_variable('conv1', [5, 5, 3, 32], initializer=tf.contrib.layers.xavier_initializer_conv2d()),
        # 30*30*32 -> 30*30*64 -> 15*15*64
        'conv2': tf.get_variable('conv2', [5, 5, 32, 64], initializer=tf.contrib.layers.xavier_initializer_conv2d()),
        # 15*15*64 -> 12*12*128 -> 6*6*128
        'conv3': tf.get_variable('conv3', [4, 4, 64, 128], initializer=tf.contrib.layers.xavier_initializer_conv2d()),
        # 6*6*128 -> 256
        'fc1': tf.get_variable('fc1', [6 * 6 * 128, 256], initializer=tf.contrib.layers.xavier_initializer()),
        # 256 -> 2
        'fc2': tf.get_variable('fc2', [256, 2], initializer=tf.contrib.layers.xavier_initializer())
    }

with tf.variable_scope('biases'):
    biases = {
        'conv1': tf.get_variable('conv1', [32, ], initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
        'conv2': tf.get_variable('conv2', [64, ], initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
        'conv3': tf.get_variable('conv3', [128, ], initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
        'fc1': tf.get_variable('fc1', [256, ], initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
        'fc2': tf.get_variable('fc2', [2, ], initializer=tf.constant_initializer(value=0.0, dtype=tf.float32))
    }
def inference(images):
    images = tf.cast(images, tf.float32) / 255
    conv1 = tf.nn.bias_add(tf.nn.conv2d(images, weights['conv1'], strides=[1, 1, 1, 1], padding='SAME'), biases['conv1'])
    relu1 = tf.nn.relu(conv1)
    pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.bias_add(tf.nn.conv2d(pool1, weights['conv2'], strides=[1, 1, 1, 1], padding='SAME'), biases['conv2'])
    relu2 = tf.nn.relu(conv2)
    pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.bias_add(tf.nn.conv2d(pool2, weights['conv3'], strides=[1, 1, 1, 1], padding='VALID'), biases['conv3'])
    relu3 = tf.nn.relu(conv3)
    pool3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    flatten = tf.reshape(pool3, [-1, weights['fc1'].get_shape().as_list()[0]])
    drop = tf.nn.dropout(flatten, 0.5)
    fc1 = tf.matmul(drop, weights['fc1']) + biases['fc1']
    fc_relu1 = tf.nn.relu(fc1)
    fc2 = tf.matmul(fc_relu1, weights['fc2']) + biases['fc2']
    return fc2
def train():
    dt.encode_to_tfrecords('../train_data/train.txt', '../train_data', 'data.tfrecords', (60, 60))
    image, label = dt.decode_from_tfrecords('../train_data/data.tfrecords')
    batch_image, batch_label = dt.get_batch(image, label, batch_size=10, crop_size=60)
    inf = inference(batch_image)
    predicts = tf.nn.softmax(inf)
    cross_entropy = -tf.reduce_mean(batch_label * tf.log(predicts))
    train_step = tf.train.GradientDescentOptimizer(1e-2).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(predicts, 1), tf.argmax(batch_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # if os.path.exists(os.path.join('model', 'model.ckpt')):
    #     tf.train.Saver(max_to_keep=None).restore(sess, os.path.join('model', 'model.ckpt'))
    for epoch in range(8):
        print(sess.run(accuracy))
        print('here!')
    coord.request_stop()
    coord.join(threads)

train()
data_trans.py contains three functions used to transform images into TFRecords:
import cv2
import tensorflow as tf

def encode_to_tfrecords(label_file, data_root, new_name='data.tfrecords', resize=None):
    writer = tf.python_io.TFRecordWriter(data_root + '/' + new_name)
    num_example = 0
    with open(label_file, 'r') as f:
        for l in f.readlines():
            l = l.split()
            path = data_root + '/' + l[0]
            image = cv2.imread(path)
            if resize is not None:
                image = cv2.resize(image, resize)
            height, width, nchannel = image.shape
            label = int(l[1])
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
                'width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
                'nchannel': tf.train.Feature(int64_list=tf.train.Int64List(value=[nchannel])),
                'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
            }))
            serialized = example.SerializeToString()
            writer.write(serialized)
            num_example += 1
    print(label_file, 'Sample_Num:', num_example)
    writer.close()
#encode_to_tfrecords('../train_data/train.txt','../train_data')
def decode_from_tfrecords(filename, num_epoch=None):
    filename_queue = tf.train.string_input_producer([filename], num_epoch)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    example = tf.parse_single_example(serialized, features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'nchannel': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    })
    label = tf.cast(example['label'], tf.int32)
    image = tf.decode_raw(example['image'], tf.uint8)
    image = tf.reshape(image, tf.stack([
        tf.cast(example['height'], tf.int32),
        tf.cast(example['width'], tf.int32),
        tf.cast(example['nchannel'], tf.int32)
    ]))
    return image, label
#encode_to_tfrecords("../train_data/train.txt","../train_data",'data.tfrecords')
#image,label=decode_from_tfrecords('../train_data/data.tfrecords')
#print image[0]
def get_batch(image, label, batch_size, crop_size):
    distorted_image = tf.random_crop(image, [crop_size, crop_size, 3])
    distorted_image = tf.image.random_flip_up_down(distorted_image)
    images, label_batch = tf.train.shuffle_batch([distorted_image, label], batch_size=batch_size,
                                                 capacity=130, min_after_dequeue=100)
    return images, tf.one_hot(tf.reshape(label_batch, [batch_size]), 2)
Thanks all, I have solved this problem. It seems to be a bug in tensorflow-gpu on Windows (7/10): tf.one_hot() cannot execute correctly under Win7 (maybe only with tensorflow-gpu 0.12 on Win7), so the op must be explicitly pinned to the CPU, like this:

with tf.device('/cpu:0'):
    tf.one_hot(...)
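Applied to the get_batch function above, the fix looks like this (a sketch; only the device pinning is new):

def get_batch(image, label, batch_size, crop_size):
    distorted_image = tf.random_crop(image, [crop_size, crop_size, 3])
    distorted_image = tf.image.random_flip_up_down(distorted_image)
    images, label_batch = tf.train.shuffle_batch([distorted_image, label], batch_size=batch_size,
                                                 capacity=130, min_after_dequeue=100)
    with tf.device('/cpu:0'):  # pin one_hot to the CPU to avoid the GPU bug
        labels = tf.one_hot(tf.reshape(label_batch, [batch_size]), 2)
    return images, labels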

TensorFlow evaluate: Aborted (core dumped)

tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have a model already trained. I load it and call two functions:
def main(argv=None):
    model = NVDM(args)
    sess_saver = tf.train.Saver()
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)
    loaded = load_for_similar(sess, sess_saver)  # my function
    wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
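For completeness, here is a minimal sketch of how I would expect to fetch both result tensors in a single run call and print their NumPy values, instead of printing the Tensor objects themselves (this assumes the graph itself can evaluate, which is exactly what seems to fail):

# Sketch: fetch words and similarities together in one session call.
words_val, sims_val = sess.run(list(wm))
for w, s in zip(words_val, sims_val):
    print(w, s)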
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder), and normalizes them. It also reverses the order of the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
    saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
    vocab_file = '/path/to/vocab.pkl'
    t1 = loader_object(vocab_file)
    v1 = t1.get_vocab()
    v1_rev = {k: v for v, k in v1.iteritems()}
    decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
    decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
    return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias into a new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
    dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
    best = argsort(sess, dists, reverse=True)
    dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
    return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat, norm_bias, word_, vocab, vocab_inverse, topN=10):
    idx = vocab[word_]
    similarity_meas, indexes = find_similar(sess, norm_mat, norm_mat[idx], norm_bias)
    words = tf.gather(vocab_inverse.keys(), indexes[:topN])
    return (words, similarity_meas[:topN])
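One thing I am unsure about here (flagging it as a guess): vocab_inverse.keys() has no guaranteed order relative to the index values produced by find_similar. A safer sketch would materialize the inverse vocabulary as an index-ordered array first (this assumes vocab_inverse maps ids 0..V-1 to words, as built in load_for_similar):

import numpy as np

# Sketch: index-ordered word array, so tf.gather lines up with ids.
vocab_words = np.array([vocab_inverse[i] for i in range(len(vocab_inverse))])
words = tf.gather(vocab_words, indexes[:topN])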
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted). This code depends on utils.py, a separate utilities file, which I include as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
    """ Neural Variational Document Model -- BOW VAE.
    """
    def __init__(self,
                 vocab_size=15000,  # was 2000
                 n_hidden=500,
                 n_topic=50,
                 n_sample=1,
                 learning_rate=1e-5,
                 batch_size=100,  # was 64
                 non_linearity=tf.nn.tanh):
        self.vocab_size = vocab_size
        self.n_hidden = n_hidden
        self.n_topic = n_topic
        self.n_sample = n_sample
        self.non_linearity = non_linearity
        self.learning_rate = learning_rate / batch_size  # CA
        self.batch_size = batch_size
        self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
        self.mask = tf.placeholder(tf.float32, [None], name='mask')  # mask paddings
        # encoder
        with tf.variable_scope('encoder'):
            self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
            self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
            self.logsigm = utils.linear(self.enc_vec,
                                        self.n_topic,
                                        bias_start_zero=True,
                                        matrix_start_zero=False,
                                        scope='logsigm')
            self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
            self.kld = self.mask * self.kld  # mask paddings
        with tf.variable_scope('decoder'):
            if self.n_sample == 1:  # single sample
                p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32)  # needed for random normal generation
                eps = tf.random_normal((p1, self.n_topic), 0, 1)
                doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
                logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
                self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
            # multiple samples
            else:
                eps = tf.random_normal((self.n_sample * batch_size, self.n_topic), 0, 1)
                eps_list = tf.split(0, self.n_sample, eps)
                recons_loss_list = []
                for i in xrange(self.n_sample):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    curr_eps = eps_list[i]
                    doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
                    logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
                    recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
                self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
        self.objective = self.recons_loss + self.kld
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        fullvars = tf.trainable_variables()
        enc_vars = utils.variable_parser(fullvars, 'encoder')
        dec_vars = utils.variable_parser(fullvars, 'decoder')
        enc_grads = tf.gradients(self.objective, enc_vars)
        dec_grads = tf.gradients(self.objective, dec_vars)
        self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
        self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints=set()):
    available = set(np.arange(n_samples)) - used_ints
    if len(available) < batch_size:
        indices = np.array(list(available))
    else:
        indices = np.random.choice(tuple(available), batch_size, replace=False)
    used = used_ints
    mb = itemgetter(*indices)(it1)
    batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
    batch_flattened = np.ravel(batch_xs)
    index_positions = np.where(batch_flattened > 0)[0]
    # batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not
    return (batch_xs, index_positions, set(indices))
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
    Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
    data_set = Instance1.get_batch(batch_size)  # get all minibatches of size 100
    n_samples = Instance1.num_reviews()
    train_batches = list(data_set)  # this is an itertools.chain object
    it1_train = list(itertools.chain(*train_batches))  # length is 732,356; this is all the reviews
    if len(it1_train) % batch_size != 0:
        total_batch = int(len(it1_train) / batch_size) + 1
    else:
        total_batch = int(len(it1_train) / batch_size)
    trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
    # Training
    train_time = time.time()
    for epoch in range(training_epochs):
        for switch in xrange(0, 2):
            if switch == 0:
                optim = model.optim_dec
                print_mode = 'updating decoder'
            else:
                optim = model.optim_enc
                print_mode = 'updating encoder'
            with open(trainfilesave, 'w') as f:
                for i in xrange(alternate_epochs):
                    loss_sum = 0.0
                    kld_sum = 0.0
                    word_count = 0
                    used_indices = set()
                    for idx_batch in range(total_batch):
                        mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
                        print('minibatch', idx_batch)
                        used_indices.update(mb[2])
                        num_mb = np.ones(mb[0][0].shape[0])
                        input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
                        _, (loss, kld) = sess.run((optim, [model.objective, model.kld]), input_feed)
                        loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
           output_size,
           no_bias=False,
           bias_start_zero=False,
           matrix_start_zero=False,
           scope=None):
    """Define a linear connection."""
    with tf.variable_scope(scope or 'Linear'):
        if matrix_start_zero:
            matrix_initializer = tf.constant_initializer(0)
        else:
            matrix_initializer = None
        if bias_start_zero:
            bias_initializer = tf.constant_initializer(0)
        else:
            bias_initializer = None
        input_size = inputs.get_shape()[1].value
        matrix = tf.get_variable('Matrix', [input_size, output_size],
                                 initializer=matrix_initializer)
        bias_term = tf.get_variable('Bias', [output_size],
                                    initializer=bias_initializer)
        output = tf.matmul(inputs, matrix)
        if not no_bias:
            output = output + bias_term
        return output
def mlp(inputs,
        mlp_hidden=[],
        mlp_nonlinearity=tf.nn.tanh,
        scope=None):
    """Define an MLP."""
    with tf.variable_scope(scope or 'Linear'):
        mlp_layer = len(mlp_hidden)
        res = inputs
        for l in xrange(mlp_layer):
            res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l' + str(l)))
        return res
