This is my my code but the output is really bad.
and by that i mean the values are way off than what they are supposed to be, the testing error is very high. if i de-normalize the values and compare the difference, it's massive.
I have two questions:
1) Can anyone tell me why this is happening and what i can do to make it perform better?
2) When the values goes through so many functions, how do i get the output back to the original format.
I am new to this and jumped into a complex topic immediately, so i know my code isn't the best, if you could tell me how to improve that would be great as well! anyways, so please bear with me!
The data i used was a list of multiples of two.
ps: when i used the tensorflow models like dynamic_rnn() the output i got was accurate and also i just had to denormalize the output to get the number in the original format( correct size that is), how will just denormalizing it get the output, i dont get that!!
Thanks!
# LSTM [ Many to One ]
# START
# imports
import csv
import numpy as np
import tensorflow as tf
import sys
import os
import json
from random import shuffle
from tensorflow.python import debug as tf_debug
# CALCULATE ALL POSSIBLE BATCH SIZES
def calculate_batch_sizes(n_train):
batch_sizes = []
for i in range(2, int(n_train/2)):
if n_train % i == 0 and n_train / i > 1:
batch_sizes.append(i)
return batch_sizes
def de_normalize(value, m1, m2):
return (value*(m1-m2)) + m2
class lstm_network():
name = "lstm_"
# initialization function
def __init__(self, config_params):
self.sequence_length = config_params["sequence_length"]
self.batch_size = config_params["batch_size"]
self.hidden_layers_size = config_params["hidden_layers_size"]
self.data_path = config_params["data_path"]
self.n_epochs = config_params["no_of_epochs"]
self.learning_rate = config_params["learning_rate"]
self.w_igate, self.w_fgate, self.w_ogate, self.w_cgate = tf.get_variable('w_igate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('w_fgate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('w_ogate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('w_cgate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer())
self.u_igate, self.u_fgate, self.u_ogate, self.u_cgate = tf.get_variable('u_igate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('u_fgate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('u_ogate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('u_cgate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer())
self.outputs = [0.0] * self.batch_size
self.testing_loss = float(0)
self.training_loss = float(0)
self.ft, self.ct, self._ct, self.it = [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size)
self.ot, self.ht, self.ct_prev, self.ht_prev = [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size), np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size), np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
self.w_output_layer = tf.get_variable('w_output_layer', shape = [self.hidden_layers_size, 1], initializer = tf.contrib.layers.xavier_initializer())
print("\n Object of class lstm_network initialized with the given configuration")
# print values function
def print_model_info(self):
print("\n\n\n\t\t MODEL INFORMATION\n\n")
print("\n Weights of the LSTM layer: ")
print("\n\n input Gate Weights: \n w: ", self.w_igate,"\n u: ", self.u_igate)
print("\n\n Forget Gate Weights: \n w: ", self.w_fgate,"\n u: ", self.u_fgate)
print("\n\n Context Gate Weights: \n w: ", self.w_cgate,"\n u: ", self.u_cgate)
print("\n\n Output Gate Weights: \n w: ", self.w_ogate,"\n u: ", self.u_ogate)
print("\n\n Average loss while training: ", self.training_loss)
print("\n\n Average loss while testing: ", self.testing_loss)
# loading function
def load_data(self):
with open(self.data_path, 'r') as data_file:
data_reader = csv.reader(data_file, delimiter = ',')
self.data = [float(row[1]) for row in data_reader]
self.data_max, self.data_min, self.n_data = float(max(self.data)), float(min(self.data)), len(self.data)
for i in range(len(self.data)):
self.data[i] = float( (self.data[i]-self.data_min)/(self.data_max-self.data_min) )
self.data_x = [ self.data[i:i+self.sequence_length] for i in range(self.n_data - self.sequence_length-1)]
self.data_y = [ self.data[i] for i in range(self.sequence_length+1, self.n_data)]
self.n_data = len(self.data_x)
temp = list(zip(self.data_x,self.data_y))
shuffle(temp)
test_size = 0.25
self.data_x, self.data_y = zip(*temp)
self.trainx, self.trainy, self.testx, self.testy = self.data_x[:-int(test_size*self.n_data)], self.data_y[:-int(test_size*self.n_data)], self.data_x[-int(test_size*self.n_data):], self.data_y[-int(test_size*self.n_data):]
self.n_train, self.n_test = len(self.trainx), len(self.testx)
batch_sizes = []
batch_sizes.extend(calculate_batch_sizes(self.n_train))
while self.batch_size not in batch_sizes:
print("\n batch size provided in the initial configuration cannot be used, please select one from the following batch sizes:\n",batch_sizes)
self.batch_size = int(input("\n enter a batch size: "))
self.n_train_batches = int( self.n_train/self.batch_size )
self.trainx, self.trainy, self.testx, self.testy = np.float32(self.trainx), np.float32(self.trainy), np.float32(self.testx), np.float32(self.testy)
self.trainx_batches, self.trainy_batches = self.trainx.reshape(self.n_train_batches, self.batch_size, self.sequence_length), self.trainy.reshape(self.n_train_batches,self.batch_size, 1)
print("\n data loaded succesfully")
# graph building and training function
def build_graph_train(self):
outputs = [0.0]*self.batch_size#tf.placeholder(tf.float32, shape = [1, self.batch_size])
x = self.trainx_batches
ht_prev = tf.reshape(np.float32([0]*(self.hidden_layers_size)), [1, self.hidden_layers_size]) #[tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size], name = 'ht_prev')
ct_prev = tf.reshape(np.float32([0]*(self.hidden_layers_size)), [1, self.hidden_layers_size]) #tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size], name = 'ct_prev')
self.ht_prev = np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
self.ct_prev = np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
for i1 in range(self.n_train_batches):
for i2 in range(self.batch_size):
#self.ht_prev = [self.ht_prev[i:i+9] for i in range(0, self.hidden_layers_size, 9)]
self.ft = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_fgate) + tf.matmul(ht_prev, self.u_fgate) )
self.it = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_igate) + tf.matmul(ht_prev, self.u_igate) )
self.ot = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_ogate) + tf.matmul(ht_prev, self.u_ogate) )
self._ct = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_cgate) + tf.matmul(ht_prev, self.u_cgate) )
self.ct = tf.tanh(tf.multiply(self.ft, ct_prev) + tf.multiply(self.it, self._ct))
self.ht = tf.multiply(self.ot, self.ct)
ht_prev = self.ht
ct_prev = self.ct
outputs[i2] = tf.nn.relu( tf.matmul(self.ht, self.w_output_layer) )
loss = tf.reduce_mean(tf.square(tf.subtract(outputs, self.trainy_batches[i1])))
self.ht_prev = ht_prev
self.ct_prev = ct_prev
self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)
print("\n Graph built \n\n Now training begins...\n")
#training
i = 0
avg_loss = float(0)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
#sess = tf_debug.LocalCLIDebugWrapperSession(sess)
for ep in range(self.n_epochs + 1):
#ht_prev = np.float32([0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
#ct_prev = np.float32([0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
#loss.eval( feed_dict= { x: np.float32(self.trainx_batches).reshape(self.n_train_batches, self.batch_size, self.sequence_length) })
sess.run(self.train_op)#, feed_dict= { x: np.float32(self.trainx_batches).reshape(self.n_train_batches, self.batch_size, self.sequence_length) } )#, ht_prev: np.float32([0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size), ct_prev: np.float32([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size) })
if ep % 10 == 0:
i += 1
mse = loss.eval()# feed_dict= { x: np.float32(self.trainx_batches).reshape(self.n_train_batches, self.batch_size, self.sequence_length) })
avg_loss = float(avg_loss + mse)
print("\n Epoch: ", ep, "\t Loss: ", mse)
avg_loss = float(avg_loss/i)
self.training_loss = avg_loss
print("\n Training Loss: ", avg_loss)
# Predict function
def predict(self):
print("\n testing begins...")
x_test_row = tf.placeholder(tf.float32, shape = [1, self.sequence_length])
avg_error = float(0)
input_row = []
output_row = 0.0
predictions = []
#ht_prev = tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size]) # ht_prev = tf.varaible(self.ht_prev)
#ct_prev = tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size]) # ct_prev = tf.varaible(self.ct_prev)
# one forward pass
self.ft = tf.sigmoid( tf.matmul(x_test_row, self.w_fgate) + tf.matmul(self.ht_prev, self.u_fgate) )
self.it = tf.sigmoid( tf.matmul(x_test_row, self.w_igate) + tf.matmul(self.ht_prev, self.u_igate ) )
self.ot = tf.sigmoid( tf.matmul(x_test_row, self.w_ogate) + tf.matmul(self.ht_prev, self.u_ogate) )
self._ct = tf.sigmoid( tf.matmul(x_test_row, self.w_cgate) + tf.matmul(self.ht_prev, self.u_cgate) )
self.ct = tf.tanh(tf.multiply(self.ft, self.ct_prev) + tf.multiply(self.it, self._ct))
self.ht = tf.multiply(self.ot,self.ct)
pred_output = tf.nn.relu( tf.matmul(self.ht, self.w_output_layer) )
with tf.Session() as sess:
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
print("\n loaded the variables")
for i1 in range(self.n_test):
del input_row[:]
output_row = float(self.testy[i1])
for i2 in range(self.sequence_length):
input_row.append(self.testx[i1][i2])
#sess.run(pred_output, feed_dict = { x_test_row: np.array(input_row).reshape(1, self.sequence_length), ht_prev:self.ht_prev, ct_prev: self.ct_prev })
predictions.append([pred_output.eval(feed_dict = { x_test_row: np.float32(input_row).reshape(1, self.sequence_length) }), output_row])
avg_error += abs(predictions[i1][0] - output_row)
avg_error = float(avg_error/i1)
self.testing_loss = avg_error
print("\n testing Error: ", avg_error)
return np.array(predictions)
# save model function
def save_model(self):
print("\n\n model's information saved in model_info.txt and weights stored in model.json\n\n")
f = open("model.json", "w+")
model_dict = { 'w_output_layer': self.w_output_layer, 'w_igate': self.w_igate, 'u_igate': self.u_igate, 'w_fgate': self.w_fgate, 'u_fgate': self.u_fgate, 'w_cgate': self.w_cgate, 'u_cgate': self.u_cgate, 'w_ogate': self.w_ogate, 'u_ogate': self.u_ogate }
f.write(str(model_dict))
f.close()
# main function()
def main():
# parameters of the network
config_params = dict()
config_params["sequence_length"] = 3
config_params["batch_size"] = 33
config_params["hidden_layers_size"] = 9
config_params["data_path"] = "data.csv"
config_params["no_of_epochs"] = 2000
config_params["learning_rate"] = 0.01
# object of class lstm_network
test_object = lstm_network(config_params)
test_object.load_data()
test_object.build_graph_train()
predictions = test_object.predict()
print("\n predictions are: \n", predictions)
test_object.save_model()
# run
main()
for this configuration:
Average testing error i got was: 0.15911798179149628
Average training error i got was: 0.10901389649110053
They look low im guessing because of normalizing the values
Related
I am trying to build the autoencoder structure detailed in this IEEE article. The autoencoder uses a separable loss function where it is required that I create a custom loss function for the "cluster loss" term of the separable loss function as a function of the average output of the encoder. I create my own layer called RffConnected that calculates the cluster loss and utilizes the add_loss method. Otherwise, this RffConnected layer should act as just a normal deep layer.
Here are my relevant code snippets:
import matplotlib.pyplot as plot
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import math
from matplotlib.figure import Figure
import tensorflow as tf
import keras
from keras import layers
import random
import time
from os import listdir
#loads data from a text file
def loadData(basePath, samplesPerFile, sampleRate):
real = []
imag = []
fileOrder = []
for file in listdir(basePath):
if((file != "READ_ME") and ((file != "READ_ME.txt"))):
fid = open(basePath + "\\" + file, "r")
fileOrder.append(file)
t = 0
sampleEvery = samplesPerFile / sampleRate
temp1 = []
temp2 = []
times = []
for line in fid.readlines():
times.append(t)
samples = line.split("\t")
temp1.append(float(samples[0]))
temp2.append(float(samples[1]))
t = t + sampleEvery
real.append(temp1)
imag.append(temp2)
fid.close()
real = np.array(real)
imag = np.array(imag)
return real, imag, times, fileOrder
#####################################################################################################
#Breaks up and randomizes data
def breakUpData(real, imag, times, numPartitions, basePath):
if(len(real) % numPartitions != 0):
raise ValueError("Error: The length of the dataset must be divisible by the number of partitions.")
newReal = []
newImag = []
newTimes = []
fileOrder = listdir(basePath)
dataFiles = []
interval = int(len(real[0]) / numPartitions)
for i in range(0, interval):
newTimes.append(times[i])
for i in range(0, len(real)):
tempI = []
tempQ = []
for j in range(0, len(real[0])):
tempI.append(real[i, j])
tempQ.append(imag[i, j])
if((j + 1) % interval == 0):
newReal.append(tempI)
newImag.append(tempQ)
#fileName = fileOrder[i][0: fileOrder[i].find("_") + 3]
dataFiles.append(fileOrder[i])
tempI = []
tempQ = []
#randomizes the broken up dataset and the file list
for i in range(0, len(newReal)):
r = random.randint(0, len(newReal) - 1)
tempReal = newReal[i]
tempImag = newImag[i]
newReal[i] = newReal[r]
newImag[i] = newImag[r]
newReal[r] = tempReal
newImag[r] = tempImag
tempFile = dataFiles[i]
dataFiles[i] = dataFiles[r]
dataFiles[r] = tempFile
#return np.array(newReal), np.array(newImag), newTimes, dataFiles
return newReal, newImag, newTimes, dataFiles
#####################################################################################################
#custom loss layer for the RffAe-S that calculates the clustering loss term
class RffConnected(layers.Layer):
def __init__(self, output_dim, batchSize, beta, alpha):
super(RffConnected, self).__init__()
# self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)
#array = np.zeros(output_dim)
self.iters = 0.0
self.beta = beta
self.alpha = alpha
self.batchSize = batchSize
self.output_dim = output_dim
self.sum = tf.zeros(output_dim, tf.float64)
self.moving_average = tf.zeros(output_dim, tf.float64)
self.clusterloss = tf.zeros(output_dim, tf.float64)
self.sum = tf.cast(self.sum, tf.float32)
self.moving_average = tf.cast(self.moving_average, tf.float32)
self.clusterloss = tf.cast(self.clusterloss, tf.float32)
# self.sum = keras.Input(shape=(self.output_dim,))
# self.moving_average = keras.Input(shape=(self.output_dim,))
# self.clusterloss = keras.Input(shape=(self.output_dim,))
def build(self, input_shape):
self.kernel = self.add_weight(name = 'kernel', \
shape = (int(input_shape[-1]), self.output_dim), \
initializer = 'normal', trainable = True)
#self.kernel = tf.cast(self.kernel, tf.float64)
super(RffConnected, self).build(int(input_shape[-1]))
def call(self, inputs):
#keeps track of training epochs
self.iters = self.iters + 1
#inputs = tf.cast(inputs, tf.float64)
#where this custom layer acts as a normal layer- the loss then uses this
#calc = keras.backend.dot(inputs, self.kernel)
calc = tf.matmul(inputs, self.kernel)
#cumulative sum of deep encoded features
#self.sum = state_ops.assign(self.sum, tf.reshape(tf.math.add(self.sum, calc), tf.shape(self.sum)))
#self.sum = tf.ops.state_ops.assign(self.sum, tf.math.add(self.sum, calc))
#self.sum.assign_add(calc)
self.sum = tf.math.add(self.sum, calc)
#calculate the moving average and loss if we have already trained one batch
if(self.iters >= self.batchSize):
self.moving_average = tf.math.divide(self.sum, self.iters)
self.clusterloss = tf.math.exp(\
tf.math.multiply(-1 * self.beta, tf.math.reduce_sum(tf.math.square(tf.math.subtract(inputs, self.moving_average)))))
#self.add_loss(tf.math.multiply(self.clusterloss, self.alpha))
self.add_loss(self.clusterloss.numpy() * self.alpha)
return calc
#####################################################################################################
def customloss(y_true, y_pred):
loss = tf.square(y_true - y_pred)
print(loss)
return loss
#####################################################################################################
realTraining = np.array(real[0:2200])
realTesting = np.array(real[2200:-1])
imagTraining = np.array(imag[0:2200])
imagTesting = np.array(imag[2200:-1])
numInputs = len(realTraining[0])
i_sig = keras.Input(shape=(numInputs,))
q_sig = keras.Input(shape=(numInputs,))
iRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(i_sig)
rff1 = keras.Model(inputs=i_sig, outputs=iRff)
qRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(q_sig)
rff2 = keras.Model(inputs=q_sig, outputs=qRff)
combined = layers.Concatenate()([iRff, qRff])
combineRff = tf.keras.layers.experimental.RandomFourierFeatures(4 * numInputs, \
kernel_initializer='gaussian', scale=10.0)(combined)
preprocess = keras.Model(inputs=[iRff, qRff], outputs=combineRff)
#print(realTraining[0:5])
preprocessedTraining = preprocess.predict([realTraining, imagTraining])
preprocessedTesting = preprocess.predict([realTesting, imagTesting])
################## Entering Encoder ######################
encoderIn = keras.Input(shape=(4*numInputs,))
#connected1 = layers.Dense(100, activation="sigmoid")(encoderIn)
clusterLossLayer = RffConnected(100, 30, 1.00, 100.00)(encoderIn)
#clusterLossLayer = myRffConnected(256)(connected1)
encoder = keras.Model(inputs=encoderIn, outputs=clusterLossLayer)
################## Entering Decoder ######################
connected2 = layers.Dense(125, activation="sigmoid")(clusterLossLayer)
relu1 = layers.ReLU()(connected2)
dropout = layers.Dropout(0.2)(relu1)
reshape1 = layers.Reshape((25, 5, 1))(dropout)
bn1 = layers.BatchNormalization()(reshape1)
trans1 = layers.Conv2DTranspose(1, (4, 2))(bn1)
ups1 = layers.UpSampling2D(size=(2, 1))(trans1)
relu2 = layers.ReLU()(ups1)
bn2 = layers.BatchNormalization()(relu2)
trans2 = layers.Conv2DTranspose(1, (4, 2))(bn2)
ups2 = layers.UpSampling2D(size=(2, 1))(trans2)
relu3 = layers.ReLU()(ups2)
bn3 = layers.BatchNormalization()(relu3)
trans3 = layers.Conv2DTranspose(1, (5, 2))(bn3)
ups3 = layers.UpSampling2D(size=(2, 1))(trans3)
relu4 = layers.ReLU()(ups3)
bn4 = layers.BatchNormalization()(relu4)
trans4 = layers.Conv2DTranspose(1, (7, 1))(bn4)
reshape2 = layers.Reshape((4*numInputs, 1, 1))(trans4)
autoencoder = keras.Model(inputs=encoderIn, outputs=reshape2)
encoded_input = keras.Input(shape=(None, 100))
decoder_layer = autoencoder.layers[-1]
#autoencoder.summary()
autoencoder.compile(optimizer='adam', loss=[autoencoder.losses[-1], customloss], metrics=['accuracy', 'accuracy'])
autoencoder.fit(preprocessedTraining, preprocessedTraining, epochs=100, batch_size=20, shuffle=True, validation_data=(preprocessedTesting, preprocessedTesting))
It seems like it runs for two training epochs then it gives me an error. I end up getting this error when I run it:
ValueError: Could not interpret loss function identifier: Tensor("rff_connected_137/Const:0", shape=(100,), dtype=float32)
I've already spent a considerable amount of time debugging this thing, although if you spot any more errors I would appreciate a heads-up. Thank you in advance.
According to the documentation of the keras Keras Model Training-Loss, the 'loss' attribute can take the value of float tensor (except for the sparse loss functions returning integer arrays) with a specific shape.
If it is necessary to combine two loss functions, it would be better to perform mathematical calculations within your custom loss function to return an output of float tensor. This reference might be a help Keras CustomLoss definition.
I followed https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/?unapproved=573495&moderation-hash=e897cca43ec874103aed4efd605b10aa#comment-573495 and made a neural network but evry time i run it, an error with the lists occurs. pls help debug it. my code is on google colab link - https://colab.research.google.com/drive/1Skfq3A1u7Mwdo72YBRWOm4x0SCp8mIFn?usp=sharing
"""ml_nolibs.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Skfq3A1u7Mwdo72YBRWOm4x0SCp8mIFn
"""
from random import seed,random
import numpy as np
def make_nn(n_inps,n_midd,n_outs):
network_weights = []
weights_input_hidden = [{'weights':[random() for i in range(n_inps+1)]} for i in range(n_midd) ]
weights_hidden_output = [{'weights':[random() for i in range(n_midd+1)]} for i in range(n_outs) ]
network_weights.append(weights_input_hidden)
network_weights.append(weights_hidden_output)
return network_weights
print(make_nn(1,2,2))
def sigmoid(n):
return 1/(1+np.exp(-n))
def activate(weights,inputs):
if (len(weights)-1) != len(inputs):
return "length error ____ activat"
ret = weights[-1]
for i in range(len(inputs)):
ret+= inputs[i] * weights[i]
return sigmoid(ret)
def forward_propagate(network,inputs):
print(inputs)
outs = []
for layer_of_connections in network:
outs = []
for neuron in layer_of_connections:
neuron['output']=activate(neuron['weights'],inputs)
outs.append(neuron['output'])
inputs = outs
return outs
print(forward_propagate([[{'weights': [0.7160471173051909, 0.5215147566159989]}, {'weights': [0.604197405116923, 0.4628263091169783]}], [{'weights': [0.4638546941280478, 0.5191037591740162, 0.8253877642443779]}, {'weights': [0.4635745950402146, 0.6078498529022434, 0.0074536694308950935]}]]
,[1]))
"""testing"""
network = make_nn(2,4,3)
print(forward_propagate(network,[6.9,9.6]))
"""assigning blame"""
def transfer_d(output):
return output*(1-output)
def backpropagate(network,expected):
for i in reversed(range(len(network))):
layer = network[i]
errors = []
if i == (len(network) -1):
for j in range(len(layer)):
neuron = layer[j]
errors.append(expected - neuron['output'])
else:
for j in range(len(layer)):
err = 0
for neuron in network[i+1]:
err+=neuron['weights'][j]*neuron['delta']
errors.append(err)
for j in range(len(layer)):
neuron = layer[j]
neuron['delta'] = errors[j]*transfer_d(neuron['output'])
"""# TRAINING TIME!"""
def update_weights(network,inps,l_rate = .1):
processed_inps = inps[:-1]
for i in range(len(network)):
if i!=0:
processed_inps = [neuron['output'] for neuron in network[i-1]]
for neuron in network[i]:
for j in range(len(processed_inps)):
neuron['weights'][j]+=l_rate*neuron['delta']*inputs[j]
neuron['weights'][-1]+=l_rate*neuron['delta ']
def choose_ele(l):
return l[int(random()*(len(l)-1))]
def train_netw(network,data,n_outputs,l_rate = .1,n_epoch = 10000):#n_outputs is used for onr hot encoding using binary vector
for epoch in range(n_epoch):
sum_error = 0
row = choose_ele(data)
nn_outs = forward_propagate(network,row[:-1])
expected = [0 for i in range(n_outputs)]
expected[row[-1]] = 1
sum_error = sum([(expected[i]-nn_outs[i])**2 for i in range(len(expected))])
backpropagate(network,expected)
update_weights(network,row[:-1])#possible mistake
if epoch%100 == 0:
print('epoch = %d err = %.3f'%(epoch,sum_error))
data = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]
n_inps = data.__len__()-1
n_outs = len(set([d[-1] for d in data]))
netw = make_nn(n_inps,2,n_outs)
train_netw(netw,data,n_outs,.5,200)
print(network)
the error shown is as follows -
enter image description here
I save the trained model after a certain number of episodes with the special save() function of the DDPG class (the network is saved when the reward reaches zero), but when I restore the model again using saver.restore(), the network gives out a reward equal to approximately -1800. Why is this happening, maybe I'm doing something wrong? My network:
import tensorflow as tf
import numpy as np
import gym
epsiode_steps = 500
# learning rate for actor
lr_a = 0.001
# learning rate for critic
lr_c = 0.002
gamma = 0.9
alpha = 0.01
memory = 10000
batch_size = 32
render = True
class DDPG(object):
def __init__(self, no_of_actions, no_of_states, a_bound, ):
self.memory = np.zeros((memory, no_of_states * 2 + no_of_actions + 1), dtype=np.float32)
# initialize pointer to point to our experience buffer
self.pointer = 0
self.sess = tf.Session()
self.noise_variance = 3.0
self.no_of_actions, self.no_of_states, self.a_bound = no_of_actions, no_of_states, a_bound,
self.state = tf.placeholder(tf.float32, [None, no_of_states], 's')
self.next_state = tf.placeholder(tf.float32, [None, no_of_states], 's_')
self.reward = tf.placeholder(tf.float32, [None, 1], 'r')
with tf.variable_scope('Actor'):
self.a = self.build_actor_network(self.state, scope='eval', trainable=True)
a_ = self.build_actor_network(self.next_state, scope='target', trainable=False)
with tf.variable_scope('Critic'):
q = self.build_crtic_network(self.state, self.a, scope='eval', trainable=True)
q_ = self.build_crtic_network(self.next_state, a_, scope='target', trainable=False)
self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/eval')
self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/target')
self.ce_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval')
self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target')
# update target value
self.soft_replace = [
[tf.assign(at, (1 - alpha) * at + alpha * ae), tf.assign(ct, (1 - alpha) * ct + alpha * ce)]
for at, ae, ct, ce in zip(self.at_params, self.ae_params, self.ct_params, self.ce_params)]
q_target = self.reward + gamma * q_
td_error = tf.losses.mean_squared_error(labels=(self.reward + gamma * q_), predictions=q)
self.ctrain = tf.train.AdamOptimizer(lr_c).minimize(td_error, name="adam-ink", var_list=self.ce_params)
a_loss = - tf.reduce_mean(q)
# train the actor network with adam optimizer for minimizing the loss
self.atrain = tf.train.AdamOptimizer(lr_a).minimize(a_loss, var_list=self.ae_params)
tf.summary.FileWriter("logs2", self.sess.graph)
# initialize all variables
self.sess.run(tf.global_variables_initializer())
self.saver = tf.train.Saver()
self.saver.restore(self.sess, "Pendulum/nn.ckpt")
def choose_action(self, s):
a = self.sess.run(self.a, {self.state: s[np.newaxis, :]})[0]
a = np.clip(np.random.normal(a, self.noise_variance), -2, 2)
return a
def learn(self):
# soft target replacement
self.sess.run(self.soft_replace)
indices = np.random.choice(memory, size=batch_size)
batch_transition = self.memory[indices, :]
batch_states = batch_transition[:, :self.no_of_states]
batch_actions = batch_transition[:, self.no_of_states: self.no_of_states + self.no_of_actions]
batch_rewards = batch_transition[:, -self.no_of_states - 1: -self.no_of_states]
batch_next_state = batch_transition[:, -self.no_of_states:]
self.sess.run(self.atrain, {self.state: batch_states})
self.sess.run(self.ctrain, {self.state: batch_states, self.a: batch_actions, self.reward: batch_rewards,
self.next_state: batch_next_state})
# we define a function store_transition which stores all the transition information in the buffer
def store_transition(self, s, a, r, s_):
trans = np.hstack((s, a, [r], s_))
index = self.pointer % memory
self.memory[index, :] = trans
self.pointer += 1
if self.pointer > memory:
self.noise_variance *= 0.99995
self.learn()
# we define the function build_actor_network for builing our actor network and after crtic network
def build_actor_network(self, s, scope, trainable)
with tf.variable_scope(scope):
l1 = tf.layers.dense(s, 30, activation=tf.nn.tanh, name='l1', trainable=trainable)
a = tf.layers.dense(l1, self.no_of_actions, activation=tf.nn.tanh, name='a', trainable=trainable)
return tf.multiply(a, self.a_bound, name="scaled_a")
def build_crtic_network(self, s, a, scope, trainable):
with tf.variable_scope(scope):
n_l1 = 30
w1_s = tf.get_variable('w1_s', [self.no_of_states, n_l1], trainable=trainable)
w1_a = tf.get_variable('w1_a', [self.no_of_actions, n_l1], trainable=trainable)
b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
net = tf.nn.tanh(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
q = tf.layers.dense(net, 1, trainable=trainable)
return q
def save(self):
self.saver.save(self.sess, "Pendulum/nn.ckpt")
env = gym.make("Pendulum-v0")
env = env.unwrapped
env.seed(1)
no_of_states = env.observation_space.shape[0]
no_of_actions = env.action_space.shape[0]
a_bound = env.action_space.high
ddpg = DDPG(no_of_actions, no_of_states, a_bound)
total_reward = []
no_of_episodes = 300
# for each episodes
for i in range(no_of_episodes):
# initialize the environment
s = env.reset()
# episodic reward
ep_reward = 0
for j in range(epsiode_steps):
env.render()
# select action by adding noise through OU process
a = ddpg.choose_action(s)
# peform the action and move to the next state s
s_, r, done, info = env.step(a)
# store the the transition to our experience buffer
# sample some minibatch of experience and train the network
ddpg.store_transition(s, a, r, s_)
# update current state as next state
s = s_
# add episodic rewards
ep_reward += r
if int(ep_reward) == 0 and i > 200:
ddpg.save()
print("save")
quit()
if j == epsiode_steps - 1:
total_reward.append(ep_reward)
print('Episode:', i, ' Reward: %i' % int(ep_reward))
break
i am trying to implement multidimentional lstm in tensorflow, I am using TensorArray to remember previous states, i am using a complicated way to get two neigbours state(above and from left). tf.cond want that both posible condition to exist and to have the same number of inputs. this is why i added one more cell.zero_state to the (last index +1) of the states. then i using a function to get the correct indexes to the states. when i am trying to use an optimizer in order to minimize a cost, i getting that error:
InvalidArgumentError (see above for traceback): TensorArray
MultiDimentionalLSTMCell-l1-multi-l1/state_ta_262#gradients: Could not
read from TensorArray index 809 because it has not yet been written
to.
Can someone tell how to fix it?
Ps: without optimizer it works!
class MultiDimentionalLSTMCell(tf.nn.rnn_cell.RNNCell):
"""
Note that state_is_tuple is always True.
"""
def __init__(self, num_units, forget_bias=1.0, activation=tf.nn.tanh):
self._num_units = num_units
self._forget_bias = forget_bias
self._activation = activation
#property
def state_size(self):
return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)
#property
def output_size(self):
return self._num_units
def __call__(self, inputs, state, scope=None):
"""Long short-term memory cell (LSTM).
#param: imputs (batch,n)
#param state: the states and hidden unit of the two cells
"""
with tf.variable_scope(scope or type(self).__name__):
c1,c2,h1,h2 = state
# change bias argument to False since LN will add bias via shift
concat = tf.nn.rnn_cell._linear([inputs, h1, h2], 5 * self._num_units, False)
i, j, f1, f2, o = tf.split(1, 5, concat)
new_c = (c1 * tf.nn.sigmoid(f1 + self._forget_bias) +
c2 * tf.nn.sigmoid(f2 + self._forget_bias) + tf.nn.sigmoid(i) *
self._activation(j))
new_h = self._activation(new_c) * tf.nn.sigmoid(o)
new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
return new_h, new_state
def multiDimentionalRNN_whileLoop(rnn_size,input_data,sh,dims=None,scopeN="layer1"):
"""Implements naive multidimentional recurent neural networks
#param rnn_size: the hidden units
#param input_data: the data to process of shape [batch,h,w,chanels]
#param sh: [heigth,width] of the windows
#param dims: dimentions to reverse the input data,eg.
dims=[False,True,True,False] => true means reverse dimention
#param scopeN : the scope
returns [batch,h/sh[0],w/sh[1],chanels*sh[0]*sh[1]] the output of the lstm
"""
with tf.variable_scope("MultiDimentionalLSTMCell-"+scopeN):
cell = MultiDimentionalLSTMCell(rnn_size)
shape = input_data.get_shape().as_list()
if shape[1]%sh[0] != 0:
offset = tf.zeros([shape[0], sh[0]-(shape[1]%sh[0]), shape[2], shape[3]])
input_data = tf.concat(1,[input_data,offset])
shape = input_data.get_shape().as_list()
if shape[2]%sh[1] != 0:
offset = tf.zeros([shape[0], shape[1], sh[1]-(shape[2]%sh[1]), shape[3]])
input_data = tf.concat(2,[input_data,offset])
shape = input_data.get_shape().as_list()
h,w = int(shape[1]/sh[0]),int(shape[2]/sh[1])
features = sh[1]*sh[0]*shape[3]
batch_size = shape[0]
x = tf.reshape(input_data, [batch_size,h,w, features])
if dims is not None:
x = tf.reverse(x, dims)
x = tf.transpose(x, [1,2,0,3])
x = tf.reshape(x, [-1, features])
x = tf.split(0, h*w, x)
sequence_length = tf.ones(shape=(batch_size,), dtype=tf.int32)*shape[0]
inputs_ta = tf.TensorArray(dtype=tf.float32, size=h*w,name='input_ta')
inputs_ta = inputs_ta.unpack(x)
states_ta = tf.TensorArray(dtype=tf.float32, size=h*w+1,name='state_ta',clear_after_read=False)
outputs_ta = tf.TensorArray(dtype=tf.float32, size=h*w,name='output_ta')
states_ta = states_ta.write(h*w, tf.nn.rnn_cell.LSTMStateTuple(tf.zeros([batch_size,rnn_size], tf.float32),
tf.zeros([batch_size,rnn_size], tf.float32)))
def getindex1(t,w):
return tf.cond(tf.less_equal(tf.constant(w),t),
lambda:t-tf.constant(w),
lambda:tf.constant(h*w))
def getindex2(t,w):
return tf.cond(tf.less(tf.constant(0),tf.mod(t,tf.constant(w))),
lambda:t-tf.constant(1),
lambda:tf.constant(h*w))
time = tf.constant(0)
def body(time, outputs_ta, states_ta):
constant_val = tf.constant(0)
stateUp = tf.cond(tf.less_equal(tf.constant(w),time),
lambda: states_ta.read(getindex1(time,w)),
lambda: states_ta.read(h*w))
stateLast = tf.cond(tf.less(constant_val,tf.mod(time,tf.constant(w))),
lambda: states_ta.read(getindex2(time,w)),
lambda: states_ta.read(h*w))
currentState = stateUp[0],stateLast[0],stateUp[1],stateLast[1]
out , state = cell(inputs_ta.read(time),currentState)
outputs_ta = outputs_ta.write(time,out)
states_ta = states_ta.write(time,state)
return time + 1, outputs_ta, states_ta
def condition(time,outputs_ta,states_ta):
return tf.less(time , tf.constant(h*w))
result , outputs_ta, states_ta = tf.while_loop(condition, body, [time,outputs_ta,states_ta])
outputs = outputs_ta.pack()
states = states_ta.pack()
y = tf.reshape(outputs, [h,w,batch_size,rnn_size])
y = tf.transpose(y, [2,0,1,3])
if dims is not None:
y = tf.reverse(y, dims)
return y
def tanAndSum(rnn_size,input_data,scope):
outs = []
for i in range(2):
for j in range(2):
dims = [False]*4
if i!=0:
dims[1] = True
if j!=0:
dims[2] = True
outputs = multiDimentionalRNN_whileLoop(rnn_size,input_data,[2,2],
dims,scope+"-multi-l{0}".format(i*2+j))
outs.append(outputs)
outs = tf.pack(outs, axis=0)
mean = tf.reduce_mean(outs, 0)
return tf.nn.tanh(mean)
graph = tf.Graph()
with graph.as_default():
input_data = tf.placeholder(tf.float32, [20,36,90,1])
#input_data = tf.ones([20,36,90,1],dtype=tf.float32)
sh = [2,2]
out1 = tanAndSum(20,input_data,'l1')
out = tanAndSum(25,out1,'l2')
cost = tf.reduce_mean(out)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
#out = multiDimentionalRNN_raw_rnn(2,input_data,sh,dims=[False,True,True,False],scopeN="layer1")
#cell = MultiDimentionalLSTMCell(10)
#out = cell.zero_state(2, tf.float32).c
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
ou,k,_ = session.run([out,cost,optimizer],{input_data:np.ones([20,36,90,1],dtype=np.float32)})
print(ou.shape)
print(k)
You should add parameter parallel_iterations=1 to your while loop call.
Such as:
result, outputs_ta, states_ta = tf.while_loop(
condition, body, [time,outputs_ta,states_ta], parallel_iterations=1)
This is required because inside body you perform read and write operations on the same tensor array (states_ta). And in case of parallel loop execution(parallel_iterations > 1) some thread may try to read info from tensorArray, that was not written to it by another one.
I've test your code snippet with parallel_iterations=1 on tensorflow 0.12.1 and it works as expected.
tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have model already trained. I load it and call two functions:
def main(argv=None):
model = NVDM(args)
sess_saver = tf.train.Saver()
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
loaded = load_for_similar(sess, sess_saver) #my function
wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder), and normalizes them. It also reverses the order of the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
vocab_file = '/path/to/vocab.pkl'
t1 = loader_object(vocab_file)
v1 = t1.get_vocab()
v1_rev = {k:v for v, k in v1.iteritems()}
decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias in to an new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
best = argsort(sess, dists, reverse=True)
dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
idx = vocab[word_]
similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
words = tf.gather(vocab_inverse.keys(), indexes[:topN])
return (words, similarity_meas[:topN])
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted?). This code depends on utils.py, a separate utilities file. I will include that as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
""" Neural Variational Document Model -- BOW VAE.
"""
def __init__(self,
vocab_size=15000, #was 2000
n_hidden=500,
n_topic=50,
n_sample=1,
learning_rate=1e-5,
batch_size=100, #was 64
non_linearity=tf.nn.tanh):
self.vocab_size = vocab_size
self.n_hidden = n_hidden
self.n_topic = n_topic
self.n_sample = n_sample
self.non_linearity = non_linearity
self.learning_rate = learning_rate/batch_size #CA
self.batch_size = batch_size
self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings
# encoder
with tf.variable_scope('encoder'):
self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
self.logsigm = utils.linear(self.enc_vec,
self.n_topic,
bias_start_zero=True,
matrix_start_zero=False,
scope='logsigm')
self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
self.kld = self.mask*self.kld # mask paddings
with tf.variable_scope('decoder'):
if self.n_sample ==1: # single sample
p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
eps = tf.random_normal((p1, self.n_topic), 0, 1)
doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
# multiple samples
else:
eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
eps_list = tf.split(0, self.n_sample, eps)
recons_loss_list = []
for i in xrange(self.n_sample):
if i > 0: tf.get_variable_scope().reuse_variables()
curr_eps = eps_list[i]
doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
self.objective = self.recons_loss + self.kld
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
fullvars = tf.trainable_variables()
enc_vars = utils.variable_parser(fullvars, 'encoder')
dec_vars = utils.variable_parser(fullvars, 'decoder')
enc_grads = tf.gradients(self.objective, enc_vars)
dec_grads = tf.gradients(self.objective, dec_vars)
self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
available = set(np.arange(n_samples)) - used_ints #
if len(available) < batch_size:
indices = np.array(list(available))
else:
indices = np.random.choice(tuple(available), batch_size, replace=False)
used = used_ints
mb = itemgetter(*indices)(it1)
batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
batch_flattened = np.ravel(batch_xs)
index_positions = np.where(batch_flattened > 0)[0]
return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
n_samples = Instance1.num_reviews()
train_batches = list(data_set) #this is an itertools.chain object
it1_train = list(itertools.chain(*train_batches)) #length is 732,356. This is all the reviews.atch_size
if len(it1_train) % batch_size != 0:
total_batch = int(len(it1_train)/batch_size) + 1
else:
total_batch = int(len(it1_train)/batch_size)
trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
#Training
train_time = time.time()
for epoch in range(training_epochs):
for switch in xrange(0, 2):
if switch == 0:
optim = model.optim_dec
print_mode = 'updating decoder'
else:
optim = model.optim_enc
print_mode = 'updating encoder'
with open(trainfilesave, 'w') as f:
for i in xrange(alternate_epochs):
loss_sum = 0.0
kld_sum = 0.0
word_count = 0
used_indices = set()
for idx_batch in range(total_batch): #train_batches:
mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
print('minibatch', idx_batch)
used_indices.update(mb[2])
num_mb = np.ones(mb[0][0].shape[0])
input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
_, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
output_size,
no_bias=False,
bias_start_zero=False,
matrix_start_zero=False,
scope=None):
"""Define a linear connection."""
with tf.variable_scope(scope or 'Linear'):
if matrix_start_zero:
matrix_initializer = tf.constant_initializer(0)
else:
matrix_initializer = None
if bias_start_zero:
bias_initializer = tf.constant_initializer(0)
else:
bias_initializer = None
input_size = inputs.get_shape()[1].value
matrix = tf.get_variable('Matrix', [input_size, output_size],
initializer=matrix_initializer)
bias_term = tf.get_variable('Bias', [output_size],
initializer=bias_initializer)
output = tf.matmul(inputs, matrix)
if not no_bias:
output = output + bias_term
return output
def mlp(inputs,
mlp_hidden=[],
mlp_nonlinearity=tf.nn.tanh,
scope=None):
"""Define an MLP."""
with tf.variable_scope(scope or 'Linear'):
mlp_layer = len(mlp_hidden)
res = inputs
for l in xrange(mlp_layer):
res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
return res