PyTorch classifier for MNIST data not working - Python

I'm new to PyTorch, and I'm trying to train a simple classifier on MNIST data. However, my classifier's accuracy is about 10%. I tried several ways to adjust the network, but failed; the classifier's output label is always the same: all 0, or all 7, or all 6. Please tell me what is wrong with the code. (I know I should use DataLoader and will look at it later; for now I just want to get the classifier's accuracy to look good.)
# coding=utf-8
# Data: the handwritten_digit files under the data directory
import struct
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
data_folder = '../data/handwritten_digit/'
dt = torch.get_default_dtype()
train_label_file = 'train-labels-idx1-ubyte'
train_img_file = 'train-images-idx3-ubyte'
test_img_file = 't10k-images-idx3-ubyte'
test_label_file = 't10k-labels-idx1-ubyte'
model_path = './handwritten_digit_recognition_net3.pth'
def timer(func):
    def cal_time(*args, **kw):
        start_time = time.time()
        out = func(*args, **kw)
        end_time = time.time()
        print('Function ', func.__name__, ' took ', end_time-start_time, ' seconds', sep = '')
        return out
    return cal_time
def read_imgs(file):
    with open(data_folder+file, 'rb') as frb:
        # read the metadata first
        magic_num, img_num, row_num, col_num = struct.unpack('>IIII', frb.read(16))
        # print(magic_num, img_num, row_num, col_num)
        # img = np.fromfile(frb, dtype = np.uint8, count = row_num*col_num).reshape(row_num, col_num)
        # print(img, img.shape, 'img')
        imgs = np.fromfile(frb, dtype = np.uint8).reshape(img_num, row_num, col_num)
        # imgs = np.fromfile(frb, dtype = np.uint8, count = row_num*col_num*img_num).reshape(img_num, row_num, col_num)
    return torch.from_numpy(imgs).type(dt).unsqueeze(1).unsqueeze(1)

def read_labels(file):
    with open(data_folder+file, 'rb') as frb:
        # read the metadata first
        magic_num, label_num = struct.unpack('>II', frb.read(8))
        # print(magic_num, label_num)
        labels = np.fromfile(frb, dtype = np.uint8)
    return torch.from_numpy(labels).type(dt)
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 12, 5)
        self.conv2 = nn.Conv2d(12, 12, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.linear1 = nn.Linear(12*16, 30)
        self.linear2 = nn.Linear(30, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 12*16)
        # print(x.size(), 'x.size()')
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x
#timer
def train_and_save_net():
    train_imgs = read_imgs(train_img_file)
    train_labels = read_labels(train_label_file)
    test_imgs = read_imgs(test_img_file)
    test_labels = read_labels(test_label_file)
    # label = torch.zeros(1, 10)
    # label[0][int(train_labels[0])] = 1
    # print(label)
    # print(train_labels[0])
    # return
    net = Net()
    # criterion = nn.MSELoss()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)
    print('Start Training')
    sum_loss = 0
    for i, img in enumerate(train_imgs):
        optimizer.zero_grad()
        predicted = net(img)
        # label = torch.zeros(1, 10)
        # label[0][int(train_labels[i])] = 1
        label = torch.tensor([train_labels[i]], dtype = torch.long)
        # print(predicted, predicted.size(), 'predicted')
        # print(label, label.size(), 'label')
        loss = criterion(predicted, label)
        loss.backward()
        optimizer.step()
        sum_loss += loss.item()
        if i % 2000 == 1999:
            print('Trained ', i+1, ' images, progress: ', '%.2f'%((i+1)/len(train_labels)*100), '%', sep = '')
            print('loss:', sum_loss/2000)
            sum_loss = 0
    print('End Training')
    torch.save(net.state_dict(), model_path)
    print('End Saving Net Parameters')
def load_net():
    net = Net()
    net.load_state_dict(torch.load(model_path))
    return net
#timer
def evaluate():
    train_imgs = read_imgs(train_img_file)
    train_labels = read_labels(train_label_file)
    test_imgs = read_imgs(test_img_file)
    test_labels = read_labels(test_label_file)
    net = load_net()
    # quick sanity check on a few samples
    for i in range(5):
        img = train_imgs[i]
        # plt.imshow(img.squeeze(), cmap = 'gray')
        # plt.show()
        predicted_vector = net(img)
        _, predicted = torch.max(predicted_vector, 1)
        predicted = predicted.item()
        print('Predicted class: ', predicted, ', actual class: ', int(train_labels[i].item()), sep = '')
    # accuracy on the training set
    total = len(train_labels)
    correct = 0
    for i in range(len(train_labels)):
        img = train_imgs[i]
        predicted_vector = net(img)
        _, predicted = torch.max(predicted_vector, 1)
        label = int(train_labels[i].item())
        if predicted == label:
            correct += 1
    print('Training set accuracy: ', '%.2f'%(correct/total*100), '%', sep = '')
    total = len(test_labels)
    correct = 0
    pre_arr = []
    for i in range(len(test_labels)):
        img = test_imgs[i]
        predicted_vector = net(img)
        _, predicted = torch.max(predicted_vector, 1)
        label = int(test_labels[i].item())
        pre_arr.append(predicted)
        if predicted == label:
            correct += 1
    print('Test set accuracy: ', '%.2f'%(correct/total*100), '%', sep = '')
    print('Predictions equal to 0 / total predictions: ', pre_arr.count(0), '/', len(pre_arr), sep = '')

#timer
def test():
    predicted_vector = torch.randn(1,10)
    _, predicted = torch.max(predicted_vector, 1)
    print(predicted.item())
if __name__ == '__main__':
    train_and_save_net()
    # test()
    evaluate()

Well, I seem to have figured out where the problem is: I changed the learning rate from 1e-3 to 1e-4, and the accuracy reached about 97%.
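For reference, that is a one-line change in train_and_save_net(); the rest of the code stays the same. A plausible (unverified) explanation is that the images are fed in as raw 0-255 values, so gradients are large enough that lr = 1e-3 overshoots and collapses to a single predicted class:
optimizer = optim.SGD(net.parameters(), lr = 0.0001, momentum = 0.9)  # was lr = 0.001
Scaling the pixel values to [0, 1] in read_imgs would presumably also let the original 1e-3 rate work, though that is untested here.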

Related

Keras - ValueError: Could not interpret loss function identifier

I am trying to build the autoencoder structure detailed in this IEEE article. The autoencoder uses a separable loss function, which requires a custom loss function for its "cluster loss" term, computed as a function of the average output of the encoder. I create my own layer called RffConnected that calculates the cluster loss and uses the add_loss method. Otherwise, this RffConnected layer should act like a normal deep layer.
Here are my relevant code snippets:
import matplotlib.pyplot as plot
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import math
from matplotlib.figure import Figure
import tensorflow as tf
import keras
from keras import layers
import random
import time
from os import listdir
#loads data from a text file
def loadData(basePath, samplesPerFile, sampleRate):
    real = []
    imag = []
    fileOrder = []
    for file in listdir(basePath):
        if((file != "READ_ME") and ((file != "READ_ME.txt"))):
            fid = open(basePath + "\\" + file, "r")
            fileOrder.append(file)
            t = 0
            sampleEvery = samplesPerFile / sampleRate
            temp1 = []
            temp2 = []
            times = []
            for line in fid.readlines():
                times.append(t)
                samples = line.split("\t")
                temp1.append(float(samples[0]))
                temp2.append(float(samples[1]))
                t = t + sampleEvery
            real.append(temp1)
            imag.append(temp2)
            fid.close()
    real = np.array(real)
    imag = np.array(imag)
    return real, imag, times, fileOrder
#####################################################################################################
#Breaks up and randomizes data
def breakUpData(real, imag, times, numPartitions, basePath):
    if(len(real) % numPartitions != 0):
        raise ValueError("Error: The length of the dataset must be divisible by the number of partitions.")
    newReal = []
    newImag = []
    newTimes = []
    fileOrder = listdir(basePath)
    dataFiles = []
    interval = int(len(real[0]) / numPartitions)
    for i in range(0, interval):
        newTimes.append(times[i])
    for i in range(0, len(real)):
        tempI = []
        tempQ = []
        for j in range(0, len(real[0])):
            tempI.append(real[i, j])
            tempQ.append(imag[i, j])
            if((j + 1) % interval == 0):
                newReal.append(tempI)
                newImag.append(tempQ)
                #fileName = fileOrder[i][0: fileOrder[i].find("_") + 3]
                dataFiles.append(fileOrder[i])
                tempI = []
                tempQ = []
    #randomizes the broken up dataset and the file list
    for i in range(0, len(newReal)):
        r = random.randint(0, len(newReal) - 1)
        tempReal = newReal[i]
        tempImag = newImag[i]
        newReal[i] = newReal[r]
        newImag[i] = newImag[r]
        newReal[r] = tempReal
        newImag[r] = tempImag
        tempFile = dataFiles[i]
        dataFiles[i] = dataFiles[r]
        dataFiles[r] = tempFile
    #return np.array(newReal), np.array(newImag), newTimes, dataFiles
    return newReal, newImag, newTimes, dataFiles
#####################################################################################################
#custom loss layer for the RffAe-S that calculates the clustering loss term
class RffConnected(layers.Layer):
    def __init__(self, output_dim, batchSize, beta, alpha):
        super(RffConnected, self).__init__()
        # self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)
        #array = np.zeros(output_dim)
        self.iters = 0.0
        self.beta = beta
        self.alpha = alpha
        self.batchSize = batchSize
        self.output_dim = output_dim
        self.sum = tf.zeros(output_dim, tf.float64)
        self.moving_average = tf.zeros(output_dim, tf.float64)
        self.clusterloss = tf.zeros(output_dim, tf.float64)
        self.sum = tf.cast(self.sum, tf.float32)
        self.moving_average = tf.cast(self.moving_average, tf.float32)
        self.clusterloss = tf.cast(self.clusterloss, tf.float32)
        # self.sum = keras.Input(shape=(self.output_dim,))
        # self.moving_average = keras.Input(shape=(self.output_dim,))
        # self.clusterloss = keras.Input(shape=(self.output_dim,))

    def build(self, input_shape):
        self.kernel = self.add_weight(name = 'kernel', \
            shape = (int(input_shape[-1]), self.output_dim), \
            initializer = 'normal', trainable = True)
        #self.kernel = tf.cast(self.kernel, tf.float64)
        super(RffConnected, self).build(int(input_shape[-1]))

    def call(self, inputs):
        #keeps track of training epochs
        self.iters = self.iters + 1
        #inputs = tf.cast(inputs, tf.float64)
        #where this custom layer acts as a normal layer- the loss then uses this
        #calc = keras.backend.dot(inputs, self.kernel)
        calc = tf.matmul(inputs, self.kernel)
        #cumulative sum of deep encoded features
        #self.sum = state_ops.assign(self.sum, tf.reshape(tf.math.add(self.sum, calc), tf.shape(self.sum)))
        #self.sum = tf.ops.state_ops.assign(self.sum, tf.math.add(self.sum, calc))
        #self.sum.assign_add(calc)
        self.sum = tf.math.add(self.sum, calc)
        #calculate the moving average and loss if we have already trained one batch
        if(self.iters >= self.batchSize):
            self.moving_average = tf.math.divide(self.sum, self.iters)
            self.clusterloss = tf.math.exp(\
                tf.math.multiply(-1 * self.beta, tf.math.reduce_sum(tf.math.square(tf.math.subtract(inputs, self.moving_average)))))
            #self.add_loss(tf.math.multiply(self.clusterloss, self.alpha))
            self.add_loss(self.clusterloss.numpy() * self.alpha)
        return calc
#####################################################################################################
def customloss(y_true, y_pred):
    loss = tf.square(y_true - y_pred)
    print(loss)
    return loss
#####################################################################################################
realTraining = np.array(real[0:2200])
realTesting = np.array(real[2200:-1])
imagTraining = np.array(imag[0:2200])
imagTesting = np.array(imag[2200:-1])
numInputs = len(realTraining[0])
i_sig = keras.Input(shape=(numInputs,))
q_sig = keras.Input(shape=(numInputs,))
iRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(i_sig)
rff1 = keras.Model(inputs=i_sig, outputs=iRff)
qRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(q_sig)
rff2 = keras.Model(inputs=q_sig, outputs=qRff)
combined = layers.Concatenate()([iRff, qRff])
combineRff = tf.keras.layers.experimental.RandomFourierFeatures(4 * numInputs, \
kernel_initializer='gaussian', scale=10.0)(combined)
preprocess = keras.Model(inputs=[iRff, qRff], outputs=combineRff)
#print(realTraining[0:5])
preprocessedTraining = preprocess.predict([realTraining, imagTraining])
preprocessedTesting = preprocess.predict([realTesting, imagTesting])
################## Entering Encoder ######################
encoderIn = keras.Input(shape=(4*numInputs,))
#connected1 = layers.Dense(100, activation="sigmoid")(encoderIn)
clusterLossLayer = RffConnected(100, 30, 1.00, 100.00)(encoderIn)
#clusterLossLayer = myRffConnected(256)(connected1)
encoder = keras.Model(inputs=encoderIn, outputs=clusterLossLayer)
################## Entering Decoder ######################
connected2 = layers.Dense(125, activation="sigmoid")(clusterLossLayer)
relu1 = layers.ReLU()(connected2)
dropout = layers.Dropout(0.2)(relu1)
reshape1 = layers.Reshape((25, 5, 1))(dropout)
bn1 = layers.BatchNormalization()(reshape1)
trans1 = layers.Conv2DTranspose(1, (4, 2))(bn1)
ups1 = layers.UpSampling2D(size=(2, 1))(trans1)
relu2 = layers.ReLU()(ups1)
bn2 = layers.BatchNormalization()(relu2)
trans2 = layers.Conv2DTranspose(1, (4, 2))(bn2)
ups2 = layers.UpSampling2D(size=(2, 1))(trans2)
relu3 = layers.ReLU()(ups2)
bn3 = layers.BatchNormalization()(relu3)
trans3 = layers.Conv2DTranspose(1, (5, 2))(bn3)
ups3 = layers.UpSampling2D(size=(2, 1))(trans3)
relu4 = layers.ReLU()(ups3)
bn4 = layers.BatchNormalization()(relu4)
trans4 = layers.Conv2DTranspose(1, (7, 1))(bn4)
reshape2 = layers.Reshape((4*numInputs, 1, 1))(trans4)
autoencoder = keras.Model(inputs=encoderIn, outputs=reshape2)
encoded_input = keras.Input(shape=(None, 100))
decoder_layer = autoencoder.layers[-1]
#autoencoder.summary()
autoencoder.compile(optimizer='adam', loss=[autoencoder.losses[-1], customloss], metrics=['accuracy', 'accuracy'])
autoencoder.fit(preprocessedTraining, preprocessedTraining, epochs=100, batch_size=20, shuffle=True, validation_data=(preprocessedTesting, preprocessedTesting))
It runs for two training epochs and then gives me an error. This is the error I get when I run it:
ValueError: Could not interpret loss function identifier: Tensor("rff_connected_137/Const:0", shape=(100,), dtype=float32)
I've already spent a considerable amount of time debugging this thing, although if you spot any more errors I would appreciate a heads-up. Thank you in advance.
According to the Keras documentation on model training (the loss argument), the loss can be a float tensor with a specific shape (except for the sparse loss functions, which return integer arrays).
If you need to combine two loss functions, it is better to perform the mathematical calculations inside your custom loss function so that it returns a single float tensor. This reference on defining a custom Keras loss might help.
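As a rough sketch of that suggestion (not the article's actual cluster loss; the cluster term below is a simplified stand-in built from the batch mean of y_pred), both terms can be computed inside one custom loss that returns a single float tensor, which compile() accepts directly:
def combined_loss(beta=1.0, alpha=100.0):
    # Reconstruction MSE plus a simplified, hypothetical cluster penalty around the batch mean.
    def loss_fn(y_true, y_pred):
        reconstruction = tf.reduce_mean(tf.square(y_true - y_pred))
        batch_mean = tf.reduce_mean(y_pred, axis=0, keepdims=True)
        cluster = tf.math.exp(-beta * tf.reduce_sum(tf.square(y_pred - batch_mean)))
        return reconstruction + alpha * cluster
    return loss_fn

# autoencoder.compile(optimizer='adam', loss=combined_loss(), metrics=['accuracy'])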

PyTorch PPO implementation for Cartpole-v0 getting stuck in local optima

I have implemented PPO for the CartPole-v0 environment. However, it does not converge in some runs of the game and sometimes gets stuck in local optima. I have implemented the algorithm using the TD(0) advantage, i.e.
A(s_t) = R_{t+1} + \gamma V(S_{t+1}) - V(S_t)
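For concreteness, a minimal NumPy sketch of that TD(0) target and advantage (with hypothetical rewards and values lists for one episode) is:
import numpy as np

def td0_targets_and_advantages(rewards, values, gamma=0.99):
    values = np.asarray(values, dtype=float)
    next_values = np.append(values[1:], 0.0)                          # V(S_{t+1}); value after the terminal state is 0
    targets = np.asarray(rewards, dtype=float) + gamma * next_values  # critic regression targets
    advantages = targets - values                                     # A(s_t) used by the actor update
    return targets, advantages

# targets, adv = td0_targets_and_advantages([1.0, 1.0, 1.0], [0.9, 0.8, 0.7])
This mirrors what the training loop below computes with next_state_estimates and boostrap_estimate.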
Here is my code:
# imports assumed by the snippets below
import gym
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

def running_average(x, n):
    N = n
    kernel = np.ones(N)
    conv_len = x.shape[0]-N
    y = np.zeros(conv_len)
    for i in range(conv_len):
        y[i] = kernel @ x[i:i+N]  # matrix multiplication operator (np.matmul)
        y[i] /= N
    return y
class ActorNetwork(nn.Module):
    def __init__(self, state_dim, n_actions, learning_rate=0.0003, epsilon_clipping=0.3, update_epochs=10):
        super().__init__()
        self.n_actions = n_actions
        self.model = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, n_actions),
            nn.Softmax(dim=-1)
        ).float()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.epsilon_clipping = epsilon_clipping
        self.update_epochs = update_epochs

    def forward(self, X):
        return self.model(X)

    def predict(self, state):
        if state.ndim < 2:
            action_probs = self.model(torch.FloatTensor(state).unsqueeze(0).float())
        else:
            action_probs = self.model(torch.FloatTensor(state))
        return action_probs.squeeze(0).data.numpy()

    def update(self, states, actions, deltas, old_prob):
        batch_size = len(states)
        state_batch = torch.Tensor(states)
        action_batch = torch.Tensor(actions)
        delta_batch = torch.Tensor(deltas)
        old_prob_batch = torch.Tensor(old_prob)
        for k in range(self.update_epochs):
            pred_batch = self.model(state_batch)
            prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
            ratio = torch.exp(torch.log(prob_batch) - torch.log(old_prob_batch))
            clipped = torch.clamp(ratio, 1 - self.epsilon_clipping, 1 + self.epsilon_clipping) * delta_batch
            loss_r = -torch.min(ratio*delta_batch, clipped)
            loss = torch.mean(loss_r)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
class CriticNetwork(nn.Module):
    def __init__(self, state_dim, learning_rate=0.001):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ).float()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def forward(self, X):
        return self.model(X)

    def predict(self, state):
        if state.ndim < 2:
            values = self.model(torch.FloatTensor(state).unsqueeze(0).float())
        else:
            values = self.model(torch.FloatTensor(state))
        return values.data.numpy()

    def update(self, states, targets):
        state_batch = torch.Tensor(states)
        target_batch = torch.Tensor(targets)
        pred_batch = self.model(state_batch)
        loss = torch.nn.functional.mse_loss(pred_batch, target_batch.unsqueeze(1))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
def train_ppo_agent(env, episode_length, max_episodes, gamma, visualize_step, learning_rate_actor=0.0003, learning_rate_critic=0.001, epsilon_clipping=0.2, actor_update_epochs=10):
    model_actor = ActorNetwork(env.observation_space.shape[0], env.action_space.n, learning_rate=learning_rate_actor,
                               epsilon_clipping=epsilon_clipping, update_epochs=actor_update_epochs)
    model_critic = CriticNetwork(env.observation_space.shape[0], learning_rate=learning_rate_critic)
    EPISODE_LENGTH = episode_length
    MAX_EPISODES = max_episodes
    GAMMA = gamma
    VISUALIZE_STEP = max(1, visualize_step)
    score = []
    for episode in range(MAX_EPISODES):
        curr_state = env.reset()
        done = False
        all_episode_t = []
        score_episode = 0
        for t in range(EPISODE_LENGTH):
            act_prob = model_actor.predict(curr_state)
            action = np.random.choice(np.array(list(range(env.action_space.n))), p=act_prob)
            value = model_critic.predict(curr_state)
            prev_state = curr_state
            curr_state, reward, done, info = env.step(action)
            score_episode += reward
            e_t = {'state': prev_state, 'action':action, 'action_prob':act_prob[action],'reward': reward, 'value': value}
            all_episode_t.append(e_t)
            if done:
                break
        score.append(score_episode)
        episode_values = [all_episode_t[t]['value'] for t in range(len(all_episode_t))]
        next_state_estimates = [episode_values[i].item() for i in range(1, len(episode_values))]
        next_state_estimates.append(0)
        boostrap_estimate = []
        for t in range(len(all_episode_t)):
            G = all_episode_t[t]['reward'] + GAMMA * next_state_estimates[t]
            boostrap_estimate.append(G)
        episode_target = np.array(boostrap_estimate)
        episode_values = np.array(episode_values)
        # compute the advantage for each state in the episode: R_{t+1} + \gamma * V(S_{t+1}) - V_{t}
        adv_batch = episode_target-episode_values
        state_batch = np.array([all_episode_t[t]['state'] for t in range(len(all_episode_t))])
        action_batch = np.array([all_episode_t[t]['action'] for t in range(len(all_episode_t))])
        old_actor_prob = np.array([all_episode_t[t]['action_prob'] for t in range(len(all_episode_t))])
        model_actor.update(state_batch, action_batch, adv_batch, old_actor_prob)
        model_critic.update(state_batch, episode_target)
        # print the status after every VISUALIZE_STEP episodes
        if episode % VISUALIZE_STEP == 0 and episode > 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(episode, np.mean(score[-VISUALIZE_STEP:-1])))
            # domain knowledge applied to stop training: if the average score across last 100 episodes is greater than 195, game is solved
            if np.mean(score[-100:-1]) > 195:
                break
    # Training plot: Episodic reward over Training Episodes
    score = np.array(score)
    avg_score = running_average(score, visualize_step)
    plt.figure(figsize=(15, 7))
    plt.ylabel("Episodic Reward", fontsize=12)
    plt.xlabel("Training Episodes", fontsize=12)
    plt.plot(score, color='gray', linewidth=1)
    plt.plot(avg_score, color='blue', linewidth=3)
    plt.scatter(np.arange(score.shape[0]), score, color='green', linewidth=0.3)
    plt.savefig("temp/cartpole_ppo_training_plot.pdf")
    # return the trained models
    return model_actor, model_critic
def main():
    env = gym.make('CartPole-v0')
    episode_length = 300
    n_episodes = 5000
    gamma = 0.99
    vis_steps = 100
    learning_rate_actor = 0.0003
    actor_update_epochs = 10
    epsilon_clipping = 0.2
    learning_rate_critic = 0.001
    # train the PPO agent
    model_actor, model_critic = train_ppo_agent(env, episode_length, n_episodes, gamma, vis_steps,
                                                learning_rate_actor=learning_rate_actor,
                                                learning_rate_critic=learning_rate_critic,
                                                epsilon_clipping=epsilon_clipping,
                                                actor_update_epochs=actor_update_epochs)
Am I missing something, or is this kind of behaviour expected if one uses simple TD(0) advantages for PPO, given the nature of the CartPole environment?
If you remove the "-" (the minus sign) in the line:
loss_r = -torch.min(ratio*delta_batch, clipped)
the score will then start to steadily increase over time. Before this fix you had a negative loss which would increase over time. That is not how a loss should behave for neural networks, since gradient descent works to minimize the loss, so you want a positive loss that the optimizer can minimize.
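Concretely, the suggested edit to ActorNetwork.update() is just (per this answer, shown only to make the change explicit):
loss_r = torch.min(ratio*delta_batch, clipped)   # minus sign removed
loss = torch.mean(loss_r)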
Hope my answer is somewhat clear, and sorry I cannot go into deeper detail.
My run can be seen in the attached image:

Triplet learning with Keras produces unexpected results

I am trying to implement triplet learning in Keras.
Here are two versions of get_triplet() I used:
"Trivial":
def get_triplet(nSplitIdx, bIsTrain):
    positiveClass = np.random.choice(arrLabels)
    # Depending train or validate, select range.
    # say we have 10 images per class, and 70% goes to train. Then 0-6 (train); 7-9 (valid, at least 3)
    if(bIsTrain):
        nMinIdx = 0
        nMaxIdx = nSplitIdx - 1
    else:
        nMinIdx = nSplitIdx
        nMaxIdx = NUM_OF_IMAGES_PER_CLASS - 1 - TESTING_IMAGES_PER_CLASS
    # Get 3 indices: for base image and for positive example, from same class. And one more for negative example.
    # TBD: figure (test) if SAME image should be used in a positive pair, like [img[1], img[1]]?
    nImageIdx = np.random.choice(range(nMinIdx, nMaxIdx), 3)
    while nImageIdx[0] == nImageIdx[1]:
        nImageIdx[1] = np.random.choice(range(nMinIdx, nMaxIdx))
    negativeClass = np.random.choice(arrLabels)
    while negativeClass['Id'] == positiveClass['Id']:
        negativeClass = np.random.choice(arrLabels)
    negativeFileName = negativeClass['ImageNames'][nImageIdx[2]]
    # nImageIdx is an array of 3 indexes: base and positive in positiveClass and negative in negativeClass.
    # Ex: positiveClass[nImageIdx[0], positiveClass[nImageIdx[1]], negativeClass[nImageIdx[2]]
    return nImageIdx, positiveClass, negativeClass
and "semi-hard" (though not so elegant):
# Used to mine ONLY
def getDistance(a, b):
    distance = (a - b)**2
    distance = sum(distance)
    return distance

graph = tf.get_default_graph()

def getExampleIdx(positiveClass, negativeClass, bIsPositive, nAncorIdx):
    global embedding_model
    # tensorflow model is not loaded and used in the same thread. One workaround is to force tensorflow to use the global default graph.
    global graph
    with graph.as_default():
        imgAncor = positiveClass['Images'][nAncorIdx]
        if(bIsPositive):
            cClass = positiveClass
        else:
            cClass = negativeClass
        arrExamples = []
        arrExamples.append(np.array(imgAncor, dtype="float32") / 255.) # Add ancor image as 0th element
        if(bIsPositive):
            for i in range(TRAINING_IMAGES_PER_CLASS):
                # img = loadImage(cClass, i, datagen)
                # arrExamples.append(np.array(img, dtype="float32") / 255.)
                # Note: no augmentation here
                img = cClass['Images'][i]
                arrExamples.append(np.array(img, dtype="float32") / 255.)
        else:
            # For a batch
            if(BATCH_SIZE > len(arrLabels)):
                nMinRange = len(arrLabels)
                nStartClassIdx = 0
            else:
                nMinRange = BATCH_SIZE + 1
                nStartClassIdx = np.random.choice(range(len(arrLabels) - nMinRange))
            for nClassId in range(nStartClassIdx, nStartClassIdx + nMinRange):
                cClass = arrLabels[nClassId]
                if cClass['Id'] == positiveClass['Id']:
                    continue
                # We do it to speed it up, but generally, it should be uncommented: for instead of rand
                i = np.random.choice(range(TRAINING_IMAGES_PER_CLASS))
                #for i in range(TRAINING_IMAGES_PER_CLASS):
                # Note: no augmentation here
                img = cClass['Images'][i]
                arrExamples.append(np.array(img, dtype="float32") / 255.)
        arrPredictionsPos = embedding_model.predict([arrExamples])
        # Get distances between 0th predictions and other predictions
        arrDistancesPos = []
        for i in range(1, len(arrPredictionsPos)):
            arrDistancesPos.append(getDistance(arrPredictionsPos[0], arrPredictionsPos[i]))
            #print("{}, ".format(arrDistancesPos[i - 1]))
        if(bIsPositive):
            #print("Positive: {}, ".format(arrDistancesPos[np.array(arrDistancesPos).argmax()]))
            return np.array(arrDistancesPos).argmax(), positiveClass
        else:
            #print("Negative: {}, ".format(arrDistancesPos[np.array(arrDistancesPos).argmin()]))
            nNegativeIdx = np.array(arrDistancesPos).argmin() % TRAINING_IMAGES_PER_CLASS
            return nNegativeIdx, arrLabels[nStartClassIdx + nNegativeIdx // TRAINING_IMAGES_PER_CLASS]
# ---
def get_triplet(nSplitIdx, bIsTrain):
    # Select random class
    positiveClass = np.random.choice(arrLabels)
    negativeClass = np.random.choice(arrLabels)
    while negativeClass['Id'] == positiveClass['Id']:
        negativeClass = np.random.choice(arrLabels)
    # Depending train or validate, select range.
    # say we have 10 images per class, and 70% goes to train. Then 0-6 (train); 7-9 (valid, at least 3)
    if(bIsTrain):
        nMinIdx = 0
        nMaxIdx = nSplitIdx
    else:
        nMinIdx = nSplitIdx
        nMaxIdx = NUM_OF_IMAGES_PER_CLASS - TESTING_IMAGES_PER_CLASS
    arrImageIdx = np.random.choice(range(nMinIdx, nMaxIdx), 3)
    if(bIsTrain):
        arrImageIdx[1], positiveClass = getExampleIdx(positiveClass, positiveClass, True, arrImageIdx[0])
        arrImageIdx[2], negativeClass = getExampleIdx(positiveClass, negativeClass, False, arrImageIdx[0])
    else:
        while arrImageIdx[0] == arrImageIdx[1]:
            arrImageIdx[1] = np.random.choice(range(nMinIdx, nMaxIdx))
    #negativeFileName = negativeClass['ImageNames'][arrImageIdx[2]]
    # nImageIdx is an array of 3 indexes: base and positive in positiveClass and negative in negativeClass.
    # Ex: positiveClass[nImageIdx[0], positiveClass[nImageIdx[1]], negativeClass[nImageIdx[2]]
    return arrImageIdx, positiveClass, negativeClass
Now, the generator:
from time import time
#t0 = time()
#t1 = time()
#print('get_triplet takes %f' %(t1-t0))
def gen(bIsTrain):
    #nSplitIdx = int(NUM_OF_IMAGES_PER_CLASS * TESTING_SPLIT)
    while True:
        arrBaseExamples = []
        arrPositiveExamples = []
        arrNegativeExamples = []
        for i in range(BATCH_SIZE):
            nImageIdx, positiveClass, negativeClass = get_triplet(TRAINING_IMAGES_PER_CLASS, bIsTrain)
            #t0 = time()
            baseExampleImg = loadImage(positiveClass, nImageIdx[0], datagen)
            positiveExampleImg = loadImage(positiveClass, nImageIdx[1], datagen)
            negativeExampleImg = loadImage(negativeClass, nImageIdx[2], datagen)
            #t1 = time()
            #print('loaded in %f' %(t1-t0))
            arrBaseExamples.append(baseExampleImg)
            arrPositiveExamples.append(positiveExampleImg)
            arrNegativeExamples.append(negativeExampleImg)
        #base = preprocess_input(np.array(arrBaseExamples)) / 255. #'a' #preprocess_input(np.array(arrBaseExamples))
        base = np.array(arrBaseExamples) / 255.
        #train_datagen.fit(base)
        #positive = preprocess_input(np.array(arrPositiveExamples)) / 255.
        positive = np.array(arrPositiveExamples) / 255.
        #train_datagen.fit(positive)
        #negative = preprocess_input(np.array(arrNegativeExamples)) / 255.
        negative = np.array(arrNegativeExamples) / 255.
        #train_datagen.fit(negative)
        label = None
        yield ({'anchor_input': base, 'positive_input': positive, 'negative_input': negative}, label)
and finally, triplet_loss function (stolen from Kaggle):
ALPHA = 0.2 # Triplet Loss Parameter
def triplet_loss(inputs, dist='sqeuclidean', margin='maxplus'):
    anchor, positive, negative = inputs
    positive_distance = K.square(anchor - positive)
    negative_distance = K.square(anchor - negative)
    if dist == 'euclidean':
        positive_distance = K.sqrt(K.sum(positive_distance, axis=-1, keepdims=True))
        negative_distance = K.sqrt(K.sum(negative_distance, axis=-1, keepdims=True))
    elif dist == 'sqeuclidean':
        positive_distance = K.sum(positive_distance, axis=-1, keepdims=True)
        negative_distance = K.sum(negative_distance, axis=-1, keepdims=True)
    loss = positive_distance - negative_distance + ALPHA
    if margin == 'maxplus':
        loss = K.maximum(0.0, 1 + loss)
    elif margin == 'softplus':
        loss = K.log(1 + K.exp(loss))
    return K.mean(loss)
The model is "transfer learning":
def createModel(nL2):
    base_model = ResNet50(weights='imagenet', include_top=False, pooling='max')
    for layer in base_model.layers:
        layer.trainable = False
    x = base_model.output
    x = Dropout(0.6, name="classifier_dropout")(x)
    x = Dense(EMBEDDING_DIM, activation='relu', name="classifier_dense_0", kernel_regularizer=regularizers.l2(nL2))(x)
    x = Dense(EMBEDDING_DIM, activation='relu', name="classifier_dense_1", kernel_regularizer=regularizers.l2(nL2))(x)
    x = Dense(EMBEDDING_DIM, activation='softmax', name="classifier_dense", kernel_regularizer=regularizers.l2(nL2))(x)
    x = Lambda(lambda x: K.l2_normalize(x,axis=1), name="lambda")(x)
    embedding_model = Model(base_model.input, x, name="embedding")
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')
    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)
    inputs = [anchor_input, positive_input, negative_input]
    outputs = [anchor_embedding, positive_embedding, negative_embedding]
    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))
    return embedding_model, triplet_model
I create and train it on four dog breeds from Kaggle (the original dataset has 120; I only use four):
BATCH_SIZE = 16
EPOCHS = 200
arrParams = [[0.8, 3]]
checkpoint = ModelCheckpoint(best_weights_filepath, monitor="val_loss", save_best_only=True, save_weights_only=True, mode='auto')
callbacks_list = [checkpoint] # , early]
gen_train = gen(True)
gen_valid = gen(False)
for i in range(0, len(arrParams)):
    nL2 = arrParams[i][0]
    EMBEDDING_DIM = arrParams[i][1]
    deleteSavedNet(best_weights_filepath)
    embedding_model, triplet_model = createModel(nL2)
    nNumOfClasses = len(arrLabels)
    nNumOfTrainSamples = TRAINING_IMAGES_PER_CLASS * nNumOfClasses
    nNumOfValidSamples = VALIDATION_IMAGES_PER_CLASS * nNumOfClasses
    STEP_SIZE_TRAIN = nNumOfTrainSamples // BATCH_SIZE
    if(STEP_SIZE_TRAIN == 0):
        STEP_SIZE_TRAIN = 1
    STEP_SIZE_VALID = nNumOfValidSamples // BATCH_SIZE
    if(STEP_SIZE_VALID == 0):
        STEP_SIZE_VALID = 1
    triplet_model.compile(loss=None, optimizer="adam", metrics=['binary_accuracy']) #metrics=['accuracy'])
    history = triplet_model.fit_generator(gen_train, validation_data=gen_valid,
                                          epochs=EPOCHS, verbose=1, steps_per_epoch=STEP_SIZE_TRAIN, validation_steps=STEP_SIZE_VALID, callbacks=callbacks_list)
    print(nL2, EMBEDDING_DIM)
    plotHistoryLoss()
It learns:
deleteSavedNet():File removed
Initializing model
Finished initializing model
Epoch 1/200
6/6 [==============================] - 44s 7s/step - loss: 8.0306 - val_loss: 7.6388
Epoch 2/200
6/6 [==============================] - 15s 2s/step - loss: 7.0082 - val_loss: 6.7307
...
Epoch 200/200
6/6 [==============================] - 15s 3s/step - loss: 0.7046 - val_loss: 0.7043
But (notice I used a 3-dimensional embedding) when I plot the results, I see a line, not clusters:
nL2 = arrParams[0][0]
EMBEDDING_DIM = arrParams[0][1]
embedding_model, triplet_model = createModel(nL2)
loadBestModel()
def data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
    i = 0
    arrImages = []
    arrImageLabels = []
    arrImageClasses = []
    for nImageIdx in range(len(arrAllImages)):
        if(i == 0):
            arrImages = []
            arrImageLabels = []
            arrImageClasses = []
        i += 1
        arrImg = img_to_array(arrAllImages[nImageIdx])
        arrImg = datagen.random_transform(arrImg) / 255.
        arrImg = np.array(arrImg, dtype="float32")
        arrImages.append(arrImg)
        arrImageLabels.append(arrAllImageLabels[nImageIdx])
        arrImageClasses.append(arrAllImageClasses[nImageIdx])
        if i == BATCH_SIZE:
            i = 0
            arrImages = np.array(arrImages)
            yield arrImages, arrImageLabels, arrImageClasses
    if i != 0:
        arrImages = np.array(arrImages)
        yield arrImages, arrImageLabels, arrImageClasses
    raise StopIteration()
arrAllImages = []
arrAllImageLabels = []
arrAllImageClasses = []
for cClass in arrLabels:
    for nIdx in range(TRAINING_IMAGES_PER_CLASS):
        arrAllImages.append(cClass['Images'][nIdx])
        arrAllImageLabels.append(cClass['ImageNames'][nIdx])
        arrAllImageClasses.append(cClass['Id'])
train_preds = []
train_file_names = []
train_class_names = []
np.random.seed(7)
for imgs, fnames, classes in data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
    predicts = embedding_model.predict(imgs)
    predicts = predicts.tolist()
    train_preds += predicts
    train_file_names += fnames
    train_class_names += classes
train_preds = np.array(train_preds)
arrAllImages = []
arrAllImageLabels = []
arrAllImageClasses = []
for cClass in arrLabels:
    #for nIdx in range(TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS, TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS + TESTING_IMAGES_PER_CLASS):
    #for nIdx in range(TRAINING_IMAGES_PER_CLASS):
    for nIdx in range(TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS, TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS + TESTING_IMAGES_PER_CLASS):
        arrAllImages.append(cClass['Images'][nIdx])
        arrAllImageLabels.append(cClass['ImageNames'][nIdx])
        arrAllImageClasses.append(cClass['Id'])
test_preds = []
test_file_names = []
test_class_names = []
np.random.seed(7)
for imgs, fnames, classes in data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
    predicts = embedding_model.predict(imgs)
    predicts = predicts.tolist()
    test_preds += predicts
    test_file_names += fnames
    test_class_names += classes
test_preds = np.array(test_preds)
neigh = NearestNeighbors(n_neighbors=6)
neigh.fit(train_preds)
#neigh.fit(arrTrainingClasterCenters)
distances_test, neighbors_test = neigh.kneighbors(test_preds)
distances_test, neighbors_test = distances_test.tolist(), neighbors_test.tolist()
data = pd.read_csv(working_path + "DogRecognizer/dogs/train_dogs.csv")
file_id_mapping = {k: v for k, v in zip(data.Image.values, data.Id.values)}
preds_str = []
arrSearchPositions = []
for filepath, distance, neighbour_ in zip(test_file_names, distances_test, neighbors_test):
    sample_result = []
    sample_classes = []
    for d, n in zip(distance, neighbour_):
        #class_train = arrLabels[n]['Id']
        train_file = train_file_names[n].split(os.sep)[-1]
        class_train = file_id_mapping[train_file]
        sample_classes.append(class_train)
        sample_result.append((class_train, d))
    #if "new_whale" not in sample_classes:
    #    sample_result.append(("new_whale", 0.1))
    sample_result.sort(key=lambda x: x[1], reverse=True)
    sample_result = sample_result[:5]
    preds_str.append(" ".join([x[0] for x in sample_result]))
nTotalSuccess = 0
for i, strClassNames in enumerate(preds_str):
    if(test_class_names[i] in strClassNames):
        strContains = ": Yes"
        nTotalSuccess += 1
    else:
        strContains = ": No"
    #print(test_class_names[i], ": ", strClassNames, " (", strContains, ")")
print ("Success rate: ", nTotalSuccess / (i+1) )
import pylab as pl
from sklearn import neighbors, datasets
import matplotlib.cm as cm
h = 0.02
knn=neighbors.KNeighborsClassifier(n_neighbors=6)
knn.fit(test_preds, test_class_names)
x_min, x_max = test_preds[:,0].min() - .01, test_preds[:,0].max() + .02
y_min, y_max = test_preds[:,1].min() - .01, test_preds[:,1].max() + .02
z_min, z_max = test_preds[:,2].min() - .01, test_preds[:,2].max() + .01
xx, yy, zz = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h), np.arange(z_min, z_max, h))
#Z = knn.predict(np.c_[xx.ravel(), yy.ravel(), zz.ravel()])
color_space = []
for i in range(len(test_class_names)):
    if(test_class_names[i] == 'pembroke'):
        color_space.append('red')
        #test_preds[:,1][i] += 0.01
    elif(test_class_names[i] == 'maltese_dog'):
        color_space.append('green')
        #test_preds[:,1][i] += 0.02
    elif(test_class_names[i] == 'gordon_setter'):
        color_space.append('orange')
        #test_preds[:,1][i] += 0.03
    else:
        color_space.append('blue')
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D
import random
fig = pyplot.figure(figsize=(16, 8))
ax = Axes3D(fig)
pl.set_cmap(pl.cm.Paired)
ax.scatter(test_preds[:,0], test_preds[:,1], test_preds[:,2], c= color_space)
pyplot.show()
Here is the plot:
Why is it a line, not clusters?

LSTM RNN from scratch giving bad results

This is my code, but the output is really bad. By that I mean the values are way off from what they are supposed to be, and the testing error is very high; if I de-normalize the values and compare the difference, it's massive.
I have two questions:
1) Can anyone tell me why this is happening and what I can do to make it perform better?
2) When the values go through so many functions, how do I get the output back into the original format?
I am new to this and jumped into a complex topic immediately, so I know my code isn't the best; if you could tell me how to improve it, that would be great as well. Anyway, please bear with me!
The data I used was a list of multiples of two.
PS: when I used the TensorFlow models like dynamic_rnn(), the output I got was accurate, and I just had to denormalize the output to get the number in the original format (the correct size, that is). How does just denormalizing it give the right output? I don't get that!
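On question 2: since the targets are min-max normalized before training, the prediction comes out on the same 0-1 scale, and applying the inverse transform (what de_normalize() in the code below does) recovers the original scale. A tiny sketch with made-up numbers:
import numpy as np

data = np.array([2.0, 4.0, 6.0, 8.0, 10.0])             # e.g. a list of multiples of two
d_min, d_max = data.min(), data.max()
normalized = (data - d_min) / (d_max - d_min)            # what the network trains on
prediction = 0.75                                        # hypothetical normalized model output
original_scale = prediction * (d_max - d_min) + d_min    # same formula as de_normalize(value, max, min)
print(original_scale)                                    # -> 8.0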
Thanks!
# LSTM [ Many to One ]
# START
# imports
import csv
import numpy as np
import tensorflow as tf
import sys
import os
import json
from random import shuffle
from tensorflow.python import debug as tf_debug
# CALCULATE ALL POSSIBLE BATCH SIZES
def calculate_batch_sizes(n_train):
    batch_sizes = []
    for i in range(2, int(n_train/2)):
        if n_train % i == 0 and n_train / i > 1:
            batch_sizes.append(i)
    return batch_sizes

def de_normalize(value, m1, m2):
    return (value*(m1-m2)) + m2
class lstm_network():
    name = "lstm_"

    # initialization function
    def __init__(self, config_params):
        self.sequence_length = config_params["sequence_length"]
        self.batch_size = config_params["batch_size"]
        self.hidden_layers_size = config_params["hidden_layers_size"]
        self.data_path = config_params["data_path"]
        self.n_epochs = config_params["no_of_epochs"]
        self.learning_rate = config_params["learning_rate"]
        self.w_igate, self.w_fgate, self.w_ogate, self.w_cgate = tf.get_variable('w_igate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('w_fgate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('w_ogate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('w_cgate', shape = [self.sequence_length, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer())
        self.u_igate, self.u_fgate, self.u_ogate, self.u_cgate = tf.get_variable('u_igate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('u_fgate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('u_ogate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer()), tf.get_variable('u_cgate', shape = [self.hidden_layers_size, self.hidden_layers_size], initializer = tf.contrib.layers.xavier_initializer())
        self.outputs = [0.0] * self.batch_size
        self.testing_loss = float(0)
        self.training_loss = float(0)
        self.ft, self.ct, self._ct, self.it = [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size)
        self.ot, self.ht, self.ct_prev, self.ht_prev = [0.0]*(self.hidden_layers_size), [0.0]*(self.hidden_layers_size), np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size), np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
        self.w_output_layer = tf.get_variable('w_output_layer', shape = [self.hidden_layers_size, 1], initializer = tf.contrib.layers.xavier_initializer())
        print("\n Object of class lstm_network initialized with the given configuration")
    # print values function
    def print_model_info(self):
        print("\n\n\n\t\t MODEL INFORMATION\n\n")
        print("\n Weights of the LSTM layer: ")
        print("\n\n input Gate Weights: \n w: ", self.w_igate,"\n u: ", self.u_igate)
        print("\n\n Forget Gate Weights: \n w: ", self.w_fgate,"\n u: ", self.u_fgate)
        print("\n\n Context Gate Weights: \n w: ", self.w_cgate,"\n u: ", self.u_cgate)
        print("\n\n Output Gate Weights: \n w: ", self.w_ogate,"\n u: ", self.u_ogate)
        print("\n\n Average loss while training: ", self.training_loss)
        print("\n\n Average loss while testing: ", self.testing_loss)
    # loading function
    def load_data(self):
        with open(self.data_path, 'r') as data_file:
            data_reader = csv.reader(data_file, delimiter = ',')
            self.data = [float(row[1]) for row in data_reader]
        self.data_max, self.data_min, self.n_data = float(max(self.data)), float(min(self.data)), len(self.data)
        for i in range(len(self.data)):
            self.data[i] = float( (self.data[i]-self.data_min)/(self.data_max-self.data_min) )
        self.data_x = [ self.data[i:i+self.sequence_length] for i in range(self.n_data - self.sequence_length-1)]
        self.data_y = [ self.data[i] for i in range(self.sequence_length+1, self.n_data)]
        self.n_data = len(self.data_x)
        temp = list(zip(self.data_x,self.data_y))
        shuffle(temp)
        test_size = 0.25
        self.data_x, self.data_y = zip(*temp)
        self.trainx, self.trainy, self.testx, self.testy = self.data_x[:-int(test_size*self.n_data)], self.data_y[:-int(test_size*self.n_data)], self.data_x[-int(test_size*self.n_data):], self.data_y[-int(test_size*self.n_data):]
        self.n_train, self.n_test = len(self.trainx), len(self.testx)
        batch_sizes = []
        batch_sizes.extend(calculate_batch_sizes(self.n_train))
        while self.batch_size not in batch_sizes:
            print("\n batch size provided in the initial configuration cannot be used, please select one from the following batch sizes:\n",batch_sizes)
            self.batch_size = int(input("\n enter a batch size: "))
        self.n_train_batches = int( self.n_train/self.batch_size )
        self.trainx, self.trainy, self.testx, self.testy = np.float32(self.trainx), np.float32(self.trainy), np.float32(self.testx), np.float32(self.testy)
        self.trainx_batches, self.trainy_batches = self.trainx.reshape(self.n_train_batches, self.batch_size, self.sequence_length), self.trainy.reshape(self.n_train_batches,self.batch_size, 1)
        print("\n data loaded succesfully")
    # graph building and training function
    def build_graph_train(self):
        outputs = [0.0]*self.batch_size #tf.placeholder(tf.float32, shape = [1, self.batch_size])
        x = self.trainx_batches
        ht_prev = tf.reshape(np.float32([0]*(self.hidden_layers_size)), [1, self.hidden_layers_size]) #tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size], name = 'ht_prev')
        ct_prev = tf.reshape(np.float32([0]*(self.hidden_layers_size)), [1, self.hidden_layers_size]) #tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size], name = 'ct_prev')
        self.ht_prev = np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
        self.ct_prev = np.array([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
        for i1 in range(self.n_train_batches):
            for i2 in range(self.batch_size):
                #self.ht_prev = [self.ht_prev[i:i+9] for i in range(0, self.hidden_layers_size, 9)]
                self.ft = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_fgate) + tf.matmul(ht_prev, self.u_fgate) )
                self.it = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_igate) + tf.matmul(ht_prev, self.u_igate) )
                self.ot = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_ogate) + tf.matmul(ht_prev, self.u_ogate) )
                self._ct = tf.sigmoid( tf.matmul(tf.reshape(x[i1][i2], [1, self.sequence_length]), self.w_cgate) + tf.matmul(ht_prev, self.u_cgate) )
                self.ct = tf.tanh(tf.multiply(self.ft, ct_prev) + tf.multiply(self.it, self._ct))
                self.ht = tf.multiply(self.ot, self.ct)
                ht_prev = self.ht
                ct_prev = self.ct
                outputs[i2] = tf.nn.relu( tf.matmul(self.ht, self.w_output_layer) )
            loss = tf.reduce_mean(tf.square(tf.subtract(outputs, self.trainy_batches[i1])))
        self.ht_prev = ht_prev
        self.ct_prev = ct_prev
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)
        print("\n Graph built \n\n Now training begins...\n")
        #training
        i = 0
        avg_loss = float(0)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            for ep in range(self.n_epochs + 1):
                #ht_prev = np.float32([0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
                #ct_prev = np.float32([0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size)
                #loss.eval( feed_dict= { x: np.float32(self.trainx_batches).reshape(self.n_train_batches, self.batch_size, self.sequence_length) })
                sess.run(self.train_op) #, feed_dict= { x: np.float32(self.trainx_batches).reshape(self.n_train_batches, self.batch_size, self.sequence_length) } )#, ht_prev: np.float32([0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size), ct_prev: np.float32([0.0]*(self.hidden_layers_size)).reshape(1, self.hidden_layers_size) })
                if ep % 10 == 0:
                    i += 1
                    mse = loss.eval() # feed_dict= { x: np.float32(self.trainx_batches).reshape(self.n_train_batches, self.batch_size, self.sequence_length) })
                    avg_loss = float(avg_loss + mse)
                    print("\n Epoch: ", ep, "\t Loss: ", mse)
            avg_loss = float(avg_loss/i)
            self.training_loss = avg_loss
            print("\n Training Loss: ", avg_loss)
    # Predict function
    def predict(self):
        print("\n testing begins...")
        x_test_row = tf.placeholder(tf.float32, shape = [1, self.sequence_length])
        avg_error = float(0)
        input_row = []
        output_row = 0.0
        predictions = []
        #ht_prev = tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size]) # ht_prev = tf.varaible(self.ht_prev)
        #ct_prev = tf.placeholder(tf.float32, shape = [1, self.hidden_layers_size]) # ct_prev = tf.varaible(self.ct_prev)
        # one forward pass
        self.ft = tf.sigmoid( tf.matmul(x_test_row, self.w_fgate) + tf.matmul(self.ht_prev, self.u_fgate) )
        self.it = tf.sigmoid( tf.matmul(x_test_row, self.w_igate) + tf.matmul(self.ht_prev, self.u_igate ) )
        self.ot = tf.sigmoid( tf.matmul(x_test_row, self.w_ogate) + tf.matmul(self.ht_prev, self.u_ogate) )
        self._ct = tf.sigmoid( tf.matmul(x_test_row, self.w_cgate) + tf.matmul(self.ht_prev, self.u_cgate) )
        self.ct = tf.tanh(tf.multiply(self.ft, self.ct_prev) + tf.multiply(self.it, self._ct))
        self.ht = tf.multiply(self.ot,self.ct)
        pred_output = tf.nn.relu( tf.matmul(self.ht, self.w_output_layer) )
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            print("\n loaded the variables")
            for i1 in range(self.n_test):
                del input_row[:]
                output_row = float(self.testy[i1])
                for i2 in range(self.sequence_length):
                    input_row.append(self.testx[i1][i2])
                #sess.run(pred_output, feed_dict = { x_test_row: np.array(input_row).reshape(1, self.sequence_length), ht_prev:self.ht_prev, ct_prev: self.ct_prev })
                predictions.append([pred_output.eval(feed_dict = { x_test_row: np.float32(input_row).reshape(1, self.sequence_length) }), output_row])
                avg_error += abs(predictions[i1][0] - output_row)
            avg_error = float(avg_error/i1)
            self.testing_loss = avg_error
        print("\n testing Error: ", avg_error)
        return np.array(predictions)
    # save model function
    def save_model(self):
        print("\n\n model's information saved in model_info.txt and weights stored in model.json\n\n")
        f = open("model.json", "w+")
        model_dict = { 'w_output_layer': self.w_output_layer, 'w_igate': self.w_igate, 'u_igate': self.u_igate, 'w_fgate': self.w_fgate, 'u_fgate': self.u_fgate, 'w_cgate': self.w_cgate, 'u_cgate': self.u_cgate, 'w_ogate': self.w_ogate, 'u_ogate': self.u_ogate }
        f.write(str(model_dict))
        f.close()
# main function()
def main():
    # parameters of the network
    config_params = dict()
    config_params["sequence_length"] = 3
    config_params["batch_size"] = 33
    config_params["hidden_layers_size"] = 9
    config_params["data_path"] = "data.csv"
    config_params["no_of_epochs"] = 2000
    config_params["learning_rate"] = 0.01
    # object of class lstm_network
    test_object = lstm_network(config_params)
    test_object.load_data()
    test_object.build_graph_train()
    predictions = test_object.predict()
    print("\n predictions are: \n", predictions)
    test_object.save_model()
# run
main()
For this configuration:
Average testing error I got was: 0.15911798179149628
Average training error I got was: 0.10901389649110053
They look low, I'm guessing, because of the normalization of the values.
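(For scale: with the min-max normalization used in load_data(), an absolute error on the normalized values maps back to the original units by multiplying by the data range, so a small-looking normalized error can still be large after de-normalization. A hypothetical example:)
data_min, data_max = 2.0, 2000.0        # hypothetical range of the raw series
normalized_error = 0.159
error_in_original_units = normalized_error * (data_max - data_min)   # about 317.7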

Tensorflow evaluate: Aborted (core dumped)

tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have a model already trained. I load it and call two functions:
def main(argv=None):
    model = NVDM(args)
    sess_saver = tf.train.Saver()
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)
    loaded = load_for_similar(sess, sess_saver) #my function
    wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder), and normalizes them. It also reverses the order of the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
    saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
    vocab_file = '/path/to/vocab.pkl'
    t1 = loader_object(vocab_file)
    v1 = t1.get_vocab()
    v1_rev = {k:v for v, k in v1.iteritems()}
    decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
    decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
    return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias in to an new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
    dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
    best = argsort(sess, dists, reverse=True)
    dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
    return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
    idx = vocab[word_]
    similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
    words = tf.gather(vocab_inverse.keys(), indexes[:topN])
    return (words, similarity_meas[:topN])
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted?). This code depends on utils.py, a separate utilities file. I will include that as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
    """ Neural Variational Document Model -- BOW VAE.
    """
    def __init__(self,
                 vocab_size=15000, #was 2000
                 n_hidden=500,
                 n_topic=50,
                 n_sample=1,
                 learning_rate=1e-5,
                 batch_size=100, #was 64
                 non_linearity=tf.nn.tanh):
        self.vocab_size = vocab_size
        self.n_hidden = n_hidden
        self.n_topic = n_topic
        self.n_sample = n_sample
        self.non_linearity = non_linearity
        self.learning_rate = learning_rate/batch_size #CA
        self.batch_size = batch_size
        self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
        self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings
        # encoder
        with tf.variable_scope('encoder'):
            self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
            self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
            self.logsigm = utils.linear(self.enc_vec,
                                        self.n_topic,
                                        bias_start_zero=True,
                                        matrix_start_zero=False,
                                        scope='logsigm')
            self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
            self.kld = self.mask*self.kld # mask paddings
        with tf.variable_scope('decoder'):
            if self.n_sample ==1: # single sample
                p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
                eps = tf.random_normal((p1, self.n_topic), 0, 1)
                doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
                logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
                self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
            # multiple samples
            else:
                eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
                eps_list = tf.split(0, self.n_sample, eps)
                recons_loss_list = []
                for i in xrange(self.n_sample):
                    if i > 0: tf.get_variable_scope().reuse_variables()
                    curr_eps = eps_list[i]
                    doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
                    logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
                    recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
                self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
        self.objective = self.recons_loss + self.kld
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        fullvars = tf.trainable_variables()
        enc_vars = utils.variable_parser(fullvars, 'encoder')
        dec_vars = utils.variable_parser(fullvars, 'decoder')
        enc_grads = tf.gradients(self.objective, enc_vars)
        dec_grads = tf.gradients(self.objective, dec_vars)
        self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
        self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
    available = set(np.arange(n_samples)) - used_ints #
    if len(available) < batch_size:
        indices = np.array(list(available))
    else:
        indices = np.random.choice(tuple(available), batch_size, replace=False)
    used = used_ints
    mb = itemgetter(*indices)(it1)
    batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
    batch_flattened = np.ravel(batch_xs)
    index_positions = np.where(batch_flattened > 0)[0]
    return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
    Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
    data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
    n_samples = Instance1.num_reviews()
    train_batches = list(data_set) #this is an itertools.chain object
    it1_train = list(itertools.chain(*train_batches)) #length is 732,356. This is all the reviews.
    if len(it1_train) % batch_size != 0:
        total_batch = int(len(it1_train)/batch_size) + 1
    else:
        total_batch = int(len(it1_train)/batch_size)
    trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
    #Training
    train_time = time.time()
    for epoch in range(training_epochs):
        for switch in xrange(0, 2):
            if switch == 0:
                optim = model.optim_dec
                print_mode = 'updating decoder'
            else:
                optim = model.optim_enc
                print_mode = 'updating encoder'
            with open(trainfilesave, 'w') as f:
                for i in xrange(alternate_epochs):
                    loss_sum = 0.0
                    kld_sum = 0.0
                    word_count = 0
                    used_indices = set()
                    for idx_batch in range(total_batch): #train_batches:
                        mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
                        print('minibatch', idx_batch)
                        used_indices.update(mb[2])
                        num_mb = np.ones(mb[0][0].shape[0])
                        input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
                        _, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
                        loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
           output_size,
           no_bias=False,
           bias_start_zero=False,
           matrix_start_zero=False,
           scope=None):
    """Define a linear connection."""
    with tf.variable_scope(scope or 'Linear'):
        if matrix_start_zero:
            matrix_initializer = tf.constant_initializer(0)
        else:
            matrix_initializer = None
        if bias_start_zero:
            bias_initializer = tf.constant_initializer(0)
        else:
            bias_initializer = None
        input_size = inputs.get_shape()[1].value
        matrix = tf.get_variable('Matrix', [input_size, output_size],
                                 initializer=matrix_initializer)
        bias_term = tf.get_variable('Bias', [output_size],
                                    initializer=bias_initializer)
        output = tf.matmul(inputs, matrix)
        if not no_bias:
            output = output + bias_term
        return output
def mlp(inputs,
        mlp_hidden=[],
        mlp_nonlinearity=tf.nn.tanh,
        scope=None):
    """Define an MLP."""
    with tf.variable_scope(scope or 'Linear'):
        mlp_layer = len(mlp_hidden)
        res = inputs
        for l in xrange(mlp_layer):
            res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
        return res
