pytorch: "multi-target not supported" error message - python

So I want to classify some (3, 50, 50) pictures. First I loaded the dataset from the file without a DataLoader or batches, and it worked. Now, after adding both, I get this error:
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
I found a lot of answers on the internet, mostly suggesting target.squeeze(1), but it doesn't work for me.
My target batch looks like the following:
tensor([[1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0]], device='cuda:0')
Shouldn't that be okay?
Here is the full code (note that I'm only creating the structure of the model, which I'm going to apply to the full and correct dataset afterwards, because I don't have the full data yet, only 32 pictures and no labels; that's why I added torch.tensor([1, 0]) as a placeholder for all labels):
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
from torch.autograd import Variable
import numpy as np
from PIL import Image

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # model structure:
        self.conv1 = nn.Conv2d(3, 10, kernel_size=(5,5), stride=(1,1))
        self.conv2 = nn.Conv2d(10, 20, kernel_size=(5,5), stride=(1,1)) # with maxpool: output = 20 * (9,9) feature maps -> flatten
        self.fc1 = nn.Linear(20*9*9, 250)
        self.fc2 = nn.Linear(250, 100)
        self.fc3 = nn.Linear(100, 2)

    def forward(self, x):
        # conv layers
        x = F.relu(self.conv1(x))   # shape: 1, 10, 46, 46
        x = F.max_pool2d(x, 2, 2)   # shape: 1, 10, 23, 23
        x = F.relu(self.conv2(x))   # shape: 1, 20, 19, 19
        x = F.max_pool2d(x, 2, 2)   # shape: 1, 20, 9, 9
        # flatten to dense layer:
        x = x.view(-1, 20*9*9)
        # dense layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        output = F.log_softmax(self.fc3(x), dim=1)
        return output

class Run:
    def __init__(self, epochs, learning_rate, dropout, momentum):
        # load model
        self.model = Model().cuda()
        # hyperparameters:
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.dropout = dropout

    def preprocessing(self):
        dataset_folder = "/media/theodor/hdd/Programming/BWKI/dataset/bilder/"
        dataset = []
        for i in range(0, 35):
            sample_image = Image.open(dataset_folder + str(i) + ".png")
            data = torch.from_numpy(np.array(sample_image)).type("torch.Tensor").reshape(3, 50, 50)
            target = torch.tensor([[1, 0]])
            sample = (data, target)
            dataset.append(sample)
        train_loader = torch.utils.data.DataLoader(dataset, batch_size=8)
        return train_loader

    def train(self):
        train_set = self.preprocessing()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
        for epoch in range(self.epochs):
            epoch_loss = 0
            for i, data in enumerate(train_set, 0):
                sample, target = data
                # set data as cuda variable
                sample = Variable(sample.float().cuda())
                target = Variable(target.cuda())
                # initialize optimizer
                optimizer.zero_grad()
                # predict
                output = self.model(sample)
                # backpropagation
                print(output, target.squeeze(1))
                loss = criterion(output, target.squeeze(1)) # ERROR MESSAGE: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            print("loss after epoch [", epoch, "|", self.epochs, "] :", epoch_loss)

run = Run(10, 0.001, 0.5, 0.9)
run.train()
So I expected it to start training (of course not learning anything, because the labels are wrong).

For nn.CrossEntropyLoss the target has to be a single class index from the range [0, #classes - 1] instead of a one-hot encoded target vector. Your target is [1, 0], so PyTorch thinks you want multiple labels per input, which is not supported.
Replace your one-hot-encoded targets:
[1, 0] --> 0
[0, 1] --> 1
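For the placeholder targets in the question, that conversion can be done once per batch; a minimal sketch (shapes taken from the printed target batch above):

import torch

# one-hot placeholder target as in the question, shape (8, 2) after squeeze(1)
target = torch.tensor([[1, 0]] * 8)

# class-index target expected by nn.CrossEntropyLoss, shape (8,)
target = target.argmax(dim=1)
print(target)  # tensor([0, 0, 0, 0, 0, 0, 0, 0])

Alternatively, store the placeholder as target = torch.tensor(0) in preprocessing, so each batch already comes out with shape (8,).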

Related

TypeError: new(): argument 'size' must be tuple of ints, but found element of type NoneType at pos 2 when using pytorch, using nn.linear

File "C:\Users\J2\Desktop\Pytorchseries\thenn.py", line 50, in <module>
    net = Net()
TypeError: new(): argument 'size' must be tuple of ints, but found element of type NoneType at pos 2
If it helps, I was following the sentdex PyTorch tutorial. Any help would be appreciated. I'm new to machine learning and I was hoping this would work. Please help me out!
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import tqdm

training_data = np.load('training_data.npy', allow_pickle=True)
print(len(training_data))

X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])

plt.imshow(X[0], cmap='gray')
print(y[0])

class Net(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv2d(1, 32, 5) # input is 1 image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(32, 64, 5) # input is 32, bc the first layer outputs 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512) # flattening.
        self.fc2 = nn.Linear(512, 2) # 512 in, 2 out bc we're doing 2 classes (dog vs cat).

    def convs(self, x):
        # max pooling over 2x2
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear) # .view is reshape ... this flattens X before
        x = F.relu(self.fc1(x))
        x = self.fc2(x) # bc this is our output layer. No activation here.
        return F.softmax(x, dim=1)

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

net = Net()
print(net)

import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])

VAL_PCT = 0.1 # let's reserve 10% of our data for validation
val_size = int(len(X)*VAL_PCT)
print(val_size)

train_X = X[:-val_size]
train_y = y[:-val_size]
test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X), len(test_X))

BATCH_SIZE = 100
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0 to the len of X, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
        #print(f"{i}:{i+BATCH_SIZE}")
        batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
        batch_y = train_y[i:i+BATCH_SIZE]

        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step() # Does the update

    print(f"Epoch: {epoch}. Loss: {loss}")

correct = 0
total = 0
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 50, 50))[0] # returns a list
        predicted_class = torch.argmax(net_out)
        if predicted_class == real_class:
            correct += 1
        total += 1
print("Accuracy: ", round(correct/total, 3))
The issue is with self._to_linear. You use it in __init__ as:
self._to_linear = None
self.convs(x)
self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
The call to nn.Linear takes it as a parameter. This parameter should equal the number of input features of the linear layer and cannot be None, since the value determines the shape of the layer (the number of weights and biases). How to fix this depends on what you're trying to achieve; here, the if self._to_linear is None: block that is supposed to set it sits after the return statement in forward, so it never runs, and convs never assigns the value.
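A minimal sketch of convs with that measuring block moved back in (the shape logic is unchanged from the question's code):

def convs(self, x):
    # max pooling over 2x2
    x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
    x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
    x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
    # measure the flattened feature size once, on the dummy input from __init__
    if self._to_linear is None:
        self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
    return x

With this, the self.convs(x) call in __init__ fills in self._to_linear before self.fc1 is constructed, and forward can end at return F.softmax(x, dim=1).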

What transformation do I need to do in order to run dataset through neural network?

I'm new to deep learning and PyTorch, but I hope someone can help me out with this. My dataset contains images of different sizes. I'm trying to create a simple neural network that can classify images. However, I'm getting mismatch errors.
Neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 32, 3)
        self.fc1 = nn.Linear(32 * 3 * 3, 200)
        self.fc2 = nn.Linear(200, 120)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

net = Net()
My first convolution layer has 1 input channel, because I transform the images to grayscale images. 32 output channels was an arbitrary decision. The final fully-connected layer has 120 output channels, because there are 120 different classes.
Determine transformations and assign training set and validation set
transform = transforms.Compose(
    [transforms.Grayscale(1),
     transforms.RandomCrop((32,32)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

data_dir = 'dataset'
full_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=transform)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
trainset, valset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(valset, batch_size=4,
                                        shuffle=False, num_workers=2)

classes = full_dataset.classes
I transform the images to grayscale because they are gray anyway. I crop the images to 32x32 because they have different sizes and I figured they must all be the same size when put through the neural network. Everything is working fine so far.
Train neural network
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
When running this last piece of code, I get the following error: size mismatch, m1: [3584 x 28], m2: [288 x 200] at /Users/soumith/miniconda2/conda-bld/pytorch_1532623076075/work/aten/src/TH/generic/THTensorMath.cpp:2070 when the following line is being executed: outputs = net(inputs)
My code is a variation of the code provided in this Pytorch tutorial. Can someone tell me what I'm doing wrong?
UPDATE
I updated the neural network class to this:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
But now I get an error at loss = criterion(outputs, labels):
Assertion `cur_target >= 0 && cur_target < n_classes' failed. at /Users/soumith/miniconda2/conda-bld/pytorch_1532623076075/work/aten/src/THNN/generic/ClassNLLCriterion.c:93
In your first configuration, you have configured self.fc1 incorrectly. The input needs to be of dimension 32 * 28 * 28 instead of 32 * 3 * 3, since your images are 32 x 32 and the kernel size and stride are 3 and 1 respectively: each 3x3 convolution shaves 2 pixels off each spatial dimension, so 32 -> 30 -> 28, with 32 channels. See this video for a simpler explanation. Try adjusting your second configuration yourself now; if you can't, comment below.
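The same bookkeeping applied to the updated network, as a sketch (the 32x32 input comes from the RandomCrop above; running it verifies the flatten size):

import torch
import torch.nn as nn
import torch.nn.functional as F

conv1 = nn.Conv2d(1, 6, 5)
conv2 = nn.Conv2d(6, 16, 5)
x = torch.randn(1, 1, 32, 32)          # one grayscale 32x32 crop
x = F.max_pool2d(F.relu(conv1(x)), 2)  # 32 -> 28 (conv) -> 14 (pool)
x = F.max_pool2d(F.relu(conv2(x)), 2)  # 14 -> 10 (conv) -> 5 (pool)
print(x.shape)  # torch.Size([1, 16, 5, 5]), so fc1 = nn.Linear(16 * 5 * 5, 120) is consistent

As for the second error: the assertion cur_target >= 0 && cur_target < n_classes fails because the dataset has 120 classes (labels 0..119), while the updated fc3 only produces 10 outputs; that last layer would need to be nn.Linear(84, 120).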

Tensorflow model outputs probability values larger than 1

I'm working on a classification program where I'm training my model to predict whether the object is a nut or a screw. I created my own dataset since I did not find one. I trained my model, but I'm not getting correct predictions. The probability values seem to go beyond 1; basically I get garbage values.
I get this predicted value: [[9.990779e-01 9.220659e-04]]
#Training code
import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
import os

# Adding Seed so that random initialization is consistent
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

batch_size = 20

# Prepare input data
classes = os.listdir('training_set')
num_classes = len(classes)

# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 128
num_channels = 3
train_path = 'training_set'

# We shall load all the training and validation images and labels into memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes, validation_size=validation_size)

print("Complete reading input data. Will now print a snippet of it")
print("Number of files in Training-set:\t\t{}".format(len(data.train.labels)))
print("Number of files in Validation-set:\t{}".format(len(data.valid.labels)))

session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels], name='x')

## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)

##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 32
fc_layer_size = 128

def create_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))

def create_biases(size):
    return tf.Variable(tf.constant(0.05, shape=[size]))

def create_convolutional_layer(input,
                               num_input_channels,
                               conv_filter_size,
                               num_filters):
    ## We shall define the weights that will be trained using create_weights function.
    weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
    ## We create biases using the create_biases function. These are also trained.
    biases = create_biases(num_filters)
    ## Creating the convolutional layer
    layer = tf.nn.conv2d(input=input,
                         filter=weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    layer += biases
    ## We shall be using max-pooling.
    layer = tf.nn.max_pool(value=layer,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME')
    ## Output of pooling is fed to Relu which is the activation function for us.
    layer = tf.nn.relu(layer)
    return layer

def create_flatten_layer(layer):
    # We know that the shape of the layer will be [batch_size img_size img_size num_channels]
    # But let's get it from the previous layer.
    layer_shape = layer.get_shape()
    ## Number of features will be img_height * img_width * num_channels. But we shall calculate it in place of hard-coding it.
    num_features = layer_shape[1:4].num_elements()
    ## Now, we Flatten the layer so we shall have to reshape to num_features
    layer = tf.reshape(layer, [-1, num_features])
    return layer

def create_fc_layer(input,
                    num_inputs,
                    num_outputs,
                    use_relu=True):
    # Let's define trainable weights and biases.
    weights = create_weights(shape=[num_inputs, num_outputs])
    biases = create_biases(num_outputs)
    # Fully connected layer takes input x and produces wx+b. Since these are matrices, we use the matmul function in Tensorflow
    layer = tf.matmul(input, weights) + biases
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer

layer_conv1 = create_convolutional_layer(input=x,
                                         num_input_channels=num_channels,
                                         conv_filter_size=filter_size_conv1,
                                         num_filters=num_filters_conv1)
layer_conv2 = create_convolutional_layer(input=layer_conv1,
                                         num_input_channels=num_filters_conv1,
                                         conv_filter_size=filter_size_conv2,
                                         num_filters=num_filters_conv2)
layer_conv3 = create_convolutional_layer(input=layer_conv2,
                                         num_input_channels=num_filters_conv2,
                                         conv_filter_size=filter_size_conv3,
                                         num_filters=num_filters_conv3)
layer_flat = create_flatten_layer(layer_conv3)
layer_fc1 = create_fc_layer(input=layer_flat,
                            num_inputs=layer_flat.get_shape()[1:4].num_elements(),
                            num_outputs=fc_layer_size,
                            use_relu=True)
layer_fc2 = create_fc_layer(input=layer_fc1,
                            num_inputs=fc_layer_size,
                            num_outputs=num_classes,
                            use_relu=False)

y_pred = tf.nn.softmax(layer_fc2, name='y_pred')
y_pred_cls = tf.argmax(y_pred, dimension=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
                                                        labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
session.run(tf.global_variables_initializer())

def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))

total_iterations = 0
saver = tf.train.Saver()

def train(num_iteration):
    global total_iterations
    for i in range(total_iterations,
                   total_iterations + num_iteration):
        x_batch, y_true_batch, _, cls_batch = data.train.next_batch(batch_size)
        x_valid_batch, y_valid_batch, _, valid_cls_batch = data.valid.next_batch(batch_size)
        feed_dict_tr = {x: x_batch,
                        y_true: y_true_batch}
        feed_dict_val = {x: x_valid_batch,
                         y_true: y_valid_batch}
        session.run(optimizer, feed_dict=feed_dict_tr)
        if i % int(data.train.num_examples / batch_size) == 0:
            val_loss = session.run(cost, feed_dict=feed_dict_val)
            epoch = int(i / int(data.train.num_examples / batch_size))
            show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
            saver.save(session, 'C:\\Nutsbolts\\nuts-screws-model')
    total_iterations += num_iteration

train(num_iteration=3000)

#Prediction code
import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse

# First, pass the path of the image
dir_path = 'C:\\nutsbolts\\testing_set\\nuts'
image_path= 'nuts11.jpg'
filename = dir_path +'/' +image_path
image_size=128
num_channels=3
images = []

# Reading the image using OpenCV
image = cv2.imread(filename)
# Resizing the image to our desired size and preprocessing will be done exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images.append(image)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0)

# The input to the network is of shape [None image_size image_size num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)

## Let us restore the saved model
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
saver = tf.train.import_meta_graph('nuts-screws-model.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, tf.train.latest_checkpoint('./'))

# Accessing the default graph which we have restored
graph = tf.get_default_graph()

# Now, let's get hold of the op that can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the network
y_pred = graph.get_tensor_by_name("y_pred:0")

## Let's feed the images to the input placeholders
x = graph.get_tensor_by_name("x:0")
y_true = graph.get_tensor_by_name("y_true:0")
y_test_images = np.zeros((1, len(os.listdir('testing_set'))))

### Creating the feed_dict that is required to be fed to calculate y_pred
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result = sess.run(y_pred, feed_dict=feed_dict_testing)
# result is of this format [probability_of_nuts probability_of_screws]
print(result)
9.990779e-01 actually is below 1. It is scientific notation: you can read it as 9.990779 * 10^(-1), i.e. 0.9990779. The two values in your output sum to roughly 1, exactly what you'd expect from a softmax.
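A quick check in Python with the values printed in the question:

print(9.990779e-01)                 # 0.9990779 -- below 1
print(9.220659e-04)                 # 0.0009220659
print(9.990779e-01 + 9.220659e-04)  # 0.9999999659, i.e. ~1.0, as softmax outputs should be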

GRU same configurations but in two different ways produces two different output in tensorflow

I would like to do some sequence prediction in TensorFlow using a GRU, so I have created the same model in 2 different ways, as follows:
In model 1 I have 2 GRUs, one after the other; that is, new_state1, the final hidden state of the first GRU, acts as the initial state of the second GRU, so the model outputs new_state1 and new_state2 one after the other. Note that this is not a 2-layer model, but only 1 layer. As the code below shows, I divided the input and the output into 2 parts, where GRU1 takes the first part and the second GRU takes the second part.
Also, the random_seed is set and fixed for both models so that the results are comparable.
Model 1
import tensorflow as tf
import numpy as np

cell_size = 32
seq_length = 1000
time_steps1 = 500
time_steps2 = seq_length - time_steps1

x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)

tf.set_random_seed(123)
m_dtype = tf.float32

input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")
labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")
labels = tf.concat([labels1, labels2], axis=1, name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")

def model(input_feat1, input_feat2):
    with tf.variable_scope("GRU"):
        cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
        cell2 = tf.nn.rnn_cell.GRUCell(cell_size)

        with tf.variable_scope("First50"):
            # output1: shape=[1, time_steps1, 32]
            output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)

        with tf.variable_scope("Second50"):
            # output2: shape=[1, time_steps2, 32]
            output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)

        with tf.variable_scope("output"):
            # output shape: [1, time_steps1 + time_steps2, 32] => [1, 1000, 32]
            output = tf.concat([output1, output2], axis=1)
            output = tf.reshape(output, shape=[-1, cell_size])
            output = tf.layers.dense(output, units=1)
            output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])

        with tf.variable_scope("outputs_1_2_reshaped"):
            output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
            output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])

    print(output.get_shape().as_list(), "1")
    print(output1.get_shape().as_list(), "2")
    print(output2.get_shape().as_list(), "3")

    return output, output1, output2, initial_state, new_state1, new_state2

output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    to_run_list = [new_state1, new_state2]

    in1 = np.reshape(x_t[:time_steps1], newshape=(1, time_steps1, 1))
    in2 = np.reshape(x_t[time_steps1:], newshape=(1, time_steps2, 1))
    l1 = np.reshape(x_t_plus_1[:time_steps1], newshape=(1, time_steps1, 1))
    l2 = np.reshape(x_t_plus_1[time_steps1:], newshape=(1, time_steps2, 1))
    i_s = np.zeros([1, cell_size])

    new_s1, new_s2 = sess.run(to_run_list, feed_dict={input_1: in1,
                                                      input_2: in2,
                                                      labels1: l1,
                                                      labels2: l2,
                                                      initial_state: i_s})

    print(np.shape(new_s1), np.shape(new_s2))
    print(np.mean(new_s1), np.mean(new_s2))
    print(np.sum(new_s1), np.sum(new_s2))
In model 2, instead of having 2 different GRUs, I created one, and I divided the input and the labels into 2 different parts as well, and I used a for loop to iterate over my input dataset. Then the final state is taken and fed back into the same model as the initial state.
Note that both model1 and model2 have the very first initial state of zeros.
Model 2
import tensorflow as tf
import numpy as np

cell_size = 32
seq_length = 1000
time_steps = 500

x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)

tf.set_random_seed(123)
m_dtype = tf.float32

inputs = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="inputs")
labels = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
grads_initial_state = tf.placeholder(dtype=m_dtype, shape=[None, cell_size], name="prev_grads")
this_is_last_batch = tf.placeholder(dtype=tf.bool, name="this_is_last_batch")

def model(input_feat):
    with tf.variable_scope("GRU"):
        cell = tf.nn.rnn_cell.GRUCell(cell_size)

        with tf.variable_scope("cell"):
            # output1: shape=[1, time_steps, 32]
            output, new_state = tf.nn.dynamic_rnn(cell, input_feat, dtype=m_dtype, initial_state=initial_state)

        with tf.variable_scope("output"):
            output = tf.reshape(output, shape=[-1, cell_size])
            output = tf.layers.dense(output, units=1)
            output = tf.reshape(output, shape=[1, time_steps, 1])

    print(output.get_shape().as_list(), "1")

    return output, new_state

output, new_state = model(inputs)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # 1000 // 500 = 2
    num_iterations = seq_length // time_steps
    print("num_iterations:", num_iterations)

    final_states = []
    to_run_list = [grads_wrt_initial_state, new_state]

    for i in range(num_iterations):
        current_xt = x_t[i * time_steps: (i + 1)*time_steps]
        current_xt_plus_1 = x_t_plus_1[i*time_steps: (i + 1)*time_steps]

        in1 = np.reshape(current_xt, newshape=(1, time_steps, 1))
        l1 = np.reshape(current_xt_plus_1, newshape=(1, time_steps, 1))
        i_s = np.zeros([1, cell_size])

        if i == 0:
            new_s = sess.run(new_state, feed_dict={inputs: in1,
                                                   labels: l1,
                                                   initial_state: i_s})
            final_states.append(new_s)
            print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
        else:
            new_s = sess.run(new_state, feed_dict={inputs: in1,
                                                   labels: l1,
                                                   initial_state: final_states[-1]})
            final_states.append(new_s)
            print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
Finally, after printing out the statistics of new_state1 and new_state2 in model 1, they were different from the new_state of model 2 after each iteration.
I would like to know how to fix this problem and why it is happening.
Edit:
I have figured out that the weight values of the GRU in both files are different.
Now, how can I reproduce the same results in the 2 different files, even after setting the random seed?
Any help is much appreciated!!!
So, to reproduce the same results in different files, tf.set_random_seed() is not enough. I figured out that we also need to set the seed for the initializers of the GRU cells, as well as for the initializers of the weights in the dense layer at the output (this is at least according to my model); so the definition of the cell is now:
cell1 = tf.nn.rnn_cell.GRUCell(cell_size, kernel_initializer=tf.glorot_normal_initializer(seed=123, dtype=m_dtype))
And for the dense layer:
output = tf.layers.dense(output, units=1, kernel_initializer=tf.glorot_uniform_initializer(seed=123, dtype=m_dtype))
Note that any other initializer could be used, as long as we set the seed and the dtype for it.
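Applied to the two scripts above, the pattern is to give every weight-creating layer an explicitly seeded initializer, identically in both files (a sketch; seed 123 is the one already used in the question, and both cells of model 1 need it):

kernel_init = tf.glorot_normal_initializer(seed=123, dtype=m_dtype)
dense_init = tf.glorot_uniform_initializer(seed=123, dtype=m_dtype)

cell1 = tf.nn.rnn_cell.GRUCell(cell_size, kernel_initializer=kernel_init)
cell2 = tf.nn.rnn_cell.GRUCell(cell_size, kernel_initializer=kernel_init)

and, inside the output scope:

output = tf.layers.dense(output, units=1, kernel_initializer=dense_init)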

Tensorflow adam optimizer in a .pb model

I have developed TensorFlow code that uses the Adam optimizer, then saved the graph and exported the .pb model, and correctly loaded it. My problem is that when I feed it a new input image, I don't get the same result compared to the result given by this code:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img, array_to_img
import cv2
import tensorflow as tf
import numpy
import numpy as np

def get_image2(imgSrc):
    img = load_img(imgSrc, True)  # this is a PIL image
    x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
    #x = x.reshape((1,) + x.shape)
    x = x.astype(float)
    x *= 1./255.
    #x = cv2.resize(x,(512,512))
    return x

def sobel2(image):
    # Shape = height x width.
    #image = tf.placeholder(tf.float32, shape=[None, None])
    # Shape = 1 x height x width x 1.
    image_resized = image #tf.expand_dims(image, 0)
    Gx = tf.nn.conv2d(image_resized, sobel_x_filter, strides=[1, 1, 1, 1], padding='SAME')
    Gy = tf.nn.conv2d(image_resized, sobel_y_filter, strides=[1, 1, 1, 1], padding='SAME')
    #grad = tf.sqrt(tf.add(tf.pow(Gx,2),tf.pow(Gy,2)))
    #grad = tf.pow(Gx,2) + tf.pow(Gy,2)
    #grad = tf.truediv(grad,3.)
    #grad = tf.reshape(grad, img_shape)
    return Gx, Gy

image = get_image2('1.jpg')
img_shape = image.shape
print(img_shape)
img_h, img_w, _ = img_shape

sobel_x = tf.constant([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], tf.float32)
sobel_x_filter = tf.reshape(sobel_x, [3, 3, 1, 1])
sobel_y_filter = tf.transpose(sobel_x_filter, [1, 0, 2, 3])

input_img = tf.placeholder(tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]], name="input_img")
#input_img = tf.placeholder(tf.float32, [1, 512, 512, 1], name="input_img")
gain = tf.Variable(tf.constant(1, dtype=tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]]), name="gain")
offset = tf.Variable(tf.constant(0, dtype=tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]]), name="offset")

enhanced_img = tf.add(tf.multiply(input_img, gain), offset, name="enahnced")

#----------------------------------------------------------
# COST
#----------------------------------------------------------
input_img_deriv_x, input_img_deriv_y = sobel2(input_img)
enhanced_img_deriv_x, enhanced_img_deriv_y = sobel2(enhanced_img)

white_img = tf.constant(1, dtype=tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]])
image_pixels_count = img_h * img_w

white_cost = tf.reduce_sum(tf.pow(enhanced_img - white_img, 2))
sobel_cost = tf.reduce_sum(tf.pow(enhanced_img_deriv_x - input_img_deriv_x, 2) +
                           tf.pow(enhanced_img_deriv_y - input_img_deriv_y, 2))
cost = tf.add(white_cost, tf.multiply(0.2, sobel_cost), name="cost") # + tf.reduce_sum(gain - 1) + tf.reduce_sum(offset)

#----------------------------------------------------------
# TRAIN
#----------------------------------------------------------
# Parameters
learning_rate = 0.0001
training_epochs = 100
display_step = 5

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

image = image.reshape([-1,img_shape[0],img_shape[1],img_shape[2]])
#print image.shape
feed = {input_img: image}

# Start training
with tf.Session() as sess:
    # Run the initializer
    print(sess.run(init))
    # Fit all training data
    for epoch in range(training_epochs):
        sess.run([optimizer, cost], feed_dict=feed)
        print(tf.reduce_sum(offset.eval()).eval())
        if (epoch+1) % display_step == 0:
            gen_img = sess.run(enhanced_img, feed_dict=feed)
            gen_img = np.squeeze(gen_img, axis=0)
            print(gen_img.shape)
            gen_img *= 255
            cv2.imwrite("result/output_2_{0}.png".format(epoch), gen_img)
I noticed that when I save the graph, the optimizer state is also saved, so when I load the model and feed it a new image, it will produce a wrong result, since it will use the saved values related to the image I used when I saved it.
How can I make the model run the optimizer for new images without using the saved parameters from the previous input?
Do you understand how the optimizer actually works?
The goal of the optimizer is to update the model weights with respect to the gradient. Adam in particular keeps two inner accumulators per trainable variable (running estimates of the first and second moments of the gradient) which are updated during training; that is part of the Adam algorithm, so this behavior is perfectly normal. If you want to "reset" Adam's variables, it's perfectly doable; however, I highly doubt that this is what you want to do. Very rare situations require it, and if you reset the Adam state mid-training, you break the whole logic of the optimizer.
If you evaluate a new image at inference time, the optimizer should not be run at all, and thus your model output should not be impacted by Adam or any other optimizer.
If you continue training from a previously saved checkpoint, I would recommend that you keep the Adam state, provided the dataset is the same (not a transfer-learning approach); in that case you should not reset Adam's variables.
By the way, if you really want to reset Adam, this is how you would do it:
optimizer_reset_op = tf.variables_initializer(optimizer.variables())
sess.run(optimizer_reset_op)
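One caveat against the training script above: there, optimizer is the Operation returned by minimize(), which has no variables() method, so keep a handle on the AdamOptimizer instance itself (a sketch, assuming the names from the question's code):

adam = tf.train.AdamOptimizer(learning_rate)
optimizer = adam.minimize(cost)  # the train op, run in the session as before
# re-initializes only Adam's moment accumulators, not the model variables
optimizer_reset_op = tf.variables_initializer(adam.variables())

Then sess.run(optimizer_reset_op) can be called between images, if you really need it.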
