I am new to tensorflow.
I want to build a 3 layer neural network,
i) I have declared weight W inside tf.function for which it is throwing the following error:
ValueError: tf.function-decorated function tried to create variables on non-first call.
ii) I have also created a dictionary, sample_batch, which holds x (features) and y (labels). I want to pass it to the tf.function and use it later, but I do not understand how to do that.
Here is a snippet of my code:
# tf.function  (presumably the `@tf.function` decorator; the leading "@"
# appears to have been lost when this snippet was extracted)
def forward_pass1(batch):
    """Compute and print relu(batch['x'] @ W + biases) for one batch.

    `batch` is indexed with the key 'x'; the surrounding question describes
    it as a dictionary holding features under 'x' and labels under 'y'.
    Nothing is returned: the activation is only printed.
    """
    print((batch))
    # The weight and bias variables are created inside the function body.
    # This is the pattern the question reports as raising
    # "tf.function-decorated function tried to create variables on
    # non-first call".
    W = tf.Variable(tf.initializers.TruncatedNormal(stddev=np.sqrt(2.0 / 784))(shape=(784, 128)), name="W")
    biases = tf.Variable(tf.zeros(shape=(128)), name="b")
    hidden1 = tf.nn.relu(tf.matmul(batch['x'], W) + biases)
    print(hidden1)


# Take one batch from the (externally defined) federated_train_data and run it.
sample_batch = federated_train_data[5][-1]
forward_pass1((sample_batch))
Can anyone please help?
In your case:
Initialize the variables outside the function and pass them in as arguments:
# Create the layer parameters once, at module level, so that they can be
# handed to the forward-pass function as arguments instead of being created
# inside it.
# Weight matrix of shape (784, 128), truncated-normal with stddev sqrt(2/784).
W = tf.Variable(
    tf.initializers.TruncatedNormal(stddev=np.sqrt(2.0 / 784))(shape=(784, 128)),
    name="W",
)
# Bias vector of length 128. The shape is written as a real one-element
# tuple: `(128)` is just the integer 128 in parentheses.
biases = tf.Variable(tf.zeros(shape=(128,)), name="b")
Your function should be like this:
@tf.function
def forward_pass1(batch, W, biases):
    """Run the first hidden layer on one batch and return its activation.

    Args:
        batch: mapping that provides the input features under the key 'x'.
        W: weight variable, created by the caller outside this function.
        biases: bias variable, created by the caller outside this function.

    Returns:
        relu(batch['x'] @ W + biases).
    """
    # Under tf.function, plain Python print() calls execute while the
    # function is being traced, not on every call.
    print((batch))
    hidden1 = tf.nn.relu(tf.matmul(batch['x'], W) + biases)
    print(hidden1)
    # Return the activation so later layers / the caller can use it.
    return hidden1
# Take one batch from the (externally defined) federated_train_data.
sample_batch = federated_train_data[5][-1]
# forward_pass1 is declared as forward_pass1(batch, W, biases); the
# variables created outside the function must be passed in explicitly,
# otherwise the call fails with a missing-argument TypeError.
forward_pass1(sample_batch, W, biases)
A better way, as recommended by the TensorFlow guide, is to wrap the variables in a custom Keras layer:
from tensorflow.keras import layers
class Linear(layers.Layer):
    """Fully connected layer followed by ReLU that owns its own variables.

    The weight matrix `W` (input_dim x units) and the bias vector `biases`
    (units,) are created once in the constructor, so calling the layer never
    creates new variables.
    """

    def __init__(self, units=128, input_dim=784):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        make_kernel = tf.random_normal_initializer()
        self.W = tf.Variable(
            initial_value=make_kernel(shape=kernel_shape, dtype='float32'),
            trainable=True,
        )
        make_bias = tf.zeros_initializer()
        self.biases = tf.Variable(
            initial_value=make_bias(shape=bias_shape, dtype='float32'),
            trainable=True,
        )

    def call(self, inputs):
        """Return relu(inputs @ W + biases)."""
        pre_activation = tf.matmul(inputs, self.W) + self.biases
        return tf.nn.relu(pre_activation)


# Small demonstration: a 2x2 input of ones through a layer with 4 units and
# input_dim=2.
x = tf.ones((2, 2))
linear_layer = Linear(4, 2)
y = linear_layer(x)
print(y)
Related
I am working on a project with TensorFlow, and my code keeps emitting the warnings "WARNING:tensorflow:5 out of the last 5 calls to <function _BaseOptimizer._update_step_xla at 0x7ff7d01cbaf0> triggered tf.function retracing" and "WARNING:tensorflow:6 out of the last 6 calls to <function _BaseOptimizer._update_step_xla at 0x7ff7d01cbaf0> triggered tf.function retracing", as shown in the picture:
warning
Here is the replicable part of code that causes the problem
import tensorflow as tf
class Conv(tf.keras.layers.Layer):
    """1-D convolution layer that transposes its input before and after.

    The kernel has shape [kernel_size, in_channels, out_channels] and is
    He-normal initialised; the bias has shape [1, 1, out_channels] and
    starts at zero.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1):
        super().__init__()
        self.dilations = dilation
        kernel_shape = [kernel_size, in_channels, out_channels]
        he_normal = tf.keras.initializers.HeNormal()
        self.kernel = tf.Variable(
            he_normal(kernel_shape, dtype=tf.float32), trainable=True
        )
        self.bias = tf.Variable(
            tf.zeros([1, 1, out_channels], dtype=tf.float32), trainable=True
        )

    def call(self, x):
        """Convolve `x` with stride 1 and 'SAME' padding, then add the bias.

        The last two axes of `x` are swapped before the convolution and
        swapped back afterwards (presumably the caller uses a channels-first
        layout -- TODO confirm).
        """
        swapped = tf.transpose(x, perm=[0, 2, 1])
        convolved = tf.nn.conv1d(
            swapped,
            self.kernel,
            1,
            padding='SAME',
            dilations=self.dilations,
        )
        return tf.transpose(convolved + self.bias, perm=[0, 2, 1])
class Imputer(tf.keras.Model):
    """Three stacked kernel-size-1 Conv layers with ReLU between them."""

    def __init__(self, in_channels, res_channels, skip_channels, out_channels):
        super().__init__()
        self.init_conv = Conv(in_channels, res_channels, kernel_size=1)
        self.final_conv1 = Conv(skip_channels, skip_channels, kernel_size=1)
        self.final_conv2 = Conv(skip_channels, out_channels, kernel_size=1)

    def call(self, x):
        """Return final_conv2(relu(final_conv1(relu(init_conv(x)))))."""
        hidden = tf.nn.relu(self.init_conv(x))
        hidden = tf.nn.relu(self.final_conv1(hidden))
        return self.final_conv2(hidden)
# Build the model plus a random input and a random target, both [4, 4, 4].
model = Imputer(4,4,4,4)
input_data = tf.random.uniform([4,4,4],minval=0,maxval=3,dtype=tf.dtypes.float32,seed=1)
myloss = tf.keras.losses.MeanSquaredError()
z = tf.random.normal(shape = [4,4,4],mean = 0,stddev=1)
optimizer = tf.keras.optimizers.Adam(learning_rate= 0.001)
# Record the forward pass and the loss so they can be differentiated.
with tf.GradientTape() as tape:
    predicition = model(input_data)
    loss = myloss(z, predicition)
gradients = tape.gradient(loss, model.trainable_variables)
# Prints whatever optimizer.build(...) returns.
print(optimizer.build(model.trainable_variables))
# One optimisation step; the warning quoted in the surrounding question
# names the optimizer's _update_step_xla function.
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
I do not think my code has any of the problems listed in the warning; I suspect the warning is related to calling the Conv layer multiple times. My questions are: why is the warning raised, and how should I properly handle a situation in which I have to call a custom layer multiple times?
I have the following code:
import torch
from torch.nn.utils.stateless import functional_call
import torch.autograd as autograd
import torch.nn as nn
# This is the model
class Encoder(nn.Module):
    """Linear -> LSTM -> Linear encoder over a batch of sequences.

    Args:
        action_dim: size of the last input axis (and of the first linear
            layer's output).
        z_dim: LSTM hidden size and size of the returned vector.
        skill_length: accepted but not used by this class.
    """

    def __init__(self, action_dim, z_dim, skill_length):
        super().__init__()
        print(action_dim)
        self.lin1 = nn.Linear(action_dim, action_dim)
        self.lstm = nn.LSTM(
            input_size=action_dim, hidden_size=z_dim, batch_first=True
        )
        self.lin2 = nn.Linear(z_dim, z_dim)

    def forward(self, skill):
        """Encode a 3-D tensor `skill` and return one vector per batch row.

        The input is flattened to 2-D for the first linear layer, restored
        to its original 3-D shape, run through the LSTM, and the output at
        the last position of axis 1 is passed through the final linear
        layer.
        """
        n_batch, n_steps, n_feat = skill.shape
        flat = skill.reshape(-1, n_feat)
        embedded = self.lin1(flat).reshape(n_batch, n_steps, n_feat)
        lstm_out, _ = self.lstm(embedded)
        last_step = lstm_out[:, -1, :]
        return self.lin2(last_step)
# This is the initialization function
def pars(model):
    """Re-initialise every parameter of `model` and return them by name.

    One-dimensional parameters are filled with zeros; all others receive an
    orthogonal initialisation. Both initialisers work in place, so the
    model's own parameters are modified as well. Each initialised tensor is
    then wrapped in a new `nn.Parameter`.

    Returns:
        dict mapping each parameter name to its new `nn.Parameter`.
    """

    def _initialise(tensor):
        # 1-D tensors get zeros; everything else gets an orthogonal init.
        if len(tensor.shape) == 1:
            return torch.nn.init.constant_(tensor, 0)
        return torch.nn.init.orthogonal_(tensor)

    return {
        name: nn.Parameter(_initialise(tensor))
        for name, tensor in model.named_parameters()
    }
# Initializing the model, a random (3, 5, 4) input, and the re-initialised
# parameter dictionary returned by pars().
model = Encoder(4, 2, 5)
x = torch.rand(3, 5, 4)
params = pars(model)
# Run the model through functional_call with the `params` dictionary and
# differentiate the mean output with respect to those parameters.
samp = functional_call(model, params, x)
grad_f = autograd.grad(torch.mean(samp), params.values(),
                       retain_graph=True, allow_unused=True)
print(grad_f)
# Reported result: grad_f has a gradient for the linear layers, but None
# for the LSTM layer.
# Run the model directly (without functional_call) and differentiate with
# respect to the model's own parameters.
samp = model(x)
grad = autograd.grad(torch.mean(samp), model.parameters(), retain_graph=True)
print(grad)
# Reported result: grad has a gradient for all layers, i.e. the linear
# layers and the LSTM.
I know the problem is with the LSTM layer because when I use a linear layer with nn.Linear, then the gradient depends on std as well as the linear layer. Unfortunately, I do not know how to resolve this problem. I'd appreciate any help.
*Edit: I heavily edited the code provided just to further simplify the example. This code can be copied and run.
Update Dec 11, 2022
class Encoder(nn.Module):
    """Debugging variant of the encoder.

    It stops in the debugger after the LSTM and computes gradients of the
    LSTM output with respect to two different parameter collections.
    """

    def __init__(self, action_dim, z_dim, skill_length):
        super().__init__()
        print(action_dim)
        self.lin1 = nn.Linear(action_dim, action_dim)
        self.lstm = nn.LSTM(input_size=action_dim, hidden_size=z_dim, batch_first=True)
        self.lin2 = nn.Linear(z_dim, z_dim)

    def forward(self, skill):
        a, b, c = skill.shape
        # Flatten to 2-D for the first linear layer, then restore the 3-D shape.
        skill = skill.reshape(-1, skill.shape[-1])
        embed = self.lin1(skill)
        embed = embed.reshape(a, b, c)
        mean, _ = self.lstm(embed)
        # Debugger breakpoint (pdb must be imported by the surrounding file).
        pdb.set_trace()
        # `params` is not defined in this class; it is read from an outer
        # scope (presumably the dictionary returned by pars(model) -- verify).
        grad1 = autograd.grad(mean.mean(), params.values(),
                              retain_graph=True, allow_unused=True)
        # Reported: this gives a gradient for the self.lin1 layer, and None
        # for the LSTM.
        grad2 = autograd.grad(mean.mean(), self.parameters(),
                              retain_graph=True, allow_unused=True)
        # Reported: this gives a gradient for the LSTM, but None for the
        # self.lin1 layer.
        mean = mean[:, -1, :]
        mean = self.lin2(mean)
        return mean
When I run it the regular way, calling the model directly without functional_call, then autograd.grad(mean.mean(), self.parameters(), allow_unused=True, retain_graph=True) has gradients for both the self.lin1 and the LSTM layers.
I don't know if this information is useful, but putting out there just in case.
I am trying to learn how to write a custom layer; I followed the steps on keras.io.
Code -
class Linear(keras.layers.Layer):
    """Dense layer computing inputs @ w + b, with variables made in __init__."""

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        print('inputs', inputs.shape)
        # Loop over the first axis of the input. The traceback quoted in the
        # surrounding question points at this line: inputs.shape[0] is None
        # there, and range(None) raises the TypeError.
        for index in range(inputs.shape[0]):
            ...
        return tf.matmul(inputs, self.w) + self.b
This shows the error -
TypeError: in user code:
<ipython-input-3-314addf0c624>:39 call *
for index in range(inputs.shape[0]):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/operators/py_builtins.py:365 range_ **
return _py_range(start_or_stop, stop, step)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/operators/py_builtins.py:390 _py_range
return range(start_or_stop)
TypeError: 'NoneType' object cannot be interpreted as an integer
When I run this Linear class on its own, it works fine. But when I use this layer inside a trainable model, it raises this error.
How to solve this, thanks
By default, the first dimension of inputs is the batch size (for image data, for example, the shape is [batch_size, width, height, channels]), and when you create your model, batch_size is set to None.
import os
# os.environ['KERAS_BACKEND'] = 'theano'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # suppress Tensorflow messages
import tensorflow as tf
from keras.layers import *
from keras.models import *
class CustomLinear(Layer):
    """Dense layer (inputs @ w + b) that also stores a fixed batch size.

    `batch_size` is kept on the instance so that code inside `call` can use
    it instead of reading the first entry of `inputs.shape`.
    """

    def __init__(self, batch_size, units=32, input_dim=32):
        super().__init__()
        self.batch_size = batch_size
        kernel_init = tf.random_normal_initializer()
        kernel_value = kernel_init(shape=(input_dim, units), dtype="float32")
        self.w = tf.Variable(initial_value=kernel_value, trainable=True)
        bias_init = tf.zeros_initializer()
        bias_value = bias_init(shape=(units,), dtype="float32")
        self.b = tf.Variable(initial_value=bias_value, trainable=True)

    def call(self, inputs):
        """Print the input shape and return inputs @ w + b."""
        print('inputs', inputs.shape)
        # A per-sample loop, if needed, can iterate over
        # range(self.batch_size) here.
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
# The batch size is fixed up front and handed to the layer explicitly.
batch_size = 10
model = Sequential()
model.add(Input(shape=(2,32)))
# Author's note: when the layer is added, call() prints inputs (None, 2, 32).
model.add(CustomLinear(batch_size = batch_size)) # inputs (None, 2, 32)
x = tf.random.normal((batch_size,2,32)) # dummy data
# Author's note: with concrete data, call() prints inputs (10, 2, 32).
model(x) # inputs (10, 2, 32)
In most cases, batch_size is not needed for the calculations within the layer. If you still need it, you can add an argument (e.g. batch_size) to your CustomLinear constructor, define your batch_size beforehand, and access it as self.batch_size inside the call method.
I am trying to run the following code (as given in Tensorflow documentation) to create windows of my data and then flatten the dataset of datasets.
# Split range_ds (defined elsewhere) into windows of 5 elements, shifting
# by one element between consecutive windows.
window_size = 5
windows = range_ds.window(window_size, shift=1)
# Each element of `windows` is printed here; the question describes
# `windows` as a dataset of datasets.
for sub_ds in windows.take(5):
    print(sub_ds)
# Flatten the nested windows back into a single dataset.
flat_windows = windows.flat_map(lambda x: x)
The problem is that flat_windows.cardinality().numpy() returns -2, which is causing a problem for me during training. I tried looking for a way to set the cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again without success.
Edit-1: The problem with the training is that the shape is unknown (at Linear and Dense layers) when I am training a subclass model (given below). The model trains well when I train the model eagerly (through tf.config.run_functions_eagerly(True)) but that is slow. Therefore I want the input data to be known for the model training.
Neural Network
class NeuralNetworkModel(tf.keras.Model):
    """Model that trains an Encoder so that encoder(X) and encoder(Y) agree.

    The loss is the Frobenius norm of encoder(Y) - encoder(X). It is
    accumulated in the module-level `loss_tracker` metric, which must be
    defined by the surrounding file.
    """

    def __init__(self):
        super(NeuralNetworkModel, self).__init__()
        self.encoder = Encoder()

    def _encoding_loss(self, inputs):
        """Return the Frobenius norm of encoder(inputs[1]) - encoder(inputs[0])."""
        X = inputs[0]
        Y = inputs[1]
        enc_X = self.encoder(X)
        enc_Y = self.encoder(Y)
        return tf.norm(enc_Y - enc_X, axis=[0, 1], ord='fro')

    def train_step(self, inputs):
        """Run one optimisation step on `inputs` and report the tracked loss."""
        # The forward pass and the loss must be recorded on the tape so
        # that gradients can be taken.
        with tf.GradientTape() as tape:
            loss = self._encoding_loss(inputs)
        # Compute gradients
        trainable_vars = self.encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        return {"loss": loss_tracker.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [loss_tracker]

    def test_step(self, inputs):
        """Evaluate the loss on `inputs` without updating any weights."""
        loss = self._encoding_loss(inputs)
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        return {"loss": loss_tracker.result()}
class Encoder(tf.keras.Model):
    """Stack of four DenseLayer blocks (128, 128, 64, 64 units) followed by
    a 64-unit LinearLayer, created with dtype float64."""

    def __init__(self):
        super().__init__(dtype = 'float64', name = 'Encoder')
        self.input_layer = DenseLayer(128)
        self.hidden_layer1 = DenseLayer(128)
        self.hidden_layer2 = DenseLayer(64)
        self.hidden_layer3 = DenseLayer(64)
        self.output_layer = LinearLayer(64)

    def call(self, input_data, training):
        """Feed `input_data` through the layers in order.

        `training` is accepted but not used by this method.
        """
        pipeline = (
            self.input_layer,
            self.hidden_layer1,
            self.hidden_layer2,
            self.hidden_layer3,
            self.output_layer,
        )
        activations = input_data
        for stage in pipeline:
            activations = stage(activations)
        return activations
class LinearLayer(tf.keras.layers.Layer):
    """Affine layer (inputs @ w + b) whose weights are created in build()."""

    def __init__(self, units):
        super().__init__(dtype = 'float64')
        self.units = units

    def build(self, input_shape):
        """Create w (last input dim x units) and b (units,)."""
        fan_in = input_shape[-1]
        kernel_shape = (fan_in, self.units)
        self.w = self.add_weight(
            shape = kernel_shape,
            initializer = "random_normal",
            trainable = True,
        )
        bias_shape = (self.units,)
        self.b = self.add_weight(
            shape = bias_shape,
            initializer = tf.zeros_initializer(),
            trainable = True,
        )

    def call(self, inputs):
        """Return inputs @ w + b with no activation."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
class DenseLayer(tf.keras.layers.Layer):
    """Affine layer followed by ELU; weights are created in build()."""

    def __init__(self, units):
        super().__init__(dtype = 'float64')
        self.units = units

    def build(self, input_shape):
        """Create w (last input dim x units) and b (units,)."""
        fan_in = input_shape[-1]
        kernel_shape = (fan_in, self.units)
        self.w = self.add_weight(
            shape = kernel_shape,
            initializer = "random_normal",
            trainable = True,
        )
        bias_shape = (self.units,)
        self.b = self.add_weight(
            shape = bias_shape,
            initializer = tf.zeros_initializer(),
            trainable = True,
        )

    def call(self, inputs):
        """Return elu(inputs @ w + b)."""
        pre_activation = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(pre_activation)
I was wondering about this as well. Turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which represents that TF doesn't know how many elements the flat_map returns per item.
I just asked Windowing a TensorFlow dataset without losing cardinality information? to see if anyone knows a way to window datasets without losing cardinality.
I need your help. I am trying to modify the python-based neural network for MNIST data classification developed by M. Nielsen [http://neuralnetworksanddeeplearning.com/index.html].
In particular, I am using the network3.py script, which employs the Theano library.
The last layer in this network is a softmax layer, but in the future I want to use this program for regression purposes and, therefore, I need to modify it by changing the last layer to a sigmoid one.
When I simply change the
activation_fn=softmax
to
activation_fn=sigmoid
the program is not working properly.
The important parts of the code are provided below.
# Initialization of the neural network: two convolution + pooling layers,
# one fully connected layer and a final SoftmaxLayer with 10 outputs.
net = Network([
    # Input shape (mini_batch_size, 1, 28, 28); 20 filters of size 5x5.
    ConvPoolLayer(input_shape=(mini_batch_size, 1, 28, 28),
                  filter_shape=(20, 1, 5, 5),
                  poolsize=(2, 2),
                  activation_fn=ReLU),
    # Input shape (mini_batch_size, 20, 12, 12); 40 filters of size 5x5.
    ConvPoolLayer(input_shape=(mini_batch_size, 20, 12, 12),
                  filter_shape=(40, 20, 5, 5),
                  poolsize=(2, 2),
                  activation_fn=ReLU),
    # 40*4*4 inputs into 100 hidden units, with dropout disabled.
    FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU, p_dropout=0.0),
    # Output layer; the surrounding question is about replacing its
    # activation_fn=softmax with sigmoid.
    SoftmaxLayer(n_in=100, n_out=10, activation_fn=softmax, p_dropout=0.0)],
    mini_batch_size)
...
# Softmax layer
class SoftmaxLayer(object):
    """Output layer with a configurable activation and a log-likelihood cost.

    Targets are integer class indices: `cost` indexes the log-output with
    `net.y`, and `accuracy` compares `y` with the argmax of the output.
    """

    def __init__(self, n_in, n_out, activation_fn, p_dropout):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Weights: normal with std sqrt(1/n_out); biases: normal with std 1.
        w_init = np.random.normal(
            loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out))
        self.w = theano.shared(
            np.asarray(w_init, dtype=theano.config.floatX),
            name='w', borrow=True)
        b_init = np.random.normal(loc=0.0, scale=1.0, size=(n_out,))
        self.b = theano.shared(
            np.asarray(b_init, dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Wire the layer to its input and build both output expressions."""
        flat_shape = (mini_batch_size, self.n_in)
        # Output without dropout: the weighted input is scaled by
        # (1 - p_dropout).
        self.inpt = inpt.reshape(flat_shape)
        scaled = (1-self.p_dropout)*T.dot(self.inpt, self.w)
        self.output = self.activation_fn(scaled + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        # Output with dropout applied to the input.
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        """Return the negative mean log-output at the target indices net.y."""
        rows = T.arange(net.y.shape[0])
        log_out = T.log(self.output_dropout)
        return -T.mean(log_out[rows, net.y])

    def accuracy(self, y):
        """Return the fraction of the mini-batch where y equals y_out."""
        hits = T.eq(y, self.y_out)
        return T.mean(hits)
I made following modifications:
1) changed the way how targets are represented (before it was 0, 5, 8, ... or any number corresponding to the picture that should be classified). Now it is vectors with 10 elements 0 is equivalent to [1,0,0,0..,0], 5 is equivalent to [0,0,0,0,0,1,0,...0], etc.
Correspondingly code should be fixed for this new format (minor modifications).
2) changed the layer definition (code is below). The main changes are in cost and accuracy.
# Fully connected layer
class FullyConnectedLayer(object):
    """Fully connected layer that can also act as a sigmoid output layer.

    When used as the last layer, targets are expected as one-hot (or, more
    generally, per-output) vectors: `cost` is an element-wise cross-entropy
    and `accuracy` compares the argmax of the target with the argmax of the
    output.
    """

    def __init__(self, n_in, n_out, activation_fn, p_dropout):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Wire the layer to its input and build both output expressions."""
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        # Output without dropout: the weighted input is scaled by
        # (1 - p_dropout).
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        # The full activation vector is kept (no argmax) because targets
        # are vectors.
        self.y_out = self.output
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        """Return the mean element-wise cross-entropy against net.y.

        Only meaningful when the activation produces values in [0, 1]
        (e.g. sigmoid). The activations are clipped slightly away from 0
        and 1 before the logarithms are taken: a saturated sigmoid returns
        exactly 0 or 1, and log(0) would turn the cost into inf/NaN.
        """
        eps = 1e-7
        prob = T.clip(self.output_dropout, eps, 1.0 - eps)
        xent = -net.y*T.log(prob) - (1-net.y)*T.log(1-prob)
        return T.mean(xent)

    def accuracy(self, y):
        """Return the fraction of rows whose predicted argmax matches y's."""
        y_pred = T.argmax(self.y_out, axis=1)
        y_targ = T.argmax(y, axis=1)
        return T.mean(T.eq(y_targ, y_pred))