I want to create a Linear layer of the form y = w*x + b, where w is a matrix [M x d], x is a matrix [d x 1], and b is a bias matrix [M x 1]; M is the number of units in the layer and d is the dimension of the vector x.
My code looks like this:
class Linear(keras.layers.Layer):
    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units
        print(self.units)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(self.units, input_shape[1]),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units, 1), initializer="random_normal", trainable=True
        )
        print(input_shape[1])

    def call(self, inputs):
        return tf.matmul(self.w, inputs) + self.b

    def get_output_shape_for(self, input_shape):
        return (self.units, 1)
And when I create the model I use:
input_tensor1 = Input((2,1))
L11 = Linear(32)(input_tensor1)
model = Model(inputs=input_tensor1, outputs=L11)
model.summary()
In this code, d = 2 and M = 32.
As I understand it, for y = w*x + b, y should be a matrix [M x 1], because w is [M x d], x is [d x 1], and b is [M x 1]. But when I call model.summary(), the output shape is (None, 2, 1).
Can anyone explain this? Where is my code wrong?
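For comparison, here is a minimal sketch of the usual Keras convention (my own illustration, not the asker's code): inputs arrive with a leading batch dimension as (batch, d), the kernel is shaped (d, M), and the layer returns (batch, M); the None that shows up in model.summary() is that batch axis.

import tensorflow as tf
from tensorflow import keras

class LinearRowMajor(keras.layers.Layer):
    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        # kernel is (d, M) so that (batch, d) @ (d, M) -> (batch, M)
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer="random_normal", trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

x = keras.Input(shape=(2,))    # d = 2; the batch dimension is implicit (None)
y = LinearRowMajor(32)(x)      # symbolic output of shape (None, 32)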
I have this custom layer:
class PhysicalLayer(keras.layers.Layer):
    def __init__(self, units, speed):
        self.units = units
        self.speed = speed
        super(PhysicalLayer, self).__init__()

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units), initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,), initializer="random_normal", trainable=True)

    def call(self, inputs):
        squareSpeed = tf.math.square(self.speed)
        vibrationMax = tf.math.reduce_max(inputs, axis=1, keepdims=True)
        inputsSpeed = tf.math.divide(squareSpeed, vibrationMax)
        print(tf.shape(inputsSpeed))
        print(tf.shape(self.w))
        multiplication = tf.multiply(squareSpeed, self.w)
        return tf.matmul(inputsSpeed, self.w) + self.b
        # return multiplication + self.b
And when I try to build the following model:
inputs = keras.Input(shape=(500,))
dense = layers.Dense(64, activation="relu")
x = PhysicalLayer(1, rotationSpeed)(inputs)
x = dense(x)
x = layers.Dense(32, activation="relu")(x)
outputs = layers.Dense(1)(x)
modelPhi = keras.Model(inputs=inputs, outputs=outputs, name="model_phi_custom")
I have the following error:
ValueError: Dimensions must be equal, but are 1 and 500 for '{{node physical_layer_34/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](physical_layer_34/truediv, physical_layer_34/MatMul/ReadVariableOp)' with input shapes: [?,1], [500,1].
I tried to use multiplication instead of matmul, but then to fit the model I need to use a batch_size of 500, or I come across this error:
Node: 'gradient_tape/mean_absolute_error/sub/BroadcastGradientArgs'Incompatible shapes: [500,1] vs. [64,1]
How can I fix this? Thanks.
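A shape walk-through may help here (my own reading of the error, not a confirmed fix): reduce_max over axis 1 with keepdims=True collapses the 500 features to one, so the tensor fed into matmul is (batch, 1), while the kernel was built as (input_shape[-1], units) = (500, 1), and the inner dimensions 1 and 500 cannot match. Below is a minimal sketch that sizes the kernel to the tensor actually being multiplied, assuming a single scaled feature per sample is the intent:

import tensorflow as tf
from tensorflow import keras

class PhysicalLayerSketch(keras.layers.Layer):
    def __init__(self, units, speed, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.speed = speed

    def build(self, input_shape):
        # kernel sized for the (batch, 1) tensor produced by reduce_max, not for the raw input
        self.w = self.add_weight(shape=(1, self.units), initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,), initializer="random_normal", trainable=True)

    def call(self, inputs):
        square_speed = tf.math.square(self.speed)
        vibration_max = tf.math.reduce_max(inputs, axis=1, keepdims=True)  # (batch, 1)
        inputs_speed = tf.math.divide(square_speed, vibration_max)         # (batch, 1)
        return tf.matmul(inputs_speed, self.w) + self.b                    # (batch, units)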
I am trying to learn to write a custom layer; I followed the steps on keras.io.
Code -
class Linear(keras.layers.Layer):
    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        print('inputs', inputs.shape)
        for index in range(inputs.shape[0]):
            ...
        return tf.matmul(inputs, self.w) + self.b
This shows the error -
TypeError: in user code:
<ipython-input-3-314addf0c624>:39 call *
for index in range(inputs.shape[0]):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/operators/py_builtins.py:365 range_ **
return _py_range(start_or_stop, stop, step)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/operators/py_builtins.py:390 _py_range
return range(start_or_stop)
TypeError: 'NoneType' object cannot be interpreted as an integer
When I run this Linear class separately, it works fine. But when I run this layer as part of a trainable model, it shows this error.
How can I solve this? Thanks.
By default, the first dimension of inputs is the batch dimension (for image inputs the full shape would be [batch_size, width, height, channels]), and when you create your model, batch_size is set to None.
import os
# os.environ['KERAS_BACKEND'] = 'theano'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # suppress TensorFlow messages
import tensorflow as tf
from keras.layers import *
from keras.models import *

class CustomLinear(Layer):
    def __init__(self, batch_size, units=32, input_dim=32):
        super(CustomLinear, self).__init__()
        self.batch_size = batch_size
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        print('inputs', inputs.shape)
        # for index in range(self.batch_size):
        #     print(index)
        return tf.matmul(inputs, self.w) + self.b

batch_size = 10
model = Sequential()
model.add(Input(shape=(2, 32)))
model.add(CustomLinear(batch_size=batch_size))  # inputs (None, 2, 32)

x = tf.random.normal((batch_size, 2, 32))  # dummy data
model(x)  # inputs (10, 2, 32)
Usually, batch_size is not required for the calculations within the layer. But if you still need it, you can add an argument (e.g. batch_size) to your CustomLinear, define your batch_size beforehand, and access it inside the call function.
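Alternatively (a sketch of my own, not part of the original answer), the runtime batch size can be read inside call without passing it through the constructor by using tf.shape, which returns the dynamic shape even when the static batch dimension is None. Note that the result is a scalar tensor, so it can feed tf.range or other TensorFlow ops, but not a plain Python range loop:

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]   # dynamic batch size as a scalar tensor
        # usable in graph ops, e.g. tf.range(batch_size), but not in Python's range()
        return tf.matmul(inputs, self.w) + self.b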
Here's my code
#Here's my custom GRU layer
class GRU(keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super(GRU, self).__init__(**kwargs)
        self.units = units
        self.h_init = False

    def build(self, input_shape):
        dims = input_shape[-1]
        self.steps = input_shape[-2]
        self.wr = self.add_weight(name="wr", shape=[dims+self.units, self.units], initializer="glorot_normal")
        self.br = self.add_weight(name="br", shape=[self.units], initializer="zeros")
        self.wz = self.add_weight(name="wz", shape=[dims+self.units, self.units], initializer="glorot_normal")
        self.bz = self.add_weight(name="bz", shape=[self.units], initializer="zeros")
        self.wn = self.add_weight(name="wn", shape=[dims+self.units, self.units], initializer="glorot_normal")
        self.bn = self.add_weight(name="bn", shape=[self.units], initializer="zeros")
        super(GRU, self).build(input_shape)

    def dense_forward(self, w, b, x, activation):
        return activation(x @ w + b)

    def update_step(self, x):
        self.z.assign(self.dense_forward(self.wz, self.bz, tf.concat([x, self.h], axis=1), tf.nn.sigmoid))
        self.r.assign(self.dense_forward(self.wr, self.br, tf.concat([x, self.h], axis=1), tf.nn.sigmoid))
        self.n.assign(self.dense_forward(self.wn, self.bn, tf.concat([x, self.r * self.h], axis=1), tf.nn.tanh))
        # z = tf.nn.sigmoid(self.wz @ tf.concat([x, self.h], axis=1) + self.bz)
        # r = tf.nn.sigmoid(self.wr @ tf.concat([x, self.h], axis=1) + self.br)
        # n = tf.nn.tanh(self.wn @ tf.concat([x, r * self.h], axis=1) + self.bn)
        self.h = (1 - self.z) * self.h + self.z * self.n

    def call(self, x):
        if not self.h_init:
            initializer = tf.zeros_initializer()
            self.h = initializer(shape=(x.shape[0], self.units))
            self.z = tf.Variable(initializer(shape=[x.shape[0], self.units], dtype=tf.float32))
            self.r = tf.Variable(initializer(shape=[x.shape[0], self.units], dtype=tf.float32))
            self.n = tf.Variable(initializer(shape=[x.shape[0], self.units], dtype=tf.float32))
            self.h_init = True
        assert x.shape[1] == self.steps
        for step in tf.range(self.steps):
            self.update_step(x[:, step, :])
        return self.h

    def compute_output_shape(self, input_shape):
        return tf.TensorShape([None, self.units])

    def get_config(self):
        config = super(GRU, self).get_config()
        config.update({'units': self.units})
        return config
#Here's my custom dense layer
class Dense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        self.kernel = self.add_weight(
            name="kernel", shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal")
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros")
        super().build(batch_input_shape)

    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "units": self.units,
                "activation": keras.activations.serialize(self.activation)}
'''
gru = GRU(units=32)
gru.build(input_shape=(None, 128, 2))
gru.compute_output_shape(input_shape=(None, 128, 2))
dense = Dense(1)
dense.build((None, 32, 1))
dense.compute_output_shape(batch_input_shape=tf.TensorShape([None, 32, 1]))
'''
#Here's the model to use my custom GRU for regression
class GRUReg(keras.models.Model):
    def __init__(self, units, **kwargs):
        super(GRUReg, self).__init__(**kwargs)
        self.gru = GRU(units=units)
        self.dense = Dense(1)

    def build(self, input_shape):
        self.gru.build(input_shape)
        self.dense.build(self.gru.compute_output_shape(input_shape))

    def call(self, x):
        x = self.gru(x)
        return self.dense(x)
Here's the training code:
model = GRUReg(32)
model.build([None, 128, 2])
model(tf.ones((2, 128, 2)))
model.compile(optimizer=keras.optimizers.RMSprop(), loss='mae', metrics=['mae'])
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                              patience=5, min_lr=0.001)
Then fit
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, callbacks=[reduce_lr])
An error occurs at the model.fit line.
Here's the error:
:13 call *
x = self.gru(x)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:776 __call__
outputs = base_layer_utils.mark_as_return(outputs, acd)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer_utils.py:639 mark_as_return
return nest.map_structure(_mark_as_return, outputs)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\util\nest.py:568 map_structure
structure[0], [func(*x) for x in entries],
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\util\nest.py:568
structure[0], [func(*x) for x in entries],
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer_utils.py:625 _mark_as_return
return_tensor = acd.mark_as_return(tensor)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\auto_control_deps.py:167 mark_as_return
tensor = array_ops.identity(tensor)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\util\dispatch.py:180 wrapper
return target(*args, **kwargs)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\ops\array_ops.py:267 identity
ret = gen_array_ops.identity(input, name=name)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py:3829 identity
"Identity", input=input, name=name)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\op_def_library.py:742 _apply_op_helper
attrs=attr_protos, op_def=op_def)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\func_graph.py:591 _create_op_internal
inp = self.capture(inp)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\func_graph.py:641 capture
% (tensor, tensor.graph, self))
InaccessibleTensorError: The tensor 'Tensor("add_4:0", shape=(2, 32), dtype=float32)' cannot be accessed here: it is defined in another function or code block. Use return values, explicit Python locals or TensorFlow collections to access it. Defined in: FuncGraph(name=gru_reg_6_gru_11_while_body_19141, id=2570529255624); accessed from: FuncGraph(name=keras_graph, id=2566523294088).
I wrote the code based on similar code from a TF2 book, which made me even more baffled.
Then I googled the error but could not find an appropriate solution.
My OS: Windows 10
TensorFlow version: 2.1.0
Python: 3.7
CUDA Toolkit: 10.1
Graphics card: GTX 1660 Ti
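For what it's worth, one common pattern that avoids this class of error is to keep the recurrent state as local tensors inside call instead of creating and assigning tf.Variables there; the InaccessibleTensorError comes from a tensor created in one traced function (the loop body) being captured from another graph. A minimal sketch of call rewritten that way (my own assumption about the fix, not tested against this exact setup):

    def call(self, x):
        # keep state as plain tensors local to this call; no tf.Variable, no assign
        h = tf.zeros((tf.shape(x)[0], self.units))
        for step in range(self.steps):                        # self.steps is set in build()
            xt = x[:, step, :]
            z = tf.nn.sigmoid(tf.concat([xt, h], axis=1) @ self.wz + self.bz)
            r = tf.nn.sigmoid(tf.concat([xt, h], axis=1) @ self.wr + self.br)
            n = tf.nn.tanh(tf.concat([xt, r * h], axis=1) @ self.wn + self.bn)
            h = (1 - z) * h + z * n                           # new state replaces the old tensor
        return h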
I am writing a custom model with tf.keras, and in a recurrent node I need to get the value of my 1D input as an int.
That recurrent node needs to build a 1xN tensor whose elements are the result of N iterations of a function f(x).
So I created a NumPy array of size N that is filled one element at a time, and then I convert the NumPy array to a tensor.
The problem is that I can't get the value of my 1D tensor as an int.
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

def f(x):
    return 3*x

class myLayer(layers.Layer):
    def __init__(self, units=1, input_dim=1):
        super(myLayer, self).__init__()
        self.units = units
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
                                                  dtype='float32'),
                             trainable=False)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=False,
        )

    ####### IMPORTANT PART HERE ######
    def call(self, inputs):
        # In this example N = 20
        # Define numpy array
        x = np.zeros(20)
        # Set its first value to my 1D input # ERROR HERE
        x[0] = inputs[0]
        # Assign the other elements of x
        for i in range(1, 20):
            x[i] = f(x[i-1])
        # Cast to tensor
        return tf.constant(x, shape=(1, 20))

class Linear(layers.Layer):
    def __init__(self, units=1, input_dim=20):
        super(Linear, self).__init__()
        self.units = units
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
                                                  dtype='float32'),
                             trainable=True)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(initial_value=b_init(shape=(units,),
                                                  dtype='float32'),
                             trainable=True)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

def build_model():
    model = tf.keras.Sequential([
        myLayer(),
        Linear()
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
    #model.build([1])
    return model

class PrintDot(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print(epoch)
        print('.', end='')

train_X = np.linspace(0, 99, num=100)
train_y = 2*train_X
train_X = train_X / np.linalg.norm(train_X)

model = build_model()
#print(model.summary())
epochs = 10
history = model.fit(train_X, train_y, epochs=epochs, validation_split=0.2, verbose=0, callbacks=[PrintDot()])

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())

# x = tf.ones((1,1)) * 0.21
# horse_layer = Horseshoe()
# y = horse_layer(x)
# print(y)
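One hedged workaround (my own sketch, not the asker's accepted solution) is to build the sequence with TensorFlow ops and keep everything symbolic, so the input value never needs to be read out as a Python int or NumPy scalar inside call:

    def call(self, inputs):
        # assume inputs has shape (batch, 1); keep it as a tensor instead of writing into a NumPy array
        x = inputs[:, :1]
        values = [x]
        for _ in range(19):                    # N = 20, as in the question
            x = f(x)                           # works as long as f is built from tf ops, e.g. 3 * x
            values.append(x)
        return tf.concat(values, axis=1)       # shape (batch, 20)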
I'm trying to implement a CRelu layer in Keras.
One option that seems to work is to use a Lambda layer:
def _crelu(x):
    x = tf.nn.crelu(x, axis=-1)
    return x

def _conv_bn_crelu(x, n_filters, kernel_size):
    x = Conv2D(filters=n_filters, kernel_size=kernel_size, strides=(1, 1), padding='same')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Lambda(_crelu)(x)
    return x
But I wonder whether the Lambda layer introduces some overhead in training or inference.
My second attempt is to create a Keras layer that is a wrapper around tf.nn.crelu:
class CRelu(Layer):
    def __init__(self, **kwargs):
        super(CRelu, self).__init__(**kwargs)

    def build(self, input_shape):
        super(CRelu, self).build(input_shape)

    def call(self, x):
        x = tf.nn.crelu(x, axis=-1)
        return x

    def compute_output_shape(self, input_shape):
        output_shape = list(input_shape)
        output_shape[-1] = output_shape[-1] * 2
        output_shape = tuple(output_shape)
        return output_shape

def _conv_bn_crelu(x, n_filters, kernel_size):
    x = Conv2D(filters=n_filters, kernel_size=kernel_size, strides=(1, 1), padding='same')(x)
    x = BatchNormalization(axis=-1)(x)
    x = CRelu()(x)
    return x
Which version will be more efficient?
I'm also looking for a pure Keras implementation, if that's possible.
I don't think there is a significant difference between the two implementations speed-wise.
The Lambda implementation is actually the simplest, but writing a custom Layer as you have done is usually better, especially with regard to model saving and loading (the get_config method).
In this case it doesn't matter much, though, as CReLU is trivial and doesn't require saving and restoring parameters. You can, however, store the axis parameter, as in the code below; this way it will be restored automatically when the model is loaded.
class CRelu(Layer):
    def __init__(self, axis=-1, **kwargs):
        self.axis = axis
        super(CRelu, self).__init__(**kwargs)

    def build(self, input_shape):
        super(CRelu, self).build(input_shape)

    def call(self, x):
        x = tf.nn.crelu(x, axis=self.axis)
        return x

    def compute_output_shape(self, input_shape):
        output_shape = list(input_shape)
        output_shape[-1] = output_shape[-1] * 2
        output_shape = tuple(output_shape)
        return output_shape

    def get_config(self):
        config = {'axis': self.axis, }
        base_config = super(CRelu, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
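As a usage note (my own addition, with a hypothetical file name): even with get_config in place, Keras still has to be told which Python class the saved layer name refers to when the model is loaded back, typically via custom_objects:

from tensorflow import keras

model.save('model_with_crelu.h5')                      # hypothetical file name
reloaded = keras.models.load_model('model_with_crelu.h5',
                                   custom_objects={'CRelu': CRelu})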