I have this custom layer:
class PhysicalLayer(keras.layers.Layer):
    def __init__(self, units, speed):
        self.units = units
        self.speed = speed
        super(PhysicalLayer, self).__init__()

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units), initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,), initializer="random_normal", trainable=True)

    def call(self, inputs):
        squareSpeed = tf.math.square(self.speed)
        vibrationMax = tf.math.reduce_max(inputs, axis=1, keepdims=True)
        inputsSpeed = tf.math.divide(squareSpeed, vibrationMax)
        print(tf.shape(inputsSpeed))
        print(tf.shape(self.w))
        multiplication = tf.multiply(squareSpeed, self.w)
        return tf.matmul(inputsSpeed, self.w) + self.b
        # return multiplication + self.b
And when I try to build the following model:
inputs = keras.Input(shape=(500,))
dense = layers.Dense(64, activation="relu")
x = PhysicalLayer(1, rotationSpeed)(inputs)
x = dense(x)
x = layers.Dense(32, activation="relu")(x)
outputs = layers.Dense(1)(x)
modelPhi = keras.Model(inputs=inputs, outputs=outputs, name="model_phi_custom")
I have the following error:
ValueError: Dimensions must be equal, but are 1 and 500 for '{{node physical_layer_34/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](physical_layer_34/truediv, physical_layer_34/MatMul/ReadVariableOp)' with input shapes: [?,1], [500,1].
I tried to use multiplication instead of matmul, but then to fit the model I need to use a batch_size of 500, or I come across this error:
Node: 'gradient_tape/mean_absolute_error/sub/BroadcastGradientArgs' Incompatible shapes: [500,1] vs. [64,1]
How can I fix this?
Thanks
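For what it's worth, the matmul fails because reduce_max over axis 1 collapses the 500 input features into a single column, so inputsSpeed comes out as (batch, 1) while w is (500, units). A minimal sketch of one way the shapes line up, assuming the intent is to scale each of the 500 features by speed² normalised by the per-sample maximum (an assumption about the intended physics, not a confirmed fix):

    def call(self, inputs):
        squareSpeed = tf.math.square(self.speed)
        vibrationMax = tf.math.reduce_max(inputs, axis=1, keepdims=True)  # (batch, 1)
        # keep the feature axis: (batch, 500) / (batch, 1) broadcasts cleanly
        inputsSpeed = squareSpeed * inputs / vibrationMax                 # (batch, 500)
        return tf.matmul(inputsSpeed, self.w) + self.b                    # (batch, units)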
I'm new to PyTorch and not able to figure out what I'm doing wrong; below is the code:
import numpy as np
import torch
import torch.nn as nn
from sklearn import datasets

x_np, y_np = datasets.make_regression(n_samples=100, n_features=1, noise=20, random_state=0)
x = torch.from_numpy(x_np.astype(np.float32))
y = torch.from_numpy(y_np.astype(np.float32))
y = y.view(y.shape[0], 1)
n_samples, n_features = x.shape
class Regression(nn.Module):
    def __init__(self, inputsize, outputsize, hiddensize):
        super(Regression, self).__init__()
        self.hidden_size = hiddensize
        self.input_size = inputsize
        self.output_size = outputsize
        self.i2h = nn.Linear(self.input_size + self.hidden_size, self.hidden_size)
        self.h2o = nn.Linear(self.input_size + self.hidden_size, self.output_size)

    def forward(self, x):
        hidden = torch.zeros(1, self.hidden_size)
        print(x.shape)
        print(hidden.shape)
        combined = torch.cat((x, hidden), 1)
        hidden = self.i2h(combined)
        output = self.h2o(combined)
        return output
model = Regression(n_features, n_features, 16)
lr = 0.01
loss = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), lr=lr)

for epoch in range(1000):
    ypred = model(x)
    l = loss(y, ypred)
    l.backward()
    opt.step()
    opt.zero_grad()
    if epoch % 100 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')
While training, I am getting this error
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 100 but got size 1 for tensor number 1 in the list
i2h maps the self.input_size + self.hidden_size dimensions to self.hidden_size, so for h2o you have to define a mapping starting from the self.hidden_size dimension, and update forward accordingly. The initial hidden state also needs a batch dimension matching x (x has 100 rows while torch.zeros(1, self.hidden_size) has only one), which is what the size mismatch in torch.cat is complaining about. Here is the complete code:
class Regression(nn.Module):
    def __init__(self, inputsize, outputsize, hiddensize):
        super(Regression, self).__init__()
        self.hidden_size = hiddensize
        self.input_size = inputsize
        self.output_size = outputsize
        self.i2h = nn.Linear(self.input_size + self.hidden_size, self.hidden_size)
        self.h2o = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        # give the initial hidden state the same batch dimension as x
        hidden = torch.zeros(x.shape[0], self.hidden_size)
        print(x.shape)
        print(hidden.shape)
        combined = torch.cat((x, hidden), 1)
        hidden = self.i2h(combined)
        output = self.h2o(hidden)
        return output
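A quick smoke test of the fixed class (reusing x and n_features from the question; the values here are illustrative):

model = Regression(n_features, 1, 16)
print(model(x).shape)  # torch.Size([100, 1]), matching y

Note that with two Linear layers the model holds four parameter tensors (a weight and a bias per Linear), so the [w, b] = model.parameters() unpacking in the question's logging line would also need adjusting.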
I am trying to learn to write a custom layer; I followed the steps on keras.io.
Code -
class Linear(keras.layers.Layer):
    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        print('inputs', inputs.shape)
        for index in range(inputs.shape[0]):
            ...
        return tf.matmul(inputs, self.w) + self.b
This shows the error -
TypeError: in user code:
<ipython-input-3-314addf0c624>:39 call *
for index in range(inputs.shape[0]):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/operators/py_builtins.py:365 range_ **
return _py_range(start_or_stop, stop, step)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/operators/py_builtins.py:390 _py_range
return range(start_or_stop)
TypeError: 'NoneType' object cannot be interpreted as an integer
When I run this Linear class separately, it works fine. But when I run this layer as part of a trainable model, it shows this error.
How do I solve this? Thanks.
By default, the first dimension of inputs is the batch dimension (for images the full shape would be [batch_size, width, height, channels]), and when you create your model, batch_size is set to None.
import os
# os.environ['KERAS_BACKEND'] = 'theano'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # suppress Tensorflow messages
import tensorflow as tf
from keras.layers import *
from keras.models import *

class CustomLinear(Layer):
    def __init__(self, batch_size, units=32, input_dim=32):
        super(CustomLinear, self).__init__()
        self.batch_size = batch_size
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        print('inputs', inputs.shape)
        # for index in range(self.batch_size):
        #     print(index)
        return tf.matmul(inputs, self.w) + self.b

batch_size = 10
model = Sequential()
model.add(Input(shape=(2, 32)))
model.add(CustomLinear(batch_size=batch_size))  # inputs (None, 2, 32)

x = tf.random.normal((batch_size, 2, 32))  # dummy data
model(x)  # inputs (10, 2, 32)
Mostly, batch_size is not required for the calculations within the layer. But if you still need it, you can add an argument (e.g. batch_size) to your CustomLinear, define your batch_size beforehand, and access it inside the call() method.
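Alternatively (a sketch, not part of the original answer), the batch size can be read dynamically inside call with tf.shape, which yields a runtime scalar even while the static shape is still None:

    def call(self, inputs):
        batch = tf.shape(inputs)[0]  # scalar tensor, resolved at run time
        tf.print('dynamic batch size:', batch)
        return tf.matmul(inputs, self.w) + self.b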
I want to create a Linear layer like y = w*x + b, where w is a matrix [Mxd], x is a matrix [dx1], b is a bias matrix [Mx1], M is the number of units in the layer, and d is the dimension of the vector x.
My code looks like:
class Linear(keras.layers.Layer):
    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units
        print(self.units)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(self.units, input_shape[1]),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units, 1), initializer="random_normal", trainable=True
        )
        print(input_shape[1])

    def call(self, inputs):
        return tf.matmul(self.w, inputs) + self.b

    def get_output_shape_for(self, input_shape):
        return (self.units, 1)
And when I create the model I use:
input_tensor1 = Input((2,1))
L11 = Linear(32)(input_tensor1)
model = Model(inputs=input_tensor1, outputs=L11)
model.summary()
In this code, d = 2 and M = 32.
As I think y = w*x + b, y will be a matrix [Mx1], since w is [Mxd], x is [dx1], and b is [Mx1]. But in the model summary, the output_shape is (None, 2, 1).
Can anyone explain it? Where is my code wrong?
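Two things to check here: Keras prepends a batch dimension, so the layer actually receives x as (None, 2, 1) rather than (d, 1), and get_output_shape_for is the old Keras 1 method name (Keras 2 uses compute_output_shape), so the summary may simply be echoing the input shape. For reference, a minimal sketch of the more common row-vector convention (a hypothetical LinearRowMajor variant, each sample fed as a flat d-vector), which gives M outputs per sample:

import tensorflow as tf
from tensorflow import keras

class LinearRowMajor(keras.layers.Layer):
    # hypothetical variant: w is (d, M), so y = x @ w + b
    def __init__(self, units=32):
        super(LinearRowMajor, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer="random_normal", trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

input_tensor1 = keras.Input((2,))        # d = 2, no trailing 1
L11 = LinearRowMajor(32)(input_tensor1)  # output shape (None, 32)
model = keras.Model(inputs=input_tensor1, outputs=L11)
model.summary()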
I am using TensorFlow 2.0 to train my own attention model; however, I ran into one big issue when building my decoder class, like this:
TypeError Traceback (most recent call last)
<ipython-input-19-3042369c4295> in <module>
9 enc_hidden_h=fw_sample_state_h,
10 enc_hidden_c=fw_sample_state_c,
---> 11 enc_output=sample_output)
12
13 print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))
TypeError: __call__() missing 1 required positional argument: 'inputs'
And my encoder-decoder attention model is as below.
Encoder: a self-defined pBLSTM
class Encoder(tf.keras.Model):
    def __init__(self, lstm_units, final_units, batch_sz, conv_filters, mfcc_dims):
        super(Encoder, self).__init__()
        self.lstm_units = lstm_units
        self.final_units = final_units
        self.batch_sz = batch_sz
        self.conv_filters = conv_filters
        self.mfcc_dims = mfcc_dims
        # Convolution layer to extract feature after MFCC
        self.conv_feat = tf.keras.layers.Conv1D(filters=self.conv_filters,
                                                kernel_size=self.mfcc_dims,
                                                padding='valid',
                                                activation='relu',
                                                strides=self.mfcc_dims)

    def call(self, x):
        '''
        build a pyramidal LSTM neural network encoder
        '''
        # Convolution Feature Extraction
        x = self.conv_feat(x)

        # initialize states for forward and backward
        initial_state_fw = None
        initial_state_bw = None
        counter = 0
        while x.shape[1] > self.final_units:
            counter += 1
            # forward LSTM
            fw_output, fw_state_h, fw_state_c = self.build_lstm(True)(x, initial_state=initial_state_fw)
            # backward LSTM
            bw_output, bw_state_h, bw_state_c = self.build_lstm(False)(x, initial_state=initial_state_bw)
            x = tf.concat([fw_output, bw_output], -1)
            x = self.reshape_pyramidal(x)
            initial_state_fw = [fw_state_h, fw_state_c]
            initial_state_bw = [bw_state_h, bw_state_c]

        print(f"Encoder pyramid layer number: {counter}\n")
        return x, (fw_state_h, fw_state_c), (bw_state_h, bw_state_c)

    def build_lstm(self, back=True):
        '''
        build LSTM layer for forward and backward
        '''
        return tf.keras.layers.LSTM(units=self.lstm_units,
                                    return_sequences=True,
                                    return_state=True,
                                    go_backwards=back)

    def reshape_pyramidal(self, outputs):
        '''
        After concatenating forward and backward outputs,
        return the reshaped output
        '''
        batch_size, time_steps, num_units = outputs.shape
        return tf.reshape(outputs, (batch_size, -1, num_units * 2))
Attention model: built following this paper:
https://arxiv.org/abs/1508.04025v5
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query hidden state shape == (batch_size, hidden size)
        # query_with_time_axis shape == (batch_size, 1, hidden size)
        # values shape == (batch_size, max_len, hidden size)
        # we are doing this to broadcast addition along the time axis to calculate the score
        query_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))

        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights
Decoder: A 1-layer LSTM decoder
class Decoder(tf.keras.Model):
    def __init__(self, target_sz, embedding_dim, decoder_units, batch_sz, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.batch_sz = batch_sz
        self.decoder_units = decoder_units
        self.embedding = tf.keras.layers.Embedding(target_sz, embedding_dim)
        self.attention = BahdanauAttention(self.decoder_units)
        self.lstm = tf.keras.layers.LSTM(units=self.decoder_units, return_sequences=True, return_state=True)
        self.fc = tf.keras.layers.Dense(target_sz)

    def call(self, x, enc_hidden_h, enc_hidden_c, enc_output):
        '''
        build LSTM decoder
        '''
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)

        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the LSTM
        output, state_h, state_c = self.lstm(x)

        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[-1]))

        # output shape == (batch_size, vocab)
        x = self.fc(output)

        return x, (state_h, state_c), attention_weights
I ran into that error when testing with my example input like below:
example_input_batch, example_target_batch = next(iter(dataset))
sample_output, (fw_sample_state_h, fw_sample_state_c), bw_sample_state = encoder(example_input_batch)

decoder = Decoder(target_sz=PHONEME_SIZE,
                  embedding_dim=EMBEDDING_DIM,
                  decoder_units=LSTM_UNITS,
                  batch_sz=BATCH_SIZE)

sample_target_size = tf.random.uniform((BATCH_SIZE, 1))
sample_decoder_output, sample_decoder_hidden, attention_weights = decoder(
    x=sample_target_size,
    enc_hidden_h=fw_sample_state_h,
    enc_hidden_c=fw_sample_state_c,
    enc_output=sample_output)
As discussed in the comments, the problem was that the poster was inheriting from tf.keras.Model while creating the Decoder() class, and this superclass expects an inputs argument in the __call__() operator.
So this error can be resolved by changing x to inputs in the Decoder.call() method, like so:
def call(self, inputs, enc_hidden_h, enc_hidden_c, enc_output):
    '''
    build LSTM decoder
    '''
    # enc_output shape == (batch_size, max_length, hidden_size)
    context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(inputs)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the LSTM
    output, state_h, state_c = self.lstm(x)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[-1]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, (state_h, state_c), attention_weights
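Equivalently (a sketch, not part of the original answer), the keyword can simply be dropped at the call site, so the first argument is passed positionally and Keras maps it onto inputs itself:

sample_decoder_output, sample_decoder_hidden, attention_weights = decoder(
    sample_target_size,  # passed positionally, picked up as `inputs`
    enc_hidden_h=fw_sample_state_h,
    enc_hidden_c=fw_sample_state_c,
    enc_output=sample_output)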
I am writing a custom model with tf.keras, and in a recurrent node I need to get the value of my 1D input as an int.
That recurrent node needs to build a 1xN tensor whose elements are the results of N iterations of a function f(x).
So I created a numpy array of size N which is filled by one element at each iteration, then I convert the numpy array to a tensor.
The problem is I can't get the value of my 1D tensor as an int.
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

def f(x):
    return 3 * x

class myLayer(layers.Layer):
    def __init__(self, units=1, input_dim=1):
        super(myLayer, self).__init__()
        self.units = units
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
                                                  dtype='float32'),
                             trainable=False)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=False,
        )

    ####### IMPORTANT PART HERE ######
    def call(self, inputs):
        # In this example N = 20
        # Define numpy array
        x = np.zeros(20)
        # Set its first value to my 1D input # ERROR HERE
        x[0] = inputs[0]
        # Assign the other elements of x
        for i in range(1, 20):
            x[i] = f(x[i-1])
        # Cast to tensor
        return tf.constant(x, shape=(1, 20))
class Linear(layers.Layer):
    def __init__(self, units=1, input_dim=20):
        super(Linear, self).__init__()
        self.units = units
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
                                                  dtype='float32'),
                             trainable=True)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(initial_value=b_init(shape=(units,),
                                                  dtype='float32'),
                             trainable=True)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b
def build_model():
    model = tf.keras.Sequential([
        myLayer(),
        Linear()
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
    # model.build([1])
    return model

class PrintDot(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print(epoch)
        print('.', end='')

train_X = np.linspace(0, 99, num=100)
train_y = 2 * train_X
train_X = train_X / np.linalg.norm(train_X)

model = build_model()
# print(model.summary())
epochs = 10
history = model.fit(train_X, train_y, epochs=epochs, validation_split=0.2, verbose=0, callbacks=[PrintDot()])

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())
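One graph-compatible way around this (a sketch, assuming f can be expressed with TF ops, as the 3*x above can) is to keep everything a tensor and build the 1xN result with tf.concat instead of filling a numpy array, e.g. replacing myLayer.call with:

    def call(self, inputs):
        # keep everything a tensor; reshape to (batch, 1) rather than
        # trying to read a Python int out of the symbolic input
        x = tf.reshape(inputs, (-1, 1))
        outs = [x]
        for _ in range(19):  # N = 20, as in the question
            x = f(x)         # f must use TF-compatible ops (3*x is fine)
            outs.append(x)
        return tf.concat(outs, axis=1)  # shape (batch, 20)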