How can I call `tf.nn.avg_pool2d()` with tensors? - python

I am trying to call tf.nn.avg_pool2d() inside a function decorated with @tf.function. How do I have to pass the parameters ksize and strides?
Both ksize and strides change during execution, so I store them in a tf.Variable.
import tensorflow as tf


class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.lod_in = tf.Variable(initial_value=5.0, name='level of detail', trainable=False)
        self.k_size = tf.Variable(initial_value=[1, 1, 1, 1], trainable=False)

    def call(self, inputs, training=None, mask=None):
        x = inputs
        x = self.special_method(x)
        return x

    @tf.function
    def special_method(self, x):
        factor = int(2 ** tf.floor(self.lod_in))
        print(type(factor))
        # Method 1 - TypeError: Expected int for argument 'ksize' not <tf.Tensor 'Cast:0' shape=() dtype=int8>.
        ksize = [1, factor, factor, 1]
        x = tf.nn.avg_pool2d(x, ksize=ksize, strides=ksize, padding='VALID')
        # Method 2 - AttributeError: 'Tensor' object has no attribute 'numpy'
        # self.k_size.assign([1, factor, factor, 1])
        # x = tf.nn.avg_pool2d(x, ksize=self.k_size, strides=self.k_size, padding='VALID')
        return x

    def get_config(self):
        config = super(MyModel, self).get_config()
        return config


model = MyModel()
model.compile()

x = tf.ones(shape=[8, 128, 128, 16])
y = model(x)
Edit
I run my code in graph mode and would like to change self.lod_in during execution after a certain number of steps.

tf.nn.avg_pool2d expects a Python int or a list of ints, not a Tensor. Just use normal Python lists or integers.
From the documentation (emphasis is mine):
Args
input: Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels] if data_format does not start with "NC" (default), or [batch_size, num_channels] + input_spatial_shape if data_format starts with "NC". Pooling happens over the spatial dimensions only.
ksize: *An int or list of ints* that has length 1, N or N+2. The size of the window for each dimension of the input tensor.
strides: *An int or list of ints* that has length 1, N or N+2. The stride of the sliding window for each dimension of the input tensor.
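A minimal sketch to illustrate the point: ksize and strides end up as static attributes of the pooling op, so plain Python ints work, while a symbolic Tensor computed inside a @tf.function cannot be used there.

import tensorflow as tf

x = tf.ones([1, 8, 8, 3])

# Plain Python ints are fine: ksize/strides are baked into the graph as op attributes.
y = tf.nn.avg_pool2d(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
print(y.shape)  # (1, 4, 4, 3)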
Computing the factor with plain Python (math.floor on an ordinary float attribute) keeps ksize a list of Python ints:

import tensorflow as tf
import math


class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.lod_in = 5.0

    def call(self, inputs, training=None, mask=None):
        x = inputs
        x = self.special_method(x)
        return x

    @tf.function
    def special_method(self, x):
        factor = int(2 ** math.floor(self.lod_in))
        ksize = [1, factor, factor, 1]
        x = tf.nn.avg_pool2d(x, ksize=ksize, strides=ksize, padding='VALID')
        return x

    def get_config(self):
        config = super(MyModel, self).get_config()
        return config

A possible solution was suggested here. However, it is only implemented for max_pooling but not for average_pooling.
from tensorflow.python.ops import gen_nn_ops

conv_pooled = gen_nn_ops.max_pool_v2(
    conv,
    ksize=[1, 1, tf.shape(h_conv)[-2], 1],
    strides=[1, 1, 1, 1],
    padding='VALID',
    name="pool")

Related

How to change the size of the inputs with a bias in a custom layer in Tensorflow Python

I have a custom layer like in this example:
class SuperMaximum(keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super(SuperMaximum, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            name="w",
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        maximum = tf.math.reduce_max(inputs * self.w, axis=1, keepdims=True)
        return maximum
What I want to implement is a way to change the size of the inputs over the epochs (say the input size is 10: for the first epoch the layer receives just 50% of the total inputs, for the second 60%, etc.).
At first I thought of adding these lines to the call function:
def call(self, inputs):
    taille = len(inputs)
    taille = float(taille)
    inputsWindow = inputs[int(taille * (1.0 - self.window)):int(taille * (1.0 + self.window))]
    maximum = tf.math.reduce_max(inputsWindow * self.w, axis=1, keepdims=True)
But without success; I get the following error:
TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got <tf.Tensor 'super_maximum_10/Cast_1:0' shape=(1,) dtype=int32>
Next I tried this:
def call(self, inputs):
    X = []
    for i in range(int(float(len(inputs)) * self.window)):
        X.append(test[i])
    X = tf.stack(X)
    X = tf.cast(X, tf.float32)
    maximum = tf.math.reduce_max(X * self.w, axis=1, keepdims=True)
    return maximum
But I obtain the following error:
ValueError: Shape must be rank 0 but is rank 1 for 'limit' for '{{node super_maximum_13/range}} = Range[Tidx=DT_INT32](super_maximum_13/range/start, super_maximum_13/Maximum, super_maximum_13/range/delta)' with input shapes: [], [1], [].
Is there any other way I can implement this?
Thanks
Edit:
Here is the code that is throwing the error (for both ways tried):
def ModelCustomSuperMax():
    inputs = keras.Input(shape=(500,), name="digits")
    x = SuperMaximum(1)(inputs)
    x1 = keras.layers.Dense(32, activation="relu", name="firstCustomMax")(x)
    x2 = keras.layers.Dense(16, activation="relu", name="secondCustomMax")(x1)
    outputs = keras.layers.Dense(1, name="predictionsCustomMax")(x2)
    return keras.Model(inputs=inputs, outputs=outputs)


modelSuperMaxCustom = ModelCustomSuperMax()
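No answer is included for this question here, but for reference, the TypeError above comes from slicing with tensor-valued bounds. A hedged sketch of one possible direction: use the static feature dimension (inputs.shape[-1], a plain Python int for keras.Input(shape=(500,))) so the slice indices stay ordinary integers. The window constructor argument below is hypothetical, not from the original post.

import tensorflow as tf
from tensorflow import keras


class SuperMaximum(keras.layers.Layer):
    def __init__(self, units, window=0.5, **kwargs):
        super(SuperMaximum, self).__init__(**kwargs)
        self.units = units
        self.window = window  # fraction of the feature axis to keep (hypothetical argument)

    def build(self, input_shape):
        self.w = self.add_weight(
            name="w", shape=(self.units,),
            initializer="random_normal", trainable=True,
        )

    def call(self, inputs):
        # inputs.shape[-1] is a static Python int (500 for Input(shape=(500,))),
        # so the slice bounds below are plain integers, not tensors.
        taille = float(inputs.shape[-1])
        start = int(taille * (1.0 - self.window))
        inputs_window = inputs[:, start:]
        return tf.math.reduce_max(inputs_window * self.w, axis=1, keepdims=True)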

max value of a tensor in graph mode tensorflow

I have this tf code in graph mode (it has a training function wrapped by @tf.function) where I need to get the max value of a tensor x with type
<class 'tensorflow.python.framework.ops.Tensor'> Tensor("x_3:0", shape=(100,), dtype=int64).
Then I need to use the max value of x as one of the arguments in the shape argument of tf.reshape(). If I print the output of tf.reduce_max(x) I get Tensor("Max:0", shape=(), dtype=int64), which is an invalid argument for tf.reshape(). I have tried tf.reduce_max(x).numpy() and it throws the error message 'Tensor' object has no attribute 'numpy'
So how do I get the max value of a tensor in graph mode in tf 2.6.0?
UPDATE This is my code with the necessary details (I hope) to see what is going on:
MyModel.py
class MyModel(tf.keras.Model):
    def __init__(self, ..., hidden_size, name='model', **kwargs):
        super(MyModel, self).__init__(name=name, **kwargs)
        self.hidden_size = hidden_size

    def call(self, inputs, training=True):
        x1, input, target, length, y = inputs
        batch_size = input.shape[0]
        print('check point 2', length, tf.reduce_max(length))
        padded_outputs = tf.reshape(tf.boolean_mask(outputs_dec, mask),
                                    shape=(batch_size, tf.reduce_max(length), self.hidden_size))
        print('check point 3', padded_outputs.shape)

    @tf.function
    def train(self, inputs, optimizer):
        with tf.GradientTape() as tape:
            costs = self.call(inputs)
        gradients = tape.gradient(self.loss, self.params)
        optimizer.apply_gradients(zip(gradients, self.params))
train_mymodel.py
tr_data = tf.data.Dataset.from_tensor_slices((x1_tr,
                                              x2.input,
                                              x2.target,
                                              x2.length,
                                              y_tr))\
                         .batch(args.batch_size)

while int(checkpoint.step) < args.epochs:
    for i, (x1_batch, input_batch, target_batch, length_batch, y_batch) in enumerate(tr_data):
        print('check point 1', length_batch)
        costs, obj_fn = mymodel.train((x1_batch, input_batch, target_batch, length_batch, y_batch), optimizer)
check point 1 tf.Tensor([300 300 ... 300 300], shape=(100,), dtype=int64)
check point 2 Tensor("x_3:0", shape=(100,), dtype=int64) Tensor("Max_1:0", shape=(), dtype=int64)
check point 3 (100, None, 500)
The shape of padded_outputs should be (100, 300, 500).
UPDATE2 The error happens when the graph is traced. If I hard-code shape=(batch_size, 300, self.hidden_size) and use tf.print(batch_size, tf.reduce_max(length), self.hidden_size) then the code runs without error messages and the output of tf.print() is (100, 300, 500). Is there any way to avoid such behavior?
It should work by simply passing the reduced tensor as an argument:
import tensorflow as tf

tf.random.set_seed(1)

@tf.function
def reshape_on_max_value():
    tensor1 = tf.random.uniform((5, 2), maxval=5, dtype=tf.int32)
    tensor2 = tf.random.uniform((4, 1), maxval=5, dtype=tf.int32)
    x = tf.reduce_max(tensor1)
    tf.print(type(tensor1), type(tensor2))
    tf.print(tf.reshape(tensor2, [x, 1, 1]).shape)

reshape_on_max_value()
<class 'tensorflow.python.framework.ops.Tensor'> <class 'tensorflow.python.framework.ops.Tensor'>
TensorShape([4, 1, 1])
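Regarding UPDATE2: the (100, None, 500) printed at check point 3 is expected rather than an error; while the graph is traced, the static shape cannot know the runtime value of tf.reduce_max(length), so that dimension shows as None, whereas tf.shape / tf.print report the concrete values when the graph actually runs. A small illustrative sketch of the difference (not from the original answer):

import tensorflow as tf

@tf.function
def demo(length):
    max_len = tf.reduce_max(length)            # value only known when the graph runs
    x = tf.ones([100, max_len, 500])
    print('static shape: ', x.shape)           # (100, None, 500), printed while tracing
    tf.print('runtime shape:', tf.shape(x))    # [100 300 500], printed at run time
    return x

demo(tf.fill([100], 300))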

conv2d getting bad input

I have trained a classifier and am now trying to load it and run some predictions.
I am getting the error provided below:
....
return self._conv_forward(input, self.weight, self.bias)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/conv.py", line 439, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
TypeError: conv2d() received an invalid combination of arguments - got (list, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (list, Parameter, Parameter, tuple, tuple, tuple, int)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (list, Parameter, Parameter, tuple, tuple, tuple, int)
Here is the code
import torch
import torch.nn as nn
import torch.nn.functional as F  # needed for F.avg_pool2d in ConvBlock below
import numpy as np
from PIL import Image
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
Transformer - used to encode images:
transformer = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
])
Getting a file and converting to Tensor:
def get_file_as_tensor(file_path):
    with np.load(file_path) as f:
        melspec_image_array = f['arr_0']
        image = Image.fromarray(melspec_image_array, mode='RGB')
        image_tensor = transformer(image).div_(255).float()
        return image_tensor.clone().detach()
Prediction function that is at the top of the stack, because the error occurs when I run model([tensor]):
def predict(tensor, model):
    yhat = model([tensor])
    yhat = yhat.clone().detach()
    return yhat
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, 3, 1, 1),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, 2)
        return x


class Classifier(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            ConvBlock(in_channels=3, out_channels=64),
            ConvBlock(in_channels=64, out_channels=128),
            ConvBlock(in_channels=128, out_channels=256),
            ConvBlock(in_channels=256, out_channels=512),
        )
        self.fc = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(512, 128),
            nn.PReLU(),
            # nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        x = self.conv(x)
        x = torch.mean(x, dim=3)
        x, _ = torch.max(x, dim=2)
        x = self.fc(x)
        return x


PATH = "models/model.pt"
model = Classifier()
model.load_state_dict(torch.load(PATH))
model.eval()

car_file_path = "processed_np/car_file.npz"
car_tensor = get_file_as_tensor(car_file_path)
no_car_file_path = "raw_negative_processed/nocar-1041.npz"
no_car_tensor = get_file_as_tensor(no_car_file_path)

car_prediction = predict(car_tensor, model)
no_car_prediction = predict(no_car_tensor, model)
print("car", car_prediction)
print("no car", no_car_prediction)
The code is self-explanatory, but SO keeps asking for more text.
Would really appreciate some help as I am new to ML
def predict(tensor, model):
    yhat = model(tensor.unsqueeze(0))
    yhat = yhat.clone().detach()
    return yhat
You should use this method definition instead of yours.
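To make the point concrete, a small sketch using the Classifier defined in the question and a random image-sized tensor (the 128x128 size is an assumption): wrapping the tensor in a Python list is what reaches F.conv2d as a list and triggers the error, while unsqueeze(0) adds the batch dimension the Conv2d layers expect.

import torch

model = Classifier()              # the Classifier defined in the question
model.eval()

x = torch.randn(3, 128, 128)      # a single (C, H, W) image tensor
# model([x])                      # reproduces: conv2d() received ... got (list, Parameter, ...)
y = model(x.unsqueeze(0))         # (1, C, H, W): a batch of one image
print(y.shape)                    # torch.Size([1, 10])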
Why are you applying your model to [tensor], that is, to a Python list containing a single tensor?
You should apply your model to tensor directly: model(tensor).
You might need to add a singleton "batch dimension" to tensor. See this answer for more details.
The error is about the conv2d() function, not the nn.Conv2d module.
The only thing I can think of here is that your input data is incorrect. Make sure it is a tensor in the form (B, C, H, W).

Why return self.head(x.view(x.size(0), -1)) in the nn.Module for pyTorch reinforcement learning example

I understand that the pole-balancing example requires 2 outputs (see the Reinforcement Learning (DQN) Tutorial).
Here is the output for self.head
print ('x',self.head)
x = Linear(in_features=512, out_features=2, bias=True)
When I run the epochs, below are the outputs:
print (self.head(x.view(x.size(0), -1)))
return self.head(x.view(x.size(0), -1))
tensor([[-0.6945, -0.1930]])
tensor([[-0.0195, -0.1452]])
tensor([[-0.0906, -0.1816]])
tensor([[ 0.0631, -0.9051]])
tensor([[-0.0982, -0.5109]])
...
The size of x is:
x = torch.Size([121, 32, 2, 8])
So I am trying to understand what x.view(x.size(0), -1) is doing?
I understand from the comment in the code that it's returning:
Returns tensor([[left0exp,right0exp]...]).
But how is x, which is torch.Size([121, 32, 2, 8]), reduced to a tensor of size 2?
Is there an alternative way of writing this that makes more sense? What if I had 4 outputs, how would I represent that? Why x.size(0)? Why -1?
So it appears to take self.head with 4 outputs down to 2 outputs. Is that correct?
At the bottom is the class I am referring to:
class DQN(nn.Module):
    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        # Number of Linear input connections depends on output of conv2d layers
        # and therefore the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=5, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1

        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))
x.view(x.size(0), -1) is flattening the tensor; this is because the Linear layer only accepts a vector (1d array). To break it down: x.view() reshapes the tensor to the specified shape (more info), x.size(0) returns the first dimension of the tensor (which is the batch size; this should remain constant), and the -1 in x.view() is a filler, in other words, a dimension we don't know, so PyTorch automatically calculates it. For example, if x = torch.tensor([1,2,3,4]), to reshape the tensor to a 2x2 you could do x.view(2,2), x.view(2,-1) or x.view(-1,2).
The output shape is not a tensor shape of 2, but (121, 2) (the 121 is the batch size, and the 2 comes from the Linear layer's output). So to change the output size from 2 to 4, you would have to change the outputs argument in the __init__ function to 4.
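A small illustrative sketch of the flattening, using the shapes from the question (32 * 2 * 8 = 512 features per sample):

import torch

x = torch.randn(121, 32, 2, 8)          # same shape as in the question
flat = x.view(x.size(0), -1)            # keep the batch dim, flatten the rest
print(flat.shape)                       # torch.Size([121, 512]) since 32 * 2 * 8 = 512

head = torch.nn.Linear(512, 2)          # self.head from the question
print(head(flat).shape)                 # torch.Size([121, 2])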

Concatenating hidden units using Keras

I am trying to concatenate the hidden units. For example, if I have 3 units h1, h2, h3, then I want the new layer to have [h1;h1], [h1;h2], [h1;h3], [h2;h1], ...
So, I have tried:
class MyLayer(Layer):
    def __init__(self, W_regularizer=None, W_constraint=None, **kwargs):
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(W_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        # Create a trainable weight variable for this layer.
        self.W = self.add_weight((input_shape[-1], input_shape[-1]),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint,
                                 trainable=True)
        super(MyLayer, self).build(input_shape)

    def call(self, x, input_shape):
        conc = K.concatenate([x[:, :-1, :], x[:, 1:, :]], axis=1)  # help needed here
        uit = K.dot(conc, self.W)  # W has shape (input_shape[-1], input_shape[-1])
        return uit

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[1], input_shape[-1]
I am not sure what should I return for the second argument of my output shape.
from keras.layers import Input, Lambda, LSTM
from keras.models import Model
import keras.backend as K
from keras.layers import Lambda

lstm = LSTM(64, return_sequences=True)(input)
something = MyLayer()(lstm)
You could implement the concatenation operation that you described by leveraging itertools.product in order to compute the cartesian product of the temporal dimension's indices. The call method could be coded as follows:
def call(self, x):
    # requires: from itertools import product
    prod = product(range(nb_timesteps), repeat=2)
    conc_prod = []
    for i, j in prod:
        c = K.concatenate([x[:, i, :], x[:, j, :]], axis=-1)  # Shape=(batch_size, 2*nb_features)
        c_expanded = c[:, None, :]                            # Shape=(batch_size, 1, 2*nb_features)
        conc_prod.append(c_expanded)
    conc = K.concatenate(conc_prod, axis=1)  # Shape=(batch_size, nb_timesteps**2, 2*nb_features)
    uit = K.dot(conc, self.W)                # W has shape (2*input_shape[-1], input_shape[-1])
    return uit                               # Shape=(batch_size, nb_timesteps**2, nb_features)
In the example that you provided, nb_timesteps would be 3. Note also that the shape of the weights should be (2*input_shape[-1], input_shape[-1]) for the dot product to be valid.
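A standalone sanity check of the concatenation pattern (using tf.concat instead of the Keras backend, with made-up sizes) to confirm the shapes described above:

import tensorflow as tf
from itertools import product

# Toy sizes: batch of 4, 3 timesteps, 5 features per timestep.
x = tf.random.normal((4, 3, 5))
nb_timesteps = 3

pairs = [tf.concat([x[:, i, :], x[:, j, :]], axis=-1)[:, None, :]
         for i, j in product(range(nb_timesteps), repeat=2)]
conc = tf.concat(pairs, axis=1)
print(conc.shape)   # (4, 9, 10) == (batch_size, nb_timesteps**2, 2*nb_features)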
Disclaimer: I am not sure what you want to achieve.
