How do you get the "actual" shape of a tensor at training time? e.g.,
(None, 64) -> (128, 64)
In other words, at training time I get a shape like (None, 64), where None means the first dimension of the tensor is dynamic w.r.t. the input size, and 64 is an example value for the second dimension. I assume that at training time the "actual" size of that tensor is known to the framework, so I am wondering how (or whether) I can get the actual size of the tensor, with None evaluated to the train/test/eval dataset size. Hence, I would like to get (128, 64) instead of (None, 64), where 128 is the size of the input.
Please consider the following simplified code example.
class ALayer(tensorflow.keras.layers.Layer):
    """Simplified custom layer used to illustrate the static-vs-dynamic shape question.

    NOTE: ``self.kernel`` and ``self.bias`` are not created anywhere in this
    simplified snippet; presumably they are defined elsewhere (e.g. in ``build``).
    """

    def call(self, inputs):
        """Apply ``inputs @ kernel + bias`` and print the result's shape in two ways."""
        features = tf.matmul(inputs, self.kernel) + self.bias
        # These are the different approaches I've tried.
        print(features.shape)
        # This prints: (None, 64)
        print(tf.shape(features))
        # This prints: Tensor("ALayer/Shape:0", shape=(2,), dtype=int32)
        return features
# Build a functional model: input -> ALayer -> Dense(1).
# NOTE(review): `input_dim` is not defined in this snippet; presumably it is set earlier.
input_layer = layers.Input(input_dim)
# The custom layer is called with a one-element list rather than the bare tensor.
x = ALayer()([input_layer])
x = layers.Dense(1)(x)
model = keras.Model(inputs=[input_layer], outputs=[x])
# compile() is called without loss/optimizer arguments in this simplified example.
model.compile()
# Datasets are built from in-memory arrays; no .batch() is applied here.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, (y_train)))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, (y_val)))
model.fit(train_dataset, validation_data=val_dataset)
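You should use tf.print. Although eager execution is activated by default in TF 2.7, Keras traces the layer's call into a graph when running model.fit, so a plain Python print only sees the symbolic shape (None, 64), whereas tf.print is executed as part of the graph and reports the actual run-time shape: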
import tensorflow as tf
class ALayer(tf.keras.layers.Layer):
    """Dense-style layer that reports the run-time shape of its output with ``tf.print``."""

    def __init__(self, units=32):
        super().__init__()
        # Number of output features produced by the layer.
        self.units = units

    def build(self, input_shape):
        """Create the weight matrix and bias once the input feature size is known."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Compute ``inputs @ w + b`` and print the concrete shape of the result."""
        projected = tf.matmul(inputs, self.w)
        features = projected + self.b
        # tf.print (unlike print) shows the concrete values each time the layer runs.
        tf.print('Features shape -->', tf.shape(features), '\n')
        return features
# Functional model: 10 input features -> ALayer(10) -> Dense(1).
input_layer = tf.keras.layers.Input(shape=(10,))
x = tf.keras.layers.Dense(1)(ALayer(10)(input_layer))
model = tf.keras.Model(inputs=[input_layer], outputs=[x])
model.compile(loss=tf.keras.losses.BinaryCrossentropy())

# Random toy data: 64 samples for training and 64 for validation, labels in {0, 1}.
X_train = tf.random.normal((64, 10))
y_train = tf.random.uniform((64,), maxval=2, dtype=tf.int32)
X_val = tf.random.normal((64, 10))
y_val = tf.random.uniform((64,), maxval=2, dtype=tf.int32)

# Batches of 32, so the layer prints a leading dimension of 32.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)
model.fit(train_dataset, validation_data=val_dataset, epochs=1, verbose=0)
Features shape --> [32 10]
Features shape --> [32 10]
Features shape --> [32 10]
Features shape --> [32 10]
<keras.callbacks.History at 0x7fab3ce15910>
Related
Hello, below is the PyTorch model I am trying to run, but I am getting an error. I have posted the error trace as well. It was running very well until I added the convolution layers. I am still new to deep learning and PyTorch, so I apologize if this is a silly question. I am using Conv1d, so why does Conv1d expect a 3-dimensional input, and why is it getting a 2-D input, which is also odd?
class Net(nn.Module):
    """Network from the question: Linear/Conv1d stages followed by a stack of Linear layers.

    As written, ``forward`` fails on its first line (see the error trace
    that follows): ``fc1`` returns a 2-D tensor, which is passed straight
    to ``conv1d1``.
    """

    def __init__(self):
        super().__init__()
        # Input width matches the flattened view passed in by the training loop
        # (CROP_SIZE is defined elsewhere).
        self.fc1 = nn.Linear(CROP_SIZE*CROP_SIZE*3, 512)
        self.conv1d1 = nn.Conv1d(in_channels=512, out_channels=64, kernel_size=1, stride=2)
        self.fc2 = nn.Linear(64, 128)
        self.conv1d2 = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2)
        self.fc3 = nn.Linear(64, 256)
        self.conv1d3 = nn.Conv1d(in_channels=256, out_channels=64, kernel_size=1, stride=2)
        self.fc4 = nn.Linear(64, 256)
        # NOTE(review): this rebinds fc4, so the Linear(64, 256) created on the
        # previous line is discarded -- possibly a layer-numbering slip; confirm.
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 64)
        self.fc6 = nn.Linear(64, 32)
        self.fc7 = nn.Linear(32, 64)
        # One output per distinct landmark_id in `frame` (defined elsewhere;
        # presumably a pandas DataFrame).
        self.fc8 = nn.Linear(64, frame['landmark_id'].nunique())

    def forward(self, x):
        """Run the stages in order and return log-probabilities over dim 1."""
        # Each Linear output goes directly into a Conv1d with no extra
        # dimension added; this first line is where the RuntimeError is raised.
        x = F.relu(self.conv1d1(self.fc1(x)))
        x = F.relu(self.conv1d2(self.fc2(x)))
        x = F.relu(self.conv1d3(self.fc3(x)))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = self.fc8(x)
        return F.log_softmax(x, dim=1)
net = Net()
import torch.optim as optim
# NOTE(review): loss_function is created but never used below; the loop calls F.nll_loss instead.
loss_function = nn.CrossEntropyLoss()
net.to(torch.device('cuda:0'))
# NOTE(review): `device`, `train_loader` and `tqdm` are not defined in this snippet.
for epoch in range(3): # 3 full passes over the data
    # NOTE(review): the optimizer is re-created at the start of every epoch.
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    for data in tqdm(train_loader): # `data` is a batch of data
        X = data['image'].to(device) # X is the batch of features
        y = data['landmarks'].to(device) # y is the batch of targets.
        optimizer.zero_grad() # sets gradients to 0 before loss calc. You will do this likely every step.
        output = net(X.view(-1,CROP_SIZE*CROP_SIZE*3)) # pass in the reshaped batch
        # print(np.argmax(output))
        # print(y)
        loss = F.nll_loss(output, y) # calc and grab the loss value
        loss.backward() # apply this loss backwards thru the network's parameters
        optimizer.step() # attempt to optimize weights to account for loss/gradients
    print(loss) # print loss. We hope loss (a measure of wrong-ness) declines!
Error trace
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-42-f5ed7999ce57> in <module>
5 y = data['landmarks'].to(device) # y is the batch of targets.
6 optimizer.zero_grad() # sets gradients to 0 before loss calc. You will do this likely every step.
----> 7 output = net(X.view(-1,CROP_SIZE*CROP_SIZE*3)) # pass in the reshaped batch
8 # print(np.argmax(output))
9 # print(y)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-37-6d3e34d425a0> in forward(self, x)
16
17 def forward(self, x):
---> 18 x = F.relu(self.conv1d1(self.fc1(x)))
19 x = F.relu(self.conv1d2(self.fc2(x)))
20 x = F.relu(self.conv1d3(self.fc3(x)))
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
210 _single(0), self.dilation, self.groups)
211 return F.conv1d(input, self.weight, self.bias, self.stride,
--> 212 self.padding, self.dilation, self.groups)
213
214
RuntimeError: Expected 3-dimensional input for 3-dimensional weight [64, 512, 1], but got 2-dimensional input of size [4, 512] instead
You should learn how convolutions work (e.g. see this answer) and some neural network basics (this tutorial from PyTorch).
Basically, Conv1d expects inputs of shape [batch, channels, features] (where features can be some timesteps and can vary, see example).
nn.Linear expects shape [batch, features] as it is fully connected and each input feature is connected to each output feature.
You can verify those shapes by yourself, for torch.nn.Linear:
import torch
# Fully connected layer: maps the last dimension from 20 input to 10 output features.
layer = torch.nn.Linear(in_features=20, out_features=10)
data = torch.randn(64, 20)  # [batch, in_features]
layer(data).shape  # [64, 10], [batch, out_features]
For Conv1d:
# kernel_size=3 with padding=1: the timestep count is unchanged in the shapes shown below.
layer = torch.nn.Conv1d(in_channels=20, out_channels=10, kernel_size=3, padding=1)
data = torch.randn(64, 20, 15) # [batch, channels, timesteps]
layer(data).shape # [64, 10, 15], [batch, out_channels, timesteps]
layer(torch.randn(32, 20, 25)).shape # [32, 10, 25]; batch size and timesteps may vary
BTW. As you are working with images, you should use torch.nn.Conv2d instead.
Most PyTorch functions work on batched data, i.e. they accept input of size (batch_size, *shape). @Szymon Maszke has already posted an answer related to that.
So in your case, you can use the unsqueeze and squeeze functions to add and remove the extra dimension.
Here's the sample code:
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Minimal Linear -> Conv1d -> Linear network showing the unsqueeze/squeeze pattern.

    ``nn.Linear`` works on ``[batch, features]`` while ``nn.Conv1d`` needs
    ``[batch, channels, length]``, so a length-1 axis is added before the
    convolution and removed again afterwards.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(100, 512)
        self.conv1d1 = nn.Conv1d(in_channels=512, out_channels=64, kernel_size=1, stride=2)
        self.fc2 = nn.Linear(64, 128)

    def forward(self, x):
        """Map a ``[batch, 100]`` input to a ``[batch, 128]`` output."""
        x = self.fc1(x)                # [batch, 512]
        x = x.unsqueeze(dim=2)         # [batch, 512, 1]
        x = F.relu(self.conv1d1(x))    # [batch, 64, 1]
        # Squeeze only the length axis: a bare squeeze() would also drop the
        # batch axis whenever the batch holds a single sample.
        x = x.squeeze(dim=2)           # [batch, 64]
        x = self.fc2(x)                # [batch, 128]
        return x
# Push one random batch through the network and show the output shape.
bsize = 4
net = Net()
inp = torch.randn(bsize, 100)
out = net(inp)
print(out.shape)
I am working on sentiment analysis, I want to classify the output into 4 classes. For loss I am using cross-entropy.
The problem is that PyTorch's cross-entropy needs an input of shape (batch_size, output), which I am having trouble with.
I am taking a batch size of 12 and sequence size is 32
import torch.nn as nn
class RNN(nn.Module):
    """Embedding -> LSTM -> two Linear layers, emitting one score vector per time step.

    With the defaults, ``forward`` returns a tensor of shape
    ``[seq_len * batch, num_classes]`` (``[384, 4]`` for 32 steps and a batch of 12).
    """

    def __init__(self, hidden_dim = 256, input_size = 32 , num_layers = 1, num_classes=4, vocab_size = len(vocab_to_int)+1, embedding_dim=100):
        super().__init__()
        self.input_size = input_size
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers)
        self.fc1 = nn.Linear(hidden_dim, 50)
        # Honour the num_classes argument instead of a literal 4 (same result for the default).
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x, hidden):
        """Run the network and return ``(scores, hidden)``.

        ``hidden`` is the LSTM state, e.g. the pair built by ``init_hidden``.
        """
        x = self.embedding(x)
        # NOTE(review): the sizes are hard-coded (32 steps, batch 12, embedding 100),
        # and view() only reinterprets memory. If x arrives as (batch, seq) a
        # transpose(0, 1) is probably what is wanted -- confirm the input layout.
        x = x.view(32, 12, 100)
        x, hidden = self.lstm(x, hidden)
        # Collapse (seq, batch) into one axis; use the configured hidden size
        # rather than a literal 256.
        x = x.contiguous().view(-1, self.hidden_dim)
        x = self.fc1(x) # output shape ([384, 50])
        x = self.fc2(x) # output shape [384, 4]
        return x, hidden

    def init_hidden(self, batch_size=12):
        """Return zero-filled ``(h0, c0)`` of shape ``[num_layers, batch_size, hidden_dim]``.

        The tensors share the dtype and device of the model's parameters, so the
        method follows the model rather than forcing the state onto CUDA.
        """
        weight = next(self.parameters()).data
        state_shape = (self.num_layers, batch_size, self.hidden_dim)
        hidden = (weight.new_zeros(state_shape), weight.new_zeros(state_shape))
        return hidden
According to the CrossEntropyLoss docs:
input has to be a Tensor of size (C) for unbatched input, (minibatch,C) [for batched input] [...]
The code you provided is only the RNN class and not the data processing and the actual call to CrossEntropyLoss, but the error you stated in the comments makes me think that you didn't reshape the labels tensor to have the same size as the output from the neural network. Therefore, you'd be calculating the loss of a tensor with size (384, 4) against another tensor which I infer is of size (12, 32). Your labels tensor should be of size (384) to match the first dimension of the neural network output.
Also, you don't have to manually reshape your tensors, you can reshape them after the forward() call through the torch.nn.utils.rnn.pack_padded_sequence() function. If you do apply this function to both the output of the neural network and the labels, you will have a tensor of size (384, 4) that PyTorch can handle in the call to CrossEntropyLoss. See the note in the pack_padded_sequence() function docs for more details.
I am trying to experiment creating a LeNet-300-100 dense neural network using TensorFlow 2's model sub-classing. The code that I have is as follows:
batch_size = 32
num_epochs = 20
# Load MNIST dataset-
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# Convert the images to float32 and divide by 255.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
# Convert class vectors/target to binary class matrices or one-hot encoded values-
# NOTE(review): num_classes is not defined in this snippet; the shapes reported below imply 10.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)
# Notebook-style shape checks; the comment after each shows the reported value.
X_train.shape, y_train.shape
# ((60000, 28, 28), (60000, 10))
X_test.shape, y_test.shape
# ((10000, 28, 28), (10000, 10))
class LeNet300(Model):
    """LeNet-300-100: flatten, two ReLU dense layers (300 and 100 units), 10-way softmax."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.flatten = Flatten()
        self.dense1 = Dense(units=300, activation='relu')
        self.dense2 = Dense(units=100, activation='relu')
        self.op = Dense(units=10, activation='softmax')

    def call(self, inputs):
        """Flatten the input, apply both hidden layers, and return the softmax output."""
        hidden = self.flatten(inputs)
        for dense in (self.dense1, self.dense2):
            hidden = dense(hidden)
        return self.op(hidden)
# Instantiate an object using LeNet-300-100 dense model-
model = LeNet300()
# Compile the defined model-
# NOTE(review): the labels prepared earlier are one-hot encoded (shape (N, 10)),
# while SparseCategoricalCrossentropy expects integer class ids; this is the
# mismatch reported by the ValueError quoted after this snippet.
model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=['accuracy']
)
# Define early stopping callback-
# Monitors val_loss with min_delta 0.001 and a patience of 3.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
monitor = 'val_loss', min_delta = 0.001,
patience = 3)
# Train defined and compiled model-
# The test split is passed as validation data.
history = model.fit(
x = X_train, y = y_train,
batch_size = batch_size, shuffle = True,
epochs = num_epochs,
callbacks = [early_stopping_callback],
validation_data = (X_test, y_test)
)
On calling "model.fit()", it gives the following error:
ValueError: Shape mismatch: The shape of labels (received (320,))
should equal the shape of logits except for the last dimension
(received (32, 10)).
What's going wrong?
Thanks
The loss SparseCategoricalCrossentropy doesn't take one-hot encoding to calculate loss. In the documentation, they mention that
Use this crossentropy loss function when there are two or more label classes. We expect labels to be provided as integers. If you want to provide labels using one-hot representation, please use CategoricalCrossentropy loss. There should be # classes floating point values per feature for y_pred and a single floating point value per feature for y_true.
As a result of this you are getting the error. If you observe the stacktrace the error arises in the loss function,
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:1569 sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/keras/backend.py:4941 sparse_categorical_crossentropy
labels=target, logits=output)
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py:4241 sparse_softmax_cross_entropy_with_logits_v2
labels=labels, logits=logits, name=name)
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/home/ubuntu/.local/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py:4156 sparse_softmax_cross_entropy_with_logits
logits.get_shape()))
ValueError: Shape mismatch: The shape of labels (received (320,)) should equal the shape of logits except for the last dimension (received (32, 10)).
I would suggest using CategoricalCrossentropy.
This is because input to first Dense layer should be flattened. MNIST data has 28x28 grid/image for every digit. This 28x28 data should be flattened to 784 input numbers.
So just before first Dense(...) layer insert Flatten() keras layer i.e. do Flatten()(inputs).
See this doc of Flatten layer for reference.
I am building a sequence-to-label classifier, where the input data are text sequences and output labels are binary. The model is very simple, with GRU hidden layers and a Word Embeddings input layer. I want a [n, 60] input to output a [n, 1] label, but the Torch model returns a [n, 60] output.
My model, with minimal layers:
class Model(nn.Module):
    """Embedding -> GRU -> Linear model from the question.

    The Linear head is applied to the full GRU output, so ``forward`` returns
    one value per time step rather than one per sequence.
    """

    def __init__(self, weights_matrix, hidden_size, num_layers):
        super().__init__()
        # create_emb_layer is defined elsewhere; it returns the layer plus its two sizes.
        emb_layer, num_embeddings, embedding_dim = create_emb_layer(weights_matrix, True)
        self.embedding = emb_layer
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(embedding_dim, hidden_size, num_layers, batch_first=True)
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, inp, hidden):
        """Return ``(scores, hidden)`` for a batch of token-id sequences."""
        embedded = self.embedding(inp)
        gru_out, hidden = self.gru(embedded, hidden)
        return self.out(gru_out), hidden

    def init_hidden(self, batch_size):
        """Return an all-zero initial GRU state placed on ``device``."""
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        return torch.zeros(*state_shape).to(device)
Model Layers:
Model(
(embedding): Embedding(184901, 100)
(gru): GRU(100, 60, num_layers=3, batch_first=True)
(out): Linear(in_features=60, out_features=1, bias=True)
)
Input shapes of my data are: X : torch.Size([64, 60]), and Y : torch.Size([64, 1]), for a single batch of size 64.
When I run the X tensor through the model, it should output a single label, however, the output from the classifier is torch.Size([64, 60, 1]). To run the model, I do the following:
for epoch in range(1):
    running_loss = 0.0
    batch_size = 64
    hidden = model.init_hidden(batch_size)
    for ite, data in enumerate(train_loader):
        # The last column holds the label; everything before it is the input sequence.
        x = data[:, :-1]
        y = data[:, -1].reshape(-1, 1)

        optimizer.zero_grad()
        outputs, hidden = model(x, hidden)
        # Carry the hidden state's values into the next batch, re-wrapped from .data.
        hidden = Variable(hidden.data).to(device)

        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the running average every 2000 iterations, then reset it.
        if (ite + 1) % 2000 == 0:
            print('[%d %5d] loss: %.3f' % (epoch + 1, ite + 1, running_loss / 2000))
            running_loss = 0.0
When I print the shape of outputs, it is 64x60x1 rather than 64x1. What I also don't get is how the criterion function is able to calculate the loss when the shapes of outputs and labels are inconsistent. With Tensorflow, this would always throw an error, but it doesn't with Torch.
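The output from your model is of shape torch.Size([64, 60, 1]), i.e. 64 is the batch size, and (60, 1) corresponds to [n, 1] as expected.
Assuming you're using nn.CrossEntropyLoss, it expects the input to be (N, C) and the target to be (N), where C is the number of classes; in the K-dimensional case it also accepts an input of shape (N, C, d1, ...) with a target of shape (N, d1, ...). Your [64, 60, 1] output and [64, 1] target fit the latter form with N = 64, C = 60 and d1 = 1.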
Your output is consistent, and hence loss is evaluated.
For example,
criterion = nn.CrossEntropyLoss(reduction='mean')

# Scores of shape (3, 2, 1) paired with integer targets of shape (3, 1) drawn from {0, 1}.
outputs = torch.randn(3, 2, 1)
target = torch.empty(3, 1, dtype=torch.long).random_(2)
print(outputs)
print(target)

loss = criterion(outputs, target)
print(loss)
# outputs
tensor([[[ 0.5187],
[ 1.0320]],
[[ 0.2169],
[ 2.4480]],
[[-0.4895],
[-0.6096]]])
tensor([[0],
[1],
[0]])
tensor(0.5731)
Read more here.
Assume that I have 77 samples to train my CNN, and my batch size is 10. Then the last batch has a batch size of 7 instead of 10. Somehow when I pass it to the loss function such as nn.MSELoss(), it gives me the error:
RuntimeError: The size of tensor a (10) must match the size of tensor
b (7) at non-singleton dimension 1
So pytorch doesn't support batches with different sizes?
My code in doubt:
import numpy as np
import torch
from torch import nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages, then three fully connected layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, (5, 4))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(64, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw outputs per sample for a batch of single-channel images."""
        # Convolution -> ReLU -> pooling, twice; the same pooling layer is reused.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch axis.
        _, channels, height, width = x.shape
        x = x.view(-1, channels * height * width)
        for fc in (self.fc1, self.fc2):
            x = F.relu(fc(x))
        return self.fc3(x)
model = Net()
batch_size = 10
# Generating artificial data
# 77 samples with batch_size = 10, so the last batch holds only 7 samples.
x_train = torch.randn((77,1,20,20))
# NOTE(review): labels have shape (77,), i.e. one scalar per sample, while the
# model emits 10 values per sample; MSELoss then has to broadcast the two.
y_train = torch.randint(0,10,size=(77,),dtype=torch.float)
trainset = torch.utils.data.TensorDataset(x_train,y_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
for epoch in range(20): # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i%10==0:
            print('epoch{}, step{}, loss: {}'.format(epoch + 1, i + 1, running_loss))
            # print("frac post = {}".format(frac_post))
            running_loss = 0.0
The problem is not due to the batch size, but to a failure to broadcast properly between the 10 outputs of your CNN and the single label provided in each example.
If you look at the model output and label tensor shapes during the batch where the error is thrown,
# Shapes on the final, short batch (77 samples with batch_size=10 leaves 7).
print(outputs.shape, labels.shape)
#out: torch.Size([7, 10]) torch.Size([7])
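you'll see that the labels are stored in a 1-D tensor of shape [7] (one scalar label per example), while the model output has shape [7, 10]. According to the PyTorch broadcasting rules, two tensors are broadcastable only if, comparing their shapes from the trailing dimension backwards, each pair of sizes is equal or one of them is 1. In this case, the trailing dimension of the model output (10) is incompatible with that of the label (7).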
To fix, either add a dummy dimension to the label (assuming you actually want to broadcast the labels to match your ten network outputs), or define a network with scalar outputs. For example:
# Labels now carry a trailing singleton dimension: size (77, 1) instead of (77,).
y_train = torch.randint(0,10,size=(77,1),dtype=torch.float)
results in
print(outputs.shape, labels.shape)
#out: torch.Size([7, 10]) torch.Size([7,1])
# these are broadcastable: the trailing dimensions are 10 and 1