How to save a Keras Model instance of a class - Python

I am using a Seq2Seq project from Google that uses an encoder/decoder. Here are the two classes, the encoder and the decoder:
#ENCODER
class EncoderNetwork(tf.keras.Model):
    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop('_parents', None)
        return d

    def __init__(self, input_vocab_size, embedding_dims, rnn_units):
        super().__init__()
        self.encoder_embedding = tf.keras.layers.Embedding(input_dim=input_vocab_size,
                                                           output_dim=embedding_dims)
        self.encoder_rnnlayer = tf.keras.layers.LSTM(rnn_units, return_sequences=True,
                                                     return_state=True)
        encoder_embedding = self.encoder_embedding
        encoder_rnnlayer = self.encoder_rnnlayer
#DECODER
class DecoderNetwork(tf.keras.Model):
    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop('_parents', None)
        return d

    def __init__(self, output_vocab_size, embedding_dims, rnn_units):
        super().__init__()
        self.decoder_embedding = tf.keras.layers.Embedding(input_dim=output_vocab_size,
                                                           output_dim=embedding_dims)
        self.dense_layer = tf.keras.layers.Dense(output_vocab_size)
        self.decoder_rnncell = tf.keras.layers.LSTMCell(rnn_units)
        # Sampler
        self.sampler = tfa.seq2seq.sampler.TrainingSampler()
        # Create attention mechanism with memory = None
        # (dense_units, BATCH_SIZE and Tx are globals defined elsewhere in the notebook)
        self.attention_mechanism = self.build_attention_mechanism(dense_units, None, BATCH_SIZE * [Tx])
        self.rnn_cell = self.build_rnn_cell(BATCH_SIZE)
        self.decoder = tfa.seq2seq.BasicDecoder(self.rnn_cell, sampler=self.sampler,
                                                output_layer=self.dense_layer)

    def build_attention_mechanism(self, units, memory, memory_sequence_length):
        return tfa.seq2seq.LuongAttention(units, memory=memory,
                                          memory_sequence_length=memory_sequence_length)
        #return tfa.seq2seq.BahdanauAttention(units, memory=memory, memory_sequence_length=memory_sequence_length)

    # wrap the decoder RNN cell
    def build_rnn_cell(self, batch_size):
        rnn_cell = tfa.seq2seq.AttentionWrapper(self.decoder_rnncell, self.attention_mechanism,
                                                attention_layer_size=dense_units)
        return rnn_cell

    def build_decoder_initial_state(self, batch_size, encoder_state, Dtype):
        decoder_initial_state = self.rnn_cell.get_initial_state(batch_size=batch_size,
                                                                dtype=Dtype)
        decoder_initial_state = decoder_initial_state.clone(cell_state=encoder_state)
        return decoder_initial_state
I create instances of EncoderNetwork and DecoderNetwork with my arguments and use the loss_function and train_step already defined to train my model:
def loss_function(y_pred, y):
    # shape of y: [batch_size, Ty]
    # shape of y_pred: [batch_size, Ty, output_vocab_size]
    sparsecategoricalcrossentropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                                                  reduction='none')
    loss = sparsecategoricalcrossentropy(y_true=y, y_pred=y_pred)
    mask = tf.logical_not(tf.math.equal(y, 0))  # output 0 for y=0, else output 1
    mask = tf.cast(mask, dtype=loss.dtype)
    loss = mask * loss
    loss = tf.reduce_mean(loss)
    return loss
def train_step(input_batch, output_batch, encoder_initial_cell_state):
    # initialize loss = 0
    loss = 0
    with tf.GradientTape() as tape:
        encoder_emb_inp = encoderNetwork.encoder_embedding(input_batch)
        a, a_tx, c_tx = encoderNetwork.encoder_rnnlayer(encoder_emb_inp,
                                                        initial_state=encoder_initial_cell_state)
        # [last step activations, last memory_state] of encoder passed as input to decoder network
        # Prepare correct decoder input & output sequence data
        decoder_input = output_batch[:, :-1]   # ignore <end>
        # compare logits with the decoder_input shifted by one timestep
        decoder_output = output_batch[:, 1:]   # ignore <start>
        # Decoder embeddings
        decoder_emb_inp = decoderNetwork.decoder_embedding(decoder_input)
        # Set up decoder memory from encoder output, and a zero state for the AttentionWrapperState
        decoderNetwork.attention_mechanism.setup_memory(a)
        decoder_initial_state = decoderNetwork.build_decoder_initial_state(BATCH_SIZE,
                                                                           encoder_state=[a_tx, c_tx],
                                                                           Dtype=tf.float32)
        # BasicDecoderOutput
        outputs, _, _ = decoderNetwork.decoder(decoder_emb_inp, initial_state=decoder_initial_state,
                                               sequence_length=BATCH_SIZE * [Ty - 1])
        logits = outputs.rnn_output
        # Calculate loss
        loss = loss_function(logits, decoder_output)
    # Returns the list of all layer variables / weights.
    variables = encoderNetwork.trainable_variables + decoderNetwork.trainable_variables
    # differentiate loss wrt variables
    gradients = tape.gradient(loss, variables)
    # grads_and_vars - list of (gradient, variable) pairs
    grads_and_vars = zip(gradients, variables)
    optimizer.apply_gradients(grads_and_vars)
    return loss
The training does not use the fit() method; it runs like this:
epochs = 20
for i in range(1, epochs + 1):
    encoder_initial_cell_state = initialize_initial_state()
    total_loss = 0.0
    for (batch, (input_batch, output_batch)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(input_batch, output_batch, encoder_initial_cell_state)
        total_loss += batch_loss
        if (batch + 1) % 5 == 0:
            print("total loss: {} epoch {} batch {} ".format(batch_loss.numpy(), i, batch + 1))
The results are fine and the predict function (a custom one) works perfectly, but how can I save the model? I tried pickle and keras.save(), but neither works. Any idea?
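For what it's worth (this is my suggestion, not part of the original question): subclassed models that hold tfa.seq2seq objects generally cannot be pickled or saved with model.save(), but their variables can be checkpointed and restored into freshly constructed networks. A minimal sketch, assuming encoderNetwork, decoderNetwork and optimizer are the objects used above:

# Checkpoint the variables rather than trying to serialize the
# Python objects themselves.
checkpoint = tf.train.Checkpoint(encoder=encoderNetwork,
                                 decoder=decoderNetwork,
                                 optimizer=optimizer)
checkpoint.save('./checkpoints/seq2seq')

# To load: rebuild EncoderNetwork/DecoderNetwork with the same
# constructor arguments, recreate the Checkpoint object, then:
checkpoint.restore(tf.train.latest_checkpoint('./checkpoints'))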

Related

Can't save a Keras/TensorFlow model

I want to save a model that has two models inside.
class DualEncoder(keras.Model):
    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super(DualEncoder, self).__init__(**kwargs)
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder
        self.temperature = temperature
        self.loss_tracker = keras.metrics.Mean(name="loss")
Full code:
class DualEncoder(keras.Model):
    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super(DualEncoder, self).__init__(**kwargs)
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder
        self.temperature = temperature
        self.loss_tracker = keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        return [self.loss_tracker]

    def call(self, features, training=False):
        # Place each encoder on a separate GPU (if available).
        # TF will fall back on available devices if there are fewer than 2 GPUs.
        with tf.device("/gpu:0"):
            # Get the embeddings for the captions.
            caption_embeddings = text_encoder(features["caption"], training=training)
        with tf.device("/gpu:1"):
            # Get the embeddings for the images.
            image_embeddings = vision_encoder(features["image"], training=training)
        return caption_embeddings, image_embeddings

    def compute_loss(self, caption_embeddings, image_embeddings):
        logg = tf.constant([[0, 0], [62, 61], [0, 0]])
        image_em = tf.pad(image_embeddings, logg, mode='CONSTANT', constant_values=0)
        print("image em is", image_em.shape)
        logits = (
            tf.matmul(caption_embeddings, image_em, transpose_b=True)
            / self.temperature
        )
        print("logits shape is", logits.shape)
        images_similarity = tf.matmul(
            image_em, image_em, transpose_b=True
        )
        print("images similarity shape is", images_similarity.shape)
        captions_similarity = tf.matmul(
            caption_embeddings, caption_embeddings, transpose_b=True
        )
        print("captions similarity shape is", captions_similarity.shape)
        targets = keras.activations.softmax(
            (captions_similarity + images_similarity) / (2 * self.temperature)
        )
        print("targets shape is", targets.shape)
        # Compute the loss for the captions using crossentropy
        captions_loss = keras.losses.kl_divergence(
            y_true=targets, y_pred=logits,  # from_logits=True
        )
        print("caption loss is", captions_loss.shape)
        # Compute the loss for the images using crossentropy
        images_loss = keras.losses.kl_divergence(
            y_true=tf.transpose(targets), y_pred=tf.transpose(logits),
        )
        # Return the mean of the loss over the batch.
        print("image loss is", images_loss.shape)
        #return (captions_loss + images_loss) / 2
        return tf.matmul(captions_loss, images_loss) / 2
        #(tf.reduce_sum(captions_loss, axis=0) + images_loss) / 2
        #return (captions_loss[0, :] + images_loss + captions_loss[1, :]) / 2

    def train_step(self, features):
        with tf.GradientTape() as tape:
            # Forward pass
            caption_embeddings, image_embeddings = self(features, training=True)
            loss = self.compute_loss(caption_embeddings, image_embeddings)
        # Backward pass
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        # Monitor loss
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, features):
        caption_embeddings, image_embeddings = self(features, training=False)
        loss = self.compute_loss(caption_embeddings, image_embeddings)
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}
When I save the dual encoder I get this error:
NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using save_weights.
However, I can save text_encoder and image_encoder on their own.
Can I load these models as below? Will it work?
class DualEncoder(keras.Model):
    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super(DualEncoder, self).__init__(**kwargs)
        self.text_encoder = new_model = tf.keras.models.load_model('text_encoder')
        self.image_encoder = new_model = tf.keras.models.load_model('image_encoder')
        self.temperature = temperature
        self.loss_tracker = keras.metrics.Mean(name="loss")
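For reference, the error message itself points at two routes. A minimal sketch of both (my addition, assuming dual_encoder is the trained instance from the post, built/called at least once):

# Option 1: the TensorFlow SavedModel format works for subclassed models.
dual_encoder.save("dual_encoder_savedmodel", save_format="tf")

# Option 2: save only the weights, rebuild the model with the same
# constructor arguments, and load the weights into the new instance.
dual_encoder.save_weights("dual_encoder_weights")
restored = DualEncoder(text_encoder, image_encoder, temperature=1.0)
restored.load_weights("dual_encoder_weights")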

L-BFGS Optimizer Not Changing Loss But Adam Is

I am building a physics-informed neural network to approximate PDEs. I am getting okay results just using the Adam optimizer, but I want better results. I am attempting to use Adam for, say, 10,000 iterations, then the L-BFGS optimizer (PyTorch) for the last 1,000.
However, when using the L-BFGS optimizer, the loss of the network never changes and remains constant. Here is the closure function used in my PINN for L-BFGS:
def closure(self):
    lbfgs_optim.zero_grad()
    train_loss = PINN.loss(xt_train_ICBC, u_train_ICBC, xt_resid, f_hat_train)
    train_loss.backward()
    return train_loss
Here are my optimizer parameters for both Adam and L-BFGS:
epochs_adam, epochs_lbfgs = 10000, 1000
adam_optim = torch.optim.Adam(PINN.parameters(), lr=lr, weight_decay=1e-5)
lbfgs_optim = torch.optim.LBFGS(PINN.parameters(), lr=lr, history_size=20,
                                max_iter=50, line_search_fn="strong_wolfe")
I am using one for loop for Adam and then another for loop for L-BFGS. Here is how Adam is used in my code, which works:
train_loss = PINN.loss(xt_train_ICBC, u_train_ICBC, xt_resid, f_hat_train)
... # print losses, append to lists
adam_optim.zero_grad()
train_loss.backward()
adam_optim.step()
Then for L-BFGS, which does not seem to work, all I call in my epoch loop is
lbfgs_optim.step(PINN.closure)
and I don't see any change in the loss. Why is that?
Versions being used: Python 3.9.12, PyTorch 1.11.0 and NumPy 1.21.5
EDIT: PINN code and optimization/training code:
class NN(nn.Module):
    # Heat equation PDE
    def __init__(self, layers):
        super().__init__()
        self.activation = nn.Sigmoid()
        self.loss_function = nn.MSELoss(reduction='mean')
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        for i in range(len(layers)-1):
            nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)
            nn.init.zeros_(self.linears[i].bias.data)

    def forward(self, x):
        a = x.float()
        for i in range(0, len(layers)-2):
            z = self.linears[i](a)
            a = self.activation(z)
        a = self.linears[-1](a)
        return a

    def lossICBC(self, x_ICBC, u_ICBC):
        """MSE losses for boundary and initial conditions"""
        loss_ICBC = self.loss_function(self.forward(x_ICBC), u_ICBC)
        return loss_ICBC

    def lossPDE(self, xt_residual, f_hat):
        """Residual loss for collocation points"""
        g = xt_residual.clone().float()
        g.requires_grad = True
        f = self.forward(g)
        f_xt = autograd.grad(f, g, torch.ones(g.shape[0], 1).to(device), create_graph=True)[0]
        f_xx_tt = autograd.grad(f_xt, g, torch.ones(g.shape).to(device), create_graph=True)[0]
        f_t = f_xt[:, [1]]      # extract just the t values
        f_xx = f_xx_tt[:, [0]]  # extract just the x values
        f = f_t - k*f_xx
        return self.loss_function(f, f_hat)

    def closure(self):
        lbfgs_optim.zero_grad()
        train_loss = PINN.loss(xt_train_ICBC, u_train_ICBC, xt_resid, f_hat_train)
        train_loss.backward()
        return train_loss

    def loss(self, x_ICBC, u_ICBC, xt_residual, f_hat):
        """Total loss"""
        loss_ICBC = self.lossICBC(x_ICBC, u_ICBC)
        loss_PDE = self.lossPDE(xt_residual, f_hat)  # f_hat = torch.zeros()
        return loss_PDE + loss_ICBC
lr_adam = 0.001
lr_lbfgs = 1
epochs_adam = 20000
adam_optim = torch.optim.Adam(PINN.parameters(), lr=lr)
epochs_lbfgs = 100
lbfgs_optim = torch.optim.LBFGS(PINN.parameters(), lr=15, history_size=20,
                                max_iter=50, line_search_fn="strong_wolfe")
Training loops:
for i in range(0, epochs_adam+1):
    train_loss = PINN.loss(xt_train_ICBC, u_train_ICBC, xt_resid, f_hat_train)
    adam_optim.zero_grad()
    train_loss.backward()
    adam_optim.step()

for i in range(0, epochs_lbfgs+1):
    train_loss = lbfgs_optim.step(PINN.closure)
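For comparison, here is a minimal runnable L-BFGS pattern (a toy sketch, not the poster's PINN): the closure must zero the gradients of the same optimizer instance whose step() it is handed and must return the loss tensor. It may also be worth re-checking the learning rate: lr=15 is unusually large for L-BFGS (1.0 is the common default) and could contribute to the frozen loss.

import torch
import torch.nn as nn

# Toy model and data standing in for the PINN and its training tensors.
model = nn.Sequential(nn.Linear(2, 32), nn.Tanh(), nn.Linear(32, 1))
x_train, y_train = torch.randn(64, 2), torch.randn(64, 1)
loss_fn = nn.MSELoss()

lbfgs = torch.optim.LBFGS(model.parameters(), lr=1.0, history_size=20,
                          max_iter=50, line_search_fn="strong_wolfe")

def closure():
    lbfgs.zero_grad()                         # zero grads on the optimizer being stepped
    loss = loss_fn(model(x_train), y_train)   # recompute the loss from scratch
    loss.backward()
    return loss

for _ in range(10):
    lbfgs.step(closure)                       # L-BFGS calls closure() internally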

PyTorch Custom LSTM architecture not learning

I am building a model to classify news (the AG News dataset). The vocab size is ~33k, with a custom embedding layer. I have run this for 20 epochs, but the loss and accuracy (1.3 and 26% respectively) are almost constant even at the end of the 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
Here is my model class:
class NewsClassifier(nn.Module):
    def __init__(self, vocab_weights=None, rnn_type='LSTM', vocab_size=len(vocab.vocab), n_classes=4, embed_size=300, rnn_units=512,
                 n_layers=2, bi_dir=True, rnn_drop=0.0, padding_index=vocab['<unk>']):
        super().__init__()
        self.rnn_units = rnn_units
        self.n_classes = n_classes
        self.rnn_type = rnn_type
        if vocab_weights:
            self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
        else:
            self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=padding_index)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        else:
            raise NotImplementedError
        self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)

    def forward(self, data, lens):
        x_embed = self.embedding(data)  # (padded_lens, batch_size, embed_dim)
        x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted=False)  # pack sequences before the RNN
        if self.rnn_type == 'LSTM':
            output_packed, (hidden, cell) = self.rnn(x_packed)  # output is packed and cannot be fed to linear layers
        else:
            output_packed, hidden = self.rnn(x_packed)  # for GRU there is only a hidden state
        # Though n layers are stacked, the output is always 1
        output_padded, _ = pad_packed_sequence(output_packed)  # pad output before the linear layer (padded_lens, batch_size, hidden_units)
        # Pick only the last output --> equivalent to return_sequences=False in Keras
        out_reduced = torch.cat((output_padded[-1, :, :self.rnn_units], output_padded[-1, :, self.rnn_units:]), 1)
        return self.fc(out_reduced)

model = NewsClassifier()
print(f'The total number of trainable parameters is: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
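One thing worth checking (my observation, not from the original post): with pack_padded_sequence and enforce_sorted=False, output_padded[-1] is the last time step of the padded batch, which is padding for every sequence shorter than the longest one in the batch. A common alternative is to read the final hidden state instead; a runnable toy sketch:

import torch

# hidden from an nn.LSTM stacks (layer, direction) along dim 0, so with
# n_layers=2 and bidirectional=True, hidden[-2] is the last layer's
# forward state and hidden[-1] its backward state, each taken at every
# sequence's true final step (made-up sizes below).
n_layers, batch, rnn_units = 2, 4, 512
hidden = torch.randn(n_layers * 2, batch, rnn_units)
out_reduced = torch.cat((hidden[-2], hidden[-1]), dim=1)
print(out_reduced.shape)  # torch.Size([4, 1024])

In forward this would replace the out_reduced line with torch.cat((hidden[-2], hidden[-1]), dim=1) before the fc layer.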
My training function is:
def train(model, iterator=trainDataloader, optimizer=optimizer, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.train()
    for inputs, leng, labels in iterator:
        inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs, leng).squeeze(1)
        loss = loss_fn(preds, labels.long())
        acc = accuracy(preds, labels)
        loss.backward()
        optimizer.step()
        e_loss += loss.item()
        e_acc += acc.item()
        i += 1
    return e_loss/i, e_acc/i

def predict(model, iterator=testDataloader, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.eval()
    with torch.no_grad():
        for inputs, leng, labels in iterator:
            inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
            preds = model(inputs, leng).squeeze(1)
            loss = loss_fn(preds, labels.long())
            acc = accuracy(preds, labels)
            e_loss += loss.item()
            e_acc += acc.item()
            i += 1
    return e_loss/i, e_acc/i
N_EPOCHS = 20
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model)
    valid_loss, valid_acc = predict(model)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

The PyTorch training model cannot be created successfully

I would like to build a neural network for regression analysis using Optuna, based on this site.
I would like to create a model that takes two 1D arrays as input and produces one 1D array as output, with batch learning.
x is the training data and y is the teacher (target) data.
class Model(nn.Module):
    # Constructor (initialization when an instance is created)
    def __init__(self, trial, mid_units1, mid_units2):
        super(Model, self).__init__()
        self.linear1 = nn.Linear(2, mid_units1)
        self.bn1 = nn.BatchNorm1d(mid_units1)
        self.linear2 = nn.Linear(mid_units1, mid_units2)
        self.bn2 = nn.BatchNorm1d(mid_units2)
        self.linear3 = nn.Linear(mid_units2, 1)
        self.activation = trial_activation(trial)

    def forward(self, x):
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.activation(x)
        x = self.linear2(x)
device = "cuda" if torch.cuda.is_available() else "cpu"
EPOCH = 100
x = torch.from_numpy(a[0].astype(np.float32)).to(device)
y = torch.from_numpy(a[1].astype(np.float32)).to(device)
def train_epoch(model, optimizer, criterion, device):
    model.train()
    optimizer.zero_grad()  # reset gradients to zero
    y_pred = model(x)  # prediction
    loss = criterion(y_pred.reshape(y.shape), y)  # compute the loss (align shapes)
    loss.backward()  # compute gradients
    optimizer.step()  # update parameters
    return loss.item()
def trial_activation(trial):
    activation_names = ['ReLU', 'logsigmoid']
    activation_name = trial.suggest_categorical('activation', activation_names)
    if activation_name == activation_names[0]:
        activation = F.relu
    else:
        activation = F.logsigmoid
    return activation
def objective(trial):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # trial for the number of units in the hidden layers
    mid_units1 = int(trial.suggest_discrete_uniform("mid_units1", 1024*2, 1024*4, 64*2))
    mid_units2 = int(trial.suggest_discrete_uniform("mid_units2", 1024, 1024*2, 64*2))
    net = Model(trial, mid_units1, mid_units2).to(device)
    criterion = nn.MSELoss()
    # trial for the optimization method
    optimizer = trial_optimizer(trial, net)
    train_loss = 0
    for epoch in range(EPOCH):
        train_loss = train_epoch(net, optimizer, criterion, device)
    torch.save(net.state_dict(), str(trial.number) + "new1.pth")
    return train_loss
storage_name = "a.sql"
study_name = 'a'
study = optuna.create_study(
    study_name=study_name,
    storage='sqlite:///' + storage_name,
    load_if_exists=True,
    direction='minimize')
TRIAL_SIZE = 100
study.optimize(objective, n_trials=TRIAL_SIZE)
Error message:
---> 28 loss = criterion(y_pred.reshape(y.shape), y)  # compute the loss (align shapes)
     29 loss.backward()  # compute gradients
     30 optimizer.step()  # update parameters
AttributeError: 'NoneType' object has no attribute 'reshape'
Because of the above error, I checked the value of y_pred and found it to be None.
model.train()
optimizer.zero_grad()
I suspect these two lines may be wrong, but I don't know how to solve this problem.
With PyTorch, when you call y_pred = model(x), that invokes the forward function defined in the Model class.
So y_pred receives the result of the forward function; in your case it returns nothing, which is why you get a None value. You can change the forward function as below:
def forward(self, x):
    x = self.linear1(x)
    x = self.bn1(x)
    x = self.activation(x)
    x = self.linear2(x)
    x = self.bn2(x)
    x = self.activation(x)
    x = self.linear3(x)  # apply the remaining defined layers so the output has shape (N, 1)
    return x             # without a return statement, model(x) yields None
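As a quick smoke test (my sketch, using optuna.trial.FixedTrial to stand in for a real trial, together with the Model and trial_activation defined in the question):

import optuna
import torch

# FixedTrial supplies a fixed value for suggest_categorical('activation', ...)
trial = optuna.trial.FixedTrial({'activation': 'ReLU'})
net = Model(trial, mid_units1=2048, mid_units2=1024)
out = net(torch.randn(8, 2))   # batch of 8 samples, 2 features each
print(out.shape)               # torch.Size([8, 1]) instead of None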

How to initialize a neural network with user-defined parameters in TensorFlow

I am trying to implement a neural network using TensorFlow. I want my model to initialize itself using default parameters or any weight-size list passed by the user. But the compute graph generated by my code seems incorrect. How can I initialize an NN with user-defined parameters in TensorFlow?
def setUp(self):
    self.hidden_layer_ = len(self.hidden_layer_sizes)
    self.weights = []
    self.biases = []
    size_list = [self.input_size]
    size_list.extend(list(self.hidden_layer_sizes))
    i = 0
    while i + 1 < len(size_list):
        prev_size = size_list[i]
        size = size_list[i+1]
        w = tf.Variable(tf.truncated_normal([prev_size, size], stddev=0.1))
        b = tf.Variable(tf.zeros([size]))
        self.weights.append(w)
        self.biases.append(b)
        i += 1
    self.w_out = tf.Variable(tf.truncated_normal([size_list[-1], self.output_size], stddev=0.1))
    self.b_out = tf.Variable(tf.zeros([self.output_size]))
    self.input_x = tf.placeholder(tf.float32, [None, self.input_size], name='input')
    self.input_y = tf.placeholder(tf.float32, [None, self.output_size], name='label')
    self.keep_prob_plh = tf.placeholder(tf.float32)
    self.layers = [self.input_x]
    for w, b in zip(self.weights, self.biases):
        hidden = ACT_FUN_DICT[self.activation](tf.matmul(self.layers[-1], w) + b)
        hidden_drop = tf.nn.dropout(hidden, self.keep_prob_plh)
        self.layers.append(hidden_drop)
    with tf.variable_scope('output'):
        self.output_layer = tf.nn.sigmoid(tf.matmul(self.layers[-1], self.w_out) + self.b_out)
    self.cost_func = tf.reduce_mean(tf.reduce_sum(tf.pow((self.input_y - self.output_layer), 2)))
    self.cost_summary = tf.summary.scalar('Cost', self.cost_func)
    self.optimizer = SOLVER_DICT[self.solver](self.learning_rate).minimize(self.cost_func)
    root_logdir = './tf_logs'
    now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
    log_dir = "{}/run-{}/".format(root_logdir, now)
    self.file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())
    self.sess.run(tf.global_variables_initializer())
I implement the NN as a class; the sizes of the weights are stored in the member self.hidden_layer_sizes.
class MLPClassifier(BaseEstimator, TransformerMixin):
    def __init__(self, hidden_layer_sizes=(100,), activation='relu', solver='sgd', alpha=0.0001,
                 learning_rate=0.001, max_iter=200, random_state=42, keep_prob=0.75, logged=True):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation = activation
        self.solver = solver
        self.alpha = alpha
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.random_state = random_state
        self.keep_prob = keep_prob
        self.fitted = False
        self.logged = True
        self.sess = tf.Session()
        self.sess.as_default()
That's all my code, and this is my compute graph: [compute-graph screenshot from the original post omitted]
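One possible explanation for the messy-looking graph (an assumption on my part, not confirmed by the post): every variable and op above is created with an auto-generated name in the default graph, so TensorBoard renders the layers as one tangle, and constructing several MLPClassifier instances piles more nodes into the same default graph. A sketch of the hidden-layer loop from setUp, rewritten as a drop-in with one name scope per layer so the graph renders as one collapsible box per layer:

# Drop-in for the hidden-layer loop in setUp (same names as above).
self.layers = [self.input_x]
for idx, (w, b) in enumerate(zip(self.weights, self.biases)):
    with tf.name_scope('hidden_%d' % idx):
        hidden = ACT_FUN_DICT[self.activation](tf.matmul(self.layers[-1], w) + b)
        hidden_drop = tf.nn.dropout(hidden, self.keep_prob_plh)
        self.layers.append(hidden_drop)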
