First of all, thank you. I tried to train a model with PyTorch but got the following error: AttributeError: 'KMeans' object has no attribute 'labels_'. I am trying to extract point-cloud features using deep learning in PyTorch. Could anyone help with this? ************** *************** Thanks!
# Training loop
# training_loop: iterate once over training_dataloader on this process, running forward,
# loss, backward and optimizer.step() for every batch.
# Reads names that are not defined in this block: sample_points, dimensionality,
# overall_classes_loss, verbosity, classes, element_weighted_loss, dist, confusion_matrix.
# Returns a 4-tuple: tensor of per-batch loss values, tensor holding the number of correct
# predictions, dict batch index -> gathered true/pred tensors, and the accumulated confusion matrix.
# NOTE(review): indentation was lost in this listing; the nesting described in these comments is
# the most plausible reading and should be confirmed against the original file.
def training_loop(gpu, training_dataloader, model, loss_fn, optimizer):
losses = []
correct = 0
batch_results = dict()
# NOTE(review): the confusion matrix is hard-coded to 10 classes (see labels=np.arange(0,10) below).
conf_mat = np.zeros((10,10))
for batch_n, batch in enumerate(training_dataloader): #batch[batch, pos, ptr, y]
# Number of point clouds in this batch, derived from the flattened point count; this local
# name shadows any module-level batch_size inside this function.
batch_size = int(batch.batch.size()[0] / sample_points)
if dimensionality == 3:
# Input dim [:,3] for your geometry x,y,z
# Gaussian noise (mean 0, std 0.1) is sampled on the CPU, moved to `gpu`, and added to the positions.
X = batch.pos.cuda(non_blocking=True).view(batch_size, sample_points, -1) + torch.normal(
torch.zeros(batch_size, sample_points, dimensionality), torch.full((batch_size, sample_points,
dimensionality), fill_value=0.1)).cuda(gpu)
else:
# Input dim [:,6] for your geometry x,y,z and normals nx,ny,nz
# Same noise augmentation as above, applied to the concatenated positions and normals.
X = torch.cat((batch.pos.cuda(non_blocking=True), batch.normal.cuda(non_blocking=True)), 1).view(batch_size, sample_points, -1) + torch.normal(
torch.zeros(batch_size, sample_points, dimensionality), torch.full((batch_size, sample_points,
dimensionality), fill_value=0.1)).cuda(gpu)
y = batch.y.cuda(non_blocking=True).flatten() #size (batch_size) --> torch.Size([8])
# Compute predictions
pred = model(None, X) #size (batch_size,classes) --> torch.Size([8, 10])
if overall_classes_loss:
# weighted CE Loss over all classes
loss = loss_fn(pred, y)
else:
# weighted batchwise Loss
# Each sample is weighted by 1 / (occurrences of its label in this batch).
# NOTE(review): batch.y.tolist() is rebuilt and scanned for every element (quadratic in batch size).
sample_count = np.array([[x, batch.y.tolist().count(x)] for x in batch.y])[:,1]
batch_weights = 1. / sample_count
batch_weights = torch.from_numpy(batch_weights)
batch_weights = batch_weights.double()
# NOTE(review): batch.y is passed here rather than the flattened `y` moved to the GPU above;
# whether element_weighted_loss moves it to the device itself cannot be seen from this block.
loss = element_weighted_loss(pred, batch.y, batch_weights, gpu)
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
print(f"Loss: {loss}")
# Collect labels and predicted classes from every process in the default group.
tensor_list_y = [torch.ones_like(y) for _ in range(dist.get_world_size())]
tensor_list_pred = [torch.ones_like(y) for _ in range(dist.get_world_size())]
torch.distributed.all_gather(tensor_list_y, y, group=None, async_op=False)
torch.distributed.all_gather(tensor_list_pred, pred.argmax(1), group=None, async_op=False)
tensor_list_y = torch.cat(tensor_list_y)
tensor_list_pred = torch.cat(tensor_list_pred)
# Confusion Matrix
conf_mat += confusion_matrix(tensor_list_y.cpu().detach().numpy(), tensor_list_pred.cpu().detach().numpy(), labels=np.arange(0,10))
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(loss.item())
# Save batch predictions
# The gathered tensors are stored as-is (no .cpu() call), one entry per batch.
batch_results[batch_n] = {'true':tensor_list_y, 'pred':tensor_list_pred}
if verbosity == True:
print(f"\n\nTRAIN on GPU:{gpu}: True Label {y} - Prediction {pred.argmax(1)} - Loss {loss}")
truevalue = '\t\t'.join(classes[items] for items in y.tolist())
predvalues = '\t\t'.join(classes[items] for items in pred.argmax(1).tolist())
print(f"INFO on GPU:{gpu}: TRAIN - True Value\t {truevalue}")
print(f"INFO on GPU:{gpu}: TRAIN - Predictions\t {predvalues}")
if batch_n % 25 == 0:
# NOTE(review): the loss value was already recorded via loss.item() above, and nothing visible
# in this block reads `loss` after this reduce.
torch.distributed.reduce(loss, 0)
return torch.tensor(losses, device=f"cuda:{gpu}"), torch.tensor(correct, device=f"cuda:{gpu}"), batch_results, conf_mat
# Test loop
# test_loop: evaluate `model` on test_dataloader under torch.no_grad().
# Reads names that are not defined in this block: sample_points, dimensionality,
# overall_classes_loss, verbosity, classes, element_weighted_loss, dist, confusion_matrix, statistics.
# Returns a 4-tuple: tensor with the number of correct predictions, tensor with the mean of the
# per-batch losses, dict batch index -> gathered true/pred tensors, and the accumulated confusion matrix.
# NOTE(review): indentation was lost in this listing; confirm the nesting against the original file.
def test_loop(gpu, test_dataloader, model, loss_fn):
test_losses = []
correct = 0
batch_results = dict()
# NOTE(review): hard-coded to 10 classes, matching labels=np.arange(0,10) below.
conf_mat = np.zeros((10,10))
with torch.no_grad():
for batch_n, batch in enumerate(test_dataloader):
# Number of point clouds in this batch, derived from the flattened point count.
batch_size = int(batch.batch.size()[0] / sample_points)
# Unlike training_loop, no noise is added to the inputs here.
if dimensionality == 3:
# Input dim [:,3] for your geometry x,y,z
X = batch.pos.cuda(non_blocking=True).view(batch_size, sample_points, -1)
else:
# Input dim [:,6] for your geometry x,y,z and normals nx,ny,nz
X = torch.cat((batch.pos.cuda(non_blocking=True), batch.normal.cuda(non_blocking=True)), 1).view(batch_size, sample_points, -1)
y = batch.y.cuda(non_blocking=True).flatten()
pred = model(None, X) #size (batch,classes) per batch_n
if overall_classes_loss:
# weighted CE Loss over all classes
loss = loss_fn(pred, y)
else:
# weighted batchwise Loss
# Each sample is weighted by 1 / (occurrences of its label in this batch).
sample_count = np.array([[x, batch.y.tolist().count(x)] for x in batch.y])[:,1]
batch_weights = 1. / sample_count
batch_weights = torch.from_numpy(batch_weights)
batch_weights = batch_weights.double()
# NOTE(review): batch.y is passed here rather than the flattened GPU tensor `y` — confirm intended.
loss = element_weighted_loss(pred, batch.y, batch_weights, gpu)
test_losses.append(loss.item())
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
print(f"Loss: {loss}")
# Collect labels and predicted classes from every process in the default group.
tensor_list_y = [torch.ones_like(y) for _ in range(dist.get_world_size())]
tensor_list_pred = [torch.ones_like(y) for _ in range(dist.get_world_size())]
torch.distributed.all_gather(tensor_list_y, y, group=None, async_op=False)
torch.distributed.all_gather(tensor_list_pred, pred.argmax(1), group=None, async_op=False)
tensor_list_y = torch.cat(tensor_list_y)
tensor_list_pred = torch.cat(tensor_list_pred)
# Confusion Matrix
conf_mat += confusion_matrix(tensor_list_y.cpu().detach().numpy(), tensor_list_pred.cpu().detach().numpy(), labels=np.arange(0,10))
# Save batch predictions
batch_results[batch_n] = {'true':tensor_list_y, 'pred':tensor_list_pred}
if verbosity == True:
print(f"\n\nTEST on GPU:{gpu}: True Label {y} - Prediction {pred.argmax(1)} - Loss {loss}")
truevalue = '\t\t'.join(classes[items] for items in y.tolist())
predvalues = '\t\t'.join(classes[items] for items in pred.argmax(1).tolist())
print(f"INFO on GPU:{gpu}: TEST - True Value\t {truevalue}")
print(f"INFO on GPU:{gpu}: TEST - Predictions\t {predvalues}")
# statistics.mean raises StatisticsError on an empty list, i.e. when the dataloader yields no batch.
test_loss = statistics.mean(test_losses)
return torch.tensor(correct, device=f"cuda:{gpu}"), torch.tensor(test_loss, device=f"cuda:{gpu}"), batch_results, conf_mat
# train_optimisation: epoch driver. For each epoch it runs training_loop and test_loop, sums their
# results onto rank 0 with torch.distributed.reduce, prints and checkpoints the metrics, steps the
# learning-rate scheduler on the test loss and applies early stopping.
# Reads names that are not defined in this block: epochs, batch_size, patience, early_stopping,
# printLearningRate, save_checkpoint, dist.
# NOTE(review): indentation was lost in this listing, so it cannot be determined from here which of
# the statements after `if gpu == 0:` run on rank 0 only (scheduler.step, save_checkpoint, the early
# stopping and exit()). Confirm against the original file before changing the control flow.
def train_optimisation(gpu, gpus, training_dataloader, test_dataloader, model, loss_fn, optimizer, scheduler, dir_path, initial_epoch):
epoch_losses = []
training_accuracies = []
test_losses = []
test_accuracies = []
learning_rates = []
counter = 0 #early stopping counter
batchwise_results = dict()
# Learning Rate Scheduler
# The `scheduler` argument is overwritten here, so whatever the caller passes is ignored.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=20)
for i in range(initial_epoch, initial_epoch + epochs):
if gpu == 0:
# When resuming (initial_epoch > 0) the epoch is printed as i, otherwise as i + 1.
if initial_epoch > 0:
print(f"\n\nEpoch {i}\n-------------------------------")
else:
print(f"\n\nEpoch {i + 1}\n-------------------------------")
# TRAIN
losses, training_accuracy, train_batch_result, train_conf_mat = training_loop(gpu, training_dataloader, model, loss_fn, optimizer)
average_loss = torch.mean(losses)
# Sum the per-process mean loss and correct-count onto rank 0.
torch.distributed.reduce(average_loss, 0, torch.distributed.ReduceOp.SUM)
torch.distributed.reduce(training_accuracy, 0, torch.distributed.ReduceOp.SUM)
# TEST
test_accuracy, test_loss, test_batch_result, test_conf_mat = test_loop(gpu, test_dataloader, model, loss_fn)
torch.distributed.reduce(test_accuracy, 0, torch.distributed.ReduceOp.SUM)
torch.distributed.reduce(test_loss, 0, torch.distributed.ReduceOp.SUM)
# save results
batchwise_results[i] = {'train':train_batch_result, 'test':test_batch_result}
if gpu == 0: # the following operations are performed only by the process running in the first gpu
average_loss = average_loss / torch.tensor(gpus, dtype=torch.float) # average loss among all gpus
# Accuracies are the summed correct counts divided by the full dataset length, in percent.
test_accuracy = test_accuracy / torch.tensor(len(test_dataloader.dataset),
dtype=torch.float) * torch.tensor(100.0)
training_accuracy = training_accuracy / torch.tensor(len(training_dataloader.dataset),
dtype=torch.float) * torch.tensor(100.0)
test_loss = test_loss / torch.tensor(gpus, dtype=torch.float)
epoch_losses.append(average_loss.item())
training_accuracies.append(training_accuracy.item())
test_losses.append(test_loss.item())
test_accuracies.append(test_accuracy.item())
learning_rates.append((optimizer.param_groups[0])["lr"])
print(f"\nBatch size: {batch_size * int(gpus)}")
print(f"average Training Loss: {average_loss.item():.6f}")
print(f"average Test Loss: {test_loss.item():.6f}")
print(f"\naverage Training Acc: {training_accuracy.item():.6f}")
print(f"average Test Acc: {test_accuracy.item():.6f}")
printLearningRate(optimizer)
scheduler.step(test_loss)
# saving model checkpoint
# The two dict comprehensions select the 'train' and the 'test' entry of this epoch's results.
save_checkpoint(model, optimizer, scheduler, i, epoch_losses, training_accuracies, test_losses, test_accuracies, learning_rates,
os.path.join(dir_path, f"epoch{i}.pth"), {key: value for key, value in batchwise_results[i].items() if key == 'train'}, {key: value for key, value in batchwise_results[i].items() if key == 'test'}, train_conf_mat, test_conf_mat)
#TODO: implement ONNX Export
# early stopping scheduler
if early_stopping(test_losses) == True:
counter += 1
print(f"Early Stopping counter: {counter} of {patience}")
else:
# Adding 0 leaves the counter unchanged: it is never reset once it has been incremented.
counter += 0
if counter < patience:
pass
else:
print("\n\nEarly Stopping activated")
print(f"Training stopped at Epoch{i + 1}")
dist.destroy_process_group()
exit()
# train: per-process entry point. Initialises the process group, builds the model, optimizer,
# datasets, samplers and loss, then calls train_optimisation either from scratch or from a checkpoint.
# Reads names that are not defined in this block: args, shrinkingLayers, mlpClassifier, optimizer_str,
# learning_rate, momentum, weight_decay, dimensionality, sample_points, batch_size, data_shuffle,
# createdir, next_training_number, save_hyperparameters, DistributedWeightedSampler, ModelNet, DDP.
# NOTE(review): indentation was lost in this listing; confirm the nesting against the original file.
def train(gpu, gpus, world_size):
torch.manual_seed(0)
torch.cuda.set_device(gpu)
# Try the NCCL backend first and fall back to gloo if process-group initialisation raises RuntimeError.
try:
dist.init_process_group(backend='nccl', world_size=world_size, rank=gpu) #for distributed GPU training
except RuntimeError:
print("\n\nINFO:RuntimeError is raised >> Used gloo backend instead of nccl!\n")
dist.init_process_group(backend='gloo', world_size=world_size, rank=gpu) #as a fallback option
# dir_path is only assigned a real path when gpu == 0; it is passed on to train_optimisation as-is.
dir_path = None
if gpu == 0:
dir_path = "stackgraphConvPool3DPnet"
createdir(dir_path)
training_number = next_training_number(dir_path)
dir_path = os.path.join(dir_path, f"train{training_number}")
createdir(dir_path)
#save hyper-parameters in txt protocol file
save_hyperparameters(dir_path, 'hyperparameters.txt')
print("\nINFO: Protocol File saved successfully . . .")
model = Classifier(shrinkingLayers, mlpClassifier)
torch.cuda.set_device(gpu)
model.cuda(gpu)
#setting up optimizer
# Any optimizer_str other than "SGD" or "RMSprop" selects Adam.
if optimizer_str == "SGD":
optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum, weight_decay=weight_decay)
elif optimizer_str == "RMSprop":
optimizer = torch.optim.RMSprop(model.parameters(), learning_rate, weight_decay=weight_decay)
else:
optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=weight_decay)
# single-program multiple-data training paradigm (Distributed Data-Parallel Training)
model = DDP(model, device_ids=[gpu])
# dimensionality == 3 samples positions only; otherwise normals are included and rotation is normalised.
if dimensionality == 3:
training_data = ModelNet("ModelNet10_train_data", transform=lambda x: NormalizeScale()(SamplePoints(num=sample_points)(x)))
else:
training_data = ModelNet("ModelNet10_train_data", transform=lambda x: NormalizeScale()(NormalizeRotation()(SamplePoints(num=sample_points, remove_faces=True, include_normals=True)(x))))
training_sampler = DistributedWeightedSampler(training_data, num_replicas=world_size) #weight unbalanced classes by 1/cls_count
# NOTE(review): both `shuffle=data_shuffle` and `sampler=` are passed; verify that the DataLoader in
# use accepts this combination when data_shuffle is True.
training_dataloader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=data_shuffle, num_workers=0,
pin_memory=True, sampler=training_sampler)
if dimensionality == 3:
test_data = ModelNet("ModelNet10_test_data", train=False, transform=lambda x: NormalizeScale()(SamplePoints(num=sample_points)(x)))
else:
test_data = ModelNet("ModelNet10_test_data", train=False, transform=lambda x: NormalizeScale()(NormalizeRotation()(SamplePoints(num=sample_points, remove_faces=True, include_normals=True)(x))))
test_sampler = DistributedWeightedSampler(test_data, num_replicas=world_size) #weight unbalanced classes by 1/cls_count
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=data_shuffle, num_workers=0,
pin_memory=True, sampler=test_sampler)
# weighted CE Loss over all Classes C
# Class weight = 1 / (number of training samples with that label), in np.unique label order.
class_sample_count = np.array([len(np.where(training_data.data.y == t)[0]) for t in np.unique(training_data.data.y)])
weight = 1. / class_sample_count
weight = torch.from_numpy(weight)
weight = weight.float()
loss_fn = nn.CrossEntropyLoss(weight=weight).cuda(gpu)
# continue training from certain checkpoint
# Despite its name, continue_from_scratch is True when no checkpoint was given (args.resume is None).
continue_from_scratch = True if args.resume is None else False
if continue_from_scratch:
if gpu == 0:
print("\nINFO: Train from scratch has started . . .")
train_optimisation(gpu, gpus, training_dataloader, test_dataloader, model, loss_fn, optimizer, None, dir_path, 0)
else:
checkpoint_path = "stackgraphConvPool3DPnet/" + args.resume
if gpu == 0:
# The message indexes split('/')[2], so args.resume must itself contain a '/' (directory/file).
print(f"\nINFO: Train has started from certain checkpoint {checkpoint_path.split('/')[2].split('.')[0]} in {checkpoint_path.split('/')[1]} . . .")
# The same checkpoint file is read by three separate torch.load calls below.
model.load_state_dict(torch.load(checkpoint_path)['model_state_dict'], strict=False)
optimizer.load_state_dict(torch.load(checkpoint_path)['optimizer_state_dict'])
# Training resumes at the epoch after the one stored in the checkpoint.
final_epoch = (torch.load("stackgraphConvPool3DPnet/" + args.resume)['epoch'])+1
train_optimisation(gpu, gpus, training_dataloader, test_dataloader, model, loss_fn, optimizer, None, dir_path, final_epoch)
code tools:
class KMeansInitMostDistantFromMean:
    """K-means seeding strategy: pick the points that lie farthest from the data mean."""

    def __call__(self, *args, **kwargs):
        """Return the ``k`` rows of ``X`` with the largest Euclidean distance to the mean.

        Args:
            *args: exactly two positionals ``(X, k)`` where ``X`` is a 2-D array
                of points (one row per point) and ``k`` is the number of rows
                to return.
            **kwargs: accepted for signature compatibility and ignored.

        Returns:
            The selected rows of ``X``, ordered from farthest to nearest.
        """
        X, k = args
        X = np.asarray(X)
        centre = np.mean(X, axis=0)
        # One vectorized norm instead of calling a Python lambda per row.
        distances = np.linalg.norm(X - centre, axis=1)
        farthest_first = np.flip(np.argsort(distances))
        return X[farthest_first[:k]]
class KMeansInit:
    """K-means seeding strategy: start at the data mean, then greedily add distant points."""

    def __call__(self, *args, **kwargs):
        """Build ``k`` initial centroids for the points in ``X``.

        Args:
            *args: exactly two positionals ``(X, k)`` where ``X`` is a 2-D array
                of points (one row per point) and ``k`` is the number of
                centroids wanted.
            **kwargs: accepted for signature compatibility and ignored.

        Returns:
            Array whose first row is the mean of ``X`` and whose following
            ``k - 1`` rows are points taken from ``X``.

        Raises:
            ValueError: if ``X`` runs out of candidate points before ``k``
                centroids have been collected.
        """
        X, k = args
        X = np.asarray(X)
        centroids = np.expand_dims(np.mean(X, axis=0), 0)
        for _ in range(k - 1):
            X, centroids = self.next_centroid(X, centroids)
        return centroids

    def next_centroid(self, X, curr_centroids):
        """Move the best remaining candidate from ``X`` into the centroid set.

        Every candidate is scored by its largest distance to any current
        centroid; the first candidate with the highest score is chosen.

        Args:
            X: 2-D array of remaining candidate points.
            curr_centroids: 2-D array of centroids chosen so far.

        Returns:
            Tuple ``(X without the chosen row, centroids with the chosen row appended)``.

        Raises:
            ValueError: if ``X`` has no rows left.
        """
        X = np.asarray(X)
        curr_centroids = np.asarray(curr_centroids)
        if X.shape[0] == 0:
            raise ValueError("no candidate points left to choose the next centroid from")
        # Pairwise distances (candidates x centroids) in one vectorized step.
        offsets = X[:, np.newaxis, :] - curr_centroids[np.newaxis, :, :]
        scores = np.linalg.norm(offsets, axis=2).max(axis=1)
        # argmax returns the first maximum, and also yields a valid index when
        # every score is 0 (all candidates coincide with the centroids).
        chosen = int(np.argmax(scores))
        grown = np.append(curr_centroids, X[chosen][np.newaxis, :], 0)
        return np.delete(X, chosen, 0), grown
class Conv(gnn.MessagePassing):
    """Message-passing convolution with mean aggregation and a learnable bias.

    Args:
        sigma: activation module applied to the final output.
        F: module applied to feature differences of neighbouring nodes; its
            output is reshaped to ``(-1, C + P, C)``.
        W: module applied to each node's own features; its output is reshaped
            to ``(-1, C + P, C)``.
        M: module producing a multiplicative weight for the aggregated messages.
        C: input feature width.
        P: number of additional output features (the output width is ``C + P``).
    """

    def __init__(self, sigma: nn.Module, F: nn.Module, W: nn.Module, M: nn.Module, C: int, P: int):
        super().__init__(aggr="mean")
        self.sigma = sigma
        self.F = F
        self.W = W
        self.M = M
        self.C = C
        self.P = P
        # Registered as a Parameter so it follows .cuda()/.to(), appears in
        # parameters() for the optimizer and is stored in the state_dict.
        self.B = nn.Parameter(torch.randn(C + P))

    def forward(self, feature_matrix, edge_index):
        """Propagate ``feature_matrix`` along ``edge_index`` and return the updated node features."""
        return self.propagate(edge_index, feature_matrix=feature_matrix)

    def message(self, feature_matrix_i, feature_matrix_j):
        """Build one message per edge from the target (``_i``) and source (``_j``) features."""
        message = self.F(feature_matrix_j - feature_matrix_i)
        message = message.view(-1, self.C + self.P, self.C)
        feature_matrix_i_ = feature_matrix_i.unsqueeze(2)
        # squeeze(2) drops only the trailing matmul axis, so a single edge
        # keeps its leading dimension.
        output = torch.bmm(message, feature_matrix_i_).squeeze(2)
        return output

    def update(self, aggr_out, feature_matrix):
        """Combine aggregated messages with the transformed node features, add the bias, activate."""
        Weight = self.M(aggr_out)
        aggr_out = aggr_out * Weight
        transform = self.W(feature_matrix)
        transform = transform.view(-1, self.C + self.P, self.C)
        feature_matrix = feature_matrix.unsqueeze(2)
        transformation = torch.bmm(transform, feature_matrix).squeeze(2)
        aggr_out = aggr_out + transformation
        output = aggr_out + self.B
        output = self.sigma(output)
        return output
class Aggregation(nn.Module):
    """Blend raw and convolved features with softmax gates computed from their means.

    Args:
        mlp1: module producing gate logits from the mean of the raw features.
        mlp2: module producing gate logits from the mean of the convolved features.
    """

    def __init__(self, mlp1: nn.Module, mlp2: nn.Module):
        super().__init__()
        self.mlp1 = mlp1
        self.mlp2 = mlp2
        self.softmax = nn.Softmax(0)

    def forward(self, feature_matrix_batch: torch.Tensor, conv_feature_matrix_batch: torch.Tensor):
        """Return the gated sum of the two inputs.

        Both inputs are unpacked as 3-D tensors. When the convolved features
        are wider in the last dimension, the raw features are zero-padded on
        the right to the same width first.
        """
        _, members, raw_width = feature_matrix_batch.size()
        _, _, conv_width = conv_feature_matrix_batch.size()

        missing = conv_width - raw_width
        if missing > 0:
            feature_matrix_batch = F.pad(feature_matrix_batch, (0, missing))

        raw_logits = self.mlp1(torch.mean(feature_matrix_batch, 1))
        conv_logits = self.mlp2(torch.mean(conv_feature_matrix_batch, 1))

        # Softmax across the two branches, so the gates sum to 1 elementwise.
        gates = self.softmax(torch.stack((raw_logits, conv_logits), 0))
        raw_gate = gates[0][:, None].expand(-1, members, -1)
        conv_gate = gates[1][:, None].expand(-1, members, -1)

        return (raw_gate * feature_matrix_batch) + (conv_gate * conv_feature_matrix_batch)
class MaxPool(nn.Module):
    """Max-pool member features into ``k`` clusters per batch entry."""

    def __init__(self, k: int):
        super().__init__()
        self.k = k

    def forward(self, feature_matrix_batch: torch.Tensor, cluster_index: torch.Tensor):
        """Reduce a 3-D feature batch with ``scatter_max`` along the flattened member axis.

        The input is flattened to 2-D, reduced per value of ``cluster_index``
        and reshaped to ``(batch, k, -1)``.
        """
        batch_count, _, width = feature_matrix_batch.size()
        flat_features = feature_matrix_batch.view(-1, width)
        # scatter_max returns (values, argmax); only the values are kept.
        pooled, _ = scatter_max(flat_features, cluster_index, dim=0)
        return pooled.view(batch_count, self.k, -1)
class GraphConvPool3DPnet(nn.Module):
    """Sequential network: the given shrinking layers followed by ``mlp``."""

    def __init__(self, shrinkingLayers: [ShrinkingUnit], mlp: nn.Module):
        super().__init__()
        self.neuralNet = nn.Sequential(*shrinkingLayers, mlp)

    def forward(self, x: torch.Tensor, pos: torch.Tensor):
        """Run the network on ``pos``, with ``x`` concatenated along dim 2 when it is given."""
        if x is None:
            feature_matrix_batch = pos
        else:
            feature_matrix_batch = torch.cat((pos, x), 2)
        return self.neuralNet(feature_matrix_batch)
class ShrinkingUnitStack(nn.Module):
    """One shrinking stage applied to a stack of ``input_stack * stack_fork`` parallel units.

    The stage chains four stacked sub-modules: self-correlation, k-means
    convolution, feature aggregation and max pooling.
    """

    def __init__(self, input_stack: int, stack_fork: int, mlp: nn.Module, learning_rate: int, k: int, kmeansInit, n_init, sigma: nn.Module, F: nn.Module, W: nn.Module,
                 M: nn.Module, C, P, mlp1: nn.Module, mlp2: nn.Module):
        super().__init__()
        self.stack_fork = stack_fork
        unit_count = input_stack * stack_fork
        self.selfCorrStack = SelfCorrelationStack(unit_count, mlp, learning_rate)
        self.kmeansConvStack = KMeansConvStack(unit_count, k, kmeansInit, n_init, sigma, F, W, M, C, P)
        self.localAdaptFeaAggreStack = AggregationStack(unit_count, mlp1, mlp2)
        self.graphMaxPoolStack = MaxPoolStack(unit_count, k)

    def forward(self, feature_matrix_batch):
        """Fork the stack along dim 0, then run the four sub-stacks in sequence."""
        forked = torch.repeat_interleave(feature_matrix_batch, self.stack_fork, dim=0)
        correlated = self.selfCorrStack(forked)
        # The first value returned by the k-means stack is not used; the
        # aggregation step receives the self-correlated features.
        _, convolved, cluster_index = self.kmeansConvStack(correlated)
        aggregated = self.localAdaptFeaAggreStack(correlated, convolved)
        return self.graphMaxPoolStack(aggregated, cluster_index)
class SelfCorrelationStack(nn.Module):
    """``stack_size`` independent SelfCorrelation units, each with its own deep copy of ``mlp``."""

    def __init__(self, stack_size: int, mlp: nn.Module, learning_rate: int = 1.0):
        super().__init__()
        units = []
        for _ in range(stack_size):
            units.append(SelfCorrelation(copy.deepcopy(mlp), learning_rate))
        self.selfCorrelationStack = nn.ModuleList(units)
        self.apply(init_weights)

    def forward(self, feature_matrix_batch: torch.Tensor):
        """Apply unit ``s`` to slice ``s`` of the input and stack the results.

        Input and output are laid out as (S, N, I, D): S=stack size,
        N=batch number, I=members, D=member dimensionality.
        """
        return selfCorrThreader(self.selfCorrelationStack, feature_matrix_batch)
class KMeansConvStack(nn.Module):
    """``stack_size`` independent KMeansConv units, each with deep copies of the given modules."""

    def __init__(self, stack_size: int, k: int, kmeansInit, n_init: int, sigma: nn.Module, F: nn.Module, W: nn.Module,
                 M: nn.Module, C: int, P: int):
        super().__init__()
        units = []
        for _ in range(stack_size):
            units.append(
                KMeansConv(k, kmeansInit, n_init, copy.deepcopy(sigma), copy.deepcopy(F), copy.deepcopy(W),
                           copy.deepcopy(M), C, P))
        self.kmeansConvStack = nn.ModuleList(units)
        self.apply(init_weights)

    def forward(self, feature_matrix_batch: torch.Tensor):
        """Apply unit ``s`` to slice ``s`` of the input.

        The input is laid out as (S, N, I, D): S=stack size, N=batch number,
        I=members, D=member dimensionality.

        Returns:
            Tuple ``(features, convolved features, cluster index)``, each
            stacked over the units.
        """
        features, convolved, cluster_index = kmeansConvThreader(self.kmeansConvStack, feature_matrix_batch)
        return features, convolved, cluster_index
class AggregationStack(nn.Module):
    """``stack_size`` independent Aggregation units, each with deep copies of ``mlp1`` and ``mlp2``."""

    def __init__(self, stack_size: int, mlp1: nn.Module, mlp2: nn.Module):
        super().__init__()
        units = []
        for _ in range(stack_size):
            units.append(Aggregation(copy.deepcopy(mlp1), copy.deepcopy(mlp2)))
        self.localAdaptFeatAggreStack = nn.ModuleList(units)
        self.apply(init_weights)

    def forward(self, feature_matrix_batch: torch.Tensor, conv_feature_matrix_batch: torch.Tensor):
        """Apply unit ``s`` to slice ``s`` of both inputs and stack the results."""
        return threader(self.localAdaptFeatAggreStack, feature_matrix_batch, conv_feature_matrix_batch)
class MaxPoolStack(nn.Module):
    """``stack_size`` independent MaxPool units, all pooling into ``k`` clusters."""

    def __init__(self, stack_size: int, k: int):
        super().__init__()
        units = []
        for _ in range(stack_size):
            units.append(MaxPool(k))
        self.graphMaxPoolStack = nn.ModuleList(units)
        self.apply(init_weights)

    def forward(self, feature_matrix_batch: torch.Tensor, cluster_index: torch.Tensor):
        """Apply unit ``s`` to slice ``s`` of both inputs and stack the results."""
        return threader(self.graphMaxPoolStack, feature_matrix_batch, cluster_index)
def selfCorrThreader(modules, input_tensor):
    """Apply ``modules[i]`` to ``input_tensor[i]`` concurrently and stack the outputs.

    One worker thread is used per slice. Results are returned in input order,
    and an exception raised inside any module call is re-raised in the calling
    thread instead of being lost with its worker.

    Args:
        modules: indexable collection of callables, at least as long as
            ``input_tensor``.
        input_tensor: tensor (or other iterable) whose items are processed
            independently.

    Returns:
        ``torch.stack`` of the per-slice outputs.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Pair slices with modules here so that an index error surfaces in the
    # caller rather than inside a worker thread.
    jobs = [(modules[i], t) for i, t in enumerate(input_tensor)]
    with ThreadPoolExecutor(max_workers=max(1, len(jobs))) as pool:
        outputs = list(pool.map(lambda job: job[0](job[1]), jobs))
    return torch.stack(outputs)
def selfCorrAppender(module, tensor, list_append, index):
    """Run ``module`` on ``tensor`` and append ``(index, result)`` to ``list_append``."""
    result = module(tensor)
    list_append.append((index, result))
def kmeansConvThreader(modules, input_tensor):
    """Apply ``modules[i]`` to ``input_tensor[i]`` concurrently and stack its three outputs.

    One worker thread is used per slice. Results are returned in input order,
    and an exception raised inside any module call is re-raised in the calling
    thread instead of being lost with its worker.

    Args:
        modules: indexable collection of callables, each returning three
            tensors; at least as long as ``input_tensor``.
        input_tensor: tensor (or other iterable) whose items are processed
            independently.

    Returns:
        Tuple of three tensors: the first, second and third outputs of every
        module call, each stacked over the slices.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Pair slices with modules here so that an index error surfaces in the
    # caller rather than inside a worker thread.
    jobs = [(modules[i], t) for i, t in enumerate(input_tensor)]
    with ThreadPoolExecutor(max_workers=max(1, len(jobs))) as pool:
        triples = [tuple(result) for result in pool.map(lambda job: job[0](job[1]), jobs)]
    firsts = [triple[0] for triple in triples]
    seconds = [triple[1] for triple in triples]
    thirds = [triple[2] for triple in triples]
    return torch.stack(firsts), torch.stack(seconds), torch.stack(thirds)
def kmeansAppender(module, input, list1_append, list2_append, list3_append, index):
    """Run ``module`` on ``input`` and append each of its three outputs, tagged with ``index``."""
    first, second, third = module(input)
    for target, value in ((list1_append, first), (list2_append, second), (list3_append, third)):
        target.append((index, value))
def threader(modules, input_tensor1, input_tensor2):
    """Apply ``modules[i]`` to ``(input_tensor1[i], input_tensor2[i])`` concurrently and stack the outputs.

    One worker thread is used per slice. Results are returned in input order,
    and an exception raised inside any module call is re-raised in the calling
    thread instead of being lost with its worker.

    Args:
        modules: indexable collection of two-argument callables, at least as
            long as ``input_tensor1``.
        input_tensor1: tensor (or other iterable) that drives the iteration.
        input_tensor2: indexable second input, indexed in step with
            ``input_tensor1``.

    Returns:
        ``torch.stack`` of the per-slice outputs.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Pair slices with modules here so that an index error surfaces in the
    # caller rather than inside a worker thread.
    jobs = [(modules[i], t, input_tensor2[i]) for i, t in enumerate(input_tensor1)]
    with ThreadPoolExecutor(max_workers=max(1, len(jobs))) as pool:
        outputs = list(pool.map(lambda job: job[0](job[1], job[2]), jobs))
    return torch.stack(outputs)
def threaderAppender(module, t1, t2, list_append, index):
    """Run ``module`` on ``(t1, t2)`` and append ``(index, result)`` to ``list_append``."""
    result = module(t1, t2)
    list_append.append((index, result))
class Classifier(nn.Module):
    """Stacked shrinking layers followed by an MLP head."""

    def __init__(self, shrinkingLayersStack: [ShrinkingUnitStack], mlp: nn.Module):
        super().__init__()
        self.neuralNet = nn.Sequential(*shrinkingLayersStack)
        self.mlp = mlp

    def forward(self, x: torch.Tensor, pos: torch.Tensor):
        """Classify ``pos``; ``x`` is accepted but not used by this method.

        A leading stack axis of size 1 is added to ``pos``, the shrinking
        layers are applied, the stack axis is averaged away and the result is
        passed to the MLP head.
        """
        stacked_input = pos.unsqueeze(0)
        stacked_output = self.neuralNet(stacked_input)
        pooled = torch.mean(stacked_output, dim=0)
        return self.mlp(pooled)
Error:
Thank you for your help.
The attribute labels_ of a KMeans object is created once you actually compute the clusters by running .fit() (or .fit_predict(), or .fit_transform()).
Simple example:
>>> from sklearn.cluster import KMeans
>>> from numpy.random import random
>>> X = random((10,2))
>>> X
array([[0.2096706 , 0.69704806],
[0.31732618, 0.29607599],
[0.10372159, 0.56911046],
[0.30922255, 0.07952464],
[0.21190404, 0.46823665],
[0.67134948, 0.95702692],
[0.14781526, 0.24619197],
[0.89931979, 0.96301003],
[0.88256126, 0.07569739],
[0.70776912, 0.92997521]])
>>> clustering = KMeans(n_clusters=3)
>>> clustering.labels_
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'KMeans' object has no attribute 'labels_'
>>> clustering.fit(X)
KMeans(n_clusters=3)
>>> clustering.labels_
array([0, 0, 0, 0, 0, 1, 0, 1, 2, 1], dtype=int32)
I was running DSSM_N and DSSM on a dataset with batch size 512 on 2060.
However,
DSSM_N costs ~35ms per batch
DSSM costs ~400ms per batch.
What causes this huge performance difference? I checked the profiler, which reports
that DSSM spends ~350ms in "All Others Time". How can I fix the DSSM implementation?
Many thanks in advance.
Edited as suggested by Micheal:
The main difference is that DSSM performs a hash-table-like lookup inside the model (note tf.nn.embedding_lookup and IntegerLookup), which makes the dataset preprocessing a little simpler, whereas for DSSM_N this lookup is done in advance during dataset preprocessing. However, I don't believe this simple hash-table-like lookup can make such a big difference. What am I doing wrong?
import pickle
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_hub as hub
import tensorflow_text as text # required for BERT hub model
from keras.layers import Layer, Embedding, Dense, Concatenate, BatchNormalization, Dropout, Dot, Hashing, TextVectorization, GRU, IntegerLookup
from keras import Model
import random
from ..config import *
from ..util import *
def embedding_sequence_reduce_mean(x, mask):
    """Average the unmasked embeddings along the sequence axis.

    float[B,L,E], bool[B,L] -> float[B,E]

    Positions where ``mask`` is False are excluded from the mean. Rows with no
    unmasked position produce zeros. The computation stays on dense tensors
    (masked sum divided by the unmasked count) rather than building a ragged
    tensor.

    Args:
        x: embeddings, float[B,L,E].
        mask: boolean mask, bool[B,L]; True marks positions to keep.

    Returns:
        float[B,E] tensor of per-row means.
    """
    keep = tf.expand_dims(mask, axis=-1)                          # (B, L, 1)
    kept = tf.where(keep, x, tf.zeros_like(x))                    # zero out masked entries
    total = tf.reduce_sum(kept, axis=1)                           # (B, E)
    count = tf.reduce_sum(tf.cast(keep, x.dtype), axis=1)         # (B, 1)
    mean = tf.math.divide_no_nan(total, count)                    # 0 where count == 0
    # Keep the original nan -> 0 contract for NaNs that come from the data itself.
    return tf.where(tf.math.is_nan(mean), tf.zeros_like(mean), mean)
def embedding_masked_to_zero(x, mask):
    """Zero the rows of ``x`` whose ``mask`` entry is False.

    ``mask`` is cast to float32 and given a trailing axis (B -> B 1) so that
    it broadcasts across the columns of ``x``.
    """
    keep = tf.cast(mask, dtype=tf.float32)
    keep = tf.expand_dims(keep, axis=1)
    return x * keep
# Embedding widths used by the models below (user id, media id, genre, origin).
USER_ID_DIM = 128
MEDIA_ID_DIM = 64
GENRE_DIM = 32
ORIGIN_DIM = 32
# latent_dim is not defined in this file; presumably it comes from the `..config` star import — verify.
LATENT_DIM = latent_dim
# HashEmbedding settings: number of salted hash functions and number of hash bins.
N_HASH = 8
N_BIN = 1024
# Printed at import time.
print('N_HASH', N_HASH)
print('N_BIN', N_BIN)
class HashEmbedding(Layer):
    """Embedding for ids via ``n_hash`` salted hashes into one table of ``n_bin`` rows.

    Each input id is hashed ``n_hash`` times with different salts; the
    embedding vectors of the resulting bins are summed.

    Args:
        n_hash: number of salted hash functions.
        n_bin: number of hash bins, which is also the embedding table size.
        output_dim: embedding width.
        embeddings_initializer, embeddings_regularizer, activity_regularizer,
            embeddings_constraint, input_length: forwarded to the inner
            ``Embedding`` layer.
        mask_zero: when True, input 0 is hashed to bin 0 and reported as masked.
        **kwargs: forwarded to ``Layer.__init__`` (e.g. ``name``, ``dtype``).
    """
    # TODO: with_importance is not supported

    def __init__(
        self, n_hash, n_bin, output_dim,
        embeddings_initializer='uniform', embeddings_regularizer=None,
        activity_regularizer=None, embeddings_constraint=None,
        mask_zero=False, input_length=None, **kwargs
    ):
        # Forward the base-layer options instead of dropping them silently.
        super(HashEmbedding, self).__init__(**kwargs)
        self.mask_zero = mask_zero
        self.n_hash = n_hash
        self.n_bin = n_bin
        # Distinct salts drawn from 1..n_hash*32; 0 is excluded because the
        # Keras Hashing documentation asks for non-zero salts.
        # NOTE(review): the salts are random per construction, so a reloaded
        # model only reproduces the same hashing if they are persisted too.
        self.salts = random.sample(range(1, self.n_hash * 32 + 1), self.n_hash)
        self.hashs = [Hashing(
            num_bins=self.n_bin,
            # if mask_zero then hash 0 to 0
            mask_value=(0 if self.mask_zero else None),
            salt=salt)
            for salt in self.salts]
        self.embedding = Embedding(
            self.n_bin, output_dim,
            embeddings_initializer=embeddings_initializer,
            embeddings_regularizer=embeddings_regularizer,
            activity_regularizer=activity_regularizer,
            embeddings_constraint=embeddings_constraint,
            mask_zero=mask_zero, input_length=input_length
        )

    def compute_mask(self, inputs, mask=None):
        """Return ``inputs != 0`` when ``mask_zero`` is set, otherwise None."""
        if not self.mask_zero:
            return None
        return tf.not_equal(inputs, 0)

    def call(self, inputs):
        """Hash, embed and sum: [I] -> [I], emb_dim."""
        hash_axis = len(inputs.shape)
        hashed = tf.stack([hasher(inputs)                # [I], n_hash
                           for hasher in self.hashs], axis=hash_axis)
        embedded = self.embedding(hashed)                # [I], n_hash, emb_dim
        return tf.reduce_sum(embedded, axis=hash_axis)   # [I], emb_dim
class StringVectorization(Layer):
    """Encode strings character by character: vectorize, embed, then summarise with a GRU.

    Args:
        vocab: vocabulary passed to ``TextVectorization``.
        embedding_dim: width of the character embedding.
        output_dim: number of GRU units, i.e. the output width.
    """

    def __init__(self, vocab, embedding_dim=32, output_dim=16):
        super(StringVectorization, self).__init__()
        self.text_vectorization = TextVectorization(
            vocabulary=vocab, split='character')
        vocab_size = self.text_vectorization.vocabulary_size()
        self.embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)
        self.gru = GRU(output_dim)

    def call(self, inputs):  # B, S
        """Return the GRU output for each input string."""
        token_ids = self.text_vectorization(inputs)
        char_vectors = self.embedding(token_ids)
        return self.gru(char_vectors)
class TfBertZh(Layer):  # 128 - 2 input length limit
    """Frozen TF-Hub preprocessor and encoder pair that returns the encoder's pooled output."""

    def __init__(self):  # output_dim 768
        super(TfBertZh, self).__init__()
        # Both hub layers are loaded with trainable=False.
        self.preprocess = hub.KerasLayer(
            zh_preprocessor_model_file, trainable=False)
        self.encoder = hub.KerasLayer(zh_encoder_model_file, trainable=False)

    def call(self, inputs):
        """Preprocess ``inputs``, encode them and return the 'pooled_output' entry."""
        encoder_inputs = self.preprocess(inputs)
        encoder_outputs = self.encoder(encoder_inputs)
        return encoder_outputs['pooled_output']
class DNN(Layer):
    """Tower: concat -> Dense(64) -> BatchNorm -> tanh -> Dropout(0.1) -> Dense(32) -> tanh."""

    def __init__(self):
        super(DNN, self).__init__()
        self.concat = Concatenate(axis=1)
        self.dense1 = Dense(64)
        self.bn = BatchNormalization()
        self.drop = Dropout(0.1)
        self.dense2 = Dense(32)

    def call(self, inputs: list):
        """Concatenate the tensors in ``inputs`` along axis 1 and run them through the tower."""
        from keras.activations import tanh

        merged = self.concat(inputs)
        hidden = self.dense1(merged)
        hidden = self.bn(hidden)
        hidden = self.drop(tanh(hidden))
        return tanh(self.dense2(hidden))
# Load the dataset statistics (`sinfo`) and the vocabulary (`vocab`) at import time.
# stats_file_pkl / vocab_file_pkl are not defined in this file; presumably they come from the
# `..config` star import — verify.
# NOTE(review): pickle.load executes arbitrary code from the file; only use with trusted files.
with open(stats_file_pkl, 'rb') as f:
sinfo = pickle.load(f)
with open(vocab_file_pkl, 'rb') as f:
vocab = pickle.load(f)
class DSSM_N(Model):
    """Two-tower model that takes already-indexed item ids, genres and origins.

    The user tower embeds the user id with a HashEmbedding. The item tower
    embeds id, genre and origin, averaging the genre and origin sequences over
    their unmasked entries.
    """

    def __init__(self):
        super(DSSM_N, self).__init__()
        self.user_id = HashEmbedding(
            N_HASH, N_BIN, USER_ID_DIM, mask_zero=True)
        self.item_id = Embedding(
            sinfo['media_id']['unique'], MEDIA_ID_DIM, mask_zero=True)
        self.genre = Embedding(
            sinfo['genre_id']['unique'], GENRE_DIM, mask_zero=True)
        self.origin = Embedding(
            sinfo['origin_id']['unique'], ORIGIN_DIM, mask_zero=True)
        self.user_dnn = DNN()
        self.item_dnn = DNN()
        self.dot = Dot(axes=1, normalize=False)

    def call(self, inputs):
        """Return pos / (pos + neg), where each term sums exp(user . item) over its items.

        ``inputs`` is read at the keys 'user', 'pos', 'pos_genre',
        'pos_origin', 'neg', 'neg_genre' and 'neg_origin'.
        """
        user_latent = self.compute_user_latent({'id': inputs['user']})

        def summed_signal(id_key, genre_key, origin_key):
            # Sum exp(dot(user, item)) over the item columns of one group.
            item_ids = inputs[id_key]
            terms = []
            for col in range(item_ids.shape[1]):
                item_latent = self.compute_item_latent({
                    'id': inputs[id_key][:, col],
                    'genre': inputs[genre_key][:, col, :],
                    'origin': inputs[origin_key][:, col, :]
                })
                terms.append(tf.exp(self.dot([user_latent, item_latent])))
            return tf.add_n(terms)

        ui_pos = summed_signal('pos', 'pos_genre', 'pos_origin')
        ui_neg = summed_signal('neg', 'neg_genre', 'neg_origin')
        return tf.squeeze(ui_pos / (ui_pos + ui_neg))

    def compute_user_latent(self, inputs):
        """Embed ``inputs['id']`` and run the user tower."""
        user_vector = self.user_id(inputs['id'])
        return self.user_dnn([user_vector])

    def compute_item_latent(self, inputs):
        """Embed id, genre and origin, then run the item tower on the three vectors."""
        item_vector = self.item_id(inputs['id'])

        genre_vectors = self.genre(inputs['genre'])  # B 4 -> B 4 E
        genre_mean = embedding_sequence_reduce_mean(genre_vectors, genre_vectors._keras_mask)

        origin_vectors = self.origin(inputs['origin'])  # B 2 -> B 2 E
        origin_mean = embedding_sequence_reduce_mean(origin_vectors, origin_vectors._keras_mask)

        return self.item_dnn([item_vector, genre_mean, origin_mean])
# Preprocessed lookup tables, loaded at import time; they are read by MediaPreprocess and UserPreprocess.
# NOTE(review): `pd` is not imported in the visible import block; presumably it arrives through one of
# the star imports — verify. read_pickle should only be used on trusted files.
user_df = pd.read_pickle(preprocessed_user_file_pkl)
media_df = pd.read_pickle(preprocessed_media_file_pkl)
genre_df = pd.read_pickle(clean_genre_file_pkl)
origin_df = pd.read_pickle(clean_origin_file_pkl)
class MediaPreprocess(Layer):
    """Turn raw media ids into id, genre and origin feature vectors inside the model.

    The raw id is mapped to an index with ``IntegerLookup``. That index selects
    the id embedding and a row in the genre and origin tables, each of which
    has an all-zero row prepended at position 0.
    """

    def __init__(self):
        super(MediaPreprocess, self).__init__()
        self.lookup = IntegerLookup(vocabulary=list(media_df['id']))
        self.genre_table = tf.Variable(
            [[0] * 4] + list(media_df['genre']), dtype=tf.int32, trainable=False)
        self.origin_table = tf.Variable(
            [[0] * 2] + list(media_df['origin']), dtype=tf.int32, trainable=False)
        self.id_embedding = Embedding(
            self.lookup.vocabulary_size() + 1, MEDIA_ID_DIM, mask_zero=True)
        self.genre_embedding = Embedding(
            genre_df['id'].max() + 1, GENRE_DIM, mask_zero=True)
        self.origin_embedding = Embedding(
            origin_df['id'].max() + 1, ORIGIN_DIM, mask_zero=True)

    # NOTE(review): this overrides ``__call__`` directly rather than ``call``,
    # so ``Layer.__call__`` is not invoked for this layer — confirm that is intended.
    def __call__(self, inputs):
        """Return ``{'id', 'genre', 'origin'}`` feature vectors for a batch of raw media ids."""
        index = self.lookup(inputs)  # B -> B

        id_vector = self.id_embedding(index)  # B -> B E
        id_vector = embedding_masked_to_zero(id_vector, id_vector._keras_mask)

        genre_vectors = self.genre_embedding(
            tf.nn.embedding_lookup(self.genre_table, index))
        genre_mean = embedding_sequence_reduce_mean(genre_vectors, genre_vectors._keras_mask)

        origin_vectors = self.origin_embedding(
            tf.nn.embedding_lookup(self.origin_table, index))
        origin_mean = embedding_sequence_reduce_mean(origin_vectors, origin_vectors._keras_mask)

        return {
            'id': id_vector,
            'genre': genre_mean,
            'origin': origin_mean}
class UserPreprocess(Layer):
    """Embed raw user ids with a HashEmbedding and zero the masked rows."""

    def __init__(self):
        super(UserPreprocess, self).__init__()
        # The lookup is created here but is not used by __call__ below.
        self.lookup = IntegerLookup(vocabulary=list(user_df['id']))
        self.embedding = HashEmbedding(
            N_HASH, N_BIN, USER_ID_DIM, mask_zero=True)

    # NOTE(review): this overrides ``__call__`` directly rather than ``call``,
    # so ``Layer.__call__`` is not invoked for this layer — confirm that is intended.
    def __call__(self, inputs):
        """Return ``{'id': vector}`` for a batch of raw user ids."""
        user_vector = self.embedding(inputs)
        user_vector = embedding_masked_to_zero(user_vector, user_vector._keras_mask)
        return {'id': user_vector}
class DSSM(Model):
    """Two-tower model that performs the id lookups inside the model.

    ``*args`` and ``**kwargs`` are accepted by the constructor but are not
    passed on to ``Model.__init__``.
    """

    def __init__(self, *args, **kwargs):
        super(DSSM, self).__init__()
        self.user_pp = UserPreprocess()
        self.item_pp = MediaPreprocess()
        self.user_nn = DNN()
        self.item_nn = DNN()
        dot = Dot(axes=1, normalize=False)
        self.signal = lambda u, i: tf.exp(dot([u, i]))

    def call(self, inputs):
        """Return pos / (neg + pos), where each term sums exp(user . item) over its items.

        ``inputs`` is read at the keys 'user', 'pos' and 'neg'.
        """
        user_ids = inputs['user']   # B
        positives = inputs['pos']   # B N_POS=1
        negatives = inputs['neg']   # B N_NEG=7
        pos_count = positives.shape[1]
        neg_count = negatives.shape[1]

        user_features = self.user_pp(user_ids)['id']  # B E(uid)
        user_latent = self.user_nn([user_features])   # B L

        def summed_signal(item_ids, count):
            # Sum the user-item signal over the first `count` item columns.
            terms = []
            for col in range(count):
                features = self.item_pp(item_ids[:, col])
                item_latent = self.item_nn(
                    [features['id'], features['genre'], features['origin']])
                terms.append(self.signal(user_latent, item_latent))
            return tf.add_n(terms)  # C B 1 -> B 1

        pos_ui = summed_signal(positives, pos_count)  # B 1
        neg_ui = summed_signal(negatives, neg_count)  # B 1
        return tf.squeeze(pos_ui / (neg_ui + pos_ui))  # B
Howdy!
Recently I have built my own library for neural networks.
Without convolutional layers it worked fine. However, now that I have implemented convolutional layers, it doesn't improve at all in comparison to the dense network, which is unacceptable for more complex tasks such as pneumonia detection.
For backprop, every layer updates its own values and passes its input gradient to the layer before it. For forward prop, every layer just gives its output as input to the next layer.
In the example below the model is set up for pneumonia detection with 128px * 128px images.
The accuracy always stays under 60% no matter how long the model is trained.
Here is the relevant code:
# Build the pneumonia-detection network and train it on `images` / `labels`.
# Convolutional arguments are (input_depth, input_size, kernel_size, depth); with a 3x3 kernel the
# layer's bias shape is input_size - 2 per side, e.g. 128 -> 126.
# NOTE(review): the input sizes listed after each Pooling(2) (63, 31, 15, 7) depend on how nG.Pooling
# rounds odd sizes (61 -> 31, 29 -> 15, 13 -> 7 would need rounding up); nG.Pooling is not shown here —
# verify that they match.
model = nG.NeuralNetwork([
nG.Convolutional(1, (128, 128), 3, 8),
nG.Pooling(2),
nG.ReLU(),
nG.Convolutional(8, (63, 63), 3, 16),
nG.Pooling(2),
nG.ReLU(),
nG.Convolutional(16, (31, 31), 3, 16),
nG.Pooling(2),
nG.ReLU(),
nG.Convolutional(16, (15, 15), 3, 32),
nG.Pooling(2),
nG.ReLU(),
nG.Convolutional(32, (7, 7), 3, 64),
nG.ReLU(),
# 64 feature maps of 5x5 are flattened to a 1600x1 column for the dense layers.
nG.Reshape((64, 5, 5), (1600, 1)),
nG.Dense(1600, 128),
nG.Tanh(),
nG.Dense(128, 2),
nG.Tanh()],
nG.MSE()
)
model.train(images, labels, epochs=3, lr=0.01)
class NeuralNetwork:
    """Sequential container that chains layers and trains them sample by sample.

    Each layer is expected to expose ``forwardProp(input)`` and
    ``backwardProp(delta, lr)``; the loss object is expected to expose
    ``errorDerivative(output, Y)``.
    """

    def __init__(self, layers, loss):
        """Store the ordered list of layers and the loss object."""
        self.layers = layers
        self.loss = loss

    def forwardProp(self, input):
        """Feed ``input`` through every layer in order and return the final output."""
        output = input
        for layer in self.layers:
            output = layer.forwardProp(output)
        return output

    def backwardProp(self, errorDeriv, lr):
        """Propagate ``errorDeriv`` through the layers in reverse order.

        Each layer receives the delta returned by the layer after it together
        with the learning rate ``lr``. Nothing is returned.
        """
        deltaOutput = errorDeriv
        for layer in reversed(self.layers):
            deltaOutput = layer.backwardProp(deltaOutput, lr)

    def train(self, xTrain, yTrain, epochs=1, lr=1, interimResult=False):
        """Run per-sample forward and backward passes over the paired data.

        NOTE(review): the listing of this method is cut off after the backward
        pass. ``corrects``, ``round_start``, ``i``, ``start`` and
        ``interimResult`` are not used in the visible part and are presumably
        consumed by the omitted remainder -- confirm against the full source.
        """
        corrects = 0
        print("Precompiling ... This might take a few seconds", end="\n\n")
        for epoch in range(epochs):
            print(f"{epoch+1}th epoch:")
            round_start = time.time()
            i = -1  # sample index within the epoch, incremented before use
            for X, Y in zip(xTrain, yTrain):
                i += 1
                start = time.time()
                # One update per sample: forward, loss derivative, backward.
                output = self.forwardProp(X)
                errorDeriv = self.loss.errorDerivative(output, Y)
                self.backwardProp(errorDeriv, lr)
--
class Convolutional():
    """Convolutional layer with one bias value per output position.

    Parameters
    ----------
    input_depth : int
        Number of input channels.
    input_size : tuple of int
        (height, width) of each input channel.
    kernel_size : int
        Side length of the square kernels.
    depth : int
        Number of output channels.

    The output has shape
    ``(depth, input_size[0] - kernel_size + 1, input_size[1] - kernel_size + 1)``.
    """

    def __init__(self, input_depth, input_size, kernel_size, depth):
        self.input_depth = input_depth
        self.input_size = input_size
        self.kernel_size = kernel_size
        self.depth = depth

        # Scale the kernel range by fan-in (He-uniform). A fixed +-0.5 range
        # makes activations grow with every stacked layer, which saturates
        # later squashing activations and stalls learning.
        fan_in = max(input_depth * kernel_size * kernel_size, 1)
        limit = np.sqrt(6.0 / fan_in)
        self.kernels = np.random.uniform(
            -limit, limit, (depth, input_depth, kernel_size, kernel_size))

        # Keep the bias as a single ndarray from the start; previously it was
        # a list of arrays that turned into an ndarray on the first update.
        out_height = input_size[0] - kernel_size + 1
        out_width = input_size[1] - kernel_size + 1
        self.bias = np.random.uniform(-0.5, 0.5, (depth, out_height, out_width))
        self.input = None

    def forwardProp(self, input):
        """Cache ``input`` for the backward pass and return the layer output."""
        self.input = input
        output = get_output(input, self.depth, self.input_size, self.kernel_size,
                            self.input_depth, self.kernels, self.bias)
        return output

    def backwardProp(self, output_delta, lr):
        """Apply one gradient step and return the gradient w.r.t. the input.

        ``output_delta`` is the loss gradient w.r.t. this layer's output. The
        bias gradient equals ``output_delta`` because the bias is added
        element-wise to the output.
        """
        kernels_gradient, input_delta = get_gradients(
            self.kernels, self.input, self.depth, self.input_depth, output_delta)
        self.kernels -= lr * kernels_gradient
        self.bias -= lr * output_delta
        return input_delta
#numba.njit
def get_gradients(kernels, input, depth, input_depth, output_delta):
    """Return ``(kernels_gradient, input_delta)`` for a convolutional layer.

    For every (output channel, input channel) pair the kernel gradient is
    ``valid_correlate(input[in], output_delta[out])`` and the input gradient
    accumulates ``full_convolve(output_delta[out], kernels[out, in])``.
    NOTE(review): both helpers are defined outside this block; presumably a
    'valid' cross-correlation and a 'full' convolution -- confirm.
    """
    grad_kernels = np.zeros(kernels.shape)
    grad_input = np.zeros(input.shape)
    for out_ch in range(depth):
        for in_ch in range(input_depth):
            delta = output_delta[out_ch]
            grad_kernels[out_ch, in_ch] = valid_correlate(input[in_ch], delta)
            grad_input[in_ch] += full_convolve(delta, kernels[out_ch, in_ch])
    return grad_kernels, grad_input
#numba.njit(fastmath=True, nogil=True)
def get_output(input, depth, input_size, kernel_size, input_depth, kernels, bias):
    """Compute the forward output of a convolutional layer.

    For each output channel ``k`` the result is the sum over input channels
    of ``valid_correlate(input[i], kernels[k][i])`` plus ``bias[k]``.

    Parameters
    ----------
    input : indexable by input channel.
    depth : int, number of output channels.
    input_size : (height, width) of each input channel.
    kernel_size : int, side length of the square kernels.
    input_depth : int, number of input channels.
    kernels : indexable as ``kernels[k][i]``.
    bias : indexable as ``bias[k]``; added to output channel ``k``.

    Returns
    -------
    ndarray of shape
    ``(depth, input_size[0] - kernel_size + 1, input_size[1] - kernel_size + 1)``.
    """
    out_height = input_size[0] - kernel_size + 1
    # The width must come from input_size[1]; using input_size[0] for both
    # axes only worked for square inputs.
    out_width = input_size[1] - kernel_size + 1
    out = np.zeros((depth, out_height, out_width))
    for k in range(depth):
        for i in range(input_depth):
            out[k] += valid_correlate(input[i], kernels[k][i])
        out[k] += bias[k]
    return out
class Pooling:
    """Per-channel pooling layer that delegates to ``pool`` and ``anti_pool``.

    ``size`` is the side length of the pooling window.
    """

    def __init__(self, size):
        self.size = size
        self.input = None

    def forwardProp(self, input):
        """Cache ``input`` and return the stack of pooled channels as an array."""
        self.input = input
        pooled_channels = [pool(input[ch], self.size) for ch in range(input.shape[0])]
        return np.asarray(pooled_channels)

    def backwardProp(self, output_delta, lr):
        """Return the input gradient; ``lr`` is unused as there is nothing to train."""
        return anti_pool(output_delta, self.input.shape, self.size, self.input)
def anti_pool(output_delta, input_shape, size, input):
    """Route each pooled gradient back to the argmax position of its window.

    Returns an array of zeros with shape ``input_shape`` where, for every
    channel and every entry of ``output_delta``, the position holding the
    first maximum of the corresponding ``size`` x ``size`` window of
    ``input`` (clipped at the border) receives that gradient value.
    """
    grad = np.zeros(input_shape)
    for ch in range(grad.shape[0]):
        for row in range(output_delta.shape[1]):
            for col in range(output_delta.shape[2]):
                top, left = row * size, col * size
                # Clip the window at the border for inputs that are not a
                # multiple of the pooling size.
                bottom = min(top + size, grad.shape[1])
                right = min(left + size, grad.shape[2])
                window = input[ch, top:bottom, left:right]
                # Flat argmax -> (row, col) offset inside the window.
                d_row, d_col = divmod(int(window.argmax()), window.shape[1])
                grad[ch, top + d_row, left + d_col] = output_delta[ch, row, col]
    return grad
#numba.njit("float64[:,:](float64[:,:], int32)")
def pool(mat, size):
    """Max-pool a 2-D array with non-overlapping ``size`` x ``size`` windows.

    The output has ``ceil(rows / size)`` by ``ceil(cols / size)`` entries;
    windows that run past the border are truncated to the part inside ``mat``.
    """
    n_rows = int(np.ceil(mat.shape[0] / size))
    n_cols = int(np.ceil(mat.shape[1] / size))
    pooled = np.zeros((n_rows, n_cols))
    for r in range(n_rows):
        top = r * size
        bottom = min(mat.shape[0], top + size)
        for c in range(n_cols):
            left = c * size
            right = min(mat.shape[1], left + size)
            pooled[r, c] = np.max(mat[top:bottom, left:right])
    return pooled
class Dense:
    """Fully connected layer computing ``weights @ input + bias``.

    Parameters
    ----------
    inputSize : int
        Length of the input column vector.
    outputSize : int
        Length of the output column vector.
    """

    def __init__(self, inputSize, outputSize):
        # Scale weights by 1/sqrt(fan_in) so pre-activations have roughly the
        # same scale as the inputs. Unscaled N(0, 1) weights give
        # pre-activations with std ~ sqrt(inputSize) (about 40 for 1600
        # inputs), which saturates a following tanh and makes the gradients
        # reaching earlier layers vanish.
        scale = 1.0 / np.sqrt(max(inputSize, 1))
        self.weights = np.random.randn(outputSize, inputSize) * scale
        self.bias = np.random.randn(outputSize, 1)
        self.input = None

    def forwardProp(self, input):
        """Cache ``input`` for the backward pass and return the affine output."""
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backwardProp(self, output_gradient, lr):
        """Apply one gradient step and return the gradient w.r.t. the input.

        The input gradient is computed with the weights from before the
        update.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= lr * weights_gradient
        self.bias -= lr * output_gradient
        return input_gradient
class Tanh:
    """Element-wise tanh activation layer (no trainable parameters)."""

    def __init__(self):
        self.input = None
        self.output = None

    def forwardProp(self, input):
        """Cache the input and the activated output, and return the output."""
        self.input = input
        self.output = tanh(input)
        return self.output

    def backwardProp(self, outputDelta, lr):
        """Return ``outputDelta`` scaled by ``1 - tanh(input)^2``; ``lr`` is unused."""
        local_slope = 1 - (np.tanh(self.input) ** 2)  # d tanh(x) / dx
        local_slope *= outputDelta
        return local_slope
#numba.vectorize
def tanh(x):
    """Return the hyperbolic tangent of ``x`` via ``np.tanh``."""
    activated = np.tanh(x)
    return activated
class ReLU:
    """Element-wise rectified linear activation layer (no trainable parameters)."""

    def __init__(self):
        self.input = None
        self.output = None

    def forwardProp(self, input):
        """Cache the input and return ``max(input, 0)`` element-wise."""
        self.input = input
        self.output = np.maximum(input, 0)
        return self.output

    def backwardProp(self, outputDelta, lr):
        """Return ``outputDelta`` where the cached input is not negative, else 0.

        ``lr`` is unused. The mask is built with one array operation rather
        than ``np.vectorize``, which calls back into Python for every element
        and raises on size-0 inputs. It follows the same convention as
        ``anti_relu``: the gradient passes wherever ``input < 0`` is false.
        """
        slope = np.where(np.asarray(self.input) < 0, 0, 1)
        return np.multiply(outputDelta, slope)

    def anti_relu(self, x):
        """Scalar ReLU derivative: 0 for ``x < 0``, otherwise 1."""
        if x < 0:
            return 0
        else:
            return 1
class MSE:
    """Squared-error loss; values are element-wise, no mean is taken."""

    def __init__(self):
        """Stateless loss; nothing to initialise."""

    def errorFunction(self, output, Y):
        """Return the element-wise squared difference ``(output - Y) ** 2``."""
        residual = output - Y
        return residual ** 2

    def errorDerivative(self, output, Y):
        """Return the derivative of the squared error w.r.t. ``output``."""
        residual = output - Y
        return 2 * residual
For the functions/Classes that I've not included I'm dead sure that they work.
I spent the last couple of days reading over the code and still haven't found the problem yet.
I would be extremely thankful for any kind of response.
Kind Regards
Eirik