i can't understand this lambda functions - python

i am studying machine learning with python.
and this code is from Standford Uiv classes.
i was trying to grasp these codes but failed.
The problem is loss_W = lambda W: self.loss(x,t).
isn't it True that loss_W(1) or loss_W(2) or anything cannot change the result?
i can't understand that the results of these two codes are different.
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
def numerical_gradient(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
for idx in range(x.size):
tmp_val = x[idx]
# f(x+h)
x[idx] = float(tmp_val) + h
fxh1 = f(x)
# f(x-h)
x[idx] = tmp_val - h
fxh2 = f(x)
grad[idx] = (fxh1 - fxh2) / (2*h)
x[idx] = tmp_val
return grad
class TwoLayerNet:
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
self.params = {}
self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
self.params['b1'] = np.zeros(hidden_size)
self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
self.params['b2'] = np.zeros(output_size)
def predict(self, x):
W1, W2 = self.params['W1'], self.params['W2']
b1, b2 = self.params['b1'], self.params['b2']
a1 = np.dot(x, W1)
z1 = sigmoid(a1)
a2 = np.dot(z1,W2)
y = softmax(a2)
return y
def loss(self, x, t):
y = self.predict(x)
return cross_entropy_error(y,t)
def accuracy(self, x,t):
y = self.predict(x)
y = np.argmax(y, axis=0)
t = np.argmax(t, axis=0)
data_len = len(x)
accuracy = np.sum(y==t)/float(data_len)
return accuracy
def numerical_gradient(self, x, t):
loss_W = lambda W: self.loss(x,t)
grads = {}
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
return grads

This lambda alone loss_W = lambda W: self.loss(x,t) is indifferent from the value of W. This function can be simplified like this:
x = 1 # Just some random value
t = 5 # Just some random value
def simplified_lambda_function(W):
return (x,t)
The code snippet you posted indicates that there is a class definition outside somewhere as
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1']
self is undefined here. Because of this we cant be sure if they are truly identical, but most likely they are.

Related

AttributeError: 'KMeans' object has no attribute 'labels_' pytorch

first of all I thank , I tried to train model with pytorch but I got the following error: AttributeError: 'KMeans' object has no attribute 'labels_'.I am trying to model a extract features point cloud using deep learning in pytorch. I get the following error . Could anyone help on this? ************** *************** Thanks!
# Training loop
def training_loop(gpu, training_dataloader, model, loss_fn, optimizer):
losses = []
correct = 0
batch_results = dict()
conf_mat = np.zeros((10,10))
for batch_n, batch in enumerate(training_dataloader): #batch[batch, pos, ptr, y]
batch_size = int(batch.batch.size()[0] / sample_points)
if dimensionality == 3:
# Input dim [:,3] for your geometry x,y,z
X = batch.pos.cuda(non_blocking=True).view(batch_size, sample_points, -1) + torch.normal(
torch.zeros(batch_size, sample_points, dimensionality), torch.full((batch_size, sample_points,
dimensionality), fill_value=0.1)).cuda(gpu)
else:
# Input dim [:,6] for your geometry x,y,z and normals nx,ny,nz
X = torch.cat((batch.pos.cuda(non_blocking=True), batch.normal.cuda(non_blocking=True)), 1).view(batch_size, sample_points, -1) + torch.normal(
torch.zeros(batch_size, sample_points, dimensionality), torch.full((batch_size, sample_points,
dimensionality), fill_value=0.1)).cuda(gpu)
y = batch.y.cuda(non_blocking=True).flatten() #size (batch_size) --> torch.Size([8])
# Compute predictions
pred = model(None, X) #size (batch_size,classes) --> torch.Size([8, 10])
if overall_classes_loss:
# weighted CE Loss over all classes
loss = loss_fn(pred, y)
else:
# weighted batchwise Loss
sample_count = np.array([[x, batch.y.tolist().count(x)] for x in batch.y])[:,1]
batch_weights = 1. / sample_count
batch_weights = torch.from_numpy(batch_weights)
batch_weights = batch_weights.double()
loss = element_weighted_loss(pred, batch.y, batch_weights, gpu)
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
print(f"Loss: {loss}")
tensor_list_y = [torch.ones_like(y) for _ in range(dist.get_world_size())]
tensor_list_pred = [torch.ones_like(y) for _ in range(dist.get_world_size())]
torch.distributed.all_gather(tensor_list_y, y, group=None, async_op=False)
torch.distributed.all_gather(tensor_list_pred, pred.argmax(1), group=None, async_op=False)
tensor_list_y = torch.cat(tensor_list_y)
tensor_list_pred = torch.cat(tensor_list_pred)
# Confusion Matrix
conf_mat += confusion_matrix(tensor_list_y.cpu().detach().numpy(), tensor_list_pred.cpu().detach().numpy(), labels=np.arange(0,10))
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(loss.item())
# Save batch predictions
batch_results[batch_n] = {'true':tensor_list_y, 'pred':tensor_list_pred}
if verbosity == True:
print(f"\n\nTRAIN on GPU:{gpu}: True Label {y} - Prediction {pred.argmax(1)} - Loss {loss}")
truevalue = '\t\t'.join(classes[items] for items in y.tolist())
predvalues = '\t\t'.join(classes[items] for items in pred.argmax(1).tolist())
print(f"INFO on GPU:{gpu}: TRAIN - True Value\t {truevalue}")
print(f"INFO on GPU:{gpu}: TRAIN - Predictions\t {predvalues}")
if batch_n % 25 == 0:
torch.distributed.reduce(loss, 0)
return torch.tensor(losses, device=f"cuda:{gpu}"), torch.tensor(correct, device=f"cuda:{gpu}"), batch_results, conf_mat
# Test loop
def test_loop(gpu, test_dataloader, model, loss_fn):
test_losses = []
correct = 0
batch_results = dict()
conf_mat = np.zeros((10,10))
with torch.no_grad():
for batch_n, batch in enumerate(test_dataloader):
batch_size = int(batch.batch.size()[0] / sample_points)
if dimensionality == 3:
# Input dim [:,3] for your geometry x,y,z
X = batch.pos.cuda(non_blocking=True).view(batch_size, sample_points, -1)
else:
# Input dim [:,6] for your geometry x,y,z and normals nx,ny,nz
X = torch.cat((batch.pos.cuda(non_blocking=True), batch.normal.cuda(non_blocking=True)), 1).view(batch_size, sample_points, -1)
y = batch.y.cuda(non_blocking=True).flatten()
pred = model(None, X) #size (batch,classes) per batch_n
if overall_classes_loss:
# weighted CE Loss over all classes
loss = loss_fn(pred, y)
else:
# weighted batchwise Loss
sample_count = np.array([[x, batch.y.tolist().count(x)] for x in batch.y])[:,1]
batch_weights = 1. / sample_count
batch_weights = torch.from_numpy(batch_weights)
batch_weights = batch_weights.double()
loss = element_weighted_loss(pred, batch.y, batch_weights, gpu)
test_losses.append(loss.item())
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
print(f"Loss: {loss}")
tensor_list_y = [torch.ones_like(y) for _ in range(dist.get_world_size())]
tensor_list_pred = [torch.ones_like(y) for _ in range(dist.get_world_size())]
torch.distributed.all_gather(tensor_list_y, y, group=None, async_op=False)
torch.distributed.all_gather(tensor_list_pred, pred.argmax(1), group=None, async_op=False)
tensor_list_y = torch.cat(tensor_list_y)
tensor_list_pred = torch.cat(tensor_list_pred)
# Confusion Matrix
conf_mat += confusion_matrix(tensor_list_y.cpu().detach().numpy(), tensor_list_pred.cpu().detach().numpy(), labels=np.arange(0,10))
# Save batch predictions
batch_results[batch_n] = {'true':tensor_list_y, 'pred':tensor_list_pred}
if verbosity == True:
print(f"\n\nTEST on GPU:{gpu}: True Label {y} - Prediction {pred.argmax(1)} - Loss {loss}")
truevalue = '\t\t'.join(classes[items] for items in y.tolist())
predvalues = '\t\t'.join(classes[items] for items in pred.argmax(1).tolist())
print(f"INFO on GPU:{gpu}: TEST - True Value\t {truevalue}")
print(f"INFO on GPU:{gpu}: TEST - Predictions\t {predvalues}")
test_loss = statistics.mean(test_losses)
return torch.tensor(correct, device=f"cuda:{gpu}"), torch.tensor(test_loss, device=f"cuda:{gpu}"), batch_results, conf_mat
def train_optimisation(gpu, gpus, training_dataloader, test_dataloader, model, loss_fn, optimizer, scheduler, dir_path, initial_epoch):
epoch_losses = []
training_accuracies = []
test_losses = []
test_accuracies = []
learning_rates = []
counter = 0 #early stopping counter
batchwise_results = dict()
# Learning Rate Scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=20)
for i in range(initial_epoch, initial_epoch + epochs):
if gpu == 0:
if initial_epoch > 0:
print(f"\n\nEpoch {i}\n-------------------------------")
else:
print(f"\n\nEpoch {i + 1}\n-------------------------------")
# TRAIN
losses, training_accuracy, train_batch_result, train_conf_mat = training_loop(gpu, training_dataloader, model, loss_fn, optimizer)
average_loss = torch.mean(losses)
torch.distributed.reduce(average_loss, 0, torch.distributed.ReduceOp.SUM)
torch.distributed.reduce(training_accuracy, 0, torch.distributed.ReduceOp.SUM)
# TEST
test_accuracy, test_loss, test_batch_result, test_conf_mat = test_loop(gpu, test_dataloader, model, loss_fn)
torch.distributed.reduce(test_accuracy, 0, torch.distributed.ReduceOp.SUM)
torch.distributed.reduce(test_loss, 0, torch.distributed.ReduceOp.SUM)
# save results
batchwise_results[i] = {'train':train_batch_result, 'test':test_batch_result}
if gpu == 0: # the following operations are performed only by the process running in the first gpu
average_loss = average_loss / torch.tensor(gpus, dtype=torch.float) # average loss among all gpus
test_accuracy = test_accuracy / torch.tensor(len(test_dataloader.dataset),
dtype=torch.float) * torch.tensor(100.0)
training_accuracy = training_accuracy / torch.tensor(len(training_dataloader.dataset),
dtype=torch.float) * torch.tensor(100.0)
test_loss = test_loss / torch.tensor(gpus, dtype=torch.float)
epoch_losses.append(average_loss.item())
training_accuracies.append(training_accuracy.item())
test_losses.append(test_loss.item())
test_accuracies.append(test_accuracy.item())
learning_rates.append((optimizer.param_groups[0])["lr"])
print(f"\nBatch size: {batch_size * int(gpus)}")
print(f"average Training Loss: {average_loss.item():.6f}")
print(f"average Test Loss: {test_loss.item():.6f}")
print(f"\naverage Training Acc: {training_accuracy.item():.6f}")
print(f"average Test Acc: {test_accuracy.item():.6f}")
printLearningRate(optimizer)
scheduler.step(test_loss)
# saving model checkpoint
save_checkpoint(model, optimizer, scheduler, i, epoch_losses, training_accuracies, test_losses, test_accuracies, learning_rates,
os.path.join(dir_path, f"epoch{i}.pth"), {key: value for key, value in batchwise_results[i].items() if key == 'train'}, {key: value for key, value in batchwise_results[i].items() if key == 'test'}, train_conf_mat, test_conf_mat)
#TODO: implement ONNX Export
# early stopping scheduler
if early_stopping(test_losses) == True:
counter += 1
print(f"Early Stopping counter: {counter} of {patience}")
else:
counter += 0
if counter < patience:
pass
else:
print("\n\nEarly Stopping activated")
print(f"Training stopped at Epoch{i + 1}")
dist.destroy_process_group()
exit()
def train(gpu, gpus, world_size):
torch.manual_seed(0)
torch.cuda.set_device(gpu)
try:
dist.init_process_group(backend='nccl', world_size=world_size, rank=gpu) #for distributed GPU training
except RuntimeError:
print("\n\nINFO:RuntimeError is raised >> Used gloo backend instead of nccl!\n")
dist.init_process_group(backend='gloo', world_size=world_size, rank=gpu) #as a fallback option
dir_path = None
if gpu == 0:
dir_path = "stackgraphConvPool3DPnet"
createdir(dir_path)
training_number = next_training_number(dir_path)
dir_path = os.path.join(dir_path, f"train{training_number}")
createdir(dir_path)
#save hyper-parameters in txt protocol file
save_hyperparameters(dir_path, 'hyperparameters.txt')
print("\nINFO: Protocol File saved successfully . . .")
model = Classifier(shrinkingLayers, mlpClassifier)
torch.cuda.set_device(gpu)
model.cuda(gpu)
#setting up optimizer
if optimizer_str == "SGD":
optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum, weight_decay=weight_decay)
elif optimizer_str == "RMSprop":
optimizer = torch.optim.RMSprop(model.parameters(), learning_rate, weight_decay=weight_decay)
else:
optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=weight_decay)
# single-program multiple-data training paradigm (Distributed Data-Parallel Training)
model = DDP(model, device_ids=[gpu])
if dimensionality == 3:
training_data = ModelNet("ModelNet10_train_data", transform=lambda x: NormalizeScale()(SamplePoints(num=sample_points)(x)))
else:
training_data = ModelNet("ModelNet10_train_data", transform=lambda x: NormalizeScale()(NormalizeRotation()(SamplePoints(num=sample_points, remove_faces=True, include_normals=True)(x))))
training_sampler = DistributedWeightedSampler(training_data, num_replicas=world_size) #weight unbalanced classes by 1/cls_count
training_dataloader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=data_shuffle, num_workers=0,
pin_memory=True, sampler=training_sampler)
if dimensionality == 3:
test_data = ModelNet("ModelNet10_test_data", train=False, transform=lambda x: NormalizeScale()(SamplePoints(num=sample_points)(x)))
else:
test_data = ModelNet("ModelNet10_test_data", train=False, transform=lambda x: NormalizeScale()(NormalizeRotation()(SamplePoints(num=sample_points, remove_faces=True, include_normals=True)(x))))
test_sampler = DistributedWeightedSampler(test_data, num_replicas=world_size) #weight unbalanced classes by 1/cls_count
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=data_shuffle, num_workers=0,
pin_memory=True, sampler=test_sampler)
# weighted CE Loss over all Classes C
class_sample_count = np.array([len(np.where(training_data.data.y == t)[0]) for t in np.unique(training_data.data.y)])
weight = 1. / class_sample_count
weight = torch.from_numpy(weight)
weight = weight.float()
loss_fn = nn.CrossEntropyLoss(weight=weight).cuda(gpu)
# continue training from certain checkpoint
continue_from_scratch = True if args.resume is None else False
if continue_from_scratch:
if gpu == 0:
print("\nINFO: Train from scratch has started . . .")
train_optimisation(gpu, gpus, training_dataloader, test_dataloader, model, loss_fn, optimizer, None, dir_path, 0)
else:
checkpoint_path = "stackgraphConvPool3DPnet/" + args.resume
if gpu == 0:
print(f"\nINFO: Train has started from certain checkpoint {checkpoint_path.split('/')[2].split('.')[0]} in {checkpoint_path.split('/')[1]} . . .")
model.load_state_dict(torch.load(checkpoint_path)['model_state_dict'], strict=False)
optimizer.load_state_dict(torch.load(checkpoint_path)['optimizer_state_dict'])
final_epoch = (torch.load("stackgraphConvPool3DPnet/" + args.resume)['epoch'])+1
train_optimisation(gpu, gpus, training_dataloader, test_dataloader, model, loss_fn, optimizer, None, dir_path, final_epoch)
code tools:
class KMeansInitMostDistantFromMean:
def __call__(self, *args, **kwargs):
X, k = args
mean = np.mean(X, axis=0)
arg_sorted = np.argsort(np.apply_along_axis(lambda y: euclidean(mean, y), 1, X))
output = X[np.flip(arg_sorted)[:k]]
return output
class KMeansInit:
def __call__(self, *args, **kwargs):
X, k = args
current_centroids = np.expand_dims(np.mean(X, axis=0), 0)
for i in range(k - 1):
X, current_centroids = self.next_centroid(X, current_centroids)
return current_centroids
def next_centroid(self, X, curr_centroids):
highest_dist = 0.0
next_centroid = None
next_centroid_index = None
for i, x in enumerate(X):
max_dist = np.amax(np.apply_along_axis(lambda y: euclidean(x, y), 1, curr_centroids))
if max_dist > highest_dist:
next_centroid = x
highest_dist = max_dist
next_centroid_index = i
return np.delete(X, next_centroid_index, 0), np.append(curr_centroids, np.expand_dims(next_centroid, 0), 0)
class Conv(gnn.MessagePassing):
def __init__(self, sigma: nn.Module, F: nn.Module, W: nn.Module, M: nn.Module, C: int, P: int):
super().__init__(aggr="mean")
self.sigma = sigma
self.F = F
self.W = W
self.M = M
self.C = C
self.P = P
self.B = torch.randn(C+P, requires_grad=True)
def forward(self, feature_matrix, edge_index):
return self.propagate(edge_index, feature_matrix=feature_matrix)
def message(self, feature_matrix_i, feature_matrix_j):
message = self.F(feature_matrix_j - feature_matrix_i)
message = message.view(-1, self.C + self.P, self.C)
feature_matrix_i_ = feature_matrix_i.unsqueeze(2)
output = torch.bmm(message, feature_matrix_i_).squeeze()
return output
def update(self, aggr_out, feature_matrix):
Weight = self.M(aggr_out)
aggr_out = aggr_out * Weight
transform = self.W(feature_matrix)
transform = transform.view(-1, self.C + self.P, self.C)
feature_matrix = feature_matrix.unsqueeze(2)
transformation = torch.bmm(transform, feature_matrix).squeeze()
aggr_out = aggr_out + transformation
output = aggr_out + self.B
output = self.sigma(output)
return output
class Aggregation(nn.Module):
def __init__(self, mlp1: nn.Module, mlp2: nn.Module):
super().__init__()
self.mlp1 = mlp1
self.mlp2 = mlp2
self.softmax = nn.Softmax(0)
def forward(self, feature_matrix_batch: torch.Tensor, conv_feature_matrix_batch: torch.Tensor):
N, I, D = feature_matrix_batch.size()
N_, I_, D_ = conv_feature_matrix_batch.size()
augmentation = D_ - D
if augmentation > 0:
feature_matrix_batch = F.pad(feature_matrix_batch, (0, augmentation))
S1 = torch.mean(feature_matrix_batch, 1)
S2 = torch.mean(conv_feature_matrix_batch, 1)
Z1 = self.mlp1(S1)
Z2 = self.mlp2(S2)
M = self.softmax(torch.stack((Z1, Z2), 0))
M1 = M[0]
M2 = M[1]
M1 = M1.unsqueeze(1).expand(-1, I, -1)
M2 = M2.unsqueeze(1).expand(-1, I, -1)
output = (M1 * feature_matrix_batch) + (M2 * conv_feature_matrix_batch)
return output
class MaxPool(nn.Module):
def __init__(self, k: int):
super().__init__()
self.k = k
def forward(self, feature_matrix_batch: torch.Tensor, cluster_index: torch.Tensor):
N, I, D = feature_matrix_batch.size()
feature_matrix_batch = feature_matrix_batch.view(-1, D)
output = scatter_max(feature_matrix_batch, cluster_index, dim=0)[0]
output = output.view(N, self.k, -1)
return output
class GraphConvPool3DPnet(nn.Module):
def __init__(self, shrinkingLayers: [ShrinkingUnit], mlp: nn.Module):
super().__init__()
self.neuralNet = nn.Sequential(*shrinkingLayers, mlp)
def forward(self, x: torch.Tensor, pos: torch.Tensor):
feature_matrix_batch = torch.cat((pos, x), 2) if x is not None else pos
return self.neuralNet(feature_matrix_batch)
class ShrinkingUnitStack(nn.Module):
def __init__(self, input_stack: int, stack_fork: int, mlp: nn.Module, learning_rate: int, k: int, kmeansInit, n_init, sigma: nn.Module, F: nn.Module, W: nn.Module,
M: nn.Module, C, P, mlp1: nn.Module, mlp2: nn.Module):
super().__init__()
self.stack_fork = stack_fork
stack_size = input_stack * stack_fork
self.selfCorrStack = SelfCorrelationStack(stack_size, mlp, learning_rate)
self.kmeansConvStack = KMeansConvStack(stack_size, k, kmeansInit, n_init, sigma, F, W, M, C, P)
self.localAdaptFeaAggreStack = AggregationStack(stack_size, mlp1, mlp2)
self.graphMaxPoolStack = MaxPoolStack(stack_size, k)
def forward(self, feature_matrix_batch):
feature_matrix_batch = torch.repeat_interleave(feature_matrix_batch, self.stack_fork, dim=0)
feature_matrix_batch = self.selfCorrStack(feature_matrix_batch)
feature_matrix_batch_, conv_feature_matrix_batch, cluster_index = self.kmeansConvStack(feature_matrix_batch)
feature_matrix_batch = self.localAdaptFeaAggreStack(feature_matrix_batch, conv_feature_matrix_batch)
output = self.graphMaxPoolStack(feature_matrix_batch, cluster_index)
return output
class SelfCorrelationStack(nn.Module):
def __init__(self, stack_size: int, mlp: nn.Module, learning_rate: int = 1.0):
super().__init__()
self.selfCorrelationStack = nn.ModuleList([SelfCorrelation(copy.deepcopy(mlp), learning_rate) for i in range(stack_size)])
self.apply(init_weights)
def forward(self, feature_matrix_batch: torch.Tensor):
# feature_matrix_batch size = (S,N,I,D) where S=stack_size, N=batch number, I=members, D=member dimensionality
output = selfCorrThreader(self.selfCorrelationStack, feature_matrix_batch)
# output size = (S,N,I,D) where where S=stack_size, N=batch number, I=members, D=member dimensionality
return output
class KMeansConvStack(nn.Module):
def __init__(self, stack_size: int, k: int, kmeansInit, n_init: int, sigma: nn.Module, F: nn.Module, W: nn.Module,
M: nn.Module, C: int, P: int):
super().__init__()
self.kmeansConvStack = nn.ModuleList([
KMeansConv(k, kmeansInit, n_init, copy.deepcopy(sigma), copy.deepcopy(F), copy.deepcopy(W),
copy.deepcopy(M), C, P) for i in range(stack_size)])
self.apply(init_weights)
def forward(self, feature_matrix_batch: torch.Tensor):
# feature_matrix_batch size = (S,N,I,D) where S=stack size, N=batch number, I=members, D=member dimensionality
feature_matrix_batch, conv_feature_matrix_batch, cluster_index = kmeansConvThreader(self.kmeansConvStack,
feature_matrix_batch)
return feature_matrix_batch, conv_feature_matrix_batch, cluster_index
class AggregationStack(nn.Module):
def __init__(self, stack_size: int, mlp1: nn.Module, mlp2: nn.Module):
super().__init__()
self.localAdaptFeatAggreStack = nn.ModuleList([Aggregation(copy.deepcopy(mlp1), copy.deepcopy(mlp2)) for i
in range(stack_size)])
self.apply(init_weights)
def forward(self, feature_matrix_batch: torch.Tensor, conv_feature_matrix_batch: torch.Tensor):
output = threader(self.localAdaptFeatAggreStack, feature_matrix_batch, conv_feature_matrix_batch)
return output
class MaxPoolStack(nn.Module):
def __init__(self, stack_size: int, k: int):
super().__init__()
self.graphMaxPoolStack = nn.ModuleList([MaxPool(k) for i in range(stack_size)])
self.apply(init_weights)
def forward(self, feature_matrix_batch: torch.Tensor, cluster_index: torch.Tensor):
output = threader(self.graphMaxPoolStack, feature_matrix_batch, cluster_index)
return output
def selfCorrThreader(modules, input_tensor):
list_append = []
threads = []
for i, t in enumerate(input_tensor):
threads.append(Thread(target=selfCorrAppender, args=(modules[i], t, list_append, i)))
[t.start() for t in threads]
[t.join() for t in threads]
list_append.sort()
list_append = list(map(lambda x: x[1], list_append))
return torch.stack(list_append)
def selfCorrAppender(module, tensor, list_append, index):
list_append.append((index, module(tensor)))
def kmeansConvThreader(modules, input_tensor):
list1_append = []
list2_append = []
list3_append = []
threads = []
for i, t in enumerate(input_tensor):
threads.append(
Thread(target=kmeansAppender, args=(modules[i], t, list1_append, list2_append, list3_append, i)))
[t.start() for t in threads]
[t.join() for t in threads]
list1_append.sort()
list2_append.sort()
list3_append.sort()
list1_append = list(map(lambda x: x[1], list1_append))
list2_append = list(map(lambda x: x[1], list2_append))
list3_append = list(map(lambda x: x[1], list3_append))
return torch.stack(list1_append), torch.stack(list2_append), torch.stack(list3_append)
def kmeansAppender(module, input, list1_append, list2_append, list3_append, index):
x, y, z = module(input)
list1_append.append((index, x))
list2_append.append((index, y))
list3_append.append((index, z))
def threader(modules, input_tensor1, input_tensor2):
list_append = []
threads = []
for i, t in enumerate(input_tensor1):
threads.append(Thread(target=threaderAppender, args=(modules[i], t, input_tensor2[i], list_append, i)))
[t.start() for t in threads]
[t.join() for t in threads]
list_append.sort()
list_append = list(map(lambda x: x[1], list_append))
return torch.stack(list_append)
def threaderAppender(module, t1, t2, list_append, index):
list_append.append((index, module(t1, t2)))
class Classifier(nn.Module):
def __init__(self, shrinkingLayersStack: [ShrinkingUnitStack], mlp: nn.Module):
super().__init__()
self.neuralNet = nn.Sequential(*shrinkingLayersStack)
self.mlp = mlp
def forward(self, x: torch.Tensor, pos: torch.Tensor):
feature_matrix_batch = pos.unsqueeze(0)
output = self.neuralNet(feature_matrix_batch)
output = torch.mean(output, dim=0)
return self.mlp(output)
Error:
thank you for your help
The attribute labels_ of a KMeans object is created once you actually compute the clusters by running .fit() (or .fit_predict(), or .fit_transform()).
Simple example:
>>> from sklearn.cluster import KMeans
>>> from numpy.random import random
>>> X = random((10,2))
>>> X
array([[0.2096706 , 0.69704806],
[0.31732618, 0.29607599],
[0.10372159, 0.56911046],
[0.30922255, 0.07952464],
[0.21190404, 0.46823665],
[0.67134948, 0.95702692],
[0.14781526, 0.24619197],
[0.89931979, 0.96301003],
[0.88256126, 0.07569739],
[0.70776912, 0.92997521]])
>>> clustering = KMeans(n_clusters=3)
>>> clustering.labels_
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'KMeans' object has no attribute 'labels_'
>>> clustering.fit(X)
KMeans(n_clusters=3)
>>> clustering.labels_
array([0, 0, 0, 0, 0, 1, 0, 1, 2, 1], dtype=int32)

error X = X.reshape(1, X.shape[0]) IndexError: tuple index out of range. How to fix that?

this is fragment of my code
def train(self, features, targets):
for X, y in zip(features, targets):
X = X.reshape(1, X.shape[0])
outputs = self.feed_forward(X)
when I try to use the method with data:
train(np.array([gameDataList[n].ball_position, gameDataList[n].wall_position]), np.array(gameDataList[n].upOrDown))
where gameDataList[n].upOrDown is an array e.g. [0.1, 0.9], and gameDataList[n].ball_position and gameDataList[n].wall_position are floats, I get this error.
Full code:
#### Imports ####
import numpy as np
#### Neural Network Class ####
class MLP:
##### Constructor ####
def __init__(self, n_input_nodes, hidden_nodes, n_output_nodes, lr):
## Network ##
self.n_input_nodes = n_input_nodes
self.n_output_nodes = n_output_nodes
self.nodes = hidden_nodes
self.nodes.insert(0, n_input_nodes)
self.nodes.append(n_output_nodes)
## Weights and Biases##
self.weights = []
self.biases = []
for i in range(1, len(self.nodes)):
self.weights.append(np.random.uniform(-1.0, 1.0, (self.nodes[i - 1], self.nodes[i])))
self.biases.append(np.random.uniform(-1.0, 1.0, (1, self.nodes[i])))
## Learning Rate ##
self.lr = lr
## Activation Functions ##
# Linear Activation
self.linear = lambda x: x
self.d_linear = lambda x: np.ones(x.shape)
# Relu Activation
def relu(x):
x[x < 0] = 0
return x
def d_relu(out):
out: x[x > 0] = 1
return out
self.relu = relu
self.d_relu = d_relu
# Sigmoid Activation
self.sigmoid = lambda x: 1 / (1 + np.exp(-x))
self.d_sigmoid = lambda out: out * (1 - out) # assumes out is tanh(x)
# Hyperbolic Tangent Activation
self.tanh = lambda x: np.tanh(x)
self.d_tanh = lambda out: 1 - out ** 2 # assumes out is tanh(x)
def getWeights(self):
return self.weights.copy()
def getBiases(self):
return self.biases.copy()
def setWeights(self, weights):
self.weights = weights.copy()
def setBiases(self, biases):
self.biases = biases.copy()
#### Feed Forward ####
def feed_forward(self, X):
outputs = [X]
logits = np.dot(X, self.weights[0]) + self.biases[0]
for i in range(1, len(self.nodes) - 1):
out = self.sigmoid(logits)
outputs.append(out)
logits = np.dot(out, self.weights[i]) + self.biases[i]
out = self.sigmoid(logits)
outputs.append(out)
return outputs
#### Backpropagation ####
def backpropagation(self, X, y, outputs):
weights_gradients = []
biases_gradients = []
d1 = y - outputs[-1]
d2 = self.d_sigmoid(outputs[-1])
error = d1 * d2
grad = outputs[-2].T * error
weights_gradients.append(grad)
biases_gradients.append(error)
for i in range(len(self.weights) - 2, 1, -1):
d = self.d_sigmoid(outputs[i])
error = np.dot(error, self.weights[i + 1].T) * d
grad = outputs[i - 1].T * error
weights_gradients.append(grad)
biases_gradients.append(error)
return weights_gradients, biases_gradients
#### Training ####
def train(self, features, targets):
# Batch Size for weight update step
batch_size = features.shape[0]
# Delta Weights Variables
delta_weights = [np.zeros(weight.shape) for weight in self.weights]
delta_biases = [np.zeros(bias.shape) for bias in self.biases]
# For every data point, forward pass, backpropogation, store weights change
for X, y in zip(features, targets):
# Forward pass
X = X.reshape(1, X.shape[0])
outputs = self.feed_forward(X)
# Back propogation
weights_gradients, biases_gradients = self.backpropagation(X, y, outputs)
for i in range(len(weights_gradients)):
delta_weights[-(i + 1)] += weights_gradients[i]
delta_biases[-(i + 1)] += biases_gradients[i]
for i in range(len(delta_weights)):
self.weights[i] += (self.lr * delta_weights[i]) / batch_size
self.biases[i] += (self.lr * delta_biases[i]) / batch_size
#### Testing Methods ####
def predict(self, X):
# Gives prediction
return self.feed_forward(X)[-1]
def test(self, features, targets):
predictions = self.predict(features)
n_correct = 0
for i in range(len(predictions)):
prediction = np.argmax(predictions[i])
correct = np.argmax(targets[i])
if prediction == correct:
n_correct += 1
return n_correct / len(targets)
class GameData:
def __init__(self, ball_position, wall_position, upOrDown):
self.wall_position = wall_position
self.ball_position = ball_position
self.upOrDown = upOrDown
I collect data, and train my network, in this way:
gameDataList.append(GameData(ball.trt.ycor(), b.trt.ycor(), [0.1, 0.9]))
mlp = MLP(2, [32, 32], 2, 0.0001)
n = random.randint(0, 999)
mlp.train(np.array([gameDataList[n].ball_position, gameDataList[n].wall_position]), np.array(gameDataList[n].upOrDown))
Problem solved. It was needed to write two square brackets instead of one.
wrong example:
np.array([gameDataList[n].ball_position, gameDataList[n].wall_position])
correct example:
np.array([[gameDataList[n].ball_position, gameDataList[n].wall_position]])

Multi-class Logistic Regression from scratch

I am trying to implement from scratch the multiclass logistic regression but my implementation returns bad results. I believe the definition of the gradient function and the cost function is fine. Maybe there is a problem with how these functions are interacting with the minimize function. I have tried it but I could not find out what is wrong. Could you please cast some light?
You can add the estimator 'myLR': myLR(**par_dict), with paramters
par_dict= {'alpha': 0.1, 'maxit': 2000, 'opt_method': 'bfgs', 'positive': False, 'penalty': None, 'verbose': True, 'seed': 3}
in this example or in any of these examples to test it.
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
def __init__(self, alpha=0.1, reltol=1e-8, maxit=1000, opt_method=None, verbose=True, seed=0):
self.alpha = alpha
self.maxit = maxit
self.reltol = reltol
self.seed = seed
self.verbose = verbose
self.opt_method = opt_method
self.lbin = preprocessing.LabelBinarizer()
def w_2d(self, w, n_classes):
return np.reshape(w, (-1, n_classes), order='F')
def softmax(self, W, X):
a = np.exp(X # W)
o = a / np.sum(a, axis=1, keepdims=True)
return o
def cost_wraper(self, W):
return self.cost(W, self.X, self.T, self.n_samples, self.n_classes)
def cost(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
log_O = np.log(self.softmax(W, X))
reg = self.apha * np.linalg.norm(W, ord='fro')
c = -np.sum([np.vdot(T[[i]], log_O[[i]]) for i in range(n_samples)]) / n_samples + reg
return c
def gradient_wraper(self, W):
return self.gradient(W, self.X, self.T, self.n_samples, self.n_classes)
def gradient(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
O = self.softmax(W, X)
reg = self.alpha * W
grad = -X.T.dot(T - O) / n_samples + reg
return grad.flatten()
def fit(self, X, y=None):
self.n_classes = len(np.unique(y))
self.n_samples, n_features = X.shape
if self.n_classes == 2:
self.T = np.zeros((self.n_samples, self.n_classes), dtype=np.float64)
for i, cls in enumerate(range(self.n_classes)):
self.T[y == cls, i] = 1
else:
self.T = self.lbin.fit_transform(y)
self.X = X
np.random.seed(self.seed)
W_0 = np.random.random(n_features * self.n_classes)
options = {'disp': self.verbose, 'maxiter': self.maxit}
f_min = minimize(fun=self.cost_wraper, x0=W_0,
method=self.opt_method,
jac=self.gradient_wraper,
options=options)
self.coef_ = self.w_2d(f_min.x, self.n_classes)
self.W_ = self.coef_
return self
def predict_proba(self, X):
O = self.softmax(self.coef_, X)
return O
def predict(self, X):
sigma = self.predict_proba(X)
y_pred = np.argmax(sigma, axis=1)
return y_pred
Edit: Regularization term is included.
I think it is now working with the following code.
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
def __init__(self, reltol=1e-8, maxit=1000, opt_method=None, verbose=True, seed=0):
self.maxit = maxit
self.reltol = reltol
self.seed = seed
self.verbose = verbose
self.opt_method = opt_method
self.lbin = preprocessing.LabelBinarizer()
def w_2d(self, w, n_classes):
return np.reshape(w, (n_classes, -1))
def softmax(self, W, X):
a = np.exp(X # W.T)
o = a / np.sum(a, axis=1, keepdims=True)
return o
def squared_norm(self, x):
x = np.ravel(x, order='K')
return np.dot(x, x)
def cost(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
log_O = np.log(self.softmax(W, X))
c = -(T * log_O).sum()
return c / n_samples
def gradient(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
O = self.softmax(W, X)
grad = -(T - O).T.dot(X)
return grad.ravel() / n_samples
def fit(self, X, y=None):
n_classes = len(np.unique(y))
n_samples, n_features = X.shape
if n_classes == 2:
T = np.zeros((n_samples, n_classes), dtype=np.float64)
for i, cls in enumerate(np.unique(y)):
T[y == cls, i] = 1
else:
T = self.lbin.fit_transform(y)
np.random.seed(self.seed)
W_0 = np.random.random((self.n_classes, self.n_features))
options = {'disp': self.verbose, 'maxiter': self.maxit}
f_min = minimize(fun=self.cost, x0=W_0,
args=(X, T, n_samples, n_classes),
method=self.opt_method,
jac=self.gradient,
options=options)
self.coef_ = self.w_2d(f_min.x, n_classes)
self.W_ = self.coef_
return self
def predict_proba(self, X):
O = self.softmax(self.W_, X)
return O
def predict(self, X):
sigma = self.predict_proba(X)
y_pred = np.argmax(sigma, axis=1)
return y_pred

weight in neural network

I am trying neural network feed forward in my anaconda using python3.7 under ipython script.
import numpy as np
X = np.array([1, 0.9])
y = np.array(([0.93], [1]
X = X/np.amax(X, axis=0)
y = y/100
class Neural_Network(object):
def __init__(self):
#enter code here
#parameters
self.inputSize = 2
self.outputSize = 1
self.hiddenSize = 3
#weights
self.W1 = np.random.randn(self.inputSize, self.hiddenSize)
self.W2 = np.random.randn(self.hiddenSize, self.outputSize)
def forward(self, X):
self.z = np.dot(X, self.W1)
self.z2 = self.sigmoid(self.z)
self.z3 = np.dot(self.z2, self.W2)
o = self.sigmoid(self.z3)
return o
def sigmoid(self, s):
return 1/(1+np.exp(-s))
NN = Neural_Network()
o = NN.forward(X)
print "Predicted Output: \n" + str(o)
print "Actual Output: \n" + str(y)
I would like to know whether I put the weights as random can be change as array like y input value?

set the properties of a class fixed once in order to avoid tedious calculation

I have two classes, namely PositionsD and makemock which are defined as following:
import numpy as np
cdef class PositionsD(object):
property x:
def __get__(self):
return np.array(self._x)
def __set__(self, x):
self._x = x
property y:
def __get__(self):
return np.array(self._y)
def __set__(self, y):
self._y = y
def __init__(self, positions):
self._x = positions[:,0]
self._y = positions[:,1]
class makemock(object):
def __init__(self):
self.scale = 0.238
self.arcsec2rad = np.pi/180./60./60.
self.g1 = None
self.g2 = None
self.source_pos = None
self.z = None
self.h_pos = None
self.h_z = None
def get_pos(self):
return PositionsD(self.source_pos)
pos = property(get_shear_pos)
def get_center(self):
return PositionsD(self.h_pos)
center = property(get_center)
def get_dist(self):
dx_mpc = (self.pos.x-self.center.x)*self.arcsec2rad*self.scale
dy_mpc = (self.pos.y-self.center.y)*self.arcsec2rad*self.scale
return np.vectorize(complex)(dx_mpc, dy_mpc)
dist = property(get_dist)
def get_r(self):
return abs(self.dist)
r = property(get_r)
def get_norm(self):
return -self.dist/np.conjugate(self.dist)
norm = property(get_norm)
def get_gabs(self):
return np.sqrt(self.g1**2 + self.g2**2 )
gabs = property(get_gabs)
def get_g(self):
phiell=np.arctan2(self.g2, self.g1) /2.
phipos=np.arctan2( (self.pos.y-self.center.y), (self.pos.x-self.center.x) )
et = -self.gabs * np.cos( 2*(phiell-phipos) )
ec = -self.gabs * np.sin( 2*(phiell-phipos) )
return np.vectorize(complex)(et, ec)
obs_g = property(get_g)
def data2model(self,params):
rs = params
x = self.r/rs
P = len(self.r)
gamma = np.zeros((P,), dtype=np.float64, order='C')
kappa = np.zeros((P,), dtype=np.float64, order='C')
farcth = np.zeros((P,), dtype=np.float64, order='C')
m1 = np.where(x < 1.0)[0]
kappa[m1] = 2/(x[m1]**2 - 1) * \
(1 - np.log((1 + ((1 - x[m1])/(x[m1] + 1))**0.5)/(1 - ((1 - x[m1])/(x[m1] + 1))**0.5))/(1 - x[m1]**2)**0.5)
farcth[m1]=0.5*np.log((1.+((1.-x[m1])/(x[m1]+1.))**0.5)/(1.-((1.-x[m1])/(x[m1]+1.))**0.5))/(1-x[m1]**2)**0.5
gamma[m1] = 4*(np.log(x[m1]/2) + 2*farcth[m1]) * x[m1]**(-2) - kappa[m1]
model_g = self.norm* gamma /(1. - kappa )
e = (self.obs_g+model_g)/(1+np.conjugate(model_g)*self.obs_g)
mask=(abs(model_g)>1.)
if (np.sum(mask)>0):
e[mask]=1./np.conjugate(e[mask])
return e
My Question is:
I need to run the data2model method from makemock in a loop for different values of params. I figured the process is quite slow while it seems each time in the loop properties of a class such as r, dist, obs_g get computed in each iteration. Is there any way to set them once, in order to increase the speed of loop?
After fiddling around to find a way to initialize some instances at the begining and avoid calculating them in each iteration of each function, I found out it is the best to use a dict and leave the value of each instance in this dictionary and update them at the end of initializing the instances. That is my solution:
class makemock(object):
def __init__(self, g1, g2, source_pos, z, h_pos, h_z, **kw):
self.scale = 0.238
self.arcsec2rad = np.pi/180./60./60.
self.g1 = g1
self.g2 = g2
self.source_pos = source_pos
self.z = z
self.h_pos = h_pos
self.h_z = h_z
pos= PositionsD(self.source_pos)
center=PositionsD(self.h_pos)
dx_mpc = (pos.x-center.x)*self.arcsec2rad*self.scale
dy_mpc = (pos.y-center.y)*self.arcsec2rad*self.scale
dist= np.vectorize(complex)(dx_mpc, dy_mpc)
r= abs(dist)
norm= -dist/np.conjugate(dist)
gabs= np.sqrt(self.g1**2 + self.g2**2 )
phiell=np.arctan2(self.g2, self.g1) /2.
phipos=np.arctan2( (pos.y-center.y), (pos.x-center.x) )
et = -gabs * np.cos( 2*(phiell-phipos) )
ec = -gabs * np.sin( 2*(phiell-phipos) )
obs_g=np.vectorize(complex)(et, ec)
self.__dict__.update(kw)
del kw
self.__dict__.update(locals())
del self.self
def dump(self):
print repr(self.__dict__)
def data2model(self,params):
rs = params
x = self.r/rs
P = len(self.r)
gamma = np.zeros((P,), dtype=np.float64, order='C')
kappa = np.zeros((P,), dtype=np.float64, order='C')
farcth = np.zeros((P,), dtype=np.float64, order='C')
m1 = np.where(x < 1.0)[0]
kappa[m1] = 2/(x[m1]**2 - 1) * \
(1 - np.log((1 + ((1 - x[m1])/(x[m1] + 1))**0.5)/(1 - ((1 - x[m1])/(x[m1] + 1))**0.5))/(1 - x[m1]**2)**0.5)
farcth[m1]=0.5*np.log((1.+((1.-x[m1])/(x[m1]+1.))**0.5)/(1.-((1.-x[m1])/(x[m1]+1.))**0.5))/(1-x[m1]**2)**0.5
gamma[m1] = 4*(np.log(x[m1]/2) + 2*farcth[m1]) * x[m1]**(-2) - kappa[m1]
model_g = self.norm* gamma /(1. - kappa )
e = (self.obs_g+model_g)/(1+np.conjugate(model_g)*self.obs_g)
mask=(abs(model_g)>1.)
if (np.sum(mask)>0):
e[mask]=1./np.conjugate(e[mask])
return e

Categories