Related
My goal is to compute a confusion matrix from a huge dataset with 10 classes. So far I have the following code and results:
Note: As far as I know, the model is making correct predictions over all the classes. I computed the loss in a pre-training phase and the accuracy during this transfer-classification phase, and both behave as expected. My problem is obtaining the predicted labels from the outputs.
# Split the full dataset into train/validation subsets. `Subset` takes a third
# (transform) argument here, so it is presumably a project-specific wrapper and
# not torch.utils.data.Subset -- confirm against its import.
train_dataset = Subset(eurosat_dataset, train_indices, train_transforms)
val_dataset = Subset(eurosat_dataset, val_indices, val_transforms)
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=False, num_workers=2, pin_memory=False,
                          drop_last=True)
# Evaluation must see every sample: with drop_last=True the final partial batch
# would be silently discarded and never counted in the confusion matrix.
val_loader = DataLoader(val_dataset, batch_size=batchsize, shuffle=False, num_workers=2, pin_memory=False,
                        drop_last=False)
print('train_len: %d val_len: %d' % (len(train_dataset), len(val_dataset)))
#for i, data in enumerate(val_loader): # inputs = data[0], labels = data[1]
# inputs, labels = data # inputs [1,13,224,224], labels[0-9] --> classes
# if i > 10:
# break
# print(inputs.shape, labels, inputs[0].max())
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#inputs = inputs.to(device)
# Get the model, definition of the model to be loaded
import models.models_mae_mod as models_mae_mod
from models.util.pos_embed import interpolate_pos_embed # import pos_embed.py ----> Run OK
def prepare_model(chkpt_dir, arch='mae_vit_small_patch16'):
    """Build a model from ``models_mae_mod`` and load pretrained weights into it.

    Args:
        chkpt_dir: Path of the checkpoint file. The weights are read from the
            ``'model'`` entry of the loaded checkpoint.
        arch: Name of the constructor in ``models_mae_mod`` to call.

    Returns:
        The constructed model with the checkpoint weights loaded non-strictly
        (missing/unexpected keys are reported via the printed load message).
    """
    # build model
    model = getattr(models_mae_mod, arch)(in_chans=13)
    # load model
    checkpoint = torch.load(chkpt_dir, map_location='cpu')
    # The weights live under 'model'. All checks below must operate on that
    # dict: looking keys up in the outer container never matches anything.
    checkpoint_model = checkpoint['model']
    state_dict = model.state_dict()
    for k in ['head.weight', 'head.bias']:
        # Only compare shapes when both sides have the key; a model without a
        # classification head would otherwise raise KeyError here.
        if (k in checkpoint_model and k in state_dict
                and checkpoint_model[k].shape != state_dict[k].shape):
            print(f"Removing key {k} from pretrained checkpoint")
            del checkpoint_model[k]
    # interpolate position embedding
    # NOTE(review): assumes interpolate_pos_embed expects the weight dict itself
    # (the same dict that is passed to load_state_dict) -- confirm in pos_embed.py.
    interpolate_pos_embed(model, checkpoint_model)
    msg = model.load_state_dict(checkpoint_model, strict=False)
    print(msg)
    return model
# Restore the pretrained weights, move the network to the selected device and
# switch it to inference mode.
chkpt_dir = 'C:/Users/hugo_/PycharmProjects/transfermodel_Eurosat/datasets/B_raw_norm.pth'
model_mae = prepare_model(chkpt_dir, 'mae_vit_small_patch16').to(device)
model_mae.eval()
print('Model loaded.')
# Inspection pass: run every validation batch through the model with autograd
# disabled. Nothing is accumulated; `outputs` only holds the last batch's result.
with torch.no_grad():
    for i, (inputs, labels) in enumerate(val_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model_mae(inputs) # 0 is LOSS, 1 is [1, 196, 3328] is PRED, 2 is [1, 196] is MASK,
        # 3 is [1, 13, 224, 224] is TARGET
        #_, preds = torch.max(outputs, 1)
        #outputs = outputs[-1:]
# NOTE(review): the original indentation was lost; this print is assumed to run
# once after the loop -- confirm.
print("set")
I'm not computing the confusion matrix this time, since the outputs are not in the correct format for it.
# Count matrix of size nb_classes x nb_classes: the row index is the true label
# and the column index is the predicted label (see the [t, p] update below).
nb_classes = 10
confusion_matrix = torch.zeros(nb_classes, nb_classes)
with torch.no_grad():
    for i, (inputs, classes) in enumerate(val_loader):
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model_mae(inputs)
        # NOTE(review): the inline comment on the earlier forward pass in this
        # file describes element 3 as the [1, 13, 224, 224] TARGET tensor, not
        # per-class scores. If that is right, the argmax over dim 1 below picks
        # a channel index per pixel rather than a class prediction -- confirm
        # which model output actually carries the class logits.
        outputs = outputs[3]
        # Index of the maximum along dim 1 is taken as the prediction.
        _, preds = torch.max(outputs, 1)
        # zip() stops at the shorter sequence, so any extra entries in `preds`
        # are ignored without an error.
        for t, p in zip(classes.view(-1), preds.view(-1)):
            confusion_matrix[t.long(), p.long()] += 1
# NOTE(review): original indentation was lost; assumed to print once at the end.
print(confusion_matrix)
I identified my problem as the way I'm getting the outputs: the call itself is correct, but it is not enough to get the information I want. How can I get the predicted labels and use them to compute the confusion matrix?
I attach an image of my debugging process for a better understanding:
I have a data set like this:
# Toy data set with two graphs; every container below has one entry per graph.
# The edge lists are ragged: 5 edges for the first graph, 3 for the second.
edge_origins = np.array([[0,1,2,3,4],[6,7,8]])
edge_destinations = np.array([[1,2,3,4,5],[7,8,9]])
# One class label per graph.
target = np.array([0,1])
# Node features: one 3-element vector per node, grouped by graph
# (6 nodes in the first graph, 4 in the second).
x = [[np.array([0.1,0.5,0.2]),np.array([0.5,0.6,0.23]),
      np.array([0.1,0.5,0.5]),np.array([0.1,0.6,0.23]),
      np.array([0.1,0.4,0.4]),np.array([0.52,0.6,0.23])],
     [np.array([0.1,0.3,0.3]),np.array([0.3,0.6,0.23]),
      np.array([0.1,0.1,0.2]),np.array([0.4,0.6,0.23])]]
This is a list of two networks. The first network has 6 nodes with 5 edges and class 0; the second has 4 nodes with 3 edges and class 1.
I want to develop a model in PyTorch that will classify each network into its class, and then I'll give it a new set of networks to classify.
So ultimately, I want to be able to shuffle these lists (simultaneously, i.e. maintaining the order between the data and the classes), split into train and test, and then read the train and test data into two data loaders, and feed these into a PyTorch network.
I wrote this:
# Same toy data as above: two graphs with ragged edge lists (5 and 3 edges).
edge_origins = np.array([[0,1,2,3,4],[6,7,8]])
edge_destinations = np.array([[1,2,3,4,5],[7,8,9]])
target = np.array([0,1])
x = [[np.array([0.1,0.5,0.2]),np.array([0.5,0.6,0.23]),
      np.array([0.1,0.5,0.5]),np.array([0.1,0.6,0.23]),
      np.array([0.1,0.4,0.4]),np.array([0.52,0.6,0.23])],
     [np.array([0.1,0.3,0.3]),np.array([0.3,0.6,0.23]),
      np.array([0.1,0.1,0.2]),np.array([0.4,0.6,0.23])]]
# This line raises the ValueError quoted below: the two graphs have edge lists
# of different lengths, so they cannot be packed into one rectangular tensor.
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
# NOTE(review): `y` is not defined anywhere in this snippet (the labels are
# stored in `target`) -- this would fail next once the tensor error is fixed.
dataset = Data(x=x, edge_index=edge_index, y=y, num_classes = len(set(target)))
print(dataset)
And the error is:
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
ValueError: expected sequence of length 5 at dim 2 (got 3)
But then once that is fixed I think the next step is:
# Fix the RNG seed so the shuffle below is reproducible.
torch.manual_seed(12345)
# NOTE(review): assumes `dataset` supports .shuffle() and slicing, i.e. that it
# is a collection of graphs rather than a single Data object -- confirm.
dataset = dataset.shuffle()
train_dataset = dataset[:1] #for toy example
test_dataset = dataset[1:]
print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')
# Only the training loader reshuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
class GCN(torch.nn.Module):
    """Graph classifier: three GCNConv layers, mean-pool readout, linear head.

    The input and output sizes are read from the module-level ``dataset``
    (``num_node_features`` and ``num_classes``).
    """

    def __init__(self, hidden_channels):
        """Create the layers; ``hidden_channels`` is the width of every conv layer."""
        super().__init__()
        # Seed before creating the layers so their initial weights are reproducible.
        torch.manual_seed(12345)
        in_channels = dataset.num_node_features
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        out_channels = dataset.num_classes
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Return one row of class scores per graph in the batch."""
        # Node embeddings: ReLU after the first two convolutions only.
        hidden = self.conv1(x, edge_index).relu()
        hidden = self.conv2(hidden, edge_index).relu()
        hidden = self.conv3(hidden, edge_index)
        # Readout: average the node embeddings of each graph.
        pooled = global_mean_pool(hidden, batch)  # [batch_size, hidden_channels]
        # Dropout is active only while the module is in training mode.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)
# Instantiate the network and the training objects that train()/test() read
# as module-level globals.
model = GCN(hidden_channels=64)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# CrossEntropyLoss takes the raw scores produced by the model's final linear layer.
criterion = torch.nn.CrossEntropyLoss()
def train():
    """Run one optimisation pass over every batch of ``train_loader``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.
    """
    model.train()
    for batch in train_loader:
        # Forward pass on the whole mini-batch of graphs.
        logits = model(batch.x, batch.edge_index, batch.batch)
        # Back-propagate the loss, apply the update, then reset the gradients
        # so the next batch starts from zero.
        criterion(logits, batch.y).backward()
        optimizer.step()
        optimizer.zero_grad()
def test(loader):
    """Return the fraction of samples in ``loader`` that the model classifies correctly.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; ``loader.dataset`` supplies the total sample count.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building a graph
    # for every forward pass and so saves memory and time.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest score.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
# Train for 170 epochs (1..170) and report both accuracies after every epoch.
for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
Could someone demonstrate to me how to get my data running into the Pytorch network above?
In Pytorch Geometric the Data object is used to contain only one graph. So you could iterate through all your arrays like so:
# Build one Data object per graph; a single Data holds exactly one graph.
data_list = []
for i in range(len(target)):
    # Shape [2, num_edges]: first row holds source nodes, second row targets.
    # NOTE(review): node indices inside a Data object are presumably expected
    # to be local to that graph (0 .. num_nodes - 1); the second toy graph uses
    # indices 6..9 for only 4 nodes -- confirm and re-index if needed.
    edge_index_curr = torch.tensor([edge_origins[i],
                                    edge_destinations[i]],
                                   dtype=torch.long)
    # Stack the per-node vectors into one array first, and cast to float32 so
    # the features match the default dtype of the model's weights.
    node_features = torch.tensor(np.array(x[i]), dtype=torch.float)
    data = Data(x=node_features, edge_index=edge_index_curr, y=torch.tensor(target[i]))
    data_list.append(data)
You can then use this list of Data to create your own Dataloader:
# Batch the per-graph Data objects, up to 32 graphs per batch.
# NOTE(review): presumably this is PyTorch Geometric's DataLoader rather than
# torch.utils.data.DataLoader -- confirm the import.
loader = DataLoader(data_list, batch_size=32)
If you need to split into train/val/test (I would advise having more than 2 samples for this case) you can do it manually or using sklearn.model_selection.
For data augmentation if you really do have very little data, pytorch-geometric comes with transforms.
Hi, I have the data-loading code below, and I'm not sure how to split the data into training and testing sets. Can anyone suggest how to do it? This is my data-loading code:
def __init__(self, root, specific_folder, img_extension, preprocessing_method=None, crop_size=(96, 112), train=True):
    """
    Index the images of the LFW dataset for people with enough pictures.

    root: path to the dataset to be used (must contain 'people.txt').
    specific_folder: specific folder inside the same dataset.
    img_extension: extension of the dataset images.
    preprocessing_method: string with the name of the preprocessing method.
    crop_size: retrieval network specific crop size.
    train: accepted but not used by this method.
    """
    self.preprocessing_method = preprocessing_method
    self.crop_size = crop_size
    self.imgl_list = []
    self.classes = []
    self.people = []
    self.model_align = None
    self.arr = []

    # people.txt: the first line is skipped; each remaining line is expected
    # to be "<name>\t<number of images>".
    with open(os.path.join(root, 'people.txt')) as f:
        entries = f.read().splitlines()[1:]

    for entry in entries:
        fields = entry.split('\t')
        # Lines without a tab-separated count are ignored.
        if len(fields) <= 1:
            continue
        name = fields[0]
        n_images = int(fields[1])
        # Keep only the people that have at least 20 images.
        if n_images < 20:
            continue
        for num_img in range(1, n_images + 1):
            file_name = name + '_' + '{:04}'.format(num_img) + '.' + img_extension
            self.imgl_list.append(os.path.join(root, specific_folder, name, file_name))
            self.classes.append(name)
            self.people.append(name)

    # Replace the person names in self.classes with the encoder's labels;
    # self.people keeps the original names.
    le = preprocessing.LabelEncoder()
    self.classes = le.fit_transform(self.classes)
    print(len(self.imgl_list), len(self.classes), len(self.people))
def __getitem__(self, index):
    """
    Load, preprocess and normalise the image at position ``index``.

    Returns a tuple of: a list with two float tensors (the image and a copy
    reversed along its second axis), the encoded class, the preprocessed image
    array, the value ``bb`` returned by ``preprocess``, the image path and the
    person's name.
    """
    path = self.imgl_list[index]
    imgl = imageio.imread(path)
    cl = self.classes[index]

    # A 2-D array means a grayscale image: repeat it 3 times along a new last axis.
    if len(imgl.shape) == 2:
        imgl = np.stack([imgl] * 3, 2)

    imgl, bb = preprocess(imgl, self.preprocessing_method, crop_size=self.crop_size,
                          is_processing_dataset=True, return_only_largest_bb=True, execute_default=True)

    # Pair the image with its reverse along axis 1, then normalise each one
    # and move the last axis to the front.
    # assumes images are channel-last at this point -- TODO confirm
    reversed_img = imgl[:, ::-1, :]
    imgs = [
        torch.from_numpy(((view - 127.5) / 128.0).transpose(2, 0, 1)).float()
        for view in (imgl, reversed_img)
    ]

    return imgs, cl, imgl, bb, path, self.people[index]
def __len__(self):
    """Return the number of indexed images (one entry per image path)."""
    return len(self.imgl_list)
I need to split the data into 20% and 80% so I can test my module. It has been almost a week now and I still have no idea how to do it. I would appreciate it very much if anyone could help:
In general using PyTorch:
import torch
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
dataset = yourdatahere  # placeholder: substitute your own Dataset instance
batch_size = 16 #change to whatever you'd like it to be
test_split = .2
shuffle_dataset = True
random_seed= 42
# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
# Number of samples that go to the test set (rounded down).
split = int(np.floor(test_split * dataset_size))
if shuffle_dataset :
    # Seed first so the same split is produced on every run.
    np.random.seed(random_seed)
    np.random.shuffle(indices)
# The first `split` indices become the test set, the remainder the training set.
train_indices, test_indices = indices[split:], indices[:split]
# Creating PT data samplers and loaders:
# Both loaders wrap the same dataset; the samplers restrict each one to its own
# disjoint set of indices.
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           sampler=train_sampler)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                          sampler=test_sampler)
# Usage Example:
num_epochs = 10
for epoch in range(num_epochs):
    # Train:
    for batch_index, (faces, labels) in enumerate(train_loader):
        # ...
        pass  # placeholder so the example is syntactically valid; put the training step here
Please note that you should also split your training data into training + validation data. You may use the same logic from above to do so.
I am getting the error:
ValueError: Input arrays should have the same number of samples as target arrays. Found 6 input samples and 128 target samples.
when training with keras.
I am using a generator to generate a moving window over my timeseries that looks like this:
def generator_val(X,y,number_of_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
    """Yield ``(inputs, targets)`` mini-batches of sliding windows, forever.

    For every series ``X[m]`` a window ends at each step
    ``length, length + overview_steps, ...`` below ``X[m].shape[0]``. The window
    ``X[m][step - length:step]`` is paired with the target ``y[m][step]``.

    Args:
        X: Sequence of 2-D arrays (time steps x features), one per machine.
        y: Sequence of 1-D target arrays aligned with ``X``.
        number_of_steps: Unused; kept for interface compatibility.
        batch_size: Maximum number of windows per mini-batch.
        length: Number of time steps per window.
        overview_steps: Stride between consecutive window end positions.
        shuffle: Unused; kept for interface compatibility.
        prediction: Unused; kept for interface compatibility.

    Yields:
        A float32 array of shape ``(n, length, n_features)`` and a float32
        array of shape ``(n,)``, where ``n`` equals ``batch_size`` except for
        the last batch of each pass, which may be smaller.

    Raises:
        ValueError: If no series is long enough to produce a single window.
    """
    # The index tables depend only on the arguments, so build them once
    # instead of on every pass over the data.
    window_ends = [np.arange(length, Xi.shape[0], overview_steps) for Xi in X]
    machine_idcs = np.concatenate(
        [np.repeat(i, len(ends)) for i, ends in enumerate(window_ends)])
    # All possible window end positions, in the same order as machine_idcs.
    step_idcs = np.concatenate(window_ends)
    if len(step_idcs) == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError("no series is long enough to produce a window")
    # Matrix of (machine index, step index) pairs from which the mini-batches
    # are sliced.
    examples = np.zeros((len(step_idcs), 2), dtype=np.int32)
    examples[:, 0] = machine_idcs
    examples[:, 1] = step_idcs
    n_features = X[0].shape[1]
    while True:
        for i in range(0, examples.shape[0], batch_size):
            # Machine and step indices of this mini-batch.
            mbatch = examples[i:i + batch_size]
            n_rows = len(mbatch)
            mini_batch_X = np.zeros((n_rows, length, n_features), np.float32)
            # Size the targets by the actual batch length: the last batch of a
            # pass can be shorter than batch_size, and inputs and targets must
            # have the same number of samples.
            mini_batch_y = np.zeros((n_rows,), dtype=np.float32)
            for j, (machine_idx, step_idx) in enumerate(mbatch):
                mini_batch_X[j] = X[machine_idx][step_idx - length: step_idx]
                mini_batch_y[j] = y[machine_idx][step_idx]
            yield mini_batch_X, mini_batch_y
To start the training i am using the model.fit_generator:
# NOTE(review): `generator` is not defined in the snippet shown above (only
# `generator_val` is) -- confirm which function is meant here.
# NOTE(review): `samples_per_epoch` is passed the number of steps, not a number
# of samples -- confirm this matches the installed Keras version's API.
model.fit_generator(generator(X, y, number_of_steps= number_of_steps,batch_size=128, length=300),
                    validation_data = generator(X_val, y_val, number_of_steps= number_of_steps_val,batch_size=128, length=300),
                    validation_steps = number_of_steps_val,
                    samples_per_epoch= number_of_steps,
                    epochs=2)
It looks as if the generator is not using the infinite loop, or cannot reset within one batch.
Is there a possibility to reset the generator after each epoch?
Update
###sample data
# Random stand-in data with the same layout as the real data: one
# (10037 steps x 24 features) series per entry of X / X_val, and one binary
# label per time step.
test_X = np.random.rand(len(X),10037, 24).astype(np.float32)
test_Y = np.random.randint(0,2,(len(X),10037)).astype(np.float32)
val_X = np.random.rand(len(X_val), 10037,24).astype(np.float32)
# The upper bound of randint is exclusive: randint(0, 1) would only ever yield
# 0, so use (0, 2) to get both classes, as for test_Y.
val_Y = np.random.randint(0,2,(len(X_val),10037)).astype(np.float32)
# Split the stacked arrays into lists with one array per series.
X = list(test_X)
Y = list(test_Y)
X_val = list(val_X)
Y_val = list(val_Y)
Workaround
I have found a workaround for this error; however, I am not really happy with it, because it throws away some of the last sequences. The idea is to use only as many sequences as can be divided evenly by the batch size.
The code for that looks like this:
# Stride between consecutive window end positions.
window_steps = 50
# Total time steps over all series, minus 300 steps per series (300 is the
# window length passed to the generator below).
number_of_samples = sum(series.shape[0] for series in X_) - len(X_) * 300
# Whole batches of 128 windows per epoch; int() truncates the remainder.
number_of_steps = int(number_of_samples / 128 / window_steps)
number_of_samples_val = sum(series.shape[0] for series in X_val_) - len(X_val_) * 300
number_of_steps_val = int(number_of_samples_val / 128 / window_steps)
def generator_val(X,y, number_of_steps, window_steps = window_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
    """Yield full ``(inputs, targets)`` mini-batches of sliding windows, forever.

    Only the first ``number_of_steps * batch_size`` windows are used, so every
    yielded batch has exactly ``batch_size`` rows; the remaining windows are
    dropped. ``overview_steps``, ``shuffle`` and ``prediction`` are accepted
    but not used.
    """
    n_examples = number_of_steps * batch_size
    while True:
        # End position of every window, per series, at stride window_steps.
        window_ends = [np.arange(length, series.shape[0], window_steps) for series in X]
        machine_idcs = np.concatenate(
            [np.repeat(k, len(ends)) for k, ends in enumerate(window_ends)])
        step_idcs = np.concatenate(window_ends)
        # (machine index, step index) table, truncated to a whole number of batches.
        examples = np.zeros((n_examples, 2), dtype=np.int32)
        examples[:, 0] = machine_idcs[:n_examples]
        examples[:, 1] = step_idcs[:n_examples]
        for offset in range(0, examples.shape[0], batch_size):
            # Index pairs belonging to this mini-batch.
            mbatch = examples[offset:offset + batch_size]
            mini_batch_X = np.zeros(
                (len(mbatch), length, X[0].shape[1]), np.float32)
            mini_batch_y = np.zeros((batch_size,), dtype=np.float32)
            for row, (machine_idx, step_idx) in enumerate(mbatch):
                # Window of `length` steps ending just before step_idx, paired
                # with the target at step_idx.
                mini_batch_X[row] = X[machine_idx][step_idx - length: step_idx]
                mini_batch_y[row] = y[machine_idx][step_idx]
            yield mini_batch_X, mini_batch_y
# Train and validate with the truncating generator; the step counts passed
# here are the same ones the generators use to cut off their example tables.
# NOTE(review): `samples_per_epoch` is passed the number of steps, not a number
# of samples -- confirm this matches the installed Keras version's API.
model.fit_generator(generator_val(X, Y, number_of_steps= number_of_steps, window_steps = window_steps, batch_size=128, length=300),
                    validation_data = generator_val(X_val,Y_val, number_of_steps= number_of_steps_val,window_steps = window_steps,batch_size=128, length=300),
                    validation_steps = number_of_steps_val,
                    samples_per_epoch= number_of_steps,
                    epochs=2)
And here is a sample network for it:
# Minimal model: one LSTM layer over the input window, then a single output unit.
# `sequence_length` and `num_features` are not defined in this snippet.
input1 = Input(shape=(sequence_length, num_features))
h1 = LSTM(50)(input1)
# NOTE(review): Dense(1) has no activation, while binary_crossentropy
# presumably expects probabilities in [0, 1] -- confirm this is intended.
prediction = Dense(1)(h1)
model = Model(inputs=[input1], outputs=[prediction])
loss = "binary_crossentropy"
optimizer = "adam"
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
So, do you know how to use all Sequences without throwing some away?
I have written myself a TensorFlow class, shown below, but I ran into a problem when trying to manually set some weights to zero after training, in the function refine_init_weight. In this function, I set every weight below some threshold to zero to see how the accuracy changes. The problem is that when I rerun self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}), its value does not change accordingly. Where should I change the symbolic variable in this case (the accuracy depends on the weights I changed)?
import tensorflow as tf
from nncomponents import *
from helpers import *
from sda import StackedDenoisingAutoencoder
class DeepFeatureSelection:
    """Feed-forward classifier with a one-to-one input layer for feature selection.

    The graph is built in ``__init__``, trained in ``train`` and the learned
    input-layer weights can afterwards be thresholded with
    ``refine_init_weight``.
    """

    def __init__(self, X_train, X_test, y_train, y_test, weight_init='sda', hidden_dims=[100, 100, 100], epochs=1000,
                 lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0, learning_rate=0.1, optimizer='FTRL'):
        """Build the computation graph.

        Args:
            X_train, X_test: 2-D feature arrays (samples x features).
            y_train, y_test: Class labels; stored one-hot encoded.
            weight_init: Initialisation scheme passed to ``init_layer_weight``.
            hidden_dims: Sizes of the hidden layers (only read, never mutated).
            epochs: Number of training iterations run by ``train``.
            lambda1, lambda2: Weights of the L2/L1 penalty on the input layer.
            alpha1, alpha2: Weights of the L2/L1 penalty on the other layers.
            learning_rate: Learning rate passed to ``optimize``.
            optimizer: Optimizer name passed to ``optimize``.
        """
        # Initiate the input layer
        # Get the dimension of the input X
        n_sample, n_feat = X_train.shape
        n_classes = len(np.unique(y_train))
        self.epochs = epochs
        # Store up original value
        self.X_train = X_train
        self.y_train = one_hot(y_train)
        self.X_test = X_test
        self.y_test = one_hot(y_test)
        # Two placeholders whose first (batch) dimension is left undetermined
        self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x')
        self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y')
        self.input_layer = One2OneInputLayer(self.var_X)
        self.hidden_layers = []
        layer_input = self.input_layer.output
        # Initialize the network weights
        weights, biases = init_layer_weight(hidden_dims, X_train, weight_init)
        print(type(weights[0]))
        # Create hidden layers, each one fed by the output of the previous layer
        for init_w, init_b in zip(weights, biases):
            self.hidden_layers.append(DenseLayer(layer_input, init_w, init_b))
            layer_input = self.hidden_layers[-1].output
        # Final classification layer, variable Y is passed
        self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y)
        n_hidden = len(hidden_dims)
        # regularization terms on coefficients of input layer
        self.L1_input = tf.reduce_sum(tf.abs(self.input_layer.w))
        self.L2_input = tf.nn.l2_loss(self.input_layer.w)
        # regularization terms on weights of hidden layers
        L1s = []
        L2_sqrs = []
        # range() behaves the same as xrange() here and exists on Python 2 and 3.
        for i in range(n_hidden):
            L1s.append(tf.reduce_sum(tf.abs(self.hidden_layers[i].w)))
            L2_sqrs.append(tf.nn.l2_loss(self.hidden_layers[i].w))
        L1s.append(tf.reduce_sum(tf.abs(self.softmax_layer.w)))
        L2_sqrs.append(tf.nn.l2_loss(self.softmax_layer.w))
        self.L1 = tf.add_n(L1s)
        self.L2_sqr = tf.add_n(L2_sqrs)
        # Cost with two regularization terms
        self.cost = self.softmax_layer.cost \
                    + lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \
                    + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1
        # FTRL optimizer is used to produce more zeros
        # self.optimizer = tf.train.FtrlOptimizer(learning_rate=learning_rate).minimize(self.cost)
        self.optimizer = optimize(self.cost, learning_rate, optimizer)
        self.accuracy = self.softmax_layer.accuracy
        self.y = self.softmax_layer.y

    def train(self, batch_size=100):
        """Train for ``self.epochs`` iterations and store the input-layer weights.

        Creates the session (kept as ``self.sess``), runs one optimizer step per
        iteration on a batch from ``get_batch`` and reports progress every
        second iteration. On return ``self.selected_w`` holds the final
        input-layer weights.
        """
        sess = tf.Session()
        self.sess = sess
        sess.run(tf.initialize_all_variables())
        for i in range(self.epochs):
            x_batch, y_batch = get_batch(self.X_train, self.y_train, batch_size)
            sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
            # NOTE(review): the original indentation was lost; the periodic
            # report is assumed to cover the loss, the weight snapshot and both
            # accuracies -- confirm.
            if i % 2 == 0:
                l = sess.run(self.cost, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
                print('epoch {0}: global loss = {1}'.format(i, l))
                self.selected_w = sess.run(self.input_layer.w)
                print("Train accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_train, self.var_Y: self.y_train}))
                print("Test accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
        # Refresh the snapshot after the last step so it always reflects the
        # final weights, even when the last iteration was not a reporting one.
        self.selected_w = sess.run(self.input_layer.w)
        print(self.selected_w)
        print(len(self.selected_w[self.selected_w==0]))
        print("Final test accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))

    def refine_init_weight(self, threshold=0.001):
        """Zero every input-layer weight below ``threshold`` and report test accuracy.

        Must be called after ``train``, which creates ``self.sess`` and
        ``self.selected_w``. Note that the comparison is on the signed value,
        so negative weights are zeroed as well.
        """
        refined_w = np.copy(self.selected_w)
        refined_w[refined_w < threshold] = 0
        # assign() only adds an operation to the graph; the variable keeps its
        # old value until that operation is actually run in the session.
        assign_op = self.input_layer.w.assign(refined_w)
        self.sess.run(assign_op)
        print("Test accuracy refined:",self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
(I'll just repost my comment as an answer)
You need to run the assign operation you created, otherwise it is just added to the graph and never executed.
# Create the assignment operation, then execute it in the session; only the
# run() call actually changes the variable's value.
assign_op = self.input_layer.w.assign(refined_w)
self.sess.run(assign_op)
If you want to do this in Tensorflow you could create a boolean mask of the weight variable with tf.greater and tf.less, convert this mask to tf.float32 and multiply it with the weight array.