I have a data set like this:
edge_origins = np.array([[0,1,2,3,4],[6,7,8]])
edge_destinations = np.array([[1,2,3,4,5],[7,8,9]])
target = np.array([0,1])
x = [[np.array([0.1,0.5,0.2]),np.array([0.5,0.6,0.23]),
np.array([0.1,0.5,0.5]),np.array([0.1,0.6,0.23]),
np.array([0.1,0.4,0.4]),np.array([0.52,0.6,0.23])],
[np.array([0.1,0.3,0.3]),np.array([0.3,0.6,0.23]),
np.array([0.1,0.1,0.2]),np.array([0.4,0.6,0.23])]]
This is a list of two networks. The first network has 6 nodes with 5 edges and class 0; the second has 4 nodes with 3 edges and class 1.
I want to develop a model in PyTorch that will classify each network into its class, and then I'll give it a new set of networks to classify.
So ultimately, I want to be able to shuffle these lists (simultaneously, i.e. maintaining the correspondence between the data and the classes), split them into train and test sets, and then read the train and test data into two data loaders and feed these into a PyTorch network.
I wrote this:
edge_origins = np.array([[0,1,2,3,4],[6,7,8]])
edge_destinations = np.array([[1,2,3,4,5],[7,8,9]])
target = np.array([0,1])
x = [[np.array([0.1,0.5,0.2]),np.array([0.5,0.6,0.23]),
np.array([0.1,0.5,0.5]),np.array([0.1,0.6,0.23]),
np.array([0.1,0.4,0.4]),np.array([0.52,0.6,0.23])],
[np.array([0.1,0.3,0.3]),np.array([0.3,0.6,0.23]),
np.array([0.1,0.1,0.2]),np.array([0.4,0.6,0.23])]]
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
dataset = Data(x=x, edge_index=edge_index, y=target, num_classes=len(set(target)))
print(dataset)
And the error is:
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
ValueError: expected sequence of length 5 at dim 2 (got 3)
But then once that is fixed I think the next step is:
torch.manual_seed(12345)
dataset = dataset.shuffle()
train_dataset = dataset[:1] #for toy example
test_dataset = dataset[1:]
print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]
        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        return x
model = GCN(hidden_channels=64)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
def train():
    model.train()
    for data in train_loader:  # Iterate in batches over the training dataset.
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
def test(loader):
    model.eval()
    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
Could someone demonstrate how to get my data running through the PyTorch network above?
In PyTorch Geometric, a Data object holds a single graph. Your edge lists also have different lengths (5 and 3), so torch.tensor cannot stack them into a single tensor, which is what raises the ValueError above. Instead, iterate through your arrays and build one Data object per graph:
import torch
import numpy as np
from torch_geometric.data import Data

data_list = []
for i in range(2):
    edge_index_curr = torch.tensor([edge_origins[i],
                                    edge_destinations[i]],
                                   dtype=torch.long)
    data = Data(x=torch.tensor(np.array(x[i]), dtype=torch.float),
                edge_index=edge_index_curr,
                y=torch.tensor(target[i]))
    data_list.append(data)
You can then use this list of Data objects to create your own DataLoader (note: the DataLoader from PyTorch Geometric, not the plain torch one, so that graphs are collated correctly):
from torch_geometric.loader import DataLoader  # in older PyG versions: from torch_geometric.data import DataLoader

loader = DataLoader(data_list, batch_size=32)
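Each batch then behaves like one big disconnected graph, with a batch vector mapping every node to its source graph; a quick sanity check, as a sketch:
for batch in loader:
    # x: stacked node features, edge_index: edge lists with shifted node ids,
    # batch: graph index per node, y: one label per graph
    print(batch.x.shape, batch.edge_index.shape, batch.batch, batch.y)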
If you need to split into train/val/test (I would advise having more than 2 samples in that case), you can do it manually or using sklearn.model_selection, as sketched below.
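For instance, a minimal sketch with sklearn; since each Data object carries its own y, splitting the list of graphs keeps the data and labels aligned (which also covers the simultaneous shuffling asked about in the question):
from sklearn.model_selection import train_test_split

train_dataset, test_dataset = train_test_split(data_list, test_size=0.2, random_state=12345)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)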
For data augmentation, if you really do have very little data, pytorch-geometric also comes with transforms.
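As a sketch, a transform can simply be applied to each graph in the list; NormalizeFeatures is just one example from the transforms module (strictly preprocessing rather than augmentation):
import torch_geometric.transforms as T

transform = T.NormalizeFeatures()  # row-normalizes node features
data_list = [transform(data) for data in data_list]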
I currently have an RNN model for time series predictions. It uses 3 input features ("value", "temperature" and "hour of the day") from the last 96 time steps to predict the next 96 time steps of the feature "value".
Here you can see a schema of it (schema image omitted), and here is the current code:
#Import modules
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
# Define the parameters of the RNN and the training
epochs = 1
batch_size = 50
steps_backwards = 96
steps_forward = 96
split_fraction_trainingData = 0.70
split_fraction_validationData = 0.90
randomSeedNumber = 50
#Read dataset
df = pd.read_csv('C:/Users/Desktop/TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime':[0]}, index_col=['datetime'])
# standardize data
data = df.values
indexWithYLabelsInData = 0
data_X = data[:, 0:3]
data_Y = data[:, indexWithYLabelsInData].reshape(-1, 1)
scaler_standardized_X = StandardScaler()
data_X = scaler_standardized_X.fit_transform(data_X)
data_X = pd.DataFrame(data_X)
scaler_standardized_Y = StandardScaler()
data_Y = scaler_standardized_Y.fit_transform(data_Y)
data_Y = pd.DataFrame(data_Y)
# Prepare the input data for the RNN
series_reshaped_X = np.array([data_X[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
series_reshaped_Y = np.array([data_Y[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
timeslot_x_train_end = int(len(series_reshaped_X) * split_fraction_trainingData)
timeslot_x_valid_end = int(len(series_reshaped_X) * split_fraction_validationData)
X_train = series_reshaped_X[:timeslot_x_train_end, :steps_backwards]
X_valid = series_reshaped_X[timeslot_x_train_end:timeslot_x_valid_end, :steps_backwards]
X_test = series_reshaped_X[timeslot_x_valid_end:, :steps_backwards]
Y_train = series_reshaped_Y[:timeslot_x_train_end, steps_backwards:]
Y_valid = series_reshaped_Y[timeslot_x_train_end:timeslot_x_valid_end, steps_backwards:]
Y_test = series_reshaped_Y[timeslot_x_valid_end:, steps_backwards:]
# Build the model and train it
np.random.seed(randomSeedNumber)
tf.random.set_seed(randomSeedNumber)
model = keras.models.Sequential([
    keras.layers.SimpleRNN(10, return_sequences=True, input_shape=[None, 3]),
    keras.layers.SimpleRNN(10, return_sequences=True),
    keras.layers.TimeDistributed(keras.layers.Dense(1))
])
model.compile(loss="mean_squared_error", optimizer="adam", metrics=['mean_absolute_percentage_error'])
history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_valid, Y_valid))
#Predict the test data
Y_pred = model.predict(X_test)
# Inverse the scaling (traInv: transformation inversed)
data_X_traInv = scaler_standardized_X.inverse_transform(data_X)
data_Y_traInv = scaler_standardized_Y.inverse_transform(data_Y)
series_reshaped_X_notTransformed = np.array([data_X_traInv[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
X_test_notTranformed = series_reshaped_X_notTransformed[timeslot_x_valid_end:, :steps_backwards]
Y_pred_traInv = scaler_standardized_Y.inverse_transform(Y_pred)
Y_test_traInv = scaler_standardized_Y.inverse_transform(Y_test)
# Calculate errors for every time slot of the multiple predictions
abs_diff = np.abs(Y_pred_traInv - Y_test_traInv)
abs_diff_perPredictedSequence = np.zeros((len(Y_test_traInv)))
average_LoadValue_testData_perPredictedSequence = np.zeros((len(Y_test_traInv)))
abs_diff_perPredictedTimeslot_ForEachSequence = np.zeros((len(Y_test_traInv)))
absoluteError_Load_Ratio_allPredictedSequence = np.zeros((len(Y_test_traInv)))
absoluteError_Load_Ratio_allPredictedTimeslots = np.zeros((len(Y_test_traInv)))
mse_perPredictedSequence = np.zeros((len(Y_test_traInv)))
rmse_perPredictedSequence = np.zeros((len(Y_test_traInv)))
for i in range(0, len(Y_test_traInv)):
    for j in range(0, len(Y_test_traInv[0])):
        abs_diff_perPredictedSequence[i] = abs_diff_perPredictedSequence[i] + abs_diff[i][j]
    mse_perPredictedSequence[i] = mean_squared_error(Y_pred_traInv[i], Y_test_traInv[i])
    rmse_perPredictedSequence[i] = np.sqrt(mse_perPredictedSequence[i])
    abs_diff_perPredictedTimeslot_ForEachSequence[i] = abs_diff_perPredictedSequence[i] / len(Y_test_traInv[0])
    average_LoadValue_testData_perPredictedSequence[i] = np.mean(Y_test_traInv[i])
    absoluteError_Load_Ratio_allPredictedSequence[i] = abs_diff_perPredictedSequence[i] / average_LoadValue_testData_perPredictedSequence[i]
    absoluteError_Load_Ratio_allPredictedTimeslots[i] = abs_diff_perPredictedTimeslot_ForEachSequence[i] / average_LoadValue_testData_perPredictedSequence[i]
rmse_average_allPredictedSequences = np.mean(rmse_perPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedSequence = np.mean(absoluteError_Load_Ratio_allPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedTimeslots = np.mean(absoluteError_Load_Ratio_allPredictedTimeslots)
absoluteAverageError_allPredictedSequences = np.mean(abs_diff_perPredictedSequence)
absoluteAverageError_allPredictedTimeslots = np.mean(abs_diff_perPredictedTimeslot_ForEachSequence)
Here you have some test data (download link from the original post omitted).
So now I would actually like to include not only past values of the features in the prediction but also future values of the features "temperature" and "hour of the day". The future values of "temperature" can, for example, be taken from an external weather forecasting service, and for "hour of the day" the future values are known in advance (in the test data I have included a "forecast" of the temperature that is not a real forecast; I just randomly changed the values).
This way, I could assume that, for several applications and data sets, the forecast could be improved.
In a schema it would look like this (schema image omitted):
Can anyone tell me how I can do that in Keras with an RNN (or LSTM)? One way could be to include the future values as independent input features, but I would like the model to know that the future values of a feature are connected to its past values.
Reminder: does anybody have an idea how to do this? I'd highly appreciate every comment.
The standard approach is to use an encoder-decoder architecture (see 1 and 2 for instance):
The encoder takes as input the past values of the features and of the target and returns an output representation.
The decoder takes as input the encoder output and the future values of the features and returns the predicted values of the target.
You can use any architecture for the encoder and the decoder, and there are also different ways of passing the encoder output to the decoder (e.g. adding or concatenating it to the decoder input features, adding or concatenating it to the output of some intermediate decoder layer, or adding it to the final decoder output); the code below is just an example.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, LSTM, TimeDistributed, Concatenate, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# define the inputs
target = ['value']
features = ['temperatures', 'hour of the day']
sequence_length = 96
# import the data
df = pd.read_csv('TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime': [0]}, index_col=['datetime'])
# scale the data
target_scaler = StandardScaler().fit(df[target])
features_scaler = StandardScaler().fit(df[features])
df[target] = target_scaler.transform(df[target])
df[features] = features_scaler.transform(df[features])
# extract the input and output sequences
X_encoder = [] # past features and target values
X_decoder = [] # future features values
y = [] # future target values
for i in range(sequence_length, df.shape[0] - sequence_length):
    X_encoder.append(df[features + target].iloc[i - sequence_length: i])
    X_decoder.append(df[features].iloc[i: i + sequence_length])
    y.append(df[target].iloc[i: i + sequence_length])
X_encoder = np.array(X_encoder)
X_decoder = np.array(X_decoder)
y = np.array(y)
# define the encoder and decoder
def encoder(encoder_features):
    y = LSTM(units=100, return_sequences=True)(encoder_features)
    y = TimeDistributed(Dense(units=1))(y)
    return y

def decoder(decoder_features, encoder_outputs):
    x = Concatenate(axis=-1)([decoder_features, encoder_outputs])
    # x = Add()([decoder_features, encoder_outputs])
    y = TimeDistributed(Dense(units=100, activation='relu'))(x)
    y = TimeDistributed(Dense(units=1))(y)
    return y
# build the model
encoder_features = Input(shape=X_encoder.shape[1:])
decoder_features = Input(shape=X_decoder.shape[1:])
encoder_outputs = encoder(encoder_features)
decoder_outputs = decoder(decoder_features, encoder_outputs)
model = Model([encoder_features, decoder_features], decoder_outputs)
# train the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.fit([X_encoder, X_decoder], y, epochs=100, batch_size=128)
# extract the last predicted sequence
y_true = target_scaler.inverse_transform(y[-1, :])
y_pred = target_scaler.inverse_transform(model.predict([X_encoder, X_decoder])[-1, :])
# plot the last predicted sequence
plt.plot(y_true.flatten(), label='actual')
plt.plot(y_pred.flatten(), label='predicted')
plt.legend()
plt.show()
In the example above the model takes two inputs, X_encoder and X_decoder, so in your case when generating the forecasts you can use the past observed temperatures in X_encoder and the future temperature forecasts in X_decoder.
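As a concrete illustration, a hedged sketch of assembling the two inputs for one new forecast; temperature_forecast and hour_forecast are hypothetical arrays holding the next 96 values from your weather service and the calendar:
# hypothetical future inputs (96 values each) from external sources
future = pd.DataFrame({'temperatures': temperature_forecast,
                       'hour of the day': hour_forecast})
future[features] = features_scaler.transform(future[features])  # reuse the fitted scaler

# last 96 observed (already scaled) rows as encoder input: shape (1, 96, 3)
encoder_input = df[features + target].iloc[-sequence_length:].to_numpy()[np.newaxis]
# next 96 forecast rows as decoder input: shape (1, 96, 2)
decoder_input = future[features].to_numpy()[np.newaxis]

y_next = target_scaler.inverse_transform(model.predict([encoder_input, decoder_input])[0])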
This is PyTorch code for time series prediction with a known external/exogenous regressor over the forecasted period. Hope it helps! Have a marvellous day!
The input format is a 3D tensor and the output a 1D array (MISO: Multiple Inputs, Single Output).
def CNN_Attention_Bidirectional_LSTM_Encoder_Decoder_predictions(model, data, regressors, extrapolations_length):
    n_input = extrapolations_length
    pred_list = []
    batch = data[-n_input:]
    # first step: predict from the last observed window and append the known regressor value
    model = model.train()
    pred_list.append(torch.cat((model(batch)[-1],
                                torch.FloatTensor(regressors.iloc[1, [1]]).to(device).unsqueeze(0)), 1))
    batch = torch.cat((batch[n_input - 1].unsqueeze(0), pred_list[-1].unsqueeze(0)), 1)
    batch = batch[:, 1:, :]
    # remaining steps: slide the window, feeding each prediction (plus its regressor) back in
    for i in range(n_input - 1):
        model = model.eval()
        pred_list.append(torch.cat((model(batch).squeeze(0),
                                    torch.FloatTensor(regressors.iloc[i + 1, [1]]).to(device).unsqueeze(0)), 1))
        batch = torch.cat((batch, pred_list[-1].unsqueeze(0)), 1)
        batch = batch[:, 1:, :]
    model = model.train()
    # return only the predicted target column for each forecasted step
    return np.array([pred_list[j].cpu().detach().numpy() for j in range(n_input)])[:, :, 0]
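A hedged usage sketch, assuming model is a trained sequence model, data is a 3D tensor of past windows already on device, and regressors is a DataFrame whose second column holds the future exogenous values:
preds = CNN_Attention_Bidirectional_LSTM_Encoder_Decoder_predictions(
    model, data, regressors, extrapolations_length=96)
print(preds.shape)  # (96, 1): one predicted target value per forecasted step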
I am new to machine learning and this is my first time creating a linear regression model on a dataset (which is a big step for me). I have created my reference rows and reshaped them. The only problem is that it is too slow. Is there any code or a better way I can use? It would be great if you had a chance to revise my code.
Thank you.
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
db = pd.read_csv("Melbourne_housing_FULL.csv")
db2 = pd.read_csv("MELBOURNE_HOUSE_PRICES_LESS.csv")
"""['Suburb', 'Address',
'Rooms', 'Type', 'Price',
'Method', 'SellerG',
'Date', 'Distance',
'Postcode', 'Bedroom2', 'Bathroom', 'Car',
'Landsize', 'BuildingArea',
'YearBuilt', 'CouncilArea',
'Lattitude','Longtitude',
'Regionname', 'Propertycount']
column names for Full.csv"""
rooms_properties = db[["Rooms","Landsize","Bathroom","Car","YearBuilt"]].copy()
rooms_properties.fillna(rooms_properties.mean(),inplace=True)
rooms_price = db[["Price"]].copy()
rooms_price.fillna(rooms_price.mean(),inplace=True)
room_array_properties = rooms_properties.to_numpy()
room_array_price = rooms_price.to_numpy()
## Split indices into two lists by percentage
def indice_splitter(array_prop, x=0.2):
    val = np.random.permutation(len(array_prop))
    percent_1 = val[:int(len(val) * x)]
    percent_2 = val[int(len(val) * x):]
    return percent_2, percent_1
## Convert the arrays to tensors
train_indices,validation_indices = indice_splitter(room_array_price)
train_data,targets1 = room_array_properties[train_indices], room_array_price[train_indices]
validation_data ,targets2 = torch.from_numpy(room_array_properties[validation_indices]).float(), torch.from_numpy(room_array_price[validation_indices]).float()
t_data, tar1 = torch.tensor(train_data,requires_grad=True).float(), torch.tensor(targets1,requires_grad=True).float()
# rooms_price = rooms_price[rooms_price.notnull()]
# r_nonull = [rooms_properties.loc[rooms_properties[i].notnull()] for i in rooms_properties.columns]
# r_nonull = r_nonull[len(r_nonull)-1]
# r_array = r_nonull.to_numpy()
# weight = torch.rand(5,1, dtype=float,requires_grad=True)
# bias = torch.randn(len(train_data),dtype=float,requires_grad=True)
## my model and result
model = nn.Linear(5,1)
weight, bias = model.parameters()
train_ds = TensorDataset(t_data,tar1)
batch_size = 10
train_dl = DataLoader(train_ds,batch_size,shuffle=True)
preds = model(t_data)
loss_fn = F.mse_loss
opt = torch.optim.SGD(model.parameters(), lr= 1e-5)
#
def fit(num_epochs, model, loss_fn, opt):
    for epochy in range(num_epochs):
        for xb, yb in train_dl:
            # forward pass and loss
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            # update via stochastic gradient descent
            opt.step()
            # reset the gradients
            opt.zero_grad()
        if (epochy + 1) % 10 == 0:
            print("{}/{}, Loss:{:.4f}".format(epochy + 1, num_epochs, loss.item()))

fit(10, model, loss_fn, opt)
The output is: 10/10, Loss:nan
I expect the loss value to decrease every time.
I want to run this regression for at least 1000 epochs.
I have a laptop with a 1660 Ti, a 9th-gen i7, and 16 GB of RAM.
Note: Long Post. Please bear with me
I have implemented a convolutional neural network in PyTorch on the KMNIST dataset. I need to use SMAC to optimize the learning rate and momentum of the Stochastic Gradient Descent optimizer of the CNN. I am new to hyperparameter optimization, and what I learnt from the SMAC documentation is:
SMAC evaluates the algorithm to be optimized by invoking it through a Target Algorithm Evaluator (TAE).
We need a Scenario-object to configure the optimization process.
run_obj parameter in Scenario object specifies what SMAC is supposed to optimize.
My ultimate goal is to get good accuracy or a low loss.
This is what I have done so far:
Convolution Neural Network
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
from datasets import *
import torch.utils.data
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Create the model class
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()  # to inherit the features of nn.Module
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1)
        # in_channels = 1 because of grey scale image
        # kernel_size = feature_size
        # padding = 1 because for same padding = [(filter_size - 1)/2]
        # the output size of the 8 feature maps is [(input_size - filter_size + 2*padding)/stride + 1]
        # Batch Normalization
        self.batchnorm1 = nn.BatchNorm2d(8)
        # RELU
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # After max pooling, the output of each feature map is 28/2 = 14
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5, stride=1, padding=2)
        # Batch Normalization
        self.batchnorm2 = nn.BatchNorm2d(32)
        # RELU
        # self.relu = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # After max pooling, the output of each feature map is 14/2 = 7
        # Flatten the feature maps. You have 32 feature maps, each of size 7x7 --> 32*7*7 = 1568
        self.fc1 = nn.Linear(in_features=1568, out_features=600)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=600, out_features=10)

    def forward(self, x):
        out = self.cnn1(x)
        # out = F.relu(self.cnn1(x))
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool2(out)
        # Now we have to flatten the output: this is where we apply the feed-forward network.
        # It will take the shape (batch_size, 1568) = (100, 1568)
        out = out.view(-1, 1568)
        # Then we forward through our fully connected layers
        out = self.fc1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out
def train(model, train_loader, optimizer, epoch, CUDA, loss_fn):
    model.train()
    cum_loss = 0
    iter_count = 0
    for i, (images, labels) in enumerate(train_loader):
        if CUDA:
            images = Variable(images.cuda())
            images = images.unsqueeze(1)
            images = images.type(torch.FloatTensor)
            images = images.cuda()
            labels = Variable(labels.cuda())
            labels = labels.type(torch.LongTensor)
            labels = labels.cuda()
        else:
            images = Variable(images)
            images = images.unsqueeze(1)
            images = images.type(torch.DoubleTensor)
            labels = Variable(labels)
            labels = labels.type(torch.DoubleTensor)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        cum_loss += loss.item()  # .item() so we accumulate a float, not the graph
        if (i + 1) % batch_size == 0:
            correct = 0
            total = 0
            acc = 0
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            if CUDA:
                correct += (predicted.cpu() == labels.cpu()).sum()
            else:
                correct += (predicted == labels).sum()
            accuracy = 100 * correct / total
        if i % len(train_loader) == 0:
            iter_count += 1
    ave_loss = cum_loss / batch_size
    return ave_loss
batch_size = 100
epochs = 5
e = range(epochs)
#print(e)
#Load datasets
variable_name=KMNIST()
train_images = variable_name.images
train_images = torch.from_numpy(train_images)
#print(train_images.shape)
#print(type(train_images))
train_labels = variable_name.labels
train_labels = torch.from_numpy(train_labels)
#print(train_labels.shape)
#print(type(train_labels))
train_dataset = torch.utils.data.TensorDataset(train_images, train_labels)
# Make the dataset iterable
train_load = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
print('There are {} images in the training set' .format(len(train_dataset)))
print('There are {} images in the loaded training set' .format(len(train_load)))
def net(learning_rate, Momentum):
    model = CNN()
    CUDA = torch.cuda.is_available()
    if CUDA:
        model = model.cuda()
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=Momentum, nesterov=True)
    iteration = 0
    total_loss = []
    for epoch in range(epochs):
        ave_loss = train(model, train_load, optimizer, epoch, CUDA, loss_fn)
        total_loss.append(ave_loss)
    return optimizer, loss_fn, model, total_loss
optimizer, loss_fn, model, total_loss = net(learning_rate= 0.01, Momentum = 0.09)
# Print model's state_dict
print("---------------")
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
print("---------------")
#print("Optimizer's state_dict:")
#for var_name in optimizer.state_dict():
# print(var_name, "\t", optimizer.state_dict()[var_name])
torch.save(model.state_dict(), "kmnist_cnn.pt")
plt.plot(e, (np.array(total_loss)))
plt.xlabel("# Epoch")
plt.ylabel("Loss")
plt.show()
print('Done!')
SMAC hyperparameter optimization:
from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
UniformFloatHyperparameter, UniformIntegerHyperparameter
from smac.configspace.util import convert_configurations_to_array
#from ConfigSpace.conditions import InCondition
# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()
# Define the hyperparameters to optimize (learning rate and momentum) and add them to our cs
lr = UniformFloatHyperparameter('learning_rate', 1e-4, 1e-1, default_value=1e-2)
momentum = UniformFloatHyperparameter('Momentum', 0.01, 0.1, default_value=0.09)
cs.add_hyperparameters([lr, momentum])
def kmnist_from_cfg(cfg):
    cfg = {k: cfg[k] for k in cfg if cfg[k]}
    print('Config is', cfg)
    # optimizer, loss_fn, model, total_loss = net(**cfg)
    # optimizer, loss_fn, model, total_loss = net(learning_rate=cfg["learning_rate"], Momentum=cfg["Momentum"])
    optimizer, loss_fn, model, total_loss = net(learning_rate=0.02, Momentum=0.05)
    return optimizer, loss_fn, model, total_loss
# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 200,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })
#def_value = kmnist_from_cfg(cs.get_default_configuration())
#print("Default Value: %.2f" % (def_value))
# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario,tae_runner=kmnist_from_cfg) #rng=np.random.RandomState(42)
smac.solver.intensifier.tae_runner.use_pynisher = False
print("SMAC", smac)
incumbent = smac.optimize()
inc_value = kmnist_from_cfg(incumbent)
print("Optimized Value: %.2f" % (inc_value))
When I give loss as the run_obj parameter, I get the error message:
ArgumentError: argument --run-obj/--run_obj: invalid choice: 'total_loss' (choose from 'runtime', 'quality')
To be honest, I do not know what "quality" means. Anyway, when I give quality as the run_obj parameter, I get the error message:
TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
If I understood it correctly, the above error message is obtained when an int is expected but a str is given. To check whether the problem was with the configuration space, I tried
optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)
instead of these:
optimizer, loss_fn, model, total_loss = net(**cfg)
optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])
but the error remains the same.
Any ideas on how to use SMAC to optimize the hyperparameters of a CNN, and why I get this error message? I tried looking for similar problems online. This post was a little helpful. Unfortunately, since there is no example of SMAC on a NN (at least I did not find one), I cannot figure out the solution. I have run out of ideas.
Any help, ideas or useful links are appreciated.
Thank you!
I believe the tae_runner (kmnist_from_cfg in your case) has to be a callable that takes a configuration space point, which you correctly provide, and outputs a single number. You output a tuple of things. Perhaps only return the total_loss on the validation set? I am basing this on the SVM example in the SMAC GitHub at https://github.com/automl/SMAC3/blob/master/examples/svm.py.
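A minimal sketch of that fix, reusing the question's net function; returning the final epoch's training loss is just one choice of scalar (a held-out validation loss would be better, as noted above):
def kmnist_from_cfg(cfg):
    # SMAC minimizes the returned value, so return one float, not a tuple
    optimizer, loss_fn, model, total_loss = net(
        learning_rate=cfg["learning_rate"], Momentum=cfg["Momentum"])
    return float(total_loss[-1])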
I am trying to train an MLP on sparse data to predict a forecast. However, the forecast on the test data gives the same value for all observations. Once I omit the activation function from each layer, the outputs start to differ.
my code is below:
# imports
import numpy as np
import tensorflow as tf
import random
import json
from scipy.sparse import rand
# Parameters
learning_rate= 0.1
training_epochs = 50
batch_size = 100
# Network Parameters
m= 1000 #number of features
n= 5000 # number of observations
hidden_layers = [5,2,4,1,6]
n_layers = len(hidden_layers)
n_input = m
n_classes = 1 # it's a regression problem
X_train = rand(n, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_train = np.random.randint(4, size=n)
X_test = rand(200, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_test = np.random.randint(4, size=200)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None])
# Store layers weight & bias
weights = {}
biases = {}
weights['h1'] = tf.Variable(tf.random_normal([n_input, hidden_layers[0]])) # first matrix
biases['b1'] = tf.Variable(tf.random_normal([hidden_layers[0]]))
for i in xrange(2, n_layers+1):
    weights['h'+str(i)] = tf.Variable(tf.random_normal([hidden_layers[i-2], hidden_layers[i-1]]))
    biases['b'+str(i)] = tf.Variable(tf.random_normal([hidden_layers[i-1]]))
weights['out'] = tf.Variable(tf.random_normal([hidden_layers[-1], 1])) # matrix between last layer and output
biases['out']= tf.Variable(tf.random_normal([1]))
# Create model
def multilayer_perceptron(_X, _weights, _biases):
    layer_begin = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1'], a_is_sparse=True), _biases['b1']))
    for layer in xrange(2, n_layers+1):
        layer_begin = tf.nn.relu(tf.add(tf.matmul(layer_begin, _weights['h'+str(layer)]), _biases['b'+str(layer)]))
        #layer_end = tf.nn.dropout(layer_begin, 0.3)
    return tf.matmul(layer_begin, _weights['out']) + _biases['out']
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
rmse = tf.reduce_sum(tf.abs(y-pred))/tf.reduce_sum(tf.abs(y)) # relative absolute error (called "rmse" here, but not actually an RMSE)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(rmse) # Adam Optimizer
# Initializing the variables
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    # training
    for step in xrange(training_epochs):
        # Generate a minibatch.
        start = random.randrange(1, n - batch_size)
        #print start
        batch_xs = X_train[start:start+batch_size, :]
        batch_ys = Y_train[start:start+batch_size]
        # run one optimization step and fetch the loss
        _, rmseRes = sess.run([optimizer, rmse], feed_dict={x: batch_xs, y: batch_ys})
        if step % 20 == 0:
            print "rmse [%s] = %s" % (step, rmseRes)
    # testing
    pred_test = multilayer_perceptron(X_test, weights, biases)
    print "prediction", pred_test.eval()[:20]
    print "actual = ", Y_test[:20]
PS: I am generating my data randomly just to reproduce the issue. My data is in fact sparse, quite similar to the randomly generated data. The problem I want to solve is: the MLP gives the same prediction for all observations in the test data.
That's a sign that your training failed. With GoogLeNet ImageNet training I've seen it label everything as "nematode" when started with a bad choice of hyper-parameters. Things to check: does your training loss decrease? If it doesn't decrease, try different learning rates/architectures. If it decreases to zero, maybe your loss is wrong, as was the case here.
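For instance, a hedged sketch of swapping in a plain MSE loss in the question's TF1 style; note that y has shape [None] while pred has shape [None, 1], so pred is flattened first to avoid a silent broadcast (both names come from the question's code):
# plain mean-squared-error loss; the reshape avoids broadcasting [None] against [None, 1]
mse = tf.reduce_mean(tf.square(y - tf.reshape(pred, [-1])))
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(mse)  # smaller lr than 0.1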