I am getting the error:
ValueError: Input arrays should have the same number of samples as target arrays. Found 6 input samples and 128 target samples.
when training with keras.
I am using a generator to generate a moving window over my timeseries that looks like this:
def generator_val(X,y,number_of_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
while 1:
machine_idcs = np.concatenate(
[np.repeat(i, len(np.arange(length, Xi.shape[0], overview_steps))) for i, Xi in enumerate(X)])
# Generate all indicies for all possible steps.
step_idcs = np.concatenate(
[np.arange(length, Xi.shape[0], overview_steps) for Xi in X])
# We create a matrix of indices from which we sample for the mini
# batches.
examples = np.zeros((len(step_idcs), 2), dtype=np.int32)
examples[:, 0] = machine_idcs
examples[:, 1] = step_idcs
for i in range(0, examples.shape[0], batch_size):
# Get the machine and step indices of the mini-batch.
mbatch = examples[i:i + batch_size]
# Preinitialize the mini batch.
sequence = np.zeros(
(len(mbatch), length, X[0].shape[1]), np.float32)
mini_batch_y = np.zeros((batch_size,), dtype=np.float32)
for j in range(mbatch.shape[0]):
machine_idx = mbatch[j, 0]
step_idx = mbatch[j, 1]
sequence[j] = X[machine_idx][step_idx - length: step_idx]
mini_batch_y[j] = y[machine_idx][step_idx]
mini_batch_X = sequence
yield mini_batch_X, mini_batch_y
To start the training i am using the model.fit_generator:
model.fit_generator(generator(X, y, number_of_steps= number_of_steps,batch_size=128, length=300),
validation_data = generator(X_val, y_val, number_of_steps= number_of_steps_val,batch_size=128, length=300),
validation_steps = number_of_steps_val,
samples_per_epoch= number_of_steps,
epochs=2)
It is like if the generator is not using the infinity loop, or cant reset within one batch?
Is there a possibility to reset the generator after each epoch?
Update
###sample data
test_X = np.random.rand(len(X),10037, 24).astype(np.float32)
test_Y = np.random.randint(0,2,(len(X),10037)).astype(np.float32)
val_X = np.random.rand(len(X_val), 10037,24).astype(np.float32)
val_Y = np.random.randint(0,1,(len(X_val),10037)).astype(np.float32)
X = [item for item in test_X]
Y = [item for item in test_Y]
X_val = [item for item in val_X]
Y_val = [item for item in val_Y]
Workaround
I have found a solution for this Error, however i am not really happy with it, because it throws away some of the last sequences. The solution is, that there are as many sequences that the data set can be devided by the batch size.
The code for that looks like this:
window_steps = 50
number_of_samples = sum([X_[i].shape[0] for i in range(len(X_))])-len(X_)*300
number_of_steps = int(number_of_samples/128/window_steps)
number_of_samples_val = sum([X_val_[i].shape[0] for i in range(len(X_val_))])-len(X_val_)*300
number_of_steps_val = int(number_of_samples_val/128/window_steps)
def generator_val(X,y, number_of_steps, window_steps = window_steps, batch_size=128, length=300, overview_steps = 300, shuffle=True, prediction = False):
while 1:
machine_idcs = np.concatenate(
[np.repeat(i, len(np.arange(length, Xi.shape[0], window_steps))) for i, Xi in enumerate(X)])
# Generate all indicies for all possible steps.
step_idcs = np.concatenate(
[np.arange(length, Xi.shape[0], window_steps) for Xi in X])
# We create a matrix of indices from which we sample for the mini
# batches.
examples = np.zeros((number_of_steps*batch_size, 2), dtype=np.int32)
examples[:, 0] = machine_idcs[:number_of_steps*batch_size]
examples[:, 1] = step_idcs[:number_of_steps*batch_size]
for i in range(0, examples.shape[0], batch_size):
# Get the machine and step indices of the mini-batch.
mbatch = examples[i:i + batch_size]
# Preinitialize the mini batch.
sequence = np.zeros(
(len(mbatch), length, X[0].shape[1]), np.float32)
mini_batch_y = np.zeros((batch_size,), dtype=np.float32)
for j in range(mbatch.shape[0]):
machine_idx = mbatch[j, 0]
step_idx = mbatch[j, 1]
sequence[j] = X[machine_idx][step_idx - length: step_idx]
mini_batch_y[j] = y[machine_idx][step_idx]
mini_batch_X = sequence
yield mini_batch_X, mini_batch_y
model.fit_generator(generator_val(X, Y, number_of_steps= number_of_steps, window_steps = window_steps, batch_size=128, length=300),
validation_data = generator_val(X_val,Y_val, number_of_steps= number_of_steps_val,window_steps = window_steps,batch_size=128, length=300),
validation_steps = number_of_steps_val,
samples_per_epoch= number_of_steps,
epochs=2)
And here is a sample network for it:
input1 = Input(shape=(sequence_length, num_features))
h1 = LSTM(50)(input1)
prediction = Dense(1)(h1)
model = Model(inputs=[input1], outputs=[prediction])
loss = "binary_crossentropy"
optimizer = "adam"
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
So, do you know how to use all Sequences without throwing some away?
Related
I am working on a dataset where I am required to find accuracy of classes with less than 20 samples. So first I used pytorch's ImageFolder to get all the images in the folders.
dataset = ImageFolder('/content/drive/MyDrive/data/Dataset/')
Now to get classes with less than 20 samples I use:
def get_class_distribution(dataset_obj):
count_dict = {k:0 for k,v in dataset_obj.class_to_idx.items()}
for element in dataset_obj:
y_lbl = element[1]
y_lbl = idx2class[y_lbl]
count_dict[y_lbl] += 1
return count_dict
# print("Distribution of classes: \n", get_class_distribution(dataset))
class_distribution = get_class_distribution(dataset)
sampled_classes = [classes for (classes, samples) in class_distribution.items() if samples <= 20]
I get the list of classes correctly but my doubt is how do I proceed further for inference? How do I convert/update it to ImageFolder so that I can use the filtered dataset in the following code:
# Test model performance for classes with less than 20 samples.
y_pred_list = []
y_true_list = []
with torch.no_grad():
for x_batch, y_batch in tqdm(data_loader):
x_batch, y_batch = x_batch.to(device), y_batch.to(device)
y_test_pred = model(x_batch)
_, y_pred_tag = torch.max(y_test_pred, dim = 1)
y_pred_list.append(y_pred_tag.cpu().numpy())
y_true_list.append(y_batch.cpu().numpy())
No need of writing 1st block
use this instead
test_data = datasets.ImageFolder('test/', transform=test_transforms)
data_loader = torch.utils.data.DataLoader(test_data, batch_size=16)
y_pred_list = []
accuracy = []
with torch.no_grad():
for x_batch, y_batch in tqdm(data_loader):
x_batch, y_batch = x_batch.to(device), y_batch.to(device)
y_test_pred = model(x_batch)
top_p, top_class = y_test_pred.topk(1, dim=1)
equals = top_class == y_batch.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
print(accuracy/len(data_loader)*100) # this would print %
I'm currently using a LSTM model to make timeserie predictions with Tensorflow 2.2.0
I've been using a large dataset and everything works nicely.
However, the dataset creation takes a lot of RAM and I wanted to use a tensorflow.keras.utils.Sequence to solve the issue, my problem is the following:
When using a Sequence, my model doesn't learn anymore (it predicts the average of the real signal over the whole dataset)
My dataset is created from two python lists x_train_flights and y_train_flights, each containing pandas DataFrames. For each (x_train_flight, y_train_flight) of this list:
x_train_flight of shape (-1, features) containing features signals
y_train_flight of shape (-1, 1) containing one signal being aligned in time with the ones from x_train_flights
The system looks like as follow (I am not allowed to share the real data, I've recreated the graph using pseudo-random signals instead):
Here, features=2 (the blue and orange lines), and look_back=5. That is to say, the 10 points (from x_train_flights) in the rectangle are used to predict the golden point (which is compared to the corresponding point in y_train_flights during the training phase). The gray points are previous predictions.
To create my dataset, I've been using these functions:
def lstm_shapify(sequence, look_back, features):
res = np.empty((look_back, len(sequence), features), dtype=np.float32)
for i in range(look_back):
res[i] = np.roll(sequence, -i * features)
return np.transpose(res, axes=(1, 0, 2))[:-look_back + 1]
def make_dataset(x_flights, y_flights, look_back, features):
x = np.empty((0, look_back, features), dtype=np.float32)
y = np.empty((0, 1), dtype=np.float32)
for i in range(len(x_flights)):
x_sample = x_flights[i].values
y_sample = y_flights[i].values[look_back - 1:]
x = np.concatenate([x, lstm_shapify(x_sample, look_back, features)])
y = np.concatenate([y, y_sample])
return x, y
And I fit my network with the following:
model.fit(
x_train,
y_train,
epochs=7,
batch_size=batch_size
)
So, I've created this custom Sequence:
class LSTMGenerator(Sequence):
def __init__(
self,
x_flights: List[DataFrame],
y_flights: List[DataFrame],
look_back: int,
batch_size: int,
features: int
):
self.x_flights = x_flights
self.y_flights = []
self.look_back = look_back
self.batch_size = batch_size
self.features = features
self.length = 0
for y_flight in y_flights:
y = y_flight.iloc[look_back - 1:].to_numpy()
self.y_flights.append(y)
self.length += len(y) // batch_size
def __getitem__(self, index):
flight_index = 0
while True:
n = len(self.y_flights[flight_index]) // self.batch_size
if index < n:
break
flight_index += 1
index = index - n
start_index = index * self.batch_size
x_batch = lstm_shapify(
self.x_flights[flight_index]
.iloc[start_index:start_index + self.batch_size + self.look_back - 1]
.to_numpy(),
self.look_back,
self.features
)
y_batch = self.y_flights[flight_index][start_index:start_index + self.batch_size]
return x_batch, y_batch
def __len__(self):
return self.length
Each tuple (x, y) it returns are two numpy arrays of shape (batch_size, look_back, features) and (batch_size, 1) respectively.
And now I'm trying to fit it with:
model.fit(
LSTMGenerator(x_train_flights, y_train_flights, look_back, batch_size, features),
epochs=epochs
)
Here is my model:
model = Sequential()
model.add(LSTM(
100,
input_shape=(look_back, features),
kernel_regularizer=regularizers.l2(1e-3),
bias_regularizer=regularizers.l2(1e-4)
))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1, activation='tanh'))
model.compile(optimizer='adam', loss='mse')
Hope you can help me
EDIT: more details about the datasets
I solved it by taking a break and looking at the code once again (and I realized it was a silly mistake): the issue of my Sequence comes from the samples in each batch being consecutive samples in time, whereas my compute-everything-dataset's batches where nicely shuffled.
My Sequence was problematic because the batches were selected at a random index from a random dataset. Now I select each sample at a random index from a random dataset to create a single batch.
Here is a working example:
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.utils import *
import numpy as np
import tensorflow as tf
np.random.seed(1234)
tf.random.set_seed(1234)
features = 3
lookback = 7
model = Sequential()
model.add(LSTM(500, input_shape = (lookback, features)))
model.add(Dense(1, activation='tanh'))
XS = np.random.randn(200, features)
YS = np.random.randn(200)
class LookbackSeq(Sequence):
def __init__(self, XS, YS, batch_size, lookback):
self.XS = XS
self.YS = YS
self.batch_size = batch_size
self.lookback = lookback
def __len__(self):
n_windows = self.XS.shape[0] - self.lookback
return int(np.ceil(n_windows / self.batch_size))
def __getitem__(self, i):
base = i * self.batch_size
n_windows = self.XS.shape[0] - self.lookback
batch_size = min(n_windows - base, self.batch_size)
X = np.zeros((batch_size, self.lookback, self.XS.shape[1]))
Y = np.zeros((batch_size, 1))
for i in range(batch_size):
for j in range(self.lookback):
X[i, j] = self.XS[base + i + j]
Y[i] = self.YS[base + i + self.lookback]
return X, Y
model.compile(optimizer='adam', loss='mse')
# ALL SAMPLES IN MEMORY
X, Y = [], []
for i in range(len(XS) - lookback):
X.append(XS[i:i+lookback])
Y.append(YS[i+lookback])
X, Y = np.array(X), np.array(Y)
model.fit(X, Y, epochs = 10, batch_size = 4, shuffle = False)
# GENERATED ON THE FLY
# gen = LookbackSeq(XS, YS, 4, lookback)
# model.fit(x = gen,
# steps_per_epoch = len(gen),
# shuffle = False,
# epochs = 10)
I'm assuming your input data has the shape X = (n_points, n_features) and Y = (n_points,). LookbackSeq does the batching and windowing (lookback) for you.
You can comment and uncomment the relevant lines to either train with samples generated on the fly or with them all stored in memory. You should get identical results.
I am trying to put a dataset through a neural network. It is running on a Google Cloud virtual machine using a Tesla V100 GPU. However, before I can finish training a single epoch, I get an error message: "Cuda error: device side assert triggered". I think the problem may be in my data, but I have no idea where and I'm not sure what the problem is exactly (but I tested the code with a different dataset and it ran fine).
The thing that is odd is that the network actually runs for some time before triggering the error. I had it print every time it finished a batch and sometimes it finishes 60+ batches, sometimes 80+, I've even gotten it to finish as many as 140 batches (given the size of my data and my batches, there are 200 batches in each epoch). No matter how many it finishes, it eventually triggers this error and has not completed an epoch.
I tried setting CUDA_LAUNCH_BLOCKING = 1 and did not get any better error message. I of course made sure the neural network has the right number of input and output parameters (this is given because it works for the first however many batches). I also standardized the inputs. Some were really large and some were closes to zero so I normalized them to all fall in the range [-1,1]. Certainly the network should be able to handle that but it still causes a problem.
Here is my training loop which WORKS with a different data set. It is always the line "loss.backward()" that eventually triggers the error message.
CUDA_LAUNCH_BLOCKING = 1
start = time.time()
for epoch in range(1,6):
# Decrease learning rate at epoch 3 and 5
if epoch == 3 or epoch == 5:
lr = lr/3
# Setup optimizer
optimizer = optim.SGD(net.parameters(), lr=lr)
# Initialize stats to zeros to track network's progress
running_loss = 0
running_error = 0
num_batches = 0
# Shuffle indices to train randomly
shuffled_indices = torch.randperm(50000)
for count in range(0, 50000, bs):
# Clear gradient before each iteration
optimizer.zero_grad()
# Setup indices for minibatch
if (count + bs > 50000):
indices_list = shuffled_indices[count : ].tolist() + shuffled_indices[ : (count + bs) - 50000].tolist()
indices = torch.Tensor(indices_list)
else:
indices = shuffled_indices[count : count + bs]
# Create minibatch
minibatch_data = train_data[indices]
minibatch_label = train_label[indices]
# Send minibatch to gpu for training
minibatch_data = minibatch_data.to(device)
minibatch_label = minibatch_label.to(device)
temp = minibatch_data - mean
# Standardize entries with mean and std
inputs = ((minibatch_data - mean) / std).view(bs, 33)
# Begin tracking changes
inputs.requires_grad_()
# Forward inputs through the network
scores = net(inputs)
print(scores[:2])
print(minibatch_label)
# Compute loss
loss = criterion(scores, minibatch_label)
# Back propogate neural network
loss.backward()
# Do one step of stochastic gradient descent
optimizer.step()
# Update summary statistics
with torch.no_grad():
num_batches += 1
error = get_error(scores, minibatch_label)
running_error += error
running_loss += loss.item()
print("success: ", num_batches)
# At the end of each epoch, compute and print summary statistics
total_error = running_error / num_batches
avg_loss = running_loss / num_batches
print('Epoch: ', epoch)
print('Time: ', time.time(), '\t Loss: ', avg_loss, '\t Error (%): ', total_error * 100)
Here is my dataset formatting and normalizing:
train_list_updated = []
train_label_list = []
for entry in train_list[1:]:
entry[0] = string_to_int(entry[0])
entry[1] = handedness[entry[1]]
entry[2] = string_to_int(entry[2])
entry[3] = handedness[entry[3]]
entry[4] = string_to_int(entry[4])
entry[5] = string_to_int(entry[5])
entry[6] = string_to_int(entry[6])
entry[17] = entry[17].replace(':','')
entry[-3] = pitch_types[entry[-3]]
entry[-2] = pitch_outcomes[entry[-2]]
train_label_list.append(entry[-2])
del entry[-1]
del entry[-1]
del entry[-3]
train_list_updated.append(entry)
final_train_list = []
for entry in train_list_updated:
for index in range(len(entry)):
try:
entry[index] = float(entry[index])
except:
entry[index] = 0.
final_train_list.append(entry)
# Do the same for the test data
test_list_updated = []
for entry in test_list[1:]:
entry[0] = string_to_int(entry[0])
entry[1] = handedness[entry[1]]
entry[2] = string_to_int(entry[2])
entry[3] = handedness[entry[3]]
entry[4] = string_to_int(entry[4])
entry[5] = string_to_int(entry[5])
entry[6] = string_to_int(entry[6])
entry[17] = entry[17].replace(':','')
entry[-3] = pitch_types[entry[-3]]
del entry[-1]
del entry[-1]
del entry[-3]
test_list_updated.append(entry)
final_test_list = []
for entry in test_list_updated:
for index in range(len(entry)):
try:
entry[index] = float(entry[index])
except:
entry[index] = 0.
final_test_list.append(entry)
# Create tensors of test and train data
train_data = torch.tensor(final_train_list)
train_label = torch.tensor(train_label_list)
test_data = torch.tensor(final_test_list)
And normalizing:
max_indices = torch.argmax(train_data, dim = 0)
min_indices = torch.argmin(train_data, dim = 0)
max_values = []
min_values = []
for i in range(33):
max_idx = max_indices[i].item()
min_idx = min_indices[i].item()
max_val = train_data[max_idx][i]
min_val = train_data[min_idx][i]
max_values.append(max_val)
min_values.append(min_val)
max_values = torch.Tensor(max_values)
min_values = torch.Tensor(min_values)
ranges = max_values - min_values
min_values = min_values.view(1, 33)
min_values = torch.repeat_interleave(min_values, 582205, dim = 0)
ranges = ranges.view(1, 33)
ranges = torch.repeat_interleave(ranges, 582205, dim = 0)
train_data = train_data - min_values
train_data = 2 * (train_data / ranges)
train_data = train_data - 1
And here's my net (a lot is commented out since I thought maybe there was an issue with the gradient zeroing or something. A five layer neural network should definitely not cause a problem though):
"""
DEFINING A NEURAL NETWORK
"""
# Define a fifteen layer artificial neural network
class fifteen_layer_net(nn.Module):
def __init__(self):
super().__init__()
self.linear1 = nn.Linear(33, 200)
self.linear2 = nn.Linear(200, 250)
self.linear3 = nn.Linear(250, 300)
self.linear4 = nn.Linear(300, 350)
self.linear5 = nn.Linear(350, 7)
# self.linear6 = nn.Linear(400, 450)
# self.linear7 = nn.Linear(450, 500)
# self.linear8 = nn.Linear(500, 450)
# self.linear9 = nn.Linear(450, 400)
# self.linear10 = nn.Linear(400, 350)
# self.linear11 = nn.Linear(350, 300)
# self.linear12 = nn.Linear(300, 250)
# self.linear13 = nn.Linear(250, 200)
# self.linear14 = nn.Linear(200, 150)
# self.linear15 = nn.Linear(150, 7)
def forward(self, x):
x = self.linear1(x)
x = F.relu(x)
x = self.linear2(x)
x = F.relu(x)
x = self.linear3(x)
x = F.relu(x)
x = self.linear4(x)
x = F.relu(x)
scores = self.linear5(x)
# x = F.relu(x)
# x = self.linear6(x)
# x = F.relu(x)
# x = self.linear7(x)
# x = F.relu(x)
# x = self.linear8(x)
# x = F.relu(x)
# x = self.linear9(x)
# x = F.relu(x)
# x = self.linear10(x)
# x = F.relu(x)
# x = self.linear11(x)
# x = F.relu(x)
# x = self.linear12(x)
# x = F.relu(x)
# x = self.linear13(x)
# x = F.relu(x)
# x = self.linear14(x)
# x = F.relu(x)
# scores = self.linear15(x)
return scores
Network should output scores, compute a loss using cross entropy loss criterion, and then do one step of stochastic gradient descent. This works for awhile and then mysteriously breaks. I have no idea why.
Any help is greatly appreciated.
Thanks in advance.
I was also facing same issue, You can try few things :
Make sure there are no NaN, and inf values in your dataset.
set your batch size where number of samples % batchsize = 0
I have 10,000 images, each of which are labeled with 20 tags. For each image, the tag is either true or false. I'm trying to train a multi-output model to perform all these 20 binary classifications with one network.
The network is a Residual Network. After the flatten layer, the network branches out into 20 branches. Each branch has 2 fully connected layers, each of which are followed by a drop out layer. And finally a dense layer with one node and sigmoid activation in the end.
The labels for each image and the image name are stored in a text file, for both train and validation set. Like this:
1.jpg 1 -1 1 -1 -1 1 -1.........
I wrote my own generator, but I can't get them to work. I keep getting this error:
Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 20 array(s), but instead got the following list of 1 arrays.
Function explanations: get_input function reads an image and resizes it.
get_output prepares the labels for each image. The labels are stored in a list and returned in the end. preprocess_input performs preprocessing and converting images into arrays. train_generator and validation_generator generate batches with size 32 to be fed to the model.
Here's my code:
def get_input(img_name):
path = os.path.join("images", img_name)
img = image.load_img(path, target_size=(224, 224))
return img
def get_output(img_name, file_path):
data = pd.read_csv(file_path, delim_whitespace=True, header=None)
img_id = img_name.split(".")[0]
img_id = img_id.lstrip("0")
img_id = int(img_id)
labels = data.loc[img_id - 1].values
labels = labels[1:]
labels = list(labels)
label_arrays = []
for i in range(20):
val = np.zeros((1))
val[0] = labels[i]
label_arrays.append(val)
return label_arrays
def preprocess_input(img_name):
img = get_input(img_name)
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
return x
def train_generator(batch_size):
file_path = "train.txt"
data = pd.read_csv(file_path, delim_whitespace=True, header=None)
while True:
for i in range(math.floor(8000/batch_size)):
x_batch = np.zeros(shape=(32, 224, 224, 3))
y_batch = np.zeros(shape=(32, 20))
for j in range(batch_size):
img_name = data.loc[i * batch_size + j].values
img_name = img_name[0]
x = preprocess_input(img_name)
y = get_output(img_name, file_path)
x_batch[j, :, :, :] = x
y_batch[j] = y
yield(x_batch, y_batch)
def val_generator(batch_size):
file_path = "val.txt"
data = pd.read_csv(file_path, delim_whitespace=True, header=None)
while True:
for i in range(math.floor(2000/batch_size)):
x_batch = np.zeros(shape=(32, 224, 224, 3))
y_batch = np.zeros(shape=(32, 20))
for j in range(batch_size):
img_name = data.loc[i * batch_size + j].values
img_name = img_name[0]
x = preprocess_input(img_name)
y = get_output(img_name, file_path)
x_batch[j, :, :, :] = x
y_batch[j] = y
yield(x_batch, y_batch)
Edit:
One quick question. What's the difference between this loop and the one in your answer:
ys = []
for i in range(batch_size):
ys.append(y_batch[i, :])
yield(x_batch, ys)
If your model has 20 outputs then you must provide a list of 20 arrays as target. One way of doing this is to modify the generator (for both training and validation):
ys = []
for i in range(20):
ys.append(y_batch[:,i])
yield(x_batch, ys)
As a side note, you mentioned that you have 20 tags per sample then why have you specified 40 in the input shape?
y_batch = np.zeros(shape=(32, 40))
Further, I don't know about the specific problem you are working on but alternatively you could only have one output of size 20 instead of 20 outputs with size one.
You can test the generator output dimensions initializing the generator and call the function next() to check the dimensions. For example with the train_generator:
train_gen = train_generator(batch_size)
x_batch, y_batch = next(train_gen)
Then check x_batch and y_batch dimensions and datatype
I would make the generator in this way:
def train_generator(batch_size):
file_path = "train.txt"
data = pd.read_csv(file_path, delim_whitespace=True, header=None)
# Initialize empty list
x_batch = []
y_batch = []
while True:
for i in range(math.floor(8000/batch_size)):
for j in range(batch_size):
img_name = data.loc[i * batch_size + j].values
img_name = img_name[0]
x = preprocess_input(img_name)
y = get_output(img_name, file_path)
x_batch.append(x)
y_batch.append(y)
yield(np.array(x_batch), np.array(y_batch))
I'm trying to slightly change the code from github here to a toy example of reading a simpler two dimensional data. My toy data set has the following structure
x-coordinate, y-coordinate, class
Some example data points are
1,1
3,1
4,1
4,2
6,2
1,3
and their corresponding classes
0
1
1
1
1
0
I'm able to read the data and create my custom mlp. However when I try to run the training part, I get the following error
(5, 2)
(5,)
Traceback (most recent call last):
File "./t.py", line 78, in <module>
train_err += train_fn(inputs, targets)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 786, in __call__
allow_downcast=s.allow_downcast)
File "/usr/local/lib/python2.7/dist-packages/theano/tensor/type.py", line 177, in filter
data.shape))
TypeError: ('Bad input argument to theano function with name "./t.py:67" at index 0(0-based)', 'Wrong number of dimensions: expected 4, got 2 with shape (5, 2).')
This has clearly something to do with the shapes of the arrays I'm passing. But what I can't seem to figure out is why is my case any different from the mnist dataset which is also a two dimensional array of an image.
My entire code is the following.
def build_mlp(input_var=None):
l_in = lasagne.layers.InputLayer(shape=(None,1,1,2),input_var = input_var)
l_h1 = lasagne.layers.DropoutLayer(l_in,p = 0.2)
l_hid1 = lasagne.layers.DenseLayer(
l_h1,num_units = 10,
nonlinearity = lasagne.nonlinearities.rectify,
W = lasagne.init.GlorotUniform())
l_h2 = lasagne.layers.DropoutLayer(l_hid1,p = 0.2)
l_hid2 = lasagne.layers.DenseLayer(
l_h2,num_units = 10,
nonlinearity = lasagne.nonlinearities.rectify,
W = lasagne.init.GlorotUniform())
l_out = lasagne.layers.DenseLayer(
l_hid2,num_units = 5,
nonlinearity = lasagne.nonlinearities.softmax,
W = lasagne.init.GlorotUniform())
return l_out
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets[excerpt]
x_data = np.genfromtxt('a.csv',delimiter=',')
y_data = np.genfromtxt('b.csv',delimiter=',')
x_train, x_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.33)
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
network = build_mlp(input_var)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.4)
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
target_var)
test_loss = test_loss.mean()
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
dtype=theano.config.floatX)
train_fn = theano.function([input_var, target_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
num_epochs = 100
for epoch in range(num_epochs):
train_err = 0
start_time = time.time()
for batch in iterate_minibatches(x_train, y_train, 5, shuffle=True):
inputs, targets = batch
print inputs.shape
print targets.shape
train_err += train_fn(inputs, targets)
val_err = 0
val_acc = 0
val_batches = 0
for batch in iterate_minibatches(x_train, y_train, 5, shuffle=False):
inputs, targets = batch
err, acc = val_fn(inputs, targets)
val_err += err
val_acc += acc
val_batches += 1
print 'Epoch %d of %d took {:%0.3f}s' % (epoch + 1, num_epochs, time.time() - start_time)
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
print(" validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
Could someone point to me what I'm doing off here please?
You're declaring input_var as a 4d tensor, but the error message suggests that you're passing a data matrix of size (5,2) as input. Based on the shape of your input layer, this should be (5, 1, 1, 2) (assuming the 5 corresponds to the number of training examples in a minibatch and the 2 corresponds to your x and y coordinates).