class CIFAR10Sequence(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.epoch = 0
        self.batch = 0
        self.batch_size = batch_size
        self.per = 4

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        # note: idx is unused here; this picks the single sample at the per-percent mark
        batch_x = self.x[int(np.ceil(self.x.shape[0] * (self.per / 100)))]
        batch_y = self.y[int(np.ceil(self.x.shape[0] * (self.per / 100)))]
        return np.array(batch_x), np.array(batch_y)

    def on_batch_end(self):
        if self.epoch % 100 == 0:
            self.per = self.per * 1.9
        self.epoch += 1

train_datagen = CIFAR10Sequence(new_x_sort, new_x_sort, 100)
test_datagen = CIFAR10Sequence(cifar100_dataset.x_test,
                               cifar100_dataset.x_test, 100)
model.fit_generator(generator=train_datagen, steps_per_epoch=len(new_x_sort)//100, epochs=20)
but I get:
TypeError: 'CIFAR10Sequence' object is not an iterator
You need an __iter__() method in CIFAR10Sequence. Something like:
def __iter__(self):
    for idx in range(len(self)):
        yield self[idx]
It looks like the Sequence object was referenced in another context before you reached the fit_generator call. That call doesn't take a keyword argument for the generator, so if you had reached it you would get a keyword-argument error instead. The Sequence object does have __iter__(), so it is iterable, but it doesn't have __next__(), so it is not an iterator, and if it was used as one it would throw exactly that error. __iter__() is all that's needed for Keras fit.
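For context, here is a minimal plain-Python sketch (not Keras-specific) of that iterable-vs-iterator distinction:
class OnlyIterable:
    def __iter__(self):
        return iter([1, 2, 3])

obj = OnlyIterable()
for v in obj:   # fine: the for-loop calls iter(obj) itself
    print(v)
next(obj)       # TypeError: 'OnlyIterable' object is not an iterator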
Here is the code where the error occurs:
train_data = LngDataset(zipfile.Path(archive, "train/train/"))
test_data = LngDataset(zipfile.Path(archive, "test/test/"))
train_loader = data.DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = data.DataLoader(test_data, batch_size=32, shuffle=True)
archive is a zip file which has two folders, train and test.
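For reference, a minimal sketch of the assumed setup (the zip file name here is hypothetical):
import zipfile

archive = zipfile.ZipFile("data.zip")   # hypothetical file name
train_root = zipfile.Path(archive, "train/train/")
test_root = zipfile.Path(archive, "test/test/")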
The LngDataset class:
class LngDataset(data.Dataset):
    def __init__(self, root):
        super().__init__()
        self.items = list(root.iterdir())

    def __getitem__(self, index):
        item = self.items[index]
        if item.name.startswith("de_"):
            y = 0
        elif item.name.startswith("en_"):
            y = 1
        elif item.name.startswith("es_"):
            y = 2
        else:
            raise ValueError(f"Unable to determine label for: {item.name}")
        signal, sample_rate = soundfile.read(item.open("r"))
        X = fbanks(signal, sample_rate).astype(np.float32)
        X = X[np.newaxis, ...]
        return X, y

    def __len__(self):
        return len(self.items)
I checked the zipfile documentation and zipfile.Path does exist, so I don't understand how to solve this issue.
I am starting with PyTorch and I am trying to create a network that predicts the sine of x. I tried to create a Dataset like this:
class SinusDataset(Dataset):
    def __init__(self, size: int = 1000):
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx: int):
        if idx >= self.size:
            raise ValueError
        return idx, math.sin(idx)
I do not think that is the proper way to implement this. How should I implement the `__getitem__` method?
You could initialize your inputs and labels in __init__ and store them. Then, in your __getitem__ method, pick instances from those two using the provided idx integer. Something like:
class SinusDataset(Dataset):
    def __init__(self, size: int = 1000):
        self.x = torch.linspace(0, 1, size)
        self.y = torch.sin(self.x)

    def __len__(self) -> int:
        return len(self.x)

    def __getitem__(self, idx: int):
        return self.x[idx][None], self.y[idx][None]
Then you can use the dataset by wrapping it in a torch.utils.data.DataLoader:
>>> dl = DataLoader(SinusDataset(100), batch_size=4, shuffle=True)
>>> for x, y in dl:
... print(x, y)
... break
tensor([0.2452, 0.6116, 0.0791, 0.6667]) tensor([0.2428, 0.5742, 0.0790, 0.6184])
In this case it would be more appropriate to inherit from torch.utils.data.TensorDataset directly. This comes with both __len__ and __getitem__ implemented for you (see source):
class SinusDataset(TensorDataset):
    def __init__(self, size: int = 1000):
        x = torch.linspace(0, 1, size)[:, None]
        y = torch.sin(x)
        super().__init__(x, y)
This is slightly more advanced but it is considered best practice to inherit from the closest built-in torch.utils.data.Dataset class instead of writing the same methods yourself.
Inference example:
>>> model = nn.Sequential(nn.Linear(1, 4),
nn.ReLU(),
nn.Linear(4, 1))
>>> x, y = next(iter(dl))
>>> model(x)
tensor([[-0.0640],
[ 0.1461],
[-0.0882],
[ 0.2259]], grad_fn=<AddmmBackward>)
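For completeness, a minimal training sketch under the same setup; the loss, optimizer, learning rate, and epoch count are my own choices, not part of the original answer:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

for epoch in range(200):
    for x, y in dl:
        optimizer.zero_grad()
        loss = criterion(model(x), y)   # fit the network to sin(x)
        loss.backward()
        optimizer.step()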
I have written my own keras Model, and I'm trying to pass a Keras Generator as input to model.fit.
The problem is that I don't know how to process the generator when I'm in the call method of MyModel. How do I access the x and y from the generator in order to pass them as inputs to my encoder and decoder networks, while also keeping the generator working its magic, loading the batches each epoch?
This is my MyModel class, which inherits from tf.keras.Model:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.enc = Encoder()
        self.dec1 = Decoder1()
        self.dec2 = Decoder2()

    def __call__(self, data_generator, **kwargs):
        ################################################
        # how do I access x and y here in order to pass them
        # to the encoder and decoder, while also keeping the
        # generator's properties?
        ################################################
        x_train, y_train = data_generator  # ?????????
        dec_inputs = tf.concat((tf.zeros_like(y_train[:, :1, :]), y_train[:, :-1, :]), 1)
        dec_inputs = dec_inputs[:, :, -hp.n_mels:]
        print("######## ENC INPUTS #####")
        # print(tf.shape(x_train))
        print("######################")
        print("######### DEC INPUTS #####")
        # print(tf.shape(dec_inputs))
        print("######################")
        memory = self.enc(x_train)
        y_hat = self.dec1(dec_inputs, memory)
        # z_hat = self.dec2(y_hat)
        return y_hat
And this is my generator class:
class DataGenerator(keras.utils.Sequence):
    def __init__(self, list_IDs, ID_dictionary, labels, batch_size=8, dim1=(32, 32, 32), dim2=(32, 32, 32),
                 n_channels=None, n_classes=None, shuffle=True):
        'Initialization'
        self.dim1 = dim1  # dimension of X
        self.dim2 = dim2  # dimension of Y
        self.batch_size = batch_size
        self.ID_dictionary = ID_dictionary
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    # 3
    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        x, y = self.__data_generation(list_IDs_temp)
        return x, y

    # 1
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    # 2
    def __data_generation(self, list_IDs_temp):
        # Initialization
        x = np.empty((self.batch_size, *self.dim1))
        y = np.empty((self.batch_size, *self.dim2), dtype=float)
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            x[i, ] = self.ID_dictionary[ID]
            # Store class
            y[i] = self.labels[ID]
        return x, y
And this is how I call MyModel in main
listID, dict1, dict2, text_shape, mel_shape = get_batch()
# dict1 has the inputs ( text ) and dict2 has the labels ( the mels )
training_generator = DataGenerator(listID, dict1, dict2, dim1=text_shape, dim2=mel_shape)
model = MyModel()
model.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
#model.fit_generator(generator=training_generator, use_multiprocessing=True, workers=6)
model.fit(training_generator, epochs=2)
The call method is invoked inside model.fit with a single batch of input, i.e. x, so you cannot expect the generator itself to be the input of the call method when using model.fit. Please read tensorflow.org/guide/keras/custom_layers_and_models and tensorflow.org/tutorials/text/nmt_with_attention for a better understanding of how things work.
Edit 1: how to pass two variables to the call method
# pass the list [x, y] to your call method instead of only x; we club x and y into one variable
def __call__(self, inputs):
    x = inputs[0]
    y = inputs[1]
    # now you can use x and y coming from your generator without changing much

# update your generator so each batch returns [x, y] as the input and y as the target,
# e.g. at the end of DataGenerator.__getitem__:
#     return [x, y], y

# then simply call model.fit like you were doing before
model.fit(training_generator)
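Putting the two pieces together, a hedged sketch of what the model side might look like under this suggestion (I've used Keras's call hook rather than __call__; Encoder, Decoder1, and the tensor shapes are carried over from the question, not verified):
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.enc = Encoder()
        self.dec1 = Decoder1()

    def call(self, inputs):
        x, y = inputs  # the generator packs both tensors into one input
        dec_inputs = tf.concat((tf.zeros_like(y[:, :1, :]), y[:, :-1, :]), 1)
        memory = self.enc(x)
        return self.dec1(dec_inputs, memory)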
I am currently using a generator to produce my training and validation datasets using tf.data.Dataset.from_generator. I have a class method that takes care of this for me:
def build_dataset(self, batch_size=16, shuffle=16, validation=None):
    train_dataset = tf.data.Dataset.from_generator(import_images(validation=validation), (tf.float32, tf.float32))
    self.train_dataset = train_dataset.shuffle(shuffle).repeat(-1).batch(batch_size).prefetch(1)

    if validation is not None:
        val_dataset = tf.data.Dataset.from_generator(import_images(validation=validation), (tf.float32, tf.float32))
        self.val_dataset = val_dataset.repeat(1).batch(batch_size).prefetch(1)
The problem is that passing (validation=validation) to my import_images generator creates the generator object, which TensorFlow doesn't want, and it gives me the error:
TypeError: `generator` must be callable.
Because I have to pass in validation to tell my generator whether to produce the training or the validation version, I am forced to create two versions of the same generator. It also doesn't allow me to pass in other arguments to control the percentage of training and validation examples, meaning the generator has to be static. Any suggestions?
I recently encountered a similar problem, but I'm a beginner, so I'm not sure if this will help.
Try adding a __call__ method to your class.
Below is the original class, which raises TypeError: `generator` must be callable.
class DataGen:
    def __init__(self, files, data_path):
        self.i = 0
        self.files = files
        self.data_path = data_path

    def __load__(self, files_name):
        data_path = os.path.join(self.data_path, files_name)
        arr_img, arr_mask = load_patch(data_path)
        return arr_img, arr_mask

    def getitem(self, index):
        _img, _mask = self.__load__(self.files[index])
        return _img, _mask

    def __iter__(self):
        return self

    def __next__(self):
        if self.i < len(self.files):
            img_arr, mask_arr = self.getitem(self.i)
            self.i += 1
        else:
            raise StopIteration()
        return img_arr, mask_arr
Then I revised the code as below and it worked for me.
class DataGen:
    def __init__(self, files, data_path):
        self.i = 0
        self.files = files
        self.data_path = data_path

    def __load__(self, files_name):
        data_path = os.path.join(self.data_path, files_name)
        arr_img, arr_mask = load_patch(data_path)
        return arr_img, arr_mask

    def getitem(self, index):
        _img, _mask = self.__load__(self.files[index])
        return _img, _mask

    def __iter__(self):
        return self

    def __next__(self):
        if self.i < len(self.files):
            img_arr, mask_arr = self.getitem(self.i)
            self.i += 1
        else:
            raise StopIteration()
        return img_arr, mask_arr

    def __call__(self):
        self.i = 0
        return self
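For reference, a hedged usage sketch under this fix: because the instance is now callable (and the call resets the index and returns an iterator), it can be handed to from_generator directly. The output types below are my assumption, not from the original post.
gen = DataGen(files, data_path)
dataset = tf.data.Dataset.from_generator(
    gen,                                    # callable: from_generator will invoke gen()
    output_types=(tf.float32, tf.float32))  # assumed image/mask dtypes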
I implemented an iterator class as follows:
import numpy as np
import time
class Data:
    def __init__(self, filepath):
        # Computationally expensive
        print("Computationally expensive")
        time.sleep(10)
        print("Done!")

    def __iter__(self):
        return self

    def __next__(self):
        return np.zeros((2, 2)), np.zeros((2, 2))


count = 0
for batch_x, batch_y in Data("hello.csv"):
    print(batch_x, batch_y)
    count = count + 1
    if count > 5:
        break

count = 0
for batch_x, batch_y in Data("hello.csv"):
    print(batch_x, batch_y)
    count = count + 1
    if count > 5:
        break
However, the constructor is computationally expensive, and the for loop might be run multiple times. For example, in the above code the constructor is called twice (each for loop creates a new Data object).
How do I separate the constructor from the iterator? I am hoping to have the following code, where the constructor is called only once:
data = Data(filepath)
for batch_x, batch_y in data.get_iterator():
    print(batch_x, batch_y)

for batch_x, batch_y in data.get_iterator():
    print(batch_x, batch_y)
You can just iterate over an iterable object directly, for..in doesn't require anything else:
data = Data(filepath)
for batch_x, batch_y in data:
    print(batch_x, batch_y)

for batch_x, batch_y in data:
    print(batch_x, batch_y)
That said, depending on how you implement __iter__(), this could be buggy.
E.g.:
Bad
class Data:
    def __init__(self, filepath):
        self._items = load_items(filepath)
        self._i = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self._i >= len(self._items):  # Or however you check if data is available
            raise StopIteration
        result = self._items[self._i]
        self._i += 1
        return result
This is bad because you couldn't iterate over the same object twice: after the first loop, self._i still points past the end of the data.
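A quick illustration of that failure mode (assuming load_items returns a list; the file name is hypothetical):
data = Data("items.csv")
print(list(data))  # all items
print(list(data))  # [] -- self._i is still at the end from the first pass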
Good-ish
class Data:
    def __init__(self, filepath):
        self._items = load_items(filepath)

    def __iter__(self):
        self._i = 0
        return self

    def __next__(self):
        if self._i >= len(self._items):
            raise StopIteration
        result = self._items[self._i]
        self._i += 1
        return result
This resets the index every time you're about to iterate, fixing the above. This won't work if you're nesting iteration over the same object.
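For example, nesting loops over the same object clobbers the shared index (a sketch under the same load_items assumption):
data = Data("items.csv")            # hypothetical path
for a in data:
    for b in data:                  # __iter__ runs again and resets data._i
        pass
    # back here, data._i is already past the end, so the outer loop stops after one pass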
Better
To fix that, keep the iteration state in a separate iterator object:
class Data:
    class Iter:
        def __init__(self, data):
            self._data = data
            self._i = 0

        def __next__(self):
            if self._i >= len(self._data._items):  # check for available data
                raise StopIteration
            result = self._data._items[self._i]
            self._i = self._i + 1
            return result

    def __init__(self, filepath):
        self._items = load_items(filepath)

    def __iter__(self):
        return self.Iter(self)
This is the most flexible approach, but it's unnecessarily verbose if you can use either of the below ones.
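With the separate iterator, nested loops over the same Data object now behave correctly, since each loop gets its own Iter with its own index (sketch, same load_items assumption):
data = Data("items.csv")
for a in data:       # gets its own Data.Iter instance
    for b in data:   # gets another, independent Data.Iter instance
        print(a, b)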
Simple, using yield
If you use Python's generators, the language will take care of keeping track of iteration state for you, and it should do so correctly even when nesting loops:
class Data:
    def __init__(self, filepath):
        self._items = load_items(filepath)

    def __iter__(self):
        for it in self._items:  # Or whatever is appropriate
            yield it
Simple, pass-through to underlying iterable
If the "computationally expensive" part is loading all the data into memory, you can just use the cached data directly.
class Data:
    def __init__(self, filepath):
        self._items = load_items(filepath)

    def __iter__(self):
        return iter(self._items)
Instead of creating a new instance of Data each time, create a second class, IterData, whose __init__ is not as computationally expensive as instantiating Data. Then, add a classmethod to Data as an alternative constructor that returns an IterData:
class IterData:
    def __init__(self, filepath):
        # only pass the necessary data
        ...

    def __iter__(self):
        # implement iter here
        ...


class Data:
    def __init__(self, filepath):
        # Computationally expensive
        ...

    @classmethod
    def new_iter(cls, filepath):
        return IterData(filepath)


results = Data.new_iter('path')
for batch_x, batch_y in results:
    pass