i'm using keras with a simple cnn model.
I want to add Gaussian noise to the images during training, and I want to change the noise parameters (mean and sigma) every epoch, based on some function. For example:
in epoch 1 i want to add noise with sigma=1
in epoch 2 i want to add noise with sigma=2
in epoch 3 i want to add noise with sigma=3
# note-mean is always zero
and so on...
An inefficient way to solve it is a for loop that saves and reloads the model after every epoch and calls the augmentation function in between.
A more efficient way would be a custom callback or generator, which I did not manage to get working.
inefficient way:
total_num_of_epochs = 100


def sigma_function(current_epoch):
    """Return the noise standard deviation to use for ``current_epoch``.

    The value is the fraction ``current_epoch / total_num_of_epochs``.
    """
    return current_epoch / total_num_of_epochs


for i in range(total_num_of_epochs):
    # Augment a fresh copy of x_train based on sigma_function and the current
    # epoch. Adding the noise in place (``x_train += ...``) would pile the
    # noise of every earlier epoch on top of the new one.
    noisy_x_train = x_train + np.random.normal(
        loc=0, scale=sigma_function(i), size=x_train.shape)
    model.compile(...)
    # Train exactly one epoch (epoch i), starting from the model of the
    # previous loop; other fit() arguments are omitted in this sketch.
    model.fit(noisy_x_train, y_train, initial_epoch=i, epochs=i + 1)
    # save model
    # load model for next loop
the desired result (i tried with ImageDataGenerator but maybe callback can do):
def sigma_function(current_epoch):
    """Map an epoch number to a sigma: the epoch divided by the epoch total."""
    return current_epoch / total_num_of_epochs
datagen=ImageDataGenerator(preprocessing_function=sigma_function)
datagen.fit(x_train)
model.fit_generator(... don't know what to put here)
edit
According to the solution proposed by Daniel Möller, I tried it this way and still got an error:
# Standard deviation of the noise; read by apply_sigma on every call.
sigmaParam = 1


def apply_sigma(x):
    """Return ``x`` with zero-mean Gaussian noise of std ``sigmaParam`` added.

    The noise is drawn with the same shape as ``x``, so the function works
    for any image size and channel layout.
    """
    # np.random.normal names its parameters ``loc`` and ``scale``.
    return x + np.random.normal(loc=0, scale=sigmaParam, size=x.shape)
# The keyword is spelled ``preprocessing_function``.
imgGen = ImageDataGenerator(preprocessing_function=apply_sigma)
# folder that contains only x_train and y_train
generator = imgGen.flow_from_directory('data/train')
from keras.utils import Sequence
class SigmaGenerator(Sequence):
    """Wrap a Keras generator and raise the global noise sigma after each epoch.

    Length and item access are delegated to the wrapped generator.
    """

    def __init__(self, keras_generator):
        super().__init__()
        self.keras_generator = keras_generator

    def __len__(self):
        return len(self.keras_generator)

    def __getitem__(self, i):
        return self.keras_generator[i]

    def on_epoch_end(self):
        # Without ``global`` the augmented assignment makes sigmaParam a local
        # name and raises UnboundLocalError.
        # NOTE(review): with multiprocessing workers each process presumably
        # holds its own copy of this global - confirm before enabling it.
        global sigmaParam
        sigmaParam += 1
        self.keras_generator.on_epoch_end()


training_generator = SigmaGenerator(generator)
model.fit_generator(training_generator, validation_data=(x_test, y_test),
                    steps_per_epoch=x_train.shape[0] // batch_size, epochs=100)
the error i get:
process finished with exit code -1073741819 (0xC0000005)
You can try this:
# Standard deviation of the noise; read by applySigma on every call.
sigmaParam = 1


def applySigma(x):
    """Return ``x`` with zero-mean Gaussian noise of std ``sigmaParam`` added."""
    # np.random.normal names its parameters ``loc`` and ``scale``.
    return x + np.random.normal(loc=0, scale=sigmaParam, size=x.shape)
Create the original generator:
# ``...`` stands for whatever other arguments you need.
imgGen = ImageDataGenerator(..., preprocessing_function=applySigma)
generator = imgGen.flow_from_directory(...)
Create a custom generator to wrap the original one, replace its on_epoch_end method to update sigmaParam.
from keras.utils import Sequence
class SigmaGenerator(Sequence):
    """Wrap a Keras generator and raise the global noise sigma after each epoch.

    Length and item access are delegated to the wrapped generator.
    """

    def __init__(self, keras_generator):
        super().__init__()
        self.keras_generator = keras_generator

    def __len__(self):
        return len(self.keras_generator)

    def __getitem__(self, i):
        return self.keras_generator[i]

    def on_epoch_end(self):
        # ``global`` is required: the augmented assignment would otherwise
        # treat sigmaParam as a local name and raise UnboundLocalError.
        global sigmaParam
        sigmaParam += 1
        self.keras_generator.on_epoch_end()


training_generator = SigmaGenerator(generator)
Related
I am running this code (https://github.com/ayu-22/BPPNet-Back-Projected-Pyramid-Network/blob/master/Single_Image_Dehazing.ipynb) on a custom dataset but I am running into this error.
RuntimeError: one of the variables needed for gradient computation has been modified by an in place operation: [torch. cuda.FloatTensor [1, 512, 4, 4]] is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Please refer to the code link above for clarification of where the error is occurring.
I am running this model on a custom dataset, the data loader part is pasted below.
import torchvision.transforms as transforms
# Steps applied to every training image, in order: resize to 256x256,
# convert to a tensor, then normalise each channel with mean 0.5 and std 0.5.
_train_steps = [
    transforms.Resize((256, 256)),
    # Augmentations currently switched off:
    # transforms.RandomResizedCrop(256),
    # transforms.RandomHorizontalFlip(),
    # transforms.ColorJitter(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
]
train_transform = transforms.Compose(_train_steps)
class Flare(Dataset):
    """Paired dataset of flare images and their flare-free counterparts.

    A flare image is paired with the flare-free image whose name up to the
    first dot equals the flare image's name up to the first dot with its
    first four characters removed.

    Args:
        flare_dir: directory holding the flare images.
        wf_dir: directory holding the flare-free images.
        transform: optional callable applied to both images of a pair.
    """

    def __init__(self, flare_dir, wf_dir, transform=None):
        self.flare_dir = flare_dir
        self.wf_dir = wf_dir
        self.transform = transform
        self.flare_img = os.listdir(flare_dir)
        self.wf_img = os.listdir(wf_dir)
        # Name prefix -> file name, built once so __getitem__ does not scan
        # the whole listing per sample. setdefault keeps the first match in
        # listing order, like the original linear search did.
        self._wf_by_stem = {}
        for name in self.wf_img:
            self._wf_by_stem.setdefault(name.split('.')[0], name)

    def __len__(self):
        return len(self.flare_img)

    def __getitem__(self, idx):
        """Return the ``(flare, flare_free)`` pair for ``idx`` as RGB images.

        Raises:
            FileNotFoundError: if no flare-free image matches the flare image.
        """
        flare_name = self.flare_img[idx]
        key = flare_name.split('.')[0][4:]
        try:
            wf_name = self._wf_by_stem[key]
        except KeyError:
            raise FileNotFoundError(
                "no flare-free image matching {!r} in {!r}".format(
                    flare_name, self.wf_dir)) from None
        f_img = Image.open(os.path.join(self.flare_dir, flare_name)).convert("RGB")
        wf_img = Image.open(os.path.join(self.wf_dir, wf_name)).convert("RGB")
        # transform defaults to None, so only apply it when one was given.
        if self.transform is not None:
            f_img = self.transform(f_img)
            wf_img = self.transform(wf_img)
        return f_img, wf_img
flare_dir = '../input/flaredataset/Flare/Flare_img'
wf_dir = '../input/flaredataset/Flare/Without_Flare_'
# Sorted listings of both folders; below they are only used for a quick
# look at the first flare-free file name.
flare_img = sorted(os.listdir(flare_dir))
wf_img = sorted(os.listdir(wf_dir))
print(wf_img[0])
# Dataset over both folders, wrapped in a shuffling loader.
train_ds = Flare(flare_dir, wf_dir, train_transform)
train_loader = torch.utils.data.DataLoader(
    dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
To get a better idea of the dataset class , you can compare my dataset class with the link pasted above
Your code is stuck in what is called the "Backpropagation" of your GAN Network.
What you have defined your backward graph should follow is the following:
def backward(self, unet_loss, dis_loss):
    """Backpropagate both losses, stepping each optimizer right after its loss.

    NOTE(review): the surrounding discussion reports that this ordering
    triggers the "modified by an inplace operation" RuntimeError, presumably
    because dis_optimizer.step() changes weights that unet_loss.backward()
    still needs - confirm against the model code.
    """
    # retain_graph=True keeps the graph for the second backward() call below.
    dis_loss.backward(retain_graph = True)
    self.dis_optimizer.step()
    unet_loss.backward()
    self.unet_optimizer.step()
So in your backward pass you first backpropagate dis_loss (the combination of the discriminator and adversarial losses) and then unet_loss (the combination of the UNet, SSIM and content losses). However, unet_loss is also connected to the discriminator's output. PyTorch therefore raises this error: dis_optimizer.step() updates the discriminator's weights in place before unet_loss.backward() has run, while the graph of unet_loss still needs the old values. I would recommend changing the code as follows:
def backward(self, unet_loss, dis_loss):
    """Backpropagate both losses first, then step both optimizers.

    Both backward() calls run before any optimizer step, so no weight is
    updated between them.

    NOTE(review): if unet_loss depends on the discriminator, its backward()
    presumably also adds gradients to the discriminator's parameters before
    dis_optimizer.step() - confirm this is intended.
    """
    # retain_graph=True keeps the graph for the second backward() call below.
    dis_loss.backward(retain_graph = True)
    unet_loss.backward()
    self.dis_optimizer.step()
    self.unet_optimizer.step()
And this will start your training! but you can experiment with your retain_graph=True.
And great work on the BPPNet Work.
I am doing end-to-end mapping. As I have to pass two images (input and output), I have created a custom generator. My generator gets the same image twice, at different resolutions. Right now I can only pass 5 images to training, but I want to pass the whole generator so that the model is trained on all my data. As I am new to generators and yield, I don't know the correct way to pass the whole generator.
import os
import numpy as np
import cv2
class image_gen():
    """Batch generator yielding (input, target) image arrays from two folders.

    Args:
        idir: directory containing the input images.
        odir: directory containing the target images.
        batch_size: number of image pairs per batch.
        shuffle: pick every pair at random when True, walk through the
            listing in order when False.
        target_size: size passed to ``cv2.resize`` for both images.
    """

    def __init__(self, idir, odir, batch_size, shuffle=True,
                 target_size=(3200, 3200)):
        self.batch_index = 0
        self.idir = idir
        self.odir = odir
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.target_size = target_size
        self.label_list = []  # kept for compatibility; not filled by this class
        self.image_list = []  # kept for compatibility; not filled by this class
        # os.listdir returns names in arbitrary order; sorting both listings
        # makes position m refer to the same pair in both folders.
        # NOTE(review): assumes paired files sort to the same position - confirm.
        self.i_list = sorted(os.listdir(self.idir))
        self.o_list = sorted(os.listdir(self.odir))

    def _load(self, path):
        """Read the image at ``path`` and resize it to ``target_size``."""
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError("could not read image: {}".format(path))
        return cv2.resize(image, self.target_size)

    def get_images(self):
        """Yield ``(input_batch, target_batch)`` arrays scaled by 1/255.

        The generator stops once ``batch_index * batch_size`` has reached the
        number of images, and then resets ``batch_index`` so a new call
        starts from the first batch again. Nothing is yielded when the input
        directory is empty.
        """
        sample_count = len(self.i_list)
        if sample_count == 0:
            return
        while True:
            input_image_batch = []
            output_image_batch = []
            first = self.batch_index * self.batch_size
            for i in range(first, first + self.batch_size):
                if self.shuffle:
                    # randint's upper bound is exclusive, so sample_count
                    # (not sample_count - 1) is needed to reach the last image.
                    m = np.random.randint(low=0, high=sample_count)
                else:
                    m = i % sample_count  # wrap around the available images
                input_image_batch.append(
                    self._load(os.path.join(self.idir, self.i_list[m])))
                output_image_batch.append(
                    self._load(os.path.join(self.odir, self.o_list[m])))
            input_image_array = np.array(input_image_batch) / 255.0
            output_image_array = np.array(output_image_batch) / 255.0
            self.batch_index += 1
            yield (input_image_array, output_image_array)
            # ``>=`` avoids an extra batch when batch_size divides the count.
            if self.batch_index * self.batch_size >= sample_count:
                self.batch_index = 0
                break
This is how i get the images
# Number of image pairs per batch.
batch_size=5
# NOTE: these are raw strings, so every doubled backslash stays as two
# backslash characters in the resulting path.
idir=r'D:\\train'
odir=r'D:\\Train\\train'#
# NOTE: this variable is not used below; the call passes shuffle=True literally.
shuffle=True
gen=image_gen(idir,odir,batch_size,shuffle=True) # instantiate an instance of the class
# Pull one batch from a fresh generator: only these batch_size image pairs
# end up in input_images / output_images.
input_images,output_images = next(gen.get_images())
This is how i train.This way i only train 5 images and not the whole dataset
model.fit(input_images,output_images,validation_data = (valin_images,valout_images),batch_size= 5,epochs = 100)
when i try to pass the whole dataset
# NOTE(review): `gen` is an image_gen instance, so `gen()` tries to call the
# object itself; the generator is obtained with gen.get_images().
model.fit(gen(),validation_data = (valin_images,valout_images),batch_size= 5,epochs = 1)
I get a error "image_gen" object is not callable. How should i pass the generator to model.fit()
You get this error because `gen()` tries to call an image_gen instance as if it were a function, but it is an object of a class, and that class does not define `__call__`.
In the first snippet you provided, you accessed in fact the method of the class which is indeed a generator, which yielded some numpy arrays that could be fed as input to the model. The second snippet however fails, because of the error described in the first paragraph.
Two possible solutions for your problem would be the following:
Use a Keras Sequence() generator.
Use a function as a generator (def my_generator(...)).
I personally recommend the first solution, as the Sequence() generator ensures that you only train once per each sample during an epoch, property which is not satisfied in case of simple function generators.
Solution for Keras Sequence() :
You need to subclass the Sequence class and override its methods. A complete example from the official TensorFlow documentation is:
from skimage.io import imread
from skimage.transform import resize
import numpy as np
import math
# Here, `x_set` is list of path to the images
# and `y_set` are the associated classes.
class CIFAR10Sequence(Sequence):
    """Keras ``Sequence`` serving batches of resized images with their classes.

    ``x_set`` is a list of image paths and ``y_set`` holds the associated
    classes; both are sliced with the same bounds for every batch.
    """

    def __init__(self, x_set, y_set, batch_size):
        self.x = x_set
        self.y = y_set
        self.batch_size = batch_size

    def __len__(self):
        # Round up so a final, shorter batch is still counted.
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        start = idx * self.batch_size
        stop = start + self.batch_size
        # Every image is read from disk and resized to 200x200.
        images = [resize(imread(path), (200, 200)) for path in self.x[start:stop]]
        labels = self.y[start:stop]
        return np.array(images), np.array(labels)
You can use the above code as a starting point for your solution. Incidentally, it is likely your network will not train with such huge image dimensions, you could also try to lower them.
A solution for simple generator could be:
def my_generator(path_to_dataset, other_argument):
    """Template generator yielding ``(image_1, image_2)`` pairs indefinitely.

    Keras pulls ``steps_per_epoch`` batches for every epoch, so a plain
    generator must never run out; hence the endless loop.
    """
    while True:
        ...  # load / prepare the next pair here
        ...
        yield image_1, image_2


train_generator = my_generator(path_to_train, argument_1)
val_generator = my_generator(path_to_val, argument_2)
model.fit(train_generator,
          steps_per_epoch=len(training_samples) // BATCH_SIZE,
          epochs=10, validation_data=val_generator,
          validation_steps=len(validation_samples) // BATCH_SIZE)
I am training a model using custom generators, but just before finishing the first epoch, the model runs out of data. It gives me the following error:
Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least (steps_per_epoch * epochs) batches (in this case, 8740 batches). You may need to use the repeat() function when building your dataset
I have four generators (one for the train data, and another for the train label. Same thing with validation). I then zip train & label together. This is the prototype of my generators. I got the idea from here:
import numpy as np
import nibabel as nib
from tensorflow import keras
import os
def weirddivision(n, d):
    """Divide ``n`` by ``d``, yielding 0 wherever the denominator is zero.

    Args:
        n: numerator, scalar or array-like.
        d: denominator, scalar or array-like.

    Returns:
        For a scalar ``d``: ``n / d`` as a NumPy value, or the int ``0`` when
        ``d`` is falsy. For an array-like ``d``: a float array of the
        broadcast shape with zeros where ``d`` is zero.
    """
    numerator = np.asarray(n)
    denominator = np.asarray(d)
    if denominator.ndim == 0:
        # Scalar denominator: the original behaviour.
        return numerator / denominator if d else 0
    if denominator.size == 0:
        return 0
    # ``if d`` is ambiguous for arrays with several elements, so divide
    # element-wise and leave zeros where the denominator is zero.
    quotient = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
    np.divide(numerator, denominator, out=quotient, where=denominator != 0)
    return quotient
class ImgDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of images loaded from ``.npy`` files.

    Args:
        file_list: file names, relative to ``data_dir``.
        batch_size: number of files per batch.
        shuffle: reshuffle the file order at the end of every epoch.
        data_dir: directory the files are loaded from.
    """

    def __init__(self, file_list, batch_size=8, shuffle=True,
                 data_dir='/home/faruk/Desktop/BrainSeg/Dataset/Train/'):
        super().__init__()
        self.file_list = file_list
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.data_dir = data_dir
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches; a trailing partial batch is dropped."""
        return len(self.file_list) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as one array."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        file_list_temp = [self.file_list[k] for k in indexes]
        return self.__data_generation(file_list_temp)

    def on_epoch_end(self):
        """Rebuild the index order, shuffled when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.file_list))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, file_list_temp):
        """Load, pad and scale the given files into one batch array."""
        # Size by the files actually requested: np.empty leaves memory
        # uninitialised, so unused rows of a short batch would hold garbage.
        X = np.empty((len(file_list_temp), 224, 256, 1))
        for i, ID in enumerate(file_list_temp):
            img = np.load(os.path.join(self.data_dir, ID))
            # NOTE(review): this padding presumably brings 197x233 inputs up
            # to 224x256 - confirm the stored image size.
            img = np.pad(img, pad_width=((14, 13), (12, 11)), mode='constant')
            img = np.expand_dims(img, -1)
            # Scale by the image maximum; weirddivision returns 0 when it is 0.
            X[i,] = weirddivision(img, img.max())
        return X
As mentioned, here I create four generators and zip them:
training_img_generator = ImgDataGenerator(train)
training_label_generator = LabelDataGenerator(train)
# NOTE(review): zip() returns a one-shot iterator, so it ends after a single
# pass over the two generators instead of restarting for the next epoch.
# NOTE(review): ImgDataGenerator shuffles its own index order by default; if
# LabelDataGenerator shuffles independently, images and labels presumably no
# longer line up - confirm.
train_generator = zip(training_img_generator,training_label_generator)
val_img_generator = ValDataGenerator(val)
val_label_generator = ValLabelDataGenerator(val)
# Same one-shot zip pairing for the validation data.
val_generator = zip(val_img_generator,val_label_generator)
Because the generator is generating data dynamically, I thought that maybe it was trying to generate more than what is actually available. Hence, I calculated the steps per epoch as follows and passed it to fit_generator:
batch_size = 8
spe = len(train)//batch_size # len(train) = 34965
val_spe = len(val)//batch_size # len(val) = 4347
History=model.fit_generator(generator=train_generator, validation_data=val_generator, epochs=2, steps_per_epoch=spe, validation_steps = val_spe, shuffle=True, verbose=1)
But still, this is not working. I have tried reducing the number of steps per epoch, and I am able to finish the first epoch, but the error then appears at the beginning of the second epoch. Apparently the generator needs to be repeated infinitely, but I don't know how to achieve this. Can I use an infinite while loop? If yes, where?
Try this:
def _repeat_batches(*sequences):
    """Yield index-aligned batch tuples from the given Sequences, forever.

    ``zip`` objects have no ``repeat()`` method (that belongs to
    ``tf.data.Dataset``), so the endless loop is written out here. Nothing is
    yielded when no sequence is given or the first one is empty.
    """
    if not sequences or len(sequences[0]) == 0:
        return
    while True:
        for index in range(len(sequences[0])):
            yield tuple(sequence[index] for sequence in sequences)
        # NOTE(review): each Sequence reshuffles on its own here; confirm the
        # image and label generators stay aligned (e.g. shuffle=False).
        for sequence in sequences:
            sequence.on_epoch_end()


train_generator = _repeat_batches(training_img_generator, training_label_generator)
val_generator = _repeat_batches(val_img_generator, val_label_generator)
I solved this. I was defining my Generator class as follows:
class ImgDataGenerator(keras.utils.Sequence)
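However, my model was not sequential... It was functional. I solved this by creating my own custom generator without inheriting from keras.utils.Sequence. (Note: keras.utils.Sequence is a data-loading base class and is not tied to the Sequential model API; the custom generator most likely helped because it keeps yielding batches indefinitely, whereas zip() over two Sequence objects is exhausted after one pass.)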
I hope this is helpful to someone.
I am following along this tutorial,
https://www.tensorflow.org/tutorials/keras/basic_classification
When I fit the model using model.fit(train_images, train_labels, epochs=5, verbose =1), the times are displayed in the python console. I want to get the wall time of each epoch by using time.clock().
I am assuming that when more epochs are added the fitting time increases linearly, but I want to graph this to be sure.
Besides fitting with 1 epoch, then 2 epochs, then 3 epochs, etc, how can I work out the training time (fitting time) for an increasing number of epochs?
Using a custom callback you can plot the total time taken to fit certain epochs.
class timecallback(tf.keras.callbacks.Callback):
    """Keras callback that plots cumulative elapsed time against epoch.

    The clock starts when the callback is constructed. ``time.perf_counter``
    is used because ``time.clock`` was removed in Python 3.8.
    """

    def __init__(self):
        super().__init__()
        # (epoch, seconds since construction) pairs, one per finished epoch.
        self.times = []
        # use this value as reference to calculate cumulative time taken
        self.timetaken = time.perf_counter()

    def on_epoch_end(self, epoch, logs=None):
        self.times.append((epoch, time.perf_counter() - self.timetaken))

    def on_train_end(self, logs=None):
        plt.xlabel('Epoch')
        plt.ylabel('Total time taken until an epoch in seconds')
        # Unzip the pairs into an epoch series and a time series.
        plt.plot(*zip(*self.times))
        plt.show()
And then pass this as a callback to the model.fit function like this
timetaken = timecallback()
model.fit(train_images, train_labels, epochs=5,callbacks = [timetaken])
This plots a graph at the end of training which shows the total time taken for the model to train upto a certain epoch from the start.
And if you want to plot the per epoch time. You can replace the on_train_end method with on_epoch_end.
def on_epoch_end(self,epoch,logs= {}):
# same as the on_train_end function
Using a custom callback definitely works but you must be careful of how you generate a timestamp. The recommended time.clock() works differently on Windows vs UNIX systems and may not generate the behavior that you want. Therefore, I recommend a tweak to the code others have recommended, using the built in tensorflow.timestamp() method (documentation). Note that this is a tensor object so if you'd like to plot the time as text, as I did, you'll need to extract the float value. I did so using .numpy() as this is an EagerTensor.
import tensorflow as tf
import matplotlib.pyplot as plt
from datetime import datetime
class timecallback(tf.keras.callbacks.Callback):
    """Keras callback that plots cumulative time per epoch and saves the figure.

    Each point is labelled with the duration of that single epoch. The figure
    is written to a PNG named after the current date and time.
    """

    def __init__(self):
        super().__init__()
        self.times = []
        self.epochs = []
        # use this value as reference to calculate cumulative time taken
        self.timetaken = tf.timestamp()

    def on_epoch_end(self, epoch, logs=None):
        self.times.append(tf.timestamp() - self.timetaken)
        self.epochs.append(epoch)

    def on_train_end(self, logs=None):
        plt.xlabel('Epoch')
        plt.ylabel('Total time taken until an epoch in seconds')
        plt.plot(self.epochs, self.times, 'ro')
        previous = 0.0
        for epoch, elapsed in zip(self.epochs, self.times):
            total = elapsed.numpy()
            # Place the label at the epoch's own x position, so it stays on
            # its point even when the epoch numbers do not start at 0.
            plt.text(epoch, total, str(round(total - previous, 3)))
            previous = total
        plt.savefig(datetime.now().strftime("%Y%m%d%H%M%S") + ".png")
Then, when calling model fit:
model.fit(train_images, train_labels, epochs=5,callbacks = [timecallback()])
Good practice to use callbacks. How do we record the durations in the history?
Say we're using
history = model.fit( ..., callbacks=[my_training_callback])
What should I write into the definition of my_training_callback?
I'm tryin to do:
class my_training_callback(Callback):
    """Keras callback that measures the duration of the most recent epoch.

    ``mark`` holds the start time of the current epoch and ``duration`` the
    length of the last finished one.
    """

    def __init__(self):
        super().__init__()
        # Store on the instance; bare names would be discarded locals.
        self.mark = 0
        self.duration = 0

    def on_epoch_begin(self, epoch, logs=None):
        self.mark = time()

    def on_epoch_end(self, epoch, logs=None):
        self.duration = time() - self.mark
It works ok, but I'm having trouble adding the duration value to the history.
Thanks
I have managed to add the duration to the history.
The trick comes from the fact that the logs object passed to the methods of Callback is mutable, and is the same one that is passed to each callback... including the History object returned by model.fit.
So if you want the duration of each epoch in the history (as opposed to displayed on the screen or saved to a file as in the other answers), you have to add it to the logs object in your custom callback.
Example:
import datetime
import tensorflow as tf
class TimestampCallback(tf.keras.callbacks.Callback):
    """Keras callback that writes each epoch's duration into ``logs``.

    Args:
        metric_name: key under which the duration (a ``datetime.timedelta``)
            is stored in the ``logs`` dict.
    """

    def __init__(self, metric_name="duration"):
        super().__init__()
        self.__epoch_start = None
        self.__metric_name = metric_name

    def on_epoch_begin(self, epoch, logs=None):
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        self.__epoch_start = datetime.datetime.now(datetime.timezone.utc)

    def on_epoch_end(self, epoch, logs=None):
        # logs defaults to None, and the start time is unset if
        # on_epoch_begin never ran; there is nothing to record in either case.
        if logs is None or self.__epoch_start is None:
            return
        now = datetime.datetime.now(datetime.timezone.utc)
        logs[self.__metric_name] = now - self.__epoch_start
You don't have to use datetime obviously. And if you have a callback that consumes the duration, make sure to put it after this callback in the callback list. Because History is called after every callback, it will always receive the duration.
I am trying to predict several million images with my trained model using a predict_generator in python 3 with keras and tensorflow as backend. The generator and the model predictions work, however, some images in the directory are broken or corrupted and cause the predict_generator to stop and throw an error. Once the image is removed it works again until the next corrupted/broken image gets fed through the function.
Since there are so many images it is not feasible to run a script to open every image and delete the ones that are throwing an error. Is there a way to incorporate a "skip image if broken" argument into the generator or flow from directory function?
Any help is greatly appreciated!
There's no such argument in ImageDataGenerator, nor in the flow_from_directory method, as you can see in the Keras docs for both (here and here). One workaround would be to extend the ImageDataGenerator class and override the flow_from_directory method to check whether the image is corrupted before yielding it from the generator. Here you can find its source code.
Since it happens during prediction, if you skip any image or batch, you need to keep track of which images are skipped, so that you can correctly map the prediction scores to the image file name.
Based on this idea, my DataGenerator is implemented with a valid image index tracker. In particular, focus on the variable valid_index where index of valid images are tracked.
class DataGenerator(keras.utils.Sequence):
    """Prediction-time batch loader that drops images that failed to load.

    For every batch, the DataFrame index labels of the images that loaded
    successfully are stored in ``valid_index`` under the batch number, so the
    predictions can be mapped back to their rows afterwards.

    Args:
        df: DataFrame with an ``image_file_names`` column.
        batch_size: number of rows per batch.
        verbose: stored on the instance; not used by this class.
        **kwargs: must contain ``valid_index``, a mapping filled per batch.
    """

    def __init__(self, df, batch_size, verbose=False, **kwargs):
        super().__init__()
        self.verbose = verbose
        self.df = df
        self.batch_size = batch_size
        self.valid_index = kwargs['valid_index']
        self.success_count = self.total_count = 0

    def __len__(self):
        return int(np.ceil(self.df.shape[0] / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return the preprocessed valid images of batch ``idx``, or None."""
        print('generator is loading batch ',idx)
        batch_df = self.df.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        self.total_count += batch_df.shape[0]
        # return a list whose element is either an image array (when image is valid) or None(when image is corrupted)
        x = load_batch_image_to_arrays(batch_df['image_file_names'])
        # filter out corrupted images
        tmp = [(u, i) for u, i in zip(x, batch_df.index.values.tolist())
               if u is not None]
        # boundary case: every image of the batch failed to load
        if len(tmp) == 0:
            print('[ERROR] All images loading failed')
            # based on https://github.com/keras-team/keras/blob/master/keras/utils/data_utils.py#L621,
            # Keras will automatically find the next batch if it returns None
            # NOTE(review): confirm this holds for the Keras version in use.
            return None
        print('successfully loaded image in {}th batch {}/{}'.format(str(idx), len(tmp), self.batch_size))
        self.success_count += len(tmp)
        x, batch_index = zip(*tmp)
        x = np.stack(x)  # list to np.array
        self.valid_index[idx] = batch_index
        # follow preprocess input function provided by keras
        # (the ``np.float`` alias was removed from NumPy; builtin float is
        # the same dtype)
        x = resnet50_preprocess(np.array(x, dtype=float))
        return x

    def on_epoch_end(self):
        print('total image count', self.total_count)
        print('successful images count', self.success_count)
        self.success_count = self.total_count = 0  # reset count after one epoch ends.
During prediction.
# Score every batch served by data_gen and drop size-1 axes from the output.
predictions = model.predict_generator(
    generator=data_gen,
    workers=10,
    use_multiprocessing=False,
    max_queue_size=20,
    verbose=1,
).squeeze()
# Collect the index labels of the valid images in ascending batch order.
indexes = [
    row_label
    for batch_number in sorted(data_gen.valid_index.keys())
    for row_label in data_gen.valid_index[batch_number]
]
result_df = df.loc[indexes]
result_df['score'] = predictions