Get ValueError when apply transfer learning in federated learning (TFF) - python

I want to use pre_trained model in federated learning as following code:
first I build my model and set the weights on model and then I freeze convolutional layers and remove 4 last layer.
def create_keras_model():
model = Sequential()
model.add(Conv2D(16, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu', input_shape=(226,232,1)))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Conv2D(32, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Conv2D(64, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Conv2D(64, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Conv2D(128, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Conv2D(128, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Conv2D(256, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))
return model
keras_model = create_keras_model()
server_state=FileCheckpointManager(root_dir= '/content/drive/MyDrive',
prefix= 'federated_clustering',
step= 1,
keep_total= 1,
keep_first= True).load_checkpoint(structure=server_state,round_num=10)
keras_model.set_weights(server_state)
for layer in keras_model.layers[:-4]:
layer.trainable = False
model_pre = Model(inputs=keras_model.input,outputs=keras_model.layers[14].output)
next, I build new model.
def create_keras_model1():
model = Sequential()
model.add(model_pre)
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))
return model
def model_fn():
# We _must_ create a new model here, and _not_ capture it from an external
# scope. TFF will call this within different graph contexts.
keras_model = create_keras_model1()
return tff.learning.from_keras_model(
keras_model,
input_spec=preprocessed_example_dataset.element_spec,
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
but I get ValueError when I want to use tff.learning.build_federated_averaging_process.
iterative_process = tff.learning.build_federated_averaging_process(
model_fn,
client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.001),
server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))
ValueError: Your Layer or Model is in an invalid state. This can happen for the following cases:
1. You might be interleaving estimator/non-estimator models or interleaving models/layers made in tf.compat.v1.Graph.as_default() with models/layers created outside of it. Converting a model to an estimator (via model_to_estimator) invalidates all models/layers made before the conversion (even if they were not the model converted to an estimator). Similarly, making a layer or a model inside a a tf.compat.v1.Graph invalidates all layers/models you previously made outside of the graph.
2. You might be using a custom keras layer implementation with custom __init__ which didn't call super().__init__. Please check the implementation of <class 'keras.engine.functional.Functional'> and its bases.
please help me to fix it.

You need to create a new keras model during the invocation of model_fn, as the code comment suggests. It seems you are using model_pre which you have already created before that, which is likely the core problem.

Related

CNN model, How does dense layer in CNN divide into two streams using keras

I am working on the CNN model. In the given CNN model I can not handle how to divide the 4th layer into two streams and get output.
I also built a model in Keras.
def _build_model(self):
model = Sequential()
model.add(Conv2D(8, (3, 3), strides=4, padding='same', input_shape=self.state_size))
model.add(Activation('relu'))
model.add(Conv2D(2, (2, 2), strides=4, padding='same'))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(self.action_size, activation='relu'))
model.compile(loss='mse', optimizer=Adam())
return model
How to handle it? An example would be appreciated.

Keras ImageDataGenerator validation_data

Cannot really figure the error here.
Key requisites:
augmented_images and val_data_gen are keras.preprocessing.image.ImageDataGenerator functions.
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), activation='relu', input_shape=(32,32,3)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
#model.add(Dropout(rate=0.5))
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.05),
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
model.summary()
history = model.fit(x=augmented_images,
validation_data=val_data_gen,
epochs=10,
steps_per_epoch=2000,
validation_steps=1000)
ValueError: Layer sequential_10 expects 1 inputs, but it received 5 input tensors.

TypeError: __init__() missing 1 required positional argument: 'units'

I am working in python and tensor flow but I miss 'units' argument and I do not know how to solve it, It looks like your post is mostly code; please add some more details.It looks like your post is mostly code; please add some more details.
here the code
def createModel():
model = Sequential()
# first set of CONV => RELU => MAX POOL layers
model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=inputShape))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(output_dim=NUM_CLASSES, activation='softmax'))
# returns our fully constructed deep learning + Keras image classifier
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
# use binary_crossentropy if there are two classes
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
return model
print("Reshaping trainX at..."+ str(datetime.now()))
#print(trainX.sample())
print(type(trainX)) # <class 'pandas.core.series.Series'>
print(trainX.shape) # (750,)
from numpy import zeros
Xtrain = np.zeros([trainX.shape[0],HEIGHT, WIDTH, DEPTH])
for i in range(trainX.shape[0]): # 0 to traindf Size -1
Xtrain[i] = trainX[i]
print(Xtrain.shape) # (750,128,128,3)
print("Reshaped trainX at..."+ str(datetime.now()))
print("Reshaping valX at..."+ str(datetime.now()))
print(type(valX)) # <class 'pandas.core.series.Series'>
print(valX.shape) # (250,)
from numpy import zeros
Xval = np.zeros([valX.shape[0],HEIGHT, WIDTH, DEPTH])
for i in range(valX.shape[0]): # 0 to traindf Size -1
Xval[i] = valX[i]
print(Xval.shape) # (250,128,128,3)
print("Reshaped valX at..."+ str(datetime.now()))
# initialize the model
print("compiling model...")
sys.stdout.flush()
model = createModel()
# print the summary of model
from keras.utils import print_summary
print_summary(model, line_length=None, positions=None, print_fn=None)
# add some visualization
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
SVG(model_to_dot(model).create(prog='dot', format='svg'))
Try changing this line:
model.add(Dense(output_dim=NUM_CLASSES, activation='softmax'))
to
model.add(Dense(NUM_CLASSES, activation='softmax'))
I'm not experience in keras but I could not find a parameter called output_dim on the documentation page for Dense. I think you meant to provide units but labelled it as output_dim
The Keras Dense layer documentation is as follows:
keras.layers.Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None)
Using the following :
classifier.add(Dense(6, activation='relu', kernel_initializer='glorot_uniform',input_dim=11))
Will work as here the units means the output_dim saying that we need 6 neurons in the hidden layer. The weights are initialized with the uniform function and the input layer has 11 independent variables of the dataset (input_dim) to feed the above-hidden layer.
I think it's a version issue. In updated version of keras for Dense there is no "output_dim" argument.
You can see this documentation link for Dense arguments.
https://keras.io/api/layers/core_layers/dense/
tf.keras.layers.Dense(
units,
activation=None,
use_bias=True,
kernel_initializer="glorot_uniform",
bias_initializer="zeros",
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs
)
So the first argument is "units", Which is mandatory.
instead of this line:
model.add(Dense(output_dim=NUM_CLASSES, activation='softmax'))
use this:
model.add(Dense(units=NUM_CLASSES, activation='softmax'))
or
model.add(Dense(NUM_CLASSES, activation='softmax'))

Theano vs. tensorflow backend in Keras

I have a classifier model that I trained using 'theano' backend. The model works properly and I got the expected classification perforamance. The tensor size is Nx3x28x112 However, I would like to use the same classifier in another file (main_file.py) which contains a GANs implementation (with'tensorflow' backend). Thereby, I want to use the same classificer in the main_file.py and to change the input size of the tensor in order to be Nx28x112x3 (that is the proper input for the tensorflow backend). While the training procedure starts the performance is not close to the one I got with 'theano' and is close to random performance. My model looks like:
def createModel():
model = Sequential()
# The first two layers with 32 filters of window size 3x3
model.add(Conv2D(28, (3, 3), padding='same', activation='relu', input_shape=(28, 112, 3)))
# or input_shape = (3, 28, 112) in case of theano backend
model.add(Conv2D(28, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(nClasses, activation='softmax'))
return model
What should I do in order to make the model perform properly? Is there any fundamental difference when the backend is changing except the order of the input tensors?

Why does Neural Network predict all input negative?

I'm working on a sentiment analysis project in python with word2vec as an embedding method. (in my NON_ENGLISH corpus I considered every negative tweet with the 0 label, positive = 1 and neutral = 2)Since I'm really new in this field I don't have any idea as to why my model predicts everything negative. I surfed the net before and read something about the number of hidden layer and batches , etc. which related to this error but I'm not sure that I understood correctly.
here is my keras model:
model = Sequential()
model.add(Conv1D(32, kernel_size=3, activation='elu',
padding='same', input_shape=
(max_tweet_length,vector_size)))
model.add(Conv1D(32, kernel_size=3, activation='elu',
padding='same'))
model.add(Conv1D(32, kernel_size=3, activation='elu',
padding='same'))
model.add(Conv1D(32, kernel_size=3, activation='elu',
padding='same'))
model.add(Dropout(0.25))
model.add(Conv1D(32, kernel_size=2, activation='elu',
padding='same'))
model.add(Conv1D(32, kernel_size=2, activation='elu',
padding='same'))
model.add(Conv1D(32, kernel_size=2, activation='elu',
padding='same'))
model.add(Conv1D(32, kernel_size=2, activation='elu',
padding='same'))
model.add(Dropout(0.25))
model.add(Dense(256, activation='tanh'))
model.add(Dense(256, activation='tanh'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(2, activation='sigmoid'))
thanks a lot!

Categories