I used the following code to draw a waterfall plot.
# Tree explainer for the model `gbm`; no background data is supplied.
explainer = shap.TreeExplainer(gbm, data=None)
shap_values = explainer.shap_values(P)

# Visualize the first prediction's explanation.
base_value = explainer.expected_value[0]
first_values = shap_values[0]
shap.waterfall_plot(base_value, first_values)
It shows this error
AttributeError: 'numpy.float64' object has no attribute 'base_values'
Then I tried the workaround suggested in a similar Q&A on GitHub: https://github.com/slundberg/shap/issues/2255
explainer = shap.TreeExplainer(gbm, data=None)
shap_values = explainer(P)


# NOW CHANGED: SET UP THE WORKAROUND
class helper_object():
    """
    This wraps the shap object.
    It takes as input i, which indicates the index of the observation to be explained.

    The attributes set here are the ones the waterfall plot reads from the
    object it is given: base_values, data, display_data, feature_names, values.
    """

    def __init__(self, i):
        self.base_values = shap_values.base_values[i][0]
        self.data = P.loc[i]
        # The waterfall plot accesses `display_data` and raises AttributeError
        # when it is missing; None makes it fall back to `data`.
        self.display_data = None
        self.feature_names = P.columns.to_list()
        # NOTE(review): `base_values` picks column 0 above; if `values[i]` also
        # has one column per class, the same column may need selecting here.
        self.values = shap_values.values[i]


# visualize the sixth prediction's explanation using the workaround
shap.waterfall_plot(helper_object(5), len(shap_values[0]))
It shows this error
AttributeError: 'list' object has no attribute 'base_values'
I changed to explainer(P) instead of explainer.shap_values(P)
It shows this error
AttributeError: 'helper_object' object has no attribute 'display_data'
I printed the values, and it showed me an array of values instead of a single constant.
# Inspect the explanation of the first observation.
first_explanation = shap_values[0]
print(first_explanation.base_values)
print(type(shap_values.base_values[0]))
print(first_explanation)
[ 4.03719405 -4.03719405]
<class 'numpy.ndarray'>
.values =
array([[-5.19398412e-02, 5.19398412e-02],
[-1.52522416e+00, 1.52522416e+00],
[-7.06765115e-01, 7.06765115e-01],
[-2.52875346e-01, 2.52875346e-01],
[-1.54701093e-01, 1.54701093e-01],
[ 6.35169405e-03, -6.35169405e-03],
[ 6.57487803e-03, -6.57487803e-03],
[ 3.70178479e-02, -3.70178479e-02],
[ 0.00000000e+00, 0.00000000e+00],
[-6.99857166e-02, 6.99857166e-02],
[-1.38675779e-01, 1.38675779e-01],
[-6.09464170e-02, 6.09464170e-02],
[-1.94668294e-03, 1.94668294e-03],
[-1.15005190e-03, 1.15005190e-03],
[-1.11472815e+00, 1.11472815e+00],
[-1.05449992e-01, 1.05449992e-01],
[-1.82219843e-01, 1.82219843e-01],
[ 1.58137725e-02, -1.58137725e-02],
[-3.98713235e-01, 3.98713235e-01],
[-9.45830700e-01, 9.45830700e-01],
[-6.23312829e-02, 6.23312829e-02],
[ 6.32792510e-02, -6.32792510e-02],
[-6.01518308e-02, 6.01518308e-02],
[ 1.87929746e-04, -1.87929746e-04],
[-1.58341844e-03, 1.58341844e-03],
[-5.08391166e-03, 5.08391166e-03],
[ 0.00000000e+00, 0.00000000e+00],
[ 1.05478554e-02, -1.05478554e-02],
[ 2.37974651e-02, -2.37974651e-02],
[ 5.65731935e-03, -5.65731935e-03],
[ 2.05245700e-03, -2.05245700e-03]])
.base_values =
array([ 4.03719405, -4.03719405])
.data =
array([ 1.07000001e-01, 7.87079980e+03, 4.26199989e+01, 9.39999998e-01,
1.48000002e+01, 2.19000006e+00, 3.10000002e-01, 7.98212012e+03,
2.61899994e+02, 8.40000000e+01, 2.00000000e+01, 3.74000001e+00,
-1.00000000e+00, -1.00000000e+00, 4.00000000e+00, 3.00000000e+00,
1.00000000e+00, -1.87000008e+01, 8.41299988e+02, 1.36000004e+01,
-8.52000046e+00, 6.99999809e-01, -7.65000000e+02, 5.40000010e+00,
0.00000000e+00, -1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
What should I do to generate a waterfall plot for a single observation?
UPDATE
# --- Labelled frame used for training ----------------------------------------
A = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
B = [21, 5, 7, 8, 15, 36, 20, 18, 15, 13]
W = [1, 0, 0, 0, 0, 0, 1, 1, 0, 0]
C = ["Adult", "Child", "Child", "Child", "Child", "Adult", "Adult", "Adult", "Child", "Child"]

Number, Age, Car, group = (
    pd.Series(values, name=label)
    for label, values in (("Number", A), ("Age", B), ("Car", W), ("group", C))
)
# Columns are attached one at a time; all Series share the default index.
User_ages = Number.to_frame().join(Age)
User_cars = User_ages.join(Car)
data = User_cars.join(group)

features = ['Age', 'Car']
X = data[features]
y = data['group']

# --- Unlabelled frame whose predictions are explained ------------------------
D = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
E = [22, 11, 17, 21, 25, 31, 30, 8, 5, 3]
F = [1, 0, 0, 0, 1, 0, 1, 0, 0, 0]

Number, Age, Car = (
    pd.Series(values, name=label)
    for label, values in (("Number", D), ("Age", E), ("Car", F))
)
data1 = Number.to_frame().join(Age)
data2 = data1.join(Car)
P = data2[features]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=22
)

# Hyper-parameter grid explored by the cross-validated search.
params = dict(
    num_leaves=[20, 30, 40, 50, 60, 70],
    learning_rate=[0.05, 0.01],
    n_estimators=[100, 300, 500],
    subsample=[0.95],
    colsample_bytree=[0.95],
    n_jobs=[7],
    random_state=[22],
)
grid_search = GridSearchCV(LGBMClassifier(), params, cv=2, verbose=1, error_score='raise')
gcv = grid_search.fit(X_train, y_train)
gbm = gcv.best_estimator_
gbm_pred = gbm.predict(X_test)

explainer = shap.TreeExplainer(gbm, data=None)
shap_values = explainer.shap_values(P)
# visualize the first prediction's explanation
base_value = explainer.expected_value[0]
shap.waterfall_plot(base_value, shap_values[0])
Related
I can't understand why TabNetRegressor does not accept the parameters I created using Optuna. Using TabNetClassifier gives me no problem, but if I use TabNetRegressor it reports an unexpected keyword argument.
`
clf = TabNetRegressor(**final_params)  # TabNetRegressor()

# The held-out split serves as the evaluation set during training.
validation_pair = (X_test.values, y_test.values)
clf.fit(
    X_train=X_train.values,
    y_train=y_train.values,
    eval_set=[validation_pair],
    patience=TabNet_params['patience'],
    max_epochs=epochs,
    eval_metric=['rmse'],
)

test_predictions = clf.predict(X_test.values)
res.append(roc_auc_score(y_test.values, test_predictions))
File line 504, in main_pipeline2
clf = TabNetRegressor(**final_params) # TabNetRegressor()
TypeError: __init__() got an unexpected keyword argument 'n_d'
This is how I create the hyperparameters; I use TabNetClassifier here because the regressor gives me problems:
def Objective(trial):
    """Optuna objective: fit a TabNetClassifier with sampled settings.

    Hyperparameters are drawn from ``trial``; the model is fitted on the
    module-level ``X_train``/``y_train`` with ``X_test``/``y_test`` as the
    evaluation set.  Returns the ROC AUC score of the model's predictions
    on ``X_test``.
    """
    # The suggest_* calls are issued in a fixed order on purpose.
    mask_choice = trial.suggest_categorical("mask_type", ["entmax", "sparsemax"])
    width = trial.suggest_int("n_da", 56, 64, step=4)
    step_count = trial.suggest_int("n_steps", 1, 3, step=1)
    gamma_value = trial.suggest_float("gamma", 1., 1.4, step=0.2)
    shared_count = trial.suggest_int("n_shared", 1, 3)
    sparsity = trial.suggest_float("lambda_sparse", 1e-6, 1e-3, log=True)

    # changing sheduler patience to be lower than early stopping patience
    scheduler_settings = {
        "mode": "min",
        "patience": trial.suggest_int("patienceScheduler", low=3, high=10),
        "min_lr": 1e-5,
        "factor": 0.5,
    }
    tabnet_params = {
        "n_d": width,
        "n_a": width,
        "n_steps": step_count,
        "gamma": gamma_value,
        "lambda_sparse": sparsity,
        "optimizer_fn": torch.optim.Adam,
        "optimizer_params": {"lr": 2e-2, "weight_decay": 1e-5},
        "mask_type": mask_choice,
        "n_shared": shared_count,
        "scheduler_params": scheduler_settings,
        "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
        "verbose": 0,
    }

    regressor = TabNetClassifier(**tabnet_params)

    # early stopping
    stop_patience = trial.suggest_int("patience", low=15, high=30)
    epoch_budget = trial.suggest_int('epochs', 1, 100)
    regressor.fit(
        X_train=X_train.values,
        y_train=y_train.values,
        eval_set=[(X_test.values, y_test.values)],
        patience=stop_patience,
        max_epochs=epoch_budget,
        eval_metric=['rmse'],
    )

    predictions = regressor.predict(X_test.values)
    return roc_auc_score(y_test.values, predictions)
study = optuna.create_study(direction="maximize", study_name='TabNet optimization')

# Hard-coded parameter sets; an earlier one is kept below for reference.
# TabNet_params = {'mask_type': 'entmax', 'n_da': 56, 'n_steps': 1, 'gamma': 1.2, 'n_shared': 1, 'lambda_sparse': 0.00018593172980376437, 'patienceScheduler': 8, 'patience': 17, 'epochs': 13}
TabNet_params = dict(
    mask_type='entmax',
    n_da=56,
    n_steps=3,
    gamma=1.4,
    n_shared=2,
    lambda_sparse=7.628773104483722e-05,
    patienceScheduler=10,
    patience=29,
    epochs=45,
)

# Constructor keyword arguments; n_d and n_a share the single 'n_da' value.
final_params = {
    "n_d": TabNet_params['n_da'],
    "n_a": TabNet_params['n_da'],
    "n_steps": TabNet_params['n_steps'],
    "gamma": TabNet_params['gamma'],
    "lambda_sparse": TabNet_params['lambda_sparse'],
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": {"lr": 2e-2, "weight_decay": 1e-5},
    "mask_type": TabNet_params['mask_type'],
    "n_shared": TabNet_params['n_shared'],
    "scheduler_params": {
        "mode": "min",
        "patience": TabNet_params['patienceScheduler'],
        "min_lr": 1e-5,
        "factor": 0.5,
    },
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "verbose": 0,
}
epochs = TabNet_params['epochs']
I am working on a neural net for time series classification with metadata.
My problem is that I have a data frame with different datatypes, which all need to be preprocessed. In doing so I generate a lot of different input layers with individual names. But in my tf.data dataset the data frame is stored as a whole, so model.fit receives a single metadata input while the model expects one input per feature.
Do you have any idea as to how I can get the model.fit to accept the data I have and maybe even get it all into a nicer software architecture?
import pandas as pd
import tensorflow as tf
def get_normalization_layer(
    dataset, input_name, all_inputs, encoded_features, axis=None
):
    """Create a Keras input and an adapted Normalization layer for one feature.

    The layer is adapted on the ``input_name`` entry of the first component
    of each ``dataset`` element.  The new input is appended to ``all_inputs``
    and the normalized tensor to ``encoded_features``; nothing is returned.
    """
    normalizer = tf.keras.layers.Normalization(axis=axis)
    column_ds = dataset.map(lambda features, _label: features[input_name])
    normalizer.adapt(column_ds)

    feature_input = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.float32)
    normalized = normalizer(feature_input)

    all_inputs.append(feature_input)
    encoded_features.append(normalized)
def get_category_encoding_layer(
    dataset,
    input_name,
    dtype,
    all_inputs,
    encoded_features,
    vocabulary=None,
    max_tokens=None,
):
    """Create a Keras input with lookup + category encoding for one feature.

    Args:
        dataset: dataset whose elements are ``(features, label)`` pairs; only
            used to adapt the lookup layer when ``vocabulary`` is None.
        input_name: key of the feature and name of the created input.
        dtype: ``"string"`` (StringLookup) or ``"int"`` (IntegerLookup).
        all_inputs: list the created input is appended to.
        encoded_features: list the encoded tensor is appended to.
        vocabulary: optional fixed vocabulary passed to the lookup layer.
        max_tokens: optional vocabulary size limit passed to the lookup layer.

    Raises:
        ValueError: if ``dtype`` is neither ``"string"`` nor ``"int"``.
    """
    # Fail early with a clear message instead of hitting an unbound `index`.
    if dtype not in ("string", "int"):
        raise ValueError(
            f"Unsupported dtype {dtype!r} for input {input_name!r}; "
            "expected 'string' or 'int'."
        )

    if dtype == "string":
        index = tf.keras.layers.StringLookup(
            max_tokens=max_tokens, vocabulary=vocabulary
        )
        input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.string)
    else:
        index = tf.keras.layers.IntegerLookup(
            max_tokens=max_tokens, vocabulary=vocabulary
        )
        input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.int64)

    # Without a fixed vocabulary, learn it from the feature's values.
    if vocabulary is None:
        feature_ds = dataset.map(lambda x, y: x[input_name])
        index.adapt(feature_ds)

    encoder = tf.keras.layers.CategoryEncoding(num_tokens=index.vocabulary_size())
    encoder = encoder(index(input_layer))
    all_inputs.append(input_layer)
    encoded_features.append(encoder)
################################################################################
# `np` is used below, but this snippet only imports pandas and tensorflow.
import numpy as np

# Random stand-in for the measurement series.
measurement_data = np.random.rand(150, 11, 2400)
# Metadata with mixed column types plus the "Labels" target column.
meta_data = pd.DataFrame(
    data={
        "Product": ["A", "B", "C", "D"],
        "Length": [23, 22, 21, 24],
        "Width": [11.2, 23.4, 57.35, 0],
        "Labels": [0, 0, 1, 0],
    }
)
################################################################################
# Split the label column off and give every remaining column an extra axis.
dataframe = meta_data.copy()
labels = dataframe.pop("Labels")
dataframe = {key: np.array(value)[:, tf.newaxis] for key, value in dataframe.items()}
# Two separate datasets: the measurements, and (metadata dict, label) pairs.
dataset_measurement = tf.data.Dataset.from_tensor_slices(measurement_data)
dataset_meta = tf.data.Dataset.from_tensor_slices((dataframe, labels))
################################################################################
# Both lists are filled in lock-step; index 0 is the measurement input.
all_inputs = []
encoded_features = []
normalization_layer = tf.keras.layers.Normalization(axis=1)
feature_ds = dataset_measurement.map(lambda x: x)
normalization_layer.adapt(feature_ds)
# The input shape is taken from the first element of the measurement dataset.
input_layer = tf.keras.Input(
shape=list(feature_ds)[0].shape,
name="measurement_input",
dtype=tf.float32,
)
# From here on `normalization_layer` names the layer's output tensor.
normalization_layer = normalization_layer(input_layer)
all_inputs.append(input_layer)
encoded_features.append(normalization_layer)
# One named input per metadata column: "Product", "Length", "Width".
get_category_encoding_layer(
dataset_meta, "Product", "string", all_inputs, encoded_features
)
get_category_encoding_layer(dataset_meta, "Length", "int", all_inputs, encoded_features)
get_normalization_layer(dataset_meta, "Width", all_inputs, encoded_features)
dataset = tf.data.Dataset.zip((dataset_measurement, dataset_meta))
# NOTE(review): the feature dict built here only has the keys
# "measurement_input" and "meta_input", not one key per named input; this is
# what the ValueError quoted further down complains about.
dataset = dataset.map(
lambda x, y: ({"measurement_input": x, "meta_input": y[:-1]}, y[-1])
)
dataset = dataset.batch(64)
dataset = dataset.prefetch(64)
################################################################################
# Convolutional branch on the measurement features (encoded_features[0]).
conv1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(
encoded_features[0]
)
conv1 = tf.keras.layers.BatchNormalization()(conv1)
conv1 = tf.keras.layers.ReLU()(conv1)
gap = tf.keras.layers.GlobalAveragePooling1D()(conv1)
# Dense branch on the concatenated metadata features.
all_features = tf.keras.layers.concatenate(encoded_features[1:])
x1 = tf.keras.layers.Dense(128, activation="relu")(all_features)
x1 = tf.keras.layers.Dropout(0.5)(x1)
# Merge both branches and classify into two classes.
meta_and_measurement = tf.keras.layers.concatenate([gap, x1])
f1 = tf.keras.layers.Dense(128, activation="relu")(meta_and_measurement)
f1 = tf.keras.layers.Dropout(0.5)(f1)
output_layer = tf.keras.layers.Dense(2, activation="softmax")(f1)
################################################################################
model = tf.keras.models.Model(inputs=all_inputs, outputs=output_layer)
model.compile(
optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=["sparse_categorical_accuracy"],
)
# ValueError: Missing data for input "Product".
# You passed a data dictionary with keys ['measurement_input', 'meta_input'].
# Expected the following keys: ['measurement_input', 'Product', 'Length', 'Width']
history = model.fit(
dataset,
epochs=50,
verbose=1,
)
I found an answer, but it feels rather hacky, and it would be lovely if someone had a better idea. You can use a function to unpack and recombine the dataset into the structure that TensorFlow expects:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
def get_normalization_layer(
    dataset, input_name, all_inputs, encoded_features, axis=None
):
    """Register a normalized numeric feature.

    A Normalization layer is adapted on the ``input_name`` entry of the
    first component of each ``dataset`` element, then applied to a new Keras
    input of that name.  The input goes into ``all_inputs`` and the
    normalized tensor into ``encoded_features``; there is no return value.
    """
    scaler = tf.keras.layers.Normalization(axis=axis)
    scaler.adapt(dataset.map(lambda features, _label: features[input_name]))

    numeric_input = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.float32)
    scaled = scaler(numeric_input)

    all_inputs.append(numeric_input)
    encoded_features.append(scaled)
def get_category_encoding_layer(
    dataset,
    input_name,
    dtype,
    all_inputs,
    encoded_features,
    vocabulary=None,
    max_tokens=None,
):
    """Create a Keras input with lookup + category encoding for one feature.

    Args:
        dataset: dataset whose elements are ``(features, label)`` pairs; only
            used to adapt the lookup layer when ``vocabulary`` is None.
        input_name: key of the feature and name of the created input.
        dtype: ``"string"`` (StringLookup) or ``"int"`` (IntegerLookup).
        all_inputs: list the created input is appended to.
        encoded_features: list the encoded tensor is appended to.
        vocabulary: optional fixed vocabulary passed to the lookup layer.
        max_tokens: optional vocabulary size limit passed to the lookup layer.

    Raises:
        ValueError: if ``dtype`` is neither ``"string"`` nor ``"int"``.
    """
    # Fail early with a clear message instead of hitting an unbound `index`.
    if dtype not in ("string", "int"):
        raise ValueError(
            f"Unsupported dtype {dtype!r} for input {input_name!r}; "
            "expected 'string' or 'int'."
        )

    if dtype == "string":
        index = tf.keras.layers.StringLookup(
            max_tokens=max_tokens, vocabulary=vocabulary
        )
        input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.string)
    else:
        index = tf.keras.layers.IntegerLookup(
            max_tokens=max_tokens, vocabulary=vocabulary
        )
        input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.int64)

    # Without a fixed vocabulary, learn it from the feature's values.
    if vocabulary is None:
        feature_ds = dataset.map(lambda x, y: x[input_name])
        index.adapt(feature_ds)

    encoder = tf.keras.layers.CategoryEncoding(num_tokens=index.vocabulary_size())
    encoder = encoder(index(input_layer))
    all_inputs.append(input_layer)
    encoded_features.append(encoder)
################################################################################
# Random stand-in for the measurement series.
measurement_data = np.random.rand(150, 11, 2400)
# Metadata with mixed column types plus the "Labels" target column.
meta_data = pd.DataFrame(
data={
"Product": ["A", "A", "C", "D"],
"Length": [23, 22, 21, 24],
"Width": [11.2, 23.4, 57.35, 0],
"Labels": [0, 0, 1, 0],
}
)
################################################################################
# Split the label column off and give every remaining column an extra axis.
dataframe = meta_data.copy()
labels = dataframe.pop("Labels")
dataframe = {key: np.array(value)[:, tf.newaxis] for key, value in dataframe.items()}
# Two separate datasets: the measurements, and (metadata dict, label) pairs.
dataset_measurement = tf.data.Dataset.from_tensor_slices(measurement_data)
dataset_meta = tf.data.Dataset.from_tensor_slices((dataframe, labels))
################################################################################
# Both lists are filled in lock-step; index 0 is the measurement input.
all_inputs = []
encoded_features = []
normalization_layer = tf.keras.layers.Normalization(axis=1)
feature_ds = dataset_measurement.map(lambda x: x)
normalization_layer.adapt(feature_ds)
# The input shape is taken from the first element of the measurement dataset.
input_layer = tf.keras.Input(
shape=list(feature_ds)[0].shape,
name="measurement_input",
dtype=tf.float32,
)
# From here on `normalization_layer` names the layer's output tensor.
normalization_layer = normalization_layer(input_layer)
all_inputs.append(input_layer)
encoded_features.append(normalization_layer)
# One named input per metadata column: "Product", "Length", "Width".
get_category_encoding_layer(
dataset_meta, "Product", "string", all_inputs, encoded_features
)
get_category_encoding_layer(dataset_meta, "Length", "int", all_inputs, encoded_features)
get_normalization_layer(dataset_meta, "Width", all_inputs, encoded_features)
# Each zipped element is (measurement, (metadata dict, label)).
dataset = tf.data.Dataset.zip((dataset_measurement, dataset_meta))
def map_func(x, y):
    """Flatten a zipped ``(measurement, (metadata, label))`` element.

    Returns ``(features, label)``: ``features`` holds ``x`` under
    ``"measurement_input"`` followed by every entry of the mapping ``y[0]``
    under its own key, and ``label`` is ``y[-1]``.
    """
    features = {"measurement_input": x, **y[0]}
    return features, y[-1]
# Re-key every element so the feature dict has one entry per model input.
data_set = dataset.map(map_func)
data_set = data_set.batch(64)
data_set = data_set.prefetch(64)
################################################################################
# Convolutional branch on the measurement features (encoded_features[0]).
conv1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(
encoded_features[0]
)
conv1 = tf.keras.layers.BatchNormalization()(conv1)
conv1 = tf.keras.layers.ReLU()(conv1)
gap = tf.keras.layers.GlobalAveragePooling1D()(conv1)
# Dense branch on the concatenated metadata features.
all_features = tf.keras.layers.concatenate(encoded_features[1:])
x1 = tf.keras.layers.Dense(128, activation="relu")(all_features)
x1 = tf.keras.layers.Dropout(0.5)(x1)
# Merge both branches and classify into two classes.
meta_and_measurement = tf.keras.layers.concatenate([gap, x1])
f1 = tf.keras.layers.Dense(128, activation="relu")(meta_and_measurement)
f1 = tf.keras.layers.Dropout(0.5)(f1)
output_layer = tf.keras.layers.Dense(2, activation="softmax")(f1)
################################################################################
model = tf.keras.models.Model(inputs=all_inputs, outputs=output_layer)
model.compile(
optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=["sparse_categorical_accuracy"],
)
# Write a diagram of the model to "model.png" in the current working directory.
tf.keras.utils.plot_model(
model,
to_file=os.path.join(os.getcwd(), "model.png"),
show_shapes=True,
show_dtype=True,
)
history = model.fit(
data_set,
epochs=50,
verbose=1,
)
This is my code:
fold = StratifiedKFold(10, shuffle=True, random_state=42)
score = []            # f1 on the held-out fold, four entries per fold
cat_prediction = []   # per-fold predictions on `tt`, one list per model
lgbm_prediction = []
forest_prediction = []
rgb_prediction = []
oldList = []

for train_s, test_s in tqdm(fold.split(X, Y)):
    xtrain, ytrain = X.iloc[train_s], Y.iloc[train_s]
    xtest, ytest = X.iloc[test_s], Y.iloc[test_s]

    # Each model is fitted on the training part of the fold only. Fitting on
    # the complete frame would let the model see `xtest`, so the f1 scores
    # below would no longer measure out-of-fold performance.
    # NOTE(review): assumes X / Y are the features / "Response" column of `df`.
    model = RGFClassifier(algorithm="RGF_Sib", test_interval=100, normalize=True)
    model.fit(xtrain, ytrain)
    score.append(f1_score(ytest, model.predict(xtest)))
    rgb_prediction.append(model.predict(tt))

    model = RandomForestClassifier(class_weight=class_weight)
    model.fit(xtrain, ytrain)
    score.append(f1_score(ytest, model.predict(xtest)))
    forest_prediction.append(model.predict(tt))

    model = CatBoostClassifier(iterations=400, class_weights=class_weight, silent=True)
    model.fit(xtrain, ytrain)
    score.append(f1_score(ytest, model.predict(xtest)))
    cat_prediction.append(model.predict(tt))

    model = LGBMClassifier(class_weight=class_weight)
    model.fit(xtrain, ytrain)
    score.append(f1_score(ytest, model.predict(xtest)))
    lgbm_prediction.append(model.predict(tt))
I want to aggregate the scores to make a blend of the different models.
I tried
(cat_prediction + lgbm_prediction + forest_prediction + rgb_prediction)/4
and
(np.mean(cat_prediction , 0) + np.mean(lgbm_prediction, 0) + np.mean(forest_prediction , 0) + np.mean(rgb_prediction, 0))/4
But the results are not as desired.
How do I achieve this?
class SigmoidNeuron:
    """A single sigmoid neuron trained with full-batch gradient descent.

    ``w`` has shape ``(1, n_features)`` and ``b`` is the bias; both are None
    until ``fit`` initialises them (or the caller assigns them).
    """

    def __init__(self):
        self.w = None
        self.b = None

    def perceptron(self, x):
        """Return the pre-activation ``x . w^T + b``."""
        return np.dot(x, self.w.T) + self.b

    def sigmoid(self, x):
        """Return the logistic function of ``x``."""
        return 1.0 / (1.0 + np.exp(-x))

    def grad_w(self, x, y):
        """Return the gradient of ``0.5 * (y_pred - y)**2`` w.r.t. ``w`` for one sample."""
        y_pred = self.sigmoid(self.perceptron(x))
        return (y_pred - y) * y_pred * (1 - y_pred) * x

    def grad_b(self, x, y):
        """Return the gradient of ``0.5 * (y_pred - y)**2`` w.r.t. ``b`` for one sample."""
        y_pred = self.sigmoid(self.perceptron(x))
        return (y_pred - y) * y_pred * (1 - y_pred)

    def fit(self, x, y, epochs=1, learning_rate=1, initialise=True):
        """Train on samples ``x`` (n_samples, n_features) with targets ``y``.

        Args:
            x: 2-D array-like of inputs; converted to float.
            y: array-like of targets, one per row of ``x``; converted to float.
            epochs: number of passes over the data.
            learning_rate: step size applied to the summed gradients.
            initialise: when True, draw ``w`` at random and reset ``b`` to 0.

        Raises:
            ValueError: if ``x`` or ``y`` cannot be converted to float.
        """
        # Use the arguments, not module-level globals, and cast to float so
        # string-typed arrays do not reach np.dot.
        samples = np.asarray(x, dtype=float)
        targets = np.asarray(y, dtype=float)

        # initialise w,b
        if initialise:
            self.w = np.random.randn(1, samples.shape[1])
            self.b = 0

        for _ in range(epochs):
            dw = 0
            db = 0
            for sample, target in zip(samples, targets):
                dw += self.grad_w(sample, target)
                db += self.grad_b(sample, target)
            self.w -= learning_rate * dw
            self.b -= learning_rate * db
`
I'm running code for a sigmoid neuron and I'm getting an error when fitting this class with my data:
X_scaled_train.astype(float)
array([[ 1.29929126, -0.90185206, 0.03173306, ..., -0.14142136,
-0.15523011, 0.21232515],
[-1.16225208, -0.86697607, 1.03451971, ..., -0.14142136,
-0.15523011, 0.21232515],
[ 1.77523922, 0.65594214, 0.03173306, ..., -0.14142136,
-0.15523011, 0.21232515],
...,
[ 1.44058831, -0.58796815, -0.66464655, ..., -0.14142136,
-0.15523011, 0.21232515],
[-1.42253612, 0.50481285, 1.54984063, ..., -0.14142136,
-0.15523011, 0.21232515],
[ 1.06875397, 0.6791928 , 0.97880934, ..., -0.14142136,
-0.15523011, 0.21232515]])
Y_scaled_train.astype(float)
array([[0.68],
[0.72],
[0.72],
[0.6 ],
[0.8 ],
[0.64],
[0.68],
These are the data for train set
while I'm running this line
sn.fit(X_scaled_train,Y_scaled_train,epochs=10,learning_rate=0.2)
I'm getting that type error
what should I do to remove it
the error shows
TypeError Traceback (most recent call last)
<ipython-input-167-51016d58d1f5> in <module>()
----> 1 sn.fit(X_scaled_train,Y_scaled_train,epochs=10,learning_rate=0.2)
2 frames
<ipython-input-25-2e09637c6d09> in perceptron(self, x)
4 self.b=None
5 def perceptron(self,x):
----> 6 return np.dot(x,self.w.T)+self.b
7 def sigmoid(self,x):
8 return 1.0/(1.0+np.exp(-x))
<__array_function__ internals> in dot(*args, **kwargs)
TypeError: Cannot cast array data from dtype('float64') to dtype('<U32') according to the rule 'safe'
Use:
np.array(your_list)
# Use the builtin `float`: the `np.float` alias was removed from NumPy.
your_list.values.astype(float)
Or:
new_list = [float(i) for i in your_list]
Or:
For huge arrays, I would strongly recommend you to use numpy:
np.array(your_list, dtype=np.float32)
np.array(your_list,dtype=float)
You don't have to specify the dtype if the values are already floats:
np.array(your_list)
Or:
# Read one number from stdin, rounded to one decimal place.
new_list = float("{:.1f}".format(float(input())))
# Append to the list instance; `list.append(...)` would call the builtin
# type's unbound method and raise TypeError.
your_list.append(new_list)
I am following the tensorflow neural machine translation tutorial:
https://www.tensorflow.org/tutorials/text/nmt_with_attention
I am trying to save the Encoder and Decoder models, which are subclasses of tf.keras.Model and work properly during training and inference. When I try to save them, I get the following error:
TypeError: call() missing 1 required positional argument: 'initial_state'
Here is the code:
class Encoder(tf.keras.Model):
    """Embedding followed by an LSTM that returns its sequence and final states."""

    def __init__(self, vocab_size, embedding_matrix, n_units, batch_size):
        super().__init__()
        self.n_units = n_units
        self.batch_size = batch_size
        # The embedding width is taken from the supplied weight matrix.
        embedding_dim = embedding_matrix.shape[1]
        self.embedding = Embedding(
            vocab_size,
            embedding_dim,
            weights=[embedding_matrix],
            trainable=True,
            mask_zero=True,
        )
        self.lstm = LSTM(
            n_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform",
        )

    def call(self, input_utterence, initial_state):
        """Embed the tokens and run the LSTM starting from ``initial_state``."""
        embedded = self.embedding(input_utterence)
        outputs = self.lstm(embedded, initial_state=initial_state)
        sequence_output, state_h, state_c = outputs
        return sequence_output, state_h, state_c

    def create_initial_state(self):
        """Return a zero tensor of shape ``(batch_size, n_units)``."""
        state_shape = (self.batch_size, self.n_units)
        return tf.zeros(state_shape)
encoder = Encoder(vocab_size, embedding_matrix, LSTM_DIM, BATCH_SIZE)
# do some training...
# Export the encoder built above under the "encoder_model" path.
tf.saved_model.save(encoder, "encoder_model")
I also tried to make the call method take one input list argument only and unpack the variables I need within the method but then I get the following error when trying to save:
File "C:\Users\Fady\Documents\Machine Learning\chatbot\models\seq2seq_model.py", line 32, in call
input_utterence, initial_state = inputs
ValueError: too many values to unpack (expected 2)
You can export the model successfully if you package your inputs in a list. You also need to specify the input signatures to export your model. Here is your code, with slight modifications, which works:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM
import numpy as np
print('TensorFlow: ', tf.__version__)

# Toy configuration used to exercise the export.
BATCH_SIZE, LSTM_DIM = 16, 256
vocab_size = 10000
# Random embedding weights with 300 columns per vocabulary entry.
embedding_matrix = np.random.randn(vocab_size, 300)
class Encoder(tf.keras.Model):
    """Embedding + LSTM encoder whose ``call`` takes ``[tokens, initial_state]``."""

    def __init__(self, vocab_size, embedding_matrix, n_units, batch_size):
        super(Encoder, self).__init__()
        self.n_units = n_units
        self.batch_size = batch_size
        self.embedding = Embedding(vocab_size, embedding_matrix.shape[1], weights=[embedding_matrix], trainable=True, mask_zero=True)
        self.lstm = LSTM(n_units, return_sequences=True, return_state=True, recurrent_initializer="glorot_uniform")

    # The decorator is required: the export code calls
    # `encoder.call.get_concrete_function(...)`, which only exists on a
    # tf.function-wrapped callable.
    @tf.function
    def call(self, inputs):
        """Run the encoder on ``inputs = [input_utterence, initial_state]``.

        Returns the LSTM output sequence and the two final LSTM states.
        """
        input_utterence, initial_state = inputs
        input_embed = self.embedding(input_utterence)
        encoder_states, h1, c1 = self.lstm(input_embed, initial_state=initial_state)
        return encoder_states, h1, c1

    def create_initial_state(self):
        """Return a zero tensor of shape ``(batch_size, n_units)``."""
        return tf.zeros((self.batch_size, self.n_units))
random_input = tf.random.uniform(shape=[BATCH_SIZE, 3], maxval=vocab_size, dtype=tf.int32)
encoder = Encoder(vocab_size, embedding_matrix, LSTM_DIM, BATCH_SIZE)
initial_state = [encoder.create_initial_state(), encoder.create_initial_state()]
_ = encoder([random_input, initial_state])  # required so that encoder.build is triggered

# Input signature mirroring the nested [tokens, [h, c]] structure passed to call().
utterance_spec = tf.TensorSpec(shape=[None, None], dtype=tf.int32, name='input_utterence')
state_specs = [
    tf.TensorSpec(shape=[None, LSTM_DIM], dtype=tf.float32, name='initial_h'),
    tf.TensorSpec(shape=[None, LSTM_DIM], dtype=tf.float32, name='initial_c'),
]
serving_fn = encoder.call.get_concrete_function([utterance_spec, state_specs])
tf.saved_model.save(encoder, "encoder_model", signatures=serving_fn)

# Reload the export and run it on the same inputs.
loaded_model = tf.saved_model.load('encoder_model')
loaded_model([random_input, initial_state])
output:
TensorFlow: 2.2.0-rc1
WARNING:tensorflow:From /home/dl_user/tf_stable/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: encoder_model/assets
(<tf.Tensor: shape=(16, 3, 256), dtype=float32, numpy=
array([[[-0.06000457, 0.02422162, -0.05310762, ..., -0.01340707,
0.12212028, -0.02747637],
[ 0.13303193, 0.3119418 , -0.17995344, ..., -0.10185111,
0.09568192, 0.06919193],
[-0.08075664, -0.11490613, -0.20294832, ..., -0.14999194,
0.02177649, 0.05538464]],
[[-0.03792192, -0.08431012, 0.03687581, ..., -0.1768839 ,
-0.10469476, 0.08730042],
[-0.02956271, 0.43850696, -0.07400024, ..., 0.04097629,
0.209705 , 0.27194855],
[ 0.02529916, 0.18367583, -0.11409087, ..., 0.0458075 ,
0.2065246 , 0.22976378]],
[[ 0.04196627, 0.08302739, 0.02218204, ..., 0.07388053,
-0.05696848, -0.31895265],
[-0.00536443, 0.1566213 , -0.22412768, ..., 0.10560389,
0.20187919, -0.1896591 ],
[ 0.26364946, 0.13163888, 0.14586888, ..., 0.19517538,
0.17677066, -0.40476215]],
...,
[[ 0.10999472, 0.07398727, 0.23443945, ..., -0.1912791 ,
-0.0195728 , 0.11717851],
[ 0.03978832, 0.07587367, 0.16567066, ..., -0.29463592,
0.05950819, 0.0242265 ],
[ 0.2505787 , 0.15849623, 0.06635283, ..., -0.17969091,
0.12549783, -0.11459641]],
[[-0.20408148, 0.04629526, 0.00601436, ..., 0.21321473,
0.04952445, -0.0129672 ],
[-0.14671509, 0.2911171 , 0.13047697, ..., -0.03531414,
-0.16794083, 0.01575338],
[-0.08337164, 0.08723269, 0.16235027, ..., 0.07919721,
0.05701642, 0.15379705]],
[[-0.2747393 , 0.24351111, -0.05829309, ..., -0.00448833,
0.07568972, 0.03978251],
[-0.16282909, -0.04586324, -0.0054924 , ..., 0.11050001,
0.1312355 , 0.16555254],
[ 0.07759799, -0.07308074, -0.10038756, ..., 0.18139914,
0.07769153, 0.1375772 ]]], dtype=float32)>,
<tf.Tensor: shape=(16, 256), dtype=float32, numpy=
array([[-0.08075664, -0.11490613, -0.20294832, ..., -0.14999194,
0.02177649, 0.05538464],
[ 0.02529916, 0.18367583, -0.11409087, ..., 0.0458075 ,
0.2065246 , 0.22976378],
[ 0.26364946, 0.13163888, 0.14586888, ..., 0.19517538,
0.17677066, -0.40476215],
...,
[ 0.2505787 , 0.15849623, 0.06635283, ..., -0.17969091,
0.12549783, -0.11459641],
[-0.08337164, 0.08723269, 0.16235027, ..., 0.07919721,
0.05701642, 0.15379705],
[ 0.07759799, -0.07308074, -0.10038756, ..., 0.18139914,
0.07769153, 0.1375772 ]], dtype=float32)>,
<tf.Tensor: shape=(16, 256), dtype=float32, numpy=
array([[-0.32829475, -0.18770668, -0.2956414 , ..., -0.2427501 ,
0.03146099, 0.16033864],
[ 0.05112522, 0.6664379 , -0.19836858, ..., 0.10015503,
0.511694 , 0.51550364],
[ 0.3379809 , 0.7145362 , 0.22311993, ..., 0.372106 ,
0.25914627, -0.81374717],
...,
[ 0.36742535, 0.29009506, 0.13245934, ..., -0.4318537 ,
0.26666188, -0.20086129],
[-0.17384854, 0.22998339, 0.27335796, ..., 0.09973672,
0.10726923, 0.47339764],
[ 0.22148325, -0.11998752, -0.16339599, ..., 0.31903535,
0.20365229, 0.28087002]], dtype=float32)>)