AttributeError: 'function' object has no attribute 'fit' - python

I'm just starting with deep learning and python and I'm already stuck with this error when I try to train the model.
I thought it would be an easy starting project to put together the basic building blocks, but I obviously haven't grasped some basics.
My goal is to train a model on a data set of 5 column values '1ex','2ex','3ex','4ex','5ex' and predict sequences of 5 values.
I'm reading the dataset from a csv file I generated, and it's displaying as expected.
Can you help me understand what I'm missing?
Thank you very much as always.
This is the code I've written so far:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from collections import deque
import random
# load the data set
df = pd.read_csv('DataSet.csv',delimiter=',',usecols=['Wheel','Date','1ex','2ex','3ex','4ex','5ex'])
# divide it into portions
times = sorted(df.index.values) # get the times
last_10pct = sorted(df.index.values)[-int(0.1*len(times))] # index where the last 10% of the times begins
last_20pct = sorted(df.index.values)[-int(0.2*len(times))] # index where the last 20% of the times begins
test_df = df[(df.index >= last_10pct)]
validation_df = df[(df.index >= last_20pct) & (df.index < last_10pct)]
train_df = df[(df.index < last_20pct)] # now the train_df is all the data up to the last 20%
# drop 'Date' column
train_df.drop(columns=["Date"], inplace=True)
validation_df.drop(columns=["Date"], inplace=True)
test_df.drop(columns=["Date"], inplace=True)
# the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# from tensorflow.keras.layers import LSTM
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# define base model
def baseline_model():
    # scale = StandardScaler()
    # create model
    model = Sequential()
    model.add(Dense(5, input_dim=5, kernel_initializer='normal', activation='relu'))
    model.add(Dense(15, kernel_initializer='normal', activation='relu'))
    model.add(Dense(15, kernel_initializer='normal', activation='relu'))
    model.add(Dense(5, kernel_initializer='normal', activation='softmax'))
    # Compile model
    # model.compile(loss='mean_absolute_error', optimizer='adam')
    model.compile(loss='mean_squared_error', optimizer='adam')
    # model.fit(train_df, epochs = 5)
    return model
# train the model
baseline_model.fit(train_df, batch_size=1, epochs=200, verbose=1)

If you write baseline_model, it refers to the function object itself, not its result.
Therefore baseline_model.fit can't be called, because a 'function' object has no attribute 'fit'.
You must execute the function to get its result, using parentheses (baseline_model()), and then fit can be called on the returned model. ;)
tl;dr:
baseline_model.fit( -> baseline_model().fit(
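For completeness, a minimal sketch of the corrected training step. Note that fit also needs a target array, which your snippet doesn't pass; the feature/target split below (predicting the next row of '1ex'..'5ex' from the current one) is only an assumption for illustration:
cols = ['1ex', '2ex', '3ex', '4ex', '5ex']
X = train_df[cols].values[:-1]  # assumed inputs: each row of the five value columns
y = train_df[cols].values[1:]   # assumed targets: the following row
model = baseline_model()  # calling the function returns the compiled model
model.fit(X, y, batch_size=1, epochs=200, verbose=1)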

Related

WARNING:tensorflow:AutoGraph could not transform <function Model.make_train_function.<locals>.train_function> - Cause: unsupported operand type(s) for -: 'NoneType' and 'int'

While I was working through a basic example of a dropout layer, I got stuck here.
Moving further, I got a similar error in another model where I used KerasClassifier.
To reproduce this, you can use the sonar dataset.
# Baseline Model on the Sonar Dataset
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# load dataset
dataframe = read_csv("/content/sonar.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# baseline
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(40, input_shape=(60,), activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    sgd = SGD(learning_rate=0.01, momentum=0.8)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(model=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
I'm getting this error:
WARNING:tensorflow:AutoGraph could not transform <function Model.make_train_function.<locals>.train_function at 0x2ab077a60> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
I'm currently using a Mac M1 and working in a Miniconda environment.

Keras estimator predict says that input is misshapen

I'm getting a numpy shape error when I use the predict function of a Keras estimator. I build, evaluate, and then retrain the model using the following code:
import pandas as pd
import sqlalchemy as sqla
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
# Connect to to the DB and retrieve the iris table
con = sqla.create_engine('postgresql://tristan:sebens@db:5432/tristan')
con.connect()
table_name = "iris"
schema = "public"
iris = pd.read_sql_table(table_name, con, schema=schema)
iris.head()
iris_ds = iris.values # Convert the table to a numpy array
X = iris_ds[:, 0:4].astype(float) # Slice the descriptive features into a numpy array
Y = iris_ds[:, 4] # Slice the labels away as their own numpy array
# The labels are encoded as strings, so we need to encode them
# as numbers that can be output by an ANN
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = to_categorical(encoded_Y)
# define baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
seed = 7
# Train the model:
# First we define the model as a classifier. This will affect the process used to train it
estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=5, verbose=0)
# Honestly not totally sure what this is, but it has to do with splitting the training/evaluation data in
# a way that gives us a more realistic metric of the model's accuracy
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
# Now that we have our classifier and our data pipeline defined, we can begin the training process
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
# If we like our accuracy, then we can train the model for real
# Evaluating the model actually evaluates a clone of the model, so now we need to train the model again
estimator.fit(X, dummy_y)
And this is where the trouble is. I try to make a test prediction:
# Let's make a test prediction with our model
x = X[0]
estimator.predict(x)
And I get an input shape error:
ValueError: Error when checking input: expected dense_21_input to have shape (4,) but got array with shape (1,)
I'm at a loss. How can the input have the wrong shape if it's literally a member of the training dataset?
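A likely cause: the scikit-learn predict API expects a 2-D batch of shape (n_samples, n_features), while X[0] is a single 1-D row of shape (4,), so it gets interpreted as four samples of one feature each. A minimal sketch of the usual fix:
# reshape the single sample into a batch of one
x = X[0].reshape(1, -1)  # equivalently: X[0:1]
estimator.predict(x)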

GridSearch implementation for Keras Regression

Trying to understand and implement the GridSearch method for Keras regression. Here is my simple reproducible regression application.
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data")
df.drop(['No','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],1,inplace=True)
# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        # Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)
x,y = to_xy(df,'SLUMP(cm)')
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)
#Create Model
model = Sequential()
model.add(Dense(128, input_dim=x.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5",save_best_only=True) # save best model
model.fit(x_train,y_train,callbacks=[monitor,checkpointer],verbose=0,epochs=1000)
#model.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer],verbose=0,epochs=1000)
pred = model.predict(x_test)
score = np.sqrt(metrics.mean_squared_error(pred,y_test))
print("(RMSE): {}".format(score))
If you run the code, you can see the loss values are not too large.
And here is my reproducible GridSearch implementation. First, I simply searched the web and found a GridSearch example for KerasClassifier, then tried to revise it for KerasRegressor. I am not sure my revision is correct. Assuming the general concept is right, there must be a problem in this code, because the loss does not make sense: the loss function is MSE, but the output is negative, and unfortunately I could not figure out where I am going wrong.
from keras.wrappers.scikit_learn import KerasRegressor
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import GridSearchCV
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data")
df.drop(['No','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],1,inplace=True)
#Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        # Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)
x,y = to_xy(df,'SLUMP(cm)')
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)
def create_model(optimizer='adam'):
    # create model
    model = Sequential()
    model.add(Dense(128, input_dim=x.shape[1], activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mse'])
    return model
model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
optimizer = ['SGD', 'RMSprop', 'Adagrad']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(x_train, y_train)
#summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
I have tested your code, and I see that you are not using a scoring function in GridSearchCV, so according to the scikit-learn documentation:
If None, the estimator's default scorer (if available) is used.
It seems it is using 'neg_mean_absolute_error' (or one of these scoring functions for regression) by default for scoring models. scikit-learn negates error metrics so that a greater score is always better, which is why an MSE-based score shows up negative.
That is probably why it says that the best model is:
-75.820078 using {'optimizer':'Adagrad'}
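One way to make the sign convention explicit is to pass the scoring function yourself. A small sketch, varying only the scoring argument of your grid:
grid = GridSearchCV(estimator=model, param_grid=param_grid,
                    scoring='neg_mean_squared_error', n_jobs=1)
grid_result = grid.fit(x_train, y_train)
# best_score_ is the negated MSE, so flip the sign to recover the MSE itself
print("Best MSE: %f with %s" % (-grid_result.best_score_, grid_result.best_params_))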

Multi-class classification using keras

I am developing a neural network in order to classify data into classes pre-calculated with k-means.
Dataset looks like:
50,12500,2,1,5
50,8500,2,1,15
50,6000,2,1,9
50,8500,2,1,15
Where the resulting class label is the last value in each row.
Here is the code on Python with Keras I am trying to get working:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataset = numpy.genfromtxt ('../r-calculations/k-means/output16.csv', delimiter=",")
X = dataset[:,0:4].astype(float)
Y = dataset[:,4]
print(Y[0])
Y = np_utils.to_categorical(Y)
model = Sequential()
model.add(Dense(5, activation='tanh', input_dim=4))
#model.add(Dropout(0.25))
model.add(Dense(10, activation='tanh'))
#model.add(Dropout(0.25))
model.add(Dense(10, activation='relu'))
#model.add(Dropout(0.25))
model.add(Dense(17, activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X, Y, epochs=10, batch_size=10)
#print( model.predict(numpy.array([2,36,2,5,2384,1,2,4,3,1,1,4,33,3,1,1,2,1,1,1]).reshape((1,20))) )
#print( model.predict(numpy.array(X[0]).reshape((1,4))) )
#print( model.predict(numpy.array(X[1]).reshape((1,4))) )
#print( model.predict(numpy.array(X[2]).reshape((1,4))) )
result = model.predict(numpy.array(X[0]).reshape((1,4)))
for res in result[0]:
    print(res)
If I understand it right, I am now getting a probability for each class as output. How can I retrieve the labels back after I have called to_categorical?
Is there a way to get the class number instead of a probability for each class?
For now it does not seem to be working right: the loss is big (~2), accuracy is ~0.29, and I cannot make it converge. What am I doing wrong?
UPDATE Mar 19
So far I have solved my problem: I changed my model many times and finally found a working configuration.
If you want the class instead of the probability, you can call numpy's argmax on your predictions.
Or use the convenient predict_classes call instead of predict:
result = model.predict_classes(numpy.array(X[0]).reshape((1,4)))
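For instance, a minimal sketch of the argmax route with the model above:
probs = model.predict(numpy.array(X[0]).reshape((1,4)))  # shape (1, 17): one probability per class
predicted_class = numpy.argmax(probs, axis=1)[0]  # index of the most likely class
print(predicted_class)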
As for your results, you could try running a few extra epochs, but it is hard to say what is wrong. It could be your training data quality, bad initialization, not having enough data, or a bad model (I'd use only relu activations).

How to save Scikit-Learn-Keras Model into a Persistence File (pickle/hd5/json/yaml)

I have the following code, using Keras Scikit-Learn Wrapper:
from keras.models import Sequential
from sklearn import datasets
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
import pickle
import numpy as np
import json
def classifier(X, y):
    """
    Description of classifier
    """
    NOF_ROW, NOF_COL = X.shape

    def create_model():
        # create model
        model = Sequential()
        model.add(Dense(12, input_dim=NOF_COL, init='uniform', activation='relu'))
        model.add(Dense(6, init='uniform', activation='relu'))
        model.add(Dense(1, init='uniform', activation='sigmoid'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    # evaluate using 10-fold cross validation
    seed = 7
    np.random.seed(seed)
    model = KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10, verbose=0)
    return model
def main():
    """
    Description of main
    """
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    X = preprocessing.scale(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    model_tt = classifier(X_train, y_train)
    model_tt.fit(X_train, y_train)
    #--------------------------------------------------
    # This fails
    #--------------------------------------------------
    filename = 'finalized_model.sav'
    pickle.dump(model_tt, open(filename, 'wb'))
    # load the model from disk
    loaded_model = pickle.load(open(filename, 'rb'))
    result = loaded_model.score(X_test, y_test)
    print(result)
    #--------------------------------------------------
    # This also fails
    #--------------------------------------------------
    # from keras.models import load_model
    # model_tt.save('test_model.h5')
    #--------------------------------------------------
    # This works OK
    #--------------------------------------------------
    # print(model_tt.score(X_test, y_test))
    # print(model_tt.predict_proba(X_test))
    # print(model_tt.predict(X_test))
    # Output of predict_proba
    # 2nd column is the probability that the prediction is 1
    # this value is used as the final score, which can be
    # compared against other methods
    # [ [ 0.25311464 0.74688536]
    #   [ 0.84401423 0.15598579]
    #   [ 0.96047372 0.03952631]
    #   ...,
    #   [ 0.25518912 0.74481088]
    #   [ 0.91467732 0.08532269]
    #   [ 0.25473493 0.74526507]]
    # Output of predict
    # [[1]
    #  [0]
    #  [0]
    #  ...,
    #  [1]
    #  [0]
    #  [1]]

if __name__ == '__main__':
    main()
As noted in the code, it fails at this line:
pickle.dump(model_tt, open(filename, 'wb'))
With this error:
pickle.PicklingError: Can't pickle <function create_model at 0x101c09320>: it's not found as __main__.create_model
How can I get around it?
Edit 1 : Original answer about saving model
With HDF5 :
# saving model
json_model = model_tt.model.to_json()
open('model_architecture.json', 'w').write(json_model)
# saving weights
model_tt.model.save_weights('model_weights.h5', overwrite=True)
# loading model
from keras.models import model_from_json
model = model_from_json(open('model_architecture.json').read())
model.load_weights('model_weights.h5')
# dont forget to compile your model
model.compile(loss='binary_crossentropy', optimizer='adam')
Edit 2 : full code example with iris dataset
# Train model and make predictions
import numpy
import pandas
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.utils import np_utils
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
iris = datasets.load_iris()
X, Y, labels = iris.data, iris.target, iris.target_names
X = preprocessing.scale(X)
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(encoded_Y)
def build_model():
    # create model
    model = Sequential()
    model.add(Dense(4, input_dim=4, init='normal', activation='relu'))
    model.add(Dense(3, init='normal', activation='sigmoid'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def save_model(model):
    # saving model
    json_model = model.to_json()
    open('model_architecture.json', 'w').write(json_model)
    # saving weights
    model.save_weights('model_weights.h5', overwrite=True)

def load_model():
    # loading model
    model = model_from_json(open('model_architecture.json').read())
    model.load_weights('model_weights.h5')
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=seed)
# build
model = build_model()
model.fit(X_train, Y_train, nb_epoch=200, batch_size=5, verbose=0)
# save
save_model(model)
# load
model = load_model()
# predictions
predictions = model.predict_classes(X_test, verbose=0)
print(predictions)
# reverse encoding
for pred in predictions:
    print(labels[pred])
Please note that I used Keras only, not the wrapper. It only adds some complexity to something simple. Also, the code is deliberately not factored, so you can see the whole picture.
Also, you said you want to output 1 or 0. That is not possible in this dataset, because you have 3 output dimensions and classes (Iris-setosa, Iris-versicolor, Iris-virginica). If you had only 2 classes, then your output dimension and classes would be 0 or 1, using a sigmoid output function.
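As an illustration of that last point, a hypothetical binary variant of the output layer would look like this (thresholding the sigmoid output at 0.5 yields 0 or 1):
model.add(Dense(1, init='normal', activation='sigmoid'))  # single unit: probability of class 1
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])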
Just adding to gaarv's answer - if you don't require the separation between the model structure (model.to_json()) and the weights (model.save_weights()), you can use one of the following:
Use the built-in keras.models.save_model and keras.models.load_model, which store everything together in an HDF5 file.
Use pickle to serialize the Model object (or any class that contains references to it) to a file/network/whatever. Unfortunately, Keras doesn't support pickle by default. You can use my patchy solution that adds this missing feature; working code is here: http://zachmoshe.com/2017/04/03/pickling-keras-models.html
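A minimal sketch of the first option, assuming model_tt is the fitted KerasClassifier from the question (after fit, the wrapper exposes the underlying Keras model as model_tt.model):
from keras.models import load_model
model_tt.model.save('test_model.h5')  # architecture + weights + optimizer state in one file
restored = load_model('test_model.h5')  # ready to predict or resume training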
Another great alternative is to use callbacks when you fit your model. Specifically the ModelCheckpoint callback, like this:
from keras.callbacks import ModelCheckpoint
#Create instance of ModelCheckpoint
chk = ModelCheckpoint("myModel.h5", monitor='val_loss', save_best_only=False)
#add that callback to the list of callbacks to pass
callbacks_list = [chk]
#create your model
model_tt = KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10)
#fit your model with your data. Pass the callback(s) here
model_tt.fit(X_train,y_train, callbacks=callbacks_list)
This will save your training at each epoch to the myModel.h5 file. This provides great benefits, as you are able to stop your training when you desire (like when you see it has started to overfit) and still retain the previous training.
Note that this saves both the structure and the weights in the same HDF5 file (as shown by Zach), so you can then load your model using keras.models.load_model.
If you want to save only your weights, you can use the save_weights_only=True argument when instantiating your ModelCheckpoint, enabling you to load your model as explained by Gaarv. Extracting from the docs:
save_weights_only: if True, then only the model's weights will be saved (model.save_weights(filepath)), else the full model is saved (model.save(filepath)).
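For example, a sketch with a hypothetical file name, assuming create_model is the build function used above; restoring then requires rebuilding the architecture first:
chk = ModelCheckpoint("myWeights.h5", monitor='val_loss', save_weights_only=True)
model_tt.fit(X_train, y_train, callbacks=[chk])
# to restore: rebuild the model, then load the weights into it
model = create_model()
model.load_weights("myWeights.h5")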
The accepted answer is too complicated. You can fully save and restore every aspect of your model in a .h5 file. Straight from the Keras FAQ:
You can use model.save(filepath) to save a Keras model into a single
HDF5 file which will contain:
the architecture of the model, allowing to re-create the model
the weights of the model
the training configuration (loss, optimizer)
the state of the optimizer, allowing to resume training exactly where you left off.
You can then use keras.models.load_model(filepath) to reinstantiate your model. load_model will also take care of compiling the model using the saved training configuration (unless the model was never compiled in the first place).
And the corresponding code:
from keras.models import load_model
model.save('my_model.h5') # creates a HDF5 file 'my_model.h5'
del model # deletes the existing model
# returns a compiled model identical to the previous one
model = load_model('my_model.h5')
In case your Keras wrapper model is in a scikit-learn pipeline, save the steps of the pipeline separately.
import joblib
from sklearn.pipeline import Pipeline
from tensorflow import keras
# pass the create_cnn_model function into wrapper
cnn_model = keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_cnn_model)
# create pipeline; pipeline_estimator is assumed to be a previously
# defined preprocessing pipeline
cnn_model_pipeline_estimator = Pipeline([
    ('preprocessing_pipeline', pipeline_estimator),
    ('clf', cnn_model)
])
# train model
final_model = cnn_model_pipeline_estimator.fit(
    X, y, clf__batch_size=32, clf__epochs=15)
# collect the preprocessing pipeline & model separately
pipeline_estimator = final_model.named_steps['preprocessing_pipeline']
clf = final_model.named_steps['clf']
# store pipeline and model separately
joblib.dump(pipeline_estimator, open('path/to/pipeline.pkl', 'wb'))
clf.model.save('path/to/model.h5')
# load pipeline and model
pipeline_estimator = joblib.load('path/to/pipeline.pkl')
model = keras.models.load_model('path/to/model.h5')
model = keras.models.load_model('path/to/model.h5')
new_example = [[...]]
# transform new data with pipeline & use model for prediction
transformed_data = pipeline_estimator.transform(new_example)
prediction = model.predict(transformed_data)
