I am using TensorFlow's flow_from_directory to collect a large image dataset and then train on it. I want to use Keras Tuner, but when I run
tuner.search(test_data_gen, epochs=50,
             validation_split=0.2, callbacks=[stop_early])
it throws the following error:
ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'tensorflow.python.keras.preprocessing.image.DirectoryIterator'>]
I don't know much about converting between data types in AI so any help is truly appreciated.
Here is the rest of my code:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as display
from PIL import Image, ImageSequence
import os
import pathlib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import datetime
import kerastuner as kt
tf.compat.v1.enable_eager_execution()
epochs = 50
steps_per_epoch = 10
batch_size = 20
IMG_HEIGHT = 200
IMG_WIDTH = 200
train_dir = "Data/Train"
test_dir = "Data/Val"
train_image_generator = ImageDataGenerator(rescale=1. / 255)
test_image_generator = ImageDataGenerator(rescale=1. / 255)
train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                            directory=train_dir,
                                                            shuffle=True,
                                                            target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                            class_mode='sparse')
test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
                                                          directory=test_dir,
                                                          shuffle=True,
                                                          target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                          class_mode='sparse')
def model_builder(hp):
    model = keras.Sequential()
    model.add(Conv2D(265, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
    model.add(MaxPooling2D())
    model.add(Conv2D(64, 3, padding='same', activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(32, 3, padding='same', activation='relu'))
    model.add(MaxPooling2D())
    model.add(Flatten())
    model.add(keras.layers.Dense(256, activation="relu"))
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(keras.layers.Dense(hp_units, activation="relu"))
    model.add(keras.layers.Dense(80, activation="softmax"))
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['top_k_categorical_accuracy'])
    return model
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=30,
                     factor=3,
                     directory='Hypertuner_Dir',
                     project_name='AIOS')
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
and start searching with the tuner:
tuner.search(train_data_gen, epochs=50, validation_split=0.2, callbacks=[stop_early])
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")
model = tuner.hypermodel.build(best_hps)
model.summary()
tf.keras.utils.plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True, rankdir='TB')
checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
os.system("rm -r logs")
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
#history = model.fit(train_data_gen,steps_per_epoch=steps_per_epoch,epochs=epochs,validation_data=test_data_gen,validation_steps=10,callbacks=[cp_callback, tensorboard_callback])
history = model.fit(train_data_gen,steps_per_epoch=steps_per_epoch,epochs=epochs,validation_split=0.2,validation_steps=10,callbacks=[cp_callback, tensorboard_callback])
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.save('model.h5', include_optimizer=True)
test_loss, test_acc = model.evaluate(test_data_gen)
print("Tested Acc: ", test_acc)
print("Tested Acc: ", test_acc*100, "%")
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))
===================================EDIT====================================
According to the doc about validation_split:
validation_split: Float between 0 and 1. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the x and y data provided, before shuffling. This argument is not supported when x is a dataset, generator or keras.utils.Sequence instance.
Now, since you have generators, try the following (reference):
tuner.search(train_data_gen,
             epochs=50,
             validation_data=test_data_gen,
             callbacks=[stop_early])
Also, ensure that each of your generators yields valid batches.
Unfortunately, using validation_split=0.2 does not work in this case, because this argument assumes the data is a Tensor or a NumPy array. Since you have the data stored as a generator (which is a good idea), you can't simply split it that way.
You'll need to create a validation generator, just like you did with test_data_gen, and change validation_split=0.2 to validation_data=val_data_gen.
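For reference, here is a minimal sketch of one way to build such a validation generator, carving the 20% split out of the same train_dir with ImageDataGenerator's own validation_split and subset arguments (the variable names below are reused from the question's code):
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# train_dir, batch_size, IMG_HEIGHT, IMG_WIDTH, tuner and stop_early are defined in the question above.
train_image_generator = ImageDataGenerator(rescale=1. / 255, validation_split=0.2)

train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                           batch_size=batch_size,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='sparse',
                                                           subset='training')    # 80% of the images

val_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                         batch_size=batch_size,
                                                         shuffle=True,
                                                         target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                         class_mode='sparse',
                                                         subset='validation')    # the remaining 20%

tuner.search(train_data_gen,
             epochs=50,
             validation_data=val_data_gen,
             callbacks=[stop_early])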
I have been working on this the whole day but with no luck.
I managed to narrow the problem down to a single line involving TfidfVectorizer.
Here is my working code:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
vectorizer.fit(xtrain)
X_train_count = vectorizer.transform(xtrain)
X_test_count = vectorizer.transform(xval)
X_train_count
from keras.models import Sequential
from keras import layers
input_dim = X_train_count.shape[1] # Number of features
model = Sequential()
model.add(layers.Dense(10, input_dim=input_dim, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
history = model.fit(X_train_count, ytrain,
                    epochs=10,
                    verbose=False,
                    validation_data=(X_test_count, yval),
                    batch_size=10)
But when I change to
from sklearn.feature_extraction.text import TfidfVectorizer
#TF-IDF initializer
vectorizer = TfidfVectorizer(max_df=0.8, max_features=1000)
vectorizer.fit(xtrain)
X_train_count = vectorizer.transform(xtrain)
X_test_count = vectorizer.transform(xval)
X_train_count
from keras.models import Sequential
from keras import layers
input_dim = X_train_count.shape[1] # Number of features
model = Sequential()
model.add(layers.Dense(10, input_dim=input_dim, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
history = model.fit(X_train_count, ytrain,
                    epochs=10,
                    verbose=False,
                    validation_data=(X_test_count, yval),
                    batch_size=10)
The only things that changed are these 2 lines:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_df=0.8, max_features=1000)
and then I get this error:
InvalidArgumentError: indices[1] = [0,997] is out of order. Many sparse ops require sorted indices.
Use tf.sparse.reorder to create a correctly ordered copy.
[Op:SerializeManySparse]
How can I fix that, and why is it happening?
vectorizer.transform(...) produces a sparse matrix, and this is not good for Keras. You simply have to convert it to a plain dense array, which you can do with:
vectorizer.transform(...).toarray()
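Applied to the code in the question, that would look roughly like this (a sketch reusing the same variable names; with max_features=1000 the dense arrays stay small, but for much larger vocabularies densifying can use a lot of memory):
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_df=0.8, max_features=1000)
vectorizer.fit(xtrain)

# Convert the sparse TF-IDF matrices to dense NumPy arrays before handing them to Keras.
X_train_count = vectorizer.transform(xtrain).toarray()
X_test_count = vectorizer.transform(xval).toarray()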
I want to train a neural net for sentiment analysis. I have followed the tutorials on the Keras webpage, but I had to adapt the code to my use case in order to be able to use the net afterwards.
For this purpose I decode the texts of the Keras imdb dataset back from numbers to text, and then I stem the text because I need to work with the stemmed text. After that, since I want to control the way I do the word embeddings rather than using texts_to_sequences and pad_sequences, I train a doc2vec embedding and apply it to the training set, so that I can obtain the embeddings for the text I want to classify.
The problem is that the net does not learn anything: the accuracy does not improve and I cannot reduce the loss. I have tried many things, like the architecture of the net, all the hyperparameters, and changing the last layer from 2 units to 1 and from sparse_categorical_crossentropy to binary_crossentropy. Let's see if anybody can help and shed some light on my problem. I paste the code here, and thanks in advance.
from keras.datasets import imdb
max_features = 40000
(training_data, training_targets), (testing_data, testing_targets) = imdb.load_data(num_words=max_features)
import numpy as np
data = np.concatenate((training_data, testing_data), axis=0)
targets = np.concatenate((training_targets, testing_targets), axis=0)
index = imdb.get_word_index()
reverse_index = dict([(value, key) for (key, value) in index.items()])
decoded = " ".join([reverse_index.get(i - 3, "") for i in data[0]])
import nltk
from nltk.stem import LancasterStemmer
toke_corpus = list()
lan = LancasterStemmer()
from tqdm import tqdm
lista_reviews = list()
for review in tqdm(data):
    lista_reviews.append(np.array([lan.stem(reverse_index.get(i - 3, '')) for i in review][1:]))
train_x, test_x = lista_reviews[10000:], lista_reviews[:10000]
train_y, test_y = targets[10000:], targets[:10000]
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    def __init__(self):
        self.epoch = 0
    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))
    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
documents = [TaggedDocument(doc, [i]) for i, doc in enumerate(lista_reviews)]
print("DOcuments already built")
epoch_logger = EpochLogger()
model = Doc2Vec(documents, vector_size=512, window=5, min_count=3, workers=8, epochs = 7, callbacks=[epoch_logger])
encoded_x_train, encoded_x_test = list(), list()
from tqdm import tqdm
for i in tqdm(train_x):
    encoded_x_train.append(model.infer_vector(i))
for k in tqdm(test_x):
    encoded_x_test.append(model.infer_vector(k))
import keras
reduce_lr = keras.callbacks.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.50, patience=2, verbose=1, mode='auto', cooldown=0, min_lr=0.00001)
early = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=4, verbose=1, mode='auto')
from keras import models
from keras.models import Sequential
from keras import layers
from keras.layers import Embedding, Bidirectional, Dense, LSTM, Conv1D, MaxPooling1D, Flatten
model1 = Sequential()
model1.add(Embedding(input_dim = max_features, input_length=512, output_dim=128, trainable=False))
model1.add(Conv1D(filters=64,
                  kernel_size=5,
                  padding='valid',
                  activation='linear',
                  strides=1))
model1.add(MaxPooling1D(pool_size=4))
model1.add(Dense(64, activation='linear'))
model1.add(LSTM(32, activation='tanh'))
# model1.add(Dense(32, activation='relu'))
# model1.add(Flatten())
# model1.add(Dense(1, activation='sigmoid'))
model1.add(Dense(2, activation='softmax'))
model1.summary()
from keras import optimizers
# sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
model1.compile(loss='sparse_categorical_crossentropy',
               optimizer=adam,
               metrics=['accuracy'])
history = model1.fit(np.array(encoded_x_train), np.array(train_y),
                     epochs=20,
                     batch_size=500,
                     validation_data=(np.array(encoded_x_test), np.array(test_y)),
                     callbacks=[reduce_lr, early])
You use Doc2Vec to create document-level embeddings for your samples. For this reason, I don't think the Embedding, Conv1D and MaxPooling1D layers are useful in your network. They are useful with word2vec, where you can extract embeddings of each token and use them inside a network.
Try feeding your network directly with your embeddings, in this way:
model1 = Sequential()
model1.add(Dense(128, activation='relu', input_shape=(512,)))
# ....
model1.add(Dense(2, activation='softmax'))
adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
model1.compile(loss='sparse_categorical_crossentropy',
               optimizer=adam,
               metrics=['accuracy'])
history = model1.fit(np.array(encoded_x_train), np.array(train_y),
                     epochs=20,
                     batch_size=500,
                     validation_data=(np.array(encoded_x_test), np.array(test_y)),
                     callbacks=[reduce_lr, early])
I'm trying to extract the weights from a model after training it. Here's the code.
1. How do I save the model to the previously uploaded directory path?
2. How do I get the weights?
3. I have a CSV file in the same format as the one used for modeling, but without the class column. How can I estimate the classes with this model?
# Preprocess
import pandas as pd
import keras
from sklearn.model_selection import train_test_split
# Prepare data
churn = pd.read_csv('Churn_Modelling3.csv')
# split data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(churn.iloc[:,0:10],churn.iloc[:, 10], test_size=0.2)
# Create a Model
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
model = Sequential()
# Input layer
model.add(Dense(32, activation=tf.nn.sigmoid,input_dim=x_train.shape[1]))
# Hidden layers
model.add(Dense(64, activation=tf.nn.sigmoid))
model.add(Dense(32, activation=tf.nn.sigmoid))
# Output layer
model.add(Dense(1, activation=tf.nn.sigmoid))
# Compile the Model
from keras.optimizers import SGD
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile( optimizer= 'sgd', loss='binary_crossentropy', metrics=['accuracy'])
# Fit the model
history = model.fit(x_train, y_train, epochs=20, batch_size=128,validation_data=(x_test, y_test))
# Evaluate the Model
score = model.evaluate(x_test, y_test, batch_size=128)
How to save the model in the previously uploaded directory path? => You can use any of the commands below:
`model.save_weights('./Weights/Weights')` or
`tf.keras.experimental.export_saved_model` or
`model.save`
How to get weights? => We can load the weights using any of the commands below:
model.load_weights('my_model_weights.h5') or
keras.models.load_model('my_model.h5') or
tf.keras.experimental.load_from_saved_model
I have a CSV file in the same format as the one used for modeling but without the class column. How can I estimate the classes with this model? => One way to do it is to run the code below for each instance of the CSV data:
ynew = model.predict(Xnew)
For more information, you can refer to the links below:
https://www.tensorflow.org/tutorials/keras/save_and_restore_models
https://www.tensorflow.org/guide/saved_model
https://www.tensorflow.org/api_docs/python/tf/keras/Model
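To make the three answers above concrete, here is a minimal sketch, assuming the model trained in the question and a hypothetical unlabeled file Churn_NoLabels.csv; the file name, save path and 0.5 threshold are illustrative placeholders, not part of the original post:
import pandas as pd

# 1. Save the trained model (architecture + weights + optimizer state) to a path of your choice.
model.save('training/churn_model.h5')

# 2. Extract the weights as NumPy arrays, for the whole model or per layer.
all_weights = model.get_weights()                          # list of kernel/bias arrays for every layer
first_kernel, first_bias = model.layers[0].get_weights()   # weights of the first Dense layer

# 3. Predict classes for a CSV with the same 10 feature columns but no label column (hypothetical file).
unlabeled = pd.read_csv('Churn_NoLabels.csv')
probs = model.predict(unlabeled)                           # sigmoid outputs in [0, 1]
classes = (probs > 0.5).astype(int)                        # threshold to get 0/1 class labels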
I'm trying to use the following model, an InceptionV3 base CNN with an LSTM layer, for a regression problem. My input data is pictures with continuous target values. I'd like to feed the sequence of images to the CNN and then to an LSTM layer. However, I get a
RuntimeError: You must compile your model before using it
message. Any idea what the cause could be? I tried to find out on GitHub and on several pages, but I didn't succeed.
from keras.applications.inception_v3 import InceptionV3
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Conv2D, MaxPooling2D, GlobalAveragePooling2D, LSTM, TimeDistributed, Input
from keras.optimizers import SGD, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# custom R2-score metrics for keras backend
#https://www.kaggle.com/c/mercedes-benz-greener-manufacturing/discussion/34019
from keras import backend as K
def r2_keras(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())
train_data_dir = '...'
test_data_dir = '...'
train_df = pd.read_csv('...')
valid_df = pd.read_csv('...')
filepath_loss = '...'
filepath_csv = '...'
datagen=ImageDataGenerator(rescale=1./255.,)
img_width, img_height = 380, 380
frames = 5
channels = 3
pictures = Input(shape=(frames, img_width, img_height, channels))
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_data_dir,
    x_col="block_heights",
    y_col="weighted_prices",
    has_ext=False,  # x_col column doesn't have the file extensions
    # subset="training",  # if validation_split is set in ImageDataGenerator
    batch_size=16,
    seed=42,
    shuffle=False,
    class_mode="other",  # for regression, "other" should be used
    target_size=(img_width, img_height))
valid_generator = datagen.flow_from_dataframe(
    dataframe=valid_df,
    directory=train_data_dir,
    x_col="block_heights",
    y_col="weighted_prices",
    has_ext=False,  # x_col column doesn't have the file extensions
    # subset="validation",  # if validation_split is set in ImageDataGenerator
    batch_size=16,
    seed=42,
    shuffle=False,
    class_mode="other",
    target_size=(img_width, img_height))
conv_base = InceptionV3(weights=None, include_top=False, input_shape=(img_width,img_height,3))
conv_base.trainable = True
model = Sequential()
model.add(TimeDistributed(conv_base))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(10, return_sequences=True))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='linear'))
#error at callbacks if the learning rate is explicitly set somewhere
rms = RMSprop(lr=0.1, rho=0.9, epsilon=None, decay=0.0)
model.compile(loss='mse', optimizer=rms, metrics=['mae', r2_keras])
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
callbacks = [EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto'),
             ReduceLROnPlateau(monitor='val_loss', factor=0.02, patience=3, min_lr=0.001),
             ModelCheckpoint(filepath_loss, monitor='val_loss', verbose=1, save_best_only=True, mode='min'),
             CSVLogger(filepath_csv, separator=",", append=False)]
history = model.fit_generator(generator=train_generator,steps_per_epoch=STEP_SIZE_TRAIN, validation_data=valid_generator, validation_steps=STEP_SIZE_VALID, epochs=50, callbacks=callbacks)