How to save split training and testing data to file?

How to save split training and testing data to file? - python

I'm using Python and Keras to predict a continuous value from given data. I've already built my neural network model and got the results that I wanted. Here's what my code looks like:
# Hold out 30% of the samples, then halve that hold-out into validation and test sets.
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(
    X_scale, Y, test_size=0.3
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_val_and_test, Y_val_and_test, test_size=0.5
)
print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape, Y_val.shape, Y_test.shape)
# Output: (693, 3) (148, 3) (149, 3) (693,) (148,) (149,)

# Network: 3 inputs -> 9 ReLU units -> 3 ReLU units -> 1 linear output.
model = Sequential()
model.add(Dense(9, activation='relu', input_shape=(3,)))
model.add(Dense(3, activation='relu'))
model.add(Dense(1, activation='linear'))

# Optimise mean absolute percentage error and report MSE alongside it.
model.compile(
    optimizer='RMSprop',
    loss='mean_absolute_percentage_error',
    metrics=['mse'],
)

# Train on the training split; the validation split is passed as validation_data.
hist = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    batch_size=100,
    epochs=20,
    verbose=1,
)

# Score the held-out test split.
model.evaluate(X_test, Y_test)
# Output: 149/149 [==============================] - 0s 42us/step
# [93.14884595422937, 171.0550879152029]
Now, after building the model, I would like to split the data and get accuracies for the training and testing data separately. How can I do that? Alternatively, how can I export the training and test data as .csv files?

Related

ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 6557, 40, 1)

I'm working with audio data: I extracted features, stored them in a CSV file, and am now trying to load the data from that CSV file. I'm getting the above error; please help me solve it! I've made all the attempts I knew. I'm pretty lost here!
def load_data(data_path):
    """Build the input matrix and target vector as NumPy arrays.

    The values are taken from the externally defined ``datasetcsv1`` object,
    using the ``feature_cols`` selection for the inputs and the ``"Label"``
    entry for the targets. ``data_path`` is accepted but is not read by this
    function.

    :param data_path: Path of the dataset file (not used by the body)
    :return X (ndarray): Inputs
    :return y (ndarray): Targets
    """
    features = np.array(datasetcsv1[feature_cols])
    targets = np.array(datasetcsv1["Label"])
    print(" x shape and y shape:", features.shape, targets.shape)
    return features, targets
def plot_history(history):
    """Show the accuracy and loss curves recorded in a training history.

    Two stacked panels are drawn: accuracy on top and loss ("error") below,
    each with the training series and the validation series.

    :param history: Training history of model
    :return:
    """
    curves = history.history
    _, (accuracy_ax, error_ax) = plt.subplots(2)

    # top panel: accuracy series
    accuracy_ax.plot(curves["accuracy"], label="train accuracy")
    accuracy_ax.plot(curves["val_accuracy"], label="test accuracy")
    accuracy_ax.set_ylabel("Accuracy")
    accuracy_ax.legend(loc="lower right")
    accuracy_ax.set_title("Accuracy eval")

    # bottom panel: loss series
    error_ax.plot(curves["loss"], label="train error")
    error_ax.plot(curves["val_loss"], label="test error")
    error_ax.set_ylabel("Error")
    error_ax.set_xlabel("Epoch")
    error_ax.legend(loc="upper right")
    error_ax.set_title("Error eval")

    plt.show()
def prepare_datasets(test_size, validation_size):
    """Load the data and carve it into train, validation and test splits.

    :param test_size: ``test_size`` passed to the first ``train_test_split``
        call, which separates the test split from the rest
    :param validation_size: ``test_size`` passed to the second call, which
        separates the validation split from the remaining training data
    :return: X_train, X_validation, X_test, y_train, y_validation, y_test
    """
    inputs, targets = load_data(DATA_PATH)

    # first cut: test split vs. everything else
    X_rest, X_test, y_rest, y_test = train_test_split(
        inputs, targets, test_size=test_size
    )

    # second cut: validation split taken out of what is left
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_rest, y_rest, test_size=validation_size
    )

    return X_train, X_validation, X_test, y_train, y_validation, y_test
def build_model(input_shape):
    """Generates RNN-LSTM model

    :param input_shape (tuple): Shape of ONE input sample, i.e.
        (time steps, features), without the batch/sample dimension
    :return model: RNN-LSTM model
    """
    # build network topology
    model = keras.Sequential()

    # 2 LSTM layers. The first returns the whole sequence so that the second
    # LSTM still receives 3-D input.
    model.add(keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True))
    # return_sequences is an option of the LSTM layer, not of model.add().
    # The last LSTM keeps the default (final output only) so the dense head
    # emits one prediction per sample.
    model.add(keras.layers.LSTM(64))

    # dense layer
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer: 10-way softmax
    model.add(keras.layers.Dense(10, activation='softmax'))

    return model
if __name__ == "__main__":
    # get train, validation, test splits
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # reshape input to be [samples, time steps, features]; the validation
    # split is passed to fit() below, so it gets the same layout
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_validation = np.reshape(X_validation, (X_validation.shape[0], X_validation.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    print(" x train shape and x test shape:",X_train.shape,X_test.shape)

    # create network
    # input_shape describes a single sample (time steps, features) and must
    # not include the number of samples. A 3-element shape such as
    # (6557, 40, 1) makes the LSTM see 4-D input once the batch axis is
    # added, which is the "expected ndim=3, found ndim=4" error.
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = build_model(input_shape)

    # compile model
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # train model
    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

    # plot accuracy/error for training and validation
    plot_history(history)

    # evaluate model on test set
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print('\nTest accuracy:', test_acc)

How can I improve a tensorflow model with CuDNNLSTM

I am not a data scientist and not very professional in machine learning. I am trying to improve the results of this model for predicting the trend for a stock movement (-1:down, 0:no change, +1:up). Here is the code in python and plots for the model
# 70/30 split; shuffle=False keeps the rows in their original order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)

# Normalizing data: the scaler is fitted on the training rows only and then
# applied to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

n_features = X_train.shape[1]
n_classes = y.shape[1]

# setting up the model of tensorflow
input_layer = Input(shape=(n_features, 1))
x = input_layer
for _ in range(2):  # two Dropout + CuDNNLSTM stages
    x = Dropout(0.5)(x)  # Dropout to avoid overfitting
    # return_sequences=True keeps the time axis for the next stage
    x = CuDNNLSTM(n_features, return_sequences=True)(x)
# average over the time axis before the softmax classification layer
x = GlobalAveragePooling1D()(x)
output = Dense(n_classes, activation='softmax')(x)
model = Model(inputs=input_layer, outputs=output)

opt = Adam(lr=0.01)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc'])

# early stop based on minimizing val_loss; the best weights are restored
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=200,
    restore_best_weights=True,
)

# fit the model (the test split doubles as validation data here)
r = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=200000,
    batch_size=16400,
    shuffle=False,
    callbacks=[early_stop],
)

# plot the results.
pd.DataFrame(r.history).plot()
model learning plot

Why I cannot feed my Keras model in batches?

I am trying to feed a Sequential model in batches. To reproduce my example, suppose my data is:
# 432 samples, each a 24 x 1 column of uniform random values.
data_shape = (432, 24, 1)
X = np.random.rand(*data_shape)
Y = np.random.rand(*data_shape)
My goal is to feed the model in batches. 24 points at a time (24 x 1 vector), 432 times.
I built my model as:
# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=12, test_size=0.3
)

# Two dense layers; the Flatten input layer is left commented out.
dense_stack = [
    #keras.layers.Flatten(batch_input_shape=(None, 432, 2)),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(2, activation=tf.nn.sigmoid),
]
model = keras.Sequential(dense_stack)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# validation_split=0.3 is passed to fit() in addition to the split above
history = model.fit(
    X_train,
    y_train,
    validation_split=0.3,
    batch_size=32,
    epochs=200,
)

test_loss, test_acc = model.evaluate(X_test, y_test)
print('Model loss:', test_loss, 'Model accuracy: ', test_acc)
However, I get this error:
ValueError: Input 0 of layer dense_25 is incompatible with the layer: expected axis -1 of input shape to have value 864 but received
input with shape (None, 432)

I am not really too sure what you want to do, but here is a working example:
import numpy as np  # required: the example calls np.random below
import tensorflow as tf
from sklearn.model_selection import train_test_split

# 432 samples with 24 features each, and two binary labels per sample.
X = np.random.rand(432, 24)
Y = np.random.randint(2, size=(432, 2))

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=12)

# Two sigmoid outputs, matching the two label columns of Y.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation=tf.nn.relu),
    tf.keras.layers.Dense(2, activation=tf.nn.sigmoid),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=200, batch_size=32, validation_split=0.3)

test_loss, test_acc = model.evaluate(X_test, y_test)
print('Model loss:', test_loss, 'Model accuracy: ', test_acc)
Note that your data X has the shape (432, 24) and your labels Y have the shape (432, 2). I removed your Flatten layer as it doesn't make much sense if your data has the shape (432, 24). You can make a prediction after training your model like this:
# One random sample with 24 values, shaped (1, 24).
X_new = np.random.rand(1, 24)
# Run the model on the new sample and print its output.
Y_new = model.predict(X_new)
print(Y_new)

I think that there is some confusion about the dimensions of your input data. I'm going to assume that 432 is the number of points, and each data point has dimensionality 24 (i.e., 24 features). In that case, the first dimension should index the points because both scikit-learn and Keras expect it that way. For example,
# 432 points (first axis), 24 values per point.
n_points, n_features = 432, 24
X = np.random.rand(n_points, n_features)
Y = np.random.rand(n_points, n_features)
then your code should run if you correct the input shape accordingly,
# Input is declared as batches of 24-value samples: (None, 24).
# NOTE(review): the last layer emits 2 values per sample; if the targets have
# 24 columns per sample, the loss will see mismatched shapes - confirm the
# intended target shape.
model = keras.Sequential()
model.add(keras.layers.Flatten(batch_input_shape=(None, 24)))
model.add(keras.layers.Dense(64, activation=tf.nn.relu))
model.add(keras.layers.Dense(2, activation=tf.nn.sigmoid))

Convert numpy array shape to tensorflow

I'm constructing an image array with numpy and then trying to convert it to a tensor to fit a tensorflow model but then I get an error
Data prep
def prep_data(images):
    """Read every image file into a single uint8 array.

    Each image returned by ``read_image`` is transposed (``.T``) before it is
    stored, and the result has shape (len(images), CHANNELS, ROWS, COLS).

    :param images: sized collection of image files accepted by ``read_image``
    :return: uint8 ndarray holding one transposed image per entry
    """
    batch = np.empty((len(images), CHANNELS, ROWS, COLS), dtype=np.uint8)
    for index, image_file in enumerate(tqdm(images)):
        batch[index] = read_image(image_file).T
    return batch
# Load both image lists into arrays shaped (count, CHANNELS, ROWS, COLS).
train = prep_data(train_images)
test = prep_data(test_images)
Build model
# Inception V1 classifier from TF Hub, frozen so that it is not trained.
pretrained_base = hub.KerasLayer("https://tfhub.dev/google/imagenet/inception_v1/classification/5")
pretrained_base.trainable = False

# Input of shape (64, 64, 3) -> frozen base -> small dense head ending in a
# single sigmoid unit.
model = keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(64, 64, 3)))
model.add(pretrained_base)
model.add(Flatten())
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.build((None, 64, 64, 3))
model.summary()
from sklearn.model_selection import train_test_split
# 75/25 split of the prepared images and their labels, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    train, labels, random_state=0, test_size=0.25
)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))


def run_catdog():
    """Fit the model on ``train_dataset`` and predict on the ``test`` array.

    :return: tuple of (predictions for ``test``, the LossHistory callback)
    """
    loss_tracker = LossHistory()
    model.fit(
        train_dataset,
        batch_size=batch_size,
        epochs=nb_epoch,
        verbose=1,
        callbacks=[loss_tracker, early_stopping],
    )
    return model.predict(test, verbose=0), loss_tracker


predictions, history = run_catdog()
WARNING:tensorflow:Model was constructed with shape (None, 64, 64, 3) for input KerasTensor(type_spec=TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name='input_63'), name='input_63', description="created by layer 'input_63'"), but it was called on an input with incompatible shape (None, 3, 64, 64).
Can't quite figure out how to change/convert the numpy array to TF

You don't need to convert the NumPy array to tensor, just change the shape of your input. np.moveaxis can do the trick. It works like this:
np.moveaxis(your_array, source, destination).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, labels, test_size=0.25, random_state=0)

# now reshape the train and test input data
# The arrays are channels-first (samples, channels, 64, 64) while the model
# was built for channels-last (samples, 64, 64, 3). Move axis 1 (channels) to
# the end; axis 0 is the sample axis and has to stay first, otherwise the
# images no longer line up with their labels.
# NOTE(review): if read_image returns (rows, cols, channels), the `.T` in
# prep_data also swapped rows and cols; X.transpose(0, 3, 2, 1) would undo
# that completely - confirm against read_image.
X_train = np.moveaxis(X_train, 1, -1)
X_test = np.moveaxis(X_test, 1, -1)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))


def run_catdog():
    """Fit the model on ``train_dataset`` and predict on the ``test`` array.

    :return: tuple of (predictions for ``test``, the LossHistory callback)
    """
    history = LossHistory()
    model.fit(train_dataset,
              batch_size=batch_size,
              epochs=nb_epoch,
              verbose=1,
              callbacks=[history, early_stopping])
    # `test` is still channels-first, so it gets the same axis move as the
    # training data before it is handed to the model.
    predictions = model.predict(np.moveaxis(test, 1, -1), verbose=0)
    return predictions, history


predictions, history = run_catdog()

Get different accuracy on the same evaluating dataset after loading saved model

I simply use the MNIST dataset to implement a simple ML application. My code is:
import tensorflow as tf
import numpy as np
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# divide the pixel values of both splits by 255
x_train = x_train / 255.0
x_test = x_test / 255.0

# 28x28 input -> flatten -> 128 ReLU units -> dropout -> 10 raw outputs
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10))

# the final layer has no activation, so the loss is told it receives logits
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=5)

print('Before saving')
model.evaluate(x_test, y_test, verbose=2)

# save the model to model.h5 and load it back
model.save('model.h5')
loaded_model = tf.keras.models.load_model('model.h5')

# evaluate the loaded model on the same data
print('After loading')
loaded_model.evaluate(x_test, y_test, verbose=2)
The accuracies on the same dataset are different after loading

This is a known issue: https://github.com/tensorflow/tensorflow/issues/42045
Compile the model with metrics='sparse_categorical_accuracy' instead of just 'accuracy'.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to save split training and testing data to file? - python

Related

ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 6557, 40, 1)

How can I improve a tensorflow model with CuDNNLSTM

Why I cannot feed my Keras model in batches?

Convert numpy array shape to tensorflow

Get different accuracy on the same evaluating dataset after loading saved model

Categories

Resources