I am new to Python and machine learning. I tried to implement the following code for federated learning with the MNIST dataset, but it doesn't work. It tries to train a model in a distributed way on local workers. The JPEG version of the MNIST dataset is used here; it consists of 42,000 digit images, with each class kept in a separate folder. I load the data into memory using the code snippet below and keep 10% of the data for testing the trained global model later on.
The following error appears when I run fl_implemetation.py:
(base) C:\python1>fl_implemetation.py
File "C:\python1\fl_implemetation.py", line 112
global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(320)
^
SyntaxError: invalid syntax
There are two Python files. The first is **fl_implemetation.py**.
The original code I am using can be found here:
https://github.com/datafrick/tutorial
import numpy as np
import random
import cv2
import os
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
from fl_mnist_implementation_tutorial_utils import *
#declare the path to your mnist data folder
img_path = '/path/to/your/training/dataset'
#get the path list using the path object
image_paths = list(paths.list_images(img_path))
#apply our function
image_list, label_list = load(image_paths, verbose=10000)
#binarize the labels
lb = LabelBinarizer()
label_list = lb.fit_transform(label_list)
#split data into training and test set
X_train, X_test, y_train, y_test = train_test_split(image_list,
                                                    label_list,
                                                    test_size=0.1,
                                                    random_state=42)
#create clients
clients = create_clients(X_train, y_train, num_clients=10, initial='client')
#process and batch the training data for each client
clients_batched = dict()
for (client_name, data) in clients.items():
    clients_batched[client_name] = batch_data(data)
#process and batch the test set
test_batched = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(len(y_test))
comms_round = 100
#create optimizer
lr = 0.01
loss='categorical_crossentropy'
metrics = ['accuracy']
optimizer = SGD(lr=lr,
                decay=lr / comms_round,
                momentum=0.9
                )
#initialize global model
smlp_global = SimpleMLP()
global_model = smlp_global.build(784, 10)
#commence global training loop
for comm_round in range(comms_round):
    # get the global model's weights - will serve as the initial weights for all local models
    global_weights = global_model.get_weights()
    #initial list to collect local model weights after scaling
    scaled_local_weight_list = list()
    #randomize client data - using keys
    client_names = list(clients_batched.keys())
    random.shuffle(client_names)
    #loop through each client and create a new local model
    for client in client_names:
        smlp_local = SimpleMLP()
        local_model = smlp_local.build(784, 10)
        local_model.compile(loss=loss,
                            optimizer=optimizer,
                            metrics=metrics)
        #set local model weights to the weights of the global model
        local_model.set_weights(global_weights)
        #fit local model with client's data
        local_model.fit(clients_batched[client], epochs=1, verbose=0)
        #scale the model weights and add to list
        scaling_factor = weight_scalling_factor(clients_batched, client)
        scaled_weights = scale_model_weights(local_model.get_weights(), scaling_factor)
        scaled_local_weight_list.append(scaled_weights)
        #clear session to free memory after each communication round
        K.clear_session()
    #to get the average over all the local models, we simply take the sum of the scaled weights
    average_weights = sum_scaled_weights(scaled_local_weight_list)
    #update global model
    global_model.set_weights(average_weights)
    #test global model and print out metrics after each communication round
    for (X_test, Y_test) in test_batched:
        global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(320)
smlp_SGD = SimpleMLP()
SGD_model = smlp_SGD.build(784, 10)
SGD_model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=metrics)
# fit the SGD training data to the model
_ = SGD_model.fit(SGD_dataset, epochs=100, verbose=0)
#test the SGD global model and print out metrics
for (X_test, Y_test) in test_batched:
    SGD_acc, SGD_loss = test_model(X_test, Y_test, SGD_model, 1)
And the second is **fl_mnist_implementation_tutorial_utils.py**:
import numpy as np
import random
import cv2
import os
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
def load(paths, verbose=-1):
    '''expects images for each class in a separate dir,
    e.g. all digits of class 0 in the directory named 0'''
    data = list()
    labels = list()
    # loop over the input images
    for (i, imgpath) in enumerate(paths):
        # load the image and extract the class label
        im_gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
        image = np.array(im_gray).flatten()
        label = imgpath.split(os.path.sep)[-2]
        # scale the image to [0, 1] and add to list
        data.append(image/255)
        labels.append(label)
        # show an update every `verbose` images
        if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
            print("[INFO] processed {}/{}".format(i + 1, len(paths)))
    # return a tuple of the data and labels
    return data, labels
def create_clients(image_list, label_list, num_clients=10, initial='clients'):
    '''return: a dictionary with clients' names as keys and data shards
               (tuples of image and label lists) as values.
       args:
           image_list: a list of numpy arrays of training images
           label_list: a list of binarized labels for each image
           num_clients: number of federated members (clients)
           initial: the clients' name prefix, e.g. clients_1
    '''
    #create a list of client names
    client_names = ['{}_{}'.format(initial, i+1) for i in range(num_clients)]
    #randomize the data
    data = list(zip(image_list, label_list))
    random.shuffle(data)
    #shard data and place at each client
    size = len(data)//num_clients
    shards = [data[i:i + size] for i in range(0, size*num_clients, size)]
    #number of clients must equal number of shards
    assert(len(shards) == len(client_names))
    return {client_names[i]: shards[i] for i in range(len(client_names))}
def batch_data(data_shard, bs=32):
    '''Takes in a client's data shard and creates a tf.data.Dataset object from it
       args:
           data_shard: the (data, label) pairs constituting a client's data shard
           bs: batch size
       return:
           tf.data.Dataset object'''
    #separate shard into data and label lists
    data, label = zip(*data_shard)
    dataset = tf.data.Dataset.from_tensor_slices((list(data), list(label)))
    return dataset.shuffle(len(label)).batch(bs)
class SimpleMLP:
    @staticmethod
    def build(shape, classes):
        model = Sequential()
        model.add(Dense(200, input_shape=(shape,)))
        model.add(Activation("relu"))
        model.add(Dense(200))
        model.add(Activation("relu"))
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        return model
def weight_scalling_factor(clients_trn_data, client_name):
    client_names = list(clients_trn_data.keys())
    #get the batch size
    bs = list(clients_trn_data[client_name])[0][0].shape[0]
    #first calculate the total training data points across clients
    global_count = sum([tf.data.experimental.cardinality(clients_trn_data[client_name]).numpy() for client_name in client_names])*bs
    #get the total number of data points held by a client
    local_count = tf.data.experimental.cardinality(clients_trn_data[client_name]).numpy()*bs
    return local_count/global_count
def scale_model_weights(weight, scalar):
    '''function for scaling a model's weights'''
    weight_final = []
    steps = len(weight)
    for i in range(steps):
        weight_final.append(scalar * weight[i])
    return weight_final
def sum_scaled_weights(scaled_weight_list):
    '''Return the sum of the listed scaled weights. This is equivalent to a scaled average of the weights.'''
    avg_grad = list()
    #get the average grad across all client gradients
    for grad_list_tuple in zip(*scaled_weight_list):
        layer_mean = tf.math.reduce_sum(grad_list_tuple, axis=0)
        avg_grad.append(layer_mean)
    return avg_grad
def test_model(X_test, Y_test, model, comm_round):
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    #logits = model.predict(X_test, batch_size=100)
    logits = model.predict(X_test)
    loss = cce(Y_test, logits)
    acc = accuracy_score(tf.argmax(logits, axis=1), tf.argmax(Y_test, axis=1))
    print('comm_round: {} | global_acc: {:.3%} | global_loss: {}'.format(comm_round, acc, loss))
    return acc, loss
You forgot a line break in this line:
global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(320)
So this line should be split into two lines, like so:
global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)
SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(320)
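Note that in the linked tutorial the SGD_dataset line sits outside the communication-round loop, so the corrected tail of the script would look roughly like this (a sketch following the tutorial's layout):
    for (X_test, Y_test) in test_batched:
        global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)

# the centralized SGD baseline dataset is built after the loop
SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(320)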
Related
I structured a Convolutional LSTM model to predict forthcoming Bitcoin price data, using analyzed past data of the Bitcoin close price and other features.
Let me jump straight to the code:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tensorflow.keras as keras
import keras_tuner as kt
from keras_tuner import HyperParameters as hp
from keras.models import Sequential
from keras.layers import InputLayer, ConvLSTM1D, LSTM, Flatten, RepeatVector, Dense, TimeDistributed
from keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
import keras.backend as K
from keras.losses import Huber
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
DIR = '../input/btc-features-targets'
SEG_DIR = '../input/segmented'
segmentized_features = os.listdir(SEG_DIR)
btc_train_features = []
for seg in segmentized_features:
    train_features = pd.read_csv(f'{SEG_DIR}/{seg}')
    train_features.set_index('date', inplace=True)
    btc_train_features.append(scaler.fit_transform(train_features.values))
btc_train_targets = pd.read_csv(f'{DIR}/btc_train_targets.csv')
btc_train_targets.set_index('date', inplace=True)
btc_test_features = pd.read_csv(f'{DIR}/btc_test_features.csv')
btc_tef1 = btc_test_features.iloc[:111]
btc_tef2 = btc_test_features.iloc[25:]
btc_tef1.set_index('date', inplace=True)
btc_tef2.set_index('date', inplace=True)
btc_test_targets = pd.read_csv(f'{DIR}/btc_test_targets.csv')
btc_test_targets.set_index('date', inplace=True)
btc_trt_log = np.log(btc_train_targets)
btc_tefs1 = scaler.fit_transform(btc_tef1.values)
btc_tefs2 = scaler.fit_transform(btc_tef2.values)
btc_tet_log = np.log(btc_test_targets)
scaled_train_features = []
for features in btc_train_features:
    shape = features.shape
    scaled_train_features.append(np.expand_dims(features, [0,3]))
shape_2 = btc_tefs1.shape
btc_tefs1 = np.expand_dims(btc_tefs1, [0,3])
shape_3 = btc_tefs2.shape
btc_tefs2 = np.expand_dims(btc_tefs2, [0,3])
btc_trt_log = btc_trt_log.values[0]
btc_tet_log = btc_tet_log.values[0]
def build(hp):
    model = keras.Sequential()
    # Input Layer
    model.add(InputLayer(input_shape=(111,32,1)))
    # ConvLSTM1D
    convLSTM_hp_filters = hp.Int(name='convLSTM_filters', min_value=32, max_value=512, step=32)
    convLSTM_hp_kernel_size = hp.Choice(name='convLSTM_kernel_size', values=[3,5,7])
    convLSTM_activation = hp.Choice(name='convLSTM_activation', values=['selu', 'relu'])
    model.add(ConvLSTM1D(filters=convLSTM_hp_filters,
                         kernel_size=convLSTM_hp_kernel_size,
                         padding='same',
                         activation=convLSTM_activation,
                         use_bias=True,
                         bias_initializer='zeros'))
    # Flatten
    model.add(Flatten())
    # RepeatVector
    model.add(RepeatVector(5))
    # LSTM
    LSTM_hp_units = hp.Int(name='LSTM_units', min_value=32, max_value=512, step=32)
    LSTM_activation = hp.Choice(name='LSTM_activation', values=['selu', 'relu'])
    model.add(LSTM(units=LSTM_hp_units, activation=LSTM_activation, return_sequences=True))
    # TimeDistributed Dense
    dense_units = hp.Int(name='dense_units', min_value=32, max_value=512, step=32)
    dense_activation = hp.Choice(name='dense_activation', values=['selu', 'relu'])
    model.add(TimeDistributed(Dense(units=dense_units, activation=dense_activation)))
    # TimeDistributed Dense_Output
    model.add(Dense(1))
    # Set Learning Rate
    hp_learning_rate = hp.Choice(name='learning_rate', values=[1e-2, 1e-3, 1e-4])
    # Compile Model
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss=Huber(),
                  metrics=[RootMeanSquaredError()])
    return model
tuner = kt.Hyperband(build,
                     objective=kt.Objective('root_mean_squared_error', direction='min'),
                     max_epochs=10,
                     factor=3)
early_stop = EarlyStopping(monitor='root_mean_squared_error', patience=5)
opt_hps = []
for train_features in scaled_train_features:
    tuner.search(train_features, btc_trt_log, epochs=50, callbacks=[early_stop])
    opt_hps.append(tuner.get_best_hyperparameters(num_trials=1)[0])
models, epochs = ([] for _ in range(2))
for hps in opt_hps:
    model = tuner.hypermodel.build(hps)
    models.append(model)
    history = model.fit(train_features, btc_trt_log, epochs=70, verbose=0)
    rmse = history.history['root_mean_squared_error']
    best_epoch = rmse.index(min(rmse)) + 1
    epochs.append(best_epoch)
hypermodel = tuner.hypermodel.build(opt_hps[0])
for train_features, epoch in zip(scaled_train_features, epochs):
    hypermodel.fit(train_features, btc_trt_log, epochs=epoch)
tp1 = hypermodel.predict(btc_tefs1).flatten()
tp2 = hypermodel.predict(btc_tefs2).flatten()
test_predictions = np.concatenate((tp1, tp2[86:]), axis=None)
The hyperparameters of the model are tuned with keras_tuner. Because the notebook raised ResourceExhaustedError when training on the full feature dataset, sequentially segmented datasets are used instead (and, judging from a study that used a similar model architecture, training can be done efficiently with this approach).
The input dimension of each segmented dataset is (111,32,1).
No issues are reported until the last code block; the models train fine. Yet when .predict() is executed, the notebook prints an error stating that the dimension of the input features for making predictions is incompatible with the dimension of the input features used during training. I do not understand why this occurs, since as far as I know the input dimensions of a DNN's training dataset need not be identical to those of its test dataset.
Even though all the price data from 2018 to early 2021 are used as training datasets, predictions are only needed for the mid 2021 timeframe.
The dataset used for prediction has a dimension of (136,32,1).
I tried matching the dimension of this dataset to (111,32,1) through index slicing.
That then caused issues in the output dimension: while predictions should be made for 136 data points, the result only returned 10.
Are there any issues with the model configuration? I cannot interpret the current situation.
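One quick way to see where the two mismatches come from is to print the shapes the model was built with; this is only a debugging sketch, assuming the hypermodel and test arrays defined above. InputLayer(input_shape=(111,32,1)) pins the time axis to 111, and RepeatVector(5) fixes the number of output steps to 5:
# debugging sketch (assumes `hypermodel` and `btc_tefs1` exist as above)
hypermodel.summary()
print(hypermodel.input_shape)   # expected: (None, 111, 32, 1) - time axis fixed at 111
print(hypermodel.output_shape)  # expected: (None, 5, 1) - RepeatVector(5) fixes 5 output steps
print(btc_tefs1.shape)          # predict() input must match the declared input shape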
I'm trying to build a sequential neural network with Keras. I generate a dataset by inserting random values into a known function and train my model on this dataset, long enough to reach a steady loss. Then I ask the model to predict the x_train values, but instead of predicting something close to y_train, it returns the same value regardless of the input x. This value also happens to be the average of the y_train values. I don't understand what I'm doing wrong or why this is happening.
I'm using the following function for training the model:
def train_model(x_train, y_train, batch_size, input_size, layer_sizes, activations, optimizer, epochs, loss='MeanSquaredError'):
    assert len(layer_sizes) == len(activations)
    n_layers = len(layer_sizes)
    model = Sequential()
    model.add(LayerNormalization(input_dim=input_size))
    model.add(Dense(layer_sizes[0], kernel_regularizer='l2', kernel_initializer='ones', activation=activations[0], input_dim=input_size, name='layer1'))
    for i in range(1, n_layers):
        model.add(Dense(layer_sizes[i], kernel_initializer='ones', activation=activations[i], name=f'layer{i+1}'))
    model.compile(
        optimizer=optimizer,
        loss=loss,  # MeanSquaredLogarithmicError
    )
    print(model.summary())
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
    loss_history = history.history['loss']
    plt.scatter(x=np.arange(1, epochs+1), y=loss_history)
    plt.show()
    return model
I then created an arbitrary function (just for test purposes) as:
def func(x1, x2, x3, x4):
    y = (x1**3 + (x2*x3 + 2)) / (x4 + x2*x1)
    return y
and made a random dataset with this function:
def random_points_in_range(n, ranges):
    points = np.empty((n, len(ranges)))
    for i, element in enumerate(ranges):
        start = min(element[1], element[0])
        interval = abs(element[1] - element[0])
        rand_check = np.random.rand(n)
        randoms = (rand_check * interval) + start
        points[:, i] = randoms.T
    return points

def generate_random_dataset(n=200, ranges=[(0,10),(0,10),(0,10),(0,10)]):
    x_dataset = random_points_in_range(n, ranges)
    y_dataset = np.empty(n)
    for i in range(n):
        x1, x2, x3, x4 = x_dataset[i]
        y_dataset[i] = func(x1, x2, x3, x4)
    return x_dataset, y_dataset
I then train a model with these functions:
x_train,y_train = generate_random_dataset()
layer_sizes = [6,8,10,10,1]
activations = [LeakyReLU(),'relu','swish','relu','linear']
opt = Adam(learning_rate=0.001)
epochs = 3000
model=train_model(x_train,y_train,5,4,layer_sizes,activations,opt,epochs,loss='MeanSquaredError')
If you want to run the code, these are the imports you need:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LayerNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
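For reference, the constant-output symptom described above shows up when predicting on the training inputs themselves (a short sketch using the model and data created above):
preds = model.predict(x_train).flatten()
print(preds[:5])       # roughly the same value for every input row
print(y_train.mean())  # ...and that value is close to the mean of y_train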
So I have three models created in three different files: Model_A.py, Model_B.py, Model_C.py. Model_A is the first one I created. When I run Model_A, everything works well. However, when I run Model B or C, Python still runs Model A. I guessed it has to do with the session, but I am not sure, and I have not figured out how to fix it.
Here is the code for Model A.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # this suppresses some warnings. They say this PC has a CPU that could
# compute much faster and that TensorFlow was not built for it. For the moment we keep it like this; if necessary, we'll use the GPU
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import Callback
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import warnings
from math import sqrt
from keras import backend as K
#Early stopping based on loss
class EarlyStoppingByLossVal(Callback):
    def __init__(self, monitor='val_loss', value=0.00001, verbose=0):
        super(Callback, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs={}):
        current = logs.get(self.monitor)
        if current is None:
            warnings.warn("Early stopping requires %s available!" % self.monitor, RuntimeWarning)
        if current < self.value:
            if self.verbose > 0:
                print("Epoch %05d: early stopping THR" % epoch)
            self.model.stop_training = True
NAME = "TensBo{}".format(int(time.time()))
tensorboard = tf.keras.callbacks.TensorBoard(log_dir='logs/{}'.format(NAME))
# # DATA MANAGER: Define a function that imports and defines data
def data_manager(paths, col, row_drop, inout):
    # # PREDICTION DATA
    test_files = glob.glob(paths[0] + "/*.csv")  # keeping directories in a list
    n_test = len(test_files)  # number of files
    q = 0
    test = [None]*n_test
    for csv in test_files:
        pred_data = pd.read_csv(csv, sep=';', encoding='cp1252')
        t_step = pred_data.shape[0]  # as all data has the same shape, we can keep these values for later use
        #((1 t_step-1 inout[0]) (batch_sz t_step-1 inout[0]) (batch_sz t_step-1 inout[0]))
        pred_input = np.array(np.reshape(pred_data.drop(columns=col, index=row_drop).values,
                                         (1, t_step-1, inout[0])), dtype='float')  # remove selected columns and indexes, reshape data
        pred_output = np.array(pred_data.loc['1':, col[1]: col[2]], dtype='float')
        test[q] = [pred_input, pred_output]
        q = q + 1
    # # TRAINING DATA
    # introduce the path and count files
    train_files = glob.glob(paths[1] + "/*.csv")  # keeping directories in a list
    n_files = len(train_files)  # number of files
    # To check the encoding of a file just print its path:
    # with open(r'I:\05_Basanta Franco\Python\Data02\Data1574095060.csv') as f:
    #     print(f)
    inputs = np.zeros([n_files*(t_step-1), inout[0]])
    targets = np.zeros([n_files * (t_step-1), inout[1]])
    i = t_step-1
    j = 0
    # import all the csv files and store them in data
    for csv in train_files:
        matrix = pd.read_csv(csv, sep=';', encoding='cp1252')
        data_in = matrix.drop(columns=col, index=row_drop).values
        data_out = matrix.loc['1':, col[1]: col[2]].values
        inputs[j:i, :] = data_in
        targets[j:i, :] = data_out
        i = i + t_step-1
        j = j + t_step-1
    batch_sz = n_files
    # creating input and target tensors of size (batch, timestep, inputs)
    inputs = np.reshape(inputs, (batch_sz, t_step-1, inout[0]))  # input selection
    targets = np.reshape(targets, (batch_sz, t_step-1, inout[1]))  # target selection
    return test, inputs, targets, n_test, t_step
# # PATHS
test_path = r'I:\05_Basanta Franco\Python\Test'
train_path = r'I:\05_Basanta Franco\Python\Data02'
model_path = r'I:\05_Basanta Franco\Python\model\model01\model{}.h5'
paths = [test_path, train_path, model_path]
# # IMPORT DATA
col = ['All calculations', 'MSNS-Trafo', 'MSNS-Trafo.1']
row_drop = 0
inout = [11, 2]
test, inputs, targets, n_test, t_step = data_manager(paths, col, row_drop, inout) #test is a list with test inputs and outputs.
# # NEURAL NETWORK CREATOR
# Creating a model, which is a linear stack of layers
model = Sequential()
'''
LSTM layer of n nodes. The shape of the input is the number of input columns; the activation function is the rectified linear unit.
return_sequences=True tells the layer to output a sequence, which is necessary when the next layer is also recurrent.
TimeDistributed is important: it relates every step in the input sequence to its corresponding output.
Otherwise we would only be considering the last value of the sequence.
'''
l1 = model.add(layers.LSTM(inout[0], input_shape=(t_step-1, inout[0]), activation='relu', return_sequences=True))  # adding an RNN layer
model.add(layers.TimeDistributed(layers.Dense(inout[0])))
l3 = model.add(layers.LSTM(30, activation='relu', return_sequences=True))  # adding an RNN layer
model.add(layers.TimeDistributed(layers.Dense(20)))
l4 = model.add(layers.LSTM(10, activation='relu', return_sequences=True))  # adding an RNN layer
model.add(layers.TimeDistributed(layers.Dense(10)))
model.add(layers.Dropout(0.2))
l5 = model.add(layers.Dense(2))  # fully connected layer; what I would understand as a normal layer
opt = optimizers.Adam(lr=1e-03)  # how fast the learning rate decays; this helps find the minimum better
callbacks = [EarlyStoppingByLossVal('val_loss', value=0.002),
             ModelCheckpoint(filepath=model_path.format(int(time.time())), save_best_only=True)]
# compiling the model: defining features for the fit, such as the loss function, the optimizer, and the metrics of interest
model.compile(loss='mean_squared_error',
              optimizer=opt,
              metrics=['mse', 'mae'])  # accuracy is only valid for classification tasks
history = model.fit(inputs, targets, epochs=50, validation_split=0.25, callbacks=callbacks)
# Evaluate the model
scores = model.evaluate(inputs, targets, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
# print a summary of the outputs of every layer
print(model.summary())
#SAVING THE MODEL
#
#The model is saved by modelcheckpoint in a folder. Here, we are saving the models arquitecture in a json file
#model_json = model.to_json()
#with open("model/model01/model.json", "w") as json_file:
# json_file.write(model_json)
# PREDICTIONS WITH THE MODEL
t = 1
fig1 = plt.figure()
for prediction in test:
    NN_pred = model.predict(prediction[0])
    #ANALYSIS
    # reshape the prediction for plotting
    NN_pred = np.reshape(NN_pred, (prediction[1].shape[0], inout[1]))
    prediction[0] = np.reshape(prediction[0], (t_step-1, inout[0]))
    # plots: top, predicted and desired test output; bottom, test inputs
    plt.subplot(n_test, 1, t)
    plt.title('Test0' + np.str(t))
    plt.plot(NN_pred)
    plt.plot(prediction[1])
    plt.legend(['I_real_pred', 'I_im_pred', 'Ir', 'Ii'])
    # mean squared error
    rmse = sqrt(mean_squared_error(prediction[1], NN_pred))
    print('Test RMSE: %.3f' % rmse)
    t = t + 1
fig2 = plt.figure()
# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
# plot mse during training
plt.subplot(212)
plt.title('Mean Squared Error')
plt.plot(history.history['mse'], label='train')
plt.plot(history.history['val_mse'], label='test')
# print inputs yes or no
printin = input('Print inputs as well? [y/n]: ')
m = True
while m == True:
    if printin == 'y':
        t = 1
        fig3 = plt.figure()
        for prediction in test:
            plt.title('Inputs: V, P, Q')
            plt.subplot(n_test, 1, t)
            plt.plot(prediction[0])
            t = t + 1
        m = False
    elif printin == 'n':
        m = False
    else:
        printin = input('Answer not valid. Print inputs? [y/n]: ')
plt.show()
And here is the code for Model B.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # this suppresses some warnings. They say this PC has a CPU that could
# compute much faster and that TensorFlow was not built for it. For the moment we keep it like this; if necessary, we'll use the GPU
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow import Graph
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from keras import backend as K
from Model_A import EarlyStoppingByLossVal
K.clear_session()
def data_manager_1(paths, col, row_drop, inout):
    # # PREDICTION DATA
    test_files = glob.glob(paths[0] + "/*.csv")  # keeping directories in a list
    n_test = len(test_files)  # number of files
    q = 0
    test = [None]*n_test
    for csv in test_files:
        pred_data = pd.read_csv(csv, sep=';', encoding='cp1252')
        t_step = pred_data.shape[0]  # as all data has the same shape, we can keep these values for later use
        pred_input = np.array(np.reshape(pred_data.drop(columns=col, index=row_drop).values,
                                         (t_step-1, 1, inout[0])), dtype='float')  # remove selected columns and indexes, reshape data
        pred_output = np.array(pred_data.loc['1':, col[1]: col[2]], dtype='float')
        test[q] = [pred_input, pred_output]
        q = q + 1
    # # TRAINING DATA
    # introduce the path and count files
    train_files = glob.glob(paths[1] + "/*.csv")  # keeping directories in a list
    n_files = len(train_files)  # number of files
    # To check the encoding of a file just print its path:
    # with open(r'I:\05_Basanta Franco\Python\Data02\Data1574095060.csv') as f:
    #     print(f)
    inputs = np.zeros([n_files*(t_step-1), inout[0]])
    targets = np.zeros([n_files * (t_step-1), inout[1]])
    i = t_step-1
    j = 0
    # import all the csv files and store them in data
    for csv in train_files:
        matrix = pd.read_csv(csv, sep=';', encoding='cp1252')
        data_in = matrix.drop(columns=col, index=row_drop).values
        data_out = matrix.loc['1':, col[1]: col[2]].values
        inputs[j:i, :] = data_in
        targets[j:i, :] = data_out
        i = i + t_step-1
        j = j + t_step-1
    batch_sz = n_files
    # creating input and target tensors of size (batch, timestep, inputs)
    inputs = np.reshape(inputs, (inputs.shape[0], 1, inout[0]))  # input selection
    targets = np.reshape(targets, (targets.shape[0], 1, inout[1]))  # target selection
    return test, inputs, targets, n_test, t_step
# # PATHS
test_path = r'I:\05_Basanta Franco\Python\Test'
train_path = r'I:\05_Basanta Franco\Python\Data02'
model_path = r'I:\05_Basanta Franco\Python\model\model02\model{}.h5'
paths = [test_path, train_path, model_path]
# # IMPORT DATA
col = ['All calculations', 'MSNS-Trafo', 'MSNS-Trafo.1']
row_drop = 0
inout = [11, 2]
test, inputs, targets, n_test, t_step = data_manager_1(paths, col, row_drop, inout) #test is a list with test inputs and outputs.
# # CREATE THE MODEL
model02 = Sequential()
l1 = model02.add(layers.LSTM(inout[0], input_shape=(1, inout[0]), activation='relu', return_sequences=True))  # adding an RNN layer
model02.add(layers.TimeDistributed(layers.Dense(inout[0])))
l3 = model02.add(layers.LSTM(5, activation='relu', return_sequences=True))  # adding an RNN layer
model02.add(layers.TimeDistributed(layers.Dense(5)))
model02.add(layers.Dropout(0.2))
l5 = model02.add(layers.Dense(2))  # fully connected layer; what I would understand as a normal layer
# compiling the model: defining features for the fit, such as the loss function, the optimizer, and the metrics of interest
opt = optimizers.Adam(lr=1e-03)  # how fast the learning rate decays; this helps find the minimum better
callbacks = [EarlyStopping('val_loss', patience=20),
             ModelCheckpoint(filepath=model_path.format(int(time.time())), save_best_only=True)]
model02.compile(loss='mean_squared_error',
                optimizer=opt,
                metrics=['mse', 'mae'])  # accuracy is only valid for classification tasks
# train model and save history
history = model02.fit(inputs, targets, epochs=20, validation_split=0.25, callbacks=callbacks)
# plot loss during training
def train_plots(history):
    fig2 = plt.figure()
    plt.subplot(211)
    plt.title('Loss')
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    # plot mse during training
    plt.subplot(212)
    plt.title('Mean Squared Error')
    plt.plot(history.history['mse'], label='train')
    plt.plot(history.history['val_mse'], label='test')
    plt.show()
train_plots(history)
I tried initializing graphs and sessions when creating the models, but it is not working.
Would it be possible (at least as a workaround) to kill each process after it finishes its task? Killing the process would ensure that TensorFlow's memory is released.
If the models need a communication channel or have intermediate results sent over, you could use queues or text files to solve this.
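A minimal sketch of that workaround: run each model in its own child process, so all TensorFlow state is released when the process exits. This assumes each Model_*.py file wraps its top-level work in a hypothetical main() function:
import importlib
import multiprocessing as mp

def run(module_name):
    # import inside the child so each process builds its own graph/session
    importlib.import_module(module_name).main()  # hypothetical main() per file

if __name__ == '__main__':
    ctx = mp.get_context('spawn')  # 'spawn' gives each child a fresh interpreter
    for name in ('Model_A', 'Model_B', 'Model_C'):
        p = ctx.Process(target=run, args=(name,))
        p.start()
        p.join()  # TensorFlow memory is freed when the child exits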
Just fixed it. My problem was that I was unable to reset the TensorFlow session or clear the graphs in my session in order to create and train a different model. I found that the command keras.backend.reset_uids() does that. Thank you anyway!
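For reference, a minimal sketch of resetting Keras state between model definitions: reset_uids() clears the auto-generated layer-name counters, and clear_session() (already imported as K in the scripts above) additionally destroys the current graph:
from keras import backend as K

# ... build and train the first model ...
K.reset_uids()     # reset the auto-generated layer name counters
K.clear_session()  # destroy the current TF graph and free its resources
# ... now build and train the next model from a clean state ...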
The end goal of the system is to classify video input. The training data consists of 1-2 second 1080x1920 video sequences, which are preprocessed by converting RGB frames to grayscale and padding with blank frames so that all sequences have the same length. A series of 3D convolution layers connected to a fully-connected network is used. The problem is that when the model is trained by calling the fit method, "Killed: 9" is returned and the program stops executing. Here is the code being used:
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import SGD
from keras.utils import np_utils
from pprint import pprint
from sklearn.utils import shuffle
import json
import os
import cv2
import sys
import numpy as np
class VideoClassifier(object):
    def __init__(self, rows, columns, frames_per_sequence):
        self.rows = rows
        self.columns = columns
        self.frames_per_sequence = frames_per_sequence
        self.X = []
        self.Y = []
        self.training_classes = []
        self.vc = None
    def train_vc_model(self):
        """ Train the video classifier
        """
        # split data into training and test data
        print "Splitting data into training and test sets"
        validation_split = 0.2
        sample_count = len(self.X)
        X_train = self.X[0:int(round(sample_count*(1-validation_split)))]
        Y_train = self.Y[0:int(round(sample_count*(1-validation_split)))]
        X_test = self.X[int(round(sample_count*(1-validation_split))):-1]
        Y_test = self.Y[int(round(sample_count*(1-validation_split))):-1]
        # incrementally train model
        print "Training video classifier"
        self.vc.fit(x=X_train,
                    y=Y_train,
                    validation_data=(X_test, Y_test),
                    batch_size=10,
                    nb_epoch=10,
                    verbose=2)
        score = self.vc.evaluate(x=X_test,
                                 y=Y_test,
                                 batch_size=10)
        print "Accuracy: {0:.2%}".format(score[1])
    def generate_vc_model(self):
        """ Builds the video classifier model
        """
        print "".join(["Generating video classifier model\n",
                       "-"*40])
        vc = Sequential()
        print " - Adding convolution layers"
        vc.add(Convolution3D(nb_filter=32,
                             kernel_dim1=2,
                             kernel_dim2=2,
                             kernel_dim3=2,
                             border_mode="same",
                             input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                             activation="relu"))
        vc.add(MaxPooling3D(pool_size=(2, 2, 2)))
        vc.add(Dropout(0.5))
        vc.add(Convolution3D(nb_filter=32,
                             kernel_dim1=2,
                             kernel_dim2=2,
                             kernel_dim3=2,
                             border_mode="same",
                             activation="relu"))
        vc.add(MaxPooling3D(pool_size=(2, 2, 2)))
        vc.add(Dropout(0.5))
        vc.add(Flatten())
        print " - Adding fully connected layers"
        vc.add(Dense(output_dim=32,
                     init="normal",
                     activation="relu"))
        vc.add(Dense(output_dim=32,
                     init="normal",
                     activation="relu"))
        vc.add(Dense(output_dim=3,
                     init="normal",
                     activation="softmax"))
        print " - Compiling model"
        sgd = SGD(lr=0.01,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True)
        vc.compile(loss="categorical_crossentropy",
                   optimizer=sgd)
        self.vc = vc
        print " * VIDEO CLASSIFIER MODEL GENERATED * "
    def load_training_data(self):
        """ Loads and preprocesses training data
        """
        # load training metadata from config file
        training_metadata = {}
        training_classes = []
        with open("training_config.json") as training_config:
            training_metadata = json.load(training_config)
            training_classes = sorted(list(training_metadata.keys()))
        print "".join(["\n",
                       "Found {0} training classes!\n".format(len(training_classes)),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
        print ""
        X, Y = self.preprocess_training_data(training_metadata, training_classes)
        X, Y = shuffle(X, Y)
        self.X = X
        self.Y = Y
        self.training_classes = training_classes
    def preprocess_training_data(self, training_metadata, training_classes):
        """ Preprocess training data for loading
        """
        # process training data
        X = []
        Y = []
        # iterate through each class's data
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # iterate through each sequence
            for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r".format(training_class, idx+1, len(training_class_sequence_paths)))
                sys.stdout.flush()
                # get frames
                frames = self.split_frames(training_class_sequence_path)
                # equalize sequence length
                if len(frames) < self.frames_per_sequence:
                    blank_frame = np.zeros((self.rows, self.columns))
                    for _ in xrange(self.frames_per_sequence - len(frames)):
                        frames.append(blank_frame)
                frames = frames[0:self.frames_per_sequence]
                frames = map(lambda frame: frame.astype("float32") / 255.0, frames)
                # add frames and class label to training data
                X.append([frames])
                Y.append(class_label)
        print "\n"
        # one hot encode labels
        Y = np_utils.to_categorical(Y)
        return X, Y
    def split_frames(self, video_file_path):
        """ Splits video sequences into frames
        """
        video = cv2.VideoCapture(video_file_path)
        success, frame = video.read()
        frames = []
        success = True
        while success:
            success, frame = video.read()
            if success:
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
        return frames
if __name__ == "__main__":
    vc = VideoClassifier(1080, 1920, 30)
    vc.load_training_data()
    vc.generate_vc_model()
    vc.train_vc_model()
From what I understand, "Killed: 9" means the program is consuming too much memory, and I only have 8 GB of RAM on my 13-inch MacBook Pro. I have tried reducing the batch size to 1, but even that does not solve the problem. Is there another way to train a Keras model that deals with this memory constraint?
These images are just too big for classification, and recent research rarely goes beyond 512x512 images (mostly for object detection). For classification, typical sizes are around 256x256 or 224x224.
So your only option is to downscale the images to a reasonable size. Reducing the batch size also helps to lower memory usage, but by itself it won't be enough in this case.
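A minimal sketch of that downscaling, applied where split_frames converts each frame to grayscale (the 224x224 target is illustrative):
import cv2

def downscale_frame(frame, size=(224, 224)):
    # convert to grayscale as before, then shrink to a classification-friendly size
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return cv2.resize(gray, size, interpolation=cv2.INTER_AREA)
Remember to construct the blank padding frames with the same downscaled shape, e.g. np.zeros(size).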
I am training a TensorFlow model and later plan to use it for predictions.
import numpy as np
import pandas as pd
import sys
import tensorflow as tf
from tensorflow.contrib import learn
from sklearn.metrics import mean_squared_error, mean_absolute_error
from lstm_predictor import load_csvdata, lstm_model
import pymysql as mariadb
LOG_DIR = './ops_logs'
K = 1 # history used for lstm.
TIMESTEPS = 65*K
RNN_LAYERS = [{'steps': TIMESTEPS}]
DENSE_LAYERS = [10, 10]
TRAINING_STEPS = 1000
BATCH_SIZE = 1
PRINT_STEPS = TRAINING_STEPS / 10
def train_model(symbol=1, categ='M1', limit=1000, upgrade=False):
    MODEL_DIR = 'model/'+str(symbol)+categ
    regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                          n_classes=0,
                                          verbose=1,
                                          steps=TRAINING_STEPS,
                                          optimizer='Adagrad',
                                          learning_rate=0.03,
                                          continue_training=True,
                                          batch_size=BATCH_SIZE)
    X, y = load_csvdata(df, K)
    regressor.fit(X['train'], y['train'], logdir=MODEL_DIR)  # logdir=LOG_DIR
    X['test'] = X['train'][-10:]
    y['test'] = y['train'][-10:]
    predicted = regressor.predict(X['test'])
    print('actual', 'predictions')
    for i, yi in enumerate(y['test']):
        print(yi[0], ' ', predicted[i])
    mse = mean_absolute_error(y['test'], predicted)
    print("mean_absolute_error : %f" % mse)
    ###############################
    regressor.save(LOG_DIR)

train_model()
Then I want to write a predict function which would read the model from model/** and make predictions.
def predict(symbol=1, categ='M1'):
    pass
    # how to load saved model data?
But I am unable to load the model using
regressor = learn.TensorFlowEstimator.restore(LOG_DIR)
since that is currently not implemented.
How can I make repeated predictions at multiple times in the future?
The model checkpoints are saved as:
checkpoint
events.out.tfevents.1476102309.hera.creatory.org
events.out.tfevents.1476102926.hera.creatory.org
events.out.tfevents.1476105626.hera.creatory.org
events.out.tfevents.1476106521.hera.creatory.org
events.out.tfevents.1476106839.hera.creatory.org
events.out.tfevents.1476107001.hera.creatory.org
events.out.tfevents.1476107462.hera.creatory.org
graph.pbtxt
model.ckpt-8001-00000-of-00001
model.ckpt-8001.meta
model.ckpt-8301-00000-of-00001
model.ckpt-8301.meta
model.ckpt-8601-00000-of-00001
model.ckpt-8601.meta
model.ckpt-8901-00000-of-00001
model.ckpt-8901.meta
model.ckpt-9000-00000-of-00001
model.ckpt-9000.meta
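Since TensorFlowEstimator.restore is not implemented, one generic fallback in that era of TensorFlow was to rebuild the graph from a .meta file and restore the variables with tf.train.Saver. A sketch, with the checkpoint name taken from the listing above:
import tensorflow as tf

with tf.Session() as sess:
    # rebuild the graph that was saved alongside the checkpoint, then restore the variables
    saver = tf.train.import_meta_graph('./ops_logs/model.ckpt-9000.meta')
    saver.restore(sess, './ops_logs/model.ckpt-9000')
    # tensors can now be fetched from the restored graph by name for repeated predictions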