So, there is the universal approximation theorem which says that a neural network can approximate any continuous function, provided it has at least one hidden layer and uses non-linear activation there.
So my doubt is as follows: "How do I approximate a function using neural networks with my input being other functions?"
Let's say I want to approximate y = x + 1 and I have z_1 = 2x, z_2 = 3x + 3 and z_3 = 4x + 1, with x being time variant. What I want my model to learn is the relationship between z_1, z_2, z_3 and y, as I may write *y = -6 * z_1 - 1 * z_2 + 4 z_3* ( I want my network to learn this relationship).
From time 0 to T I have the value of all functions and can do a supervised learning, but from (T + 1) +, I will only have z_1, z_2 and z_3 and so, I would be using the network to approximate the future values of y based on these z functions (z_1, z_2, z_3).
How do I implement that on python using Keras? I used the following code but didn't get any decent results.
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
n = 10000
def z_1(x):
x_0 = []
for i in x:
x_0.append(2*i)
return x_0
def z_2(x):
x_0 = []
for i in x:
x_0.append(3*i + 3)
return x_0
def z_3(x):
x_0 = []
for i in x:
x_0.append(4* i + 1)
return x_0
def z_0(x):
x_0 = []
for i in x:
x_0.append(i + 1)
return x_0
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=3))
model.add(Dense(500, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
np.random.seed(seed = 2000)
input = np.random.random(n) * 10
dataset = z_0(input)
input_1 = z_1(input)
input_2 = z_2(input)
input_3 = z_3(input)
x_train = np.array([input_1[0:int(0.8*n)], input_2[0:int(0.8*n)], input_3[0:int(0.8*n)]])
y_train = np.array([dataset[0:int(0.8*n)]])
x_train = x_train.reshape(int(0.8*n), 3)
y_train = y_train.reshape(int(0.8*n),1)
es = keras.callbacks.EarlyStopping(monitor='val_loss',
min_delta=0,
patience=0,
verbose=0, mode='auto')
model.fit(x_train, y_train, epochs=100, batch_size=128, callbacks = [es])
x_test = np.array([input_1[int(n-100):n], input_2[int(n-100):n], input_3[int(n-100):n]])
x_test = x_test.reshape(int(100), 3)
classes = model.predict(x_test, batch_size=128)
y_test = np.array([dataset[int(n-100):n]]).reshape(int(100),1)
plt.plot(y_test,c='b', label = 'test data')
plt.plot(classes,c='r', label = 'test result')
plt.legend()
plt.show()
You can't do this with a feedforward neural network. You need to do this with recurrent neural networks. Look up LSTM or GRU cells in Keras.
https://keras.io/layers/recurrent/
Related
I am trying to use keras dense neural networks to forecast some time series.
When fitting my model on complex real datasets, my model converges toward a constant output, i.e. whatever the input, the model gives the same output (which seems to be a reasonable estimate of the mean of my dataset).
I reduced the problem up to very simple simulated datasets, and still have the same issue. Here is a minimal working example:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
X = []
Y = []
for jh in range(10000):
x = np.arange(-1, 1, 0.01)
y = 1+x*((np.random.random()-0.5))
y += np.random.randn(len(x))/(100)
X.append(y[:100])
Y.append(y[100:])
X = np.array(X)[:,:,None]
Y = np.array(Y)[:,:,None]
model = models.Sequential()
model.add(layers.Input((100,1,)))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Dense(100, activation='sigmoid'))
model.add(tf.keras.layers.Reshape((100,1)))
model.compile(loss = tf.keras.losses.MeanSquaredError(),optimizer="adam")
# model.summary()
print("Fit model on training data")
print("Fit model on training data")
history = model.fit(x=X, y=Y, batch_size=10000, epochs=200)
for k in np.arange(0,10000,1000):
plt.plot(np.arange(len(X[k])), X[k])
plt.plot(np.arange(len(X[k]), len(X[k])+len(Y[k])), model(X)[k])
plt.plot(np.arange(len(X[k]), len(X[k])+len(Y[k])), Y[k])
In this example, the model returns exactly same output regardless of the input.
I tried to change the number of layers, the loss function, the learning rate, the batch size and the number of epochs, without any noticeable improvement.
Do you have any suggestion on this issue?
If you rearrange your random inputs to be like
y = np.array(1. + x)
y += 1. / 100.
also
J, K = [] , []
for jh in range(10000):
j = np.arange(-1, 1, 0.01)
k = -np.array(1. - j)
k += 1. / 100
J.append(k[:100])
K.append(k[100:])
J = np.array(J)[:, :, None]
K = np.array(K)[:, :, None]
and finally add
plt.plot(np.arange(len(X[k]), len(X[k]) + len(Y[k])), model(J)[k])
in the plotting loop, then you will see two different results. Probably you should check your datasets diversity.
I am using TF2 (2.3.0) NN to approximate the function y which solves the ODE: y'+3y=0
I have defined cutsom loss class and function in which I am trying to differentiate the single output with respect to the single input so the equation holds, provided that y_true is zero:
from tensorflow.keras.losses import Loss
import tensorflow as tf
class CustomLossOde(Loss):
def __init__(self, x, model, name='ode_loss'):
super().__init__(name=name)
self.x = x
self.model = model
def call(self, y_true, y_pred):
with tf.GradientTape() as tape:
tape.watch(self.x)
y_p = self.model(self.x)
dy_dx = tape.gradient(y_p, self.x)
loss = tf.math.reduce_mean(tf.square(dy_dx + 3 * y_pred - y_true))
return loss
but running the following NN:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from custom_loss_ode import CustomLossOde
num_samples = 1024
x_train = 4 * (tf.random.uniform((num_samples, )) - 0.5)
y_train = tf.zeros((num_samples, ))
inputs = Input(shape=(1,))
x = Dense(16, 'tanh')(inputs)
x = Dense(8, 'tanh')(x)
x = Dense(4)(x)
y = Dense(1)(x)
model = Model(inputs=inputs, outputs=y)
loss = CustomLossOde(model.input, model)
model.compile(optimizer=Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99),loss=loss)
model.run_eagerly = True
model.fit(x_train, y_train, batch_size=16, epochs=30)
for now I am getting 0 loss from the fisrt epoch, which doesn't make any sense.
I have printed both y_true and y_test from within the function and they seem OK so I suspect that the problem is in the gradien which I didn't succeed to print.
Apprecitate any help
Defining a custom loss with the high level Keras API is a bit difficult in that case. I would instead write the training loop from scracth, as it allows a finer grained control over what you can do.
I took inspiration from those two guides :
Advanced Automatic Differentiation
Writing a training loop from scratch
Basically, I used the fact that multiple tape can interact seamlessly. I use one to compute the loss function, the other to calculate the gradients to be propagated by the optimizer.
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
num_samples = 1024
x_train = 4 * (tf.random.uniform((num_samples, )) - 0.5)
y_train = tf.zeros((num_samples, ))
inputs = Input(shape=(1,))
x = Dense(16, 'tanh')(inputs)
x = Dense(8, 'tanh')(x)
x = Dense(4)(x)
y = Dense(1)(x)
model = Model(inputs=inputs, outputs=y)
# using the high level tf.data API for data handling
x_train = tf.reshape(x_train,(-1,1))
dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train)).batch(1)
opt = Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99)
for step, (x,y_true) in enumerate(dataset):
# we need to convert x to a variable if we want the tape to be
# able to compute the gradient according to x
x_variable = tf.Variable(x)
with tf.GradientTape() as model_tape:
with tf.GradientTape() as loss_tape:
loss_tape.watch(x_variable)
y_pred = model(x_variable)
dy_dx = loss_tape.gradient(y_pred, x_variable)
loss = tf.math.reduce_mean(tf.square(dy_dx + 3 * y_pred - y_true))
grad = model_tape.gradient(loss, model.trainable_variables)
opt.apply_gradients(zip(grad, model.trainable_variables))
if step%20==0:
print(f"Step {step}: loss={loss.numpy()}")
I am trying to implement an algorithm from a paper, using keras, where they train a neural network to approximate a mathematical function f(x) with limited amount of data points. I want the input of the neural network to be x and the output in the form of f(x) = 1 + xN(x), where N(x) is the value from the final dense layer.
I know how to make it work for output f(x) = N(x) but I just don't know how to adjust the network for f(x) = 1 + xN(x). Can someone help me?
This is my current code
from keras.layers import Input, Dense, Add, Multiply
from keras.models import Model
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import time
def f(x):
return x**2
Xtrain = np.linspace(0, 1, 10)
ytrain = np.array([f(x) for x in Xtrain])
X = np.linspace(0, 2, 100)
y = np.array([f(x) for x in X])
input = Input(shape=(1,))
init = np.ones(shape=(10, 1))
init = K.variable(init)
hidden = input
hidden = Dense(8, activation='relu')(hidden)
out = Dense(1, activation='linear')(hidden)
out = Add()([init, Multiply()([out, input])])
model = Model(inputs=input, outputs=out)
model.compile(loss='mean_squared_error', optimizer="adam")
tic = time.perf_counter()
model.fit(Xtrain, ytrain, epochs=1000, verbose=1)
toc = time.perf_counter()
print(f"Training time: {toc - tic:0.4f} seconds")
prediction = model.predict(X)
prediction = prediction.reshape((100,))
plt.figure(figsize=(10,5))
plt.plot(X, y, color='red', label='Analytical solution')
plt.plot(X, prediction, color='black', label = 'Prediction')
plt.scatter(Xtrain, ytrain, color='blue', label='Training points')
plt.legend()
plt.show()
plt.tight_layout()
which crashes at line
out = Add()([init, Multiply()([out, input])])
The Add layer is working between two layers and between a layer and a number/ndarray.
you can just use it like this:
init=np.ones(shape=(10, 1))
inp = Input(shape=(1,))
hidden = Dense(8, activation='relu')(inp)
out = Dense(1, activation='linear')(hidden)
mul=Multiply()([out, inp])
out = Add()([init, mul])
model = Model(inputs=inp, outputs=out)
model.compile(loss='mean_squared_error', optimizer="adam")
I checked it and it worked.
by the way, input is a builtin function, I don't recommend to use it unless you want to use it.
I'm new to machine learning and trying to fit a sample data set with neural networks in python using tensorflow. After having implemented the neural network in Dymola I want to compare the outputs of the function with those from the neural network.
The sample data set is:
import tensorflow as tf
from keras import metrics
import numpy as np
from keras.models import *
from keras.layers import Dense, Dropout
from keras import optimizers
from keras.callbacks import *
import scipy.io as sio
import mat4py as m4p
inputs = np.linspace(0, 15, num=3000)
outputs = 1/7 * ((inputs/5)^3 - (inputs/3)^2 + 5)
Inputs and outputs are then scaled into the interval [0; 0.9]:
inputs_max = np.max(inputs)
inputs_min = np.min(inputs)
outputs_max = np.max(outputs)
outputs_min = np.min(outputs)
upper_bound = 0.9
lower_bound = 0
m_in = (upper_bound - lower_bound) / (inputs_max - inputs_min)
c_in = upper_bound - (m_in * inputs_max)
scaled_in = m_in * inputs + c_in
m_out = (upper_bound - lower_bound) / (outputs_max - outputs_min)
c_out = upper_bound - (m_out * outputs_max)
scaled_out = m_in * inputs + c_in
and after that the neural network is trained with:
# shuffle values
def shuffle_in_unison(a, b):
assert len(a) == len(b)
shuffled_a = np.empty(a.shape, dtype=a.dtype)
shuffled_b = np.empty(b.shape, dtype=b.dtype)
permutation = np.random.permutation(len(a))
for old_index, new_index in enumerate(permutation):
shuffled_a[new_index] = a[old_index]
shuffled_b[new_index] = b[old_index]
return shuffled_a, shuffled_b
tf_features_64 = scaled_in
tf_labels_64 = scaled_out
tf_features_32 = tf_features_64.astype(np.float32)
tf_labels_32 = tf_labels_64.astype(np.float32)
X = tf_features_32
Y = tf_labels_32
shuffle_in_unison(X, Y)
# define callbacks
filepath = "weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5"
savebestCallBack = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
save_best_only=True, save_weights_only=False, mode='auto', period=1)
tbCallBack = TensorBoard(log_dir='./Graph',
histogram_freq=5,
write_graph=True,
write_images=True)
esCallback = EarlyStopping(monitor='val_loss',
min_delta=0,
patience=500,
verbose=0,
mode='min')
# neural network architecture
visible = Input(shape=(1,))
x = Dense(40, activation='tanh')(visible)
x = Dense(39, activation='tanh')(x)
x = Dense(38, activation='tanh')(x)
x = Dense(30, activation='tanh')(x)
output = Dense(1)(x)
# setup optimizer
Optimizer = optimizers.adam(lr=0.0007, amsgrad=True)
model = Model(inputs=visible, outputs=output)
model.compile(optimizer=Optimizer,
loss=['mse'],
metrics=['mae', 'mse']
)
model.fit(X, Y, epochs=1000, batch_size=1, verbose=1,
shuffle=True, validation_split=0.05, callbacks=[tbCallBack, esCallback])
# return weights
weights1 = model.layers[1].get_weights()[0]
biases1 = model.layers[1].get_weights()[1]
print('Layer1---------------------------------------------------------------------------------------------------------')
print('weights1:')
print(repr(weights1.transpose()))
print('biases1:')
print(repr(biases1))
w1 = weights1.transpose()
b1 = biases1.transpose()
we1 = {'w1' : w1.tolist()}
bi1 = {'b1' : b1.tolist()}
.........
......
Later on, I implemented the trained neural network in the program "Dymola" by loading the weights and biases in pre-configured "neural network base classes" (which have been used several times and are working).
// Modelica code for Dymola:
Real inputs;
Real outputs;
Real scaled_outputs;
Real scaled_inputs(start=0);
Real scaled_outputsfunc;
der(scaled_inputs) = 0.9;
//part of the neural network implementation in Dymola
NeuralNetwork.BaseClasses.NeuralNetworkLayer neuralNetworkLayer1(
NeuronActivationFunction=NeuralNetwork.Types.ActivationFunction.TanSig,
numInputs=1,
numNeurons=40,
weightTable=[-0.367953330278397; ......])
annotation (Placement(transformation(extent={{-76,22},{-56,42}})));
//scaled inputs
neuralNetworkLayer1.u[1] = scaled_inputs;
//scaled outputs
neuralNetworkLayer5.y[1]= scaled_outputs;
//scaled_inputs = 0.06 * inputs
inputs = 1/0.06 * (scaled_inputs);
outputs = 1/875 * inputs^3 - 1/63 * inputs^2 + 5/7;
scaled_outputsfunc = 1.2173139581825052 * outputs - 0.3173139581825052;
When plotting and comparing the scaled outputs of the function and the returned (scaled) values of the neural network I noticed that the approximation is very good in the interval from [0.5; 0.8], but the closer the inputs reach the boundaries the worse the approximation becomes.
Unfortunately, I have no clue why this is happening and how to fix this issue. I'd be very glad if someone could help me.
I want to answer my own question: I forgot to specify the activation function in the output layer in my python code, which Keras then set to a linear function by default, see also:
https://keras.io/layers/core/
In Dymola, where my ANN was implemented, 'tanh' was the activation function in the last layer, which lead to a divergence near the boundaries.
The correct python code for this application must be:
visible = Input(shape=(1,))
x = Dense(40, activation='tanh')(visible)
x = Dense(39, activation='tanh')(x)
x = Dense(38, activation='tanh')(x)
x = Dense(30, activation='tanh')(x)
output = Dense(1, activation='tanh')(x)
This question already has answers here:
How do I create a variable-length input LSTM in Keras?
(4 answers)
Closed 5 years ago.
Despite going through multiple examples, I still don't understand how to classify sequences of varying length using Keras, similar to this question. I can train a network that detects frequencies of sinusoid with varying length, by using masking:
from keras import models
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Masking
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.preprocessing.sequence import pad_sequences
import numpy as np
def gen_noise(noise_len, mag):
return np.random.uniform(size=noise_len) * mag
def gen_sin(t_val, freq):
return 2 * np.sin(2 * np.pi * t_val * freq)
def train_rnn(x_train, y_train, max_len, mask, number_of_categories):
epochs = 3
batch_size = 500
# three hidden layers of 256 each
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(number_of_categories, input_shape=(number_of_categories,),
activation='softmax', name='output'))
model.compile(loss=categorical_crossentropy, optimizer=RMSprop())
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_sig_cls_pair(freqs, t_stops, num_examples, noise_magnitude):
x = []
y = []
num_cat = len(freqs)
dt = 0.01
max_t = int(np.max(t_stops) / dt)
for f_i, f in enumerate(freqs):
for t_stop in t_stops:
t_range = np.arange(0, t_stop, dt)
t_len = t_range.size
for _ in range(num_examples):
sig = gen_sin(f, t_range) + gen_noise(t_len, noise_magnitude)
x.append(sig)
one_hot = np.zeros(num_cat, dtype=np.bool)
one_hot[f_i] = 1
y.append(one_hot)
pad_kwargs = dict(padding='post', maxlen=max_t, value=np.NaN, dtype=np.float32)
return pad_sequences(x, **pad_kwargs), np.array(y)
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
frequencies = (5, 7, 10)
signal_lengths = (0.8, 0.9, 1)
x_in, y_in = gen_sig_cls_pair(frequencies, signal_lengths, 50, noise_mag)
mod = train_rnn(x_in[:, :, None], y_in, 100, mask_val, len(frequencies))
However, I don't understand how I'm supposed to tell Keras about the other sequences. I thought I could mask them too, but when I try, they just output NaN.
testing_dat, expected = gen_sig_cls_pair(frequencies, signal_lengths, 1, 0)
res = mod.predict(testing_dat[:, :, None])
fig, axes = plt.subplots(3)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(np.argmax(res, axis=1), "ro", label="result", alpha=0.2)
axes[1].plot(np.argmax(expected, axis=1), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
axes[2].plot(res)
plt.show()
How do I make a network that can evaluate inputs of varying lengths?
You can pad the input sequences (usually with zeros) or you can use batches of size 1 with varying input size, as outlined in fchollet's answer on the Keras github:
for seq, label in zip(sequences, y):
model.train(np.array([seq]), [label])
Alternatively, if your type of problem allows it, you extract subsequences of the original time series with length less than the length of the shortest sequences. The third option also allows you to add redundancy to the dataset if you have few samples, and reduce the chances of overfitting.
EDIT:
Seanny123 (OP) pointed out that fchollet's lines above contain model.train, which is not valid code.
He solved the problem using batches of size 1 and the following code:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
x_val[one_indices, 0] = 1
y_val = np.zeros(num_samples, dtype=np.bool)
y_val[one_indices] = 1
return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_train, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
mean_tr_acc = []
mean_tr_loss = []
for seq_idx in range(X_train.shape[0]):
start_val = X_train[seq_idx, 0]
assert y_train[seq_idx] == start_val
assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_train[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
x_in = np.array([[[X_train[seq_idx][j]]]])
tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
mean_tr_acc.append(tr_acc)
mean_tr_loss.append(tr_loss)
model.reset_states()
print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
print('loss training = {}'.format(np.mean(mean_tr_loss)))
print('___________________________________')
mean_te_acc = []
mean_te_loss = []
for seq_idx in range(X_test.shape[0]):
start_val = X_test[seq_idx, 0]
assert y_test[seq_idx] == start_val
assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_test[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
mean_te_acc.append(te_acc)
mean_te_loss.append(te_loss)
model.reset_states()
print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
print('loss testing = {}'.format(np.mean(mean_te_loss)))
print('___________________________________')