I am not using model.compile(); I am following 'Writing a training loop from scratch' (therefore, a simple model.save() does not include my optimizer). Also, I create the NN using the functional API in TensorFlow. So I am able to save with model.save(), but it saves only the NN model and does not save the state of my optimizer.
I think I was able to save an optimizer using pickle at some point, but somehow I am no longer able to save it
(it gives me the error message: AttributeError: Can't pickle local object 'make_gradient_clipnorm_fn..').
I can use the 'get_config' and 'from_config' methods, but they do not save the weights. There is another solution to save the weights, like here, but I am looking for a better (and simpler) solution.
(And actually that method does not work for me, because I am not only training 'model.trainable_weights' but also other 'tf.Variable's that do not belong to my 'model'.)
Is there any easy and convenient way to save and load the optimizer?
Thanks
colab: https://colab.research.google.com/drive/1Jn2lbMcaVURpKITL6qEQ-05XndVDoABQ?usp=sharing
or
--code--
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import datetime
import os
import importlib
import pickle
import time
from pathlib import Path
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
"""Generate samples for training"""
my_dtype = 'float32'
tf.keras.backend.set_floatx(my_dtype)
num_smpl = 500
amp1 = 0.5
x = 0.5*np.random.rand(num_smpl, 1).astype(np.float32)
y = amp1 * np.heaviside(x, 0) - amp1*0.2*np.heaviside(x-0.15, 0) \
    + amp1*0.2*np.heaviside(x-0.17, 0)
mu, sigma = 0, 0.001 # mean and standard deviation
white_noise = np.random.normal(mu, sigma, num_smpl).reshape(-1, 1)
y = y + white_noise
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(x, y, s=5, edgecolors='k')
"""Create model using a funtional API"""
layer_input = tf.keras.layers.Input(shape=(1,), dtype=my_dtype)
layer_dense = tf.keras.layers.Dense(8, activation='tanh', dtype=my_dtype)(layer_input)
layer_dense = tf.keras.layers.Dense(8, activation='tanh', dtype=my_dtype)(layer_dense)
layer_output = tf.keras.layers.Dense(1, name='', dtype=my_dtype)(layer_dense)
model = tf.keras.Model(inputs=layer_input, outputs=layer_output, name='my_model')
model.summary()
epoch = 1
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, epsilon=1e-09)
loss_fn = tf.keras.losses.MeanSquaredError()
"""Training using a custom training loop"""
while epoch < 100:
    with tf.GradientTape() as t1:
        output = model(x, training=True)
        mse = loss_fn(output, y)
    grads = t1.gradient(mse, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if epoch % 10 == 0:
        print('Epoch: {}, Loss: {}'.format(epoch, mse))
    epoch = epoch + 1
"""Normal variables can be saved using pickle"""
print('Cur. location: ' + os.getcwd())
a = np.arange(0., 10.)
print(a)
with open('test.pkl', 'wb') as file:
    pickle.dump(a, file)
"""***How to save optimizer?***"""
with open('my_opti.pkl', 'wb') as file:
    pickle.dump(optimizer, file)
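For reference, a minimal sketch of the kind of solution I am looking for, using tf.train.Checkpoint, which seems able to track the model, the optimizer, and extra tf.Variables together. The extra_var name and the paths below are placeholders, not part of my actual code:

extra_var = tf.Variable(1.0)  # placeholder for a variable that does not belong to the model
ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer, extra_var=extra_var)

save_path = ckpt.save('./ckpts/my_ckpt')  # writes index/data files with this prefix

# Later, after rebuilding the same model/optimizer/variables:
ckpt.restore(save_path)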
Related
I have modified the PyTorch tutorial on LSTMs (sine-wave prediction: given sine values [0:N], predict the values [N:2N]) to use the Adam optimizer instead of the LBFGS optimizer. However, the model does not train well and cannot predict the sine wave correctly. Since in most cases we use the Adam optimizer for RNN training, I wonder how this issue can be resolved. I also wonder if the sequence-in/sequence-out code segment (done with a loop: for input_t in input.split(1, dim=1)) can be done by a PyTorch module or function.
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence, self).__init__()
        self.lstm1 = nn.LSTMCell(1, 51)
        self.lstm2 = nn.LSTMCell(51, 51)
        self.linear = nn.Linear(51, 1)

    def forward(self, input, future=0):
        outputs = []
        h_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]
        for i in range(future):  # if we should predict the future
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]
        outputs = torch.cat(outputs, dim=1)
        return outputs
if __name__ == '__main__':
    # set random seed to 0
    np.random.seed(0)
    torch.manual_seed(0)
    # load data and make training set
    data = torch.load('traindata.pt')
    input = torch.from_numpy(data[3:, :-1])
    target = torch.from_numpy(data[3:, 1:])
    test_input = torch.from_numpy(data[:3, :-1])
    test_target = torch.from_numpy(data[:3, 1:])
    print("input.size", input.size())
    print("target.size", target.size())
    # build the model
    seq = Sequence()
    seq.double()
    criterion = nn.MSELoss()
    # use Adam instead of the tutorial's original LBFGS optimizer
    optimizer = optim.Adam(seq.parameters(), lr=0.005)
    # begin to train
    for i in range(15):
        print('STEP: ', i)
        seq.train()

        def run1step():
            optimizer.zero_grad()
            out = seq(input)
            loss = criterion(out, target)
            print('train loss:', loss.item())
            loss.backward()
            return loss

        run1step()
        optimizer.step()
        # begin to predict, no need to track gradient here
        seq.eval()
        with torch.no_grad():
            future = 1000
            pred = seq(test_input, future=future)
            loss = criterion(pred[:, :-future], test_target)
            print('test loss:', loss.item())
            y = pred.detach().numpy()

        # draw the result
        def draw(yi, color):
            plt.figure(figsize=(30, 10))
            plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
            plt.xlabel('x', fontsize=20)
            plt.ylabel('y', fontsize=20)
            plt.xticks(fontsize=20)
            plt.yticks(fontsize=20)
            plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth=2.0)
            plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth=2.0)
            plt.show()

        if i == 14:
            draw(y[0], 'r')
            draw(y[1], 'g')
            draw(y[2], 'b')
            plt.savefig('predict_LSTM%d.pdf' % i)
            # plt.close()
I've just executed your code and the original code. I think the problem is that you didn't train your code with Adam long enough. You can see that your training loss is still getting smaller at step 15, so I changed the number of steps from 15 to 45; this is the figure generated after step 40:
The original code reached a loss of 4e-05 after step 4, but after that the loss somehow exploded. Your code with Adam reduces the loss across all 45 steps, but the final loss is around 0.001. I hope I ran both programs correctly.
Oh, regarding your second question:
"also wonder if the code segment regarding sequence-in-sequence-out"
Yes, you can write a function or define a module with two LSTMs to do that. But it doesn't buy you much here, since your network contains only two LSTMs; you have to do this "wiring" work at some point anyway.
If your network contains several such blocks, you can write a module with two LSTMs and use it as a primitive module, e.g. self.BigLSTM = BigLSTM(...), just like you define self.lstm1 = nn.LSTMCell(...). A sketch follows.
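For illustration, a minimal sketch of such a wrapper; the BigLSTM name, its sizes, and its state layout are my own invention here, not part of your code:

import torch
import torch.nn as nn

class BigLSTM(nn.Module):
    """Two stacked LSTMCells packaged as one reusable block (illustrative only)."""
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.cell1 = nn.LSTMCell(input_size, hidden_size)
        self.cell2 = nn.LSTMCell(hidden_size, hidden_size)

    def init_state(self, batch_size, dtype=torch.double):
        # one zero-initialized (h, c) pair per cell
        z = lambda: torch.zeros(batch_size, self.cell1.hidden_size, dtype=dtype)
        return (z(), z()), (z(), z())

    def forward(self, x_t, state):
        (h1, c1), (h2, c2) = state
        h1, c1 = self.cell1(x_t, (h1, c1))
        h2, c2 = self.cell2(h1, (h2, c2))
        return h2, ((h1, c1), (h2, c2))

The per-timestep loop in your forward() then shrinks to a single call per block: output, state = self.big_lstm(input_t, state).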
So I have three models created in three different files: Model_A.py, Model_B.py, Model_C.py. Model_A is the first one I created. When I run Model_A, everything works well. However, when I run Model B or C, Python still runs Model A. I guess it has to do with the session, but I am not sure, and I have not figured out how to fix it.
Here is the code for Model A.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # this removes some warning messages saying that this PC has a CPU that could
# compute much faster and that TensorFlow was not built for it. For the moment, we'll keep it like this; if necessary, we'll use the GPU.
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import Callback
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import warnings
from math import sqrt
from keras import backend as K
#Early stopping based on loss
class EarlyStoppingByLossVal(Callback):
    def __init__(self, monitor='val_loss', value=0.00001, verbose=0):
        super(EarlyStoppingByLossVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs={}):
        current = logs.get(self.monitor)
        if current is None:
            warnings.warn("Early stopping requires %s available!" % self.monitor, RuntimeWarning)
            return  # cannot compare None against the threshold below
        if current < self.value:
            if self.verbose > 0:
                print("Epoch %05d: early stopping THR" % epoch)
            self.model.stop_training = True
NAME = "TensBo{}".format(int(time.time()))
tensorboard = tf.keras.callbacks.TensorBoard(log_dir='logs/{}'.format(NAME))
# # DATA MANAGER: Define a function that imports and defines data
def data_manager(paths, col, row_drop, inout):
    # # PREDICTION DATA
    test_files = glob.glob(paths[0] + "/*.csv")  # keeping directories in a list
    n_test = len(test_files)  # number of files
    q = 0
    test = [None]*n_test
    for csv in test_files:
        pred_data = pd.read_csv(csv, sep=';', encoding='cp1252')
        t_step = pred_data.shape[0]  # as all data has the same shape, we can keep these values for later use
        # ((1, t_step-1, inout[0]) (batch_sz, t_step-1, inout[0]) (batch_sz, t_step-1, inout[0]))
        pred_input = np.array(np.reshape(pred_data.drop(columns=col, index=row_drop).values,
                                         (1, t_step-1, inout[0])), dtype='float')  # remove selected columns and indexes, reshape data
        pred_output = np.array(pred_data.loc['1':, col[1]: col[2]], dtype='float')
        test[q] = [pred_input, pred_output]
        q = q + 1
    # # TRAINING DATA
    # Introduce the path and count files
    train_files = glob.glob(paths[1] + "/*.csv")  # keeping directories in a list
    n_files = len(train_files)  # number of files
    # To check the encoding of a file, just print it: with open(r'I:\05_Basanta Franco\Python\Data02\Data1574095060.csv') as f:
    #     print(f)
    inputs = np.zeros([n_files*(t_step-1), inout[0]])
    targets = np.zeros([n_files*(t_step-1), inout[1]])
    i = t_step-1
    j = 0
    # import all the csv files and store them in data
    for csv in train_files:
        matrix = pd.read_csv(csv, sep=';', encoding='cp1252')
        data_in = matrix.drop(columns=col, index=row_drop).values
        data_out = matrix.loc['1':, col[1]: col[2]].values
        inputs[j:i, :] = data_in
        targets[j:i, :] = data_out
        i = i + t_step-1
        j = j + t_step-1
    batch_sz = n_files
    # creating input and target tensors of size (batch, timestep, inputs)
    inputs = np.reshape(inputs, (batch_sz, t_step-1, inout[0]))  # input selection
    targets = np.reshape(targets, (batch_sz, t_step-1, inout[1]))  # target selection
    return test, inputs, targets, n_test, t_step
# # PATHS
test_path = r'I:\05_Basanta Franco\Python\Test'
train_path = r'I:\05_Basanta Franco\Python\Data02'
model_path = r'I:\05_Basanta Franco\Python\model\model01\model{}.h5'
paths = [test_path, train_path, model_path]
# # IMPORT DATA
col = ['All calculations', 'MSNS-Trafo', 'MSNS-Trafo.1']
row_drop = 0
inout = [11, 2]
test, inputs, targets, n_test, t_step = data_manager(paths, col, row_drop, inout) #test is a list with test inputs and outputs.
# # NEURAL NETWORK CREATOR
# Creating a model, which is a linear stack of layers
model = Sequential()
'''
LSTM layer of n nodes. The input shape is (timesteps, input columns). The activation function is the rectified linear unit.
return_sequences=True tells the layer to output the full sequence. This is necessary if another recurrent layer follows; otherwise that layer could not interpret the input.
TimeDistributed is important: it relates every step in the input sequence to its corresponding output.
Otherwise we would only be considering the last value of the sequence.
'''
l1 = model.add(layers.LSTM(inout[0], input_shape=(t_step-1, inout[0]), activation='relu', return_sequences=True))  # adding an RNN layer
model.add(layers.TimeDistributed(layers.Dense(inout[0])))
l3 = model.add(layers.LSTM(30, activation='relu', return_sequences=True))  # adding an RNN layer
model.add(layers.TimeDistributed(layers.Dense(20)))
l4 = model.add(layers.LSTM(10, activation='relu', return_sequences=True))  # adding an RNN layer
model.add(layers.TimeDistributed(layers.Dense(10)))
model.add(layers.Dropout(0.2))
l5 = model.add(layers.Dense(2))  # fully connected layer; what I would understand as a normal layer
opt = optimizers.Adam(lr=1e-03)  # learning rate: how fast the weights are updated; a smaller value helps find the minimum better
callbacks = [EarlyStoppingByLossVal('val_loss', value=0.002),
             ModelCheckpoint(filepath=model_path.format(int(time.time())), save_best_only=True)]
# compiling the model: defining features of the fit, like the loss function, the optimizer, and the metrics that interest us
model.compile(loss='mean_squared_error',
              optimizer=opt,
              metrics=['mse', 'mae'])  # accuracy is only valid for classification tasks
history = model.fit(inputs, targets, epochs=50, validation_split=0.25, callbacks=callbacks)
# Evaluate the model
scores = model.evaluate(inputs, targets, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
# print a summary of the outputs of every layer
print(model.summary())
# SAVING THE MODEL
#
# The model is saved by ModelCheckpoint in a folder. Here we could also save the model's architecture in a json file:
# model_json = model.to_json()
# with open("model/model01/model.json", "w") as json_file:
#     json_file.write(model_json)
# PREDICTIONS WITH THE MODEL
t = 1
fig1 = plt.figure()
for prediction in test:
    NN_pred = model.predict(prediction[0])
    # ANALYSIS
    # reshape the prediction for plotting
    NN_pred = np.reshape(NN_pred, (prediction[1].shape[0], inout[1]))
    prediction[0] = np.reshape(prediction[0], (t_step-1, inout[0]))
    # plots: top, predicted and desired test output; bottom, test inputs
    plt.subplot(n_test, 1, t)
    plt.title('Test0' + np.str(t))
    plt.plot(NN_pred)
    plt.plot(prediction[1])
    plt.legend(['I_real_pred', 'I_im_pred', 'Ir', 'Ii'])
    # root mean squared error
    rmse = sqrt(mean_squared_error(prediction[1], NN_pred))
    print('Test RMSE: %.3f' % rmse)
    t = t + 1
fig2 = plt.figure()
# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
# plot mse during training
plt.subplot(212)
plt.title('Mean Squared Error')
plt.plot(history.history['mse'], label='train')
plt.plot(history.history['val_mse'], label='test')
# print inputs yes or no
printin = input('Print inputs as well? [y/n]: ')
m = True
while m == True:
    if printin == 'y':
        t = 1
        fig3 = plt.figure()
        for prediction in test:
            plt.title('Inputs: V, P, Q')
            plt.subplot(n_test, 1, t)
            plt.plot(prediction[0])
            t = t + 1
        m = False
    elif printin == 'n':
        m = False
    else:
        printin = input('Answer not valid. Print inputs? [y/n]: ')
plt.show()
And here is the code for Model B.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # this removes some warning messages saying that this PC has a CPU that could
# compute much faster and that TensorFlow was not built for it. For the moment, we'll keep it like this; if necessary, we'll use the GPU.
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow import Graph
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from keras import backend as K
from Model_A import EarlyStoppingByLossVal
K.clear_session()
def data_manager_1(paths, col, row_drop, inout):
    # # PREDICTION DATA
    test_files = glob.glob(paths[0] + "/*.csv")  # keeping directories in a list
    n_test = len(test_files)  # number of files
    q = 0
    test = [None]*n_test
    for csv in test_files:
        pred_data = pd.read_csv(csv, sep=';', encoding='cp1252')
        t_step = pred_data.shape[0]  # as all data has the same shape, we can keep these values for later use
        pred_input = np.array(np.reshape(pred_data.drop(columns=col, index=row_drop).values,
                                         (t_step-1, 1, inout[0])), dtype='float')  # remove selected columns and indexes, reshape data
        pred_output = np.array(pred_data.loc['1':, col[1]: col[2]], dtype='float')
        test[q] = [pred_input, pred_output]
        q = q + 1
    # # TRAINING DATA
    # Introduce the path and count files
    train_files = glob.glob(paths[1] + "/*.csv")  # keeping directories in a list
    n_files = len(train_files)  # number of files
    # To check the encoding of a file, just print it: with open(r'I:\05_Basanta Franco\Python\Data02\Data1574095060.csv') as f:
    #     print(f)
    inputs = np.zeros([n_files*(t_step-1), inout[0]])
    targets = np.zeros([n_files*(t_step-1), inout[1]])
    i = t_step-1
    j = 0
    # import all the csv files and store them in data
    for csv in train_files:
        matrix = pd.read_csv(csv, sep=';', encoding='cp1252')
        data_in = matrix.drop(columns=col, index=row_drop).values
        data_out = matrix.loc['1':, col[1]: col[2]].values
        inputs[j:i, :] = data_in
        targets[j:i, :] = data_out
        i = i + t_step-1
        j = j + t_step-1
    batch_sz = n_files
    # creating input and target tensors of size (batch, timestep, inputs)
    inputs = np.reshape(inputs, (inputs.shape[0], 1, inout[0]))  # input selection
    targets = np.reshape(targets, (targets.shape[0], 1, inout[1]))  # target selection
    return test, inputs, targets, n_test, t_step
# # PATHS
test_path = r'I:\05_Basanta Franco\Python\Test'
train_path = r'I:\05_Basanta Franco\Python\Data02'
model_path = r'I:\05_Basanta Franco\Python\model\model02\model{}.h5'
paths = [test_path, train_path, model_path]
# # IMPORT DATA
col = ['All calculations', 'MSNS-Trafo', 'MSNS-Trafo.1']
row_drop = 0
inout = [11, 2]
test, inputs, targets, n_test, t_step = data_manager_1(paths, col, row_drop, inout) #test is a list with test inputs and outputs.
# # CREATE THE MODEL
model02 = Sequential()
l1 = model02.add(layers.LSTM(inout[0], input_shape=(1, inout[0]), activation='relu', return_sequences=True))  # adding an RNN layer
model02.add(layers.TimeDistributed(layers.Dense(inout[0])))
l3 = model02.add(layers.LSTM(5, activation='relu', return_sequences=True))  # adding an RNN layer
model02.add(layers.TimeDistributed(layers.Dense(5)))
model02.add(layers.Dropout(0.2))
l5 = model02.add(layers.Dense(2))  # fully connected layer; what I would understand as a normal layer
# compiling the model: defining features of the fit, like the loss function, the optimizer, and the metrics that interest us
opt = optimizers.Adam(lr=1e-03)  # learning rate: how fast the weights are updated; a smaller value helps find the minimum better
callbacks = [EarlyStopping('val_loss', patience=20),
             ModelCheckpoint(filepath=model_path.format(int(time.time())), save_best_only=True)]
model02.compile(loss='mean_squared_error',
                optimizer=opt,
                metrics=['mse', 'mae'])  # accuracy is only valid for classification tasks
# train model and save history
history = model02.fit(inputs, targets, epochs=20, validation_split=0.25, callbacks=callbacks)
# plot loss during training
def train_plots(history):
    fig2 = plt.figure()
    plt.subplot(211)
    plt.title('Loss')
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    # plot mse during training
    plt.subplot(212)
    plt.title('Mean Squared Error')
    plt.plot(history.history['mse'], label='train')
    plt.plot(history.history['val_mse'], label='test')
    plt.show()
train_plots(history)
I tried to initialize graphs and sessions for the creation of the models, but it is not working.
Would it be possible (at least as a workaround) to kill each process after it finishes its task? Killing the process would ensure that TensorFlow's memory is released.
If the models need a communication channel or have intermediate results sent over, you could use queues or text files to solve this. A minimal sketch of the process-isolation idea follows.
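This is only a sketch, assuming each model's training can be triggered by importing its module (Model_A here stands for whichever script you want to isolate):

import multiprocessing as mp

def run_model_a():
    # Assumption: importing Model_A executes its training code at module level.
    import Model_A  # noqa: F401

if __name__ == '__main__':
    p = mp.Process(target=run_model_a)
    p.start()
    p.join()  # once the child exits, its TensorFlow graph and memory are released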
Just fixed it. My problem was that I was unable to reset the TensorFlow session or clean the graphs in my session in order to create and train a different model. I found that the command keras.backend.reset_uids() does that. Thank you anyway!
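For reference, a minimal sketch of resetting the backend state between models; whether you also need clear_session() may depend on your Keras version:

from keras import backend as K

# call this between building/training different models in the same process
K.clear_session()  # destroys the current TensorFlow graph and session
K.reset_uids()     # resets the auto-generated layer-name counters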
So I've just started experimenting a bit with TensorFlow, but I feel like I have a hard time grasping the concept. I'm currently focusing on the MNIST dataset, using only 8000 samples for training and 2000 for testing. The little code snippet I currently have is:
from keras.layers import Input, Dense, initializers
from keras.models import Model
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras import optimizers, losses
import tensorflow as tf
import keras.backend as K
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
low_dim = 32
def autoencoder():
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    input = Input(shape=(num_features,))
    encoded = Dense(low_dim, activation='relu', kernel_initializer=w)(input)
    decoded = Dense(num_features, activation='sigmoid', kernel_initializer=w)(encoded)
    autoencoder = Model(input, decoded)
    adam = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
    autoencoder.compile(optimizer=adam, loss='binary_crossentropy')
    autoencoder.fit(d.X_train, d.X_train,
                    epochs=50,
                    batch_size=64,
                    shuffle=True,
                    )
    encoded_imgs = autoencoder.predict(d.X_test)
    decoded_imgs = autoencoder.predict(encoded_imgs)
    #sess = tf.InteractiveSession()
    #error = losses.mean_absolute_error(decoded_imgs[0], d.X_train[0])
    #print(error.eval())
    #print(decoded_imgs.shape)
    #sess.close()
    n = 20  # how many digits we will display
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display original
        #sess = tf.InteractiveSession()
        error = losses.mean_absolute_error(decoded_imgs[n], d.X_test[n])
        #print(error.eval())
        #print(decoded_imgs.shape)
        #sess.close()
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(d.X_test[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # display reconstruction
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(decoded_imgs[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        #print(error)
    plt.show()
    return error
What I want to do is store the errors in a list that I can later print or plot in a graph, but how do you do this efficiently with TensorFlow/Keras? Thanks in advance.
You can store the errors in a csv file by using the CSVLogger callback. Here is a code snippet for this task.
from keras.callbacks import CSVLogger

# define callbacks
callbacks = [CSVLogger(path_csv_logger, separator=';', append=True)]

# pass the callbacks to model.fit() or model.fit_generator()
model.fit_generator(
    train_batch, train_steps, epochs=10, callbacks=callbacks,
    validation_data=validation_batch, validation_steps=val_steps)
EDIT: For storing the errors in a list, you can use something like this:
# source: https://keras.io/callbacks/
import keras

class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
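A usage sketch, assuming a compiled model and training arrays x_train/y_train already exist on your side:

import matplotlib.pyplot as plt

history = LossHistory()
model.fit(x_train, y_train, epochs=10, callbacks=[history])

# history.losses is now a plain Python list with one entry per batch
plt.plot(history.losses)
plt.show()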
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
import matplotlib.patches as mpatches
plt.rcParams['figure.figsize'] = (20, 6)
df1 = pd.read_csv("TrainData.csv")
df2 = pd.read_csv("TestData.csv")
train_data_X = np.asanyarray(df1['ENGINE SIZE'])
train_data_Y = np.asanyarray(df1['CO2 EMISSIONS'])
test_data_X = np.asanyarray(df2['ENGINE SIZE'])
test_data_Y = np.asanyarray(df2['CO2 EMISSIONS'])
W = tf.Variable(20.0, name= 'Weight')
b = tf.Variable(30.0, name= 'Bias')
X = tf.placeholder(tf.float32, name= 'Input')
Y = tf.placeholder(tf.float32, name= 'Output')
Y = W*X + b
loss = tf.reduce_mean(tf.square(Y - train_data_Y))
optimizer = tf.train.GradientDescentOptimizer(0.05)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
loss_values = []
train_data = []
for step in range(100):
    _, loss_val, a_val, b_val = sess.run([train, loss, W, b], feed_dict={X: train_data_X, Y: train_data_Y})
    loss_values.append(loss_val)
    if step % 5 == 0:
        print(step, loss_val, a_val, b_val)
        train_data.append([a_val, b_val])
plt.plot(loss_values, 'ro')
plt.show()
I am trying to make a linear regression model that predicts CO2 emissions given the engine size as input. I am using the above code in TensorFlow.
1) When I use this code, the weight and bias remain unchanged. What is the problem in the code?
2) Also, if I want both engine size and mileage as inputs, what code changes should be made?
Thanks in advance.
There were a few mistakes in the code, which are mentioned below:
You were using the placeholder in Y = W*X + b, the same placeholder that was later used to feed data (feed_dict={X: train_data_X, Y: train_data_Y}), so the assignment overwrote it. You should use a separate variable for the prediction (not the placeholder you use to feed data); then you can calculate the loss function against the placeholder. The required change has been made: check prediction = W*X + b in the code below.
You were passing the complete data to feed_dict at once (feed_dict={X: train_data_X, Y: train_data_Y}). Instead, you need to pass a single data value at a time (feed_dict={X: x, Y: y}).
The code below, with the needed corrections, should work fine.
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
import matplotlib.patches as mpatches
plt.rcParams['figure.figsize'] = (20, 6)
df1 = pd.read_csv("TrainData.csv")
df2 = pd.read_csv("TestData.csv")
train_data_X = np.asanyarray(df1['ENGINE SIZE'])
train_data_Y = np.asanyarray(df1['CO2 EMISSIONS'])
test_data_X = np.asanyarray(df2['ENGINE SIZE'])
test_data_Y = np.asanyarray(df2['CO2 EMISSIONS'])
W = tf.Variable(20.0, name= 'Weight')
b = tf.Variable(30.0, name= 'Bias')
X = tf.placeholder(tf.float32, name= 'Input')
Y = tf.placeholder(tf.float32, name= 'Output')
prediction = W*X + b
loss = tf.reduce_mean(tf.square(prediction - Y))
optimizer = tf.train.GradientDescentOptimizer(0.05)
train = optimizer.minimize(loss)
loss_values = []
train_data = []
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for step in range(100):
        for (x, y) in zip(train_data_X, train_data_Y):
            _, loss_val, a_val, b_val = sess.run([train, loss, W, b], feed_dict={X: x, Y: y})
            loss_values.append(loss_val)
        if step % 5 == 0:
            print(step, loss_val, a_val, b_val)
            train_data.append([a_val, b_val])
plt.plot(loss_values, 'ro')
plt.show()
Note: Because of the incorrect choice of loss function, your loss keeps increasing with every step.
I have mentioned a loss function below which might work better for your data. I am not sure what your data looks like, but you can give it a try if you want and let me know if it worked.
n_samples = train_data_X.shape[0]
loss = tf.reduce_sum(tf.pow(prediction - Y, 2)) / (2 * n_samples)
Response to your second query:
Assuming that your data has a column named MILEAGE, you can make the changes below in train_data_X and test_data_X. The rest of the code will remain the same as above.
train_data_X = np.asanyarray(df1[['ENGINE SIZE','MILEAGE']])
train_data_Y = np.asanyarray(df1['CO2 EMISSIONS'])
test_data_X = np.asanyarray(df2[['ENGINE SIZE','MILEAGE']])
test_data_Y = np.asanyarray(df2['CO2 EMISSIONS'])
I'm currently porting my code from Keras to TensorFlow in order to use the new quantized-training feature in TensorFlow 1.10.0. However, I found that the training process in Keras and TensorFlow shows a very large difference when using the Adam optimizer.
Here is practice code that trains the same "sin(10x)" function both the TensorFlow way and the Keras way.
from keras.layers import Input, Dense, BatchNormalization
from keras.models import Model
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import keras.backend as K
KERAS = 'keras'
TENSORFLOW = 'tensorflow'
def create_model():
    ipt = Input([1])
    m = Dense(1000, activation='relu')(ipt)
    m = BatchNormalization()(m)
    m = Dense(1000, activation='relu')(m)
    m = BatchNormalization()(m)
    m = Dense(1)(m)
    return Model(ipt, m)
valX = np.expand_dims(np.linspace(-1, 1, 10000), 1)
valY = np.sin(valX * 10)
valY_ = {}
for phase in (KERAS, TENSORFLOW):
    sess = tf.Session()
    sess.as_default()
    K.set_session(sess)
    model = create_model()
    if phase is KERAS:
        model.compile('adam', 'mean_squared_error')
    else:
        tensor_y_gt = tf.placeholder(dtype=tf.float32, shape=model.output.get_shape().as_list())
        mse = tf.losses.mean_squared_error(model.output, tensor_y_gt)
        training_steps = tf.train.AdamOptimizer().minimize(mse)
        sess.run(tf.global_variables_initializer())
    for step in range(2000):
        X = np.random.uniform(-1, 1, [256, 1])
        Y = np.sin(X * 10)
        if phase is KERAS:
            loss = model.train_on_batch(X, Y)
        else:
            loss, _ = sess.run([mse, training_steps], feed_dict={model.input: X, tensor_y_gt: Y})
        if step % 100 == 0:
            print('%s, step#%d, loss=%.5f' % (phase, step, loss))
    valY_[phase] = model.predict(valX)[:, 0]
    sess.close()
valX = valX[:, 0]
valY = valY[:, 0]
plt.plot(valX, valY, 'r--', label='sin(10x)')
plt.plot(valX, valY_[KERAS], 'g-', label=KERAS)
plt.plot(valX, valY_[TENSORFLOW], 'b-', label=TENSORFLOW)
plt.legend(loc='best', ncol=1)
plt.show()
You can see the difference between the two:
[figure: plot of sin(10x) with the Keras and TensorFlow fits]
Environment:
tensorflow-gpu 1.10.0
Keras 2.2.2
Does anyone have a clue?