so I am new to using Tensorflow so please bear with me.
I want to create a LSTM rnn for sequence prediction, but since i am also going to use this for tuning hyper parameters, i would like the input to be of the format
[n1,n2,n3...]
and output will be a single number.
eg: input[1,2,3,4] | output=[5]
I have created a dictionary like so:
dict = { "n1":array(1,2,3,4,5), "n2":array(2,3,4,5,6), "n3":array(3,4,5,6,7), "n4":array(4,5,6,7,8) }
labels/output=[5,6,7,8,9]
I would like to use an estimator, i was going to use DynamicRnnEstimator() but it is going to be removed in a future version and so i would like to create my own custom estimator for this.
i want to know how to create y own custom estimator for this particular scenario of training the mode to predict the next number given a sequence of "sequence_length".
i have many sequences for the training.
the code I have written till now is below:
# START
# IMPORTS
import numpy as np
import tensorflow as tf
import csv
from tensorflow.contrib.learn import DynamicRnnEstimator
from tensorflow.python import debug as tf_debug
# GLOBAL VARIABLES
sequence_length = 4 # predict the 5th number give the previous sequence_length numbers.
batch_size = 10
n_epochs = 100
n_neurons = 5
# NORMALIZING THE DATA
def normalize_data(train, test):
m11 = float(max(train))
m12 = float(min(train))
normalized_train = [float((x-m12)/(m11-m12)) for x in train]
m21 = float(max(test))
m22 = float(min(train))
normalized_test = [float((x-m22)/(m21-m22)) for x in test]
return normalized_train, normalized_test
# LOADING THE DATA
def load_data(train, test):
with open(train, 'r') as csvfile1:
reader1 = csv.reader(csvfile1, delimiter = ',')
train = [ float(row[1]) for row in reader1]
with open(test, 'r') as csvfile2:
reader2 = csv.reader(csvfile2, delimiter = ',')
test = [ float(row[1]) for row in reader2]
normalized_train, normalized_test = normalize_data(train, test)
global sequence_length
trainx = [ normalized_train[i:i+sequence_length] for i in range(len(normalized_train) - sequence_length-2)]
testx = [ normalized_test[i:i+sequence_length] for i in range(len(normalized_test) - sequence_length-2)]
trainy = [ normalized_train[i] for i in range(sequence_length+1, len(normalized_train))]
testy = [ normalized_test[i] for i in range(sequence_length+1, len(normalized_test))]
return trainx, testx, trainy, testy
# Create Dict function
def create_dict(x,y):
feature_dict = dict()
global sequence_length
for i in range(sequence_length):
temp = "number"+str(i+1)
feature_dict[temp] = x[:,i]
labels = y
return feature_dict, labels
# Training input function
def train_input_fn(features, labels, batch_size):
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(100).repeat().batch(batch_size)
# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()
# Create feature columns
def create_feature_columns():
feature_column = []
for i in range(sequence_length):
feature_column.append(tf.feature_column.numeric_column(key="number"+str(i+1)))
return feature_column
# Model_ function
def my_model_fn(features, labels, mode, params):
net = tf.feature_column.input_layer(features, params['feature_columns'])
for units in params['hidden_units']:
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
logits = tf.layers.dense(net, params['n_classes'], activation=None)
def main():
global sequence_length, batch_size, n_epochs, n_neurons
trainx, testx, trainy, testy = load_data("train.csv", "test.csv")
trainx, testx, trainy, testy = np.array(trainx), np.array(testx), np.array(trainy), np.array(testy )
n_train = len(trainx)
n_test = len(testx)
feature_dict_train, label_train = create_dict(trainx, trainy)
feature_dict_test, label_test = create_dict(testx, testy)
feature_column = create_feature_columns()
main()
THANKS IN ADVANCE!
Related
I have medical data collected on 30 patients over 30 times series. The response is categorical and over four categories. Modifying an example in Udemy, PyTorch for Deep Learning with Python Bootcamp, I trained a single LSTM time series model fitting the parameters over the 30 patients. This seems to be working well.
Now, I want to account for covariate information. The devices the patients were fitted with return physiological information at all the points in time and can help predict the response. There is also demographic information in the data set.
I tried with just one quantitative variable, 'mean_heart_X', in the data set, but I don't know how to make use of this information.
Can someone help me out? See my code below:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_csv("C:/Users/Jsmith/mydata.csv")
df = df.dropna(axis = 0, how ='any')
nsubj = 30
subjects = []
for i in range(nsubj):
dfs = df[df['Subject'] == (i+1)]
subjects.append(dfs)
subjects_ytrain = []
for i in range(nsubj):
y = subjects[i]['Response_var']
y = y.astype('category')
y_train = torch.tensor(y.cat.codes.values).flatten().float()
subjects_ytrain.append(y_train)
# How can include covariates as below in my model?
covs_train = []
for i in range(nsubj):
x = subjects[i]['mean.Heart_X']
x = torch.tensor(x.values, dtype = torch.float)
covs_train.append(x)
def input_data(seq,ws): # ws is the window size
out = []
L = len(seq)
for i in range(L-ws):
window = seq[i:i+ws]
label = seq[i+ws:i+ws+1]
out.append((window,label))
return out
window_size = 7
# Create the training dataset of sequence/label tuples:
subjects_traindata = []
for i in range(nsubj):
train_data = input_data(subjects_ytrain[i],window_size)
subjects_traindata.append(train_data)
class LSTM(nn.Module):
def __init__(self, input_size=1, hidden_size=50, out_size=4):
super().__init__()
self.hidden_size = hidden_size
# Add an LSTM layer:
self.lstm = nn.LSTM(input_size,hidden_size)
# Add a fully-connected layer:
self.linear = nn.Linear(hidden_size,out_size)
# Initialize h0 and c0:
self.hidden = (torch.zeros(1,1,hidden_size),
torch.zeros(1,1,hidden_size))
def forward(self,seq):
lstm_out, self.hidden = self.lstm(
seq.view(len(seq), 1, -1), self.hidden)
pred = self.linear(lstm_out.view(len(seq),-1))
return pred[-1] # we only care about the last prediction
model = LSTM()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
epochs = 10
for i in range(epochs):
for j in range(nsubj):
# tuple-unpack the train_data set
for seq, y_train in subjects_traindata[j]:
# reset the parameters and hidden states
optimizer.zero_grad()
model.hidden = (torch.zeros(1,1,model.hidden_size),
torch.zeros(1,1,model.hidden_size))
y_pred = model(seq)
y_train=y_train.reshape([1]).long()
y_pred=torch.reshape(y_pred,(1,-1))
#print(y_pred)
#print(y_train)
#print(y_train.dtype)
loss = criterion(y_pred, y_train)
loss.backward()
optimizer.step()
# print training result
print(f'Epoch: {i+1:2} Loss: {loss.item():10.8f}')
```
I have a data set like this:
edge_origins = np.array([[0,1,2,3,4],[6,7,8]])
edge_destinations = np.array([[1,2,3,4,5],[7,8,9]])
target = np.array([0,1])
x = [[np.array([0.1,0.5,0.2]),np.array([0.5,0.6,0.23]),
np.array([0.1,0.5,0.5]),np.array([0.1,0.6,0.23]),
np.array([0.1,0.4,0.4]),np.array([0.52,0.6,0.23])],
[np.array([0.1,0.3,0.3]),np.array([0.3,0.6,0.23]),
np.array([0.1,0.1,0.2]),np.array([0.4,0.6,0.23])]]
This is a list of two networks. The first network has 6 nodes with 5 edges and a class 0, and then 4 nodes with 3 edges and class 1 networks.
I want to develop a model in Pytorch that will classify each network into it's class, and then i'll give it a new set of networks to classify.
So ultimately, I want to be able to shuffle these lists (simultaneously, i.e. maintaining the order between the data and the classes), split into train and test, and then read the train and test data into two data loaders, and feed these into a PyTorch network.
I wrote this:
edge_origins = np.array([[0,1,2,3,4],[6,7,8]])
edge_destinations = np.array([[1,2,3,4,5],[7,8,9]])
target = np.array([0,1])
x = [[np.array([0.1,0.5,0.2]),np.array([0.5,0.6,0.23]),
np.array([0.1,0.5,0.5]),np.array([0.1,0.6,0.23]),
np.array([0.1,0.4,0.4]),np.array([0.52,0.6,0.23])],
[np.array([0.1,0.3,0.3]),np.array([0.3,0.6,0.23]),
np.array([0.1,0.1,0.2]),np.array([0.4,0.6,0.23])]]
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
dataset = Data(x=x, edge_index=edge_index, y=y, num_classes = len(set(target)))
print(dataset)
And the error is:
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
ValueError: expected sequence of length 5 at dim 2 (got 3)
But then once that is fixed I think the next step is:
torch.manual_seed(12345)
dataset = dataset.shuffle()
train_dataset = dataset[:1] #for toy example
test_dataset = dataset[1:]
print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
class GCN(torch.nn.Module):
def __init__(self, hidden_channels):
super(GCN, self).__init__()
torch.manual_seed(12345)
self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
self.conv2 = GCNConv(hidden_channels, hidden_channels)
self.conv3 = GCNConv(hidden_channels, hidden_channels)
self.lin = Linear(hidden_channels, dataset.num_classes)
def forward(self, x, edge_index, batch):
# 1. Obtain node embeddings
x = self.conv1(x, edge_index)
x = x.relu()
x = self.conv2(x, edge_index)
x = x.relu()
x = self.conv3(x, edge_index)
# 2. Readout layer
x = global_mean_pool(x, batch) # [batch_size, hidden_channels]
# 3. Apply a final classifier
x = F.dropout(x, p=0.5, training=self.training)
x = self.lin(x)
return x
model = GCN(hidden_channels=64)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
def train():
model.train()
for data in train_loader: # Iterate in batches over the training dataset.
out = model(data.x, data.edge_index, data.batch) # Perform a single forward pass.
loss = criterion(out, data.y) # Compute the loss.
loss.backward() # Derive gradients.
optimizer.step() # Update parameters based on gradients.
optimizer.zero_grad() # Clear gradients.
def test(loader):
model.eval()
correct = 0
for data in loader: # Iterate in batches over the training/test dataset.
out = model(data.x, data.edge_index, data.batch)
pred = out.argmax(dim=1) # Use the class with highest probability.
correct += int((pred == data.y).sum()) # Check against ground-truth labels.
return correct / len(loader.dataset) # Derive ratio of correct predictions.
for epoch in range(1, 171):
train()
train_acc = test(train_loader)
test_acc = test(test_loader)
print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
Could someone demonstrate to me how to get my data running into the Pytorch network above?
In Pytorch Geometric the Data object is used to contain only one graph. So you could iterate through all your arrays like so:
data_list = []
for i in range(2):
edge_index_curr = torch.tensor([edge_origins[i],
edge_destinations[i],
dtype=torch.long)
data = Data(x=torch.tensor(x[i]), edge_index=edge_index_curr, y=torch.tensor(target[i]))
datas.append(data)
You can then use this list of Data to create your own Dataloader:
loader = DataLoader(data_list, batch_size=32)
If you need to split into train/val/test (I would advise having more than 2 samples for this case) you can do it manually or using sklearn.model_selection.
For data augmentation if you really do have very little data, pytorch-geometric comes with transforms.
I currently have a RNN model for time series predictions. It uses 3 input features "value", "temperature" and "hour of the day" of the last 96 time steps to predict the next 96 time steps of the feature "value".
Here you can see a schema of it:
and here you have the current code:
#Import modules
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
# Define the parameters of the RNN and the training
epochs = 1
batch_size = 50
steps_backwards = 96
steps_forward = 96
split_fraction_trainingData = 0.70
split_fraction_validatinData = 0.90
randomSeedNumber = 50
#Read dataset
df = pd.read_csv('C:/Users/Desktop/TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime':[0]}, index_col=['datetime'])
# standardize data
data = df.values
indexWithYLabelsInData = 0
data_X = data[:, 0:3]
data_Y = data[:, indexWithYLabelsInData].reshape(-1, 1)
scaler_standardized_X = StandardScaler()
data_X = scaler_standardized_X.fit_transform(data_X)
data_X = pd.DataFrame(data_X)
scaler_standardized_Y = StandardScaler()
data_Y = scaler_standardized_Y.fit_transform(data_Y)
data_Y = pd.DataFrame(data_Y)
# Prepare the input data for the RNN
series_reshaped_X = np.array([data_X[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
series_reshaped_Y = np.array([data_Y[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
timeslot_x_train_end = int(len(series_reshaped_X)* split_fraction_trainingData)
timeslot_x_valid_end = int(len(series_reshaped_X)* split_fraction_validatinData)
X_train = series_reshaped_X[:timeslot_x_train_end, :steps_backwards]
X_valid = series_reshaped_X[timeslot_x_train_end:timeslot_x_valid_end, :steps_backwards]
X_test = series_reshaped_X[timeslot_x_valid_end:, :steps_backwards]
Y_train = series_reshaped_Y[:timeslot_x_train_end, steps_backwards:]
Y_valid = series_reshaped_Y[timeslot_x_train_end:timeslot_x_valid_end, steps_backwards:]
Y_test = series_reshaped_Y[timeslot_x_valid_end:, steps_backwards:]
# Build the model and train it
np.random.seed(randomSeedNumber)
tf.random.set_seed(randomSeedNumber)
model = keras.models.Sequential([
keras.layers.SimpleRNN(10, return_sequences=True, input_shape=[None, 3]),
keras.layers.SimpleRNN(10, return_sequences=True),
keras.layers.TimeDistributed(keras.layers.Dense(1))
])
model.compile(loss="mean_squared_error", optimizer="adam", metrics=['mean_absolute_percentage_error'])
history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_valid, Y_valid))
#Predict the test data
Y_pred = model.predict(X_test)
# Inverse the scaling (traInv: transformation inversed)
data_X_traInv = scaler_standardized_X.inverse_transform(data_X)
data_Y_traInv = scaler_standardized_Y.inverse_transform(data_Y)
series_reshaped_X_notTransformed = np.array([data_X_traInv[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
X_test_notTranformed = series_reshaped_X_notTransformed[timeslot_x_valid_end:, :steps_backwards]
Y_pred_traInv = scaler_standardized_Y.inverse_transform (Y_pred)
Y_test_traInv = scaler_standardized_Y.inverse_transform (Y_test)
# Calculate errors for every time slot of the multiple predictions
abs_diff = np.abs(Y_pred_traInv - Y_test_traInv)
abs_diff_perPredictedSequence = np.zeros((len (Y_test_traInv)))
average_LoadValue_testData_perPredictedSequence = np.zeros((len (Y_test_traInv)))
abs_diff_perPredictedTimeslot_ForEachSequence = np.zeros((len (Y_test_traInv)))
absoluteError_Load_Ratio_allPredictedSequence = np.zeros((len (Y_test_traInv)))
absoluteError_Load_Ratio_allPredictedTimeslots = np.zeros((len (Y_test_traInv)))
mse_perPredictedSequence = np.zeros((len (Y_test_traInv)))
rmse_perPredictedSequence = np.zeros((len(Y_test_traInv)))
for i in range (0, len(Y_test_traInv)):
for j in range (0, len(Y_test_traInv [0])):
abs_diff_perPredictedSequence [i] = abs_diff_perPredictedSequence [i] + abs_diff [i][j]
mse_perPredictedSequence [i] = mean_squared_error(Y_pred_traInv[i] , Y_test_traInv [i] )
rmse_perPredictedSequence [i] = np.sqrt(mse_perPredictedSequence [i])
abs_diff_perPredictedTimeslot_ForEachSequence [i] = abs_diff_perPredictedSequence [i] / len(Y_test_traInv [0])
average_LoadValue_testData_perPredictedSequence [i] = np.mean (Y_test_traInv [i])
absoluteError_Load_Ratio_allPredictedSequence [i] = abs_diff_perPredictedSequence [i] / average_LoadValue_testData_perPredictedSequence [i]
absoluteError_Load_Ratio_allPredictedTimeslots [i] = abs_diff_perPredictedTimeslot_ForEachSequence [i] / average_LoadValue_testData_perPredictedSequence [i]
rmse_average_allPredictictedSequences = np.mean (rmse_perPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedSequence = np.mean (absoluteError_Load_Ratio_allPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedTimeslots = np.mean (absoluteError_Load_Ratio_allPredictedTimeslots)
absoluteAverageError_allPredictedSequences = np.mean (abs_diff_perPredictedSequence)
absoluteAverageError_allPredictedTimeslots = np.mean (abs_diff_perPredictedTimeslot_ForEachSequence)
Here you have some test data Download Test Data
So now I actually would like to include not only past values of the features into the prediction but also future values of the features "temperature" and "hour of the day" into the prediction. The future values of the feature "temperature" can for example be taken from an external weather forecasting service and for the feature "hour of the day" the future values are know before (in the test data I have included a "forecast" of the temperature that is not a real forecast; I just randomly changed the values).
This way, I could assume that - for several applications and data - the forecast could be improved.
In a schema it would look like this:
Can anyone tell me, how I can do that in Keras with a RNN (or LSTM)? One way could be to include the future values as independant features as input. But I would like the model to know that the future values of a feature are connected to the past values of a feature.
Reminder: Does anybody have an idea how to do this? I'll highly appreciate every comment.
The standard approach is to use an encoder-decoder architecture (see 1 and 2 for instance):
The encoder takes as input the past values of the features and of the target and returns an output representation.
The decoder takes as input the encoder output and the future values of the features and returns the predicted values of the target.
You can use any architecture for the encoder and for the decoder and you can also consider different approaches for passing the encoder output to the decoder (e.g. adding or concatenating it to the decoder input features, adding or concatenating it to the output of some intermediate decoder layer, or adding it to the final decoder output), the code below is just an example.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, LSTM, TimeDistributed, Concatenate, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# define the inputs
target = ['value']
features = ['temperatures', 'hour of the day']
sequence_length = 96
# import the data
df = pd.read_csv('TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime': [0]}, index_col=['datetime'])
# scale the data
target_scaler = StandardScaler().fit(df[target])
features_scaler = StandardScaler().fit(df[features])
df[target] = target_scaler.transform(df[target])
df[features] = features_scaler.transform(df[features])
# extract the input and output sequences
X_encoder = [] # past features and target values
X_decoder = [] # future features values
y = [] # future target values
for i in range(sequence_length, df.shape[0] - sequence_length):
X_encoder.append(df[features + target].iloc[i - sequence_length: i])
X_decoder.append(df[features].iloc[i: i + sequence_length])
y.append(df[target].iloc[i: i + sequence_length])
X_encoder = np.array(X_encoder)
X_decoder = np.array(X_decoder)
y = np.array(y)
# define the encoder and decoder
def encoder(encoder_features):
y = LSTM(units=100, return_sequences=True)(encoder_features)
y = TimeDistributed(Dense(units=1))(y)
return y
def decoder(decoder_features, encoder_outputs):
x = Concatenate(axis=-1)([decoder_features, encoder_outputs])
# x = Add()([decoder_features, encoder_outputs])
y = TimeDistributed(Dense(units=100, activation='relu'))(x)
y = TimeDistributed(Dense(units=1))(y)
return y
# build the model
encoder_features = Input(shape=X_encoder.shape[1:])
decoder_features = Input(shape=X_decoder.shape[1:])
encoder_outputs = encoder(encoder_features)
decoder_outputs = decoder(decoder_features, encoder_outputs)
model = Model([encoder_features, decoder_features], decoder_outputs)
# train the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.fit([X_encoder, X_decoder], y, epochs=100, batch_size=128)
# extract the last predicted sequence
y_true = target_scaler.inverse_transform(y[-1, :])
y_pred = target_scaler.inverse_transform(model.predict([X_encoder, X_decoder])[-1, :])
# plot the last predicted sequence
plt.plot(y_true.flatten(), label='actual')
plt.plot(y_pred.flatten(), label='predicted')
plt.show()
In the example above the model takes two inputs, X_encoder and X_decoder, so in your case when generating the forecasts you can use the past observed temperatures in X_encoder and the future temperature forecasts in X_decoder.
It is a pytorch code to time series prediction with an known external/exogenous regressor to the given period forecasted.Hope it helps!!!Have a marvellous day !!!
The input format is a 3d Tensor an output 1d array (MISO-Multiple Inputs Single Output)
def CNN_Attention_Bidirectional_LSTM_Encoder_Decoder_predictions(model,data ,regressors, extrapolations_leght):
n_input = extrapolations_leght
pred_list = []
batch = data[-n_input:]
model = model.train()
pred_list.append(torch.cat(( model(batch)[-1], torch.FloatTensor(regressors.iloc[1,[1]]).to(device).unsqueeze(0)),1))
batch = torch.cat((batch[n_input-1].unsqueeze(0), pred_list[-1].unsqueeze(0)),1)
batch = batch[:, 1:, :]
for i in range(n_input-1):
model = model.eval()
pred_list.append(torch.cat((model(batch).squeeze(0), torch.FloatTensor(regressors.iloc[i+1,[1]]).to(device).unsqueeze(0)),1))
batch = torch.cat((batch, pred_list[-1].unsqueeze(0)),1)
batch = batch[:, 1:, :]
model = model.train()
return np.array([pred_list[j].cpu().detach().numpy() for j in range(n_input)])[:,:, 0]
I am using tensorflow 1.10 Python 3.6
My code is based in the premade iris classification model provided by TensorFlow. This means, I am using a Tensorflow DNN premade classifier, with the following difference:
10 features instead 4.
5 classes instead 3.
The test and training files can be downloaded from the following link:
https://www.dropbox.com/sh/nmu8i2i8xe6hvfq/AADQEOIHH8e-kUHQf8zmmDMDa?dl=0
I have made a code to export this classifier to a tflite format, however the accuracy in the python model is higher than 75% but when exported the accuracy decrease approximately to 45% this means approximately 30% Accuracy is lost (This is too much).
I have tried the code with different set of data and in all of them the accuracy after exporting decrease a lot!
This made me think that something is going wrong with the TocoConverter function or that maybe I am exporting to tflite incorrectly, missing a parameter or something like that.
This is the way I generate the model:
classifier = tf.estimator.DNNClassifier(
feature_columns=my_feature_columns,
hidden_units=[100, 500],
optimizer=tf.train.AdagradOptimizer(learning_rate=0.003),
n_classes=num_labels,
model_dir="myModel")
And this is the function I am using to convert to tflite:
converter = tf.contrib.lite.TocoConverter.from_frozen_graph(final_model_path, input_arrays, output_arrays, input_shapes={"dnn/input_from_feature_columns/input_layer/concat": [1, 10]})
tflite_model = converter.convert()
I share the complete code in which I also calculate the accuracy of the resulting .tflite file.
import argparse
import tensorflow as tf
import pandas as pd
import csv
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
import numpy as np
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
help='number of training steps')
features_global = None
feature_spec = None
MODEL_NAME = 'myModel'
def load_data(train_path, test_path):
"""Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
with open(train_path, newline='') as f:
reader = csv.reader(f)
column_names = next(reader)
y_name = column_names[-1]
train = pd.read_csv(train_path, names=column_names, header=0)
train_x, train_y = train, train.pop(y_name)
test = pd.read_csv(test_path, names=column_names, header=0)
test_x, test_y = test, test.pop(y_name)
return (train_x, train_y), (test_x, test_y)
def train_input_fn(features, labels, batch_size):
"""An input function for training"""
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(1000).repeat().batch(batch_size)
# Return the dataset.
return dataset
def eval_input_fn(features, labels, batch_size):
"""An input function for evaluation or prediction"""
features=dict(features)
if labels is None:
# No labels, use only features.
inputs = features
else:
inputs = (features, labels)
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices(inputs)
# Batch the examples
assert batch_size is not None, "batch_size must not be None"
dataset = dataset.batch(batch_size)
# Return the dataset.
return dataset
def main(argv):
args = parser.parse_args(argv[1:])
train_path = "trainData.csv"
test_path = "testData.csv"
# Fetch the data
(train_x, train_y), (test_x, test_y) = load_data(train_path, test_path)
# Load labels
num_labels = 5
# Feature columns describe how to use the input.
my_feature_columns = []
for key in train_x.keys():
my_feature_columns.append(tf.feature_column.numeric_column(key=key))
# Build 2 hidden layer DNN
classifier = tf.estimator.DNNClassifier(
feature_columns=my_feature_columns,
hidden_units=[100, 500],
optimizer=tf.train.AdagradOptimizer(learning_rate=0.003),
# The model must choose between 'num_labels' classes.
n_classes=num_labels,
model_dir="myModel")
# Train the Model
classifier.train(
input_fn=lambda:train_input_fn(train_x, train_y,
args.batch_size),
steps=args.train_steps)
# Evaluate the model.
eval_result = classifier.evaluate(
input_fn=lambda:eval_input_fn(test_x, test_y,
args.batch_size))
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
# Export model
feature_spec = tf.feature_column.make_parse_example_spec(my_feature_columns)
serve_input_fun = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
saved_model_path = classifier.export_savedmodel(
export_dir_base="out",
serving_input_receiver_fn=serve_input_fun,
as_text=True,
checkpoint_path=classifier.latest_checkpoint(),
)
tf.reset_default_graph()
var = tf.Variable(0)
with tf.Session() as sess:
# First let's load meta graph and restore weights
sess.run(tf.global_variables_initializer())
latest_checkpoint_path = classifier.latest_checkpoint()
saver = tf.train.import_meta_graph(latest_checkpoint_path + '.meta')
saver.restore(sess, latest_checkpoint_path)
input_arrays = ["dnn/input_from_feature_columns/input_layer/concat"]
output_arrays = ["dnn/logits/BiasAdd"]
frozen_graph_def = tf.graph_util.convert_variables_to_constants(
sess, sess.graph_def,
output_node_names=["dnn/logits/BiasAdd"])
frozen_graph = "out/frozen_graph.pb"
with tf.gfile.FastGFile(frozen_graph, "wb") as f:
f.write(frozen_graph_def.SerializeToString())
# save original graphdef to text file
with open("estimator_graph.pbtxt", "w") as fp:
fp.write(str(sess.graph_def))
# save frozen graph def to text file
with open("estimator_frozen_graph.pbtxt", "w") as fp:
fp.write(str(frozen_graph_def))
input_node_names = input_arrays
output_node_name = output_arrays
output_graph_def = optimize_for_inference_lib.optimize_for_inference(
frozen_graph_def, input_node_names, output_node_name,
tf.float32.as_datatype_enum)
final_model_path = 'out/opt_' + MODEL_NAME + '.pb'
with tf.gfile.FastGFile(final_model_path, "wb") as f:
f.write(output_graph_def.SerializeToString())
tflite_file = "out/iris.tflite"
converter = tf.contrib.lite.TocoConverter.from_frozen_graph(final_model_path, input_arrays, output_arrays, input_shapes={"dnn/input_from_feature_columns/input_layer/concat": [1, 10]})
tflite_model = converter.convert()
open(tflite_file, "wb").write(tflite_model)
interpreter = tf.contrib.lite.Interpreter(model_path=tflite_file)
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test model on random input data.
input_shape = input_details[0]['shape']
# change the following line to feed into your own data.
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
resultlist = list()
df = pd.read_csv(test_path)
expected = df.iloc[:, -1].values.tolist()
with open(test_path, newline='') as f:
reader = csv.reader(f)
column_names = next(reader)
for x in range(0, len(expected)):
linea = next(reader)
linea = linea[:len(linea) - 1]
input_data2 = np.array(linea, dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], [input_data2])
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
#print(output_data)
max = 0;
longitud = len(output_data[0])
for k in range(0, longitud):
if (output_data[0][k] > output_data[0][max]):
max = k
resultlist.append(max)
print(resultlist)
coincidences = 0
for pred_dict, expec in zip(resultlist, expected):
if pred_dict == expec:
coincidences = coincidences + 1
print("tflite Accuracy: " + str(coincidences / len(expected)))
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main)
I hope some of you can identify the error, or give a possible solution
This question is answered here might help.
As mentioned in the answer share, doing some
pre-processing
on the image before it is fed into "interpreter.invoke()" solves the issue if that was the problem in the first place.
To elaborate on that here is a block quote from the shared link:
The below code you see is what I meant by pre-processing:
test_image = cv2.imread(file_name)
test_image = cv2.resize(test_image,(299,299),cv2.INTER_AREA)
test_image = np.expand_dims((test_image)/255,axis=0).astype(np.float32)
interpreter.set_tensor(input_tensor_index, test_image)
interpreter.invoke()
digit = np.argmax(output()[0])
#print(digit)
prediction = result[digit]
As you can see there are two crucial commands/pre-processing done on
the image once it is read using "imread()":
i) The image should be resized to the size that is the "input_height"
and "input_width" values of the input image/tensor that was used
during the training. In my case (inception-v3) this was 299 for both
"input_height" and "input_width". (Read the documentation of the model
for this value or look for this variable in the file that you used to
train or retrain the model)
ii) The next command in the above code is:
test_image = np.expand_dims((test_image)/255,axis=0).astype(np.float32)
I got this from the "formulae"/model code:
test_image = np.expand_dims((test_image - input_mean)/input_std, axis=0).astype(np.float32)
Reading the documentation revealed that for my architecture input_mean = 0 and input_std = 255.
Hope this helps.
I have met the same problem. It seems to me that the accuracy problem is mainly caused by failure to detect overlapping objects. I couldn't figure out what part of the code is wrong though.
I want to train, evaluate the accuracy and eventually predict with my model. This is my first time using high level APIs such as tf.estimator.
I'm getting a value error from estimator.train(train_input_fn):
'ValueError: features should be a dictionary of `Tensor's. Given type: '
I'm not sure what is going on here. My model is taking 3 inputs and producing a binary output from one neuron.
Before this error I was getting an error about the requested shape not equal to the actual shape, or something along those lines. I fixed it by reducing the batchSize down to 1, instead of 100. I'm sure this isn't going to do so well when it comes to training though.
Any ideas? Heres my code:
import tensorflow as tf
import numpy as np
import sys
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Testing/')
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Training/')
#other files
from TestDataNormaliser import *
from TrainDataNormaliser import *
learning_rate = 0.01
trainingIteration = 15
batchSize = 1
displayStep = 2
#Layers using tf.layers
def get_logits(features):
l1 = tf.layers.dense(features, 3, activation=tf.nn.relu)
l2 = tf.layers.dense(l1, 4, activation=tf.nn.relu)
l3 = tf.layers.dense(l2, 1, activation=None)
a = l3
return a
#cost function
def get_loss(a, labels):
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(a)))
return tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=labels)
#cross_entropy = tf.reduce_mean((l3 - y)**2)
#cross_entropy = -tf.reduce_sum(y*tf.log(a))-tf.reduce_sum((1-y)*tf.log(1-a))
#optimizer
def get_train_op(loss):
learning_rate = 1e-3
optimizer = tf.train.RMSPropOptimizer(learning_rate)
return optimizer.minimize(loss, global_step=tf.train.get_global_step())
#training
####
def get_inputs(feature_data, label_data, batch_size, n_epochs=None, shuffle=True):
dataset = tf.data.Dataset.from_tensor_slices(
(feature_data, label_data))
dataset = dataset.repeat(n_epochs)
if shuffle:
dataset = dataset.shuffle(len(feature_data))
dataset = dataset.batch(batch_size)
features, labels = dataset.make_one_shot_iterator().get_next()
return features, labels
def model_fn(features, labels, mode):
a = get_logits(features)
loss = get_loss(a, labels)
train_op = get_train_op(loss)
predictions = tf.greater(a, 0)
accuracy = tf.metrics.accuracy(labels, predictions)
return tf.estimator.EstimatorSpec(
mode=mode,
loss=loss,
train_op=train_op,
eval_metric_ops={'Accuracy': accuracy},
predictions=predictions
)
def train_input_fn():
return get_inputs(
trainArrayValues,
trainArrayLabels,
batchSize
)
def eval_input_fn():
return get_inputs(
testArrayValues,
testArrayLabels,
batchSize,
n_epochs=1,
shuffle=False
)
model_dir = './savedModel'
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
#estimator.train(train_input_fn, max_steps=1)
estimator.train(train_input_fn)
estimator.evaluate(eval_input_fn)
Your problem is this line:
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
You need to set the feature_columns argument to an array of feature columns. A feature column tells the estimator about the data you're feeding it.
It looks like all your input data is numeric, so I'd call tf.feature_column.numeric_column to create your feature column(s). The documentation is here. For example, the following code creates a numeric feature column containing x-coordinates:
xcol = tf.feature_column.numeric_column('x')
If all your estimator needs are x-coordinates, then you could create the estimator with the following code:
estimator = tf.estimator.LinearRegressor(feature_columns=[xcol])