Keras LSTM Multiclass Classification structure - python

I am a beginner in machine learning and have been trying to use an LSTM to classify 12 features into 4 classes. I've followed quite a few tutorials, but I'm still a bit confused. My dataset has 12 columns I want to use for training, including the label column, whose values correspond to each class:
0 = Class 1
1 = Class 2
2 = Class 3
3 = Class 4
and this is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import time
# For LSTM model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras import optimizers
# Load dataset
train = pd.read_csv(r"C:\Users\O\Documents\Datasets\FinalDataset2.csv")  # raw string, so the backslashes are not treated as escapes
train_processed = train.iloc[:, 1:13]
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train_processed)
features_set = []
labels = []
for i in range(1, 393763):
    features_set.append(train_scaled[i-1:i, 0])
    labels.append(train_scaled[i, 0])
features_set, labels = np.array(features_set), np.array(labels)
features_set = np.reshape(features_set, (features_set.shape[0], features_set.shape[1], 1))
# Initialize LSTM model
model = Sequential()
model.add(LSTM(512, return_sequences=True, activation='tanh', input_shape=(features_set.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Dense(4, activation='softmax'))
model.add(LSTM(units=1, activation='sigmoid'))
opt = optimizers.Adam(lr=0.0001)
model.compile(optimizer = opt , loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(features_set, labels, epochs = 100, batch_size = 512)
I am very unsure whether my model is built correctly. Moreover, it only yields very low accuracy (27-28%). Any help would be greatly appreciated!

Short Answer:
The last layer should be Dense(4, activation='softmax').
Labels must be one-hot encoded, since you are using loss='categorical_crossentropy'.
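For example (a minimal sketch, assuming labels holds the integer class ids 0-3):
from keras.utils import to_categorical
labels_one_hot = to_categorical(labels, num_classes=4)  # 0 -> [1,0,0,0], 1 -> [0,1,0,0], ...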
Here are some more notes to help:
1st Layer
LSTM(512, return_sequences=True, activation='tanh')
You start with a huge number of LSTM units (512) while your data has just 12 columns.
return_sequences=True is not justified in your case, as you are not stacking another recurrent layer after it.
Model Body
There are no layers in the middle between the LSTM and the final Dense().
Add at least one Dense layer.
Output Layer
It can be easier to use sparse_categorical_crossentropy as the loss instead of categorical_crossentropy, so the labels can be passed as plain integers; otherwise you need to one-hot encode them.
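Putting these notes together, a corrected model could look like the sketch below. Two assumptions: features_set is rebuilt to hold all 12 scaled features per timestep (the question's loop keeps only column 0, which looks unintended), and labels holds the raw integer class ids 0-3 rather than scaled values.
# Sketch only: features_set assumed to have shape (n_samples, timesteps, n_features)
model = Sequential()
model.add(LSTM(64, activation='tanh', input_shape=(features_set.shape[1], features_set.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))    # at least one Dense layer in the middle
model.add(Dense(4, activation='softmax'))  # last layer: one unit per class
opt = optimizers.Adam(lr=0.0001)
# sparse_categorical_crossentropy accepts integer labels directly
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(features_set, labels, epochs=100, batch_size=512)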

Related

Train a neural network (ANN) with 8 input and 8 output features and predict a result for one unseen input feature

I tried to train a neural network with a CSV data file that contains both input (3560 x 8) and output (3560 x 8) values.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow import keras
# Load the data
dataframe = pd.read_csv("ANN.csv",header=None)
dataset = dataframe.values
# Assign the columns of the dataframe to the inputs for arrays for the ANN
X_input_dataset = dataset[:, 0:8]
Y_output_dataset = dataset[:, 8:16]
# Sequential model
model = Sequential()
# Add the different layers
model.add(keras.layers.Flatten(input_shape=(8,)))
model.add(Dense(50, activation='relu'))
model.add(Dense(40, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
# Configure the model and start training
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_percentage_error'])
history = model.fit(X_input_dataset, Y_output_dataset, epochs=2000, batch_size=10, verbose=1, validation_split=0.3)
# Predict values
x_new = X_input_dataset[:,0]
y_new = model.predict(x_new)
print(y_new)
But when predicting y for one column of new unseen input (3560 x 1), taken from the training data itself, I get an error due to the input shape: the neural network expects 8 features (3560 x 8) as input to predict the new y (3560 x 1). Please help me with this.
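A sketch of what that means (hypothetical, not from the original post): since the model was built with input_shape=(8,), predict must always receive arrays of shape (n_samples, 8). Selecting a row keeps all 8 features, whereas selecting a column does not:
# X_input_dataset[:, 0] has shape (3560,): one feature per sample -> shape error
x_new = X_input_dataset[0:1, :]  # one full sample with all 8 features, shape (1, 8)
y_new = model.predict(x_new)
print(y_new)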

My accuracy for the ANN is not coming perfect

crop: object = CropData(path='')
crop.dataset: pd.DataFrame = CropAnalysis.rename_columns(dataset=crop.dataset)
crop.dataset.head()

data_charaterictics: Generator = CropAnalysis.data_characteristics(dataset=crop.dataset)
while True:
    try:
        print('-'*100)
        print(data_charaterictics.__next__())
    except StopIteration:
        break

unique_values: Generator = CropAnalysis.data_unique_values(dataset=crop.dataset)
while True:
    try:
        print('-'*100)
        print(unique_values.__next__())
    except StopIteration:
        break

target_classification: Generator = CropAnalysis.target_classification_count(dataset=crop.dataset, target='crop')
while True:
    try:
        print('-'*100)
        print(target_classification.__next__())
    except StopIteration:
        break

crop.dataset.keys()

numeric_histoplots: Generator = CropAnalysis.histograms_numeric_features(
    dataset=crop.dataset,
    numeric_features=['Nitrogen', 'Phosphorus', 'Potassium', 'Temp', 'hum', 'PH', 'Rain'])
while True:
    try:
        numeric_histoplots.__next__()
    except AttributeError:
        break

crop.dataset: pd.DataFrame = CropPreprocess.change_object_to_str(dataset=crop.dataset, cols=['crop'])
crop.dataset: pd.DataFrame = CropPreprocess.encode_features(dataset=crop.dataset)
crop.dataset

X, y = crop.dataset.drop('crop', axis=1), crop.dataset['crop']

# Split first, so X_train/y_train exist before they are used below
from sklearn.model_selection import train_test_split as tts
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

from sklearn import preprocessing
normalizer = preprocessing.Normalizer()
normalized_train_X = normalizer.fit_transform(X_train)
normalized_train_X

from tensorflow.keras.utils import to_categorical
one_hot_y_train = to_categorical(y_train)
one_hot_y_test = to_categorical(y_test)

from keras.models import Sequential
from keras.layers import Dense

# Neural network
model = Sequential()
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(22, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=25, epochs=100)
The accuracy is way too low (around 0.04), which is not what I get with classical machine learning algorithms, and the dataset is not small (about 2200 x 8). Help me find out what the problem with the data is.
The dataset is available here: https://www.kaggle.com/atharvaingle/crop-recommendation-dataset
You need to add more neurons to your output layer. Currently you are predicting whether to use a single crop or not, but in the Kaggle dataset the labels are actually 22 different crops, and you want to decide which crop is most suitable for the soil at hand.
So you need 22 neurons in your output layer, i.e. change your last layer to
model.add(Dense(22, activation='softmax'))
The resulting prediction will indicate which crop is most suitable for the soil.
Make sure that y_train, y_test are one-hot vectors for this. If they're not yet, you can convert them, e.g. by using one_hot_y_train = to_categorical(y_train).
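A minimal end-to-end sketch of that fix (keeping the question's hidden layers; assumes X and y as prepared above, with y holding the 22 integer-encoded crop labels):
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# categorical_crossentropy expects one-hot targets
y_train_oh = to_categorical(y_train)
y_test_oh = to_categorical(y_test)

model = Sequential()
model.add(Dense(7, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(22, activation='softmax'))  # one output neuron per crop
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train_oh, batch_size=25, epochs=100,
          validation_data=(X_test, y_test_oh))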

Multiple variable output in Neural Network | Why is Keras yielding negative binary_cross_entropy?

I am encountering an issue for a school project.
I have to predict on a test set, based on textual data, the age and gender of a person. My training dataset has 4 features (ID, keywords, age, sex).
I created a neural network (please see the code below), but when fitting it, my loss values are extremely negative.
Could you please tell me how to alleviate this issue?
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
#Load the datasets
chunk_train = pd.read_csv('/Users/romeoleon/Downloads/train.csv',chunksize=10**6)
data_train = pd.concat(chunk_train)
#Map the values for sex columns
data_train.sex = data_train.sex.map({'M':0,'F':1})
#Remove the rows with missing data
print('Missing rows represent {} percent of the dataframe'.format(data_train['keywords'].isna().sum()/len(data_train.keywords)*100))
#Drop the missing values
data_train.dropna(inplace=True)
#Plot the distribution of numerical variables
sns.histplot(data_train.age,bins=85)
plt.show()
sns.countplot(x='sex',data=data_train)
plt.show()
#Prepare the data to feed it to the NN
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
x_train, x_test, y_train, y_test = train_test_split(data_train['keywords'],data_train[["age","sex"]],test_size=0.2)
#Choose parameters
vocab_size = 1000
maxlen = 300
batch_size = 32
embedding_dims = 100
hidden_dims = 5
filters = 250
kernel_size = 3
epochs = 10
#Tokenize the words
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(x_train)
X_train = tokenizer.texts_to_matrix(x_train)
X_test = tokenizer.texts_to_matrix(x_test)
#Pad sequencing : Ensure all sequences have the same length
from tensorflow.keras.preprocessing import sequence
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, MaxPooling1D
from tensorflow.keras.layers import Embedding, LSTM
from tensorflow.keras.layers import Conv1D, Flatten
#Create the model
model = Sequential()
model.add(Embedding(vocab_size, embedding_dims, input_length=maxlen, trainable=True))
model.add(Dropout(0.5))
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu'))
#model.add(MaxPooling1D(pool_size=4))
model.add(Flatten())
model.add(Dense(hidden_dims, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))
# Compile neural model
model.compile(loss='binary_crossentropy',  # cross-entropy loss
              optimizer='adam',            # Adam optimizer
              metrics=['accuracy'])        # accuracy performance metric
model.summary()
#Fit the model
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(X_test, y_test), verbose=1)
You can find below a screenshot of the structure of my training dataset:
When using 'binary_crossentropy' as the loss function, the Dense layer at the output end should have only 1 unit rather than 2 (one unit already encodes the two states, 1 or 0).
Use this instead:
model.add(Dense(1, activation='sigmoid'))
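With a single sigmoid unit the model predicts one binary target. A minimal sketch, assuming only the 0/1 sex column is kept as the label (binary cross-entropy also expects the targets themselves to lie in [0, 1], which the raw age values do not, and that is what can drive the loss negative):
# Build the model as above, but end with a single sigmoid unit
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train on the binary sex column only (0 = M, 1 = F, as mapped earlier)
model.fit(X_train, y_train['sex'].values,
          batch_size=batch_size, epochs=1,
          validation_data=(X_test, y_test['sex'].values), verbose=1)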

Simple Keras ML model for predicting multiplication isn't working

I have created a simple machine learning model to predict the multiplication of two given numbers. I followed a YouTube tutorial to learn the basics and tried to work on this simple idea.
My model has three dense layers: input, hidden, and output. The input and hidden layers used the same activation function, 'relu', which gave me NaN as the loss on model fit, so I changed one of them to sigmoid, which then started giving me something like 0.00000e+... as the loss.
I don't know what is wrong. Can anyone please point out what I am doing or assuming wrong?
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')
print(df)
x = np.array(df['X'])
y = np.array(df['Y'])
s = np.array(df['S'])
def build_model():
    model = keras.Sequential()
    inputLayer = layers.Dense(64, activation='sigmoid', input_shape=[2])
    hiddenLayer = layers.Dense(64, activation='relu')
    outputLayer = layers.Dense(1)
    model.add(inputLayer)
    model.add(hiddenLayer)
    model.add(outputLayer)
    model.compile(optimizer='sgd', loss='mean_squared_error', metrics=['accuracy'])
    return model
model = build_model()
print(model.summary())
EPOCHS = 1000
# I didn't know how to provide multiple inputs to my model for
# training, so I checked stackoverflow here
# https://stackoverflow.com/questions/55233377/keras-sequential-model-with-multiple-inputs?noredirect=1&lq=1
merged_array = np.stack([x, y], axis=1)
history = model.fit(merged_array, s, epochs=EPOCHS, validation_split = 0.2, verbose=2)
print(history)
print(model.predict([[2,3],]))
Disclaimer: I am a beginner and I have just started using keras and python for the first time in my life.
It does work for smaller numbers with ReLU activation.
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
x = np.random.randint(0, 10, 1000)
y = np.random.randint(0, 10, 1000)
s = x*y
def build_model():
    model = keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=[2]))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=keras.optimizers.Adam(lr=0.01),
                  loss='mean_squared_error')
    return model
model = build_model()
merged_array = np.stack([x, y], axis=1)
history = model.fit(merged_array, s, epochs=250,
                    validation_split=0.2)
test_input = [2, 3]
print('\n{} x {} ='.format(*test_input),
      np.round(model.predict([test_input])[0][0]).astype(int))
2 x 3 = 6
SGD also works, but it requires standardization/normalization of the inputs, which kind of defeats the purpose of your task, so I changed the optimizer above. With the inputs scaled, SGD works too:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
x = np.random.randint(0, 10, 1000)
y = np.random.randint(0, 10, 1000)
s = x*y
x = x/10
y = y/10
def build_model():
    model = keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=[2]))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=keras.optimizers.SGD(0.001), loss='mean_squared_error')
    return model
model = build_model()
merged_array = np.stack([x, y], axis=1)
history = model.fit(merged_array, s, epochs=250,
                    validation_split=0.2, batch_size=16)
test_input = [2/10, 3/10]
print('\n{} x {} ='.format(*map(lambda l: int(l*10), test_input)),
      np.round(model.predict([test_input])[0][0]).astype(int))
I noticed a couple of issues with your model:
Your "input layer" is not actually an input layer. You do not need a designated input layer in this case; the argument input_shape=[2] is sufficient to add a proper input layer before this layer.
You do not set any batch size in the fit function. Batches are usually a small subset of your training and validation set (commonly base-2 numbers like 4, 8, 16, 32, ...); during training, not just one sample but a whole batch is used for backpropagating and adjusting your weights (aka "learning"), which makes training faster. Since your input data are two single floating-point numbers (I assume), you can choose a really high batch size like 1024 or higher. The batch size is one of the so-called hyperparameters, which affect your overall training success:
history = model.fit(merged_array, s, batch_size=1024, epochs=EPOCHS, validation_split=0.2, verbose=2)
During training you track the "accuracy" metric. As you are working on a regression problem, this does not help you estimate your model's performance (accuracy is used for classification problems). You can leave it out, as shown below.
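For instance, a regression-appropriate compile call could track an error metric instead (a sketch):
# Track mean absolute error instead of accuracy for regression
model.compile(optimizer=keras.optimizers.Adam(lr=0.01),
              loss='mean_squared_error',
              metrics=['mae'])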
I cannot give you more specific advice without knowing more about the data you are using: how many datapoints you have and what kind of numbers you want to multiply (bounded to numbers between 0 and 10, floats or integers, ...).
Hope this helps so far (;

Deep Learning: Multiclass Classification with same amount of labels between the training dataset and test dataset

I'm writing code for a multiclass classification. I have custom datasets with 7 columns (6 features and 1 label); the training dataset has 2 types of labels (1 and 2), and the testing dataset has 3 types of labels (1, 2, and 3). The aim of the model is to see how well it predicts the label '3'.
As of now, I'm trying the MLP algorithm, the code is as follows:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from keras.models import load_model
from sklearn.externals import joblib
from joblib import dump, load
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#from keras.layers import Dense, Embedding, LSTM, GRU
#from keras.layers.embeddings import Embedding
#Load the test dataset
df1 = pd.read_csv("/home/user/Desktop/FinalTestSet.csv")
test = df1
le = LabelEncoder()
test['Average_packets_per_flow'] = le.fit_transform(test['Average_packets_per_flow'])
test['Average_PktSize_per_flow'] = le.fit_transform(test['Average_PktSize_per_flow'])
test['Avg_pkts_per_sec'] = le.fit_transform(test['Avg_pkts_per_sec'])
test['Avg_bytes_per_sec'] = le.fit_transform(test['Avg_bytes_per_sec'])
test['N_pkts_per_flow'] = le.fit_transform(test['N_pkts_per_flow'])
test['N_pkts_size_per_flow'] = le.fit_transform(test['N_pkts_size_per_flow'])
#Select the x and y columns from dataset
xtest_Val = test.iloc[:,0:6].values
Ytest = test.iloc[:,6].values
#print Ytest
#MinMax Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
Xtest = scaler.fit_transform(xtest_Val)
#print Xtest
#Load the train dataset
df2 = pd.read_csv("/home/user/Desktop/FinalTrainingSet.csv")
train = df2
le = LabelEncoder()
test['Average_packets_per_flow'] = le.fit_transform(test['Average_packets_per_flow'])
test['Average_PktSize_per_flow'] = le.fit_transform(test['Average_PktSize_per_flow'])
test['Avg_pkts_per_sec'] = le.fit_transform(test['Avg_pkts_per_sec'])
test['Avg_bytes_per_sec'] = le.fit_transform(test['Avg_bytes_per_sec'])
test['N_pkts_per_flow'] = le.fit_transform(test['N_pkts_per_flow'])
test['N_pkts_size_per_flow'] = le.fit_transform(test['N_pkts_size_per_flow'])
#Select the x and y columns from dataset
xtrain_Val = train.iloc[:,0:6].values
Ytrain = train.iloc[:,6].values
#print Ytrain
#MinMax Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# Fit the model
Xtrain = scaler.fit_transform(xtrain_Val)
#Reshape data for CNN
Xtrain = Xtrain.reshape((Xtrain.shape[0], 1, 6, 1))
print(Xtrain)
#Xtest = Xtest.reshape((Xtest.shape[0], 1, 6, 1))
#print Xtrain.shape
max_length=70
EMBEDDING_DIM=100
vocab_size=100
num_labels=2
#Define model
def init_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=Xtrain.shape[0]))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='softmax'))
    model.add(Flatten())
    # adam optimizer
    adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
print('Train...')
model = init_model()
#To avoid overfitting
callbacks = [EarlyStopping('val_loss', patience=3)]
hist = model.fit(Xtrain, Ytrain, epochs=50, batch_size=50, validation_split=0.20, callbacks=callbacks, verbose=1)
#Evaluate model and print results
score, acc = model.evaluate(Xtest, Ytest, batch_size=50)
print('Test score:', score)
print('Test accuracy:', acc)
However, I'm getting the following error:
ValueError: Input 0 is incompatible with layer flatten_1: expected min_ndim=3, found ndim=2
I tried to remove the Flatten layers, but I get a different error:
ValueError: Error when checking input: expected dense_1_input to have shape (424686,) but got array with shape (6,)
424686 is the number of rows in the dataset and 6 is the number of features.
I appreciate any suggestion. Thank you.
Based on Omarfoq's suggestion, I now use three labels for both the training and testing datasets. The code and the error remain unchanged.
Can anyone please suggest a solution? Thank you.
I would say that what you are trying is not logical: your model will never predict class "3" if that class doesn't exist in the training set. What you are trying to do makes no sense. Try to reformulate your problem.
