I tried to set up an LSTM model with an input matrix of 7 columns and ca. 1650 rows.
The output matrix is 1 column, 1650 rows.
My model code is shown below.
The problem: the prediction contains the same value, 26.19..., in every row.
Can anyone help?
Thank you!
Edit: I updated the code.
svt.csv contains 8 columns with ca. 8000 rows; y is separated out, so that Y has one column and X has 7 columns.
Edit2: I realized that predict has shape (1657, 20, 1). Why not (1657, 1)?
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import callbacks, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from matplotlib import pyplot
df_svt = pd.read_csv("svt.csv", sep=";")
df_svt = df_svt.iloc[6980:]
df_svt = df_svt.iloc[:8158]
df_y = pd.DataFrame(df_svt.y)
df_y_train = df_y.iloc[:6501]
df_y_test = df_y.iloc[6501:8158]
y_train = df_y_train.to_numpy()
y_test = df_y_test.to_numpy()
df_svt = df_svt.drop(columns=["y"])
df_svt = df_svt.drop(columns=["Date"])
df_x_train = df_svt.iloc[:6501]
df_x_test = df_svt.iloc[6501:8158]
x_train = df_x_train.to_numpy()
x_test = df_x_test.to_numpy()
train_X = x_train.reshape(6501,1,7)
test_X = x_test.reshape(1657,1,7)
train_Y = y_train.reshape(6501,1,1)
test_Y = y_test.reshape(1657,1,1)
trainX = []
trainY = []
testX = []
testY = []
ts = 20
timestep = ts
def create_dataset(datasetX, datasetY, timestep=10):
    dataX, dataY = [], []
    for i in range(len(datasetX) - timestep - 1):
        a = datasetX[i:(i + timestep), 0]
        dataX.append(a)
        dataY.append(datasetY[i + timestep, 0])
    return np.array(dataX), np.array(dataY)
timestep=ts
trainX, trainY = create_dataset(train_X, train_Y, timestep)
timestep=ts
testX, testY = create_dataset(test_X,test_Y,timestep)
model = tf.keras.Sequential()
model.add(LSTM(units=70,return_sequences=True, input_shape=(trainX.shape[1],trainX.shape[2]) ))
model.add(Dropout(0.2))
model.add(LSTM(units=70, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=70))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer="adam",loss="mean_squared_error")
model.fit(trainX, trainY, epochs=5, batch_size=32, verbose=1)
model.summary()
predict = model.predict(testX)
print(predict)
You are using return_sequences=True in your LSTM layers, which means each layer returns its output at every timestep rather than only at the last one. That is what you want when stacking LSTM layers in a sequential model, but you need to remove it from the last LSTM layer, where you only want the output of the final timestep. (This is also why predict had shape (1657, 20, 1): with return_sequences=True on the last LSTM, the Dense(1) head is applied to each of the 20 timesteps.)
model.add(LSTM(units=70,return_sequences=True, input_shape=(trainX.shape[1],trainX.shape[2]) ))
model.add(Dropout(0.2))
model.add(LSTM(units=70, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=70))
model.add(Dropout(0.2))
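For intuition, here is a minimal sketch (with a hypothetical batch of 4 samples) showing how return_sequences changes the output shape of an LSTM layer:
import numpy as np
from tensorflow.keras.layers import LSTM

# Hypothetical batch: 4 samples, 20 timesteps, 7 features
x = np.zeros((4, 20, 7), dtype="float32")

seq = LSTM(70, return_sequences=True)(x)  # one output per timestep
last = LSTM(70)(x)                        # output of the last timestep only

print(seq.shape)   # (4, 20, 70)
print(last.shape)  # (4, 70)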
I am encountering an issue with a school project.
I have to predict the age and gender of a person on a test set, based on textual data. My training dataset has 4 features (ID, keywords, age, sex).
I created a neural network (please see the code below), but when fitting it, my loss values are extremely negative.
Could you please tell me how to fix this issue?
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
#Load the datasets
chunk_train = pd.read_csv('/Users/romeoleon/Downloads/train.csv',chunksize=10**6)
data_train = pd.concat(chunk_train)
#Map the values for sex columns
data_train.sex = data_train.sex.map({'M':0,'F':1})
#Remove the rows with missing data
print('Missing rows represent {} percent of the dataframe'.format(data_train['keywords'].isna().sum()/len(data_train.keywords)*100))
#Drop the missing values
data_train.dropna(inplace=True)
#Plot the distribution of numerical variables
sns.histplot(data_train.age,bins=85)
plt.show()
sns.countplot(x='sex',data=data_train)
plt.show()
#Prepare the data to feed it to the NN
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
x_train, x_test, y_train, y_test = train_test_split(data_train['keywords'],data_train[["age","sex"]],test_size=0.2)
#Choose parameters
vocab_size = 1000
maxlen = 300
batch_size = 32
embedding_dims = 100
hidden_dims = 5
filters = 250
kernel_size = 3
epochs = 10
#Tokenize the words
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(x_train)
X_train = tokenizer.texts_to_matrix(x_train)
X_test = tokenizer.texts_to_matrix(x_test)
#Pad sequencing : Ensure all sequences have the same length
from tensorflow.keras.preprocessing import sequence
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, MaxPooling1D
from tensorflow.keras.layers import Embedding, LSTM
from tensorflow.keras.layers import Conv1D, Flatten
#Create the model
model = Sequential()
model.add(Embedding(vocab_size, embedding_dims, input_length=maxlen, trainable=True))
model.add(Dropout(0.5))
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu'))
#model.add(MaxPooling1D(pool_size=4))
model.add(Flatten())
model.add(Dense(hidden_dims, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))
# Compile neural model
model.compile(loss='binary_crossentropy',  # cross-entropy loss
              optimizer='adam',            # Adam optimizer
              metrics=['accuracy'])        # accuracy performance metric
model.summary()
#Fit the model
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(X_test, y_test), verbose=1)
When using 'binary_crossentropy' as the loss function, the Dense layer at the output end should have only 1 unit rather than 2 (a single sigmoid unit already covers the two states, 0 and 1).
Use this instead:
model.add(Dense(1, activation='sigmoid'))
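A minimal sketch of the corrected output end, assuming the target is the binary sex column on its own (the age column would need a separate regression output):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Hypothetical head; input_shape=(300,) matches maxlen=300 from the question
model = Sequential()
model.add(Dense(5, activation='relu', input_shape=(300,)))
model.add(Dense(1, activation='sigmoid'))  # one unit: P(sex == 1)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])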
I am a beginner in machine learning and have been trying to use an LSTM to classify data with 12 features into 4 classes. I've followed quite a few tutorials, but I'm still a bit confused. My dataset has 12 columns I want to use for training, including the label column, whose values correspond to each class:
0 = Class 1
1 = Class 2
2 = Class 3
3 = Class 4
and this is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import time
# For LSTM model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras import optimizers
# Load dataset
train = pd.read_csv("C:\Users\O\Documents\Datasets\FinalDataset2.csv")
train_proccessed = train.iloc[:, 1:13]
scaler = MinMaxScaler(feature_range = (0, 1))
train_scaled = scaler.fit_transform(train_proccessed)
features_set = []
labels = []
for i in range(1, 393763):
    features_set.append(train_scaled[i-1:i, 0])
    labels.append(train_scaled[i, 0])
features_set, labels = np.array(features_set), np.array(labels)
features_set = np.reshape(features_set, (features_set.shape[0], features_set.shape[1], 1))
# Initialize LSTM model
model = Sequential()
model.add(LSTM(512, return_sequences=True, activation='tanh', input_shape=(features_set.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Dense(4, activation='softmax'))
model.add(LSTM(units=1, activation='sigmoid'))
opt = optimizers.Adam(lr=0.0001)
model.compile(optimizer = opt , loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(features_set, labels, epochs = 100, batch_size = 512)
I am very unsure whether my model is built correctly. Moreover, it only yields very low accuracies (27-28%). Any help would be greatly appreciated!
Short answer:
The last layer should be Dense(4, activation='softmax').
Labels must be one-hot encoded, as you are using loss='categorical_crossentropy'.
Here are more notes to help.
1st layer
LSTM(512, return_sequences=True, activation='tanh')
You started with a huge number of LSTM units while your data has just 12 columns.
return_sequences=True is not justified in your case, as you are not stacking another recurrent layer after it.
Model body
There are no layers in the middle between the LSTM and the final Dense(); add at least one Dense layer there.
Output layer
It may be easier to use sparse_categorical_crossentropy as the loss instead of categorical_crossentropy, so labels can be passed as integers; otherwise you need to one-hot encode them.
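Putting those notes together, a minimal sketch of a corrected model (assuming features_set is shaped (samples, timesteps, features) and labels holds integer class ids 0-3):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

model = Sequential()
# last recurrent layer, so no return_sequences
model.add(LSTM(64, activation='tanh', input_shape=(features_set.shape[1], features_set.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))    # at least one Dense layer in the middle
model.add(Dense(4, activation='softmax'))  # one unit per class
# sparse_categorical_crossentropy lets labels stay as integers 0..3
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])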
I'm writing code for multiclass classification. I have custom datasets with 7 columns (6 features and 1 label); the training dataset has 2 types of labels (1 and 2), and the testing dataset has 3 types of labels (1, 2, and 3). The aim is to see how well the model predicts the label '3'.
As of now, I'm trying an MLP; the code is as follows:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from keras.models import load_model
from sklearn.externals import joblib
from joblib import dump, load
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#from keras.layers import Dense, Embedding, LSTM, GRU
#from keras.layers.embeddings import Embedding
#Load the test dataset
df1 = pd.read_csv("/home/user/Desktop/FinalTestSet.csv")
test = df1
le = LabelEncoder()
test['Average_packets_per_flow'] = le.fit_transform(test['Average_packets_per_flow'])
test['Average_PktSize_per_flow'] = le.fit_transform(test['Average_PktSize_per_flow'])
test['Avg_pkts_per_sec'] = le.fit_transform(test['Avg_pkts_per_sec'])
test['Avg_bytes_per_sec'] = le.fit_transform(test['Avg_bytes_per_sec'])
test['N_pkts_per_flow'] = le.fit_transform(test['N_pkts_per_flow'])
test['N_pkts_size_per_flow'] = le.fit_transform(test['N_pkts_size_per_flow'])
#Select the x and y columns from dataset
xtest_Val = test.iloc[:,0:6].values
Ytest = test.iloc[:,6].values
#print Ytest
#MinMax Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
Xtest = scaler.fit_transform(xtest_Val)
#print Xtest
#Load the train dataset
df2 = pd.read_csv("/home/user/Desktop/FinalTrainingSet.csv")
train = df2
le = LabelEncoder()
test['Average_packets_per_flow'] = le.fit_transform(test['Average_packets_per_flow'])
test['Average_PktSize_per_flow'] = le.fit_transform(test['Average_PktSize_per_flow'])
test['Avg_pkts_per_sec'] = le.fit_transform(test['Avg_pkts_per_sec'])
test['Avg_bytes_per_sec'] = le.fit_transform(test['Avg_bytes_per_sec'])
test['N_pkts_per_flow'] = le.fit_transform(test['N_pkts_per_flow'])
test['N_pkts_size_per_flow'] = le.fit_transform(test['N_pkts_size_per_flow'])
#Select the x and y columns from dataset
xtrain_Val = train.iloc[:,0:6].values
Ytrain = train.iloc[:,6].values
#print Ytrain
#MinMax Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# Fit the model
Xtrain = scaler.fit_transform(xtrain_Val)
#Reshape data for CNN
Xtrain = Xtrain.reshape((Xtrain.shape[0], 1, 6, 1))
print(Xtrain)
#Xtest = Xtest.reshape((Xtest.shape[0], 1, 6, 1))
#print Xtrain.shape
max_length=70
EMBEDDING_DIM=100
vocab_size=100
num_labels=2
#Define model
def init_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=Xtrain.shape[0]))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='softmax'))
    model.add(Flatten())
    #adam optimizer
    adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
print('Train...')
model = init_model()
#To avoid overfitting
callbacks = [EarlyStopping('val_loss', patience=3)]
hist = model.fit(Xtrain, Ytrain, epochs=50, batch_size=50, validation_split=0.20, callbacks=callbacks, verbose=1)
#Evaluate model and print results
score, acc = model.evaluate(Xtest, Ytest, batch_size=50)
print('Test score:', score)
print('Test accuracy:', acc)
However, I'm getting the following error:
ValueError: Input 0 is incompatible with layer flatten_1: expected min_ndim=3, found ndim=2
I tried to remove the flatten layers, but got a different error:
ValueError: Error when checking input: expected dense_1_input to have shape (424686,) but got array with shape (6,)
424686 is the number of rows in the dataset and 6 is the number of features.
I appreciate any suggestions. Thank you.
Based on Omarfoq's suggestion, I now use three labels for both the training and testing datasets. The code and error remain unchanged.
Can anyone please suggest a solution? Thank you.
I would say that what you are trying is not logical: your model will never predict class "3" if that class doesn't exist in the training set. What you are trying makes no sense. Try to reformulate your problem.
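To make this concrete, here is a tiny sketch with made-up prediction values: a model trained on labels {1, 2} has a two-unit softmax output, so taking the argmax can only ever map back to one of those two labels.
import numpy as np

# Hypothetical softmax outputs from a model trained on labels {1, 2} only;
# the two columns correspond to classes 1 and 2.
probs = np.array([[0.9, 0.1],
                  [0.3, 0.7]])
pred_labels = probs.argmax(axis=1) + 1  # map column index back to labels {1, 2}
print(pred_labels)  # [1 2] -- label 3 can never appear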
I'm using the Keras API to write code that can make predictions using a learned .h5 file.
The learning model is as follows:
#Libraries
import keras
from keras import backend as k
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.core import Dense, Flatten, Reshape
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
import numpy as np
from random import randint
from sklearn.preprocessing import MinMaxScaler
#Create 2 numpy lists that will hold both our sample data and raw data
train_labels = []
train_samples = []
#declare array to hold training data as well as label
train_samples_temp_a = []
train_samples_temp_b = []
#Generate data
for i in range(1000):
    #YOUNGER PEOPLE
    random_younger_a = randint(13,64)
    random_younger_b = randint(13,64)
    train_samples_temp_a.append(random_younger_a)
    train_samples_temp_b.append(random_younger_b)
    train_labels.append(0)
    #OLDER PEOPLE
    random_older_a = randint(65,100)
    random_older_b = randint(65,100)
    train_samples_temp_a.append(random_older_a)
    train_samples_temp_b.append(random_older_b)
    train_labels.append(1)
for i in range(50):
    #YOUNGER PEOPLE
    random_younger_a = randint(13,64)
    random_younger_b = randint(13,64)
    train_samples_temp_a.append(random_younger_a)
    train_samples_temp_b.append(random_younger_b)
    train_labels.append(1)
    #OLDER PEOPLE
    random_older_a = randint(65,100)
    random_older_b = randint(65,100)
    train_samples_temp_a.append(random_older_a)
    train_samples_temp_b.append(random_older_b)
    train_labels.append(0)
#Array of Two Arrays
train_samples.append(train_samples_temp_a)
train_samples.append(train_samples_temp_b)
#Convert both train_label and train_sample list into a numpy array
train_samples = np.array(train_samples)
train_labels = np.array(train_labels)
#Scale down train_samples to numbers between 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))
scaled_train_samples=scaler.fit_transform((train_samples))
#Sequential Model
model = Sequential([
    Dense(16, input_shape=(2,2100), activation='relu'),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax')
])
#Compile Model
model.compile(Adam(lr=.0001), loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
#Train Model
model.fit(scaled_train_samples, train_labels, validation_split=0.20,
          batch_size=10, epochs=20, shuffle=True, verbose=2)
I used the transpose function to reshape scaled_train_samples from a 2 by 2100 matrix into a 2100 by 2 matrix. Thanks, guys, for your contributions.
#Transpose
scaled_train_samples = scaled_train_samples.transpose()
However, running the line of code below gives the accuracy of the model. Currently I am getting an accuracy of 51.52%; is there anything I can do to improve this model?
#Evaluate the model
scores = model.evaluate(scaled_train_samples, train_labels)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
input_shape=(2100,)
The input shape should not contain the batch size.
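As a minimal sketch of that rule (assuming the transposed layout, where scaled_train_samples has shape (2100, 2), so each sample is one row with 2 features):
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

x = np.random.rand(2100, 2)             # 2100 samples, 2 features each
y = np.random.randint(0, 2, size=2100)  # hypothetical binary labels

model = Sequential([
    # input_shape is the shape of ONE sample; the batch dimension is omitted
    Dense(16, input_shape=(x.shape[1],), activation='relu'),
    Dense(2, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x, y, batch_size=10, epochs=1, verbose=0)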
I want to build a 40-class LSTM classifier to analyze time series data. I have 13-dimensional real-time data collected from 13 sensors. When I run the code below, I keep getting this error message:
ValueError: Error when checking model input: the list of Numpy arrays
that you are passing to your model is not the size the model expected.
Expected to see 1 arrays but instead got the following list of 241458
arrays: [array([[ 0.64817517, 0.12892013, 0.01879949, 0.00946322,
0.00458952,
0.01668651, 0.04776124, 0.03301365, 0.0360659 , 0.15013408,
0.10112171, 0.05494366, 0.02620634],
RNN code
from __future__ import print_function
import keras
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Activation
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization
from sklearn.model_selection import train_test_split  # cross_validation was removed from scikit-learn
import pandas as pd
from keras.callbacks import CSVLogger
from keras.models import load_model
from keras.layers import LSTM
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import keras
def top_k_acc(y_true, y_pred):
    return metrics.top_k_categorical_accuracy(y_true, y_pred, k=5)
# train Parameters
sequence_length = 60
data_dim = 13
num_classes = 40
batch_size = 15000
epochs = 10
# tf.set_random_seed(777) # reproducibility
def MinMaxScaler(data):
    ''' Min Max Normalization
    Parameters
    ----------
    data : numpy.ndarray
        input data to be normalized
        shape: [Batch size, dimension]
    Returns
    ----------
    data : numpy.ndarray
        normalized data
        shape: [Batch size, dimension]
    References
    ----------
    .. [1] http://sebastianraschka.com/Articles/2014_about_feature_scaling.html
    '''
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)
# Load data
xy = np.loadtxt('sc_total_for 60s v4.0 test.csv', delimiter=',', skiprows=1)
x = xy[:, 1:14]
x = MinMaxScaler(x)
y = xy[:,0]
# Build a dataset
x_data = []
y_data = []
for i in range(0, len(y) - sequence_length):
    _x = x[i:i + sequence_length]
    _y = y[i + sequence_length]
    # print(_x, "->", _y)
    x_data.append(_x)
    y_data.append(_y)
# One-hot encoding
encoder = LabelEncoder()
encoder.fit(y_data)
encoded_Y = encoder.transform(y_data)
dummy_y = np_utils.to_categorical(encoded_Y)
#train/test split
x_train,x_test,y_train,y_test=train_test_split(x_data,dummy_y,random_state=4,test_size=0.3);
# print(x_train[0],"->",y_train[0])
# Network
model = Sequential()
model.add(LSTM(40, batch_input_shape=(batch_size, sequence_length, data_dim),return_sequences=True))
model.add(LSTM(40, return_sequences=False))
model.add(Dense(40))
model.add(Activation("linear"))
# model.add(Dense(40))
# model.add(Dense(25, init='uniform', activation='relu'))
# model.add(BatchNormalization())
# model.add(Dense(30, init='uniform', activation='relu'))
# model.add(BatchNormalization())
# model.add(Dense(40, init='uniform', activation='softmax'))
model.summary()
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['accuracy'])
csv_logger = CSVLogger('LSTM 1111.log')
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    callbacks=[csv_logger])
score = model.evaluate(x_test, y_test, verbose=0)
predictions=model.predict(x_test)
# model.save('New Model6 save.h5')
#plot_model(model, to_file='model1.png')
# print('Test loss:', score[0])
# print('Test accuracy:', score[1])
The issue is:
# Build a dataset
x_data = []
y_data = []
for i in range(0, len(y) - sequence_length):
    _x = x[i:i + sequence_length]
    _y = y[i + sequence_length]
    # print(_x, "->", _y)
    x_data.append(_x)
    y_data.append(_y)
You're building a list of 2-D NumPy arrays for x_data, while Keras expects a single three-dimensional array for an LSTM. Do this instead:
num_samples = len(y) - sequence_length
x_data = np.zeros((num_samples, sequence_length, data_dim))
y_data = np.zeros((num_samples))
for i in range(num_samples):
    x_data[i] = x[i:i + sequence_length]
    y_data[i] = y[i + sequence_length]
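A quick sanity check (the shapes assume the question's sequence_length=60 and data_dim=13):
print(x_data.shape)  # (num_samples, 60, 13) -> (samples, timesteps, features), as the LSTM expects
print(y_data.shape)  # (num_samples,)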