Here is the problem: I have a 2200x39 dataset (I know, very small), where 38 columns are features (texture and statistics) and the last column is the output class, which can be 0 or 1. My dataset is balanced (1100 "1" and 1100 "0").
I'm trying to improve the performance, which is stuck at 0.69 loss and 0.49 accuracy. I tried adding a layer, adding neurons, and different parameters; nothing works, the accuracy and loss values change only slightly.
So, first of all, I import everything I need:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Conv1D
from tensorflow.keras.optimizers import SGD
import matplotlib.pyplot as plt
Then I prepare my data and split it into a training set (80%) and a validation set (20%):
# fix a seed for reproducing same results if we wish to train and evaluate our network more than once
seed = 9
np.random.seed(seed)
# load dataset
dataset = np.loadtxt('tr_set.csv', delimiter=',', skiprows=1)
# Show the first 10 rows
print(dataset[:10])
# Delete the first column with the patient index
dataset = dataset[:, 1:]
# Split into input (features) and output variables
X = dataset[:, :38]
Y = dataset[:, 38]
# Counting elements in class 0 and in class 1
count_0 = 0
count_1 = 0
for i in Y:
    if i == 0:
        count_0 += 1
    if i == 1:
        count_1 += 1
print("Number of elements in 0 class:", count_0)
print("Number of elements in 1 class:", count_1)
# The dataset is balanced
# Split into training set(80%) and validation set (20%)
(X_train, X_val, Y_train, Y_val) = train_test_split(X, Y, test_size=0.2, random_state=seed)
And here is my model, after reshaping X_train and X_val for the Conv1D layer.
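The reshape itself isn't shown; presumably it is something like the sketch below, turning each sample into a length-1 "sequence" of 38 features (an assumption based on the input_shape=(1, 38) used in the model):

# assumed reshape: (samples, 38) -> (samples, 1, 38), so Conv1D receives 3D input
X_train = X_train.reshape(-1, 1, 38)
X_val = X_val.reshape(-1, 1, 38)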
# Create the model
opt = SGD(learning_rate=0.00001)
model = Sequential()
model.add(Dense(1024, activation='relu', kernel_initializer='random_uniform', input_shape=(1,38)))
model.add(BatchNormalization()) # It is used to normalize the input layer by adjusting and scaling the activations.
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.summary()
model.add(Conv1D(64, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(2))
model.summary()
model.add(Dense(1, activation='sigmoid'))
model.summary()
# compile the model
model.compile(loss='binary_crossentropy', optimizer= opt, metrics=['accuracy'])
# fit the model
history = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=15, batch_size=10)
# w_data = model.get_weights()
What is wrong here? I deleted the max-pooling layer because I had problems with dimensions (something like being unable to subtract 2 from 1).
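For context on that dimension error: after the (samples, 1, 38) reshape, the convolution's length axis is 1, so a pool of size 2 has nothing to pool over. A minimal sketch that reproduces the problem, assuming the shapes above:

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D

m = Sequential()
m.add(Conv1D(64, 3, padding="same", activation="relu", input_shape=(1, 38)))
m.summary()  # output shape is (None, 1, 64): the length axis is still 1
# m.add(MaxPooling1D(2))  # fails: a length-1 axis cannot be pooled with size 2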
I'm trying to train a model on a dataset of a few thousand entries, with 51 numerical features and a label column.
When training the model to predict the 3 labels (candidate, false positive, confirmed), the loss is always nan and the accuracy stabilizes very quickly at a specific value.
The code:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, RobustScaler
from sklearn.preprocessing import OrdinalEncoder
from tensorflow import optimizers
from tensorflow.keras.layers import Dense, Dropout, Normalization
from tensorflow.keras.models import Sequential, Model
def load_dataset(data_folder_csv):
    # load the dataset as a pandas DataFrame
    data = pd.read_csv(data_folder_csv, header=0)
    # retrieve numpy array
    dataset = data.values
    # split into input (X) and output (y) variables
    X = dataset[:, :-1]
    y = dataset[:, -1]
    print(y)
    # format all fields as floats (np.float is deprecated; use the builtin)
    X = X.astype(float)
    # reshape the output variable to be one column (i.e. a 2D shape)
    y = y.reshape((len(y), 1))
    return X, y
# prepare input data using a scaler
def prepare_inputs(X_train, X_test):
    oe = RobustScaler().fit_transform(X_train)
    X_train_enc = oe.transform(X_train)
    X_test_enc = oe.transform(X_test)
    return X_train_enc, X_test_enc
# prepare target
def prepare_targets(y_train, y_test):
    le = LabelEncoder()
    ohe = OneHotEncoder()
    le.fit(y_train)
    le.fit(y_test)
    y_train_enc = ohe.fit_transform(y_train).toarray()
    y_test_enc = ohe.fit_transform(y_test).toarray()
    return y_train_enc, y_test_enc
X, y = load_dataset("csv_ready.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)
X_train_enc, X_test_enc = X_train, X_test
print('Finished preparing inputs.')
# prepare output data
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
norm_layer = Normalization()
norm_layer.adapt(X)
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="tanh", kernel_initializer='he_normal'))
model.add(Dropout(0.2))
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(3, activation='sigmoid'))
opt = optimizers.Adam(lr=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train_enc, epochs=20, batch_size=128, verbose=1, use_multiprocessing=True)
_, accuracy = model.evaluate(X_test, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))
I tried increasing/decreasing the learning rate, changing the optimizer, lowering and increasing the number of neurons and layers, and playing with batch sizes but nothing seems to bring the model to get good results. I think I'm missing something here but can't put my finger on it.
Result example: (screenshot omitted)
EDIT: more rows from the csv: (screenshot omitted)
EDIT 2: I also tried L2 regularization and it didn't change anything.
One of the reasons:
Check whether your dataset has NaN values or not. NaN values can cause problems for the model while learning.
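A quick check, assuming the data is loaded with pandas as in the question:

import pandas as pd

data = pd.read_csv('csv_ready.csv', header=0)
print(data.isna().sum())          # NaN count per column
print(data.isna().values.any())   # True if there is any NaN at all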
Some of the major bugs in your code:
You are using a sigmoid activation function instead of softmax for an output layer with 3 neurons.
You are fitting the encoders on both the train and test sets, which is wrong. You should fit_transform on your training data and use only transform on the test sets.
You are also passing the input shape to every layer, which is wrong; only the first layer should accept the input tensor.
You forgot to use the prepare_inputs function on X_train and X_test.
Your model should be fit with X_train_enc, not X_train.
Use this instead
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.preprocessing import OrdinalEncoder
from tensorflow import optimizers
from tensorflow.keras.layers import Dense, Dropout, Normalization
from tensorflow.keras.models import Sequential, Model
def load_dataset(data_folder_csv):
    # load the dataset as a pandas DataFrame
    data = pd.read_csv(data_folder_csv, header=0)
    # retrieve numpy array
    dataset = data.values
    # split into input (X) and output (y) variables
    X = dataset[:, :-1]
    y = dataset[:, -1]
    print(y)
    # format all fields as floats (np.float is deprecated; use the builtin)
    X = X.astype(float)
    # reshape the output variable to be one column (i.e. a 2D shape)
    y = y.reshape((len(y), 1))
    return X, y
# prepare input data using min/max scaler.
def prepare_inputs(X_train, X_test):
    oe = MinMaxScaler()
    X_train_enc = oe.fit_transform(X_train)
    X_test_enc = oe.transform(X_test)
    return X_train_enc, X_test_enc
# prepare target
def prepare_targets(y_train, y_test):
    le = LabelEncoder()
    ohe = OneHotEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)
    # OneHotEncoder expects 2D input, hence the reshape
    y_train_enc = ohe.fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test_enc = ohe.transform(y_test.reshape(-1, 1)).toarray()
    return y_train_enc, y_test_enc
X, y = load_dataset("csv_ready.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)
#prepare_input function missing here
X_train_enc, X_test_enc = prepare_inputs(X_train, X_test)
print('Finished preparing inputs.')
# prepare output data
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dense(3, activation='softmax'))
#opt = optimizers.Adam(lr=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X_train_enc, y_train_enc, epochs=20, batch_size=32, verbose=1, use_multiprocessing=True)
_, accuracy = model.evaluate(X_test_enc, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))
You want to change your model definition to this:
model = Sequential()
model.add(Dense(128, input_shape=X_train.shape[1:], activation="tanh", kernel_initializer='he_normal'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))
You only need to define the input shape for the first layer; Keras will automatically determine the proper shape for the subsequent layers. You leave out the batch size when defining the input_shape, since it is the first dimension, hence input_shape=X_train.shape[1:].
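A quick way to see the difference (plain numpy, with an illustrative array standing in for X_train):

import numpy as np

X_train = np.zeros((3000, 51))  # illustrative: 3000 samples, 51 features
print(X_train.shape)      # (3000, 51) -- the first axis is the sample/batch axis
print(X_train.shape[1:])  # (51,)      -- this is what input_shape should be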
A sigmoid activation will actually work (each output will vary between 0 and 1), but what you really want is a softmax activation, which makes sure all the outputs sum to 1, as probability dictates: the probability that something happened is 100%, not the 120% that independent sigmoids could end up giving you.
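A quick numeric illustration with made-up logits:

import numpy as np

logits = np.array([2.0, 1.0, 0.5])
sigmoid = 1 / (1 + np.exp(-logits))
softmax = np.exp(logits) / np.exp(logits).sum()
print(sigmoid, sigmoid.sum())  # [0.88 0.73 0.62] -> sums to ~2.23
print(softmax, softmax.sum())  # [0.63 0.23 0.14] -> sums to 1.0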
Also, you're not using your LabelEncoder anywhere. I think what you meant to do is this (a usage example follows the function):
def prepare_targets(y_train, y_test):
    le = LabelEncoder()
    ohe = OneHotEncoder()
    # teach the label encoder our labels
    le.fit(y_train)
    # turn our strings into integers
    y_train_transformed = le.transform(y_train)
    y_test_transformed = le.transform(y_test)
    # turn our integers into one-hot-encoded arrays
    # (OneHotEncoder expects 2D input, hence the reshape)
    y_train_enc = ohe.fit_transform(y_train_transformed.reshape(-1, 1)).toarray()
    y_test_enc = ohe.transform(y_test_transformed.reshape(-1, 1)).toarray()
    return y_train_enc, y_test_enc
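As a hypothetical usage example (the three label strings come from the question; the arrays themselves are made up):

import numpy as np

y_train = np.array(['candidate', 'confirmed', 'false positive', 'candidate'])
y_test = np.array(['confirmed', 'candidate'])
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
print(y_train_enc.shape)  # (4, 3) -- one column per class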
I am encountering an issue for a school project.
I have to predict the age and gender of a person on a test set, based on textual data. My training dataset has 4 columns (ID, keywords, age, sex).
I created a neural network (please see the code below), but when fitting it, my loss values are extremely negative.
Could you please tell me how to alleviate this issue?
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
#Load the datasets
chunk_train = pd.read_csv('/Users/romeoleon/Downloads/train.csv',chunksize=10**6)
data_train = pd.concat(chunk_train)
#Map the values for sex columns
data_train.sex = data_train.sex.map({'M':0,'F':1})
#Remove the rows with missing data
print('Missing rows represent {} percent of the dataframe'.format(data_train['keywords'].isna().sum()/len(data_train.keywords)*100))
#Drop the missing values
data_train.dropna(inplace=True)
#Plot the distribution of numerical variables
sns.histplot(data_train.age,bins=85)
plt.show()
sns.countplot(x='sex',data=data_train)
plt.show()
#Prepare the data to feed it to the NN
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
x_train, x_test, y_train, y_test = train_test_split(data_train['keywords'],data_train[["age","sex"]],test_size=0.2)
#Choose parameters
vocab_size = 1000
maxlen = 300
batch_size = 32
embedding_dims = 100
hidden_dims = 5
filters = 250
kernel_size = 3
epochs = 10
#Tokenize the words
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(x_train)
X_train = tokenizer.texts_to_matrix(x_train)
X_test = tokenizer.texts_to_matrix(x_test)
#Pad sequencing : Ensure all sequences have the same length
from tensorflow.keras.preprocessing import sequence
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, MaxPooling1D
from tensorflow.keras.layers import Embedding, LSTM
from tensorflow.keras.layers import Conv1D, Flatten
#Create the model
model = Sequential()
model.add(Embedding(vocab_size, embedding_dims, input_length=maxlen, trainable=True))
model.add(Dropout(0.5))
model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu'))
#model.add(MaxPooling1D(pool_size=4))
model.add(Flatten())
model.add(Dense(hidden_dims, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))
# Compile the neural model
model.compile(loss='binary_crossentropy',  # cross-entropy loss
              optimizer='adam',            # Adam optimizer
              metrics=['accuracy'])        # accuracy metric
model.summary()
#Fit the model
model.fit(X_train, y_train, batch_size=batch_size, epochs=1, validation_data=(X_test, y_test), verbose=1)
You can find below a screenshot of the structure of my training dataset: (screenshot omitted)
When using 'binary_crossentropy' as the loss function, the Dense layer at the output end should have only 1 unit rather than 2 (a single unit already covers the two states, 1 or 0).
Use this instead:
model.add(Dense(1, activation='sigmoid'))
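As an aside on the "extremely negative" loss values: binary cross-entropy is only a proper loss when each target lies in [0, 1], and here y_train also contains the unbounded age column. A quick numeric check with illustrative values:

import numpy as np

def bce(y, p):
    # binary cross-entropy for a single target y and prediction p
    return -(y * np.log(p) + (1 - y) * np.log(1 - p))

print(bce(1.0, 0.9))   # ~0.105: normal for a 0/1 label
print(bce(25.0, 0.9))  # ~-52.6: negative once the target leaves [0, 1]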
My input is simply a csv file with 237124 rows and 37 columns:
The first 36 columns are features.
The last column is a binary class label.
I am trying to train a Conv1D model on this data.
I built a CNN with one convolutional layer, but I ran into some problems with it.
The error output:
ValueError: Error when checking input: expected conv1d_9_input to have shape (213412, 36) but got array with shape (36, 1)
Code:
import pandas as pd
import numpy as np
import sklearn
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Conv2D,Conv1D, MaxPooling2D,MaxPooling1D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout,BatchNormalization
dataset=pd.read_csv("C:/Users/User/Desktop/data.csv",encoding='cp1252')
dataset.shape
#output: (237124, 37)
array = dataset.values
X = array[:,0:36]
Y = array[:,36]
kf = KFold(n_splits=10)
kf.get_n_splits(X)
for trainindex, testindex in kf.split(X):
    Xtrain, Xtest = X[trainindex], X[testindex]
    Ytrain, Ytest = Y[trainindex], Y[testindex]
Xtrain.shape[0]
#output: 213412
Xtrain.shape[1]
#output: 36
Ytrain.shape[0]
#output: 213412
n_timesteps, n_features, n_outputs = Xtrain.shape[0], Xtrain.shape[1], Ytrain.shape[0]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(Xtrain, Ytrain, epochs=10, batch_size=32, verbose=0)
# Testing CNN model BY X test
Predictions = model.predict(Xtest,batch_size =100)
rounded = [round(x[0]) for x in Predictions]
Y_predection = pd.DataFrame(rounded)
Y_predection = Y_predection.iloc[:, 0]
...
I tried to modify the code this way:
Xtrain = np.expand_dims(Xtrain, axis=2)
But the error remains the same.
There are a couple of problems I notice in your code.
Xtrain needs to be a 3D tensor, because Conv1D cannot process anything else. So if you have 2D data, you need to add a new dimension to make it 3D.
Your input_shape needs to be changed to reflect that. For example, if you added only a single channel, it should be (n_features, 1).
# Here I'm assuming some dummy data
# Xtrain => [213412, 36, 1] (Note that you need Xtrain to be 3D not 2D - So we're adding a channel dimension of 1)
Xtrain = np.expand_dims(np.random.normal(size=(213412, 36)),axis=-1)
# Ytrain => [213412, 10]
Ytrain = np.random.choice([0,1], size=(213412,10))
n_timesteps, n_features, n_outputs =Xtrain.shape[0], Xtrain.shape[1], Ytrain.shape[1]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(n_features, 1)))
model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(Xtrain, Ytrain, epochs=10, batch_size=32, verbose=0)
You need to specify only the shape of one sample in the input layer, not how many samples you will pass. For a single-channel Conv1D that per-sample shape is (n_features, 1):
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_features, 1)))
This means that the input will be N samples, each of shape (n_features, 1).
For the last layer, you should change the number of units to the number of classes you have, instead of the number of rows in your data.
I'm doing a Neural Network for the "Default of credit card clients" from http://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients.
But the accuracy of my models is pretty bad, worse than if I predicted all zeros. I already did some research on it, oversampled to balance the classes, and changed the optimizer, because adam was not increasing the accuracy.
What else could I do?
import pandas
import numpy
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
import keras
from imblearn.over_sampling import SMOTE
seed = 8
numpy.random.seed(seed)
base = pandas.read_csv('base_nao_trabalhada.csv')
train, test = train_test_split(base, test_size = 0.2)
train=train.values
test=test.values
X_train = train[:,1:23]
Y_train = train[:,24]
X_test = test[:,1:23]
Y_test = test[:,24]
sm = SMOTE()
X_resampled, Y_resampled = sm.fit_resample(X_train, Y_train)
# Model Creation
model = Sequential()
model.add(Dense(40, input_dim=22, kernel_initializer='uniform', activation='relu'))
model.add(Dense(4, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
#activation='relu'
opt = keras.optimizers.SGD(learning_rate=0.000001)
# Compile model
model.compile(loss='binary_crossentropy', optimizer=opt , metrics=['accuracy'])
#loss=binary_crossentropy
#optimizer='adam'
# fit the model
model.fit(X_resampled, Y_resampled, epochs=10000, batch_size=30)
# evaluate the model
scores = model.evaluate(X_test, Y_test)
print()
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
I'm experimenting with Keras to develop a multi-layer perceptron for binary classification, and I'm surprised at the poor performance I obtain (57% accuracy on the training set). A logistic regression correctly classifies 100% of the samples.
I created a data set with 2 inputs: input A is a sine function, and input B is input A with a lag. When input A >= input B, the output is 1; otherwise it is 0. A few sample rows are below, and a sketch that reproduces the data follows them.
SIN SIN-1 Direction
0 0 1
0.06279052 0 1
0.125333234 0.06279052 1
0.187381315 0.125333234 1
0.248689887 0.187381315 1
0.309016994 0.248689887 1
0.368124553 0.309016994 1
0.425779292 0.368124553 1
0.481753674 0.425779292 1
0.535826795 0.481753674 1
0.587785252 0.535826795 1
0.63742399 0.587785252 1
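For reference, the values look like a sine sampled in steps of 2π/100 with a one-step lag, so the data can presumably be reconstructed like this (an assumption inferred from the sample rows above):

import numpy as np

t = np.arange(1000)
a = np.sin(2 * np.pi * t / 100)   # SIN
b = np.roll(a, 1)                 # SIN-1: the same series lagged one step
b[0] = 0.0
y = (a >= b).astype(int)          # Direction: 1 when SIN >= SIN-1
X = np.column_stack([a, b])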
The issue I have is similar to what is described in "Keras low accuracy classification task". The answer there points at the data set, which I do not believe is the problem here.
See code below. What am I missing to improve the accuracy of the model? Adding neurons to the layers or changing the activation function of the output layer to softmax hasn't yielded any better results.
import numpy
import pandas
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load the dataset
dataframe = pandas.read_csv('SIN.csv', usecols=[0,1,2], engine='python')
dataset = dataframe.values
X = dataset[:,0:2].astype(float)
Y = dataset[:,2].astype(int)
# split into train and test sets
train_size = int(len(X) * 0.80)
test_size = len(X) - train_size
Xtrain, Xtest = X[0:train_size,:], X[train_size:len(X),:]
Ytrain, Ytest = Y[0:train_size], Y[train_size:len(Y)]
print(len(Xtrain), len(Xtest))
# create and fit Multilayer Perceptron model
model = Sequential()
model.add(Dense(2, input_dim=2, kernel_initializer='uniform', activation='relu'))
model.add(Dense(2, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(Xtrain, Ytrain, epochs=20, batch_size=2, verbose=2)
# Estimate model performance
trainScore = model.evaluate(Xtrain, Ytrain, verbose=2)
print('Train Score: %.2f' % trainScore[1])
testScore = model.evaluate(Xtest, Ytest, verbose=2)
print('Test Score: %.2f' % testScore[1])