My accuracy for the ANN is not coming perfect - python

crop: object = CropData(path='')
crop.dataset: pd.DataFrame = CropAnalysis.rename_columns(dataset=crop.dataset)
crop.dataset.head()
data_characteristics: Generator = CropAnalysis.data_characteristics(dataset=crop.dataset)
while True:
    try:
        print('-' * 100)
        print(next(data_characteristics))
    except StopIteration:
        break
unique_values: Generator = CropAnalysis.data_unique_values(dataset=crop.dataset)
while True:
    try:
        print('-' * 100)
        print(next(unique_values))
    except StopIteration:
        break
target_classification: Generator = CropAnalysis.target_classification_count(
    dataset=crop.dataset, target='crop')
while True:
    try:
        print('-' * 100)
        print(next(target_classification))
    except StopIteration:
        break
crop.dataset.keys()
numeric_histoplots: Generator = CropAnalysis.histograms_numeric_features(
    dataset=crop.dataset,
    numeric_features=['Nitrogen', 'Phosphorus', 'Potassium', 'Temp', 'hum', 'PH', 'Rain']
)
while True:
    try:
        next(numeric_histoplots)
    except StopIteration:
        break
crop.dataset: pd.DataFrame = CropPreprocess.change_object_to_str(dataset=crop.dataset, cols=['crop'])
crop.dataset: pd.DataFrame = CropPreprocess.encode_features(dataset=crop.dataset)
crop.dataset
X, y = crop.dataset.drop('crop', axis= 1), crop.dataset['crop']
# Split first, so X_train and y_train exist before they are normalized
# and one-hot encoded below.
from sklearn.model_selection import train_test_split as tts
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

from sklearn import preprocessing
normalizer = preprocessing.Normalizer()
normalized_train_X = normalizer.fit_transform(X_train)
normalized_train_X

from tensorflow.keras.utils import to_categorical
one_hot_y_train = to_categorical(y_train)
one_hot_y_test = to_categorical(y_test)
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Neural network
model = Sequential()
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(7, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=25, epochs=100)
The accuracy is way too small, around 0.04, which is nothing like what I get with classical machine learning algorithms. The data is also reasonably big, 2200x8, which is not small. Help me find out what the problem with the data is.
The dataset is provided in here https://www.kaggle.com/atharvaingle/crop-recommendation-dataset

You need to add more neurons to your output layer. Currently you are predicting whether to use a single crop or not. But in the Kaggle dataset the labels are actually 22 different crops and you want to decide which crop is most suitable to the soil at hand.
So you need 22 neurons in your output layer, i.e. change your last layer to
model.add(Dense(22, activation='softmax'))
The resulting prediction will indicate which crop is most suitable for the soil.
Make sure that y_train and y_test are one-hot vectors for this. If they're not yet, you can convert them, e.g. by using one_hot_y_train = to_categorical(y_train)
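For illustration, a minimal sketch of the full target preparation, assuming the labels arrive as the 22 crop-name strings from the Kaggle dataset (if they are already integer-encoded, e.g. after CropPreprocess.encode_features, skip the LabelEncoder step):

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Integer-encode the 22 crop names, then one-hot encode them so the
# target shape matches a 22-unit softmax with categorical_crossentropy.
le = LabelEncoder()
y_int = le.fit_transform(y)          # crop-name strings -> integers 0..21
one_hot_y = to_categorical(y_int)    # shape (n_samples, 22)

Then fit on the matching pair, e.g. model.fit(normalized_train_X, one_hot_y_train, ...), so inputs and targets line up.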

Related

Multiple variable output in Neural Network | Why is Keras yielding negative binary_cross_entropy?

I am encountering an issue with a school project.
I have to predict, on a test set and based on textual data, the age and gender of a person. My training dataset has 4 columns (ID, keywords, age, sex).
I created a neural network (please see the code below), but when fitting it, my loss values are extremely negative.
Could you please tell me how to alleviate this issue?
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
#Load the datasets
chunk_train = pd.read_csv('/Users/romeoleon/Downloads/train.csv',chunksize=10**6)
data_train = pd.concat(chunk_train)
#Map the values for sex columns
data_train.sex = data_train.sex.map({'M':0,'F':1})
#Remove the rows with missing data
print('Missing rows represent {} percent of the dataframe'.format(data_train['keywords'].isna().sum()/len(data_train.keywords)*100))
#Drop the missing values
data_train.dropna(inplace=True)
#Plot the distribution of numerical variables
sns.histplot(data_train.age,bins=85)
plt.show()
sns.countplot(x='sex',data=data_train)
plt.show()
#Prepare the data to feed it to the NN
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
x_train, x_test, y_train, y_test = train_test_split(data_train['keywords'],data_train[["age","sex"]],test_size=0.2)
#Choose parameters
vocab_size = 1000
maxlen = 300
batch_size = 32
embedding_dims = 100
hidden_dims = 5
filters = 250
kernel_size = 3
epochs = 10
#Tokenize the words
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(x_train)
X_train = tokenizer.texts_to_matrix(x_train)
X_test = tokenizer.texts_to_matrix(x_test)
#Pad sequencing : Ensure all sequences have the same length
from tensorflow.keras.preprocessing import sequence
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, MaxPooling1D
from tensorflow.keras.layers import Embedding, LSTM
from tensorflow.keras.layers import Conv1D, Flatten
#Create the model
model = Sequential()
model.add(Embedding(vocab_size, embedding_dims, input_length=maxlen, trainable=True))
model.add(Dropout(0.5))
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu'))
#model.add(MaxPooling1D(pool_size=4))
model.add(Flatten())
model.add(Dense(hidden_dims, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))
# Compile neural model
model.compile(loss='binary_crossentropy',  # cross-entropy loss
              optimizer='adam',            # Adam optimizer
              metrics=['accuracy'])        # accuracy performance metric
model.summary()
#Fit the model
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(X_test, y_test), verbose=1)
(Screenshot: structure of the training dataset.)
When using 'binary_crossentropy' as the loss function, the Dense layer at the output end should have only 1 unit rather than 2 (1 unit already has 2 states, 1 or 0). Note also that binary cross-entropy assumes targets in [0, 1]; feeding it raw age values is what drives the loss negative.
Use this instead:
model.add(Dense(1, activation='sigmoid'))
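To see the negative-loss mechanism concretely, here is a minimal sketch (assuming TensorFlow 2.x; the numbers are made up for illustration):

import numpy as np
import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy()

# Targets inside [0, 1]: binary cross-entropy is non-negative.
valid = bce(np.array([[0.0], [1.0]]), np.array([[0.1], [0.9]]))
print(valid.numpy())    # small positive value

# A raw age value (e.g. 25) used as a target breaks the [0, 1]
# assumption and yields a large negative loss.
invalid = bce(np.array([[25.0]]), np.array([[0.9]]))
print(invalid.numpy())  # strongly negative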

Keras LSTM Multiclass Classification structure

I am a beginner in machine learning and have been trying to use an LSTM to classify, according to 12 features, into 4 classes. I've followed quite a few tutorials but I'm still a bit confused. My dataset has 12 columns I want to use for training, including the label column, which holds the value that corresponds to each class.
0 = Class 1
1 = Class 2
2 = Class 3
3 = Class 4
and this is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import time
# For LSTM model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras import optimizers
# Load dataset
train = pd.read_csv(r"C:\Users\O\Documents\Datasets\FinalDataset2.csv")
train_processed = train.iloc[:, 1:13]
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train_processed)
features_set = []
labels = []
for i in range(1, 393763):
    features_set.append(train_scaled[i-1:i, 0])
    labels.append(train_scaled[i, 0])
features_set, labels = np.array(features_set), np.array(labels)
features_set = np.reshape(features_set, (features_set.shape[0], features_set.shape[1], 1))
# Initialize LSTM model
model = Sequential()
model.add(LSTM(512, return_sequences=True, activation='tanh', input_shape=(features_set.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Dense(4, activation='softmax'))
model.add(LSTM(units=1, activation='sigmoid'))
opt = optimizers.Adam(lr=0.0001)
model.compile(optimizer = opt , loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(features_set, labels, epochs = 100, batch_size = 512)
I am very unsure about whether my model is built correctly or not. Moreover, it only yields very low accuracy (27-28%). Any help would be greatly appreciated!!
Short Answer:
Last Layer to be Dense(4, activation='softmax')
Labels must be one hot encoded as you are using loss='categorical_crossentropy'
Here are more notes to help
1st Layer
LSTM(512, return_sequences=True, activation='tanh')
You started with a huge number of LSTM units (512) while your data has just 12 columns.
return_sequences=True is not justified in your case, as you are not stacking another recurrent layer after it.
Model Body
There are no layers in the middle between the LSTM and the final Dense().
Add at least one Dense layer there.
Output Layer
It could be easier to use sparse_categorical_crossentropy as the loss instead of categorical_crossentropy, so labels can be passed as integers; otherwise you need to one-hot encode them. A sketch follows.
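Putting those notes together, a minimal sketch of the suggested structure (the unit counts, timestep count, and learning rate are illustrative assumptions, not tuned values):

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras import optimizers

timesteps, n_features, n_classes = 1, 12, 4   # assumed input/output shapes

model = Sequential()
# Modest LSTM size; no return_sequences since nothing recurrent follows.
model.add(LSTM(64, activation='tanh', input_shape=(timesteps, n_features)))
model.add(Dropout(0.2))
# At least one Dense layer between the LSTM and the output.
model.add(Dense(32, activation='relu'))
# Last layer: Dense(4, softmax) for the 4 classes.
model.add(Dense(n_classes, activation='softmax'))

# sparse_categorical_crossentropy lets the labels stay as integers 0-3.
model.compile(optimizer=optimizers.Adam(lr=0.0001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])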

Neural Network classifier with low accuracy does not improve when changing neuron count

I made a neural network to recognize objects and trained it on 7 categories of images. When I train this model, I always get an accuracy of 0.217. Even when I change the neuron count of each layer, I still get an accuracy of 0.217.
(Image: categories of training image data.)
(I used OpenCV to convert the images to arrays and pickle to store the datasets.)
Dataset creation code:
import numpy as np
import os
import cv2
import pickle
import random
datadir = r"C:\Users\pc\Desktop\Tenserflow\upgrade1\Images"
categories = []
for root, dirs, files in os.walk(datadir, topdown=False):
    for name in dirs:
        categories.append(name)
training_data = []
img_size = 100
def create_training_data():
    for category in categories:
        path = os.path.join(datadir, category)
        class_num = categories.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
                new_array = cv2.resize(img_array, (img_size, img_size))
                training_data.append([new_array, class_num])
            except Exception as e:
                pass

create_training_data()
random.shuffle(training_data)
x =[]
y =[]
for features, label in training_data:
    x.append(features)
    y.append(label)
x = np.array(x).reshape(-1, img_size, img_size, 1)
y = np.array(y)
file1 = open('x.pickle', 'wb')
file2 = open('y.pickle', 'wb')
pickle.dump(x, file1)
pickle.dump(y, file2)
file1.close()
file2.close()
Training code:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
import pickle
import numpy as np
x =pickle.load(open("x.pickle", "rb"))
y =pickle.load(open("y.pickle", "rb"))
x = x/255.0
model = Sequential()
model.add(Conv2D(3,(3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(7,(3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(7,(3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(5))
model.add(Activation("relu"))
model.add(Dense(7))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
model.fit(x, y, epochs = 2, validation_split = 0.1)
I see two issues in your code:
1: you use categorical_crossentropy as your loss. This loss assumes that the targets are in format [1,0,0,0,0,0,0], [0,1,0,0,0,0,0], [0,0,1,0,0,0,0] etc but your dataset creation seems to have the targets as [0], [1], [2] etc.
Either switch to the sparse_categorical_crossentropy loss, which accepts the integer targets directly, or run your targets through the to_categorical function. I would suggest using to_categorical so you don't need to change your network.
More info on those:
https://www.tensorflow.org/api_docs/python/tf/keras/losses/SparseCategoricalCrossentropy
https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical
2: Your network is far too simple to give you a good accuracy. You use far too few filters in your Conv2D. Try using 16, 32 and 32 filters for example. Also your Dense layer is far too small. Try using 128 for the first and 7 for the second dense layer.
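A minimal sketch combining both fixes (the filter and unit counts follow the suggestions above; input_shape assumes the 100x100 grayscale arrays from the dataset-creation code):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical

# Fix 1: one-hot the integer targets to match categorical_crossentropy.
y = to_categorical(y, num_classes=7)

# Fix 2: a wider network (16, 32, 32 filters; 128-unit Dense layer).
model = Sequential()
model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(100, 100, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(7, activation='softmax'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])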

Deep Learning: Multiclass Classification with same amount of labels between the training dataset and test dataset

I'm writing code for a multiclass classification. I have custom datasets with 7 columns (6 features and 1 label); the training dataset has 2 types of labels (1 and 2), and the testing dataset has 3 types of labels (1, 2, and 3). The aim is to see how well the model predicts the label '3'.
As of now, I'm trying the MLP algorithm; the code is as follows:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from keras.models import load_model
from sklearn.externals import joblib
from joblib import dump, load
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#from keras.layers import Dense, Embedding, LSTM, GRU
#from keras.layers.embeddings import Embedding
#Load the test dataset
df1 = pd.read_csv("/home/user/Desktop/FinalTestSet.csv")
test = df1
le = LabelEncoder()
test['Average_packets_per_flow'] = le.fit_transform(test['Average_packets_per_flow'])
test['Average_PktSize_per_flow'] = le.fit_transform(test['Average_PktSize_per_flow'])
test['Avg_pkts_per_sec'] = le.fit_transform(test['Avg_pkts_per_sec'])
test['Avg_bytes_per_sec'] = le.fit_transform(test['Avg_bytes_per_sec'])
test['N_pkts_per_flow'] = le.fit_transform(test['N_pkts_per_flow'])
test['N_pkts_size_per_flow'] = le.fit_transform(test['N_pkts_size_per_flow'])
#Select the x and y columns from dataset
xtest_Val = test.iloc[:,0:6].values
Ytest = test.iloc[:,6].values
#print Ytest
#MinMax Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
Xtest = scaler.fit_transform(xtest_Val)
#print Xtest
#Load the train dataset
df2 = pd.read_csv("/home/user/Desktop/FinalTrainingSet.csv")
train = df2
le = LabelEncoder()
train['Average_packets_per_flow'] = le.fit_transform(train['Average_packets_per_flow'])
train['Average_PktSize_per_flow'] = le.fit_transform(train['Average_PktSize_per_flow'])
train['Avg_pkts_per_sec'] = le.fit_transform(train['Avg_pkts_per_sec'])
train['Avg_bytes_per_sec'] = le.fit_transform(train['Avg_bytes_per_sec'])
train['N_pkts_per_flow'] = le.fit_transform(train['N_pkts_per_flow'])
train['N_pkts_size_per_flow'] = le.fit_transform(train['N_pkts_size_per_flow'])
#Select the x and y columns from dataset
xtrain_Val = train.iloc[:,0:6].values
Ytrain = train.iloc[:,6].values
#print Ytrain
#MinMax Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# Fit the model
Xtrain = scaler.fit_transform(xtrain_Val)
#Reshape data for CNN
Xtrain = Xtrain.reshape((Xtrain.shape[0], 1, 6, 1))
print(Xtrain)
#Xtest = Xtest.reshape((Xtest.shape[0], 1, 6, 1))
#print Xtrain.shape
max_length=70
EMBEDDING_DIM=100
vocab_size=100
num_labels=2
#Define model
def init_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=Xtrain.shape[0]))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='softmax'))
    model.add(Flatten())
    # Adam optimizer
    adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
print('Train...')
model = init_model()
#To avoid overfitting
callbacks = [EarlyStopping('val_loss', patience=3)]
hist = model.fit(Xtrain, Ytrain, epochs=50, batch_size=50, validation_split=0.20, callbacks=callbacks, verbose=1)
#Evaluate model and print results
score, acc = model.evaluate(Xtest, Ytest, batch_size=50)
print('Test score:', score)
print('Test accuracy:', acc)
However, I'm getting the following error:
ValueError: Input 0 is incompatible with layer flatten_1: expected min_ndim=3, found ndim=2
I tried to remove the flatten layers, but getting different error:
ValueError: Error when checking input: expected dense_1_input to have shape (424686,) but got array with shape (6,)
424686 is the number of rows in dataset and 6 is the number of features.
I appreciate any suggestion. Thank you.
Based on Omarfoq's suggestion, I now use three labels for both the training and testing datasets. The code and the error remain unchanged.
Can anyone please suggest a solution? Thank you.
I would say that what you are trying is not logical: your model will never predict class '3' if that class doesn't exist in the training set, so the setup makes no sense as posed. Try to reformulate your problem.
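To make the point concrete, a minimal sketch with hypothetical numbers: a model whose output layer was sized for the 2 training classes can never emit the unseen class '3'.

import numpy as np

# A 2-unit softmax output assigns all probability mass to the 2 classes
# seen in training; argmax can only ever return index 0 or 1, so the
# third label is unreachable no matter the input.
probs = np.array([[0.7, 0.3], [0.1, 0.9]])  # hypothetical predictions
print(np.argmax(probs, axis=1))             # -> [0 1], never 2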

CNN doesn't improve its performance

Here's the problem: I have a 2200x39 dataset, I know... very poor. 38 of the columns are features (texture and statistics) and the last column is the output class, which can be 0 or 1. My dataset is balanced (1100 "1" and 1100 "0").
I'm trying to improve my performance, which is stuck at 0.69 loss and 0.49 accuracy. I tried adding a layer, adding neurons, and different parameters. Nothing: the accuracy and loss values change just a bit.
So, first of all, I import all the stuff I need
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Conv1D
from tensorflow.keras.optimizers import SGD
import matplotlib.pyplot as plt
Then I prepare my data and split it into an 80% training set and a 20% validation set
# fix a seed for reproducing same results if we wish to train and evaluate our network more than once
seed = 9
np.random.seed(seed)
# load dataset
dataset = np.loadtxt('tr_set.csv', delimiter=',', skiprows=1)
# Show the first 10 rows
print(dataset[1:10])
# Delete the first column with the patient index
dataset = dataset[:,1:42]
# Split into input (features) and output variables
X = dataset[:,2:40]
Y = dataset[:,40]
# Counting elements in class 0 and in class 1
count_0 = 0
count_1 = 0
for i in Y:
    if i == 0:
        count_0 = count_0 + 1
    if i == 1:
        count_1 = count_1 + 1
print("Number of elements in 0 class:", count_0)
print("Number of elements in 1 class:", count_1)
# The dataset is balanced
# Split into training set(80%) and validation set (20%)
(X_train, X_val, Y_train, Y_val) = train_test_split(X, Y, test_size=0.2, random_state=seed)
And here is my model, after reshaping X_train and X_val because I use Conv1D
# Create the model
opt = SGD(lr=0.00001)
model = Sequential()
model.add(Dense(1024, activation='relu', kernel_initializer='random_uniform', input_shape=(1,38)))
model.add(BatchNormalization()) # It is used to normalize the input layer by adjusting and scaling the activations.
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.summary()
model.add(Conv1D(64, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(2))
model.summary()
model.add(Dense(1, activation='sigmoid'))
model.summary()
# compile the model
model.compile(loss='binary_crossentropy', optimizer= opt, metrics=['accuracy'])
# fit the model
history = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=15, batch_size=10)
# w_data = model.get_weights()
What is wrong? Is it that I deleted the max-pooling because I had a dimension problem (something like "subtracting 2 from 1")?
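(For context on that dimension error: a minimal sketch, assuming X was reshaped to (n_samples, 1, 38) for Conv1D, showing why MaxPooling1D(2) cannot be applied here.)

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D

# With X reshaped to (n_samples, 1, 38), the time axis has length 1.
model = Sequential()
model.add(Conv1D(64, 3, padding="same", activation="relu", input_shape=(1, 38)))
# A pool of size 2 would shrink the length-1 axis below zero, raising
# "Negative dimension size caused by subtracting 2 from 1" -- the
# dimension problem described above.
# model.add(MaxPooling1D(2))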
