I am trying to adapt the following CNN example to my own dataset and am running into errors:
https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
X = D.replace(['Resting', 'Swimming', 'Feeding', 'Non directed motion'], [0, 1, 2, 3])
X_Label = X['Label'].to_numpy()
X_Data = X[['X_static','Y_static','Z_static','X_dynamic','Y_dynamic','Z_dynamic']].to_numpy()
X_names = ['X_static','Y_static','Z_static','X_dynamic','Y_dynamic','Z_dynamic']
X_Label_Names = np.array(['Resting', 'Swimming', 'Feeding', 'Non directed motion'])
X_Data is a 5600 x 6 NumPy matrix; each column represents one type of measurement over time.
X_Label is a 5600 x 1 column of values 0 through 3 representing the class labels: 0 is resting, 1 is swimming, and so on.
X = X_Data
y = X_Label
def load_dataset_f(X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, stratify=y, random_state=random_state
    )
    trainX = X_train
    trainy = y_train
    testX = X_test
    testy = y_test
    print(trainX)
    print(trainX.shape)
    print(trainy.shape)
    return trainX, trainy, testX, testy
# fit and evaluate a model
def evaluate_model_f(trainX, trainy, testX, testy):
    verbose, epochs, batch_size = 2, 10, 20
    n_timesteps, n_features, n_outputs = 6, 1, 1
    print('n timesteps --------------------------------------------------------------------')
    print(n_timesteps)
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    print(to_categorical(trainy))
    model.fit(trainX.reshape(len(trainX), 6, 1), to_categorical(trainy))
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy
def run_experiment_f(repeats=1):
    # load data
    trainX, trainy, testX, testy = load_dataset_f(X, y)
    print(trainX)
    # repeat experiment
    scores = list()
    for r in range(repeats):
        score = evaluate_model_f(trainX, trainy, testX, testy)
        score = score * 100.0
        print('>#%d: %.3f' % (r+1, score))
        scores.append(score)
    # summarize results
    summarize_results(scores)

load_dataset_f(X, y)
run_experiment_f()
I am unfamiliar with the TensorFlow library and am getting errors at model.fit(); I am not sure how to approach this. The matrix in the example was 3D, whereas my data is 2D; I'm not sure if that matters. How do I get this code to work?
You need to make sure that the input to your Conv1D layer has the shape (timesteps, features), and that your last output layer has as many units as there are unique labels in your dataset. Here is a working example:
import tensorflow as tf

# dummy data standing in for the real dataset: 32 samples,
# 6 measurements each, integer labels 0-3
trainX = tf.random.normal((32, 6))
trainy = tf.random.uniform((32, 1), maxval=4, dtype=tf.int32)

verbose, epochs, batch_size = 2, 10, 20
n_timesteps, n_features, n_outputs = 6, 1, 4

model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(100, activation='relu'))
model.add(tf.keras.layers.Dense(n_outputs, activation='softmax'))  # one unit per class
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit network: add the missing feature dimension, then one-hot encode the labels
trainX = tf.expand_dims(trainX, axis=2)  # (32, 6) -> (32, 6, 1)
model.fit(trainX, tf.keras.utils.to_categorical(trainy), epochs=epochs, batch_size=batch_size, verbose=verbose)
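Applied to your arrays, that means reshaping X_Data from (5600, 6) to (5600, 6, 1) and one-hot encoding the four labels before fitting. A minimal sketch, assuming trainX and trainy come from your load_dataset_f:
# sketch: adapt the working example above to the asker's data
trainX = trainX.reshape(len(trainX), 6, 1)                            # (samples, timesteps=6, features=1)
trainy_onehot = tf.keras.utils.to_categorical(trainy, num_classes=4)  # (samples, 4)
model.fit(trainX, trainy_onehot, epochs=10, batch_size=20, verbose=2)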
Related
I am new to ML and I am creating a CNN model for sentiment analysis using word2vec. My word2vec vectors also contain negative values. While fitting the model I got an error:
InvalidArgumentError in model.fit(X_train, Y_train, epochs=3, batch_size=64)
InvalidArgumentError: Graph execution error: Detected at node 'sequential_30/embedding_29/embedding_lookup'
This is the code to create the model
def get_vec(x):
    doc = nlp(x)
    vec = doc.vector
    return vec
df['vec'] = df['text'].apply(lambda x: get_vec(x))
XTrain = df['vec'].to_numpy()
XTrain = XTrain.reshape(-1, 1)
XTrain = np.concatenate(np.concatenate(XTrain, axis = 0), axis = 0).reshape(-1, 300)
YTrain = df['target']
X_train, X_test, Y_train, Y_test = train_test_split(XTrain, YTrain, test_size = .3, random_state = 45, stratify = YTrain)
# Pad the sequence to the same length
max_review_length = 1600
X_train = pad_sequences(X_train, maxlen=max_review_length)
top_words = (len(nlp.vocab)) + 1
# Using embedding from Keras
embedding_vector_length = 300
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
# Convolutional model (3x conv, flatten, 2x dense)
model.add(Convolution1D(64, 3, padding='same'))
model.add(Convolution1D(32, 3, padding='same'))
model.add(Convolution1D(16, 3, padding='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180,activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=3, batch_size=64)
When I replace all the negative values in df['vec'], the code runs without error, but with 0 accuracy. What is wrong here? Please help. Thanks in advance.
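For context, the error points at embedding_lookup because an Embedding layer maps integer token indices in [0, top_words) to vectors; feeding it real-valued (and negative) word2vec components makes the lookup fail. A minimal sketch of the alternative, assuming the (samples, 300) matrix of spaCy vectors is used directly, without the Embedding layer or pad_sequences:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dropout, Dense

# sketch (assumption): treat each precomputed 300-dim document vector as a
# length-300 sequence with one feature; no integer-index Embedding needed
model = Sequential()
model.add(Conv1D(64, 3, padding='same', activation='relu', input_shape=(300, 1)))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

X_train_3d = X_train.reshape(-1, 300, 1)  # assumes X_train is the raw (samples, 300) matrix
model.fit(X_train_3d, Y_train, epochs=3, batch_size=64)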
I am creating an LSTM model based on the following parameters:
embed_dim = 128
lstm_out = 200
batch_size = 32
model = Sequential()
model.add(Embedding(2500, embed_dim, input_length=X.shape[1]))
model.add(Dropout(0.2))
model.add(LSTM(lstm_out))
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
Xtrain, Xtest, ytrain, ytest = train_test_split(X, train['target'], test_size = 0.2, shuffle=True)
print(Xtrain.shape, ytrain.shape)
print(Xtest.shape, ytest.shape)
model.fit(Xtrain, ytrain, batch_size=batch_size, epochs=1, verbose=5)
but I am receiving the following error:
ValueError: Shapes (32, 1) and (32, 2) are incompatible
Can you help me with this error?
Your y_train comes from a single column of a Pandas DataFrame, so each target is a single value. That is suitable if your classification problem is a binary 0/1 problem; then you only need a single neuron in the output layer:
model = Sequential()
model.add(Embedding(2500, embed_dim, input_length=X.shape[1]))
model.add(Dropout(0.2))
model.add(LSTM(lstm_out))
# Only one neuron in the output layer
model.add(Dense(1, activation='sigmoid'))
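One more thing to check if you switch to the single sigmoid unit above: the loss needs to match. categorical_crossentropy expects one-hot (32, 2) targets, while binary_crossentropy works with the (32, 1) labels you have:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])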
I am working with an LSTM model for predicting stock prices. I did every step exactly as in the tutorial, but unlike the tutorial, my code runs into an error.
Here is the code I am working with:
df = pd.read_csv(f'D:\\algo\\all\\EURUSD_15M.csv')
df = df.loc[:, ~df.columns.str.contains('^Unnamed',)]
training_set = df.iloc[:-int(len(df)/10), 4:5].values
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)
x_train, y_train = [], []
for i in range(60, len(training_set)):
    x_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape = (x_train.shape[1],1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['Accuracy'])
model.fit(x_train, y_train, epochs=5, batch_size=64)
So what's supposed to happen is: take 60 periods of the price and predict the 61st period.
But I ultimately face the following error:
ValueError: A target array with shape (379319, 1) was passed for an output of shape (None, 60, 1) while using as loss `mean_squared_error`. This loss expects targets to have the same shape as the output.
What am I doing wrong?
As a dataset, try using TimeseriesGenerator (tf.keras.preprocessing.sequence.TimeseriesGenerator) instead of your custom lists: https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/TimeseriesGenerator
For example:
train_gen = TimeseriesGenerator(Xtrain, Xtrain, n_steps, batch_size=24*7)
valid_gen = TimeseriesGenerator(Xvalid, Xvalid, n_steps, batch_size=24*7)
test_gen = TimeseriesGenerator(Xtest, Xtest, n_steps, batch_size=24*7)
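A minimal sketch wired into the code above (assuming the scaled prices in training_set_scaled and a 60-step window). Note that the final LSTM layer must drop return_sequences=True so each 60-step window maps to a single predicted value, which is what the ValueError is complaining about:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

n_steps = 60
# data and targets are the same series: each window of 60 scaled prices
# is paired with the price that follows it
train_gen = TimeseriesGenerator(training_set_scaled, training_set_scaled,
                                length=n_steps, batch_size=64)

model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(n_steps, 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50))  # final LSTM returns only the last step: (None, 50)
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(train_gen, epochs=5)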
# Input: 13 features
# Output: binary
# 297 data points
x = x.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]].values
y1 = y['Target']
# Stratified k-fold cross-validation
kf = StratifiedKFold(n_splits=10, random_state=None)
num_features = 13
num_predictions = 2
# Splitting data
for train_index, test_index in kf.split(x, y1):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y1[train_index], y1[test_index]

    # Standardization of data
    sc = StandardScaler()
    X_train = sc.fit_transform(x_train)
    X_test = sc.transform(x_test)
    print(X_train.shape)  # o/p: (267, 13)
    print(y_train.shape)  # o/p: (267,)

    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], -1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], -1))

    # Convert class vectors to binary class matrices.
    y_train = np.reshape(y_train, (y_train.shape[0], num_predictions))
    y_test = np.reshape(y_test, (y_test.shape[0], num_predictions))

    verbose, epochs, batch_size = 1, 10, 32
    n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train.shape[1]

    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(297, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit network
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    print(accuracy)
How can I input data to feed into a CNN, which requires 3 dimensions of data? How do I solve this issue:
ValueError: cannot reshape array of size 267 into shape (267,2).
Imagine you have a line of 100 squares, and you want to make it a rectangle. Could you turn it into a rectangle by making it 2x100? No, but you could make it 50x2.
In short, you can't make a rectangle that has more values than the original.
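Concretely for the code above: the (267,) label vector holds 267 values, so it can never become (267, 2), which needs 534. If the goal is one column per class for categorical_crossentropy, one-hot encode instead of reshaping. A minimal sketch:
from tensorflow.keras.utils import to_categorical

y_train = to_categorical(y_train, num_classes=2)  # (267,) -> (267, 2)
y_test = to_categorical(y_test, num_classes=2)
The final Dense layer should then have num_predictions = 2 units rather than 297.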
I'm experimenting with geometric shape classification. My datasets are 100x100 px thresholded black-and-white images of squares, circles and triangles, 3000 in total and 1000 for each shape. [example images omitted]
But I got them as a CSV file, where each row is the one-dimensional representation of an image and the last column is the label.
I used the MLP from sklearn to build a classifier. It performed well: almost 99% accuracy.
df = pd.read_csv("img_data.csv", sep=";")
df = df.sample(frac=1) # shuffling the whole dataset
X = df.drop('label', axis=1) # Because 'label' is the column of label
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
clf = MLPClassifier(solver='adam', activation="relu", alpha=1e-5, hidden_layer_sizes=(1000,), random_state=1, verbose=True)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print('accuracy',accuracy_score(y_test, y_pred))
Then I wanted to try a CNN. For that I used Keras with the TensorFlow backend. But the accuracy couldn't get above 92%, even after 20 epochs. Here's my code:
df = pd.read_csv("img_data.csv", sep=";")
df = df.sample(frac=1) # shuffling the whole dataset
X = df.drop('label', axis=1) # Because 'label' is the column of label
y = df['label']
X = X.to_numpy()
X = np.reshape(X, (-1, 100, 100, 1))  # reshape each flat row into a 100x100x1 image
a = list(y)
label_binarizer = sklearn.preprocessing.LabelBinarizer()
label_binarizer.fit(range(max(a)))
y = label_binarizer.transform(a)  # one-hot encoding of the labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
model = Sequential()
model.add(Conv2D(32, 3, activation='relu', input_shape=[100, 100, 1]))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Conv2D(128, 3, activation='relu'))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
epochs = 20
model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=epochs, batch_size=64, verbose=1)
This seems to be a very simple problem. There is very little structure in the data, so you could try reducing the depth of the network by removing the last convolution and max-pooling block, and instead increasing the number of nodes in the fully-connected layer, like this:
model = Sequential()
model.add(Conv2D(32, 3, activation='relu', input_shape=[100, 100, 1]))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(1000, activation='relu'))
model.add(Dense(3, activation='softmax'))
You could also try image augmentation techniques like shifting and rotating to enlarge your dataset; then I would expect the convnet to outperform the standard MLP.
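A minimal sketch of such augmentation with Keras' ImageDataGenerator (the parameter values here are illustrative, not tuned):
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rotation_range=15,
                             width_shift_range=0.1,
                             height_shift_range=0.1)
# flow() yields randomly augmented batches on the fly during training
model.fit(datagen.flow(X_train, y_train, batch_size=64),
          validation_data=(X_test, y_test), epochs=epochs)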