Keras, invalid predict size - Python

I'm quite new to Keras.
I have trained this model with inputs and outputs of size (100, 8), and I want to predict a 1*8 output from 1*8 input data.
For example:
I enter a 1*8 input.
The code returns a 1*8 output.
Here is my code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Activation
accuracy = tf.keras.metrics.CategoricalAccuracy()
xs=np.ones((100,8))
ys=np.ones((100,8))
for i in range(100):
    xs[i] *= np.random.randint(30, size=8)
    ys[i] = xs[i]*2
xs=xs.reshape(1,100,8)
ys=ys.reshape(1,100,8)
# model = tf.keras.Sequential([layers.Dense(units=1, input_shape=[2,4])])
model = Sequential()
model.add(Dense(10,input_shape=[100,8]))
model.add(Activation('relu'))
model.add(Dropout(0.15))
model.add(Dense(10))
model.add(Activation('relu'))
# model.add(Dropout(0.5))
model.add(Dense(8))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit(xs, ys, epochs=1000, batch_size=100)
p= np.array([[1,3,4,5,9,2,3,4]]).reshape(1,1,8)
print(model.predict(p))

You don't need to add an extra dimension in the first position of your data. For a 2D network, you simply have to feed your model data in the format (n_samples, n_features).
Here is the complete example:
xs=np.ones((100,8))
ys=np.ones((100,8))
for i in range(100):
    xs[i] *= np.random.randint(30, size=8)
    ys[i] = xs[i]*2
xs=xs.reshape(100,8)
ys=ys.reshape(100,8)
model = Sequential()
model.add(Dense(10,input_shape=(8,)))
model.add(Activation('relu'))
model.add(Dropout(0.15))
model.add(Dense(10))
model.add(Activation('relu'))
model.add(Dense(8))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(xs, ys, epochs=10, batch_size=100)
p = np.array([[1,3,4,5,9,2,3,4]]) # (1, 8)
pred = model.predict(p)
print(pred)
print(pred.shape) # (1, 8)

Related

How to implement Many to Many LSTM architecture for numerical data (not timeseries, not NLP) in Keras

I have read this, this
I have numerical data in arrays of these shapes:
input_array = 14674 x 4
output_array = 13734 x 4
Reshaping for LSTM (batch, timesteps, features) gives:
input_array = (14574, 100, 4)
output_array = (13634, 100, 4)
Now I would like to build a many-to-many LSTM architecture for this data. Should I use an encoder-decoder or a synced sequence input and output architecture?
I am using the following model, but it only works when the inputs and outputs have the same shape:
import tensorflow
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, MaxPooling1D, Flatten, LSTM, RepeatVector, TimeDistributed
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.001)
model_enc_dec_cnn = Sequential()
model_enc_dec_cnn.add(Conv1D(filters=64, kernel_size=9, activation='relu', input_shape=(100, 4)))
model_enc_dec_cnn.add(Conv1D(filters=64, kernel_size=11, activation='relu'))
model_enc_dec_cnn.add(MaxPooling1D(pool_size=2))
model_enc_dec_cnn.add(Flatten())
model_enc_dec_cnn.add(RepeatVector(100))
model_enc_dec_cnn.add(LSTM(100, activation='relu', return_sequences=True))
model_enc_dec_cnn.add(TimeDistributed(Dense(4)))
model_enc_dec_cnn.compile(optimizer=opt, loss='mse', metrics=['accuracy'])
history = model_enc_dec_cnn.fit(X, y, epochs=3, batch_size=64)
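For reference, a minimal synced sequence-to-sequence sketch (an assumption on my part, with dummy data): every input timestep maps directly to an output timestep, so X and y must have the same number of samples and timesteps, e.g. (n_samples, 100, 4) for both.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, TimeDistributed, Dense

# Dummy data standing in for the real arrays; note X and y must have the
# same number of samples and timesteps for a synced architecture
X = np.random.rand(128, 100, 4)
y = np.random.rand(128, 100, 4)

model = Sequential()
# return_sequences=True emits one output vector per input timestep
model.add(LSTM(100, return_sequences=True, input_shape=(100, 4)))
# TimeDistributed applies the same Dense(4) to every timestep
model.add(TimeDistributed(Dense(4)))
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=3, batch_size=64)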

SHAP ValueError: Dimension 1 in both shapes must be equal, but are 2 and 1. Shapes are [?,2] and [?,1]

Based on a previously trained feed-forward network, I tried to use SHAP to get the feature importance. I followed all the steps described in the documentation, but I am still receiving the following error:
ValueError: Dimension 1 in both shapes must be equal, but are 2 and 1. Shapes are [?,2] and [?,1]
The following code produces a reproducible example with the same error.
import pandas as pd
from numpy.random import randint
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout, Activation
from keras.optimizers import Adam
import shap
# Train_x data creation
train_x = pd.DataFrame({
    'v1': randint(2, 20, 1489),
    'v2': randint(50, 200, 1489),
    'v3': randint(30, 90, 1489),
    'v4': randint(100, 150, 1489)
})
# Train_y data creation
train_y = randint(0, 2, 1489)
# One-hot encoding as I use categorical cross-entropy
train_y = to_categorical(train_y, num_classes=2)
# Start construction of a DNN Sequential model.
model = Sequential()
# First input layer with a dropout and batch normalization layer following
model.add(Dense(256, input_dim=train_x.shape[1]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(rate=0.2))
# Output layer
model.add(Dense(2))
model.add(Activation('softmax'))
# Use the Adam optimizer
optimizer = Adam(lr=0.001)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()
# Fit model
hist = model.fit(train_x, train_y, epochs=100, batch_size=128, shuffle=False, verbose=2)
# SHAP calculation
explainer = shap.DeepExplainer(model, train_x)
shap_values = explainer.shap_values(train_x[:500].values)
I have an input shape of (None, 4) and a softmax activation with 2 neurons at the end, as I use the model for binary classification. The train_x data in the snippet above are a pandas DataFrame of shape (1489, 4).
I tried changing the train_x shape, but I got a similar error. Any help would be much appreciated.
Please see below a working example for binary classification with TF:
import pandas as pd
from numpy.random import randint
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Activation
from tensorflow.keras.optimizers import Adam
import shap
import tensorflow
print(shap.__version__, "\n",tensorflow.__version__)
# Train_x data creation
train_x = pd.DataFrame({
    'v1': randint(2, 20, 1489),
    'v2': randint(50, 200, 1489),
    'v3': randint(30, 90, 1489),
    'v4': randint(100, 150, 1489)
})
# Train_y data creation
train_y = randint(0, 2, 1489)
# One-hot encoding as I use categorical cross-entropy
train_y = to_categorical(train_y, num_classes=2)
# Start construction of a DNN Sequential model.
model = Sequential()
# First input layer with a dropout and batch normalization layer following
model.add(Dense(256, input_dim=train_x.shape[1]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(rate=0.2))
# Output layer
model.add(Dense(2))
model.add(Activation('softmax'))
# Use the Adam optimizer
optimizer = Adam(lr=0.001)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# model.summary()
# Fit model
hist = model.fit(train_x, train_y, epochs=100, batch_size=128, shuffle=False, verbose=0)
# SHAP calculation
shap.explainers._deep.deep_tf.op_handlers["AddV2"] = shap.explainers._deep.deep_tf.passthrough
explainer = shap.DeepExplainer(model, train_x)
shap_values = explainer.shap_values(train_x[:500].values)
shap.summary_plot(shap_values[1])
Output:
0.38.2
2.2.0
Note a couple of things:
Package versions (TF should be below 2.4, I believe)
The addition of the "AddV2" op handler (see the discussion here)
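As a quick sanity check (my own addition, assuming the versions above): DeepExplainer returns one array of SHAP values per output neuron, which is why shap_values[1] above selects the second softmax class.
# shap_values is a list with one entry per output neuron (two here)
print(len(shap_values))      # expected: 2
print(shap_values[0].shape)  # expected: (500, 4), one value per sample and feature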

How to apply the model.fit() function to a CNN-LSTM model?

I am trying to use this to classify images into two categories. I applied the model.fit() function, but it shows this error:
ValueError: A target array with shape (90, 1) was passed for an output of shape (None, 10) while using as loss binary_crossentropy. This loss expects targets to have the same shape as the output.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, LSTM
import pickle
import numpy as np
X = np.array(pickle.load(open("X.pickle","rb")))
Y = np.array(pickle.load(open("Y.pickle","rb")))
#scaling our image data
X = X/255.0
model = Sequential()
model.add(Conv2D(64 ,(3,3), input_shape = (300,300,1)))
# model.add(MaxPooling2D(pool_size = (2,2)))
model.add(tf.keras.layers.Reshape((16, 16*512)))
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-5)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
# model.summary()
model.fit(X, Y, batch_size=32, epochs = 2, validation_split=0.1)
If your problem is categorical, your issue is that you are using binary_crossentropy instead of categorical_crossentropy; first make sure whether you actually have a categorical or a binary classification problem.
Also, please note that if your labels are plain integers like [1, 2, 3, 4, ...] and not one-hot encoded, your loss function should be sparse_categorical_crossentropy, not categorical_crossentropy.
Depending on which case applies, ensure that (see the sketch below):
For binary classification: loss binary_crossentropy + Dense(1, activation='sigmoid')
For categorical classification: loss categorical_crossentropy + Dense(2, activation='softmax')
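A minimal sketch of the two pairings (a hypothetical model tail; the Dense(32) hidden layer and input_shape=(16,) are placeholders, not from the question):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Binary setup: one sigmoid unit, targets shaped (n, 1) with values 0/1
binary_model = Sequential([
    Dense(32, activation='relu', input_shape=(16,)),
    Dense(1, activation='sigmoid'),
])
binary_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Categorical setup: one softmax unit per class, one-hot targets shaped (n, 2)
categorical_model = Sequential([
    Dense(32, activation='relu', input_shape=(16,)),
    Dense(2, activation='softmax'),
])
categorical_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])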

How to use a 1D-CNN model in Lime?

I have a numeric health record dataset. I used a 1D CNN keras model for the classification step.
I am giving a reproducible example in Python:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv1D, Activation, Flatten, Dense
import numpy as np
a = np.array([[0,1,2,9,3], [0,5,1,33,6], [1, 12,1,8,9]])
train = np.reshape(a[:,1:],(a[:,1:].shape[0], a[:,1:].shape[1],1))
y_train = keras.utils.to_categorical(a[:,:1])
model = Sequential()
model.add(Conv1D(filters=2, kernel_size=2, strides=1, activation='relu', padding="same", input_shape=(train.shape[1], 1), kernel_initializer='he_normal'))
model.add(Flatten())
model.add(Dense(2, activation='sigmoid'))
model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False),
              metrics=['accuracy'])
model.fit(train, y_train, epochs=3, verbose=1)
I am getting this error when I apply LIME to my 1D CNN model:
IndexError: boolean index did not match indexed array along dimension 1; dimension is 4 but corresponding boolean dimension is 1
import lime
import lime.lime_tabular
explainer = lime.lime_tabular.LimeTabularExplainer(train)
Is there a solution?
I made some minor changes to your initial code (switching from keras to tensorflow.keras):
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Activation, Flatten, Dense
import numpy as np
a = np.array([[0,1,2,9,3], [0,5,1,33,6], [1, 12,1,8,9]])
train = np.reshape(a[:,1:],(a[:,1:].shape[0], a[:,1:].shape[1],1))
y_train = tf.keras.utils.to_categorical(a[:,:1])
model = Sequential()
model.add(Conv1D(filters=2, kernel_size=2, strides=1, activation='relu',
                 padding="same", input_shape=(train.shape[1], 1),
                 kernel_initializer='he_normal'))
model.add(Flatten())
model.add(Dense(2, activation='sigmoid'))
model.compile(loss=tf.keras.losses.binary_crossentropy,
              optimizer=tf.keras.optimizers.Adam(lr=0.001, beta_1=0.9,
                                                 beta_2=0.999, amsgrad=False),
              metrics=['accuracy'])
model.fit(train, y_train, epochs=3, verbose=1)
Then I added some test data, because you don't want to train and test your LIME model on the same data:
b = np.array([[1,4,5,3,2], [1,4,2,55,1], [7, 3,22,3,10]])
test = np.reshape(b[:,1:],(b[:,1:].shape[0], b[:,1:].shape[1],1))
Here I show how the RecurrentTabularExplainer can be trained:
import lime
from lime import lime_tabular
explainer = lime_tabular.RecurrentTabularExplainer(
    train, training_labels=y_train, feature_names=["random clf"],
    discretize_continuous=False, feature_selection='auto',
    class_names=['class 1', 'class 2'])
Then you can run your LIME model on one of the examples in your test data:
exp = explainer.explain_instance(np.expand_dims(test[0],axis=0), model.predict, num_features=10)
and finally display the predictions
exp.show_in_notebook()
or just print the prediction:
print(exp.as_list())
You should try lime_tabular.RecurrentTabularExplainer instead of LimeTabularExplainer. It is an explainer for keras-style recurrent neural networks. Check out the examples in the LIME documentation for a better understanding. Good luck :)

Using conv1D “Error when checking input: expected conv1d_input to have 3 dimensions, but got array with shape (213412, 36)”

My input is simply a CSV file with 237124 rows and 37 columns:
The first 36 columns are features
The last column is a binary class label
I am trying to train my data on a Conv1D model.
I have tried to build a CNN with one layer, but I have some problems with it.
The error output is:
ValueError: Error when checking input: expected conv1d_9_input to have shape (213412, 36) but got array with shape (36, 1)
Code:
import pandas as pd
import numpy as np
import sklearn
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Conv2D,Conv1D, MaxPooling2D,MaxPooling1D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout,BatchNormalization
dataset=pd.read_csv("C:/Users/User/Desktop/data.csv",encoding='cp1252')
dataset.shape
#output: (237124, 37)
array = dataset.values
X = array[:,0:36]
Y = array[:,36]
kf = KFold(n_splits=10)
kf.get_n_splits(X)
for trainindex, testindex in kf.split(X):
    Xtrain, Xtest = X[trainindex], X[testindex]
    Ytrain, Ytest = Y[trainindex], Y[testindex]
Xtrain.shape[0]
#output: 213412
Xtrain.shape[1]
#output: 36
Ytrain.shape[0]
#output: 213412
n_timesteps, n_features, n_outputs = Xtrain.shape[0], Xtrain.shape[1], Ytrain.shape[0]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(Xtrain, Ytrain, epochs=10, batch_size=32, verbose=0)
# Testing CNN model BY X test
Predictions = model.predict(Xtest,batch_size =100)
rounded = [round(x[0]) for x in Predictions]
Y_predection = pd.DataFrame(rounded)
Y_predection = Y_predection.iloc[:, 0]
...
I tried to modify the code this way:
Xtrain = np.expand_dims(Xtrain, axis=2)
But the error remains the same.
There are a couple of problems I notice in your code:
Xtrain needs to be a 3D tensor, because Conv1D cannot process anything else. So if you have 2D data, you need to add a new dimension to make it 3D.
Your input_shape needs to be changed to reflect that. For example, if you added only a single channel, it should be [n_features, 1].
# Here I'm assuming some dummy data
# Xtrain => [213412, 36, 1] (Note that you need Xtrain to be 3D not 2D - So we're adding a channel dimension of 1)
Xtrain = np.expand_dims(np.random.normal(size=(213412, 36)),axis=-1)
# Ytrain => [213412, 10]
Ytrain = np.random.choice([0,1], size=(213412,10))
n_timesteps, n_features, n_outputs =Xtrain.shape[0], Xtrain.shape[1], Ytrain.shape[1]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(n_features, 1)))
model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(Xtrain, Ytrain, epochs=10, batch_size=32, verbose=0)
You need to specify only the shape of a single sample for the input layer, not how many samples you will pass:
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_features, 1)))
This means that the input will be N samples of shape (n_features, 1).
For the last layer, you should set the number of units to how many classes you have, not how many rows your data has (see the sketch below).
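Putting the two answers together, here is a minimal end-to-end sketch for the binary label described in the question (my own assumption, with dummy data standing in for the CSV):
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

# Dummy stand-ins for the real CSV data: 36 features, one binary label
Xtrain = np.random.normal(size=(1000, 36, 1))  # channel dimension added
Ytrain = np.random.choice([0, 1], size=(1000, 1))

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(36, 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
# One sigmoid unit + binary_crossentropy, matching the binary label column
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(Xtrain, Ytrain, epochs=2, batch_size=32, verbose=0)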
