Whenever I concatenate the outputs of two layers (for example, because I want to use softmax on some outputs and another activation function on the rest), the network always fails to learn.
This is some example code to demonstrate the problem:
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import mnist
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import optimizers
import numpy as np
# MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
no_cls = max(y_train)+1
width = 20
extra_dims = True
image_size = x_train.shape[1]
original_dim = image_size * image_size
x_train = np.reshape(x_train, [-1, original_dim])
x_test = np.reshape(x_test, [-1, original_dim])
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
y_train = to_categorical(y_train, num_classes=width if extra_dims else no_cls)
y_test = to_categorical(y_test, num_classes=width if extra_dims else no_cls)
hidden_dim = 512
batch_sz = 256
eps = 10
ins = Input(shape=(original_dim,))
x = Dense(hidden_dim)(ins)
cls_pred = Dense(no_cls, activation="softmax")(x)
other = Dense(width-no_cls)(x)
outs = Concatenate()([cls_pred, other])
encoder = Model(ins, outs if extra_dims else cls_pred, name="encoder")
encoder.summary()
def cust_loss_fn(y_true, y_pred):
    return categorical_crossentropy(y_true[:no_cls], y_pred[:no_cls])
optimiser = optimizers.SGD(lr=0.003, clipvalue=0.1)
encoder.compile(optimizer=optimiser, loss=cust_loss_fn,
metrics=["accuracy"])
encoder.fit(x_train, y_train,
batch_size=batch_sz,
epochs=eps,
validation_data=(x_test, y_test))
score = encoder.evaluate(x_test, y_test)
print(score)
print(encoder.predict(x_train[0:10]))
With extra_dims = False, i.e. without the Concatenate layer, the network consistently reaches 88% accuracy within the 10 epochs. With extra_dims = True, accuracy stays at around 8% and the loss does not drop at all during training.
Am I doing something wrong?
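One thing a reader may notice in cust_loss_fn: y_true[:no_cls] slices along the batch axis (the first no_cls samples), not the first no_cls output units. A minimal sketch of axis-aware slicing, assuming the intent is to apply the loss only to the softmax part of the concatenated output:
def cust_loss_fn(y_true, y_pred):
    # Slice the last axis (the output units), not the batch axis,
    # so only the no_cls softmax outputs enter the loss.
    return categorical_crossentropy(y_true[:, :no_cls], y_pred[:, :no_cls])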
I am new to TensorFlow and Keras. I am trying to follow a Keras tutorial (https://www.youtube.com/watch?v=qFJeN9V1ZsI, min. 38:40) and everything seems to work, but as soon as I run fit, accuracy remains almost fixed at 50% and I can't understand why. Can someone help me?
Here is the code:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
train_labels = []
train_samples = []
for i in range(50):
    random_younger = randint(13,64)
    train_samples.append(random_younger)
    train_labels.append(1)
    random_older = randint(65,100)
    train_samples.append(random_older)
    train_labels.append(0)
for i in range(950):
    random_younger = randint(13,64)
    train_samples.append(random_younger)
    train_labels.append(0)
    random_older = randint(65,100)
    train_samples.append(random_older)
    train_labels.append(1)
train_label = np.array(train_labels)
train_samples = np.array(train_samples)
train_labels, train_labels = shuffle(train_labels, train_labels)
scaler = MinMaxScaler(feature_range=(0,1))
scaled_train_samples = scaler.fit_transform(train_samples.reshape(-1,1))
scaled_train_samples = np.array(scaled_train_samples)
model = Sequential([
    Dense(units=16, input_shape=(1,), activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=2, activation='softmax')
])
#model.summary()
train_labels = np.array(train_labels)
scaled_train_samples = np.array(scaled_train_samples)
model.compile(optimizer = Adam(learning_rate=0.01), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x=scaled_train_samples, y=train_labels, batch_size=10, epochs=30, shuffle=True, verbose =2)
input()
You have this code:
train_labels, train_labels = shuffle(train_labels, train_labels)
You shuffle the labels but not the training samples. I suspect you want
train_labels, train_samples = shuffle(train_labels, train_samples)
which shuffles the labels and the samples together. Also, I am curious why for the first 50 samples you label younger as 1 and older as 0, while for the next 950 samples the labels are reversed?
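For illustration, a minimal sketch of how sklearn's shuffle keeps two arrays aligned (the toy data here is made up):
import numpy as np
from sklearn.utils import shuffle

samples = np.array([13, 70, 25, 90])
labels = np.array([1, 0, 1, 0])

# Both arrays are permuted with the same random indices,
# so each sample stays paired with its original label.
samples, labels = shuffle(samples, labels, random_state=0)
print(samples, labels)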
I am trying to classify skin diseases into 4 classes using transfer learning with VGG16. No matter what changes I make, it's not classifying: accuracy is 1 and loss is 0. I don't think it's overfitting either, because the confusion matrix shows it classifies everything as a single class.
import json
import math
import os
import itertools
import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping
from keras.models import Model, Sequential
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.optimizers import Adam
from keras.layers import Dense, Dropout, Flatten
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
from sklearn import metrics
import scipy
import tensorflow as tf
from keras import backend as K
import gc
from functools import partial
from tqdm import tqdm
from collections import Counter
#loading data and data preparation
def Dataset_loader(DIR, RESIZE, sigmaX=10):
    IMG = []
    read = lambda imname: np.asarray(Image.open(imname).convert("RGB"))
    for IMAGE_NAME in tqdm(os.listdir(DIR)):
        PATH = os.path.join(DIR, IMAGE_NAME)
        _, ftype = os.path.splitext(PATH)
        if ftype == ".jpg":
            img = read(PATH)
            img = cv2.resize(img, (RESIZE, RESIZE))
            IMG.append(np.array(img))
    return IMG
eczema_train = np.array(Dataset_loader('/content/medical-image-analysis/train/Eczema Photos', 224))
melanoma_train = np.array(Dataset_loader('/content/medical-image-analysis/train/Melanoma Skin Cancer Nevi and Moles',224))
psoriasis_train = np.array(Dataset_loader('/content/medical-image-analysis/train/Psoriasis pictures Lichen Planus and related diseases',224))
#labelling
eczema_train_label = np.zeros(len(eczema_train))
melonoma_train_label = np.zeros(len(melanoma_train))
psoriasis_train_label = np.zeros(len(psoriasis_train))
X_train = np.concatenate((eczema_train, melanoma_train, psoriasis_train), axis=0,)
Y_train = np.concatenate((eczema_train_label, melonoma_train_label, psoriasis_train_label), axis=0,)
#train and evaluation split
X_train = (X_train-np.mean(X_train))/np.std(X_train)
X_train, X_test, Y_train, Y_test = train_test_split(
X_train, Y_train,
test_size=0.3,
random_state=1
)
X_test, X_val, Y_test, Y_val = train_test_split(
X_test, Y_test,
test_size=0.3,
random_state=1
)
s = np.arange(X_train.shape[0])
np.random.shuffle(s)
X_train = X_train[s]
Y_train = Y_train[s]
pre_trained_model = VGG16(input_shape=(224, 224, 3), include_top=False, weights="imagenet")
for layer in pre_trained_model.layers:
    print(layer.name)
    layer.trainable = False
print(len(pre_trained_model.layers))
last_layer = pre_trained_model.get_layer('block5_pool')
print('last layer output shape:', last_layer.output_shape)
last_output = last_layer.output
# Pool the backbone output down to a one-dimensional feature vector
x = layers.GlobalMaxPooling2D()(last_output)
# Add a fully connected layer with 128 units and softmax activation
x = layers.Dense(128, activation='softmax')(x)
# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)
# Add a final softmax layer for 4-way classification
x = layers.Dense(4, activation='softmax')(x)
# Configure and compile the model
model = Model(pre_trained_model.input, x)
optimizer = Adam(lr=0.0001, beta_1=0.9, beta_2=0.9999, epsilon=None, decay=0.0, amsgrad=True)
model.compile(loss='categorical_crossentropy',
optimizer=optimizer,
metrics=['accuracy'])
train_datagen = ImageDataGenerator(rotation_range=60,
shear_range=0.2,
zoom_range=0.2,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True,
fill_mode='nearest')
train_datagen.fit(X_train)
val_datagen = ImageDataGenerator()
val_datagen.fit(X_val)
batch_size = 64
epochs = 3
history = model.fit(train_datagen.flow(X_train,Y_train, batch_size=batch_size),
epochs = epochs, validation_data = val_datagen.flow(X_val, Y_val),
verbose = 1, steps_per_epoch=(X_train.shape[0] // batch_size),
validation_steps=(X_val.shape[0] // batch_size))
This is the code. Can you please help find out where we went wrong? Thanks in advance :)
#labelling
eczema_train_label = np.zeros(len(eczema_train))
melonoma_train_label = np.zeros(len(melanoma_train))
psoriasis_train_label = np.zeros(len(psoriasis_train))
All of these numpy arrays are filled with zeroes. No matter whether an image represents eczema, melanoma, or psoriasis, you define its ground-truth label as 0 and hence your model "learns" to unconditionally output 0. Make the labels for at least two of the classes nonzero.
Further, you are using categorical_crossentropy, which expects one-hot encoded labels. With plain integer labels like these, use a sparse categorical (or sparse softmax) crossentropy instead. (Note that there may be other issues in the code, but this one stands out as a common error.)
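A minimal sketch of both fixes, reusing the variable names from the question (the particular class-to-integer assignment is my assumption):
import numpy as np

# Give each class a distinct integer label.
eczema_train_label = np.full(len(eczema_train), 0)
melonoma_train_label = np.full(len(melanoma_train), 1)
psoriasis_train_label = np.full(len(psoriasis_train), 2)

# With integer labels, compile with the sparse variant of the loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])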
I'm making a project where I have to predict whether a house price is above or below its median price. To do that, I'm using this dataset from Kaggle (https://drive.google.com/file/d/1GfvKA0qznNVknghV4botnNxyH-KvODOC/view), where 1 means "Above Median" and 0 means "Below Median". I wrote this code to train a neural network and save it as a .h5 file:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
import h5py
df = pd.read_csv('housepricedata.csv')
dataset = df.values
X = dataset[:,0:10]
Y = dataset[:,10]
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.3)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5)
model = Sequential([
    Dense(32, activation='relu', input_shape=(10,)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='sgd',
loss='binary_crossentropy',
metrics=['accuracy'])
hist = model.fit(X_train, Y_train,
batch_size=32, epochs=100,
validation_data=(X_val, Y_val))
model.save("house_price.h5")
After running it, it successfully saves the .h5 file to my directory. What I want to do now is use my trained model to make predictions on a new .csv file and determine whether each of those rows is above or below the median price. This is an image of the CSV file in VS Code that I want it to make predictions on:
[screenshot of the CSV file]
As you can see, this file doesn't contain a 1 (above median) or 0 (below median) column, because that's what I want it to predict. This is the code I wrote to do that:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.models import load_model
import h5py
df = pd.read_csv('data.csv')
dataset = df.values
X = dataset[:,0:10]
Y = dataset[:,10]
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.3)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5)
model = load_model("house_price.h5")
y_pred = model.predict(X_test)
print(y_pred)
Its output is [[0.00101464]]. I have no clue what that is, or why it returns only one value even though the CSV file has 4 rows. Does anyone know how I can fix that and predict either a 1 or a 0 for each row in the CSV file?
Thank you!
As far as I understand what you want, try this! This code works for me:
import tensorflow
model = tensorflow.keras.models.load_model("house_price.h5")
y_pred=model.predict(X_test)
If you are still not able to get it working, visit the following sites:
1: answer1
2: answer2
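Beyond loading the model, a likely reason you see only one value is the train_test_split calls: after splitting 4 rows twice, X_test ends up holding a single row. A sketch of predicting a 0/1 label for every row of the new file (my assumption about the intent; ideally you would also reuse the scaler fitted on the training data rather than refitting it on 4 rows):
import pandas as pd
from keras.models import load_model

model = load_model("house_price.h5")

# Use every row of the new file; no train/test split is needed
# when you only want predictions.
df = pd.read_csv('data.csv')
X_new = df.values[:, 0:10]

probs = model.predict(X_new)                # one sigmoid probability per row
labels = (probs > 0.5).astype(int).ravel()  # threshold to 0/1
print(labels)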
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('C:\\Users\\acer\\Downloads\\housepricedata.csv')
dataset.head()
X=dataset.iloc[:,0:10]
y=dataset.iloc[:,10]
X.head()
from sklearn.preprocessing import StandardScaler
obj=StandardScaler()
X=obj.fit_transform(X)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2020, test_size=0.25)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=10))
# classifier.add(Dropout(p = 0.1))
# Adding the second hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
# classifier.add(Dropout(p = 0.1))
# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(X_train, y_train, batch_size=10, epochs=100)
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
print(y_pred)
classifier.save("house_price.h5")
import tensorflow
model = tensorflow.keras.models.load_model("house_price.h5")
y_pred=model.predict(X_test)
y_pred = (y_pred > 0.5)
print(y_pred)
Both y_pred calls produce the same output for me.
One thing to note: y_pred does not contain 0s and 1s, because the sigmoid function outputs a prediction as a probability. So if y_pred > 0.5, the predicted value is 1 (True represents 1, False represents 0). You can use pandas' replace or map function to convert True into 1, as sketched below.
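For example, a small sketch of the conversion (plain NumPy works here too, not only pandas):
import numpy as np

y_pred = np.array([[0.2], [0.7], [0.9]])  # example sigmoid outputs
labels = (y_pred > 0.5).astype(int)       # True/False -> 1/0
print(labels.ravel())                     # [0 1 1]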
When I tried to use ResNet50 with all layers frozen to classify the Fashion-MNIST dataset, I could only get around 78% training accuracy and 41% prediction accuracy. Below is the code snippet:
from keras import optimizers
from keras.applications.resnet50 import ResNet50
from keras.datasets import fashion_mnist
from keras.layers import Activation, Flatten, Dense
from keras.models import Model
from sklearn.model_selection import train_test_split  # needed for the split below
(x, y), (x_test, y_test) = fashion_mnist.load_data()
dat_train, dat_val, train_lbs, val_lbs = train_test_split(x, y, test_size=10000, random_state=42)
... # transform dat_train, dat_val, x_test from shapes (28, 28, ) to (32, 32, 3) and re-scale to value range [0, 1], also one hot encoding train_lbs, val_lbs, y_test to shape (, 10)
resnet50_base = ResNet50(include_top=False,
weights='imagenet',
input_shape=(32, 32, 3))
for layer in resnet50_base.layers:
layer.trainable = False
base_out = resnet50_base.output
base_out = Flatten()(base_out)
base_out = Dense(128)(base_out)
base_out = Activation("relu")(base_out)
preds = Dense(10, activation="softmax")(base_out)
model = Model(inputs=resnet50_base.input, outputs=preds)
model.compile(loss="categorical_crossentropy",
optimizer=optimizers.Adam(lr=0.0005),
metrics=["accuracy"])
It produced this result:
[screenshot of the training output]
Did I do something wrong, or is ResNet50 not suited for the Fashion-MNIST dataset?
Hey, you can look at my repo https://github.com/rushu570/Fashion_Mnist to understand how it works.
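One commonly suggested adjustment for frozen ImageNet backbones (my assumption, not taken from the linked repo) is to preprocess inputs the same way the pretrained weights were trained, rather than scaling to [0, 1]:
from keras.applications.resnet50 import preprocess_input
import numpy as np

# x has shape (n, 32, 32, 3) with pixel values in [0, 255]
x = np.random.randint(0, 256, size=(4, 32, 32, 3)).astype('float32')
x = preprocess_input(x)  # RGB->BGR plus ImageNet mean subtraction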
I seem to have some issue getting proper results with Keras, and I am not sure why I always get bad results, so today I decided to test it with something that should certainly work: the CIFAR-10 dataset and the VGG16 network.
#!/usr/bin/python
import warnings
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import keras
from keras import metrics
from keras import backend as K
from keras.utils import np_utils, layer_utils, plot_model
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
from keras.engine.topology import get_source_inputs
from keras.models import Sequential, Model, load_model
from keras.optimizers import SGD
from keras.layers import Input, Dense, Dropout, Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Reshape, \
    GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.layers.core import Activation, Lambda, Flatten
from keras.layers.advanced_activations import LeakyReLU, PReLU, ELU
from keras.layers.merge import Concatenate
from keras.layers.local import LocallyConnected1D
from keras.callbacks import ReduceLROnPlateau, CSVLogger, EarlyStopping, ModelCheckpoint
from keras.preprocessing import image
from keras.datasets import mnist, cifar10
from sklearn.preprocessing import MinMaxScaler
from skimage.util.shape import view_as_blocks, view_as_windows
import scipy
import tensorflow as tf
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
batch_size = 128
num_classes = 10
epochs = 12
# input image dimensions
img_rows, img_cols = 32, 32
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
#print('x_train shape:', x_train.shape)
#print(x_train.shape[0], 'train samples')
#print(x_test.shape[0], 'test samples')
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
def fws():
    # Params: batch, lr, decay, momentum, epochs
    # Input shape: (batch_size, 40, 45, 3)
    # Output shape: (1, 15, 50)
    # number of units in conv_feature_map = split
    input = Input(shape=(img_rows, img_cols, 3))
    zero_padded_section = keras.layers.convolutional.ZeroPadding2D(padding=(20, 17), data_format='channels_last')(input)
    model = keras.applications.vgg16.VGG16(include_top=False,
                                           weights='imagenet',
                                           input_shape=(48, 84, 3),
                                           pooling='max',
                                           classes=10)
    model_output = model(input)
    # Fully connected head
    dense1 = Dense(units=512, activation='relu', name="dense_1")(model_output)
    dense2 = Dense(units=256, activation='relu', name="dense_2")(dense1)
    dense3 = Dense(units=10, activation='softmax', name="dense_3")(dense2)
    model = Model(inputs=input, outputs=dense3)
    #sgd = SGD(lr=0.08, decay=0.025, momentum=0.99, nesterov=True)
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=[metrics.categorical_accuracy])
    model.fit(x_train[:500], y_train[:500],
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test[:10], y_test[:10]))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

fws()
I am only using 500 training and 10 validation samples from the dataset, since training on all of it would take a while... but based on the results I have now, nothing is being learned.
Seven epochs in, the categorical accuracy is 0.1300 and the validation accuracy is 0.200, and they have stayed there for all seven epochs...
What is wrong?
The error was the number of steps_per_epoch not matching the full dataset size; fixing that resolved things, as sketched below.
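A sketch of what that fix could look like with a generator (an assumption based on the description, since the posted code does not show a steps_per_epoch argument):
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator()

# One step consumes one batch, so covering the whole training set
# each epoch requires len(x_train) // batch_size steps.
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=len(x_train) // batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test))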