How can I improve the accuracy of audio data classification - Python

I need to classify whether a sound is of a hammer or not.
The dataset contains 8000 .wav audio files (approx. 1 s each) for training and about 2000 for testing, with 2 classes for classification (0 if the sound is not of a hammer, 1 if it is). I tried extracting features (MFCCs with padding) and building my network, but when I fit the model on the extracted data I cannot get above 70.5% accuracy.
What can I change, or what else should I consider, to increase my accuracy? Also, if my approach is flawed, please tell me.
Here is the data extraction part:
def get_training_features(row):
    file_name = os.path.join(os.path.abspath(path_train), str(row[0]))
    audio_train, sfreq_train = lb.load(file_name, res_type='kaiser_fast')
    # 40 MFCCs per frame, zero-padded to a fixed number of frames
    mfcc = lb.feature.mfcc(y=audio_train, sr=sfreq_train, n_mfcc=40)
    pad_width = max_pad_len - mfcc.shape[1]
    mfccs = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    feature = mfccs
    label = row[1]
    return pd.Series([feature, label])
def get_validation(row):
    file_name = os.path.join(os.path.abspath(path_validation), str(row[0]))
    audio_val, sfreq_val = lb.load(file_name, res_type='kaiser_fast')
    mfcc = lb.feature.mfcc(y=audio_val, sr=sfreq_val, n_mfcc=40)
    pad_width = max_pad_len - mfcc.shape[1]
    mfccs = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    feature = mfccs
    label = row[1]
    return pd.Series([feature, label])
train_data = train_data.apply(get_training_features, axis=1)
train_data.columns = ['feature', 'label']
x = np.array(train_data.feature.tolist())
y = np.array(train_data.label.tolist())
y_binary = to_categorical(y)
x = x.reshape(x.shape[0], numb_rows, numb_cols, num_channels)

test_data = validation.apply(get_validation, axis=1)
test_data.columns = ['feature', 'label']
x_val = np.array(test_data.feature.tolist())
y_val = np.array(test_data.label.tolist())
y_binary_val = to_categorical(y_val)
x_val = x_val.reshape(x_val.shape[0], numb_rows, numb_cols, num_channels)
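As an aside, get_training_features and get_validation are identical except for the base path. A single parameterized helper would remove the duplication; a minimal sketch, assuming the same globals (max_pad_len, lb, np, pd) are in scope, where get_features and base_path are hypothetical names:

def get_features(row, base_path):
    # Same MFCC-plus-padding pipeline, with the dataset directory as a parameter.
    file_name = os.path.join(os.path.abspath(base_path), str(row[0]))
    audio, sfreq = lb.load(file_name, res_type='kaiser_fast')
    mfcc = lb.feature.mfcc(y=audio, sr=sfreq, n_mfcc=40)
    pad_width = max_pad_len - mfcc.shape[1]
    mfccs = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    return pd.Series([mfccs, row[1]])

# Usage: train_data.apply(get_features, axis=1, base_path=path_train)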
And here is the CNN:
numb_rows = 40
numb_cols = 174
num_channels = 1
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(numb_rows, numb_cols, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
checkpointer = ModelCheckpoint(filepath='./saved_models/weights.best.basic_cnn.hdf5', verbose=1, save_best_only=True)
model.fit(x, y_binary, batch_size=10, epochs=200, callbacks=[checkpointer], verbose=1)
y_pred = model.predict_classes(x_val)
acc = accuracy_score(y_val, y_pred)
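Note that the fit call above receives no validation data, while ModelCheckpoint(save_best_only=True) monitors val_loss by default, so there is no metric to compare against and the "best" checkpoint is never meaningfully selected. A minimal sketch of the same call with a held-out split and early stopping (the 20% split and patience of 15 are arbitrary assumptions, not a guaranteed accuracy fix):

from keras.callbacks import EarlyStopping, ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='./saved_models/weights.best.basic_cnn.hdf5',
                               monitor='val_loss', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Hold out 20% of the training data so val_loss exists for both callbacks.
model.fit(x, y_binary, batch_size=32, epochs=200,
          validation_split=0.2, callbacks=[checkpointer, early_stop], verbose=1)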

Related

How to create an LSTM model with time-series data for joint torque prediction

I'm trying to predict joint torque from 8 input features with an LSTM model.
I tried using TimeseriesGenerator, but I kept getting the error Key: 10 when running the model's .fit_generator function.
I also sliced my dataset into sub time frames, but I'm pretty confused by this whole concept.
The dataset is at this link.
Here's my code:
file = r'/content/drive/MyDrive/only_force.csv'
df = pd.read_csv(file)
X = df.iloc[:, :9]
y = df.iloc[:,9]
first_slice = X[:1081]
second_slice = X[1081:2076]
third_slice = X[2076:3122]
fourth_slice = X[3122:4038]
fifth_slice = X[4038:5186]
sixth_slice = X[5186:6270]
seventh_slice = X[6270:7464]
eighth_slice = X[7464:]
from keras.preprocessing.sequence import TimeseriesGenerator
look_back = 10
train_generator = TimeseriesGenerator(X_train, X_train, length=look_back, batch_size=32)
test_generator = TimeseriesGenerator(X_test, X_test, length=look_back, batch_size=32)
[verbose, epochs, batch_size] = [1, 500, 32]
input_shape = (X_train.shape[1],1)
model = Sequential()
# LSTM
model.add(LSTM(64, input_shape=input_shape, return_sequences = False))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)))
#model.add(Dropout(0.2))
model.add(Dense(32, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)))
model.add(Dense(1,activation='relu'))
earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience = 30, verbose =1, mode = 'auto')
model.summary()
model.compile(loss = 'mse', optimizer = Adam(learning_rate = 0.0005), metrics=[tf.keras.metrics.RootMeanSquaredError()])
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=verbose, validation_data=(X_test, y_test), callbacks=[earlystopper])
#model.fit_generator(train_generator, epochs=epochs, verbose=1)
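For what it's worth, a Key: 10 style KeyError often appears when TimeseriesGenerator receives pandas objects: the generator indexes its inputs by integer position, which pandas interprets as label keys. A minimal sketch that converts to NumPy first and pairs the features with the torque target (assuming X_train and y_train come from an ordinary chronological split of the X and y above):

from keras.preprocessing.sequence import TimeseriesGenerator

look_back = 10

# TimeseriesGenerator indexes data[i] by position, so pass NumPy arrays, not DataFrames.
X_train_np = X_train.to_numpy()
y_train_np = y_train.to_numpy()
train_generator = TimeseriesGenerator(X_train_np, y_train_np, length=look_back, batch_size=32)

# Each batch then has shape (batch, look_back, n_features), so the LSTM's input_shape
# should be (look_back, n_features) rather than (n_features, 1).
model.fit_generator(train_generator, epochs=epochs, verbose=1)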

Get a Grad-CAM image for a Mel-spectrogram from a saved CNN model in Python

After extracting the MFCCs via this code:
def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=nmfcc)
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return None
    return mfccs
I pass the MFCCs through a shallow network (shown below) and then save the model:
# Construct model
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())
model.add(Dense(num_labels, activation='softmax'))
I reload the model via:
def print_prediction(file_name):
    prediction_feature = extract_features(file_name)
    prediction_feature = prediction_feature.reshape(1, num_rows, num_columns, num_channels)
    model_path = os.path.join("ModelPath")
    model = load_model(model_path)
    predicted_vector = np.argmax(model.predict(prediction_feature), axis=-1)
    predicted_class = le.inverse_transform(predicted_vector)
    print('Filename: ', os.path.basename(file_name))
    print("The predicted class is:", predicted_class[0])
    predicted_proba_vector = model.predict(prediction_feature)
    predicted_proba = predicted_proba_vector[0]
    for i in range(len(predicted_proba)):
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.15f'))
    print('\n')
    return predicted_proba_vector
How can I go about getting a Grad-CAM image for an audio file that I pass into the CNN model?
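One way to approach it, assuming TF 2.x and the model above: build an auxiliary model that exposes the last convolutional layer, take the gradient of the predicted class score with respect to its feature maps, and weight the maps by the pooled gradients. A minimal sketch; 'conv2d_3' is a hypothetical layer name to be read off model.summary():

import tensorflow as tf

def grad_cam(model, input_tensor, last_conv_name):
    # Auxiliary model mapping the input to (last conv feature maps, predictions).
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(last_conv_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_out, preds = grad_model(input_tensor)
        class_index = int(tf.argmax(preds[0]))
        class_score = preds[:, class_index]
    # Gradient of the winning class score w.r.t. the conv feature maps,
    # global-average-pooled into one weight per channel.
    grads = tape.gradient(class_score, conv_out)
    weights = tf.reduce_mean(grads, axis=(0, 1, 2))
    # Weighted sum of the feature maps, then ReLU and normalisation to [0, 1].
    cam = tf.reduce_sum(conv_out[0] * weights, axis=-1)
    cam = tf.maximum(cam, 0) / (tf.reduce_max(cam) + 1e-8)
    return cam.numpy()

# The heatmap has the spatial size of the chosen conv layer; resize it to
# (num_rows, num_columns) and overlay it on the plotted MFCC matrix.
heatmap = grad_cam(model, prediction_feature, 'conv2d_3')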

Siamese network stuck at 50% accuracy

I'm trying to build a Siamese network for re-identification. The final output of the network should be feature vectors that are compared to decide whether the input images are from the same class or not. The accuracy of my model is stuck at 50%. I have tried multiple variations of the loss function, the accuracy metric, and the model's layers, but nothing seems to work, and my limited knowledge of the area keeps me from trying another approach. What could resolve this problem?
def cnn(in_dims):
    model = Sequential()
    model.add(Conv2D(8, (3,3), padding='same', input_shape=(in_dims[0], in_dims[1], in_dims[2]), activation='relu', name='conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2,2), (2,2), padding='same', name='pool1'))
    model.add(Conv2D(16, (3,3), padding='same', activation='relu', name='conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2,2), (2,2), padding='same', name='pool2'))
    model.add(Conv2D(32, (3,3), padding='same', activation='relu', name='conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2,2), (2,2), padding='same', name='pool3'))
    model.add(Conv2D(64, (3,3), padding='same', activation='relu', name='conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2,2), (2,2), padding='same', name='pool4'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, name='embeddings'))
    return model

def double_loss(y_true, y_pred):
    total_length = y_pred.shape.as_list()[-1]
    image1 = y_pred[:, 0:int(total_length*1/2)]
    image2 = y_pred[:, int(total_length*1/2):int(total_length*2/2)]
    dist = K.sum(K.square(image1 - image2), axis=1)
    return dist

def accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
input_1 = Input(shape=(128,64,3,), name='input_1')
input_2 = Input(shape=(128,64,3,), name = 'input_2')
shared = cnn([128,64,3])
encoded_image1 = shared(input_1)
encoded_image2 = shared(input_2)
merged_vector = concatenate([encoded_image1, encoded_image2], axis=-1, name='merged_layer')
model = Model(inputs=[input_1, input_2], outputs=merged_vector)
model.compile(loss=double_loss, optimizer=Adam(lr = 0.0001, beta_1 = 0.9, beta_2 = 0.999), metrics=[accuracy])
history = model.fit([X_a_train, X_b_train], y=y_train,
                    batch_size=128, epochs=100, validation_data=([X_a_val, X_b_val], y_val))
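One likely reason for the 50% plateau: double_loss never uses y_true, so the optimiser simply minimises the distance for every pair, matching or not, and the embeddings can collapse to a constant. A contrastive loss is one standard alternative; a minimal sketch, assuming y_true is 1 for same-class pairs and 0 otherwise, with a hypothetical margin of 1.0:

import keras.backend as K

def contrastive_loss(y_true, y_pred, margin=1.0):
    total_length = y_pred.shape.as_list()[-1]
    image1 = y_pred[:, :total_length // 2]
    image2 = y_pred[:, total_length // 2:]
    # Euclidean distance between the two embeddings, kept as shape (batch, 1).
    dist = K.sqrt(K.maximum(K.sum(K.square(image1 - image2), axis=1, keepdims=True), K.epsilon()))
    y_true = K.cast(y_true, dist.dtype)
    # Same-class pairs (y=1) are pulled together; different-class pairs (y=0)
    # are pushed at least `margin` apart.
    return K.mean(y_true * K.square(dist) +
                  (1.0 - y_true) * K.square(K.maximum(margin - dist, 0.0)))

model.compile(loss=contrastive_loss, optimizer=Adam(lr=0.0001), metrics=[accuracy])

The accuracy metric would likewise need to threshold the pairwise distance rather than the raw concatenated vector.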

ValueError: Error when checking target: expected dense_2 to have 2 dimensions, but got array with shape (1,)

I'm preparing a machine-learning model to recognise whether a given image contains a feature (1) or not (0), but the label shape and the model's output seem to be different.
All the information is contained in a tensorflow Dataset:
path_ds = tf.data.Dataset.from_tensor_slices(allImages)
img_ds = path_ds.map(preprocess_image)
label_ds = tf.data.Dataset.from_tensor_slices(labels)
label_ds = label_ds.map(conversion)
ds = tf.data.Dataset.zip((img_ds, label_ds))
ds = ds.shuffle(buffer_size=image_count).repeat().batch(5).prefetch(10)
iterator = ds.make_one_shot_iterator()
ds_x, ds_y = iterator.get_next()
The model is this:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(704, 480, 3)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='softmax'))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(ds_x, ds_y, epochs=3, steps_per_epoch=1, verbose=3)
allImages and labels are lists: allImages contains the paths to the images, which are preprocessed later, and labels is a list of 1s and 0s.
The error message I get is the following:
ValueError: Error when checking target: expected dense_2 to have 2 dimensions, but got array with shape (1,)
Full code:
import tensorflow as tf
import os
import glob
import csv
import lab
from PIL import ImageFile
import datetime
import pandas as pd
def preprocess_image(path):
    img = tf.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3, try_recover_truncated=True, acceptable_fraction=0.9)
    img = tf.cast(img, tf.float32)
    return img

def conversion(label):
    label = tf.cast(label, tf.float32)
    return label

def get_keys(d):
    if not d.keys():
        return None
    if list(d.keys())[0] == 'yes':
        return 1
    if list(d.keys())[0] == 'no':
        return 0

def change_range(image, label):
    return 2*image - 1, label
print("Start", datetime.datetime.now())
orig_path = 'PATH'
CAM = 'CAM'
allImages = []
labels = []
print("Parse labels", datetime.datetime.now())
for folders in os.listdir(orig_path):
    print("Reading one folder")
    df = pd.read_csv(orig_path + '/' + folders + '/' + folders + '.csv')
    df.drop(['file_size', 'region_count', 'region_id', 'region_shape_attributes', 'region_attributes'], axis=1, inplace=True)
    df.file_attributes = df.file_attributes.str.replace('true', 'True')
    labelsDF = pd.DataFrame(df.file_attributes.apply(eval).values.tolist())
    mask = labelsDF.time == 'day'
    labels2 = pd.DataFrame(labelsDF[mask].drop(['cloud', 'feature', 'light', 'problems', 'time'], axis=1).eruption.apply(get_keys))
    df.file_attributes = labels2
    df = df[mask]
    df.dropna(inplace=True)
    mask2 = df['filename'].isin(os.listdir(orig_path + '/' + folders))
    df = df[mask2]
    df.filename = '/home/mitiga/Images/' + folders + '/' + df.filename
    labels = labels + df['file_attributes'].tolist()
    allImages = allImages + df['filename'].tolist()
image_count = len(allImages)
path_ds = tf.data.Dataset.from_tensor_slices(allImages)
img_ds = path_ds.map(preprocess_image)
label_ds = tf.data.Dataset.from_tensor_slices(labels)
label_ds = label_ds.map(conversion)
print(label_ds.output_shapes)
ds = tf.data.Dataset.zip((img_ds, label_ds))
ds = ds.shuffle(buffer_size=image_count).repeat().batch(5).prefetch(10)
iterator = ds.make_one_shot_iterator()
ds_x, ds_y = iterator.get_next()
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(704, 480, 3)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='softmax'))
print(model.summary())
print(ds_y.shape)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(ds_x, ds_y, epochs=3, steps_per_epoch=1, verbose=3)
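The error itself comes from the head/loss pairing: Dense(1, activation='softmax') outputs a constant 1.0 (softmax over a single unit), and sparse_categorical_crossentropy expects one output unit per class. For a binary 0/1 label, either pairing below is consistent (a sketch against the same model body; if the dimension error persists, reshaping each label to shape (1,) inside conversion via tf.reshape(label, [1]) gives the batched targets shape (batch, 1)):

# Option A: one sigmoid unit with binary cross-entropy.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Option B: two softmax units with integer labels (0 or 1).
# model.add(tf.keras.layers.Dense(2, activation='softmax'))
# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])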

Code is not executing but is not showing any errors either

I am developing a machine learning algorithm and my code looks like this:
The directories of the images
train_dir = '../input/train_images'
train_labels = pd.read_csv('../input/train.csv')
train_labels['diagnosis'] = train_labels['diagnosis'].astype(str)
train_labels["id_code"]=train_labels["id_code"].apply(lambda x:x+".png")
test_dir = '../input/test_images'
test_labels = '../input/test.csv'
Preprocessing
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale=1./255,)
train_generator = train_datagen.flow_from_dataframe(
    train_labels[:],
    directory="../input/train_images",
    x_col='id_code', y_col='diagnosis',
    target_size=(150, 150),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,)
The model
def get_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(64, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(5, activation='softmax'))
    # Compile the model
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(),
                  metrics=['acc'])
    return model
Training the model using k-fold cross-validation
k = 4
num_validation_samples = len(train_generator) // k
np.random.shuffle(train_generator)
validation_scores = []
for fold in range(k):
    print('processing fold #:', fold)
    validation_data = train_generator[num_validation_samples * fold: num_validation_samples * (fold + 1)]
    validation_targets = train_labels[num_validation_samples * fold: num_validation_samples * (fold + 1)]
    training_data = np.concatenate([train_generator[:num_validation_samples * fold], train_generator[num_validation_samples * (fold + 1):]], axis=0)
    training_targets = np.concatenate([train_labels[:num_validation_samples * fold], train_labels[num_validation_samples * (fold + 1):]], axis=0)
    model = get_model()
    # Run the model
    model.fit_generator(
        training_data,
        training_targets,
        steps_per_epoch=30,
        epochs=30,
        batch_size=20,
        verbose=0)
    # Validate the model
    val_loss, val_acc = model.evaluate(validation_data, validation_targets, verbose=0)
    validation_scores.append(val_loss)
Every part of this code works except the model-training part. The notebook shows the execution symbol, as if it is running, but it never finishes. I have waited hours and nothing happens, and I do not understand why. I would appreciate any help.
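For what it's worth, a Keras generator cannot be shuffled or sliced like an array: np.random.shuffle(train_generator) and train_generator[a:b] do not behave as the code assumes, and fit_generator takes neither a targets nor a batch_size argument, which is consistent with training never making visible progress. A minimal sketch of k-fold cross-validation done on the dataframe instead, one generator per fold (assuming scikit-learn is available and the train_labels, train_datagen, and get_model defined above):

from sklearn.model_selection import KFold

k = 4
kfold = KFold(n_splits=k, shuffle=True, random_state=42)
validation_scores = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_labels)):
    print('processing fold #:', fold)
    # Build one generator per split from the corresponding dataframe rows.
    train_gen = train_datagen.flow_from_dataframe(
        train_labels.iloc[train_idx], directory="../input/train_images",
        x_col='id_code', y_col='diagnosis', target_size=(150, 150),
        class_mode='categorical', batch_size=32, shuffle=True)
    val_gen = train_datagen.flow_from_dataframe(
        train_labels.iloc[val_idx], directory="../input/train_images",
        x_col='id_code', y_col='diagnosis', target_size=(150, 150),
        class_mode='categorical', batch_size=32, shuffle=False)
    model = get_model()
    model.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=30, verbose=1)
    # Validate the fold on its held-out generator.
    val_loss, val_acc = model.evaluate_generator(val_gen, steps=len(val_gen))
    validation_scores.append(val_loss)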
