feature extraction using librosa

feature extraction using librosa - python

I am using following code obtain from Github. This code extract mfccs,chroma, melspectrogram, tonnetz and spectral contrast features give output in form of feat.np. I want to extract some other features like rmse, zerocross but when i add the relevent code i give error while concatenation.
# coding= UTF-8
#
# Author: Fing
# Date : 2017-12-03
#
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf
def extract_feature(file_name):
X, sample_rate = sf.read(file_name, dtype='float32')
if X.ndim > 1:
X = X[:,0]
X = X.T
# short term fourier transform
stft = np.abs(librosa.stft(X))
# mfcc
mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
# chroma
chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
# melspectrogram
mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
# spectral contrast
contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
return mfccs,chroma,mel,contrast,tonnetz
def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
features, labels = np.empty((0,193)), np.empty(0)
for label, sub_dir in enumerate(sub_dirs):
for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
try:
mfccs, chroma, mel, contrast,tonnetz = extract_feature(fn)
except Exception as e:
print("[Error] extract feature error. %s" % (e))
continue
ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
print(ext_features)
print(features)
features = np.vstack([features,ext_features])
# labels = np.append(labels, fn.split('/')[1])
labels = np.append(labels, label)
print("extract %s features done" % (sub_dir))
return np.array(features), np.array(labels, dtype = np.int)
def one_hot_encode(labels):
n_labels = len(labels)
n_unique_labels = len(np.unique(labels))
one_hot_encode = np.zeros((n_labels,n_unique_labels))
one_hot_encode[np.arange(n_labels), labels] = 1
return one_hot_encode
# Get features and labels
r = os.listdir("data/")
r.sort()
features, labels = parse_audio_files('data', r)
np.save('feat.npy', features)
np.save('label.npy', labels)
`
This code work fine but when i want to extract other features also like rmse,zero crossing rate etc . when i add
#rmse=np.mean(librosa.feature.rmse(y=X).T,axis=0)
i got following error
File "C:\Users\HP\Anaconda2\lib\site-packages\numpy\core\shape_base.py", line 237, in vstack
return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
ValueError: all the input array dimensions except for the concatenation axis must match exactly
how i can extract other features and concatenate also.

The shape of STFT and RMSE are different than MFCC and other features. STFT and MFCC are 2 dimensional while the others are one dimensional features.

Related

Iterate over an audio file with Python's librosa

I was trying to use a voice emotion detecton model on github HERE. Based on their examples, I was able to implement the following code to predict the final emotion of an audio file as a single prediction. Looks like it makes sub-predictions for each 0.4s window in the audio file, and then takes the maximum occurance as the final output (here is the sample file I used).
How can I change it to print a prediction for every 1s chunk of the audio file (as opposed to a single value for the whole file)?
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
# Create a configuration class to help if I want to change parameters later
class Config:
def __init__(self, n_mfcc = 26, n_feat = 13, n_fft = 552, sr = 22050, window = 0.4, test_shift = 0.1):
self.n_mfcc = n_mfcc
self.n_feat = n_feat
self.n_fft = n_fft
self.sr = sr
self.window = window
self.step = int(sr * window)
self.test_shift = test_shift
self.shift = int(sr * test_shift)
config = Config()
model = pickle.load(open('cnn_ep25_mfccOnly_moreData.pkl', 'rb'))
wav, sr = librosa.load('YAF_chain_angry.wav')
all_results = []
# Initialize a local results list
local_results = []
# Initialize min and max values for each file for scaling
_min, _max = float('inf'), -float('inf')
# Load the file
# Create an array to hold features for each window
X = []
# Iterate over sliding 0.4s windows of the audio file
for i in range(int((wav.shape[0]/sr-config.window)/config.test_shift)):
X_sample = wav[i*config.shift: i*config.shift + config.step] # slice out 0.4s window
X_mfccs = librosa.feature.mfcc(X_sample, sr, n_mfcc = config.n_mfcc, n_fft = config.n_fft,
hop_length = config.n_fft)[1:config.n_feat + 1] # generate mfccs from sample
_min = min(np.amin(X_mfccs), _min)
_max = max(np.amax(X_mfccs), _max) # check min and max values
X.append(X_mfccs) # add features of window to X
# Put window data into array, scale, then reshape
X = np.array(X)
X = (X - _min) / (_max - _min)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
# Feed data for each window into model for prediction
for i in range(X.shape[0]):
window = X[i].reshape(1, X.shape[1], X.shape[2], 1)
local_results.append(model.predict(window))
# Aggregate predictions for file into one then append to all_results
local_results = (np.sum(np.array(local_results), axis = 0)/len(local_results))[0]
local_results = list(local_results)
prediction = np.argmax(local_results)
# Turn all results into a dataframe
df_cols = ['neutral', 'happy', 'sad', 'angry', 'fearful', 'disgusted', 'surprised']
print(df_cols)
print(local_results)
print("Prediction: "+ df_cols[prediction])

Sklearn logistic regression shape error, but x, y shapes are consistent

I get a ValueError: Found input variables with inconsistent numbers of samples: [20000, 1] when I run the following even though the row values of x and y are correct. I load in the RCV1 dataset, get indices of the categories with the top x documents, create list of tuples with equal number of randomly-selected positives and negatives for each category, and then finally attempt to run a logistic regression on one of the categories.
import sklearn.datasets
from sklearn import model_selection, preprocessing
from sklearn.linear_model import LogisticRegression
from matplotlib import pyplot as plt
from scipy import sparse
rcv1 = sklearn.datasets.fetch_rcv1()
def get_top_cat_indices(target_matrix, num_cats):
cat_counts = target_matrix.sum(axis=0)
#cat_counts = cat_counts.reshape((1,103)).tolist()[0]
cat_counts = cat_counts.reshape((103,))
#b = sorted(cat_counts, reverse=True)
ind_temp = np.argsort(cat_counts)[::-1].tolist()[0]
ind = [ind_temp[i] for i in range(5)]
return ind
def prepare_data(x, y, top_cat_indices, sample_size):
res_lst = []
for i in top_cat_indices:
# get column of indices with relevant cat
temp = y.tocsc()[:, i]
# all docs with labeled category
cat_present = x.tocsr()[np.where(temp.sum(axis=1)>0)[0],:]
# all docs other than labelled category
cat_notpresent = x.tocsr()[np.where(temp.sum(axis=1)==0)[0],:]
# get indices equal to 1/2 of sample size
idx_cat = np.random.randint(cat_present.shape[0], size=int(sample_size/2))
idx_nocat = np.random.randint(cat_notpresent.shape[0], size=int(sample_size/2))
# concatenate the ids
sampled_x_pos = cat_present.tocsr()[idx_cat,:]
sampled_x_neg = cat_notpresent.tocsr()[idx_nocat,:]
sampled_x = sparse.vstack((sampled_x_pos, sampled_x_neg))
sampled_y_pos = temp.tocsr()[idx_cat,:]
sampled_y_neg = temp.tocsr()[idx_nocat,:]
sampled_y = sparse.vstack((sampled_y_pos, sampled_y_neg))
res_lst.append((sampled_x, sampled_y))
return res_lst
ind = get_top_cat_indices(rcv1.target, 5)
test_res = prepare_data(train_x, train_y, ind, 20000)
x, y = test_res[0]
print(x.shape)
print(y.shape)
LogisticRegression().fit(x, y)
Could it be an issue with the sparse matrices, or problem with dimensionality (there are 20K samples and 47K features)

When I run your code, I get following error:
AttributeError: 'bool' object has no attribute 'any'
That's because y for LogisticRegression needs to numpy array. So, I changed last line to:
LogisticRegression().fit(x, y.A.flatten())
Then I get following error:
ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0
This is because your sampling code has a bug. You need to subset y array with rows having that category before using sampling indices. See code below:
def prepare_data(x, y, top_cat_indices, sample_size):
res_lst = []
for i in top_cat_indices:
# get column of indices with relevant cat
temp = y.tocsc()[:, i]
# all docs with labeled category
c1 = np.where(temp.sum(axis=1)>0)[0]
c2 = np.where(temp.sum(axis=1)==0)[0]
cat_present = x.tocsr()[c1,:]
# all docs other than labelled category
cat_notpresent = x.tocsr()[c2,:]
# get indices equal to 1/2 of sample size
idx_cat = np.random.randint(cat_present.shape[0], size=int(sample_size/2))
idx_nocat = np.random.randint(cat_notpresent.shape[0], size=int(sample_size/2))
# concatenate the ids
sampled_x_pos = cat_present.tocsr()[idx_cat,:]
sampled_x_neg = cat_notpresent.tocsr()[idx_nocat,:]
sampled_x = sparse.vstack((sampled_x_pos, sampled_x_neg))
sampled_y_pos = temp.tocsr()[c1][idx_cat,:]
print(sampled_y_pos.nnz)
sampled_y_neg = temp.tocsr()[c2][idx_nocat,:]
print(sampled_y_neg.nnz)
sampled_y = sparse.vstack((sampled_y_pos, sampled_y_neg))
res_lst.append((sampled_x, sampled_y))
return res_lst
Now, Everything works like a charm

could not broadcast input array from shape (20,310,310) into shape (20)

I'm trying to detect lung cancer nodules using DICOM files. The main steps in cancer detection included following steps.
1) Preprocessing
* Converting the pixel values to Hounsfield Units (HU)
* Resampling to an isomorphic resolution to remove variance in scanner resolution
*Lung segmentation
2) Training the data set using preprocessed images in Tensorflow CNN
3) Testing and validation
I followed few online tutorials to do this.
I need to combine the given solutions in
1) https://www.kaggle.com/gzuidhof/full-preprocessing-tutorial
2) https://www.kaggle.com/sentdex/first-pass-through-data-w-3d-convnet.
I could implement the example in link two. But since it is lack ok lung segmentation and few other preprocessing steps I need to combine the steps in link one with link two. But I'm getting number of errors while doing it. Since I'm new to python can someone please help me in solving it.
There are 20 patient folders and each patient folder has number of slices, which are dicom files.
For the process_data method , slices_path of each patient and patient number was sent.
def process_data(slices,patient,labels_df,img_px_size,hm_slices):
try:
label=labels_df.get_value(patient,'cancer')
patient_pixels = get_pixels_hu(slices)
segmented_lungs2, spacing = resample(patient_pixels, slices, [1,1,1])
new_slices=[]
segmented_lung = segment_lung_mask(segmented_lungs2, False)
segmented_lungs_fill = segment_lung_mask(segmented_lungs2, True)
segmented_lungs=segmented_lungs_fill-segmented_lung
#This method returns smallest integer not less than x.
chunk_sizes =math.ceil(len(segmented_lungs)/HM_SLICES)
for slice_chunk in chunks(segmented_lungs,chunk_sizes):
slice_chunk=list(map(mean,zip(*slice_chunk))) #list - []
#print (slice_chunk)
new_slices.append(slice_chunk)
print(len(segmented_lungs), len(new_slices))
if len(new_slices)==HM_SLICES-1:
new_slices.append(new_slices[-1])
if len(new_slices)==HM_SLICES-2:
new_slices.append(new_slices[-1])
new_slices.append(new_slices[-1])
if len(new_slices)==HM_SLICES+2:
new_val =list(map(mean, zip(*[new_slices[HM_SLICES-1],new_slices[HM_SLICES],])))
del new_slices[HM_SLICES]
new_slices[HM_SLICES-1]=new_val
if len(new_slices)==HM_SLICES+1:
new_val =list(map(mean, zip(*[new_slices[HM_SLICES-1],new_slices[HM_SLICES],])))
del new_slices[HM_SLICES]
new_slices[HM_SLICES-1]=new_val
print('LENGTH ',len(segmented_lungs), len(new_slices))
except Exception as e:
# again, some patients are not labeled, but JIC we still want the error if something
# else is wrong with our code
print(str(e))
#print(len(new_slices))
if label==1: label=np.array([0,1])
elif label==0: label=np.array([1,0])
return np.array(new_slices),label
Main method
# Some constants
#data_dir = '../../CT_SCAN_IMAGE_SET/IMAGES/'
#patients = os.listdir(data_dir)
#labels_df=pd.read_csv('../../CT_SCAN_IMAGE_SET/stage1_labels.csv',index_col=0)
#patients.sort()
#print (labels_df.head())
much_data=[]
much_data2=[]
for num,patient in enumerate(patients):
if num%100==0:
print (num)
try:
slices = load_scan(data_dir + patients[num])
img_data,label=process_data(slices,patients[num],labels_df,IMG_PX_SIZE,HM_SLICES)
much_data.append([img_data,label])
#much_data2.append([processed,label])
except:
print ('This is unlabeled data')
np.save('muchdata-{}-{}-{}.npy'.format(IMG_PX_SIZE,IMG_PX_SIZE,HM_SLICES),much_data)
#np.save('muchdata-{}-{}-{}.npy'.format(IMG_PX_SIZE,IMG_PX_SIZE,HM_SLICES),much_data2)
The preprocessing part works fine but when I'm trying to enter the final out put to a Convolutional NN and train the data set , Following is the error I'm receiving including some of the comments that I had put
0
shape hu
(113, 512, 512)
Resize factor
[ 2.49557522 0.6015625 0.6015625 ]
shape
(282, 308, 308)
chunk size
15
282 19
LENGTH 282 20
Tensor("Placeholder:0", dtype=float32)
..........1.........
..........2.........
..........3.........
..........4.........
WARNING:tensorflow:From C:\Research\Python_installation\lib\site-packages\tensorflow\python\util\tf_should_use.py:170: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
..........5.........
..........6.........
Epoch 1 completed out of 20 loss: 0
..........7.........
Traceback (most recent call last):
File "C:\Research\LungCancerDetaction\sendbox2.py", line 436, in <module>
train_neural_network(x)
File "C:\Research\LungCancerDetaction\sendbox2.py", line 424, in train_neural_network
print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
File "C:\Research\Python_installation\lib\site-packages\tensorflow\python\framework\ops.py", line 606, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "C:\Research\Python_installation\lib\site-packages\tensorflow\python\framework\ops.py", line 3928, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "C:\Research\Python_installation\lib\site-packages\tensorflow\python\client\session.py", line 789, in run
run_metadata_ptr)
File "C:\Research\Python_installation\lib\site-packages\tensorflow\python\client\session.py", line 968, in _run
np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
File "C:\Research\Python_installation\lib\site-packages\numpy\core\numeric.py", line 531, in asarray
return array(a, dtype, copy=False, order=order)
ValueError: could not broadcast input array from shape (20,310,310) into shape (20)
I think it is the issue with the 'segmented_lungs=segmented_lungs_fill-segmented_lung'
In the working example,
segmented_lungs=[cv2.resize(each_slice,(IMG_PX_SIZE,IMG_PX_SIZE)) for each_slice in patient_pixels]
Please help me in solving this. I'm unable to proceed since some time. If anything is not clear please let me know.
Following is the whole code that had tried.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import dicom
import os
import scipy.ndimage
import matplotlib.pyplot as plt
import cv2
import math
import tensorflow as tf
from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
# Some constants
data_dir = '../../CT_SCAN_IMAGE_SET/IMAGES/'
patients = os.listdir(data_dir)
labels_df=pd.read_csv('../../CT_SCAN_IMAGE_SET/stage1_labels.csv',index_col=0)
patients.sort()
print (labels_df.head())
#Image pixel array watching
for patient in patients[:10]:
#label is to get the label of the patient. This is what done in the .get_value method.
label=labels_df.get_value(patient,'cancer')
path=data_dir+patient
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
#You have dicom files and they have attributes.
slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
print (len(slices),slices[0].pixel_array.shape)
#If u need to see many slices and resize the large pixelated 2D images into 150*150 pixelated images
IMG_PX_SIZE=50
HM_SLICES=20
for patient in patients[:1]:
#label is to get the label of the patient. This is what done in the .get_value method.
label=labels_df.get_value(patient,'cancer')
path=data_dir+patient
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
#You have dicom files and they have attributes.
slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
#This shows the pixel arrayed image related to the second slice of each patient
#subplot
fig=plt.figure()
for num,each_slice in enumerate(slices[:16]):
print (num)
y=fig.add_subplot(4,4,num+1)
#down sizing everything. Resize the imag size as their pixel values are 512*512
new_image=cv2.resize(np.array(each_slice.pixel_array),(IMG_PX_SIZE,IMG_PX_SIZE))
y.imshow(new_image)
plt.show()
print (len(patients))
###################################################################################
def get_pixels_hu(slices):
image = np.array([s.pixel_array for s in slices])
# Convert to int16 (from sometimes int16),
# should be possible as values should always be low enough (<32k)
image = image.astype(np.int16)
# Set outside-of-scan pixels to 0
# The intercept is usually -1024, so air is approximately 0
image[image == -2000] = 0
# Convert to Hounsfield units (HU)
for slice_number in range(len(slices)):
intercept = slices[slice_number].RescaleIntercept
slope = slices[slice_number].RescaleSlope
if slope != 1:
image[slice_number] = slope * image[slice_number].astype(np.float64)
image[slice_number] = image[slice_number].astype(np.int16)
image[slice_number] += np.int16(intercept)
return np.array(image, dtype=np.int16)
#The next problem is each patient is got different number of slices . This is a performance issue.
# Take the slices and put that into a list of slices and chunk that list of slices into fixed numer of
#chunk of slices and averaging those chunks.
#yield is like 'return'. It returns a generator
def chunks(l,n):
for i in range(0,len(l),n):
#print ('Inside yield')
#print (i)
yield l[i:i+n]
def mean(l):
return sum(l)/len(l)
def largest_label_volume(im, bg=-1):
vals, counts = np.unique(im, return_counts=True)
counts = counts[vals != bg]
vals = vals[vals != bg]
if len(counts) > 0:
return vals[np.argmax(counts)]
else:
return None
def segment_lung_mask(image, fill_lung_structures=True):
# not actually binary, but 1 and 2.
# 0 is treated as background, which we do not want
binary_image = np.array(image > -320, dtype=np.int8)+1
labels = measure.label(binary_image)
# Pick the pixel in the very corner to determine which label is air.
# Improvement: Pick multiple background labels from around the patient
# More resistant to "trays" on which the patient lays cutting the air
# around the person in half
background_label = labels[0,0,0]
#Fill the air around the person
binary_image[background_label == labels] = 2
# Method of filling the lung structures (that is superior to something like
# morphological closing)
if fill_lung_structures:
# For every slice we determine the largest solid structure
for i, axial_slice in enumerate(binary_image):
axial_slice = axial_slice - 1
labeling = measure.label(axial_slice)
l_max = largest_label_volume(labeling, bg=0)
if l_max is not None: #This slice contains some lung
binary_image[i][labeling != l_max] = 1
binary_image -= 1 #Make the image actual binary
binary_image = 1-binary_image # Invert it, lungs are now 1
# Remove other air pockets insided body
labels = measure.label(binary_image, background=0)
l_max = largest_label_volume(labels, bg=0)
if l_max is not None: # There are air pockets
binary_image[labels != l_max] = 0
return binary_image
#Loading the files
#Load the scans in given folder path
def load_scan(path):
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
try:
slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
except:
slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
for s in slices:
s.SliceThickness = slice_thickness
return slices
def resample(image, scan, new_spacing=[1,1,1]):
# Determine current pixel spacing
spacing = np.array([scan[0].SliceThickness] + scan[0].PixelSpacing, dtype=np.float32)
resize_factor = spacing / new_spacing
new_real_shape = image.shape * resize_factor
new_shape = np.round(new_real_shape)
real_resize_factor = new_shape / image.shape
new_spacing = spacing / real_resize_factor
print ('Resize factor')
print (real_resize_factor)
image = scipy.ndimage.interpolation.zoom(image, real_resize_factor, mode='nearest')
print ('shape')
print (image.shape)
return image, new_spacing
'''def chunks(l,n):
for i in range(0,len(l),n):
#print ('Inside yield')
#print (i)
yield l[i:i+n]
def mean(l):
return sum(l)/len(l)'''
#processing data
def process_data(slices,patient,labels_df,img_px_size,hm_slices):
#for patient in patients[:10]:
#label is to get the label of the patient. This is what done in the .get_value method.
try:
label=labels_df.get_value(patient,'cancer')
print ('label process data')
print (label)
#path=data_dir+patient
#slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
#You have dicom files and they have attributes.
slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
#This shows the pixel arrayed image related to the second slice of each patient
patient_pixels = get_pixels_hu(slices)
print ('shape hu')
print (patient_pixels.shape)
segmented_lungs2, spacing = resample(patient_pixels, slices, [1,1,1])
#print ('Pix shape')
#print (segmented_lungs2.shape)
#segmented_lungs=np.array(segmented_lungs2).tolist()
new_slices=[]
segmented_lung = segment_lung_mask(segmented_lungs2, False)
segmented_lungs_fill = segment_lung_mask(segmented_lungs2, True)
segmented_lungs=segmented_lungs_fill-segmented_lung
#print ('length of segmented lungs')
#print (len(segmented_lungs))
#print ('Shape of segmented lungs......................................')
#print (segmented_lungs.shape)
#print ('hiiii')
#segmented_lungs=[cv2.resize(each_slice,(IMG_PX_SIZE,IMG_PX_SIZE)) for each_slice in segmented_lungs3]
#print ('bye')
#print ('length of slices')
#print (len(slices))
#print ('shape of slices')
#print (slices.shape)
#print (each_slice.pixel_array)
#This method returns smallest integer not less than x.
chunk_sizes =math.ceil(len(segmented_lungs)/HM_SLICES)
print ('chunk size ')
print (chunk_sizes)
for slice_chunk in chunks(segmented_lungs,chunk_sizes):
slice_chunk=list(map(mean,zip(*slice_chunk))) #list - []
#print (slice_chunk)
new_slices.append(slice_chunk)
print(len(segmented_lungs), len(new_slices))
if len(new_slices)==HM_SLICES-1:
new_slices.append(new_slices[-1])
if len(new_slices)==HM_SLICES-2:
new_slices.append(new_slices[-1])
new_slices.append(new_slices[-1])
if len(new_slices)==HM_SLICES-3:
new_slices.append(new_slices[-1])
new_slices.append(new_slices[-1])
new_slices.append(new_slices[-1])
if len(new_slices)==HM_SLICES+2:
new_val =list(map(mean, zip(*[new_slices[HM_SLICES-1],new_slices[HM_SLICES],])))
del new_slices[HM_SLICES]
new_slices[HM_SLICES-1]=new_val
if len(new_slices)==HM_SLICES+1:
new_val =list(map(mean, zip(*[new_slices[HM_SLICES-1],new_slices[HM_SLICES],])))
del new_slices[HM_SLICES]
new_slices[HM_SLICES-1]=new_val
if len(new_slices)==HM_SLICES+3:
new_val =list(map(mean, zip(*[new_slices[HM_SLICES-1],new_slices[HM_SLICES],])))
del new_slices[HM_SLICES]
new_slices[HM_SLICES-1]=new_val
print('LENGTH ',len(segmented_lungs), len(new_slices))
except Exception as e:
# again, some patients are not labeled, but JIC we still want the error if something
# else is wrong with our code
print(str(e))
#print(len(new_slices))
if label==1: label=np.array([0,1])
elif label==0: label=np.array([1,0])
return np.array(new_slices),label
# Some constants
#data_dir = '../../CT_SCAN_IMAGE_SET/IMAGES/'
#patients = os.listdir(data_dir)
#labels_df=pd.read_csv('../../CT_SCAN_IMAGE_SET/stage1_labels.csv',index_col=0)
#patients.sort()
#print (labels_df.head())
much_data=[]
much_data2=[]
for num,patient in enumerate(patients):
if num%100==0:
print (num)
try:
slices = load_scan(data_dir + patients[num])
img_data,label=process_data(slices,patients[num],labels_df,IMG_PX_SIZE,HM_SLICES)
much_data.append([img_data,label])
#much_data2.append([processed,label])
except:
print ('This is unlabeled data')
np.save('muchdata-{}-{}-{}.npy'.format(IMG_PX_SIZE,IMG_PX_SIZE,HM_SLICES),much_data)
#np.save('muchdata-{}-{}-{}.npy'.format(IMG_PX_SIZE,IMG_PX_SIZE,HM_SLICES),much_data2)
IMG_SIZE_PX = 50
SLICE_COUNT = 20
n_classes=2
batch_size=10
x = tf.placeholder('float')
y = tf.placeholder('float')
keep_rate = 0.8
def conv3d(x, W):
return tf.nn.conv3d(x, W, strides=[1,1,1,1,1], padding='SAME')
def maxpool3d(x):
# size of window movement of window as you slide about
return tf.nn.max_pool3d(x, ksize=[1,2,2,2,1], strides=[1,2,2,2,1], padding='SAME')
def convolutional_neural_network(x):
# # 5 x 5 x 5 patches, 1 channel, 32 features to compute.
weights = {'W_conv1':tf.Variable(tf.random_normal([3,3,3,1,32])),
# 5 x 5 x 5 patches, 32 channels, 64 features to compute.
'W_conv2':tf.Variable(tf.random_normal([3,3,3,32,64])),
# 64 features
'W_fc':tf.Variable(tf.random_normal([54080,1024])),
'out':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
'b_conv2':tf.Variable(tf.random_normal([64])),
'b_fc':tf.Variable(tf.random_normal([1024])),
'out':tf.Variable(tf.random_normal([n_classes]))}
# image X image Y image Z
x = tf.reshape(x, shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])
conv1 = tf.nn.relu(conv3d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool3d(conv1)
conv2 = tf.nn.relu(conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool3d(conv2)
fc = tf.reshape(conv2,[-1, 54080])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc = tf.nn.dropout(fc, keep_rate)
output = tf.matmul(fc, weights['out'])+biases['out']
return output
much_data = np.load('muchdata-50-50-20.npy')
# If you are working with the basic sample data, use maybe 2 instead of 100 here... you don't have enough data to really do this
train_data = much_data[:-4]
validation_data = much_data[-4:]
def train_neural_network(x):
print ('..........1.........')
prediction = convolutional_neural_network(x)
print ('..........2.........')
#cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(prediction,y) )
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
print ('..........3.........')
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)
print ('..........4.........')
hm_epochs = 20
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
successful_runs = 0
total_runs = 0
print ('..........5.........')
for epoch in range(hm_epochs):
epoch_loss = 0
for data in train_data:
total_runs += 1
try:
X = data[0]
Y = data[1]
_, c = sess.run([optimizer, cost], feed_dict={x: X, y: Y})
epoch_loss += c
successful_runs += 1
except Exception as e:
# I am passing for the sake of notebook space, but we are getting 1 shaping issue from one
# input tensor. Not sure why, will have to look into it. Guessing it's
# one of the depths that doesn't come to 20.
pass
#print(str(e))
print ('..........6.........')
print('Epoch', epoch+1, 'completed out of',hm_epochs,'loss:',epoch_loss)
print ('..........7.........')
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
print('Done. Finishing accuracy:')
print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
print('fitment percent:',successful_runs/total_runs)
print (x)
# Run this locally:
train_neural_network(x)
P.S : resample() , segment_lung_mask() methods can be found from link 1.

For training you have
for data in train_data:
total_runs += 1
try:
X = data[0]
Y = data[1]
_, c = sess.run([optimizer, cost], feed_dict={x: X, y: Y})
So x and y are, respectively, the first two elements of a single row of train_data.
However, when calculating the accuracy you have
print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
So x is the first element of all rows of validation_data, which gives it dimensions of (20,310,310), which can't be broadcast to a placeholder of dimension (20). Ditto for y. (Broadcasting means that if you gave it a tensor of dimensions (20, 310) it would know to take each of the 310 columns and feed it to the placeholder separately. It can't figure out what to do with a tensor of (20, 310, 310).)
Incidentally, when you declare your placeholders it's a good idea to specify their dimensions, using None for the dimension depending on the number of separate examples. This way the program can warn you when dimensions don't match up.

The error message seems to indicate that the placeholder tensors x and y have not been defined correctly. They should have the same shape as the input values X = data[0] and Y = data[1], such as
x = tf.placeholder(shape=[20,310,310], dtype=tf.float32)
# if y is a scalar:
y = tf.placeholder(shape=[], dtype=tf.float32)

Stocking different types of data into an 2D numpy array

I would like to know how I can store different data into a numpy array, in order to feed it to a machine Learning SVC algorithm.
My goal, is to get a dataframe of size (sample * features) like this:
With:
Feature 1 in gray containing list of size n
Feature 2 in red, containing 2D numpy array of shape (i,k)
Feature ... Something else (array for pwelch spectrum, integers, float, ...)
Feature n in blue, containing integer.
How can I do that in Python ? Is this going to be ok for sklearn ?
Here is the current error from the code bellow:
ValueError: setting an array element with a sequence.
Code:
# -*- coding: utf-8 -*-
"""----------------------------------------------------------------------------
-------------------------------- Imports --------------------------------------
----------------------------------------------------------------------------"""
import os
import pandas as pd
import numpy as np
from scipy import io as sio
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
"""----------------------------------------------------------------------------
------------------------------ Parameters -------------------------------------
----------------------------------------------------------------------------"""
# Path to the clean EEG .mat files
EEG_path = "data"
# Listing of the .mat files
EEG = list()
for elt in os.listdir(EEG_path):
if os.path.isfile(os.path.join(EEG_path, elt)):
if '.mat' in elt[len(elt)-4:]:
EEG.append(elt)
# Spectrum used
spectrum = ['all', (1,45), (8,12)]
nb_features = 3
"""----------------------------------------------------------------------------
------------------------------ Functions --------------------------------------
----------------------------------------------------------------------------"""
# Function on 1 channel
# Input: All points from one channel, for one epoch
def filter(x, n, fs, fc1, fc2):
b, a = signal.butter(n, [fc1/(fs/2), fc2/(fs/2)], 'bandpass')
y = signal.filtfilt(b, a, x)
return y
def haming(x, L):
# Symetric L-points hamming window
window = signal.hamming(L)
y = x * window.T # Element wise multiplication
return y
# Function on one epoch
# Input is a matrix of size (channel * length)
def amp_mean(x):
size = x.shape
y = list()
for i in range(size[0]):
y.append(np.mean(x[i,:]))
return y
def amp_max(x):
size = x.shape
y = list()
for i in range(size[0]):
y.append(np.max(abs(x[i,:])))
return y
"""----------------------------------------------------------------------------
-------------------------------- Script ---------------------------------------
----------------------------------------------------------------------------"""
# Load data
s_EEG = "{}/{}".format(EEG_path, EEG[4])
data = sio.loadmat(s_EEG)['s_EEG']['data'][0][0].astype(float) # data[i, j ,k]
labels = sio.loadmat(s_EEG)['s_EEG']['labels'][0][0][0] # labels[k]
fs = sio.loadmat(s_EEG)['s_EEG']['sampling_rate'][0][0][0][0] # 500 Hz
size = data.shape
# Creates an empty data frame of size (epoch * features)
df = np.empty(shape = (size[2], nb_features * len(spectrum)))
# Filling the dataframe with features
# for every epoch
for k in range(size[2]):
for freq in spectrum:
data_to_compute = np.empty(shape = size, dtype = float)
# Apply hamming
if freq == 'all':
for i in range(size[0]):
data_to_compute[i,:,k] = haming(data[i,:,k], size[1])
# Apply hamming after filtering
else:
for i in range(size[0]):
data_to_compute[i,:,k] = haming(filter(data[i,:,k],
15, fs, freq[0], freq[1]), size[1])
# data_to_compute is ready to have feature extracted
for n in range(0, df.shape[1], nb_features):
df[k, n] = data_to_compute[:,:,k]
df[k, n+1] = amp_mean(data_to_compute[:,:,k])
df[k, n+2] = amp_max(data_to_compute[:,:,k])
# X signal / Y label
X_train, X_test, Y_train, Y_test = train_test_split(data,
list(labels),
test_size=0.15,
random_state=42)
clf = SVC()
clf.fit(X_train, Y_train)
Variable type:
Thanks !

Why CIFAR-10 images are not displayed properly using matplotlib?

From the training set I took a image('img') of size (3,32,32).
I have used plt.imshow(img.T). The image is not clear. Now changes I have to make to image('img') to make it more clearly visible.
Thanks.

Following prints 5X5 grid of random Cifar10 images. It isn't blurry, though not perfect either. Any suggestions welcome.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from six.moves import cPickle
f = open('data/cifar10/cifar-10-batches-py/data_batch_1', 'rb')
datadict = cPickle.load(f,encoding='latin1')
f.close()
X = datadict["data"]
Y = datadict['labels']
X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("uint8")
Y = np.array(Y)
#Visualizing CIFAR 10
fig, axes1 = plt.subplots(5,5,figsize=(3,3))
for j in range(5):
for k in range(5):
i = np.random.choice(range(len(X)))
axes1[j][k].set_axis_off()
axes1[j][k].imshow(X[i:i+1][0])

Make sure you don't normalize your dataset when you want to display the image.
Example :
The loader...
import torch
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../data', train=True, download=True,
transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# transforms.Normalize(
# (0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])),
batch_size=64, shuffle=True)
The code that shows the image...
img = next(iter(train_loader))[0][0]
plt.imshow(transforms.ToPILImage()(img))
Normalized
Wihtout normalization

This file reads the cifar10 dataset and plots individual images using matplotlib.
import _pickle as pickle
import argparse
import numpy as np
import os
import matplotlib.pyplot as plt
cifar10 = "./cifar-10-batches-py/"
parser = argparse.ArgumentParser("Plot training images in cifar10 dataset")
parser.add_argument("-i", "--image", type=int, default=0,
help="Index of the image in cifar10. In range [0, 49999]")
args = parser.parse_args()
def unpickle(file):
with open(file, 'rb') as fo:
data = pickle.load(fo, encoding='bytes')
return data
def cifar10_plot(data, meta, im_idx=0):
im = data[b'data'][im_idx, :]
im_r = im[0:1024].reshape(32, 32)
im_g = im[1024:2048].reshape(32, 32)
im_b = im[2048:].reshape(32, 32)
img = np.dstack((im_r, im_g, im_b))
print("shape: ", img.shape)
print("label: ", data[b'labels'][im_idx])
print("category:", meta[b'label_names'][data[b'labels'][im_idx]])
plt.imshow(img)
plt.show()
def main():
batch = (args.image // 10000) + 1
idx = args.image - (batch-1)*10000
data = unpickle(os.path.join(cifar10, "data_batch_" + str(batch)))
meta = unpickle(os.path.join(cifar10, "batches.meta"))
cifar10_plot(data, meta, im_idx=idx)
if __name__ == "__main__":
main()

The image is blurry due to interpolation. To prevent blurring in matplotlib, call imshow with keyword interpolation='nearest':
plt.imshow(img.T, interpolation='nearest')
Also, it appears that your x and y axes are being swapped when you use the transpose so you may want to display like this instead:
plt.imshow(np.transpose(img, (1, 2, 0)), interpolation='nearest')

I have used the following code to show all CIFAR data as one big image. The code show the image, but if you want to save it and not be blurtry i sugest using plt.savefig(fname, format='png', dpi=1000)
import numpy as np
import matplotlib.pyplot as plt
def reshape_and_print(self, cifar_data):
# number of images in rows and columns
rows = cols = np.sqrt(cifar_data.shape[0]).astype(np.int32)
# Image hight and width. Divide by 3 because of 3 color channels
imh = imw = np.sqrt(cifar_data.shape[1] // 3).astype(np.int32)
# reshape to number of images X color channels X image size
# transpose to color channels X number of images X image size
timg = cifar_data.reshape(rows * cols, 3, imh * imh).transpose(1, 0, 2)
# reshape to color channels X rows X cols X image hight X image with
# swap axis to color channels X rows X image hight X cols X image with
timg = timg.reshape(3, rows, cols, imh, imw).swapaxes(2, 3)
# reshape to color channels X combined image hight X combined image with
# transpose to combined image hight X combined image with X color channels
timg = timg.reshape(3, rows * imh, cols * imw).transpose(1, 2, 0)
plt.imshow(timg)
plt.show()
I made a quick data helper class that i used for a small test project, I hope is can be useful:
import gzip
import pickle
import numpy as np
import matplotlib.pyplot as plt
class DataSet(object):
def __init__(self, seed=42, setsize=10000):
self.seed = seed
# set the seed for reproducability
np.random.seed(seed)
# load the data
train_set, test_set = self.load_data()
# self.split_data(train_set, valid_set, test_set)
self.split_data(train_set, test_set, setsize)
def split_data(self, data_set, test_set, split_size):
permutation = np.random.permutation(data_set.shape[0])
self.train = data_set[permutation[:split_size]]
self.valid = data_set[permutation[split_size:split_size * 2]]
self.test = test_set[:split_size]
def reshape_for_print(self, data):
raise NotImplemented
def load_data(self):
raise NotImplemented
def show_all_imgs(self, data):
raise NotImplemented
class CIFAR(DataSet):
def load_data(self):
# try to load data
with open('./data/cifar-100-python/train', 'rb') as f:
data = pickle.load(f, encoding='latin1')
train_set = data['data'].astype(np.float32) / 255.0
with open('./data/cifar-100-python/test', 'rb') as f:
data = pickle.load(f, encoding='latin1')
test_set = data['data'].astype(np.float32) / 255.0
return train_set, test_set
def reshape_for_print(self, data):
gh = gw = np.sqrt(data.shape[0]).astype(np.int32)
imh = imw = np.sqrt(data.shape[1] // 3).astype(np.int32)
timg = data.reshape(gh * gw, 3, imh * imh).transpose(1, 0, 2)
timg = timg.reshape(3, gh, gw, imh, imw).swapaxes(2, 3)
timg = timg.reshape(3, gh * imh, gw * imw).transpose(1, 2, 0)
return timg
def show_all_imgs(self, data):
timg = self.reshape_for_print(data)
plt.imshow(timg)
plt.show()
class MNIST(DataSet):
def load_data(self):
# try to load data
with gzip.open('./data/mnist.pkl.gz', 'rb') as f:
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
return train_set[0], test_set[0]
def reshape_for_print(self, data):
gh = gw = np.sqrt(data.shape[0]).astype(np.int32)
imh = imw = np.sqrt(data.shape[1]).astype(np.int32)
timg = data.reshape(gh, gw, imh, imw).swapaxes(1, 2)
timg = timg.reshape(gh * imh, gw * imw)
return timg
def show_all_imgs(self, data):
timg = self.reshape_for_print(data)
plt.imshow(timg, cmap=plt.cm.gray)
plt.show()

I made a function to plot the RGB image from a row in the CIFAR10 dataset.The image will be blurry at best since the original size of the image is very small (32px X 32px).
def unpickle(file):
with open(file, 'rb') as fo:
dict1 = pickle.load(fo, encoding='bytes')
return dict1
pd_tr = pd.DataFrame()
tr_y = pd.DataFrame()
for i in range(1,6):
data = unpickle('data/data_batch_' + str(i))
pd_tr = pd_tr.append(pd.DataFrame(data[b'data']))
tr_y = tr_y.append(pd.DataFrame(data[b'labels']))
pd_tr['labels'] = tr_y
tr_x = np.asarray(pd_tr.iloc[:, :3072])
tr_y = np.asarray(pd_tr['labels'])
ts_x = np.asarray(unpickle('data/test_batch')[b'data'])
ts_y = np.asarray(unpickle('data/test_batch')[b'labels'])
labels = unpickle('data/batches.meta')[b'label_names']
def plot_CIFAR(ind):
arr = tr_x[ind]
sc_dpi = 157.35
R = arr[0:1024].reshape(32,32)/255.0
G = arr[1024:2048].reshape(32,32)/255.0
B = arr[2048:].reshape(32,32)/255.0
img = np.dstack((R,G,B))
title = re.sub('[!##$b]', '', str(labels[tr_y[ind]]))
fig = plt.figure(figsize=(3,3))
ax = fig.add_subplot(111)
ax.imshow(img,interpolation='bicubic')
ax.set_title('Category = '+ title,fontsize =15)
plot_CIFAR(4)

try using
import matplotlib.pyplot as plt
from scipy.misc import toimage
plt.imshow(toimage(img))
I am not 100% sure of how the code works, but I think that because the images are stored in floating point numpy arrays, the imshow() function has a difficult time mapping them to the right colors. By typecasting them to image using toimage() you convert them into proper image format that imshow() expects, i.e not an array but an image encoded as .png or .jpg.
This code works for me every time I want to display images in python.

code result is: Try below code.
I found a very useful link about visualization of mnist and cifar images. You can find codes for various images :
https://machinelearningmastery.com/how-to-load-and-visualize-standard-computer-vision-datasets-with-keras/
cifar10 image code is below:
It works well. Image is above.
# example of loading the cifar10 dataset
from matplotlib import pyplot
from keras.datasets import cifar10
# load dataset
(trainX, trainy), (testX, testy) = cifar10.load_data()
# summarize loaded dataset
print('Train: X=%s, y=%s' % (trainX.shape, trainy.shape))
print('Test: X=%s, y=%s' % (testX.shape, testy.shape))
# plot first few images
for i in range(9):
# define subplot
pyplot.subplot(330 + 1 + i)
# plot raw pixel data
pyplot.imshow(trainX[i])
# show the figure
pyplot.show()

Add 0.5:
plt.imshow(np.transpose(img, (1, 2, 0)) + 0.5)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

feature extraction using librosa - python

The shape of STFT and RMSE are different than MFCC and other features. STFT and MFCC are 2 dimensional while the others are one dimensional features.

Related

Iterate over an audio file with Python's librosa

Sklearn logistic regression shape error, but x, y shapes are consistent

could not broadcast input array from shape (20,310,310) into shape (20)

Stocking different types of data into an 2D numpy array

Why CIFAR-10 images are not displayed properly using matplotlib?

Categories

Resources