AttributeError: 'Tensor' object has no attribute 'ndim' - python

I was following the Kymatio classification tutorial for 1D wavelet scattering and I receive the following error:
Traceback (most recent call last):
File "", line 145, in <module>
x = Scattering1D(J, Q=Q)(x_in)
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\keras\utils\", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\autograph\impl\", line 692, in wrapper
raise e.ag_error_metadata.to_exception(e)
AttributeError: Exception encountered when calling layer "scattering1d" (type Scattering1D).
in user code:
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\frontend\", line 17, in call *
return self.scattering(x)
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\frontend\", line 14, in scattering *
return self.S.scattering(x)
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\frontend\", line 53, in scattering *
S = scattering1d(x, self.pad_fn, self.backend.unpad, self.backend, self.J, self.log2_T, self.psi1_f, self.psi2_f,
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\core\", line 76, in scattering1d *
U_0 = pad_fn(x)
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\frontend\", line 80, in pad_fn *
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\backend\", line 71, in pad *
return agnostic.pad(x, pad_left, pad_right, pad_mode, axis=axis)
File "C:\Users\xwb18152\AppData\Roaming\Python\Python38\site-packages\kymatio\scattering1d\backend\", line 36, in pad *
axis_idx = axis if axis >= 0 else (x.ndim + axis)
AttributeError: 'Tensor' object has no attribute 'ndim'
Call arguments received:
• x=tf.Tensor(shape=(None, 4194304), dtype=float32)
I'm not sure why this is the case. I am running Python 3.8.2 [MSC v.1916 64 bit (AMD64)] on win32. Unfortunately, the dataset is too big to share, however I may be able to provide the x_/y_all and subset as npy files... Below is the code I am using:
import tensorflow.compat.v2 as tf
import numpy as np
import pandas as pd
import os
from random import shuffle
from pathlib import Path
from scipy import signal
from scipy.signal import butter, sosfilt, sosfreqz
import librosa
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from kymatio.keras import Scattering1D
# from sys import getsizeof
# Set seed for reproducibility
SEED = 42
# We now loop through all recording samples for each movement
# to add to dataset (x/y_all)
movements = 'xyz xy xz x yz y z'.split() # 'xyz xy xz x yz y z'.split()
movements_dict = dict(zip(movements, [7, 4, 5, 1, 6, 2, 3]))
len_max_files = 0.4
len_files = 0
for m in movements:
files = [fle for fle in os.listdir(f'D:/rf_recordings/move_{m}') if fle.endswith('.wav') and fle.split('_')[3] == '1']
len_files += int(len(files) * len_max_files)
# len([fle for fle in os.listdir(f'D:/rf_recordings/') if fle.endswith('.wav') and fle.split('_')[3] == '1'])
# Our sampling rate is 2 MHz, so T = 2**22 (~4.2 million samples)
# corresponds to just over 2 s (our samples are pretty much
# all 2 s long, so we could pad...)
T = 2**22
J = 6
Q = 16
log_eps = 1e-6
x_all = np.zeros((len_files, T))
y_all = np.zeros(len_files, dtype=np.uint8)
subset = np.zeros(len_files, dtype=np.uint8)
print('Reading in movement signals')
for m in movements:
files = [fle for fle in os.listdir(f'D:/rf_recordings/move_{m}') if fle.endswith('.wav') and fle.split('_')[3] == '1']
files = files[int(len(files) * len_max_files):]
ratio = int(len(files)*0.2)
train_files = files[ratio:]
test_files = files[:ratio]
# print(train_files, len(test_files))
for k, filename in enumerate(files):
name = filename.split('_')
movedist = name[3]
speed = name[5]
y = movements_dict[m]
if filename in train_files:
subset[k] = 0
subset[k] = 1
# Read in the sample WAV file
fs, x ='D:/rf_recordings/move_{m}/{filename}') # ('move_x_movedist_1_speed_25k_sample_6.wav') #
# y = movements_dict[m] # keep as m for now but we will have to do this with params also later.
# We convert to mono by averaging the left and right channels.
x = np.mean(x, axis=1)
x = np.asarray(x, dtype='float') # np.float32)
# Once the recording is in memory, we normalise it to +1/-1
# x = x / np.max(np.abs(x))
x /= np.max(np.abs(x))
## Pad signal to T
x_pad = librosa.util.fix_length(x, size=T)
# print(x.shape, x_pad.shape)
# If it's too long, truncate it.
if len(x) > T:
x = x[:T]
# If it's too short, zero-pad it.
start = (T - len(x)) // 2
x_all[k, start:start+len(x)] = x
y_all[k] = y
# ## The signal is now zero-padded with shape (4194304,)
# Sx = scattering(x_pad)
# meta = scattering.meta()
# order0 = np.where(meta['order'] == 0)
# order1 = np.where(meta['order'] == 1)
# order2 = np.where(meta['order'] == 2)
# plt.figure(figsize=(8, 8))
# plt.subplot(3, 1, 1)
# plt.plot(Sx[order0][0])
# plt.title('Zeroth-order scattering')
# plt.subplot(3, 1, 2)
# plt.imshow(Sx[order1], aspect='auto')
# plt.title('First-order scattering')
# plt.subplot(3, 1, 3)
# plt.imshow(Sx[order2], aspect='auto')
# plt.title('Second-order scattering')
print('Done reading!')
x_in = layers.Input(shape=(T))
x = Scattering1D(J, Q=Q)(x_in)
x = layers.Lambda(lambda x: x[..., 1:, :])(x)
# To increase discriminability, we take the logarithm of the scattering
# coefficients (after adding a small constant to make sure nothing blows up
# when scattering coefficients are close to zero). This is known as the
# log-scattering transform.
x = layers.Lambda(lambda x: tf.math.log(tf.abs(x) + log_eps))(x)
x = layers.GlobalAveragePooling1D(data_format='channels_first')(x)
x = layers.BatchNormalization(axis=1)(x)
x_out = layers.Dense(10, activation='softmax')(x)
model = tf.keras.models.Model(x_in, x_out)


How to implement multiprocessing in a for loop inside a function

I've built some code to minimize a weighted sum of squared residuals. I first read all the data from a .gz file and then process it with the code below (the details are irrelevant). I want to use multiprocessing to speed up the "runFit" function.
My code is below:
Fit 3D lines to cylinders
from timeit import default_timer as timer
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from scipy.optimize import minimize
from numba import jit
from multiprocessing import Pool
def readData(filename):
    """Load a comma-delimited numeric table from ``filename``.

    Parameters
    ----------
    filename : str
        Path handed straight to ``np.loadtxt``.

    Returns
    -------
    numpy.ndarray
        The parsed values.
    """
    parsed = np.loadtxt(filename, delimiter=",")
    return parsed
def weightedResiduals(unknown, wire_coords, radii, d_radii, d_zcoords):
    """Return the sum of squared, weighted residuals of a 3D line against wires.

    The line passes through ``(0, y_intercept, z_intercept)`` and points along
    ``(1, xy_slope, xz_slope)``, normalised to unit length.  For each wire the
    residual is the perpendicular distance from the wire position to the line
    minus that wire's radius; it is scaled by
    ``(d_radii**2 + d_zcoords**2) ** (-1/2)`` before being squared and summed.

    Parameters
    ----------
    unknown : sequence of numbers
        ``unknown[0:4]`` is ``(y_intercept, z_intercept, xy_slope, xz_slope)``.
    wire_coords : array-like, shape (n, 3)
        One position per wire.
    radii, d_radii, d_zcoords : array-like, shape (n,)
        Per-wire radius and the two uncertainties that form the weight.

    Returns
    -------
    float
        The accumulated weighted squared residual (``0.0`` for no wires).
    """
    y_intercept = unknown[0]
    z_intercept = unknown[1]
    xy_slope = unknown[2]
    xz_slope = unknown[3]

    # Force float storage: with integer parameters the original int array made
    # the in-place division below fail with a casting error.
    intercept_vector = np.array([0.0, y_intercept, z_intercept], dtype=float)
    gradient_vector = np.array([1.0, xy_slope, xz_slope], dtype=float)
    gradient_vector /= np.linalg.norm(gradient_vector)

    wire_coords = np.asarray(wire_coords, dtype=float)
    if wire_coords.shape[0] == 0:
        return 0.0
    radii = np.asarray(radii, dtype=float)
    d_radii = np.asarray(d_radii, dtype=float)
    d_zcoords = np.asarray(d_zcoords, dtype=float)

    # |(p - a) x u| is the point-to-line distance for a unit direction u;
    # evaluating it for all wires at once replaces the per-wire Python loop.
    offsets = wire_coords - intercept_vector
    distances = np.linalg.norm(np.cross(offsets, gradient_vector), axis=1) - radii
    weights = (d_radii**2 + d_zcoords**2) ** (-1 / 2)
    return np.sum((weights * distances) ** 2)
def runFit(inputfilename, outputfilename):
inputfilename : string
input data file name for fitting.
outputfilename : string
result storage file name.
counter : int
number of successful fits; 100% would be twice the number
of events (two lines per event).
counter = 0
#Reading the required data set
fulldata = readData(inputfilename)
#Defining the output array and filling in the first two columns
event_no = int(fulldata[-1,0])
result = np.zeros((2*event_no, 10))
result[:,0] = np.repeat(np.arange(1, event_no+1), 2)
line_no_array = np.empty((2*event_no,))
line_no_array[::2] = 1
line_no_array[1::2] = 2
result[:,1] = line_no_array
def singleEventFit(event):
#Using masking to obtain required rows
mask = (fulldata==event)
desired_rows = mask[:, 0]
#Calculating the fitted line variables using weighted least squares
for line in range(1,3):
#Extracting the desired rows from the full data array
desired_array = fulldata[np.logical_and(desired_rows,(fulldata==line)[:,1])]
#Extracting grouped data from the desired rows
wire_coords = desired_array[:,2:5]
wire_x_coords = wire_coords[:,0]
wire_y_coords = wire_coords[:,1]
wire_z_coords = wire_coords[:,2]
radii = desired_array[:,5]
d_radii, d_zcoords = desired_array[:,6], desired_array[:,7]
#Estimating an initial guess for the fitted line variables
x_min_index = np.argmin(np.abs(wire_x_coords))
x_max_index = np.argmax(np.abs(wire_x_coords))
y_intercept_guess = wire_y_coords[x_min_index]
z_intercept_guess = wire_z_coords[x_min_index]
xy_slope_guess = (wire_y_coords[x_max_index]-wire_y_coords[x_min_index])/(wire_x_coords[x_max_index]-wire_x_coords[x_min_index])
xz_slope_guess = (wire_z_coords[x_max_index]-wire_z_coords[x_min_index])/(wire_x_coords[x_max_index]-wire_x_coords[x_min_index])
init = np.array([y_intercept_guess, z_intercept_guess, xy_slope_guess, xz_slope_guess])
#Minimizing the sum of the weighted residuals
fit_vars = minimize(weightedResiduals, init, args=(wire_coords, radii, d_radii, d_zcoords), tol=1e-5)
if fit_vars.success == True:
y_intercept, z_intercept = fit_vars.x[0], fit_vars.x[1]
xy_slope, xz_slope = fit_vars.x[2], fit_vars.x[3]
#Using the half of the inverse of the Hessian matrix as the covariance matrix to recover errors
std_array = np.sqrt(np.diag(0.5*fit_vars.hess_inv))
#Inputting the variables and their errors on the output array
result[2*event+line-3, 2], result[2*event+line-3, 4] = y_intercept, xy_slope
result[2*event+line-3, 6], result[2*event+line-3, 8] = z_intercept, xz_slope
result[2*event+line-3, 3], result[2*event+line-3, 5] = std_array[0], std_array[2]
result[2*event+line-3, 7], result[2*event+line-3, 9] = std_array[1], std_array[3]
with Pool() as pool:
pool.map(singleEventFit, [event for event in range(1, event_no+1)])
#Returning resulting array as a text file
np.savetxt(outputfilename, result, delimiter=',')
return counter
start = timer()
if __name__=='__main__':
print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
end = timer()
print("Time: " + str(end-start) + "s")
However, I get the following traceback:
Traceback (most recent call last):
File "C:\Users\vanes\Downloads\Python Project\", line 113, in <module>
print("Successful Plots: " + str(runFit("tendata.txt.gz", "output.txt.gz")))
File "C:\Users\vanes\Downloads\Python Project\", line 105, in runFit
pool.map(singleEventFit, [event for event in range(1, event_no+1)])
File "C:\Users\vanes\anaconda3\lib\multiprocessing\", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Users\vanes\anaconda3\lib\multiprocessing\", line 771, in get
raise self._value
File "C:\Users\vanes\anaconda3\lib\multiprocessing\", line 537, in _handle_tasks
File "C:\Users\vanes\anaconda3\lib\multiprocessing\", line 211, in send
File "C:\Users\vanes\anaconda3\lib\multiprocessing\", line 51, in dumps
cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'runFit.<locals>.singleEventFit'
Is there any way I can use multiprocessing to speed up the for-loop?
After searching online, the recommendation I found was to move the inner function outside and make it global. However, this won't work as-is, since the loop needs variables that are defined inside "runFit()".

matplotlib triplot and tricontourf

I'm attempting to plot a 2D dataset having unstructured coordinates in matplotlib using tricontourf. I'm able to generate a plot of the 'mesh' with triplot, however when I use the same Triangulation object for tricontourf, I get an error (see below). What am I missing? Example:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
def lower(x):
    """Lower edge of the trapezoid at abscissa ``x``: the line ``2 + 1*x``."""
    offset, slope = 2, 1
    return offset + slope * x
def upper(x):
    """Upper edge of the trapezoid at abscissa ``x``: the line ``60 + 4*x``."""
    offset, slope = 60, 4
    return offset + slope * x
ni = 10
nj = 12
x = np.linspace(1,15,ni)
## make a trapezoid
xy = np.zeros((ni,nj,2),dtype=np.float32)
for i in range(len(x)):
y = np.linspace(lower(x[i]),upper(x[i]),nj)
xy[i,:,0] = x[i]
xy[i,:,1] = y
## add noise
xy += -0.1 + 0.2*np.random.rand(ni,nj,2)
## make tris 'indices list'
xi, yi = np.meshgrid(range(ni), range(nj), indexing='xy')
inds_list = np.stack((xi,yi), axis=2)
inds_list = np.reshape(inds_list, (ni*nj,2), order='C')
inds_list = np.ravel_multi_index((inds_list[:,0],inds_list[:,1]), (ni,nj), order='C')
inds_list = np.reshape(inds_list, (ni,nj), order='F')
tris = np.zeros((2*(ni-1)*(nj-1),3), dtype=np.int64)
for i in range(ni-1):
for j in range(nj-1):
tris[ci,0] = inds_list[i+1, j+1]
tris[ci,1] = inds_list[i, j+1]
tris[ci,2] = inds_list[i, j ]
tris[ci,0] = inds_list[i, j ]
tris[ci,1] = inds_list[i+1, j ]
tris[ci,2] = inds_list[i+1, j+1]
triangulation = mpl.tri.Triangulation(x=xy[:,:,0].ravel(), y=xy[:,:,1].ravel(), triangles=tris)
fig1 = plt.figure(figsize=(4, 4/(16/9)), dpi=300)
ax1 = plt.gca()
ax1.triplot(triangulation, lw=0.5)
However, uncommenting the line with ax1.tricontourf
throws the following error:
Traceback (most recent call last):
File "", line 54, in <module>
File "C:\Users\steve\AppData\Roaming\Python\Python38\site-packages\matplotlib\tri\", line 307, in tricontourf
return TriContourSet(ax, *args, **kwargs)
File "C:\Users\steve\AppData\Roaming\Python\Python38\site-packages\matplotlib\tri\", line 29, in __init__
super().__init__(ax, *args, **kwargs)
File "C:\Users\steve\AppData\Roaming\Python\Python38\site-packages\matplotlib\", line 812, in __init__
kwargs = self._process_args(*args, **kwargs)
File "C:\Users\steve\AppData\Roaming\Python\Python38\site-packages\matplotlib\tri\", line 45, in _process_args
tri, z = self._contour_args(args, kwargs)
File "C:\Users\steve\AppData\Roaming\Python\Python38\site-packages\matplotlib\tri\", line 60, in _contour_args
z = np.ma.asarray(args[0])
IndexError: list index out of range
I am using:
Python version: 3.8.9
matplotlib version: 3.5.1
I would say you need to provide the array of values to contour, e.g.:
x= xy[:,:,0].ravel()
z= np.random.rand(x.shape[0])
ax1.tricontourf(triangulation, z)

Iterate over an audio file with Python's librosa

I was trying to use a voice emotion detection model from GitHub. Based on their examples, I was able to implement the following code to predict the final emotion of an audio file as a single prediction. It looks like it makes a sub-prediction for each 0.4 s window in the audio file and then takes the most frequent one as the final output (here is the sample file I used).
How can I change it to print a prediction for every 1s chunk of the audio file (as opposed to a single value for the whole file)?
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
# Create a configuration class to help if I want to change parameters later
class Config:
    """Audio-feature parameters gathered in one place so they are easy to change.

    Parameters
    ----------
    n_mfcc : int
        Value passed as ``n_mfcc`` when computing MFCCs.
    n_feat : int
        Number of MFCC rows kept per window (rows ``1 .. n_feat``).
    n_fft : int
        Value passed as both ``n_fft`` and ``hop_length`` when computing MFCCs.
    sr : int
        Sample rate used to convert ``window`` / ``test_shift`` to samples.
    window : float
        Window length in seconds.
    test_shift : float
        Offset in seconds between consecutive windows.
    """

    def __init__(self, n_mfcc=26, n_feat=13, n_fft=552, sr=22050, window=0.4, test_shift=0.1):
        self.n_mfcc = n_mfcc
        self.n_feat = n_feat
        # Store n_fft and sr separately.  The previous chained assignment
        # ``self.n_fft = n_fft = sr`` replaced n_fft with the sample rate and
        # never set ``self.sr`` at all.
        self.n_fft = n_fft
        self.sr = sr
        self.window = window
        # Window length expressed in samples.
        self.step = int(sr * window)
        self.test_shift = test_shift
        # Offset between consecutive windows expressed in samples.
        self.shift = int(sr * test_shift)
config = Config()
model = pickle.load(open('cnn_ep25_mfccOnly_moreData.pkl', 'rb'))
wav, sr = librosa.load('YAF_chain_angry.wav')
all_results = []
# Initialize a local results list
local_results = []
# Initialize min and max values for each file for scaling
_min, _max = float('inf'), -float('inf')
# Load the file
# Create an array to hold features for each window
X = []
# Iterate over sliding 0.4s windows of the audio file
for i in range(int((wav.shape[0]/sr-config.window)/config.test_shift)):
X_sample = wav[i*config.shift: i*config.shift + config.step] # slice out 0.4s window
X_mfccs = librosa.feature.mfcc(X_sample, sr, n_mfcc = config.n_mfcc, n_fft = config.n_fft,
hop_length = config.n_fft)[1:config.n_feat + 1] # generate mfccs from sample
_min = min(np.amin(X_mfccs), _min)
_max = max(np.amax(X_mfccs), _max) # check min and max values
X.append(X_mfccs) # add features of window to X
# Put window data into array, scale, then reshape
X = np.array(X)
X = (X - _min) / (_max - _min)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
# Feed data for each window into model for prediction
for i in range(X.shape[0]):
window = X[i].reshape(1, X.shape[1], X.shape[2], 1)
# Aggregate predictions for file into one then append to all_results
local_results = (np.sum(np.array(local_results), axis = 0)/len(local_results))[0]
local_results = list(local_results)
prediction = np.argmax(local_results)
# Turn all results into a dataframe
df_cols = ['neutral', 'happy', 'sad', 'angry', 'fearful', 'disgusted', 'surprised']
print("Prediction: "+ df_cols[prediction])

Storing different types of data in a 2D numpy array

I would like to know how I can store different kinds of data in a numpy array in order to feed it to a machine-learning SVC algorithm.
My goal, is to get a dataframe of size (sample * features) like this:
Feature 1 in gray containing list of size n
Feature 2 in red, containing 2D numpy array of shape (i,k)
Feature ... Something else (array for pwelch spectrum, integers, float, ...)
Feature n in blue, containing integer.
How can I do that in Python? Is this going to be OK for sklearn?
Here is the current error from the code below:
ValueError: setting an array element with a sequence.
# -*- coding: utf-8 -*-
-------------------------------- Imports --------------------------------------
import os
import pandas as pd
import numpy as np
from scipy import io as sio
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
------------------------------ Parameters -------------------------------------
# Path to the clean EEG .mat files
EEG_path = "data"
# Listing of the .mat files
EEG = list()
for elt in os.listdir(EEG_path):
if os.path.isfile(os.path.join(EEG_path, elt)):
if '.mat' in elt[len(elt)-4:]:
# Spectrum used
spectrum = ['all', (1,45), (8,12)]
nb_features = 3
------------------------------ Functions --------------------------------------
# Function on 1 channel
# Input: All points from one channel, for one epoch
def filter(x, n, fs, fc1, fc2):
    """Apply a zero-phase Butterworth band-pass to ``x``.

    Parameters
    ----------
    x : array-like
        Samples to filter (filtering runs along the last axis).
    n : int
        Butterworth order handed to ``signal.butter``.
    fs : float
        Sampling rate; ``fc1`` and ``fc2`` are normalised by ``fs / 2``.
    fc1, fc2 : float
        Lower and upper band edges, in the same unit as ``fs``.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as ``x``.
    """
    nyquist = fs / 2
    # Design as second-order sections: the (b, a) polynomial form loses
    # precision for high orders such as the n=15 narrow band-pass used by the
    # calling script, which can make the filter unstable.
    sos = signal.butter(n, [fc1 / nyquist, fc2 / nyquist], 'bandpass', output='sos')
    # Forward-backward filtering cancels the phase shift.
    return signal.sosfiltfilt(sos, x)
def haming(x, L):
    """Taper ``x`` with a symmetric ``L``-point Hamming window.

    Parameters
    ----------
    x : array-like
        Samples to taper; the last axis must have length ``L``.
    L : int
        Number of points in the window.

    Returns
    -------
    numpy.ndarray
        Element-wise product of ``x`` and the window.
    """
    # Use the ``signal.windows`` namespace: the top-level ``signal.hamming``
    # alias is deprecated and no longer present in recent SciPy releases.
    window = signal.windows.hamming(L)
    # Element-wise multiplication (transposing a 1-D window is a no-op).
    return x * window
# Function on one epoch
# Input is a matrix of size (channel * length)
def amp_mean(x):
# Reads x.shape, creates an empty list y, iterates over range(size[0])
# and returns y.
# NOTE(review): the loop has no body as written -- the statement that fills y
# appears to have been lost. Going by the function name it presumably appended
# a per-row mean amplitude; TODO confirm against the original post.
size = x.shape
y = list()
for i in range(size[0]):
return y
def amp_max(x):
# Reads x.shape, creates an empty list y, iterates over range(size[0])
# and returns y.
# NOTE(review): the loop has no body as written -- the statement that fills y
# appears to have been lost. Going by the function name it presumably appended
# a per-row maximum amplitude; TODO confirm against the original post.
size = x.shape
y = list()
for i in range(size[0]):
return y
-------------------------------- Script ---------------------------------------
# Load data
s_EEG = "{}/{}".format(EEG_path, EEG[4])
data = sio.loadmat(s_EEG)['s_EEG']['data'][0][0].astype(float) # data[i, j ,k]
labels = sio.loadmat(s_EEG)['s_EEG']['labels'][0][0][0] # labels[k]
fs = sio.loadmat(s_EEG)['s_EEG']['sampling_rate'][0][0][0][0] # 500 Hz
size = data.shape
# Creates an empty data frame of size (epoch * features)
df = np.empty(shape = (size[2], nb_features * len(spectrum)))
# Filling the dataframe with features
# for every epoch
for k in range(size[2]):
for freq in spectrum:
data_to_compute = np.empty(shape = size, dtype = float)
# Apply hamming
if freq == 'all':
for i in range(size[0]):
data_to_compute[i,:,k] = haming(data[i,:,k], size[1])
# Apply hamming after filtering
for i in range(size[0]):
data_to_compute[i,:,k] = haming(filter(data[i,:,k],
15, fs, freq[0], freq[1]), size[1])
# data_to_compute is ready to have feature extracted
for n in range(0, df.shape[1], nb_features):
df[k, n] = data_to_compute[:,:,k]
df[k, n+1] = amp_mean(data_to_compute[:,:,k])
df[k, n+2] = amp_max(data_to_compute[:,:,k])
# X signal / Y label
X_train, X_test, Y_train, Y_test = train_test_split(data,
clf = SVC(), Y_train)
Variable type:
Thanks !

ValueError: x and y must have same first dimension when plotting

I am trying to plot an array of x and y values and keep getting this error.
ValueError: x and y must have same first dimension
This is my code:
import numpy as np
import pylab as plt
from matplotlib import rc
def analyze(targt_data, targt_data_name, trang_data, trang_data_name, matches):
"""Analyze a set of samples on target data"""
_timefrm = [40, 80, 120]
_scorefilter = 0.8
index = 0
matches = matches[np.where(matches[:, 3] > _scorefilter)]
rc('text', usetex=True)
fig = plt.figure()
plt1 = fig.add_subplot(321)
plt2 = fig.add_subplot(322)
plt3 = fig.add_subplot(323)
plt4 = fig.add_subplot(324)
plt5 = fig.add_subplot(325)
plt6 = fig.add_subplot(326)
matches = matches[np.where(matches[:, 2] == index)]
avg_score = np.mean(matches[:, 3])
rwresults = [targt_data[y-1:y+np.max(_timefrm)] for y in matches[:,1]]
pctresults = [np.log(np.divide(y[1:], y[0])) for y in rwresults]
for res in pctresults:
len(trang_data[index])+np.max(_timefrm)),[index][-1], np.add(res, 1)))
results_name = raw_input('Load matching scores: ')
results, training_data_name, target_data_name = Results(DB).load_matching_scores(results_name)
target_data = TargetData(DB).load(target_data_name)
training_data = TrainingData(DB).load(training_data_name)
analyze(target_data, target_data_name, training_data, training_data_name, results)
Also, here are the values printed out:
(Pdb) len([ns.index][-1], np.add(pctresults[0], 1)))
(Pdb) len(np.arange(len(trang_data[ns.index]), len(trang_data[ns.index])+np.max(_timefrm)))
(Pdb)[ns.index][-1], np.add(pctresults[0], 1)).shape
(Pdb) np.arange(len(trang_data[ns.index]), len(trang_data[ns.index])+np.max(_timefrm)).shape
It turns out one of the subarrays was too short:
(Pdb) len(pctresults[71])
The value error "x and y must have same first dimension" is raised by the plot(x, y) method when x and y are not of the same length.
