pandas: AttributeError: 'dict' object has no attribute 'transform' - python

I'm quite new with python and pandas, and I'm stuck with an error. I'm working with audiofiles, and I need to extract some features from audiostreams.
def load_features(self, audio_file):
    """Read an audio file and turn it into a feature array for the model.

    The signal is read with ``sf.read``, mixed down to mono, forced to a
    length of exactly 2.0 seconds, transformed by ``self.ch`` and, when
    ``self.scaler`` is set, scaled. A leading axis of size 1 is added so
    the result represents a single data point.

    :param audio_file: path (or file object) accepted by ``sf.read``
    :return: the feature array with a leading axis of length 1
    """

    def stereo_to_mono(x):
        # Average the channels when the signal has more than one column.
        if len(x.shape) > 1 and x.shape[1] > 1:
            print('Converting stereo to mono')
            x = x.mean(axis=1)
        return x

    def cut_or_pad_to_length(x, duration, fs):
        desired_length = int(round(duration * fs))
        diff = len(x) - desired_length
        if diff < 0:
            print('Padding')
            # Put the short signal in the middle of the padded one.
            abs_diff = -diff
            pad_before = abs_diff // 2
            pad_after = abs_diff - pad_before
            # np.pad is the public entry point; 'constant' pads with zeros.
            x = np.pad(x, (pad_before, pad_after), 'constant')
        elif diff > 0:
            # The original tested ``diff > 1``, which left a signal that was
            # exactly one sample too long uncut.
            print('Cutting')
            # Keep the beginning, drop the tail.
            x = x[0:desired_length]
        return x

    def adjust_input(x, fs):
        x = stereo_to_mono(x)
        x = cut_or_pad_to_length(x, 2.0, fs)
        return x

    # x is data, fs is samplerate
    x, fs = sf.read(audio_file)
    x0 = adjust_input(x, fs)
    # pitchgram
    x_features = self.ch.transform(x0)
    if self.scaler is not None:
        # The scaler is fed one flattened row.
        x_features = self.scaler.transform(x_features.reshape(1, -1))
    # 1 data point with 2D features
    x_features = x_features.reshape(1, *x_features.shape)
    return x_features
def predict_class_label(self, audio_file):
    """Predict the class of one audio file and return its decoded label.

    The features come from ``self.load_features``; the class index taken
    from the model output is decoded with ``self.instr_family_le``.
    """
    features = self.load_features(audio_file)
    probabilities = self.model.predict(features, verbose=0)
    # Only one data point is predicted, so take the first class index.
    class_index = np_utils.probas_to_classes(probabilities)[0]
    return self.instr_family_le.inverse_transform(class_index)
This gives me the following error:
File "C:/dipl0m/ml-master/instrument-classification/predict.py", line 104, in predict_probabilities
x_features = self.load_features(audio_file)
File "C:/dipl0m/ml-master/instrument-classification/predict.py", line 87, in load_features
x_features = self.ch.transform(x0)
AttributeError: 'dict' object has no attribute 'transform'
But x and x0 don't seem to be dictionaries, because I operate on them like lists, yet transform gives me that error... Or am I wrong somewhere? I haven't been able to figure it out for a long time.

Related

Fitting model to data using scipy differential evolution: "RuntimeError: The map-like callable must be of the form f(func, iterable)..."

I am trying to fit a model to data (extracted from an Excel file and imported using pandas), using a likelihood method. However, when running the code I get a "RuntimeError: The map-like callable must be of the form f(func, iterable), returning a sequence of numbers the same length as 'iterable'" error, which occurred at the "result_simul_G = minimize(negLogLike, params, method = 'differential_evolution', args=(x, y),)" line. Below I have my code; it's very integrated so I couldn't find a way to illustrate what's happening without showing most of it.
#================================================================================
import numpy as np
import pandas as pd
import os
from lmfit import minimize, Parameters, Parameter, report_fit
params = Parameters()
params.add('gamma', value=.45, min=0, max=1, vary = True)
params.add('n', value = 1, min=0, max=3, vary = True)
filename = 'data.xlsx'
#================================================================================
def negLogLike(params, xData, yData):
    """Return the negative Bernoulli log-likelihood of ``model`` for the data.

    Points where either ``xData`` or ``yData`` is exactly zero are left out.
    Model probabilities are clamped to ``[epsilon, 1 - epsilon]`` so that
    neither logarithm receives zero.

    :param params: parameters forwarded unchanged to ``model``
    :param xData: sequence of x values
    :param yData: sequence of y values, same length as ``xData``
    :return: ``-sum(y * log(p) + (1 - y) * log(1 - p))`` over the kept points
    """
    epsilon = 1e-10
    x = np.asarray(xData, dtype=float)
    y = np.asarray(yData, dtype=float)

    keep = (x != 0) & (y != 0)
    x = x[keep]
    y = y[keep]

    # Hand model() an ndarray: the previous Python lists made ``x**n`` raise
    # TypeError inside model().
    p = np.clip(model(x, params), epsilon, 1 - epsilon)

    return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
#================================================================================
def model(x, params):
    """Evaluate ``y = 1 - exp(-gamma * x**n)``.

    :param x: scalar, list or array of x values; converted to a float array
        so that ``x**n`` is element-wise even for plain Python lists
    :param params: either a mapping whose ``'gamma'`` and ``'n'`` entries
        expose ``.value``, or a plain ``(gamma, n)`` sequence
    :return: the model values, one per element of ``x``
    """
    try:  # Get parameters by name
        g = params['gamma'].value
        n = params['n'].value
    except (KeyError, TypeError, IndexError):
        # Tuples/lists raise TypeError and ndarrays raise IndexError when
        # indexed with a string; treat all of those as a (gamma, n) pair.
        g, n = params
    x = np.asarray(x, dtype=float)
    y = 1 - np.exp(-g * x**n)
    return y
#================================================================================
def GetFits(DataFrame):
    """Fit the model to the per-cell GFP data and return the fit result.

    The ``'GFP genomes'`` and ``'GFP IU'`` columns are divided by the cell
    count, the first and last ten rows are dropped, and the remaining
    points are handed to ``minimize`` with ``negLogLike`` as the objective.

    :param DataFrame: table with ``'GFP genomes'`` and ``'GFP IU'`` columns
    :return: whatever ``minimize`` returns (previously it was discarded)
    """
    cell_count = 2300000

    # Column-wise division by position; no assumption about index labels.
    GFP_GC_SIMUL = DataFrame['GFP genomes'].to_numpy(dtype=float) / cell_count
    GFP_IC_SIMUL = DataFrame['GFP IU'].to_numpy(dtype=float) / cell_count

    # Data
    x = GFP_GC_SIMUL[10:-10]
    y = GFP_IC_SIMUL[10:-10]
    print('len=', len(x), x.dtype, ', x=', x)
    print('------------------------')
    print('len=', len(y), y.dtype, ', y=', y)

    result_simul_G = minimize(negLogLike, params, method='differential_evolution', args=(x, y))
    return result_simul_G
#================================================================================
DataFrame = pd.read_excel('data.xlsx', engine='openpyxl')
GetFits(DataFrame)
When debugging on my own I used print statements to see what x and y data was being supplied to the minimizer and this is what it showed:
len= 34 float64 , x= [0.14478261 0.28695652 0.28695652 0.28695652 0.57391304 0.57391304
0.57391304 0.8738913 0.8738913 0.8738913 1.16086957 1.16086957
1.16086957 1.44780435 1.44780435 1.44780435 1.73478261 1.73478261
1.73478261 2.03476087 2.03476087 2.03476087 2.32173913 2.32173913
2.32173913 2.60869565 2.60869565 2.60869565 2.86956522 2.86956522
2.86956522 7.17391304 7.17391304 7.17391304]
------------------------
len= 34 float64 , y= [0.005 0.01180435 0.01226087 0.01158696 0.036 0.03704348
0.03467391 0.07030435 0.06556522 0.07567391 0.1001087 0.09852174
0.0986087 0.13626087 0.13978261 0.13956522 0.16847826 0.16408696
0.19391304 0.1945 0.21319565 0.19052174 0.32204348 0.23330435
0.25028261 0.28136957 0.26293478 0.25893478 0.28273913 0.29717391
0.273 0.60826087 0.60834783 0.59482609]
I know this is quite a lot but I would appreciate any and all help.

Scorer not recognizing inputs

I'm trying to utilize a custom scorer with the following code
# Custom scoring function: intended to return the mean of sign(y_pred) * y.
# NOTE(review): the AttributeError quoted after this snippet shows that y and
# y_pred arrive as NumPy arrays, which have no settable .name attribute.
def edge_score(y, y_pred):
y_pred.name = 'y_pred'
y.name = 'y'
# NOTE(review): pd.concat without axis=1 stacks its inputs end to end, so it
# presumably does not yield separate 'y_pred' and 'y' columns - confirm intent.
df = pd.concat([y_pred, y])
df['sign_pred'] = df.y_pred.apply(np.sign)
df['sign_true'] = df.y.apply(np.sign)
# Flag rows where the predicted and true signs agree.
df['is_correct'] = 0
df.loc[
df.sign_pred * df.sign_true > 0, 'is_correct'] = 1
# Flag rows where the predicted and true signs disagree.
df['is_incorrect'] = 0
df.loc[
df.sign_pred * df.sign_true < 0, 'is_incorrect'] = 1
df['is_predicted'] = df.is_correct + df.is_incorrect
df['result'] = df.sign_pred * df.y
# Every row of 'edge' holds the same value: the mean of 'result'.
df['edge'] = df.result.mean()
output_errors = df[['edge']]
# The return value of to_numpy() is not stored; output_errors is unchanged.
output_errors.to_numpy()
return np.average(output_errors)
edge = make_scorer(edge_score)
I get the following error
AttributeError: 'numpy.ndarray' object has no attribute 'name'
When I comment out the .name lines, I get the following error
TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid
When I convert true and predictions to dataframe, I get the following error
y_pred = pd.DataFrame(y_pred)
y = pd.DataFrame(y)
AttributeError: 'DataFrame' object has no attribute 'y_pred'
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer
You should first create a DataFrame with the two numpy arrays y and y_pred, and then perform all the operations.
def edge_score(y, y_pred):
    """Return the mean of ``sign(y_pred) * y`` over all samples.

    :param y: true values (array-like)
    :param y_pred: predicted values (array-like, same length as ``y``)
    :return: the mean of the per-sample results; NaN for empty input
    """
    df = pd.DataFrame({"y": y,
                       "y_pred": y_pred})
    df['sign_pred'] = df.y_pred.apply(np.sign)
    df['sign_true'] = df.y.apply(np.sign)

    # Diagnostic columns only; they do not influence the returned score.
    agreement = df.sign_pred * df.sign_true
    df['is_correct'] = (agreement > 0).astype(int)
    df['is_incorrect'] = (agreement < 0).astype(int)
    df['is_predicted'] = df.is_correct + df.is_incorrect

    df['result'] = df.sign_pred * df.y
    # Return the mean directly. The original broadcast it into an 'edge'
    # column, called to_numpy() without keeping the result, and then
    # averaged the identical values again.
    return df['result'].mean()
edge = make_scorer(edge_score)
Change these lines of code
df['sign_pred'] = df.y_pred.apply(np.sign)
df['sign_true'] = df.y.apply(np.sign)
to these:
df['sign_pred'] = np.sign(y_pred)
df['sign_true'] = np.sign(y)
def custom_score(y_true, y_pred):
    """Count the samples whose predicted sign equals the true sign."""
    signs_agree = np.sign(y_true) == np.sign(y_pred)
    # Summing booleans counts the True entries.
    return signs_agree.sum()
custom_scorer = make_scorer(custom_score, greater_is_better=True)
convert everything to an array and then process that.

Python ValueError: setting an array element with a sequence. I'm getting this value error and I don't know how to solve the issue

# Start of a simulation routine; the snippet is cut off inside the time loop.
# Each keyword argument names a compartment and supplies its initial count.
def simulate(self, timesteps, **kwargs):
# Map every compartment name to its slot in the population vector.
pos = {comp: i for i, comp in enumerate(kwargs)}
population = np.zeros(len(pos), dtype='int')
for comp in pos:
# NOTE(review): each slot of this int array holds one number, so kwargs[comp]
# must be a scalar. The call shown after this snippet passes population.S etc.,
# which may hold several values after pd.concat - presumably the cause of the
# reported ValueError; confirm against the caller.
population[pos[comp]] = kwargs[comp] # line where the error is
values = []
values.append(population)
comps = list(self.transitions.nodes)
# Steps run from 1 up to timesteps - 1.
time = np.arange(1, timesteps, 1, dtype='int')
for t in time:
pop = values[-1]
new_pop = values[-1].copy()
N = np.sum(pop)
I am getting this ValueError and I'm not sure how to fix it. Any suggestions?
Error happens when I call this,
if population is None:
population = SIR1.values_.iloc[-1].copy()
else:
population = pd.concat([population, SIR1.values_.iloc[-1]])
S0 = population.S
I00 = population.I
R0 = population.R
Quarantine.simulate(365 - 74, S=S0, I=I00, R=R0) ### Line causing the error

AttributeError: LstmNetwork instance has no attribute 'x_list_add'

I have two Python files, namely lstm.py and test.py, in the same folder. I'm executing the test.py file in the Python interactive shell. It shows the
AttributeError: Lstm Param instance has no attribute "x_list_add".
lstm.py
import random
import numpy as np
import math
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1. / denominator
def sigmoid_derivative(values):
    """Return values * (1 - values); ``values`` are sigmoid outputs, not inputs."""
    complement = 1 - values
    return values * complement
def tanh_derivative(values):
    """Return 1 - values**2; ``values`` are tanh outputs, not inputs."""
    squared = values ** 2
    return 1. - squared
# creates a uniform random array with values in [a, b) and shape args
def rand_arr(a, b, *args):
    """Return an array of shape ``args`` drawn uniformly from [a, b).

    NOTE(review): the global NumPy seed is reset to 0 on every call, so two
    calls with the same shape return identical arrays.
    """
    width = b - a
    np.random.seed(0)
    unit_samples = np.random.rand(*args)
    return unit_samples * width + a
class LstmParam:
    """Weights, biases and accumulated diffs for the four gates g, i, f and o."""

    def __init__(self, mem_cell_ct, x_dim):
        self.mem_cell_ct = mem_cell_ct
        self.x_dim = x_dim
        # Each weight matrix acts on the input concatenated with the hidden state.
        concat_len = x_dim + mem_cell_ct
        weight_shape = (mem_cell_ct, concat_len)
        for gate in ("g", "i", "f", "o"):
            # weight matrix and bias term
            setattr(self, "w" + gate, rand_arr(-0.1, 0.1, *weight_shape))
            setattr(self, "b" + gate, rand_arr(-0.1, 0.1, mem_cell_ct))
            # diffs (derivative of loss function w.r.t. the parameter)
            setattr(self, "w" + gate + "_diff", np.zeros(weight_shape))
            setattr(self, "b" + gate + "_diff", np.zeros(mem_cell_ct))

    def apply_diff(self, lr = 1):
        """Subtract ``lr`` times each accumulated diff, then zero the diffs."""
        names = ("wg", "wi", "wf", "wo", "bg", "bi", "bf", "bo")
        for name in names:
            parameter = getattr(self, name)
            # In-place update of the stored array.
            parameter -= lr * getattr(self, name + "_diff")
        # reset diffs to zero
        for name in names:
            setattr(self, name + "_diff", np.zeros_like(getattr(self, name)))
class LstmState:
    """Zero-initialised activations of one LSTM node plus its bottom diffs."""

    def __init__(self, mem_cell_ct, x_dim):
        # x_dim is accepted but not used here.
        for name in ("g", "i", "f", "o", "s", "h"):
            setattr(self, name, np.zeros(mem_cell_ct))
        self.bottom_diff_h = np.zeros_like(self.h)
        self.bottom_diff_s = np.zeros_like(self.s)
class LstmNode:
    """One LSTM cell: a forward step (bottom_data_is) and a backward step (top_diff_is)."""

    def __init__(self, lstm_param, lstm_state):
        # store reference to parameters and to activations
        self.state = lstm_state
        self.param = lstm_param
        # non-recurrent input concatenated with recurrent input
        self.xc = None

    def bottom_data_is(self, x, s_prev = None, h_prev = None):
        """Compute this node's activations from ``x`` and the previous s and h."""
        state = self.state
        param = self.param
        # if this is the first lstm node in the network
        if s_prev is None:
            s_prev = np.zeros_like(state.s)
        if h_prev is None:
            h_prev = np.zeros_like(state.h)
        # save data for use in backprop
        self.s_prev = s_prev
        self.h_prev = h_prev

        # concatenate x(t) and h(t-1)
        xc = np.hstack((x, h_prev))
        state.g = np.tanh(np.dot(param.wg, xc) + param.bg)
        state.i = sigmoid(np.dot(param.wi, xc) + param.bi)
        state.f = sigmoid(np.dot(param.wf, xc) + param.bf)
        state.o = sigmoid(np.dot(param.wo, xc) + param.bo)
        state.s = state.g * state.i + s_prev * state.f
        state.h = state.s * state.o

        self.xc = xc

    def top_diff_is(self, top_diff_h, top_diff_s):
        """Accumulate parameter diffs and store the diffs for the previous node."""
        state = self.state
        param = self.param
        xc = self.xc

        # notice that top_diff_s is carried along the constant error carousel
        ds = state.o * top_diff_h + top_diff_s

        # diffs w.r.t. vector inside sigma / tanh function
        di_input = sigmoid_derivative(state.i) * (state.g * ds)
        df_input = sigmoid_derivative(state.f) * (self.s_prev * ds)
        do_input = sigmoid_derivative(state.o) * (state.s * top_diff_h)
        dg_input = tanh_derivative(state.g) * (state.i * ds)

        # diffs w.r.t. inputs
        param.wi_diff += np.outer(di_input, xc)
        param.wf_diff += np.outer(df_input, xc)
        param.wo_diff += np.outer(do_input, xc)
        param.wg_diff += np.outer(dg_input, xc)
        param.bi_diff += di_input
        param.bf_diff += df_input
        param.bo_diff += do_input
        param.bg_diff += dg_input

        # compute bottom diff (same accumulation order: i, f, o, g)
        dxc = np.zeros_like(xc)
        for weights, gate_input in (
            (param.wi, di_input),
            (param.wf, df_input),
            (param.wo, do_input),
            (param.wg, dg_input),
        ):
            dxc += np.dot(weights.T, gate_input)

        # save bottom diffs
        state.bottom_diff_s = ds * state.f
        # The entries after the first x_dim belong to the recurrent input h.
        state.bottom_diff_h = dxc[param.x_dim:]
class LstmNetwork():
    """A chain of LstmNode objects that share one parameter set.

    ``x_list_add`` feeds one input per call and grows the node list on
    demand; ``y_list_is`` back-propagates a target sequence through the
    nodes; ``x_list_clear`` forgets the inputs so a new sequence can start.
    All three are methods of the class itself. Nesting ``x_list_clear`` or
    ``x_list_add`` under ``y_list_is`` makes them unreachable and gives
    "LstmNetwork instance has no attribute 'x_list_add'".
    """

    def __init__(self, lstm_param):
        self.lstm_param = lstm_param
        self.lstm_node_list = []
        # input sequence
        self.x_list = []

    def y_list_is(self, y_list, loss_layer):
        """
        Updates diffs by setting target sequence
        with corresponding loss layer.
        Will *NOT* update parameters. To update parameters,
        call self.lstm_param.apply_diff()

        Returns the loss summed over all positions of the sequence.
        """
        assert len(y_list) == len(self.x_list)
        idx = len(self.x_list) - 1
        # last node only gets diffs from label ...
        loss = loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
        diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
        # here s is not affecting loss due to h(t+1), hence we set equal to zero
        diff_s = np.zeros(self.lstm_param.mem_cell_ct)
        self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
        idx -= 1

        # ... earlier nodes also get diffs from next nodes, hence we add diffs to diff_h
        # we also propagate error along constant error carousel using diff_s
        while idx >= 0:
            loss += loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h += self.lstm_node_list[idx + 1].state.bottom_diff_h
            diff_s = self.lstm_node_list[idx + 1].state.bottom_diff_s
            self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
            idx -= 1

        return loss

    def x_list_clear(self):
        """Forget the stored inputs; existing nodes are kept for reuse."""
        self.x_list = []

    def x_list_add(self, x):
        """Append one input and run the forward step of the matching node."""
        self.x_list.append(x)
        if len(self.x_list) > len(self.lstm_node_list):
            # need to add new lstm node, create new state mem
            lstm_state = LstmState(self.lstm_param.mem_cell_ct, self.lstm_param.x_dim)
            self.lstm_node_list.append(LstmNode(self.lstm_param, lstm_state))

        # get index of most recent x input
        idx = len(self.x_list) - 1
        if idx == 0:
            # no recurrent inputs yet
            self.lstm_node_list[idx].bottom_data_is(x)
        else:
            s_prev = self.lstm_node_list[idx - 1].state.s
            h_prev = self.lstm_node_list[idx - 1].state.h
            self.lstm_node_list[idx].bottom_data_is(x, s_prev, h_prev)
test.py
import numpy as np
import sys
sys.path.append("/home/vj/Downloads/tfdeep/core_lstm/lstm/")
from lstm import LstmParam, LstmNetwork
class ToyLossLayer:
    """
    Computes square loss with first element of hidden layer array.

    Both methods are classmethods because the class itself, not an
    instance, is handed to the network as the loss layer.
    """

    @classmethod
    def loss(cls, pred, label):
        """Return the squared difference between ``pred[0]`` and ``label``."""
        return (pred[0] - label) ** 2

    @classmethod
    def bottom_diff(cls, pred, label):
        """Return the gradient of the loss w.r.t. ``pred`` (non-zero only at index 0)."""
        diff = np.zeros_like(pred)
        diff[0] = 2 * (pred[0] - label)
        return diff
def example_0():
    """Train a small LSTM for 100 iterations on one fixed four-step sequence.

    Each iteration feeds the four random inputs, prints the loss against
    the targets, applies the accumulated diffs and clears the inputs.
    """
    # learns to repeat simple sequence from random inputs
    np.random.seed(0)

    # parameters for input data dimension and lstm cell count
    x_dim = 50
    mem_cell_ct = 100
    lstm_param = LstmParam(mem_cell_ct, x_dim)
    lstm_net = LstmNetwork(lstm_param)

    y_list = [-0.5, 0.2, 0.1, -0.5]
    # One random input vector per target value.
    input_val_arr = [np.random.random(x_dim) for _ in y_list]

    for cur_iter in range(100):
        print("iter", "%2s" % str(cur_iter),": ")
        for step_input in input_val_arr:
            lstm_net.x_list_add(step_input)
        #print("y_pred = [" + ", ".join(["% 2.5f" % lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(y_list))]) +"]," end=":")
        loss = lstm_net.y_list_is(y_list, ToyLossLayer)
        print("loss:", "%.3e" % loss)
        lstm_param.apply_diff(lr=0.1)
        lstm_net.x_list_clear()
if __name__ == "__main__":
example_0()
While executing this test.py file I am getting the following error
Traceback (most recent call last):
File "", line 2, in
File "", line 14, in example_0
AttributeError: LstmNetwork instance has no attribute 'x_list_add'
I am struggling to get the output. I did not find any post related to my problem.

opencv FisherFaceRecognizer's train() function shows TypeError: src is not a numpy array, neither a scalar

I'm trying to modify the following code by training OpenCV's Fisher Face classifier for particular images of faces. And I don't know why the code below shows
Traceback (most recent call last):
File "create_model.py", line 109, in <module>
update(emotions)
File "create_model.py", line 104, in update
run_recognizer(emotions)
File "create_model.py", line 101, in run_recognizer
fishface.train(np.array(training_data), npar_trainlabs)
TypeError: src is not a numpy array, neither a scalar
training_data contains dlib's vectorized_landmarks and I'm converting them to numpy array, and training_labels is just either label 1 or 2.
Functions involved in Traceback are following ones:
fishface = cv2.face.createFisherFaceRecognizer()
emotions = ["True", "Glasses"]
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
# Build a flat feature list from the facial landmarks found in `image`.
# For every landmark it appends four numbers: the x and y offsets from the
# landmark centroid, the distance to the centroid and the angle relative to
# the nose angle. The string "error" is used when no face is detected.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements after the detection loop header cannot be read off - confirm
# against the original file.
def get_landmarks(image):
# Contrast-enhance the grayscale image (CLAHE) before running the detector.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
cimage = clahe.apply(gray)
detections = detector(cimage, 1)
landmarks_vectorised = []
for k, d in enumerate(detections): # For all detected face instances individually
shape = predictor(cimage, d) # Draw Facial Landmarks with the predictor class
xlist = []
ylist = []
# The range starts at 1, so shape.part(0) is never read.
for i in range(1, 68): # Store X and Y coordinates in two lists
xlist.append(float(shape.part(i).x))
ylist.append(float(shape.part(i).y))
xmean = np.mean(xlist) # Get the mean of both axes to determine centre of gravity
ymean = np.mean(ylist)
xcentral = [(x - xmean) for x in xlist] # get distance between each point and the central point in both axes
ycentral = [(y - ymean) for y in ylist]
if xlist[26] == xlist[
29]: # If x-coordinates of the set are the same, the angle is 0, catch to prevent 'divide by 0' error in function
anglenose = 0
else:
anglenose = int(math.atan((ylist[26] - ylist[29]) / (xlist[26] - xlist[29])) * 180 / math.pi)
# Shift the angle by 90 degrees towards zero.
if anglenose < 0:
anglenose += 90
else:
anglenose -= 90
landmarks_vectorised = []
# NOTE(review): if detections is empty, xcentral/ycentral/xlist/ylist were
# never assigned, and landmarks_vectorised is a str without .append - the loop
# below presumably must not run in that case; confirm the original nesting.
if len(detections) < 1:
landmarks_vectorised = "error"
for x, y, w, z in zip(xcentral, ycentral, xlist, ylist):
landmarks_vectorised.append(x)
landmarks_vectorised.append(y)
meannp = np.asarray((ymean, xmean))
coornp = np.asarray((z, w))
dist = np.linalg.norm(coornp - meannp)
# NOTE(review): unlike the nose angle above, this division has no guard for
# w == xmean.
anglerelative = (math.atan((z - ymean) / (w - xmean)) * 180 / math.pi) - anglenose
landmarks_vectorised.append(dist)
landmarks_vectorised.append(anglerelative)
return landmarks_vectorised
def make_sets(labels):
    """Collect landmark vectors and numeric labels for every image on disk.

    For each label the files matching ``data\\<label>\\*`` are read. Files
    that cannot be read as an image, and images for which ``get_landmarks``
    reports ``"error"``, are skipped. ``"True"`` maps to label 2 and
    ``"Glasses"`` to label 1.

    :param labels: iterable of label names (sub-folder names under ``data``)
    :return: ``(training_data, training_labels)`` as two parallel lists
    """
    training_data = []
    training_labels = []
    for label in labels:
        training = glob.glob("data\\%s\\*" % label)
        print(len(training))
        for item in training:
            try:
                image = cv2.imread(item)
            except cv2.error:
                # Only OpenCV failures are skipped; the former bare
                # ``except:`` also swallowed KeyboardInterrupt and real bugs.
                continue
            if image is None:
                # cv2.imread reports an unreadable file by returning None
                # instead of raising, so the try block alone never caught it.
                continue
            print(item)
            landmarks_vectorised = get_landmarks(image)
            if landmarks_vectorised == "error":
                print("error with landmarks")
                continue
            training_data.append(landmarks_vectorised)
            if str(label) == "True":
                training_labels.append(2)
            elif str(label) == "Glasses":
                training_labels.append(1)
    print("sets created")
    return training_data, training_labels
# NOTE(review): this is a line-for-line repeat of the make_sets definition
# directly before it; being the later definition, it is the one bound to the
# name when the module has finished loading.
# Collects landmark vectors and numeric labels ("True" -> 2, "Glasses" -> 1)
# for every file matching data\<label>\*.
def make_sets(labels):
training_data = []
training_labels = []
for label in labels:
training = glob.glob("data\\%s\\*" % label)
print(len(training))
for item in training:
# NOTE(review): the bare except hides every exception type - presumably only
# read failures were meant; confirm.
try:
image = cv2.imread(item)
except:
continue
print(item)
landmarks_vectorised = get_landmarks(image)
if landmarks_vectorised == "error":
print("error with landmarks")
pass
else:
training_data.append(landmarks_vectorised)
if str(label) == "True":
training_labels.append(2)
elif str(label) == "Glasses":
training_labels.append(1)
print("sets created")
return training_data, training_labels
def run_recognizer(emotions):
    """Build the training set for ``emotions`` and train ``fishface`` on it.

    :param emotions: label names forwarded to ``make_sets``
    """
    training_data, training_labels = make_sets(emotions)
    print("training fisher face classifier")
    print(type(training_data))
    print(type(training_labels))
    npar_train = np.array(training_data)
    npar_trainlabs = np.array(training_labels)
    # Reuse the converted array; it used to be built a second time while
    # npar_train stayed unused.
    # NOTE(review): the recognizer's train() presumably expects face images
    # rather than landmark vectors - verify against the OpenCV face module
    # documentation.
    fishface.train(npar_train, npar_trainlabs)
# Train the recognizer on the given labels, then write the trained model to
# "glasses.xml".
def update(emotions):
run_recognizer(emotions)
fishface.save("glasses.xml")
update(emotions)
Please help me understand the meaning of such an error.
Try to print your training_data and its dtype, maybe you can put them in a list, then transform the list to np.array. Do the same operations to your labels.

Categories