Adding more layers to a neural network - Python
I want to add more layers to the neural network. How can I customize this code? Can anyone suggest a better way?
In the model there is only one layer. In this part, can I define other weights? Should I just add other layers there? To me that seems like it would make no sense.
MODEL_TYPE = tff.StructType([
    ('weights', tff.TensorType(tf.float32, [784, 10])),
    ('bias', tff.TensorType(tf.float32, [10]))])
Then, in the batch loss function, the inputs are multiplied by the weights (and the bias is added) to get the predicted y; that is just a single layer:
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
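(For illustration only: extending the struct is how a second layer would be expressed at this level of the API. The sketch below is my own, not from the tutorial; the hidden width of 128, the names w1/b1/w2/b2 and batch_loss_2layer, and the ReLU activation are arbitrary choices, and the rest of the script, e.g. batch_train, the initial-parameter files and the weight averaging, would need matching shape changes.)
MODEL_TYPE_2LAYER = tff.StructType([
    ('w1', tff.TensorType(tf.float32, [784, 128])),
    ('b1', tff.TensorType(tf.float32, [128])),
    ('w2', tff.TensorType(tf.float32, [128, 10])),
    ('b2', tff.TensorType(tf.float32, [10]))])

@tff.tf_computation(MODEL_TYPE_2LAYER, BATCH_TYPE)
def batch_loss_2layer(model, batch):
    # Hidden layer first, then the same softmax output layer as before.
    hidden = tf.nn.relu(tf.matmul(batch.x, model.w1) + model.b1)
    predicted_y = tf.nn.softmax(tf.matmul(hidden, model.w2) + model.b2)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))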
The full code is here:
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import collections
import os
tff.backends.reference.set_reference_context()
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()
# NUM_EXAMPLES_PER_USER = 1000
BATCH_SIZE = 100
NUM_AGENT = 5
DECAY_FACTOR = 0.8
def get_data_for_digit(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
def get_data_for_digit_test(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples)):
output_sequence.append({
'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
dtype=np.float32),
'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
return output_sequence
def get_data_for_federated_agents(source, num):
output_sequence = []
Samples = []
for digit in range(0, 10):
samples = [i for i, d in enumerate(source[1]) if d == digit]
samples = samples[0:5421]
Samples.append(samples)
all_samples = []
for sample in Samples:
for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
all_samples.append(sample[sample_index])
# all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
BATCH_TYPE = tff.StructType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.StructType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
return -tf.reduce_mean(tf.reduce_sum(
tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
# Define a group of model variables and set them to `initial_model`.
model_vars = tff.utils.create_variables('v', MODEL_TYPE)
init_model = tff.utils.assign(model_vars, initial_model)
# Perform one step of gradient descent using loss from `batch_loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
with tf.control_dependencies([init_model]):
train_model = optimizer.minimize(batch_loss(model_vars, batch))
# Return the model vars after performing this gradient descent step.
with tf.control_dependencies([train_model]):
return tff.utils.identity(model_vars)
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)
@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
# Mapping function to apply to each batch.
    @tff.federated_computation(MODEL_TYPE, BATCH_TYPE)
def batch_fn(model, batch):
return batch_train(model, batch, learning_rate)
l = tff.sequence_reduce(all_batches, initial_model, batch_fn)
return l
@tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
#
return tff.sequence_sum(
tff.sequence_map(
tff.federated_computation(
lambda b: batch_loss(model, b), BATCH_TYPE),
all_batches))
SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
CLIENT_DATA_TYPE = tff.FederatedType(LOCAL_DATA_TYPE, tff.CLIENTS)
@tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
return tff.federated_mean(
tff.federated_map(local_eval, [tff.federated_broadcast(model), data]))
SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)
@tff.federated_computation(SERVER_MODEL_TYPE, SERVER_FLOAT_TYPE, CLIENT_DATA_TYPE)
def federated_train(model, learning_rate, data):
l = tff.federated_map(
local_train,
[tff.federated_broadcast(model),
tff.federated_broadcast(learning_rate),
data])
return l
# return tff.federated_mean()
def readTestImagesFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__),
"test_images1_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__),
"test_images1_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace(
"\n", "").split("\t")
for i in p:
if i != "":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def readTestLabelsFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__),
"test_labels_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__),
"test_labels_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
for i in p:
if i != "":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def getParmsAndLearningRate(agent_no):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split(
"***\n--------------------------------------------------")
parm_local = []
learning_rate_list = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0:784]
learning_rate_list.append(
float(line[784].replace("*", "").replace("\n", "")))
else:
weights_line = line[1:785]
learning_rate_list.append(
float(line[785].replace("*", "").replace("\n", "")))
valid_weights_line = []
for l in weights_line:
w_list = l.split("\t")
w_list = w_list[0:len(w_list) - 1]
w_list = [float(i) for i in w_list]
valid_weights_line.append(w_list)
parm_local.append(valid_weights_line)
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split(
"***\n--------------------------------------------------")
bias_local = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0]
else:
weights_line = line[1]
b_list = weights_line.split("\t")
b_list = b_list[0:len(b_list) - 1]
b_list = [float(i) for i in b_list]
bias_local.append(b_list)
f.close()
ret = {
'weights': np.asarray(parm_local),
'bias': np.asarray(bias_local),
'learning_rate': np.asarray(learning_rate_list)
}
return ret
def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type, iter_n, g_m):
model_g = {
'weights': g_m[0],
'bias': g_m[1]
}
for i in range(iter_n-1, iter_n):
        # i is the iteration round index
gradient_w = np.zeros([784, 10], dtype=np.float32)
gradient_b = np.zeros([10], dtype=np.float32)
for j in agent_list:
gradient_w = np.add(np.multiply(
grad[j][i], 1 / len(agent_list)), gradient_w)
gradient_b = np.add(np.multiply(
bi[j][i], 1 / len(agent_list)), gradient_b)
model_g['weights'] = np.subtract(
model_g['weights'], np.multiply(lr[0][i], gradient_w))
model_g['bias'] = np.subtract(
model_g['bias'], np.multiply(lr[0][i], gradient_b))
test_images = readTestImagesFromFile(False)
test_labels_onehot = readTestLabelsFromFile(False)
m = np.dot(test_images, np.asarray(model_g['weights']))
test_result = m + np.asarray(model_g['bias'])
y = tf.nn.softmax(test_result)
    correct_prediction = tf.equal(
        tf.argmax(y, 1), tf.argmax(test_labels_onehot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return accuracy.numpy()
def remove_list_indexed(removed_ele, original_l, ll):
new_original_l = []
for i in original_l:
new_original_l.append(i)
for i in new_original_l:
if i == removed_ele:
new_original_l.remove(i)
for i in range(len(ll)):
if set(ll[i]) == set(new_original_l):
return i
return -1
def shapley_list_indexed(original_l, ll):
for i in range(len(ll)):
if set(ll[i]) == set(original_l):
return i
return -1
def PowerSetsBinary(items):
N = len(items)
set_all = []
for i in range(2 ** N):
combo = []
for j in range(N):
if (i >> j) % 2 == 1:
combo.append(items[j])
set_all.append(combo)
return set_all
def loadHistoryModels(round_num):
f = open(os.path.join(os.path.dirname(__file__),
"gradientplus_models" + str(round_num) + ".txt"), "r")
lines = f.readlines()
ret_models = []
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
ret_models.append([w_initial, b_initial])
tem_model = []
for i, line in enumerate(lines):
if i % 2 == 0:
lis = line.strip().replace("[", "").replace("]", "").split(",")
lis = [float(i.strip()) for i in lis]
lis = np.array(lis).reshape([784, 10])
tem_model = [lis]
else:
lis = line.strip().replace("[", "").replace("]", "").split(",")
lis = [float(i.strip()) for i in lis]
lis = np.array(lis)
tem_model.append(lis)
ret_models.append(tem_model)
f.close()
return ret_models
if __name__ == "__main__":
start_time = time.time()
# data_num = np.asarray([5923, 6742, 5958, 6131, 5842])
# agents_weights = np.divide(data_num, data_num.sum())
for index in range(NUM_AGENT):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(index) + ".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(index) + ".txt"), "w")
f.close()
# f = open(os.path.join(os.path.dirname(__file__),
# "gradientplus_models.txt"), "w") #alice
# f.close()
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "w")
f.close()
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
DISTRIBUTION_TYPE = "SAME"
federated_train_data_divide = None
federated_train_data = None
if DISTRIBUTION_TYPE == "SAME":
federated_train_data_divide = [get_data_for_federated_agents(
mnist_train, d) for d in range(NUM_AGENT)]
federated_train_data = federated_train_data_divide
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters-1.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
initial_model = collections.OrderedDict(
weights=w_initial,
bias=b_initial)
model = initial_model
learning_rate = 0.1
for round_num in range(5):
f = open(os.path.join(os.path.dirname(__file__),
"gradientplus_models" + str(round_num) + ".txt"), "w") # alice
f.close()
local_models = federated_train(
model, learning_rate, federated_train_data)
print("learning rate: ", learning_rate)
        # print(local_models[0][0])  # weights matrix of agent 0
        # print(local_models[0][1])  # bias matrix of agent 0
for local_index in range(len(local_models)):
f = open(os.path.join(os.path.dirname(__file__), "weights_" +
str(local_index) + ".txt"), "a", encoding="utf-8")
for i in local_models[local_index][0]:
line = ""
arr = list(i)
for j in arr:
line += (str(j) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***", file=f)
print("-" * 50, file=f)
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" +
str(local_index) + ".txt"), "a", encoding="utf-8")
line = ""
for i in local_models[local_index][1]:
line += (str(i) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***", file=f)
print("-" * 50, file=f)
f.close()
m_w = np.zeros([784, 10], dtype=np.float32)
m_b = np.zeros([10], dtype=np.float32)
for local_model_index in range(len(local_models)):
m_w = np.add(np.multiply(
local_models[local_model_index][0], 1 / NUM_AGENT), m_w)
m_b = np.add(np.multiply(
local_models[local_model_index][1], 1 / NUM_AGENT), m_b)
model = {
'weights': m_w,
'bias': m_b
}
f_g = open(os.path.join(os.path.dirname(
__file__), "gradientplus_models" + str(round_num) + ".txt"), "a")
g_w = list(model['weights'].reshape(-1))
g_b = list(model['bias'].reshape(-1))
print(g_w, file=f_g)
print(g_b, file=f_g)
f_g.close()
f = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "a")
s = ""
for i in list(model['weights']):
for j in i:
s += str(j)+"\t"
s = s[0:-1]
print(s, file=f)
s = ""
for i in list(model['bias']):
s += str(i)+"\t"
s = s[0:-1]
print(s, file=f)
f.close()
learning_rate = learning_rate * 0.9
loss = federated_eval(model, federated_train_data)
print('round {}, loss={}'.format(round_num, loss))
print(time.time() - start_time)
shapstart_time = time.time() #change cal time
gradient_weights = []
gradient_biases = []
gradient_lrs = []
for ij in range(NUM_AGENT):
model_ = getParmsAndLearningRate(ij)
gradient_weights_local = []
gradient_biases_local = []
learning_rate_local = []
for i in range(len(model_['learning_rate'])):
if i == 0:
gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
model_['learning_rate'][i])
else:
gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
model_['learning_rate'][i])
gradient_weights_local.append(gradient_weight)
gradient_biases_local.append(gradient_bias)
learning_rate_local.append(model_['learning_rate'][i])
gradient_weights.append(gradient_weights_local)
gradient_biases.append(gradient_biases_local)
gradient_lrs.append(learning_rate_local)
all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
models_hository = loadHistoryModels(round_num)
agent_shapley_history = []
for iter_num in range(1, len(gradient_weights[0]) + 1):
group_shapley_value = []
for s in all_sets:
group_shapley_value.append(
train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE,
iter_num, models_hository[iter_num-1]))
print(str(s) + "\t" +
str(group_shapley_value[len(group_shapley_value) - 1]))
agent_shapley = []
for index in range(NUM_AGENT):
shapley = 0.0
for j in all_sets:
if index in j:
remove_list_index = remove_list_indexed(
index, j, all_sets)
if remove_list_index != -1:
shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
agent_shapley.append(shapley)
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "a")
print('round {}, loss={}'.format(round_num, agent_shapley), file=f)
agent_shapley_history.append(agent_shapley)
print('round {}, loss={}'.format(
round_num, agent_shapley_history), file=f)
f.close()
print("end_time shap values", time.time() - shapstart_time)
print("end_time", time.time() - start_time)
It seems the source of this code is one of the low-level TFF tutorials, which are really more about introducing users to the core communication abstractions of TFF, as opposed to serving as an introductory guide to federated learning with TFF.
I might suggest a different tutorial, such as the one that focuses on image classification. That tutorial uses TFF's tff.learning.from_keras_model utility to ingest a Keras model directly; if you go this route, you can add layers just as you would for any Keras model.
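A rough sketch of that route (my own illustration, not the exact tutorial code; the layer sizes, optimizer, and learning rate are arbitrary choices, the input_spec must describe one batch of your client data, and exact signatures depend on your TFF version):
import collections
import tensorflow as tf
import tensorflow_federated as tff

def create_keras_model():
    # Extra layers are added here exactly as in plain Keras.
    return tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(784,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')])

def model_fn():
    # Wrap the Keras model for TFF; input_spec matches the {'x', 'y'} batches above.
    return tff.learning.from_keras_model(
        create_keras_model(),
        input_spec=collections.OrderedDict(
            x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
            y=tf.TensorSpec(shape=[None], dtype=tf.int32)),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

# The federated training loop then comes from the library, as in that tutorial:
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.1))
# state = iterative_process.initialize()
# state, metrics = iterative_process.next(state, federated_train_data)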