Adding more layers to a neural network - Python

I want to add more layers to this neural network. How can I customize the code? Can anyone suggest a better way?
In the model there is only one layer. Is this the part where I would define the other weights? Should I add the other layers here, or would that make no sense?
MODEL_TYPE = tff.StructType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
Then, in the batch loss function, the inputs are multiplied by the weights to get the predicted y, which amounts to just a single layer:
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
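For reference, here is a rough sketch of what a two-layer version of this model struct and forward pass could look like (the hidden width of 256 and the names w1/b1/w2/b2 are arbitrary choices, and every other place in the code below that touches model.weights and model.bias would also have to be updated to handle the extra entries):

import tensorflow.compat.v1 as tf
import tensorflow_federated as tff

# Hypothetical two-layer model: one hidden ReLU layer of width 256, then softmax output.
TWO_LAYER_MODEL_TYPE = tff.StructType([
    ('w1', tff.TensorType(tf.float32, [784, 256])),
    ('b1', tff.TensorType(tf.float32, [256])),
    ('w2', tff.TensorType(tf.float32, [256, 10])),
    ('b2', tff.TensorType(tf.float32, [10]))])

def two_layer_batch_loss(model, batch):
    # Hidden layer followed by the same softmax cross-entropy as the original code.
    hidden = tf.nn.relu(tf.matmul(batch.x, model.w1) + model.b1)
    predicted_y = tf.nn.softmax(tf.matmul(hidden, model.w2) + model.b2)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))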
The full code is here:
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import collections
import os
tff.backends.reference.set_reference_context()
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()
# NUM_EXAMPLES_PER_USER = 1000
BATCH_SIZE = 100
NUM_AGENT = 5
DECAY_FACTOR = 0.8
def get_data_for_digit(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
def get_data_for_digit_test(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples)):
output_sequence.append({
'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
dtype=np.float32),
'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
return output_sequence
def get_data_for_federated_agents(source, num):
output_sequence = []
Samples = []
for digit in range(0, 10):
samples = [i for i, d in enumerate(source[1]) if d == digit]
samples = samples[0:5421]
Samples.append(samples)
all_samples = []
for sample in Samples:
for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
all_samples.append(sample[sample_index])
# all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
BATCH_TYPE = tff.StructType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.StructType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
return -tf.reduce_mean(tf.reduce_sum(
tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
# Define a group of model variables and set them to `initial_model`.
model_vars = tff.utils.create_variables('v', MODEL_TYPE)
init_model = tff.utils.assign(model_vars, initial_model)
# Perform one step of gradient descent using loss from `batch_loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
with tf.control_dependencies([init_model]):
train_model = optimizer.minimize(batch_loss(model_vars, batch))
# Return the model vars after performing this gradient descent step.
with tf.control_dependencies([train_model]):
return tff.utils.identity(model_vars)
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)
@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
# Mapping function to apply to each batch.
@tff.federated_computation(MODEL_TYPE, BATCH_TYPE)
def batch_fn(model, batch):
return batch_train(model, batch, learning_rate)
l = tff.sequence_reduce(all_batches, initial_model, batch_fn)
return l
@tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
#
return tff.sequence_sum(
tff.sequence_map(
tff.federated_computation(
lambda b: batch_loss(model, b), BATCH_TYPE),
all_batches))
SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
CLIENT_DATA_TYPE = tff.FederatedType(LOCAL_DATA_TYPE, tff.CLIENTS)
@tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
return tff.federated_mean(
tff.federated_map(local_eval, [tff.federated_broadcast(model), data]))
SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)
@tff.federated_computation(
SERVER_MODEL_TYPE, SERVER_FLOAT_TYPE, CLIENT_DATA_TYPE)
def federated_train(model, learning_rate, data):
l = tff.federated_map(
local_train,
[tff.federated_broadcast(model),
tff.federated_broadcast(learning_rate),
data])
return l
# return tff.federated_mean()
def readTestImagesFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__),
"test_images1_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__),
"test_images1_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace(
"\n", "").split("\t")
for i in p:
if i != "":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def readTestLabelsFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__),
"test_labels_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__),
"test_labels_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
for i in p:
if i != "":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def getParmsAndLearningRate(agent_no):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split(
"***\n--------------------------------------------------")
parm_local = []
learning_rate_list = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0:784]
learning_rate_list.append(
float(line[784].replace("*", "").replace("\n", "")))
else:
weights_line = line[1:785]
learning_rate_list.append(
float(line[785].replace("*", "").replace("\n", "")))
valid_weights_line = []
for l in weights_line:
w_list = l.split("\t")
w_list = w_list[0:len(w_list) - 1]
w_list = [float(i) for i in w_list]
valid_weights_line.append(w_list)
parm_local.append(valid_weights_line)
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split(
"***\n--------------------------------------------------")
bias_local = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0]
else:
weights_line = line[1]
b_list = weights_line.split("\t")
b_list = b_list[0:len(b_list) - 1]
b_list = [float(i) for i in b_list]
bias_local.append(b_list)
f.close()
ret = {
'weights': np.asarray(parm_local),
'bias': np.asarray(bias_local),
'learning_rate': np.asarray(learning_rate_list)
}
return ret
def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type, iter_n, g_m):
model_g = {
'weights': g_m[0],
'bias': g_m[1]
}
for i in range(iter_n-1, iter_n):
# i -> iteration (round) index
gradient_w = np.zeros([784, 10], dtype=np.float32)
gradient_b = np.zeros([10], dtype=np.float32)
for j in agent_list:
gradient_w = np.add(np.multiply(
grad[j][i], 1 / len(agent_list)), gradient_w)
gradient_b = np.add(np.multiply(
bi[j][i], 1 / len(agent_list)), gradient_b)
model_g['weights'] = np.subtract(
model_g['weights'], np.multiply(lr[0][i], gradient_w))
model_g['bias'] = np.subtract(
model_g['bias'], np.multiply(lr[0][i], gradient_b))
test_images = readTestImagesFromFile(False)
test_labels_onehot = readTestLabelsFromFile(False)
m = np.dot(test_images, np.asarray(model_g['weights']))
test_result = m + np.asarray(model_g['bias'])
y = tf.nn.softmax(test_result)
correct_prediction = tf.equal(
tf.argmax(y, 1), tf.argmax(test_labels_onehot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return accuracy.numpy()
def remove_list_indexed(removed_ele, original_l, ll):
new_original_l = []
for i in original_l:
new_original_l.append(i)
for i in new_original_l:
if i == removed_ele:
new_original_l.remove(i)
for i in range(len(ll)):
if set(ll[i]) == set(new_original_l):
return i
return -1
def shapley_list_indexed(original_l, ll):
for i in range(len(ll)):
if set(ll[i]) == set(original_l):
return i
return -1
def PowerSetsBinary(items):
N = len(items)
set_all = []
for i in range(2 ** N):
combo = []
for j in range(N):
if (i >> j) % 2 == 1:
combo.append(items[j])
set_all.append(combo)
return set_all
def loadHistoryModels(round_num):
f = open(os.path.join(os.path.dirname(__file__),
"gradientplus_models" + str(round_num) + ".txt"), "r")
lines = f.readlines()
ret_models = []
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
ret_models.append([w_initial, b_initial])
tem_model = []
for i, line in enumerate(lines):
if i % 2 == 0:
lis = line.strip().replace("[", "").replace("]", "").split(",")
lis = [float(i.strip()) for i in lis]
lis = np.array(lis).reshape([784, 10])
tem_model = [lis]
else:
lis = line.strip().replace("[", "").replace("]", "").split(",")
lis = [float(i.strip()) for i in lis]
lis = np.array(lis)
tem_model.append(lis)
ret_models.append(tem_model)
f.close()
return ret_models
if __name__ == "__main__":
start_time = time.time()
# data_num = np.asarray([5923, 6742, 5958, 6131, 5842])
# agents_weights = np.divide(data_num, data_num.sum())
for index in range(NUM_AGENT):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(index) + ".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(index) + ".txt"), "w")
f.close()
# f = open(os.path.join(os.path.dirname(__file__),
# "gradientplus_models.txt"), "w") #alice
# f.close()
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "w")
f.close()
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
DISTRIBUTION_TYPE = "SAME"
federated_train_data_divide = None
federated_train_data = None
if DISTRIBUTION_TYPE == "SAME":
federated_train_data_divide = [get_data_for_federated_agents(
mnist_train, d) for d in range(NUM_AGENT)]
federated_train_data = federated_train_data_divide
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters-1.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
initial_model = collections.OrderedDict(
weights=w_initial,
bias=b_initial)
model = initial_model
learning_rate = 0.1
for round_num in range(5):
f = open(os.path.join(os.path.dirname(__file__),
"gradientplus_models" + str(round_num) + ".txt"), "w") # alice
f.close()
local_models = federated_train(
model, learning_rate, federated_train_data)
print("learning rate: ", learning_rate)
# print(local_models[0][0])  # weights matrix of agent 0
# print(local_models[0][1])  # bias matrix of agent 0
for local_index in range(len(local_models)):
f = open(os.path.join(os.path.dirname(__file__), "weights_" +
str(local_index) + ".txt"), "a", encoding="utf-8")
for i in local_models[local_index][0]:
line = ""
arr = list(i)
for j in arr:
line += (str(j) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***", file=f)
print("-" * 50, file=f)
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" +
str(local_index) + ".txt"), "a", encoding="utf-8")
line = ""
for i in local_models[local_index][1]:
line += (str(i) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***", file=f)
print("-" * 50, file=f)
f.close()
m_w = np.zeros([784, 10], dtype=np.float32)
m_b = np.zeros([10], dtype=np.float32)
for local_model_index in range(len(local_models)):
m_w = np.add(np.multiply(
local_models[local_model_index][0], 1 / NUM_AGENT), m_w)
m_b = np.add(np.multiply(
local_models[local_model_index][1], 1 / NUM_AGENT), m_b)
model = {
'weights': m_w,
'bias': m_b
}
f_g = open(os.path.join(os.path.dirname(
__file__), "gradientplus_models" + str(round_num) + ".txt"), "a")
g_w = list(model['weights'].reshape(-1))
g_b = list(model['bias'].reshape(-1))
print(g_w, file=f_g)
print(g_b, file=f_g)
f_g.close()
f = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "a")
s = ""
for i in list(model['weights']):
for j in i:
s += str(j)+"\t"
s = s[0:-1]
print(s, file=f)
s = ""
for i in list(model['bias']):
s += str(i)+"\t"
s = s[0:-1]
print(s, file=f)
f.close()
learning_rate = learning_rate * 0.9
loss = federated_eval(model, federated_train_data)
print('round {}, loss={}'.format(round_num, loss))
print(time.time() - start_time)
shapstart_time = time.time()  # start timing the Shapley-value calculation
gradient_weights = []
gradient_biases = []
gradient_lrs = []
for ij in range(NUM_AGENT):
model_ = getParmsAndLearningRate(ij)
gradient_weights_local = []
gradient_biases_local = []
learning_rate_local = []
for i in range(len(model_['learning_rate'])):
if i == 0:
gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
model_['learning_rate'][i])
else:
gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
model_['learning_rate'][i])
gradient_weights_local.append(gradient_weight)
gradient_biases_local.append(gradient_bias)
learning_rate_local.append(model_['learning_rate'][i])
gradient_weights.append(gradient_weights_local)
gradient_biases.append(gradient_biases_local)
gradient_lrs.append(learning_rate_local)
all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
models_history = loadHistoryModels(round_num)
agent_shapley_history = []
for iter_num in range(1, len(gradient_weights[0]) + 1):
group_shapley_value = []
for s in all_sets:
group_shapley_value.append(
train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE,
iter_num, models_history[iter_num-1]))
print(str(s) + "\t" +
str(group_shapley_value[len(group_shapley_value) - 1]))
agent_shapley = []
for index in range(NUM_AGENT):
shapley = 0.0
for j in all_sets:
if index in j:
remove_list_index = remove_list_indexed(
index, j, all_sets)
if remove_list_index != -1:
shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
agent_shapley.append(shapley)
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "a")
print('round {}, loss={}'.format(round_num, agent_shapley), file=f)
agent_shapley_history.append(agent_shapley)
print('round {}, loss={}'.format(
round_num, agent_shapley_history), file=f)
f.close()
print("end_time shap values", time.time() - shapstart_time)
print("end_time", time.time() - start_time)

It seems the source of this code is one of the low-level TFF tutorials, which are really more about introducing users to the core communication abstractions of TFF, as opposed to serving as an introductory guide to federated learning with TFF.
I might suggest a different tutorial, such as the one that focuses on image classification. It uses TFF's tff.learning.from_keras_model utility to ingest a Keras model directly; if you go this route, you can add layers as you would for any Keras model.
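For example, a minimal sketch of that route might look like the following (the exact tff.learning API names vary across TFF versions, and the input_spec here assumes the same 784-feature, integer-label batches used in the code above):

import collections
import tensorflow as tf
import tensorflow_federated as tff

def model_fn():
    # Any stack of Keras layers can go here; this two-layer MLP is just an example.
    keras_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(784,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=collections.OrderedDict(
            x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
            y=tf.TensorSpec(shape=[None], dtype=tf.int32)),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.1))

From there, iterative_process.initialize() and iterative_process.next(state, federated_train_data) take the place of the hand-written federated_train loop above.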

Related

Removing a custom class from inside view.py - Django

Here is my view.py. Although the code is a bit long, I'd like to move the class Ibm() from inside the else branch into a new file called Ibm_class.py. I tried to do that but couldn't find a way.
def index(request):
if 'GET' == request.method:
return render(request, 'auditoria_app/index.html')
else:
class Ibm(object):
def __init__(self, i):
self.numeroIbm = inputsCombo[get_column_letter(i) + str(10)].value
self.nome = inputsCombo[get_column_letter(i) + str(11)].value
self.cidade = inputsCombo[get_column_letter(i) + str(12)].value
self.uf = inputsCombo[get_column_letter(i) + str(13)].value
self.anosProjeto = inputsCombo[get_column_letter(i) + str(16)].value
self.anosAlternativa = inputsDNCombo[get_column_letter(i) + str(14)].value
if self.anosAlternativa is None:
self.anosAlternativa = 0
self.tipoInvestimento = inputsCombo[get_column_letter(i) + str(21)].value
self.tipoProposta = inputsCombo[get_column_letter(i) + str(24)].value
self.inicioVigencia = inputsCombo[get_column_letter(i) + str(34)].value
self.prazoContrato = inputsCombo[get_column_letter(i) + str(38)].value
# gas station variables
self.faixaMargem = inputsCombo[get_column_letter(i) + str(19)].value
self.rebateTotalCI = 0
self.rebateTotalCB = 0
self.unitariaCI = 0
self.volumeMensalCI = inputsCombo[get_column_letter(i) + str(60)].value
self.volumeMensalCB = inputsDNCombo[get_column_letter(i) + str(32)].value
self.margemCurva = inputsCombo[get_column_letter(i) + str(67)].value * 1000
self.margemCI = []
self.margemCB = []
self.volume12m = 0
self.margem12m = 0
self.curvaPostoCI = []
self.curvaPostoCB = []
self.rebateCI = []
self.rebateCB = []
self.faixaReal = ''
self.volumeTotalCI = inputsCombo[get_column_letter(i) + str(151)].value
self.volumeTotalCB = inputsDNCombo[get_column_letter(i) + str(121)].value
# SELECT store variables
self.feeIsencao = inputsCombo[get_column_letter(i) + str(220)].value
self.feeFaturamento = inputsCombo[get_column_letter(i) + str(222)].value
self.feeReal = inputsCombo[get_column_letter(i) + str(224)].value
self.faturamento = inputsCombo[get_column_letter(i) + str(173)].value
self.pvl = inputsCombo[get_column_letter(i) + str(184)].value
self.feeLoja = inputsCombo[get_column_letter(i) + str(174)].value
self.cashback = []
# credit variables
self.prazoMogasCI1 = inputsCombo[get_column_letter(i) + str(159)].value
self.prazoMogasCI2 = inputsCombo[get_column_letter(i) + str(160)].value
self.prazoMogasCI3 = inputsCombo[get_column_letter(i) + str(161)].value
self.prazoMogasCB1 = inputsDNCombo[get_column_letter(i) + str(151)].value
self.prazoMogasCB2 = inputsDNCombo[get_column_letter(i) + str(152)].value
self.prazoMogasCB3 = inputsDNCombo[get_column_letter(i) + str(153)].value
# economics IBM
self.unitariaIBM = arredonda(outputCombo[get_column_letter(i - 1) + str(42)].value)
self.hsIBM = arredonda(outputCombo[get_column_letter(i - 1) + str(36)].value)
self.cmIBM = percentual(outputCombo[get_column_letter(i - 1) + str(39)].value)
self.tirIBM = percentual(outputCombo[get_column_letter(i - 1) + str(54)].value)
self.npvIBM = arredonda(outputCombo[get_column_letter(i - 1) + str(55)].value)
# if self.tipoInvestimento != 'Loja':
for k in range(0, self.anosProjeto):
margemCI = inputsCombo[get_column_letter(i) + str(109 + k)].value
if margemCI is None or margemCI == '':
margemCI = 0
self.margemCI.append(arredonda(margemCI * 1000))
curvaPostoCI = inputsCombo[get_column_letter(i) + str(130 + k)].value
if curvaPostoCI is None or curvaPostoCI == 0:
curvaPostoCI = 1
self.curvaPostoCI.append(arredonda(curvaPostoCI * 100))
rebateCI = inputsCombo[get_column_letter(i) + str(305 + k)].value
if rebateCI is None:
rebateCI = 0
self.rebateCI.append(rebateCI * 1000)
cashback = inputsCombo[get_column_letter(i) + str(485 + k)].value
if cashback is None:
cashback = 0
else:
cashback = cashback / self.faturamento
self.cashback.append(cashback)
for y in range(0, self.anosAlternativa):
margemCB = inputsDNCombo[get_column_letter(i) + str(79 + y)].value
if margemCB is None or margemCB == '':
margemCB = 0
self.margemCB.append(arredonda(margemCB * 1000))
curvaPostoCB = inputsDNCombo[get_column_letter(i) + str(100 + y)].value
if curvaPostoCB is None or curvaPostoCB == 0:
curvaPostoCB = 1
self.curvaPostoCB.append(arredonda(curvaPostoCB * 100))
rebateCB = inputsDNCombo[get_column_letter(i) + str(204 + y)].value
if rebateCB is None:
rebateCB = 0
self.rebateCB.append(rebateCB * 1000)
# Selecionando os dados da Loja Select
# if self.tipoInvestimento == 'Loja':
self.curvaLojaCI = []
self.curvaLojaCB = []
for k in range(0, self.anosProjeto):
curvaLojaCI = inputsCombo[get_column_letter(i) + str(198 + k)].value
if curvaLojaCI is None or curvaLojaCI == 0:
curvaLojaCI = 1
self.curvaLojaCI.append(curvaLojaCI)
for k in range(0, self.anosAlternativa):
curvaLojaCB = inputsDNCombo[get_column_letter(i) + str(123 + k)].value
if curvaLojaCB is None or curvaLojaCB == 0:
curvaLojaCB = 1
self.curvaLojaCB.append(curvaLojaCB)
self.fpCI = inputsCombo[get_column_letter(i) + str(283)].value
if self.fpCI is None:
self.fpCI = 0
self.rviCI = inputsCombo[get_column_letter(i) + str(285)].value
if self.rviCI is None:
self.rviCI = 0
self.fpCB = inputsDNCombo[get_column_letter(i) + str(201)].value
if self.fpCB is None:
self.fpCB = 0
self.rviCB = inputsDNCombo[get_column_letter(i) + str(202)].value
if self.rviCB is None:
self.rviCB = 0
# initializing gas station functions
self.get_volume()
self.rebate_total_CI()
self.rebate_total_CB()
# self.unitaria_CI()
self.check_faixa()
def get_volume(self):
if self.tipoInvestimento != 'Loja':
for j in range(2, sigma.max_row + 1):
if self.numeroIbm == sigma['A' + str(j)].value:
self.margem12m = sigma['D' + str(j)].value
self.volume12m = sigma['E' + str(j)].value
def check_faixa(self):
if self.tipoInvestimento != 'Loja':
if self.faixaMargem[0] == 'R':
pass
else:
for row in range(5, faixas.max_row + 1):
cidade = faixas['C' + str(row)].value
if cidade == unidecode.unidecode(self.cidade.upper() + '/' + self.uf.upper()):
self.faixaReal = faixas['D' + str(row)].value
if self.faixaReal == '':
self.faixaReal = 'n/a'
def rebate_total_CI(self):
if self.tipoInvestimento != 'Loja':
rebateTotalCI = 0
for k in range(0, self.anosProjeto):
rebateTotalCI += self.volumeMensalCI * 12 * self.curvaPostoCI[k] * self.rebateCI[k]
self.rebateTotalCI = arredonda(rebateTotalCI / 1000)
def rebate_total_CB(self):
if self.tipoInvestimento != 'Loja':
rebateTotalCB = 0
for k in range(0, self.anosAlternativa):
rebateTotalCB += self.volumeMensalCB * 12 * self.curvaPostoCB[k] * self.rebateCB[k]
self.rebateTotalCB = arredonda(rebateTotalCB / 1000)
# UNIT might be extracted from 'OutPut COMBO'
# def unitaria_CI(self):
# self.unitariaCI = (self.rebateTotalCI + self.fpCI + self.rviCI)*10/self.volumeTotalCI
def check_volume_mensal(self):
return self.volumeMensalCI - self.volumeMensalCB
def check_volume_total(self):
return self.volumeTotalCI - self.volumeTotalCB
def check_volumeCB_SIC(self):
return self.volumeMensalCB
excel_file = request.FILES["excel_file"]
wb = openpyxl.load_workbook(excel_file, data_only=True)
inputsCombo = wb['Inputs COMBO']
inputsDNCombo = wb['Inputs DN COMBO']
outputCombo = wb['OutPut COMBO']
faixas = wb['Faixas']
wb2 = openpyxl.load_workbook('media/sigma.xlsx', data_only=True)
sigma = wb2['Planilha1']
wb3 = openpyxl.load_workbook('media/performance.xlsx', data_only=True)
sic = wb3['Performance']
wb4 = openpyxl.load_workbook('media/ultimoContrato.xlsm', data_only=True)
ultimoContrato = wb4['Base']
numeroIbms = inputsCombo['F5'].value
ibms = []
for i in range(8, 8 + numeroIbms):
ibm = Ibm(i)
ibms.append(ibm)
return render(request, 'auditoria_app/index.html', {'ibms': ibms})
-> wb: the file is loaded from an upload
-> wb2, wb3, wb4: files are stored inside the media folder
I tried to copy/paste this class and then import it into view.py, but the excel_file variable must be inside the view and is still needed inside Ibm_class.py. Any suggestions, please?
Thank you!
Use dependency injection in your class instead of relying on local/global scope variables. Instead of directly using the inputsCombo, inputsDNCombo, outputCombo, faixas, and sigma variables within your class, pass them to the Ibm constructor and store them as attributes.
views.py
...
from my_app.ibm import Ibm # Or wherever you would put that class
...
def index(request):
...
else:
excel_file = request.FILES["excel_file"]
wb = openpyxl.load_workbook(excel_file, data_only=True)
inputsCombo = wb['Inputs COMBO']
inputsDNCombo = wb['Inputs DN COMBO']
outputCombo = wb['OutPut COMBO']
faixas = wb['Faixas']
wb2 = openpyxl.load_workbook('media/sigma.xlsx', data_only=True)
sigma = wb2['Planilha1']
...
for i in range(8, 8 + numeroIbms):
# Pass the dependencies to the constructor of Ibm class
ibm = Ibm(i, inputsCombo, inputsDNCombo, outputCombo, faixas, sigma)
...
...
ibm.py
class Ibm(object):
# Accept the injected parameters to Ibm class
def __init__(self, i, inputsCombo, inputsDNCombo, outputCombo, faixas, sigma):
self.numeroIbm = inputsCombo[get_column_letter(i) + str(10)].value
self.anosAlternativa = inputsDNCombo[get_column_letter(i) + str(14)].value
self.unitariaIBM = arredonda(outputCombo[get_column_letter(i - 1) + str(42)].value)
# For the arguments that will be used by your other functions e.g. check_faixa, define them as class attributes here
self.faixas = faixas
self.sigma = sigma
def get_volume(self):
if self.tipoInvestimento != 'Loja':
for j in range(2, self.sigma.max_row + 1): # Append "self." to the usage of "sigma" to refer to the class attribute that was set in __init__
if self.numeroIbm == self.sigma['A' + str(j)].value:
self.margem12m = self.sigma['D' + str(j)].value
self.volume12m = self.sigma['E' + str(j)].value
def check_faixa(self):
if self.tipoInvestimento != 'Loja':
if self.faixaMargem[0] == 'R':
pass
else:
for row in range(5, self.faixas.max_row + 1): # Append "self." to the usage of "faixas" to refer to the class attribute that was set in __init__
cidade = self.faixas['C' + str(row)].value
if cidade == unidecode.unidecode(self.cidade.upper() + '/' + self.uf.upper()):
self.faixaReal = self.faixas['D' + str(row)].value
if self.faixaReal == '':
self.faixaReal = 'n/a'

Memory leak in Python code

I am trying to build my own dataset with torch in Python. However, memory usage grows with every file I open.
The memory leak occurs during the for loop below:
for subj, _file in enumerate(filelist):
I removed the unnecessary parts of the code, such as the imports.
parser.add_argument('--stddev', dest='stddev', default='subject', type=str, help="How to calculate ")
args = parser.parse_args()
# Options
if args.sess_num == 0:
label_set = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
base_dir = './kriss_proto'
ori_base_dir = '{}/original'.format(base_dir)
filelist = os.listdir(ori_base_dir) # ['BD001.mat', ...]
filenum = len(filelist)
# Saving data
eeg_data = {}
eeg_data['dataset'] = {}
split_data = {}
split_data['splits'] = []
split_data['splits'].append({})
_meanstd = []
for i in range(32):
_meanstd.append([])
if args.stddev == 'subject':
for j in range(filenum):
_meanstd[i].append([])
idx = 0
for subj, _file in enumerate(filelist):
filepath = os.path.join(ori_base_dir, _file)
with h5py.File(filepath, 'r') as mat:
## Step by step (with object searching)
bd_eeg = mat.get('BD_EEG')
vst = bd_eeg.get('VST')
for run in vst.keys():
print("Processing {} - {}".format(_file, run))
run_data = vst.get(run)
eeg = run_data['data3D']
eeg = th.Tensor(eeg)
eeg = eeg.permute(2, 1, 0)
label = run_data['class_labels']
label = th.Tensor(label)
label = th.squeeze(label)
# DATA COPY
for i in range(int(label.shape[0])):
if (label[i] in label_set) and (int(eeg[i].shape[0]) > 400):
eeg_data['dataset'][idx] = {}
eeg_data['dataset'][idx]['eeg'] = eeg[i, :, :32]
eeg_data['dataset'][idx]['label'] = label[i] - label_set[0] # label should start from 0
if args.stddev == 'whole':
for j in range(32):
for k in range(int(eeg[i].shape[0])):
_meanstd[j].append(eeg[i][k, j])
elif args.stddev == 'subject':
for j in range(32):
for k in range(int(eeg[i].shape[0])):
_meanstd[j][subj].append(eeg[i][k, j])
idx += 1
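One thing worth checking (an assumption, since the question does not pinpoint the leak): eeg[i, :, :32] is a view that shares storage with the full run tensor, so keeping many such slices in eeg_data keeps every full run's buffer alive. Copying the slice with .clone() before storing it lets the parent buffer be freed. A small self-contained illustration:

import torch as th

big = th.randn(100, 500, 64)       # stands in for one run's data3D tensor
view = big[0, :, :32]              # a view: shares big's storage
copy = big[0, :, :32].clone()      # a copy: owns only 500 * 32 elements

print(view.storage().size())       # 3200000 -> the whole of big stays reachable
print(copy.storage().size())       # 16000

In the loop above, that would mean storing eeg[i, :, :32].clone() instead of the raw slice.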

Time complexity issues in my program

My Viterbi program runs in exponential time. Can you help me find the place I can change to turn it into a dynamic program? I only need to remember and use the two previous tags of the words.
Thanks a lot.
from collections import defaultdict
import sys
import re
import feature_maker as fm
bla = ''
all_states = set()
#distirbuition over all of the corpus
POS_probability = fm.load_obj('probas')
POS_probability['START'] = 1.0
def cpd_tagwords(words, tag):
pattern = re.compile("\W")# to check for .,: etc.
if pattern.match(words) and tag == words:
return 1
elif pattern.match(tag):
return 0
for word in emle.split("\n"):
if word.__contains__(words) and word.__contains__(tag):
return word[word.index(":") + 2:]
# if we don't have data about the word with the tag, just return the probability
# of the tag over all of the words in the corpus.
return POS_probability[tag]
def cpd_tags(early, prev, current):
lambda1 = 0
lambda3 = 0
lambda6 = 0
for word in qmle.split("\n"):
word1 = word.split()
if len(word1) > 0:
if word1[0].__contains__(current): #for tuple of 1
if len(word1) == 2:
lambda1 = word[word.index("]:") + 3:]
if len(word1) > 2 and word1[1].__contains__(prev): #for tuple of 2
if len(word1) == 3:
lambda3 = word[word.index("]:") + 3:]
if len(word1) > 3 and word1[2].__contains__(early): #for tuple of 3
if len(word1) == 4:
lambda6 = word[word.index("]:") + 3:]
return (0.6*float(lambda6)) + (0.3*float(lambda3)) + (0.1*float(lambda1))
# map: popular_copules['POS'] = list of all POS tags that can come before it.
popular_copules = fm.load_obj('popular_copules')
# Viterbi Algo
def viterbi(sentence, tags1):
def findSet(index,tag):
if tag == 'ALL':
return tags1
if index in range(1, len(sentence) + 1):
possible_tags = set(popular_copules[tag])
if possible_tags == set([]):
return tags1
return set(popular_copules[tag])
elif index == 0 or index == -1:
return {'START'}
# stores (word:tag) in this whole sentence
sentence_with_tag = defaultdict(str)
# inner function to compute pi values -- start
def pi_viterbi(k, u, v, sentence):#here is the start of the bad sequence
prob = defaultdict(float)
# initialization
if k == 0 and u == 'START' and v == 'START':
return (1., 'START')
else:
for w in findSet(k - 2,u):
prev = pi_viterbi(k - 1, w, u, sentence)[0]
# tuple((w,u,v))
q = cpd_tags(w, u, v)
e = cpd_tagwords(sentence[k - 1].lower(), v)
probability = float(prev) * q * float(e)
prob[tuple((w, u))] = probability
#here is the end of the bad sequence
max_tuple = max(prob.items(), key=lambda x: x[1])
# print (max_tuple[1],max_tuple[0][0])
return max_tuple[1], max_tuple[0][0]
# inner function to compute pi values -- end
sentence_with_tag = list()
backpointer = defaultdict(str)
tags = defaultdict(str)
k = len(sentence)
u_glob = ''
v_glob = ''
glob = 0.
for i in range(1, k + 1):
prob = defaultdict(float)
#for current word we check all the tags
""" changed from for u in findSet(i - 1):"""
for u in findSet(i ,'ALL'):
# going backwards we call findSet with u so it gives us only
# tags v that go together a lot with u (this is pruning)
""" changed from for v in findSet(i)"""
for v in findSet(i-1,u_glob):
# switched u and v
value, w = pi_viterbi(i, v, u, sentence)#the v recursion in the algorithm
prob[tuple((i, u, v))] = value
backpointer[tuple((i, u, v))] = w #bp from the algorithm
max_tuple = max(prob.items(), key=lambda x: x[1])
backpointer[tuple((i, max_tuple[0][1], max_tuple[0][-1]))] = max_tuple[0][1] # bp (k,u,v)= tag w
# sentence_with_tag.append(max_tuple[0][-1])
u_glob = max_tuple[0][-2]
v_glob = max_tuple[0][-1]
glob = max_tuple[1]
print ('Max', max_tuple)
tags[k - 1] = u_glob
tags[k] = v_glob
for i in range((k - 2), 0, -1):
tag = backpointer[tuple(((i + 2), tags[i + 1], tags[i + 2]))]
tags[i] = tag
tag_list = list()
for i in range(1, len(tags) + 1):
tag_list.append(tags[i])
file = open(sys.argv[4], 'w')
file.truncate()
for word in tag_list:
file.write(word)
# tag list as results
return tag_list
file=open(sys.argv[1],"r+")
fQ = open(sys.argv[2], 'r')
qmle = fQ.read()
fQ.close()
f = open("tags.txt",'r+')
tags = f.read()
f.close()
fe = open(sys.argv[3], 'r')
emle = fe.read()
distinct_tags = set()
# what is the list of all tags?
for word in tags.split():
distinct_tags.add(word)
sentence = []
sentence1 = []
sentence1 = file.read()
sentence = sentence1.split()
file.close()
file = open(sys.argv[4], 'w')
file.truncate()
viterbi(sentence, distinct_tags)
How can I reduce the time complexity?
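The exponential blow-up comes from pi_viterbi recomputing the same (k, u, v) subproblems over and over. Memoizing (or tabulating) the pi values so that each (k, u, v) triple is computed only once makes the run time polynomial in the sentence length and the size of the tag set. A minimal self-contained sketch of the idea, with dummy q/e functions standing in for cpd_tags and cpd_tagwords:

from functools import lru_cache

# Toy stand-ins for the question's cpd_tags / cpd_tagwords lookups.
TAGS = ('N', 'V', 'D')

def q(w, u, v):
    # trigram transition probability q(v | w, u); dummy constant here
    return 0.5

def e(word, v):
    # emission probability e(word | v); dummy constant here
    return 0.5

def viterbi(sentence):
    sentence = tuple(sentence)

    def tag_set(k):
        return ('START',) if k <= 0 else TAGS

    @lru_cache(maxsize=None)
    def pi(k, u, v):
        # Each (k, u, v) subproblem is computed exactly once and then cached.
        if k == 0:
            return (1.0, 'START') if (u, v) == ('START', 'START') else (0.0, 'START')
        best = (0.0, 'START')
        for w in tag_set(k - 2):
            p = pi(k - 1, w, u)[0] * q(w, u, v) * e(sentence[k - 1].lower(), v)
            if p >= best[0]:
                best = (p, w)
        return best

    n = len(sentence)
    # Pick the best final tag pair, then follow the stored back-pointers.
    _, (u, v) = max(((pi(n, u, v)[0], (u, v))
                     for u in tag_set(n - 1) for v in tag_set(n)),
                    key=lambda t: t[0])
    tags = {n - 1: u, n: v}
    for k in range(n, 2, -1):
        tags[k - 2] = pi(k, tags[k - 1], tags[k])[1]
    return [tags[k] for k in range(1, n + 1)]

print(viterbi("the dog runs".split()))

Applied to the question's code, that means caching the (probability, back-pointer) pair returned by pi_viterbi in a dictionary (or via functools.lru_cache) keyed by (k, u, v), and returning the cached value on repeated calls instead of recursing again.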

make_node requires 4D tensor of kernels

I have trained a CNN model and saved its parameters in five files, but when I use these params to test photos, I get the error shown in the title ("make_node requires 4D tensor of kernels").
The code of load_data is:
def load_data(pag_name):
k = 0
for filename in os.listdir(pag_name):
if (filename != '.DS_Store'):
k = k + 1
num = k
# test_per = k*4
print k
i = 0
j = 0
label = 0
train_set = numpy.empty((num, 1, 56, 56))
while (j < 1):
for filename in os.listdir(pag_name):
if (filename != '.DS_Store'):
filename = pag_name+ '/' + filename
image = Image.open(filename)
#print image.size
#print image
img_ndarray = numpy.asarray(image, dtype='float64') / 256
img_ndarray = numpy.asarray([img_ndarray])
# train_set[i] = numpy.ndarray.flatten(img_ndarray)
train_set[i] = img_ndarray
#print train_set.shape
# print filename1
# print 'label:', label
# print 'i:',i
i = i + 1
j = j + 1
def shared_dataset(data_x, borrow=True):
shared_x = theano.shared(numpy.asarray(data_x,
dtype=theano.config.floatX),
borrow=borrow)
return shared_x
train_set = shared_dataset(train_set)
print train_set.get_value(borrow=True).shape
return train_set
And the code of use_CNN is:
def use_CNN(pag_name,nkerns=[20,40,60]):
data = load_data(pag_name)
data_num = data.get_value(borrow=True).shape[0]
layer0_params,layer01_params,layer1_params,layer2_params,layer3_params = load_params()
x = T.matrix('x')
layer0_input = x.reshape((data_num,1,56,56))
layer0 = LeNetConvPoolLayer(
input=layer0_input,
params_W = layer0_params[0],
params_b = layer0_params[1],
image_shape=(data_num, 1, 56, 56),
filter_shape=(nkerns[0], 1, 5,5),
poolsize=(2, 2)
)
I have not met this problem before, and I don't know where and how to change my code.
The cause of this error is that the loaded params are not 4D but 3D: my W should be (20, 1, 5, 5), but I load (1, 5, 5), so I hit this problem.
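In other words, the filter weights passed to the conv layer must be a 4D array of shape (n_kernels, n_channels, height, width). A quick, hypothetical shape check at load time (the expected shape comes from the description above; how load_params reads the files is not shown, so this helper is only illustrative) makes this kind of mismatch fail early with a clear message:

import numpy

def check_conv_weights(W, expected_shape=(20, 1, 5, 5)):
    """Fail early if loaded conv weights are not the 4D kernel tensor the conv op expects."""
    W = numpy.asarray(W)
    if W.ndim != 4:
        raise ValueError(
            "conv weights must be 4D (n_kernels, n_channels, h, w); got shape %s"
            % (W.shape,))
    if W.shape != expected_shape:
        raise ValueError("expected shape %s, got %s" % (expected_shape, W.shape))
    return W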

Avoid 'Memory Error' when dealing with large arrays

I'm sometimes facing a MemoryError: sometimes the code runs fine and sometimes the error pops up, specifically when trying to subtract a large array from one. I have tried many ways to do this subtraction; is there any way to avoid the error? And could other parts of my code also raise it at some point?
Here is my code:
def home(request):
if request.method=="POST":
img = UploadForm(request.POST, request.FILES)
no_clus = int(request.POST.get('num_clusters', 10))
if img.is_valid():
paramFile =io.TextIOWrapper(request.FILES['pic'].file)
portfolio1 = csv.DictReader(paramFile)
users = []
users = [row["BASE_NAME"] for row in portfolio1]
print(len(users))
my_list = users
vectorizer = CountVectorizer()
dtm = vectorizer.fit_transform(my_list)
lsa = TruncatedSVD(n_components=100)
dtm_lsa = lsa.fit_transform(dtm)
dtm_lsa = Normalizer(copy=False).fit_transform(dtm_lsa)
dist1 = (1- np.asarray(numpy.asmatrix(dtm_lsa) * numpy.asmatrix(dtm_lsa).T))
# print(1-similarity)
k = len(my_list)
# dist1 = (1- similarity)
# dist1=similarity
# dist1.astype(float)
#print(dist1)
# print(cosine_similarity(tfidf_matrix[3:4], tfidf_matrix))
# float dist = 1 - similarity;
data2 = np.asarray(dist1)
arr_3d = data2.reshape((1, k, k))
# arr_3d= 1- arr_3d
#print(arr_3d)
no_cluster = number_cluster(len(my_list))
print(no_cluster)
for i in range(len(arr_3d)):
# print (i+1910)
# km = AgglomerativeClustering(n_clusters=no_clus, linkage='ward').fit(arr_3d[i])
km = AgglomerativeClustering(n_clusters=no_cluster, linkage='average').fit(arr_3d[i])
# km = AgglomerativeClustering(n_clusters=no_clus, linkage='complete').fit(arr_3d[i])
# km = MeanShift()
# km = KMeans(n_clusters=no_clus, init='k-means++')
# km = MeanShift()
# km = km.fit(arr_3d[i])
# print km
labels = km.labels_
csvfile = settings.MEDIA_ROOT +'\\'+ 'images\\export.csv'
csv_input = pd.read_csv(csvfile, encoding='latin-1')
csv_input['cluster_ID'] = labels
csv_input['BASE_NAME'] = my_list
csv_input.to_csv(settings.MEDIA_ROOT +'/'+ 'output.csv', index=False)
clus_groups = list()
for j in range(no_cluster):
# print(" cluster no %i:%s" % (j, [my_list[i] for i, x in enumerate(labels) if x == j]))
list_of_ints = ([my_list[i] for i, x in enumerate(labels) if x == j])
clus_groups.append(' '.join(list_of_ints))
vectorizer = CountVectorizer()
dtm = vectorizer.fit_transform(my_list)
lsa = TruncatedSVD(n_components=100)
dtm_lsa = lsa.fit_transform(dtm)
dtm_lsa = Normalizer(copy=False).fit_transform(dtm_lsa)
dist1 = (1 - np.asarray(numpy.asmatrix(dtm_lsa) * numpy.asmatrix(dtm_lsa).T))
# similarity = np.asarray(numpy.asmatrix(dtm_lsa) * numpy.asmatrix(dtm_lsa).T)
k = len(my_list)
# dist1 = 1 - similarity
data2 = np.asarray(dist1)
arr_3d = data2.reshape((1, k, k))
# arr_3d= 1- arr_3d
#no_clus = 5
# no_clus=get_name(request)
for i in range(len(arr_3d)):
# print (i+1910)
# km = AgglomerativeClustering(n_clusters=no_clus, linkage='ward').fit(arr_3d[i])
# km = AgglomerativeClustering(n_clusters=no_clus, linkage='average').fit(arr_3d[i])
# km = AgglomerativeClustering(n_clusters=no_clus, linkage='complete').fit(arr_3d[i])
km = KMeans(n_clusters=no_clus, init='k-means++')
km = km.fit(arr_3d[i])
# print km
labels2 = km.labels_
# error = km.inertia_
print(labels2)
labels = labels.tolist()
labels2 = labels2.tolist()
# new=list()
csv_input = pd.read_csv(settings.MEDIA_ROOT +'/'+ 'output.csv',encoding='latin-1')
labels1 = csv_input['cluster_ID']
new_list = []
for k in labels1:
new_list.append(labels2[k]) # lookup the value in list2 at the index given by list1
print(new_list)
print(len(new_list))
csv_input = pd.read_csv(settings.MEDIA_ROOT +'/'+ 'output.csv',encoding='latin-1')
csv_input['cluster_ID'] = labels
csv_input['BASE_NAME'] = my_list
csv_input['User_Map'] = new_list
csv_input.to_csv(settings.MEDIA_ROOT + '/' + 'output1.csv', index=False)
#filename= settings.MEDIA_ROOT +'/'+ 'output.csv'
send_file(request)
# my_list = portfolio
#save_file('output1.csv')
# csv(request)
# return HttpResponseRedirect(reverse('labels'))
return render(request, 'new.html', {'labels': labels})
else:
img=UploadForm()
images=Upload.objects.all()
return render(request,'new.html',{'form':img,'images':images})
The error occurs when trying to do dist1 = (1 - np.asarray(numpy.asmatrix(dtm_lsa) * numpy.asmatrix(dtm_lsa).T)). I also tried to create a new array of ones with the same size and then subtract. How should I modify this to prevent the error? Note that the user interface that will run this code may be operated on any PC!
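One way to lower the peak memory of that step (a sketch, not a guaranteed fix on every machine) is to stay in float32 and subtract in place, instead of building several float64 matrix copies via numpy.asmatrix:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def cosine_distance_matrix(dtm_lsa):
    # float32 halves the memory of the (k, k) matrix compared to float64.
    dtm_lsa = np.asarray(dtm_lsa, dtype=np.float32)
    sim = cosine_similarity(dtm_lsa)   # (k, k) similarity matrix
    np.subtract(1.0, sim, out=sim)     # in place: dist = 1 - sim, no extra copy
    return sim

Since dtm_lsa is already L2-normalized by the Normalizer step, this yields the same dist1 values as the matrix product, just without the intermediate numpy.asmatrix copies.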
