Related
Hi all, I have 70k images saved in an .h5 file, and with this script I want to read them from that file and write the text-instance annotations to a .json file. When I run the script it takes a very long time to annotate a single image (about 2 hours).
When I run it on 15 images, the script works fine and annotates all 15 images in a few seconds.
With 70k images the .h5 file is 51 GB.
I don't know whether the problem is in the code or whether the .h5 file is simply too big. The code works fine with a small number of images, but I'm working on a project where I need 70k or even 700k images.
from __future__ import division
import os
import os.path as osp
from re import U
import numpy as np
import matplotlib.pyplot as plt
import h5py
from common import *
import json
import cv2
import numpy as np
from itertools import cycle
import js2py
#from gen import brojac
#from synthgen import imnames
global x
global y
def write_json(data, filename='annotation.json'):
    with open(filename, 'w') as file:
        json.dump(data, file, indent=4)

DATA_PATH = 'results'
DB_FNAME = osp.join(DATA_PATH, 'SynthText.h5')

def get_data():
    return h5py.File(DB_FNAME, 'r')
def viz_textbb(text_im, imageName, charBB_list, wordBB, textToList, alpha=1.0):
    """
    text_im : image containing text
    charBB_list : list of 2x4xn_i bounding-box matrices
    wordBB : 2x4xm matrix of word coordinates
    """
    #print("k", z, type(z))
    plt.close(1)
    plt.figure(1)
    plt.imshow(text_im)
    H, W = text_im.shape[:2]
    global imnames
    #print("PLEASE", wordBB)
    # ADDED FROM main()
    # **********************************************
    db = h5py.File('results/SynthText.h5', 'r')
    dsets = sorted(db['data'].keys())
    for k in dsets:
        db = get_data()
        imnames = sorted(db['data'].keys())
    start = 0
    count = 0
    coordinate = []
    coordinate1 = []
    name = []
    name1 = []
    final = []
    upperList = []
    downList = []
    counter = 0
    FinalFinal = []
    imageData = {}
    dictList = []
    for eachWord in textToList:
        length = len(eachWord)
        for i in range(0, 4):
            for j in range(start, length + start):
                coordinate.append([charBB_list[0][0][i][j], charBB_list[0][1][i][j]])
                coordinate1.append((charBB_list[0][0][i][j], charBB_list[0][1][i][j]))
            name.append(coordinate)
            name1.append(coordinate1)
            coordinate = []
        for j in range(0, length):
            for i in range(len(name)):
                #print(i, j, name[i][j])  # coordinates, to orient myself; needed
                final.append(name[i][j])
                #print(name)
                # SOMETHING FOR DRAWING, NOT IMPORTANT
                if (i == 0 or i == 1):
                    upperList.append(name[i][j])
                if (i == 2):
                    downList.append(name[i + 1][j])
                if (i == 3):
                    downList.append(name[i - 1][j])
        down = reversed(downList)
        joinList = [*upperList, *down, upperList[0]]
        FinalFinal.append(joinList)
        imageData['transcription'] = eachWord
        imageData['language'] = "Latin"
        imageData['illegibility'] = False
        imageData['points'] = final
        dictionary_copy = imageData.copy()
        dictList.append(dictionary_copy)
        del (dictionary_copy)
        finalToList = np.array(final)
        name = []
        final = []
        upperList = []
        downList = []
        start = len(eachWord) + start
    #del(dictList[0])
    finalDict = {f'gt_{imageName}': dictList}
    #print(type(finalDict)) --> dict
    #print(imageName, finalDict)
    #print(finalDict)
    #print(len(textToList))
    #print(textToList)
with open("annotation.json") as json_file:
data=json.load(json_file)
temp=data["annotations"]
#temp.append(finalDict)
temp.update(finalDict)
#temp['annotations'] = finalDict
write_json(data)
json_file.close()
for list in FinalFinal:
x,y = zip(*list)
plt.plot(x,y)
#print(x,y)
# points = tuple(zip(x,y))
# # boundaries of the bounding box
# left, right = min(points, key=lambda p: p[0]), max(points, key=lambda p: p[0])
# bottom, top = min(points, key=lambda p: p[1]), max(points, key=lambda p: p[1])
# # area
# base = right[0] - left[0]
# height = top[1] - bottom[1]
# A = base * height
#print(A)
for i in range(len(charBB_list)):
# #print(charBB_list) #ispisuje x-eve za jedan vrh svih instanci pojedinih slova, pa drugi, 3. i 4. i onda posebno y-one
bbs = charBB_list[i]
ni = bbs.shape[-1]
for j in range(ni):
bb = bbs[:,:,j]
bb = np.c_[bb,bb[:,0]] #ako se doda ,bb[:,0] -> printa isto kao i gornji lijevi
#plt.plot(bb[0,:], bb[1,:], 'r', alpha=alpha)
# plot the word-BB:
for i in range(wordBB.shape[-1]):
bb = wordBB[:,:,i] #koordinate wordBB-a
bb = np.c_[bb,bb[:,0]] #spaja skroz lijevu, TREBA
#plt.plot(bb[0,:], bb[1,:], 'g', alpha=alpha)
# visualize the indiv vertices:
vcol = ['r','g','b','k']
#for j in range(4):
#plt.scatter(bb[0,j],bb[1,j],color=vcol[j])
#print(bb) # ----> KOORDINATE wordBB-a
#print(bb[1,j])
plt.gca().set_xlim([0,W-1])
plt.gca().set_ylim([H-1,0])
plt.show(block=False)
def main(db_fname):
    db = h5py.File(db_fname, 'r')
    dsets = sorted(db['data'].keys())
    print("total number of images : ", colorize(Color.RED, len(dsets), highlight=True))
    for k in dsets:
        rgb = db['data'][k][...]
        charBB = db['data'][k].attrs['charBB']
        wordBB = db['data'][k].attrs['wordBB']
        txt = db['data'][k].attrs['txt']
        textToList = (db['data'][k].attrs['txt']).tolist()
        #print(textToList)
        viz_textbb(rgb, k, [charBB], wordBB, textToList)
        print("image name : ", colorize(Color.RED, k, bold=True))
        print(" ** no. of chars : ", colorize(Color.YELLOW, charBB.shape[-1]))
        print(" ** no. of words : ", colorize(Color.YELLOW, wordBB.shape[-1]))
        print(" ** text : ", colorize(Color.GREEN, txt))
        #print("To know", z[1], type(z[1]))
        # UNCOMMENT BELOW TO STEP THROUGH BY PRESSING ENTER
        # if 'q' in input("next? ('q' to exit) : "):
        #     break
    db.close()

if __name__ == '__main__':
    main('results/SynthText.h5')
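For what it's worth, two things in this script scale badly with the number of images: viz_textbb reopens SynthText.h5 and re-sorts all dataset keys once per key on every call, and annotation.json is reloaded and rewritten from scratch for every image. A minimal sketch of the same annotation pass that opens the HDF5 file once and writes the JSON once at the end (build_word_entries is a hypothetical stand-in for the per-word logic in viz_textbb):

import json
import h5py

def build_word_entries(charBB, words):
    # Hypothetical stand-in for the per-word logic in viz_textbb: one dict per
    # word with the same fields the original script writes.
    entries, start = [], 0
    for w in words:
        n = len(w)
        pts = [[float(charBB[0, c, j]), float(charBB[1, c, j])]
               for j in range(start, start + n) for c in range(4)]
        entries.append({'transcription': w, 'language': 'Latin',
                        'illegibility': False, 'points': pts})
        start += n
    return entries

def annotate_all(db_fname='results/SynthText.h5', out_fname='annotation.json'):
    annotations = {}
    with h5py.File(db_fname, 'r') as db:               # open the 51 GB file once
        for k in sorted(db['data'].keys()):
            ds = db['data'][k]
            charBB = ds.attrs['charBB']                # 2 x 4 x n_chars
            words = ds.attrs['txt'].tolist()
            annotations[f'gt_{k}'] = build_word_entries(charBB, words)
    with open(out_fname, 'w') as f:                    # write the JSON once
        json.dump({'annotations': annotations}, f, indent=4)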
I want to add more layers to the neural network. How can I customize this code? Can anyone suggest a better way?
In the model type there is only one layer. In this part, can I define other weights? Should I just add other layers there, or would that make no sense?
MODEL_TYPE = tff.StructType([
    ('weights', tff.TensorType(tf.float32, [784, 10])),
    ('bias', tff.TensorType(tf.float32, [10]))])
Then, in the batch-loss function, they multiply by these weights and add the bias to get the predicted y, which effectively amounts to a single layer:
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
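One low-level option (a sketch of my own, not from the original post, reusing the BATCH_TYPE from the full listing below) is to put every layer's parameters into MODEL_TYPE and use all of them in batch_loss; batch_train, the initial model values, and the weight-file I/O in the full script would need the same treatment:

# A sketch only: HIDDEN is an arbitrary width.
HIDDEN = 128

MODEL_TYPE = tff.StructType([
    ('weights1', tff.TensorType(tf.float32, [784, HIDDEN])),
    ('bias1', tff.TensorType(tf.float32, [HIDDEN])),
    ('weights2', tff.TensorType(tf.float32, [HIDDEN, 10])),
    ('bias2', tff.TensorType(tf.float32, [10]))])

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    # Two-layer forward pass: ReLU hidden layer, then the softmax output layer.
    hidden = tf.nn.relu(tf.matmul(batch.x, model.weights1) + model.bias1)
    predicted_y = tf.nn.softmax(tf.matmul(hidden, model.weights2) + model.bias2)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))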
The full code is here
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import collections
import os
tff.backends.reference.set_reference_context()
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()
# NUM_EXAMPLES_PER_USER = 1000
BATCH_SIZE = 100
NUM_AGENT = 5
DECAY_FACTOR = 0.8
def get_data_for_digit(source, digit):
    output_sequence = []
    all_samples = [i for i, d in enumerate(source[1]) if d == digit]
    for i in range(0, len(all_samples), BATCH_SIZE):
        batch_samples = all_samples[i:i + BATCH_SIZE]
        output_sequence.append({
            'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
                          dtype=np.float32),
            'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
    return output_sequence

def get_data_for_digit_test(source, digit):
    output_sequence = []
    all_samples = [i for i, d in enumerate(source[1]) if d == digit]
    for i in range(0, len(all_samples)):
        output_sequence.append({
            'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
                          dtype=np.float32),
            'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
    return output_sequence

def get_data_for_federated_agents(source, num):
    output_sequence = []
    Samples = []
    for digit in range(0, 10):
        samples = [i for i, d in enumerate(source[1]) if d == digit]
        samples = samples[0:5421]
        Samples.append(samples)
    all_samples = []
    for sample in Samples:
        for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
            all_samples.append(sample[sample_index])
    # all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
    for i in range(0, len(all_samples), BATCH_SIZE):
        batch_samples = all_samples[i:i + BATCH_SIZE]
        output_sequence.append({
            'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
                          dtype=np.float32),
            'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
    return output_sequence
BATCH_TYPE = tff.StructType([
    ('x', tff.TensorType(tf.float32, [None, 784])),
    ('y', tff.TensorType(tf.int32, [None]))])

MODEL_TYPE = tff.StructType([
    ('weights', tff.TensorType(tf.float32, [784, 10])),
    ('bias', tff.TensorType(tf.float32, [10]))])

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
    # Define a group of model variables and set them to `initial_model`.
    model_vars = tff.utils.create_variables('v', MODEL_TYPE)
    init_model = tff.utils.assign(model_vars, initial_model)
    # Perform one step of gradient descent using loss from `batch_loss`.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    with tf.control_dependencies([init_model]):
        train_model = optimizer.minimize(batch_loss(model_vars, batch))
    # Return the model vars after performing this gradient descent step.
    with tf.control_dependencies([train_model]):
        return tff.utils.identity(model_vars)
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)

@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
    # Mapping function to apply to each batch.
    @tff.federated_computation(MODEL_TYPE, BATCH_TYPE)
    def batch_fn(model, batch):
        return batch_train(model, batch, learning_rate)
    l = tff.sequence_reduce(all_batches, initial_model, batch_fn)
    return l

@tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
    return tff.sequence_sum(
        tff.sequence_map(
            tff.federated_computation(
                lambda b: batch_loss(model, b), BATCH_TYPE),
            all_batches))
SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
CLIENT_DATA_TYPE = tff.FederatedType(LOCAL_DATA_TYPE, tff.CLIENTS)

@tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
    return tff.federated_mean(
        tff.federated_map(local_eval, [tff.federated_broadcast(model), data]))

SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)

@tff.federated_computation(
    SERVER_MODEL_TYPE, SERVER_FLOAT_TYPE, CLIENT_DATA_TYPE)
def federated_train(model, learning_rate, data):
    l = tff.federated_map(
        local_train,
        [tff.federated_broadcast(model),
         tff.federated_broadcast(learning_rate),
         data])
    return l
    # return tff.federated_mean()
def readTestImagesFromFile(distr_same):
    ret = []
    if distr_same:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_images1_.txt"), encoding="utf-8")
    else:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_images1_.txt"), encoding="utf-8")
    lines = f.readlines()
    for line in lines:
        tem_ret = []
        p = line.replace("[", "").replace("]", "").replace(
            "\n", "").split("\t")
        for i in p:
            if i != "":
                tem_ret.append(float(i))
        ret.append(tem_ret)
    return np.asarray(ret)

def readTestLabelsFromFile(distr_same):
    ret = []
    if distr_same:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_labels_.txt"), encoding="utf-8")
    else:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_labels_.txt"), encoding="utf-8")
    lines = f.readlines()
    for line in lines:
        tem_ret = []
        p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
        for i in p:
            if i != "":
                tem_ret.append(float(i))
        ret.append(tem_ret)
    return np.asarray(ret)
def getParmsAndLearningRate(agent_no):
    f = open(os.path.join(os.path.dirname(__file__),
                          "weights_" + str(agent_no) + ".txt"))
    content = f.read()
    g_ = content.split(
        "***\n--------------------------------------------------")
    parm_local = []
    learning_rate_list = []
    for j in range(len(g_) - 1):
        line = g_[j].split("\n")
        if j == 0:
            weights_line = line[0:784]
            learning_rate_list.append(
                float(line[784].replace("*", "").replace("\n", "")))
        else:
            weights_line = line[1:785]
            learning_rate_list.append(
                float(line[785].replace("*", "").replace("\n", "")))
        valid_weights_line = []
        for l in weights_line:
            w_list = l.split("\t")
            w_list = w_list[0:len(w_list) - 1]
            w_list = [float(i) for i in w_list]
            valid_weights_line.append(w_list)
        parm_local.append(valid_weights_line)
    f.close()
    f = open(os.path.join(os.path.dirname(__file__),
                          "bias_" + str(agent_no) + ".txt"))
    content = f.read()
    g_ = content.split(
        "***\n--------------------------------------------------")
    bias_local = []
    for j in range(len(g_) - 1):
        line = g_[j].split("\n")
        if j == 0:
            weights_line = line[0]
        else:
            weights_line = line[1]
        b_list = weights_line.split("\t")
        b_list = b_list[0:len(b_list) - 1]
        b_list = [float(i) for i in b_list]
        bias_local.append(b_list)
    f.close()
    ret = {
        'weights': np.asarray(parm_local),
        'bias': np.asarray(bias_local),
        'learning_rate': np.asarray(learning_rate_list)
    }
    return ret
def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type, iter_n, g_m):
    model_g = {
        'weights': g_m[0],
        'bias': g_m[1]
    }
    for i in range(iter_n - 1, iter_n):
        # i -> iteration round index
        gradient_w = np.zeros([784, 10], dtype=np.float32)
        gradient_b = np.zeros([10], dtype=np.float32)
        for j in agent_list:
            gradient_w = np.add(np.multiply(
                grad[j][i], 1 / len(agent_list)), gradient_w)
            gradient_b = np.add(np.multiply(
                bi[j][i], 1 / len(agent_list)), gradient_b)
        model_g['weights'] = np.subtract(
            model_g['weights'], np.multiply(lr[0][i], gradient_w))
        model_g['bias'] = np.subtract(
            model_g['bias'], np.multiply(lr[0][i], gradient_b))
    test_images = readTestImagesFromFile(False)
    test_labels_onehot = readTestLabelsFromFile(False)
    m = np.dot(test_images, np.asarray(model_g['weights']))
    test_result = m + np.asarray(model_g['bias'])
    y = tf.nn.softmax(test_result)
    correct_prediction = tf.equal(
        tf.argmax(y, 1), tf.arg_max(test_labels_onehot, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy.numpy()
def remove_list_indexed(removed_ele, original_l, ll):
    new_original_l = []
    for i in original_l:
        new_original_l.append(i)
    for i in new_original_l:
        if i == removed_ele:
            new_original_l.remove(i)
    for i in range(len(ll)):
        if set(ll[i]) == set(new_original_l):
            return i
    return -1

def shapley_list_indexed(original_l, ll):
    for i in range(len(ll)):
        if set(ll[i]) == set(original_l):
            return i
    return -1

def PowerSetsBinary(items):
    N = len(items)
    set_all = []
    for i in range(2 ** N):
        combo = []
        for j in range(N):
            if (i >> j) % 2 == 1:
                combo.append(items[j])
        set_all.append(combo)
    return set_all
def loadHistoryModels(round_num):
    f = open(os.path.join(os.path.dirname(__file__),
                          "gradientplus_models" + str(round_num) + ".txt"), "r")
    lines = f.readlines()
    ret_models = []
    f_ini_p = open(os.path.join(os.path.dirname(__file__),
                                "initial_model_parameters" + str(round_num - 1) + ".txt"), "r")
    para_lines = f_ini_p.readlines()
    w_paras = para_lines[0].split("\t")
    w_paras = [float(i) for i in w_paras]
    b_paras = para_lines[1].split("\t")
    b_paras = [float(i) for i in b_paras]
    w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
    b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
    f_ini_p.close()
    ret_models.append([w_initial, b_initial])
    tem_model = []
    for i, line in enumerate(lines):
        if i % 2 == 0:
            lis = line.strip().replace("[", "").replace("]", "").split(",")
            lis = [float(i.strip()) for i in lis]
            lis = np.array(lis).reshape([784, 10])
            tem_model = [lis]
        else:
            lis = line.strip().replace("[", "").replace("]", "").split(",")
            lis = [float(i.strip()) for i in lis]
            lis = np.array(lis)
            tem_model.append(lis)
            ret_models.append(tem_model)
    f.close()
    return ret_models
if __name__ == "__main__":
start_time = time.time()
# data_num = np.asarray([5923, 6742, 5958, 6131, 5842])
# agents_weights = np.divide(data_num, data_num.sum())
for index in range(NUM_AGENT):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(index) + ".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(index) + ".txt"), "w")
f.close()
# f = open(os.path.join(os.path.dirname(__file__),
# "gradientplus_models.txt"), "w") #alice
# f.close()
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "w")
f.close()
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
DISTRIBUTION_TYPE = "SAME"
federated_train_data_divide = None
federated_train_data = None
if DISTRIBUTION_TYPE == "SAME":
federated_train_data_divide = [get_data_for_federated_agents(
mnist_train, d) for d in range(NUM_AGENT)]
federated_train_data = federated_train_data_divide
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters-1.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
initial_model = collections.OrderedDict(
weights=w_initial,
bias=b_initial)
model = initial_model
learning_rate = 0.1
    for round_num in range(5):
        f = open(os.path.join(os.path.dirname(__file__),
                              "gradientplus_models" + str(round_num) + ".txt"), "w")  # alice
        f.close()
        local_models = federated_train(
            model, learning_rate, federated_train_data)
        print("learning rate: ", learning_rate)
        # print(local_models[0][0])  # weights matrix of agent 0
        # print(local_models[0][1])  # bias matrix of agent 0
        for local_index in range(len(local_models)):
            f = open(os.path.join(os.path.dirname(__file__), "weights_" +
                                  str(local_index) + ".txt"), "a", encoding="utf-8")
            for i in local_models[local_index][0]:
                line = ""
                arr = list(i)
                for j in arr:
                    line += (str(j) + "\t")
                print(line, file=f)
            print("***" + str(learning_rate) + "***", file=f)
            print("-" * 50, file=f)
            f.close()
            f = open(os.path.join(os.path.dirname(__file__), "bias_" +
                                  str(local_index) + ".txt"), "a", encoding="utf-8")
            line = ""
            for i in local_models[local_index][1]:
                line += (str(i) + "\t")
            print(line, file=f)
            print("***" + str(learning_rate) + "***", file=f)
            print("-" * 50, file=f)
            f.close()
        m_w = np.zeros([784, 10], dtype=np.float32)
        m_b = np.zeros([10], dtype=np.float32)
        for local_model_index in range(len(local_models)):
            m_w = np.add(np.multiply(
                local_models[local_model_index][0], 1 / NUM_AGENT), m_w)
            m_b = np.add(np.multiply(
                local_models[local_model_index][1], 1 / NUM_AGENT), m_b)
        model = {
            'weights': m_w,
            'bias': m_b
        }
        f_g = open(os.path.join(os.path.dirname(
            __file__), "gradientplus_models" + str(round_num) + ".txt"), "a")
        g_w = list(model['weights'].reshape(-1))
        g_b = list(model['bias'].reshape(-1))
        print(g_w, file=f_g)
        print(g_b, file=f_g)
        f_g.close()
        f = open(os.path.join(os.path.dirname(__file__),
                              "initial_model_parameters" + str(round_num - 1) + ".txt"), "w")
        f.close()
        f = open(os.path.join(os.path.dirname(__file__),
                              "initial_model_parameters" + str(round_num - 1) + ".txt"), "a")
        s = ""
        for i in list(model['weights']):
            for j in i:
                s += str(j) + "\t"
        s = s[0:-1]
        print(s, file=f)
        s = ""
        for i in list(model['bias']):
            s += str(i) + "\t"
        s = s[0:-1]
        print(s, file=f)
        f.close()
        learning_rate = learning_rate * 0.9
        loss = federated_eval(model, federated_train_data)
        print('round {}, loss={}'.format(round_num, loss))
        print(time.time() - start_time)
        shapstart_time = time.time()  # change cal time
        gradient_weights = []
        gradient_biases = []
        gradient_lrs = []
        for ij in range(NUM_AGENT):
            model_ = getParmsAndLearningRate(ij)
            gradient_weights_local = []
            gradient_biases_local = []
            learning_rate_local = []
            for i in range(len(model_['learning_rate'])):
                if i == 0:
                    gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
                                                model_['learning_rate'][i])
                    gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
                                              model_['learning_rate'][i])
                else:
                    gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
                                                model_['learning_rate'][i])
                    gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
                                              model_['learning_rate'][i])
                gradient_weights_local.append(gradient_weight)
                gradient_biases_local.append(gradient_bias)
                learning_rate_local.append(model_['learning_rate'][i])
            gradient_weights.append(gradient_weights_local)
            gradient_biases.append(gradient_biases_local)
            gradient_lrs.append(learning_rate_local)
        all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
        models_hository = loadHistoryModels(round_num)
        agent_shapley_history = []
        for iter_num in range(1, len(gradient_weights[0]) + 1):
            group_shapley_value = []
            for s in all_sets:
                group_shapley_value.append(
                    train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE,
                                                      iter_num, models_hository[iter_num - 1]))
                print(str(s) + "\t" +
                      str(group_shapley_value[len(group_shapley_value) - 1]))
            agent_shapley = []
            for index in range(NUM_AGENT):
                shapley = 0.0
                for j in all_sets:
                    if index in j:
                        remove_list_index = remove_list_indexed(
                            index, j, all_sets)
                        if remove_list_index != -1:
                            shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
                                remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
                agent_shapley.append(shapley)
            f = open(os.path.join(os.path.dirname(__file__),
                                  "alice" + ".txt"), "a")
            print('round {}, loss={}'.format(round_num, agent_shapley), file=f)
            agent_shapley_history.append(agent_shapley)
            print('round {}, loss={}'.format(
                round_num, agent_shapley_history), file=f)
            f.close()
        print("end_time shap values", time.time() - shapstart_time)
    print("end_time", time.time() - start_time)
It seems the source of this code is one of the low-level TFF tutorials, which are really more about introducing users to the core communication abstractions of TFF, as opposed to serving as an introductory guide to federated learning with TFF.
I might suggest a different tutorial, such as the one that focuses on image classification. It uses TFF's tff.learning.from_keras_model utility to ingest a Keras model directly; if you go this route, you can add layers as you would for any Keras model.
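As a concrete illustration of that route (a minimal sketch, not taken from the tutorial itself; the 128-unit hidden layer and the optimizer settings are arbitrary choices, and exact builder names vary across TFF versions):

import collections
import tensorflow as tf
import tensorflow_federated as tff

def create_keras_model():
    # The original model is a single 784 -> 10 softmax; the 128-unit hidden
    # layer is an arbitrary addition to illustrate "more layers".
    return tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(784,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

def model_fn():
    return tff.learning.from_keras_model(
        create_keras_model(),
        input_spec=collections.OrderedDict(
            x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
            y=tf.TensorSpec(shape=[None], dtype=tf.int32)),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

# Federated averaging over the Keras model; newer TFF releases expose this as
# tff.learning.algorithms.build_weighted_fed_avg instead.
trainer = tff.learning.build_federated_averaging_process(
    model_fn, client_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.1))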
I am getting an IndexError while trying to composite events. My indices start at 0 rather than 1, and although I have tried a number of things (such as appending i + 1), I have not been able to fix it.
The error comes from this specific line: dset_IDX[offset:offset_next] = event_id[file_indices]
The .py file is over 1000 lines, so I cannot show all of it, but here is the part of the function that raises the error:
def count_events(files):
    # Because we want to remove events with 0 hits,
    # we need to count the events beforehand (to create the h5 file).
    # This function counts and indexes the events with more than 0 hits.
    # Files need to be iterated in the same order to use the indexes.
    """ This is where we manually specify the file"""
    num_events = 0
    nonzero_file_events = []
    for file_index, f in enumerate(files):
        data = np.load(f, allow_pickle=True)
        nonzero_file_events.append([])
        hits = data['digi_hit_pmt']
        for i in range(len(hits)):
            if len(hits[i]) != 0:
                nonzero_file_events[file_index].append(i)
                num_events += 1
    return num_events, nonzero_file_events
def GenMapping(csv_file):
    mPMT_to_index = {}
    with open(csv_file) as f:
        rows = f.readline().split(",")[1:]
        rows = [int(r.strip()) for r in rows]
        for line in f:
            line_split = line.split(",")
            col = int(line_split[0].strip())
            for row, value in zip(rows, line_split[1:]):
                value = value.strip()
                if value:  # If the value is not empty
                    mPMT_to_index[int(value)] = [col, row]
    npmap = np.zeros((max(mPMT_to_index) + 1, 2), dtype=np.int)
    for k, v in mPMT_to_index.items():
        npmap[k] = v
    return npmap
def GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2):
    """
    Inputs:
    avg_mu_per_ev == Poisson distribution mean for number of muons in each spill
    sigma_time_offset == Width of spill (Gaussian) in nanoseconds
    """
    files = ['event998.npz']
    # Remove whitespace
    files = [x.strip() for x in files]
    # Check that files were provided
    if len(files) == 0:
        raise ValueError("No files provided!!")
    print("Merging " + str(len(files)) + " files")
    # Start merging
    num_nonzero_events, nonzero_event_indexes = count_events(files)
    print(num_nonzero_events)
    # np.random.poisson( avg_mu_per_ev, number_of_throws )
    num_muons = np.random.poisson(avg_mu_per_ev, num_nonzero_events - 2954)
    # creates h5 file to generate the h5 file
    dtype_events = np.dtype(np.float32)
    dtype_labels = np.dtype(np.int32)
    dtype_energies = np.dtype(np.float32)
    dtype_positions = np.dtype(np.float32)
    dtype_IDX = np.dtype(np.int32)
    dtype_PATHS = h5py.special_dtype(vlen=str)
    dtype_angles = np.dtype(np.float32)
    # sets h5 file to be written
    h5_file = h5py.File('multimuonfile(2).h5', 'w')
    dset_event_data = h5_file.create_dataset("event_data",
                                             shape=(num_nonzero_events,) + IMAGE_SHAPE,
                                             dtype=dtype_events)
    dset_labels = h5_file.create_dataset("labels",
                                         shape=(num_nonzero_events,),
                                         dtype=dtype_labels)
    dset_energies = h5_file.create_dataset("energies",
                                           shape=(num_nonzero_events, 1),
                                           dtype=dtype_energies)
    dset_positions = h5_file.create_dataset("positions",
                                            shape=(num_nonzero_events, 1, 3),
                                            dtype=dtype_positions)
    dset_IDX = h5_file.create_dataset("event_ids",
                                      shape=(num_nonzero_events,),
                                      dtype=dtype_IDX)
    dset_PATHS = h5_file.create_dataset("root_files",
                                        shape=(num_nonzero_events,),
                                        dtype=dtype_PATHS)
    dset_angles = h5_file.create_dataset("angles",
                                         shape=(num_nonzero_events, 2),
                                         dtype=dtype_angles)
    # 22 -> gamma, 11 -> electron, 13 -> muon
    # corresponds to labelling used in CNN with only barrel
    # IWCDmPMT_4pi_full_tank_gamma_E0to1000MeV_unif-pos-R371-y521cm_4pi-dir_3000evts_329.npz has an event
    # with pid 11 though....
    # pid_to_label = {22:0, 11:1, 13:2}
    offset = 0
    offset_next = 0
    mPMT_to_index = GenMapping(PMT_LABELS)
    # Loop over files
    for file_index, filename in enumerate(files):
        data = np.load(filename, allow_pickle=True)
        nonzero_events_in_file = len(nonzero_event_indexes[file_index])
        x_data = np.zeros((nonzero_events_in_file,) + IMAGE_SHAPE,
                          dtype=dtype_events)
        digi_hit_pmt = data['digi_hit_pmt']
        # digi_hit_charge = data['digi_hit_charge']
        # digi_hit_time = data['digi_hit_time']
        # digi_hit_trigger = data['digi_hit_trigger']
        # trigger_time = data['trigger_time']
        delay = 0
        # Loop over events in file
        # Loop over number of muons in each event
        event_id = np.array([], dtype=np.int32)
        root_file = np.array([], dtype=np.str)
        pid = np.array([])
        position = np.array([])
        direction = np.array([])
        energy = np.array([])
        labels = np.array([])
        # with open("ResultFile.txt", "w") as text_file:
        # sys.stdout = open("Result2.txt", "w")
        for i, nmu in enumerate(num_muons):
            # np.savetxt(text_file, i, nmu, fmt="%d")
            # text_file.write("processing output entry " + str(i) + " with " + nmu + " muons")
            print("processing output entry ", i, " with ", nmu, " muons")
            indices = np.random.randint(0, len(digi_hit_pmt), max(1, nmu))
            time_offs = [0.]
            if nmu > 1:
                time_offs = np.append(time_offs, np.random.normal(0., sigma_time_offset, nmu - 1))
            hit_pmts, charge, time = SumEvents(indices, time_offs, data, nmu == 0)
            hit_mpmts = hit_pmts // 19
            pmt_channels = hit_pmts % 19
            rows = mPMT_to_index[hit_mpmts, 0]
            cols = mPMT_to_index[hit_mpmts, 1]
            x_data[i - delay, rows, cols, pmt_channels] = charge
            x_data[i - delay, rows, cols, pmt_channels + 19] = time
            # fix below!!!
            idx0 = indices[0]
            event_id = np.append(event_id, data['event_id'][idx0])
            root_file = np.append(root_file, data['root_file'][idx0])
            pid = np.append(pid, data['pid'][idx0])
            position = np.append(position, data['position'][idx0])
            direction = np.append(direction, data['direction'][idx0])
            energy = np.append(energy, np.sum(data['energy'][indices]))
            labels = np.append(labels, nmu)
        offset_next += nonzero_events_in_file
        file_indices = nonzero_event_indexes[file_index]
        dset_IDX[offset:offset_next] = event_id[file_indices]
        dset_PATHS[offset:offset_next] = root_file[file_indices]
        dset_energies[offset:offset_next, :] = energy[file_indices].reshape(-1, 1)
        dset_positions[offset:offset_next, :, :] = position[file_indices].reshape(-1, 1, 3)
        dset_labels[offset:offset_next] = labels[file_indices]
        print(event_id)
        direction = direction[file_indices]
        polar = np.arccos(direction[:, 1])
        azimuth = np.arctan2(direction[:, 2], direction[:, 0])
        dset_angles[offset:offset_next, :] = np.hstack((polar.reshape(-1, 1), azimuth.reshape(-1, 1)))
        dset_event_data[offset:offset_next, :] = x_data
        offset = offset_next
        print("Finished file: {}".format(filename))
    # sys.stdout.close()
    print("Saving")
    # h5_file.close()
    print("Finished")

# In[ ]:
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
Traceback
Merging 1 files
2958
processing output entry 0 with 3 muons
processing output entry 1 with 1 muons
processing output entry 2 with 3 muons
processing output entry 3 with 3 muons
Traceback (most recent call last):
File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1068, in <module>
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1044, in GenerateMultiMuonSample_h5
dset_IDX[offset:offset_next] = event_id[file_indices]
IndexError: index 4 is out of bounds for axis 0 with size 4
Not much info is provided, but from what I understand, the error says that axis 0 has size 4 and you are trying to access index 4, which is not possible: with size 4, indexing starts at 0, so the largest valid index is 3.
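In the posted code that mismatch is plausible: event_id gains one entry per element of num_muons (num_nonzero_events - 2954, i.e. 4 in this run), while file_indices lists all 2958 nonzero events in the file. A minimal sketch reproducing the same failure, with the numbers taken from the traceback:

import numpy as np

# event_id gets one entry per muon-composited output event (4 here) ...
event_id = np.arange(4, dtype=np.int32)
# ... but file_indices covers every nonzero event in the file (2958 here).
file_indices = list(range(2958))

event_id[file_indices]  # IndexError: index 4 is out of bounds for axis 0 with size 4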
I have a problem with my Python code: when I call the function from another module, the calling module is re-executed and Python raises this error:
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
    if __name__ == '__main__':
        freeze_support()
        ...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
The calling code is this:
finestre = creatore_finestre()
print(finestre[0])
The code of the function is:
import csv
import datetime
import math
from collections import defaultdict

import wfdb  # waveform-database package used to download/read the MIMIC II records

# SlidingWindows is assumed to be defined elsewhere in the project.

DIR_DATA = '../../../data/'
SIGNALS_INDEX = {
    'HR': 0,
    'ABPSys': 1,
    'ABPDias': 2,
    'ABPMean': 3,
    'CVP': 4,
    'PULSE': 5,
    'RESP': 6,
    'SpO2': 7,
    'NBPSys': 8,
    'NBPDias': 9,
    'NBPMean': 10,
}
def download_information_database(id_patient):
    wfdb.dldatabase('mimic2db/numerics', DIR_DATA + id_patient, records=[id_patient])

def create_csv(id_patient, signal):
    # Download the patient information
    download_information_database(id_patient)
    # Firstly, we read the patient information
    patient_dir = DIR_DATA + id_patient + "/"
    record = wfdb.rdsamp(patient_dir + id_patient, channels=[SIGNALS_INDEX[signal]])
    # We calculate the datetime base
    date_str = record.basedate + ' ' + record.basetime
    date = datetime.datetime.strptime(date_str, '%d/%m/%Y %H:%M:%S')
    # We read the signal values
    signal = record.p_signals
    # We write the csv file
    with open(patient_dir + id_patient + '.csv', 'w+') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', lineterminator="\n")
        for s in signal:
            date = date + datetime.timedelta(minutes=1)
            if not math.isnan(float(s)):
                writer.writerow([date.strftime("'[%H:%M:%S %d/%m/%Y]'"), str(int(s[0]) * 1000)])
def creatore_finestre():
    # Open the file of information for each patient
    in_file = open("../../../data/CodePatientsTraining.csv", "r+")
    lettore_file = csv.reader(in_file)
    # Create dictionary of lists
    finestre = defaultdict(list)
    for nomeFile in lettore_file:
        print(nomeFile[0])
        create_csv(nomeFile[0], "ABPMean")
        f = open("../../../data/" + nomeFile[0] + "/" + nomeFile[0] + ".csv", "r+")
        reader = csv.reader(f)
        line = in_file.readline()
        lista = list(reader)
        i = 0
        for timestamp in lista:
            if (timestamp[0] != nomeFile[1]):
                i += 1
            else:
                print(timestamp[0], nomeFile[1], i)
                break
        decade = 0
        somma = 0
        arrivo = 1
        minute = 10
        while (i != 0):
            i -= 1
            if (lista[i][1] != '0' and lista[i][1] != '-' and int(lista[i][1]) > 0):
                somma += float(lista[i][1])
                decade += 1
                if (decade == minute):
                    f = SlidingWindows((somma / minute), nomeFile[4])
                    finestre[arrivo].append(f)
                    print("T[" + str(arrivo) + "]:")
                    for value in finestre[arrivo]:
                        print(value)
                    decade = 0
                    arrivo += 1
                    somma = 0
    return finestre
My idea is to create a SlidingWindows object for each CSV file in this function, and then retrieve all the sliding windows from the other module.
import your_module

if __name__ == '__main__':
    extractor = your_module.your_function()
    extractor.runInParallel(numProcesses=2, numThreads=4)
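Applied to this case, the same guard would wrap the call to creatore_finestre in the script that uses it (a minimal sketch; finestre_module is a hypothetical name for the file that defines the function):

# Hypothetical module name; replace with the file that defines creatore_finestre.
from finestre_module import creatore_finestre

if __name__ == '__main__':
    finestre = creatore_finestre()
    print(finestre[0])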
I have trained a CNN model and saved its parameters in five files, but when I use these parameters to test photos I run into an error (the traceback was posted as a screenshot).
the code of load_data is:
def load_data(pag_name):
    k = 0
    for filename in os.listdir(pag_name):
        if (filename != '.DS_Store'):
            k = k + 1
    num = k
    # test_per = k*4
    print k
    i = 0
    j = 0
    label = 0
    train_set = numpy.empty((num, 1, 56, 56))
    while (j < 1):
        for filename in os.listdir(pag_name):
            if (filename != '.DS_Store'):
                filename = pag_name + '/' + filename
                image = Image.open(filename)
                #print image.size
                #print image
                img_ndarray = numpy.asarray(image, dtype='float64') / 256
                img_ndarray = numpy.asarray([img_ndarray])
                # train_set[i] = numpy.ndarray.flatten(img_ndarray)
                train_set[i] = img_ndarray
                #print train_set.shape
                # print filename1
                # print 'label:', label
                # print 'i:', i
                i = i + 1
        j = j + 1

    def shared_dataset(data_x, borrow=True):
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x

    train_set = shared_dataset(train_set)
    print train_set.get_value(borrow=True).shape
    return train_set
and the code of use_CNN is :
def use_CNN(pag_name, nkerns=[20, 40, 60]):
    data = load_data(pag_name)
    data_num = data.get_value(borrow=True).shape[0]
    layer0_params, layer01_params, layer1_params, layer2_params, layer3_params = load_params()
    x = T.matrix('x')
    layer0_input = x.reshape((data_num, 1, 56, 56))
    layer0 = LeNetConvPoolLayer(
        input=layer0_input,
        params_W=layer0_params[0],
        params_b=layer0_params[1],
        image_shape=(data_num, 1, 56, 56),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )
I have not run into this problem before, and I don't know where or how to change my code.
The cause of this error is that the loaded params are not 4D: my W should be (20, 1, 5, 5), but what I load is (1, 5, 5), which is why I get this problem.
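A small sanity check along those lines (my own sketch, assuming the same load_params() helper used in use_CNN) fails early if a loaded filter bank is not 4D:

import numpy

# LeNetConvPoolLayer expects filters shaped
# (n_kernels, n_input_feature_maps, filter_h, filter_w), e.g. (20, 1, 5, 5).
layer0_params, layer01_params, layer1_params, layer2_params, layer3_params = load_params()
W0 = layer0_params[0]
W0 = numpy.asarray(W0.get_value() if hasattr(W0, 'get_value') else W0)
assert W0.ndim == 4, "expected (nkerns[0], 1, 5, 5), got shape %s" % (W0.shape,)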