make_node requires 4D tensor of kernels - python

I have trained a CNN model and saved its parameters in five files, but when I use these parameters to test photos I get an error like this (shown in a screenshot in the original post): make_node requires 4D tensor of kernels.
The code of load_data is:
def load_data(pag_name):
    # count the images in the directory (skipping .DS_Store)
    k = 0
    for filename in os.listdir(pag_name):
        if filename != '.DS_Store':
            k = k + 1
    num = k
    # test_per = k*4
    print k
    i = 0
    j = 0
    label = 0
    train_set = numpy.empty((num, 1, 56, 56))
    while j < 1:
        for filename in os.listdir(pag_name):
            if filename != '.DS_Store':
                filename = pag_name + '/' + filename
                image = Image.open(filename)
                # print image.size
                # print image
                img_ndarray = numpy.asarray(image, dtype='float64') / 256
                img_ndarray = numpy.asarray([img_ndarray])
                # train_set[i] = numpy.ndarray.flatten(img_ndarray)
                train_set[i] = img_ndarray
                # print train_set.shape
                # print filename1
                # print 'label:', label
                # print 'i:', i
                i = i + 1
        j = j + 1

    def shared_dataset(data_x, borrow=True):
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x

    train_set = shared_dataset(train_set)
    print train_set.get_value(borrow=True).shape
    return train_set
The code of use_CNN is:
def use_CNN(pag_name, nkerns=[20, 40, 60]):
    data = load_data(pag_name)
    data_num = data.get_value(borrow=True).shape[0]
    layer0_params, layer01_params, layer1_params, layer2_params, layer3_params = load_params()
    x = T.matrix('x')
    layer0_input = x.reshape((data_num, 1, 56, 56))
    layer0 = LeNetConvPoolLayer(
        input=layer0_input,
        params_W=layer0_params[0],
        params_b=layer0_params[1],
        image_shape=(data_num, 1, 56, 56),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )
I haven't met this problem before, and I don't know where or how to change my code.

The cause of this error is that the parameters I load are not 4D but 3D: for example, my W for layer0 should have shape (20, 1, 5, 5), but the one I load has shape (1, 5, 5), which is why I hit this problem.
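For anyone hitting the same thing, here is a minimal sketch (not from the original code) of a shape check one could run on the loaded parameters before building layer0; the filter sizes are the ones used above:
import numpy

def check_conv_params(W, b, n_filters=20, n_channels=1, filter_size=5):
    # Theano's conv2d expects a 4D kernel tensor:
    #   (n_filters, n_input_feature_maps, filter_height, filter_width)
    W = numpy.asarray(W)
    b = numpy.asarray(b)
    if W.ndim == 3:
        # A (1, 5, 5) W means only a single filter was saved or loaded;
        # it cannot simply be reshaped into (20, 1, 5, 5).
        raise ValueError('expected 4D W, got shape %s' % (W.shape,))
    assert W.shape == (n_filters, n_channels, filter_size, filter_size)
    assert b.shape == (n_filters,)
    return W, b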

Related

Annotating images from h5 file

Hi all, I have 70k images saved into an .h5 file, and with this script I want to read from that file and annotate the text instances into a .json file. When I run the script it takes a very long time to annotate 1 image (about 2 hours).
When I do this with 15 images, the script works fine and annotates all 15 images in a few seconds.
Now with 70k images the .h5 file is 51 GB.
I don't know whether the problem is in the code or whether the h5 file is too big. The code works fine with a small number of images, but I'm working on a project where I need 70k or 700k images.
from __future__ import division
import os
import os.path as osp
from re import U
import numpy as np
import matplotlib.pyplot as plt
import h5py
from common import *
import json
import cv2
import numpy as np
from itertools import cycle
import js2py
# from gen import brojac
# from synthgen import imnames

global x
global y

def write_json(data, filename='annotation.json'):
    with open(filename, 'w') as file:
        json.dump(data, file, indent=4)

DATA_PATH = 'results'
DB_FNAME = osp.join(DATA_PATH, 'SynthText.h5')

def get_data():
    return h5py.File(DB_FNAME, 'r')

def viz_textbb(text_im, imageName, charBB_list, wordBB, textToList, alpha=1.0):
    """
    text_im : image containing text
    charBB_list : list of 2x4xn_i bounding-box matrices
    wordBB : 2x4xm matrix of word coordinates
    """
    # print("k", z, type(z))
    plt.close(1)
    plt.figure(1)
    plt.imshow(text_im)
    H, W = text_im.shape[:2]
    global imnames
    # print("PLEASE", wordBB)
    # ADDED FROM MAIN
    # **********************************************
    db = h5py.File('results/SynthText.h5', 'r')
    dsets = sorted(db['data'].keys())
    for k in dsets:
        db = get_data()
        imnames = sorted(db['data'].keys())
    start = 0
    count = 0
    coordinate = []
    coordinate1 = []
    name = []
    name1 = []
    final = []
    upperList = []
    downList = []
    counter = 0
    FinalFinal = []
    imageData = {}
    dictList = []
    for eachWord in textToList:
        length = len(eachWord)
        for i in range(0, 4):
            for j in range(start, length + start):
                coordinate.append([charBB_list[0][0][i][j], charBB_list[0][1][i][j]])
                coordinate1.append((charBB_list[0][0][i][j], charBB_list[0][1][i][j]))
            name.append(coordinate)
            name1.append(coordinate1)
            coordinate = []
        for j in range(0, length):
            for i in range(len(name)):
                # print(i, j, name[i][j])  # coordinates, to orient myself; needed
                final.append(name[i][j])
                # print(name)
                # SOMETHING FOR DRAWING, NOT IMPORTANT
                if i == 0 or i == 1:
                    upperList.append(name[i][j])
                if i == 2:
                    downList.append(name[i + 1][j])
                if i == 3:
                    downList.append(name[i - 1][j])
            down = reversed(downList)
            joinList = [*upperList, *down, upperList[0]]
            FinalFinal.append(joinList)
        imageData['transcription'] = eachWord
        imageData['language'] = "Latin"
        imageData['illegibility'] = False
        imageData['points'] = final
        dictionary_copy = imageData.copy()
        dictList.append(dictionary_copy)
        del dictionary_copy
        finalToList = np.array(final)
        name = []
        final = []
        upperList = []
        downList = []
        start = len(eachWord) + start
    # del(dictList[0])
    finalDict = {f'gt_{imageName}': dictList}
    # print(type(finalDict)) --> dict
    # print(imageName, finalDict)
    # print(finalDict)
    # print(len(textToList))
    # print(textToList)
    with open("annotation.json") as json_file:
        data = json.load(json_file)
        temp = data["annotations"]
        # temp.append(finalDict)
        temp.update(finalDict)
        # temp['annotations'] = finalDict
        write_json(data)
    json_file.close()
    for list in FinalFinal:
        x, y = zip(*list)
        plt.plot(x, y)
        # print(x, y)
        # points = tuple(zip(x, y))
        # # boundaries of the bounding box
        # left, right = min(points, key=lambda p: p[0]), max(points, key=lambda p: p[0])
        # bottom, top = min(points, key=lambda p: p[1]), max(points, key=lambda p: p[1])
        # # area
        # base = right[0] - left[0]
        # height = top[1] - bottom[1]
        # A = base * height
        # print(A)
    for i in range(len(charBB_list)):
        # print(charBB_list)  # prints the x's of one corner for all character instances, then the 2nd, 3rd and 4th corner, and then the y's separately
        bbs = charBB_list[i]
        ni = bbs.shape[-1]
        for j in range(ni):
            bb = bbs[:, :, j]
            bb = np.c_[bb, bb[:, 0]]  # if ,bb[:,0] is added -> prints the same as the top-left corner
            # plt.plot(bb[0,:], bb[1,:], 'r', alpha=alpha)
    # plot the word-BB:
    for i in range(wordBB.shape[-1]):
        bb = wordBB[:, :, i]  # coordinates of the wordBB
        bb = np.c_[bb, bb[:, 0]]  # joins back to the leftmost point; needed
        # plt.plot(bb[0,:], bb[1,:], 'g', alpha=alpha)
        # visualize the indiv vertices:
        vcol = ['r', 'g', 'b', 'k']
        # for j in range(4):
        #     plt.scatter(bb[0,j], bb[1,j], color=vcol[j])
        #     print(bb)  # ----> COORDINATES of the wordBB
        #     print(bb[1,j])
    plt.gca().set_xlim([0, W - 1])
    plt.gca().set_ylim([H - 1, 0])
    plt.show(block=False)

def main(db_fname):
    db = h5py.File(db_fname, 'r')
    dsets = sorted(db['data'].keys())
    print("total number of images : ", colorize(Color.RED, len(dsets), highlight=True))
    for k in dsets:
        rgb = db['data'][k][...]
        charBB = db['data'][k].attrs['charBB']
        wordBB = db['data'][k].attrs['wordBB']
        txt = db['data'][k].attrs['txt']
        textToList = (db['data'][k].attrs['txt']).tolist()
        # print(textToList)
        viz_textbb(rgb, k, [charBB], wordBB, textToList)
        print("image name : ", colorize(Color.RED, k, bold=True))
        print(" ** no. of chars : ", colorize(Color.YELLOW, charBB.shape[-1]))
        print(" ** no. of words : ", colorize(Color.YELLOW, wordBB.shape[-1]))
        print(" ** text : ", colorize(Color.GREEN, txt))
        # print("To know", z[1], type(z[1]))
        # UNCOMMENT THE LINES BELOW IF I WANT TO PRESS ENTER FOR EACH IMAGE
        # if 'q' in input("next? ('q' to exit) : "):
        #     break
    db.close()

if __name__ == '__main__':
    main('results/SynthText.h5')
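Not part of the original script, but as a sketch of one way to separate the two suspects (the code vs. the 51 GB file): h5py only reads a dataset when it is actually sliced, so opening the file once and timing a single per-image read in isolation shows whether the file access itself is slow. The helper below reuses the 'results/SynthText.h5' path and the 'data' group layout from the script above:
import time
import h5py

def time_one_image(db_fname='results/SynthText.h5'):
    # Open the file once; h5py only loads a dataset when it is sliced,
    # so the 51 GB file is not read into memory here.
    with h5py.File(db_fname, 'r') as db:
        keys = sorted(db['data'].keys())      # done once, not once per image
        t0 = time.time()
        k = keys[0]
        rgb = db['data'][k][...]              # read a single image
        charBB = db['data'][k].attrs['charBB']
        wordBB = db['data'][k].attrs['wordBB']
        print('read one image in %.3f s' % (time.time() - t0))
    return rgb, charBB, wordBB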

Adding more layers to neural network

I want to add more layers to this neural network. How can I customize the code? Can anyone suggest a better way?
In the model there is only one layer. In this part, can I define other weights? Should I add other layers there, even though that seems to make no sense?
MODEL_TYPE = tff.StructType([
    ('weights', tff.TensorType(tf.float32, [784, 10])),
    ('bias', tff.TensorType(tf.float32, [10]))])
Then, in the batch-loss function, they multiply by these weights and add the bias to get the predicted y, so that is just a single layer:
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
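For reference, a minimal sketch (not from the tutorial) of what declaring a second set of weights in MODEL_TYPE and using it in the batch loss could look like; the hidden size of 128 is an arbitrary, made-up choice:
# Hypothetical two-layer model struct: 784 -> 128 -> 10.
TWO_LAYER_MODEL_TYPE = tff.StructType([
    ('w1', tff.TensorType(tf.float32, [784, 128])),
    ('b1', tff.TensorType(tf.float32, [128])),
    ('w2', tff.TensorType(tf.float32, [128, 10])),
    ('b2', tff.TensorType(tf.float32, [10]))])

@tff.tf_computation(TWO_LAYER_MODEL_TYPE, BATCH_TYPE)
def two_layer_batch_loss(model, batch):
    # Hidden layer with a ReLU, then the same softmax output layer as before.
    hidden = tf.nn.relu(tf.matmul(batch.x, model.w1) + model.b1)
    predicted_y = tf.nn.softmax(tf.matmul(hidden, model.w2) + model.b2)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))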
The full code is here
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import collections
import os

tff.backends.reference.set_reference_context()
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()

# NUM_EXAMPLES_PER_USER = 1000
BATCH_SIZE = 100
NUM_AGENT = 5
DECAY_FACTOR = 0.8

def get_data_for_digit(source, digit):
    output_sequence = []
    all_samples = [i for i, d in enumerate(source[1]) if d == digit]
    for i in range(0, len(all_samples), BATCH_SIZE):
        batch_samples = all_samples[i:i + BATCH_SIZE]
        output_sequence.append({
            'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
                          dtype=np.float32),
            'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
    return output_sequence

def get_data_for_digit_test(source, digit):
    output_sequence = []
    all_samples = [i for i, d in enumerate(source[1]) if d == digit]
    for i in range(0, len(all_samples)):
        output_sequence.append({
            'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
                          dtype=np.float32),
            'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
    return output_sequence

def get_data_for_federated_agents(source, num):
    output_sequence = []
    Samples = []
    for digit in range(0, 10):
        samples = [i for i, d in enumerate(source[1]) if d == digit]
        samples = samples[0:5421]
        Samples.append(samples)
    all_samples = []
    for sample in Samples:
        for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
            all_samples.append(sample[sample_index])
    # all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
    for i in range(0, len(all_samples), BATCH_SIZE):
        batch_samples = all_samples[i:i + BATCH_SIZE]
        output_sequence.append({
            'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
                          dtype=np.float32),
            'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
    return output_sequence

BATCH_TYPE = tff.StructType([
    ('x', tff.TensorType(tf.float32, [None, 784])),
    ('y', tff.TensorType(tf.int32, [None]))])

MODEL_TYPE = tff.StructType([
    ('weights', tff.TensorType(tf.float32, [784, 10])),
    ('bias', tff.TensorType(tf.float32, [10]))])

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
    # Define a group of model variables and set them to `initial_model`.
    model_vars = tff.utils.create_variables('v', MODEL_TYPE)
    init_model = tff.utils.assign(model_vars, initial_model)
    # Perform one step of gradient descent using loss from `batch_loss`.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    with tf.control_dependencies([init_model]):
        train_model = optimizer.minimize(batch_loss(model_vars, batch))
    # Return the model vars after performing this gradient descent step.
    with tf.control_dependencies([train_model]):
        return tff.utils.identity(model_vars)

LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)

@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
    # Mapping function to apply to each batch.
    @tff.federated_computation(MODEL_TYPE, BATCH_TYPE)
    def batch_fn(model, batch):
        return batch_train(model, batch, learning_rate)
    l = tff.sequence_reduce(all_batches, initial_model, batch_fn)
    return l

@tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
    #
    return tff.sequence_sum(
        tff.sequence_map(
            tff.federated_computation(
                lambda b: batch_loss(model, b), BATCH_TYPE),
            all_batches))

SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
CLIENT_DATA_TYPE = tff.FederatedType(LOCAL_DATA_TYPE, tff.CLIENTS)

@tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
    return tff.federated_mean(
        tff.federated_map(local_eval, [tff.federated_broadcast(model), data]))

SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)

@tff.federated_computation(
    SERVER_MODEL_TYPE, SERVER_FLOAT_TYPE, CLIENT_DATA_TYPE)
def federated_train(model, learning_rate, data):
    l = tff.federated_map(
        local_train,
        [tff.federated_broadcast(model),
         tff.federated_broadcast(learning_rate),
         data])
    return l
    # return tff.federated_mean()

def readTestImagesFromFile(distr_same):
    ret = []
    if distr_same:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_images1_.txt"), encoding="utf-8")
    else:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_images1_.txt"), encoding="utf-8")
    lines = f.readlines()
    for line in lines:
        tem_ret = []
        p = line.replace("[", "").replace("]", "").replace(
            "\n", "").split("\t")
        for i in p:
            if i != "":
                tem_ret.append(float(i))
        ret.append(tem_ret)
    return np.asarray(ret)

def readTestLabelsFromFile(distr_same):
    ret = []
    if distr_same:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_labels_.txt"), encoding="utf-8")
    else:
        f = open(os.path.join(os.path.dirname(__file__),
                              "test_labels_.txt"), encoding="utf-8")
    lines = f.readlines()
    for line in lines:
        tem_ret = []
        p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
        for i in p:
            if i != "":
                tem_ret.append(float(i))
        ret.append(tem_ret)
    return np.asarray(ret)

def getParmsAndLearningRate(agent_no):
    f = open(os.path.join(os.path.dirname(__file__),
                          "weights_" + str(agent_no) + ".txt"))
    content = f.read()
    g_ = content.split(
        "***\n--------------------------------------------------")
    parm_local = []
    learning_rate_list = []
    for j in range(len(g_) - 1):
        line = g_[j].split("\n")
        if j == 0:
            weights_line = line[0:784]
            learning_rate_list.append(
                float(line[784].replace("*", "").replace("\n", "")))
        else:
            weights_line = line[1:785]
            learning_rate_list.append(
                float(line[785].replace("*", "").replace("\n", "")))
        valid_weights_line = []
        for l in weights_line:
            w_list = l.split("\t")
            w_list = w_list[0:len(w_list) - 1]
            w_list = [float(i) for i in w_list]
            valid_weights_line.append(w_list)
        parm_local.append(valid_weights_line)
    f.close()
    f = open(os.path.join(os.path.dirname(__file__),
                          "bias_" + str(agent_no) + ".txt"))
    content = f.read()
    g_ = content.split(
        "***\n--------------------------------------------------")
    bias_local = []
    for j in range(len(g_) - 1):
        line = g_[j].split("\n")
        if j == 0:
            weights_line = line[0]
        else:
            weights_line = line[1]
        b_list = weights_line.split("\t")
        b_list = b_list[0:len(b_list) - 1]
        b_list = [float(i) for i in b_list]
        bias_local.append(b_list)
    f.close()
    ret = {
        'weights': np.asarray(parm_local),
        'bias': np.asarray(bias_local),
        'learning_rate': np.asarray(learning_rate_list)
    }
    return ret

def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type, iter_n, g_m):
    model_g = {
        'weights': g_m[0],
        'bias': g_m[1]
    }
    for i in range(iter_n - 1, iter_n):
        # i -> iteration round number
        gradient_w = np.zeros([784, 10], dtype=np.float32)
        gradient_b = np.zeros([10], dtype=np.float32)
        for j in agent_list:
            gradient_w = np.add(np.multiply(
                grad[j][i], 1 / len(agent_list)), gradient_w)
            gradient_b = np.add(np.multiply(
                bi[j][i], 1 / len(agent_list)), gradient_b)
        model_g['weights'] = np.subtract(
            model_g['weights'], np.multiply(lr[0][i], gradient_w))
        model_g['bias'] = np.subtract(
            model_g['bias'], np.multiply(lr[0][i], gradient_b))
    test_images = readTestImagesFromFile(False)
    test_labels_onehot = readTestLabelsFromFile(False)
    m = np.dot(test_images, np.asarray(model_g['weights']))
    test_result = m + np.asarray(model_g['bias'])
    y = tf.nn.softmax(test_result)
    correct_prediction = tf.equal(
        tf.argmax(y, 1), tf.arg_max(test_labels_onehot, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy.numpy()

def remove_list_indexed(removed_ele, original_l, ll):
    new_original_l = []
    for i in original_l:
        new_original_l.append(i)
    for i in new_original_l:
        if i == removed_ele:
            new_original_l.remove(i)
    for i in range(len(ll)):
        if set(ll[i]) == set(new_original_l):
            return i
    return -1

def shapley_list_indexed(original_l, ll):
    for i in range(len(ll)):
        if set(ll[i]) == set(original_l):
            return i
    return -1

def PowerSetsBinary(items):
    N = len(items)
    set_all = []
    for i in range(2 ** N):
        combo = []
        for j in range(N):
            if (i >> j) % 2 == 1:
                combo.append(items[j])
        set_all.append(combo)
    return set_all

def loadHistoryModels(round_num):
    f = open(os.path.join(os.path.dirname(__file__),
                          "gradientplus_models" + str(round_num) + ".txt"), "r")
    lines = f.readlines()
    ret_models = []
    f_ini_p = open(os.path.join(os.path.dirname(__file__),
                                "initial_model_parameters" + str(round_num - 1) + ".txt"), "r")
    para_lines = f_ini_p.readlines()
    w_paras = para_lines[0].split("\t")
    w_paras = [float(i) for i in w_paras]
    b_paras = para_lines[1].split("\t")
    b_paras = [float(i) for i in b_paras]
    w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
    b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
    f_ini_p.close()
    ret_models.append([w_initial, b_initial])
    tem_model = []
    for i, line in enumerate(lines):
        if i % 2 == 0:
            lis = line.strip().replace("[", "").replace("]", "").split(",")
            lis = [float(i.strip()) for i in lis]
            lis = np.array(lis).reshape([784, 10])
            tem_model = [lis]
        else:
            lis = line.strip().replace("[", "").replace("]", "").split(",")
            lis = [float(i.strip()) for i in lis]
            lis = np.array(lis)
            tem_model.append(lis)
            ret_models.append(tem_model)
    f.close()
    return ret_models

if __name__ == "__main__":
    start_time = time.time()
    # data_num = np.asarray([5923, 6742, 5958, 6131, 5842])
    # agents_weights = np.divide(data_num, data_num.sum())
    for index in range(NUM_AGENT):
        f = open(os.path.join(os.path.dirname(__file__),
                              "weights_" + str(index) + ".txt"), "w")
        f.close()
        f = open(os.path.join(os.path.dirname(__file__),
                              "bias_" + str(index) + ".txt"), "w")
        f.close()
    # f = open(os.path.join(os.path.dirname(__file__),
    #                       "gradientplus_models.txt"), "w")  # alice
    # f.close()
    f = open(os.path.join(os.path.dirname(__file__),
                          "alice" + ".txt"), "w")
    f.close()
    mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
    DISTRIBUTION_TYPE = "SAME"
    federated_train_data_divide = None
    federated_train_data = None
    if DISTRIBUTION_TYPE == "SAME":
        federated_train_data_divide = [get_data_for_federated_agents(
            mnist_train, d) for d in range(NUM_AGENT)]
        federated_train_data = federated_train_data_divide
    f_ini_p = open(os.path.join(os.path.dirname(__file__),
                                "initial_model_parameters-1.txt"), "r")
    para_lines = f_ini_p.readlines()
    w_paras = para_lines[0].split("\t")
    w_paras = [float(i) for i in w_paras]
    b_paras = para_lines[1].split("\t")
    b_paras = [float(i) for i in b_paras]
    w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
    b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
    f_ini_p.close()
    initial_model = collections.OrderedDict(
        weights=w_initial,
        bias=b_initial)
    model = initial_model
    learning_rate = 0.1
    for round_num in range(5):
        f = open(os.path.join(os.path.dirname(__file__),
                              "gradientplus_models" + str(round_num) + ".txt"), "w")  # alice
        f.close()
        local_models = federated_train(
            model, learning_rate, federated_train_data)
        print("learning rate: ", learning_rate)
        # print(local_models[0][0])  # weights matrix of agent 0
        # print(local_models[0][1])  # bias matrix of agent 0
        for local_index in range(len(local_models)):
            f = open(os.path.join(os.path.dirname(__file__), "weights_" +
                                  str(local_index) + ".txt"), "a", encoding="utf-8")
            for i in local_models[local_index][0]:
                line = ""
                arr = list(i)
                for j in arr:
                    line += (str(j) + "\t")
                print(line, file=f)
            print("***" + str(learning_rate) + "***", file=f)
            print("-" * 50, file=f)
            f.close()
            f = open(os.path.join(os.path.dirname(__file__), "bias_" +
                                  str(local_index) + ".txt"), "a", encoding="utf-8")
            line = ""
            for i in local_models[local_index][1]:
                line += (str(i) + "\t")
            print(line, file=f)
            print("***" + str(learning_rate) + "***", file=f)
            print("-" * 50, file=f)
            f.close()
        m_w = np.zeros([784, 10], dtype=np.float32)
        m_b = np.zeros([10], dtype=np.float32)
        for local_model_index in range(len(local_models)):
            m_w = np.add(np.multiply(
                local_models[local_model_index][0], 1 / NUM_AGENT), m_w)
            m_b = np.add(np.multiply(
                local_models[local_model_index][1], 1 / NUM_AGENT), m_b)
        model = {
            'weights': m_w,
            'bias': m_b
        }
        f_g = open(os.path.join(os.path.dirname(
            __file__), "gradientplus_models" + str(round_num) + ".txt"), "a")
        g_w = list(model['weights'].reshape(-1))
        g_b = list(model['bias'].reshape(-1))
        print(g_w, file=f_g)
        print(g_b, file=f_g)
        f_g.close()
        f = open(os.path.join(os.path.dirname(__file__),
                              "initial_model_parameters" + str(round_num - 1) + ".txt"), "w")
        f.close()
        f = open(os.path.join(os.path.dirname(__file__),
                              "initial_model_parameters" + str(round_num - 1) + ".txt"), "a")
        s = ""
        for i in list(model['weights']):
            for j in i:
                s += str(j) + "\t"
            s = s[0:-1]
            print(s, file=f)
            s = ""
        for i in list(model['bias']):
            s += str(i) + "\t"
        s = s[0:-1]
        print(s, file=f)
        f.close()
        learning_rate = learning_rate * 0.9
        loss = federated_eval(model, federated_train_data)
        print('round {}, loss={}'.format(round_num, loss))
        print(time.time() - start_time)
        shapstart_time = time.time()  # change cal time
        gradient_weights = []
        gradient_biases = []
        gradient_lrs = []
        for ij in range(NUM_AGENT):
            model_ = getParmsAndLearningRate(ij)
            gradient_weights_local = []
            gradient_biases_local = []
            learning_rate_local = []
            for i in range(len(model_['learning_rate'])):
                if i == 0:
                    gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
                                                model_['learning_rate'][i])
                    gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
                                              model_['learning_rate'][i])
                else:
                    gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
                                                model_['learning_rate'][i])
                    gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
                                              model_['learning_rate'][i])
                gradient_weights_local.append(gradient_weight)
                gradient_biases_local.append(gradient_bias)
                learning_rate_local.append(model_['learning_rate'][i])
            gradient_weights.append(gradient_weights_local)
            gradient_biases.append(gradient_biases_local)
            gradient_lrs.append(learning_rate_local)
        all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
        models_hository = loadHistoryModels(round_num)
        agent_shapley_history = []
        for iter_num in range(1, len(gradient_weights[0]) + 1):
            group_shapley_value = []
            for s in all_sets:
                group_shapley_value.append(
                    train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE,
                                                      iter_num, models_hository[iter_num - 1]))
                print(str(s) + "\t" +
                      str(group_shapley_value[len(group_shapley_value) - 1]))
            agent_shapley = []
            for index in range(NUM_AGENT):
                shapley = 0.0
                for j in all_sets:
                    if index in j:
                        remove_list_index = remove_list_indexed(
                            index, j, all_sets)
                        if remove_list_index != -1:
                            shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
                                remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
                agent_shapley.append(shapley)
            f = open(os.path.join(os.path.dirname(__file__),
                                  "alice" + ".txt"), "a")
            print('round {}, loss={}'.format(round_num, agent_shapley), file=f)
            agent_shapley_history.append(agent_shapley)
            print('round {}, loss={}'.format(
                round_num, agent_shapley_history), file=f)
            f.close()
        print("end_time shap values", time.time() - shapstart_time)
    print("end_time", time.time() - start_time)
It seems the source of this code is one of the low-level TFF tutorials, which are really more about introducing users to the core communication abstractions of TFF, as opposed to serving as an introductory guide to federated learning with TFF.
I might suggest a different tutorial, like the one that focuses on image classification for example. This uses TFF's tff.learning.from_keras_model utility to ingest a Keras model directly; if you go this route, you can add layers as you would for any Keras model.
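As a rough sketch of that route (not taken from the answer itself; the layer sizes are arbitrary, and depending on the TFF release the wrapper may live under tff.learning.models and the federated averaging builder may have a different name):
import collections
import tensorflow as tf
import tensorflow_federated as tff

def create_keras_model():
    # A deeper model than the single softmax layer above: 784 -> 128 -> 64 -> 10.
    return tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(784,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

def model_fn():
    # input_spec describes one batch of the federated data
    # (same structure as BATCH_TYPE above).
    input_spec = collections.OrderedDict(
        x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
        y=tf.TensorSpec(shape=[None], dtype=tf.int32))
    return tff.learning.from_keras_model(
        create_keras_model(),
        input_spec=input_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

trainer = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.1))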

IndexError: index 4 is out of bounds for axis 0 with size 4

Hey, I am having this IndexError while trying to composite events. My indices start at 0, not 1, and although I have tried a number of things, like .append[i+1], I am unable to fix the error.
There is apparently something wrong with this specific line of code: dset_IDX[offset:offset_next] = event_id[file_indices]
The .py file is over 1000 lines of code, so I cannot show all of it, but I can show the part of the function that is giving me the error.
def count_events(files):
    # Because we want to remove events with 0 hits,
    # we need to count the events beforehand (to create the h5 file).
    # This function counts and indexes the events with more than 0 hits.
    # Files need to be iterated in the same order to use the indexes.
    """ This is where we manually specify the file"""
    num_events = 0
    nonzero_file_events = []
    for file_index, f in enumerate(files):
        data = np.load(f, allow_pickle=True)
        nonzero_file_events.append([])
        hits = data['digi_hit_pmt']
        for i in range(len(hits)):
            if len(hits[i]) != 0:
                nonzero_file_events[file_index].append(i)
                num_events += 1
    return num_events, nonzero_file_events

def GenMapping(csv_file):
    mPMT_to_index = {}
    with open(csv_file) as f:
        rows = f.readline().split(",")[1:]
        rows = [int(r.strip()) for r in rows]
        for line in f:
            line_split = line.split(",")
            col = int(line_split[0].strip())
            for row, value in zip(rows, line_split[1:]):
                value = value.strip()
                if value:  # If the value is not empty
                    mPMT_to_index[int(value)] = [col, row]
    npmap = np.zeros((max(mPMT_to_index) + 1, 2), dtype=np.int)
    for k, v in mPMT_to_index.items():
        npmap[k] = v
    return npmap

def GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2):
    """
    Inputs:
        avg_mu_per_ev == Poisson distribution mean for number of muons in each spill
        sigma_time_offset == Width of spill (Gaussian) in nanoseconds
    """
    files = ['event998.npz']
    # Remove whitespace
    files = [x.strip() for x in files]
    # Check that files were provided
    if len(files) == 0:
        raise ValueError("No files provided!!")
    print("Merging " + str(len(files)) + " files")
    # Start merging
    num_nonzero_events, nonzero_event_indexes = count_events(files)
    print(num_nonzero_events)
    # np.random.poisson( avg_mu_per_ev, number_of_throws )
    num_muons = np.random.poisson(avg_mu_per_ev, num_nonzero_events - 2954)
    # creates h5 file to generate the h5 file
    dtype_events = np.dtype(np.float32)
    dtype_labels = np.dtype(np.int32)
    dtype_energies = np.dtype(np.float32)
    dtype_positions = np.dtype(np.float32)
    dtype_IDX = np.dtype(np.int32)
    dtype_PATHS = h5py.special_dtype(vlen=str)
    dtype_angles = np.dtype(np.float32)
    # sets h5 file to be written
    h5_file = h5py.File('multimuonfile(2).h5', 'w')
    dset_event_data = h5_file.create_dataset("event_data",
                                             shape=(num_nonzero_events,) + IMAGE_SHAPE,
                                             dtype=dtype_events)
    dset_labels = h5_file.create_dataset("labels",
                                         shape=(num_nonzero_events,),
                                         dtype=dtype_labels)
    dset_energies = h5_file.create_dataset("energies",
                                           shape=(num_nonzero_events, 1),
                                           dtype=dtype_energies)
    dset_positions = h5_file.create_dataset("positions",
                                            shape=(num_nonzero_events, 1, 3),
                                            dtype=dtype_positions)
    dset_IDX = h5_file.create_dataset("event_ids",
                                      shape=(num_nonzero_events,),
                                      dtype=dtype_IDX)
    dset_PATHS = h5_file.create_dataset("root_files",
                                        shape=(num_nonzero_events,),
                                        dtype=dtype_PATHS)
    dset_angles = h5_file.create_dataset("angles",
                                         shape=(num_nonzero_events, 2),
                                         dtype=dtype_angles)
    # 22 -> gamma, 11 -> electron, 13 -> muon
    # corresponds to labelling used in CNN with only barrel
    # IWCDmPMT_4pi_full_tank_gamma_E0to1000MeV_unif-pos-R371-y521cm_4pi-dir_3000evts_329.npz has an event
    # with pid 11 though....
    # pid_to_label = {22:0, 11:1, 13:2}
    offset = 0
    offset_next = 0
    mPMT_to_index = GenMapping(PMT_LABELS)
    # Loop over files
    for file_index, filename in enumerate(files):
        data = np.load(filename, allow_pickle=True)
        nonzero_events_in_file = len(nonzero_event_indexes[file_index])
        x_data = np.zeros((nonzero_events_in_file,) + IMAGE_SHAPE,
                          dtype=dtype_events)
        digi_hit_pmt = data['digi_hit_pmt']
        # digi_hit_charge = data['digi_hit_charge']
        # digi_hit_time = data['digi_hit_time']
        # digi_hit_trigger = data['digi_hit_trigger']
        # trigger_time = data['trigger_time']
        delay = 0
        # Loop over events in file
        # Loop over number of muons in each event
        event_id = np.array([], dtype=np.int32)
        root_file = np.array([], dtype=np.str)
        pid = np.array([])
        position = np.array([])
        direction = np.array([])
        energy = np.array([])
        labels = np.array([])
        # with open("ResultFile.txt", "w") as text_file:
        # sys.stdout = open("Result2.txt", "w")
        for i, nmu in enumerate(num_muons):
            # np.savetxt(text_file, i, nmu, fmt="%d")
            # text_file.write("processing output entry " + str(i) + " with " + nmu + " muons")
            print("processing output entry ", i, " with ", nmu, " muons")
            indices = np.random.randint(0, len(digi_hit_pmt), max(1, nmu))
            time_offs = [0.]
            if nmu > 1:
                time_offs = np.append(time_offs, np.random.normal(0., sigma_time_offset, nmu - 1))
            hit_pmts, charge, time = SumEvents(indices, time_offs, data, nmu == 0)
            hit_mpmts = hit_pmts // 19
            pmt_channels = hit_pmts % 19
            rows = mPMT_to_index[hit_mpmts, 0]
            cols = mPMT_to_index[hit_mpmts, 1]
            x_data[i - delay, rows, cols, pmt_channels] = charge
            x_data[i - delay, rows, cols, pmt_channels + 19] = time
            # fix below!!!
            idx0 = indices[0]
            event_id = np.append(event_id, data['event_id'][idx0])
            root_file = np.append(root_file, data['root_file'][idx0])
            pid = np.append(pid, data['pid'][idx0])
            position = np.append(position, data['position'][idx0])
            direction = np.append(direction, data['direction'][idx0])
            energy = np.append(energy, np.sum(data['energy'][indices]))
            labels = np.append(labels, nmu)
        offset_next += nonzero_events_in_file
        file_indices = nonzero_event_indexes[file_index]
        dset_IDX[offset:offset_next] = event_id[file_indices]
        dset_PATHS[offset:offset_next] = root_file[file_indices]
        dset_energies[offset:offset_next, :] = energy[file_indices].reshape(-1, 1)
        dset_positions[offset:offset_next, :, :] = position[file_indices].reshape(-1, 1, 3)
        dset_labels[offset:offset_next] = labels[file_indices]
        print(event_id)
        direction = direction[file_indices]
        polar = np.arccos(direction[:, 1])
        azimuth = np.arctan2(direction[:, 2], direction[:, 0])
        dset_angles[offset:offset_next, :] = np.hstack((polar.reshape(-1, 1), azimuth.reshape(-1, 1)))
        dset_event_data[offset:offset_next, :] = x_data
        offset = offset_next
        print("Finished file: {}".format(filename))
    # sys.stdout.close()
    print("Saving")
    # h5_file.close()
    print("Finished")

# In[ ]:
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
Traceback
Merging 1 files
2958
processing output entry 0 with 3 muons
processing output entry 1 with 1 muons
processing output entry 2 with 3 muons
processing output entry 3 with 3 muons
Traceback (most recent call last):
  File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1068, in <module>
    GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
  File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1044, in GenerateMultiMuonSample_h5
    dset_IDX[offset:offset_next] = event_id[file_indices]
IndexError: index 4 is out of bounds for axis 0 with size 4
Not much info is provided, but from what I have understood,
the error says that axis 0 has size 4 and you are trying to access index 4, which is not possible with size 4: indexing starts at 0, so the maximum valid index is 3.
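A tiny sketch (not from the answer) that reproduces the situation: event_id is only filled once per entry of num_muons (4 entries in the traceback above), while file_indices comes from nonzero_event_indexes and can contain larger indices, so indexing one with the other fails. The array contents below are made up for illustration:
import numpy as np

event_id = np.array([10, 11, 12, 13])        # built inside the num_muons loop -> size 4
file_indices = np.array([0, 1, 2, 3, 4, 5])  # nonzero_event_indexes[file_index] -> indices up to 5

try:
    event_id[file_indices]
except IndexError as e:
    print(e)  # index 4 is out of bounds for axis 0 with size 4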

Memory leak in python code

I am trying to make my own dataset with torch in Python. However, memory usage grows on every file open.
The memory leak occurs during the for loop below:
for subj, _file in enumerate(filelist):
I deleted unnecessary parts of the code, such as the imports.
parser.add_argument('--stddev', dest='stddev', default='subject', type=str, help="How to calculate ")
args = parser.parse_args()

# Options
if args.sess_num == 0:
    label_set = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
base_dir = './kriss_proto'
ori_base_dir = '{}/original'.format(base_dir)
filelist = os.listdir(ori_base_dir)  # ['BD001.mat', ...]
filenum = len(filelist)

# Saving data
eeg_data = {}
eeg_data['dataset'] = {}
split_data = {}
split_data['splits'] = []
split_data['splits'].append({})
_meanstd = []
for i in range(32):
    _meanstd.append([])
    if args.stddev == 'subject':
        for j in range(filenum):
            _meanstd[i].append([])

idx = 0
for subj, _file in enumerate(filelist):
    filepath = os.path.join(ori_base_dir, _file)
    with h5py.File(filepath, 'r') as mat:
        ## Step by step (with object searching)
        bd_eeg = mat.get('BD_EEG')
        vst = bd_eeg.get('VST')
        for run in vst.keys():
            print("Processing {} - {}".format(_file, run))
            run_data = vst.get(run)
            eeg = run_data['data3D']
            eeg = th.Tensor(eeg)
            eeg = eeg.permute(2, 1, 0)
            label = run_data['class_labels']
            label = th.Tensor(label)
            label = th.squeeze(label)
            # DATA COPY
            for i in range(int(label.shape[0])):
                if (label[i] in label_set) and (int(eeg[i].shape[0]) > 400):
                    eeg_data['dataset'][idx] = {}
                    eeg_data['dataset'][idx]['eeg'] = eeg[i, :, :32]
                    eeg_data['dataset'][idx]['label'] = label[i] - label_set[0]  # label should start from 0
                    if args.stddev == 'whole':
                        for j in range(32):
                            for k in range(int(eeg[i].shape[0])):
                                _meanstd[j].append(eeg[i][k, j])
                    elif args.stddev == 'subject':
                        for j in range(32):
                            for k in range(int(eeg[i].shape[0])):
                                _meanstd[j][subj].append(eeg[i][k, j])
                    idx += 1
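Not from the original post, and only a guess at the cause, but one common reason memory keeps growing in loops like this is that a slice such as eeg[i, :, :32] is a view that shares storage with the full per-run tensor, so storing it in eeg_data keeps each run's whole tensor alive. A minimal sketch of how to check that, and how .clone() gives an independent copy:
import torch as th

run = th.randn(500, 64, 600)           # stand-in for one run's data3D tensor
view = run[0, :, :32]                  # basic indexing returns a view ...
print(view.storage().data_ptr() == run.storage().data_ptr())   # True: shares the big tensor's storage

copy = run[0, :, :32].clone()          # .clone() allocates independent storage
print(copy.storage().data_ptr() == run.storage().data_ptr())   # False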

How to make a dataset similar to CIFAR10 with several images

I am trying to make a dataset that is similar to CIFAR-10. I found this tutorial:
How to create dataset similar to cifar-10
I can already make a dataset with 1 image, but when I try to use several images I get this error:
tensorflow.python.framework.errors.InvalidArgumentError: Indices are not valid: not lexicographically sorted or containing repeats.
Can anyone help me solve this problem?
This is my code:
from PIL import Image
import numpy as np

out = np.empty([20, 7501])
for j in xrange(0, 10):
    im = Image.open('%d_receipt.jpg' % j)
    im = (np.array(im))
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    label = [0]
    out[j] = np.array(list(label) + list(r) + list(g) + list(b), np.uint8)
for i in xrange(0, 10):
    im = Image.open('%d_news.jpg' % i)
    im = (np.array(im))
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    label = [1]
    j = i + 10
    out[j] = np.array(list(label) + list(r) + list(g) + list(b), np.uint8)
out.tofile("data_batch.bin")
I do it like this:
import numpy as np
import scipy.io

mat = scipy.io.loadmat('train_32x32.mat')
data = mat['X']
label = mat['y']
R_data = data[:, :, 0, :]
G_data = data[:, :, 1, :]
B_data = data[:, :, 2, :]
R_data = np.transpose(R_data, (2, 0, 1))
G_data = np.transpose(G_data, (2, 0, 1))
B_data = np.transpose(B_data, (2, 0, 1))
R_data = np.reshape(R_data, (73257, 32 * 32))
G_data = np.reshape(G_data, (73257, 32 * 32))
B_data = np.reshape(B_data, (73257, 32 * 32))
outdata = np.concatenate((label, R_data, G_data, B_data), axis=1)
step = 10000
for i in range(1, 6):
    temp = outdata[i * step:(i + 1) * step, :]
    temp.tofile('SVHN_train_data_batch%d.bin' % i)
    print('save data %d' % i)
Then just feed it directly into the training code of the CIFAR-10 TensorFlow example.
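A small sketch (not from the answer) for sanity-checking the layout of a generated batch file; it assumes each record is 1 label byte followed by 3 × 32 × 32 pixel bytes, as in the CIFAR-10 binary format, so outdata should be cast to np.uint8 before calling tofile for this to hold:
import numpy as np

RECORD_BYTES = 1 + 3 * 32 * 32  # label + R + G + B planes

raw = np.fromfile('SVHN_train_data_batch1.bin', dtype=np.uint8)
assert raw.size % RECORD_BYTES == 0, "file size is not a multiple of the record length"

first = raw[:RECORD_BYTES]
label = first[0]
image = first[1:].reshape(3, 32, 32).transpose(1, 2, 0)  # HWC layout for plotting
print('label:', label, 'image shape:', image.shape)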
I too tried to follow the tutorial you posted in the question, however I couldn't get it to work, so I made my own solution. It can be found on my GitHub here: https://github.com/jdeepee/machine_learning/tree/master
The code is commented, so it should be easy enough to follow. I should note that it iterates through a master directory containing multiple folders which contain the images.
The snippet below is what I did to adapt CIFAR-10 to GTSRB. More details here: https://github.com/hashkanna/traffic-signs/blob/master/Traffic_Signs_Recognition_binFiles.ipynb
out = {}
for i in range(5):
    bin_val = (i % 5) + 1
    # im = Image.open(X_train[i])
    # im = np.array(im)
    im = X_train[i]
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    label = [y_train[i]]
    out[bin_val] = np.array(list(label) + list(r) + list(g) + list(b), np.uint8)
for i in range(5, len(X_train)):
    bin_val = (i % 5) + 1
    # im = Image.open(X_train[i])
    # im = np.array(im)
    im = X_train[i]
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    label = [y_train[i]]
    new_array = np.array(list(label) + list(r) + list(g) + list(b), np.uint8)
    out[bin_val] = np.append(out[bin_val], new_array, 0)
for bin_val in range(1, 6):
    out[bin_val].tofile("/Users/kanna/Downloads/data_batch_%s.bin" % bin_val)
