Avoid "MemoryError" when dealing with large arrays - python

I sometimes get a MemoryError: sometimes the code runs fine and sometimes the error pops up. It happens specifically when I subtract a large array from one (i.e. compute 1 - array). I have tried many ways of doing this subtraction; is there any way to avoid the error? And could other parts of my code also raise this error at times?
Here is my code:
def _cosine_distance_matrix(texts):
    """Return the dense ``len(texts) x len(texts)`` matrix ``1 - cosine similarity``.

    The texts are vectorized with ``CountVectorizer``, reduced to 100 LSA
    components with ``TruncatedSVD`` and L2-normalized, so the dot product of
    two rows is their cosine similarity.

    The subtraction is done in place (``out=``) so that only ONE k x k array
    is alive at any time; the naive ``1 - product`` keeps the product and the
    result in memory simultaneously.
    """
    dtm = CountVectorizer().fit_transform(texts)
    dtm_lsa = TruncatedSVD(n_components=100).fit_transform(dtm)
    dtm_lsa = Normalizer(copy=False).fit_transform(dtm_lsa)
    dist = np.dot(dtm_lsa, dtm_lsa.T)
    np.subtract(1, dist, out=dist)
    return dist


def home(request):
    """Cluster the ``BASE_NAME`` column of an uploaded CSV and export the labels.

    On a valid POST the uploaded file (form field ``pic``) is read, its
    ``BASE_NAME`` values are clustered twice (average-linkage agglomerative
    clustering with ``number_cluster(len(names))`` clusters, then k-means with
    the user-supplied ``num_clusters``), and the results are written to
    ``output.csv`` / ``output1.csv`` under ``settings.MEDIA_ROOT``.

    Returns the rendered ``new.html`` template in every case.
    """
    if request.method == "POST":
        img = UploadForm(request.POST, request.FILES)
        no_clus = int(request.POST.get('num_clusters', 10))
        if img.is_valid():
            paramFile = io.TextIOWrapper(request.FILES['pic'].file)
            my_list = [row["BASE_NAME"] for row in csv.DictReader(paramFile)]

            # Built once and shared by both clustering steps. The original
            # rebuilt the identical matrix a second time while the first one
            # was still referenced, which tripled the peak memory use.
            # NOTE: the matrix is still O(k^2); for very large inputs a
            # MemoryError remains possible on small machines.
            dist1 = _cosine_distance_matrix(my_list)

            no_cluster = number_cluster(len(my_list))
            km = AgglomerativeClustering(n_clusters=no_cluster, linkage='average').fit(dist1)
            labels = km.labels_

            csvfile = settings.MEDIA_ROOT +'\\'+ 'images\\export.csv'
            csv_input = pd.read_csv(csvfile, encoding='latin-1')
            csv_input['cluster_ID'] = labels
            csv_input['BASE_NAME'] = my_list
            csv_input.to_csv(settings.MEDIA_ROOT +'/'+ 'output.csv', index=False)

            # NOTE(review): the original also joined the names of each cluster
            # into `clus_groups` but never used the result; the second
            # clustering below runs on the same per-name matrix. If the intent
            # was to cluster the cluster groups themselves, that needs a
            # separate change - confirm with the author.
            km = KMeans(n_clusters=no_clus, init='k-means++').fit(dist1)
            labels2 = km.labels_.tolist()
            labels = labels.tolist()

            csv_input = pd.read_csv(settings.MEDIA_ROOT +'/'+ 'output.csv',encoding='latin-1')
            # Look up the k-means label stored at the index given by each
            # first-stage cluster id.
            new_list = [labels2[cluster_id] for cluster_id in csv_input['cluster_ID']]
            csv_input['cluster_ID'] = labels
            csv_input['BASE_NAME'] = my_list
            csv_input['User_Map'] = new_list
            csv_input.to_csv(settings.MEDIA_ROOT + '/' + 'output1.csv', index=False)
            return render(request, 'new.html', {'labels': labels})
    else:
        # Without this, a GET request reached the final render() with `img`
        # unbound.
        img = UploadForm()
    # NOTE(review): `images` is not assigned anywhere in the visible function;
    # it must come from an enclosing/module scope - confirm.
    return render(request,'new.html',{'form':img,'images':images})
The error occurs at dist1 = (1- np.asarray(numpy.asmatrix(dtm_lsa) * numpy.asmatrix(dtm_lsa).T)). I also tried creating a new array of all ones with the same size and then subtracting. How should I modify this to prevent the error? Note that the user interface that runs this code may be operated on any PC!


Data from three .txt files - loop together to one, and make a plot?

I want to end up with a scatter plot that uses different colors for different values.
First I need to analyze my data. The problem is that I have an FE model that exports element numbers, each coupled with 4 nodes. These 4 nodes have 4 coordinate sets, but if 4 elements share 1 node, only 1 coordinate set is exported for that shared node.
I end up with three .txt files.
.txt with element number (and data i am analyzing for the plot)
.txt with the element number and node numbers.
.txt with node coordinates.
Is it possible to make a loop that connects these datapoints?
I would like to include an example, but i have not yet made one.
I have tried something like this
from numpy import loadtxt
from fpdf import FPDF
# Script setup: section names, a counter and the table headers for the report.
Sek = ['Sektion_100']
I = 0
HeaderY =['nr.','LC','Element nr.','Myy','Nyy','MRd_y','URy']
HeaderX =['nr.','LC','Element nr.','Mxx','Nxx','MRd_x','URx']
#load the excel file
#header = loadtxt"Loads\Area_233.txt", unpack=False, skiprows=1)
# A4 portrait PDF document, dimensions given in millimetres.
pdf = FPDF(orientation = 'P', unit = 'mm', format = 'A4')
MaxURx =[]
MaxURy =[]
# File 1: per-element results. The first line of each file is skipped
# (skiprows=1), presumably a header row - TODO confirm.
data = loadtxt("Loads/Sektion_150.txt", unpack=False, skiprows=1)
nr = data[:,1]
Mxx = data[:,2]
# Columns 4 and 5 are negated on load (sign convention flipped).
Nxx = -data[:,4]
Myy = data[:,3]
Nyy = -data[:,5]
# File 2: topology. Column 0 is read as the element number and columns 1-4
# as the four node numbers of that element.
topologi = loadtxt("Loads/Sektion_150_topologi.txt", unpack=False, skiprows=1)
nr1 = topologi[:,0]
node1 = topologi[:,1]
node2 = topologi[:,2]
node3 = topologi[:,3]
node4 = topologi[:,4]
# File 3: nodes. Column 0 is read as the node number and columns 1-3 as
# the x, y, z coordinates.
knuder = loadtxt("Loads/Sektion_150_knuder.txt", unpack=False, skiprows=1)
nr2 = knuder[:,0]
x = knuder[:,1]
y = knuder[:,2]
z = knuder[:,3]
Picture of dataset
I have included a picture of my dataset here. In "Sektion_150_Knuder", NR = node number.
I hope someone has some pointers in the right direction to solve this problem.
I found the answer.
import xlwings as xw
import matplotlib.pyplot as plt
from math import pi
from numpy import loadtxt
import numpy as np
from fpdf import FPDF
import matplotlib as mpl
from matplotlib.ticker import ScalarFormatter
Sek = ['Sektion_100','Sektion_110','Sektion_120','Sektion_130','Sektion_140','Sektion_150']
I = 0
HeaderY =['nr.','LC','Element nr.','Myy','Nyy','MRd_y','URy']
HeaderX =['nr.','LC','Element nr.','Mxx','Nxx','MRd_x','URx']
#load the excel file
#header = loadtxt"Loads\Area_233.txt", unpack=False, skiprows=1)
pdf = FPDF(orientation = 'P', unit = 'mm', format = 'A4')
MaxURx =[]
MaxURy =[]
Elem = np.array(loadtxt("Element/Sektion_100_elements.txt", unpack=False, skiprows=1))
Node = np.array(loadtxt("Element/Sektion_100_nodes.txt", unpack=False, skiprows=1))
#Elem = np.array(loadtxt("Element/"+Sek+"_elements.txt", unpack=False, skiprows=1))
#Node = np.array(loadtxt("Element/"+Sek+"_nodes.txt", unpack=False, skiprows=1))
data = loadtxt("Loads/Sektion_100.txt", unpack=False, skiprows=1)
Mxx = data[:,2]
Nxx = -data[:,4]
Myy = data[:,3]
Nyy = -data[:,5]
R1x = []
R2x = []
MRdx = []
URx = []
R1y = []
R2y = []
MRdy = []
URy = []
min_nx = int(round(max(min(Nxx),-300),-1)-10)
max_nx = int(round(max(Nxx),-1)+10)
min_ny = int(round(min(Nyy),-1)-10)
max_ny = int(round(max(Nyy),-1)+10)
xrange = range(min_nx, max_nx+50, round((max_nx+50-min_nx)/20))
yrange = range(min_ny, max_ny+50, round((max_ny+50-min_ny)/20))
x2 =range(0,len(data),1)
wbx =xw.Book("Capacity\\Sektion_100_L.xlsm", None, True)
sht_x1 = wbx.sheets["Beregning"]
for i in xrange:
kx = sht_x1.range("N25").value = i
Q1x = sht_x1["AV24"].value
Q2x = sht_x1["BC24"].value
for i in x2:
if Myy[i] <= 0:
mrdx = np.interp(Nxx[i],xrange,R1x)
urx = Mxx[i] / mrdx
mrdx = np.interp(Nxx[i],xrange,R2x)
urx = Mxx[i] / mrdx
MRdx += [round(mrdx,2)]
URx += [round(urx,2)]
sort_tabelX = np.flipud(TabelX[TabelX[:,5].argsort()])
LimX = 25
for i in x2:
if sort_tabelX[i,5] > 1.05 :
LimX = i+2
LimX = max(25,LimX)
TABx2 = np.unique(TABx[:,2])
#np.savetxt("array1.txt", TABx2)
# %%
NumOUE = len(TABx2)
NumElem = len(Elem)
EleRange = range(0,NumElem)
OUERange = range(0,NumOUE)
EO = np.searchsorted(Elem[:,0], TABx2)
EleCorOx =[]
EleCorOy =[]
EleCorOz =[]
EleCorUx =[]
EleCorUy =[]
EleCorUz =[]
for i in EleRange:
Na = np.searchsorted(Node[:,0],Elem[i,1])
Nb = np.searchsorted(Node[:,0],Elem[i,2])
Nc = np.searchsorted(Node[:,0],Elem[i,3])
Nd = np.searchsorted(Node[:,0],Elem[i,4])
if i in EO:
EleCorOx += [(Node[Na,1] + Node[Nb,1] + Node[Nc,1] + Node[Nd,1])/4]
EleCorOy += [(Node[Na,2] + Node[Nb,2] + Node[Nc,2] + Node[Nd,2])/4]
EleCorOz += [(Node[Na,3] + Node[Nb,3] + Node[Nc,3] + Node[Nd,3])/4]
EleCorUx += [(Node[Na,1] + Node[Nb,1] + Node[Nc,1] + Node[Nd,1])/4]
EleCorUy += [(Node[Na,2] + Node[Nb,2] + Node[Nc,2] + Node[Nd,2])/4]
EleCorUz += [(Node[Na,3] + Node[Nb,3] + Node[Nc,3] + Node[Nd,3])/4]
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter3D(EleCorUx,EleCorUy,EleCorUz,color = 'Blue')
ax.scatter3D(EleCorOx,EleCorOy,EleCorOz,color = 'red')
This code shows Sektion 100. Small changes give me the plots for 110, 120, 130 and so on.
Maybe someone else can use it.

Annotating images from h5 file

Hi all, I have 70k images saved in an .h5 file, and with this script I want to read them from that file and write the annotations of the text instances to a .json file. When I run the script, it takes a very long time to annotate one image (approx. 2 h).
When I do this with 15 images, the script works fine and annotates all 15 images in a few seconds.
With 70k images, the .h5 file is 51 GB.
I don't know whether the problem is in the code or whether the .h5 file is too big. The code works fine with a small number of images, but I'm working on a project where I need 70k or even 700k images.
from __future__ import division
import os
import os.path as osp
from re import U
import numpy as np
import matplotlib.pyplot as plt
import h5py
from common import *
import json
import cv2
import numpy as np
from itertools import cycle
import js2py
#from gen import brojac
#from synthgen import imnames
global x
global y
def write_json(data, filename='annotation.json'):
with open(filename,'w') as file:
DATA_PATH = 'results'
DB_FNAME = osp.join(DATA_PATH,'SynthText.h5')
def get_data():
    """Open the HDF5 database at ``DB_FNAME`` read-only and return the handle.

    The caller owns the returned ``h5py.File`` and is responsible for closing it.
    """
    handle = h5py.File(DB_FNAME, 'r')
    return handle
def viz_textbb(text_im, imageName, charBB_list, wordBB, textToList, alpha=1.0):
text_im : image containing text
charBB_list : list of 2x4xn_i bounding-box matrices
wordBB : 2x4xm matrix of word coordinates
#print("k",z, type(z))
H,W = text_im.shape[:2]
global imnames
db = h5py.File('results/SynthText.h5', 'r')
dsets = sorted(db['data'].keys())
for k in dsets:
db = get_data()
imnames = sorted(db['data'].keys())
start = 0
count = 0
coordinate = []
coordinate1 = []
name = []
name1 = []
final = []
upperList = []
downList = []
counter = 0
FinalFinal = []
imageData = { }
dictList = []
for eachWord in textToList:
length = len(eachWord)
for i in range(0,4):
for j in range(start,length+start):
coordinate.append([charBB_list[0][0][i][j], charBB_list[0][1][i][j]])
coordinate1.append((charBB_list[0][0][i][j], charBB_list[0][1][i][j]))
coordinate = []
for j in range(0, length):
for i in range(len(name)) :
#print(i,j, name[i][j]) ## koordinate da se snađem, treba
if(i == 0 or i == 1):
if(i == 2):
if(i == 3):
down = reversed(downList)
joinList = [*upperList,*down,upperList[0]]
dictionary_copy = imageData.copy()
finalToList = np.array(final)
final = []
upperList = []
downList = []
start = len(eachWord) + start
finalDict = {f'gt_{imageName}':dictList}
#print(type(finalDict)) --> dict
with open("annotation.json") as json_file:
#temp['annotations'] = finalDict
for list in FinalFinal:
x,y = zip(*list)
# points = tuple(zip(x,y))
# # boundaries of the bounding box
# left, right = min(points, key=lambda p: p[0]), max(points, key=lambda p: p[0])
# bottom, top = min(points, key=lambda p: p[1]), max(points, key=lambda p: p[1])
# # area
# base = right[0] - left[0]
# height = top[1] - bottom[1]
# A = base * height
for i in range(len(charBB_list)):
# #print(charBB_list) #ispisuje x-eve za jedan vrh svih instanci pojedinih slova, pa drugi, 3. i 4. i onda posebno y-one
bbs = charBB_list[i]
ni = bbs.shape[-1]
for j in range(ni):
bb = bbs[:,:,j]
bb = np.c_[bb,bb[:,0]] #ako se doda ,bb[:,0] -> printa isto kao i gornji lijevi
#plt.plot(bb[0,:], bb[1,:], 'r', alpha=alpha)
# plot the word-BB:
for i in range(wordBB.shape[-1]):
bb = wordBB[:,:,i] #koordinate wordBB-a
bb = np.c_[bb,bb[:,0]] #spaja skroz lijevu, TREBA
#plt.plot(bb[0,:], bb[1,:], 'g', alpha=alpha)
# visualize the indiv vertices:
vcol = ['r','g','b','k']
#for j in range(4):
#print(bb) # ----> KOORDINATE wordBB-a
def main(db_fname):
    """Annotate every image stored under ``data`` in the HDF5 file ``db_fname``.

    For each dataset (visited in sorted key order) the image array and its
    ``charBB`` / ``wordBB`` / ``txt`` attributes are read and passed to
    ``viz_textbb``, then a short summary is printed.

    The file is opened read-only and closed when the function returns, also
    on error (the original never closed the handle).
    """
    with h5py.File(db_fname, 'r') as db:
        images = db['data']
        dsets = sorted(images.keys())
        print ("total number of images : ", colorize(Color.RED, len(dsets), highlight=True))
        for k in dsets:
            # Resolve the dataset once per image instead of once per field.
            dset = images[k]
            rgb = dset[...]
            charBB = dset.attrs['charBB']
            wordBB = dset.attrs['wordBB']
            txt = dset.attrs['txt']
            textToList = txt.tolist()
            viz_textbb(rgb, k,[charBB], wordBB, textToList)
            print ("image name : ", colorize(Color.RED, k, bold=True))
            print (" ** no. of chars : ", colorize(Color.YELLOW, charBB.shape[-1]))
            print (" ** no. of words : ", colorize(Color.YELLOW, wordBB.shape[-1]))
            print (" ** text : ", colorize(Color.GREEN, txt))
if __name__=='__main__':

Adding more layers to neural network

I want to add more layers to the neural network. How can I customize this code? Can anyone suggest a better way?
The model type defines only one layer. Can I define additional weights in this part? Should I add the other layers there, or would that make no sense?
MODEL_TYPE = tff.StructType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
then in batch loss function they multiply by them in order to get the predicted y.. that's simply adding one layer..
tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
The full code is here
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import collections
import os
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()
def get_data_for_digit(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
def get_data_for_digit_test(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples)):
'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
return output_sequence
def get_data_for_federated_agents(source, num):
output_sequence = []
Samples = []
for digit in range(0, 10):
samples = [i for i, d in enumerate(source[1]) if d == digit]
samples = samples[0:5421]
all_samples = []
for sample in Samples:
for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
# all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
# TFF type of one batch: a float32 tensor `x` of shape [None, 784] and an
# int32 tensor `y` of shape [None] (first dimension left unspecified).
BATCH_TYPE = tff.StructType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
# TFF type of the model: a single [784, 10] float32 weight matrix plus a
# [10] float32 bias vector.
MODEL_TYPE = tff.StructType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
#tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    """Return the mean cross-entropy loss of `model` on `batch`.

    Args:
        model: structure with `weights` and `bias` (see MODEL_TYPE).
        batch: structure with inputs `x` and integer labels `y` (see BATCH_TYPE).

    Returns:
        Scalar tensor: the negative log-likelihood averaged over the batch.
    """
    logits = tf.matmul(batch.x, model.weights) + model.bias
    # log_softmax is computed in a numerically stable way; taking
    # tf.log(tf.nn.softmax(...)) yields -inf (and NaN after the multiply with
    # the one-hot zeros) as soon as a probability underflows to 0.
    log_probs = tf.nn.log_softmax(logits)
    return -tf.reduce_mean(tf.reduce_sum(
        tf.one_hot(batch.y, 10) * log_probs, axis=[1]))
#tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
    """Apply one gradient-descent step on `batch`, starting from `initial_model`.

    Returns the model variables as they are after the step.
    """
    # Variables shaped like MODEL_TYPE, loaded with the incoming model values.
    variables = tff.utils.create_variables('v', MODEL_TYPE)
    load_initial = tff.utils.assign(variables, initial_model)

    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    # The update op may only run once the variables hold `initial_model`.
    with tf.control_dependencies([load_initial]):
        step = sgd.minimize(batch_loss(variables, batch))

    # Read the variables back only after the update step has executed.
    with tf.control_dependencies([step]):
        return tff.utils.identity(variables)
#tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
    """Fold `batch_train` over `all_batches`, starting from `initial_model`.

    Each batch is applied with the same `learning_rate`; the model produced
    by one batch is the starting point for the next.
    """
    # Reduction step: one training step on a single batch.
    def batch_fn(model, batch):
        return batch_train(model, batch, learning_rate)

    return tff.sequence_reduce(all_batches, initial_model, batch_fn)
#tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
return tff.sequence_sum(
lambda b: batch_loss(model, b), BATCH_TYPE),
SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
#tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
    """Return the mean of `local_eval` over `data`, evaluated with `model`.

    `model` is broadcast, `local_eval` is mapped over the (model, data)
    pairs, and the results are averaged with `tff.federated_mean`.
    """
    broadcast_model = tff.federated_broadcast(model)
    local_losses = tff.federated_map(local_eval, [broadcast_model, data])
    return tff.federated_mean(local_losses)
SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)
def federated_train(model, learning_rate, data):
l = tff.federated_map(
return l
# return tff.federated_mean()
def readTestImagesFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__),
"test_images1_.txt"), encoding="utf-8")
f = open(os.path.join(os.path.dirname(__file__),
"test_images1_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace(
"\n", "").split("\t")
for i in p:
if i != "":
return np.asarray(ret)
def readTestLabelsFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__),
"test_labels_.txt"), encoding="utf-8")
f = open(os.path.join(os.path.dirname(__file__),
"test_labels_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
for i in p:
if i != "":
return np.asarray(ret)
def getParmsAndLearningRate(agent_no):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split(
parm_local = []
learning_rate_list = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0:784]
float(line[784].replace("*", "").replace("\n", "")))
weights_line = line[1:785]
float(line[785].replace("*", "").replace("\n", "")))
valid_weights_line = []
for l in weights_line:
w_list = l.split("\t")
w_list = w_list[0:len(w_list) - 1]
w_list = [float(i) for i in w_list]
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split(
bias_local = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0]
weights_line = line[1]
b_list = weights_line.split("\t")
b_list = b_list[0:len(b_list) - 1]
b_list = [float(i) for i in b_list]
ret = {
'weights': np.asarray(parm_local),
'bias': np.asarray(bias_local),
'learning_rate': np.asarray(learning_rate_list)
return ret
def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type, iter_n, g_m):
    """Apply the averaged gradients of `agent_list` for one round and score the result.

    Args:
        agent_list: indices of the agents whose gradients are averaged; an
            empty list leaves the model unchanged.
        grad: per-agent, per-round weight gradients, indexed `grad[agent][round]`.
        bi: per-agent, per-round bias gradients, indexed `bi[agent][round]`.
        lr: learning rates; only `lr[0][round]` is read.
        distr_type: unused; kept for interface compatibility.
        iter_n: 1-based round number; round `iter_n - 1` is applied.
        g_m: pair `(weights, bias)` of the model to start from.

    Returns:
        Accuracy of the updated model on the test set returned by
        `readTestImagesFromFile(False)` / `readTestLabelsFromFile(False)`.
        NOTE: `.numpy()` requires TensorFlow eager execution.
    """
    model_g = {
        'weights': g_m[0],
        'bias': g_m[1],
    }

    # i -> index of the training round (the original loop over
    # range(iter_n - 1, iter_n) always ran exactly once).
    i = iter_n - 1
    gradient_w = np.zeros([784, 10], dtype=np.float32)
    gradient_b = np.zeros([10], dtype=np.float32)
    # The 1 / len(agent_list) factor stays inside the loop so that an empty
    # agent_list never divides by zero.
    for j in agent_list:
        gradient_w = np.add(np.multiply(
            grad[j][i], 1 / len(agent_list)), gradient_w)
        gradient_b = np.add(np.multiply(
            bi[j][i], 1 / len(agent_list)), gradient_b)
    model_g['weights'] = np.subtract(
        model_g['weights'], np.multiply(lr[0][i], gradient_w))
    model_g['bias'] = np.subtract(
        model_g['bias'], np.multiply(lr[0][i], gradient_b))

    test_images = readTestImagesFromFile(False)
    test_labels_onehot = readTestLabelsFromFile(False)
    m = np.dot(test_images, np.asarray(model_g['weights']))
    test_result = m + np.asarray(model_g['bias'])
    y = tf.nn.softmax(test_result)
    # tf.arg_max is a deprecated alias; tf.argmax is the supported spelling.
    correct_prediction = tf.equal(
        tf.argmax(y, 1), tf.argmax(test_labels_onehot, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy.numpy()
def remove_list_indexed(removed_ele, original_l, ll):
    """Return the index in `ll` of the subset `original_l` minus `removed_ele`.

    Subsets are compared as sets, so element order does not matter.
    `original_l` itself is not modified.

    Args:
        removed_ele: element to leave out of `original_l`.
        original_l: the subset to start from.
        ll: sequence of candidate subsets (e.g. a power set).

    Returns:
        Index of the first matching entry of `ll`, or -1 if there is none.
    """
    remaining = {element for element in original_l if element != removed_ele}
    for index, candidate in enumerate(ll):
        if set(candidate) == remaining:
            return index
    return -1
def shapley_list_indexed(original_l, ll):
    """Return the index of the first entry of `ll` equal to `original_l` as a set.

    Element order and duplicates are ignored. Returns -1 when no entry matches.
    """
    # Build the target set once instead of on every iteration.
    target = set(original_l)
    for index, candidate in enumerate(ll):
        if set(candidate) == target:
            return index
    return -1
def PowerSetsBinary(items):
    """Return all subsets of `items` as a list of lists.

    Subset number `i` (0 <= i < 2 ** len(items)) contains `items[j]` exactly
    when bit `j` of `i` is set, so the result starts with the empty subset
    and ends with the full list.
    """
    N = len(items)
    set_all = []
    for i in range(2 ** N):
        # Bit j of the counter decides whether items[j] is in this subset.
        combo = [items[j] for j in range(N) if (i >> j) % 2 == 1]
        set_all.append(combo)
    return set_all
def loadHistoryModels(round_num):
f = open(os.path.join(os.path.dirname(__file__),
"gradientplus_models" + str(round_num) + ".txt"), "r")
lines = f.readlines()
ret_models = []
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
ret_models.append([w_initial, b_initial])
tem_model = []
for i, line in enumerate(lines):
if i % 2 == 0:
lis = line.strip().replace("[", "").replace("]", "").split(",")
lis = [float(i.strip()) for i in lis]
lis = np.array(lis).reshape([784, 10])
tem_model = [lis]
lis = line.strip().replace("[", "").replace("]", "").split(",")
lis = [float(i.strip()) for i in lis]
lis = np.array(lis)
return ret_models
if __name__ == "__main__":
start_time = time.time()
# data_num = np.asarray([5923, 6742, 5958, 6131, 5842])
# agents_weights = np.divide(data_num, data_num.sum())
for index in range(NUM_AGENT):
f = open(os.path.join(os.path.dirname(__file__),
"weights_" + str(index) + ".txt"), "w")
f = open(os.path.join(os.path.dirname(__file__),
"bias_" + str(index) + ".txt"), "w")
# f = open(os.path.join(os.path.dirname(__file__),
# "gradientplus_models.txt"), "w") #alice
# f.close()
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "w")
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
federated_train_data_divide = None
federated_train_data = None
federated_train_data_divide = [get_data_for_federated_agents(
mnist_train, d) for d in range(NUM_AGENT)]
federated_train_data = federated_train_data_divide
f_ini_p = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters-1.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
initial_model = collections.OrderedDict(
model = initial_model
learning_rate = 0.1
for round_num in range(5):
f = open(os.path.join(os.path.dirname(__file__),
"gradientplus_models" + str(round_num) + ".txt"), "w") # alice
local_models = federated_train(
model, learning_rate, federated_train_data)
print("learning rate: ", learning_rate)
# print(local_models[0][0])#第0个agent的weights矩阵
# print(local_models[0][1])#第0个agent的bias矩阵
for local_index in range(len(local_models)):
f = open(os.path.join(os.path.dirname(__file__), "weights_" +
str(local_index) + ".txt"), "a", encoding="utf-8")
for i in local_models[local_index][0]:
line = ""
arr = list(i)
for j in arr:
line += (str(j) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***", file=f)
print("-" * 50, file=f)
f = open(os.path.join(os.path.dirname(__file__), "bias_" +
str(local_index) + ".txt"), "a", encoding="utf-8")
line = ""
for i in local_models[local_index][1]:
line += (str(i) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***", file=f)
print("-" * 50, file=f)
m_w = np.zeros([784, 10], dtype=np.float32)
m_b = np.zeros([10], dtype=np.float32)
for local_model_index in range(len(local_models)):
m_w = np.add(np.multiply(
local_models[local_model_index][0], 1 / NUM_AGENT), m_w)
m_b = np.add(np.multiply(
local_models[local_model_index][1], 1 / NUM_AGENT), m_b)
model = {
'weights': m_w,
'bias': m_b
f_g = open(os.path.join(os.path.dirname(
__file__), "gradientplus_models" + str(round_num) + ".txt"), "a")
g_w = list(model['weights'].reshape(-1))
g_b = list(model['bias'].reshape(-1))
print(g_w, file=f_g)
print(g_b, file=f_g)
f = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "w")
f = open(os.path.join(os.path.dirname(__file__),
"initial_model_parameters" + str(round_num-1) + ".txt"), "a")
s = ""
for i in list(model['weights']):
for j in i:
s += str(j)+"\t"
s = s[0:-1]
print(s, file=f)
s = ""
for i in list(model['bias']):
s += str(i)+"\t"
s = s[0:-1]
print(s, file=f)
learning_rate = learning_rate * 0.9
loss = federated_eval(model, federated_train_data)
print('round {}, loss={}'.format(round_num, loss))
print(time.time() - start_time)
shapstart_time = time.time() #change cal time
gradient_weights = []
gradient_biases = []
gradient_lrs = []
for ij in range(NUM_AGENT):
model_ = getParmsAndLearningRate(ij)
gradient_weights_local = []
gradient_biases_local = []
learning_rate_local = []
for i in range(len(model_['learning_rate'])):
if i == 0:
gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
models_hository = loadHistoryModels(round_num)
agent_shapley_history = []
for iter_num in range(1, len(gradient_weights[0]) + 1):
group_shapley_value = []
for s in all_sets:
train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE,
iter_num, models_hository[iter_num-1]))
print(str(s) + "\t" +
str(group_shapley_value[len(group_shapley_value) - 1]))
agent_shapley = []
for index in range(NUM_AGENT):
shapley = 0.0
for j in all_sets:
if index in j:
remove_list_index = remove_list_indexed(
index, j, all_sets)
if remove_list_index != -1:
shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
f = open(os.path.join(os.path.dirname(__file__),
"alice" + ".txt"), "a")
print('round {}, loss={}'.format(round_num, agent_shapley), file=f)
print('round {}, loss={}'.format(
round_num, agent_shapley_history), file=f)
print("end_time shap values", time.time() - shapstart_time)
print("end_time", time.time() - start_time)
It seems the source of this code is one of the low-level TFF tutorials, which are really more about introducing users to the core communication abstractions of TFF, as opposed to serving as an introductory guide to federated learning with TFF.
I might suggest a different tutorial, like the one that focuses on image classification for example. This uses TFF's tff.learning.from_keras_model utility to ingest a Keras model directly; if you go this route, you can add layers as you would for any Keras model.

IndexError: index 4 is out of bounds for axis 0 with size 4

Hey, I am getting this IndexError where I am trying to composite events, but my indices start at 0 and not 1. I have tried a number of things, like .append[i+1], but I am unable to fix the error.
There is apparently something wrong with this specific line of code: dset_IDX[offset:offset_next] = event_id[file_indices]
The .py file is over 1000 lines of code, so I cannot show all of it, but I can show the part of the function that is giving me the error.
def count_events(files):
    """Count and index the events that have at least one hit.

    Events with 0 hits are to be removed, so the events must be counted
    beforehand (to create the h5 file). Files need to be iterated in the
    same order later on for the indexes to line up.

    Args:
        files: paths of ``.npz`` files containing a ``digi_hit_pmt`` array
            with one entry per event.

    Returns:
        ``(num_events, nonzero_file_events)`` where ``num_events`` is the
        total number of events with hits and ``nonzero_file_events[k]`` lists
        the indices of those events within ``files[k]``.
    """
    num_events = 0
    nonzero_file_events = []
    for f in files:
        # NOTE: allow_pickle=True executes pickled code; only load trusted files.
        # The context manager closes the archive once the array has been read.
        with np.load(f, allow_pickle=True) as data:
            hits = data['digi_hit_pmt']
        nonzero = [i for i, event_hits in enumerate(hits) if len(event_hits) != 0]
        nonzero_file_events.append(nonzero)
        num_events += len(nonzero)
    return num_events, nonzero_file_events
def GenMapping(csv_file):
    """Build a lookup array from mPMT number to its ``[col, row]`` position.

    The CSV is read as a grid: the header line holds the row numbers (its
    first cell is ignored), every following line starts with a column number
    followed by one mPMT number per row; empty cells are skipped.

    Args:
        csv_file: path of the CSV file.

    Returns:
        Integer array of shape ``(max_mPMT + 1, 2)`` where entry ``k`` is
        ``[col, row]`` of mPMT ``k``; numbers absent from the file stay
        ``[0, 0]``.

    Raises:
        ValueError: if the file contains no mPMT entries (or a cell is not
            an integer).
    """
    mPMT_to_index = {}
    with open(csv_file) as f:
        rows = [int(r.strip()) for r in f.readline().split(",")[1:]]
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of failing on int('').
                continue
            line_split = line.split(",")
            col = int(line_split[0].strip())
            for row, value in zip(rows, line_split[1:]):
                value = value.strip()
                if value:  # If the value is not empty
                    mPMT_to_index[int(value)] = [col, row]
    if not mPMT_to_index:
        raise ValueError("No mPMT entries found in " + str(csv_file))
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    npmap = np.zeros((max(mPMT_to_index) + 1, 2), dtype=int)
    for k, v in mPMT_to_index.items():
        npmap[k] = v
    return npmap
# Write 'multimuonfile(2).h5' from the events stored in the input .npz file(s).
# For every output entry a number of muons is drawn from a Poisson distribution,
# that many input events are picked at random and handed to SumEvents()
# (defined elsewhere; presumably it overlays their hits -- TODO confirm).
# NOTE(review): the two lines right after the signature look like a docstring
# that lost its quotes, and several create_dataset()/np.zeros() calls below are
# cut short in this listing (no dtype argument / closing parenthesis).
def GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2):
avg_mu_per_ev == Poisson distribution mean for number of muons in each spill
sigma_time_offset == Width of spill (Gaussian) in nanoseconds
files = ['event998.npz']
# Remove whitespace
files = [x.strip() for x in files]
# Check that files were provided
if len(files) == 0:
raise ValueError("No files provided!!")
print("Merging " + str(len(files)) + " files")
# Start merging
num_nonzero_events, nonzero_event_indexes = count_events(files)
# np.random.poisson( avg_mu_per_ev, number_of_throws )
# NOTE(review): the number of output entries is num_nonzero_events - 2954;
# the constant is hard-coded -- TODO confirm where 2954 comes from.
num_muons = np.random.poisson(avg_mu_per_ev, num_nonzero_events - 2954)
# creates h5 file to generate the h5 file
dtype_events = np.dtype(np.float32)
dtype_labels = np.dtype(np.int32)
dtype_energies = np.dtype(np.float32)
dtype_positions = np.dtype(np.float32)
dtype_IDX = np.dtype(np.int32)
dtype_PATHS = h5py.special_dtype(vlen=str)
dtype_angles = np.dtype(np.float32)
# sets h5 file to be written
h5_file = h5py.File('multimuonfile(2).h5', 'w')
# The datasets are sized by num_nonzero_events (the input event count),
# not by len(num_muons) (the number of entries generated below).
dset_event_data = h5_file.create_dataset("event_data",
shape=(num_nonzero_events,) + IMAGE_SHAPE,
dset_labels = h5_file.create_dataset("labels",
dset_energies = h5_file.create_dataset("energies",
shape=(num_nonzero_events, 1),
dset_positions = h5_file.create_dataset("positions",
shape=(num_nonzero_events, 1, 3),
dset_IDX = h5_file.create_dataset("event_ids",
dset_PATHS = h5_file.create_dataset("root_files",
dset_angles = h5_file.create_dataset("angles",
shape=(num_nonzero_events, 2),
# 22 -> gamma, 11 -> electron, 13 -> muon
# corresponds to labelling used in CNN with only barrel
# IWCDmPMT_4pi_full_tank_gamma_E0to1000MeV_unif-pos-R371-y521cm_4pi-dir_3000evts_329.npz has an event
# with pid 11 though....
# pid_to_label = {22:0, 11:1, 13:2}
# offset/offset_next delimit the slice of the datasets written for the current file.
offset = 0
offset_next = 0
mPMT_to_index = GenMapping(PMT_LABELS)
# Loop over files
for file_index, filename in enumerate(files):
data = np.load(filename, allow_pickle=True)
nonzero_events_in_file = len(nonzero_event_indexes[file_index])
x_data = np.zeros((nonzero_events_in_file,) + IMAGE_SHAPE,
digi_hit_pmt = data['digi_hit_pmt']
# digi_hit_charge = data['digi_hit_charge']
# digi_hit_time = data['digi_hit_time']
# digi_hit_trigger = data['digi_hit_trigger']
# trigger_time = data['trigger_time']
# delay is never changed below, so i - delay is simply i.
delay = 0
# Loop over events in file
# Loop over number of muons in each event
# Per-entry metadata accumulators; each gains one np.append() per output entry.
event_id = np.array([], dtype=np.int32)
# NOTE(review): the np.str alias was deprecated in NumPy 1.20 and removed in 1.24.
root_file = np.array([], dtype=np.str)
pid = np.array([])
position = np.array([])
direction = np.array([])
energy = np.array([])
labels = np.array([])
# with open("ResultFile.txt", "w") as text_file:
# sys.stdout = open("Result2.txt", "w")
for i, nmu in enumerate(num_muons):
# np.savetxt(text_file, i, nmu,fmt="%d")
# text_file.write("processing output entry " + str(i) + " with " + nmu + " muons")
print("processing output entry ", i, " with ", nmu, " muons")
# Draw max(1, nmu) random input events, i.e. at least one even when nmu == 0.
indices = np.random.randint(0, len(digi_hit_pmt), max(1, nmu))
# The first drawn event gets no time offset; the remaining nmu - 1 get
# Gaussian offsets with standard deviation sigma_time_offset.
time_offs = [0.]
if nmu > 1:
time_offs = np.append(time_offs, np.random.normal(0., sigma_time_offset, nmu - 1))
hit_pmts, charge, time = SumEvents(indices, time_offs, data, nmu == 0)
# Split each hit PMT id into id // 19 and id % 19
# (presumably mPMT number and channel within the mPMT -- TODO confirm).
hit_mpmts = hit_pmts // 19
pmt_channels = hit_pmts % 19
rows = mPMT_to_index[hit_mpmts, 0]
cols = mPMT_to_index[hit_mpmts, 1]
# Charges go to channels 0..18 of the last axis, times 19 channels further on.
x_data[i - delay, rows, cols, pmt_channels] = charge
x_data[i - delay, rows, cols, pmt_channels + 19] = time
# fix below!!!
# Metadata is taken from the first drawn event only; the energy is the
# sum over all drawn events and the label is the muon count.
# NOTE(review): np.append without an axis argument returns a flattened 1-D
# array, so position/direction lose their per-event shape here, while
# direction[:, 1] further down needs a 2-D array -- verify.
idx0 = indices[0]
event_id = np.append(event_id, data['event_id'][idx0])
root_file = np.append(root_file, data['root_file'][idx0])
pid = np.append(pid, data['pid'][idx0])
position = np.append(position, data['position'][idx0])
direction = np.append(direction, data['direction'][idx0])
energy = np.append(energy, np.sum(data['energy'][indices]))
labels = np.append(labels, nmu)
offset_next += nonzero_events_in_file
# NOTE(review): this is the statement reported in the traceback. event_id holds
# one value per element of num_muons, whereas file_indices comes from
# count_events(), which per its header comment indexes events of the input
# file; any index >= len(num_muons) is out of bounds for event_id. The same
# indexing is applied to every accumulator below.
file_indices = nonzero_event_indexes[file_index]
dset_IDX[offset:offset_next] = event_id[file_indices]
dset_PATHS[offset:offset_next] = root_file[file_indices]
dset_energies[offset:offset_next, :] = energy[file_indices].reshape(-1, 1)
dset_positions[offset:offset_next, :, :] = position[file_indices].reshape(-1, 1, 3)
dset_labels[offset:offset_next] = labels[file_indices]
direction = direction[file_indices]
# Angles are derived from the direction components: arccos of component 1,
# and arctan2 of component 2 over component 0.
polar = np.arccos(direction[:, 1])
azimuth = np.arctan2(direction[:, 2], direction[:, 0])
dset_angles[offset:offset_next, :] = np.hstack((polar.reshape(-1, 1), azimuth.reshape(-1, 1)))
dset_event_data[offset:offset_next, :] = x_data
offset = offset_next
print("Finished file: {}".format(filename))
# In[ ]:
# Run the generation with the same values as the function's defaults.
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
Merging 1 files
processing output entry 0 with 3 muons
processing output entry 1 with 1 muons
processing output entry 2 with 3 muons
processing output entry 3 with 3 muons
Traceback (most recent call last):
File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1068, in <module>
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1044, in GenerateMultiMuonSample_h5
dset_IDX[offset:offset_next] = event_id[file_indices]
IndexError: index 4 is out of bounds for axis 0 with size 4
Not much information is provided, but from what I understand,
the error says that axis 0 has size 4 and you are trying to access index 4. That is not possible: indexing starts at 0, so the largest valid index for an axis of size 4 is 3.

How to make a dataset similar to CIFAR10 with several images

I am trying to make a dataset that is similar to CIFAR10. I found this tutorial:
How to create dataset similar to cifar-10
I can already make a dataset with 1 image, but when I try to use several images I get this error:
tensorflow.python.framework.errors.InvalidArgumentError: Indices are not valid: not lexicographically sorted or containing repeats.
Can anyone help me to solve this problem?
This is my code:
from PIL import Image
import numpy as np


def load_record(path, label):
    """Return one dataset record for the image stored at *path*.

    The record is a 1-D uint8 array: the label first, followed by the
    flattened red, green and blue planes of the image.
    """
    im = np.array(Image.open(path))
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    return np.array([label] + list(r) + list(g) + list(b), np.uint8)


# 20 records of 7501 values each: 1 label + 3 colour planes of 2500 pixels.
# The array is created as uint8 so the records are kept as single bytes;
# np.empty() defaults to float64, which would silently widen every value
# when a record is assigned into it.
out = np.empty([20, 7501], dtype=np.uint8)

# Rows 0-9 are the receipt images (label 0), rows 10-19 the news images (label 1).
for label, kind in enumerate(('receipt', 'news')):
    for i in range(10):
        out[label * 10 + i] = load_record('%d_%s.jpg' % (i, kind), label)
I do it like this:
import numpy as np
import scipy.io

# Convert the SVHN training set into CIFAR-10 style binary batch files.
mat = scipy.io.loadmat('train_32x32.mat')
data = mat['X']   # indexed below as (row, column, channel, sample)
label = mat['y']  # one label per sample, concatenated as the first column

# Take the sample count from the data instead of hard-coding 73257.
num_samples = data.shape[3]


def channel_plane(channel):
    """Return colour plane *channel* as a (num_samples, rows * columns) array."""
    plane = np.transpose(data[:, :, channel, :], (2, 0, 1))
    return np.reshape(plane, (num_samples, -1))


R_data = channel_plane(0)
G_data = channel_plane(1)
B_data = channel_plane(2)

# One record per row: label, then the red, green and blue planes.
outdata = np.concatenate((label, R_data, G_data, B_data), axis=1)

step = 10000
for i in range(1, 6):
    # Batch i covers rows (i-1)*step .. i*step-1, so batch 1 starts at the
    # first sample.  The previous slice i*step:(i+1)*step skipped the first
    # 10000 samples.  Samples beyond 5 * step are not written.
    temp = outdata[(i - 1) * step:i * step, :]
    temp.tofile('SVHN_train_data_batch%d.bin' % i)
    print('save data %d' % i)
Then, just put it directly in the train code of Cifar10 tensorflow example.
I too tried to follow the tutorial you posted in the question however I couldn't get it to work so I made my own solution. It can be found on my github here: https://github.com/jdeepee/machine_learning/tree/master
The code is commented, so it should be easy enough to follow. I should note that it iterates through a master directory containing multiple folders, each of which contains the images.
The below snippet is what I did to adapt CIFAR-10 to GTSRB. More details here. https://github.com/hashkanna/traffic-signs/blob/master/Traffic_Signs_Recognition_binFiles.ipynb
# Distribute the training images round-robin over 5 batches (keys 1..5).
# Each image contributes one record: its label followed by the flattened
# red, green and blue planes, all stored as uint8.
# X_train is indexed directly as image arrays here (no Image.open needed).
records = {bin_val: [] for bin_val in range(1, 6)}
for i, im in enumerate(X_train):
    bin_val = (i % 5) + 1
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    label = [y_train[i]]
    records[bin_val].append(
        np.array(list(label) + list(r) + list(g) + list(b), np.uint8))

# Join every batch once at the end; calling np.append inside the loop
# re-copies the whole batch for each new image.
out = {
    bin_val: np.concatenate(chunks) if chunks else np.empty(0, np.uint8)
    for bin_val, chunks in records.items()
}
for bin_val in range(1,6):
