Execute mapping efficiently in Python - python

I have some code that is mapping data to a matrix... Most of this is all setup so that my problem can be easily reproduced but the only part I need to speed up is what's after the comment # the part I want to speed up
import numpy as np
# config
matrix_height = 100
matrix_width = 200
# fake data
x_data = np.array(range(10000))
y_data = [{i:i for i in range(100)} for t in range(len(x_data))]
# fake mapping
x_to_index = {x: np.random.randint(matrix_width) for x in x_data }
y_to_index = {}
for y_dict in y_data:
for y_key, y_val in y_dict.items():
y_start = np.random.randint(matrix_height-2)
y_to_index[y_key] = (y_start, y_start+2 )
# data that must be stored
total_matrix = np.zeros([matrix_height, matrix_width]).astype(int)
n_matrix = np.zeros([matrix_height, matrix_width]).astype(int)
latest_value = np.zeros([matrix_height, matrix_width]).astype(int)
# the part I want to speed up
for x, y_dict in zip(x_data, y_data):
x_index = x_to_index[x]
for y_key, y_data in y_dict.items():
y_slice = slice(*y_to_index[y_key])
total_matrix[ y_slice, x_index ] += y_data
latest_value[ y_slice, x_index ] = y_data
n_matrix[ y_slice, x_index ] += 1
Again, I'm only concerned with the part below the comment # the part I want to speed up.
I'm not sure how to speed this up but it seems like it should be possible to use a mapping function that can do this all in parallel...
I'm looking for a marked improvement in that last section. Any ideas?

Tailored to number of cores.
For total_matrix, addition is commutative.
For latest_value, apply the split cols in reverse order.
import numpy as np
import time
import multiprocessing as mp
def func(cols, zz, tm, nm, lv, setOrder):
for c in cols:
for t in zz:
tm[slice(*t[0]), c] += t[1]
lv[slice(*t[0]), c] = t[1]
nm[slice(*t[0]), c] += 1
return [tm, nm, lv, setOrder]
if __name__ == '__main__':
mp.freeze_support()
matrix_height = 100
matrix_width = 200
total_range = 10000
x_data = np.array(range(total_range))
y_data = [{i:i for i in range(matrix_height)} for t in range(len(x_data))]
x_to_index = {x: np.random.randint(matrix_width) for x in x_data}
# potential general purpose cols list
#cols = np.random.randint(0, total_range, (1, total_range))[0]
cols = [np.int(x_to_index[k]) for k in x_to_index]
y_to_index = {}
for y_dict in y_data:
for y_key, y_val in y_dict.items():
y_start = np.random.randint(matrix_height-2)
y_to_index[y_key] = (y_start, y_start+2)
# potential general purpose rows list
#rows = [(np.random.randint(matrix_height), np.random.randint(matrix_height)) for x in range(matrix_height)]
rows = [y_to_index[k] for k in y_to_index]
# potential general purpose y data
#y_dat = np.random.randint(0, matrix_height, (1, matrix_height))[0]
y_dat = [i for i in range(matrix_height)]
o_total_matrix = np.zeros([matrix_height, matrix_width]).astype(int)
o_n_matrix = np.zeros([matrix_height, matrix_width]).astype(int)
o_latest_value = np.zeros([matrix_height, matrix_width]).astype(int)
startTime = time.time()
for x, y_dict in zip(x_data, y_data):
x_index = x_to_index[x]
for y_key, y_data in y_dict.items():
y_slice = slice(*y_to_index[y_key])
o_total_matrix[ y_slice, x_index ] += y_data
o_latest_value[ y_slice, x_index ] = y_data
o_n_matrix[ y_slice, x_index ] += 1
print('original time was {0:5.2f} sec'.format(time.time() - startTime))
procs = mp.cpu_count()
i_tm = [np.zeros([matrix_height, matrix_width]).astype(int)] * procs
i_nm = [np.zeros([matrix_height, matrix_width]).astype(int)] * procs
i_lv = [np.zeros([matrix_height, matrix_width]).astype(int)] * procs
zz = list(zip(rows, y_dat))
procs_split = np.array_split(cols, procs)
itup = []
for x in range(procs):
itup.append(((list(procs_split[x])), zz, i_tm[x], i_nm[x], i_lv[x], x))
startTime = time.time()
with mp.Pool(processes=procs) as pool:
ret = pool.starmap(func, itup)
i_total_matrix = ret[0][0]
i_n_matrix = ret[0][1]
for x in range(1, procs):
i_total_matrix = np.add(i_total_matrix, ret[x][0])
i_n_matrix = np.add(i_n_matrix, ret[x][1])
colOrder = [0] * procs
for x in range(procs):
colOrder[x] = (procs-1) - ret[x][3]
i_latest_value = ret[colOrder[0]][2]
for x in range(1, procs):
np.putmask(i_latest_value, i_latest_value == 0, ret[x][2])
print('improved time was {0:5.2f} sec'.format(time.time() - startTime))
comparison = i_total_matrix == o_total_matrix
if not comparison.all():
print('ERROR TOTAL MATRIX')
comparison = i_n_matrix == o_n_matrix
if not comparison.all():
print('ERROR N MATRIX')
comparison = i_latest_value == o_latest_value
if not comparison.all():
print('ERROR LATEST VALUE')
After a trial run, the results showed approx. twice as fast:
original time was 7.12 sec
improved time was 2.29 sec

Related

Really different matrixes in Python producing the same images

I am working on some RGB images processing. I suggest anyone who reads this to ignore the functions I've created before the main code (these are not relevant to the problem itself, I beleive).
The RGB1 image I am using is this one:
Here is my code so far:
import sys
import numpy as np
import quaternion
import matplotlib.pyplot as plt
from pandas import *
import scipy.io as spio
import matplotlib.image as img
# IGNORE UNTIL NEXT COMMENT #
np.set_printoptions(threshold=sys.maxsize)
def row_wise_fft(A):
A = np.asarray(A)
rowWiseFFT = np.zeros((A.shape[0], A.shape[1]), dtype='complex')
for i in range(0, A.shape[0]):
rowWiseFFT[i, :] = np.fft.fft(A[i, :])
return rowWiseFFT
def row_wise_ifft(A):
A = np.asarray(A)
rowWiseFFT = np.zeros(A.shape, dtype='complex')
for i in range(0, A.shape[0]):
rowWiseFFT[i, :] = np.fft.ifft(A[i, :])
return rowWiseFFT
def split_parts(A):
return [A.real, A.imag]
def column_wise_fft(A):
A = np.asarray(A)
columnWiseFFT = np.zeros((A.shape[0], A.shape[1]), dtype='complex')
for i in range(0, A.shape[1]):
columnWiseFFT[:, i] = np.fft.fft(A[:, i])
return columnWiseFFT
def final_dqft(FuvR, FuvI):
[FuvR, FuvI] = [np.asarray(FuvR), np.asanyarray(FuvI)]
[FuvRReal, FuvRImag] = split_parts(FuvR)
[FuvIReal, FuvIImag] = split_parts(FuvI)
FuvQ = [[None]*FuvR.shape[1]]*FuvR.shape[0]
FuvQ = np.asarray(FuvQ)
for i in range(0, FuvQ.shape[0]):
for k in range(0, FuvQ.shape[1]):
FuvQ[i][k] = np.quaternion(
FuvRReal[i][k], FuvIReal[i][k], FuvRImag[i][k], FuvIImag[i][k])
return FuvQ
def column_wise_ifftshift(A):
columnWiseIfftshift = np.zeros((A.shape[0], A.shape[1]), dtype='complex')
for i in range(0, A.shape[1]):
columnWiseIfftshift[:, i] = np.fft.ifftshift(A[:, i])
return columnWiseIfftshift
def column_wise_fftshift(A):
columnWisefftshift = np.zeros(A.shape, dtype='complex')
for i in range(0, A.shape[1]):
columnWisefftshift[:, i] = np.fft.fftshift(A[:, i])
return columnWisefftshift
def row_wise_ifftshift(A):
rowWiseIfftshift = np.zeros((A.shape[0], A.shape[1]), dtype='complex')
for i in range(0, len(A)):
rowWiseIfftshift[i] = np.fft.ifftshift(A[i])
return rowWiseIfftshift
def row_wise_fftshift(A):
rowWisefftshift = np.zeros((A.shape[0], A.shape[1]), dtype='complex')
for i in range(0, len(A)):
rowWisefftshift[i] = np.fft.fftshift(A[i])
return rowWisefftshift
def full_DQFT_calculator(A):
fun = row_wise_fftshift(row_wise_fft(row_wise_ifftshift(A)))
[funReal, funImag] = split_parts(fun)
FuvR = column_wise_fftshift(
column_wise_fft(column_wise_ifftshift(funReal)))
FuvI = column_wise_fftshift(
column_wise_fft(column_wise_ifftshift(funImag)))
FuvQ = final_dqft(FuvR, FuvI)
return FuvQ
# STOP IGNORING FROM NOW ON #
image = img.imread("RGB1.jpg")
# Keep the Red,Green and Blue Channels.
R = np.asarray(image[:, :, 0])
G = np.asarray(image[:, :, 1])
B = np.asarray(image[:, :, 2])
# Defining h_r = 0 ; h_i = Red ; h_j = Green ; h_k = Blue
(h_r, h_i, h_j, h_k) = (np.zeros(R.shape), R, G, B)
# Some intermediatte calculations
firstIntegral = full_DQFT_calculator(h_i)
secondIntegral = full_DQFT_calculator(h_j)
thirdIntegral = full_DQFT_calculator(h_k)
final_result = np.empty(firstIntegral.shape, dtype='quaternion')
for i in range(0, final_result.shape[0]):
for j in range(0, final_result.shape[1]):
final_result[i][j] = np.quaternion(-firstIntegral[i][j].x-secondIntegral[i][j].y+thirdIntegral[i][j].z, firstIntegral[i][j].w - secondIntegral[i][j].z - thirdIntegral[i]
[j].y, secondIntegral[i][j].w - thirdIntegral[i][j].x - firstIntegral[i][j].z, firstIntegral[i][j].y + secondIntegral[i][j].x + thirdIntegral[i][j].w)
real_final_result = np.empty(final_result.shape, dtype='float')
for i in range(0, real_final_result.shape[0]):
for j in range(0, real_final_result.shape[1]):
real_final_result[i][j] = final_result[i][j].w
print('Parte real da primeira linha da matriz (transformada quaterniĆ³nica): \n',real_final_result[0], '\n')
i_final_result = np.empty(final_result.shape, dtype='float')
for i in range(0, i_final_result.shape[0]):
for j in range(0, i_final_result.shape[1]):
i_final_result[i][j] = final_result[i][j].x
print('Componente i da primeira linha da matriz (transformada quaterniĆ³nica): \n',i_final_result[0], '\n')
print(real_final_result[0] - i_final_result[0])
The difference between the two matrixes (i.e., the last print) is huge but when I try to print the matrixes using the following code:
fig = plt.figure()
plt.set_cmap('gray')
ax1 = fig.add_subplot(121)
plt.imshow(real_final_result)
plt.clim([0,1])
ax1.title.set_text('Re(TFQ)')
ax2 = fig.add_subplot(122)
plt.imshow(i_final_result)
plt.clim([0, 1])
ax2.title.set_text('I(TFQ)')
plt.show()
I obtain the exact same image. Can someone tell why is this happening?

Python function calling with variable vs raw numbers

I am trying to implement a pso algorithm from Wikipedia https://en.wikipedia.org/wiki/Particle_swarm_optimization.
My problem is that when I am calling the cost function with a variable (Gbest), and then manually calling the cost function (with the Gbest data) I get a different output (cost) like the image bellow:
Code fault
I am new to python so thank you for any suggestions.
Here is the complete code:
import matplotlib.pyplot as plt
import numpy as np
from control.matlab import *
A = np.array([[0,0,1],[0,1,0],[1,2,-2]])
B = np.array( [[0],[1],[0]])
C = np.array([[0, 1,0]])
D = np.zeros([C.shape[0],B.shape[1]])
sys = ss(A,B,C,D)
sys_tf = tf(sys)
s = tf('s')
def cost(kp,ki):
global sys_tf, G, y, t, r
G = kp + ki/s
C = feedback(sys_tf*G, 1)
y, t = step(C, linspace(0,100))
r = np.ones(len(t))
return np.sum(y-r)**2
part = 100
ite = 10000
dim = 2
w = 0.001
wdamp = 0.99
phip = 0.9
phig = 0.1
blo, bup = -10,10
x = np.zeros([dim, part])
v = np.zeros([dim, part])
pbest = np.zeros([dim, part])
gbest = np.array([1000000,1000000])
for i in range(part):
for k in range(dim):
x[k][i] = pbest[k][i] = np.random.uniform(blo, bup)
v[k][i] = np.random.uniform(-np.abs(bup - blo), np.abs(bup - blo))
if cost(pbest[0][i], pbest[1][i]) < cost(gbest[0], gbest[1]):
gbest = np.array([pbest[0][i], pbest[1][i]])
for it in range(ite):
for i in range(part):
for k in range(dim):
rp = np.random.uniform(0,1)
rg = np.random.uniform(0,1)
v[k,:] = w*v[k,:] + phip*rp*(pbest[k,:] - x[k,:]) + phig*rg*(gbest[k] - x[k,:])
x[k,:] = x[k,:] + v[k,:]
w = w*wdamp
if cost(x[0][i], x[1][i]) < cost(pbest[0][i], pbest[1][i]):
pbest[:,i] = x[:,i]
if cost(pbest[0][i], pbest[1][i]) < cost(gbest[0], gbest[1]):
gbest = np.array([pbest[0][i], pbest[1][i]])
plt.plot(t, y, 'ro')
plt.plot(t, r, 'x')
plt.pause(0.005)
plt.title(gbest)
print([gbest, cost(gbest[0], gbest[1])])

Optimizing a complex algorithm

I know this is not an ideal place for questions of this scope, but I'm not sure where else to ask this or how to break it down. I've been working on a function for the past couple weeks, that runs, but for it to be feasible for my purposes, I need to speed it up 200-300x.
I have an image array, where all pixels of similar color have been averaged and set to that average value. Then I have a 2D array of the same height and width, which labels each unique and non-contiguous feature of the image.
Using these I need to assess the size of each feature and its level of contrast to each of its neighbors. These values are used in an equation and if the output of that equation is below a certain threshold, that feature is merged with its most similar neighbor.
I've uploaded the image and the feature label array (printed with numpy.savetext()) to OneDrive and attached links
code:
def textureRemover(pix, labeledPix, ratio = 1.0):
numElements = numpy.amax(labeledPix)
maxSize = numpy.count_nonzero(labeledPix)
MAXIMUMCONTRAST = 443.405
for regionID in range(numElements):
start = time.clock()
regionID += 1
if regionID not in labeledPix:
continue
#print(regionID)
#print((regionID / numElements) * 100, '%')
neighborIDs = getNeighbors(labeledPix, regionID)
if 0 in neighborIDs:
neighborIDs.remove(0) #remove white value
regionMask = labeledPix == regionID
region = pix[regionMask]
size = numpy.count_nonzero(regionMask)
contrastMin = (ratio - (size / maxSize)) * MAXIMUMCONTRAST
regionMean = region.mean(axis = 0)
if len(neighborIDs) > 200:
contrast = numpy.zeros(labeledPix.shape)
contrast[labeledPix!=0] = numpy.sqrt(numpy.sum((regionMean - pix[labeledPix!=0])**2, axis = -1))
significantMask = (contrast < contrastMin)
significantContrasts = list(numpy.unique(contrast[significantMask]))
significantNeighbors = {}
for significantContrast in significantContrasts:
minContrast = min(significantContrasts)
if labeledPix[contrast == minContrast][0] in neighborIDs:
significantNeighbors[minContrast] = labeledPix[contrast == minContrast][0]
else:
significantContrasts.pop(significantContrasts.index(minContrast))
else:
significantNeighbors = {}
for neighborID in neighborIDs:
neighborMask = labeledPix == neighborID
neighbor = pix[neighborMask]
neighborMean = neighbor.mean(axis = 0)
contrast = numpy.sqrt(numpy.sum((regionMean - neighborMean)**2, axis = -1))
if contrast < contrastMin:
significantNeighbors[contrast] = neighborID
if significantNeighbors:
contrasts = significantNeighbors.keys()
minContrast = min(contrasts)
minNeighbor = significantNeighbors[minContrast]
neighborMask = labeledPix == minNeighbor
neighborSize = numpy.count_nonzero(neighborMask)
if neighborSize <= size:
labeledPix[neighborMask] = regionID
pix[neighborMask] = regionMean
else:
labeledPix[regionMask] = minNeighbor
pix[regionMask] = pix[neighborMask].mean(axis = 0)
print(time.clock() - start)
return pix
pix
labeledPix
I know I'm asking for a lot of help, but I've been stuck on this for a few weeks and am unsure what else I can do. Any help will be greatly appreciated!
Here is an optimized version of most of your logic (I underestimated the amount of work that would be...). I skipped the >200 branch and am using fake data because I couldn't access your link. When I switch off your >200 branch your and my code appear to give the same result but mine is quite a bit faster on the fake example.
Sample output:
original
26.056154000000003
optimized
0.763613000000003
equal
True
Code:
import numpy as np
from numpy.lib.stride_tricks import as_strided
def mockdata(m, n, k):
colors = np.random.random((m, n, 3))
i, j = np.ogrid[:m, :n]
labels = np.round(k*k * (np.sin(0.05 * i) + np.sin(0.05 * j)**2)).astype(int) % k
return colors, labels
DIAG_NEIGHBORS = True
MAXIMUMCONTRAST = 443.405
def textureRemover2(pix, labeledPix, ratio=1.0):
start = time.clock()
pix, labeledPix = pix.copy(), labeledPix.copy()
pixf, labeledPixf = pix.reshape(-1, 3), labeledPix.ravel()
m, n = labeledPix.shape
s, t = labeledPix.strides
# find all sizes in O(n)
sizes = np.bincount(labeledPixf)
n_ids = len(sizes)
# make index for quick access to labeled areas
lblidx = np.split(np.argsort(labeledPixf), np.cumsum(sizes[:-1]))
lblidx[0] = None
# find all mean colors in O(n)
regionMeans = np.transpose([np.bincount(labeledPix.ravel(), px)
/ np.maximum(sizes, 1)
for px in pix.reshape(-1, 3).T])
# find all neighbors in O(n)
horz = set(frozenset(p) for bl in as_strided(labeledPix, (m,n-1,2), (s,t,t))
for p in bl)
vert = set(frozenset(p) for bl in as_strided(labeledPix, (m-1,n,2), (s,t,s))
for p in bl)
nb = horz|vert
if DIAG_NEIGHBORS:
dwnrgt = set(frozenset(p) for bl in as_strided(
labeledPix, (m-1,n-1,2), (s,t,s+t)) for p in bl)
dwnlft = set(frozenset(p) for bl in as_strided(
labeledPix[::-1], (m-1,n-1,2), (-s,t,t-s)) for p in bl)
nb = nb|dwnrgt|dwnlft
nb = {p for p in nb if len(p) == 2 and not 0 in p}
nb_dict = {}
for a, b in nb:
nb_dict.setdefault(a, set()).add(b)
nb_dict.setdefault(b, set()).add(a)
maxSize = labeledPix.size - sizes[0]
for id_ in range(1, n_ids):
nbs = list(nb_dict.get(id_, set()))
if not nbs:
continue
d = regionMeans[id_] - regionMeans[nbs]
d = np.einsum('ij,ij->i', d, d)
mnd = np.argmin(d)
if d[mnd] < ((ratio - sizes[id_]/maxSize) * MAXIMUMCONTRAST)**2:
mn = nbs[mnd]
lrg, sml = (id_, mn) if sizes[id_] >= sizes[mn] else (mn, id_)
sizes[lrg], sizes[sml] = sizes[lrg] + sizes[sml], 0
for nb in nb_dict[sml]:
nb_dict[nb].remove(sml)
nb_dict[nb].add(lrg)
nb_dict[lrg].update(nb_dict[sml])
nb_dict[lrg].remove(lrg)
nb_dict[sml] = set()
pixf[lblidx[sml]] = regionMeans[lrg]
labeledPixf[lblidx[sml]] = lrg
lblidx[lrg], lblidx[sml] = np.r_[lblidx[lrg],lblidx[sml]], None
print(time.clock() - start)
return pix
from scipy.ndimage.morphology import binary_dilation
import time
STRUCTEL = np.ones((3,3), int) if DIAG_NEIGHBORS else np.array([[0,1,0],[1,1,1],[0,1,0]], int)
def getNeighbors(labeledPix, regionID):
nb = set(labeledPix[binary_dilation(labeledPix == regionID, structure=STRUCTEL)])
nb.remove(regionID)
return sorted(nb)
numpy = np
def textureRemover(pix, labeledPix, ratio = 1.0):
pix, labeledPix = pix.copy(), labeledPix.copy()
numElements = numpy.amax(labeledPix)
maxSize = numpy.count_nonzero(labeledPix)
MAXIMUMCONTRAST = 443.405
start = time.clock()
for regionID in range(numElements):
regionID += 1
if regionID not in labeledPix:
continue
#print(regionID)
#print((regionID / numElements) * 100, '%')
neighborIDs = getNeighbors(labeledPix, regionID)
if 0 in neighborIDs:
neighborIDs.remove(0) #remove white value
regionMask = labeledPix == regionID
region = pix[regionMask]
size = numpy.count_nonzero(regionMask)
contrastMin = (ratio - (size / maxSize)) * MAXIMUMCONTRAST
regionMean = region.mean(axis = 0)
if len(neighborIDs) > 20000:
contrast = numpy.zeros(labeledPix.shape)
contrast[labeledPix!=0] = numpy.sqrt(numpy.sum((regionMean - pix[labeledPix!=0])**2, axis = -1))
significantMask = (contrast < contrastMin)
significantContrasts = list(numpy.unique(contrast[significantMask]))
significantNeighbors = {}
for significantContrast in significantContrasts:
minContrast = min(significantContrasts)
if labeledPix[contrast == minContrast][0] in neighborIDs:
significantNeighbors[minContrast] = labeledPix[contrast == minContrast][0]
else:
significantContrasts.pop(significantContrasts.index(minContrast))
else:
significantNeighbors = {}
for neighborID in neighborIDs:
neighborMask = labeledPix == neighborID
neighbor = pix[neighborMask]
neighborMean = neighbor.mean(axis = 0)
contrast = numpy.sqrt(numpy.sum((regionMean - neighborMean)**2, axis = -1))
if contrast < contrastMin:
significantNeighbors[contrast] = neighborID
if significantNeighbors:
contrasts = significantNeighbors.keys()
minContrast = min(contrasts)
minNeighbor = significantNeighbors[minContrast]
neighborMask = labeledPix == minNeighbor
neighborSize = numpy.count_nonzero(neighborMask)
if neighborSize <= size:
labeledPix[neighborMask] = regionID
pix[neighborMask] = regionMean
else:
labeledPix[regionMask] = minNeighbor
pix[regionMask] = pix[neighborMask].mean(axis = 0)
print(time.clock() - start)
return pix
data = mockdata(200, 200, 1000)
print('original')
res0 = textureRemover(*data)
print('optimized')
res2 = textureRemover2(*data)
print('equal')
print(np.allclose(res0, res2))

How to add a member function to an existing Python object?

Previously I created a lot of Python objects of class A, and I would like to add a new function plotting_in_PC_space_with_coloring_option() (the purpose of this function is to plot some data in this object) to class A and use those old objects to call plotting_in_PC_space_with_coloring_option().
An example is:
import copy
import numpy as np
from math import *
from pybrain.structure import *
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets.supervised import SupervisedDataSet
import pickle
import neural_network_related
class A(object):
"""the neural network for simulation"""
'''
todo:
- find boundary
- get_angles_from_coefficients
'''
def __init__(self,
index, # the index of the current network
list_of_coor_data_files, # accept multiple files of training data
energy_expression_file, # input, output files
preprocessing_settings = None,
connection_between_layers = None, connection_with_bias_layers = None,
PCs = None, # principal components
):
self._index = index
self._list_of_coor_data_files = list_of_coor_data_files
self._energy_expression_file = energy_expression_file
self._data_set = []
for item in list_of_coor_data_files:
self._data_set += self.get_many_cossin_from_coordiantes_in_file(item)
self._preprocessing_settings = preprocessing_settings
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
self._node_num = [8, 15, 2, 15, 8]
self._PCs = PCs
def save_into_file(self, filename = None):
if filename is None:
filename = "network_%s.pkl" % str(self._index) # by default naming with its index
with open(filename, 'wb') as my_file:
pickle.dump(self, my_file, pickle.HIGHEST_PROTOCOL)
return
def get_cossin_from_a_coordinate(self, a_coordinate):
num_of_coordinates = len(a_coordinate) / 3
a_coordinate = np.array(a_coordinate).reshape(num_of_coordinates, 3)
diff_coordinates = a_coordinate[1:num_of_coordinates, :] - a_coordinate[0:num_of_coordinates - 1,:] # bond vectors
diff_coordinates_1=diff_coordinates[0:num_of_coordinates-2,:];diff_coordinates_2=diff_coordinates[1:num_of_coordinates-1,:]
normal_vectors = np.cross(diff_coordinates_1, diff_coordinates_2);
normal_vectors_normalized = np.array(map(lambda x: x / sqrt(np.dot(x,x)), normal_vectors))
normal_vectors_normalized_1 = normal_vectors_normalized[0:num_of_coordinates-3, :];normal_vectors_normalized_2 = normal_vectors_normalized[1:num_of_coordinates-2,:];
diff_coordinates_mid = diff_coordinates[1:num_of_coordinates-2]; # these are bond vectors in the middle (remove the first and last one), they should be perpendicular to adjacent normal vectors
cos_of_angles = range(len(normal_vectors_normalized_1))
sin_of_angles_vec = range(len(normal_vectors_normalized_1))
sin_of_angles = range(len(normal_vectors_normalized_1)) # initialization
for index in range(len(normal_vectors_normalized_1)):
cos_of_angles[index] = np.dot(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
sin_of_angles_vec[index] = np.cross(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
sin_of_angles[index] = sqrt(np.dot(sin_of_angles_vec[index], sin_of_angles_vec[index])) * np.sign(sum(sin_of_angles_vec[index]) * sum(diff_coordinates_mid[index]));
return cos_of_angles + sin_of_angles
def get_many_cossin_from_coordinates(self, coordinates):
return map(self.get_cossin_from_a_coordinate, coordinates)
def get_many_cossin_from_coordiantes_in_file (self, filename):
coordinates = np.loadtxt(filename)
return self.get_many_cossin_from_coordinates(coordinates)
def mapminmax(self, my_list): # for preprocessing in network
my_min = min(my_list)
my_max = max(my_list)
mul_factor = 2.0 / (my_max - my_min)
offset = (my_min + my_max) / 2.0
result_list = np.array(map(lambda x : (x - offset) * mul_factor, my_list))
return (result_list, (mul_factor, offset)) # also return the parameters for processing
def get_mapminmax_preprocess_result_and_coeff(self,data=None):
if data is None:
data = self._data_set
data = np.array(data)
data = np.transpose(data)
result = []; params = []
for item in data:
temp_result, preprocess_params = self.mapminmax(item)
result.append(temp_result)
params.append(preprocess_params)
return (np.transpose(np.array(result)), params)
def mapminmax_preprocess_using_coeff(self, input_data=None, preprocessing_settings=None):
# try begin
if preprocessing_settings is None:
preprocessing_settings = self._preprocessing_settings
temp_setttings = np.transpose(np.array(preprocessing_settings))
result = []
for item in input_data:
item = np.multiply(item - temp_setttings[1], temp_setttings[0])
result.append(item)
return result
# try end
def get_expression_of_network(self, connection_between_layers=None, connection_with_bias_layers=None):
if connection_between_layers is None:
connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None:
connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
expression = ""
# first part: network
for i in range(2):
expression = '\n' + expression
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
bias_coef = connection_with_bias_layers[i].params
for j in range(np.size(mul_coef, 0)):
temp_expression = 'layer_%d_unit_%d = tanh( ' % (i + 1, j)
for k in range(np.size(mul_coef, 1)):
temp_expression += ' %f * layer_%d_unit_%d +' % (mul_coef[j, k], i, k)
temp_expression += ' %f);\n' % (bias_coef[j])
expression = temp_expression + expression # order of expressions matter in OpenMM
# second part: definition of inputs
index_of_backbone_atoms = [2, 5, 7, 9, 15, 17, 19];
for i in range(len(index_of_backbone_atoms) - 3):
index_of_coss = i
index_of_sins = i + 4
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_coss, index_of_coss, self._preprocessing_settings[index_of_coss][1], self._preprocessing_settings[index_of_coss][0])
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_sins, index_of_sins, self._preprocessing_settings[index_of_sins][1], self._preprocessing_settings[index_of_sins][0])
expression += 'raw_layer_0_unit_%d = cos(dihedral_angle_%d);\n' % (index_of_coss, i)
expression += 'raw_layer_0_unit_%d = sin(dihedral_angle_%d);\n' % (index_of_sins, i)
expression += 'dihedral_angle_%d = dihedral(p%d, p%d, p%d, p%d);\n' % \
(i, index_of_backbone_atoms[i], index_of_backbone_atoms[i+1],index_of_backbone_atoms[i+2],index_of_backbone_atoms[i+3])
return expression
def write_expression_into_file(self, out_file = None):
if out_file is None: out_file = self._energy_expression_file
expression = self.get_expression_of_network()
with open(out_file, 'w') as f_out:
f_out.write(expression)
return
def get_mid_result(self, input_data=None, connection_between_layers=None, connection_with_bias_layers=None):
if input_data is None: input_data = self._data_set
if connection_between_layers is None: connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None: connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
temp_mid_result = range(4)
mid_result = []
# first need to do preprocessing
for item in self.mapminmax_preprocess_using_coeff(input_data, self._preprocessing_settings):
for i in range(4):
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i]) # fix node_num
bias_coef = connection_with_bias_layers[i].params
previous_result = item if i == 0 else temp_mid_result[i - 1]
temp_mid_result[i] = np.dot(mul_coef, previous_result) + bias_coef
if i != 3: # the last output layer is a linear layer, while others are tanh layers
temp_mid_result[i] = map(tanh, temp_mid_result[i])
mid_result.append(copy.deepcopy(temp_mid_result)) # note that should use deepcopy
return mid_result
def get_PC_and_save_it_to_network(self):
'''get PCs and save the result into _PCs
'''
mid_result = self.get_mid_result()
self._PCs = [item[1] for item in mid_result]
return
def train(self):
####################### set up autoencoder begin #######################
node_num = self._node_num
in_layer = LinearLayer(node_num[0], "IL")
hidden_layers = [TanhLayer(node_num[1], "HL1"), TanhLayer(node_num[2], "HL2"), TanhLayer(node_num[3], "HL3")]
bias_layers = [BiasUnit("B1"),BiasUnit("B2"),BiasUnit("B3"),BiasUnit("B4")]
out_layer = LinearLayer(node_num[4], "OL")
layer_list = [in_layer] + hidden_layers + [out_layer]
molecule_net = FeedForwardNetwork()
molecule_net.addInputModule(in_layer)
for item in (hidden_layers + bias_layers):
molecule_net.addModule(item)
molecule_net.addOutputModule(out_layer)
connection_between_layers = range(4); connection_with_bias_layers = range(4)
for i in range(4):
connection_between_layers[i] = FullConnection(layer_list[i], layer_list[i+1])
connection_with_bias_layers[i] = FullConnection(bias_layers[i], layer_list[i+1])
molecule_net.addConnection(connection_between_layers[i]) # connect two neighbor layers
molecule_net.addConnection(connection_with_bias_layers[i])
molecule_net.sortModules() # this is some internal initialization process to make this module usable
####################### set up autoencoder end #######################
trainer = BackpropTrainer(molecule_net, learningrate=0.002,momentum=0.4,verbose=False, weightdecay=0.1, lrdecay=1)
data_set = SupervisedDataSet(node_num[0], node_num[4])
sincos = self._data_set
(sincos_after_process, self._preprocessing_settings) = self.get_mapminmax_preprocess_result_and_coeff(data = sincos)
for item in sincos_after_process: # is it needed?
data_set.addSample(item, item)
trainer.trainUntilConvergence(data_set, maxEpochs=50)
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
print("Done!\n")
return
def create_sge_files_for_simulation(self,potential_centers = None):
if potential_centers is None:
potential_centers = self.get_boundary_points()
neural_network_related.create_sge_files(potential_centers)
return
def get_boundary_points(self, list_of_points = None, num_of_bins = 5):
if list_of_points is None: list_of_points = self._PCs
x = [item[0] for item in list_of_points]
y = [item[1] for item in list_of_points]
temp = np.histogram2d(x,y, bins=[num_of_bins, num_of_bins])
hist_matrix = temp[0]
# add a set of zeros around this region
hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins), 0)
hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins), 0)
hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins + 2), 1)
hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins +2), 1)
hist_matrix = (hist_matrix != 0).astype(int)
sum_of_neighbors = np.zeros(np.shape(hist_matrix)) # number of neighbors occupied with some points
for i in range(np.shape(hist_matrix)[0]):
for j in range(np.shape(hist_matrix)[1]):
if i != 0: sum_of_neighbors[i,j] += hist_matrix[i - 1][j]
if j != 0: sum_of_neighbors[i,j] += hist_matrix[i][j - 1]
if i != np.shape(hist_matrix)[0] - 1: sum_of_neighbors[i,j] += hist_matrix[i + 1][j]
if j != np.shape(hist_matrix)[1] - 1: sum_of_neighbors[i,j] += hist_matrix[i][j + 1]
bin_width_0 = temp[1][1]-temp[1][0]
bin_width_1 = temp[2][1]-temp[2][0]
min_coor_in_PC_space_0 = temp[1][0] - 0.5 * bin_width_0 # multiply by 0.5 since we want the center of the grid
min_coor_in_PC_space_1 = temp[2][0] - 0.5 * bin_width_1
potential_centers = []
for i in range(np.shape(hist_matrix)[0]):
for j in range(np.shape(hist_matrix)[1]):
if hist_matrix[i,j] == 0 and sum_of_neighbors[i,j] != 0: # no points in this block but there are points in neighboring blocks
temp_potential_center = [round(min_coor_in_PC_space_0 + i * bin_width_0, 2), round(min_coor_in_PC_space_1 + j * bin_width_1, 2)]
potential_centers.append(temp_potential_center)
return potential_centers
# this function is added after those old objects of A were created
def plotting_in_PC_space_with_coloring_option(self,
list_of_coordinate_files_for_plotting=None, # accept multiple files
color_option='pure'):
'''
by default, we are using training data, and we also allow external data input
'''
if list_of_coordinate_files_for_plotting is None:
PCs_to_plot = self._PCs
else:
temp_sincos = []
for item in list_of_coordinate_files_for_plotting:
temp_sincos += self.get_many_cossin_from_coordiantes_in_file(item)
temp_mid_result = self.get_mid_result(input_data = temp_sincos)
PCs_to_plot = [item[1] for item in temp_mid_result]
(x, y) = ([item[0] for item in PCs_to_plot], [item[1] for item in PCs_to_plot])
# coloring
if color_option == 'pure':
coloring = 'red'
elif color_option == 'step':
coloring = range(len(x))
fig, ax = plt.subplots()
ax.scatter(x,y, c=coloring)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
plt.show()
return
But it seems that plotting_in_PC_space_with_coloring_option() was not binded to those old objects, is here any way to fix it (I do not want to recreate these objects since creation involves CPU-intensive calculation and would take very long time to do it)?
Thanks!
Something like this:
class A:
def q(self): print 1
a = A()
def f(self): print 2
setattr(A, 'f', f)
a.f()
This is called a monkey patch.

RAM full in numpy sagemath

I wrote the next code. In 1-2 hours of execution time the RAM of my laptop (8gb) is filled and the sistem crash:
from scipy.stats import uniform
import numpy as np
cant_de_cadenas =[700,800,900]
cantidad_de_cadenas=np.array([])
for kkkkk in cant_de_cadenas:
cantidad_de_cadenas=np.append(cantidad_de_cadenas,kkkkk)
cantidad_de_cadenas=np.transpose(cantidad_de_cadenas)
b=10
h=b
Longitud=1
numero_experimentos=100
densidad_de_cadenas =cantidad_de_cadenas/(b**2)
prob_perc=np.array([])
tiempos=np.array([])
S_int=np.array([])
S_medio=np.array([])
desviacion_standard=np.array([])
desviacion_standard_nuevo=np.array([])
anisotropia_macroscopica_porcentual=np.array([])
componente_y=np.array([])
componente_x=np.array([])
import time
for N in cant_de_cadenas:
empieza=time.clock()
PERCOLACION=np.array([])
size_medio_intuitivo = np.array([])
size_medio_nuevo = np.array([])
std_dev_size_medio_intuitivo = np.array([])
std_dev_size_medio_nuevo = np.array([])
comp_y = np.array([])
comp_x = np.array([])
for u in xrange(numero_experimentos):
perco = False
array_x1=uniform.rvs(loc=-b/2, scale=b, size=N)
array_y1=uniform.rvs(loc=-h/2, scale=h, size=N)
array_angle=uniform.rvs(loc=-0.5*(np.pi), scale=np.pi, size=N)
array_pendiente_x=1./np.tan(array_angle)
random=uniform.rvs(loc=-1, scale=2, size=N)
lambda_sign=np.zeros([N])
for t in xrange(N):
if random[t]<0:
lambda_sign[t]=-1
else:
lambda_sign[t]=1
array_lambdas=(lambda_sign*Longitud)/np.sqrt(1+array_pendiente_x**2)
array_x2= array_x1 + array_lambdas*array_pendiente_x
array_y2= array_y1 + array_lambdas*1
array_x1 = np.append(array_x1, [-b/2, b/2, -b/2, -b/2])
array_y1 = np.append(array_y1, [-h/2, -h/2, -h/2, h/2])
array_x2 = np.append(array_x2, [-b/2, b/2, b/2, b/2])
array_y2 = np.append(array_y2, [h/2, h/2, -h/2, h/2])
M = np.zeros([N+4,N+4])
for j in xrange(N+4):
if j>0:
x_A1B1 = array_x2[j]-array_x1[j]
y_A1B1 = array_y2[j]-array_y1[j]
x_A1A2 = array_x1[0:j]-array_x1[j]
y_A1A2 = array_y1[0:j]-array_y1[j]
x_A2A1 = -1*x_A1A2
y_A2A1 = -1*y_A1A2
x_A2B2 = array_x2[0:j]-array_x1[0:j]
y_A2B2 = array_y2[0:j]-array_y1[0:j]
x_A1B2 = array_x2[0:j]-array_x1[j]
y_A1B2 = array_y2[0:j]-array_y1[j]
x_A2B1 = array_x2[j]-array_x1[0:j]
y_A2B1 = array_y2[j]-array_y1[0:j]
p1 = x_A1B1*y_A1A2 - y_A1B1*x_A1A2
p2 = x_A1B1*y_A1B2 - y_A1B1*x_A1B2
p3 = x_A2B2*y_A2B1 - y_A2B2*x_A2B1
p4 = x_A2B2*y_A2A1 - y_A2B2*x_A2A1
condicion_1=p1*p2
condicion_2=p3*p4
for k in xrange (j):
if condicion_1[k]<=0 and condicion_2[k]<=0:
M[j,k]=1
del condicion_1
del condicion_2
if j+1<N+4:
x_A1B1 = array_x2[j]-array_x1[j]
y_A1B1 = array_y2[j]-array_y1[j]
x_A1A2 = array_x1[j+1:]-array_x1[j]
y_A1A2 = array_y1[j+1:]-array_y1[j]
x_A2A1 = -1*x_A1A2
y_A2A1 = -1*y_A1A2
x_A2B2 = array_x2[j+1:]-array_x1[j+1:]
y_A2B2 = array_y2[j+1:]-array_y1[j+1:]
x_A1B2 = array_x2[j+1:]-array_x1[j]
y_A1B2 = array_y2[j+1:]-array_y1[j]
x_A2B1 = array_x2[j]-array_x1[j+1:]
y_A2B1 = array_y2[j]-array_y1[j+1:]
p1 = x_A1B1*y_A1A2 - y_A1B1*x_A1A2
p2 = x_A1B1*y_A1B2 - y_A1B1*x_A1B2
p3 = x_A2B2*y_A2B1 - y_A2B2*x_A2B1
p4 = x_A2B2*y_A2A1 - y_A2B2*x_A2A1
condicion_1=p1*p2
condicion_2=p3*p4
for k in xrange ((N+4)-j-1):
if condicion_1[k]<=0 and condicion_2[k]<=0:
M[j,k+j+1]=1
del condicion_1
del condicion_2
M[N,N+2]=0
M[N,N+3]=0
M[N+1,N+2]=0
M[N+1,N+3]=0
M[N+2,N]=0
M[N+2,N+1]=0
M[N+3,N]=0
M[N+3,N+1]=0
CD=np.array([])
POPOPO=[]
for g in xrange(N):
lala=0
r=False
while lala<=len(POPOPO)-1:
esta= g in POPOPO[lala]
if esta is True:
lala=len(POPOPO)
r=True
else:
lala=lala+1
if r is False:
L=np.array([g])
for s in xrange(N):
if M[g,s] != 0:
L=np.append(L,s)
x=0
while x<= N:
for l in xrange(N):
z= l in L
d=L[x]
if z is False and M[d,l] != 0:
L=np.append(L,l)
if x+1<len(L):
x+=1
else:
x=N+1.
q= len (L)
CD=np.append(CD, q)
POPOPO.append(L)
M_horizontal=M.copy()
M_horizontal[:,N+2] = np.zeros(N+4)
M_horizontal[:,N+3] = np.zeros(N+4)
M_horizontal[N+2] = np.zeros(N+4)
M_horizontal[N+3] = np.zeros(N+4)
L=np.array([N])
for s in xrange(N+4):
if M_horizontal[N,s] != 0:
L=np.append(L,s)
x=0
while x<= N+4:
for l in xrange(N+4):
z= l in L
d=L[x]
if z is False and M_horizontal[d,l] != 0:
L=np.append(L,l)
if x+1<len(L):
x+=1
else:
x=(N+4)+1.
LV1_in_L = N in L
LV2_in_L= (N+1) in L
if LV1_in_L is True and LV2_in_L is True:
perc_horiz=True
else:
perc_horiz=False
M_vertical=M.copy()
M_vertical[:,N] = np.zeros(N+4)
M_vertical[:,N+1] = np.zeros(N+4)
M_vertical[N] = np.zeros(N+4)
M_vertical[N+1] = np.zeros(N+4)
L=np.array([N+2])
for s in xrange(N+4):
if M_vertical[N+2,s] != 0:
L=np.append(L,s)
x=0
while x<= N+4:
for l in xrange(N+4):
z= l in L
d=L[x]
if z is False and M_vertical[d,l] != 0:
L=np.append(L,l)
if x+1<len(L):
x+=1
else:
x=(N+4)+1.
LH1_in_L = (N+2) in L
LH2_in_L= (N+3) in L
if LH1_in_L is True and LH2_in_L is True:
perc_ver = True
else:
perc_ver = False
if perc_ver is True or perc_horiz is True:
PERCOLACION=np.append(PERCOLACION,1)
perco=True
D = np.array([])
W = np.array([])
for c in xrange (int(min(CD)), int(max(CD)+1),1):
D=np.append(D,c)
frec = sum (CD == c)
W = np.append(W,frec)
if perco is True:
posicion=np.argmax(D)
D=np.delete(D,posicion)
W=np.delete(W,posicion)
if len(D) == 0 and len(W)==0:
S_medio_intuitivo_exp_u=0
S_medio_nuevo_exp_u = 0
std_dev_exp_u = 0
std_dev_nuevo_exp_u = 0
else:
S_medio_intuitivo_exp_u = np.average (D,weights=W)
peso_nuevo=D*W
S_medio_nuevo_exp_u = np.average (D,weights=peso_nuevo)
tipos=sum(W)
X=W*((D-S_medio_intuitivo_exp_u)**2)
S=sum(X)
std_dev_exp_u = np.sqrt(S/(tipos-1.))
tipos_nuevo=sum(peso_nuevo)
X_nuevo=peso_nuevo*((D-S_medio_nuevo_exp_u)**2)
S_nuevo=sum(X_nuevo)
std_dev_nuevo_exp_u = np.sqrt(S_nuevo/(tipos_nuevo-1.))
componente_longitudinal=Longitud*np.abs(np.cos(array_angle))
comp_y=np.append(comp_y, sum(componente_longitudinal)/N)
componente_transversal=Longitud*np.abs(np.sin(array_angle))
comp_x=np.append(comp_x, sum(componente_transversal)/N)
std_dev_size_medio_intuitivo=np.append(std_dev_size_medio_intuitivo, std_dev_exp_u)
std_dev_size_medio_nuevo=np.append(std_dev_size_medio_nuevo, std_dev_nuevo_exp_u)
size_medio_intuitivo=np.append(size_medio_intuitivo, S_medio_intuitivo_exp_u)
size_medio_nuevo=np.append(size_medio_nuevo, S_medio_nuevo_exp_u)
percolation_probability=sum(PERCOLACION)/numero_experimentos
prob_perc=np.append(prob_perc, percolation_probability)
S_int = np.append (S_int, sum(size_medio_intuitivo)/numero_experimentos)
S_medio=np.append (S_medio, sum(size_medio_nuevo)/numero_experimentos)
desviacion_standard = np.append (desviacion_standard, sum(std_dev_size_medio_intuitivo)/numero_experimentos)
desviacion_standard_nuevo=np.append (desviacion_standard_nuevo, sum(std_dev_size_medio_nuevo)/numero_experimentos)
tiempos=np.append(tiempos, time.clock()-empieza)
componente_y=np.append(componente_y, sum(comp_y)/numero_experimentos)
componente_x=np.append(componente_x, sum(comp_x)/numero_experimentos)
anisotropia_macroscopica_porcentual=100*(1-(componente_y/componente_x))
I tryed with gc and gc.collect() and 'del'command for deleting arrays after his use and nothing work!
What am I doing wrong? Why the memory becomes full while running (starts with 10% of RAM used and in 1-2hour is totally full used)?
Lets take an array M with size 300MB. When this is overwritten (for example, in each iteration), have we 300MB occupated in RAM memory or just 300MB? In the case that we have just 300MB, there should be no problem, so why have I this RAM issue? In the case of the RAM is acumulated, how can I do for free RAM memory occupated for the 'old' array?
Please help me, I'm totally stuck!
Thanks a lot!

Categories