how can I speed up my python code? - python

It's a program of import multiple images and extract feature using dct and histogram.
1) Import multiple images from folder
2) Make image size 256*256
3) Use image of 64*64 block unit with stride = 32
4) Do dct(8*8 size)
5) make histogram of dct
6) Extract features from dct coefficient histogram
The problem is that it's too slow.
I think it's because there's so many "for loop".
This is my full-code in python.
How can I change my code to speed up?
I am not familiar with python.
Please help me
import numpy as np
from scipy.fftpack import dct
from PIL import Image
import glob
import matplotlib.pyplot as plt
def find_index(x,key):
for i in range(0,len(x)):
if x[i] == key :
return i
else:
i = i+1
def image_open(path):
image_list = []
#for filename in glob.glob('path/*.jpg'):
for filename in glob.glob(path+'/*.jpg'):
im=Image.open(filename)
image_list.append(im)
return image_list
def dct_2(img):
#Get 2D Cosine Transform of Image
return dct(dct(np.asarray(img).T, norm='ortho').T, norm='ortho')
def return_array(array):
zero = [0.0, 0.0, 0.0, 0.0, 0.0]
range = int((max(array)) - min(array))
x, bins, patch = plt.hist(array, bins=range)
x = list(zero) + list(x) + list(zero)
return x
path = 'C:\\Users\\LG\\PycharmProjects\\photo' #folder that contains many images
images = image_open(path)
row = 0
array_matrix = []
label_matrix = []
for i in range(0, len(images)): #access image
box3 = (0,0,256,256)
a = images[i].crop(box3)
(y,cb,cr) = a.split() #ycbcr
width , height = y.size
y.show()
for q in range(0, height-32 , 32): #use image 64*64 block unit
for w in range(0 , width-32 ,32):
box1 =(q,w,q+64,w+64)
block = y.crop(box1)
array1 , array2 , array3 , array4 , array5 , array6 , array7 , array8 ,array9 = [],[],[],[],[],[],[],[],[]
for j in range(0,64,8): #dct
for n in range(0,64,8):
box2 = (j,n,j+8,n+8)
temp = block.crop(box2)
dct_temp = dct_2(temp)
array1.append(dct_temp[0,1])
array2.append(dct_temp[1,0])
array3.append(dct_temp[0,2])
array4.append(dct_temp[1,1])
array5.append(dct_temp[2,0])
array6.append(dct_temp[0,3])
array7.append(dct_temp[1,2])
array8.append(dct_temp[2,1])
array9.append(dct_temp[3,0])
x1 = return_array(array1) #extract feature from dct histogram
index = find_index(x1, max(x1))
u = [index - 5, index + 5, 1]
array_matrix.append(x1[u[0]:u[1] + 1:u[2]])
x2 = return_array(array2)
index = find_index(x2, max(x2))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x2[u[0]:u[1] + 1:u[2]])
x3 = return_array(array3)
index = find_index(x3, max(x3))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x3[u[0]:u[1] + 1:u[2]])
x4 = return_array(array4)
index = find_index(x4, max(x4))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x4[u[0]:u[1] + 1:u[2]])
x5 = return_array(array5)
index = find_index(x5, max(x5))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x5[u[0]:u[1] + 1:u[2]])
x6 = return_array(array6)
index = find_index(x6, max(x6))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x6[u[0]:u[1] + 1:u[2]])
x7 = return_array(array7)
index = find_index(x7, max(x7))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x7[u[0]:u[1] + 1:u[2]])
x8 = return_array(array8)
index = find_index(x8, max(x8))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x8[u[0]:u[1] + 1:u[2]])
x9 = return_array(array9)
index = find_index(x9, max(x9))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x9[u[0]:u[1] + 1:u[2]])
print(w/32)
row = row+1
print(array_matrix)

Rather than assuming that a specific section is taking longer than others, I'd recommend profiling your script. A profiler will collect metrics on how long certain parts of your program takes, and also allow you to better see how much any changes affect the code (makes it better, worse, etc).
Once you know where your problem lies, then you can take a more targeted approach at making it faster.
Have a look at the profiling module: https://docs.python.org/2/library/profile.html
Also have a look at some tutorials:
https://julien.danjou.info/blog/2015/guide-to-python-profiling-cprofile-concrete-case-carbonara
https://zapier.com/engineering/profiling-python-boss/
https://marcobonzanini.com/2015/01/05/my-python-code-is-slow-tips-for-profiling/

Related

Find parameters from the Vasicek model

I am given the following bond:
and need to fit the Vasicek model to this data.
My attempt is the following:
# ... imports
years = np.array([1, 2, 3, 4, 7, 10])
pric = np.array([0, .93, .85, .78, .65, .55, .42])
X = sympy.symbols("a b sigma")
a, b, s = X
rt1_rt = np.diff(pric)
ab_rt = np.array([a*(b-r) for r in pric[1:] ])
term = rt1_rt - ab_rt
def normpdf(x, mean, sd):
var = sd**2
denom = (2*sym.pi*var)**.5
num = sym.E**(-(x-mean)**2/(2*var))
return num/denom
pdfs = np.array([sym.log(normpdf(x, 0, s)) for x in term])
func = 0
for el in pdfs:
func += el
func = func.factor()
lmd = sym.lambdify(X, func)
def target_fun(params):
return lmd(*params)
result = scipy.optimize.least_squares(target_fun, [10, 10, 10])
I don't think that it outputs correct solution.
Your code is almost correct.
You want to maximize your function, therefore you need to place minus sign in front of lmd in your function.
def target_fun(params):
return -lmd(*params)
Additionally, the initial values are usually set to less than 1. Picking 10 is not the best choice as the algorithm might converge to a saddle point.
Consider [0.01, 0.01, 0.01].

MatPlotLib Scatter not working Inside Function

I am a beginner in Python. I have been trying my hands on MatPlotLib to compare the stats of soccer players in FIFA 20. Basically the problem I'm facing is:
def make_graph(value1, value2, namevalue, label1, label2):
print(value1, value2, namevalue)
plt.scatter(value1, value2)
plt.xlabel(label1)
plt.ylabel(label2)
for i in range(len(namevalue)):
plt.text(value1[i] + 0.3, value2[i] + 0.3, namevalue[i], fontdict=dict(color='red', size=10), bbox=dict(facecolor = 'yellow', alpha=0.5))
plt.xlim(min(value1) - 5, max(value2) + 5)
plt.ylim(min(value1) - 5, max(value2) + 5)
plt.show()
def Test():
df = xlrd.open_workbook(path)
data = df.sheet_by_index(0)
data.cell_value(0,0)
name = []
pace = []
shoot = []
for i in range(1, 450):
#print(data.cell_value(i, 3))
buff = str(data.cell_value(i,2)).strip()
if buff == "LM" or buff == "RM":
pacebuffer = int(data.cell_value(i, 4))
shootbuffer = int(data.cell_value(i, 5))
if pacebuffer >= 90:
name.append(data.cell_value(i, 3).strip("\n"))
pace.append(pacebuffer)
shoot.append(shootbuffer)
#print(name)
make_graph(pace, shoot, name, "Pace", "Shoot")
The particular code is showing me an empty graph.
BUT
When I write the same piece of code inside Test() which I wrote inside make_graph() , it gives me the desired output.
But in this way I have to rewrite that plotting thing every time I write some other functions and that's really a problem. Any idea how to fix this?
It is your x and y lims :
plt.xlim(min(value1) - 5, max(value2) + 5)
plt.ylim(min(value1) - 5, max(value2) + 5)
You should change to :
plt.xlim(min(value1) - 5, max(value1) + 5)
plt.ylim(min(value2) - 5, max(value2) + 5)
Technically your plt.scatter was working but then your x and y lims meant that you couldn't see.

How to create a 2D array with N lots of random numbers?

I am trying to obtain a variance for a value I obtained by processing a 2x150 array into a discrete correlation function. In order to do this I need to randomly sample 80% of the original data N times, which will allow me to calculate a variance over these values.
have so far been able to create one randomly sampled set of data using this:
rand_indices = []
running_var = (len(find_length)*0.8)
x=0
while x<running_var:
rand_inx = randint(0, (len(find_length)-1))
rand_indices.append(rand_inx)
x=x+1
which creates an array 80% of the length of my original with randomly selected indices to be picked out and processed.
My problem is that I am not sure how to iterate this in order to get N sets of these random numbers, I think ideally in a Nx120 sized array. My whole code so far is:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from random import randint
useless, just_to, find_length = np.loadtxt("w2_mjy_final.dat").T
w2_dat = np.loadtxt("w2_mjy_final.dat")
w2_rel = np.delete(w2_dat, 2, axis = 1)
w2_array = np.asarray(w2_rel)
w1_dat = np.loadtxt("w1_mjy_final.dat")
w1_rel = np.delete(w1_dat, 2, axis=1)
w1_array = np.asarray(w1_rel)
peaks = []
y=1
N = 0
x = 0
z = 0
rand_indices = []
rand_indices2d = []
running_var = (len(find_length)*0.8)
while z<N:
while x<running_var:
rand_inx = randint(0, (len(find_length)-1))
rand_indices.append(rand_inx)
x=x+1
rand_indices2d.append(rand_indices)
z=z+1
while y<N:
w1_sampled = w1_array[rand_indices, :]
w2_sampled = w2_array[rand_indices, :]
w1s_t, w1s_dat = zip(*w1_sampled)
w2s_t, w2s_dat = zip(*w2_sampled)
w2s_mean = np.mean(w2s_dat)
w2s_stdev = np.std(w2s_dat)
w1s_mean = np.mean(w1s_dat)
w1s_stdev = np.std(w1s_dat)
taus = []
dcfs = []
bins = 40
for i in w2s_t:
for j in w1s_t:
tau_datpoint = i-j
taus.append(tau_datpoint)
for k in w2s_dat:
for l in w1s_dat:
dcf_datpoint = ((k - w2s_mean)*(l - w1s_mean))/((w2s_stdev*w1s_stdev))
dcfs.append(dcf_datpoint)
plotdat = np.vstack((taus, dcfs)).T
sort_plotdat = sorted(plotdat, key=lambda x:x[0])
np.savetxt("w1sw2sarray.txt", sort_plotdat)
taus_sort, dcfs_sort = np.loadtxt("w1w2array.txt").T
dcfs_means, taubins_edges, taubins_number = stats.binned_statistic(taus_sort, dcfs_sort, statistic='mean', bins=bins)
taubin_edge = np.delete(taubins_edges, 0)
import operator
indexs, values = max(enumerate(dcfs_means), key=operator.itemgetter(1))
percents = values*0.8
dcf_lists = dcfs_means.tolist()
centarr_negs, centarr_poss = np.split(dcfs_means, [indexs])
centind_negs = np.argmin(np.abs(centarr_negs - percents))
centind_poss = np.argmin(np.abs(centarr_poss - percents))
lagcent_negs = taubins_edges[centind_negs]
lagcent_poss = taubins_edges[int((bins/2)+centind_poss)]
sampled_peak = (np.abs(lagcent_poss - lagcent_negs)/2)+lagcent_negs
peaks.append(sampled_peak)
y=y+1
print peaks
Seeing as you're using numpy already, why not use np.random.randint
In your case:
np.random.randint(len(find_length)-1, size=(N, running_var))
Would give you an N*running_var sized matrix, with random integer entries from 0 to len(find_length)-2 inclusive.
Example Usage:
>>> N=4
>>> running_var=6
>>> find_length = [1,2,3]
>>> np.random.randint(len(find_length)-1, size=(N, running_var))
array([[1, 0, 1, 0, 0, 1],
[1, 0, 1, 1, 0, 0],
[1, 1, 0, 0, 1, 0],
[1, 1, 0, 1, 0, 1]])

While loop repeat same output when it should be different

I don't understand why the following code output the same random variables for simulated_returns_pr from the SECOND loop (same for the 2 charts from the function). Actually I removed some code but all following variable which should be different are also the same from the SECOND loop. I am missing something but do not understand. Any contribution would be appreciated.
My code:
logR= timeseries
i=1
while i < 5:
simulated_returns_pr= np.random.normal(loc=mean(logR)*30, scale=stdev(logR)*np.sqrt(30.), size=30)
seed = 2
N = 30
def Brownian(seed, N):
np.random.seed(seed)
dt = 1./N # time step
b = simulated_returns_pr*np.sqrt(dt)
W = np.cumsum(b) # brownian path
return W, b
b = Brownian(seed, N)[1]
W = Brownian(seed, N)[0]
W = np.insert(W, 0, 0.)
plt.rcParams['figure.figsize'] = (10,8)
xb = np.linspace(1, len(b), len(b))
plt.plot(xb, b)
plt.title('Brownian Increments')
plt.show()
xw = np.linspace(1, len(W), len(W))
plt.plot(xw, W)
plt.title('Brownian Motion')
plt.show()
i += 1
Output simulated_returns_pr:
[ 0.012191 1.16322303 -0.23225735 -0.12357125 0.35687974 1.02187274
0.25248517 0.74665974 0.54373161 0.43677913 0.69960184 -0.81226681
0.50380517 -0.25108897 0.47459444 0.49541601 0.79958083 -0.20233765
0.5142276 -0.31340253 0.46332258 0.48350956 0.06662023 0.53800548
-0.01440759 -0.23280276 -0.07377719 -0.29948791 0.15798112 0.10707121]
[-0.10796927 0.07350919 -0.97356921 0.9275805 -0.80101665 -0.32191758
0.35499571 -0.52506813 -0.43075947 -0.35577774 0.37944815 1.25577886
0.12274682 -0.4609512 0.37320789 -0.19828379 0.09220437 0.69335439
-0.27465829 0.10637854 -0.3402222 0.02308293 0.2309978 -0.3959363
-0.06873477 -0.01706476 -0.21917336 -0.49603296 -0.61363441 0.02456247]
[-0.10796927 0.07350919 -0.97356921 0.9275805 -0.80101665 -0.32191758
0.35499571 -0.52506813 -0.43075947 -0.35577774 0.37944815 1.25577886
0.12274682 -0.4609512 0.37320789 -0.19828379 0.09220437 0.69335439
-0.27465829 0.10637854 -0.3402222 0.02308293 0.2309978 -0.3959363
-0.06873477 -0.01706476 -0.21917336 -0.49603296 -0.61363441 0.02456247]
[-0.10796927 0.07350919 -0.97356921 0.9275805 -0.80101665 -0.32191758
0.35499571 -0.52506813 -0.43075947 -0.35577774 0.37944815 1.25577886
0.12274682 -0.4609512 0.37320789 -0.19828379 0.09220437 0.69335439
-0.27465829 0.10637854 -0.3402222 0.02308293 0.2309978 -0.3959363
-0.06873477 -0.01706476 -0.21917336 -0.49603296 -0.61363441 0.02456247]

python: adjust imported coordinates

I have a python script where i import coordinates of triangular elements, and element definitions from two seperate text files. I first define unique nodes for each triangluar element and then define a new rectangular element in between the triangular elements (this python scripts writes an input file for an FEM-calculation).
The coordinate file looks like the following:
id,x,y,
1, 0, 0
2, 0, 1
3, 0, 2
4, 1, 0
5, 1, 1
6, 1, 2
7, 2, 0
8, 2, 1
9, 2, 2
The element file looks like this:
id, n1, n2, n3
1, 1, 2, 4
2, 1, 2, 5
3, 2, 3, 5
4, 3, 5, 6
5, 5, 6, 8
6, 6, 8, 9
7, 5, 7, 8
8, 4, 5, 7
What i want to happend is that the coordinates of the nodes of the triangular elements are moved towarts the centre of mass of the triangular element, on this way the rectangular element in between the triangular elements will get a fysical thickness.
However i do something wrong in my python script (see below).
The part in the script between the horizontal lines should change the coordinates.
But for my script extra coordinates are added, instead of the (for the example here) 9 initial coordinates.
And the coordinates are also not moved in the correct direction.
Why does this happen and how can i solve this?
#!/usr/bin/env python
# Inlezen coordinaten
open("D://Documents//SkyDrive//afstuderen//99 EEM - Abaqus 6.11.2//scripting//_COORDINATEN.txt", "r")
import csv
import itertools
with open("_COORDINATEN.txt") as file:
data = csv.reader(file)
next(data)
coords = []
coords = ([[float(x) for x in line[1:]] for line in data])
#inlezen elementen
open("D://Documents//SkyDrive//afstuderen//99 EEM - Abaqus 6.11.2//scripting//_ELEMENTEN.txt", "r")
import csv
import itertools
with open("_ELEMENTEN.txt") as file:
data2 = csv.reader(file)
next(data2)
elems = []
elems = ([[int(x)-1 for x in line[1:]] for line in data2])
#Flip the original elements if required
for i,elem in enumerate(elems):
ecoords = [coords[e] for e in elem]
a = [x2-x1 for x1,x2 in zip(ecoords[0],ecoords[1])]
b = [x2-x1 for x1,x2 in zip(ecoords[1],ecoords[2])]
n = a[0]*b[1]-a[1]*b[0]
if n < 0:
elems[i] = [ elem[0], elem[2], elem[1] ]
#bewerking elementen
newcoords = []
newelems = []
for elem in elems:
ecoords = [coords[e] for e in elem]
newelem = range( len(newcoords), len(newcoords)+len(ecoords) )
newcoords += ecoords
newelems.append( newelem )
cohelems = []
for e,elem in enumerate(elems):
for edge in [[0,1],[1,2],[2,0]]:
eedge = [elem[i] for i in edge]
for e2,elem2 in enumerate(elems[e+1:]):
e2 += e+1
for edge2 in [[0,1],[1,2],[2,0]]:
eedge2 = [elem2[i] for i in edge2]
if all([i in eedge2 for i in eedge]):
newedge = [newelems[e][i] for i in edge ]
newedge += [newelems[e2][i] for i in edge2]
cohelems.append( newedge[-1::-1] )
#---------------------------------------------------------------------
def add_vectors(*points):
new_x = 0.0
new_y = 0.0
for point in points:
new_x += point[0]
new_y += point[1]
return [new_x, new_y]
def subtract_vectors(a, b):
new_x = a[0] - b[0]
new_y = a[1] - b[1]
return [new_x, new_y]
def mul_by_scalar(vector, scalar):
new_x = vector[0] * scalar
new_y = vector[1] * scalar
return [new_x, new_y]
new_triangles = []
for elem in elems:
new_triangles += [coords[e] for e in elem]
print 'new_triangles =', new_triangles
CM = mul_by_scalar(add_vectors(*new_triangles), 1.0/3)
point_to_CM_vectors = []
for point in new_triangles:
point_to_CM_vectors.append(subtract_vectors(CM, point))
new_triangle2 = []
#for e,elem in enumerate(elems):
for elem in elems:
for point, motion in zip(new_triangles, point_to_CM_vectors):
new_triangle2.append(add_vectors(point, mul_by_scalar(motion, 0.01)))
# new_triangle2 += [add_vectors(point, mul_by_scalar(motion, 0.01))]
print 'new_triangle2 =', new_triangle2
#---------------------------------------------------------------------
Thank you all in advance for the help!

Categories