This question already has answers here:
Python equivalent to 'hold on' in Matlab
(5 answers)
Closed 6 years ago.
This has been surprisingly difficult to find information on. I have two functions that I want to chart together, enumeration() and betterEnumeration():
import matplotlib.pyplot as plt
import time
import numpy as np
import sympy
from sympy import S, symbols
import random
from math import floor
def enumeration(array):
    max = None
    to_return = (max, 0, 0)
    for i in range(0, len(array) + 1):
        for j in range(0, i):
            currentSum = 0
            for k in range(j, i):
                currentSum += array[k]
            if (max is None) or (currentSum > max):
                max = currentSum
                to_return = (max, j, k)
    return to_return
def betterEnumeration(array):
    max = None
    to_return = (max, 0, 0)
    for i in range(1, len(array) + 1):
        currentSum = 0
        for j in range(i, len(array) + 1):
            currentSum += array[j - 1]
            if (max is None) or (currentSum > max):
                max = currentSum
                to_return = (max, i-1, j-1)
    return to_return
I also have two helper functions randomArray() and regressionCurve().
def randomArray(totalNumbers,min,max):
    array = []
    while totalNumbers > 0:
        array.append(random.randrange(min,max))
        totalNumbers -= 1
    return array
def regressionCurve(x,y):
    # calculate polynomial
    p = np.polyfit(x, y, 3)
    f = np.poly1d(p)
    # calculate new x's and y's
    x_new = np.linspace(x[0], x[-1], 50)
    y_new = f(x_new)
    x = symbols("x")
    poly = sum(S("{:6.5f}".format(v))*x**i for i, v in enumerate(p[::-1]))
    eq_latex = sympy.printing.latex(poly)
    plt.plot(x_new, y_new, label="${}$".format(eq_latex))
    plt.legend(fontsize="small")
    plt.show()
I want to plot both of these functions on the same chart, both the raw data points as well as the regression curves. The following code will chart the data points for enumeration() and then make a regression curve for them, but I'm not sure how to plot both enumeration() and betterEnumeration() on the same chart.
def chart():
    nValues = [10,25,50,100,250,500,1000]
    avgExecTimes = []
    for n in nValues: # For each n value
        totals = []
        sum = 0
        avgExecTime = 0
        for i in range(0,10): # Create and test 10 random arrays
            executionTimes = []
            array = randomArray(n,0,10)
            t1 = time.clock()
            enumeration(array)
            t2 = time.clock()
            total = t2-t1
            totals.append(total)
            executionTimes.append(total)
            print("Time elapsed(n=" + str(n) + "): " + str(total))
        for t in totals: # Find avg running time for each n's 10 executions
            sum += t
        avgExecTime = sum/10
        avgExecTimes.append(avgExecTime)
        print("Avg execution time: " + str(avgExecTime))
    # Chart execution times
    plt.plot(nValues,avgExecTimes)
    plt.ylabel('Seconds')
    plt.xlabel('n')
    plt.show()
    # Chart curve that fits
    x = np.array(nValues)
    y = np.array(avgExecTimes)
    regressionCurve(x,y)
To add a line to a plot:
plt.plot(x,y)
so, if you wanted to plot x1, y1 and then add x2,y2:
plt.plot(x1,y1)
plt.plot(x2,y2)
However, both calls will just pick colors from matplotlib's default color cycle. If you want explicit control over which line gets which color, add a color argument:
plt.plot(x1,y1, c='b')
plt.plot(x2,y2, c= 'g')
and if the units are different, you'll want to look into twinx, which will allow you to plot with 2 different y axes but the same x axis.
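For example, a minimal twinx sketch with made-up data (the two series here are just placeholders):

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 10, 50)      # shared x axis
y1 = np.sin(x)                  # first series
y2 = 1000 * np.cos(x)           # second series on a very different scale

fig, ax1 = plt.subplots()
ax1.plot(x, y1, c='b')
ax1.set_ylabel('y1 (blue)')
ax2 = ax1.twinx()               # second y axis, same x axis
ax2.plot(x, y2, c='g')
ax2.set_ylabel('y2 (green)')
plt.show()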
You're going to want to plot both sets of data from within the same function or both outside of the function. Otherwise, you're running into a local vs. global issue as well.
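For instance, here is a rough sketch of how the chart() function above could be extended to time both algorithms and put the raw points plus their fitted curves on a single figure. The helper names timeAlgorithm and chartBoth are made up for this sketch, and it assumes the functions and imports already defined in the question:

def timeAlgorithm(func, nValues):
    # average execution time of func over 10 random arrays for each n
    avgExecTimes = []
    for n in nValues:
        total = 0
        for i in range(10):
            array = randomArray(n, 0, 10)
            t1 = time.clock()   # use time.perf_counter() on Python 3.8+
            func(array)
            t2 = time.clock()
            total += t2 - t1
        avgExecTimes.append(total / 10)
    return avgExecTimes

def chartBoth():
    nValues = [10, 25, 50, 100, 250, 500, 1000]
    x = np.array(nValues)
    for func, color in [(enumeration, 'b'), (betterEnumeration, 'g')]:
        y = np.array(timeAlgorithm(func, nValues))
        # raw data points
        plt.plot(x, y, c=color, marker='o', linestyle='', label=func.__name__)
        # cubic fit, same idea as regressionCurve()
        f = np.poly1d(np.polyfit(x, y, 3))
        x_new = np.linspace(x[0], x[-1], 50)
        plt.plot(x_new, f(x_new), c=color)
    plt.xlabel('n')
    plt.ylabel('Seconds')
    plt.legend(fontsize="small")
    plt.show()   # one show() after everything has been added to the axes

chartBoth()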
Related
I am having a problem updating my x0x1 array with the new values in my while loop. When I print the array to check each generation of the iteration, it prints the same values every time (despite getting different z vector values), and I am not sure why.
I am getting the z-values on each iteration of my for loop, but for some reason the newly generated x0x1 array is not updated from one generation of the while loop to the next.
import numpy as np
import numpy.random
def calculateFunctionValueMatrix(x0x1, N):
    functionValueArray = np.zeros((N-1, 1))
    for i in range(0, N-1):
        functionValueArray[i] = calculateFunctionValue(x0x1[i])
    return functionValueArray

def calculateFunctionValue(x):
    function = (x[0]-1)**2 + 5*((x[1]-x[0]**2)**2)
    return function

def x0x1Array(N):
    x0x1 = np.random.uniform(-2, 2, size = (N-1,2))
    generateCandidateVector(x0x1)

def generateCandidateVector(x0x1):
    print(x0x1)
    K = 10
    F = 0.8
    N = 10
    generation = 1
    while generation <= K:
        for i in range(0, N-1):
            x0_ind, x1_ind, x2_ind = np.random.choice(len(x0x1), 3)
            x0 = x0x1[x0_ind]
            x1 = x0x1[x1_ind]
            x2 = x0x1[x2_ind]
            vectorZ = x0 + F*(x1-x2)
            print("this is vector", vectorZ)
            if(calculateFunctionValue(vectorZ) < calculateFunctionValue(x0x1[i])):
                vectorZ = x0x1[i]
            elif(calculateFunctionValue(vectorZ) > calculateFunctionValue(x0x1[i])):
                x0x1[i] = x0x1[i]
        print(x0x1)
        if(np.std(calculateFunctionValueMatrix(x0x1, N)) < 0.01):
            print("Optimal Solution Found")
        generation = generation + 1

def main():
    N = 50
    x0x1Array(N)

main()
I am essentially looking to compute and plot the Riemann/Darboux sums of an arbitrary function, very much like this code does:
import numpy as np
import matplotlib.pyplot as plt
f = lambda x : 1/(1+x**2)
a = 0; b = 5; N = 10
n = 10 # Use n*N+1 points to plot the function smoothly
x = np.linspace(a,b,N+1)
y = f(x)
X = np.linspace(a,b,n*N+1)
Y = f(X)
plt.figure(figsize=(15,5))
plt.subplot(1,3,1)
plt.plot(X,Y,'b')
x_left = x[:-1] # Left endpoints
y_left = y[:-1]
plt.plot(x_left,y_left,'b.',markersize=10)
plt.bar(x_left,y_left,width=(b-a)/N,alpha=0.2,align='edge',edgecolor='b')
plt.title('Left Riemann Sum, N = {}'.format(N))
plt.subplot(1,3,2)
plt.plot(X,Y,'b')
x_mid = (x[:-1] + x[1:])/2 # Midpoints
y_mid = f(x_mid)
plt.plot(x_mid,y_mid,'b.',markersize=10)
plt.bar(x_mid,y_mid,width=(b-a)/N,alpha=0.2,edgecolor='b')
plt.title('Midpoint Riemann Sum, N = {}'.format(N))
plt.subplot(1,3,3)
plt.plot(X,Y,'b')
x_right = x[1:] # Right endpoints
y_right = y[1:]
plt.plot(x_right,y_right,'b.',markersize=10)
plt.bar(x_right,y_right,width=-(b-a)/N,alpha=0.2,align='edge',edgecolor='b')
plt.title('Right Riemann Sum, N = {}'.format(N))
plt.show()
dx = (b-a)/N
x_left = np.linspace(a,b-dx,N)
x_midpoint = np.linspace(dx/2,b - dx/2,N)
x_right = np.linspace(dx,b,N)
print("Partition with",N,"subintervals.")
left_riemann_sum = np.sum(f(x_left) * dx)
print("Left Riemann Sum:",left_riemann_sum)
midpoint_riemann_sum = np.sum(f(x_midpoint) * dx)
print("Midpoint Riemann Sum:",midpoint_riemann_sum)
right_riemann_sum = np.sum(f(x_right) * dx)
print("Right Riemann Sum:",right_riemann_sum)
Taken from HERE.
However, instead of computing the left, right and midpoint sums, what I am looking for is a way to compute the upper and lower Darboux sums, that is, the sums over the infima/suprema of each interval.
To achieve this I would have to find the infimum/supremum of the function on each interval, but I am at a loss here. I believe that to make this work I have to assume the function is continuous, so that the infimum is a minimum and the supremum is a maximum. Do I need more than just the numpy library for this?
You can do this by creating a matrix containing all the sample points of each interval and applying the maximum/minimum along one axis.
import numpy as np
import matplotlib.pyplot as plt
f = lambda x : 1/(1+x**2)
a = 0; b = 5; N = 10
n = 10 # Use n*N+1 points to plot the function smoothly
x = np.linspace(a,b,N+1)
y = f(x)
X = np.linspace(a,b,n*N+1)
Y = f(X)
plt.plot(X,Y,'b')
# create offset vector with n elements, broadcast is used to enable transpose
dx = np.broadcast_to(np.linspace(0,(b-a)/N,n), (1,n)).T
# Compute all values for the offset vector (nxN matrix)
F = f(x[:-1]+dx)
y_upper = np.amax(F,0)
y_lower = np.amin(F,0)
print(y_upper)
plt.bar(x[1:],y_upper,width=-(b-a)/N,alpha=0.2,align='edge',edgecolor='b')
plt.bar(x[1:],y_lower,width=-(b-a)/N,alpha=0.2,align='edge',edgecolor='r')
plt.title('Darboux sums, N = {}'.format(N))
plt.show()
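To actually compute the upper and lower Darboux sums (the numbers, not just the plot), you can reuse y_upper and y_lower from the snippet above: for a partition into N equal subintervals, the upper sum is the sum of the suprema times the subinterval width, and likewise for the lower sum.

dx_width = (b - a) / N                       # width of each subinterval
upper_sum = np.sum(y_upper * dx_width)       # sum of suprema * width
lower_sum = np.sum(y_lower * dx_width)       # sum of infima * width
print("Upper Darboux sum:", upper_sum)
print("Lower Darboux sum:", lower_sum)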
I currently have the code below and am having some trouble plotting it. I know that trying to plot both ymax and y won't work in this case, but how would I go about plotting just the values for y? I have plotted the function before by removing ymax from the return, but I need to both print the values and plot the solution for y.
import numpy as np
import matplotlib.pyplot as plt
def GaussElimination(A):
    '''
    Description: Use Gauss elimination to solve a set of simultaneous equations
    Parameters: A a matrix of coefficient and constant value for the system
    Return: a matrix holding the solution to the equation. This corresponds to the last n
    '''
    nr,nc = A.shape
    B = A.copy()
    # start the gauss elimination
    for r in range(nr):
        # pivoting
        max = abs(B[r][r])
        maxr = r
        for rr in range(r,nr):
            if max < abs(B[rr][r]):
                max = abs(B[rr][r])
                maxr = rr
        if max == 0:
            print("Singular Matrix")
            return []
        # swap if needed
        if (maxr != r):
            for c in range(nc):
                temp = B[r][c]
                B[r][c] = B[maxr][c]
                B[maxr][c] = temp
        # scale the row
        scale = B[r][r]
        for c in range(r,nc):
            B[r][c] = B[r][c]/scale
        # eliminate values in the columns
        for rr in range(nr):
            if rr != r:
                scale = B[rr][r]
                for c in range(r,nc):
                    B[rr][c] = B[rr][c] - scale*B[r][c]
    if (nc == nr+1):
        return B[:,nc-1]
    else:
        return B[:,(nr):nc]

def SimplySupportedBeam(n):
    M = np.zeros([n+1,n+1])
    C = np.array([[0],[150],[0],[0],[0],[0]])
    for r in range(n-3):
        M[r][r] = 1
        M[r][r+1] = -4
        M[r][r+2] = 6
        M[r][r+3] = -4
        M[r][r+4] = 1
    M[n-3][1] = 1
    M[n-2][n-1] = 1
    M[n-1][n-5] = 1
    M[n-1][n-4] = -2
    M[n-1][n-3] = 1
    M[n][n-2] = 1
    M[n][n-1] = -2
    M[n][n] = 1
    A = np.concatenate((M,C), axis=1)
    y0 = GaussElimination(A)
    y = y0[1:n]
    ymax = np.amax(abs(y))
    return y, ymax
n = int(input("Index of the last node: "))
print (SimplySupportedBeam(n))
plt.figure(1)
plt.plot(SimplySupportedBeam(n))
plt.show()
How would I plot just the value I get for y from my code?
It seems like y is a 1D numpy array.
If you just want to plot its values against their indices you should be able to do so using either
plt.plot(SimplySupportedBeam(n)[0])
or
y, ymax = SimplySupportedBeam(n)
plt.plot(y)
The problem was that your function returns two values, i.e. y and ymax.
Assume that you have an NxM matrix, with values in the range [0, 100]. What I'd like to do is place points with a density (inversely) relative to the values in that area.
For example, here's a 2D Gaussian field, inverted s.t. the centroid has a value of 0, and the perimeter is at 100:
I'd like to pack the points so that they appear somewhat similar to this image:
Note how there is a radial spread outwards.
My attempt looks a little different :( ...
What I attempt to do is (i) generate a boolean array of the same shape and size, and (ii) move through the rows and columns. If the value of the boolean array at some point is True, pass; otherwise, add a [row, col] point to a list and cover the boolean array with True in a radius proportional to the value in the Gaussian array.
The choice of a Gaussian for this example isn't important; the fundamental question is: given a floating-point matrix, how can one place points with a density proportional to those values?
Any help very much appreciated :)
import matplotlib.pyplot as plt
import numpy as np
from math import exp
def gaussian(x,y,x0,y0,A=10.0,sigma_x=10.0,sigma_y=10.0):
    return A - A*exp(-((x-x0)**2/(2*sigma_x**2) + (y-y0)**2/(2*sigma_y**2)))

def generate_grid(width=100,height=100):
    grid = np.empty((width,height))
    for x in range(0,width):
        for y in range(0,height):
            grid[x][y] = gaussian(x,y,width/2,height/2,A=100.0)
    return grid

def cover_array(a,row,col,radius):
    nRows = np.shape(grid)[0]
    nCols = np.shape(grid)[1]
    mid = round(radius / 2)
    half_radius = int(round(radius))
    for x in range(-half_radius,half_radius):
        for y in range(-half_radius,half_radius):
            if row+x >= 0 and x+row < nRows and col+y >= 0 and y+col < nCols:
                if (x-mid)**2 + (y-mid)**2 <= radius**2:
                    a[row+x][col+y] = True

def pack_points(grid):
    points = []
    nRows = np.shape(grid)[0]
    nCols = np.shape(grid)[1]
    maxDist = 50.0
    minDist = 0.0
    maxEdge = 10.0
    minEdge = 5.0
    grid_min = 0.0
    grid_max = 100.0
    row = 0
    col = 0
    arrayCovered = np.zeros((nRows,nCols))
    while True:
        if row >= nRows:
            return np.array(points)
        if arrayCovered[row][col] == False:
            radius = maxEdge * ((grid[row][col] - grid_min) / (grid_max - grid_min))
            cover_array(arrayCovered,row,col,radius)
            points.append([row,col])
        col += 1
        if col >= nCols:
            row += 1
            col = 0

grid = generate_grid()
plt.imshow(grid)
plt.show()

points = pack_points(grid)
plt.scatter(points[:,0],points[:,1])
plt.show()
Here is a cheap and simple method, although it requires hand-setting an amount parameter:
import numpy as np
import matplotlib.pyplot as plt
def gaussian(x,y,x0,y0,A=10.0,sigma_x=10.0,sigma_y=10.0):
    return A - A*np.exp(-((x-x0)**2/(2*sigma_x**2) + (y-y0)**2/(2*sigma_y**2)))

def distribute_points(data, amount=1):
    p = amount * (1 / data)
    r = np.random.random(p.shape)
    return np.where(p > r)
ii, jj = np.mgrid[-10:10:.1, -10:10:.1]
data = gaussian(ii, jj, 0, 0)
px, py = distribute_points(data, amount=.03)
plt.imshow(data)
plt.scatter(px, py, marker='.', c='#ff000080')
plt.xticks([])
plt.yticks([])
plt.xlim([0, len(ii)])
plt.ylim([0, len(jj)])
Result:
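To get a feel for the hand-set amount parameter, you can rerun distribute_points with a few different values. A quick sketch reusing the functions above (the values 0.01, 0.03 and 0.1 are arbitrary):

ii, jj = np.mgrid[-10:10:.1, -10:10:.1]
data = gaussian(ii, jj, 0, 0)
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for ax, amount in zip(axes, [0.01, 0.03, 0.1]):
    px, py = distribute_points(data, amount=amount)
    ax.imshow(data)
    ax.scatter(px, py, marker='.', c='#ff000080')
    ax.set_title('amount = {}'.format(amount))
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()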
I am trying to optimize a snippet that gets called a lot (millions of times) so any type of speed improvement (hopefully removing the for-loop) would be great.
I am computing a correlation function of some j'th particle with all others
C_j(|r-r'|) = sqrt(E((s_j(r')-s_k(r))^2)) averaged over k.
My idea is to have a variable corrfun which bins data into some bins (the r, defined elsewhere). I find which bin of r each s_k belongs to, and this is stored in ind. So ind[0] is the index of r (and thus of corrfun) to which the first point corresponds. Multiple points can fall into the same bin (in fact I want bins to be big enough to contain multiple points), so I sum together all of the (s_j(r')-s_k(r))^2 and then divide by the number of points in that bin (stored in the variable rw). The code I ended up with is the following (np is numpy):
for k, v in enumerate(ind):
    if j==k:
        continue
    corrfun[v] += (s[k]-s[j])**2
    rw[v] += 1
rw2 = rw
rw2[rw < 1] = 1
corrfun = np.sqrt(np.divide(corrfun, rw2))
Note, the rw2 business was because I want to avoid divide by 0 problems but I do return the rw array and I want to be able to differentiate between the rw=0 and rw=1 elements. Perhaps there is a more elegant solution for this as well.
Is there a way to make the for-loop faster? While I would like to not add the self interaction (j==k) I am even ok with having self interaction if it means I can get significantly faster calculation (length of ind ~ 1E6 so self interaction is probably insignificant anyways).
Thank you!
Ilya
Edit:
Here is the full code. Note, in the full code I am averaging over j as well.
import numpy as np
def twopointcorr(x,y,s,dr):
    width = np.max(x)-np.min(x)
    height = np.max(y)-np.min(y)
    n = len(x)
    maxR = np.sqrt((width/2)**2 + (height/2)**2)
    r = np.arange(0, maxR, dr)
    print(r)
    corrfun = r*0
    rw = r*0
    print(maxR)
    ''' go through all points'''
    for j in range(0, n-1):
        hypot = np.sqrt((x[j]-x)**2+(y[j]-y)**2)
        ind = [np.abs(r-h).argmin() for h in hypot]
        for k, v in enumerate(ind):
            if j==k:
                continue
            corrfun[v] += (s[k]-s[j])**2
            rw[v] += 1
    rw2 = rw
    rw2[rw < 1] = 1
    corrfun = np.sqrt(np.divide(corrfun, rw2))
    return r, corrfun, rw
I test it the following way:
from twopointcorr import twopointcorr
import numpy as np
import matplotlib.pyplot as plt
import time
n=1000
x = np.random.rand(n)
y = np.random.rand(n)
s = np.random.rand(n)
print('running two point corr function')
start_time = time.time()
r,corrfun,rw = twopointcorr(x,y,s,0.1)
print("--- Execution time is %s seconds ---" % (time.time() - start_time))
fig1=plt.figure()
plt.plot(r, corrfun,'-x')
fig2=plt.figure()
plt.plot(r, rw,'-x')
plt.show()
Again, the main issue is that in the real dataset n~1E6. I can resample to make it smaller, of course, but I would love to actually crank through the dataset.
Here is code that uses broadcasting, hypot, round, and bincount to remove all the loops:
def twopointcorr2(x, y, s, dr):
    width = np.max(x)-np.min(x)
    height = np.max(y)-np.min(y)
    n = len(x)
    maxR = np.sqrt((width/2)**2 + (height/2)**2)
    r = np.arange(0, maxR, dr)
    osub = lambda x: np.subtract.outer(x, x)
    ind = np.clip(np.round(np.hypot(osub(x), osub(y)) / dr), 0, len(r)-1).astype(int)
    rw = np.bincount(ind.ravel())
    rw[0] -= len(x)
    corrfun = np.bincount(ind.ravel(), (osub(s)**2).ravel())
    return r, corrfun, rw
To compare, I modified your code as follows:
def twopointcorr(x,y,s,dr):
    width = np.max(x)-np.min(x)
    height = np.max(y)-np.min(y)
    n = len(x)
    maxR = np.sqrt((width/2)**2 + (height/2)**2)
    r = np.arange(0, maxR, dr)
    corrfun = r*0
    rw = r*0
    for j in range(0, n):
        hypot = np.sqrt((x[j]-x)**2+(y[j]-y)**2)
        ind = [np.abs(r-h).argmin() for h in hypot]
        for k, v in enumerate(ind):
            if j==k:
                continue
            corrfun[v] += (s[k]-s[j])**2
            rw[v] += 1
    return r, corrfun, rw
and here is the code to check the results:
import numpy as np
n=1000
x = np.random.rand(n)
y = np.random.rand(n)
s = np.random.rand(n)
r1, corrfun1, rw1 = twopointcorr(x,y,s,0.1)
r2, corrfun2, rw2 = twopointcorr2(x,y,s,0.1)
assert np.allclose(r1, r2)
assert np.allclose(corrfun1, corrfun2)
assert np.allclose(rw1, rw2)
and the %timeit results:
%timeit twopointcorr(x,y,s,0.1)
%timeit twopointcorr2(x,y,s,0.1)
outputs:
1 loop, best of 3: 5.16 s per loop
10 loops, best of 3: 134 ms per loop
Your original code on my system runs in about 5.7 seconds. I fully vectorized the inner loop and got it to run in 0.39 seconds. Simply replace your "go through all points" loop with this:
# requires: import scipy.spatial.distance
points = np.column_stack((x,y))
hypots = scipy.spatial.distance.cdist(points, points)
inds = np.rint(hypots.clip(max=maxR) / dr).astype(np.int)
# go through all points
for j in range(n): # n.b. previously n-1, not sure why
    ind = inds[j]
    np.add.at(corrfun, ind, (s - s[j])**2)
    np.add.at(rw, ind, 1)
    rw[ind[j]] -= 1 # subtract self
The first observation was that your hypot code was computing 2D distances, so I replaced that with cdist from SciPy to do it all in a single call. The second was that the inner for loop was slow, and thanks to an insightful comment from @hpaulj I vectorized that as well using np.add.at().
Since you asked how to vectorize the inner loop as well, I did that later. It now takes 0.25 seconds to run, for a total speedup of over 20x. Here's the final code:
points = np.column_stack((x,y))
hypots = scipy.spatial.distance.cdist(points, points)
inds = np.rint(hypots.clip(max=maxR) / dr).astype(np.int)
sn = np.tile(s, (n,1)) # n copies of s
diffs = (sn - sn.T)**2 # squares of pairwise differences
np.add.at(corrfun, inds, diffs)
rw = np.bincount(inds.flatten(), minlength=len(r))
np.subtract.at(rw, inds.diagonal(), 1) # subtract self
This uses more memory but does produce a substantial speedup vs. the single-loop version above.
OK, so as it turns out, outer products are incredibly memory expensive. However, using the answers from @HYRY and @JohnZwinck I was able to make code that is still roughly linear in n in memory and computes fast (0.5 seconds for the test case):
import numpy as np
def twopointcorr(x,y,s,dr,maxR=-1):
    width = np.max(x)-np.min(x)
    height = np.max(y)-np.min(y)
    n = len(x)
    if maxR < dr:
        maxR = np.sqrt((width/2)**2 + (height/2)**2)
    r = np.arange(0, maxR+dr, dr)
    corrfun = r*0
    rw = r*0
    for j in range(0, n):
        ind = np.clip(np.round(np.hypot(x[j]-x,y[j]-y) / dr), 0, len(r)-1).astype(int)
        np.add.at(corrfun, ind, (s - s[j])**2)
        np.add.at(rw, ind, 1)
    rw[0] -= n
    corrfun = np.sqrt(np.divide(corrfun, np.maximum(rw,1)))
    r = np.delete(r,-1)
    rw = np.delete(rw,-1)
    corrfun = np.delete(corrfun,-1)
    return r, corrfun, rw
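For completeness, the same kind of test driver as before works unchanged with this version; a quick usage sketch (same random test data as above):

import numpy as np
import matplotlib.pyplot as plt
import time

n = 1000
x = np.random.rand(n)
y = np.random.rand(n)
s = np.random.rand(n)

start_time = time.time()
r, corrfun, rw = twopointcorr(x, y, s, 0.1)
print("--- Execution time is %s seconds ---" % (time.time() - start_time))

plt.plot(r, corrfun, '-x')   # correlation function vs. distance bin
plt.show()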