numpy to generate discrete probability distribution - python

I'm following a code example I found at http://docs.scipy.org/doc/scipy/reference/tutorial/stats.html#subclassing-rv-discrete for implementing a random number generator for discrete values of a normal distribution. The exact example (not surprisingly) works quite well, but if I modify it to allow only left or right-tailed results, the distribution around 0 should is too low (bin zero should contain more values). I must have hit a boundary condition, but am unable to work it out. Am I missing something?
This is the result of counting the random numbers per bin:
np.bincount(rvs) [1082 2069 1833 1533 1199 837 644 376 218 111 55 20 12 7 2 2]
This is the histogram:
from scipy import stats
np.random.seed(42)
def draw_discrete_gaussian(rng, tail='both'):
# number of integer support points of the distribution minus 1
npoints = rng if tail == 'both' else rng * 2
npointsh = npoints / 2
npointsf = float(npoints)
# bounds for the truncated normal
nbound = 4
# actual bounds of truncated normal
normbound = (1+1/npointsf) * nbound
# integer grid
grid = np.arange(-npointsh, npointsh+2, 1)
# bin limits for the truncnorm
gridlimitsnorm = (grid-0.5) / npointsh * nbound
# used later in the analysis
gridlimits = grid - 0.5
grid = grid[:-1]
probs = np.diff(stats.truncnorm.cdf(gridlimitsnorm, -normbound, normbound))
gridint = grid
normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
# print 'mean = %6.4f, variance = %6.4f, skew = %6.4f, kurtosis = %6.4f'% normdiscrete.stats(moments = 'mvsk')
rnd_val = normdiscrete.rvs()
if tail == 'both':
return rnd_val
if tail == 'left':
return -abs(rnd_val)
elif tail == 'right':
return abs(rnd_val)
rng = 15
tail = 'right'
rvs = [draw_discrete_gaussian(rng, tail=tail) for i in xrange(10000)]
if tail == 'both':
rng_min = rng / -2.0
rng_max = rng / 2.0
elif tail == 'left':
rng_min = -rng
rng_max = 0
elif tail == 'right':
rng_min = 0
rng_max = rng
gridlimits = np.arange(rng_min-.5, rng_max+1.5, 1)
print gridlimits
f, l = np.histogram(rvs, bins=gridlimits)
# cheap way of creating histogram
import matplotlib.pyplot as plt
%matplotlib inline
bins, edges = f, l
left,right = edges[:-1],edges[1:]
X = np.array([left, right]).T.flatten()
Y = np.array([bins, bins]).T.flatten()
# print 'rvs', rvs
print 'np.bincount(rvs)', np.bincount(rvs)
plt.plot(X,Y)
plt.show()

I try to answer my own question based on comments from #user333700 and #user235711:
I insert into the method before normdiscrete = ...
if tail == 'right':
gridint = gridint[npointsh:]
probs = probs[npointsh:]
s = probs.sum()
probs = probs / s
elif tail == 'left':
gridint = gridint[0: npointsh]
probs = probs[0: npointsh]
s = probs.sum()
probs = probs / s
The resulting histograms and look much nicer:

Related

Floating RMS in Python

I'm trying to implement a floating window RMS in python. I'm simulating an incoming stream of measurement data by simpling iterating over time and calculating the sine wave. Since it's a perfect sine wave, its easy to compare the results using math. I also added a numpy calculation to confirm my arrays are populated correctly.
However my floating RMS is not returning the right values, unrelated to my sample size.
Code:
import matplotlib.pyplot as plot
import numpy as np
import math
if __name__ == '__main__':
# sine generation
time_array = []
value_array = []
start = 0
end = 6*math.pi
steps = 100000
amplitude = 10
#rms calc
acc_load_current = 0
sample_size = 1000
for time in np.linspace(0, end, steps):
time_array.append(time)
actual_value = amplitude * math.sin(time)
value_array.append(actual_value)
# rms calc
acc_load_current -= (acc_load_current/sample_size)
# square here
sq_value = actual_value * actual_value
acc_load_current += sq_value
# mean and then root here
floating_rms = np.sqrt(acc_load_current/sample_size)
fixed_rms = np.sqrt(np.mean(np.array(value_array)**2))
math_rms = 1/math.sqrt(2) * amplitude
print(floating_rms)
print(fixed_rms)
print(math_rms)
plot.plot(time_array, value_array)
plot.show()
Result:
2.492669969708522
7.071032456438027
7.071067811865475
I solved the issue by usin a recursive average with zero crossing detection:
import matplotlib.pyplot as plot
import numpy as np
import math
def getAvg(prev_avg, x, n):
return (prev_avg * n + x) / (n+1)
if __name__ == '__main__':
# sine generation
time_array = []
value_array = []
used_value_array = []
start = 0
end = 6*math.pi + 0.5
steps = 10000
amplitude = 325
#rms calc
rms_stream = 0
stream_counter = 0
#zero crossing
in_crossing = 0
crossing_counter = 0
crossing_limits = [-5,5]
left_crossing = 0
for time in np.linspace(0, end, steps):
time_array.append(time)
actual_value = amplitude * math.sin(time) + 4 * np.random.rand()
value_array.append(actual_value)
# detect zero crossing, by checking the first time we reach the limits
# and then not counting until we left it again
is_crossing = crossing_limits[0] < actual_value < crossing_limits[1]
# when we are at amp/2 we can be sure the noise is not causing zero crossing
left_crossing = abs(actual_value) > amplitude/2
if is_crossing and not in_crossing:
in_crossing = 1
crossing_counter += 1
elif not is_crossing and in_crossing and left_crossing:
in_crossing = 0
# rms calc
# square here
if 2 <= crossing_counter <= 3:
sq_value = actual_value * actual_value
rms_stream = getAvg(rms_stream, sq_value, stream_counter)
stream_counter += 1
# debugging by recording the used values
used_value_array.append(actual_value)
else:
used_value_array.append(0)
# mean and then root here
stream_rms_sqrt = np.sqrt(rms_stream)
fixed_rms_sqrt = np.sqrt(np.mean(np.array(value_array)**2))
math_rms_sqrt = 1/math.sqrt(2) * amplitude
print(stream_rms_sqrt)
print(fixed_rms_sqrt)
print(math_rms_sqrt)
plot.plot(time_array, value_array, time_array, used_value_array)
plot.show()

How do I only plot the values I want?

I currently have the code and I having some trouble trying to plot it, I know that trying to plot both ymax and y won't work in this case, but how would I go about plotting just the value for y? I have plotted the function before by removing the ymax from the return, but I need to print the values and plot the solution for y.
import numpy as np
import matplotlib.pyplot as plt
def GaussElimination(A):
'''
Description: Use Gauss elimination to solve a set of simultaneous equations
Parameters: A a matrix of coefficient and constant value for the system
Return: a matrix holding the solution to the equation. This corresponds to the last n
'''
nr,nc=A.shape
B= A.copy()
# start the gauss elimination
for r in range(nr):
#pivoting
max=abs(B[r][r])
maxr = r
for rr in range(r,nr):
if max < abs(B[rr][r]):
max = abs(B[rr][r])
maxr = rr
if max == 0:
print("Singular Matrix")
return []
# swap if needed
if (maxr != r):
for c in range(nc):
temp = B[r][c]
B[r][c]=B[maxr][c]
B[maxr][c] = temp
# scale the row
scale = B[r][r]
for c in range(r,nc):
B[r][c] = B[r][c]/scale
# eliminate values in the columns
for rr in range(nr):
if rr != r:
scale = B[rr][r]
for c in range(r,nc):
B[rr][c]=B[rr][c] - scale*B[r][c]
if (nc == nr+1):
return B[:,nc-1]
else:
return B[:,(nr):nc]
def SimplySupportedBeam(n):
M = np.zeros([n+1,n+1])
C = np.array([[0],[150],[0],[0],[0],[0]])
for r in range(n-3):
M[r][r] = 1
M[r][r+1] = -4
M[r][r+2] = 6
M[r][r+3] = -4
M[r][r+4] = 1
M[n-3][1] = 1
M[n-2][n-1] = 1
M[n-1][n-5] = 1
M[n-1][n-4] = -2
M[n-1][n-3] = 1
M[n][n-2] = 1
M[n][n-1] = -2
M[n][n] = 1
A = np.concatenate((M,C), axis=1)
y0 = GaussElimination(A)
y = y0[1:n]
ymax = np.amax(abs(y))
return y, ymax
n = int(input("Index of the last node: "))
print (SimplySupportedBeam(n))
plt.figure(1)
plt.plot(SimplySupportedBeam(n))
plt.show()
How would I plot just the value I get for y from my code?
It seems like y is 1D numpy array.
If you just want to plot its values against their indices you should be able to do so using either
plt.plot(SimplySupportedBeam(n)[0])
or
y, ymax = SimplySupportedBeam(n)
plt.plot(y)
The problem was that your function returns two values, i.e. y and ymax.
(I did not

Place points with variable density

Assume that you have an NxM matrix, with values ranging from [0,100]. What I'd like to do is place points with a density (inversely) relative to the values in that area.
For example, here's a 2D Gaussian field, inverted s.t. the centroid has a value of 0, and the perimeter is at 100:
I'd like to pack the points so that they appear somewhat similar to this image:
Note how there is a radial spread outwards.
My attempt looks a little different :( ...
What I attempt to do is (i) generate a boolean area, of the same shape and size, and (ii) move through the rows and columns. If the value of the boolean array at some point is True, then pass; otherwise, add a [row,col] point to a list and cover the boolean array with True in a radius proportional to the value in the Gaussian array.
The choice of Gaussian for this example isn't important, the fundamental idea is that: given a floating point matrix, how can one place points with a density proportional to those values?
Any help very much appreciated :)
import matplotlib.pyplot as plt
import numpy as np
from math import exp
def gaussian(x,y,x0,y0,A=10.0,sigma_x=10.0,sigma_y=10.0):
return A - A*exp(-((x-x0)**2/(2*sigma_x**2) + (y-y0)**2/(2*sigma_y**2)))
def generate_grid(width=100,height=100):
grid = np.empty((width,height))
for x in range(0,width):
for y in range(0,height):
grid[x][y] = gaussian(x,y,width/2,height/2,A=100.0)
return grid
def cover_array(a,row,col,radius):
nRows = np.shape(grid)[0]
nCols = np.shape(grid)[1]
mid = round(radius / 2)
half_radius = int(round(radius))
for x in range(-half_radius,half_radius):
for y in range(-half_radius,half_radius):
if row+x >= 0 and x+row < nRows and col+y >= 0 and y+col < nCols:
if (x-mid)**2 + (y-mid)**2 <= radius**2:
a[row+x][col+y] = True
def pack_points(grid):
points = []
nRows = np.shape(grid)[0]
nCols = np.shape(grid)[1]
maxDist = 50.0
minDist = 0.0
maxEdge = 10.0
minEdge = 5.0
grid_min = 0.0
grid_max = 100.0
row = 0
col = 0
arrayCovered = np.zeros((nRows,nCols))
while True:
if row >= nRows:
return np.array(points)
if arrayCovered[row][col] == False:
radius = maxEdge * ((grid[row][col] - grid_min) / (grid_max - grid_min))
cover_array(arrayCovered,row,col,radius)
points.append([row,col])
col += 1
if col >= nCols:
row += 1
col = 0
grid = generate_grid()
plt.imshow(grid)
plt.show()
points = pack_points(grid)
plt.scatter(points[:,0],points[:,1])
plt.show()
Here is a cheap and simple method, although it requires hand-setting an amount parameter:
import numpy as np
import matplotlib.pyplot as plt
def gaussian(x,y,x0,y0,A=10.0,sigma_x=10.0,sigma_y=10.0):
return A - A*np.exp(-((x-x0)**2/(2*sigma_x**2) + (y-y0)**2/(2*sigma_y**2)))
def distribute_points(data, amount=1):
p = amount * (1 / data)
r = np.random.random(p.shape)
return np.where(p > r)
ii, jj = np.mgrid[-10:10:.1, -10:10:.1]
data = gaussian(ii, jj, 0, 0)
px, py = distribute_points(data, amount=.03)
plt.imshow(data)
plt.scatter(px, py, marker='.', c='#ff000080')
plt.xticks([])
plt.yticks([])
plt.xlim([0, len(ii)])
plt.ylim([0, len(jj)])
Result:

Angle Interpolation

I was trying to interpolate the angle which are in list.
Dir DirOffset
0 109.6085
30 77.5099
60 30.5287
90 -10.2748
120 -75.359
150 -147.6015
180 -162.7055
210 21.0103
240 3.5502
270 -11.5475
300 -39.8371
330 -109.5473
360 109.6085
I have written the code to interpolate angle(It keeps on calculating the mean in between angle to reach the interpolation value) which is taking long time. Please help me if some one have the faster and shorter code.
from cmath import rect, phase
from math import radians, degrees, sqrt
#Calculate the mean of angles in List
def mean_angle(degArray):
return degrees(phase(sum(rect(1, radians(d)) for d in degArray)/len(degArray)))
#Calculate Interpolation Angle
def Interpolate_angle(Dir, DirOffset, ValuetoInterpolate):
#Create Lower and Higher bin of ValuetoInterpolate
DirLBin = round(float(ValuetoInterpolate)/30,0)*30
DirHBin = round(float(ValuetoInterpolate+15)/30,0)*30
#Check if the ValuetoInterpolate lies between Lower and Higher bin
if DirLBin == DirHBin:
DirLBin = DirHBin-30
if DirLBin <= ValuetoInterpolate <= DirHBin:
DBin = [float(DirLBin), float(DirHBin)]
Doff = [DirOffset[Dir.index(DirLBin)], DirOffset[Dir.index(DirHBin)]]
else:
DirHBin = DirLBin+30
DBin = [float(DirLBin), float(DirHBin)]
Doff = [DirOffset[Dir.index(DirLBin)], DirOffset[Dir.index(DirHBin)]]
else:
DBin = [float(DirLBin), float(DirHBin)]
Doff = [DirOffset[Dir.index(DirLBin)], DirOffset[Dir.index(DirHBin)]]
#Run 50 iterations to calculate the mean of angle and find the ValuetoInterpolate
for i in range(51):
DMean = mean_angle(DBin)
DOMean = mean_angle(Doff)
if DMean < 0 :
DMean = 360+DMean
if DBin[0] <= ValuetoInterpolate <=DMean:
DBin = [float(DBin[0]), float(DMean)]
Doff = [float(Doff[0]), float(DOMean)]
else:
DBin = [float(DMean), float(DBin[1])]
Doff = [float(DOMean), float(Doff[1])]
return DOMean
Dir = range(0,370,30)
DirOffset = [109.6085,77.5099,30.5287,-10.2748,-75.359,-147.6015,-162.7055,21.0103,3.5502,-11.5475,-39.8371,-109.5473,109.6085]
ValuetoInterpolate = 194.4
print Interpolate_angle(Dir, DirOffset, ValuetoInterpolate)
I got the solution for above question after searching answers from stackoverflow, then I modified little bit to get the solution as per my requirement. The solution might be useful for some one in need of it.
I interpolated the Degrees using below function for each directional bin (0,30,60....360) till 360(360 and 0 degree will be same) and store them in dictionary to create a DataFrame(pandas DataFrame) and left join it with main DataFrame and process further.
def InterpolateDegrees(109.6085,77.5099)
will return interpolated array of DirectionOffset 0 to 30 degree with an interval of 0.1 (0.0, 0.1, 0.2, 0.3......28.7, 29.8, 29.9)
import numpy as np
from math import fabs
def InterpolateDegrees(start, end, BinSector=12):
BinAngle = 360/BinSector
amount = np.arange(0,1,(1/(BinAngle/0.1)))
dif = fabs(end-start)
if dif >180:
if end>start:
start+=360
else:
end+=360
#Interpolate it
value = (start + ((end-start)*amount))
#Wrap it
rzero = 360
Arr = np.where((value>=0) & (value<=360), (value), (value % rzero))
return Arr
Here is a Pandas/Numpy based solution for interpolating an angle series with NaN data.
import pandas as pd
import numpy as np
def interpolate_degrees(series: pd.Series) -> pd.Series:
# I don't want to modify in place
series = series.copy()
# convert to radians
a = np.radians(series)
# unwrap if not nan
a[~np.isnan(a)] = np.unwrap(a[~np.isnan(a)])
series.update(a)
# interpolate unwrapped values
interpolated = series.interpolate()
# wrap 0 - 360 (2*pi)
wrapped = (interpolated + 2*np.pi) % (2 * np.pi)
# cconvert back to degrees
degrees = np.degrees(wrapped)
series.update(degrees)
return series
Usage:
angle = [350, np.nan, 355, np.nan, 359, np.nan, 1, np.nan, 10]
df = pd.DataFrame(data=angle, columns=['angle'])
df['interpolated'] = interpolate_degrees(df.angle)

Find number of connected components of a k-nearest neighbours graph?

Is there an elegant way finding the number of connected components using a pre-calculated KDTree? Right now find the connected components using a breath-first search algorithm with the adjacency matrix given by the KDTree of k-nearest neighbours, but is there a better possibility?
import collections
import numpy as np
from sklearn import neighbors
N = 100
N_k = 8
ra = np.random.random
X0,X1 = ra(N),ra(N)
X0[0:N//2]+= 2
X1[0:N//2]+= 2
X = np.array([X0,X1]).T
tree = neighbors.KDTree(X)
dist, adj = tree.query(X, k = N_k+1)
dist = dist[:,1::]
adj = adj[:,1::]
print("Inside of find_components_lifo")
print("N = %d/ N_k = %d"%(N,N_k))
labels = np.zeros(N, dtype = np.int) - 1
n = 0
steps = 0
remains = (labels == -1)
while n < N:
i = np.arange(0,N,1)[remains][np.random.randint(0,N - n)]
# This is important for directed graphs
labels[i] = i
lifo = collections.deque([i])
while lifo:
ele = lifo.pop()
for k in adj[ele,:]:
if labels[k] == -1:
labels[k] = labels[i]
lifo.append(k)
elif labels[k] != labels[i]:
labels[labels == labels[i]] = labels[k]
remains = (labels == -1)
n = N - len(np.nonzero(remains)[0])
unique = np.unique(labels)
labels_ = np.zeros(N, dtype = np.int) - 1
for i, label in enumerate(unique):
choice = (labels == label)
N_cl = len(np.nonzero(choice)[0])
print("We found a cluster with N = %d"%N_cl)
labels_[choice] = i
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
fixticks(ax)
plt.show()
colors_ = np.array(colors)
for i in range(N):
for j in range(N_k):
ax.plot([X0[i],X0[adj[i,j]]],[X1[i],X1[adj[i,j]]], color = colors_[labels_[i]], alpha = 0.3)
ax.grid(True)
ax.scatter(X0,X1, s = 60, color = "Black")
plt.show()
I
I think you can use scipy's connected_components and scikit-learn's kneighbors_graph together. Does this produce what you're looking for?
from sklearn import neighbors
from scipy.sparse import csgraph
adj = neighbors.kneighbors_graph(X, N_k)
n_components, labels = csgraph.connected_components(adj)

Categories