rasterio data - python - execution time - preprocessing - python

I'm using Rasterio to work with a satellite image, and I need to iterate through the entire file. and applying the formula to each pixel. This process takes a long time and makes it difficult for me to try out different modifications because it takes a long time to see the results each time.
any suggestions to improve time execution ?
And is it better to work on this project locally or via Jupiter, Google Colab, or other tools?
def dn_to_radiance(data_array, band_number):
# getting the G value
channel_gain = float(Landsat8_mlt_dict['RADIANCE_MULT_BAND_' + str(band_number) + ' '])
# Getting the B value
channel_offset = float(Landsat8_mlt_dict['RADIANCE_ADD_BAND_' + str(band_number) + ' '])
# creating a temp array to store the radiance value
# np.empty_like Return a new array with the same shape and type as a given array.
new_data_array = np.empty_like(data_array)
# loooping through the image
for i, row in enumerate(data_array):
for j, col in enumerate(row):
# checking if the pixel value is not nan, to avoid background correction
if data_array[i][j].all() != np.nan:
new_data_array[i][j] = data_array[i][j] * channel_gain + channel_offset
print(f'Radiance calculated for band {band_number}')
return new_data_array
Landsat8_mlt_dict = {}
with open('LC08_L2SP_190037_20190619_20200827_02_T1_MTL.txt', 'r') as _:
# print(type(_))
for line in _:
line = line.strip()
if line != 'END':
key, value = line.split('=')
Landsat8_mlt_dict[key] = value
# print(Landsat8_mlt_dict)
def radiance_to_reflectance(arr, ESUN, ):
# getting the d value
d = float(Landsat8_mlt_dict['EARTH_SUN_DISTANCE '])
# calculating rh phi value from theta
phi = 90 - float(Landsat8_mlt_dict['SUN_ELEVATION '])
# creating the temp array
new_data_array = np.empty_like(arr)
# loop to finf the reflectance
for i, row in enumerate(arr):
for j, col in enumerate(row):
if arr[i][j].all() != np.nan:
new_data_array[i][j] = np.pi * arr[i][j] * d ** 2 / (ESUN * cos(phi * math.pi / 180))
print(f"Reflectance of Band calculated")
return new_data_array

You could use thir-party libraries such as EOReader to convert Landsat bands to reflectance for you.
from eoreader.reader import Reader
from eoreader.bands import RED, GREEN
prod = Reader().open(r"LC08_L1TP_200030_20201220_20210310_02_T1.tar")
# Load those bands as a dict of xarray.DataArray
bands = prod.load([RED, GREEN])
green = bands[GREEN]
red = bands[RED]
Disclaimer: I am the maintener of EOReader
If you want to do that yourself, you should do some tutorials on how to handle arrays in Python.
Never ever loop over them! You should instead vectorize your computations: it will go way faster!

Related

How to display a 2d interpolation function in python as a matrix?

I looked around a lot but it's hard to find an answer. Basically when one interpolates v -> w you would normally use one of the many interpolation functions. But I want to get the corresponding matrix Av = w.
In my case w is a 200x200 matrices with v beeing a random subset of w with half as many points. I don't really care for fancy math it could be as simple as weighting the known points by distance squared. I already tried just implementing it all with some for loops but it only really works with small values. But maybe it helps explaining my question.
from random import sample
def testScatter(xbig, ybig):
NumberOfPoints = int(xbig * ybig / 2) #half as many points as in full Sample
#choose random coordinates
Index = sample(range(xbig * ybig),NumberOfPoints)
IndexYScatter = np.remainder(Index, xbig)
IndexXScatter = np.array((Index - IndexYScatter) / xbig, dtype=int)
InterpolationMatrix = np.zeros((xbig * ybig , NumberOfPoints), dtype=np.float32)
WeightingSum = np.zeros(xbig * ybig )
coordsSamplePoints = []
for i in range(NumberOfPoints): #first set all the given points (no need to interpolate)
coordsSamplePoints.append(IndexYScatter[i] + xbig * IndexXScatter[i])
InterpolationMatrix[coordsSamplePoints[i], i] = 1
WeightingSum[coordsSamplePoints[i]] = 1
for x in range(xbig * ybig): #now comes the interpolation
if x not in coordsSamplePoints:
YIndexInterpol = x % xbig #xcoord in interpolated matrix
XIndexInterpol = (x - YIndexInterpol) / xbig #ycoord in interp. matrix
for y in range(NumberOfPoints):
XIndexScatter = IndexXScatter[y]
YIndexScatter = IndexYScatter[y]
distanceSquared = (np.float32(YIndexInterpol) - np.float32(YIndexScatter))**2+(np.float32(XIndexInterpol) - np.float32(XIndexScatter))**2
InterpolationMatrix[x,y] = 1/distanceSquared
WeightingSum[x] += InterpolationMatrix[x,y]
return InterpolationMatrix/ WeightingSum[:,None] , IndexXScatter, IndexYScatter
You need to spend some time with the Numpy documentation start at the top of this page and working your way down. Studying answers here on SO for questions asking how to vectorize an operation when using Numpy array's would help you. If you find that you are iterating over indices and performing calcs with Numpy arrays there is probably a better way.
First cut...
The first for loop can be replaced with:
coordsSamplePoints = IndexYScatter + (xbig * IndexXScatter)
InterpolationMatrix[coordsSamplePoints,np.arange(coordsSamplePoints.shape[0])] = 1
WeightingSum[coordsSamplePoints] = 1
This mainly makes use of elementwise arithmetic and Index arrays - the complete Indexing Tutorial should be read
You can test this by enhancing the function and executing the for loop along with Numpy way then comparing the result.
...
IM = InterpolationMatrix.copy()
WS = WeightingSum.copy()
for i in range(NumberOfPoints): #first set all the given points (no need to interpolate)
coordsSamplePoints.append(IndexYScatter[i] + xbig * IndexXScatter[i])
InterpolationMatrix[coordsSamplePoints[i], i] = 1
WeightingSum[coordsSamplePoints[i]] = 1
cSS = IndexYScatter + (xbig * IndexXScatter)
IM[cSS,np.arange(cSS.shape[0])] = 1
WS[cSS] = 1
# TEST Validity
print((cSS == coordsSamplePoints).all(),
(IM == InterpolationMatrix).all(),
(WS == WeightingSum).all())
...
The outer loop:
...
for x in range(xbig * ybig): #now comes the interpolation
if x not in coordsSamplePoints:
YIndexInterpol = x % xbig #xcoord in interpolated matrix
XIndexInterpol = (x - YIndexInterpol) / xbig #ycoord in interp. matrix
...
Can be replaced with:
...
space = np.arange(xbig * ybig)
mask = ~(space == cSS[:,None]).any(0)
iP = space[mask] # points to interpolate
yIndices = iP % xbig
xIndices = (iP - yIndices) / xbig
...
Complete solution:
import random
import numpy as np
def testScatter(xbig, ybig):
NumberOfPoints = int(xbig * ybig / 2) #half as many points as in full Sample
#choose random coordinates
Index = random.sample(range(xbig * ybig),NumberOfPoints)
IndexYScatter = np.remainder(Index, xbig)
IndexXScatter = np.array((Index - IndexYScatter) / xbig, dtype=int)
InterpolationMatrix = np.zeros((xbig * ybig , NumberOfPoints), dtype=np.float32)
WeightingSum = np.zeros(xbig * ybig )
coordsSamplePoints = IndexYScatter + (xbig * IndexXScatter)
InterpolationMatrix[coordsSamplePoints,np.arange(coordsSamplePoints.shape[0])] = 1
WeightingSum[coordsSamplePoints] = 1
IM = InterpolationMatrix
cSS = coordsSamplePoints
WS = WeightingSum
space = np.arange(xbig * ybig)
mask = ~(space == cSS[:,None]).any(0)
iP = space[mask] # points to interpolate
yIndices = iP % xbig
xIndices = (iP - yIndices) / xbig
dSquared = ((yIndices[:,None] - IndexYScatter) ** 2) + ((xIndices[:,None] - IndexXScatter) ** 2)
IM[iP,:] = 1/dSquared
WS[iP] = IM[iP,:].sum(1)
return IM / WS[:,None], IndexXScatter, IndexYScatter
I'm getting about 200x improvement with this over your original with (100,100) for the arguments. Probably some other minor improvements but they won't effect execution time significantly.
Broadcasting is another Numpy skill that is a must.

How to Transform Normalization Image Math Equation to Python?

I try to learn how to transform equation to python script.
I choose to start it from FingerPrint Enhancement from Academic resources here.
to start learn i search a fingerprint image to be enhance. I choose this image:
so, i do the first step is converting to gray:
import cv2
import numpy as np
input = 'PATH OF IMAGE'
img = cv2.imread(input)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
and below is the result:
ok the problem start from here...
please try to understood me, I try to learn how to convert math equation to python script.
not try to looking for another / existing script in Github (for example).
the equation is:
all detail from the academic research. Told that:
Let I(i, j) denote the gray-level value at pixel (i, j), M and
VAR denote the estimated mean and variance of I, respectively, and G(i, j) denote the normalized gray-level value at pixel (i, j).
A gray-level fingerprint image, I is defined as an N x N matrix, where I(i, j) represents the intensity of the pixel at the
i-th row and j-th column. We assume that all the images are
scanned at a resolution of 500 dots per inch (dpi). The mean and variance of a gray-level fingerprint image, I, are defined as
and
respectively
ok, we start to transform the equation:
def mean(gray):
rows, cols = gray.shape
sum = 0
for i in range(0,rows):
for j in range(0, cols):
pix = (gray[i,j].item())
sum += pix
M = sum/N
return M
def var(gray, M):
rows, cols = gray.shape
N = gray.size
sum = 0
for i in range(0,rows):
for j in range(0, cols):
vix = ((img[i,j].item()) - M)**2
sum += vix
VAR = sum/N
return VAR
def normalize(img, M0, VAR0):
M = mean(img)
VAR = var(img, M)
rows,cols = img.shape
normim = np.zeros((rows, cols))
for i in range(0, rows):
for j in range(0, cols):
if (gray[i,j].item()) > M:
G0 = M0 + ((((VAR0)*(((gray[i,j].item())-(M))**2))/(VAR))**(1/2))
normim[i,j] = int(G0)
else:
G1 = M0 - ((((VAR0)*(((gray[i,j].item())-(M))**2))/(VAR))**(1/2))
normim[i,j] = int(G1)
return normim
M0 = 100 #follow the academic research document
VAR0 = 100 #follow the academic research document
normgray = normalize(gray, 100,100)
cv2.imshow('test', normgray)
cv2.waitKey(1)
the result is out of expected:
all is white.
can somebody help me? please your advise.
to remind you, I'm not try to looking for the another script / another example. I try to understood how to transform a math equation to python script. about another script, i already have, even i already map it here.
This is a simple problem of not respecting the data types in between transformations. Specifically, when you load in the image, it is going to be unsigned 8-bit integer so the expected values should be within [0, 255], yet your calculations for the mean and variance will exceed this dynamic range and thus your calculations will overflow. The quickest way to resolve this problem is to convert your image so that it will respect a data type that can handle the precision of the calculations you want, like floating-point. Perform the calculations, and when you're done convert the image back to the expected data type, so unsigned 8-bit integer.
In addition, there are several errors in your code. For one thing, you didn't provide the variable N, which should be the total number of pixels in the image. In addition, your var function accepts gray as the variable yet you are using img to try and access pixel data, so this will also give off an error when you try and run it. Finally, you omitted the packages you're using so I added these in.
I've also downloaded your image locally so I can run the code to verify that it works. I've patched up the end of your code so that the image window that displays the result properly closes after you push a key and I've written the output image to file.
Therefore:
# Added so the code can run
import cv2
import numpy as np
# Added so the code can run
gray = cv2.imread('gnN4Q.png', 0)
gray = gray.astype(np.float) # Change to floating-point
N = gray.shape[0]*gray.shape[1]
def mean(gray):
rows, cols = gray.shape
sum = 0
for i in range(0,rows):
for j in range(0, cols):
pix = (gray[i,j].item())
sum += pix
M = sum/N # Added above
return M
def var(gray, M):
rows, cols = gray.shape
N = gray.size
sum = 0
for i in range(0,rows):
for j in range(0, cols):
vix = ((gray[i,j].item()) - M)**2 # Change
sum += vix
VAR = sum/N
return VAR
def normalize(img, M0, VAR0):
M = mean(img)
VAR = var(img, M)
rows,cols = img.shape
normim = np.zeros((rows, cols))
for i in range(0, rows):
for j in range(0, cols):
if (gray[i,j].item()) > M:
G0 = M0 + ((((VAR0)*(((gray[i,j].item())-(M))**2))/(VAR))**(1/2))
normim[i,j] = int(G0)
else:
G1 = M0 - ((((VAR0)*(((gray[i,j].item())-(M))**2))/(VAR))**(1/2))
normim[i,j] = int(G1)
return normim
M0 = 100 #follow the academic research document
VAR0 = 100 #follow the academic research document
normgray = normalize(gray, 100,100)
normgray = normgray.astype(np.uint8) # Added - convert back to uint8
cv2.imshow('test', normgray)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite('output.png', normgray)
The output image we get is:
I didn't run your code but make sure G0 or G1 doesn't get too big. It could be that your value is above 255, thus the resulting all-white image.

Fourier's fit coefficients

lately i am been working fitting a fourier series function to a periodic signal for retrieve the amplitude and the phase of each component via least squares, so i modified the code of this file for it:
import math
import numpy as np
#period of the signal
per=1.0
w = 2.0*np.pi/per
#number of fourier components.
nf = 5
fp = open("file.cat","r")
# m1 is the number of unknown coefficients.
m1 = 2*nf + 1
# Create empty matrices.
x = np.zeros((m1,m1))
y = np.zeros((m1,1))
xi = [0.0]*m1
# Read (time, value) from each line of the file.
for line in fp:
t = float(line.split()[0])
yi = float(line.split()[1])
xi[0] = 1.0
for k in range(1,nf+1):
xi[2*k-1] = np.sin(k*w*t)
xi[2*k] = np.cos(k*w*t)
for j in range(m1):
for k in range(m1):
x[j,k] += xi[j]*xi[k]
y[j] += yi*xi[j]
fp.close()
# Copy to big matrices.
X = np.mat( x.copy() )
Y = np.mat( y.copy() )
# Invert X and multiply by Y to get coefficients.
A = X.I*Y
A0 = A[0]
# Solution is A0 + Sum[ Amp*sin(k*wt + phi) ]
print "a[0] = %f" % A[0]
for k in range(1,nf+1):
amp = math.sqrt(A[2*k-1]**2 + A[2*k]**2)
phs = math.atan2(A[2*k],A[2*k-1])
print "amp[%d] = %f phi = %f" % (k, amp, phs)
but the plot show this (without the points, of course):
and it should show something like this:
somebody can tell me how can i compute the phase and the amplitude in another simpler way? a guide maybe, i will be very grateful.
cheers!
PD. I will attach the FILE that i used, just because :)
EDITED
The error was with a index :(
First, I defined the vector with the values:
amp = np.array([np.sqrt((A[2*k-1])**2 + (A[2*k])**2) for k in range(1,nf+1)])
phs = np.array([math.atan2(A[2*k],A[2*k-1]) for k in range(1,nf+1)])
and then, to build the signal, I defined:
def term(t): return np.array([amp[k]*np.sin(k*w*t + phs[k]) for k in range(len(amp))])
Signal = np.array([A0+sum(term(phase[i])) for i in range(len(mag))])
but within the np.sin(), k should be k+1, because the index start in 0 ·__·
def term(t): return np.array([amp[k]*np.sin((k+1)*w*t + phs[k]) for k in range(len(amp))])
plt.plot(phase,Signal,'r-',lw=3)
and that is all.
Thanks Marco Tompitak for the help!!
You're specifying the wrong period for the signal:
#period of the signal
per=0.178556
This gives you the resulting Fourier fit, indeed with a maximum period of ~0.17. The problem is that this number specifies the longest period that is present in your Fourier series. The function only has components with perior 0.17 or shorter. Apparently you are expecting a fit with period ~1, so it can never approximate that properly. You should specify per=1.0. There's nothing wrong with the algorithm; a quick writeup of a similar algorithm in Mathematica gives the same output and plausible results:

Spyder wont run code - Python

im new to programming and have been using python to simulate to some physical systems, in Spyder on OSX 10.9.2. I dont think this a problem with my code because it runs fine once but then after that when i hit run, the command line (Python interpreter i think its called?) just displays runfile('/Users/Paddy/....name of file) and i cant run the code again after that. even other simple small programs wont run. The '>>>' in the command line has disappeared.
I have searched the web for a solution but to be honest, im not exactly sure what im looking for or what type of error this is, whether its a bug in Spyder or otherwise. Should my code have some sort of 'termination'?
Ive included the full body of code im working on just incase there is an error in there. Like i say, im completely new to this and i cnt tell whether this is an issue with Spyder or my code. Any help would be greatly appreciated, i have a deadline looming! Thanks
# Velocity Verlet integrator
def Verlet(x, V, dt, A):
x_new = x + V*dt + (A(x,V,R)*dt**2)/2
V_new = V + (A(x,V,R) + (2/(2-dt))*((((48/x_new**13)-(24/x_new**7)) - V + (0.5)*A(x,V,R)*dt + 2**(0.5) * R)) )/2 * dt
return (x_new, V_new)
# Start main program
# Import required libraries
import numpy as np
from numpy import array, zeros
import random
mu, sigma = 0, 0.1 # mean and variance
S = np.random.normal(mu, sigma, 1000) # Random numbers generated from gaussian
# Then the function itself
def A(x,V,R):
Acc = (((48/x**13)-(24/x**7)) - V + 2**(0.5) * R)
return Acc
# Set starting values for position and velocity
x = array([5])
V = array([0])
N = 1000 # integration time steps
M = 10 # save position every M timestep
dt = 1.0 / (N) # calculate timestep length in seconds
# Lists for storing the position and velocity
Xlist = zeros([1,N/M]) #define vector dimensions
Vlist = zeros([1,N/M])
# Put the initial values into the lists
Xlist[:,0] = x
Vlist[:,0] = V
# Run simulation
print "Total number of steps:", N
print "Saving location every %d steps." % (M)
print "Start."
for i in range(N/M):
# Run for M steps before saving values
for j in range(M):
# Update position and velocity based on the current ones
# and the acceleration function
R = random.choice(S) # selects random number from S
x, V = Verlet(x, V, dt, A)
# Save values into lists
Xlist[:, i] = x
Vlist[:, i] = V
print ("Stop.")
print (Xlist)
print (Vlist)
L = zeros([1,N/M])
k=0
while k < 101:
l = k+1
L[:,l]
print (L)
# Plot results
from matplotlib import pyplot as plt
#plt.plot(L, Xlist)
# Set equal axis
plt.axis('equal')
# Draw x and y axis lines
plt.axhline(color="black")
plt.axvline(color="black")
#plt.show()
It's an infinite loop in your while k < 101 loop because you never increment k. Try for example:
k=0
while k < 100:
L[:,k]
k += 1
Also note that python is 0 based. So you need k to go from 0 to 99 for a 100 length vector, not 1 to 100.

Optimizing windowed feature generation from input image in Python

I have created a script in Python which takes an image as input and produces a new image where each pixel corresponds to a feature calculated from the windowed group of pixels in the input image. The following picture will highlight this idea:
In the border cases we can either insert NaN into the output image or just use the pixels we have available inside the window. What would be an optimized way to do achieve this functionality in Python or some other programming language? At the moment, my script is simply using a bunch of for-loops to get the job done. Here you can see the code:
# This function will return the statistical features
#
#
# INPUTS:
# 'data' the data from which statistical features are to be calculated
# "winSize" specifying the window size, must be odd and > 1
#
# OUTPUT:
# 'meanData, stdData' statistical feature matrices (numpy ndarrays)
def get_stat_feats(data, winSize):
rows = data.shape[0]
cols = data.shape[1]
dist = int(math.floor(float(winSize)/2.0))
neigh = range(-dist, dist+1)
temp = np.zeros((int(winSize)**2, 1))
meanData = np.zeros(data.shape)
stdData = np.zeros(data.shape)
for row in range(0, rows):
for col in range(0, cols):
index = 0
makeNaN = 0
for y in neigh:
for x in neigh:
indY = row + y
indX = col + x
# Check that we are inside the image
if indY >= 0 and indY <= rows-1 and indX >= 0 and indX <= cols-1:
temp[index] = data[indY, indX]
index += 1
else:
makeNaN = 1
if makeNaN == 1:
meanData[row, col] = np.NAN
stdData[row, col] = np.NAN
else:
meanData[row, col] = np.mean(temp)
stdData[row, col] = np.std(temp)
return meanData, stdData
Thnx for any help! =) If there any more information needed, please ask =)
generic_filter from scipy.ndimage should be a decent solution for this. Probably faster solution, but this is the simplest i think.
It can take a mode parameter to define how to handle the edges. For example you could set it to treat elements outside the border to constant and equal NaN like this:
generic_filter(a, f, size=winSize, mode='constant', cval=np.nan)
def get_stat_feats(data, winSize):
from scipy.ndimage import generic_filter
import numpy as np
mean = lambda x: x.mean()
std = lambda x: x.std()
meanData = generic_filter(data, mean, size=winSize)
stdData = generic_filter(data, std, size=winSize)
return meanData, stdData
force float and round return value:
import numpy as np
def get_stat_feats(data, winSize):
from scipy.ndimage import generic_filter
import numpy as np
data = data.astype(float)
mean = lambda x: x.mean()
std = lambda x: x.std()
meanData = generic_filter(data, mean, size=winSize)
stdData = generic_filter(data, std, size=winSize)
return np.round(meanData,2), np.round(stdData, 2)

Categories