I'm working on an image processing assignment where we are asked to detect the edges of a photo using integral images and specific methods and libraries. Below is the code I wrote:
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter
import matplotlib
import numpy as np

# Function 1: Given an image, returns the image and its squared version in array format
def toArrayAndSquare(im):
    img = [[0 for x in range(im.size[0])] for y in range(im.size[1])]
    sqr = [[0 for x in range(im.size[0])] for y in range(im.size[1])]
    for i in range(0, im.size[0]):
        for j in range(0, im.size[1]):
            img[j][i] = im.getpixel((i, j))
            sqr[j][i] = img[j][i] ** 2
    return img, sqr

# Function 2: Given an image, applies a certain threshold
def applyThreshold(im, th):
    res = [[0 for x in range(len(im[0]))] for y in range(len(im))]
    for i in range(0, len(im)):
        for j in range(0, len(im[0])):
            if im[i][j] < th:
                res[i][j] = 0
            else:
                res[i][j] = 255
    return res

imgArray = toArrayAndSquare(image1)  # image1 is the PIL image loaded elsewhere

# Function 3: Builds a summed-area (integral) image, then the sum of the
# (2k+1) x (2k+1) window around every pixel
def integralArray(imagee, k):
    image = np.array(imagee)
    height = len(image)
    width = len(image[0])
    integral_image = [[0 for y in range(width)] for x in range(height)]
    for i in range(0, height):
        row_sum = 0
        for j in range(0, width):
            row_sum += image[i][j]
            integral_image[i][j] = row_sum
            if i > 0:
                integral_image[i][j] += integral_image[i - 1][j]
    output_image = [[0 for y in range(width)] for x in range(height)]
    for i in range(height):
        for j in range(width):
            min_row, max_row = max(0, i - k), min(height - 1, i + k)
            min_col, max_col = max(0, j - k), min(width - 1, j + k)
            output_image[i][j] = integral_image[max_row][max_col]
            if min_row > 0:
                output_image[i][j] -= integral_image[min_row - 1][max_col]
            if min_col > 0:
                output_image[i][j] -= integral_image[max_row][min_col - 1]
            if min_col > 0 and min_row > 0:
                output_image[i][j] += integral_image[min_row - 1][min_col - 1]
    return output_image

image3 = integralArray(imgArrayDouble, 1)  # imgArrayDouble is defined elsewhere

# Sum of the rectangle spanned by two corners of an integral image
def localSum(imagee, x1, y1, x2, y2):
    image = np.array(imagee)
    topLeft = image[x1][y1]
    bottomRight = image[x2][y2]
    bottomLeft = image[x1][y2]
    topRight = image[x2][y1]
    Local_Sum = topLeft + bottomRight - bottomLeft - topRight
    print(topLeft, bottomRight, bottomLeft, topRight)
    return Local_Sum

# The function I'm stuck on: it should compute the per-pixel variance with a sliding window
def imgWithIntegral(imagee, x, y):
    # Variance = mean of square of image - square of mean of image
    array1 = imagee[0]
    array2 = imagee[1]
    OutputArray = np.array(imagee)
    OutputArray = (np.var(array1)) ** 2 - np.var(array2)
    return OutputArray
Now I'm asked to implement another function that calculates the variance of every pixel using a sliding window; that is the last method in the code, and I don't know how it should be done.
Any help?
I didn't expect much from the last method, since the aim is to build a sliding window that computes the variance of every pixel, and I didn't know how to create it.
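In case it helps, here is a minimal sketch of how the per-pixel variance could be computed with the pieces already in the code, using the identity in the comment of the last method (variance = mean of the squared image minus the square of the mean). It calls toArrayAndSquare and integralArray from above; the function name slidingWindowVariance and the border handling are my own assumptions, not part of the assignment:

def slidingWindowVariance(im, k):
    # Window sums of the image and of its square, via the integral-image helper above
    img, sqr = toArrayAndSquare(im)
    sumImg = integralArray(img, k)   # sum of pixels in each (2k+1) x (2k+1) window
    sumSqr = integralArray(sqr, k)   # sum of squared pixels in the same window
    height, width = len(img), len(img[0])
    variance = [[0 for x in range(width)] for y in range(height)]
    for i in range(height):
        for j in range(width):
            # Actual window size (smaller near the image borders)
            rows = min(height - 1, i + k) - max(0, i - k) + 1
            cols = min(width - 1, j + k) - max(0, j - k) + 1
            n = rows * cols
            mean = sumImg[i][j] / n
            meanOfSquares = sumSqr[i][j] / n
            variance[i][j] = meanOfSquares - mean ** 2
    return variance

The result could then be passed through applyThreshold to keep only the high-variance pixels, which is where the edges are.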
I have used this method to create an inverse mapping to redistort an image, and it works fine. Here's what it looks like in code:
# invert the mapping
combined_map_inverted = invert_map(combined_map, shape)
# apply mapping to image
frame = cv2.remap(img, combined_map_inverted, None, cv2.INTER_LINEAR)
Notice that it's a combined map, not separated into x and y. How can I take a single (x, y) point in the undistorted image and find the corresponding distorted point? I see this answer, but I'm unsure how to apply it to my case.
The combined map is a simple look-up table - mapping from (u, v) to x and from (u, v) to y.
Assume (u, v) is the column, row coordinate of the undistorted image.
Then the coordinate in the distorted image is:
x = combined_map_inverted[v, u, 0]
y = combined_map_inverted[v, u, 1]
In more compact form:
x, y = combined_map_inverted[v, u].tolist()
If we want to get the value at the (x, y) coordinate, we may use bi-linear interpolation as described in my following answer (or another kind of interpolation).
I tried testing it using the code from your previous post:
import cv2
import glob
import numpy as np
import math
import os

if os.path.isfile('xymap_inverted.npy'):
    xymap_inverted = np.load('xymap_inverted.npy')
else:
    A = -1010
    B = -3.931
    C = 5.258
    D = 978.3
    M = -193.8
    N = 1740

    def get_tan_func_value(x):
        return A * math.tan((((x-N)/M)+B)/C) + D

    def get_inverse_tan_func_value(x):
        return M * (C*math.atan((x-D)/A) - B) + N

    # answer from linked post
    #def invert_map(F, shape):
    #    I = np.zeros_like(F)
    #    I[:,:,1], I[:,:,0] = np.indices(shape)
    #    P = np.copy(I)
    #    for i in range(10):
    #        P += I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
    #    return P

    # https://stackoverflow.com/a/72649764/4926757
    def invert_map(F):
        (h, w) = F.shape[:2]  # (h, w, 2), "xymap"
        I = np.zeros_like(F)
        I[:,:,1], I[:,:,0] = np.indices((h, w))  # identity map
        P = np.copy(I)
        for i in range(10):
            correction = I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
            P += correction * 0.5
        return P

    # import image
    #images = glob.glob('*.jpg')
    img = cv2.imread('image1.jpg')  #img = cv2.imread(images[0])
    h, w = img.shape[:2]

    map_x_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
    map_x_inverse_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
    map_y = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)

    # x tan function map
    for i in range(map_x_tan.shape[0]):
        map_x_tan[i,:] = [get_tan_func_value(x) for x in range(map_x_tan.shape[1])]

    # x inverse tan function map
    for i in range(map_x_inverse_tan.shape[0]):
        map_x_inverse_tan[i,:] = [get_inverse_tan_func_value(x) for x in range(map_x_inverse_tan.shape[1])]

    # default y map
    for j in range(map_y.shape[1]):
        map_y[:,j] = [y for y in range(map_y.shape[0])]

    # convert x tan map to 2 channel (x,y) map
    (xymap_tan, _) = cv2.convertMaps(map1=map_x_tan, map2=map_y, dstmap1type=cv2.CV_32FC2)

    # invert the 2 channel x tan map
    xymap_inverted = invert_map(xymap_tan)

    np.save('xymap_inverted.npy', xymap_inverted)

combined_map_inverted = xymap_inverted

u = 150
v = 120

x, y = combined_map_inverted[v, u].tolist()
The output is:
x = 278.2418212890625
y = 120.0
Bi-linear interpolation example:
x0 = int(x)
y0 = int(y)
x1 = int(x0 + 1)
y1 = int(y0 + 1)
dx = x - x0
dy = y - y0
new_pixel = np.round(img[y0,x0]*(1-dx)*(1-dy) + img[y1,x0]*(1-dx)*dy + img[y0,x1]*dx*(1-dy) + img[y1,x1]*dx*dy)
Testing by remapping an entire image, and comparing with cv2.remap:
def bilinear_interp(img, x, y):
    x0 = int(x)
    y0 = int(y)
    x1 = int(x0 + 1)
    y1 = int(y0 + 1)
    dx = x - x0
    dy = y - y0
    new_pixel = np.round(img[y0,x0]*(1-dx)*(1-dy) + img[y1,x0]*(1-dx)*dy + img[y0,x1]*dx*(1-dy) + img[y1,x1]*dx*dy)
    return new_pixel.astype(np.uint8)

img = cv2.imread('image1.jpg')

ref_img = cv2.remap(img, xymap_inverted, None, cv2.INTER_LINEAR)
cv2.imwrite('ref_img.jpg', ref_img)

new_img = np.zeros_like(img)

for v in range(img.shape[0]):
    for u in range(img.shape[1]):
        x, y = combined_map_inverted[v, u].tolist()
        if (x >= 0) and (y >= 0) and (x < img.shape[1]-1) and (y < img.shape[0]-1):
            new_img[v, u] = bilinear_interp(img, x, y)

cv2.imwrite('new_img.jpg', new_img)

abs_diff = cv2.absdiff(ref_img, new_img)
cv2.imshow('abs_diff', abs_diff)  # Display the absolute difference for testing
cv2.waitKey()
cv2.destroyAllWindows()
ref_img and new_img are almost the same.
I am trying to implement a version of Shi-Tomasi without using the GoodFeaturesToTrack() function. How do I do this using non-maximal suppression? Code attached if it helps; I'm trying to save the location of each max x, each max y, and Q into the array results.
def nonMaximalSupress(image, NHoodSize):
    dX = NHoodSize
    dY = NHoodSize
    M, N = image.shape
    for x in range(0, M-dX+1):
        for y in range(0, N-dY+1):
            window = image[x:x+dX, y:y+dY]
            if np.sum(window) == 0:
                localMax = 0
            else:
                localMax = np.amax(window)
            maxCoord = np.argmax(window)
            # zero all but the localMax in the window
            window[:] = 0
            window.flat[maxCoord] = localMax
    return image
################################
NUMRESULTS = 5000

def detectKeypoints(img, alpha):
    I = np.float32(img)
    results = np.zeros((NUMRESULTS, 3), dtype=float)
    sigma = float(1.414)
    ksize = int(1 + np.sqrt(-2*sigma**2*np.log(0.005)))

    ############# my code here ############
    # Shi-Tomasi Method
    # Find the X and Y gradient components using Sobel kernels
    Ix = cv2.Sobel(I, cv2.CV_64F, 1, 0, ksize=ksize)
    Iy = cv2.Sobel(I, cv2.CV_64F, 0, 1, ksize=ksize)

    # components of the local structure matrix
    A = np.square(Ix)
    B = np.square(Iy)
    C = np.multiply(Ix, Iy)
    M = [[A, C],  # local structure matrix
         [C, B]]

    # get a Gaussian kernel for the set ksize and sigma value
    gauss = cv2.getGaussianKernel(ksize, sigma)

    # smooth individual components of the local structure matrix
    A_filter = cv2.filter2D(A, cv2.CV_64F, gauss)
    B_filter = cv2.filter2D(B, cv2.CV_64F, gauss)
    C_filter = cv2.filter2D(C, cv2.CV_64F, gauss)

    Q = 0
    yloc = 0
    xloc = 0
    for c in range(img.shape[0]):
        for r in range(img.shape[1]):
            M_bar = ([[A_filter[c,r], C_filter[c,r]],
                      [C_filter[c,r], B_filter[c,r]]])
            eg = np.linalg.eigvals(M_bar)
            #img[c,r] = min(eg[0], eg[1])
            Q = min(eg[0], eg[1])

    possible_corner_locations = nonMaximalSupress(img, ksize)

    for c1 in range(possible_corner_locations.shape[0]):
        for r1 in range(possible_corner_locations.shape[1]):
            if possible_corner_locations[c1,r1] != 0:
                yloc = c1
                xloc = r1
                for i in range(NUMRESULTS):
                    results[i,0] = xloc
                    results[i,1] = yloc
                    results[i,2] = Q
            else:
                pass

    return results
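For comparison, here is a rough sketch of one common way to organise the response and suppression steps: store the minimum eigenvalue for every pixel in its own response map (rather than overwriting a single scalar Q), run the suppression on that map, and then keep the strongest surviving responses. It uses the closed-form minimum eigenvalue of the 2x2 structure matrix instead of np.linalg.eigvals; the helper name shiTomasiResponse and the wiring shown in the comments are my own assumptions, not the assignment's required approach:

def shiTomasiResponse(A_filter, B_filter, C_filter):
    # Minimum eigenvalue of [[A, C], [C, B]] at every pixel, computed in closed form
    trace_half = (A_filter + B_filter) / 2.0
    root = np.sqrt(((A_filter - B_filter) / 2.0) ** 2 + C_filter ** 2)
    return trace_half - root

# Inside detectKeypoints, after the filter2D calls, something along these lines:
# response = shiTomasiResponse(A_filter, B_filter, C_filter)
# response = nonMaximalSupress(response, ksize)                    # one maximum per window
# flat_idx = np.argsort(response, axis=None)[::-1][:NUMRESULTS]    # strongest first
# ys, xs = np.unravel_index(flat_idx, response.shape)
# results = np.column_stack((xs, ys, response[ys, xs])).astype(float)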
I'm trying to evaluate image quality by implementing nearest-neighbour and bi-linear interpolation to resize an image. Currently the two output images look identical, and I cannot work out why the bi-linear method is not producing the smooth output picture it should. Below is nearest neighbour:
def scale_image_NN(image, scaling_factor):
    cv2.imshow('Original image', lena)
    cv2.waitKey(0)
    print 'Running'
    size = np.shape(image)
    scaled_image = np.zeros((size[0]*scaling_factor, size[1]*scaling_factor, 3), dtype=np.uint32)
    for i in range(0, scaling_factor*size[0]-3):
        for j in range(0, scaling_factor*size[1]-3):
            x = int(m.floor(i/scaling_factor))
            y = int(m.floor(j/scaling_factor))
            for k in range(0, 3):
                scaled_image[i+1, j+1, k] = image[x+1, y+1, k]
    cv2.imshow('Scaled image - NN', scaled_image)
    cv2.waitKey(0)
    cv2.imwrite('NN.jpg', scaled_image)
and subsequently bi-linear interpolation
def scale_image_BL(image, scaling_factor):
    cv2.imshow('Original image', lena)
    cv2.waitKey(0)
    print 'Running'
    orig_size = np.shape(image)
    h = orig_size[0]
    w = orig_size[1]
    c = orig_size[2]
    r = scaling_factor
    padded_image = np.zeros((h*scaling_factor, w*scaling_factor, c), dtype=np.uint8)
    for i in range(0, h*scaling_factor):
        x1 = int(m.floor(i/r))
        x2 = int(m.ceil(i/r))
        if x1 == 0:
            x1 = 1
        x = np.remainder(i/r, 1)
        for j in range(0, w*scaling_factor):
            y1 = int(m.floor(j/r))
            y2 = int(m.ceil(j/r))
            if y1 == 0:
                y1 = 1
            ctl = image[x1, y1, :]
            cbl = image[x2, y1, :]
            ctr = image[x1, y2, :]
            cbr = image[x2, y2, :]
            y = np.remainder(j/r, 1)
            tr = (ctr*y) + (ctl*(1-y))
            br = (ctr*y) + (cbl*(1-y))
            padded_image[i, j, :] = (br*x) + (tr*(1-x))
    scaledImage = padded_image.astype(np.uint8)
    cv2.imshow('Scaled image - BL', scaledImage)
    cv2.waitKey(0)
    cv2.imwrite('BL.jpg', scaledImage)
The problem was due to the issue described in Why does Python return 0 for simple division calculation?
During the calculation of the two positions to interpolate between, say x1 or x2 in the bi-linear interpolation, Python was returning 0 for simple division such as 1/2 instead of 0.5, so there weren't always two points to interpolate between, resulting in the NN-type output.
For scale_image_BL(image, scaling_factor) to work, simply include:
from __future__ import division
at the beginning of the script.
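For completeness, a minimal illustration of the Python 2 division pitfall and the fix (the variable names are just for demonstration):

from __future__ import division  # must appear before any other code in the module

i, r = 1, 2
print(i / r)    # 0.5 with the future import; plain Python 2 would print 0
print(i // r)   # 0 -- floor division is still available when truncation is wanted

An alternative is to cast explicitly, e.g. float(i) / r, wherever true division is intended.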
I'm attempting to implement the algorithm for generating 2D Perlin noise described here, but I'm having some trouble doing it in Python (which I am relatively new to).
I was expecting the final noise values ('z' in the linked example) to be somewhere between 0.0 and 1.0, but that's not what I'm getting. My code is below; I'd really appreciate any input.
Thanks!
perlin.py:
import math
import numpy
import random
import vector as vctr
from PIL import Image

def dot(v1, v2):
    """
    Returns the dot product of the two input vectors.

    Args:
        v1 - First vector
        v2 - Second vector

    Return:
        Resulting dot product
    """
    return (v1.x * v2.x) + (v1.y * v2.y)

def fade(t):
    """
    Fade 3t^2 - 2t^3

    Args:
        t - Value to fade.

    Return:
        Faded value.
    """
    return (3 * (t ** 2)) - (2 * (t ** 3))

def lerp(minVal, maxVal, term):
    """
    Args:
    Return:
    """
    return (maxVal - minVal) * term + minVal

def generateImage(noises, file="perlin.png"):
    """
    Generates an image on disc of the resulting noise values

    Args:
        noises (list) - 2d list of noise values
        file (str) - location of file to write to
    """
    pixels = numpy.zeros((height, width, 3), dtype=numpy.uint8)
    for x in range(0, width):
        for y in range(0, height):
            rgb = 255 * noises[x][y]
            pixels[x, y] = [rgb, rgb, rgb]

    # Print pixels as image
    img = Image.fromarray(pixels, 'RGB')
    img.save(file)

# Define the noise region
width = 300
height = 300

# Column ordered array of generated gradient vectors
g = numpy.zeros((width + 1, height + 1)).tolist()

# List of final noise values
z = numpy.zeros((width, height)).tolist()

# Fill list with randomly directed unit vectors (one for each grid point)
for x in range(0, width + 1):
    for y in range(0, height + 1):
        randX = random.uniform(-1.0, 1.0)
        randY = random.uniform(-1.0, 1.0)
        length = math.sqrt(randX**2 + randY**2)
        g[x][y] = vctr.vector(randX / length, randY / length)

# For each cell in the sampling space (i.e. each pixel)
for x in range(0, width):
    for y in range(0, height):
        # Generate random point (p) within and relative to current cell
        pX = random.uniform(0.0, 1.0)
        pY = random.uniform(0.0, 1.0)

        # Get the gradient vectors for each cell corner
        g_tl = g[x][y]
        g_tr = g[x + 1][y]
        g_bl = g[x][y + 1]
        g_br = g[x + 1][y + 1]

        # Vectors from each cell corner to the generated point
        # X axis is positive going right, Y is positive going down
        tl = vctr.vector(pX, pY)
        tr = vctr.vector(pX - 1, pY)
        bl = vctr.vector(pX, pY - 1)
        br = vctr.vector(pX - 1, pY - 1)

        # Dot product these vectors to get gradient values
        u = dot(tl, g_tl)
        v = dot(tr, g_tr)
        s = dot(bl, g_bl)
        t = dot(br, g_br)

        # Interpolate the gradient values
        sX = fade(pX)
        sY = fade(pY)
        a = s + (sX * (t - s))
        b = u + (sX * (v - u))
        value = a + (sY * (a - b))

        if (value < 0.0) or (value > 1.0):
            print("VALUE IS OUT OF BOUNDS??? " + str(value))

        z[x][y] = value

generateImage(z)
print("Completed Perlin noise generation!")
vector.py:
class vector:
    def __init__(self, x, y):
        """
        Initialise a new vector in 2D space with the input X and Y values.

        x: X value of vector
        y: Y value of vector
        """
        self.x = x
        self.y = y
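As a side note on the expected range: classic Perlin noise is built from dot products with unit gradient vectors, so individual values can be negative and the result is not naturally confined to 0.0 - 1.0. If the goal is just to write the noise out as an image, one option is to rescale the finished z grid first; a minimal sketch (the helper name normaliseNoise is my own, not from the linked algorithm):

import numpy

def normaliseNoise(noises):
    """Rescale a 2D list of noise values to the range [0.0, 1.0] for image output."""
    arr = numpy.array(noises, dtype=float)
    span = arr.max() - arr.min()
    if span == 0:
        return numpy.zeros_like(arr).tolist()
    return ((arr - arr.min()) / span).tolist()

# z = normaliseNoise(z)
# generateImage(z)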
I have this programme to discuss, and I think it's a challenging one. Here I have a YML file which contains the data for an image: the image's x, y, z values and intensity data are stored in this file. I have used OpenCV to load the data, and the masking works fine, but I am having problems dynamically appending to the masks created. Here is the code I wrote to solve the problem:
import cv
from math import floor, sqrt, ceil
from numpy import array, dot, subtract, add, linalg as lin

mask_size = 9
mask_size2 = mask_size / 2

f = open("Classified_Image1.txt", "w")

def distance(centre, point):
    ''' To find out the distance between centre and the point '''
    dist = sqrt(
        ((centre[0]-point[0])**2) +
        ((centre[1]-point[1])**2) +
        ((centre[2]-point[2])**2)
    )
    return dist

def CalcCentre(points):  # Calculates centre for a given set of points
    centre = array([0, 0, 0])
    count = 0
    for p in points:
        centre = add(centre, array(p[:3]))
        count += 1
    centre = dot(1./count, centre)
    print centre
    return centre

def addrow(data, points, x, y, ix, iy):  # adds row to the mask
    iy = y + 1
    for dx in xrange(-mask_size2, mask_size2 + 2):
        ix = x + dx
        rowpoints = addpoints(data, points, iy, ix)
    return rowpoints

def addcolumn(data, points, x, y, ix, iy):  # adds column to the mask
    ix = x + 1
    for dy in xrange(-mask_size2-1, mask_size2 + 1):
        iy = y + dy
        columnpoints = addpoints(data, points, iy, ix)
    return columnpoints

def addpoints(data, points, iy, ix):  # adds a list of relevant points
    if 0 < ix < data.width and 0 < iy < data.height:
        pnt = data[iy, ix]
        if pnt != (0.0, 0.0, 0.0):
            print ix, iy
            print pnt
            points.append(pnt)
    return points

def CreateMask(data, y, x):
    radius = 0.3
    points = []
    for dy in xrange(-mask_size2, mask_size2 + 1):  # Masking the data
        for dx in xrange(-mask_size2, mask_size2 + 1):
            ix, iy = x + dx, y + dy
            points = addpoints(data, points, iy, ix)
    if len(points) > 3:
        centre = CalcCentre(points)
        distances = []
        for point in points:
            dist = distance(centre, point)
            distances.append(dist)
        distancemax = max(distances)
        print distancemax
        if distancemax < radius:  # Dynamic part of the programme
            #while dist < radius:  # Going into infinite loop .. why?
            p = addrow(data, points, x, y, ix, iy)
            q = addcolumn(data, points, x, y, ix, iy)
            dist = distance(centre, point)  # while should not go into an infinite
                                            # loop as dist is changing here
            print dist
            print len(p), p
            print len(q), q
            points = p + q
            points = list(set(points))  # To remove duplicate points in the list
            print len(points), points

def ComputeClasses(data):
    for y in range(0, data.height):
        for x in range(0, data.width):
            CreateMask(data, y, x)

if __name__ == "__main__":
    data = cv.Load("Z:/data/xyz_00000_300.yml")
    print "data loaded"
    ComputeClasses(data)
Feel free to suggest alternative methods/ideas to solve this problem.
Thanks in advance.
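One observation on the commented-out while loop: dist is recomputed from the old centre and whichever point the earlier for loop happened to finish on, so nothing inside the loop ever changes the condition, and it spins forever. Below is a rough sketch of how the growing step could be restructured so the condition actually depends on the newly added points, reusing the existing addrow, addcolumn, CalcCentre and distance helpers; the growMask name and the way the window is shifted are only assumptions about the intended behaviour:

def growMask(data, points, x, y, ix, iy, radius):
    # Grow the mask until it spills past the radius or stops picking up new points
    while True:
        before = len(points)
        points = list(set(addrow(data, points, x, y, ix, iy)))
        points = list(set(addcolumn(data, points, x, y, ix, iy)))
        centre = CalcCentre(points)                      # recompute for the grown mask
        distancemax = max(distance(centre, p) for p in points)
        if distancemax >= radius or len(points) == before:
            return points                                # condition can now terminate the loop
        x += 1                                           # shift so the next pass adds new rows/columns
        y += 1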