Related
Recently, I tried to implement Phong shading with only NumPy and PIL using python. But there is some black-and-white noise in the rendered image. Can you point out what I should do to improve my code to fix the issue?
The resulting image is as follows:
The mesh model could be downloaded from https://github.com/google/nerfactor/blob/main/third_party/xiuminglib/data/models/teapot.obj.
You could try the code below by yourself.
import random
import numpy as np
import trimesh
from meshio import load_obj
from PIL import Image
def phong_shading(light_direction, view_direction, normal, material):
# Calculate the ambient color
ambient_color = material.ambient_color
# Calculate the diffuse color
diffuse_coefficient = max(np.dot(normal, light_direction), 0)
diffuse_color = diffuse_coefficient * material.diffuse_color
# Calculate the specular color
halfway_direction = normalize(light_direction + view_direction)
specular_coefficient = max(np.dot(normal, halfway_direction), 0)
specular_coefficient = specular_coefficient ** material.shininess
specular_color = specular_coefficient * material.specular_color
# Combine the ambient, diffuse and specular colors
final_color = specular_color + diffuse_color + ambient_color
return final_color
def normalize(v, axis=-1, epsilon=1e-12):
square_sum = np.sum(np.square(v), axis, keepdims=True)
v_inv_norm = 1. / np.sqrt(np.maximum(square_sum, epsilon))
return v * v_inv_norm
def rasterize_triangle(vertices):
# calculate the bounding box of the triangle
min_x = int(min(vertices[:, 0]))
max_x = int(max(vertices[:, 0])) + 1
min_y = int(min(vertices[:, 1]))
max_y = int(max(vertices[:, 1])) + 1
for x in range(min_x, max_x):
for y in range(min_y, max_y):
if point_in_triangle(vertices, x, y):
yield (x, y)
def is_point_in_triangle(vertices, x, y):
v0, v1, v2 = vertices
A = 1/2 * (-v1[1]*v2[0] + v0[1]*(-v1[0] + v2[0]) +
v0[0]*(v1[1] - v2[1]) + v1[0]*v2[1])
s = v0[1]*v2[0] - v0[0]*v2[1] + (v2[1] - v0[1])*x + (v0[0] - v2[0])*y
t = v0[0]*v1[1] - v0[1]*v1[0] + (v0[1] - v1[1])*x + (v1[0] - v0[0])*y
return 0 <= s and s <= A and 0 <= t and t <= A and (s + t) <= A
def point_in_triangle(vertices, x, y):
# x, y = point
v0, v1, v2 = vertices
x1, y1, x2, y2, x3, y3 = v0[0], v0[1], v1[0], v1[1], v2[0], v2[1]
# Compute barycentric coordinates
denom = (y2 - y3) * (x1 - x3) + (x3 - x2) * (y1 - y3)
l1 = ((y2 - y3) * (x - x3) + (x3 - x2) * (y - y3)) / denom
l2 = ((y3 - y1) * (x - x3) + (x1 - x3) * (y - y3)) / denom
l3 = 1 - l1 - l2
# Check if point is inside the triangle
return 0 <= l1 <= 1 and 0 <= l2 <= 1 and 0 <= l3 <= 1
def world_to_camera_coordinates(vertices, camera_position):
''' convert from world coordinate to camera_coordinate.
this function has the assumption that the camera is looking at the origin.
and the y axis of the camera is pointing down to the ground.
Args:
vertices (np.array): the vertices of the mesh in world coordinate.
Returns:
the vertices in camera coordinate.
'''
camera_z_axis = -normalize(camera_position) # (3,)
world_z_axis = np.array([0, 0, 1])
project_y_on_z = -(-world_z_axis # camera_z_axis.T) * camera_z_axis
camera_y_axis = project_y_on_z - world_z_axis # (3,)
camera_x_axis = np.cross(camera_y_axis, camera_z_axis) # (3,)
camera_matrix = np.stack([camera_x_axis, camera_y_axis, camera_z_axis])
return (camera_matrix # (vertices - camera_position).T).T
def camera_to_screen_coordinates(vertices, width, height, fov, near_clip, far_clip):
aspect_ratio = width / height
# Create the perspective projection matrix
projection_matrix = perspective(fov, aspect_ratio, near_clip, far_clip)
# create a matrix to store the transformed vertices
transformed_vertices = np.ones((len(vertices), 4))
transformed_vertices[:, :3] = vertices
# multiply each vertex by the projection matrix
transformed_vertices = np.matmul(transformed_vertices, projection_matrix.T)
# Convert from homogeneous coordinates to screen coordinates
transformed_vertices[:, 0] = (
transformed_vertices[:, 0] / transformed_vertices[:, 3]) * (width / 2) + (width / 2)
transformed_vertices[:, 1] = (
transformed_vertices[:, 1] / transformed_vertices[:, 3]) * (height / 2) + (height / 2)
return transformed_vertices[:, :2]
def perspective(fov, aspect_ratio, near_clip, far_clip):
fov = np.radians(fov)
t = np.tan(fov / 2) * near_clip
b = -t
r = t * aspect_ratio
l = -r
projection_matrix = np.array(
[
[(2 * near_clip) / (r - l), 0, (r + l) / (r - l), 0],
[0, (2 * near_clip) / (t - b), (t + b) / (t - b), 0],
[0, 0, -(far_clip + near_clip) / (far_clip - near_clip),
-(2 * far_clip * near_clip) / (far_clip - near_clip)],
[0, 0, -1, 0]
]
)
return projection_matrix
def transform_to_screen_space(vertices, camera_position, img_width, img_height):
assert img_width == img_height, 'The image must be square'
# Transform the vertices to camera space
camera_vertices = world_to_camera_coordinates(vertices, camera_position)
# Transform the vertices to perspective space
fov = 45
focal = img_width / (2 * np.tan(np.radians(fov / 2)))
screen_vertices = camera_vertices / camera_vertices[:, 2].reshape(-1, 1)
screen_vertices[:, :2] = screen_vertices[:, :2] * focal + img_height / 2
return screen_vertices, camera_vertices
def area_triangle(v1, v2, v3):
''' compute the area of a triangle.
'''
return 0.5 * np.linalg.norm(np.cross(v2 - v1, v3 - v1))
def compute_vertices_normals(vertices, faces):
''' compute the normal vector for each vertex.
Args:
vertices (np.array): the vertices of the mesh in world coordinate.
faces
'''
# method with trimesh
# '''
mesh = trimesh.Trimesh(vertices=vertices, faces=faces, processed=False)
vertices_normals = normalize(mesh.vertex_normals, epsilon=1e-160)
# '''
# method with numpy
'''
vertices_normals = np.zeros_like(vertices).astype(np.float128)
v1 = vertices[faces][:, 0]
v2 = vertices[faces][:, 1]
v3 = vertices[faces][:, 2]
normal_before_normalization = np.cross(v2 - v1, v3 - v1)
per_face_area = 0.5 * np.linalg.norm(
normal_before_normalization, axis=-1, keepdims=True
)
per_face_area_enlarged = per_face_area * \
per_face_area.shape[0] / per_face_area.sum()
per_face_normal = normalize(normal_before_normalization, epsilon=1e-160)
weighted_normal = per_face_normal * per_face_area_enlarged
weighted_normal_boardcast = np.reshape(
np.repeat(np.expand_dims(weighted_normal, 1), 3, axis=1), (-1, 3)
)
np.add.at(vertices_normals, faces.ravel(), weighted_normal_boardcast)
vertices_normals = normalize(vertices_normals, epsilon=1e-160)
'''
return vertices_normals
def barycentric_coords(triangle_vertices, x, y):
x1, y1, z1 = triangle_vertices[0]
x2, y2, z2 = triangle_vertices[1]
x3, y3, z3 = triangle_vertices[2]
# calculate barycentric coordinates
lambda1 = ((y2 - y3)*(x - x3) + (x3 - x2)*(y - y3)) / \
((y2 - y3)*(x1 - x3) + (x3 - x2)*(y1 - y3))
lambda2 = ((y3 - y1)*(x - x3) + (x1 - x3)*(y - y3)) / \
((y2 - y3)*(x1 - x3) + (x3 - x2)*(y1 - y3))
lambda3 = 1 - lambda1 - lambda2
return np.array([lambda1, lambda2, lambda3]).reshape(-1, 1)
def render_phong(vertices, faces, camera_position, light_position, width, height, material):
# compute the normal vector for each vertex
vertices_normals = compute_vertices_normals(vertices, faces)
# Transform the vertices to screen space
transformed_vertices, camera_vertices = transform_to_screen_space(
vertices, camera_position, width, height)
# Create an empty image
img = Image.new('RGB', (width, height), (0, 0, 0))
pixels = img.load()
pixel_depth = np.ones((width, height)) * np.inf
for face in faces:
v1 = transformed_vertices[face[0]]
v2 = transformed_vertices[face[1]]
v3 = transformed_vertices[face[2]]
if area_triangle(v1, v2, v3) == 0:
continue
# calculate the normal vector for the face
normal = vertices_normals[face]
# calculate the light and view direction vectors for each vertex
light_direction = normalize(light_position - vertices[face])
view_direction = normalize(camera_position - vertices[face])
# Rasterize the triangle
for x, y in rasterize_triangle(transformed_vertices[face]):
for i in range(20):
tubx = random.uniform(0, 1.0) + x
tuby = random.uniform(0, 1.0) + y
# calculate the barycentric coordinates of the pixel
barycentric = barycentric_coords(
transformed_vertices[face], tubx, tuby)
if np.min(barycentric) < 0: # Check if pixel is outside of the triangle
continue
# Interpolate the vertex attributes to get per-pixel attributes
interpolated_normal = (barycentric * normal).sum(axis=0)
interpolated_light_direction = (
barycentric * light_direction
).sum(axis=0)
interpolated_view_direction = (
barycentric * view_direction
).sum(axis=0)
interpolated_camera_vertices = (
barycentric * camera_vertices[face]).sum(axis=0)
# Calculate the color of the pixel
color = phong_shading(interpolated_light_direction,
interpolated_view_direction, interpolated_normal, material)
if x >= 0 and x < width and y >= 0 and y < height:
oldr, oldg, oldb = pixels[x, y]
newr, newg, newb = (np.clip(color, 0, 1)
* 255).astype(np.uint8)
# newr = newr if newr > oldr else oldr
# newg = newg if newg > oldg else oldg
# newb = newb if newb > oldb else oldb
depth = interpolated_camera_vertices[2]
if depth < pixel_depth[x, y]:
# print(depth, pixel_depth[x, y])
pixel_depth[x, y] = depth
pixels[x, y] = (newr, newg, newb)
# if x < 453 and x > 415 and y > 255 and y < 265:
# img.save(f"debug/f_{face}_x_{x}_y_{y}_d_{depth}.jpg")
return img
class PhongShader():
def __init__(self, light_position, camera_position, image_width=512, image_height=512):
# assert the camera position is not along z axis.
self.light_position = light_position
self.camera_position = camera_position
self.image_width = image_width
self.image_height = image_height
def render(self, vertices, faces, material):
return render_phong(vertices, faces, self.camera_position, self.light_position, self.image_width, self.image_height, material)
class Material():
def __init__(self) -> None:
self.ambient_color = np.array([0.1, 0.1, 0.1])
self.diffuse_color = np.array([1., 0.0, 0.5])
self.specular_color = np.array([0.5, 0.5, 0.5])
self.shininess = 50
def main():
# load the mesh
mesh = trimesh.load('teapot.obj')
vertices, faces = mesh.vertices, mesh.faces
# create a shader
shader = PhongShader(light_position=np.array(
[8, 0, 0]), camera_position=np.array([8, 0, 0]))
# render the image
material = Material()
img = shader.render(vertices, faces, material)
img.save("output.jpg")
if __name__ == '__main__':
main()
The possible reason could be discreazation in coding. But I am not sure how to fix it.
I have a set of 68 keypoints (size [68, 2]) that I am mapping to gaussian heatmaps. To do this, I have the following function:
def generate_gaussian(t, x, y, sigma=10):
"""
Generates a 2D Gaussian point at location x,y in tensor t.
x should be in range (-1, 1).
sigma is the standard deviation of the generated 2D Gaussian.
"""
h,w = t.shape
# Heatmap pixel per output pixel
mu_x = int(0.5 * (x + 1.) * w)
mu_y = int(0.5 * (y + 1.) * h)
tmp_size = sigma * 3
# Top-left
x1,y1 = int(mu_x - tmp_size), int(mu_y - tmp_size)
# Bottom right
x2, y2 = int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)
if x1 >= w or y1 >= h or x2 < 0 or y2 < 0:
return t
size = 2 * tmp_size + 1
tx = np.arange(0, size, 1, np.float32)
ty = tx[:, np.newaxis]
x0 = y0 = size // 2
# The gaussian is not normalized, we want the center value to equal 1
g = torch.tensor(np.exp(- ((tx - x0) ** 2 + (ty - y0) ** 2) / (2 * sigma ** 2)))
# Determine the bounds of the source gaussian
g_x_min, g_x_max = max(0, -x1), min(x2, w) - x1
g_y_min, g_y_max = max(0, -y1), min(y2, h) - y1
# Image range
img_x_min, img_x_max = max(0, x1), min(x2, w)
img_y_min, img_y_max = max(0, y1), min(y2, h)
t[img_y_min:img_y_max, img_x_min:img_x_max] = \
g[g_y_min:g_y_max, g_x_min:g_x_max]
return t
def rescale(a, img_size):
# scale tensor to [-1, 1]
return 2 * a / img_size[0] - 1
My current code uses a for loop to compute the gaussian heatmap for each of the 68 keypoint coordinates, then stacks the resulting tensors to create a [68, H, W] tensor:
x_k1 = [generate_gaussian(torch.zeros(H, W), x, y) for x, y in rescale(kp1.numpy(), frame.shape)]
x_k1 = torch.stack(x_k1, dim=0)
However, this method is super slow. Is there some way that I can do this without a for loop?
Edit:
I tried #Cris Luengo's proposal to compute a 1D Gaussian:
def generate_gaussian1D(t, x, y, sigma=10):
h,w = t.shape
# Heatmap pixel per output pixel
mu_x = int(0.5 * (x + 1.) * w)
mu_y = int(0.5 * (y + 1.) * h)
tmp_size = sigma * 3
# Top-left
x1, y1 = int(mu_x - tmp_size), int(mu_y - tmp_size)
# Bottom right
x2, y2 = int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)
if x1 >= w or y1 >= h or x2 < 0 or y2 < 0:
return t
size = 2 * tmp_size + 1
tx = np.arange(0, size, 1, np.float32)
ty = tx[:, np.newaxis]
x0 = y0 = size // 2
g = torch.tensor(np.exp(-np.power(tx - mu_x, 2.) / (2 * np.power(sigma, 2.))))
g = g * g[:, None]
g_x_min, g_x_max = max(0, -x1), min(x2, w) - x1
g_y_min, g_y_max = max(0, -y1), min(y2, h) - y1
img_x_min, img_x_max = max(0, x1), min(x2, w)
img_y_min, img_y_max = max(0, y1), min(y2, h)
t[img_y_min:img_y_max, img_x_min:img_x_max] = \
g[g_y_min:g_y_max, g_x_min:g_x_max]
return t
but my output ends up being an incomplete gaussian.
I'm not sure what I'm doing wrong. Any help would be appreciated.
You generate an NxN array g with a Gaussian centered on its center pixel. N is computed such that it extends by 3*sigma from that center pixel. This is the fastest way to build such an array:
tmp_size = sigma * 3
tx = np.arange(1, tmp_size + 1, 1, np.float32)
g = np.exp(-(tx**2) / (2 * sigma**2))
g = np.concatenate((np.flip(g), [1], g))
g = g * g[:, None]
What we're doing here is compute half a 1D Gaussian. We don't even bother computing the value of the Gaussian for the middle pixel, which we know will be 1. We then build the full 1D Gaussian by flipping our half-Gaussian and concatenating. Finally, the 2D Gaussian is built by the outer product of the 1D Gaussian with itself.
We could shave a bit of extra time by building a quarter of the 2D Gaussian, then concatenating four rotated copies of it. But the difference in computational cost is not very large, and this is much simpler. Note that np.exp is the most expensive operation here by far, so just minimizing how often we call it we significantly reduce the computational cost.
However, the best way to speed up the complete code is to compute the array g only once, rather than anew for each key point. Note how your sigma doesn't change, so all the arrays g that are computed are identical. If you compute it only once, it no longer matters which method you use to compute it, since this will be a minimal portion of the total program anyway.
You could, for example, have a global variable _gaussian to hold your array, and have your function compute it only the first time it is called. Or you could separate your function into two functions, one that constructs this array, and one that copies it into an image, and call them as follows:
g = create_gaussian(sigma=3)
x_k1 = [
copy_gaussian(torch.zeros(H, W), x, y, g)
for x, y in rescale(kp1.numpy(), frame.shape)
]
On the other hand, you're likely best off using existing functionality. For example, DIPlib has a function dip.DrawBandlimitedPoint() [disclosure: I'm an author] that adds a Gaussian blob to an image. Likely you'll find similar functions in other libraries.
I have reached to this bilinear interpolation code (added here), but I would like to improve this code to 3D, meaning update it to work with an RGB image (3D, instead of only 2D).
If you have any suggestions of how I can to that I would love to know.
This was the one dimension linear interpolation:
import math
def linear1D_resize(in_array, size):
"""
`in_array` is the input array.
`size` is the desired size.
"""
ratio = (len(in_array) - 1) / (size - 1)
out_array = []
for i in range(size):
low = math.floor(ratio * i)
high = math.ceil(ratio * i)
weight = ratio * i - low
a = in_array[low]
b = in_array[high]
out_array.append(a * (1 - weight) + b * weight)
return out_array
And this for the 2D:
import math
def bilinear_resize(image, height, width):
"""
`image` is a 2-D numpy array
`height` and `width` are the desired spatial dimension of the new 2-D array.
"""
img_height, img_width = image.shape[:2]
resized = np.empty([height, width])
x_ratio = float(img_width - 1) / (width - 1) if width > 1 else 0
y_ratio = float(img_height - 1) / (height - 1) if height > 1 else 0
for i in range(height):
for j in range(width):
x_l, y_l = math.floor(x_ratio * j), math.floor(y_ratio * i)
x_h, y_h = math.ceil(x_ratio * j), math.ceil(y_ratio * i)
x_weight = (x_ratio * j) - x_l
y_weight = (y_ratio * i) - y_l
a = image[y_l, x_l]
b = image[y_l, x_h]
c = image[y_h, x_l]
d = image[y_h, x_h]
pixel = a * (1 - x_weight) * (1 - y_weight) + b * x_weight * (1 - y_weight) + c * y_weight * (1 - x_weight) + d * x_weight * y_weight
resized[i][j] = pixel # pixel is the scalar with the value comptued by the interpolation
return resized
Check out some of the scipy ndimage interpolate functions. They will do what you're looking for and are 'using numpy'.
They are also very functional, fast and have been tested many times.
Richard
Now that my perlin generator is 'working' I created noise, to find that it is nothing like what I see on the internets...
My noise:
Notice the streaks:
What I am aiming to get (obviously with corresponding colour):
1:
Why does mine look so noisy and nasty?
Code (sorry for no stub, the Perlin noise makes up most of the program so it's important to include the full program):
from PIL import Image
from tkinter import filedialog
from random import randint, random
#Initialise width / height
width = 625
height = 625
#Import gradient picture - 200*1 image used to texture perlin noise
#R,G,B,Alpha
gradient = Image.open("image.png")
gradlist = list(gradient.getdata())
#Create new image
img = Image.new('RGBA', (width, height), color=(255, 255, 255, 255))
#Perlin noise modules --------------------------------------------------------------------------------------------------------
#Modules
from random import sample
from math import floor
p = sample([x for x in range(0, (width * height))], (width * height)) * 2
#Antialising
def fade(t):
retval = 6*(t**5) - 15*(t**4) + 10*(t**3)
return retval
#Linear interpolation
def lerp(t,a,b):
retval = a + (t * (b - a))
return retval
#Clever bitwise hash stuff - picks a unit vector from 12 possible - (1,1,0),(-1,1,0),(1,-1,0),(-1,-1,0),(1,0,1),(-1,0,1),(1,0,-1),(-1,0,-1),(0,1,1),(0,-1,1),(0,1,-1),(0,-1,-1)
def grad(hash, x, y, z):
h = hash % 15
if h < 8:
u = x
else:
u = y
if h < 4:
v = y
elif h in (12, 14):
v = x
else:
v = z
return (u if (h & 1) == 0 else -u) + (v if (h & 2) == 0 else -v)
#Perlin function
def perlin(x,y,z):
ix = int(floor(x)) & 255
iy = int(floor(y)) & 255
iz = int(floor(z)) & 255
x -= int(floor(x))
y -= int(floor(y))
z -= int(floor(z))
u = fade(x)
v = fade(y)
w = fade(z)
#Complicated hash stuff
A = p[ix] + iy
AA = p[A] + iz
AB = p[A + 1] + iz
B = p[ix + 1] + iy
BA = p[B] + iz
BB = p[B + 1] + iz
return -lerp(w, lerp(v, lerp(u, grad(p[AA], x, y, z),grad(p[BA], x - 1, y, z)),lerp(u, grad(p[AB], x, y - 1, z),grad(p[BB], x - 1, y - 1, z))),lerp(v, lerp(u, grad(p[AA + 1], x, y, z - 1),grad(p[BA + 1], x - 1, y, z - 1)), lerp(u, grad(p[AB + 1], x, y - 1, z - 1),grad(p[BB + 1], x - 1, y - 1, z - 1))))
def octavePerlin(x,y,z,octaves,persistence):
total = 0
frequency = 1
amplitude = 1
maxValue = 0
for x in range(octaves):
total += perlin(x * frequency, y * frequency, z * frequency) * amplitude
maxValue += amplitude
amplitude *= persistence
frequency *= 2
return total / maxValue
z = random()
img.putdata([gradlist[int(octavePerlin((x + random() - 0.5) / 1000, (y + random() - 0.5) / 1000, z, 4, 2) * 100 + 100)] for x in range(width) for y in range(height)])
img.save(filedialog.asksaveasfilename() + ".png", "PNG")
I've taken the Wikipedia Perlin Noise Algorithm and implemented it in Python, here is the code:
import random
import math
from PIL import Image
from decimal import Decimal
IMAGE_SIZE = 200
PERLIN_RESOLUTION = 10
GRADIENT = []
for x in range(PERLIN_RESOLUTION + 1):
GRADIENT.append([])
for y in range(PERLIN_RESOLUTION + 1):
angle = random.random() * 2 * math.pi
vector = (
Decimal(math.cos(angle)),
Decimal(math.sin(angle))
)
GRADIENT[x].append(vector)
def lerp(a0, a1, w):
return (1 - w)*a0 + w*a1
def dotGridGradient(ix, iy, x, y):
dx = x - Decimal(ix)
dy = y - Decimal(iy)
return (dx*GRADIENT[iy][ix][0] + dy*GRADIENT[iy][ix][1])
def perlin(x, y):
if x > 0.0:
x0 = int(x)
else:
x0 = int(x) - 1
x1 = x0 + 1
if y > 0.0:
y0 = int(y)
else:
y0 = int(y) - 1
y1 = y0 + 1
sx = x - Decimal(x0)
sy = y - Decimal(y0)
n0 = dotGridGradient(x0, y0, x, y)
n1 = dotGridGradient(x1, y0, x, y)
ix0 = lerp(n0, n1, sx)
n0 = dotGridGradient(x0, y1, x, y)
n1 = dotGridGradient(x1, y1, x, y)
ix1 = lerp(n0, n1, sx)
value = lerp(ix0, ix1, sy)
return value
image = Image.new('RGB', (IMAGE_SIZE, IMAGE_SIZE))
pixels = image.load()
for i in range(IMAGE_SIZE):
x = Decimal(i) / IMAGE_SIZE
for j in range(IMAGE_SIZE):
y = Decimal(j) / IMAGE_SIZE
value = perlin(x * 10, y * 10)
greyscale = (value + 1) * 255 / 2
pixels[i, j] = (greyscale, greyscale, greyscale)
image.save('artifacts.png', 'PNG')
Here is the resulting image that is created by the script:
I must be missing something here, you can very clearly see the vertices. Can anyone let me know what is going wrong?
You need to use smoothstep instead of linear interpolation.
def smoothstep(a0, a1, w):
value = w*w*w*(w*(w*6 - 15) + 10)
return a0 + value*(a1 - a0)