I am trying to get Blender to render a depth map of an object and then transform the result so that it overlays the original object. Currently I have no issue with rendering the object and extracting the depth into a point cloud.
However, I am stuck when trying to move the point cloud back into the object's original position.
I'm trying to apply the inverse camera world matrix to the rendered point cloud (in blue). Unfortunately, when I apply said inverse, the result ends up nowhere near where I'd expect (in red).
I have attached the entire code needed to replicate this behaviour. I would appreciate it if someone could point me to the right matrix to multiply the point cloud by.
from mathutils import Vector, Quaternion, Euler, Matrix
import numpy as np
import bpy
def main_script():
clear_scene()
prepare_views()
tmp_path = "/tmp/tmp_render.exr"
scene = get_scene("Scene")
camera = create_camera("Camera")
camera.rotation_euler = Euler([np.pi * 0.5, 0, np.pi * 0.5], "XYZ")
camera.location = Vector([4.5, 0, 1])
bpy.ops.mesh.primitive_monkey_add(
location=(0, 0, 1), rotation=(0, 0, np.pi*0.5), size=1.0)
_w, _h = 640, 480
update_scene()
init_rendering(scene, camera, width=640, height=480)
update_scene()
matrix_K = get_calibration_matrix_K_from_blender(scene, camera.data)
_fy, _fx = matrix_K[0][0], matrix_K[1][1]
_cy, _cx = matrix_K[0][2], matrix_K[1][2]
scene.render.filepath = tmp_path
bpy.ops.render.render(write_still=True)
depth = read_exr(tmp_path, "R")["R"]
depth = np.reshape(convert_to_numpy(depth), [_h, _w])
exr_cloud = depth_to_cloud(
_w, _h, _fx, _fy, _cx, _cy, depth)
exr_cloud = np.reshape(exr_cloud, [-1, 3])
exr_cloud = exr_cloud[(exr_cloud[..., 2] < 100) & (exr_cloud[..., 2] > 0)]
matrix = np.reshape(camera.matrix_world, [4, 4])
matrix = np.linalg.inv(matrix) # why doesn't this place the depth properly
vertices = np.ones([exr_cloud.shape[0], 4], dtype=np.float32)
vertices[:, 0:3] = exr_cloud
vertices = np.array(
[matrix @ vertex for vertex in vertices], dtype=np.float32)
vertices = vertices[..., :3]
create_mesh("Suzanne_EXR", exr_cloud, [])
create_mesh("SuzanneT_EXR", vertices, [])
"""
utilities methods required to run the script
"""
def clear_scene():
for scene in bpy.data.scenes:
for obj in scene.objects:
bpy.context.collection.objects.unlink(obj)
def read_exr(path, channels):
import OpenEXR as _OpenEXR
import Imath as _Imath
file = _OpenEXR.InputFile(path)
FLOAT = _Imath.PixelType(_Imath.PixelType.FLOAT)
results = {}
for ch in channels:
results[ch] = file.channel(ch, FLOAT)
file.close()
return results
def convert_to_numpy(data):
import array as _array
return np.array(_array.array("f", data).tolist())
def update_scene():
dg = bpy.context.evaluated_depsgraph_get()
dg.update()
def prepare_views():
preferences = bpy.context.preferences
preferences.view.show_tooltips_python = True
preferences.view.show_developer_ui = True
preferences.view.render_display_type = "NONE"
def init_rendering(scene, camera, width=None, height=None):
def set_rendering_settings(camera, scene, width=640, height=480):
image_settings = scene.render.image_settings
image_settings.file_format = "OPEN_EXR"
image_settings.use_zbuffer = True
scene.render.resolution_x = width
scene.render.resolution_y = height
# scene.render.use_antialiasing = False
scene.use_nodes = True
scene.camera = camera
node_tree = scene.node_tree
nodes = node_tree.nodes
node_render_layers = nodes["Render Layers"]
node_composite = nodes["Composite"]
node_tree.links.clear()
node_tree.links.new(
node_render_layers.outputs["Depth"], node_composite.inputs["Image"])
set_rendering_settings(camera, scene)
def get_scene(name): return bpy.data.scenes[name]
def create_camera(name):
camera = bpy.data.cameras.new(name)
camera.lens = 50
obj = bpy.data.objects.new(name, camera)
bpy.context.collection.objects.link(obj)
return obj
# ---------------------------------------------------------------
# 3x4 P matrix from Blender camera
# ---------------------------------------------------------------
# Build intrinsic camera parameters from Blender camera data
#
# See notes on this in
# blender.stackexchange.com/questions/15102/what-is-blenders-camera-projection-matrix-model
def get_calibration_matrix_K_from_blender(scene, camera):
from mathutils import Matrix as _Matrix
f_in_mm = camera.lens
resolution_x_in_px = scene.render.resolution_x
resolution_y_in_px = scene.render.resolution_y
scale = scene.render.resolution_percentage / 100
sensor_width_in_mm = camera.sensor_width
sensor_height_in_mm = camera.sensor_height
pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
if (camera.sensor_fit == 'VERTICAL'):
# the sensor height is fixed (sensor fit is vertical),
# the sensor width is effectively changed with the pixel aspect ratio
s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio
s_v = resolution_y_in_px * scale / sensor_height_in_mm
else: # 'HORIZONTAL' and 'AUTO'
# the sensor width is fixed (sensor fit is horizontal),
# the sensor height is effectively changed with the pixel aspect ratio
pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
s_u = resolution_x_in_px * scale / sensor_width_in_mm
s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm
# Parameters of intrinsic calibration matrix K
alpha_u = f_in_mm * s_u
alpha_v = f_in_mm * s_v
u_0 = resolution_x_in_px * scale / 2
v_0 = resolution_y_in_px * scale / 2
skew = 0 # only use rectangular pixels
K = _Matrix(
((alpha_u, skew, u_0),
(0, alpha_v, v_0),
(0, 0, 1)))
return K
def create_mesh(name, vertices, faces):
import bmesh as _bmesh
mesh = bpy.data.meshes.new("Mesh_%s" % name)
mesh.from_pydata(vertices, [], faces)
mesh.update()
obj = bpy.data.objects.new(name, mesh)
bpy.context.collection.objects.link(obj)
bm = _bmesh.new()
bm.from_mesh(mesh)
bm.to_mesh(mesh)
bm.free()
return obj
def depth_to_cloud(w, h, fx, fy, cx, cy, depth):
from numpy import concatenate as _concat
from numpy import indices as _indices
from numpy import newaxis as _newaxis
indices = _indices(depth.shape)
indices_y, indices_x = indices
ys, xs, zs = \
(indices_y - cy) * depth / fy, \
(indices_x - cx) * depth / fx, \
depth
points = _concat([xs[..., _newaxis], ys[..., _newaxis],
zs[..., _newaxis]], axis=2)
return points
if __name__ == "__main__":
main_script()
The problem was twofold. First, I needed to change the transformed-vertex calculation to use the camera world matrix with a negative scale applied, instead of the inverse camera world matrix, like so:
matrix_cam = np.reshape(camera.matrix_world, [4, 4])
mat_scale = np.array(Matrix.Scale(-1, 4))
matrix = matrix_cam @ mat_scale
vertices = np.ones([exr_cloud.shape[0], 4], dtype=np.float32)
vertices[:, 0:3] = exr_cloud
vertices = np.array(
[matrix @ vertex for vertex in vertices], dtype=np.float32)
vertices = vertices[..., :3]
Additionally, there was an issue with the depth decoding which caused the point cloud to be deformed; it was fixed like so:
ys, xs, zs = \
(indices_y - cx) * depth / fx, \
(indices_x - cy) * depth / fy, \
depth
I am trying to write a program which fades an image in a radial direction, which means that as we move away from the centre of the image, the pixels fade to black. For this, I have written five different functions:
center: returns coordinate pair (center_y, center_x) of the image center.
radial_distance: returns, for an image with width w and height h, an array with shape (h, w), where the value at index (i, j) gives the Euclidean distance from the point (i, j) to the center of the image.
scale: returns a copy of the array 'a' (or image) with its elements scaled to be in the given range.
radial_mask: takes an image as a parameter and returns an array with same height and width filled with values between 0.0 and 1.0.
radial_fade: returns the image multiplied by its radial mask.
The program is:
import numpy as np
import matplotlib.pyplot as plt
def center(a):
y, x = a.shape[:2]
return ((y-1)/2,(x-1)/2) # note the order: (center_y, center_x)
def radial_distance(b):
h, w = b.shape[:2]
y, x = center(b)
o = b[:h,:w,0]
for i in range(h):
for j in range(w):
o[i,j] = np.sqrt((y-i)**2 + (x-j)**2)
return o
def scale(c, tmin=0.0, tmax=1.0):
"""Returns a copy of array 'a' with its values scaled to be in the range
[tmin,tmax]."""
mini, maxi = c.min(), c.max()
if maxi == 0:
return 0
else:
m = (tmax - tmin)/(maxi - mini)
f = tmin - m*mini
return c*m + f
def radial_mask(d):
f = radial_distance(d)
g = scale(f, tmin=0.0, tmax=1.0)
# f = g[:,:,0]
n = 1.0 - g
return n
def radial_fade(l):
f, g = l.shape[:2]
q = l[:f,:g,0]
return q * radial_mask(l)
image = plt.imread("src/painting.png")
fig, ax = plt.subplots(3)
masked = radial_mask(image)
faded = radial_fade(image)
ax[0].imshow(image)
ax[1].imshow(masked)
ax[2].imshow(faded)
plt.show()
There is something wrong somewhere in the code, as it does not do the expected job.
One problem is that in
o = b[:h,:w,0]
you're reusing a slice of the image as the output buffer, so it has the same dtype as the image, which may be integer (e.g. uint8) and will truncate the computed distances.
You should instead allocate a float array, for example
o = np.zeros((h, w), np.float32)
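For reference, here is a vectorized sketch of radial_distance (not part of the original answer) that allocates its own float output and also avoids the Python loops:
import numpy as np
def radial_distance(b):
    # distance-to-center map computed with broadcasting; output is float64
    h, w = b.shape[:2]
    cy, cx = (h - 1) / 2, (w - 1) / 2
    yy, xx = np.ogrid[:h, :w]
    return np.sqrt((yy - cy) ** 2 + (xx - cx) ** 2)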
I am using Open3D to visualize some point clouds. I would like to add arrows that start and end at specific points. The arrows would visualize some things that I am working on. However, I have not found an easy way to add these arrows.
I have noticed that there's a function to create a Cartesian coordinate system which uses arrows, so it is possible to add arrows to the 3D visualization.
import open3d as o3d
# Create cartesian coordinate
FOR = o3d.geometry.TriangleMesh.create_coordinate_frame(
size=10, origin=[0,0,0])
# Visualize FOR
o3d.visualization.draw_geometries([FOR])
I was frustrated by not finding an easy way to create arrows within Open3D, and after some time struggling with it, I have come up with a solution.
import open3d as o3d
import numpy as np
def draw_geometries(pcds):
"""
Draw Geometries
Args:
- pcds (): [pcd1,pcd2,...]
"""
o3d.visualization.draw_geometries(pcds)
def get_o3d_FOR(origin=[0, 0, 0],size=10):
"""
Create a FOR that can be added to the open3d point cloud
"""
mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
size=size)
mesh_frame.translate(origin)
return(mesh_frame)
def vector_magnitude(vec):
"""
Calculates a vector's magnitude.
Args:
- vec ():
"""
magnitude = np.sqrt(np.sum(vec**2))
return(magnitude)
def calculate_zy_rotation_for_arrow(vec):
"""
Calculates the rotations required to go from the vector vec to the
z axis vector of the original FOR. The first rotation that is
calculated is over the z axis. This will leave the vector vec on the
XZ plane. Then, the rotation over the y axis.
Returns the angles of rotation over axis z and y required to
get the vector vec into the same orientation as axis z
of the original FOR
Args:
- vec ():
"""
# Rotation over z axis of the FOR
gamma = np.arctan(vec[1]/vec[0])
Rz = np.array([[np.cos(gamma),-np.sin(gamma),0],
[np.sin(gamma),np.cos(gamma),0],
[0,0,1]])
# Rotate vec to calculate next rotation
vec = Rz.T @ vec.reshape(-1,1)
vec = vec.reshape(-1)
# Rotation over y axis of the FOR
beta = np.arctan(vec[0]/vec[2])
Ry = np.array([[np.cos(beta),0,np.sin(beta)],
[0,1,0],
[-np.sin(beta),0,np.cos(beta)]])
return(Rz, Ry)
def create_arrow(scale=10):
"""
Create an arrow in for Open3D
"""
cone_height = scale*0.2
cylinder_height = scale*0.8
cone_radius = scale/10
cylinder_radius = scale/20
mesh_frame = o3d.geometry.TriangleMesh.create_arrow(cone_radius=1,
cone_height=cone_height,
cylinder_radius=0.5,
cylinder_height=cylinder_height)
return(mesh_frame)
def get_arrow(origin=[0, 0, 0], end=None, vec=None):
"""
Creates an arrow from an origin point to an end point,
or create an arrow from a vector vec starting from origin.
Args:
- end (): End point. [x,y,z]
- vec (): Vector. [i,j,k]
"""
scale = 10
Ry = Rz = np.eye(3)
T = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
T[:3, -1] = origin
if end is not None:
vec = np.array(end) - np.array(origin)
elif vec is not None:
vec = np.array(vec)
if end is not None or vec is not None:
scale = vector_magnitude(vec)
Rz, Ry = calculate_zy_rotation_for_arrow(vec)
mesh = create_arrow(scale)
# Create the arrow
mesh.rotate(Ry, center=np.array([0, 0, 0]))
mesh.rotate(Rz, center=np.array([0, 0, 0]))
mesh.translate(origin)
return(mesh)
# Create a Cartesian Frame of Reference
FOR = get_o3d_FOR()
# Create an arrow from point (5,5,5) to point (10,10,10)
# arrow = get_arrow([5,5,5],[10,10,10])
# Create an arrow representing vector vec, starting at (5,5,5)
# arrow = get_arrow([5,5,5],vec=[5,5,5])
# Create an arrow in the same place as the z axis
arrow = get_arrow()
# Draw everything
draw_geometries([FOR,arrow])
As mentioned in the 3D-RCNN paper, we can calculate the rotation matrix that aligns one unit vector p with another unit vector q as
R = I + [r]x + [r]x^2 / (1 + p·q)
where r is defined as the cross product of p and q, and '[r]x' is the skew-symmetric cross-product matrix of r.
So we can use this matrix to create the needed arrow or cylinder: use it as the rotation of the target mesh and then align the center.
The demo can be realized in the following way (based on Open3D 0.9.0):
import numpy as np
import open3d as o3d
def get_cross_prod_mat(pVec_Arr):
# pVec_Arr shape (3)
qCross_prod_mat = np.array([
[0, -pVec_Arr[2], pVec_Arr[1]],
[pVec_Arr[2], 0, -pVec_Arr[0]],
[-pVec_Arr[1], pVec_Arr[0], 0],
])
return qCross_prod_mat
def caculate_align_mat(pVec_Arr):
scale = np.linalg.norm(pVec_Arr)
pVec_Arr = pVec_Arr/ scale
# must ensure pVec_Arr is also a unit vec.
z_unit_Arr = np.array([0,0,1])
z_mat = get_cross_prod_mat(z_unit_Arr)
z_c_vec = np.matmul(z_mat, pVec_Arr)
z_c_vec_mat = get_cross_prod_mat(z_c_vec)
if np.dot(z_unit_Arr, pVec_Arr) == -1:
qTrans_Mat = -np.eye(3, 3)
elif np.dot(z_unit_Arr, pVec_Arr) == 1:
qTrans_Mat = np.eye(3, 3)
else:
qTrans_Mat = np.eye(3, 3) + z_c_vec_mat + np.matmul(z_c_vec_mat,
z_c_vec_mat)/(1 + np.dot(z_unit_Arr, pVec_Arr))
qTrans_Mat *= scale
return qTrans_Mat
if __name__ == "__main__":
z_unit_Arr = np.array([0,0,1])
begin = [1, 0, 0]
end = [1.6, 0.4, 0.8]
vec_Arr = np.array(end) - np.array(begin)
vec_len = np.linalg.norm(vec_Arr)
mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.6, origin=[0, 0, 0])
mesh_arrow = o3d.geometry.TriangleMesh.create_arrow(
cone_height= 0.2 * vec_len,
cone_radius= 0.06 * vec_len,
cylinder_height= 0.8 * vec_len,
cylinder_radius= 0.04 * vec_len
)
mesh_arrow.paint_uniform_color([1,0,1])
mesh_arrow.compute_vertex_normals()
mesh_sphere_begin = o3d.geometry.TriangleMesh.create_sphere(radius=0.1, resolution= 20)
mesh_sphere_begin.translate(begin)
mesh_sphere_begin.paint_uniform_color([0,1,1])
mesh_sphere_begin.compute_vertex_normals()
mesh_sphere_end = o3d.geometry.TriangleMesh.create_sphere(radius=0.1, resolution= 20)
mesh_sphere_end.translate(end)
mesh_sphere_end.paint_uniform_color([0,1,1])
mesh_sphere_end.compute_vertex_normals()
# mesh_arrow,
o3d.visualization.draw_geometries(
geometry_list= [mesh_frame, mesh_arrow, mesh_sphere_begin, mesh_sphere_end],
window_name= "before", width= 800, height= 600
)
rot_mat = caculate_align_mat(vec_Arr)
mesh_arrow.rotate(rot_mat, center = False)
o3d.visualization.draw_geometries(
geometry_list= [mesh_frame, mesh_arrow, mesh_sphere_begin, mesh_sphere_end],
window_name= "after rotate", width= 800, height= 600
)
mesh_arrow.translate(np.array(begin)) # 0.5*(np.array(end) - np.array(begin))
o3d.visualization.draw_geometries(
geometry_list= [mesh_frame, mesh_arrow, mesh_sphere_begin, mesh_sphere_end],
window_name= "after translate", width= 800, height= 600
)
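As a quick sanity check (a hypothetical snippet, not part of the original demo): since the scale is folded into the returned matrix, it should map the z unit vector onto the target vector, length included.
import numpy as np
vec_Arr = np.array([1.6, 0.4, 0.8]) - np.array([1, 0, 0])
rot_mat = caculate_align_mat(vec_Arr)
# expected: True, i.e. rot_mat rotates and stretches [0, 0, 1] onto vec_Arr
print(np.allclose(rot_mat @ np.array([0.0, 0.0, 1.0]), vec_Arr))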
I am implementing Selective Search, but some of the images I use give a weird error. I am attaching the code and the output.
import skimage.io
import skimage.feature
import skimage.color
import skimage.transform
import skimage.util
import skimage.segmentation
import numpy
import cv2
im_orig = img = cv2.imread("image.jpeg")
# "Selective Search for Object Recognition" by J.R.R. Uijlings et al.
#
# - Modified version with LBP extractor for texture vectorization
def _generate_segments(im_orig, scale, sigma, min_size):
"""
segment smallest regions by the algorithm of Felzenswalb and
Huttenlocher
"""
# open the Image
im_mask = skimage.segmentation.felzenszwalb(
skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma,
min_size=min_size)
# merge mask channel to the image as a 4th channel
im_orig = numpy.append(
im_orig, numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis], axis=2)
im_orig[:, :, 3] = im_mask
return im_orig
def _sim_colour(r1, r2):
"""
calculate the sum of histogram intersection of colour
"""
return sum([min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"])])
def _sim_texture(r1, r2):
"""
calculate the sum of histogram intersection of texture
"""
return sum([min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"])])
def _sim_size(r1, r2, imsize):
"""
calculate the size similarity over the image
"""
return 1.0 - (r1["size"] + r2["size"]) / imsize
def _sim_fill(r1, r2, imsize):
"""
calculate the fill similarity over the image
"""
bbsize = (
(max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"]))
* (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"]))
)
return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize
def _calc_sim(r1, r2, imsize):
return (_sim_colour(r1, r2) + _sim_texture(r1, r2)
+ _sim_size(r1, r2, imsize) + _sim_fill(r1, r2, imsize))
def _calc_colour_hist(img):
"""
calculate colour histogram for each region
the size of output histogram will be BINS * COLOUR_CHANNELS(3)
number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf]
extract HSV
"""
BINS = 25
hist = numpy.array([])
for colour_channel in (0, 1, 2):
# extracting one colour channel
c = img[:, colour_channel]
# calculate histogram for each colour and join to the result
hist = numpy.concatenate(
[hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]])
# L1 normalize
hist = hist / len(img)
return hist
def _calc_texture_gradient(img):
"""
calculate texture gradient for entire image
The original SelectiveSearch algorithm proposed Gaussian derivative
for 8 orientations, but we use LBP instead.
output will be [height(*)][width(*)]
"""
ret = numpy.zeros((img.shape[0], img.shape[1], img.shape[2]))
for colour_channel in (0, 1, 2):
ret[:, :, colour_channel] = skimage.feature.local_binary_pattern(
img[:, :, colour_channel], 8, 1.0)
return ret
def _calc_texture_hist(img):
"""
calculate texture histogram for each region
calculate the histogram of gradient for each colours
the size of output histogram will be
BINS * ORIENTATIONS * COLOUR_CHANNELS(3)
"""
BINS = 10
hist = numpy.array([])
for colour_channel in (0, 1, 2):
# mask by the colour channel
fd = img[:, colour_channel]
# calculate histogram for each orientation and concatenate them all
# and join to the result
hist = numpy.concatenate(
[hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]])
# L1 Normalize
hist = hist / len(img)
return hist
def _extract_regions(img):
R = {}
# get hsv image
hsv = skimage.color.rgb2hsv(img[:, :, :3])
# pass 1: count pixel positions
for y, i in enumerate(img):
for x, (r, g, b, l) in enumerate(i):
# initialize a new region
if l not in R:
R[l] = {
"min_x": 0xffff, "min_y": 0xffff,
"max_x": 0, "max_y": 0, "labels": [l]}
# bounding box
if R[l]["min_x"] > x:
R[l]["min_x"] = x
if R[l]["min_y"] > y:
R[l]["min_y"] = y
if R[l]["max_x"] < x:
R[l]["max_x"] = x
if R[l]["max_y"] < y:
R[l]["max_y"] = y
# pass 2: calculate texture gradient
tex_grad = _calc_texture_gradient(img)
# pass 3: calculate colour histogram of each region
for k, v in list(R.items()):
# colour histogram
masked_pixels = hsv[:, :, :][img[:, :, 3] == k]
R[k]["size"] = len(masked_pixels / 4)
R[k]["hist_c"] = _calc_colour_hist(masked_pixels)
# texture histogram
R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k])
return R
def _extract_neighbours(regions):
def intersect(a, b):
if (a["min_x"] < b["min_x"] < a["max_x"]
and a["min_y"] < b["min_y"] < a["max_y"]) or (
a["min_x"] < b["max_x"] < a["max_x"]
and a["min_y"] < b["max_y"] < a["max_y"]) or (
a["min_x"] < b["min_x"] < a["max_x"]
and a["min_y"] < b["max_y"] < a["max_y"]) or (
a["min_x"] < b["max_x"] < a["max_x"]
and a["min_y"] < b["min_y"] < a["max_y"]):
return True
return False
R = list(regions.items())
neighbours = []
for cur, a in enumerate(R[:-1]):
for b in R[cur + 1:]:
if intersect(a[1], b[1]):
neighbours.append((a, b))
return neighbours
def _merge_regions(r1, r2):
new_size = r1["size"] + r2["size"]
rt = {
"min_x": min(r1["min_x"], r2["min_x"]),
"min_y": min(r1["min_y"], r2["min_y"]),
"max_x": max(r1["max_x"], r2["max_x"]),
"max_y": max(r1["max_y"], r2["max_y"]),
"size": new_size,
"hist_c": (
r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size,
"hist_t": (
r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size,
"labels": r1["labels"] + r2["labels"]
}
return rt
def selective_search(
im_orig, scale=1.0, sigma=0.8, min_size=500):
'''Selective Search
Parameters
----------
im_orig : ndarray
Input image
scale : int
Free parameter. Higher means larger clusters in felzenszwalb segmentation.
sigma : float
Width of Gaussian kernel for felzenszwalb segmentation.
min_size : int
Minimum component size for felzenszwalb segmentation.
Returns
-------
img : ndarray
image with region label
region label is stored in the 4th value of each pixel [r,g,b,(region)]
regions : array of dict
[
{
'rect': (left, top, width, height),
'labels': [...],
'size': component_size
},
...
]
'''
assert im_orig.shape[2] == 3, "3ch image is expected"
# load image and get smallest regions
# region label is stored in the 4th value of each pixel [r,g,b,(region)]
img = _generate_segments(im_orig, scale, sigma, min_size)
if img is None:
return None, {}
imsize = img.shape[0] * img.shape[1]
R = _extract_regions(img)
# extract neighbouring information
neighbours = _extract_neighbours(R)
# calculate initial similarities
S = {}
for (ai, ar), (bi, br) in neighbours:
S[(ai, bi)] = _calc_sim(ar, br, imsize)
# hierarchal search
while S != {}:
# get highest similarity
i, j = sorted(S.items(), key=lambda i: i[1])[-1][0]
# merge corresponding regions
t = max(R.keys()) + 1.0
R[t] = _merge_regions(R[i], R[j])
# mark similarities for regions to be removed
key_to_delete = []
for k, v in list(S.items()):
if (i in k) or (j in k):
key_to_delete.append(k)
# remove old similarities of related regions
for k in key_to_delete:
del S[k]
# calculate similarity set with the new region
for k in [a for a in key_to_delete if a != (i, j)]:
n = k[1] if k[0] in (i, j) else k[0]
S[(t, n)] = _calc_sim(R[t], R[n], imsize)
regions = []
for k, r in list(R.items()):
regions.append({
'labels': r['labels'],
'rect': (
r['min_x'], r['min_y'],
r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
'size': r['size']
})
return img, regions
img_ou, region = selective_search(im_orig)
for r in region:
print(r)
cv2.imshow('image', img_ou)
cv2.waitKey(0)
cv2.destroyAllWindows()
Error Message
Also, can you please explain what labels is? I am having a tough time understanding whether it is useful or not; it starts out as a short list but later gets more elements. The major problem, though, is the image error mentioned above. Any help will be deeply appreciated.
Hi, I know I am late, but I still thought I would answer the question from your comment: you have to give the cropped image as the input to your CNN.
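A hypothetical sketch of that idea, reusing the variables from the script above: each proposal's rect is (left, top, width, height), so crop with img[y:y+h, x:x+w] before handing the patch to the CNN.
for r in region:
    x, y, w, h = r['rect']
    if w == 0 or h == 0:
        continue  # skip degenerate single-pixel proposals
    crop = im_orig[y:y + h, x:x + w]
    # crop would then be resized to the CNN's input size and classified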
My aim is to find the colour of the main object in a frame/image. In my case the image is always of the same type: for example, a news reporter (human) in a forest, or a news reporter on an animal farm. The position of the news reporter is also the same. What is a simple solution to find the dominant colour of the main object (the news reporter)?
Any help is welcome. Thanks.
EDIT: Code added.
import cv2
from collections import namedtuple
from math import sqrt
import random
import webcolors
try:
import Image
except ImportError:
from PIL import Image
Point = namedtuple('Point', ('coords', 'n', 'ct'))
Cluster = namedtuple('Cluster', ('points', 'center', 'n'))
def get_points(img):
points = []
w, h = img.size
for count, color in img.getcolors(w * h):
points.append(Point(color, 3, count))
return points
rtoh = lambda rgb: '#%s' % ''.join(('%02x' % p for p in rgb))
def colorz(filename, n=3):
img = Image.open(filename)
img.thumbnail((200, 200))
w, h = img.size
points = get_points(img)
clusters = kmeans(points, n, 1)
rgbs = [map(int, c.center.coords) for c in clusters]
return map(rtoh, rgbs)
def euclidean(p1, p2):
return sqrt(sum([
(p1.coords[i] - p2.coords[i]) ** 2 for i in range(p1.n)
]))
def calculate_center(points, n):
vals = [0.0 for i in range(n)]
plen = 0
for p in points:
plen += p.ct
for i in range(n):
vals[i] += (p.coords[i] * p.ct)
return Point([(v / plen) for v in vals], n, 1)
def kmeans(points, k, min_diff):
clusters = [Cluster([p], p, p.n) for p in random.sample(points, k)]
while 1:
plists = [[] for i in range(k)]
for p in points:
smallest_distance = float('Inf')
for i in range(k):
distance = euclidean(p, clusters[i].center)
if distance < smallest_distance:
smallest_distance = distance
idx = i
plists[idx].append(p)
diff = 0
for i in range(k):
old = clusters[i]
center = calculate_center(plists[i], old.n)
new = Cluster(plists[i], center, old.n)
clusters[i] = new
diff = max(diff, euclidean(old.center, new.center))
if diff < min_diff:
break
return clusters
def main():
img = cv2.imread('d:/Emmanu/project-data/b1.jpg')
res=cv2.resize(img,(400,300))
crop_img = res[100:200, 150:250]
cv2.imwrite("d:/Emmanu/project-data/color-test.jpg", crop_img)
g= colorz('d:/Emmanu/project-data/color-test.jpg',1)
k=g[0]
print k
f=webcolors.hex_to_rgb(k)
print webcolors.rgb_to_name(f, spec='css3')
if __name__ == '__main__':main()
The problem is that this returns the dominant colour of the whole image, not of the main object.
If you take the colour of the whole image, in most cases you will get the wrong answer, since the background dominates. If your image size is fixed and you are sure about the object's position, the simplest solution is to crop the image where you expect the object. In most cases it will work.
In order to crop:
import cv2
img = cv2.imread("'d:/Emmanu/project-data/b1.jpg'")
crop_img = img[200:400, 100:300] # Crop from x, y, w, h -> 100, 200, 300, 400
# NOTE: its img[y: y + h, x: x + w] and *not* img[x: x + w, y: y + h]
cv2.imshow("cropped", crop_img)
cv2.waitKey(0)
Now give this crop_img as the input to your code, and in most cases it will give the correct solution. There is nothing simpler than this. I think this will help.
I am trying to compute a texture image as described in this tutorial using Python and skimage.
The task is to move a 7x7 window over a large raster and replace the center pixel of each window with the texture measure calculated from that 7x7 window. I manage to do this with the code below, but I see no other way than looping over each individual pixel, which is very slow.
One software package does that in a few seconds, so there must be some other way ... is there?
Here is the code, which works but is very slow ...
import matplotlib.pyplot as plt
import gdal, gdalconst
import numpy as np
from skimage.feature import greycomatrix, greycoprops
filename = "//mnt//glaciology//RS2_20140101.jpg"
outfilename = "//home//max//Documents//GLCM_contrast.tif"
sarfile = gdal.Open(filename, gdalconst.GA_ReadOnly)
sarraster = sarfile.ReadAsArray()
#sarraster is satellite image, testraster will receive texture
testraster = np.copy(sarraster)
testraster[:] = 0
for i in range(testraster.shape[0] ):
print i,
for j in range(testraster.shape[1] ):
#windows needs to fit completely in image
if i <3 or j <3:
continue
if i > (testraster.shape[0] - 4) or j > (testraster.shape[1] - 4):
continue
#Calculate GLCM on a 7x7 window
glcm_window = sarraster[i-3: i+4, j-3 : j+4]
glcm = greycomatrix(glcm_window, [1], [0], symmetric = True, normed = True )
#Calculate contrast and replace center pixel
contrast = greycoprops(glcm, 'contrast')
testraster[i,j]= contrast
sarplot = plt.imshow(testraster, cmap = 'gray')
Results:
I had the same problem, different data. Here is a script I wrote that uses parallel processing and a sliding window approach:
import gdal, osr
import numpy as np
from scipy.interpolate import RectBivariateSpline
from numpy.lib.stride_tricks import as_strided as ast
import dask.array as da
from joblib import Parallel, delayed, cpu_count
import os
from skimage.feature import greycomatrix, greycoprops
def im_resize(im,Nx,Ny):
'''
resize array by bivariate spline interpolation
'''
ny, nx = np.shape(im)
xx = np.linspace(0,nx,Nx)
yy = np.linspace(0,ny,Ny)
try:
im = da.from_array(im, chunks=1000) #dask implementation
except:
pass
newKernel = RectBivariateSpline(np.r_[:ny],np.r_[:nx],im)
return newKernel(yy,xx)
def p_me(Z, win):
'''
loop to calculate greycoprops
'''
try:
glcm = greycomatrix(Z, [5], [0], 256, symmetric=True, normed=True)
cont = greycoprops(glcm, 'contrast')
diss = greycoprops(glcm, 'dissimilarity')
homo = greycoprops(glcm, 'homogeneity')
eng = greycoprops(glcm, 'energy')
corr = greycoprops(glcm, 'correlation')
ASM = greycoprops(glcm, 'ASM')
return (cont, diss, homo, eng, corr, ASM)
except:
return (0,0,0,0,0,0)
def read_raster(in_raster):
in_raster=in_raster
ds = gdal.Open(in_raster)
data = ds.GetRasterBand(1).ReadAsArray()
data[data<=0] = np.nan
gt = ds.GetGeoTransform()
xres = gt[1]
yres = gt[5]
# get the edge coordinates and add half the resolution
# to go to center coordinates
xmin = gt[0] + xres * 0.5
xmax = gt[0] + (xres * ds.RasterXSize) - xres * 0.5
ymin = gt[3] + (yres * ds.RasterYSize) + yres * 0.5
ymax = gt[3] - yres * 0.5
del ds
# create a grid of xy coordinates in the original projection
xx, yy = np.mgrid[xmin:xmax+xres:xres, ymax+yres:ymin:yres]
return data, xx, yy, gt
def norm_shape(shap):
'''
Normalize numpy array shapes so they're always expressed as a tuple,
even for one-dimensional shapes.
'''
try:
i = int(shap)
return (i,)
except TypeError:
# shape was not a number
pass
try:
t = tuple(shap)
return t
except TypeError:
# shape was not iterable
pass
raise TypeError('shape must be an int, or a tuple of ints')
def sliding_window(a, ws, ss = None, flatten = True):
'''
Source: http://www.johnvinyard.com/blog/?p=268#more-268
Parameters:
a - an n-dimensional numpy array
ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size
of each dimension of the window
ss - an int (a is 1D) or tuple (a is 2D or greater) representing the
amount to slide the window in each dimension. If not specified, it
defaults to ws.
flatten - if True, all slices are flattened, otherwise, there is an
extra dimension for each dimension of the input.
Returns
an array containing each n-dimensional window from a
'''
if None is ss:
# ss was not provided. the windows will not overlap in any direction.
ss = ws
ws = norm_shape(ws)
ss = norm_shape(ss)
# convert ws, ss, and a.shape to numpy arrays
ws = np.array(ws)
ss = np.array(ss)
shap = np.array(a.shape)
# ensure that ws, ss, and a.shape all have the same number of dimensions
ls = [len(shap),len(ws),len(ss)]
if 1 != len(set(ls)):
raise ValueError(\
'a.shape, ws and ss must all have the same length. They were %s' % str(ls))
# ensure that ws is smaller than a in every dimension
if np.any(ws > shap):
raise ValueError(\
'ws cannot be larger than a in any dimension.\
a.shape was %s and ws was %s' % (str(a.shape),str(ws)))
# how many slices will there be in each dimension?
newshape = norm_shape(((shap - ws) // ss) + 1)
# the shape of the strided array will be the number of slices in each dimension
# plus the shape of the window (tuple addition)
newshape += norm_shape(ws)
# the strides tuple will be the array's strides multiplied by step size, plus
# the array's strides (tuple addition)
newstrides = norm_shape(np.array(a.strides) * ss) + a.strides
a = ast(a,shape = newshape,strides = newstrides)
if not flatten:
return a
# Collapse strided so that it has one more dimension than the window. I.e.,
# the new array is a flat list of slices.
meat = len(ws) if ws.shape else 0
firstdim = (np.product(newshape[:-meat]),) if ws.shape else ()
dim = firstdim + (newshape[-meat:])
# remove any dimensions with size 1
dim = filter(lambda i : i != 1,dim)
return a.reshape(dim), newshape
def CreateRaster(xx,yy,std,gt,proj,driverName,outFile):
'''
Exports data to GTiff Raster
'''
std = np.squeeze(std)
std[np.isinf(std)] = -99
driver = gdal.GetDriverByName(driverName)
rows,cols = np.shape(std)
ds = driver.Create( outFile, cols, rows, 1, gdal.GDT_Float32)
if proj is not None:
ds.SetProjection(proj.ExportToWkt())
ds.SetGeoTransform(gt)
ss_band = ds.GetRasterBand(1)
ss_band.WriteArray(std)
ss_band.SetNoDataValue(-99)
ss_band.FlushCache()
ss_band.ComputeStatistics(False)
del ds
#Stuff to change
if __name__ == '__main__':
win_sizes = [7]
for win_size in win_sizes[:]:
in_raster = #Path to input raster
win = win_size
meter = str(win/4)
#Define output file names
contFile =
dissFile =
homoFile =
energyFile =
corrFile =
ASMFile =
merge, xx, yy, gt = read_raster(in_raster)
merge[np.isnan(merge)] = 0
Z,ind = sliding_window(merge,(win,win),(win,win))
Ny, Nx = np.shape(merge)
w = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(p_me)(Z[k], win) for k in xrange(len(Z)))
cont = [a[0] for a in w]
diss = [a[1] for a in w]
homo = [a[2] for a in w]
eng = [a[3] for a in w]
corr = [a[4] for a in w]
ASM = [a[5] for a in w]
#Reshape to match number of windows
plt_cont = np.reshape(cont , ( ind[0], ind[1] ) )
plt_diss = np.reshape(diss , ( ind[0], ind[1] ) )
plt_homo = np.reshape(homo , ( ind[0], ind[1] ) )
plt_eng = np.reshape(eng , ( ind[0], ind[1] ) )
plt_corr = np.reshape(corr , ( ind[0], ind[1] ) )
plt_ASM = np.reshape(ASM , ( ind[0], ind[1] ) )
del cont, diss, homo, eng, corr, ASM
#Resize Images to receive texture and define filenames
contrast = im_resize(plt_cont,Nx,Ny)
contrast[merge==0]=np.nan
dissimilarity = im_resize(plt_diss,Nx,Ny)
dissimilarity[merge==0]=np.nan
homogeneity = im_resize(plt_homo,Nx,Ny)
homogeneity[merge==0]=np.nan
energy = im_resize(plt_eng,Nx,Ny)
energy[merge==0]=np.nan
correlation = im_resize(plt_corr,Nx,Ny)
correlation[merge==0]=np.nan
ASM = im_resize(plt_ASM,Nx,Ny)
ASM[merge==0]=np.nan
del plt_cont, plt_diss, plt_homo, plt_eng, plt_corr, plt_ASM
del w,Z,ind,Ny,Nx
driverName= 'GTiff'
epsg_code=26949
proj = osr.SpatialReference()
proj.ImportFromEPSG(epsg_code)
CreateRaster(xx, yy, contrast, gt, proj,driverName,contFile)
CreateRaster(xx, yy, dissimilarity, gt, proj,driverName,dissFile)
CreateRaster(xx, yy, homogeneity, gt, proj,driverName,homoFile)
CreateRaster(xx, yy, energy, gt, proj,driverName,energyFile)
CreateRaster(xx, yy, correlation, gt, proj,driverName,corrFile)
CreateRaster(xx, yy, ASM, gt, proj,driverName,ASMFile)
del contrast, merge, xx, yy, gt, meter, dissimilarity, homogeneity, energy, correlation, ASM
This script calculates GLCM properties for a defined window size, with no overlap between adjacent windows.
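For intuition, here is a minimal serial sketch of the same non-overlapping-window idea on made-up data (hypothetical sizes, no georeferencing), using skimage's greycomatrix/greycoprops directly; the parallel script above is essentially this loop, vectorized over windows and spread across cores:
import numpy as np
from skimage.feature import greycomatrix, greycoprops

img = (np.random.rand(70, 70) * 255).astype(np.uint8)  # stand-in raster
win = 7
rows, cols = img.shape[0] // win, img.shape[1] // win
contrast = np.zeros((rows, cols))
for i in range(rows):
    for j in range(cols):
        block = img[i * win:(i + 1) * win, j * win:(j + 1) * win]
        glcm = greycomatrix(block, [1], [0], 256, symmetric=True, normed=True)
        contrast[i, j] = greycoprops(glcm, 'contrast')[0, 0]
# contrast holds one value per 7x7 window; a resize step like im_resize above
# scales such a grid back up to the full raster size.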