Implementing R-CNN using Selective Search Algorithm - python

I am implementing Selective Search, but some of the images I use give a weird error. I am attaching the code and output below.
import skimage.io
import skimage.feature
import skimage.color
import skimage.transform
import skimage.util
import skimage.segmentation
import numpy
import cv2
im_orig = img = cv2.imread("image.jpeg")
# "Selective Search for Object Recognition" by J.R.R. Uijlings et al.
#
# - Modified version with LBP extractor for texture vectorization
def _generate_segments(im_orig, scale, sigma, min_size):
"""
segment smallest regions by the algorithm of Felzenswalb and
Huttenlocher
"""
# open the Image
im_mask = skimage.segmentation.felzenszwalb(
skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma,
min_size=min_size)
# merge mask channel to the image as a 4th channel
im_orig = numpy.append(
im_orig, numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis], axis=2)
im_orig[:, :, 3] = im_mask
return im_orig
def _sim_colour(r1, r2):
"""
calculate the sum of histogram intersection of colour
"""
return sum([min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"])])
def _sim_texture(r1, r2):
"""
calculate the sum of histogram intersection of texture
"""
return sum([min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"])])
def _sim_size(r1, r2, imsize):
"""
calculate the size similarity over the image
"""
return 1.0 - (r1["size"] + r2["size"]) / imsize
def _sim_fill(r1, r2, imsize):
"""
calculate the fill similarity over the image
"""
bbsize = (
(max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"]))
* (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"]))
)
return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize
def _calc_sim(r1, r2, imsize):
return (_sim_colour(r1, r2) + _sim_texture(r1, r2)
+ _sim_size(r1, r2, imsize) + _sim_fill(r1, r2, imsize))
def _calc_colour_hist(img):
"""
calculate colour histogram for each region
the size of output histogram will be BINS * COLOUR_CHANNELS(3)
number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf]
extract HSV
"""
BINS = 25
hist = numpy.array([])
for colour_channel in (0, 1, 2):
# extracting one colour channel
c = img[:, colour_channel]
# calculate histogram for each colour and join to the result
hist = numpy.concatenate(
[hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]])
# L1 normalize
hist = hist / len(img)
return hist
def _calc_texture_gradient(img):
"""
calculate texture gradient for entire image
The original SelectiveSearch algorithm proposed Gaussian derivative
for 8 orientations, but we use LBP instead.
output will be [height(*)][width(*)]
"""
ret = numpy.zeros((img.shape[0], img.shape[1], img.shape[2]))
for colour_channel in (0, 1, 2):
ret[:, :, colour_channel] = skimage.feature.local_binary_pattern(
img[:, :, colour_channel], 8, 1.0)
return ret
def _calc_texture_hist(img):
"""
calculate texture histogram for each region
calculate the histogram of gradient for each colours
the size of output histogram will be
BINS * ORIENTATIONS * COLOUR_CHANNELS(3)
"""
BINS = 10
hist = numpy.array([])
for colour_channel in (0, 1, 2):
# mask by the colour channel
fd = img[:, colour_channel]
# calculate histogram for each orientation and concatenate them all
# and join to the result
hist = numpy.concatenate(
[hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]])
# L1 Normalize
hist = hist / len(img)
return hist
def _extract_regions(img):
R = {}
# get hsv image
hsv = skimage.color.rgb2hsv(img[:, :, :3])
# pass 1: count pixel positions
for y, i in enumerate(img):
for x, (r, g, b, l) in enumerate(i):
# initialize a new region
if l not in R:
R[l] = {
"min_x": 0xffff, "min_y": 0xffff,
"max_x": 0, "max_y": 0, "labels": [l]}
# bounding box
if R[l]["min_x"] > x:
R[l]["min_x"] = x
if R[l]["min_y"] > y:
R[l]["min_y"] = y
if R[l]["max_x"] < x:
R[l]["max_x"] = x
if R[l]["max_y"] < y:
R[l]["max_y"] = y
# pass 2: calculate texture gradient
tex_grad = _calc_texture_gradient(img)
# pass 3: calculate colour histogram of each region
for k, v in list(R.items()):
# colour histogram
masked_pixels = hsv[:, :, :][img[:, :, 3] == k]
R[k]["size"] = len(masked_pixels / 4)
R[k]["hist_c"] = _calc_colour_hist(masked_pixels)
# texture histogram
R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k])
return R
def _extract_neighbours(regions):
def intersect(a, b):
if (a["min_x"] < b["min_x"] < a["max_x"]
and a["min_y"] < b["min_y"] < a["max_y"]) or (
a["min_x"] < b["max_x"] < a["max_x"]
and a["min_y"] < b["max_y"] < a["max_y"]) or (
a["min_x"] < b["min_x"] < a["max_x"]
and a["min_y"] < b["max_y"] < a["max_y"]) or (
a["min_x"] < b["max_x"] < a["max_x"]
and a["min_y"] < b["min_y"] < a["max_y"]):
return True
return False
R = list(regions.items())
neighbours = []
for cur, a in enumerate(R[:-1]):
for b in R[cur + 1:]:
if intersect(a[1], b[1]):
neighbours.append((a, b))
return neighbours
def _merge_regions(r1, r2):
new_size = r1["size"] + r2["size"]
rt = {
"min_x": min(r1["min_x"], r2["min_x"]),
"min_y": min(r1["min_y"], r2["min_y"]),
"max_x": max(r1["max_x"], r2["max_x"]),
"max_y": max(r1["max_y"], r2["max_y"]),
"size": new_size,
"hist_c": (
r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size,
"hist_t": (
r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size,
"labels": r1["labels"] + r2["labels"]
}
return rt
def selective_search(
im_orig, scale=1.0, sigma=0.8, min_size=500):
'''Selective Search
Parameters
----------
im_orig : ndarray
Input image
scale : int
Free parameter. Higher means larger clusters in felzenszwalb segmentation.
sigma : float
Width of Gaussian kernel for felzenszwalb segmentation.
min_size : int
Minimum component size for felzenszwalb segmentation.
Returns
-------
img : ndarray
image with region label
region label is stored in the 4th value of each pixel [r,g,b,(region)]
regions : array of dict
[
{
'rect': (left, top, width, height),
'labels': [...],
'size': component_size
},
...
]
'''
assert im_orig.shape[2] == 3, "3ch image is expected"
# load image and get smallest regions
# region label is stored in the 4th value of each pixel [r,g,b,(region)]
img = _generate_segments(im_orig, scale, sigma, min_size)
if img is None:
return None, {}
imsize = img.shape[0] * img.shape[1]
R = _extract_regions(img)
# extract neighbouring information
neighbours = _extract_neighbours(R)
# calculate initial similarities
S = {}
for (ai, ar), (bi, br) in neighbours:
S[(ai, bi)] = _calc_sim(ar, br, imsize)
# hierarchal search
while S != {}:
# get highest similarity
i, j = sorted(S.items(), key=lambda i: i[1])[-1][0]
# merge corresponding regions
t = max(R.keys()) + 1.0
R[t] = _merge_regions(R[i], R[j])
# mark similarities for regions to be removed
key_to_delete = []
for k, v in list(S.items()):
if (i in k) or (j in k):
key_to_delete.append(k)
# remove old similarities of related regions
for k in key_to_delete:
del S[k]
# calculate similarity set with the new region
for k in [a for a in key_to_delete if a != (i, j)]:
n = k[1] if k[0] in (i, j) else k[0]
S[(t, n)] = _calc_sim(R[t], R[n], imsize)
regions = []
for k, r in list(R.items()):
regions.append({
'labels': r['labels'],
'rect': (
r['min_x'], r['min_y'],
r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
'size': r['size']
})
return img, regions
img_ou, region = selective_search(im_orig)
for r in region:
print(r)
cv2.imshow('image', img_ou)
cv2.waitKey(0)
cv2.destroyAllWindows()
Error Message
Also, can you please explain what labels is? I am having a tough time understanding whether it is useful or not; it is a list with only a few elements at first, but later it gets more elements. The major problem, though, is the image error mentioned above. Any help will be deeply appreciated.

Hi, I know I am late, but I still thought I would answer the question from your comment: you have to give the cropped image as the input to your CNN.
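For illustration, here is a minimal sketch of that step, assuming the selective_search function above; the 224x224 size is only an example, and my_cnn is a hypothetical placeholder for whatever classifier you actually train:
import cv2
im_orig = cv2.imread("image.jpeg")
img_out, regions = selective_search(im_orig)
for r in regions:
    x, y, w, h = r['rect']  # rect is (left, top, width, height)
    if w == 0 or h == 0:
        continue  # skip degenerate proposals
    crop = im_orig[y:y + h, x:x + w]
    crop = cv2.resize(crop, (224, 224))
    # score = my_cnn.predict(crop[None, ...])  # hypothetical CNN call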

Related

Deprojecting depth onto original mesh

I am trying to get Blender to render the depth map of an object and then move it to overlay the original object. Currently I have no issue with rendering the object and extracting it into its place.
However, I am stuck when trying to position the object back into its original position.
I'm trying to apply the inverse camera world matrix to the rendered point cloud (in blue). Unfortunately, when I apply said camera inverse it appears nowhere near where I'd expect (in red).
I have attached the entirety of the code needed to replicate this behaviour. I would appreciate it if someone could point me to the right matrix that I should be multiplying the point cloud by.
from mathutils import Vector, Quaternion, Euler, Matrix
import numpy as np
import bpy
def main_script():
clear_scene()
prepare_views()
tmp_path = "/tmp/tmp_render.exr"
scene = get_scene("Scene")
camera = create_camera("Camera")
camera.rotation_euler = Euler([np.pi * 0.5, 0, np.pi * 0.5], "XYZ")
camera.location = Vector([4.5, 0, 1])
bpy.ops.mesh.primitive_monkey_add(
location=(0, 0, 1), rotation=(0, 0, np.pi*0.5), size=1.0)
_w, _h = 640, 480
update_scene()
init_rendering(scene, camera, width=640, height=480)
update_scene()
matrix_K = get_calibration_matrix_K_from_blender(scene, camera.data)
_fy, _fx = matrix_K[0][0], matrix_K[1][1]
_cy, _cx = matrix_K[0][2], matrix_K[1][2]
scene.render.filepath = tmp_path
bpy.ops.render.render(write_still=True)
depth = read_exr(tmp_path, "R")["R"]
depth = np.reshape(convert_to_numpy(depth), [_h, _w])
exr_cloud = depth_to_cloud(
_w, _h, _fx, _fy, _cx, _cy, depth)
exr_cloud = np.reshape(exr_cloud, [-1, 3])
exr_cloud = exr_cloud[(exr_cloud[..., 2] < 100) & (exr_cloud[..., 2] > 0)]
matrix = np.reshape(camera.matrix_world, [4, 4])
matrix = np.linalg.inv(matrix) # why doesn't this place the depth properly
vertices = np.ones([exr_cloud.shape[0], 4], dtype=np.float32)
vertices[:, 0:3] = exr_cloud
vertices = np.array(
[matrix @ vertex for vertex in vertices], dtype=np.float32)
vertices = vertices[..., :3]
create_mesh("Suzanne_EXR", exr_cloud, [])
create_mesh("SuzanneT_EXR", vertices, [])
"""
utilities methods required to run the script
"""
def clear_scene():
for scene in bpy.data.scenes:
for obj in scene.objects:
bpy.context.collection.objects.unlink(obj)
def read_exr(path, channels):
import OpenEXR as _OpenEXR
import Imath as _Imath
file = _OpenEXR.InputFile(path)
FLOAT = _Imath.PixelType(_Imath.PixelType.FLOAT)
results = {}
for ch in channels:
results[ch] = file.channel(ch, FLOAT)
file.close()
return results
def convert_to_numpy(data):
import array as _array
return np.array(_array.array("f", data).tolist())
def update_scene():
dg = bpy.context.evaluated_depsgraph_get()
dg.update()
def prepare_views():
preferences = bpy.context.preferences
preferences.view.show_tooltips_python = True
preferences.view.show_developer_ui = True
preferences.view.render_display_type = "NONE"
def init_rendering(scene, camera, width=None, height=None):
def set_rendering_settings(camera, scene, width=640, height=480):
image_settings = scene.render.image_settings
image_settings.file_format = "OPEN_EXR"
image_settings.use_zbuffer = True
scene.render.resolution_x = width
scene.render.resolution_y = height
# scene.render.use_antialiasing = False
scene.use_nodes = True
scene.camera = camera
node_tree = scene.node_tree
nodes = node_tree.nodes
node_render_layers = nodes["Render Layers"]
node_composite = nodes["Composite"]
node_tree.links.clear()
node_tree.links.new(
node_render_layers.outputs["Depth"], node_composite.inputs["Image"])
set_rendering_settings(camera, scene)
def get_scene(name): return bpy.data.scenes[name]
def create_camera(name):
camera = bpy.data.cameras.new(name)
camera.lens = 50
obj = bpy.data.objects.new(name, camera)
bpy.context.collection.objects.link(obj)
return obj
# ---------------------------------------------------------------
# 3x4 P matrix from Blender camera
# ---------------------------------------------------------------
# Build intrinsic camera parameters from Blender camera data
#
# See notes on this in
# blender.stackexchange.com/questions/15102/what-is-blenders-camera-projection-matrix-model
def get_calibration_matrix_K_from_blender(scene, camera):
from mathutils import Matrix as _Matrix
f_in_mm = camera.lens
resolution_x_in_px = scene.render.resolution_x
resolution_y_in_px = scene.render.resolution_y
scale = scene.render.resolution_percentage / 100
sensor_width_in_mm = camera.sensor_width
sensor_height_in_mm = camera.sensor_height
pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
if (camera.sensor_fit == 'VERTICAL'):
# the sensor height is fixed (sensor fit is horizontal),
# the sensor width is effectively changed with the pixel aspect ratio
s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio
s_v = resolution_y_in_px * scale / sensor_height_in_mm
else: # 'HORIZONTAL' and 'AUTO'
# the sensor width is fixed (sensor fit is horizontal),
# the sensor height is effectively changed with the pixel aspect ratio
pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
s_u = resolution_x_in_px * scale / sensor_width_in_mm
s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm
# Parameters of intrinsic calibration matrix K
alpha_u = f_in_mm * s_u
alpha_v = f_in_mm * s_v
u_0 = resolution_x_in_px * scale / 2
v_0 = resolution_y_in_px * scale / 2
skew = 0 # only use rectangular pixels
K = _Matrix(
((alpha_u, skew, u_0),
(0, alpha_v, v_0),
(0, 0, 1)))
return K
def create_mesh(name, vertices, faces):
import bmesh as _bmesh
mesh = bpy.data.meshes.new("Mesh_%s" % name)
mesh.from_pydata(vertices, [], faces)
mesh.update()
obj = bpy.data.objects.new(name, mesh)
bpy.context.collection.objects.link(obj)
bm = _bmesh.new()
bm.from_mesh(mesh)
bm.to_mesh(mesh)
bm.free()
return obj
def depth_to_cloud(w, h, fx, fy, cx, cy, depth):
from numpy import concatenate as _concat
from numpy import indices as _indices
from numpy import newaxis as _newaxis
indices = _indices(depth.shape)
indices_y, indices_x = indices
ys, xs, zs = \
(indices_y - cy) * depth / fy, \
(indices_x - cx) * depth / fx, \
depth
points = _concat([xs[..., _newaxis], ys[..., _newaxis],
zs[..., _newaxis]], axis=2)
return points
if __name__ == "__main__":
raise main_script()
The problem was compound. First, I needed to change my transformed-vertex calculation to use a negatively scaled camera world matrix instead of the inverse camera world matrix, like so:
matrix_cam = np.reshape(camera.matrix_world, [4, 4])
mat_scale = np.array(Matrix.Scale(-1, 4))
matrix = matrix_cam @ mat_scale
vertices = np.ones([exr_cloud.shape[0], 4], dtype=np.float32)
vertices[:, 0:3] = exr_cloud
vertices = np.array(
[matrix @ vertex for vertex in vertices], dtype=np.float32)
vertices = vertices[..., :3]
Additionally, there was an issue with the depth decoding which caused the point cloud to be deformed; I fixed it like so:
ys, xs, zs = \
(indices_y - cx) * depth / fx, \
(indices_x - cy) * depth / fy, \
depth
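As a side note, the per-vertex list comprehension can be replaced by a single matrix product; a small sketch, assuming the same matrix and exr_cloud variables as above:
import numpy as np
# Vectorised version of the per-vertex loop: one (4, 4) x (4, N) product.
vertices = np.ones([exr_cloud.shape[0], 4], dtype=np.float32)
vertices[:, 0:3] = exr_cloud
vertices = (matrix @ vertices.T).T[:, :3].astype(np.float32)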

python program to fade an image in radial direction

I am trying to write a program which fades an image in the radial direction, which means that as we move away from the centre of the image, the pixels fade to black. For this, I have written five different functions:
center: returns the coordinate pair (center_y, center_x) of the image center.
radial_distance: returns, for an image with width w and height h, an array of shape (h, w), where the value at index (i, j) gives the Euclidean distance from the point (i, j) to the center of the image.
scale: returns a copy of the array 'a' (or image) with its elements scaled to be in the given range.
radial_mask: takes an image as a parameter and returns an array of the same height and width, filled with values between 0.0 and 1.0.
radial_fade: returns the image multiplied by its radial mask.
The program is:
import numpy as np
import matplotlib.pyplot as plt
def center(a):
y, x = a.shape[:2]
return ((y-1)/2,(x-1)/2) # note the order: (center_y, center_x)
def radial_distance(b):
h, w = b.shape[:2]
y, x = center(b)
o = b[:h,:w,0]
for i in range(h):
for j in range(w):
o[i,j] = np.sqrt((y-i)**2 + (x-j)**2)
return o
def scale(c, tmin=0.0, tmax=1.0):
"""Returns a copy of array 'a' with its values scaled to be in the range
[tmin,tmax]."""
mini, maxi = c.min(), c.max()
if maxi == 0:
return 0
else:
m = (tmax - tmin)/(maxi - mini)
f = tmin - m*mini
return c*m + f
def radial_mask(d):
f = radial_distance(d)
g = scale(f, tmin=0.0, tmax=1.0)
# f = g[:,:,0]
n = 1.0 - g
return n
def radial_fade(l):
f, g = l.shape[:2]
q = l[:f,:g,0]
return q * radial_mask(l)
image = plt.imread("src/painting.png")
fig, ax = plt.subplots(3)
masked = radial_mask(image)
faded = radial_fade(image)
ax[0].imshow(image)
ax[1].imshow(masked)
ax[2].imshow(faded)
plt.show()
There is something wrong somewhere in the code, as it does not do the expected job.
One problem is that in
o = b[:h,:w,0]
you're using the same precision as the image, which may be integers (e.g. uint8).
You should use, for example,
o = np.zeros((h, w), np.float32)
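With that fix in place, the double loop in radial_distance can also be replaced by array operations; a sketch, assuming the same (height, width[, channels]) image layout as above:
import numpy as np
def radial_distance(b):
    # Distance of every pixel from the image centre, as float32.
    h, w = b.shape[:2]
    cy, cx = (h - 1) / 2, (w - 1) / 2
    yy, xx = np.indices((h, w), dtype=np.float32)
    return np.sqrt((yy - cy) ** 2 + (xx - cx) ** 2)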

Generating boolean matrix from image

I am trying to classify an image by selecting a pixel at random, then finding all pixels in the image that are within a certain Euclidean distance in color space from that original pixel. My current script takes a prohibitively long time. I wonder if I am able to use this equation to generate a boolean matrix that will allow quicker manipulation of the image:
(x - cx)^2 + (y - cy)^2 + (z - cz)^2 < r^2
Here is the code I am using now:
import PIL, glob, numpy, random, math, time
def zone_map(picture, threshold):
im = PIL.Image.open(picture)
pix = im.load()
[width, height] = im.size
mask = numpy.zeros((width,height))
while 0 in mask:
x = random.randint(0, width)
y = random.randint(0, height)
if mask[x, y] == 0:
point = pix[x,y]
to_average = {(x, y): pix[x, y]}
start = time.clock()
for row in range(0, width):
for column in range(0, height):
if euclid_dist(point, pix[row,column]) <= threshold:
to_average[(row,column)] = pix[row, column]
#to_average = in_sphere(pix, point)
end = time.clock()
print(end - start)
to_average_sum = (0, 0, 0)
for value in to_average.values():
to_average_sum = tuple_sum(to_average_sum, value)
average = tuple_divide(to_average_sum, len(to_average.values()))
for coordinate in to_average.keys():
pix[coordinate] = average
mask[coordinate] = 1
unique, counts = numpy.unique(mask, return_counts=True)
progress = dict(zip(unique, counts))
print((progress[1] / progress[0])*100, '%')
im.save()
return im
def euclid_dist(tuple1, tuple2):
"""
Finds the Euclidean distance between two points in n-dimensional space
"""
tot_sq = 0
for num1, num2 in zip(tuple1, tuple2):
tot_sq += (num1 - num2)**2
return math.sqrt(tot_sq)
def tuple_sum(tuple1, tuple2):
"""
Returns tuple comprised of sums of input tuples
"""
sums = []
for num1, num2 in zip(tuple1, tuple2):
sums.append(num1 + num2)
return tuple(sums)
def tuple_divide(tuple1, divisor):
"""
Divides numerical values of a tuple by divisor, yielding integer results
"""
quotients = []
for value in tuple1:
quotients.append(int(round(value/divisor)))
return tuple(quotients)
Any information on how to incorporate the described boolean matrix, or any other ideas on how to speed this up, would be greatly appreciated.
Just load the image as a numpy array, and then use array operations instead of looping over pixels:
import numpy as np
import matplotlib.pyplot as plt
import PIL
def zone_map(picture, threshold, show=True):
with PIL.Image.open(picture) as img:
rgb = np.array(img, dtype=np.float64)
height, width, _ = rgb.shape
mask = np.zeros_like(rgb)
while not np.any(mask):
# get random pixel
position = np.random.randint(height), np.random.randint(width)
color = rgb[position]
# get euclidean distance of all pixels in colour space
distance = np.sqrt(np.sum((rgb - color)**2, axis=-1))
# threshold
mask = distance < threshold
if show: # show output
fig, (ax1, ax2) = plt.subplots(1,2)
ax1.imshow(rgb.astype(np.uint8))
ax2.imshow(mask, cmap='gray')
fig.suptitle('Random color: {}'.format(color))
return mask
def test():
zone_map("Lenna.jpg", threshold=20)
plt.show()
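If you also want the averaging step from your original script (recolouring every matched pixel with the mean colour of the selected region), that is one more array operation on top of the returned mask; a sketch, assuming the rgb array and boolean mask produced inside zone_map:
import numpy as np
def recolor_region(rgb, mask):
    # Replace every pixel selected by the boolean mask with the mean
    # colour of that region (rounded, like the original tuple_divide).
    out = rgb.copy()
    out[mask] = np.round(rgb[mask].mean(axis=0))
    return out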

Sliding window in Python for GLCM calculation

I am trying to do texture analysis on satellite imagery using the GLCM algorithm. The scikit-image documentation is very helpful on that, but for the GLCM calculation we need a window sliding over the image. This is too slow in Python. I found many posts on Stack Overflow about sliding windows, but the computation takes forever. I have an example shown below; it works but takes forever. I guess this must be a naive way of doing it:
image = np.pad(image, int(win/2), mode='reflect')
row, cols = image.shape
feature_map = np.zeros((M, N))
for m in xrange(0, row):
for n in xrange(0, cols):
window = image[m:m+win, n:n+win]
glcm = greycomatrix(window, d, theta, levels)
contrast = greycoprops(glcm, 'contrast')
feature_map[m,n] = contrast
I came across the skimage.util.view_as_windows method, which might be a good solution for me. My problem is that, when I try to calculate the GLCM, I get an error which says:
ValueError: The parameter image must be a 2-dimensional array
This is because the result of view_as_windows is 4-dimensional, while scikit-image's greycomatrix accepts only 2D arrays. Here is my attempt:
win_w=40
win_h=40
features = np.zeros(image.shape, dtype='uint8')
target = features[win_h//2:-win_h//2+1, win_w//2:-win_w//2+1]
windowed = view_as_windows(image, (win_h, win_w))
GLCM = greycomatrix(windowed, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4], symmetric=True, normed=True)
haralick = greycoprops(GLCM, 'ASM')
Does anyone have an idea on how I can calculate the GLCM using skimage.util.view_as_windows method?
The feature extraction you are trying to perform is a compute-intensive task. I have sped up your method by computing the co-occurrence map only once for the whole image, rather than computing it over and over on overlapping positions of the sliding window.
The co-occurrence map is a stack of images of the same size as the original image, in which - for each pixel - intensity levels are replaced by integer numbers that encode the co-occurrence of two intensities, namely Ii at that pixel and Ij at an offset pixel. The co-occurrence map has as many layers as we considered offsets (i.e. all the possible distance-angle pairs). By retaining the co-occurrence map you don't need to compute the GLCM at each position of the sliding window from scratch, as you can reuse the previously computed co-occurrence maps to obtain the adjacency matrices (the GLCMs) for each distance-angle pair. This approach provides you with a significant speed gain.
The solution I came up with relies on the functions below:
import numpy as np
from skimage import io
from scipy import stats
from skimage.feature import greycoprops
def offset(length, angle):
"""Return the offset in pixels for a given length and angle"""
dv = length * np.sign(-np.sin(angle)).astype(np.int32)
dh = length * np.sign(np.cos(angle)).astype(np.int32)
return dv, dh
def crop(img, center, win):
"""Return a square crop of img centered at center (side = 2*win + 1)"""
row, col = center
side = 2*win + 1
first_row = row - win
first_col = col - win
last_row = first_row + side
last_col = first_col + side
return img[first_row: last_row, first_col: last_col]
def cooc_maps(img, center, win, d=[1], theta=[0], levels=256):
"""
Return a set of co-occurrence maps for different d and theta in a square
crop centered at center (side = 2*w + 1)
"""
shape = (2*win + 1, 2*win + 1, len(d), len(theta))
cooc = np.zeros(shape=shape, dtype=np.int32)
row, col = center
Ii = crop(img, (row, col), win)
for d_index, length in enumerate(d):
for a_index, angle in enumerate(theta):
dv, dh = offset(length, angle)
Ij = crop(img, center=(row + dv, col + dh), win=win)
cooc[:, :, d_index, a_index] = encode_cooccurrence(Ii, Ij, levels)
return cooc
def encode_cooccurrence(x, y, levels=256):
"""Return the code corresponding to co-occurrence of intensities x and y"""
return x*levels + y
def decode_cooccurrence(code, levels=256):
"""Return the intensities x, y corresponding to code"""
return code//levels, np.mod(code, levels)
def compute_glcms(cooccurrence_maps, levels=256):
"""Compute the cooccurrence frequencies of the cooccurrence maps"""
Nr, Na = cooccurrence_maps.shape[2:]
glcms = np.zeros(shape=(levels, levels, Nr, Na), dtype=np.float64)
for r in range(Nr):
for a in range(Na):
table = stats.itemfreq(cooccurrence_maps[:, :, r, a])
codes = table[:, 0]
freqs = table[:, 1]/float(table[:, 1].sum())
i, j = decode_cooccurrence(codes, levels=levels)
glcms[i, j, r, a] = freqs
return glcms
def compute_props(glcms, props=('contrast',)):
"""Return a feature vector corresponding to a set of GLCM"""
Nr, Na = glcms.shape[2:]
features = np.zeros(shape=(Nr, Na, len(props)))
for index, prop_name in enumerate(props):
features[:, :, index] = greycoprops(glcms, prop_name)
return features.ravel()
def haralick_features(img, win, d, theta, levels, props):
"""Return a map of Haralick features (one feature vector per pixel)"""
rows, cols = img.shape
margin = win + max(d)
arr = np.pad(img, margin, mode='reflect')
n_features = len(d) * len(theta) * len(props)
feature_map = np.zeros(shape=(rows, cols, n_features), dtype=np.float64)
for m in xrange(rows):
for n in xrange(cols):
coocs = cooc_maps(arr, (m + margin, n + margin), win, d, theta, levels)
glcms = compute_glcms(coocs, levels)
feature_map[m, n, :] = compute_props(glcms, props)
return feature_map
DEMO
The following results correspond to a (250, 200) pixels crop from a Landsat image. I have considered two distances, four angles, and two GLCM properties. This results in a 16-dimensional feature vector for each pixel. Notice that the sliding window is square and its side is 2*win + 1 pixels (in this test a value of win = 19 was used). This sample run took around 6 minutes, which is considerably shorter than "forever" ;-)
In [331]: img.shape
Out[331]: (250L, 200L)
In [332]: img.dtype
Out[332]: dtype('uint8')
In [333]: d = (1, 2)
In [334]: theta = (0, np.pi/4, np.pi/2, 3*np.pi/4)
In [335]: props = ('contrast', 'homogeneity')
In [336]: levels = 256
In [337]: win = 19
In [338]: %time feature_map = haralick_features(img, win, d, theta, levels, props)
Wall time: 5min 53s
In [339]: feature_map.shape
Out[339]: (250L, 200L, 16L)
In [340]: feature_map[0, 0, :]
Out[340]:
array([ 10.3314, 0.3477, 25.1499, 0.2738, 25.1499, 0.2738,
25.1499, 0.2738, 23.5043, 0.2755, 43.5523, 0.1882,
43.5523, 0.1882, 43.5523, 0.1882])
In [341]: io.imshow(img)
Out[341]: <matplotlib.image.AxesImage at 0xce4d160>
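One small caveat if you run this with a recent SciPy: scipy.stats.itemfreq has since been removed, so the frequency table inside compute_glcms may need to be built with numpy.unique instead. A sketch of the affected lines, under that assumption:
import numpy as np
# Drop-in replacement for the stats.itemfreq call inside compute_glcms:
codes, counts = np.unique(cooccurrence_maps[:, :, r, a], return_counts=True)
freqs = counts / float(counts.sum())
i, j = decode_cooccurrence(codes, levels=levels)
glcms[i, j, r, a] = freqs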

How to find the colour of the main object in a fixed-size image

My aim is to find the colour of the main object in a frame/image. In my case the image is always of the same type: for example, a news reporter (human) in a forest, or a news reporter on an animal farm. The position of the news reporter is also the same. What is a simple solution to find the dominant colour of the main object (the news reporter)?
Any help is welcome. Thanks.
EDIT Code added
import cv2
from collections import namedtuple
from math import sqrt
import random
import webcolors
try:
import Image
except ImportError:
from PIL import Image
Point = namedtuple('Point', ('coords', 'n', 'ct'))
Cluster = namedtuple('Cluster', ('points', 'center', 'n'))
def get_points(img):
points = []
w, h = img.size
for count, color in img.getcolors(w * h):
points.append(Point(color, 3, count))
return points
rtoh = lambda rgb: '#%s' % ''.join(('%02x' % p for p in rgb))
def colorz(filename, n=3):
img = Image.open(filename)
img.thumbnail((200, 200))
w, h = img.size
points = get_points(img)
clusters = kmeans(points, n, 1)
rgbs = [map(int, c.center.coords) for c in clusters]
return map(rtoh, rgbs)
def euclidean(p1, p2):
return sqrt(sum([
(p1.coords[i] - p2.coords[i]) ** 2 for i in range(p1.n)
]))
def calculate_center(points, n):
vals = [0.0 for i in range(n)]
plen = 0
for p in points:
plen += p.ct
for i in range(n):
vals[i] += (p.coords[i] * p.ct)
return Point([(v / plen) for v in vals], n, 1)
def kmeans(points, k, min_diff):
clusters = [Cluster([p], p, p.n) for p in random.sample(points, k)]
while 1:
plists = [[] for i in range(k)]
for p in points:
smallest_distance = float('Inf')
for i in range(k):
distance = euclidean(p, clusters[i].center)
if distance < smallest_distance:
smallest_distance = distance
idx = i
plists[idx].append(p)
diff = 0
for i in range(k):
old = clusters[i]
center = calculate_center(plists[i], old.n)
new = Cluster(plists[i], center, old.n)
clusters[i] = new
diff = max(diff, euclidean(old.center, new.center))
if diff < min_diff:
break
return clusters
def main():
img = cv2.imread('d:/Emmanu/project-data/b1.jpg')
res=cv2.resize(img,(400,300))
crop_img = res[100:200, 150:250]
cv2.imwrite("d:/Emmanu/project-data/color-test.jpg", crop_img)
g= colorz('d:/Emmanu/project-data/color-test.jpg',1)
k=g[0]
print k
f=webcolors.hex_to_rgb(k)
print webcolors.rgb_to_name(f, spec='css3')
if __name__ == '__main__':main()
The problem is that this returns the dominant colour of the whole image, not of the main object.
If you're taking the colour of the whole image, in most cases you will get the wrong answer, since the background dominates. If your image size is fixed and you are sure about the object's position, the simplest solution is to crop the image where you expect the object. In most cases it will work.
In order to crop:
import cv2
img = cv2.imread("'d:/Emmanu/project-data/b1.jpg'")
crop_img = img[200:400, 100:300] # Crop from x, y, w, h -> 100, 200, 300, 400
# NOTE: its img[y: y + h, x: x + w] and *not* img[x: x + w, y: y + h]
cv2.imshow("cropped", crop_img)
cv2.waitKey(0)
Now give this crop_img as input to your code, and in most cases it will give the correct solution. There is nothing simpler than this. I think this will help.
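As a small refinement, the intermediate cv2.imwrite / re-open round trip can be skipped by converting the cropped OpenCV array (which is BGR) to a PIL image in memory; a sketch, assuming colorz is adapted to accept a PIL Image instead of a filename:
import cv2
from PIL import Image
img = cv2.imread('d:/Emmanu/project-data/b1.jpg')
res = cv2.resize(img, (400, 300))
crop_img = res[100:200, 150:250]
# Convert BGR (OpenCV) to RGB (PIL) without touching the disk.
pil_crop = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))
g = colorz(pil_crop, 1)  # assumes colorz is changed to take an Image object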
