matplotlib: changing a single voxel face color - python

I want to reproduce this image using matplotlib. The example docs have a numpy logo, but all the voxel cubes are homogeneous in color.
I could imagine making a separate surface plot for each face I want to change, but that seems impractical. Here's the code for the numpy logo from the example docs:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

def explode(data):
    size = np.array(data.shape) * 2
    data_e = np.zeros(size - 1, dtype=data.dtype)
    data_e[::2, ::2, ::2] = data
    return data_e

# build up the numpy logo
n_voxels = np.zeros((4, 3, 4), dtype=bool)
n_voxels[0, 0, :] = True
n_voxels[-1, 0, :] = True
n_voxels[1, 0, 2] = True
n_voxels[2, 0, 1] = True
facecolors = np.where(n_voxels, '#FFD65DC0', '#7A88CCC0')
edgecolors = np.where(n_voxels, '#BFAB6E', '#7D84A6')
filled = np.ones(n_voxels.shape)

# upscale the above voxel image, leaving gaps
filled_2 = explode(filled)
fcolors_2 = explode(facecolors)
ecolors_2 = explode(edgecolors)

# Shrink the gaps
x, y, z = np.indices(np.array(filled_2.shape) + 1).astype(float) // 2
x[0::2, :, :] += 0.05
y[:, 0::2, :] += 0.05
z[:, :, 0::2] += 0.05
x[1::2, :, :] += 0.95
y[:, 1::2, :] += 0.95
z[:, :, 1::2] += 0.95

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in matplotlib 3.6
ax.voxels(x, y, z, filled_2, facecolors=fcolors_2, edgecolors=ecolors_2)
plt.show()
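One way to recolor a single face without redrawing any voxels is to overlay a small plot_surface quad on it; that is the idea the answer below builds out. As a minimal sketch of my own (not from the docs example), using the x, y, z corner grids built above and picking, purely for illustration, the near z face of the voxel at index (0, 0, 0); it would go just before plt.show():

# Overlay one yellow quad on the near z face of voxel (0, 0, 0).
# Voxel indices are doubled because explode() interleaves gap voxels.
i, j, k = 0, 0, 0
xs = x[2*i:2*i + 2, 2*j:2*j + 2, 2*k]  # 2x2 grids of the face's corner coordinates
ys = y[2*i:2*i + 2, 2*j:2*j + 2, 2*k]
zs = z[2*i:2*i + 2, 2*j:2*j + 2, 2*k]
ax.plot_surface(xs, ys, zs, color='#FFE500', shade=False)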

'''
=====================================
Rotating 3D voxel animation of PYTHON
=====================================
Demonstrates using ``ax.voxels`` with uneven coordinates.
'''
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as manimation
from math import copysign

def explode(data):
    size = np.array(data.shape) * 2
    data_e = np.zeros(size - 1, dtype=data.dtype)
    data_e[::2, ::2, ::2] = data
    return data_e
def voxel_face(corns, dm, nf):
    '''
    Grab the corner coordinates of one voxel face.

    Parameters
    ----------
    corns : np.indices array of corners for one voxel
    dm : dimension; values can be 0 (x), 1 (y), 2 (z)
    nf : near/far face; values can be 0 (near), 1 (far)
    '''
    lc = corns.copy()  # local copy so we don't swap the original
    if dm == 1:  # swap y into x and correct the ordering
        lc[0], lc[1] = corns[1].transpose(1, 0, 2), corns[0].transpose(1, 0, 2)
    if dm == 2:  # swap z into x and correct the ordering
        lc[0], lc[2] = corns[2].transpose(2, 1, 0), corns[0].transpose(2, 1, 0)
    ret = np.zeros((3, 2, 2))
    xc1 = lc[0, nf, 0, 0]  # hold the x dim constant
    ret[0, :] = np.array([[xc1, xc1], [xc1, xc1]])
    yc1, yc2 = lc[1, 0, 0:2, 0]
    ret[1, :] = np.array([[yc1, yc2], [yc1, yc2]])
    zc1, zc2 = lc[2, 0, 0, 0:2]
    ret[2, :] = np.array([[zc1, zc1], [zc2, zc2]])
    if dm != 0:  # swap x back into the desired dimension
        ret[0], ret[dm] = ret[dm].copy(), ret[0].copy()
    return ret
# build the PYTHON letters
n_voxels = np.zeros((4, 4, 5), dtype=bool)
letters = [None] * 6
letter_faces = np.zeros((6, 2), dtype=int)

# P
n_voxels[0, 0, :] = True
n_voxels[:, 0, -3] = True
n_voxels[:, 0, -1] = True
n_voxels[-1, 0, -2] = True
letters[0] = np.array(np.where(n_voxels)).T
letter_faces[0] = [1, 0]  # near y face
n_voxels[...] = False

# Y
n_voxels[-1, 0, -3:] = True
n_voxels[-1, -1, :] = True
n_voxels[-1, :, -3] = True
n_voxels[-1, :, 0] = True
letters[1] = np.array(np.where(n_voxels)).T
letter_faces[1] = [0, 1]  # far x face
n_voxels[...] = False

# T
n_voxels[:, 0, -1] = True
n_voxels[1:3, :, -1] = True
letters[2] = np.array(np.where(n_voxels)).T
letter_faces[2] = [2, 1]  # far z face
n_voxels[...] = False

# H
n_voxels[0, 0, :] = True
n_voxels[0, -1, :] = True
n_voxels[0, :, 2] = True
letters[3] = np.array(np.where(n_voxels)).T
letter_faces[3] = [0, 0]  # near x face
n_voxels[...] = False

# O
n_voxels[0, 1:3, 0] = True
n_voxels[-1, 1:3, 0] = True
n_voxels[1:3, 0, 0] = True
n_voxels[1:3, -1, 0] = True
letters[4] = np.array(np.where(n_voxels)).T
letter_faces[4] = [2, 0]  # near z face
n_voxels[...] = False

# N
n_voxels[0, -1, :] = True
n_voxels[-1, -1, :] = True
n_voxels[1, -1, 1:3] = True
n_voxels[2, -1, 2:4] = True
letters[5] = np.array(np.where(n_voxels)).T
letter_faces[5] = [1, 1]  # far y face
n_voxels[...] = False
fcol = np.full(n_voxels.shape, '#7A88CC60')
ecol = np.full(n_voxels.shape, '#7D84A6')
filled = np.ones(n_voxels.shape)

# upscale the above voxel image, leaving gaps
filled_2 = explode(filled)
fcolors_2 = explode(fcol)
ecolors_2 = explode(ecol)

# Shrink the gaps
corn = np.indices(np.array(filled_2.shape) + 1).astype(float) // 2
ccorn = 0.05  # near-corner offset
fcorn = 1.0 - ccorn  # far-corner offset
corn[0, 0::2, :, :] += ccorn
corn[1, :, 0::2, :] += ccorn
corn[2, :, :, 0::2] += ccorn
corn[0, 1::2, :, :] += fcorn
corn[1, :, 1::2, :] += fcorn
corn[2, :, :, 1::2] += fcorn

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in matplotlib 3.6
ax.axis("off")

# Plot the voxels
x, y, z = corn
ax.voxels(x, y, z, filled_2, facecolors=fcolors_2, edgecolors=ecolors_2)

# Plot the letter faces as colored squares
for jj, letter in enumerate(l for l in letters if l is not None):
    for i in letter:
        i = i * 2  # skip the gap voxels introduced by explode()
        loc = corn[:, i[0]:i[0]+2, i[1]:i[1]+2, i[2]:i[2]+2]  # corners of this voxel
        face = voxel_face(loc, letter_faces[jj, 0], letter_faces[jj, 1])
        ax.plot_surface(face[0], face[1], face[2], color='#ffe500a0', shade=False)
# Views: PY, P, Y, T, H, O, N, PY
view_elev = [  5,   0,  0, 90,   0, -90,  0,  5]
view_azim = [-60, -90,  0, 90, 180, 180, 90, -60]

FFMpegWriter = manimation.writers['ffmpeg']
metadata = dict(title='Movie Test', artist='Matplotlib',
                comment='Movie support!')
writer = FFMpegWriter(fps=25, metadata=metadata)
with writer.saving(fig, "pythonRot2.mp4", 100):
    for j in range(20):
        ax.view_init(view_elev[0], view_azim[0])
        plt.draw()
        writer.grab_frame()
    for i in range(1, len(view_elev)):
        de = view_elev[i] - view_elev[i-1]
        da = view_azim[i] - view_azim[i-1]
        if abs(da) >= 180:  # unnecessary in this configuration
            da -= copysign(360, da)
        if abs(de) >= 180:
            de -= copysign(360, de)
        steps = 10 if i == 1 else 60
        da = da / steps
        de = de / steps
        for j in range(10):  # pause on the direct view of a letter
            ax.view_init(view_elev[i-1], view_azim[i-1])
            plt.draw()
            writer.grab_frame()
        for j in range(steps):  # rotate to the next letter
            ax.view_init(view_elev[i-1] + j*de,
                         view_azim[i-1] + j*da)
            plt.draw()
            writer.grab_frame()
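Note that FFMpegWriter needs an ffmpeg binary installed on the system. If it isn't on your PATH, you can point matplotlib at it before constructing the writer (the path below is an example; adjust it to your install):

import matplotlib
matplotlib.rcParams['animation.ffmpeg_path'] = '/usr/bin/ffmpeg'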

Related

Find remaining line from rectangle in OpenCV

I'm detecting a rectangle with known dimensions in OpenCV. I've written code - which works - for detecting all four lines, but the furthest line is difficult to detect and usually causes problems when it comes time to warp the perspective.
My question (which I've looked into, but haven't found an answer for) is whether, theoretically, having 3 lines (angles and intercepts) is enough to approximate the final line.
EDIT: I was asked to include my current code. Here it is:
import numpy as np
import cv2
from sklearn.cluster import AgglomerativeClustering

# find the intersection of two lines given slopes and intercepts
def line_intersect(m1, b1, m2, b2):
    if m1 == m2:
        print("These lines are parallel!!!")
        return None
    x = int((b2 - b1) / (m1 - m2))
    y = int(m1 * x + b1)
    return [x, y]

# method specs
width_error = 10
height_error = 10
kernel_size = 7
kernel_dilate = np.ones((1, 1), 'uint8')
kernel = np.ones((5, 5), 'uint8')

# read and process image
img = cv2.imread('assets/game-frames/hard-m-2019-124-1200.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
edges = cv2.Canny(blur_gray, 0, 100, apertureSize=3)
dilate_img = cv2.dilate(edges, kernel_dilate, iterations=3)
closing = cv2.morphologyEx(dilate_img, cv2.MORPH_CLOSE, kernel)

# find hough lines
lines = cv2.HoughLinesP(closing, 2, np.pi/180, 100, minLineLength=20, maxLineGap=10)
lines_np = np.reshape(lines, (np.int32(lines.size/4), 4))
lines_np = lines_np[(lines_np[:, 2] - lines_np[:, 0]) != 0]
# column 4: slope, column 5: length
lines_np = np.c_[lines_np, (lines_np[:, 3] - lines_np[:, 1])/(lines_np[:, 2] - lines_np[:, 0])]
lines_np = np.c_[lines_np, np.sqrt((lines_np[:, 3] - lines_np[:, 1])**2 + (lines_np[:, 2] - lines_np[:, 0])**2)]

# find lengthwise lines
lengthwise_lines = lines_np[(abs(lines_np[:, 4]) < 3.5) & (abs(lines_np[:, 4]) > 1.20) & (lines_np[:, 5] > 100)]
if lengthwise_lines[:, 4].size < 2:
    print('No lengthwise lines!')
ward_length = AgglomerativeClustering(n_clusters=None, distance_threshold=0.50,
                                      linkage="ward").fit(abs(lengthwise_lines[:, 4].reshape(-1, 1)))
lengthwise_lines = np.c_[lengthwise_lines, ward_length.labels_]
length_clusts = np.bincount(ward_length.labels_).argsort()[-2:]
lengthwise_lines = lengthwise_lines[np.isin(ward_length.labels_, length_clusts)]

# find extremities
x_max = np.max(lengthwise_lines[:, [0, 2]].reshape(-1, 1))
x_min = np.amin(lengthwise_lines[:, [0, 2]])
y_coords = lengthwise_lines[:, [1, 3]].reshape(-1, 1)
ward = AgglomerativeClustering(n_clusters=None, distance_threshold=10, linkage="ward").fit(y_coords)
want_clusts = np.bincount(ward.labels_).argsort()[-2:]
y_ext1 = np.median(y_coords[np.isin(ward.labels_, want_clusts[0])])
y_ext2 = np.median(y_coords[np.isin(ward.labels_, want_clusts[1])])
y_max = max(y_ext1, y_ext2)
y_min = min(y_ext1, y_ext2)

# find outer lengthwise lines
length_want = ward_length.labels_[np.isin(ward_length.labels_, length_clusts)][np.argmin(abs(lengthwise_lines[:, 4]))]
lengthwise_lines = lengthwise_lines[lengthwise_lines[:, 6] == length_want]
lengthwise_lines = np.c_[lengthwise_lines, (lengthwise_lines[:, 1] - (lengthwise_lines[:, 4]*lengthwise_lines[:, 0]))]
lengthwise_lines = np.c_[lengthwise_lines, ((np.max(lengthwise_lines[:, 1]) - lengthwise_lines[:, 5])/lengthwise_lines[:, 4])]

# subset left and right outer lengthwise lines
l_left = lengthwise_lines[lengthwise_lines[:, 4] < 0]
l_right = lengthwise_lines[lengthwise_lines[:, 4] > 0]

# find top and bottom lines
widthwise_lines = lines_np[(abs(lines_np[:, 4]) < 0.10)]
widthwise_lines = np.c_[widthwise_lines, (widthwise_lines[:, 1] - (widthwise_lines[:, 4]*widthwise_lines[:, 0]))]
bottom_lines = widthwise_lines[np.where((np.amin(widthwise_lines[:, [1, 3]], axis=1) >= y_min - height_error) &
                                        (np.amin(widthwise_lines[:, [1, 3]], axis=1) <= y_min + height_error) &
                                        (np.amin(widthwise_lines[:, [0, 2]], axis=1) >= x_min - width_error) &
                                        (np.amax(widthwise_lines[:, [0, 2]], axis=1) <= x_max + width_error))[0]]
top_lines = widthwise_lines[np.where((np.amax(widthwise_lines[:, [1, 3]], axis=1) >= y_max - height_error) &
                                     (np.amax(widthwise_lines[:, [1, 3]], axis=1) <= y_max + height_error) &
                                     (np.amin(widthwise_lines[:, [0, 2]], axis=1) >= x_min - width_error) &
                                     (np.amax(widthwise_lines[:, [0, 2]], axis=1) <= x_max + width_error))[0]]

# if no lines were found for any border, stop
if top_lines.size == 0 or bottom_lines.size == 0 or l_left.size == 0 or l_right.size == 0:
    print('No lengthwise lines!')

# take the median of the left outer lengthwise lines
l_left_b = np.median(l_left[:, 7])
l_left_m = np.median(l_left[:, 4])
# take the median of the right outer lengthwise lines
l_right_b = np.median(l_right[:, 7])
l_right_m = np.median(l_right[:, 4])
# take the median of the top widthwise lines
top_lines_b = np.median(top_lines[:, 6])
top_lines_m = np.median(top_lines[:, 4])
# take the median of the bottom widthwise lines
bottom_lines_b = np.median(bottom_lines[:, 6])
bottom_lines_m = np.median(bottom_lines[:, 4])

# find the intersections of the lines
int_pt1 = line_intersect(l_left_m, l_left_b, bottom_lines_m, bottom_lines_b)
int_pt2 = line_intersect(l_right_m, l_right_b, bottom_lines_m, bottom_lines_b)
int_pt3 = line_intersect(l_left_m, l_left_b, top_lines_m, top_lines_b)
int_pt4 = line_intersect(l_right_m, l_right_b, top_lines_m, top_lines_b)

# draw the intersections
cv2.circle(img, int_pt1, 3, (0, 255, 0), -1)
cv2.circle(img, int_pt3, 3, (255, 255, 0), -1)
cv2.circle(img, int_pt2, 3, (0, 255, 0), -1)
cv2.circle(img, int_pt4, 3, (255, 255, 0), -1)

# show image
cv2.imshow('frame diff ', img)
cv2.waitKey(0)
Original image:

Get the area for a specific point's corresponding region in a Voronoi diagram

Using this answer, I can create a bounded Voronoi diagram (credit to @Flabetvibes for this code):
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.spatial
import sys

eps = sys.float_info.epsilon

def in_box(towers, bounding_box):
    return np.logical_and(np.logical_and(bounding_box[0] <= towers[:, 0],
                                         towers[:, 0] <= bounding_box[1]),
                          np.logical_and(bounding_box[2] <= towers[:, 1],
                                         towers[:, 1] <= bounding_box[3]))

def voronoi(towers, bounding_box):
    # Select towers inside the bounding box
    i = in_box(towers, bounding_box)
    # Mirror points
    points_center = towers[i, :]
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
    points = np.append(points_center,
                       np.append(np.append(points_left, points_right, axis=0),
                                 np.append(points_down, points_up, axis=0),
                                 axis=0),
                       axis=0)
    # Compute Voronoi
    vor = sp.spatial.Voronoi(points)
    # Filter regions
    regions = []
    for region in vor.regions:
        flag = True
        for index in region:
            if index == -1:
                flag = False
                break
            else:
                x = vor.vertices[index, 0]
                y = vor.vertices[index, 1]
                if not (bounding_box[0] - eps <= x <= bounding_box[1] + eps and
                        bounding_box[2] - eps <= y <= bounding_box[3] + eps):
                    flag = False
                    break
        if region != [] and flag:
            regions.append(region)
    vor.filtered_points = points_center
    vor.filtered_regions = regions
    return vor

def centroid_region(vertices):
    # Polygon's signed area and centroid (shoelace formula)
    A = 0
    C_x = 0
    C_y = 0
    for i in range(0, len(vertices) - 1):
        s = (vertices[i, 0] * vertices[i + 1, 1] - vertices[i + 1, 0] * vertices[i, 1])
        A = A + s
        C_x = C_x + (vertices[i, 0] + vertices[i + 1, 0]) * s
        C_y = C_y + (vertices[i, 1] + vertices[i + 1, 1]) * s
    A = 0.5 * A
    C_x = (1.0 / (6.0 * A)) * C_x
    C_y = (1.0 / (6.0 * A)) * C_y
    return np.array([[C_x, C_y]])

points = np.array([[0.17488374, 0.36498964],
                   [0.94904866, 0.80085891],
                   [0.89265224, 0.4160692 ],
                   [0.17035869, 0.82769497],
                   [0.30274931, 0.04572908],
                   [0.40515272, 0.1445514 ],
                   [0.23191921, 0.08250689],
                   [0.48713553, 0.94806717],
                   [0.77714412, 0.46517511],
                   [0.25945989, 0.76444964]])

vor = voronoi(points, (0, 1, 0, 1))

fig = plt.figure()
ax = fig.gca()
# Plot initial points
ax.plot(vor.filtered_points[:, 0], vor.filtered_points[:, 1], 'b.')
# Plot ridge points
for region in vor.filtered_regions:
    vertices = vor.vertices[region, :]
    ax.plot(vertices[:, 0], vertices[:, 1], 'go')
# Plot ridges
for region in vor.filtered_regions:
    vertices = vor.vertices[region + [region[0]], :]
    ax.plot(vertices[:, 0], vertices[:, 1], 'k-')
Now, I want to get the area of the region containing one of the original points in blue, such as points[0]. In this example, points[0] is the point (0.17488374, 0.36498964). I thought I could find the area for this point with the following code:
area = sp.spatial.ConvexHull(vor.vertices[vor.filtered_regions[0], :]).volume
because I figured that index 0 in points would correspond to index 0 in vor.filtered_regions. But it doesn't: vor.filtered_regions[9] is actually the region I'm looking for (which I figured out manually, but I'd like it to be automated). In another example, the region with index 2 was the one I wanted, so the ordering doesn't appear to be consistent either.
Is there a way to find the index into vor.filtered_regions that gives me the area I want? Or is there another way to go about this? Even though I'm creating the entire Voronoi diagram from all 10 points, the area of the region containing points[0] is all I actually need (while still being bounded), so I'm assuming there might be a quicker way to do this, but I have no idea what that may be.
The point_region attribute of the scipy Voronoi diagram tells you which region is associated with each point, so you can use it to look up the associated regions.
Here is a much simplified version of your voronoi function which uses that attribute to ensure that filtered_points and filtered_regions are constructed consistently, i.e., the first region is the one associated with the first point.
def voronoi(towers, bounding_box):
    # Select towers inside the bounding box
    i = in_box(towers, bounding_box)
    # Mirror points
    points_center = towers[i, :]
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
    points = np.append(points_center,
                       np.append(np.append(points_left, points_right, axis=0),
                                 np.append(points_down, points_up, axis=0),
                                 axis=0),
                       axis=0)
    # Compute Voronoi
    vor = sp.spatial.Voronoi(points)
    # point_region[i] is the index of the region belonging to input point i,
    # so filtered_regions[i] is guaranteed to match filtered_points[i]
    vor.filtered_points = points_center
    vor.filtered_regions = [vor.regions[vor.point_region[i]]
                            for i in range(len(points_center))]
    return vor
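Since filtered_regions is now ordered like filtered_points, the area for points[0] can be read off directly. A minimal sketch (assuming from scipy.spatial import ConvexHull; Voronoi cells are convex, and in 2D ConvexHull.volume is the polygon area):

from scipy.spatial import ConvexHull

vor = voronoi(points, (0, 1, 0, 1))
# filtered_regions[0] is the region of points[0] by construction
cell_vertices = vor.vertices[vor.filtered_regions[0], :]
area = ConvexHull(cell_vertices).volume  # in 2D, .volume is the area
print(area)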

Overlay a number of colormaps with matplotlib

I have a total of 16 color maps, which look as follows:
Is there a way to overlay all of the maps while retaining their color? That is, I want to obtain a final image that consists of 16 distributions in different colors. I've searched a lot but unfortunately haven't found anything good yet. Thanks a lot!
For reproduction, the code looks as follows:
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors

def softmax(logit_map):
    bn, kn, h, w = logit_map.shape
    map_norm = F.softmax(logit_map.reshape(bn, kn, -1), dim=2).reshape(bn, kn, h, w)
    return map_norm

def get_mu_and_prec(part_maps, device, scal):
    """
    Calculate mean for each channel of part_maps
    :param part_maps: tensor of part map activations [bn, n_part, h, w]
    :return: mean calculated on a grid of scale [-1, 1]
    """
    bn, nk, h, w = part_maps.shape
    y_t = torch.linspace(-1., 1., h).reshape(h, 1).repeat(1, w).unsqueeze(-1)
    x_t = torch.linspace(-1., 1., w).reshape(1, w).repeat(h, 1).unsqueeze(-1)
    meshgrid = torch.cat((y_t, x_t), dim=-1).to(device)  # 64 x 64 x 2

    mu = torch.einsum('ijl, akij -> akl', meshgrid, part_maps)  # bn x nk x 2
    mu_out_prod = torch.einsum('akm,akn->akmn', mu, mu)

    mesh_out_prod = torch.einsum('ijm,ijn->ijmn', meshgrid, meshgrid)
    stddev = torch.einsum('ijmn,akij->akmn', mesh_out_prod, part_maps) - mu_out_prod

    a_sq = stddev[:, :, 0, 0]
    a_b = stddev[:, :, 0, 1]
    b_sq_add_c_sq = stddev[:, :, 1, 1]
    eps = 1e-12

    a = torch.sqrt(a_sq + eps)  # Σ = L L^T, Prec = Σ^-1 = L^T^-1 L^-1 -> looking for L^-1, where L = [[a, 0], [b, c]]
    b = a_b / (a + eps)
    c = torch.sqrt(b_sq_add_c_sq - b ** 2 + eps)
    z = torch.zeros_like(a)

    det = (a * c).unsqueeze(-1).unsqueeze(-1)
    row_1 = torch.cat((c.unsqueeze(-1), z.unsqueeze(-1)), dim=-1).unsqueeze(-2)
    row_2 = torch.cat((-b.unsqueeze(-1), a.unsqueeze(-1)), dim=-1).unsqueeze(-2)
    L_inv = scal / (det + eps) * torch.cat((row_1, row_2), dim=-2)  # L^-1 = 1/(ac) * [[c, 0], [-b, a]]
    return mu, L_inv

def get_heat_map(mu, L_inv, device):
    h, w, nk = 64, 64, L_inv.shape[1]

    y_t = torch.linspace(-1., 1., h).reshape(h, 1).repeat(1, w).unsqueeze(-1)
    x_t = torch.linspace(-1., 1., w).reshape(1, w).repeat(h, 1).unsqueeze(-1)
    y_t_flat = y_t.reshape(1, 1, 1, -1)
    x_t_flat = x_t.reshape(1, 1, 1, -1)
    mesh = torch.cat((y_t_flat, x_t_flat), dim=-2).to(device)
    dist = mesh - mu.unsqueeze(-1)

    proj_precision = torch.einsum('bnik, bnkf -> bnif', L_inv, dist) ** 2  # tf.matmul(precision, dist)**2
    proj_precision = torch.sum(proj_precision, -2)  # sum x and y axis
    heat = 1 / (1 + proj_precision)
    heat = heat.reshape(-1, nk, h, w)  # bn x n_parts x h x w
    return heat

color_list = ['black', 'gray', 'brown', 'chocolate', 'orange', 'gold', 'olive',
              'lawngreen', 'aquamarine', 'dodgerblue', 'midnightblue',
              'mediumpurple', 'indigo', 'magenta', 'pink', 'springgreen']

fmap = torch.randn(1, 16, 64, 64)
fmap_norm = softmax(fmap)
mu, L_inv = get_mu_and_prec(fmap_norm, 'cpu', scal=5.)
heat_map = get_heat_map(mu, L_inv, "cpu")

for i in range(16):
    cmap = colors.LinearSegmentedColormap.from_list('my_colormap',
                                                    ['white', color_list[i]],
                                                    256)
    plt.imshow(heat_map[0][i].numpy(), cmap=cmap)
    plt.show()
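One common technique (a sketch of my own, not from the original post) is to build each colormap with an alpha ramp from fully transparent to the target color, then draw all 16 layers onto the same axes; low activations stay see-through, so every distribution remains visible:

# Overlay all 16 heat maps on one axes using transparency-ramped colormaps.
fig, ax = plt.subplots()
for i in range(16):
    cmap = colors.LinearSegmentedColormap.from_list(
        'overlay_%d' % i,
        [(1.0, 1.0, 1.0, 0.0), colors.to_rgba(color_list[i])],  # transparent -> color
        256)
    ax.imshow(heat_map[0][i].numpy(), cmap=cmap)
plt.show()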

Encoded and decoded versions of bounding box regression offsets are different

I'm trying to replicate the bounding box regression technique used in Faster R-CNN, as given here. I've made a decoding function and an encoding function. Ideally, when passing a bounding box to the encoder and then decoding it, I should get the same bounding box back.
Here are my input bounding boxes:
import numpy as np
import tensorflow as tf
from itertools import product  # needed by make_anchors
from math import sqrt

def make_anchors(img_size, conv_h, conv_w, scale, aspect_ratios):
    prior_data = []
    # Iteration order is important (it has to sync up with the convout)
    for j, i in product(range(conv_h), range(conv_w)):
        # + 0.5 because priors are in center form
        x = (i + 0.5) / conv_w
        y = (j + 0.5) / conv_h
        for ar in aspect_ratios:
            ar = sqrt(ar)
            w = scale * ar / img_size
            h = scale / ar / img_size
            prior_data += [x, y, w, h]
    return prior_data

test_bbox = tf.convert_to_tensor(np.array([[204.044, 253.8351, 487.8226, 427.06363],
                                           [0, 140.01741, 550, 290.21936],
                                           [40.005028, 117.37102, 255.7913, 205.13097],
                                           [263.31314, 67.0434, 514.04736, 124.48139],
                                           [0, 503.79834, 487.0279, 550]]), dtype=tf.float32)
test_labels = tf.convert_to_tensor(np.array([[1],
                                             [2],
                                             [3],
                                             [4],
                                             [5]]), dtype=tf.float32)

feature_map_size = [[69, 69], [35, 35], [18, 18], [9, 9], [5, 5]]
aspect_ratios = [1, 0.5, 2]
scales = [24, 48, 96, 192, 384]
anchors = []
for i, shape in enumerate(feature_map_size):
    anchors += make_anchors(550, shape[0], shape[1], scales[i], aspect_ratios)
anchors = tf.reshape(tf.convert_to_tensor(anchors), [-1, 4])
I'm using a 550x550 image as input and calculated the feature-map sizes accordingly; with 3 aspect ratios per location this yields 3*(69*69 + 35*35 + 18*18 + 9*9 + 5*5) = 19,248 anchors in total.
The encoding is done as follows:
def encode(map_loc, center_anchors, include_variances=False):
    # boxes come in as [y_min, x_min, y_max, x_max]; convert to center form [c_x, c_y, w, h]
    h = map_loc[:, 2] - map_loc[:, 0]
    w = map_loc[:, 3] - map_loc[:, 1]
    center_gt = tf.cast(tf.stack([map_loc[:, 1] + (w / 2), map_loc[:, 0] + (h / 2), w, h], axis=-1), tf.float32)
    variances = [0.1, 0.2]

    # calculate offsets
    if include_variances:
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2] / variances[0]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3] / variances[0]
    else:
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3]
    tf.debugging.assert_non_negative(center_anchors[:, 2] / center_gt[:, 2])
    tf.debugging.assert_non_negative(center_anchors[:, 3] / center_gt[:, 3])
    if include_variances:
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2]) / variances[1]
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3]) / variances[1]
    else:
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2])
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3])
    offsets = tf.stack([g_hat_cx, g_hat_cy, g_hat_w, g_hat_h], axis=-1)
    return offsets
def area(boxlist, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L48
    """Computes area of boxes.

    Args:
      boxlist: BoxList holding N boxes
      scope: name scope.

    Returns:
      a tensor with shape [N] representing box areas.
    """
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist, num_or_size_splits=4, axis=1)
    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])

def intersection(boxlist1, boxlist2, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L209
    """Compute pairwise intersection areas between boxes.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise intersections
    """
    y_min1, x_min1, y_max1, x_max1 = tf.split(
        value=boxlist1, num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(
        value=boxlist2, num_or_size_splits=4, axis=1)
    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
    return intersect_heights * intersect_widths

def iou(boxlist1, boxlist2, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L259
    """Computes pairwise intersection-over-union between box collections.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise iou scores.
    """
    intersections = intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    unions = (
        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
    return tf.where(
        tf.equal(intersections, 0.0),
        tf.zeros_like(intersections), tf.truediv(intersections, unions))
def matching(pos_thresh, neg_thresh, gt_bbox, gt_labels, priors):
    pairwise_iou = iou(priors, gt_bbox)  # size [num_priors, num_objects]; anchors along rows, ground truth along columns

    each_prior_max = tf.reduce_max(pairwise_iou, axis=-1)  # size [num_priors]; max iou of each anchor with any ground truth
    each_prior_index = tf.math.argmax(pairwise_iou, axis=-1)  # size [num_priors]; id of the ground truth with max iou for each anchor

    each_box_max = tf.reduce_max(pairwise_iou, axis=0)
    each_box_index = tf.math.argmax(pairwise_iou, axis=0)

    # For the max IoU prior for each gt box, set its IoU to 2. This ensures that it won't be filtered
    # in the threshold step even if the IoU is under the negative threshold. This is because we want
    # at least one prior to match with each gt box or else we'd be wasting training data.
    indices = tf.expand_dims(each_box_index, axis=-1)
    updates = tf.cast(tf.tile(tf.constant([2]), each_box_index.shape), dtype=tf.float32)
    each_prior_max = tf.tensor_scatter_nd_update(each_prior_max, indices, updates)

    # Set the index of the pair (prior, gt) we set the overlap for above.
    updates = tf.cast(tf.range(0, each_box_index.shape), dtype=tf.int64)
    each_prior_index = tf.tensor_scatter_nd_update(each_prior_index, indices, updates)

    each_prior_box = tf.gather(gt_bbox, each_prior_index)  # size [num_priors, 4]
    conf = tf.squeeze(tf.gather(gt_labels, each_prior_index) + 1)  # class of the max IoU gt box for each prior, size [num_priors]

    neutral_label_index = tf.where(each_prior_max < pos_thresh)
    background_label_index = tf.where(each_prior_max < neg_thresh)
    conf = tf.tensor_scatter_nd_update(conf, neutral_label_index, -1 * tf.ones(tf.size(neutral_label_index)))
    conf = tf.tensor_scatter_nd_update(conf, background_label_index, tf.zeros(tf.size(background_label_index)))

    offsets = encode(each_prior_box, priors)

    return offsets, conf, each_prior_box, each_prior_index

offsets, conf, each_prior_box, each_prior_index = \
    matching(0.5, 0.5, test_bbox / 550, test_labels, anchors)
If I try to draw the boxes decoded back from these offsets, I get the following image:
def _decode(box_p, priors, include_variances=False):
    # https://github.com/feiyuhuahuo/Yolact_minimal/blob/9299a0cf346e455d672fadd796ac748871ba85e4/utils/box_utils.py#L151
    """
    Decode predicted bbox coordinates using the scheme
    employed at https://lilianweng.github.io/lil-log/2017/12/31/object-recognition-for-dummies-part-3.html
        b_x = prior_w*loc_x + prior_x
        b_y = prior_h*loc_y + prior_y
        b_w = prior_w * exp(loc_w)
        b_h = prior_h * exp(loc_h)
    Note that loc is input as [c_x, c_y, w, h]
    while priors are input as [c_x, c_y, w, h], where each coordinate
    is relative to the size of the image.
    Also note that prior_x and prior_y are center coordinates.
    """
    variances = [0.1, 0.2]
    box_p = tf.cast(box_p, tf.float32)
    priors = tf.cast(priors, tf.float32)

    if include_variances:
        b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:] * variances[0]
        b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:] * variances[1])
    else:
        b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:]
        b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:])
    boxes = tf.concat([b_x_y, b_w_h], axis=1)

    # [x_min, y_min, x_max, y_max]
    boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] + boxes[:, :2]], axis=1)

    # [y_min, x_min, y_max, x_max]
    return tf.transpose(tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]]))
import cv2
import matplotlib.pyplot as plt

_idx = tf.where(conf > 0.5)
_test = _decode(offsets, anchors)  # 'anchors' is the tensor built above
_out = tf.squeeze(tf.gather(_test, _idx)).numpy() * 550

img_test = 255 * np.ones((1000, 1000, 3), dtype=np.uint8)  # uint8 so 255 doesn't overflow
for box in _out:
    box = np.round(box).astype(int)
    image = cv2.rectangle(img_test, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 2)
plt.imshow(image)
As one can see, the decoded boxes extend past the 550-pixel input dimensions. Why is that happening?
The problem was in my decode function, in the calculation of [x_min, y_min, x_max, y_max]. It should have been:
# [x_min, y_min, x_max, y_max]
boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] / 2 + boxes[:, :2]], axis=1)
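With that fix applied to _decode, a quick round-trip check (a sketch using the tensors from the snippets above) should reproduce the matched boxes up to floating-point error:

# Encode the matched ground-truth boxes, decode them again, and compare.
decoded = _decode(encode(each_prior_box, anchors), anchors)
print(tf.reduce_max(tf.abs(decoded - each_prior_box)).numpy())  # expect ~1e-6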

Convert Numpy 3D array to 3 R,G,B hex-strings

I have large lists containing RGB values for pictures.
I use numpy to create patterns and/or pillow to load pictures and convert them to 3D numpy arrays (int32). Now I want to restructure my array into hex strings in an unusual way:
Three hex strings for R, G, B in this structure: '0000FFFF', where the first 4 characters are always zero, the next 2 characters represent pixel n+1, and the last 2 characters represent pixel n. For example, if pixel n has the red value 0x34 and pixel n+1 has the red value 0x56, the red string gets the chunk '00005634'.
I have already done this with code that takes too long for larger images, and I need to speed it up. What I've got so far:
import numpy
import numpy.matlib
#from matplotlib.colors import rgb2hex
import time

def pairwise(iterable):
    """Create a paired-list from a list."""
    a = iter(iterable)
    return zip(a, a)

def test(imgSize=[480, 640], brightness=[255, 255, 255]):
    # generate pattern
    startPattern = time.time()
    patternDescription = 'Stripe Test'
    pattern = numpy.zeros((imgSize[0], imgSize[1], 3))
    line = (numpy.r_[:imgSize[1]] % 255) / 255
    colorChR = numpy.matlib.repmat(line, imgSize[0], 1)
    colorChG = numpy.matlib.repmat(line, imgSize[0], 1)
    colorChB = numpy.matlib.repmat(line, imgSize[0], 1)
    colorChR[:, :] = 0
    colorChR[:, 0:60] = 1
    colorChG[:, :] = 0
    colorChG[:, 0:60] = 1
    colorChB[:, :] = 0
    colorChB[:, 0:60] = 1
    pattern[:, :, 0] = colorChR
    pattern[:, :, 1] = colorChG
    pattern[:, :, 2] = colorChB
    stopPattern = time.time()
    print('TIME: Pattern generation: ' + str(round(stopPattern-startPattern, 3)) + ' s. ')

    # first reshape
    startReshape = time.time()
    pattern[:, :, 0] = pattern[:, :, 0]*brightness[0]  # red brightness multiplier
    pattern[:, :, 1] = pattern[:, :, 1]*brightness[1]  # green brightness multiplier
    pattern[:, :, 2] = pattern[:, :, 2]*brightness[2]  # blue brightness multiplier
    img = pattern.astype(int)

    # IDEALLY I WANT TO CHANGE THE CODE ONLY FROM HERE ON
    # redValues = pattern[:,:,0].astype(int)
    # greenValues = pattern[:,:,1].astype(int)
    # blueValues = pattern[:,:,2].astype(int)
    # test = ("0000" + ("{:0>2X}" * len(redValues))).format(*tuple(redValues[::-1]))
    # numpy.set_printoptions(formatter={'int':hex})
    # #test = [ rgb2hex(img[i,:]) for i in range(img.shape[0]) ]
    # rgb2hex = lambda r,g,b: '%02X%02X%02X' %(r,g,b)
    # test = [ rgb2hex(*img[i,:]) for i in range(img.shape[0]) ]
    # # img = numpy.array2string(img, formatter = {'int':lambda img: hex(img)})
    imgReshape = numpy.reshape(img, (1, imgSize[0]*imgSize[1]*3))  # necessary?
    redValues = imgReshape[0][0::3]    # red values (0, 3, 6, ..)
    greenValues = imgReshape[0][1::3]  # green values (1, 4, 7, ..)
    blueValues = imgReshape[0][2::3]   # blue values (2, 5, 8, ..)
    stopReshape = time.time()
    print('TIME: Reshape into colors: ' + str(round(stopReshape-startReshape, 3)) + ' s. ')

    redString = ''
    greenString = ''
    blueString = ''
    outData = dict()

    startString = time.time()
    for i, j in pairwise(redValues):
        redString += "0000%02X%02X" % (int(j), int(i))
    for i, j in pairwise(greenValues):
        greenString += "0000%02X%02X" % (int(j), int(i))
    for i, j in pairwise(blueValues):
        blueString += "0000%02X%02X" % (int(j), int(i))
    outData['red'] = redString
    outData['green'] = greenString
    outData['blue'] = blueString
    stopString = time.time()
    print('TIME: String formatting: ' + str(round(stopString-startString, 3)) + ' s')

    print('DATATEST: First 200 red chars: ' + str(outData['red'][0:200]))
    print('DATATEST: First 200 green chars: ' + str(outData['green'][0:200]))
    print('DATATEST: First 200 blue chars: ' + str(outData['blue'][0:200]))
    #return outData
Try using numpy arrays instead:
import numpy as np

redValues = np.random.randint(0, 255, (10, 2))
red = np.array(redValues).reshape(-1, 2)
red_channel = (red[:, 1] << 8) + red[:, 0]  # pixel n+1 in the high byte, pixel n in the low byte
redString = ''.join(map(lambda val: f'0000{val:04X}', red_channel))
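Applied to the arrays in the question, the same packing vectorizes all three channels. A sketch, assuming img is the (height, width, 3) int array built inside test():

# Pack consecutive pixel pairs per channel, then format each pair once.
outData = {}
for idx, name in enumerate(['red', 'green', 'blue']):
    chan = img[:, :, idx].reshape(-1, 2)     # pairs (pixel n, pixel n+1), row-major
    packed = (chan[:, 1] << 8) + chan[:, 0]  # pixel n+1 in the high byte, pixel n in the low byte
    outData[name] = ''.join(f'0000{v:04X}' for v in packed)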
