I'm training a YOLO network to detect a soccer ball, mainly to learn and better understand this architecture. I can detect the ball in most cases. My problem is that the generated bounding box is too big, covering far more pixels than the actual ball.
I know the anchor boxes influence the bounding boxes. I've experimented with a few config values and also tried code to generate the anchors. The anchors generated by the code are too small, and often the box isn't even visible, only the label. I've also wondered whether this is the best approach for a single-class problem, since the anchors come from k-means. When I tried changing the values by hand, the covered region was completely wrong, but the box was really small, covering less than with the original anchors.
Here is my code to generate the anchors. I removed some methods to keep it cleaner.
import os
import random

import cv2
import numpy as np


class AnchorGenerator:
    def __init__(self, dir_images, dir_labels, width=416, height=416, anchors=9):
        # dir_images: directory that contains all images that will be (or already
        #             have been) used to train the neural network
        #             Example: /home/enacom/Desktop/darknet_football_resized_images/
        # dir_labels: directory that contains all labels that will be (or already
        #             have been) used to train the neural network
        #             Example: /home/enacom/Desktop/BBoxLabelTool/Images/labels6/
        # anchors:    the number of anchors you want
        #             default is 9 for YOLOv3, but you can add more if you want to detect more objects
        self.dir_images = dir_images
        self.dir_labels = dir_labels
        self.grid_w = width / 32
        self.grid_h = height / 32
        self.anchors = anchors
        self.annotation_dims = []

        file_list = os.listdir(self.dir_images)
        boxes = self.__read_labels()
        for file in file_list:
            actual_dir = self.dir_images + file
            try:
                image = cv2.imread(actual_dir)
                height, width = image.shape[:2]  # note: shape is (rows, cols) = (height, width)
                cell_w = width / self.grid_w
                cell_h = height / self.grid_h
                box = boxes[file.replace("jpg", "txt")]
                relative_w = (float(box[0][2]) - float(box[0][0]))  # / cell_w
                relative_h = (float(box[0][3]) - float(box[0][1]))  # / cell_h
                self.annotation_dims.append(tuple(map(float, (relative_w, relative_h))))
            except AttributeError:
                print("Failure when reading image")
        self.annotation_dims = np.array(self.annotation_dims)
        print(self.annotation_dims / 32)

    def IOU(self, ann, centroids):
        w, h = ann
        similarities = []
        for centroid in centroids:
            c_w, c_h = centroid
            if c_w >= w and c_h >= h:
                similarity = w * h / (c_w * c_h)
            elif c_w >= w and c_h <= h:
                similarity = w * c_h / (w * h + (c_w - w) * c_h)
            elif c_w <= w and c_h >= h:
                similarity = c_w * h / (w * h + c_w * (c_h - h))
            else:  # means both w, h are bigger than c_w and c_h respectively
                similarity = (c_w * c_h) / (w * h)
            similarities.append(similarity)  # will become (k,) shape
        return np.array(similarities)

    def run_kmeans(self):
        ann_num = self.annotation_dims.shape[0]
        prev_assignments = np.ones(ann_num) * (-1)
        iteration = 0
        old_distances = np.zeros((ann_num, self.anchors))

        indices = [random.randrange(self.annotation_dims.shape[0]) for i in range(self.anchors)]
        centroids = self.annotation_dims[indices]
        anchor_dim = self.annotation_dims.shape[1]

        while True:
            distances = []
            iteration += 1
            for i in range(ann_num):
                d = 1 - self.IOU(self.annotation_dims[i], centroids)
                distances.append(d)
            distances = np.array(distances)  # distances.shape = (ann_num, anchor_num)

            print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances - distances))))

            # assign samples to centroids
            assignments = np.argmin(distances, axis=1)
            if (assignments == prev_assignments).all():
                return centroids

            # calculate new centroids
            centroid_sums = np.zeros((self.anchors, anchor_dim), dtype=float)  # np.float is deprecated; use the builtin float
            for i in range(ann_num):
                centroid_sums[assignments[i]] += self.annotation_dims[i]
            for j in range(self.anchors):
                centroids[j] = centroid_sums[j] / (np.sum(assignments == j) + 1e-6)

            prev_assignments = assignments.copy()
            old_distances = distances.copy()
a = AnchorGenerator("/home/myDir/Desktop/BBoxLabelTool/Images/5_images_clean/",
"/home/myDir/Desktop/BBoxLabelTool/Labels/005/",
anchors = 9)
centroids = a.run_kmeans()
a.print_anchors(centroids)
Result with the original anchors: https://imgur.com/a/5OhiTfl
Result with my own anchors: https://imgur.com/a/YCW2aam
With the hand-made anchors it's close to right, but I don't know how to make it more precise. These are the anchors that produced that result:
anchors = 3,1, 7,4, 5,5, 12,12, 15,15, 16,16, 18,17
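For context on the units, here is a minimal sketch (illustrative only, with made-up names, assuming the labels store corner coordinates in pixels of the original image) of the rescaling I believe the anchor values depend on: YOLOv3 cfg anchors are given in pixels of the 416x416 network input, while YOLOv2 anchors are given in grid cells.

# Illustrative sketch only, not part of the script above.
NET_W, NET_H = 416, 416  # network input size

def box_to_anchor_dims(x1, y1, x2, y2, img_w, img_h):
    # rescale one labelled corner box to network-input pixels before running k-means
    w = (x2 - x1) * NET_W / img_w
    h = (y2 - y1) * NET_H / img_h
    return w, h

# For a YOLOv2-style cfg the anchors are in grid cells instead: divide the values above by 32 (the stride).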
Related
I developed a TensorFlow model with one class (it had a loss of 0.03 and was trained on 680 labelled images). I am trying to use this model to detect the object in every video frame. However, whenever I run my code, it detects something in the top-left of the screen, in the black border surrounding the video. I tried switching from a model trained with MobileNet to one with EfficientDet D3, and the same issue persisted. I then tried changing my code to require a minimum box size and a higher score. I also tried letting it make multiple detections and using the one with the highest score. With all of these conditions, it didn't detect anything that fulfilled the requirements. My code is as follows:
import os
import time
import tensorflow as tf
import cv2
import scipy
import math
import pandas as pd
import numpy as np
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from base64 import b64encode
#os.chdir("C:\\Users\\Ibrahim\\desktop")
PATH_TO_SAVED_MODEL = "C:/Users/Ibrahim/Desktop/fine_tuned_model/content/fine_tuned_model/saved_model"
# Load label map and obtain class names and ids
#label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
category_index=label_map_util.create_category_index_from_labelmap("C:\\Users\\Ibrahim\\Desktop\\customTF2-20221225T123609Z-001\\customTF2\\data\\label_map.pbtxt",use_display_name=True)
file = "CL_1_S0003.mp4"
video = cv2.VideoCapture(file)
ret,frame=video.read()
#getting the walls bbox
wall_bbox = cv2.selectROI(frame)
(x_wall,y_wall,x2_wall,y2_wall) = wall_bbox
print(wall_bbox)
num_cont_frames=0
#video = cv.VideoCapture(path)
ball_size = 0.22 #diameter of a regulation ball in meters
fps_cam = 10000 # Change this to the required fps of the video
fps_vid =video.get(cv2.CAP_PROP_FPS)
fps_time= fps_vid / fps_cam
#print(fps_time)
model = tf.saved_model.load(PATH_TO_SAVED_MODEL)
signature = list(model.signatures.values())[0]
# Initialize variable to track state of ball (in contact with wall or not)
in_contact = False
# Initialize variable to track whether in_contact has ever been True
in_contact_ever = False
# Initialize lists for inbound and outbound velocities
inbound_velocities = []
outbound_velocities = []
# Calculate time interval between frames in seconds
time_interval = 1 / fps_cam
scale = []
x_list = []
y_list = []
x_def=[]
inbound_x = []
inbound_y = []
outbound_x = []
outbound_y = []
w1=[]
score_thresh = 0.8 # Minimum threshold for object detection
max_detections = 20
while True:
    # Read frame from video
    ret, frame = video.read()
    if not ret:
        break

    # Add a batch dimension to the frame tensor
    frame_tensor = tf.expand_dims(frame, axis=0)

    # Get detections for image
    detections = signature(frame_tensor)  # Replace this with a call to your TensorFlow model's predict method
    scores = detections['detection_scores'][0, :max_detections].numpy()
    bboxes = detections['detection_boxes'][0, :max_detections].numpy()
    labels = detections['detection_classes'][0, :max_detections].numpy().astype(np.int64)
    labels = [category_index[n]['name'] for n in labels]

    # Initialize variables to keep track of the maximum score and corresponding bounding box
    max_score = 0
    selected_bbox = None

    # Loop through all bounding boxes
    for bbox, score in zip(bboxes, scores):
        # Check if the score is greater than the current maximum score
        if score > max_score:
            # Update the maximum score and corresponding bounding box
            max_score = score
            selected_bbox = bbox

    # Check if a bounding box was selected
    if selected_bbox is not None:
        # Extract bounding box coordinates
        (x, y, w, h) = selected_bbox
        # Filter out bounding boxes that are too small (smaller than a minimum size)
        if w >= 10 and h >= 10:
            # Draw bounding box on frame
            cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 20, 1)
            cv2.imshow('Frame', frame)
            cv2.waitKey(1)

            x2 = x + w
            y2 = y + h

            # Calculate center point of bounding box
            x_center = (x + x2) / 2
            y_center = (y + y2) / 2

            # Append x and y center points to lists
            x_list.append(x_center)
            y_list.append(y_center)
            w1.append(w)

            # Calculate other variables and metrics using bbox
            scale.append(ball_size / h)  # meters per pixel: diameter in pixels / real diameter in m, to give a scale factor
            #x_list.append(x2) #list of x positions of right edge
            #y_list.append(y2)

            if (x_center - w) < max(x2_wall, x_wall):  # sometimes the bbox is the wrong way around
                # Set in_contact to True
                in_contact = True
                # Set in_contact_ever to True
                in_contact_ever = True
                # Increment counter
                num_cont_frames = num_cont_frames + 1
                x_defe = x2 - x2_wall
                x_def.append(x_defe)
            else:
                in_contact = False

            if in_contact == False and in_contact_ever == False:
                inbound_x.append(x_center)  # list of x positions at center of ball
                inbound_y.append(y_center)  # list of y positions at center of ball

            if in_contact == False and in_contact_ever == True:
                outbound_x.append(x_center)  # list of x positions of right edge
                outbound_y.append(x_center)
                print(outbound_x)
    else:
        cv2.putText(frame, 'Error', (100, 0), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    cv2.imshow('Tracking', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

cv2.destroyAllWindows()
scale_ave = scipy.stats.trim_mean(scale, 0.2)  # trim_mean 20% either way to remove some extraneous results

x_diff = []
y_diff = []
x_len = len(x_list) - 1  # minus 1 as python starts with 0 so we don't overflow
for i in range(x_len):
    x_diff.append(x_list[i] - x_list[i + 1])  # find x distance per frame
for i in range(x_len):
    y_diff.append(y_list[i] - y_list[i + 1])  # find y distance per frame

pyth_dist = []
pyth_sub = []
x2_len = len(x_diff) - 1
x_speed = []
y_speed = []
for i in range(x2_len):
    x_speeds = x_diff[i] * scale_ave * fps_cam
    x_speed.append(x_speeds)
    y_speeds = y_diff[i] * scale_ave * fps_cam
    y_speed.append(y_speeds)
    pyth_sub = math.hypot(x_diff[i], y_diff[i])
    pyth_dist.append(pyth_sub)  # do pythagoras to find pixel distance per frame

realdist = []
speed = []
for i in range(x2_len):
    realdistcalc = (pyth_dist[i] * scale_ave)
    realdist.append(realdistcalc)  # change from pixels to meters

for item in realdist:
    if item > 1:
        realdist.remove(item)

distlen = len(realdist) - 1
for i in range(distlen):
    speedcalc = realdist[i] * fps_cam
    speed.append(speedcalc)

contact_time = num_cont_frames / fps_cam
print(contact_time)

if x_def:
    realxdef = min(x_def) * scale_ave
else:
    realxdef = 0
print(realxdef)
# Calculate inbound velocities
inbound_x_diff = []
inbound_y_diff = []

# Calculate inbound x- and y-velocities
inbound_x_velocities = []
inbound_y_velocities = []
inbound_len = len(inbound_x) - 1

# Calculate differences between consecutive x and y coordinates
for i in range(inbound_len):
    inbound_x_diff.append(inbound_x[i] - inbound_x[i + 1])
    inbound_y_diff.append(inbound_y[i] - inbound_y[i + 1])

# Calculate inbound velocities in meters per second
inbound_velocities = []
for i in range(inbound_len):
    inbound_x_velocity = inbound_x_diff[i] * scale_ave * fps_cam
    inbound_x_velocities.append(inbound_x_velocity)
    inbound_y_velocity = inbound_y_diff[i] * scale_ave * fps_cam
    inbound_y_velocities.append(inbound_y_velocity)
    inbound_velocity = math.hypot(inbound_x_diff[i], inbound_y_diff[i]) * scale_ave * fps_cam
    inbound_velocities.append(inbound_velocity)

# Calculate outbound velocities
outbound_x_diff = []
outbound_y_diff = []
outbound_len = len(outbound_x) - 1

# Calculate differences between consecutive x and y coordinates
for i in range(outbound_len):
    outbound_x_diff.append(outbound_x[i] - outbound_x[i + 1])
    outbound_y_diff.append(outbound_y[i] - outbound_y[i + 1])

# Calculate outbound velocities in meters per second
outbound_velocities = []
outbound_x_velocities = []
outbound_y_velocities = []
for i in range(outbound_len):
    outbound_x_velocity = outbound_x_diff[i] * scale_ave * fps_cam
    outbound_x_velocities.append(outbound_x_velocity)
    outbound_y_velocity = outbound_y_diff[i] * scale_ave * fps_cam
    outbound_y_velocities.append(outbound_y_velocity)
    outbound_velocity = math.hypot(outbound_x_diff[i], outbound_y_diff[i]) * scale_ave * fps_cam
    outbound_velocities.append(outbound_velocity)
I expected a bounding box around the ball. I tried changing the model, increasing the maximum number of detections, adding a minimum size for detections, and increasing the required detection score.
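For reference, detection_boxes from the TF2 Object Detection API come back normalized as [ymin, xmin, ymax, xmax]; a minimal sketch (helper name made up, not part of the script above) of converting one box to pixel (x, y, w, h) before drawing looks like this:

# Illustrative only: decode a normalized [ymin, xmin, ymax, xmax] box to pixel (x, y, w, h).
def to_pixel_xywh(box, frame_w, frame_h):
    ymin, xmin, ymax, xmax = box
    x = xmin * frame_w
    y = ymin * frame_h
    w = (xmax - xmin) * frame_w
    h = (ymax - ymin) * frame_h
    return x, y, w, h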
I'm trying to implement the phase-shift property of the Fourier transform with PyTorch.
What I mean by the shift property is this:
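In discrete form, for an H x W image f with DFT F, shifting by (Δy, Δx) pixels multiplies the spectrum by a linear phase:

$$ f(y-\Delta y,\; x-\Delta x) \;\longleftrightarrow\; F(v,u)\, e^{-2\pi i \left(\frac{v\,\Delta y}{H} + \frac{u\,\Delta x}{W}\right)} $$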
I think I've got most of it right, but somehow I get a noisy image.
I'm having a hard time solving this issue. Could it be a numerical issue? Or maybe something due to odd or even pixel counts? (My images are 1020 x 678 x 3.)
These are the shifted image and the original image.
This is my implementation code:
import cv2
import numpy as np
import torch
from math import pi

def phase_shifters(y_alpha=0, x_alpha=0, shape=None):
    # shape is HxWxC
    line = torch.zeros(shape)

    # x shift
    line_x = torch.linspace(-shape[1]/2, shape[1]/2, shape[1])
    line_x = line_x.expand(shape[0], shape[2], shape[1]).transpose(1, 2)
    line_x = line_x / shape[1]
    line_x = x_alpha * line_x

    # y shift
    line_y = torch.linspace(-shape[0]/2, shape[0]/2, shape[0])
    line_y = line_y.expand(shape[2], shape[1], shape[0]).transpose(0, 2)
    line_y = line_y / shape[0]
    line_y = y_alpha * line_y

    return x_alpha * line_x + y_alpha * line_y

img = cv2.imread("test.png")
img = torch.from_numpy(img).float()  # torch.fft expects a tensor, not a numpy array
img_fft = torch.fft.fft2(img, dim=(0, 1))

mag = torch.abs(img_fft)
phase = torch.angle(img_fft)

# alpha means pixel shift amount in the spatial domain!
p_shift = phase_shifters(y_alpha=0, x_alpha=50, shape=phase.shape)
phase = (phase + p_shift) % (2 * pi)  # for wrapping

recon = torch.polar(mag, phase)
recon = torch.fft.ifft2(recon, dim=(0, 1)).real
recon = torch.clamp(recon, 0, 255)

cv2.imshow("recon", np.array(recon, dtype=np.uint8))
cv2.waitKey(0)
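For comparison, here is a minimal sketch (function name made up; an illustration of the property, not a claimed fix for the code above) that applies the same shift by multiplying the spectrum with a complex phase ramp built from torch.fft.fftfreq, so the ramp is already in the FFT's frequency ordering:

# Illustrative sketch: shift an HxWxC float tensor by (dy, dx) pixels in the frequency domain.
import torch
from math import pi

def fft_shift_image(img, dy=0.0, dx=0.0):
    H, W = img.shape[0], img.shape[1]
    F = torch.fft.fft2(img, dim=(0, 1))
    fy = torch.fft.fftfreq(H).reshape(H, 1, 1)  # cycles per pixel along y
    fx = torch.fft.fftfreq(W).reshape(1, W, 1)  # cycles per pixel along x
    ramp = torch.exp(-2j * pi * (fy * dy + fx * dx))
    return torch.fft.ifft2(F * ramp, dim=(0, 1)).real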
I'm attempting to extend the 'tail' of an arrow. So far I've been able to draw a line through the center of the arrow, but this line extends both ways rather than in just one direction. The script below shows my progress. Ideally I would be able to extend the tail of the arrow regardless of the orientation of the arrow image. Any suggestions on how to accomplish this? Image examples below, L:R start, progress, goal.
import cv2

# import image and grayscale
image = cv2.imread("image path")
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow("original", image)

# invert black and white
gray = 255 - image
cv2.imshow("Inverted", gray)

# extend the borders for the line
extended = cv2.copyMakeBorder(gray, 20, 20, 10, 10, cv2.BORDER_CONSTANT)
cv2.imshow("extended borders", extended)

# contour finding
contours, hierarchy = cv2.findContours(extended, 1, 2)
cont = contours[0]

rows, cols = extended.shape[:2]
[vx, vy, x, y] = cv2.fitLine(cont, cv2.DIST_L2, 0, 0.01, 0.01)
leftish = int((-x * vy / vx) + y)
rightish = int(((cols - x) * vy / vx) + y)
line = cv2.line(extended, (cols - 1, rightish), (0, leftish), (255, 255, 255), 6)
cv2.imshow("drawn line", line)
"Moments" can be strange things. They're building blocks and show up most often in statistics.
It helps to have a little background in statistics, and see the application of those calculations to image data, which can be considered a set of points. If you've ever calculated the weighted average or "centroid" of something, you'll recognize some of the sums that show up in "moments".
Higher order moments can be building blocks to higher statistical measures such as covariance and skewness.
Using covariance, you can calculate the major axis of your set of points, or your arrow in this case.
Using skewness, you can figure out which side of a distribution is heavier than the other... i.e. which side is the arrow's tip and which is its tail.
This should give you a very precise angle. The scale/radius however is best estimated using other ways. You'll notice that the radius estimated from the area of the arrow fluctuates a little. You could find the points belonging to the arrow that are furthest away from the center, and take that as a somewhat stable length.
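For reference, the orientation computed in the program below comes straight from the normalized central moments, as on the Wikipedia "Image moment" page that the code cites:

$$ \theta = \tfrac{1}{2}\arctan\!\left(\frac{2\mu'_{11}}{\mu'_{20}-\mu'_{02}}\right), \qquad \mu'_{pq} = \frac{\mu_{pq}}{\mu_{00}} $$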
Here's a longish program that implements the two ideas above and shows the direction of an arrow:
#!/usr/bin/env python3
import os
import sys
import numpy as np
import cv2 as cv
# utilities to convert between 2D vectors and complex numbers
# complex numbers are handy for rotating stuff
def to_complex(vec):
    assert vec.shape[-1] == 2
    if vec.dtype == np.float32:
        return vec.view(np.complex64)
    elif vec.dtype == np.float64:
        return vec.view(np.complex128)
    else:
        assert False, vec.dtype

def from_complex(cplx):
    if cplx.dtype == np.complex64:
        return cplx.view(np.float32)
    elif cplx.dtype == np.complex128:
        return cplx.view(np.float64)
    else:
        assert False, cplx.dtype

# utilities for drawing with fractional bits of position
# just to make a pretty picture

def iround(val):
    return int(round(val))

def ipt(vec, shift=0):
    if isinstance(vec, (int, float)):
        return iround(vec * 2**shift)
    elif isinstance(vec, (tuple, list, np.ndarray)):
        return tuple(iround(el * 2**shift) for el in vec)
    else:
        assert False, type(vec)
# utilities for affine transformation
# just to make a pretty picture
def rotate(degrees=0):
    # we want positive rotation
    # meaning move +x towards +y
    # getRotationMatrix2D does it differently
    result = np.eye(3).astype(np.float32)
    result[0:2, 0:3] = cv.getRotationMatrix2D(center=(0,0), angle=-degrees, scale=1.0)
    return result

def translate(dx=0, dy=0):
    result = np.eye(3).astype(np.float32)
    result[0:2, 2] = [dx, dy]
    return result
# main logic
def calculate_direction(im):
    # using "nonzero" (default behavior) is a little noisy
    mask = (im >= 128)
    m = cv.moments(mask.astype(np.uint8), binaryImage=True)

    # easier access... see below for details
    m00 = m['m00']
    m10 = m['m10']
    m01 = m['m01']
    mu00 = m00
    mu20 = m['mu20']
    mu11 = m['mu11']
    mu02 = m['mu02']
    nu30 = m['nu30']
    nu03 = m['nu03']

    # that's just the centroid
    cx = m10 / m00
    cy = m01 / m00
    centroid = np.array([cx, cy])  # as a vector

    # and that's the size in pixels:
    size = m00
    # and that's an approximate "radius", if it were a circle, which it isn't
    radius = (size / np.pi) ** 0.5
    # (since the "size" in pixels can fluctuate due to resampling, so will the "radius")

    # wikipedia helpfully mentions "image orientation" as an example:
    # https://en.wikipedia.org/wiki/Image_moment#Examples_2
    # we'll use that for the major axis
    mup20 = mu20 / mu00
    mup02 = mu02 / mu00
    mup11 = mu11 / mu00
    theta = 0.5 * np.arctan2(2 * mup11, mup20 - mup02)
    #print(f"angle: {theta / np.pi * 180:+6.1f} degrees")

    # we only have the axis, not yet the direction
    # we will assess "skewness" now
    # https://en.wikipedia.org/wiki/Skewness#Definition
    # note how "positive" skewness appears in a distribution:
    # it points away from the heavy side, towards the light side
    # fortunately, cv.moments() also calculates those "standardized moments"
    # https://en.wikipedia.org/wiki/Standardized_moment#Standard_normalization
    skew = np.array([nu30, nu03])
    #print("skew:", skew)

    # we'll have to *rotate* that so it *roughly* lies along the x axis
    # then assess which end is the heavy/light end
    # then use that information to maybe flip the axis,
    # so it points in the direction of the arrow
    skew_complex = to_complex(skew)  # reinterpret two reals as one complex number
    rotated_skew_complex = skew_complex * np.exp(1j * -theta)  # rotation
    rotated_skew = from_complex(rotated_skew_complex)
    #print("rotated skew:", rotated_skew)

    if rotated_skew[0] > 0:  # pointing towards tail
        theta = (theta + np.pi) % (2*np.pi)  # flip direction 180 degrees
    else:  # pointing towards head
        pass

    print(f"angle: {theta / np.pi * 180:+6.1f} degrees")

    # construct a vector that points like the arrow in the picture
    direction = np.exp([1j * theta])
    direction = from_complex(direction)

    return (radius, centroid, direction)
def draw_a_picture(im, radius, centroid, direction):
    height, width = im.shape[:2]

    # take the source at half brightness
    canvas = cv.cvtColor(im // 2, cv.COLOR_GRAY2BGR)

    shift = 4  # prettier drawing

    cv.circle(canvas,
        center=ipt(centroid, shift),
        radius=ipt(radius, shift),
        thickness=iround(radius * 0.1),
        color=(0,0,255),
        lineType=cv.LINE_AA,
        shift=shift)

    # (-direction) meaning point the *opposite* of the arrow's direction, i.e. towards the tail
    cv.line(canvas,
        pt1=ipt(centroid + direction * radius * -3.0, shift),
        pt2=ipt(centroid + direction * radius * +3.0, shift),
        thickness=iround(radius * 0.05),
        color=(0,255,255),
        lineType=cv.LINE_AA,
        shift=shift)

    cv.line(canvas,
        pt1=ipt(centroid + (-direction) * radius * 3.5, shift),
        pt2=ipt(centroid + (-direction) * radius * 4.5, shift),
        thickness=iround(radius * 0.15),
        color=(0,255,255),
        lineType=cv.LINE_AA,
        shift=shift)

    return canvas
if __name__ == '__main__':
    imfile = sys.argv[1] if len(sys.argv) >= 2 else "p7cmR.png"
    src = cv.imread(imfile, cv.IMREAD_GRAYSCALE)
    src = 255 - src  # invert (white arrow on black background)

    height, width = src.shape[:2]
    diagonal = np.hypot(height, width)
    outsize = int(np.ceil(diagonal * 1.3))  # fudge factor

    cv.namedWindow("arrow", cv.WINDOW_NORMAL)
    cv.resizeWindow("arrow", 5*outsize, 5*outsize)

    angle = 0  # degrees
    increment = +1
    do_spin = True

    while True:
        print(f"{angle:+.0f} degrees")

        M = translate(dx=+outsize/2, dy=+outsize/2) @ rotate(degrees=angle) @ translate(dx=-width/2, dy=-height/2)
        im = cv.warpAffine(src, M=M[:2], dsize=(outsize, outsize), flags=cv.INTER_CUBIC, borderMode=cv.BORDER_REPLICATE)
        # resampling introduces blur... except at exact multiples of 90 degrees,
        # so at those rotations, things will jump a little.
        # this rotation is only for demo purposes

        (radius, centroid, direction) = calculate_direction(im)

        canvas = draw_a_picture(im, radius, centroid, direction)
        cv.imshow("arrow", canvas)

        if do_spin:
            angle = (angle + increment) % 360
        print()

        key = cv.waitKeyEx(30 if do_spin else -1)
        if key == -1:
            continue
        elif key in (0x0D, 0x20):  # ENTER (CR), SPACE
            do_spin = not do_spin  # toggle spinning
        elif key == 27:  # ESC
            break  # end program
        elif key == 0x250000:  # VK_LEFT
            increment = -abs(increment)
            angle += increment
        elif key == 0x270000:  # VK_RIGHT
            increment = +abs(increment)
            angle += increment
        else:
            print(f"key 0x{key:02x}")

    cv.destroyAllWindows()
I came across this particular color-transfer tutorial using OpenCV:
https://www.pyimagesearch.com/2014/06/30/super-fast-color-transfer-images/
and implemented it like this:
def color_transfer(source, target):
    # convert the source and target images to Lab color space
    source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
    target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")

    # compute color stats for both images
    (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = self.image_stats(source)
    (lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = self.image_stats(target)

    # split the color space
    (l, a, b) = cv2.split(target)

    # subtract the means from the target image
    l -= lMeanTar
    a -= aMeanTar
    b -= bMeanTar

    # check values
    print(lStdSrc, aStdSrc, bStdSrc)
    print(lStdTar, aStdTar, bStdTar)
    print(lMeanSrc, aStdSrc, bMeanSrc)

    # process lab computation
    l = (lStdSrc / lStdTar) * l
    a = (aStdSrc / aStdTar) * a
    b = (bStdSrc / bStdTar) * b

    # add the source mean
    l += lMeanSrc
    a += aMeanSrc
    b += bMeanSrc

    # clip the pixels between 0 and 255
    l = np.clip(l, 0, 255)
    a = np.clip(a, 0, 255)
    b = np.clip(b, 0, 255)

    # merge the channels
    transfer = cv2.merge([l, a, b])

    # convert back to BGR
    transfer = cv2.cvtColor(transfer.astype("uint8"), cv2.COLOR_LAB2BGR)

    return transfer
In this particular code:
# process lab computation
l = (lStdSrc / lStdTar) * l
a = (aStdSrc / aStdTar) * a
b = (bStdSrc / bStdTar) * b
the scaling uses the source image's standard deviation, so when the source is a plain (single-color) image its standard deviations are 0; the scaled L, a and b channels all become 0, and after adding the source means the output is a flat image as well.
How can I fix this? It works when the source image is not a plain color image.
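To make the failure mode concrete, a toy calculation with made-up numbers for a single L value:

# Hypothetical numbers: a flat source image has zero spread in every Lab channel.
lMeanSrc, lStdSrc = 128.0, 0.0   # plain source: every pixel identical
lMeanTar, lStdTar = 100.0, 25.0  # ordinary target
l_pixel = 140.0                  # one L value from the target
l_new = (lStdSrc / lStdTar) * (l_pixel - lMeanTar) + lMeanSrc
print(l_new)  # 128.0, and the same for every pixel, so the transferred image is flat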
I have the following attempt at calculating IOU for several detections I get from a face detector.
import numpy as np

class BboxUtils(object):
    @staticmethod
    def ious(bboxes1, bboxes2):
        left1, top1, right1, bottom1 = BboxUtils.bbox_to_perimiters(bboxes1)
        left2, top2, right2, bottom2 = BboxUtils.bbox_to_perimiters(bboxes2)

        area1 = (right1 - left1) * (top1 - bottom1)
        area2 = (right2 - left2) * (top2 - bottom2)

        intersection_left = np.maximum(left1, left2)
        intersection_right = np.minimum(right1, right2)
        intersection_top = np.maximum(top1, top2)
        intersection_bottom = np.minimum(bottom1, bottom2)

        intersection_area = (intersection_right - intersection_left) * (intersection_top - intersection_bottom)
        intersection_area[intersection_area < 0] = 0

        iou = intersection_area / (area1 + area2 - intersection_area)
        return iou

    @staticmethod
    def bbox_to_perimiters(bboxes):
        left, w, top, h = np.split(bboxes.reshape(-1), 4)
        right = left + w
        bottom = top + h
        return left, right, top, bottom

# example usage:
detections1 = my_detector.detect(frame1)  # np.array of shape (n1, 4)
detections2 = my_detector.detect(frame2)  # np.array of shape (n2, 4)
ious = BboxUtils.ious(detections1, detections2)
This code assumes that:
- the detections on the two frames (bboxes1 and bboxes2) have the same length, and
- each detection has the same index in bboxes1 and bboxes2.
I would like to calculate IOU with the same logic as the code above, but avoid for loops.
Note that bboxes1 and bboxes2 can be matrices of shape (n1, 4) and (n2, 4), where n1 is not necessarily equal to n2.
How can this be done?
Finally, there is probably a library that already does all of this; please point me to it if it exists.
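For illustration, a broadcasting-only sketch of the pairwise case (function name made up; it keeps the (left, width, top, height) row layout assumed above and returns an (n1, n2) IOU matrix):

# Illustrative sketch, not the original attempt above.
import numpy as np

def pairwise_ious(boxes1, boxes2):
    # boxes1: (n1, 4), boxes2: (n2, 4), rows are (left, width, top, height)
    l1, w1, t1, h1 = [boxes1[:, i:i + 1] for i in range(4)]  # column vectors, shape (n1, 1)
    l2, w2, t2, h2 = [boxes2[:, i] for i in range(4)]        # row vectors, shape (n2,)
    r1, b1 = l1 + w1, t1 + h1
    r2, b2 = l2 + w2, t2 + h2
    inter_w = np.clip(np.minimum(r1, r2) - np.maximum(l1, l2), 0, None)
    inter_h = np.clip(np.minimum(b1, b2) - np.maximum(t1, t2), 0, None)
    inter = inter_w * inter_h                                # broadcasts to (n1, n2)
    union = w1 * h1 + w2 * h2 - inter
    return inter / union

As for an existing routine, torchvision.ops.box_iou computes the same pairwise IOU matrix, though it expects boxes in (x1, y1, x2, y2) format rather than (left, width, top, height).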