I'm trying to set up a fish-eye camera for object localisation with respect to a particular frame of reference.
I tried both the OpenCV fisheye module and the rational model from calibrateCamera() to calibrate. I obtained this result.
I collected 2 different datasets, one with calibration images taken mostly close to the camera (ds1) and a second one with images taken from afar (ds2). ds12 is a dataset obtained merging the two.
nok4 indicates the fisheye model with k4 fixed to 0.
rm is the rational model from cv2.calibrateCamera()
The camera has 178° horizontal FOV and 101° vertical FOV, the distortion is corrected mostly in the center of the image with disappointing results in the outermost parts of the image.
Am I doing something wrong? What could I do to improve the results?
Edit
Here's the code I'm using for both the calibration processes:
import cv2 as cv
import os
import numpy as np
# Work from the folder containing this script so the relative dataset paths resolve.
cwd = os.path.dirname(os.path.realpath(__file__))
os.chdir(cwd)

# Dataset folder: "merged_images", or "merged_images_<number>" when a number is given.
number = None
folder_name = "merged_images"
if number is not None:
    folder_name = f"{folder_name}_{number}"
points_path = os.path.join(folder_name, "dataset", "good_detections")

# Load the stored detections; the board grid is scaled by the square size
# (presumably metres -- confirm against how points.npz was produced).
npz = np.load(os.path.join(points_path, "points.npz"))
square_size = 0.02435
imgpoints = npz["imgpoints"]
objpoints = npz["objpoints"] * square_size
file_names = npz["file_names"]

# Optionally reorder the views, reusing a saved permutation when one exists.
shuffle = True
if shuffle:
    indices_file = "indices.npz"
    if indices_file in os.listdir(points_path):
        p = np.load(os.path.join(points_path, indices_file))["indices"]
    else:
        print("Random indices assigned")
        p = np.random.permutation(len(imgpoints))
    imgpoints, objpoints, file_names = imgpoints[p], objpoints[p], file_names[p]

# One source frame is loaded to obtain the image size and to preview the undistortion.
reference_name = file_names[2].replace("detected_", "")
img = cv.imread(os.path.join(points_path, reference_name))

# rational model
flag_list = [
    cv.CALIB_RATIONAL_MODEL,
    # cv.CALIB_ZERO_TANGENT_DIST,
]
calibration_flags = 0
for flag in flag_list:
    calibration_flags += flag

gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
shape = gray.shape[::-1]  # (width, height), the order calibrateCamera is given here
ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(
    objpoints,
    imgpoints,
    shape,
    None,
    None,
    flags=calibration_flags,
)

# Undistort the reference frame with alpha=1 in getOptimalNewCameraMatrix.
h, w = img.shape[:2]
image_size = (w, h)
newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, image_size, 1, image_size)
dst = cv.undistort(img, mtx, dist, None, newcameramtx)
# fisheye model
import re

flag_list = [
    cv.fisheye.CALIB_RECOMPUTE_EXTRINSIC,
    cv.fisheye.CALIB_CHECK_COND,
    cv.fisheye.CALIB_FIX_SKEW,
    # cv.fisheye.CALIB_FIX_K4,
    # cv.fisheye.CALIB_FIX_K3,
    # cv.fisheye.CALIB_FIX_K2,
    # cv.fisheye.CALIB_FIX_K1,
]
calibration_flags = 0
for flag in flag_list:
    calibration_flags |= flag

N_OK = len(objpoints)
K = np.zeros((3, 3))
D = np.zeros((4, 1))
rvecs = [np.zeros((1, 1, 3), dtype=np.float64) for i in range(N_OK)]
tvecs = [np.zeros((1, 1, 3), dtype=np.float64) for i in range(N_OK)]

# Each view's object points get a leading axis before being passed to fisheye.calibrate.
n_objpoints = [np.expand_dims(objp, 0) for objp in objpoints]
all_true_points = list(n_objpoints)
all_image_points = list(imgpoints)
all_frames = list(file_names)
rejected = []
counter = 0

# Calibrate repeatedly, dropping the view named in the error message each time
# CALIB_CHECK_COND rejects one, until the remaining views calibrate cleanly.
while True:
    try:
        rms, mtx, dist, rvecs, tvecs = cv.fisheye.calibrate(
            all_true_points,
            all_image_points,
            gray.shape[::-1],
            K,
            D,
            rvecs,
            tvecs,
            calibration_flags,
            (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 1e-3),
        )
    except cv.error as err:
        # Parse the full (possibly multi-digit) index after "array " in the
        # message; reading a single character removed the wrong view for any
        # index >= 10.
        found = re.search(r"array (\d+)", str(err))
        if found is None or int(found.group(1)) >= len(all_true_points):
            # Not an error this loop can recover from: propagate it unchanged.
            raise
        idx = int(found.group(1))
        all_true_points.pop(idx)
        all_image_points.pop(idx)
        rejected.append(all_frames.pop(idx))
        print(f"{counter}. Removed ill-conditioned image {idx} from the data. Trying again...")
        counter += 1
    else:
        print('Found a calibration based on {} well-conditioned images.'.format(len(all_true_points)))
        break

# Use the intrinsics returned by calibrate() rather than relying on K and D
# having been updated in place by the binding.
K, D = mtx, dist

h, w = img.shape[:2]
DIM = (w, h)
dim1 = img.shape[:2][::-1]  # dim1 is the dimension of input image to un-distort
dim2 = None
dim3 = None
balance = 1
assert dim1[0]/dim1[1] == DIM[0]/DIM[1], "Image to undistort needs to have same aspect ratio as the ones used in calibration"
if not dim2:
    dim2 = dim1
if not dim3:
    dim3 = dim1
scaled_K = K * dim1[0] / DIM[0]  # The values of K is to scale with image dimension.
scaled_K[2][2] = 1.0  # Except that K[2][2] is always 1.0
# This is how scaled_K, dim2 and balance are used to determine the final K used to un-distort image. OpenCV document failed to make this clear!
new_K = cv.fisheye.estimateNewCameraMatrixForUndistortRectify(scaled_K, D, dim2, np.eye(3), balance=balance)
map1, map2 = cv.fisheye.initUndistortRectifyMap(scaled_K, D, np.eye(3), new_K, dim3, cv.CV_16SC2)
undistorted_img = cv.remap(img, map1, map2, interpolation=cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT)
# Side-by-side preview: undistorted result on the left, original on the right.
new_img = cv.hconcat([undistorted_img, img])
Corner extraction is performed with the following code:
objpoints = []  # 3d point in real world space
imgpoints = []  # 2d points in image plane.
file_names = []  # files analyzed
pattern_size = (9, 6)  # chessboard pattern size passed to the detector

for file in os.listdir(detections_path):
    # Only raw frames are processed; annotated outputs get a "detected_" prefix.
    if not file.startswith("hd_frame"):
        continue
    frame = cv.imread(os.path.join(detections_path, file))
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    found, corners = cv.findChessboardCorners(gray, pattern_size, None)
    if not found:
        continue

    file_name = "detected_" + file
    objpoints.append(objp)
    imgpoints.append(corners)
    # save file_name
    file_names.append(file_name)

    corners2 = cv.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
    # Draw and display the corners
    cv.drawChessboardCorners(frame, pattern_size, corners2, found)
    # save image and points
    cv.imwrite(os.path.join(detections_path, file_name), frame)
You can use a single image for calibration as described in details here: https://discorpy.readthedocs.io/en/latest/usage/demo_06.html . The correction model used in this package may give you better results than the model used by opencv: https://discorpy.readthedocs.io/en/latest/tutorials/methods.html
Related
I need to determine the location of yogurts in the supermarket. Source photo looks like
With template:
I am using SIFT to extract the key points of the template:
img1 = cv.imread('train.jpg')  # queryImage
sift = cv.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
path = glob.glob("template.jpg")
cv_img = []
l = 0
# Match the query image against every template and keep the one with the most matches.
for img in path:
    img2 = cv.imread(img)  # trainImage
    # find the keypoints and descriptors with SIFT
    kp2, des2 = sift.detectAndCompute(img2, None)
    # FLANN parameters
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)  # or pass empty dictionary
    flann = cv.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)
    if l < len(matches):
        l = len(matches)
        image = img2
        match = matches
h_query, w_query, _ = img2.shape

# Need to draw only good matches, so create a mask
matchesMask = [[0, 0] for i in range(len(match))]
good_matches = []
good_matches_indices = {}
# ratio test as per Lowe's paper
for i, (m, n) in enumerate(match):
    if m.distance < 0.7 * n.distance:
        matchesMask[i] = [1, 0]
        good_matches.append(m)
        # Remember which entry of `match` each kept match came from.
        good_matches_indices[len(good_matches) - 1] = i
bboxes = []

# The list built above is `good_matches`; the original referred to an
# undefined name `good` here, which raised NameError.
src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 2)

# NOTE(review): initialize_ransac is not defined in the visible code -- confirm where it comes from.
model, inliers = initialize_ransac(src_pts, dst_pts)
n_inliers = np.sum(inliers)
matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
print(len(matched_indices))

model, inliers = ransac(
    (src_pts, dst_pts),
    AffineTransform, min_samples=4,
    residual_threshold=4, max_trials=20000
)
n_inliers = np.sum(inliers)
print(n_inliers)
matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
print(matched_indices)

# NOTE(review): the corner is given as (height, width) while the keypoints
# fed to the model are (x, y) -- confirm the intended coordinate order.
q_coordinates = np.array([(0, 0), (h_query, w_query)])
coords = model.inverse(q_coordinates)
print(coords)
h_query, w_query, _ = img2.shape
q_coordinates = np.array([(0, 0), (h_query, w_query)])
coords = model.inverse(q_coordinates)
print(coords)
# bboxes_list.append((i, coords))

M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 2)
draw_params = dict(matchColor=(0, 255, 0),
                   singlePointColor=(255, 0, 0),
                   matchesMask=matchesMask,
                   flags=cv.DrawMatchesFlags_DEFAULT)
img3 = cv.drawMatchesKnn(img1, kp1, image, kp2, match, None, **draw_params)
plt.imshow(img3), plt.show()
Result of SIFT looks like
The question is: what is the best way to cluster the matched points to obtain rectangles, each representing one yogurt? I tried RANSAC, but that method doesn't work in this case.
I am proposing an approach based on what is discussed in this paper. I have modified the approach a bit because the use-case is not entirely same but they do use SIFT features matching to locate multiple objects in video frames. They have used PCA for reducing time but that may not be required for still images.
Sorry I could not write a code for this as it will take a lot of time but I believe this should work to locate all the occurrences of the template object.
The modified approach is like this:
Divide the template image into regions: left, middle, right along the horizontal
and top, bottom along the vertical
Now when you match features between the template and source image, you will get features matched from some of the keypoints from these regions on multiple locations on the source image. You can use these keypoints to identify which region of the template is present at what location(s) in the source image. If there are overlapping regions i.e. keypoints from different regions matched with close keypoints in source image then that would mean a wrong match.
Mark each set of matching keypoints within a neighborhood on source image as left, center, right, top, bottom depending upon if they have majority matches from keypoints of a particular region in the template image.
Starting from each left region on source image move towards right and if we find a central region followed by a right region then this area of source image between regions marked as left and right, can be marked as location of one template object.
There could be overlapping objects which could result in a left region followed by another left region when moving in right direction from the left region. The area between the two left regions can be marked as one template object.
For further refined locations, each area of source image marked as one template object can be cropped and re-matched with the template image.
Try working spatially: for each key-point in img2 get some bounding box around and consider only the points in there for your ransac homography to check for best fit.
You can also work with overlapping windows and later discard similar resulting homographies.
Here is what you can do:
Base Image = Whole picture of shelf
Template Image = Single product image
Get SIFT matches from both images. (base and template image)
Do feature matching.
Get all the points in base image which are matching. (refer to figure)
Create clusters based on the size of the template image. (Here the threshold is 50px.)
Get Bounding box of clusters.
Crop each cluster's bounding box and check its matches against the template image.
Accept all clusters which have at least a minimum percentage of matches. (Here a minimum of 10% of keypoints is used.)
def plot_pts(img, pts):
    """Show a copy of ``img`` with a filled circle drawn at every point in ``pts``.

    Each point is indexed as ``pt[0]``, ``pt[1]`` and truncated to ints for
    drawing. The input image itself is not drawn on.
    """
    canvas = img.copy()
    for pt in pts:
        center = (int(pt[0]), int(pt[1]))
        canvas = cv2.circle(canvas, center, radius=7, color=(255, 0, 0), thickness=-1)
    plt.figure(figsize=(20, 10))
    plt.imshow(canvas)
def plot_bbox(img, bbox_list):
    """Show a copy of ``img`` with one rectangle drawn per entry of ``bbox_list``.

    For each box, element 0 and element 2 are used as the two opposite
    corners of the rectangle.
    """
    canvas = img.copy()
    for corners in bbox_list:
        canvas = cv2.rectangle(
            canvas, pt1=corners[0], pt2=corners[2], color=(255, 0, 0), thickness=2
        )
    plt.figure(figsize=(20, 10))
    plt.imshow(canvas)
def get_distance(pt1, pt2):
    """Return the Euclidean distance between two 2-D points given as (x, y) pairs."""
    ax, ay = pt1
    bx, by = pt2
    delta_x = ax - bx
    delta_y = ay - by
    return np.sqrt(np.square(delta_x) + np.square(delta_y))
def check_centroid(pt, centroid):
    """Return True when ``pt`` lies strictly closer than ``max_distance`` to ``centroid``.

    ``max_distance`` is read from module scope at call time.
    """
    px, py = pt
    mean_x, mean_y = centroid
    separation = get_distance(pt1=(px, py), pt2=(mean_x, mean_y))
    return bool(separation < max_distance)
def update_centroid(pt, centroids_list):
    """Assign ``pt`` to the first cluster whose mean is close enough, else start a new one.

    ``centroids_list`` is a list of clusters, each a list of points. The outer
    list is copied shallowly, so a point appended to an existing cluster is
    also visible through the list that was passed in. The result is passed
    through ``recheck_centroid`` before being returned.
    """
    updated = centroids_list.copy()
    for idx, members in enumerate(centroids_list):
        if check_centroid(pt, np.mean(members, axis=0)):
            updated[idx].append(pt)
            break
    else:
        # No existing cluster was close enough.
        updated.append([pt])
    return recheck_centroid(updated)
def recheck_centroid(centroids_list):
    """Return the clusters with duplicate points removed from each one.

    Deduplication goes through ``set``, so points must be hashable and the
    order of points inside a cluster is not preserved.
    """
    deduplicated = []
    for cluster in centroids_list:
        deduplicated.append(list(set(cluster)))
    return deduplicated
def get_bbox(pts):
    """Return the axis-aligned bounding box of ``pts`` as four [x, y] corners.

    Corner order: (min x, min y), (max x, min y), (max x, max y), (min x, max y).
    """
    low_x, low_y = np.min(pts, axis=0)
    high_x, high_y = np.max(pts, axis=0)
    return [
        [low_x, low_y],
        [high_x, low_y],
        [high_x, high_y],
        [low_x, high_y],
    ]
class RotateAndTransform:
    """Bundle a reference image with SIFT extraction and BF/FLANN descriptor matching."""

    def __init__(self, path_img_ref):
        """Load the reference image and create the SIFT detector and both matchers."""
        self.path_img_ref = path_img_ref
        self.ref_img = self._read_ref_image()
        # sift
        self.sift = cv2.SIFT_create()
        # feature matching
        self.bf = cv2.BFMatcher()
        # FLANN parameters
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)  # or pass empty dictionary
        self.flann = cv2.FlannBasedMatcher(index_params, search_params)

    def _read_ref_image(self):
        """Read the reference image from ``self.path_img_ref`` and convert BGR to RGB."""
        ref_img = cv2.imread(self.path_img_ref, cv2.IMREAD_COLOR)
        ref_img = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)
        return ref_img

    def read_src_image(self, path_img_src):
        """Read a source image, remember its path on the instance, and return it as RGB."""
        self.path_img_src = path_img_src
        src_img = cv2.imread(path_img_src, cv2.IMREAD_COLOR)
        src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
        return src_img

    def convert_bw(self, img):
        """Return the grayscale version of an RGB image."""
        img_bw = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        return img_bw

    def get_keypoints_descriptors(self, img_bw):
        """Run SIFT on ``img_bw`` and return ``(keypoints, descriptors)``."""
        keypoints, descriptors = self.sift.detectAndCompute(img_bw, None)
        return keypoints, descriptors

    @staticmethod
    def _ratio_test(knn_matches, threshold):
        """Keep the best match of each pair whose distance beats ``threshold`` * second best.

        Entries with fewer than two neighbours are skipped; unpacking them as a
        pair would raise ValueError. Each kept match is wrapped in a
        one-element list.
        """
        kept = []
        for pair in knn_matches:
            if len(pair) < 2:
                continue
            best, runner_up = pair[0], pair[1]
            if best.distance < threshold * runner_up.distance:
                kept.append([best])
        return kept

    def get_matches(self, src_descriptors, ref_descriptors, threshold=0.6):
        """Match reference descriptors (query) against source descriptors (train).

        Both the brute-force and the FLANN matcher are run with k=2 and
        filtered with the same ratio test.

        Returns:
            ``(good_matches, good_flann_matches)``, each a list of
            one-element lists holding the accepted match.
        """
        matches = self.bf.knnMatch(ref_descriptors, src_descriptors, k=2)
        flann_matches = self.flann.knnMatch(ref_descriptors, src_descriptors, k=2)
        # Apply ratio test for Brute Force
        good_matches = self._ratio_test(matches, threshold)
        print(f'Numner of BF Match: {len(matches)}, Number of good BF Match: {len(good_matches)}')
        # Apply ratio test for FLANN
        good_flann_matches = self._ratio_test(flann_matches, threshold)
        print(f'Numner of FLANN Match: {len(flann_matches)}, Number of good Flann Match: {len(good_flann_matches)}')
        return good_matches, good_flann_matches

    def get_src_dst_pts(self, good_flann_matches, ref_keypoints, src_keypoints):
        """Return matched coordinates as ``(source points, reference points)`` arrays.

        ``queryIdx`` indexes ``ref_keypoints`` and ``trainIdx`` indexes
        ``src_keypoints``, following the argument order used in ``get_matches``.
        """
        pts_src = [src_keypoints[entry[0].trainIdx].pt for entry in good_flann_matches]
        pts_ref = [ref_keypoints[entry[0].queryIdx].pt for entry in good_flann_matches]
        return np.array(pts_src), np.array(pts_ref)
def extend_bbox(bbox, increment=0.1):
    """Return a copy of a four-corner box pushed outwards at every corner.

    The corners are expected in the order top-left, top-right, bottom-right,
    bottom-left. Each coordinate is moved by ``int(coordinate * increment)``,
    so the padding scales with the coordinate value itself rather than with
    the box size.
    """
    def pad(value):
        return int(value * increment)

    bbox_new = bbox.copy()
    x0, y0 = bbox_new[0][0], bbox_new[0][1]
    bbox_new[0] = [x0 - pad(x0), y0 - pad(y0)]
    x1, y1 = bbox_new[1][0], bbox_new[1][1]
    bbox_new[1] = [x1 + pad(x1), y1 - pad(y1)]
    x2, y2 = bbox_new[2][0], bbox_new[2][1]
    bbox_new[2] = [x2 + pad(x2), y2 + pad(y2)]
    x3, y3 = bbox_new[3][0], bbox_new[3][1]
    bbox_new[3] = [x3 - pad(x3), y3 + pad(y3)]
    return bbox_new
def crop_bbox(img, bbox):
    """Return the region of a 3-D image array covered by a four-corner box.

    ``bbox[0]`` supplies the starting column and row; the extent comes from
    the first coordinate of ``bbox[1]`` and the second coordinate of
    ``bbox[2]``.
    """
    left, top = bbox[0]
    span_cols = bbox[1][0] - bbox[0][0]
    span_rows = bbox[2][1] - bbox[0][1]
    return img[top: top + span_rows, left: left + span_cols, :]
base_img = cv2.imread(path_img_base)
ref_img = cv2.imread(path_img_ref)

# Extract SIFT features from the reference image and from the base image.
rnt = RotateAndTransform(path_img_ref)
ref_img_bw = rnt.convert_bw(img=rnt.ref_img)
ref_keypoints, ref_descriptors = rnt.get_keypoints_descriptors(ref_img_bw)
base_img = rnt.read_src_image(path_img_src=path_img_base)
base_img_bw = rnt.convert_bw(img=base_img)
base_keypoints, base_descriptors = rnt.get_keypoints_descriptors(base_img_bw)
good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=ref_descriptors, threshold=0.6)

# Collect the integer pixel positions of the matched keypoints.
# NOTE(review): these are taken from ref_keypoints (queryIdx) and the crops
# below come from ref_img, while the accompanying description talks about
# points in the base image -- confirm which image is intended.
ref_points = []
for gm in good_flann_matches:
    x, y = ref_keypoints[gm[0].queryIdx].pt
    x, y = int(x), int(y)
    ref_points.append((x, y))

# Cluster the points: a point joins a cluster when it is closer than
# max_distance pixels to that cluster's mean.
max_distance = 50
centroids = [[ref_points[0]]]
for i in tqdm(range(len(ref_points))):
    pt = ref_points[i]
    centroids = update_centroid(pt, centroids)

# The original line had markdown image residue spliced into the name
# `centroids`, which made it a syntax error.
bbox = [get_bbox(c) for c in centroids]
centroids = [np.mean(c, axis=0) for c in centroids]
print(f'Number of Points: {len(good_flann_matches)}, centroids: {len(centroids)}')

# Crop every cluster's slightly enlarged box and re-match the crop against the base image.
data = []
for i in range(len(bbox)):
    temp_crop_img = crop_bbox(ref_img, extend_bbox(bbox[i], 0.01))
    temp_crop_img_bw = rnt.convert_bw(img=temp_crop_img)
    temp_crop_keypoints, temp_crop_descriptors = rnt.get_keypoints_descriptors(temp_crop_img_bw)
    good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=temp_crop_descriptors, threshold=0.6)
    temp_data = {'image': temp_crop_img,
                 'num_matched': len(good_flann_matches),
                 'total_keypoints': len(base_keypoints),
                 }
    data.append(temp_data)

# Keep only crops with more than 25 good FLANN matches and show them.
filter_data = [{'num_matched': i['num_matched'], 'image': i['image']} for i in data if i['num_matched'] > 25]
for i in range(len(filter_data)):
    temp_num_match = filter_data[i]['num_matched']
    plt.figure()
    plt.title(f'num matched: {temp_num_match}')
    plt.imshow(filter_data[i]['image'])
First you could detect any item that is on the shelf with a network like this; it's pre-trained in this exact context and works pretty well. You should also rectify the image before feeding it to the network. You will obtain bounding boxes for every product (maybe with some false positives/negatives, but that's another issue). Then you can match each box with the template using SIFT and calculate a score (it's up to you to define which score works), but I suggest using another approach, such as a siamese network, if you have a consistent dataset.
I want to stitch two panoramic images using a homography matrix in OpenCV. I found the 3x3 homography matrix,
but I can't stitch the two images. I must stitch the two images by hand (no built-in function).
Here is my code:
import cv2
import numpy as np
MIN_MATCH_COUNT = 10
img1 = cv2.imread("pano1/cyl_image00.png")
img2 = cv2.imread("pano1/cyl_image01.png")

# Detect ORB keypoints and descriptors in both images.
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

# NOTE(review): the other snippets in this file set FLANN_INDEX_KDTREE = 1; in
# FLANN's algorithm enum 0 is the linear (brute-force) index -- confirm which
# one is intended here.
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
# The descriptors are converted to float32 before being handed to FLANN.
des1 = np.float32(des1)
des2 = np.float32(des2)
matches = flann.knnMatch(des1, des2, k=2)

# Ratio test; entries with fewer than two neighbours cannot be tested and are skipped.
goodMatches = []
for pair in matches:
    if len(pair) < 2:
        continue
    m, n = pair
    if m.distance < 0.7 * n.distance:
        goodMatches.append(m)

# Fail with a clear message instead of leaving integer placeholders in
# src_pts/dst_pts, which only crashed later inside the homography code.
if len(goodMatches) <= MIN_MATCH_COUNT:
    raise ValueError(
        "Not enough good matches: {} found, more than {} required".format(
            len(goodMatches), MIN_MATCH_COUNT))

# img1 keypoints are the destination and img2 keypoints the source.
dst_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches]).reshape(-1, 2)
src_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches]).reshape(-1, 2)
def generateRandom(src_Pts, dest_Pts, N):
    """Draw ``N`` random correspondences from two parallel point sequences.

    Indices are drawn without replacement, so the same correspondence is never
    returned twice (duplicates make a 4-point homography degenerate). Drawing
    with replacement is kept only as a fallback when fewer than ``N`` points
    are available.

    Returns:
        ``(src, dest)`` as float32 arrays whose rows correspond to each other.
    """
    n_points = len(src_Pts)
    picks = np.random.choice(n_points, N, replace=n_points < N)
    src = [src_Pts[i] for i in picks]
    dest = [dest_Pts[i] for i in picks]
    return np.asarray(src, dtype=np.float32), np.asarray(dest, dtype=np.float32)
def findH(src, dest, N):
    """Estimate the 3x3 homography mapping ``src`` points onto ``dest`` points.

    Two linear equations are built for each of the first ``N``
    correspondences. The solution is the last row of ``Vh`` from the SVD of
    that system, scaled so its final entry is 1.
    """
    equations = []
    for k in range(N):
        x, y = src[k][0], src[k][1]
        u, v = dest[k][0], dest[k][1]
        equations.append([x, y, 1, 0, 0, 0, -x * u, -u * y, -u])
        equations.append([0, 0, 0, x, y, 1, -v * x, -v * y, -v])
    system = np.asarray(equations)
    _, _, vh = np.linalg.svd(system)
    solution = vh[-1, :] / vh[-1, -1]
    return solution.reshape(3, 3)
def ransacHomography(src_Pts, dst_Pts, n_iters=70, threshold=10):
    """Estimate a homography from ``src_Pts`` to ``dst_Pts`` with RANSAC.

    Each iteration fits a homography to 4 random correspondences and counts
    the points whose projection lands within ``threshold`` of its
    destination. The final homography is refitted on the largest inlier set.

    Args:
        src_Pts: sequence of (x, y) source points.
        dst_Pts: sequence of (x, y) destination points, parallel to ``src_Pts``.
        n_iters: number of random 4-point hypotheses to try.
        threshold: maximum reprojection distance for a point to count as an inlier.

    Returns:
        The 3x3 homography refitted on the best inlier set.

    Raises:
        ValueError: if no hypothesis produced a single inlier.
    """
    src_all = np.asarray(src_Pts, dtype=np.float64)
    dst_all = np.asarray(dst_Pts, dtype=np.float64)
    # Homogeneous source coordinates, one row per point.
    homogeneous = np.hstack([src_all, np.ones((len(src_all), 1))])

    best_count = 0
    best_mask = None
    for _ in range(n_iters):
        srcP, destP = generateRandom(src_Pts, dst_Pts, 4)
        H = findH(srcP, destP, 4)
        projected = homogeneous @ H.T
        # Degenerate hypotheses may divide by zero; the resulting inf/nan
        # distances simply fail the threshold test below.
        with np.errstate(divide="ignore", invalid="ignore"):
            projected = projected[:, :2] / projected[:, 2:3]
            errors = np.linalg.norm(dst_all - projected, axis=1)
            inlier_mask = errors < threshold
        count = int(inlier_mask.sum())
        if count > best_count:
            best_count = count
            best_mask = inlier_mask

    if best_mask is None:
        raise ValueError("RANSAC did not find any homography with inliers")

    inlier_src = np.asarray(src_Pts, dtype=np.float32)[best_mask]
    inlier_dst = np.asarray(dst_Pts, dtype=np.float32)[best_mask]
    return findH(inlier_src, inlier_dst, best_count)
# src_pts were taken from img2's keypoints and dst_pts from img1's, so H maps
# (x, y) coordinates of img2 into img1's frame.
H = ransacHomography(src_pts, dst_pts)
So far, so good. I found homography matrix(H).
Next, I tried to stitch two panoramic images.
First, I create a big array to stitch images(img3).
I copied img1 to the first half of img3.
I tried to find new coordinates for img2 through homography matrix and I copied new img2 coordinates to img3.
Here is my code:
height1, width1, rgb1 = img1.shape
height2, width2, rgb2 = img2.shape
# Zero-initialised canvas (np.empty would leave uninitialised values wherever
# no pixel is written). img1 fills the left part unchanged.
img3 = np.zeros((height1, width1 + width2, 3))
img3[:, 0:width1] = img1 / 255.0

# H was estimated from keypoint (x, y) = (column, row) coordinates, so every
# img2 pixel must be fed to it as (col, row, 1); the transformed point is
# (x', y') and is written at img3[y', x'].
rows, cols = np.mgrid[0:height2, 0:width2]
rows = rows.ravel()
cols = cols.ravel()
pixels = np.stack([cols, rows, np.ones(rows.size)])
warped = H.dot(pixels)
with np.errstate(divide="ignore", invalid="ignore"):
    new_x = np.rint(warped[0] / warped[2])
    new_y = np.rint(warped[1] / warped[2])

# Discard pixels that land outside the canvas; negative indices would
# otherwise wrap around and too-large ones raise IndexError.
inside = (new_x >= 0) & (new_x < img3.shape[1]) & (new_y >= 0) & (new_y < img3.shape[0])
target_x = new_x[inside].astype(int)
target_y = new_y[inside].astype(int)
# Forward mapping can leave small holes where no source pixel lands.
img3[target_y, target_x] = img2[rows[inside], cols[inside]] / 255.0
But this part is not working.
How can I solve this problem?
Once you have the Homography matrix you need to transform one of the images to have the same perspective as the other. This is done using the warpPerspective function in OpenCV. Once you've done the transformation, it's time to concatenate the images.
Let's say you want to transform img_1 into the perspective of img_2 and that you already have the Homography matrix H
# Warp img_1 into img_2's perspective on a canvas wide enough to hold both images.
dst = cv2.warpPerspective(img_1, H, ((img_1.shape[1] + img_2.shape[1]), img_2.shape[0]))  # warped image
# now paste them together: img_2 keeps its own coordinates at the canvas origin.
# (Pasting the unwarped img_1 over the same region afterwards would overwrite
# img_2, so that step is not performed.)
dst[0:img_2.shape[0], 0:img_2.shape[1]] = img_2
Also note that OpenCV already has a built-in RANSAC homography finder:
# src and dst are the matched point arrays; 5.0 is the RANSAC reprojection threshold.
H, masked = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
So it can save you a lot of code.
Check out these tutorials for more details:
https://medium.com/#navekshasood/image-stitching-to-create-a-panorama-5e030ecc8f7
https://medium.com/analytics-vidhya/image-stitching-with-opencv-and-python-1ebd9e0a6d78
Here is a code to get the optical flow output from a stabilized video (no camera movement) and save it as a set of frames
import cv2 as cv
import numpy as np
# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")
# ret = a boolean return value from getting the frame, first_frame = the first frame in the entire video sequence
ret, first_frame = cap.read()
if not ret:
    # Without a first frame there is nothing to compare against.
    cap.release()
    raise RuntimeError("Could not read the first frame from 2_stable_video.avi")
# Converts frame to grayscale because we only need the luminance channel for detecting edges - less computationally expensive
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)
# Creates an image filled with zero intensities with the same dimensions as the frame
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
count = 0
while cap.isOpened():
    # ret = a boolean return value from getting the frame, frame = the current frame being projected in the video
    ret, frame = cap.read()
    if not ret:
        # End of the video (or a read failure): frame is not usable, so stop
        # before handing it to imshow/cvtColor.
        break
    # Opens a new window and displays the input frame
    cv.imshow("input", frame)
    # Converts each frame to grayscale - we previously only converted the first frame to grayscale
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # Calculates dense optical flow by Farneback method
    flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    # Computes the magnitude and angle of the 2D vectors
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])
    # Sets image hue according to the optical flow direction
    mask[..., 0] = angle * 180 / np.pi / 2
    # Sets image value according to the optical flow magnitude (normalized)
    mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)
    # Converts HSV to RGB (BGR) color representation
    rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)
    # Opens a new window and displays the output frame (fixed crop of the result)
    cv.imshow("dense optical flow", rgb[40:150, 120:220])
    cv.imwrite("frames_modified_2/%d.png" % count, rgb[40:150, 120:220])
    count += 1
    # Updates previous frame
    prev_gray = gray
    # Frames are read by intervals of 1 millisecond. The programs breaks out of the while loop when the user presses the 'q' key
    if cv.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv.destroyAllWindows()
Can someone please suggest how to quantify the difference between the frames? i.e. to estimate speed/velocity ?
Here's an example that obtains the pixel translation magnitude from .bsq frames. You can modify the code to take a video file as input instead. You are probably most interested in the get_translation() function. Example:
Graph displaying pixel translation from frame-to-frame
Code
import numpy as np
import argparse
import os
import cv2
from matplotlib import pyplot as plt
from matplotlib import cm
import time
import random
# Usage: python translate_analyzer.py -p <filename.bsq>
# Automatic brightness and contrast optimization with optional histogram clipping
def automatic_brightness_and_contrast(image, clip_hist_percent=25):
    """Stretch the contrast of ``image`` after clipping both histogram tails.

    Args:
        image: grayscale or 3-channel image; a 3-channel image is converted
            to grayscale for the histogram only.
        clip_hist_percent: total percentage of pixels to clip, split evenly
            between the dark and the bright end.

    Returns:
        ``(auto_result, alpha, beta)`` where ``auto_result`` is
        ``cv2.convertScaleAbs(image, alpha=alpha, beta=beta)``. When the
        clipped range is empty (for example a constant image), the identity
        scaling ``alpha=1, beta=0`` is used instead of dividing by zero or
        producing a negative gain.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Calculate grayscale histogram
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256])

    # Calculate cumulative distribution from the histogram
    accumulator = np.cumsum(np.asarray(hist, dtype=np.float64).ravel())

    # Locate points to clip
    maximum = accumulator[-1]
    clip_hist_percent *= (maximum / 100.0)
    clip_hist_percent /= 2.0

    # Locate left cut: first bin whose cumulative count reaches the clip amount
    minimum_gray = int(np.searchsorted(accumulator, clip_hist_percent, side="left"))

    # Locate right cut: last bin whose cumulative count is still below the
    # upper clip level (-1 when there is none)
    maximum_gray = int(np.searchsorted(accumulator, maximum - clip_hist_percent, side="left")) - 1

    # Calculate alpha and beta values
    if maximum_gray <= minimum_gray:
        alpha, beta = 1.0, 0.0
    else:
        alpha = 255 / (maximum_gray - minimum_gray)
        beta = -minimum_gray * alpha

    auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return (auto_result, alpha, beta)
# Draw flow
def draw_flow(img, flow, step=30):
    """Return a BGR copy of grayscale ``img`` with the flow drawn on a sparse grid.

    The flow is sampled every ``step`` pixels, starting half a step in from
    the top-left. Each sample is drawn as a line from the grid point to the
    grid point plus its flow vector, with a dot at the grid point.
    """
    h, w = img.shape[:2]
    grid = np.mgrid[step/2:h:step, step/2:w:step].reshape(2, -1).astype(int)
    ys, xs = grid
    dx, dy = flow[ys, xs].T
    segments = np.vstack([xs, ys, xs + dx, ys + dy]).T.reshape(-1, 2, 2)
    # Adding 0.5 before the int32 cast rounds positive coordinates to nearest.
    segments = np.int32(segments + 0.5)
    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.polylines(vis, segments, 1, (36, 255, 12))
    for start, _end in segments:
        cv2.circle(vis, (start[0], start[1]), 2, (36, 255, 12), -1)
    return vis
# Return translation value
def get_translation(img, flow, step=30):
    """Return ``(median, field)`` for the first flow channel.

    ``field`` is ``flow[:, :, 0]`` transposed and ``median`` is its median.
    ``img`` and ``step`` are accepted but not used.
    """
    first_channel = flow[:, :, 0].T
    return (np.median(first_channel), first_channel)
# Get file path
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--path", help="Path to the directory")
args = vars(ap.parse_args())
if not args['path']:
    print('Usage: python translate_analyzer.py -p <directory>')
    exit(1)

# Extract file name (used for validation and plot titles); the file itself is
# opened through the full path so it does not have to be in the working directory.
bsq_path = args['path']
bsq_fname = os.path.split(bsq_path)[-1]
if '.bsq' not in bsq_fname:
    print('ERROR: Invalid bsq file. Select correct file.')
    exit(1)

width = 640
height = 512
# Frames are read as uint16 samples, i.e. 2 bytes per pixel.
frame_count = int(os.path.getsize(bsq_path)/(2*height*width))
x, y, w, h = 0, 0, 100, 512  # region of interest used for the flow computation

# Simulates calibrated frames to display on video frame
data_file = np.fromfile(bsq_path, dtype=np.uint16, count=-1)
data_file = data_file.reshape((width, height, frame_count), order='F')
data_file = np.rot90(data_file)
print(bsq_fname)
fname = bsq_fname.split()[0]

# Prepare the first frame: scale down, auto-contrast, crop to the region of interest.
prev = data_file[:, :, 0].copy()
prev //= 64
prev = automatic_brightness_and_contrast(prev)[0]
prev = prev[y:y+h, x:x+w]

translation_data = []
frame_direction = []
start = time.time()
for index in range(1, frame_count):
    data = data_file[:, :, index].copy()
    data //= 64
    data = automatic_brightness_and_contrast(data)[0]
    data = data[y:y+h, x:x+w]
    flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
    translation, pixel_direction = get_translation(data, flow)
    prev = data
    cv2.imshow('flow', draw_flow(data, flow))
    cv2.waitKey(1)
    translation_data.append(translation)
    # Only the most recent frame's per-pixel field is kept for the final plot.
    frame_direction = pixel_direction
end = time.time()
print('Time:', end - start)

# Median translation per frame
plt.figure()
plt.title(bsq_fname)
plt.xlabel("Frames")
plt.ylabel("Magnitude")
plt.plot(translation_data)

# Per-pixel first flow channel of the last processed frame
plt.figure()
plt.title("Pixel Direction")
plt.xlabel("Width")
plt.ylabel("Height")
plt.imshow(frame_direction.T)
plt.colorbar(orientation='vertical')
plt.show()
I am working on a stereo camera rig with 4 discrete cameras (of the same type) but at the moment only one pair (cam1 and cam2) are necessary.
The aim is to calibrate the stereo pair and get 3D information about the scene. I am using Python 3.6 with OpenCV 3.4.3 in Visual Studio 2017.
I took 28 images of a chessboard calibration pattern and calibrated the cameras individually as well as stereoscopic with the standard OpenCV procedure.
Since the calibration data seems good and the distortion correction is working fine, the next step is the rectification of the images.
This is where things become weird. I spent the last 3 weeks working on this and read a lot, tried a lot and always got crappy results. I used cv2.stereoCalibrate (also tried with different flags, as suggested in different topics), cv2.stereoRectify (also with different alpha values), cv2.initUndistortRectifyMap and cv2.remap for the actual remapping of the images (method 1). But the results are never as wanted.
I recently managed to get rectified images that actually look rectified by using cv2.stereoRectifyUncalibrated. For that I did not use matched feature points (since SURF and SIFT are non-free...) but a slightly different approach: the detected corners of the calibration pattern in my 28 calibration images are used as input points. This works well, but the rectified images don't look perfect.
Here are my images (these are no calibration images) so you can imagine what I'm talking about:
original left and right images
undistorted images
rectified with method 1, alpha=1
rectified with method 1, alpha=0
rectified uncalibrated, best result I got by now
Can anybody give me a hint whats wrong with my usage of method 1? I've seen a lot posts to similar problems but I couldn't find the solution in the comments there. Or is this a bug in OpenCV?
Or has anyone an idea how to improve the uncalibrated rectification?
Here is a code snippet with the relevant calls:
# imports
import numpy as np
import cv2
import glob
import argparse
import sys
import os
# size calib array (pattern size later passed to findChessboardCorners)
numEdgeX, numEdgeY = 10, 7
# preface
exitCode = 0
# get directories: take this script's folder and drop its last 17 characters
# to reach the folder that holds CalibData
pathDir = str(os.path.dirname(os.path.realpath(__file__)))[:-17]
pathCalib = f"{pathDir}\\CalibData\\chess"
try:
# define pair
p = 1
cal_path = pathCalib + "\\pair" + str(p)
images_right = glob.glob(cal_path + '\RIGHT/*.bmp')
images_left = glob.glob(cal_path + '\LEFT/*.bmp')
images_left.sort()
images_right.sort()
# termination criteria
criteria = (cv2.TermCriteria_EPS +
cv2.TermCriteria_MAX_ITER, 30, 0.001)
criteria_cal = (cv2.TermCriteria_EPS +
cv2.TermCriteria_MAX_ITER, 30, 1e-5)
# prepare object points, like (0,0,0); (1,0,0); ...; (6,5,0)
objp = np.zeros((numEdgeX*numEdgeY, 3), np.float32)
objp[:, :2] = np.mgrid[0:numEdgeX, 0:numEdgeY].T.reshape(-1, 2)
objpoints = [] # 3d points in real world space
imgpoints_l = [] # 2d points in image plane for calibration
imgpoints_r = [] # 2d points in image plane for calibration
for i, fname in enumerate(images_right):
print(str(i+1) + " out of " + str(len(images_right)))
img_l = cv2.imread(images_left[i])
img_r = cv2.imread(images_right[i])
# convert to cv2
img_l = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY)
img_r = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY)
# find the chess board corners
ret_l, corners_l = cv2.findChessboardCorners(img_l, (numEdgeX, numEdgeY), None)
ret_r, corners_r = cv2.findChessboardCorners(img_r, (numEdgeX, numEdgeY), None)
objpoints.append(objp)
if ret_l is True:
print("image " + str(i+1) + "left - io")
rt = cv2.cornerSubPix(img_l, corners_l, (11, 11),
(-1, -1), criteria)
imgpoints_l.append(corners_l)
if ret_r is True:
print("image " + str(i+1) + "right - io")
rt = cv2.cornerSubPix(img_r, corners_r, (11, 11),
(-1, -1), criteria)
imgpoints_r.append(corners_r)
# get shape
img_shape = img_l.shape[::-1]
### CALIBRATION ###
# calibrate left camera
rt, M1, d1, r1, t1 = cv2.calibrateCamera(
objpoints, imgpoints_l, img_shape, None, None)
# calibrate right camera
rt, M2, d2, r2, t2 = cv2.calibrateCamera(
objpoints, imgpoints_r, img_shape, None, None)
# stereo calibration
flags = (cv2.CALIB_FIX_K5 + cv2.CALIB_FIX_K6)
stereocalib_criteria = (cv2.TERM_CRITERIA_MAX_ITER +
cv2.TERM_CRITERIA_EPS, 100, 1e-5)
#flags = 0
#flags = cv2.CALIB_USE_INTRINSIC_GUESS
#flags = cv2.CALIB_FIX_PRINCIPAL_POINT
#flags = cv2.CALIB_FIX_ASPECT_RATIO
#flags = cv2.CALIB_ZERO_TANGENT_DIST
#flags = cv2.CALIB_FIX_INTRINSIC
#flags = cv2.CALIB_FIX_FOCAL_LENGTH
#flags = cv2.CALIB_FIX_K1...6
#flags = cv2.CALIB_RATIONAL_MODEL
#flags = cv2.CALIB_THIN_PRISM_MODEL
#flags = cv2.CALIB_SAME_FOCAL_LENGTH
#flags = cv2.CALIB_FIX_S1_S2_S3_S4
flags = (cv2.CALIB_FIX_PRINCIPAL_POINT | cv2.CALIB_FIX_ASPECT_RATIO | cv2.CALIB_FIX_FOCAL_LENGTH |
cv2.CALIB_FIX_INTRINSIC | cv2.CALIB_FIX_K3 | cv2.CALIB_FIX_K4 | cv2.CALIB_FIX_K5 |
cv2.CALIB_FIX_K6)
T = np.zeros((3, 1), dtype=np.float64)
R = np.eye(3, dtype=np.float64)
ret, M1, d1, M2, d2, R, T, E, F = cv2.stereoCalibrate(
objpoints, imgpoints_l,
imgpoints_r, M1, d1, M2,
d2, img_shape,
criteria = stereocalib_criteria,
flags=flags)
# get new optimal camera matrix
newCamMtx1, roi1 = cv2.getOptimalNewCameraMatrix(M1, d1, img_shape, 0, img_shape)
newCamMtx2, roi2 = cv2.getOptimalNewCameraMatrix(M2, d2, img_shape, 0, img_shape)
# rectification and undistortion maps which can be used directly to correct the stereo pair
(rectification_l, rectification_r, projection_l,
projection_r, disparityToDepthMap, ROI_l, ROI_r) = cv2.stereoRectify(
M1, d1, M2, d2, img_shape, R, T,
None, None, None, None, None,
#cv2.CALIB_ZERO_DISPARITY, # principal points of each camera have the same pixel coordinates in rect views
alpha=0) # alpha=1 no pixels lost, alpha=0 pixels lost
leftMapX, leftMapY = cv2.initUndistortRectifyMap(
M1, d1, rectification_l, projection_l,
img_shape, cv2.CV_32FC1)
rightMapX, rightMapY = cv2.initUndistortRectifyMap(
M2, d2, rectification_r, projection_r,
img_shape, cv2.CV_32FC1)
### UNCALIBRATED RECTIFICATION ###
imgpoints_l_undis = []
imgpoints_r_undis = []
for i, fname in enumerate(images_right):
img_l = cv2.imread(images_left[i])
img_r = cv2.imread(images_right[i])
# convert to cv2
img_l = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY)
img_r = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY)
# undistort
img_l_undis = cv2.undistort(img_l, M1, d1, None, newCamMtx1)
img_r_undis = cv2.undistort(img_r, M2, d2, None, newCamMtx2)
# find the chess board corners in undistorted image
ret_l_undis, corners_l_undis = cv2.findChessboardCorners(img_l_undis, (numEdgeX, numEdgeY), None)
ret_r_undis, corners_r_undis = cv2.findChessboardCorners(img_r_undis, (numEdgeX, numEdgeY), None)
if ret_l_undis is True:
rt = cv2.cornerSubPix(img_l_undis, corners_l_undis, (11, 11), (-1, -1), criteria)
for j in range(0, len(rt)):
x = rt[j][0,:]
imgpoints_l_undis.append(x)
if ret_r_undis is True:
rt = cv2.cornerSubPix(img_r_undis, corners_r_undis, (11, 11), (-1, -1), criteria)
for j in range(0, len(rt)):
x = rt[j][0,:]
imgpoints_r_undis.append(x)
# convert to np array
imgpoints_l_undis = np.array(imgpoints_l_undis)
imgpoints_r_undis = np.array(imgpoints_r_undis)
# compute rectification uncalibrated
ret, uncRectMtx1, uncRectMtx2 = cv2.stereoRectifyUncalibrated(imgpoints_l_undis, imgpoints_r_undis, F, img_shape)
### REMAPPING ###
# load images and convert to cv2 format
img_l = cv2.imread(images_left[0])
img_l = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY)
img_l_undis = cv2.undistort(img_l, M1, d1, None, newCamMtx1)
img_r = cv2.imread(images_right[0])
img_r = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY)
img_r_undis = cv2.undistort(img_r, M2, d2, None, newCamMtx2)
# remap
imglCalRect = cv2.remap(img_l, leftMapX, leftMapY, cv2.INTER_LINEAR)
imgrCalRect = cv2.remap(img_r, rightMapX, rightMapY, cv2.INTER_LINEAR)
numpyHorizontalCalibRect = np.hstack((imglCalRect, imgrCalRect))
# warp for uncalibrated rectification
imglUncalRect = cv2.warpPerspective(img_l_undis, uncRectMtx1, img_shape)
imgrUncalRect = cv2.warpPerspective(img_r_undis, uncRectMtx2, img_shape)
numpyHorizontalUncalibRect = np.hstack((imglUncalRect, imgrUncalRect))
### SHOW RESULTS ###
# calculate point arrays for epipolar lines
lineThickness = 5
lineColor = [0, 255, 0]
numLines = 20
interv = round(img_shape[0] / numLines)
x1 = np.zeros((numLines, 1))
y1 = np.zeros((numLines, 1))
x2 = np.full((numLines, 1), (3*img_shape[1]))
y2 = np.zeros((numLines, 1))
for jj in range(0, numLines):
y1[jj] = jj * interv
y2 = y1
for jj in range(0, numLines):
cv2.line(numpyHorizontalCalibRect, (x1[jj], y1[jj]), (x2[jj], y2[jj]),
lineColor, lineThickness)
cv2.line(numpyHorizontalUncalibRect, (x1[jj], y1[jj]), (x2[jj], y2[jj]),
lineColor, lineThickness)
cv2.namedWindow("calibRect", cv2.WINDOW_NORMAL)
cv2.namedWindow("uncalibRect", cv2.WINDOW_NORMAL)
cv2.imshow("calibRect", numpyHorizontalCalibRect)
cv2.imshow("uncalibRect", numpyHorizontalUncalibRect)
cv2.waitKey()
except (IOError, ValueError):
print("An I/O error or a ValueError occurred")
except:
print("An unexpected error occurred")
raise
Thanks!
Done!
The issue was that OpenCV interpreted my images as a vertical stereo system, whereas I had been treating it as a horizontal one.
I'm trying to implement image rectification. I was using a piece of software that is no longer available. To rectify the image, the software used the height of the camera (h), the distances of two reference points from the camera (d1, d2), and the image rows corresponding to those reference points (Line1, Line2).
So the variables are:
h (camera elevation);
Line1, Line2 (row pixel)
d1, d2 (Distance in meters from the camera)
Configuration:
I tried to implement some code using OpenCV (Python), but the final result is not the same as the software's. I wrote one script to calibrate the camera and a second one to undistort the image, and then I want to apply the rectification.
The problem is that I'm using a single camera (take photos of a landscape) that is fixed with a fixed focal length and focus which I can't change anymore.
Can someone tell me a good way to perform the rectification in the same way as the software, or suggest another valid solution?
My code for the calibration is
# Single-camera calibration script: detects a chessboard in every JPEG of the
# current directory, calibrates the camera from the detections and saves the
# result to 'calibration_data.npz'.

# Number of inner corners along the board's width and height
n_w = 9
n_h = 6
patternSize = (n_w, n_h)
# Size of the search window used to refine the corner coordinates
windowSize = (11, 11)
# Termination criteria for the sub-pixel refinement
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Board corners in board coordinates: (0,0,0), (1,0,0), ..., z is always 0
objp = np.zeros((n_h * n_w, 3), dtype=np.float32)
objp[:, :2] = np.mgrid[0:n_w, 0:n_h].T.reshape(-1, 2)
# Lists of points, one entry per image with a detected board
objpoints = []
imgpoints = []
# Size (width, height) of the last image read
imageSize = None
# Get all images
images = glob.glob('*.jpg')
for fname in images:
    img = cv2.imread(fname)
    # Image in gray scale
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imageSize = gray_img.shape[::-1]
    # Find corners
    retval, corners = cv2.findChessboardCorners(gray_img, patternSize, None)
    if not retval:
        continue
    print('Looping through image %s' % fname)
    objpoints.append(objp)
    # cornerSubPix refines the corner array in place
    cv2.cornerSubPix(gray_img, corners, windowSize, (-1, -1), criteria)
    imgpoints.append(corners)
    cv2.drawChessboardCorners(img, patternSize, corners, retval)
    cv2.imshow('ChessBoard Image %s' % fname, img)
    cv2.waitKey(500)
cv2.destroyAllWindows()
# Without at least one detection there is nothing to calibrate from (and,
# with no image at all, no image size either), so stop with a clear message.
if not objpoints:
    raise RuntimeError('No chessboard was detected in any *.jpg image')
print("------START CALIBRATION.....")
ret, cameraMatrix, distCoeffs, revcs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, imageSize,
                                                                  None, None)
print(ret)
print(cameraMatrix)
print(distCoeffs)
print('---SAVING CALIBRATION DATA')
np.savez('calibration_data', RMS=ret, distCoeffs=distCoeffs, cameraMatrix=cameraMatrix)
# ret is saved as the RMS value above; treat an error of at most 1.0 as good
if ret <= 1.0:
    print('''-----GOOD CALIBRATION''')
The code to remove the distortion is:
# Undistortion script: loads the saved calibration and writes an undistorted
# copy of every matched image into dirname (defined elsewhere in the file).

# Load calibration data; the archive is closed when the block exits
with np.load('calibration_data.npz') as calibrationData:
    distCoeffs = calibrationData['distCoeffs']
    cameraMatrix = calibrationData['cameraMatrix']
# Load images
# NOTE(review): '/*.jpg' only matches JPEG files directly in the filesystem
# root - confirm that this is the intended folder.
images = glob.glob('/*.jpg')
for i, fname in enumerate(images):
    img = cv2.imread(fname)
    # Undistort
    undistorted_img = cv2.undistort(img, cameraMatrix, distCoeffs, None)
    # Save image
    cv2.imwrite(os.path.join(dirname, 'Undistorted_%05d.jpg' % i), undistorted_img)
    cv2.imshow('Undistorted Image %s' % fname, undistorted_img)
    # imshow only paints once the GUI event loop runs, so give it some time
    # (same delay as the calibration script uses)
    cv2.waitKey(500)
cv2.destroyAllWindows()
The first idea to rectify the image was to find the 4 corners of a trapezoid in the real-world image (an A4 sheet of paper) and compute a transformation matrix that maps them to the 4 corners of a rectangle (with the real proportions of an A4 sheet). But I think that is the wrong approach.
To do this I wrote this code:
# Perspective-warp script: maps four image points (corners of an A4 sheet)
# onto a rectangle and writes the warped image to 'warpimage.png'.

# load image
img_Trap = cv2.imread('image.png', cv2.IMREAD_GRAYSCALE)
# points on the image (corners of an A4 paper)
# NOTE(review): the first two corners are identical, which leaves only three
# distinct source points - confirm the intended coordinates.
ptsTrap = np.array(((1556, 1050), (1556, 1050), (2189, 1677), (1425, 1723)), dtype=np.float32)
# second image, used only for the size of the output
img_Rect = cv2.imread('image2.png', cv2.IMREAD_GRAYSCALE)
# corners of an A4 sheet (preserving the aspect ratio)
ptsRect = np.array(((1980, 1381), (2189, 1381), (2189, 1677), (1980, 1677)), dtype=np.float32)
# transformation matrix
T = cv2.getPerspectiveTransform(ptsTrap, ptsRect)
print(T)
# warp perspective; the output size must be given as (width, height),
# whereas a numpy shape is (height, width)
out_height, out_width = img_Rect.shape[:2]
warp = cv2.warpPerspective(img_Trap, T, (out_width, out_height))
cv2.imwrite('warpimage.png', warp)