Here is code that computes the optical flow output from a stabilized video (no camera movement) and saves it as a set of frames:
import cv2 as cv
import numpy as np
# Dense (Farneback) optical flow visualisation: for every pair of consecutive
# frames the flow is encoded as an HSV image (hue = direction, value =
# normalised magnitude) and a cropped region is saved as a numbered PNG.

# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")
# ret is False when no frame could be decoded (missing file, unsupported codec, empty video)
ret, first_frame = cap.read()
if not ret:
    cap.release()
    raise SystemExit("Could not read a frame from 2_stable_video.avi")
# Farneback works on single-channel images, so only the grayscale frame is kept
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)
# HSV canvas with the same dimensions as the frame
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
count = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # The end of the stream is signalled through ret; without this check
        # the None frame would crash imshow/cvtColor after the last frame
        if not ret:
            break
        # Opens a new window and displays the input frame
        cv.imshow("input", frame)
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        # Calculates dense optical flow by Farneback method
        flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        # Computes the magnitude and angle (radians) of the 2D flow vectors
        magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])
        # OpenCV 8-bit hue spans 0..179, hence degrees / 2
        mask[..., 0] = angle * 180 / np.pi / 2
        # Sets image value according to the optical flow magnitude (normalized per frame)
        mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)
        # Converts HSV to BGR color representation
        rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)
        # Only the region of interest is displayed and written to disk
        roi = rgb[40:150, 120:220]
        cv.imshow("dense optical flow", roi)
        cv.imwrite("frames_modified_2/%d.png" % count, roi)
        count += 1
        # Updates previous frame
        prev_gray = gray
        # The program breaks out of the loop when the user presses the 'q' key
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if processing fails
    cap.release()
    cv.destroyAllWindows()
Can someone please suggest how to quantify the difference between the frames, i.e. how to estimate speed/velocity?
Here's an example that obtains the pixel translation magnitude from .bsq frames. You can modify the code to read a video file instead. You are probably most interested in the get_translation() function. Example:
Graph displaying pixel translation from frame-to-frame
Code
import numpy as np
import argparse
import os
import cv2
from matplotlib import pyplot as plt
from matplotlib import cm
import time
import random
# Usage: python translate_analyzer.py -p <filename.bsq>
# Automatic brightness and contrast optimization with optional histogram clipping
def automatic_brightness_and_contrast(image, clip_hist_percent=25):
    """Stretch the intensity range of *image* after clipping the histogram tails.

    Args:
        image: Grayscale (2-D) or BGR (3-D) image.
        clip_hist_percent: Total percentage of pixels to clip; half is taken
            from each end of the 256-bin grayscale histogram.

    Returns:
        Tuple ``(auto_result, alpha, beta)`` where ``auto_result`` is
        ``cv2.convertScaleAbs(image, alpha=alpha, beta=beta)``. When the
        clipped range is empty (flat image, or the two cuts meet) the identity
        scaling ``alpha=1.0, beta=0.0`` is used instead of dividing by zero.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Calculate grayscale histogram
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
    hist_size = len(hist)

    # Cumulative distribution; hist has shape (256, 1), so flatten it first
    # instead of calling float() on one-element arrays
    accumulator = np.cumsum(hist.ravel(), dtype=np.float64)

    # Number of pixels to clip on each side
    maximum = accumulator[-1]
    clip = clip_hist_percent * (maximum / 100.0) / 2.0

    # Locate left cut
    minimum_gray = 0
    while minimum_gray < hist_size - 1 and accumulator[minimum_gray] < clip:
        minimum_gray += 1

    # Locate right cut; never move past the left cut (the unbounded loop could
    # run to negative indices and wrap around)
    maximum_gray = hist_size - 1
    while maximum_gray > minimum_gray and accumulator[maximum_gray] >= (maximum - clip):
        maximum_gray -= 1

    if maximum_gray <= minimum_gray:
        # Degenerate range: leave the intensities unscaled
        alpha, beta = 1.0, 0.0
    else:
        alpha = 255 / (maximum_gray - minimum_gray)
        beta = -minimum_gray * alpha

    auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return (auto_result, alpha, beta)
# Draw flow
def draw_flow(img, flow, step=30):
    """Overlay a sparse grid of flow vectors on a grayscale image.

    The flow field is sampled every *step* pixels (starting at step/2). Each
    sample is drawn as a line from the grid point to the grid point displaced
    by the flow, with a filled dot at the grid point. Returns a BGR image.
    """
    rows, cols = img.shape[:2]
    grid = np.mgrid[step/2:rows:step, step/2:cols:step]
    y, x = grid.reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    # One (start, end) pair per sampled point, rounded to the nearest pixel
    segments = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
    segments = np.int32(segments + 0.5)
    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    colour = (36, 255, 12)
    cv2.polylines(vis, segments, 1, colour)
    for (x1, y1), _end in segments:
        cv2.circle(vis, (x1, y1), 2, colour, -1)
    return vis
# Return translation value
def get_translation(img, flow, step=30):
    """Return ``(median, component)`` for the first channel of *flow*.

    ``component`` is ``flow[:, :, 0]`` transposed (the x displacement in
    OpenCV's dense-flow layout) and ``median`` is its median value.
    *img* and *step* are accepted but not used.
    """
    first_channel = flow[..., 0].T
    return (np.median(first_channel), first_channel)
# Get file path
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--path", help="Path to the .bsq file")
args = vars(ap.parse_args())
if not args['path']:
    print('Usage: python translate_analyzer.py -p <filename.bsq>')
    raise SystemExit(1)

# Keep the full path for file access; the bare name is only used for display
bsq_path = args['path']
bsq_fname = os.path.split(bsq_path)[-1]
if not bsq_fname.lower().endswith('.bsq'):
    print('ERROR: Invalid bsq file. Select correct file.')
    raise SystemExit(1)

width = 640
height = 512
# Frames are stored as 16-bit samples, i.e. 2 bytes per pixel
frame_count = int(os.path.getsize(bsq_path)/(2*height*width))
if frame_count < 2:
    print('ERROR: At least two frames are required to compute optical flow.')
    raise SystemExit(1)
# Region of interest used for the flow computation
x,y,w,h = 0,0,100,512

# Simulates calibrated frames to display on video frame
# Read only whole frames so trailing bytes cannot break the reshape
data_file = np.fromfile(bsq_path, dtype=np.uint16, count=width*height*frame_count)
data_file = data_file.reshape((width, height, frame_count), order='F')
data_file = np.rot90(data_file)

print(bsq_fname)
fname = bsq_fname.split()[0]


def _prepare_frame(frame_index):
    """Return the contrast-stretched ROI of frame *frame_index*."""
    frame = data_file[:,:,frame_index].copy()
    frame //= 64
    frame = automatic_brightness_and_contrast(frame)[0]
    return frame[y:y+h, x:x+w]


prev = _prepare_frame(0)

translation_data = []
frame_direction = []
start = time.time()
for index in range(1, frame_count):
    data = _prepare_frame(index)
    flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
    translation, pixel_direction = get_translation(data, flow)
    prev = data
    cv2.imshow('flow', draw_flow(data, flow))
    cv2.waitKey(1)
    translation_data.append(translation)
    # Only the most recent per-pixel field is kept for the final plot
    frame_direction = pixel_direction
end = time.time()
print('Time:', end - start)

plt.figure()
plt.title(bsq_fname)
plt.xlabel("Frames")
plt.ylabel("Magnitude")
plt.plot(translation_data)

plt.figure()
plt.title("Pixel Direction")
plt.xlabel("Width")
plt.ylabel("Height")
plt.imshow(frame_direction.T)
plt.colorbar(orientation='vertical')
plt.show()
Related
I used the following code to select the nose with OpenCV and Python. I searched a lot for a way to change the size of the nose and save the result as another image, but I didn't find anything. Can anybody help me do this?
import cv2
import numpy as np
import dlib
# Detect faces with dlib, collect the 68 facial landmarks of each face and
# show the image with the face rectangles drawn on it.
img = cv2.imread('1.jpg')
if img is None:
    raise SystemExit("Could not read '1.jpg'")
img = cv2.resize(img,(0,0),None,0.5,0.5)
imgOriginal = img.copy()
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")


def createBox(img,points,scale=5):
    """Crop the bounding rectangle of *points* from *img* and enlarge it by *scale*."""
    x,y,w,h = cv2.boundingRect(points)
    imgCrop = img[y:y+h,x:x+w]
    imgCrop = cv2.resize(imgCrop,(0,0),None,scale,scale)
    return imgCrop


imgGray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
faces = detector(imgGray)
for face in faces:
    x1,y1 = face.left(),face.top()
    x2,y2 = face.right(),face.bottom()
    # Draw on the copy so that img stays clean for later cropping
    cv2.rectangle(imgOriginal,(x1,y1),(x2,y2),(0,255,0),1)
    landmarks = predictor(imgGray,face)
    myPoints = np.array([[landmarks.part(n).x, landmarks.part(n).y] for n in range(68)])
    # nose points to select: myPoints[27:35]
    print(myPoints)
# cv2_imshow is not defined in this script; use the regular HighGUI window
cv2.imshow("Original", imgOriginal)
cv2.waitKey(0)
thanks in advance
Here is one way using a spherical (bubble) warp in a local region in Python/OpenCV.
- Define region center and radius and amount of spherical distortion
- Crop the image for that center and radius
- Compute the spherical distortion x and y displacement maps and a binary mask
- Apply the distortion maps using cv2.remap
- Antialias the mask
- Merge the distorted and cropped image using the mask
- Insert that merged image into the original image
- Save the results
Input:
import numpy as np
import cv2
import math
import skimage.exposure
# Local spherical ("bubble") warp: crop a square region, build remap tables
# with the spherize formula, blend the warped crop back through an
# antialiased circular mask and paste it into the original image.
img = cv2.imread("portrait_of_mussorgsky2.jpg")
if img is None:
    raise SystemExit("Could not read portrait_of_mussorgsky2.jpg")

# set location and radius
cx = 130
cy = 109
radius = 30

# set distortion gain
gain = 1.5

# the crop must lie inside the image, otherwise negative slice bounds wrap around
if cx - radius < 0 or cy - radius < 0 or cx + radius > img.shape[1] or cy + radius > img.shape[0]:
    raise SystemExit("The warp region extends beyond the image borders")

# crop image
crop = img[cy-radius:cy+radius, cx-radius:cx+radius]

# get dimensions
ht, wd = crop.shape[:2]
xcent = wd / 2
ycent = ht / 2
rad = min(xcent,ycent)

# create maps with the spherize distortion formula --- arcsin(r)
# xcomp = arcsin(r)*x/r; ycomp = arcsin(r)*y/r
# (vectorised; gives the same values as evaluating the formula per pixel)
ys, xs = np.indices((ht, wd), dtype=np.float64)
X = (xs - xcent) / xcent
Y = (ys - ycent) / ycent
R = np.hypot(X, Y)
# R >= 0.90 is left untouched to avoid extreme blurring near R = 1
inside = R < 0.90
# R == 0 maps to itself, so only 0 < R < 0.90 is actually displaced
warped = inside & (R > 0)
# placeholder radius keeps asin()/division well defined where no warp is applied
safe_R = np.where(warped, R, 0.5)
if gain >= 0:
    scale = np.power((2/np.pi) * (np.arcsin(safe_R) / safe_R), gain)
else:
    scale = np.power(np.sin(np.pi * safe_R / 2) / safe_R, -gain)
map_x = np.where(warped, xcent * X * scale + xcent, xs).astype(np.float32)
map_y = np.where(warped, ycent * Y * scale + ycent, ys).astype(np.float32)
mask = np.where(inside, 255, 0).astype(np.uint8)

# remap using map_x and map_y
bump = cv2.remap(crop, map_x, map_y, cv2.INTER_LINEAR, borderMode = cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# antialias edge of mask
# (pad so blur does not extend to edges of image, then crop later)
blur = 7
mask = cv2.copyMakeBorder(mask, blur,blur,blur,blur, borderType=cv2.BORDER_CONSTANT, value=(0))
mask = cv2.GaussianBlur(mask, (0,0), sigmaX=blur, sigmaY=blur, borderType = cv2.BORDER_DEFAULT)
h, w = mask.shape
mask = mask[blur:h-blur, blur:w-blur]
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
mask = skimage.exposure.rescale_intensity(mask, in_range=(127.5,255), out_range=(0,1))

# merge bump with crop using grayscale (not binary) mask
bumped = (bump * mask + crop * (1-mask)).clip(0,255).astype(np.uint8)

# insert bumped image into original
result = img.copy()
result[cy-radius:cy+radius, cx-radius:cx+radius] = bumped

# save results
cv2.imwrite("portrait_of_mussorgsky2_bump.jpg", result)

# display images
cv2.imshow('img', img)
cv2.imshow('crop', crop)
cv2.imshow('bump', bump)
cv2.imshow('mask', mask)
cv2.imshow('bumped', bumped)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Resulting Image:
I think you need "bulge" effects such as implode and explode. There is no implementation of these filters in OpenCV, but you can find them in other tools such as Wand (a Python binding for ImageMagick), which provides implode/explode.
Example (wand):
from wand.image import Image
# Open the source picture, apply the implode filter with a negative amount
# and write the result to a new file.
with Image(filename="test.jpg") as img:
    img.implode(amount=-0.2)
    img.save(filename="destination.jpg")
    # numpy.asarray(img) converts the wand image to a numpy array for further use
Passing a negative value to the implode function is equivalent to an explode, so use negative values for a magnifying effect.
There is one problem though: img.implode performs on the center of the image, so after you've found the face features(eye, nose, ...) you need to move your picture somehow to make the eye or nose to lie on the center of the image. After that you can simply use implode function.
I have written the following script with which I aim to detect lines in Gazebo (a simulation environment):
#!/usr/bin/env python
# rospy for the subscriber
import rospy
# ROS Image message
from sensor_msgs.msg import Image
# ROS Image message -> OpenCV2 image converter
from cv_bridge import CvBridge, CvBridgeError
# OpenCV2 for saving an image
import cv2
import matplotlib.pyplot as plt
import numpy as np
def gradient(img):
    """Return the Canny edge map of a BGR image.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian kernel
    and passed to Canny with thresholds 20 and 30.
    """
    blurred = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    return cv2.Canny(blurred, 20, 30)
def region_of_interest(img):
    """Keep only the pixels of *img* inside the fixed rectangular region.

    The region spans x = 200..600 from y = 500 down to the bottom row.
    Everything outside it is set to zero. Returns the masked image.
    """
    # Height of image (number of rows)
    height = img.shape[0]
    # fillPoly only accepts 32-bit integer points; the default integer dtype
    # is 64-bit on most platforms and is rejected
    polygons = np.array([
        [(200, height), (200, 500), (600, 500), (600, height)]
    ], dtype=np.int32)
    # Black background with a white (255) region of interest
    mask = np.zeros_like(img)
    cv2.fillPoly(mask, polygons, 255)
    # Isolate the area of interest
    return cv2.bitwise_and(img, mask)
def make_coordinates(img, line_parameters):
    """Turn a ``(slope, intercept)`` pair into pixel end points.

    The segment runs from y = image height up to half that height; each x is
    obtained by solving ``y = slope * x + intercept`` and truncating to int.
    Returns ``np.array([x1, y1, x2, y2])``.
    """
    slope, intercept = line_parameters
    bottom_y = img.shape[0]
    top_y = int(bottom_y * 0.5)
    bottom_x = int((bottom_y - intercept) / slope)
    top_x = int((top_y - intercept) / slope)
    return np.array([bottom_x, bottom_y, top_x, top_y])
def average_slope_intercep(img,lines):
    """Average all detected segments into a single line.

    Args:
        img: Image whose height defines the extent of the returned line.
        lines: Segments as produced by ``cv2.HoughLinesP`` (each reshapeable
            to ``x1, y1, x2, y2``), or ``None`` when nothing was detected.

    Returns:
        ``np.array([[x1, y1, x2, y2]])`` for the averaged line, or ``None``
        when there is no usable segment.
    """
    # HoughLinesP returns None instead of an empty array when it finds nothing
    if lines is None:
        return None
    line_fit = []
    for line in lines:
        x1, y1, x2, y2 = line.reshape(4)
        # A vertical segment has no finite slope and cannot be fitted as y = mx + b
        if x1 == x2:
            continue
        # Fit y = mx + b through the two end points
        slope, intercept = np.polyfit((x1, x2), (y1, y2), 1)
        line_fit.append((slope, intercept))
    if not line_fit:
        return None
    # Average slope and intercept over all segments
    line_fit_average = np.average(line_fit, axis=0)
    main_line = make_coordinates(img, line_fit_average)
    return np.array([main_line])
def display_lines(img,lines):
    """Draw *lines* on a black canvas shaped like *img*.

    Args:
        img: Reference image; only its shape and dtype are used.
        lines: Iterable of segments reshapeable to ``x1, y1, x2, y2``, or
            ``None`` to get an empty canvas.

    Returns:
        The canvas with every segment drawn in blue (BGR 255, 0, 0), thickness 10.
    """
    line_image = np.zeros_like(img)
    # Only draw when lines were detected
    if lines is not None:
        for line in lines:
            # Plain Python ints are accepted by every OpenCV build
            x1, y1, x2, y2 = (int(v) for v in line.reshape(4))
            cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 10)
    return line_image
def image_callback(msg):
    """Detect the averaged line in one camera frame and display the result.

    Args:
        msg: ROS ``sensor_msgs/Image`` message delivered by the subscriber.

    Conversion failures are printed and the frame is skipped. When no line is
    found the unmodified frame is shown.
    """
    # Instantiate CvBridge
    bridge = CvBridge()
    try:
        # Convert your ROS Image message to OpenCV2
        frame = bridge.imgmsg_to_cv2(msg, "bgr8")
    except CvBridgeError as e:  # "as" form is valid on Python 2.6+ and Python 3
        print(e)
        return
    # Copy of the original frame
    frame_copy = np.copy(frame)
    # Canny of image
    canny_frame = gradient(frame_copy)
    # Apply mask in region of interest
    cropped_image = region_of_interest(canny_frame)
    # Apply Hough Transform on the region of interest
    lines = cv2.HoughLinesP(cropped_image,1,np.pi/180,30,np.array([]),minLineLength=10,maxLineGap=2)
    if lines is None:
        # Nothing detected in this frame: show it unchanged
        combo_image = frame_copy
    else:
        # Calculate the average slope of the detected lines
        averaged_lines = average_slope_intercep(frame_copy,lines)
        # Create a mask image with the drawn lines
        line_image = display_lines(frame_copy,averaged_lines)
        # Plot lines on the camera feed frame
        combo_image = cv2.addWeighted(frame_copy,0.8,line_image,1,1)
    # Show the frame with the overlay rather than the untouched copy
    cv2.imshow("Result feed", combo_image)
    cv2.waitKey(1)
def main():
    """Start the ``image_listener`` node and process camera frames until shutdown."""
    rospy.init_node('image_listener')
    # Every message on the camera topic is handed to image_callback
    rospy.Subscriber("rover/camera1/image_raw", Image, image_callback)
    # Blocks until the node is shut down (ctrl + c)
    rospy.spin()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
The code is integrated in ROS, so please focus your attention on the image_callback function. My issue is that the line that I want to detect is quite noisy and I cannot figure out how to detect it correctly.
To be more specific, from the following frame,
Original Frame
I get this image after gaussian blur and the canny algorithm,
Canny Frame
How could I filter the "noise" I see in the Canny frame? I played a lot with the Canny and Gaussian blur parameters, but all that I have achieved is removing gradients instead of actually making it less "noisy".
This method might help you to remove noise from the frame.
import cv2
import numpy as np
from skimage.morphology import skeletonize
def get_skeleton_iamge(threshold_image):
    """Return the skeleton of a 0/255 binary image as a 0/255 uint8 image.

    The input is divided by 255 before being passed to ``skeletonize`` and
    the result is scaled back to 255.
    """
    thinned = skeletonize(threshold_image / 255).astype(np.uint8)
    thinned *= 255
    return thinned
# Noise reduction demo: bilateral filter -> Canny -> dilate/erode with a
# 10x10 kernel -> skeleton. Each intermediate image is shown in its own window.
image = cv2.imread("road.png", 0)
if image is None:
    raise SystemExit("Could not read road.png")
image = cv2.resize(image, (300, 300))
# Edge-preserving smoothing before edge detection
bilateral = cv2.bilateralFilter(image, 15, 100, 100)
cv2.imshow("bilateral_image", bilateral)
canny_image = cv2.Canny(bilateral, 20, 30)
cv2.imshow("canny_image", canny_image)
# Dilation followed by erosion merges nearby edge fragments into solid blobs
kernel = np.ones((10, 10), np.uint8)
dilate_image = cv2.dilate(canny_image, kernel, iterations=1)
erode_image = cv2.erode(dilate_image, kernel, iterations=1)
cv2.imshow("erode_image", erode_image)
skeleton_iamge = get_skeleton_iamge(erode_image)
cv2.imshow("skeleton_iamge", skeleton_iamge)
cv2.waitKey(0)
cv2.destroyAllWindows()
By using this link, I made the deformed mesh:
# Build a regular grid of pixel coordinates for the down-scaled image and
# displace it with a random number of smooth deformations.
inputs = cv2.imread("../datasets/images/0.jpg")
if inputs is None:
    raise SystemExit("Could not read ../datasets/images/0.jpg")
nh, nw = inputs.shape[0]//8, inputs.shape[1]//8
# cv2.resize expects dsize as (width, height); passing (nh, nw) swapped the axes
inputs = cv2.resize(inputs, dsize=(nw, nh), interpolation=cv2.INTER_AREA)
mr = nh
mc = nw
xx = np.arange(mr-1, -1, -1)
yy = np.arange(0, mc, 1)
[Y, X] = np.meshgrid(xx, yy)
# One (x, y) row per mesh vertex
ms = np.transpose(np.asarray([X.flatten('F'), Y.flatten('F')]), (1,0))
perturbed_mesh = ms
# Number of deformations; may be -1 or 0, in which case the mesh stays regular
nv = np.random.randint(20) - 1
for k in range(nv):
    #Choosing one vertex randomly
    vidx = np.random.randint(np.shape(ms)[0])
    vtex = ms[vidx, :]
    #Vector between all vertices and the selected one
    xv = perturbed_mesh - vtex
    #Random movement
    mv = (np.random.rand(1,2) - 0.5)*20
    # Lift both vector sets to 3-D so the cross product is defined
    hxv = np.zeros((np.shape(xv)[0], np.shape(xv)[1] +1) )
    hxv[:, :-1] = xv
    hmv = np.tile(np.append(mv, 0), (np.shape(xv)[0],1))
    # |xv x mv| / |mv| = distance of each vertex from the line through vtex along mv
    d = np.cross(hxv, hmv)
    d = np.absolute(d[:, 2])
    d = d / (np.linalg.norm(mv, ord=2))
    wt = d
    # Pick one of two fall-off curves for the displacement weight
    curve_type = np.random.rand(1)
    if curve_type > 0.3:
        alpha = np.random.rand(1) * 50 + 50
        wt = alpha / (wt + alpha)
    else:
        alpha = np.random.rand(1) + 1
        wt = 1 - (wt / 100 )**alpha
    # Move every vertex along mv, scaled by its weight
    msmv = mv * np.expand_dims(wt, axis=1)
    perturbed_mesh = perturbed_mesh + msmv
So I got the mesh like:
Then I tried to map the source image pixels onto the generated mesh.
# NOTE(review): dh and dw (vertical / horizontal padding in pixels) are not
# defined in this excerpt - confirm they are set earlier.
img = cv2.copyMakeBorder(inputs, dh, dh, dw, dw, borderType=cv2.BORDER_CONSTANT, value=(0,0,0))
xs, ys = perturbed_mesh[:, 0], perturbed_mesh[:, 1]
# The mesh coordinates refer to the unpadded image. Padding shifted every
# source pixel by (dw, dh), so the lookup maps must be shifted by the same
# amount; otherwise the document is sampled off-centre and lands in a corner.
xs = (xs + dw).reshape(nh, nw).astype(np.float32)
ys = (ys + dh).reshape(nh, nw).astype(np.float32)
dst = cv2.remap(img, xs, ys, cv2.INTER_CUBIC)
plt.imshow(dst)
Finally, I got the result:
But in this image the document ends up in the corner, so I can't use it.
How to map the document onto the center of image?
Here is an example of what I did for a perspective warp in Python/OpenCV. It will show you how I achieved the expanded view of the output. Not only did I increase the output size, but I also shifted the output control points. I shifted by +500 px and doubled that to +1000 for the output size.
Input:
No Expand Case:
import numpy as np
import cv2
# read input
img = cv2.imread("building.jpg")
if img is None:
    raise SystemExit("Could not read building.jpg")

# resize
height,width = 1000,1500
img = cv2.resize(img, (width,height))

# specify conjugate coordinates; the output quad is the full output frame
pts1 = np.float32([[ 250, 0],[1220, 300],[1300, 770],[ 250, 860]])
pts2 = np.float32([[0,0],[width,0],[width,height],[0,height]])

# compute perspective matrix
matrix = cv2.getPerspectiveTransform(pts1,pts2)
print(matrix.shape)
print(matrix)

# convert image to BGRA with opaque alpha
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# do perspective transformation setting area outside input to transparent
# (the interpolation mode must be passed as flags=; the 4th positional
# argument of warpPerspective is dst)
imgOutput = cv2.warpPerspective(img, matrix, (width,height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# save the warped output (already width x height, so no resize is needed)
cv2.imwrite("building_warped_unexpanded.png", imgOutput)

# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
No Expand Warped Result:
Expanded Case:
import numpy as np
import cv2
# read input
img = cv2.imread("building.jpg")
if img is None:
    raise SystemExit("Could not read building.jpg")

# resize
height,width = 1000,1500
img = cv2.resize(img, (width,height))

# specify conjugate coordinates and shift output by 500 px on left and top
pts1 = np.float32([[ 250, 0],[1220, 300],[1300, 770],[ 250, 860]])
pts2 = np.float32([[+500,+500],[width+500,+500],[width+500,height+500],[+500,height+500]])

# compute perspective matrix
matrix = cv2.getPerspectiveTransform(pts1,pts2)
print(matrix.shape)
print(matrix)

# convert image to BGRA with opaque alpha
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# do perspective transformation setting area outside input to transparent
# extend output size so extended by 500 all around
# (the interpolation mode must be passed as flags=; the 4th positional
# argument of warpPerspective is dst)
imgOutput = cv2.warpPerspective(img, matrix, (width+1000,height+1000), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# resize output, since it is too large to post
imgOutput = cv2.resize(imgOutput, (width,height))

# save the warped output
cv2.imwrite("building_warped.jpg", imgOutput)

# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
Expanded Result:
I am trying to generate grayscale MotionHistoryImages from a live image feed (webcam) for processing by a CNN model I have built, from the updateMotionHistory function I get an output of:
[width x height] array of type np.float32
I want to convert this array into a grayscale image, brighter the newer the change in motion like this
EDIT: Added code example below
import cv2
import numpy as np
import time
import genMHI_util
# Build a grayscale motion history image (MHI) from the webcam: recent motion
# is bright, older motion fades to black over MHI_DURATION seconds.
cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorMOG2(history=1000,detectShadows=False)
colourThreshold = 0.975
frame_reduction_counter = 0
# Image arrays are indexed (rows, cols), i.e. (height, width)
mhi = np.zeros((genMHI_util.MHI_HEIGHT, genMHI_util.MHI_WIDTH), np.float32)
try:
    while True:
        ret, frame = cap.read()
        # Stop when the camera delivers no frame instead of crashing on None
        if not ret:
            break
        if frame_reduction_counter >= 0:
            # Disregarding frames that do not contain enough movement, below the set threshold
            frame_reduction_counter = 0
            # Timestamp in seconds
            timestamp = cv2.getTickCount() / cv2.getTickFrequency()
            # Do background subtraction on frame to get silhouette
            silhouette = fgbg.apply(frame)
            silhouette = cv2.resize(silhouette, (genMHI_util.MHI_WIDTH, genMHI_util.MHI_HEIGHT))
            # Update MHI; the duration must match the one used for scaling below
            cv2.motempl.updateMotionHistory(silhouette, mhi, timestamp, genMHI_util.MHI_DURATION)
            # Map timestamps in [timestamp - duration, timestamp] to [0, 255].
            # Clipping (rather than convertScaleAbs, which takes the absolute
            # value) keeps pixels without recent motion black instead of white.
            age = (mhi - (timestamp - genMHI_util.MHI_DURATION)) / genMHI_util.MHI_DURATION
            mask = np.uint8(np.clip(age, 0, 1) * 255)
            # Preview images
            cv2.imshow('original', frame)
            cv2.imshow('silhouette', silhouette)
            cv2.imshow('mhi', mask)
            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        frame_reduction_counter += 1
finally:
    cap.release()
    cv2.destroyAllWindows()
CONSTANTS:
# Time span used to scale motion history values to 0..255; compared against
# timestamps computed as getTickCount() / getTickFrequency(), i.e. seconds
MHI_DURATION = 5
# Size the silhouette is resized to and the motion history buffer is allocated with
MHI_WIDTH = 300
MHI_HEIGHT = 300
Output without movement - bit of camera noise
Output with movement - kind of works but very badly
I am trying to learn OpenCV in order to improve a script I wrote for comparing engineering drawings. I am using the code (see below) found on this tutorial but I am having zero success with it. In the tutorial the author uses the example of a blank form for the reference image and a photo of the completed form as the image to align. My situation is very similar because I am attempting to use a blank drawing title block as my reference image and a scanned image of a drawing as my image to align.
My goal is to use OpenCV to clean up the scanned engineering drawings so that they are aligned properly but no matter what I try in the MAX_FEATURES and GOOD_MATCH_PERCENT parameters, I get an image that looks like a black and white star burst. Also, when I review the "matches.jpg" file generated by the script, it appears that there are no correct matches. I have tried multiple drawings and I get the same results.
Can anyone see a reason why this script would not work in the way I am trying to use it?
from __future__ import print_function
import cv2
import numpy as np
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
def alignImages(im1, im2):
    """Warp *im1* onto *im2* using ORB feature matches and a RANSAC homography.

    The best ``GOOD_MATCH_PERCENT`` of the matches are kept, and a picture of
    them is written to ``matches.jpg``.

    Args:
        im1: BGR image to be aligned.
        im2: BGR reference image; the result has its width and height.

    Returns:
        Tuple ``(im1Reg, h)``: the warped image and the 3x3 homography.

    Raises:
        ValueError: No features were found, fewer than four matches remain,
            or no homography could be estimated.
    """
    # Convert images to grayscale
    im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
    im2Gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

    # Detect ORB features and compute descriptors.
    orb = cv2.ORB_create(MAX_FEATURES)
    keypoints1, descriptors1 = orb.detectAndCompute(im1Gray, None)
    keypoints2, descriptors2 = orb.detectAndCompute(im2Gray, None)
    if descriptors1 is None or descriptors2 is None:
        raise ValueError("No ORB features detected in one of the images")

    # Match features.
    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    # match() returns a tuple in recent OpenCV releases, so build a sorted
    # list instead of sorting in place
    matches = sorted(matcher.match(descriptors1, descriptors2, None), key=lambda x: x.distance)

    # Remove not so good matches
    numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
    matches = matches[:numGoodMatches]

    # Draw top matches
    imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
    cv2.imwrite("matches.jpg", imMatches)

    # A homography needs at least four point correspondences
    if len(matches) < 4:
        raise ValueError("Not enough matches to estimate a homography")

    # Extract location of good matches
    points1 = np.float32([keypoints1[match.queryIdx].pt for match in matches])
    points2 = np.float32([keypoints2[match.trainIdx].pt for match in matches])

    # Find homography
    h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
    if h is None:
        raise ValueError("Homography estimation failed")

    # Use homography
    height, width = im2.shape[:2]
    im1Reg = cv2.warpPerspective(im1, h, (width, height))
    return im1Reg, h
if __name__ == '__main__':
    # Read reference image
    refFilename = "form.jpg"
    print("Reading reference image : ", refFilename)
    imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)

    # Read image to be aligned
    imFilename = "scanned-form.jpg"
    print("Reading image to align : ", imFilename)
    im = cv2.imread(imFilename, cv2.IMREAD_COLOR)

    # imread returns None instead of raising when a file cannot be read
    if imReference is None or im is None:
        raise SystemExit("Could not read the reference image or the image to align")

    print("Aligning images ...")
    # Registered image will be stored in imReg.
    # The estimated homography will be stored in h.
    imReg, h = alignImages(im, imReference)

    # Write aligned image to disk.
    outFilename = "aligned.jpg"
    print("Saving aligned image : ", outFilename)
    cv2.imwrite(outFilename, imReg)

    # Print estimated homography
    print("Estimated homography : \n", h)
Template Image:
Image to Align:
Expected output Image:
Here is one way in Python/OpenCV using a Rigid Affine Transformation (scale, rotation and translation only - no skew or perspective) to warp one image to match the other. It uses findTransformECC() -- Enhanced Correlation Coefficient Maximization) -- to get the rotation matrix and then uses warpAffine to do the rigid warping.
Template:
Image to be warped:
import cv2
import numpy as np
import math
import sys
# Get the image files from the command line arguments
# These are full paths to the images
# image2 will be warped to match image1
# argv[0] is name of script
if len(sys.argv) != 4:
    raise SystemExit("Usage: python %s <image1> <image2> <outfile>" % sys.argv[0])
image1 = sys.argv[1]
image2 = sys.argv[2]
outfile = sys.argv[3]

# Read the images to be aligned
# im2 is to be warped to match im1
im1 = cv2.imread(image1)
im2 = cv2.imread(image2)
# imread returns None instead of raising when a file cannot be read
if im1 is None or im2 is None:
    raise SystemExit("Could not read one of the input images")

# Convert images to grayscale for computing the rotation via ECC method
im1_gray = cv2.cvtColor(im1,cv2.COLOR_BGR2GRAY)
im2_gray = cv2.cvtColor(im2,cv2.COLOR_BGR2GRAY)

# Find size of image1
sz = im1.shape

# Define the motion model - euclidean is rigid (SRT)
warp_mode = cv2.MOTION_EUCLIDEAN

# Define 2x3 matrix and initialize the matrix to identity matrix I (eye)
warp_matrix = np.eye(2, 3, dtype=np.float32)

# Specify the number of iterations.
number_of_iterations = 5000

# Specify the threshold of the increment
# in the correlation coefficient between two iterations
termination_eps = 1e-3

# Define termination criteria
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

# Run the ECC algorithm. The results are stored in warp_matrix.
(cc, warp_matrix) = cv2.findTransformECC (im1_gray, im2_gray, warp_matrix, warp_mode, criteria, None, 1)

# Warp im2 using affine
im2_aligned = cv2.warpAffine(im2, warp_matrix, (sz[1],sz[0]), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)

# write output
cv2.imwrite(outfile, im2_aligned)

# Print rotation angle
# NOTE(review): the variable is called row1_col0 but reads element [0,1]
# (row 0, column 1), which has the opposite sign of [1,0] for a rotation
# matrix - confirm which sign convention is intended.
row1_col0 = warp_matrix[0,1]
angle = math.degrees(math.asin(row1_col0))
print(angle)
Result:
Resulting Angle of Rotation (in deg):
-0.3102187026194794
Note: you can change the background color in the warpAffine call to white if desired.
Also make the termination epsilon smaller by an order of magnitude or two for more accuracy, but longer processing times.
The other Rigid Affine approach that I mentioned in my comments earlier is to use ORB feature matching, filter the key points, then use estimateAffinePartial2D() to get the rigid affine matrix. Then use that to warp the image. For large angles this seems to me to be more reliable than the ECC method. But the ECC method seems more accurate for small rotations.
import cv2
import numpy as np
import math
import sys
MAX_FEATURES = 10000
GOOD_MATCH_PERCENT = 0.15
# Maximum vertical offset (pixels) between matched points for a match to be kept
DIFFY_THRESH = 2

# Get the image files from the command line arguments
# These are full paths to the images
# image[2] will be warped to match image[1]
# argv[0] is name of script
if len(sys.argv) != 4:
    raise SystemExit("Usage: python %s <image1> <image2> <outfile>" % sys.argv[0])
file1 = sys.argv[1]
file2 = sys.argv[2]
outFile = sys.argv[3]

# Read image1
image1 = cv2.imread(file1, cv2.IMREAD_COLOR)

# Read image2 to be warped to match image1
image2 = cv2.imread(file2, cv2.IMREAD_COLOR)

# imread returns None instead of raising when a file cannot be read
if image1 is None or image2 is None:
    raise SystemExit("Could not read one of the input images")

# Convert images to grayscale
image1Gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
image2Gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

# Detect ORB features and compute descriptors.
orb = cv2.ORB_create(MAX_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(image1Gray, None)
keypoints2, descriptors2 = orb.detectAndCompute(image2Gray, None)

# Match features.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# match() returns a tuple in recent OpenCV releases, so build a sorted list
# instead of sorting in place
matches = sorted(matcher.match(descriptors1, descriptors2, None), key=lambda x: x.distance)

# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]

# Extract location of good matches and filter by diffy if rotation is small
points1 = np.float32([keypoints1[match.queryIdx].pt for match in matches]).reshape(-1, 2)
points2 = np.float32([keypoints2[match.trainIdx].pt for match in matches]).reshape(-1, 2)

# Keep only matches whose points lie on nearly the same row in both images.
# The coordinates stay float32: casting them to uint8 would wrap every
# coordinate above 255.
diffy = points2[:, 1] - points1[:, 1]
keep = np.abs(diffy) < DIFFY_THRESH
newpoints1 = points1[keep]
newpoints2 = points2[keep]
matches_Mask = keep.astype(int).tolist()
count = int(keep.sum())

# A partial affine transform needs at least two point pairs
if count < 2:
    raise SystemExit("Not enough matches left after filtering")

# Find Affine Transformation
# note swap of order of newpoints here so that image2 is warped to match image1
m, inliers = cv2.estimateAffinePartial2D(newpoints2,newpoints1)
if m is None:
    raise SystemExit("Could not estimate the affine transformation")

# Use affine transform to warp im2 to match im1
height, width = image1.shape[:2]
image2Reg = cv2.warpAffine(image2, m, (width, height))

# Write aligned image to disk.
cv2.imwrite(outFile, image2Reg)

# Print angle
row1_col0 = m[1,0]
print('row1_col0:',row1_col0)
# m = [[s*cos, -s*sin, tx], [s*sin, s*cos, ty]]; atan2 removes the scale s,
# whereas asin(m[1,0]) is only correct when s == 1
angle = math.degrees(math.atan2(m[1,0], m[0,0]))
print('angle', angle)
Result Image:
Result Rotation Angle:
-0.6123936361765413
After some trial and error I determined that I don't need to find a homography in order to align my images properly. Since my images only need to be scaled and rotated slightly, my best option is to find the outer most points of the drawing title block and align one image to the other with a transform.
My approach is to use the Harris corner finding function to find all of the corners on the drawing, then do a simple calculation to find the points that are the shortest distance to the corners of the drawing canvas (these are the outside corners of the drawing title block). I then take 3 of the points (top left, top right, and bottom left) and use a transform to scale/rotate one drawing to the other.
Below is the code that I used:
import cv2
import numpy as np
import math
# Align img2 to img1 with an affine transform defined by three Harris corners
# per image: the corners closest to the top-left, top-right and bottom-left
# corners of the image canvas.
img1 = cv2.imread('reference.jpg')
img2 = cv2.imread('to-be-aligned.jpg')
# imread returns None instead of raising when a file cannot be read
if img1 is None or img2 is None:
    raise SystemExit("Could not read 'reference.jpg' or 'to-be-aligned.jpg'")


def _find_corners(img):
    """Return (binary Harris corner mask, sub-pixel corner coordinates) for *img*."""
    gray = np.float32(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
    response = cv2.cornerHarris(gray,5,3,0.04)
    # Keep responses above 10% of the maximum
    _, response = cv2.threshold(response,0.1*response.max(),255,0)
    corner_mask = np.uint8(response)
    # One centroid per connected blob of corner pixels
    _, _, _, centroids = cv2.connectedComponentsWithStats(corner_mask)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
    corners = cv2.cornerSubPix(gray,np.float32(centroids),(5,5),(-1,-1),criteria)
    return corner_mask, corners


def _nearest_corner(corners, target):
    """Return the corner with the smallest Euclidean distance to *target* (x, y)."""
    pts = corners.astype(np.float64)
    distances = np.hypot(pts[:, 0] - target[0], pts[:, 1] - target[1])
    return corners[np.argmin(distances)]


def _frame_points(corners, w, h, title):
    """Print and return the top-left, top-right and bottom-left frame corners."""
    print(title)
    found = []
    for label, target in (("Top Left:", (0, 0)), ("Top Right:", (w, 0)), ("Bottom Left:", (0, h))):
        corner = _nearest_corner(corners, target)
        print(label)
        print(corner)
        found.append(corner)
    return np.array(found, dtype=np.float32)


#Find the corner points of img1 and img2
h1,w1,c=img1.shape
dst1, corners1 = _find_corners(img1)
h2,w2,c=img2.shape
dst2, corners2 = _find_corners(img2)

#Find the top left, top right, and bottom left outer corners of the drawing frame
point1 = _frame_points(corners1, w1, h1, "Image #1 (reference):")
point2 = _frame_points(corners2, w2, h2, "Image #2 (image to align):")

#Make sure points look ok:
print(point1)
print(point2)

#Transform the image
m = cv2.getAffineTransform(point2,point1)
image2Reg = cv2.warpAffine(img2, m, (w1, h1), borderValue=(255,255,255))

#Highlight found points in red:
img1[dst1>0.1*dst1.max()]=[0,0,255]
img2[dst2>0.1*dst2.max()]=[0,0,255]

#Output the images:
cv2.imwrite("output-img1-harris.jpg", img1)
cv2.imwrite("output-img2-harris.jpg", img2)
cv2.imwrite("output-harris-transform.jpg",image2Reg)