I have written the following script with which I aim to detect lines in Gazebo (a simulation environment):
#!/usr/bin/env python
# rospy for the subscriber
import rospy
# ROS Image message
from sensor_msgs.msg import Image
# ROS Image message -> OpenCV2 image converter
from cv_bridge import CvBridge, CvBridgeError
# OpenCV2 for saving an image
import cv2
import matplotlib.pyplot as plt
import numpy as np
def gradient(img):
# grayscale the image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# gaussian blur of image with a 5x5 kernel
gauss = cv2.GaussianBlur(gray,(5,5),0)
# Return the canny of the image
return cv2.Canny(gauss,20,30)
def region_of_interest(img):
# Height of image (number of rows)
height = img.shape[0]
# Width of the image (number of columns)
width = img.shape[1]
# Create an array of polygons to use for the masking of the canny image
polygons = np.array([
[(200,height), (200,500), (600,500), (600,height)]
])
# Create the mask image's background (black color)
mask_bg = np.zeros_like(img)
# Create the mask image (image with a black background and a white region of interest)
mask = cv2.fillPoly(mask_bg, polygons, 255)
# Isolate the area of interest using a bitwise AND of the mask and the canny image
masked_image = cv2.bitwise_and(img, mask)
# Return the updated image
return masked_image
def make_coordinates(img, line_parameters):
# Extract the average slope and intercept of the line
slope, intercept = line_parameters
# Coordinate y(1) of the calculated line
y1 = img.shape[0]
# Coordinate y(2) of the calculated line
y2 = int(y1*0.5)
# Coordinate x(1) of the calculated line
x1 = int((y1-intercept)/slope)
# Coordinate x(2) of the calculated line
x2 = int((y2-intercept)/slope)
# Return the coordinates of the average line
return np.array([x1,y1,x2,y2])
def average_slope_intercept(img,lines):
# Create an empty list to hold the (slope, intercept) parameters of the fitted lines
line_fit = []
# Loop through all the detected lines
for line in lines:
# Store the coordinates of the detected line in a 1D array of 4 elements
x1,y1,x2,y2 = line.reshape(4)
# Create a line y = mx+b based on the coordinates
parameters = np.polyfit((x1,x2),(y1,y2),1)
# Extract the slope m
slope = parameters[0]
# Extract the intercept b
intercept = parameters[1]
# Add elements on the list
line_fit.append((slope,intercept))
# Check slope of line
# if slope < 0:
# continue
# else:
# continue
# Calculate the average of the line fit parameters list
line_fit_average = np.average(line_fit,axis=0)
# Extract the coordinates of the calculated line
main_line = make_coordinates(img,line_fit_average)
return np.array([main_line])
def display_lines(img,lines):
# Create a mask image that will have the drawn lines
line_image = np.zeros_like(img)
# Proceed only if lines were detected
if lines is not None:
# Loop through all the lines
for line in lines:
# Store the coordinates of the first and last point of the lines into 1D arrays
x1, y1, x2, y2 = line.reshape(4)
# Draw the lines on the image in blue with a thickness of 10
cv2.line(line_image,(x1,y1),(x2,y2),(255,0,0),10)
# Return the mask image with the drawn lines
return line_image
def image_callback(msg):
# print("Received an image!")
# Instantiate CvBridge
bridge = CvBridge()
try:
# Convert your ROS Image message to OpenCV2
frame = bridge.imgmsg_to_cv2(msg, "bgr8")
except CvBridgeError as e:
print(e)
else:
# Copy of the original frame
frame_copy = np.copy(frame)
# Canny of image
canny_frame = gradient(frame_copy)
# Apply mask in region of interest
cropped_image = region_of_interest(canny_frame)
# Apply Hough Transform on the region of interest
lines = cv2.HoughLinesP(cropped_image,1,np.pi/180,30,np.array([]),minLineLength=10,maxLineGap=2)
# Calculate the average slope of the detected lines
averaged_lines = average_slope_intercept(frame_copy,lines)
# Create a mask image with the drawn lines
line_image = display_lines(frame_copy,averaged_lines)
# Plot lines on the camera feed frame
combo_image = cv2.addWeighted(frame_copy,0.8,line_image,1,1)
# Show the manipulated image feed
cv2.imshow("Result feed", combo_image)
# plt.imshow(canny_frame)
cv2.waitKey(1)
# plt.show()
def main():
rospy.init_node('image_listener')
# Define your image topic
image_topic = "rover/camera1/image_raw"
# Set up your subscriber and define its callback
rospy.Subscriber(image_topic, Image, image_callback)
# Spin until ctrl + c
rospy.spin()
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
The code is integrated in ROS, so please focus your attention on the image_callback function. My issue is that the line I want to detect is quite noisy, and I cannot figure out how to detect it correctly.
To be more specific, from the following frame,
Original Frame
I get this image after the Gaussian blur and the Canny algorithm,
Canny Frame
How could I filter out the "noise" I see in the canny frame? I have played a lot with the Canny and Gaussian blur parameters, but all I have achieved is removing gradients instead of actually making the result less "noisy".
This method might help you to remove noise from the frame.
import cv2
import numpy as np
from skimage.morphology import skeletonize
def get_skeleton_image(threshold_image):
skeleton = skeletonize(threshold_image / 255)
skeleton = skeleton.astype(np.uint8)
skeleton *= 255
return skeleton
image = cv2.imread("road.png", 0)
image = cv2.resize(image, (300, 300))
bilateral = cv2.bilateralFilter(image, 15, 100, 100)
cv2.imshow("bilateral_image", bilateral)
canny_image = cv2.Canny(bilateral, 20, 30)
cv2.imshow("canny_image", canny_image)
kernel = np.ones((10, 10))
dilate_image = cv2.dilate(canny_image, kernel, iterations=1)
erode_image = cv2.erode(dilate_image, kernel, iterations=1)
cv2.imshow("erode_image", erode_image)
skeleton_image = get_skeleton_image(erode_image)
cv2.imshow("skeleton_image", skeleton_image)
cv2.waitKey(0)
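Side note: dilating and then eroding with the same kernel, as above, is a morphological closing, so the two calls can be collapsed into one. A minimal sketch, assuming the canny_image and kernel defined above:
import cv2
import numpy as np

kernel = np.ones((10, 10), np.uint8)
# Closing = dilation followed by erosion with the same structuring element
close_image = cv2.morphologyEx(canny_image, cv2.MORPH_CLOSE, kernel)
cv2.imshow("close_image", close_image)
cv2.waitKey(0)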
I am trying to detect a grainy printed line on paper with cv2. I need the angle of the line. I don't have much knowledge of image processing, and I only need to detect the line. I tried to play with the parameters, but the angle is always detected wrong. Could someone help me? This is my code:
import cv2
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pyplot import figure
img = cv2.imread('CamXY1_1.bmp')
crop_img = img[100:800, 300:900]
blur = cv2.GaussianBlur(crop_img, (1,1), 0)
ret,thresh = cv2.threshold(blur,150,255,cv2.THRESH_BINARY)
gray = cv2.cvtColor(thresh,cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 60, 150)
figure(figsize=(15, 15), dpi=150)
plt.imshow(edges, 'gray')
lines = cv2.HoughLines(edges,1,np.pi/180,200)
for rho,theta in lines[0]:
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 3000*(-b))
y1 = int(y0 + 3000*(a))
x2 = int(x0 - 3000*(-b))
y2 = int(y0 - 3000*(a))
cv2.line(img,(x1,y1),(x2,y2),(0, 255, 0),2)
Image to be detected
Here's a possible solution to estimate the line (and its angle) without using the Hough line transform. The idea is to locate the starting and ending points of the line using the reduce function. This function can reduce an image to a single column or row. If we reduce the image, we can also get the total SUM of all the pixels across the reduced image. Using this info, we can estimate the extreme points of the line and calculate its angle. These are the steps:
Resize your image because it is way too big
Get a binary image via adaptive thresholding
Define two extreme regions of the image and crop them
Reduce the ROIs to a column using the SUM mode, which is the sum of all rows
Accumulate the total values above a threshold value
Estimate the starting and ending points of the line
Get the angle of the line
Here's the code:
# imports:
import cv2
import numpy as np
import math
# image path
path = "D://opencvImages//"
fileName = "mmCAb.jpg"
# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)
# Scale your BIG image into a small one:
scalePercent = 0.3
# Calculate the new dimensions
width = int(inputImage.shape[1] * scalePercent)
height = int(inputImage.shape[0] * scalePercent)
newSize = (width, height)
# Resize the image:
inputImage = cv2.resize(inputImage, newSize, None, None, None, cv2.INTER_AREA)
# Deep copy for results:
inputImageCopy = inputImage.copy()
# Convert BGR to grayscale:
grayInput = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
# Adaptive Thresholding:
windowSize = 51
windowConstant = 11
binaryImage = cv2.adaptiveThreshold(grayInput, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, windowSize, windowConstant)
The first step is to get the binary image. Note that I previously downscaled your input because it is too big and we don't need all that info. This is the binary mask:
Now, we don't need most of the image. In fact, since the line runs across the whole image, we can just "trim" the first and last columns and check where the white pixels begin. I'll crop a column a little bit wider, though, so we can ensure we have enough data and as little noise as possible. I'll define two Regions of Interest (ROIs) and crop them. Then, I'll reduce each ROI to a column using the SUM mode, which gives me the summation of all intensities across each row. After that, I can accumulate the locations where the sum exceeds a certain threshold and approximate the location of the line, like this:
# Define the regions that will be cropped
# from the original image:
lineWidth = 5
cropPoints = [(0, 0, lineWidth, height), (width-lineWidth, 0, lineWidth, height)]
# Store the line points here:
linePoints = []
# Loop through the crop points and
# crop the ROI:
for p in range(len(cropPoints)):
# Get the ROI:
(x,y,w,h) = cropPoints[p]
# Crop the ROI:
imageROI = binaryImage[y:y+h, x:x+w]
# Reduce the ROI to an n rows x 1 column matrix:
reducedImg = cv2.reduce(imageROI, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S)
# Get the height (or length) of the array:
reducedHeight = reducedImg.shape[0]
# Define a threshold and accumulate
# the coordinate of the points:
threshValue = 100
pointSum = 0
pointCount = 0
for i in range(reducedHeight):
currentValue = reducedImg[i]
if currentValue > threshValue:
pointSum = pointSum + i
pointCount = pointCount + 1
# Get average coordinate of the line:
y = int(pointSum / pointCount)
# Store in list:
linePoints.append((x, y))
The red rectangles show the regions I cropped from the input image:
Note that I've stored both points in the linePoints list. Let's check out our approximation by drawing a line that connects both points:
# Get the two points:
p0 = linePoints[0]
p1 = linePoints[1]
# Draw the line:
cv2.line(inputImageCopy, (p0[0], p0[1]), (p1[0], p1[1]), (255, 0, 0), 1)
cv2.imshow("Line", inputImageCopy)
cv2.waitKey(0)
Which yields:
Not bad, huh? Now that we have both points, we can estimate the angle of this line:
# Get angle:
adjacentSide = p1[0] - p0[0]
oppositeSide = p0[1] - p1[1]
# Compute the angle alpha:
alpha = math.degrees(math.atan(oppositeSide / adjacentSide))
print("Angle: "+str(alpha))
This prints:
Angle: 0.534210901840831
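One caveat: math.atan(oppositeSide / adjacentSide) will raise a ZeroDivisionError if the line is perfectly vertical (adjacentSide == 0). If that can happen with your input, math.atan2 handles it safely. A minimal sketch using the same two points:
import math

# atan2(y, x) is defined even when the x difference is zero
alpha = math.degrees(math.atan2(p0[1] - p1[1], p1[0] - p0[0]))
print("Angle: " + str(alpha))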
Here is code to get the optical flow output from a stabilized video (no camera movement) and save it as a set of frames:
import cv2 as cv
import numpy as np
# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")
# ret = a boolean return value from getting the frame, first_frame = the first frame in the entire video sequence
ret, first_frame = cap.read()
# Converts frame to grayscale because we only need the luminance channel for detecting edges - less computationally expensive
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)
# Creates an image filled with zero intensities with the same dimensions as the frame
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
count = 0
while(cap.isOpened()):
# ret = a boolean return value from getting the frame, frame = the current frame being projected in the video
ret, frame = cap.read()
# Stop when the video ends and cap.read() no longer returns a frame
if not ret:
break
# Opens a new window and displays the input frame
cv.imshow("input", frame)
# Converts each frame to grayscale - we previously only converted the first frame to grayscale
gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
# Calculates dense optical flow by Farneback method
flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
# Computes the magnitude and angle of the 2D vectors
magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])
# Sets image hue according to the optical flow direction
mask[..., 0] = angle * 180 / np.pi / 2
# Sets image value according to the optical flow magnitude (normalized)
mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)
# Converts HSV to RGB (BGR) color representation
rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)
# Opens a new window and displays the output frame
cv.imshow("dense optical flow", rgb[40:150,120:220])
cv.imwrite("frames_modified_2/%d.png" % count, rgb[40:150,120:220])
count +=1
# Updates previous frame
prev_gray = gray
# Frames are read at intervals of 1 millisecond. The program breaks out of the while loop when the user presses the 'q' key
if cv.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv.destroyAllWindows()
Can someone please suggest how to quantify the difference between the frames, i.e., to estimate speed/velocity?
Here's an example to obtain the pixel magnitude translation from .bsq frames. You can modify the code to input a video file instead. You are probably most interested in the get_translation() function. Example:
Graph displaying pixel translation from frame-to-frame
Code
import numpy as np
import argparse
import os
import cv2
from matplotlib import pyplot as plt
from matplotlib import cm
import time
import random
# Usage: python translate_analyzer.py -p <filename.bsq>
# Automatic brightness and contrast optimization with optional histogram clipping
def automatic_brightness_and_contrast(image, clip_hist_percent=25):
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image
# Calculate grayscale histogram
hist = cv2.calcHist([gray],[0],None,[256],[0,256])
hist_size = len(hist)
# Calculate cumulative distribution from the histogram
accumulator = []
accumulator.append(float(hist[0]))
for index in range(1, hist_size):
accumulator.append(accumulator[index -1] + float(hist[index]))
# Locate points to clip
maximum = accumulator[-1]
clip_hist_percent *= (maximum/100.0)
clip_hist_percent /= 2.0
# Locate left cut
minimum_gray = 0
while accumulator[minimum_gray] < clip_hist_percent:
minimum_gray += 1
# Locate right cut
maximum_gray = hist_size -1
while accumulator[maximum_gray] >= (maximum - clip_hist_percent):
maximum_gray -= 1
# Calculate alpha and beta values
alpha = 255 / (maximum_gray - minimum_gray)
beta = -minimum_gray * alpha
auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
return (auto_result, alpha, beta)
# Draw flow
def draw_flow(img, flow, step=30):
h, w = img.shape[:2]
y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
fx, fy = flow[y,x].T
lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
lines = np.int32(lines + 0.5)
vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
cv2.polylines(vis, lines, 1, (36, 255, 12))
for (x1, y1), (_x2, _y2) in lines:
cv2.circle(vis, (x1, y1), 2, (36, 255, 12), -1)
return vis
# Return the median of the horizontal (x) flow component as the global
# translation value, along with the full per-pixel horizontal flow field:
def get_translation(img, flow, step=30):
return (np.median(flow[:,:,0].T), flow[:, :, 0].T)
# Get file path
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--path", help="Path to the directory")
args = vars(ap.parse_args())
if not args['path']:
print('Usage: python translate_analyzer.py -p <directory>')
exit(1)
# Extract file name
bsq_fname = os.path.split(args['path'])[-1]
if '.bsq' not in bsq_fname:
print('ERROR: Invalid bsq file. Select correct file.')
exit(1)
width = 640
height = 512
frame_count = int(os.path.getsize(bsq_fname)/(2*height*width))
x,y,w,h = 0,0,100,512
# Simulates calibrated frames to display on video frame
data_file = np.fromfile(bsq_fname, dtype=np.uint16, count=-1)
data_file = data_file.reshape((width, height, frame_count), order='F')
data_file = np.rot90(data_file)
print(bsq_fname)
fname = bsq_fname.split()[0]
prev = data_file[:,:,0].copy()
prev //= 64
prev = automatic_brightness_and_contrast(prev)[0]
prev = prev[y:y+h, x:x+w]
translation_data = []
frame_direction = []
start = time.time()
for index in range(1, frame_count):
data = data_file[:,:,index].copy()
data //= 64
data = automatic_brightness_and_contrast(data)[0]
data = data[y:y+h, x:x+w]
flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
translation, pixel_direction = get_translation(data, flow)
prev = data
cv2.imshow('flow', draw_flow(data, flow))
cv2.waitKey(1)
translation_data.append(translation)
frame_direction = pixel_direction
index = (index+1) % frame_count
end = time.time()
print('Time:', end - start)
plt.figure()
plt.title(bsq_fname)
plt.xlabel("Frames")
plt.ylabel("Magnitude")
plt.plot(translation_data)
plt.figure()
plt.title("Pixel Direction")
plt.xlabel("Width")
plt.ylabel("Height")
plt.imshow(frame_direction.T)
plt.colorbar(orientation='vertical')
plt.show()
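If you want an actual speed rather than a raw pixel offset, you still need two calibration values that are not in the code above: the capture frame rate and the ground distance covered by one pixel. A minimal sketch, where fps and meters_per_pixel are hypothetical values you would have to measure for your own setup:
# Hypothetical calibration constants for your camera setup
fps = 30.0               # frames per second of the capture
meters_per_pixel = 0.05  # ground distance spanned by one pixel

# translation_data holds the median x-translation in pixels per frame,
# so speed = pixels/frame * frames/second * meters/pixel
speed_mps = [abs(t) * fps * meters_per_pixel for t in translation_data]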
First of all, I am passing the following values into hough_lines: rho = 2, theta = np.pi/180, threshold = 15, min_line_len = 40, max_line_gap = 20.
lines = hough_lines(masked_edges, rho, theta, threshold, min_line_len, max_line_gap)
def hough_lines(img, rho, theta, threshold, min_line_len, max_line_gap):
#This function is used for drawing line when we give a specific criteria into the pixels we want to pick up.
#This function is used with draw_lines function to draw the lines in specific pixels
#which are drawn by region_of_interest function.
"""`img` should be the output of a Canny transform.
Returns an image with hough lines drawn.
"""
lines = cv2.HoughLinesP(img, rho, theta, threshold, np.array([]), minLineLength=min_line_len, maxLineGap=max_line_gap)
line_img = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
draw_lines(line_img, lines)
return line_img
Above is my code, and I am getting the following error:
File "auto.py", line 68, in <module>
output = lane.process_image(frame)
File "/home/pi/test/lane.py", line 227, in process_image
lines = hough_lines(masked_edges, rho, theta, threshold, min_line_len, max_line_gap)
File "/home/pi/test/lane.py", line 154, in hough_lines
draw_lines(line_img, lines)
File "/home/pi/test/lane.py", line 97, in draw_lines
for line in lines:
TypeError: 'NoneType' object is not iterable
In the main function, I am trying to pass the frame taken by video capture into the process_image function in the lane file.
cam = cv2.VideoCapture(0)
while True:
print('Succeed to connect...')
data = ''
data=sys.stdin.read(1)[0]
print("data input=",data)
while True:
if not cam.isOpened():
print("Wait for the header")
else:
flag, frame = cam.read()
frame = cv2.flip(frame,1)
cv2.imshow('video', frame)
#print(type(frame))
output = lane.process_image(frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
exit()
I have tried many things, such as changing the parameter values for the hough_lines function, and tried to see which function produces the NoneType value. However, before the cv2.HoughLinesP call, every function returns its own value with ndarray type.
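For what it's worth, cv2.HoughLinesP returns None rather than an empty array when no segment passes the threshold, and that is exactly what the traceback shows: draw_lines then tries to iterate over None. A minimal guard, sketched against the hough_lines function above:
def hough_lines(img, rho, theta, threshold, min_line_len, max_line_gap):
    lines = cv2.HoughLinesP(img, rho, theta, threshold, np.array([]), minLineLength=min_line_len, maxLineGap=max_line_gap)
    line_img = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
    # Only draw when the Hough transform actually found segments
    if lines is not None:
        draw_lines(line_img, lines)
    return line_img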
--edit
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import math
import os
def grayscale(img): # It converts the original picture to the gray scale picture.
"""Applies the Grayscale transform
This will return an image with only one color channel
but NOTE: to see the returned image as grayscale
(assuming your grayscaled image is called 'gray')
you should call plt.imshow(gray, cmap='gray')"""
return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# Or use BGR2GRAY if you read an image with cv2.imread()
# return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
def canny(img, low_threshold, high_threshold):
#After applied grayscale function, it converts the grayscale image to edges
#which is a binary image with white pixels.
"""Applies the Canny transform"""
return cv2.Canny(img, low_threshold, high_threshold)
def gaussian_blur(img, kernel_size):
#Gaussian blur is applied for suppressing noise and nonlogical gradients by averaging.
"""Applies a Gaussian Noise kernel"""
return cv2.GaussianBlur(img, (kernel_size, kernel_size), 0)
def region_of_interest(img, vertices):
# This function is used for tracing a specific line of the road,
# utilizing vertices, which here are 4 integer points, and ignore_mask_color,
# which ignores pixels that do not meet the criteria.
"""
Applies an image mask.
Only keeps the region of the image defined by the polygon
formed from `vertices`. The rest of the image is set to black.
`vertices` should be a numpy array of integer points.
"""
#defining a blank mask to start with
mask = np.zeros_like(img)
#defining a 3 channel or 1 channel color to fill the mask with depending on the input image
if len(img.shape) > 2:
channel_count = img.shape[2] # i.e. 3 or 4 depending on your image
ignore_mask_color = (255,) * channel_count
else:
ignore_mask_color = 255
#filling pixels inside the polygon defined by "vertices" with the fill color
cv2.fillPoly(mask, vertices, ignore_mask_color)
#returning the image only where mask pixels are nonzero
masked_image = cv2.bitwise_and(img, mask)
return masked_image
def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
"""
NOTE: this is the function you might want to use as a starting point once you want to
average/extrapolate the line segments you detect to map out the full
extent of the lane (going from the result shown in raw-lines-example.mp4
to that shown in P1_example.mp4).
Think about things like separating line segments by their
slope ((y2-y1)/(x2-x1)) to decide which segments are part of the left
line vs. the right line. Then, you can average the position of each of
the lines and extrapolate to the top and bottom of the lane.
This function draws `lines` with `color` and `thickness`.
Lines are drawn on the image inplace (mutates the image).
If you want to make the lines semi-transparent, think about combining
this function with the weighted_img() function below
"""
"""
for line in lines:
for x1,y1,x2,y2 in line:
cv2.line(img, (x1, y1), (x2, y2), color, thickness)
"""
right_slopes = []
right_intercepts = []
left_slopes = []
left_intercepts = []
left_points_x = []
left_points_y = []
right_points_x = []
right_points_y = []
y_max = img.shape[0]
y_min = img.shape[0]
for line in lines:
for x1,y1,x2,y2 in line:
slope = (y2-y1)/(x2-x1)
if slope < 0.0 and slope > -math.inf: # math.inf = floating point positive infinity
left_slopes.append(slope) # left line
left_points_x.append(x1)
left_points_x.append(x2)
left_points_y.append(y1)
left_points_y.append(y2)
left_intercepts.append(y1 - slope*x1)
if slope > 0.0 and slope < math.inf:
right_slopes.append(slope) # right line
right_points_x.append(x1)
right_points_x.append(x2)
right_points_y.append(y1)
right_points_y.append(y2)
right_intercepts.append(y1 - slope*x1)
y_min = min(y1,y2,y_min)
if len(left_slopes) > 0:
left_slope = np.mean(left_slopes)
left_intercept = np.mean(left_intercepts)
x_min_left = int((y_min - left_intercept)/left_slope)
x_max_left = int((y_max - left_intercept)/left_slope)
cv2.line(img, (x_min_left, y_min), (x_max_left, y_max), [255, 0, 0], 8)
if len(right_slopes) > 0:
right_slope = np.mean(right_slopes)
right_intercept = np.mean(right_intercepts)
x_min_right = int((y_min - right_intercept)/right_slope)
x_max_right = int((y_max - right_intercept)/right_slope)
cv2.line(img, (x_min_right, y_min), (x_max_right, y_max), [255, 0, 0], 8)
def hough_lines(img, rho, theta, threshold, min_line_len, max_line_gap):
#This function is used for drawing line when we give a specific criteria into the pixels we want to pick up.
#This function is used with draw_lines function to draw the lines in specific pixels
#which are drawn by region_of_interest function.
"""
`img` should be the output of a Canny transform.
Returns an image with hough lines drawn.
"""
lines = cv2.HoughLinesP(img, rho, theta, threshold, np.array([]), minLineLength=min_line_len, maxLineGap=max_line_gap)
line_img = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
draw_lines(line_img, lines)
return line_img
# Python 3 has support for cool math symbols.
def weighted_img(img, initial_img, α=0.8, β=1., γ=0.):
# By appliying "color" binary image, it finally draws the line on the edge image.
"""
`img` is the output of the hough_lines(), An image with lines drawn on it.
Should be a blank image (all black) with lines drawn on it.
`initial_img` should be the image before any processing.
The result image is computed as follows:
initial_img * α + img * β + γ
NOTE: initial_img and img must be the same shape!
"""
return cv2.addWeighted(initial_img, α, img, β, γ)
def process_image(image):
# NOTE: The output you return should be a color image (3 channel) for processing video below
# TODO: put your pipeline here,
# you should return the final output (image where lines are drawn on lanes)
# Read in and grayscale the image
gray = grayscale(image)
# Define a kernel size and apply Gaussian smoothing
kernel_size = 5
blur_gray = gaussian_blur(gray, kernel_size)
# Define our parameters for Canny and apply
low_threshold = 50
high_threshold = 150
edges = canny(blur_gray, low_threshold, high_threshold)
# Next we'll create a masked edges image using cv2.fillPoly()
imshape = image.shape
vertices = np.array([[(120,imshape[0]),(450, 320), (500, 320), (imshape[1],imshape[0])]], dtype=np.int32)
masked_edges = region_of_interest(edges, vertices)
# Define the Hough transform parameters
# Make a blank the same size as our image to draw on
rho = 2 # distance resolution in pixels of the Hough grid
theta = np.pi/180 # angular resolution in radians of the Hough grid
threshold = 15 # minimum number of votes (intersections in Hough grid cell)
min_line_len = 40 # minimum number of pixels making up a line
max_line_gap = 20 # maximum gap in pixels between connectable line segments
line_image = np.copy(image)*1 #creating a blank to draw lines on
# Run Hough on edge detected image
lines = hough_lines(masked_edges, rho, theta, threshold, min_line_len, max_line_gap)
# Create a "color" binary image to combine with line image
color_edges = np.dstack((edges, edges, edges))
# Draw the lines on the edge image
lines_edges = weighted_img(lines, line_image)
return lines_edges
Background
A raster file collected via LIDAR records the topography of a watershed. To properly model the watershed, the river must appear continuous without any breaks or interruptions. The roads in the raster file appear like dams that interrupt the river as seen in the picture below
Specific Area Under Consideration in the Watershed
Objective
These river breaks are the main problem and I am trying but failing to remove them.
Approach
Via Python, I used the various tools and prebuilt functions in the OpenCV library. The primary function I used in this approach is the cv2.inpaint function. This function takes in an image file and a mask file and interpolates the original image wherever the mask file pixels are nonzero.
The main step here is determining the mask file which I did by detecting the corners at the break in the river. The mask file will guide the inpaint function to fill in the pixels according to the patterns in the surrounding pixels.
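For reference, a minimal sketch of the cv2.inpaint call this approach relies on, with hypothetical file names; the source must be 8-bit and the mask a single-channel 8-bit image that is nonzero wherever pixels should be filled:
import cv2
import numpy as np

img = cv2.imread('raster.png', cv2.IMREAD_GRAYSCALE)   # hypothetical input raster
mask = cv2.imread('mask.png', cv2.IMREAD_GRAYSCALE)    # hypothetical mask of the breaks
mask = (mask > 0).astype(np.uint8)
# The radius (here 3) sets the neighborhood considered around each inpainted pixel
filled = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)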
Problem
My issue is that this happens from all directions whereas I require it to only extrapolate pixel data from the river itself. The image below shows the flawed result: inpaint works but it considers data from outside the river too.
Inpainted Result
Here is my code if you are so kind as to help:
import scipy.io as sio
import numpy as np
from matplotlib import pyplot as plt
import cv2
matfile = sio.loadmat('box.mat') ## box.mat original raster file linked below
ztopo = matfile['box']
#Crop smaller section for analysis
ztopo2 = ztopo[200:500, 1400:1700]
## Step 1) Isolate river
river = ztopo2.copy()
river[ztopo2<217.5] = 0
#This will become problematic for entire watershed w/o proper slicing
## Step 2) Detect Corners
dst = cv2.cornerHarris(river,3,7,0.04)
# cornerHarris arguments adjust qualities of corner markers
# Dilate Image (unnecessary)
#dst = cv2.dilate(dst,None)
# Threshold for an optimal value, it may vary depending on the image.
# This adjusts what defines a corner
river2 = river.copy()
river2[dst>0.01*dst.max()]=[150]
## Step 3) Remove river and keep corners
#Initiate loop to isolate detected corners
i=0
j=0
rows,columns = river2.shape
for i in np.arange(rows):
for j in np.arange(columns):
if river2[i,j] != 150:
river2[i,j] = 0
j = j + 1
i = i + 1
# Save corners as new image for import during next step.
# Import must be via cv2, as thresholding and contour detection can only work on BGR files. The sio import in line 6 (matfile = sio.loadmat('box.mat')) imports a 1-dimensional image rather than BGR.
cv2.imwrite('corners.png', river2)
## Step 4) Create mask image by defining and filling a contour around the previously detected corners
#Step 4 code retrieved from http://dsp.stackexchange.com/questions/2564/opencv-c-connect-nearby-contours-based-on-distance-between-them
#Article: OpenCV/C++ connect nearby contours based on distance between them
#Initiate function to specify features of contour connections
def find_if_close(cnt1,cnt2):
row1,row2 = cnt1.shape[0],cnt2.shape[0]
for i in xrange(row1):
for j in xrange(row2):
dist = np.linalg.norm(cnt1[i]-cnt2[j])
if abs(dist) < 50 :
return True
elif i==row1-1 and j==row2-1:
return False
#import image of corners created in step 3 so thresholding can function properly
img = cv2.imread('corners.png')
#Thresholding and Finding contours only works on grayscale image
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY)
contours,hier = cv2.findContours(thresh,cv2.RETR_EXTERNAL,2)
LENGTH = len(contours)
status = np.zeros((LENGTH,1))
for i,cnt1 in enumerate(contours):
x = i
if i != LENGTH-1:
for j,cnt2 in enumerate(contours[i+1:]):
x = x+1
dist = find_if_close(cnt1,cnt2)
if dist == True:
val = min(status[i],status[x])
status[x] = status[i] = val
else:
if status[x]==status[i]:
status[x] = i+1
unified = []
maximum = int(status.max())+1
for i in xrange(maximum):
pos = np.where(status==i)[0]
if pos.size != 0:
cont = np.vstack(contours[i] for i in pos)
hull = cv2.convexHull(cont) # I don't know why/how this is used
unified.append(hull)
cv2.drawContours(img,unified,-1,(0,255,0),1) #Last argument specifies contour width
cv2.drawContours(thresh,unified,-1,255,-1)
# Thresh is the filled contour while img is the contour itself
# The contour surrounds the corners
#cv2.imshow('pic', thresh) #Produces black and white image
## Step 5) Merge via inpaint
river = np.uint8(river)
ztopo2 = np.uint8(ztopo2)
thresh[thresh>0] = 1
#new = river.copy()
merged = cv2.inpaint(river,thresh,12,cv2.INPAINT_TELEA)
plt.imshow(merged)
plt.colorbar()
plt.show()
I am using the following code for character recognition. This Python code targets OpenCV 3, but I have OpenCV 2.4.9. When I try to execute it, I get the following error:
File "TrainAndTest.py", line 143, in <module>
main()
File "TrainAndTest.py", line 60, in main
kNearest = cv2.ml.KNearest_create() # instantiate KNN object
AttributeError: 'module' object has no attribute 'ml'
Python code
import cv2
import numpy as np
import operator
import os
# module level variables
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
class ContourWithData():
# member variables
npaContour = None # contour
boundingRect = None # bounding rect for contour
intRectX = 0 # bounding rect top left corner x location
intRectY = 0 # bounding rect top left corner y location
intRectWidth = 0 # bounding rect width
intRectHeight = 0 # bounding rect height
fltArea = 0.0 # area of contour
def calculateRectTopLeftPointAndWidthAndHeight(self): # calculate bounding rect info
[intX, intY, intWidth, intHeight] = self.boundingRect
self.intRectX = intX
self.intRectY = intY
self.intRectWidth = intWidth
self.intRectHeight = intHeight
def checkIfContourIsValid(self): # this is oversimplified, for a production grade program
if self.fltArea < MIN_CONTOUR_AREA: return False # much better validity checking would be necessary
return True
def main():
allContoursWithData = [] # declare empty lists,
validContoursWithData = [] # we will fill these shortly
try:
npaClassifications = np.loadtxt("classifications.txt", np.float32) # read in training classifications
except:
print "error, unable to open classifications.txt, exiting program\n"
os.system("pause")
return
# end try
try:
npaFlattenedImages = np.loadtxt("flattened_images.txt", np.float32) # read in training images
except:
print "error, unable to open flattened_images.txt, exiting program\n"
os.system("pause")
return
# end try
npaClassifications = npaClassifications.reshape((npaClassifications.size, 1)) # reshape numpy array to a single column, necessary to pass to the call to train
kNearest = cv2.ml.KNearest_create() # instantiate KNN object
kNearest.train(npaFlattenedImages, cv2.ml.ROW_SAMPLE, npaClassifications)
imgTestingNumbers = cv2.imread("1.jpg") # read in testing numbers image
if imgTestingNumbers is None: # if image was not read successfully
print "error: image not read from file \n\n" # print error message to std out
os.system("pause") # pause so user can see error message
return # and exit function (which exits program)
# end if
imgGray = cv2.cvtColor(imgTestingNumbers, cv2.COLOR_BGR2GRAY) # get grayscale image
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0) # blur
# filter image from grayscale to black and white
imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image
255, # make pixels that pass the threshold full white
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # use gaussian rather than mean, seems to give better results
cv2.THRESH_BINARY_INV, # invert so foreground will be white, background will be black
11, # size of a pixel neighborhood used to calculate threshold value
2) # constant subtracted from the mean or weighted mean
imgThreshCopy = imgThresh.copy() # make a copy of the thresh image; this is necessary b/c findContours modifies the image
imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy, # input image, make sure to use a copy since the function will modify this image in the course of finding contours
cv2.RETR_EXTERNAL, # retrieve the outermost contours only
cv2.CHAIN_APPROX_SIMPLE) # compress horizontal, vertical, and diagonal segments and leave only their end points
for npaContour in npaContours: # for each contour
contourWithData = ContourWithData() # instantiate a contour with data object
contourWithData.npaContour = npaContour # assign contour to contour with data
contourWithData.boundingRect = cv2.boundingRect(contourWithData.npaContour) # get the bounding rect
contourWithData.calculateRectTopLeftPointAndWidthAndHeight() # get bounding rect info
contourWithData.fltArea = cv2.contourArea(contourWithData.npaContour) # calculate the contour area
allContoursWithData.append(contourWithData) # add contour with data object to list of all contours with data
# end for
for contourWithData in allContoursWithData: # for all contours
if contourWithData.checkIfContourIsValid(): # check if valid
validContoursWithData.append(contourWithData) # if so, append to valid contour list
# end if
# end for
validContoursWithData.sort(key = operator.attrgetter("intRectX")) # sort contours from left to right
strFinalString = "" # declare final string, this will have the final number sequence by the end of the program
for contourWithData in validContoursWithData: # for each contour
# draw a green rect around the current char
cv2.rectangle(imgTestingNumbers, # draw rectangle on original testing image
(contourWithData.intRectX, contourWithData.intRectY), # upper left corner
(contourWithData.intRectX + contourWithData.intRectWidth, contourWithData.intRectY + contourWithData.intRectHeight), # lower right corner
(0, 255, 0), # green
2) # thickness
imgROI = imgThresh[contourWithData.intRectY : contourWithData.intRectY + contourWithData.intRectHeight, # crop char out of threshold image
contourWithData.intRectX : contourWithData.intRectX + contourWithData.intRectWidth]
imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)) # resize image, this will be more consistent for recognition and storage
npaROIResized = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) # flatten image into 1d numpy array
npaROIResized = np.float32(npaROIResized) # convert from 1d numpy array of ints to 1d numpy array of floats
retval, npaResults, neigh_resp, dists = kNearest.findNearest(npaROIResized, k = 1) # call KNN function find_nearest
strCurrentChar = str(chr(int(npaResults[0][0]))) # get character from results
strFinalString = strFinalString + "\0" + strCurrentChar # append current char to full string
# end for
print "\n" + strFinalString + "\n" # show the full string
cv2.imshow("imgTestingNumbers", imgTestingNumbers) # show input image with green boxes drawn around found digits
cv2.waitKey(0) # wait for user key press
cv2.destroyAllWindows() # remove windows from memory
return
if __name__ == "__main__":
main()
# end if
Can anyone help me solve this error? What changes do I need to make to execute this code with OpenCV 2.4.9, using Python 2.7 and Ubuntu 14.04?
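For reference, OpenCV 2.4.x has no cv2.ml module; the KNN class lives at the top level as cv2.KNearest, and the method names differ. A minimal sketch of the 2.4.x equivalents, assuming the same training arrays as in the code above (note that cv2.findContours also differs: 2.4.x returns two values, contours and hierarchy, not three):
kNearest = cv2.KNearest()                               # replaces cv2.ml.KNearest_create()
kNearest.train(npaFlattenedImages, npaClassifications)  # no cv2.ml.ROW_SAMPLE flag in 2.4.x
retval, npaResults, neigh_resp, dists = kNearest.find_nearest(npaROIResized, 1)  # replaces findNearest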