I'm working on a project which uses openCV to detect a blue LED and obtain its x and y co-ords. So far, i have everything working however i can't seem to find any successful way of using the co-ordinates to move the cursor in the same way you can with a physical mouse.
I have tried using the python mouse module and pynput but they both have the same issue which is that the "press" feature is very inconsistent in how it works.
What i want to be able to do is for the LED to always be detected as a single click unless it is held in which case it should drag.
The problem is, dragging only works on some windows like file explorer and doesnt work on VScode or Chrome. Also i cant draw smooth lines using the press function as it only draws straight lines.
The only way i can think of doing something like this would be to draw small straight lines in regular intervals in order to form a smooth line but i'm unsure of how something like this would be done
Maybe there is a module that already does this but i cant seem to find anything on the subject. Most questions asked here are about automating the mouse events but thats not what im after.
The code i have so far is as follow:
from cv2 import warpPerspective
from LScalibrate import warpImage
import cv2
from ast import literal_eval
import numpy as np
import mouse
def start(root, pointsstr, maskparamsmalformed, width, height):
points = literal_eval(pointsstr)
maskparamsstr = ''.join([letter for letter in maskparamsmalformed if letter not in("array()")])
maskparams = literal_eval(maskparamsstr)
lower, upper = np.array(maskparams[0]), np.array(maskparams[1])
root.withdraw()
cap = cv2.VideoCapture(0)
cap.set(15, 3) # may have to change the 2nd arg. Only supported for some cameras. Testing with droidcam therefore cannot use this myself
mat = warpImage(cap, points)
while True:
check, frame = cap.read()
if not check:
break
frame = warpPerspective(frame, mat, (1000, 1000))
hsvimg = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
maskedimg = cv2.inRange(hsvimg, lower, upper)
image = cv2.bitwise_and(frame, frame, mask=maskedimg)
contours, rel = cv2.findContours(maskedimg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
pts = None
contourpts = []
if len(contours) != 0:
for contour in contours:
if cv2.contourArea(contour) > 10:
x, y, w, h = cv2.boundingRect(contour)
x = (x+(x+w))//2
y = (y+(y+h))//2
pts = (x, y)
contourpts.append(pts)
check = set(contourpts)
if len(check) > 1:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5,5), 0)
minv, maxv, minl, maxl = cv2.minMaxLoc(gray)
pts = maxl
if pts is not None:
controlCursor(pts, width, height)
cv2.circle(frame, pts, 3, (0, 0, 255), -1)
else:
mouse.release("left")
cv2.imshow("win", frame)
if cv2.waitKey(1) & 0xFF == 27:
break
cap.release()
cv2.destroyAllWindows()
root.deiconify()
def minContour(contours):
return sorted(contours, key=cv2.contourArea, reverse=False)[0]
def controlCursor(pos, w, h):
print(pos)
print(w, h)
x = (pos[0]/1000)*w
y = (pos[1]/1000)*h
print(x, y)
mouse.move(x, y, True)
mouse.press("left")
I have included a video showing how it's currently working and all the program code if needed:
Video: https://youtu.be/Q9tOIyy_tsE
Github (all code): https://github.com/ImaadNisar/Lightscreen-Touchscreen-Detection
Thanks!
If the title isn't clear let's say I have a list of images (10k+), and I have a target image I am searching for.
Here's an example of the target image:
Here's an example of images I will want to be searching to find something 'similar' (ex1, ex2, and ex3):
Here's the matching I do (I use KAZE)
from matplotlib import pyplot as plt
import numpy as np
import cv2
from typing import List
import os
import imutils
def calculate_matches(des1: List[cv2.KeyPoint], des2: List[cv2.KeyPoint]):
"""
does a matching algorithm to match if keypoints 1 and 2 are similar
#param des1: a numpy array of floats that are the descriptors of the keypoints
#param des2: a numpy array of floats that are the descriptors of the keypoints
#return:
"""
# bf matcher with default params
bf = cv2.BFMatcher(cv2.NORM_L2)
matches = bf.knnMatch(des1, des2, k=2)
topResults = []
for m, n in matches:
if m.distance < 0.7 * n.distance:
topResults.append([m])
return topResults
def compare_images_kaze():
cwd = os.getcwd()
target = os.path.join(cwd, 'opencv_target', 'target.png')
images_list = os.listdir('opencv_images')
for image in images_list:
# get my 2 images
img2 = cv2.imread(target)
img1 = cv2.imread(os.path.join(cwd, 'opencv_images', image))
for i in range(0, 360, int(360 / 8)):
# rotate my image by i
img_target_rotation = imutils.rotate_bound(img2, i)
# Initiate KAZE object with default values
kaze = cv2.KAZE_create()
kp1, des1 = kaze.detectAndCompute(img1, None)
kp2, des2 = kaze.detectAndCompute(img2, None)
matches = calculate_matches(des1, des2)
try:
score = 100 * (len(matches) / min(len(kp1), len(kp2)))
except ZeroDivisionError:
score = 0
print(image, score)
img3 = cv2.drawMatchesKnn(img1, kp1, img_target_rotation, kp2, matches,
None, flags=2)
img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2RGB)
plt.imshow(img3)
plt.show()
plt.clf()
if __name__ == '__main__':
compare_images_kaze()
Here's the result of my code:
ex1.png 21.052631578947366
ex2.png 0.0
ex3.png 42.10526315789473
It does alright! It was able to tell that ex1 is similar and ex2 is not similar, however it states that ex3 is similar (even more similar than ex1). Any extra pre-processing or post-processing (maybe ml, assuming ml is actually useful) or just changes I can do to my method that can be done to keep only ex1 as similar and not ex3?
(Note this score I create is something I found online. Not sure if it's an accurate way to go about it)
ADDED MORE EXAMPLES BELOW
Another set of examples:
Here's what I am searching for
I want the above image to be similar to the middle and bottom images (NOTE: I rotate my target image by 45 degrees and compare it to the images below.)
Feature matching (as stated in answers below) were useful in found similarity with the second image, but not the third image (Even after rotating it properly)
Detecting The Most Similar Image
The Code
You can use template matching, where the image you want to detect if it's in the other images is the template. I have that small image saved in template.png, and the other three images in img1.png, img2.png and img3.png.
I defined a function that utilizes the cv2.matchTemplate to calculate the amount of confidence for if a template is in an image. Using the function on every image, the one that results ion the highest confidence is the image that contains the template:
import cv2
template = cv2.imread("template.png", 0)
files = ["img1.png", "img2.png", "img3.png"]
for name in files:
img = cv2.imread(name, 0)
print(f"Confidence for {name}:")
print(cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED).max())
The Output:
Confidence for img1.png:
0.8906427
Confidence for img2.png:
0.4427919
Confidence for img3.png:
0.5933967
The Explanation:
Import the opencv module, and read in the template image as grayscale by setting the second parameter of the cv2.imread method to 0:
import cv2
template = cv2.imread("template.png", 0)
Define your list of images of which you want to determine which one contains the template:
files = ["img1.png", "img2.png", "img3.png"]
Loop through the filenames and read in each one as a grayscale image:
for name in files:
img = cv2.imread(name, 0)
Finally, you can use the cv2.matchTemplate to detect the template in each image. There are many detection methods you can use, but for this I decided to use the cv2.TM_CCOEFF_NORMED method:
print(f"Confidence for {name}:")
print(cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED).max())
The output of the function ranges from between 0 and 1, and as you can see, it successfully detected that the first image is most likely to contain the template image (it has the highest level of confidence).
The Visualization
The Code
If detecting which image contains the template isn't enough, and you want a visualization, you can try the code below:
import cv2
import numpy as np
def confidence(img, template):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
conf = res.max()
return np.where(res == conf), conf
files = ["img1.png", "img2.png", "img3.png"]
template = cv2.imread("template.png")
h, w, _ = template.shape
for name in files:
img = cv2.imread(name)
([y], [x]), conf = confidence(img, template)
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
text = f'Confidence: {round(float(conf), 2)}'
cv2.putText(img, text, (x, y), 1, cv2.FONT_HERSHEY_PLAIN, (0, 0, 0), 2)
cv2.imshow(name, img)
cv2.imshow('Template', template)
cv2.waitKey(0)
The Output:
The Explanation:
Import the necessary libraries:
import cv2
import numpy as np
Define a function that will take in a full image and a template image. As the cv2.matchTemplate method requires grayscale images, convert the 2 images into grayscale:
def confidence(img, template):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
Use the cv2.matchTemplate method to detect the template in the image, and return the position of the point with the highest confidence, and return the highest confidence:
res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
conf = res.max()
return np.where(res == conf), conf
Define your list of images you want to determine which one contains the template, and read in the template image:
files = ["img1.png", "img2.png", "img3.png"]
template = cv2.imread("template.png")
Get the size of the template image to later use for drawing a rectangle on the images:
h, w, _ = template.shape
Loop though the filenames and read in each image. Using the confidence function we defined before, get the x y position of the top-left corner of the detected template and the confidence amount for the detection:
for name in files:
img = cv2.imread(name)
([y], [x]), conf = confidence(img, template)
Draw a rectangle on the image at the corner and put the text on the image. Finally, show the image:
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
text = f'Confidence: {round(float(conf), 2)}'
cv2.putText(img, text, (x, y), 1, cv2.FONT_HERSHEY_PLAIN, (0, 0, 0), 2)
cv2.imshow(name, img)
Also, show the template for comparison:
cv2.imshow('Template', template)
cv2.waitKey(0)
I'm not sure, if the given images resemble your actual task or data, but for this kind of images, you could try simple template matching, cf. this OpenCV tutorial.
Basically, I just implemented the tutorial with some modifications:
import cv2
import matplotlib.pyplot as plt
# Read images
examples = [cv2.imread(img) for img in ['ex1.png', 'ex2.png', 'ex3.png']]
target = cv2.imread('target.png')
h, w = target.shape[:2]
# Iterate examples
for i, img in enumerate(examples):
# Template matching
# cf. https://docs.opencv.org/4.5.2/d4/dc6/tutorial_py_template_matching.html
res = cv2.matchTemplate(img, target, cv2.TM_CCOEFF_NORMED)
# Get location of maximum
_, max_val, _, top_left = cv2.minMaxLoc(res)
# Set up threshold for decision target found or not
thr = 0.7
if max_val > thr:
# Show found target in example
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
# Visualization
plt.figure(i, figsize=(10, 5))
plt.subplot(1, 2, 1), plt.imshow(img[..., [2, 1, 0]]), plt.title('Example')
plt.subplot(1, 2, 2), plt.imshow(res, vmin=0, vmax=1, cmap='gray')
plt.title('Matching result'), plt.colorbar(), plt.tight_layout()
plt.show()
These are the results:
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
PyCharm: 2021.1.1
Matplotlib: 3.4.1
OpenCV: 4.5.1
----------------------------------------
EDIT: To emphasize the information from the different colors, one might use the hue channel from the HSV color space for the template matching:
import cv2
import matplotlib.pyplot as plt
# Read images
examples = [
[cv2.imread(img) for img in ['ex1.png', 'ex2.png', 'ex3.png']],
[cv2.imread(img) for img in ['ex12.png', 'ex22.png', 'ex32.png']]
]
targets = [
cv2.imread('target.png'),
cv2.imread('target2.png')
]
# Iterate examples and targets
for i, (ex, target) in enumerate(zip(examples, targets)):
for j, img in enumerate(ex):
# Rotate last image from second data set
if (i == 1) and (j == 2):
img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
h, w = target.shape[:2]
# Get hue channel from HSV color space
target_h = cv2.cvtColor(target, cv2.COLOR_BGR2HSV)[..., 0]
img_h = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[..., 0]
# Template matching
# cf. https://docs.opencv.org/4.5.2/d4/dc6/tutorial_py_template_matching.html
res = cv2.matchTemplate(img_h, target_h, cv2.TM_CCOEFF_NORMED)
# Get location of maximum
_, max_val, _, top_left = cv2.minMaxLoc(res)
# Set up threshold for decision target found or not
thr = 0.6
if max_val > thr:
# Show found target in example
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
# Visualization
plt.figure(i * 10 + j, figsize=(10, 5))
plt.subplot(1, 2, 1), plt.imshow(img[..., [2, 1, 0]]), plt.title('Example')
plt.subplot(1, 2, 2), plt.imshow(res, vmin=0, vmax=1, cmap='gray')
plt.title('Matching result'), plt.colorbar(), plt.tight_layout()
plt.savefig('{}.png'.format(i * 10 + j))
plt.show()
New results:
The Concept
We can use the cv2.matchTemplate method to detect where an image is in another image, but for your second set of images you have rotation. Also, we'll need to take the colors into account.
cv2.matchTemplate will take in an image, a template (the other image) and a template detection method, and will return a grayscale array where the brightest point in the grayscale array will be the point with the most confidence that template is at that point.
We can use the template at 4 different angles and use the one that resulted in the highest confidence. When we detected a possible point that matched the template, we use a function (that we will define ourselves) to check if the most frequent colors in the template is present in the patch of the image we detected. If not, then ignore the patch, regardless of the amount of confidence returned.
The Code
import cv2
import numpy as np
def frequent_colors(img, vals=3):
colors, count = np.unique(np.vstack(img), return_counts=True, axis=0)
sorted_by_freq = colors[np.argsort(count)]
return sorted_by_freq[-vals:]
def get_templates(img):
template = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
for i in range(3):
yield cv2.rotate(template, i)
def detect(img, template, min_conf=0.45):
colors = frequent_colors(template)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
conf_max = min_conf
shape = 0, 0, 0, 0
for tmp in get_templates(template):
h, w = tmp.shape
res = cv2.matchTemplate(img_gray, tmp, cv2.TM_CCOEFF_NORMED)
for y, x in zip(*np.where(res > conf_max)):
conf = res[y, x]
if conf > conf_max:
seg = img[y:y + h, x:x + w]
if all(np.any(np.all(seg == color, -1)) for color in colors):
conf_max = conf
shape = x, y, w, h
return shape
files = ["img1_2.png", "img2_2.png", "img3_2.png"]
template = cv2.imread("template2.png")
for name in files:
img = cv2.imread(name)
x, y, w, h = detect(img, template)
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow(name, img)
cv2.imshow('Template', template)
cv2.waitKey(0)
The Output
The Explanation
Import the necessary libraries:
import cv2
import numpy as np
Define a function, frequent_colors, that will take in an image and return the most frequent colors in the image. An optional parameter, val, is how many colors to return; if val is 3, then the 3 most frequent colors will be returned:
def frequent_colors(img, vals=3):
colors, count = np.unique(np.vstack(img), return_counts=True, axis=0)
sorted_by_freq = colors[np.argsort(count)]
return sorted_by_freq[-vals:]
Define a function, get_templates, that will take in an image, and yield the image (in grayscale) at 4 different angles - original, 90 clockwise, 180, and 90 counterclockwise:
def get_templates(img):
template = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
for i in range(3):
yield cv2.rotate(template, i)
Define a function, detect, that will take in an image and a template image, and return the x, y, w, h of the bounding box of the detected template on the image, and for this function we will be utilizing the frequent_colors and get_templates functions defined earlier. The min_conf parameter will be the minimum amount of confidence needed to classify a detection as an actual detection:
def detect(img, template, min_conf=0.45):
Detect the three most frequent colors in the template and store them in a variable, colors. Also, define a grayscale version of the main image:
colors = frequent_colors(template)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
Define the initial value for the greatest confidence detected, and initial values for the detected patch:
conf_max = min_conf
shape = 0, 0, 0, 0
Loop though the grayscale templates at 4 angles, get the shape of the grayscale template (as rotation changes the shape), and use the cv2.matchTemplate method to get the grayscale array of detected templates on the image:
for tmp in get_templates(template):
h, w = tmp.shape
res = cv2.matchTemplate(img_gray, tmp, cv2.TM_CCOEFF_NORMED)
Loop though the x, y coordinates of the detected templates where the confidence is greater than conf_min, and store the confidence in a variable, conf. If conf is greater than the initial greatest confidence variable (conf_max), proceed to detect if all three most frequent colors in the template is present in the patch of the image:
for y, x in zip(*np.where(res > conf_max)):
conf = res[y, x]
if conf > conf_max:
seg = img[y:y + h, x:x + w]
if all(np.any(np.all(seg == color, -1)) for color in colors):
conf_max = conf
shape = x, y, w, h
At the end we can return the shape. If no template is detected in the image, the shape will be the initial values defined for it, 0, 0, 0, 0:
return shape
Finally, loop though each image and use the detect function we defined to get the x, y, w, h of the bounding box. Use the cv2.rectangle method to draw the bounding box onto the images:
files = ["img1_2.png", "img2_2.png", "img3_2.png"]
template = cv2.imread("template2.png")
for name in files:
img = cv2.imread(name)
x, y, w, h = detect(img, template)
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow(name, img)
cv2.imshow('Template', template)
cv2.waitKey(0)
First, the data appears in graphs, aren't you able to get the overlapping values from their numerical data?
And have you tried performing some edge detection for the change in color from white-blue and then from blue-red, fitting some circles to those edges and then checking if they overlap?
Since the input data is quite controlled (no organic photography or videos), perhaps you won't have to go the ML route.
I have this image with tables where I want to remove the tabular structure from the image so that it can work more effectively with Tesseract. I used the following code to create a boundary around the table (and individual cells) so that it can be deleted.
img =cv2.imread('bfir.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray,50,150,apertureSize = 3)
img1 = np.ones(img.shape, dtype=np.uint8)*255
ret,thresh = cv2.threshold(gray,127,255,1)
(_,contours,h) = cv2.findContours(thresh,1,2)
for cnt in contours:
approx = cv2.approxPolyDP(cnt,0.01*cv2.arcLength(cnt,True),True)
if len(approx)==4:
cv2.drawContours(img1,[cnt],0,(0,255,0),2)
This draws green lines around the table like this image.
Next, I tried the cv2.subtract method to subtract the table from the image, somewhat like this.
final_img = cv2.subtract(img1, img)
But this didn't work as I expected and gives me a grayscale image with the table still in it. Link
While I just want the original image in B&W with the table removed. I am using OpenCV for the first time so I don't know what I am doing wrong and I am sorry for the long post but if anybody can please help with how to go about with this or just point me in the right direction about how to remove the table, that would be very much appreciated.
EDIT:
As suggested by RobAu it can also work with simply drawing the contours in white in the first place but I don't know how to do that without losing the rest of the data in the preprocessing stage.
You could try and simply overwrite the cells that represent the borders. This can be done by creating a mask image, and then using that as reference as to where to overwrite pixels in the original.
This can be done with:
mask_image = np.zeros(img.shape[0:2], np.uint8)
cv2.drawContours(mask_image, contours, -1, color=255, thickness=2)
border_points = np.array(np.where(mask_image == 255)).transpose()
background = [0, 0, 0] # Change this to the colour you want
for point in border_points :
img[point[0], point[1]] = background
Update:
You could use the 3-channel you already created for the mask, but that slightly complicates the algorithms. The mask image propose is more fitted for the task, but I will try to adapt it to your code:
# Create your mask image as usual...
border_points = np.array(np.where(img1[:,:,1] == 255)).transpose() # Only look at channel 2
background = [0, 0, 0] # Change this to the colour you want
for point in border_points :
img[point[0], point[1]] = background
Update to do as #RobAu suggested (quicker than my previous methods):
line_thickness = 3 # Change this value until it looks the best.
cv2.drawContours(img, contours, -1, color=(0,0,0), thickness=line_thickness )
Please note I didn't test this code. So it might need some further fiddling.
As a reference to the comments of this question, this is an example of a code that locates rectangles and creates new images for each one, this was an attempt at creating individual images of a picture of shredded paper. Some of the values will need to be changed for it to locate the rectangles with the right amount of size
There is also some code for tracking sizes of images and the code is made up by 50% what i have written and 50% by stackoverflow help.
import cv2
import numpy as np
fileName = ['9','8','7','6','5','4','3','2','1','0']
img = cv2.imread('#YOUR IMAGE#')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
kernel = np.ones((5,5),np.uint8)
erosion = cv2.erode(gray,kernel,iterations = 2)
kernel = np.ones((4,4),np.uint8)
dilation = cv2.dilate(erosion,kernel,iterations = 2)
edged = cv2.Canny(dilation, 30, 200)
_, contours, hierarchy = cv2.findContours(edged, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
rects = [cv2.boundingRect(cnt) for cnt in contours]
rects = sorted(rects,key=lambda x:x[1],reverse=True)
i = -1
j = 1
y_old = 5000
x_old = 5000
for rect in rects:
x,y,w,h = rect
area = w * h
print('width: %d and height: %d' %(w,h))
if w > 50 and h > 500:
print('abs:')
print(abs(x_old - x))
if abs(x_old - x) > 0:
print('writing')
x_old = x
x,y,w,h = rect
out = img[y+10:y+h-10,x+10:x+w-10]
cv2.imwrite('assets/newImage' + fileName[i] + '.jpg', out)
j+=1
if (y_old - y) > 1000:
i += 1
y_old = y
Even though, the given input image links are not working & so I obviously doesn't know the following is what you have asked for, I learnt something from your question, when I was working on, removing table structure lines from given image, I like to share what I have learnt, for the future readers.
I followed the steps provided in opencv documentation to remove the lines.
But that only removed the horizontal lines. When I tried to remove vertical lines, the result image only had the vertical lines. The text in the table was not there.
Then I came across your question & saw final_img = cv2.subtract(img1, img) in the question. Tried that & it worked great.
Here are the steps that I followed:
# Load the image
src = cv.imread(argv[0], cv.IMREAD_COLOR)
# Check if image is loaded fine
if src is None:
print ('Error opening image: ' + argv[0])
return -1
# Show source image
cv.imshow("src", src)
# [load_image]
# [gray]
# Transform source image to gray if it is not already
if len(src.shape) != 2:
gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
else:
gray = src
# Show gray image
# show_wait_destroy("gray", gray)
# [gray]
# [bin]
# Apply adaptiveThreshold at the bitwise_not of gray, notice the ~ symbol
gray = cv.bitwise_not(gray)
bw = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C, \
cv.THRESH_BINARY, 15, -2)
# Show binary image
# show_wait_destroy("binary", bw)
# [bin]
# [init]
# Create the images that will use to extract the horizontal and vertical lines
horizontal = np.copy(bw)
vertical = np.copy(bw)
# [horiz]
# [vert]
# Specify size on vertical axis
rows = vertical.shape[0]
verticalsize = rows / 10
# Create structure element for extracting vertical lines through morphology operations
verticalStructure = cv.getStructuringElement(cv.MORPH_RECT, (1, verticalsize))
# Apply morphology operations
vertical = cv.erode(vertical, verticalStructure)
vertical = cv.dilate(vertical, verticalStructure)
# [init]
# [horiz]
# Specify size on horizontal axis
cols = horizontal.shape[1]
horizontal_size = cols / 30
# Create structure element for extracting horizontal lines through morphology operations
horizontalStructure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))
# Apply morphology operations
horizontal = cv.erode(horizontal, horizontalStructure)
horizontal = cv.dilate(horizontal, horizontalStructure)
lines_removed = cv.subtract(gray, vertical + horizontal)
show_wait_destroy("lines_removed", ~lines_removed)
Input:
Output:
Few things that I changed from the sources:
verticalsize = rows / 10, here, I do not understand the significance of the number 10. In the documentation, 30 was used. I got better result with 10. I guess, the less the division number, the large the structure element & here, as we are targeting straight lines, reducing the number works.
In the documentation, vertical lines are processed after horizontal lines. I reversed the order
I swapped the parameters to cv2.substract(). I used cv2.subtract(img, img1).
I am very new to OpenCV Python and I really need some help here.
So what I am trying to do here is to extract out these words in the image below.
The words and shapes are all hand drawn, so they are not perfect. I have did some coding below.
First of all, I grayscale the image
img_final = cv2.imread(file_name)
img2gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
Then I use THRESH_INV to show the content
ret, new_img = cv2.threshold(image_final, 100 , 255, cv2.THRESH_BINARY_INV)
After which, I dilate the content
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3 , 3))
dilated = cv2.dilate(new_img,kernel,iterations = 3)
I dilate the image is because I can identify text as one cluster
After that, I apply boundingRect around the contour and draw around the rectangle
contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) # get contours
index = 0
for contour in contours:
# get rectangle bounding contour
[x,y,w,h] = cv2.boundingRect(contour)
#Don't plot small false positives that aren't text
if w < 10 or h < 10:
continue
# draw rectangle around contour on original image
cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,255),2)
This is what I got after that.
I am only able to detect one of the text. I have tried many other methods but this is the closet results I have got and it does not fulfill the requirement.
The reason for me to identify the text is so that I can get the X and Y coordinate of each of the text in this image by putting a bounding Rectangle "boundingRect()".
Please help me out. Thank you so much
You can use the fact that the connected component of the letters are much smaller than the large strokes of the rest of the diagram.
I used opencv3 connected components in the code but you can do the same things using findContours.
The code:
import cv2
import numpy as np
# Params
maxArea = 150
minArea = 10
# Read image
I = cv2.imread('i.jpg')
# Convert to gray
Igray = cv2.cvtColor(I,cv2.COLOR_RGB2GRAY)
# Threshold
ret, Ithresh = cv2.threshold(Igray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
# Keep only small components but not to small
comp = cv2.connectedComponentsWithStats(Ithresh)
labels = comp[1]
labelStats = comp[2]
labelAreas = labelStats[:,4]
for compLabel in range(1,comp[0],1):
if labelAreas[compLabel] > maxArea or labelAreas[compLabel] < minArea:
labels[labels==compLabel] = 0
labels[labels>0] = 1
# Do dilation
se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(25,25))
IdilateText = cv2.morphologyEx(labels.astype(np.uint8),cv2.MORPH_DILATE,se)
# Find connected component again
comp = cv2.connectedComponentsWithStats(IdilateText)
# Draw a rectangle around the text
labels = comp[1]
labelStats = comp[2]
#labelAreas = labelStats[:,4]
for compLabel in range(1,comp[0],1):
cv2.rectangle(I,(labelStats[compLabel,0],labelStats[compLabel,1]),(labelStats[compLabel,0]+labelStats[compLabel,2],labelStats[compLabel,1]+labelStats[compLabel,3]),(0,0,255),2)