Select object by color in an image? - python

Is there a way to find object that have specific color (for example red rectangle 100px 50px with white text) and then select that object as it is and cut it to new file? Look at the picture below. I'm trying to make a script for selecting data from image, then convert to text and finally write to Excel.
I googled a lot of howtos but didn't find any that address my problem.
Sample image

I don't know your real intention, would you like only read the text or do you like also extract the parts?
Anyway, I'm going to show you a straightforward and general solution. Take the parts you need; at the end you will find the whole code.
For the whole thing you need 4 modules:
cv2 (openCV) for image processing
numpy to handle special operations on the images
pytesseract to recognize text (ocr)
pillow (pil) to prepare the image for pytesseract
Load and filter
Your original image:
First we filter out all colors except red. lower and upper are the per-channel bounds we want to keep, given in BGR order (blue, green, red), which is how OpenCV stores the channels, not RGB.
# Load the image; OpenCV returns the pixels in BGR channel order.
image = cv.imread("AR87t.jpg")
# Inclusive per-channel bounds (B, G, R): little blue/green, strong red.
lower = np.array([0, 0, 200])
upper = np.array([100, 100, 255])
# Binary mask: 255 where a pixel lies inside [lower, upper], 0 elsewhere.
shapeMask = cv.inRange(image, lower, upper)
cv.imshow("obj shapeMask", shapeMask)
# Block until a key is pressed so the window stays visible.
cv.waitKey(0)
This shows:
finding contours
Next, we find the contours and iterate through them. If a contour has 4 corners, we continue with the next steps...
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; index -2 is the contour list in all
# of them, whereas index 0 would be the image on 3.x.
cnts = cv.findContours(shapeMask.copy(), cv.RETR_EXTERNAL,
                       cv.CHAIN_APPROX_SIMPLE)[-2]
for c in cnts:
    # Simplify the outline; the tolerance is 4% of the contour perimeter.
    peri = cv.arcLength(c, True)
    approx = cv.approxPolyDP(c, 0.04 * peri, True)
    if len(approx) == 4:
        # Four corners: treat the shape as a rectangle and process it here.
        ...
mask the original
With boundingRect, we extract x, y, w, h
# Axis-aligned bounding box of the 4-corner polygon: top-left (x, y), width, height.
(x, y, w, h) = cv.boundingRect(approx)
# Outline the box in green (BGR 0, 255, 0) on the original image.
cv.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), thickness=5)
ocr on the mask
And here comes the magic! First we extract the mask parts and export the openCV image to an PIL image. We are then able to run tesseract over.
# Crop the rectangle out of the binary mask (rows y..y+h, columns x..x+w).
el = shapeMask.copy()[y:y + h, x:x + w]
# Wrap the NumPy array as a PIL image, which is what is handed to pytesseract below.
pil_im = Image.fromarray(el)
cv.imshow("obj", el)
cv.waitKey(0)
# Run OCR on the crop and print the recognised text.
print(pytesseract.image_to_string(pil_im))
This shows you every rectangle as a small image. Your console will print out:
L2 = 33,33
L3 = 44,44
L1 = 12,22
code
import cv2 as cv
import numpy as np
import pytesseract
from PIL import Image

# Load the image (BGR channel order). imread returns None instead of raising
# when the file cannot be read, so fail early with a clear message.
image = cv.imread("AR87t.jpg")
if image is None:
    raise FileNotFoundError("could not read image 'AR87t.jpg'")

# Keep only strongly red pixels: bounds are given per channel as (B, G, R).
lower = np.array([0, 0, 200])
upper = np.array([100, 100, 255])
shapeMask = cv.inRange(image, lower, upper)
cv.imshow("obj shapeMask", shapeMask)
cv.waitKey(0)

# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; index -2 is the contour list in all.
cnts = cv.findContours(shapeMask.copy(), cv.RETR_EXTERNAL,
                       cv.CHAIN_APPROX_SIMPLE)[-2]
for c in cnts:
    # Simplify the outline; the tolerance is 4% of the contour perimeter.
    peri = cv.arcLength(c, True)
    approx = cv.approxPolyDP(c, 0.04 * peri, True)
    if len(approx) == 4:
        # Four corners: treat the shape as a rectangle.
        (x, y, w, h) = cv.boundingRect(approx)
        cv.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), thickness=5)
        print("x:%s, y:%s, w:%s, h:%s" % (x, y, w, h))
        # Crop first, then copy: only the small region is duplicated.
        el = shapeMask[y:y + h, x:x + w].copy()
        pil_im = Image.fromarray(el)
        cv.imshow("obj", el)
        cv.waitKey(0)
        # OCR runs on the mask crop, where the text shows up as black on white.
        print(pytesseract.image_to_string(pil_im))

# Finally show the original image with every detected rectangle outlined.
cv.imshow("obj rectangle", image)
cv.waitKey(0)

Related

Find text on a colored background using OpenCV

I use Python and OpenCV to search for text. I have screenshots of a messenger and I want to separate the senders so that it is clear who sent each message. I came to the conclusion that I need to check the background color of each contour: if it is, for example, green, then I am the sender, and if it is black, then the message is from the other person.
I tried using the following code, but in the end I couldn't find anything:
import cv2
import numpy as np

# Read the screenshot and convert it to HSV for the colour range test.
source = cv2.imread("image.jpg")
source_hsv = cv2.cvtColor(source, cv2.COLOR_BGR2HSV)

# Keep only pixels whose HSV value lies inside the given bounds.
in_range = cv2.inRange(
    source_hsv,
    np.array([0, 31, 52]),
    np.array([31, 255, 255]),
)
foreground = cv2.bitwise_and(source, source, mask=in_range)

# Detect MSER regions on the grayscale version of the masked image.
foreground_gray = cv2.cvtColor(foreground, cv2.COLOR_BGR2GRAY)
detector = cv2.MSER_create()
found_regions, _ = detector.detectRegions(foreground_gray)

# Draw the bounding box of every detected region onto the masked image.
for points in found_regions:
    left, top, width, height = cv2.boundingRect(points.reshape(-1, 1, 2))
    cv2.rectangle(
        foreground,
        (left, top),
        (left + width, top + height),
        (255, 0, 0),
        2,
    )
I have already tried ChatGPT for help, but all the results are not suitable

detect coordinates of black squares and crop image based on these coordinates

I am trying to crop this image by detecting the coordinates of the pitch-black squares on the upper side (tick-marked in this image). Once I have detected the coordinates, I need to crop the image along straight lines through them, as shown in this image, and keep only the inner area. I have tried the following code to detect the coordinates of the contours:
import numpy as np
import cv2

# Font shared by every label drawn below.
label_font = cv2.FONT_HERSHEY_COMPLEX

# One colour copy to draw on, one grayscale copy to threshold.
canvas = cv2.imread("img.jpg", cv2.IMREAD_COLOR)
gray = cv2.imread("img.jpg", cv2.IMREAD_GRAYSCALE)

# Binarise: pixels brighter than 110 become 255, the rest 0.
_, binary = cv2.threshold(gray, 110, 255, cv2.THRESH_BINARY)

outlines, _ = cv2.findContours(binary, cv2.RETR_TREE,
                               cv2.CHAIN_APPROX_SIMPLE)

for outline in outlines:
    # Polygon approximation with a tolerance of 0.9% of the perimeter.
    tolerance = 0.009 * cv2.arcLength(outline, True)
    polygon = cv2.approxPolyDP(outline, tolerance, True)
    cv2.drawContours(canvas, [polygon], 0, (0, 0, 255), 5)

    # Flattened vertex list: x0, y0, x1, y1, ...
    flat = polygon.ravel()
    for pos in range(0, len(flat), 2):
        x = flat[pos]
        y = flat[pos + 1]
        if pos == 0:
            # The first vertex gets a fixed caption.
            cv2.putText(canvas, "Arrow tip", (x, y),
                        label_font, 0.5, (255, 0, 0))
        else:
            # Every other vertex is labelled with its own coordinates.
            cv2.putText(canvas, str(x) + " " + str(y), (x, y),
                        label_font, 0.5, (0, 255, 0))

cv2.imwrite('img.jpg', canvas)
but it isn't detecting coordinates properly as you can see here can someone suggest me what am I doing wrong here? Thank you

How to detect and increase the space between two lines in an image of text using opencv python?

If the initial image is like this(above) then I can successfully introduce space between the 2 lines and get this image(below)
using the code below:
import os

import cv2
import numpy as np
def space_between_lines_and_skewness_correction(file_path):
    """Deskew a text image and insert blank rows between its text lines.

    The image is binarised with an inverted Otsu threshold, rotated by the
    angle of the minimum-area rectangle around all non-zero pixels, and then
    cut into horizontal strips wherever the mean row intensity rises above a
    small threshold. The strips are stacked again with 10 zero rows in front
    of each one.

    Args:
        file_path: Path of the image file; ``~`` is expanded.

    Returns:
        A 2-D float64 NumPy array: one leading zero row followed by, for
        every strip, 10 zero rows and the strip itself. The dtype is float64
        because the spacer rows come from ``np.zeros`` with its default dtype.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the thresholded image has no non-zero pixels.
    """
    img = cv2.imread(os.path.expanduser(file_path))
    if img is None:
        # imread signals failure by returning None rather than raising.
        raise FileNotFoundError("could not read image: {!r}".format(file_path))

    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, threshed = cv2.threshold(
        grey, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )

    pts = cv2.findNonZero(threshed)
    if pts is None:
        raise ValueError("image contains no foreground pixels to deskew")

    # Only the centre and the angle of the rectangle are needed afterwards.
    (cx, cy), (w, h), ang = cv2.minAreaRect(pts)
    if w < h:
        # The rectangle is reported standing upright; turn it a quarter turn
        # so the long side ends up horizontal.
        ang += 90

    H, W = img.shape[:2]
    M = cv2.getRotationMatrix2D((cx, cy), ang, 1.0)
    rotated = cv2.warpAffine(threshed, M, (W, H))

    # Mean intensity of every row; a row at or below the threshold followed
    # by a row above it marks the start of a text line.
    hist = cv2.reduce(rotated, 1, cv2.REDUCE_AVG).reshape(-1)
    row_threshold = 2
    delimiters = [
        y for y in range(H - 1)
        if hist[y] <= row_threshold < hist[y + 1]
    ]

    # Consecutive boundaries delimit the strips: [0, d0), [d0, d1), ..., [dn, H).
    bounds = [0] + delimiters + [H]
    spacer = np.zeros((10, W))
    pieces = [np.zeros((1, W))]
    for top, bottom in zip(bounds, bounds[1:]):
        pieces.append(spacer)
        pieces.append(rotated[top:bottom, 0:W])

    # A single concatenate avoids re-copying the growing result per strip.
    return np.concatenate(pieces, axis=0)
The above code will remove skewness and introduce space.
But for few cases, the above code doesn't work.
These are cases like:
The output for the image is
How to handle cases such as this?
Note:
I tried to resize to a bigger size and do pixel by pixel iteration and building a custom algorithm for this case, but it is taking a huge amount of time to solve and sometimes giving memory error.
Please Note: The input of the above code is actually the inverse image(white background) of the image provided here
Maybe this helps:
def detect_letters(img):
    """Draw a blue box around every connected component of *img* and show it.

    The image is converted to grayscale, binarised with Otsu's threshold and
    labelled with ``cv2.connectedComponentsWithStats``. The boxes are drawn
    directly onto *img* (the argument is modified in place), which is then
    displayed in a window until a key is pressed.

    Args:
        img: Colour image; it is passed to ``cv2.COLOR_BGR2GRAY``, so BGR
            channel order is presumably expected.

    Returns:
        None.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Otsu chooses the threshold itself; the chosen value is not needed.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(thresh)

    # Label 0 is the background component; starting at 1 avoids drawing a
    # box around the background region.
    for i in range(1, num_labels):
        left = int(stats[i, cv2.CC_STAT_LEFT])
        top = int(stats[i, cv2.CC_STAT_TOP])
        width = int(stats[i, cv2.CC_STAT_WIDTH])
        height = int(stats[i, cv2.CC_STAT_HEIGHT])
        # Enclose the component in a blue rectangle (BGR 255, 0, 0).
        cv2.rectangle(img, (left, top), (left + width, top + height), (255, 0, 0), 2)

    cv2.imshow("window", img)
    cv2.waitKey(0)
Input:
Output:
The main intent of the above solution is just to get an enclosing rectangle around every letter.
Now all you need to do is shift all those letters above or below or wherever you want to.
For example, see how the football was shifted in the following link : https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_core/py_basic_ops/py_basic_ops.html
As you know the topmost and bottom-most y coordinate for every letter now, you can see how much far away they currently are and if they are very close just shift the letter as in the above link.
The letters on the same line will have very little difference in their vertex coordinates or centroids. You can use a tolerance range to pick out all of those letters.
If any issues, feel free to ask.

Image watermarking on specific position in an image in python

Currently I am working with an image processing project in which I need to split the image into several segments and then apply watermark on each of the segment.
I have written a code which divides the image into segments by masking. You may find the code here. Now i want to implement watermark on each of these segments. The tutorial for watermarking can be found here.
How am I supposed to do that?
Please help as I am new to OpenCV and Python.
Feel free to ask for any further information needed to solve this.
Thank you!
EDIT
I am adding some code for your inference:
`
# Placeholders from the question: replace both with real paths.
segment = 'segment storing location'  # path prefix for the saved segments
image_path = 'image path'  # image to be segmented
image = cv2.imread(image_path)
# Segmentation of the image with SLIC.
segments = slic(img_as_float(image), compactness=100.0, n_segments=10, sigma=5)
row, col, _ = image.shape
for (i, segVal) in enumerate(np.unique(segments)):
    # print() as a function call works on both Python 2 and Python 3.
    print("[x] inspecting segment %d" % (i))
    # Construct a mask that is 255 inside this segment and 0 elsewhere.
    mask = np.zeros(image.shape[:2], dtype="uint8")
    mask[segments == segVal] = 255
    # Black out everything outside the segment.
    bb = cv2.bitwise_and(image, image, mask=mask)
    # Save the segment as <prefix><index>.png.
    cv2.imwrite(segment + str(i) + ".png", bb)
`
Now after saving the segments, I need to watermark each one of them by calling them one after another. This is the code for watermarking:
import numpy as np
import cv2
import os

# Input/output locations and blend strength of the watermark.
watermark_path = 'D:\\watermark\\wm.png'
source_path = 'D:\\watermark\\input\\image_01.jpg'
output_dir = 'D:\\watermark\\output'
opacity = 0.25

# IMREAD_UNCHANGED loads the watermark file as stored, without conversion.
logo = cv2.imread(watermark_path, cv2.IMREAD_UNCHANGED)
logo_h, logo_w = logo.shape[:2]

# Give the photo a fourth, fully opaque channel.
photo = cv2.imread(source_path)
photo_h, photo_w = photo.shape[:2]
opaque_plane = np.ones((photo_h, photo_w), dtype="uint8") * 255
photo = np.dstack([photo, opaque_plane])

# Empty 4-channel layer with the watermark pasted so that its bottom-right
# corner sits 500 px away from the photo's bottom-right corner.
layer = np.zeros((photo_h, photo_w, 4), dtype="uint8")
bottom = photo_h - 500
right = photo_w - 500
layer[bottom - logo_h:bottom, right - logo_w:right] = logo

# Blend the layer into a copy of the photo.
blended = photo.copy()
cv2.addWeighted(layer, opacity, blended, 1.0, 0, blended)

# Save under the input's file name inside the output directory.
out_name = source_path.rsplit(os.path.sep, 1)[-1]
out_path = output_dir + os.path.sep + out_name
cv2.imwrite(out_path, blended)
Now how can I extract the coordinates of this segment in order to watermark it?
Edit
This is what I get when the lines
`cv2.circle(im, (cX, cY), 7, (255, 255, 255), -1)
cv2.putText(im, "center", (cX - 20, cY - 20),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)`
are kept outside the loop:
And this is what I get when they are executed within the loop:
You need to find the countour of the image (I've downloaded your segment image to try this), then compute the center of the contour.
To find the contour, you need to convert the image to gray scale and threshold it, dividing totally black pixels (black background) from non-black ones (your segment).
Finding the center of the segment
The only assumption I've made is that the pixel values of your segments are different from 0 (total black). This assumption may be invalid but, since you're working with photos of natural landscape (like the one you posted) this should not be a problem.
Feel free to ask for further details.
import numpy as np
import cv2

im = cv2.imread('try.png')
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# Threshold to highlight non-black pixels: everything above 1 becomes 255.
ret, thresh = cv2.threshold(imgray, 1, 255, 0)
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; index -2 is the contour list in all.
contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
for c in contours:
    # Compute the centre of the contour from its moments.
    M = cv2.moments(c)
    if M["m00"] == 0:
        # Degenerate contour with zero area: no centroid, skip it instead
        # of dividing by zero.
        continue
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])
    # Draw the contour and the centre of the shape on the image.
    cv2.drawContours(im, [c], -1, (0, 255, 0), 2)
    cv2.circle(im, (cX, cY), 7, (255, 255, 255), -1)
    cv2.putText(im, "center", (cX - 20, cY - 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
    # Show the image and wait for a key press before the next contour.
    cv2.imshow("Image", im)
    cv2.waitKey(0)
This is what I get:
Placing the watermark
Let's say you have the coordinates of the center of the segment region. Knowing the size of the watermark, you can convert them into the coordinates of the point in the image where the upper-left corner of the watermark should go. In this example I assume that they are (x=10, y=10).
I've reused the last image you posted (I'm not drawing the contours, just the watermark).
import numpy as np
import cv2 as cv

# Upper-left corner (row, column) at which the watermark is pasted.
top = 10
left = 10

# Load the picture and append a fully opaque fourth channel.
photo = cv.imread("try.png")
rows, cols = photo.shape[:2]
opaque_plane = np.ones((rows, cols), dtype="uint8") * 255
photo = np.dstack([photo, opaque_plane])

# Load the watermark as stored and zero its colour channels wherever the
# fourth (alpha) channel is zero.
stamp = cv.imread("watermark.png", cv.IMREAD_UNCHANGED)
stamp_rows, stamp_cols = stamp.shape[:2]
blue, green, red, alpha = cv.split(stamp)
masked_channels = [
    cv.bitwise_and(channel, channel, mask=alpha)
    for channel in (blue, green, red)
]
stamp = cv.merge(masked_channels + [alpha])

# Empty layer of the picture's size with the watermark at (top, left).
layer = np.zeros((rows, cols, 4), dtype="uint8")
layer[top:top + stamp_rows, left:left + stamp_cols] = stamp

# Blend the layer into a copy of the picture and display the result.
result = photo.copy()
cv.addWeighted(layer, 0.4, result, 1.0, 0, result)
cv.imshow("out", result)
cv.waitKey()

How to get x,y position of contours in Python OpenCV

I'm trying to get x and y positions of contours from the following image, but I messed up.
the image
I just need to find x and y positions of contours or center of the contours.
The results will be something like the following as I manually look up their positions from GIMP.
290, 210
982, 190
570, 478
I believe it can be done with cv2.findContours method, but I'm really out of ideas right now.
-Offtopic-
I will use these values to set the cursor position using win32api.SetCursorPos((xposition, yposition))
Thanks
You can refer here
Find Co-ordinates of Contours using OpenCV | Python
# Label the vertices of every contour found in an image with their
# pixel coordinates.
import numpy as np
import cv2

# Font shared by every label drawn below.
label_font = cv2.FONT_HERSHEY_COMPLEX

# One colour copy to draw on, one grayscale copy to threshold.
canvas = cv2.imread('test.jpg', cv2.IMREAD_COLOR)
gray = cv2.imread('test.jpg', cv2.IMREAD_GRAYSCALE)

# Binarise: pixels brighter than 110 become 255, the rest 0.
_, binary = cv2.threshold(gray, 110, 255, cv2.THRESH_BINARY)

outlines, _ = cv2.findContours(binary, cv2.RETR_TREE,
                               cv2.CHAIN_APPROX_SIMPLE)

for outline in outlines:
    # Polygon approximation with a tolerance of 0.9% of the perimeter.
    tolerance = 0.009 * cv2.arcLength(outline, True)
    polygon = cv2.approxPolyDP(outline, tolerance, True)
    cv2.drawContours(canvas, [polygon], 0, (0, 0, 255), 5)

    # Flattened vertex list: x0, y0, x1, y1, ...
    flat = polygon.ravel()
    for pos in range(0, len(flat), 2):
        x = flat[pos]
        y = flat[pos + 1]
        if pos == 0:
            # The first vertex gets a fixed caption.
            cv2.putText(canvas, "Arrow tip", (x, y),
                        label_font, 0.5, (255, 0, 0))
        else:
            # Every other vertex is labelled with its own coordinates.
            cv2.putText(canvas, str(x) + " " + str(y), (x, y),
                        label_font, 0.5, (0, 255, 0))

# Display the annotated image.
cv2.imshow('image2', canvas)

# Close the window when 'q' is the key that ends the wait.
pressed = cv2.waitKey(0) & 0xFF
if pressed == ord('q'):
    cv2.destroyAllWindows()
Indeed, you can do that with findContours. Since you have your contours there are several options:
Calculate enclosing rectangle and take e.g. the center point.
Calculate moments and take the centroid
Fit minimum enclosing circle and take the center
and so on...
Here are some examples of what you can do with your contours, including the options above.
First you need to find contours, draw a bounding box and then take the x and y from there. I hope this helps
import numpy as np
import cv2

im = cv2.imread('ctBI9.png')
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# Binarise: pixels brighter than 127 become 255, the rest 0.
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; index -2 is the contour list in all,
# whereas unpacking three values fails on 4.x.
contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
for c in contours:
    # Ignore tiny contours (area of 50 or less), which are most likely noise.
    if cv2.contourArea(c) <= 50:
        continue
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # (x, y) is the top-left corner of the bounding box, not its centre;
    # the centre would be (x + w // 2, y + h // 2).
    top_left = (x, y)
    print(top_left)

# Keep the window open until Esc (key code 27) is pressed.
while True:
    cv2.imshow('test', im)
    if cv2.waitKey(20) & 0xFF == 27:
        break
cv2.destroyAllWindows()
result is something like this
(93, 746)
(1174, 738)
(147, 736)
(395, 729)
(506, 404)
(240, 168)
(918, 130)

Categories