Related
I am triyng calculate the area of the white pixels in this image:
My code :
import cv2
import matplotlib.pyplot as plt
import numpy as np
img = cv2.imread("tepsi.jpg")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv,(21, 10, 15), (30, 255, 255) )
cv2.imshow("orange", mask)
cv2.waitKey()
cv2.destroyAllWindows()
How can I solve this problem? Actually my purpose is to find empty areas of food tray.
Thank you for your answers.
Thanks for your advices, i found a solution for my problem for above picture. But i will try your advices...
Note: My tray is white area and it is constant for now.
My solution :
import numpy as np
import cv2
image = cv2.imread("petibor_biskuvi.png")
dimensions = image.shape
height= image.shape[0]
width = image.shape[1]
size = height*width
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
_,thresh = cv2.threshold(gray,150,255,cv2.THRESH_BINARY_INV)
cnts, hier = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
size_elements = 0
for cnt in cnts:
cv2.drawContours(image,cnts, -1, (0, 0, 255), 3)
size_elements += cv2.contourArea(cnt)
print(cv2.contourArea(cnt))
cv2.imshow("Image", image)
print("size elements total : ", size_elements)
print("size of pic : ", size)
print("rate of fullness : % ", (size_elements/size)*100)
cv2.waitKey(0)
This is one option. Since white in BGR is (255, 255, 255), id suggest you convert the image to a boolean true where each (independent) channel equals to 255:
b, g, r = cv2.split(img)
wb = b == 255
wg = g == 255
wr = r == 255
The value of all of the channels must be 255 (True) for the same pixel, so, use np.bitwise_and:
white_pixels_if_true = np.bitwise_and(wb, np.bitwise_and(wg, wr))
Finally, get the count of the true value and the size of the image and find the percentage of white pixels:
img_size = r.size
white_pixels_count = np.sum(white_pixels_if_true)
white_area_ratio = white_pixels_count / img_size
Given the area of the image you can multiply the area by the white_area_ratio to get the white area.
How to get height and width of bright part using python (opencv) ? Having 2000 of these picture and end goal is to make table with length and width values
It is primitive method. Convert to grayscale and check which points have value bigger then some "bright" color ie. 21 and it gives array True/False - using .any(axis=0) you can reduce every row to single value, using .any(axis=1) you can reduce every column to single value and then using sum() you can count how many True was in any row or column (because True/False is converted to 1/0)
import cv2
img = cv2.imread('image.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#print(img)
print('height, width, color:', img.shape)
#cv2.imshow('image', img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
print('width:', sum((img > 21).any(axis=0)))
print('height:', sum((img > 21).any(axis=1)))
For your image it gives me
width: 19
height: 27
For my image (below) it gives me
width: 23
height: 128
EDIT: Version with small change.
I set mask = (img > 21) to
calculate size
create Black&White image which better shows which points are
used to calculate size.
BTW: code ~mask inverts mask (convert True to False and False to True). It can be used also to invert image - ~img - to create negative for RGB, Grayscale or B&W.
Code:
import cv2
for filename in ['image-1.png', 'image-2.png']:
img = cv2.imread(filename)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
print('height, width, color:', img.shape)
mask = (img > 21)
# display size
print(' width:', sum( mask.any(axis=0) ))
print('height:', sum( mask.any(axis=1) ))
# create Black&White version
img[ mask ] = 255 # set white
img[ ~mask ] = 0 # set black
# display Black&White version
cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# write Black&White version
cv2.imwrite('BW-' + filename, img)
--->
--->
EDIT: The same result using cv2.boundingRect() instead of sum(mask.any()) - but it still needs img[ mask ] = 255 to create Black&White image.
import cv2
for filename in ['image-1.png', 'image-2.png']:
print('filename:', filename)
img = cv2.imread(filename)
#print('height, width, color:', img.shape)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#print('height, width, color:', img.shape)
mask = (img > 21)
# create Black&White version
img[ mask ] = 255 # set white
img[ ~mask ] = 0 # set black
x, y, width, height = cv2.boundingRect(img)
# display size
print(' width:', width)
print('height:', height)
# display Black&White version
#cv2.imshow('image', img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
# write Black&White version
#cv2.imwrite('BW-' + filename, img)
https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.html#bounding-rectangle
EDIT: The same result using cv2.boundingRect() and cv2.threshold() - so it doesn't need mask
import cv2
for filename in ['image-1.png', 'image-2.png']:
print('filename:', filename)
img = cv2.imread(filename)
#print('height, width, color:', img.shape)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#print('height, width, color:', img.shape)
ret, img = cv2.threshold(img, 21, 255, cv2.THRESH_BINARY) # the same 21 as in `mask = (img > 21)`
x, y, width, height = cv2.boundingRect(img)
# display size
print(' width:', width)
print('height:', height)
# display Black&White version
cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# write Black&White version
#cv2.imwrite('BW-' + filename, img)
https://docs.opencv.org/3.4/d7/d4d/tutorial_py_thresholding.html
https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html
https://www.learnopencv.com/opencv-threshold-python-cpp/
https://www.geeksforgeeks.org/python-thresholding-techniques-using-opencv-set-1-simple-thresholding/
I have these images
For which I want to remove the text in the background. Only the captcha characters should remain(i.e K6PwKA, YabVzu). The task is to identify these characters later using tesseract.
This is what I have tried, but it isn't giving much good accuracy.
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\HPO2KOR\AppData\Local\Tesseract-OCR\tesseract.exe"
img = cv2.imread("untitled.png")
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_filtered = cv2.inRange(gray_image, 0, 75)
cv2.imwrite("cleaned.png", gray_filtered)
How can I improve the same?
Note :
I tried all the suggestion that I was getting for this question and none of them worked for me.
EDIT :
According to Elias, I tried finding the color of the captcha text using photoshop by converting it to grayscale which came out to be somewhere in between [100, 105]. I then threshold the image based on this range. But the result which I got did not give satisfactory result from tesseract.
gray_filtered = cv2.inRange(gray_image, 100, 105)
cv2.imwrite("cleaned.png", gray_filtered)
gray_inv = ~gray_filtered
cv2.imwrite("cleaned.png", gray_inv)
data = pytesseract.image_to_string(gray_inv, lang='eng')
Output :
'KEP wKA'
Result :
EDIT 2 :
def get_text(img_name):
lower = (100, 100, 100)
upper = (104, 104, 104)
img = cv2.imread(img_name)
img_rgb_inrange = cv2.inRange(img, lower, upper)
neg_rgb_image = ~img_rgb_inrange
cv2.imwrite('neg_img_rgb_inrange.png', neg_rgb_image)
data = pytesseract.image_to_string(neg_rgb_image, lang='eng')
return data
gives :
and the text as
GXuMuUZ
Is there any way to soften it a little
Here are two potential approaches and a method to correct distorted text:
Method #1: Morphological operations + contour filtering
Obtain binary image. Load image, grayscale, then Otsu's threshold.
Remove text contours. Create a rectangular kernel with cv2.getStructuringElement() and then perform morphological operations to remove noise.
Filter and remove small noise. Find contours and filter using contour area to remove small particles. We effectively remove the noise by filling in the contour with cv2.drawContours()
Perform OCR. We invert the image then apply a slight
Gaussian blur. We then OCR using Pytesseract with the --psm 6 configuration option to treat the image as a single block of text. Look at Tesseract improve quality for other methods to improve detection and Pytesseract configuration options for additional settings.
Input image -> Binary -> Morph opening
Contour area filtering -> Invert -> Apply blur to get result
Result from OCR
YabVzu
Code
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Find contours and remove small noise
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
if area < 50:
cv2.drawContours(opening, [c], -1, 0, -1)
# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3,3), 0)
# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()
Method #2: Color segmentation
With the observation that the desired text to extract has a distinguishable contrast from the noise in the image, we can use color thresholding to isolate the text. The idea is to convert to HSV format then color threshold to obtain a mask using a lower/upper color range. From were we use the same process to OCR with Pytesseract.
Input image -> Mask -> Result
Code
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('2.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Invert image and OCR
invert = 255 - mask
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('invert', invert)
cv2.waitKey()
Correcting distorted text
OCR works best when the image is horizontal. To ensure that the text is in an ideal format for OCR, we can perform a perspective transform. After removing all the noise to isolate the text, we can perform a morph close to combine individual text contours into a single contour. From here we can find the rotated bounding box using cv2.minAreaRect and then perform a four point perspective transform using imutils.perspective.four_point_transform. Continuing from the cleaned mask, here's the results:
Mask -> Morph close -> Detected rotated bounding box -> Result
Output with the other image
Updated code to include perspective transform
import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))
# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()
Note: The color threshold range was determined using this HSV threshold script
import cv2
import numpy as np
def nothing(x):
pass
# Load image
image = cv2.imread('2.png')
# Create a window
cv2.namedWindow('image')
# Create trackbars for color change
# Hue is from 0-179 for Opencv
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)
# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)
# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
phMin = psMin = pvMin = phMax = psMax = pvMax = 0
while(1):
# Get current positions of all trackbars
hMin = cv2.getTrackbarPos('HMin', 'image')
sMin = cv2.getTrackbarPos('SMin', 'image')
vMin = cv2.getTrackbarPos('VMin', 'image')
hMax = cv2.getTrackbarPos('HMax', 'image')
sMax = cv2.getTrackbarPos('SMax', 'image')
vMax = cv2.getTrackbarPos('VMax', 'image')
# Set minimum and maximum HSV values to display
lower = np.array([hMin, sMin, vMin])
upper = np.array([hMax, sMax, vMax])
# Convert to HSV format and color threshold
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower, upper)
result = cv2.bitwise_and(image, image, mask=mask)
# Print if there is a change in HSV value
if((phMin != hMin) | (psMin != sMin) | (pvMin != vMin) | (phMax != hMax) | (psMax != sMax) | (pvMax != vMax) ):
print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin , sMin , vMin, hMax, sMax , vMax))
phMin = hMin
psMin = sMin
pvMin = vMin
phMax = hMax
psMax = sMax
pvMax = vMax
# Display result image
cv2.imshow('image', result)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
Your code produces better results than this. Here, I set a threshold for upperb and lowerb values based on histogram CDF values and a threshold. Press ESC button to get next image.
This code is unnecessarily complex and needs to be optimized in various ways. Code can be reordered to skip some steps. I kept it as some parts may help others. Some existing noise can be removed by keeping contour with area above certain threshold. Any suggestions on other noise reduction method is welcome.
Similar easier code for getting 4 corner points for perspective transform can be found here,
Accurate corners detection?
Code Description:
Original Image
Median Filter (Noise Removal and ROI Identification)
OTSU Thresholding
Invert Image
Use Inverted Black and White Image as Mask to keep mostly ROI part of original image
Dilation for largest Contour finding
Mark the ROI by drawing rectangle and corner points in original image
Straighten the ROI and extract it
Median Filter
OTSU Thresholding
Invert Image for mask
Mask the straight image to remove most noise further to text
In Range is used with lowerb and upperb values from histogram cdf as mentioned above to further reduce noise
Maybe eroding the image at this step will produce somewhat acceptable result. Instead here that image is dilated again and used as a mask to get less noisy ROI from perspective transformed image.
Code:
## Press ESC button to get next image
import cv2
import cv2 as cv
import numpy as np
frame = cv2.imread('extra/c1.png')
#frame = cv2.imread('extra/c2.png')
## keeping a copy of original
print(frame.shape)
original_frame = frame.copy()
original_frame2 = frame.copy()
## Show the original image
winName = 'Original'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Apply median blur
frame = cv2.medianBlur(frame,9)
## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)
# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n
## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## invert color
frame = cv2.bitwise_not(frame)
## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)
##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
cv.imshow(winName, img_gray & frame)
cv.waitKey(0)
## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Get largest contour from contours
contours, hierarchy = cv2.findContours(frame, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
## Get minimum area rectangle and corner points
rect = cv2.minAreaRect(max(contours, key = cv2.contourArea))
print(rect)
box = cv2.boxPoints(rect)
print(box)
## Sorted points by x and y
## Not used in this code
print(sorted(box , key=lambda k: [k[0], k[1]]))
## draw anchor points on corner
frame = original_frame.copy()
z = 6
for b in box:
cv2.circle(frame, tuple(b), z, 255, -1)
## show original image with corners
box2 = np.int0(box)
cv2.drawContours(frame,[box2],0,(0,0,255), 2)
cv2.imshow('Detected Corners',frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
## https://stackoverflow.com/questions/11627362/how-to-straighten-a-rotated-rectangle-area-of-an-image-using-opencv-in-python
def subimage(image, center, theta, width, height):
shape = ( image.shape[1], image.shape[0] ) # cv2.warpAffine expects shape in (length, height)
matrix = cv2.getRotationMatrix2D( center=center, angle=theta, scale=1 )
image = cv2.warpAffine( src=image, M=matrix, dsize=shape )
x = int(center[0] - width / 2)
y = int(center[1] - height / 2)
image = image[ y:y+height, x:x+width ]
return image
## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
## use the calculated rectangle attributes to rotate and extract it
frame = subimage(original_frame, center=rect[0], theta=int(rect[2]), width=int(rect[1][0]), height=int(rect[1][1]))
original_frame = frame.copy()
cv.imshow(winName, frame)
cv.waitKey(0)
perspective_transformed_image = frame.copy()
## Apply median blur
frame = cv2.medianBlur(frame,11)
## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)
# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n
## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## invert color
frame = cv2.bitwise_not(frame)
## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)
##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
frame = img_gray & frame
frame[np.where(frame==0)] = 255
cv.imshow(winName, frame)
cv.waitKey(0)
hist,bins = np.histogram(frame.flatten(),256,[0,256])
cdf = hist.cumsum()
cdf_normalized = cdf * hist.max()/ cdf.max()
print(cdf)
print(cdf_normalized)
hist_image = frame.copy()
## two decresing range algorithm
low_index = -1
for i in range(0, 256):
if cdf[i] > 0:
low_index = i
break
print(low_index)
tol = 0
tol_limit = 20
broken_index = -1
past_val = cdf[low_index] - cdf[low_index + 1]
for i in range(low_index + 1, 255):
cur_val = cdf[i] - cdf[i+1]
if tol > tol_limit:
broken_index = i
break
if cur_val < past_val:
tol += 1
past_val = cur_val
print(broken_index)
##
lower = min(frame.flatten())
upper = max(frame.flatten())
print(min(frame.flatten()))
print(max(frame.flatten()))
#img_rgb_inrange = cv2.inRange(frame_HSV, np.array([lower,lower,lower]), np.array([upper,upper,upper]))
img_rgb_inrange = cv2.inRange(frame, (low_index), (broken_index))
neg_rgb_image = ~img_rgb_inrange
## Show the original image
winName = 'Final'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, neg_rgb_image)
cv.waitKey(0)
kernel = np.ones((3,3),np.uint8)
frame = cv2.erode(neg_rgb_image,kernel,iterations = 1)
winName = 'Final Dilate'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
##
winName = 'Final Subtracted'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
img2 = np.zeros_like(perspective_transformed_image)
img2[:,:,0] = frame
img2[:,:,1] = frame
img2[:,:,2] = frame
frame = img2
cv.imshow(winName, perspective_transformed_image | frame)
cv.waitKey(0)
##
import matplotlib.pyplot as plt
plt.plot(cdf_normalized, color = 'b')
plt.hist(hist_image.flatten(),256,[0,256], color = 'r')
plt.xlim([0,256])
plt.legend(('cdf','histogram'), loc = 'upper left')
plt.show()
1. Median Filter:
2. OTSU Threshold:
3. Invert:
4. Inverted Image Dilation:
5. Extract by Masking:
6. ROI points for transform:
7. Perspective Corrected Image:
8. Median Blur:
9. OTSU Threshold:
10. Inverted Image:
11. ROI Extraction:
12. Clamping:
13. Dilation:
14. Final ROI:
15. Histogram plot of step 11 image:
Didn't try , but this might work.
step 1:
use ps to find out what color the captcha characters are. For excample, "YabVzu" is (128,128,128),
Step 2:
Use pillow's method getdata()/getcolor(), it will return a sequence which contain the colour of every pixel.
then ,we project every item in the sequence to the original captcha image.
hence we know the positon of every pixel in the image.
Step 3:
find all pixels whose colour with the most approximate values to (128,128,128).
You may set a threshold to control the accuracy. this step return another sequence.
Lets annotate it as Seq a
Step 4:
generate a picture with the very same height and width as the original one.
plot every pixel in [Seq a] in the very excat position in the picture. Here,we will get a cleaned training items
Step 5:
Use a Keras project to break the code. And the precission should be over 72%.
This is the imageI am trying to give proper shape to the images in my folder but unable to get that perfect result. Following is one type of example:
Following is the coding that I have done for my folder containing this type of images:
''''code''''
import cv2
import numpy as np
import glob
path = r'C:\Users\User\Desktop\A\*.jpg'
def k_function(image,k):
z= image.reshape((-1,4))
z=np.float32(z)
criteria = (cv2.TERM_CRITERIA_EPS+cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret,label,center=cv2.kmeans(z,k,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
center = np.uint8(center)
res = center[label.flatten()]
res2 = res.reshape((image.shape))
return res2
def noise_function(image):
kernel = np.ones((2, 2), np.uint8)
closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE,
kernel, iterations = 2)
bg = cv2.dilate(closing, kernel, iterations = 1)
dist_transform = cv2.distanceTransform(closing, cv2.DIST_L2, 0)
ret, fg = cv2.threshold(dist_transform, 0.02
* dist_transform.max(), 255, 0)
return fg
def filling(thresh):
im_floodfill = thresh.copy()
h, w = thresh.shape[:2]
mask = np.zeros((h+2, w+2), np.uint8)
cv2.floodFill(im_floodfill, mask,(60,60),255);
im_floodfill_inv = cv2.bitwise_not(im_floodfill)
n = thresh | im_floodfill_inv
return n
for i, img in enumerate(glob.glob(path)):
img1 = cv2.imread(img)
n = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
b= k_function(n,2)
nm, thresh1 = cv2.threshold(b, 127, 255, cv2.THRESH_BINARY_INV);
fill = filling(thresh1)
noise = noise_function(fill)
cv2.imwrite(r'C:\Users\User\Desktop\New folder\image{}.jpg'.format(i),noise)
Try using copyMakeBorder to make a border. It looks like you're trying to use floodFill and I've never figured out how that is supposed to work.
import cv2
image = cv2.imread('elbow.png')
image = cv2.copyMakeBorder(image, 10, 0, 0, 10, cv2.BORDER_CONSTANT)
cv2.imwrite('elbow_border.png', image)
elbow.png:
elbow_border.png:
I would approach it a bit differently in Python/OpenCV. I would convert to HSV and threshold the saturation channel. Then use morphology open to smooth outline.
Input (cropped from your post):
import cv2
# load image as HSV and select saturation
img = cv2.imread("finger.png")
sat = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:,:,1]
# threshold the saturation channel
ret, thresh = cv2.threshold(sat,25,255,0)
# apply morphology open to smooth the outline
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (19,19))
smoothed = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
# write result to disk
cv2.imwrite("finger_smoothed.png", smoothed)
cv2.imshow("SAT", sat)
cv2.imshow("THRESH", thresh)
cv2.imshow("SMOOTHED", smoothed)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
How can I crop a concave polygon from an image. My Input image look like
.
and the coordinates of closed polygon are
[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]. I want region bounded by concave polygon to be cropped using opencv. I searched for other similar questions but I did not able to find correct answer. That's why I am asking it ? Can you help me.
Any help would be highly appreciated.!!!
Steps
find region using the poly points
create mask using the poly points
do mask op to crop
add white bg if needed
The code:
# 2018.01.17 20:39:17 CST
# 2018.01.17 20:50:35 CST
import numpy as np
import cv2
img = cv2.imread("test.png")
pts = np.array([[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]])
## (1) Crop the bounding rect
rect = cv2.boundingRect(pts)
x,y,w,h = rect
croped = img[y:y+h, x:x+w].copy()
## (2) make mask
pts = pts - pts.min(axis=0)
mask = np.zeros(croped.shape[:2], np.uint8)
cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
## (3) do bit-op
dst = cv2.bitwise_and(croped, croped, mask=mask)
## (4) add the white background
bg = np.ones_like(croped, np.uint8)*255
cv2.bitwise_not(bg,bg, mask=mask)
dst2 = bg+ dst
cv2.imwrite("croped.png", croped)
cv2.imwrite("mask.png", mask)
cv2.imwrite("dst.png", dst)
cv2.imwrite("dst2.png", dst2)
Source image:
Result:
You can do it in 3 steps:
Create a mask out of the image
mask = np.zeros((height, width))
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]])
cv2.fillPoly(mask, points, (255))
Apply mask to original image
res = cv2.bitwise_and(img,img,mask = mask)
Optionally you can remove the crop the image to have a smaller one
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
cropped = res[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
With this you should have at the end the image cropped
UPDATE
For the sake of completeness here is the complete code:
import numpy as np
import cv2
img = cv2.imread("test.png")
height = img.shape[0]
width = img.shape[1]
mask = np.zeros((height, width), dtype=np.uint8)
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]])
cv2.fillPoly(mask, points, (255))
res = cv2.bitwise_and(img,img,mask = mask)
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
cropped = res[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
cv2.imshow("cropped" , cropped )
cv2.imshow("same size" , res)
cv2.waitKey(0)
For the colored background version use the code like this:
import numpy as np
import cv2
img = cv2.imread("test.png")
height = img.shape[0]
width = img.shape[1]
mask = np.zeros((height, width), dtype=np.uint8)
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]])
cv2.fillPoly(mask, points, (255))
res = cv2.bitwise_and(img,img,mask = mask)
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
im2 = np.full((res.shape[0], res.shape[1], 3), (0, 255, 0), dtype=np.uint8 ) # you can also use other colors or simply load another image of the same size
maskInv = cv2.bitwise_not(mask)
colorCrop = cv2.bitwise_or(im2,im2,mask = maskInv)
finalIm = res + colorCrop
cropped = finalIm[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
cv2.imshow("cropped" , cropped )
cv2.imshow("same size" , res)
cv2.waitKey(0)
For the blured image background version use the code like this:
img = cv2.imread(img_path)
box = <box points>
# -- background
blur_bg = cv2.blur(img, (h, w))
mask1 = np.zeros((h, w, 3), np.uint8)
mask2 = np.ones((h, w, 3), np.uint8) * 255
cv2.fillPoly(mask1, box, (255, 255, 255))
# -- indexing
img_idx = np.where(mask1 == mask2)
bg_idx = np.where(mask1 != mask2)
# -- fill box
res = np.zeros((h, w, 3), np.int64)
res[img_idx] = img[img_idx]
res[bg_idx] = blur_bg[bg_idx]
res = res[y1:y2, x1:x2, :]