I'm looking for some insight into what the best approach might be to my problem.
I'm comparing two separate images for differences, but I'm running into a problem with small translational movements.
I have a "gospel" image which is the "gold standard" per se:
gospel image
Then I have multiple captured images to compare against it.
Here's an example: example image
Here's an example difference image showing my problem: difference image
As you can see, they are quite small. The way I am differencing the images now is by first resizing them to 32x32, manually decreasing the contrast by 100, and then applying a blur using OpenCV.
After that, I am using skimage's structural_similarity function to compare and quantify the differences between the images. The rest is purely for viewing.
import cv2
import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity
def change_contrast(img, level):
img = Image.fromarray(img)
factor = (259 * (level + 255)) / (255 * (259 - level))
def contrast(c):
return 128 + factor * (c - 128)
return np.asarray(img.point(contrast))
# Open and preprocess the images
image_orig = cv2.imread(IMAGE_PATH)
image = cv2.resize(image_orig, (32, 32))
image = change_contrast(image, -100)
image = cv2.blur(image, (5, 5))
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gospel_orig = cv2.imread(GOSPEL_PATH)
gospel = cv2.resize(gospel_orig, (32, 32))
gospel = change_contrast(gospel, -100)
gospel = cv2.blur(gospel, (5, 5))
gospel = cv2.cvtColor(gospel, cv2.COLOR_BGR2GRAY)
# Get image similarities and an output difference image
(score, diff) = structural_similarity(image, gospel, full=True)
print("Image similarity", score)
diff = (diff * 255).astype("uint8")
# Viewing stuff below
thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
filled_gospel = cv2.cvtColor(gospel, cv2.COLOR_GRAY2BGR)
for c in contours:
area = cv2.contourArea(c)
if area > 40:
x,y,w,h = cv2.boundingRect(c)
cv2.drawContours(filled_gospel, [c], 0, (0,255,0), -1)
cv2.imshow('image', image)
cv2.imshow('gospel', gospel)
cv2.imshow('diff',diff)
cv2.imshow('filled gospel',filled_gospel)
cv2.waitKey(0)
When I do the above steps, you can see some translational differences between the 'gospel' and the taken image. What would be the best way to combat this, as I only want to get the differences in the black of the letter, not in how well it is aligned?
Here is how I would do template matching and differencing in Python/OpenCV.
Read the reference and example images
Pad the example image to twice its dimensions with its background gray color.
Do template matching with the reference to find the best match location and match score.
Crop the padded example image with its top left corner at the match location, but with the size of the reference image.
Get the absolute difference image
Save results
Reference:
Example:
import cv2
import numpy as np
# read reference and convert to gray
ref = cv2.imread('reference.png')
ref_gray = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY)
hr, wr = ref_gray.shape
# read example and convert to gray
ex = cv2.imread('example.png')
ex_gray = cv2.cvtColor(ex, cv2.COLOR_BGR2GRAY)
he, we = ex_gray.shape
# pad the example to double its dimensions with gray=190
color=190
wp = we // 2
hp = he // 2
ex_gray = cv2.copyMakeBorder(ex_gray, hp,hp,wp,wp, cv2.BORDER_CONSTANT, value=color)
# do template matching: search for the (smaller) reference inside the padded example
corrimg = cv2.matchTemplate(ex_gray,ref_gray,cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(corrimg)
max_val_corr = '{:.3f}'.format(max_val)
print("correlation: " + max_val_corr)
xx = max_loc[0]
yy = max_loc[1]
print('x_match_loc =',xx,'y_match_loc =',yy)
# crop the padded example image at top left corner of xx,yy and size hr x wr
ex_gray_crop = ex_gray[yy:yy+hr, xx:xx+wr]
# get absolute difference image
ref_grayf = ref_gray.astype(np.float32)
ex_gray_cropf = ex_gray_crop.astype(np.float32)
diff = cv2.absdiff(ref_grayf, ex_gray_cropf).astype(np.uint8)
# compute mean of diff as a percentage (0 = identical, 100 = completely different)
mean = 100 * cv2.mean(diff)[0] / 255
print("mean of diff in range 0 to 100 =",mean)
cv2.imshow('ref_gray', ref_gray)
cv2.imshow('ex_gray', ex_gray)
cv2.imshow('ex_gray_crop', ex_gray_crop)
cv2.imshow('correlation image', corrimg)
cv2.imshow('diff', diff)
cv2.waitKey(0)
cv2.destroyAllWindows()
# save results
cv2.imwrite('reference_gray.jpg', ref_gray)
cv2.imwrite('example_gray_padded.jpg', ex_gray)
cv2.imwrite('reference_example_correlation.jpg', (255*corrimg).clip(0,255).astype(np.uint8))
cv2.imwrite('example_gray_padded_cropped.jpg', ex_gray_crop)
cv2.imwrite('reference_example_diff.jpg', diff)
Example padded:
Correlation Image showing locations of best match:
Match Results:
correlation: 0.969
x_match_loc = 10 y_match_loc = 9
mean of diff in range 0 to 100 = 1.3956887102667155
Example cropped to align with reference:
Diff image (white is where they differ):
Given the following two images:
Filled Form
Template
I would like to remove the template from this image, and leave behind ONLY the handwriting. I have code that aligns these images perfectly, but I am struggling on the code to remove the underlying template.
The code I currently have is as follows:
#Read in images and threshold
image = cv2.imread('image0.png')
template = cv2.imread('image1.png')
(thresh, im_bw) = cv2.threshold(image, 100, 255, cv2.THRESH_BINARY)
(thresh, temp_bw) = cv2.threshold(template, 100, 255, cv2.THRESH_BINARY)
#Convert temp from color to gray
graymask = cv2.cvtColor(temp_bw, cv2.COLOR_BGR2GRAY)
#Increase thickness of lines slightly
kernel = np.ones((2,2),np.uint8)
mask_crop = cv2.erode(graymask, kernel, iterations = 2)
(thresh, blackAndWhitemask) = cv2.threshold(mask_crop, 175, 255, cv2.THRESH_BINARY)
bw = cv2.bitwise_not(blackAndWhitemask)
#Inpaint
dst = cv2.inpaint(im_bw, bw, 3, cv2.INPAINT_NS)
The issue is that the resulting image (Output) does not look clean. You can clearly tell that there was a template there to begin with. Does anyone have any other techniques that they would recommend?
The difference image solves most of the problem, but getting a clean signature is challenging.
First stage - finding where image and template are different:
# Read in images and threshold
image = cv2.imread('image0.png', cv2.IMREAD_GRAYSCALE) # Read image as grayscale
template = cv2.imread('image1.png', cv2.IMREAD_GRAYSCALE)
diff = (image != template).astype(np.uint8)*255 # Find the difference and convert it to OpenCV mask format (255 where True).
cv2.imwrite('orig_diff.png', diff)
Small improvement:
Find where absolute difference is above 200:
thresh, diff = cv2.threshold(cv2.absdiff(image, template), 200, 255, cv2.THRESH_BINARY)
For covering the small black gaps (assuming all signatures are identical), we may use the following steps:
Find contours in diff - each contour corresponds to a signature.
Find the bounding rectangles of all the signatures (of the contours).
Iterate over the signatures, and for each one iterate over all the other signatures and take the element-wise maximum of the two.
Taking the maximum of two signatures fills in the black gaps.
Use the result of the previous stage as a mask.
The result is not perfect because the bounding boxes are not perfectly aligned, and because the original difference is too "thick".
Code sample:
import cv2
import numpy as np
#Read in images and threshold
image = cv2.imread('image0.png', cv2.IMREAD_GRAYSCALE) # Read image as grayscale
template = cv2.imread('image1.png', cv2.IMREAD_GRAYSCALE)
#diff = (image != template).astype(np.uint8)*255 # Find the difference and convert it to OpenCV mask format (255 where True).
thresh, diff = cv2.threshold(cv2.absdiff(image, template), 200, 255, cv2.THRESH_BINARY) # Find where absolute difference is above 200 and convert it to OpenCV mask format (255 where True).
cv2.imwrite('orig_diff.png', diff)
# Dilate diff to get the rough location of the signatures
dilated_diff = cv2.dilate(diff, np.ones((51, 51), np.uint8))
# Find contours - each contour corresponds to a signature
cnts = cv2.findContours(dilated_diff, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0]
rects = []
# Find bounding rectangles of all the signatures
for c in cnts:
bounding_rect = cv2.boundingRect(c)
rects.append(bounding_rect)
# Cover black parts in diff - assume all the signatures are the same.
# Iterate over the signatures, and for each one iterate over all the other signatures and place the maximum of the two
for rect1 in rects:
    x1, y1, w1, h1 = rect1
    for rect2 in rects:
        x2, y2, w2, h2 = rect2
        w3 = min(w1, w2)
        h3 = min(h1, h2)
        roi1 = diff[y1:y1+h3, x1:x1+w3]
        roi2 = diff[y2:y2+h3, x2:x2+w3]
        diff[y2:y2+h3, x2:x2+w3] = np.maximum(roi1, roi2)
dst = image.copy()
dst[(diff == 0) | (image > 50)] = 255 # Place white color where diff=0 and also where image is white.
cv2.imwrite('diff.png', diff)
cv2.imwrite('dilated_diff.png', dilated_diff)
cv2.imwrite('dst.png', dst)
cv2.imshow('diff', diff)
cv2.imshow('dilated_diff', dilated_diff)
cv2.imshow('dst', dst)
cv2.waitKey()
cv2.destroyAllWindows()
Output:
orig_diff.png:
dilated_diff.png:
diff.png:
I am trying to segment lung CT images using Kmeans with the code below:
# Imports needed by this snippet (the full code is in the linked notebook)
import os
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
from skimage import measure, morphology
from skimage.morphology import convex_hull_image
from scipy.ndimage import generate_binary_structure, binary_dilation
def process_mask(mask):
convex_mask = np.copy(mask)
for i_layer in range(convex_mask.shape[0]):
mask1 = np.ascontiguousarray(mask[i_layer])
if np.sum(mask1)>0:
mask2 = convex_hull_image(mask1)
if np.sum(mask2)>2*np.sum(mask1):
mask2 = mask1
else:
mask2 = mask1
convex_mask[i_layer] = mask2
struct = generate_binary_structure(3,1)
dilatedMask = binary_dilation(convex_mask,structure=struct,iterations=10)
return dilatedMask
def lumTrans(img):
lungwin = np.array([-1200.,600.])
newimg = (img-lungwin[0])/(lungwin[1]-lungwin[0])
newimg[newimg<0]=0
newimg[newimg>1]=1
newimg = (newimg*255).astype('uint8')
return newimg
def lungSeg(imgs_to_process,output,name):
if os.path.exists(output+'/'+name+'_clean.npy') : return
imgs_to_process = Image.open(imgs_to_process)
img_to_save = imgs_to_process.copy()
img_to_save = np.asarray(img_to_save).astype('uint8')
imgs_to_process = lumTrans(imgs_to_process)
imgs_to_process = np.expand_dims(imgs_to_process, axis=0)
x,y,z = imgs_to_process.shape
img_array = imgs_to_process.copy()
A1 = int(y/(512./100))
A2 = int(y/(512./400))
A3 = int(y/(512./475))
A4 = int(y/(512./40))
A5 = int(y/(512./470))
for i in range(len(imgs_to_process)):
img = imgs_to_process[i]
print(img.shape)
x,y = img.shape
#Standardize the pixel values
allmean = np.mean(img)
allstd = np.std(img)
img = img-allmean
img = img/allstd
# Find the average pixel value near the lungs
# to renormalize washed out images
middle = img[A1:A2,A1:A2]
mean = np.mean(middle)
max = np.max(img)
min = np.min(img)
kmeans = KMeans(n_clusters=2).fit(np.reshape(middle,[np.prod(middle.shape),1]))
centers = sorted(kmeans.cluster_centers_.flatten())
threshold = np.mean(centers)
thresh_img = np.where(img<threshold,1.0,0.0) # threshold the image
eroded = morphology.erosion(thresh_img,np.ones([4,4]))
dilation = morphology.dilation(eroded,np.ones([10,10]))
labels = measure.label(dilation)
label_vals = np.unique(labels)
regions = measure.regionprops(labels)
good_labels = []
for prop in regions:
B = prop.bbox
if B[2]-B[0]<A3 and B[3]-B[1]<A3 and B[0]>A4 and B[2]<A5:
good_labels.append(prop.label)
mask = np.ndarray([x,y],dtype=np.int8)
mask[:] = 0
for N in good_labels:
mask = mask + np.where(labels==N,1,0)
mask = morphology.dilation(mask,np.ones([10,10])) # one last dilation
imgs_to_process[i] = mask
m1 = imgs_to_process
convex_mask = m1
dm1 = process_mask(m1)
dilatedMask = dm1
Mask = m1
extramask = dilatedMask ^ Mask
bone_thresh = 180
pad_value = 0
img_array[np.isnan(img_array)]=-2000
sliceim = img_array
sliceim = sliceim*dilatedMask+pad_value*(1-dilatedMask).astype('uint8')
bones = sliceim*extramask>bone_thresh
sliceim[bones] = pad_value
x,y,z = sliceim.shape
if not os.path.exists(output):
os.makedirs(output)
img_to_save[sliceim.squeeze()==0] = 0
im = Image.fromarray(img_to_save)
im.save(output + name + '.png', 'PNG')
The problem is that the segmented lung still contains white borders, like this:
Segmented lung (output):
Unsegmented lung (input):
The full code can be found in this Google Colab notebook: code.
And a sample of the dataset is here.
For this problem, I don't recommend using Kmeans color quantization since this technique is usually reserved for a situation where there are various colors and you want to segment them into dominant color blocks. Take a look at this previous answer for a typical use case. Since your CT scan images are grayscale, Kmeans would not perform very well. Here's a potential solution using simple image processing with OpenCV:
Obtain binary image. Load input image, convert to grayscale, Otsu's threshold, and find contours.
Create a blank mask to extract desired objects. We can use np.zeros() to create an empty mask with the same size as the input image.
Filter contours using contour area and aspect ratio. We search for the lung objects by ensuring that contours are within a specified area threshold as well as aspect ratio. We use cv2.contourArea(), cv2.arcLength(), and cv2.approxPolyDP() for contour perimeter and contour shape approximation. If we have found our lung object, we utilize cv2.drawContours() to fill in our mask with white to represent the objects that we want to extract.
Bitwise-and mask with original image. Finally we convert the mask to grayscale and bitwise-and with cv2.bitwise_and() to obtain our result.
Here is our image processing pipeline visualized step-by-step:
Grayscale -> Otsu's threshold
Detected objects to extract highlighted in green -> Filled mask
Bitwise-and to get our result -> Optional result with white background instead
Code
import cv2
import numpy as np
image = cv2.imread('1.png')
highlight = image.copy()
original = image.copy()
# Convert image to grayscale, Otsu's threshold, and find contours
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# Create black mask to extract desired objects
mask = np.zeros(image.shape, dtype=np.uint8)
# Search for objects by filtering using contour area and aspect ratio
for c in contours:
# Contour area
area = cv2.contourArea(c)
# Contour perimeter
peri = cv2.arcLength(c, True)
# Contour approximation
approx = cv2.approxPolyDP(c, 0.035 * peri, True)
(x, y, w, h) = cv2.boundingRect(approx)
aspect_ratio = w / float(h)
# Draw filled contour onto mask if it passes the filter
# These are arbitrary values, may need to change depending on input image
if aspect_ratio <= 1.2 or area < 5000:
cv2.drawContours(highlight, [c], 0, (0,255,0), -1)
cv2.drawContours(mask, [c], 0, (255,255,255), -1)
# Convert 3-channel mask to grayscale then bitwise-and with original image for result
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(original, original, mask=mask)
# Uncomment if you want background to be white instead of black
# result[mask==0] = (255,255,255)
# Display
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('highlight', highlight)
cv2.imshow('mask', mask)
cv2.imshow('result', result)
# Save images
# cv2.imwrite('gray.png', gray)
# cv2.imwrite('thresh.png', thresh)
# cv2.imwrite('highlight.png', highlight)
# cv2.imwrite('mask.png', mask)
# cv2.imwrite('result.png', result)
cv2.waitKey(0)
A simpler approach to this problem is morphological erosion. It's just that you will then have to tune the threshold values.
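A minimal sketch of that erosion idea (the filename, Otsu threshold, and 9x9 kernel below are assumptions that will need tuning per image):
import cv2
import numpy as np
img = cv2.imread('1.png', cv2.IMREAD_GRAYSCALE)
# make the dark lung regions white, then erode the mask away from the bright borders
mask = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
kernel = np.ones((9, 9), np.uint8)  # tune this
mask = cv2.erode(mask, kernel, iterations=1)
result = cv2.bitwise_and(img, img, mask=mask)
cv2.imwrite('eroded_result.png', result)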
I searched for image recognition using Python. It seems there is no tutorial about extracting numbers from a colored background, so I followed THIS TUTORIAL
import cv2
import matplotlib.pyplot as plt
def detect_edge(image):
''' function Detecting Edges '''
image_with_edges = cv2.Canny(image , 100, 200)
images = [image , image_with_edges]
location = [121, 122]
for loc, img in zip(location, images):
plt.subplot(loc)
plt.imshow(img, cmap='gray')
plt.savefig('edge.png')
plt.show()
image = cv2.imread('myscreenshot.png', 0)
detect_edge(image)
This is my image:
This is the result:
Any solution to print out these numbers?
Here is some code for getting clean canny edges for this image.
import cv2
import numpy as np
# load image
img = cv2.imread("numbers.png");
# change to hue colorspace
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV);
h,s,v = cv2.split(hsv);
# use clahe to improve contrast
# (the contrast is pretty good already, so not much change, but good habit to have here)
clahe = cv2.createCLAHE(clipLimit = 10)
contrast = clahe.apply(v);
# use canny
canny = cv2.Canny(contrast, 20, 110);
# show
cv2.imshow('i', img);
cv2.imshow('v', v);
cv2.imshow('c', contrast);
cv2.imshow("canny", canny);
cv2.waitKey(0);
# save
cv2.imwrite("edges.png", canny);
Without using any OCR like pytesseract or something, I don't see an obvious way to be able to consistently turn this image into "text" numbers. I'll leave that for someone else who might know how to solve it without any pattern recognition stuff, because I don't even know where to begin without that. If you're willing to forgo that restriction, then pytesseract should have no problem with this; possibly even without doing processing like this.
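For what it's worth, a minimal sketch of that pytesseract route (untested on this image; the digit whitelist is just one way to bias it toward numbers):
import cv2
import pytesseract
img = cv2.imread("numbers.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# treat the image as one block of text and only allow digit characters
text = pytesseract.image_to_string(gray, config="--psm 6 -c tessedit_char_whitelist=0123456789")
print(text)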
OK, I filled in the numbers for the image. OpenCV's findContours hierarchy wasn't cooperating for some reason, so I had to do it manually, which makes this code pretty janky. Honestly, if I were to try this again from scratch, I'd try to find colors that contribute to a small number of total pixels, threshold on each, and combine the masks (a rough sketch of that idea follows the code below).
import cv2
import numpy as np
# check if small box is in big box
def contained(big, small):
# big corners
x,y,w,h = big;
big_tl = [x, y];
big_br = [x+w, y+h];
# small corners
x,y,w,h = small;
small_tl = [x, y];
small_br = [x+w, y+h];
# check
if small_tl[0] > big_tl[0] and small_br[0] < big_br[0]:
if small_tl[1] > big_tl[1] and small_br[1] < big_br[1]:
return True;
return False;
# load image
img = cv2.imread("numbers.png");
# change to hue colorspace
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV);
h,s,v = cv2.split(hsv);
# use clahe to improve contrast
# (the contrast is pretty good already, so not much change, but good habit to have here)
clahe = cv2.createCLAHE(clipLimit = 10)
contrast = clahe.apply(v);
# rescale
scale = 2.0;
h, w = img.shape[:2];
h = int(h * scale);
w = int(w * scale);
contrast = cv2.resize(contrast, (w,h), cv2.INTER_LINEAR);
img = cv2.resize(img, (w,h), cv2.INTER_LINEAR);
# use canny
canny = cv2.Canny(contrast, 10, 60);
# show
cv2.imshow('i', img);
cv2.imshow('v', v);
cv2.imshow('c', contrast);
cv2.imshow("canny", canny);
cv2.waitKey(0);
# try to fill in contours
# contours
_, contours, hierarchy = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE);
# filter contours by size
# filter out noisy bits and the big grid boxes
filtered = [];
for contour in contours:
perimeter = cv2.arcLength(contour, True);
if 50 < perimeter and perimeter < 750:
filtered.append(contour);
# draw contours again
# create a mask of the contoured image
mask = np.zeros_like(contrast);
mask = cv2.drawContours(mask, filtered, -1, 255, -1);
# close to get rid of annoying little gaps
kernel = np.ones((3,3),np.uint8)
mask = cv2.dilate(mask,kernel,iterations = 1);
mask = cv2.erode(mask,kernel, iterations = 1);
# contours
_, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE);
# alright, hierarchy is being stupid, plan B
# SUUUUUPEEERRR JAAAANK
outer_cntrs = [a for a in range(len(contours))];
children = [];
for a in range(len(contours)):
if a in outer_cntrs:
# get current box
big_box = cv2.boundingRect(contours[a]);
# check against all other boxes
for b in range(0, len(contours)):
if b in outer_cntrs:
small_box = cv2.boundingRect(contours[b]);
# remove any children
if contained(big_box, small_box):
outer_cntrs.remove(b);
children.append(contours[b]);
# # select by hierarchy
top_cntrs = [];
for a in range(len(contours)):
if a in outer_cntrs:
top_cntrs.append(contours[a]);
# create a mask of the contoured image
mask = np.zeros_like(contrast);
mask = cv2.drawContours(mask, top_cntrs, -1, 255, -1);
mask = cv2.drawContours(mask, children, -1, 255, -1);
# close
kernel = np.ones((3,3),np.uint8)
mask = cv2.dilate(mask,kernel,iterations = 1);
mask = cv2.erode(mask,kernel, iterations = 1);
# do contours again because opencv is being super difficult
# honestly, at this point, a fill method would've been better
# contours
_, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE);
# fill in
for con in contours:
cv2.fillPoly(mask, pts = [con], color=(255));
for con in children:
cv2.fillPoly(mask, pts = [con], color=(0));
# resize back down
h, w = mask.shape;
h = int(h / scale);
w = int(w / scale);
mask = cv2.resize(mask, (w,h));
# show
cv2.imshow("mask", mask);
cv2.waitKey(0);
# save
cv2.imwrite("filled.png", mask);
You can find the digits in three steps:
Applying Adaptive-threshold
Applying erosion
Read using pytesseract
Adaptive-threshold result:
Here we see that 9 and 0 are different from the rest of the digits. We need to remove the boundaries of the 9.
Erosion result:
Pytesseract result:
8 | 1
5 9
4 #
3 | 3
6 | 1
There are multiple page-segmentation modes available for pytesseract.
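For instance, trying a different mode is just a matter of changing the config string (a standalone sketch; whether another psm works better here is untested):
import cv2
import pytesseract
image = cv2.imread("7UUGYHw.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# --psm 6 assumes a single uniform block of text; --psm 4 assumes a single column of text of variable sizes
print(pytesseract.image_to_string(gray, config="--psm 4"))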
If you want to remove | from the output you can use re.sub
text = re.sub('[^A-Za-z0-9]+', '\n', text)
Result will be:
8
1
5
9
4
3
3
6
1
Code:
import cv2
import pytesseract
import re
import numpy as np
image = cv2.imread("7UUGYHw.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 13, 2)
erode = cv2.erode(thresh, np.array((7, 7)), iterations=1)
text = pytesseract.image_to_string(erode, config="--psm 6")
text = re.sub('[^A-Za-z0-9]+', '\n', text)
print(text)
I want to crop the image only inside the box or rectangle. I tried so many approaches but nothing worked.
import cv2
import numpy as np
img = cv2.imread("C:/Users/hp/Desktop/segmentation/add.jpeg", 0);
h, w = img.shape[:2]
# print(img.shape)
kernel = np.ones((3,3),np.uint8)
img2 = img.copy()
img2 = cv2.medianBlur(img2,5)
img2 = cv2.adaptiveThreshold(img2,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY,11,2)
img2 = 255 - img2
img2 = cv2.dilate(img2, kernel)
img2 = cv2.medianBlur(img2, 9)
img2 = cv2.medianBlur(img2, 9)
cv2.imshow('anything', img2)
cv2.waitKey(0)
cv2.destroyAllWindows()
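# The non-zero (white) pixels of the processed mask give a tight bounding box for the drawn content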
position = np.where(img2 !=0)
x0 = position[0].min()
x1 = position[0].max()
y0 = position[1].min()
y1 = position[1].max()
print(x0,x1,y0,y1)
result = img[x0:x1,y0:y1]
cv2.imshow('anything', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
The output should be the image inside the square.
You can use contour detection for this. If your image has basically only a hand drawn rectangle in it, I think it's good enough to assume it's the largest closed contour in the image. From that contour, we can figure out a polygon/quadrilateral approximation and then finally get an approximate rectangle. I'll define some utilities at the beginning which I generally use to make my life easier when messing around with images:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def load_image(filename):
return cv2.imread(filename)
def bnw(image):
return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def col(image):
return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
def fixrgb(image):
return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
def show_image(image, figsize=(7,7), cmap=None):
cmap = cmap if len(image.shape)==3 else 'gray'
plt.figure(figsize=figsize)
plt.imshow(image, cmap=cmap)
plt.show()
def AdaptiveThresh(gray):
blur = cv2.medianBlur(gray, 5)
adapt_type = cv2.ADAPTIVE_THRESH_GAUSSIAN_C
thresh_type = cv2.THRESH_BINARY_INV
return cv2.adaptiveThreshold(blur, 255, adapt_type, thresh_type, 11, 2)
def get_rect(pts):
xmin = pts[:,0,1].min()
ymin = pts[:,0,0].min()
xmax = pts[:,0,1].max()
ymax = pts[:,0,0].max()
return (ymin,xmin), (ymax,xmax)
Let's load the image and convert it to grayscale:
image_name = 'test.jpg'
image_original = fixrgb(load_image(image_name))
image_gray = 255-bnw(image_original)
show_image(image_gray)
Use some morph ops to enhance the image:
kernel = np.ones((3,3),np.uint8)
d = 255-cv2.dilate(image_gray,kernel,iterations = 1)
show_image(d)
Find the edges and enhance/denoise:
e = AdaptiveThresh(d)
show_image(e)
m = cv2.dilate(e,kernel,iterations = 1)
m = cv2.medianBlur(m,11)
m = cv2.dilate(m,kernel,iterations = 1)
show_image(m)
Contour detection:
contours, hierarchy = cv2.findContours(m, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
total_area = np.prod(image_gray.shape)
max_area = 0
for cnt in contours:
# Simplify contour
perimeter = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)
area = cv2.contourArea(approx)
# Shape is rectangular, so 4 points approximately and it's convex
if (len(approx) == 4 and cv2.isContourConvex(approx) and max_area<area<total_area):
max_area = cv2.contourArea(approx)
quad_polygon = approx
img1 = image_original.copy()
img2 = image_original.copy()
cv2.polylines(img1,[quad_polygon],True,(0,255,0),10)
show_image(img1)
tl, br = get_rect(quad_polygon)
cv2.rectangle(img2, tl, br, (0,255,0), 10)
show_image(img2)
So you can see the approximate polygon and the corresponding rectangle, using which you can get your crop. I suggest you play around with median blur and morphological ops like erosion, dilation, opening, closing etc and see which set of operations suits your images the best; I can't really say what's good from just one image. You can crop using the top left and bottom right coordinates:
show_image(image_original[tl[1]:br[1],tl[0]:br[0],:])
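If you want to experiment with the morphological ops suggested above, cv2.morphologyEx makes that easy (a standalone sketch; the kernel size is an assumption to tune):
import cv2
import numpy as np
gray = cv2.imread('test.jpg', cv2.IMREAD_GRAYSCALE)
kernel = np.ones((5, 5), np.uint8)
opened = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)   # erosion then dilation: removes small bright specks
closed = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)  # dilation then erosion: fills small dark gaps
cv2.imwrite('opened.jpg', opened)
cv2.imwrite('closed.jpg', closed)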
Draw the square with a different color (e.g. red) so it can be distinguished from the other writing and the background. Then threshold it so you get a black and white image: the red line will be white in this image. Get the coordinates of the white pixels: from this set, select only the two pairs (minX, minY) and (maxX, maxY). They are the top-left and bottom-right points of the box (remember that in an image the (0,0) point is at the top left) and you can use them to crop the image.
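A minimal sketch of this idea, assuming the box has been drawn in red (the filename and BGR bounds are assumptions):
import cv2
import numpy as np
img = cv2.imread('red_box.jpg')
# keep only strongly red pixels (bounds are in BGR order)
mask = cv2.inRange(img, np.array([0, 0, 150]), np.array([80, 80, 255]))
ys, xs = np.nonzero(mask)
# top-left and bottom-right corners of the drawn box
x0, y0, x1, y1 = xs.min(), ys.min(), xs.max(), ys.max()
crop = img[y0:y1, x0:x1]
cv2.imwrite('cropped.jpg', crop)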
Input floor plan image
The above image is my input floor plan, and I need to identify each room separately and then crop those rooms. After that, I can use those images for the next steps. So far I was able to remove small items from the input floor plan by using cv2.connectedComponentsWithStats, which I think will help to identify the walls more easily. After that my input image looks like this:
output image after removing small objects
Then I did a MorphologicalTransform to remove text and other symbols from the image, leaving only the walls. After that my input image looks like this:
after MorphologicalTransform
So I was able to identify the walls. Now, how do I use those walls to crop the rooms from the original input floor plan? Can someone help me? You can find my Python code in this link (Download My Code), or below:
#Import packages
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# Import utilites
from utils import label_map_util
from utils import visualization_utils as vis_util
# Name of the directory containing the object detection module we're using
MODEL_NAME = 'inference_graph'
IMAGE_NAME = 'floorplan2.jpg'
#Remove Small Items
im_gray = cv2.imread(IMAGE_NAME, cv2.IMREAD_GRAYSCALE)
(thresh, im_bw) = cv2.threshold(im_gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
thresh = 127
im_bw = cv2.threshold(im_gray, thresh, 255, cv2.THRESH_BINARY)[1]
#find all your connected components
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(im_bw, connectivity=8)
#connectedComponentswithStats yields every separated component with information on each of them, such as size
#the following part is just taking out the background which is also considered a component, but most of the time we don't want that.
sizes = stats[1:, -1]; nb_components = nb_components - 1
# minimum size of particles we want to keep (number of pixels)
#here, it's a fixed value, but you can set it as you want, eg the mean of the sizes or whatever
min_size = 150
#your answer image
img2 = np.zeros((output.shape))
#for every component in the image, you keep it only if it's above min_size
for i in range(0, nb_components):
if sizes[i] >= min_size:
img2[output == i + 1] = 255
cv2.imshow('room detector', img2)
#MorphologicalTransform
kernel = np.ones((5, 5), np.uint8)
dilation = cv2.dilate(img2, kernel)
erosion = cv2.erode(img2, kernel, iterations=6)
#cv2.imshow("img2", img2)
cv2.imshow("Dilation", dilation)
cv2.imwrite("Dilation.jpg", dilation)
#cv2.imshow("Erosion", erosion)
# Press any key to close the image
cv2.waitKey(0)
# Clean up
cv2.destroyAllWindows()
Here is something that I've come up with. It is not perfect (I made some comments about what you might want to try), and it will be better if you improve the input image quality.
import cv2
import numpy as np
def find_rooms(img, noise_removal_threshold=25, corners_threshold=0.1,
room_closing_max_length=100, gap_in_wall_threshold=500):
"""
:param img: grey scale image of rooms, already eroded and doors removed etc.
:param noise_removal_threshold: Minimal area of blobs to be kept.
:param corners_threshold: Threshold to allow corners. Higher removes more of the house.
:param room_closing_max_length: Maximum line length to add to close off open doors.
:param gap_in_wall_threshold: Minimum number of pixels to identify component as room instead of hole in the wall.
:return: rooms: list of numpy arrays containing boolean masks for each detected room
colored_house: A colored version of the input image, where each room has a random color.
"""
assert 0 <= corners_threshold <= 1
# Remove noise left from door removal
img[img < 128] = 0
img[img > 128] = 255
_, contours, _ = cv2.findContours(~img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
mask = np.zeros_like(img)
for contour in contours:
area = cv2.contourArea(contour)
if area > noise_removal_threshold:
cv2.fillPoly(mask, [contour], 255)
img = ~mask
# Detect corners (you can play with the parameters here)
dst = cv2.cornerHarris(img ,2,3,0.04)
dst = cv2.dilate(dst,None)
corners = dst > corners_threshold * dst.max()
# Draw lines to close the rooms off by adding a line between corners on the same x or y coordinate
# This gets some false positives.
# You could try to disallow drawing through other existing lines for example.
for y,row in enumerate(corners):
x_same_y = np.argwhere(row)
for x1, x2 in zip(x_same_y[:-1], x_same_y[1:]):
if x2[0] - x1[0] < room_closing_max_length:
color = 0
cv2.line(img, (x1, y), (x2, y), color, 1)
for x,col in enumerate(corners.T):
y_same_x = np.argwhere(col)
for y1, y2 in zip(y_same_x[:-1], y_same_x[1:]):
if y2[0] - y1[0] < room_closing_max_length:
color = 0
cv2.line(img, (x, y1), (x, y2), color, 1)
# Mark the outside of the house as black
_, contours, _ = cv2.findContours(~img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contour_sizes = [(cv2.contourArea(contour), contour) for contour in contours]
biggest_contour = max(contour_sizes, key=lambda x: x[0])[1]
mask = np.zeros_like(mask)
cv2.fillPoly(mask, [biggest_contour], 255)
img[mask == 0] = 0
# Find the connected components in the house
ret, labels = cv2.connectedComponents(img)
img = cv2.cvtColor(img,cv2.COLOR_GRAY2RGB)
unique = np.unique(labels)
rooms = []
for label in unique:
component = labels == label
if img[component].sum() == 0 or np.count_nonzero(component) < gap_in_wall_threshold:
color = 0
else:
rooms.append(component)
color = np.random.randint(0, 255, size=3)
img[component] = color
return rooms, img
#Read gray image
img = cv2.imread("/home/veith/Pictures/room.png", 0)
rooms, colored_house = find_rooms(img.copy())
cv2.imshow('result', colored_house)
cv2.waitKey()
cv2.destroyAllWindows()
This will show an image like this, where each room has a random color:
You can see that it sometimes finds a room where there is none, but I think this is a decent starting point for you.
I've used a screenshot of the image in your question for this.
You can use the returned masks of each room to index the original image and crop that.
To crop just use something like (untested, but should work for the most part):
for room in rooms:
crop = np.zeros_like(room).astype(np.uint8)
crop[room] = original_img[room] # Get the original image from somewhere
# if you need to crop the image into smaller parts as big as each room
r, c = np.nonzero(room)
min_r, max_r = r.argmin(), r.argmax()
min_c, max_c = c.argmin(), c.argmax()
crop = crop[min_r:max_r, min_c:max_c]
cv2.imshow("cropped room", crop)
cv2.waitKey()
cv2.destroyAllWindows()
I used three for loops to crop each room.
height, width = img.shape[:2]
rooms, colored_house = find_rooms(img.copy())
roomId = 0
images = []
for room in rooms:
x = 0
image = np.zeros ((height, width, 3), np.uint8)
image[np.where ((image == [0, 0, 0]).all (axis=2))] = [0, 33, 166]
roomId = roomId + 1
for raw in room:
y = 0
for value in raw:
if value == True:
image[x,y] = img[x,y]
y = y +1
#print (value)
#print (img[x,y])
x = x + 1
cv2.imwrite ('result' + str(roomId)+ '.jpg', image)