I am scanning old photos, so I have the image and a white background from the scanner. My aim is to extract the picture, removing the white background. How can I do that?
An example picture is the following:
My simple approach:
import os
import csv
from collections import Counter
from PIL import Image

def get_cropped_image(image, crop_folder, threshold):
    image_name = image.split("\\")[-1]
    im = Image.open(image)
    pixels = im.load()
    width, height = im.size

    # Fraction of pure-white pixels in every row
    rows = []
    for h_index in range(height):
        row = [pixels[(w_index, h_index)] for w_index in range(width)]
        color_count = Counter(row)[(255, 255, 255)] / float(len(row))
        rows.append([h_index, color_count])

    # Fraction of pure-white pixels in every column
    columns = []
    for w_index in range(width):
        column = [pixels[(w_index, h_index)] for h_index in range(height)]
        color_count = Counter(column)[(255, 255, 255)] / float(len(column))
        columns.append([w_index, color_count])

    # Debug dump of the per-row / per-column white fractions
    with open("image_data.csv", "w", newline="") as f:
        csv.writer(f).writerows(zip(rows, columns))

    # Keep only the rows/columns that are not mostly white
    rows_indexes = [i[0] for i in rows if i[1] < threshold]
    columns_indexes = [i[0] for i in columns if i[1] < threshold]
    x1, y1, x2, y2 = columns_indexes[0], rows_indexes[0], columns_indexes[-1], rows_indexes[-1]
    im.crop((x1, y1, x2, y2)).save(os.path.join(crop_folder, "c_" + image_name))
In the example below, I create a mask by selecting all pixels that are close to white (close, because the values right outside the area of interest are not exactly white). I then invert the mask to find the pixels that potentially belong to the photo, compute the bounding box of those pixels, and use it to extract the region of interest.
from skimage import io, img_as_float
import matplotlib.pyplot as plt
import numpy as np
image = img_as_float(io.imread('universe.jpg'))

# Select all pixels almost equal to white
# (almost, because there are some edge effects in jpegs
# so the boundaries may not be exactly white)
white = np.array([1, 1, 1])
mask = np.abs(image - white).sum(axis=2) < 0.05

# Find the bounding box of those pixels
coords = np.array(np.nonzero(~mask))
top_left = np.min(coords, axis=1)
bottom_right = np.max(coords, axis=1)

# +1 so the last foreground row/column is included in the slice
out = image[top_left[0]:bottom_right[0] + 1,
            top_left[1]:bottom_right[1] + 1]

plt.imshow(out)
plt.show()
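If you also want to write the cropped photo back to disk, a minimal follow-up (the output filename is just an example) would be:

from skimage import img_as_ubyte
io.imsave('universe_cropped.jpg', img_as_ubyte(out))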
I have this PCB with black components containing some colored dots. For a quality inspection I want to check all the dots and verify that each has the right color, which works pretty well. But if the PCB is rotated or translated even a little, this method fails. Do you have any ideas how to make it more robust?
One component has a red sticker in this example:
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Settings
confidentMax = 60
confidentMin = 40

img = cv2.imread("test.jpg")
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

pos2 = [96, 205]
pos1 = [158, 205]
positions = [pos1, pos2]

dataContainer = []
for pos in positions:
    data = []
    img_part = img[pos[1]-15:pos[1]+15, pos[0]-15:pos[0]+15, :]
    plt.imshow(cv2.cvtColor(img_part, cv2.COLOR_BGR2RGB))
    data.append(pos)
    data.append(img_part)
    dataContainer.append(data)

for data in dataContainer:
    # convert image to HSV
    my_img = cv2.cvtColor(data[1], cv2.COLOR_BGR2HSV)

    # check for reds
    # lower boundary RED color range values; Hue (0 - 10)
    lower1 = np.array([0, 100, 20])
    upper1 = np.array([10, 255, 255])
    # upper boundary RED color range values; Hue (160 - 180)
    lower2 = np.array([160, 100, 20])
    upper2 = np.array([179, 255, 255])

    lower_mask = cv2.inRange(my_img, lower1, upper1)
    upper_mask = cv2.inRange(my_img, lower2, upper2)
    full_mask = lower_mask + upper_mask
    result = cv2.bitwise_and(my_img, my_img, mask=full_mask)

    number = np.count_nonzero(full_mask)
    if confidentMin < number < confidentMax:
        print(data[0], "has red dot!")
    else:
        print(data[0], "has no red dot!")
    plt.imshow(cv2.cvtColor(my_img, cv2.COLOR_HSV2RGB))
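Since the checked positions are hard-coded, one possible direction (a sketch, not a tested solution for this board: "landmark.png", ref_loc and the matching score handling are assumptions) is to locate a distinctive landmark on the PCB with cv2.matchTemplate and shift all dot positions by the measured offset before cropping:

# Sketch: compensate pure translation by finding a landmark patch that was
# cropped once from a reference photo of the board ("landmark.png" is assumed).
template = cv2.imread("landmark.png")
res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
_, _, _, max_loc = cv2.minMaxLoc(res)

# ref_loc is the landmark's top-left corner in the reference photo
# (example value, recorded once):
ref_loc = (50, 40)
dx, dy = max_loc[0] - ref_loc[0], max_loc[1] - ref_loc[1]

# Shift the hard-coded dot positions by the measured offset:
positions = [[p[0] + dx, p[1] + dy] for p in positions]

Template matching only compensates translation; to also handle rotation you would need feature matching (e.g. ORB) plus cv2.estimateAffinePartial2D to warp the image back into the reference frame first.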
I am trying to detect a grainy printed line on a piece of paper with cv2. I need the angle of the line. I don't have much knowledge of image processing, and I only need to detect the line. I tried to play with the parameters, but the angle is always detected wrong. Could someone help me? This is my code:
import cv2
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pyplot import figure

img = cv2.imread('CamXY1_1.bmp')
crop_img = img[100:800, 300:900]

blur = cv2.GaussianBlur(crop_img, (1, 1), 0)
ret, thresh = cv2.threshold(blur, 150, 255, cv2.THRESH_BINARY)
gray = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 60, 150)

figure(figsize=(15, 15), dpi=150)
plt.imshow(edges, 'gray')

lines = cv2.HoughLines(edges, 1, np.pi/180, 200)
for rho, theta in lines[0]:
    a = np.cos(theta)
    b = np.sin(theta)
    x0 = a * rho
    y0 = b * rho
    x1 = int(x0 + 3000 * (-b))
    y1 = int(y0 + 3000 * (a))
    x2 = int(x0 - 3000 * (-b))
    y2 = int(y0 - 3000 * (a))
    cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
Here's a possible solution to estimate the line (and its angle) without using the Hough line transform. The idea is to locate the start and end points of the line using the reduce function, which can collapse an image to a single column or row. If we reduce the image we can also get the total SUM of all the pixels across it. Using this info we can estimate the extreme points of the line and calculate its angle. These are the steps:
Resize your image because it is way too big
Get a binary image via adaptive thresholding
Define two extreme regions of the image and crop them
Reduce the ROIs to a column using the SUM mode, which is the sum of all rows
Accumulate the total values above a threshold value
Estimate the starting and ending points of the line
Get the angle of the line
Here's the code:
# imports:
import cv2
import numpy as np
import math
# image path
path = "D://opencvImages//"
fileName = "mmCAb.jpg"
# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)
# Scale your BIG image into a small one:
scalePercent = 0.3
# Calculate the new dimensions
width = int(inputImage.shape[1] * scalePercent)
height = int(inputImage.shape[0] * scalePercent)
newSize = (width, height)
# Resize the image:
inputImage = cv2.resize(inputImage, newSize, None, None, None, cv2.INTER_AREA)
# Deep copy for results:
inputImageCopy = inputImage.copy()
# Convert BGR to grayscale:
grayInput = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
# Adaptive Thresholding:
windowSize = 51
windowConstant = 11
binaryImage = cv2.adaptiveThreshold(grayInput, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, windowSize, windowConstant)
The first step is to get the binary image. Note that I previously downscaled your input because it is too big and we don't need all that info. This is the binary mask:
Now, we don't need most of the image. In fact, since the line spans the whole image, we can just "trim" the first and last columns and check where the white pixels begin. I'll crop columns a little wider, though, so we can be sure we have enough data and as little noise as possible. I'll define two Regions of Interest (ROIs) and crop them. Then, I'll reduce each ROI to a column using the SUM mode, which gives me the summation of all intensities across each row. After that, I can accumulate the locations where the sum exceeds a certain threshold and approximate the location of the line, like this:
# Define the regions that will be cropped
# from the original image:
lineWidth = 5
cropPoints = [(0, 0, lineWidth, height), (width - lineWidth, 0, lineWidth, height)]

# Store the line points here:
linePoints = []

# Loop through the crop points and
# crop the ROI:
for p in range(len(cropPoints)):
    # Get the ROI:
    (x, y, w, h) = cropPoints[p]
    # Crop the ROI:
    imageROI = binaryImage[y:y+h, x:x+w]
    # Reduce the ROI to a n rows x 1 column matrix:
    reducedImg = cv2.reduce(imageROI, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S)
    # Get the height (or length) of the array:
    reducedHeight = reducedImg.shape[0]

    # Define a threshold and accumulate
    # the coordinates of the points:
    threshValue = 100
    pointSum = 0
    pointCount = 0
    for i in range(reducedHeight):
        currentValue = reducedImg[i]
        if currentValue > threshValue:
            pointSum = pointSum + i
            pointCount = pointCount + 1

    # Get average coordinate of the line:
    y = int(pointSum / pointCount)
    # Store in list:
    linePoints.append((x, y))
The red rectangles show the regions I cropped from the input image:
Note that I've stored both points in the linePoints list. Let's check out our approximation by drawing a line that connects both points:
# Get the two points:
p0 = linePoints[0]
p1 = linePoints[1]
# Draw the line:
cv2.line(inputImageCopy, (p0[0], p0[1]), (p1[0], p1[1]), (255, 0, 0), 1)
cv2.imshow("Line", inputImageCopy)
cv2.waitKey(0)
Which yields:
Not bad, huh? Now that we have both points, we can estimate the angle of this line:
# Get angle:
adjacentSide = p1[0] - p0[0]
oppositeSide = p0[1] - p1[1]
# Compute the angle alpha:
alpha = math.degrees(math.atan(oppositeSide / adjacentSide))
print("Angle: "+str(alpha))
This prints:
Angle: 0.534210901840831
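One small caveat on the angle computation: math.atan needs the division oppositeSide / adjacentSide, which raises ZeroDivisionError for a perfectly vertical line. math.atan2 is a drop-in replacement that handles that case:

# atan2 takes the two sides separately, so adjacentSide == 0 is fine:
alpha = math.degrees(math.atan2(oppositeSide, adjacentSide))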
I am making an automatic curve-detection program (graphs, 2D plots, etc.) and have gotten stuck trying to find the average location of a color. The color is determined by K-means clustering, so I have the RGB value for that color. I have also built a small sliding-window program to follow a set of (X,Y) coordinates, and in that window is where the average location/centroid of a particular color needs to be found. I am very new to OpenCV, so I will detail what I want to do below and appreciate any help.
Find the 3 most common colors with K-means and obtain the RGB values of those colors (done).
Use the sliding window to establish an N x N search window that rides along a determined X,Y path (done).
Apply a mask to get the curve color (based on Kmeans) extracted in the search window (need help). Also potentially mask out the background color based on Kmeans.
Find the average X,Y coordinate/pixel location for the desired curve color in the NxN search window (need help).
It would be appreciated if anyone could help with masking the color of the image based on the RGB value in OpenCV and then determining the average pixel location of the desired color in the search window. I have included the code that I have so far. The boundaries would come from the K-means clustering, but right now they are just hard coded for testing purposes.
Below is an example curve, where I would attempt to follow any of the black lines
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import math
import time
from tkinter import *
from tkinter import filedialog
from tkinter import ttk
from tkinter.filedialog import askopenfilename
from tkinter import scrolledtext
from PIL import Image, ImageTk
import os
import ReadCoords
from sklearn.cluster import KMeans

(winW, winH) = (12, 12)
xs, ys = ReadCoords.read_coords()
image = cv2.imread(r"Curve.PNG")

# Hard-coded for testing; these would come from the K-means clustering
boundaries = [([0, 0, 0], [128, 128, 128])]

def color_detection(image, boundaries):
    for (lower, upper) in boundaries:
        lower = np.array(lower, dtype='uint8')
        upper = np.array(upper, dtype='uint8')
        mask = cv2.inRange(image, lower, upper)
        output = cv2.bitwise_and(image, image, mask=mask)
    return output

def kmeans_colors(image=image):
    org_clone = image.copy()
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = image.reshape((image.shape[0] * image.shape[1], 3))
    # image = image.reshape((image.shape[0] * image.shape[1], 1))

    clt = KMeans(n_clusters=3)
    clt.fit(image)
    # print(clt.cluster_centers_)

    def centroid_histogram(clt):
        # grab the number of different clusters and create a histogram
        # based on the number of pixels assigned to each cluster
        numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)
        (hist, _) = np.histogram(clt.labels_, bins=numLabels)
        # normalize the histogram, such that it sums to one
        hist = hist.astype("float")
        hist /= hist.sum()
        # return the histogram
        return hist

    def plot_colors(hist, centroids):
        # initialize the bar chart representing the relative frequency
        # of each of the colors
        bar = np.zeros((50, 300, 3), dtype="uint8")
        startX = 0
        # loop over the percentage of each cluster and the color of
        # each cluster
        for (percent, color) in zip(hist, centroids):
            # plot the relative percentage of each cluster
            endX = startX + (percent * 300)
            cv2.rectangle(bar, (int(startX), 0), (int(endX), 50),
                          color.astype("uint8").tolist(), -1)
            startX = endX
        # return the bar chart
        return bar

    hist = centroid_histogram(clt)
    bar = plot_colors(hist, clt.cluster_centers_)
    return clt.cluster_centers_

def curve_sliding_window(image, step_size, window_size, x_range, y_range):
    for x, y in zip(x_range, y_range):
        yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])

for (x, y, window) in curve_sliding_window(image, step_size=8, window_size=(winW, winH), x_range=xs, y_range=ys):
    # if the window does not meet our desired window size, ignore it
    if window.shape[0] != winH or window.shape[1] != winW:
        continue

    # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
    # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
    # WINDOW

    # since we do not have a classifier, we'll just draw the window
    clone = image.copy()
    cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 0, 255), 2)
    cv2.imshow("Window", clone)
    # cv2.waitKey(1)
    # time.sleep(0.01)

    masked = color_detection(window, boundaries)
    cX, cY = moment_centriod(contours(window))
    if cX != 3 and cY != 3:
        cv2.circle(window, (cX, cY), 1, (0, 0, 255), 2)

    cv2.imshow("windows", np.hstack([masked, window]))
    cv2.waitKey(50)
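For steps 3 and 4 above (the "need help" parts), a minimal sketch of masking the window by one of the K-means colors and averaging the surviving pixel coordinates could look like this (the tolerance of 40 is an assumption to tune; also note kmeans_colors clusters in RGB while the window is BGR, so reverse the channel order of a cluster center first):

def color_centroid(window, bgr_color, tol=40):
    # Build a +/- tol range around the target BGR color (e.g. a K-means center)
    target = np.array(bgr_color, dtype=np.int16)
    lower = np.clip(target - tol, 0, 255).astype(np.uint8)
    upper = np.clip(target + tol, 0, 255).astype(np.uint8)
    mask = cv2.inRange(window, lower, upper)

    ys, xs = np.nonzero(mask)
    if len(xs) == 0:
        return None  # no pixels of that color in this window
    # Average pixel location (centroid) of the masked color, in window coordinates
    return int(round(xs.mean())), int(round(ys.mean()))

cv2.moments(mask) would give the same centroid via m10/m00 and m01/m00 if you prefer the OpenCV route.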
I work with logos and other simple graphics in which there are no gradients or complex patterns. My task is to extract from the logo the segments containing letters and other elements.
To do this, I define the background color, and then I go through the picture in order to segment the images. Here is my code for more understanding:
import sys

import cv2 as cv
import numpy as np

MAXIMUM_COLOR_TRANSITION_DELTA = 100  # 0 - 765

def expand_segment_recursive(image, unexplored_foreground, segment, point, color):
    height, width, _ = image.shape
    # Unpack coordinates from point
    py, px = point

    # Create list of pixels to check
    neighbourhood_pixels = [(py, px + 1), (py, px - 1), (py + 1, px), (py - 1, px)]

    allowed_zone = unexplored_foreground & np.invert(segment)

    for y, x in neighbourhood_pixels:
        # Add pixel to segment if its coordinates are within the image shape
        # and its color differs from the segment color by no more than
        # MAXIMUM_COLOR_TRANSITION_DELTA
        if y in range(height) and x in range(width) and allowed_zone[y, x]:
            color_delta = np.sum(np.abs(image[y, x].astype(int) - color.astype(int)))
            print(color_delta)
            if color_delta <= MAXIMUM_COLOR_TRANSITION_DELTA:
                segment[y, x] = True
                segment = expand_segment_recursive(image, unexplored_foreground, segment, (y, x), color)
                allowed_zone = unexplored_foreground & np.invert(segment)

    return segment

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Pass image as the argument to use the tool")
        exit(-1)

    IMAGE_FILENAME = sys.argv[1]
    print(IMAGE_FILENAME)

    image = cv.imread(IMAGE_FILENAME)
    height, width, _ = image.shape
    # To filter the background I use the median value of the image, as the
    # background in most cases takes > 50% of the image area.
    background_color = np.median(image, axis=(0, 1))
    print("Background color: ", background_color)

    # Create foreground mask to find segments in it (TODO: Optimize this part)
    foreground = np.zeros(shape=(height, width, 1), dtype=bool)
    for y in range(height):
        for x in range(width):
            if not np.array_equal(image[y, x], background_color):
                foreground[y, x] = True

    unexplored_foreground = foreground

    for y in range(height):
        for x in range(width):
            if unexplored_foreground[y, x]:
                segment = np.zeros(foreground.shape, foreground.dtype)
                segment[y, x] = True
                segment = expand_segment_recursive(image, unexplored_foreground, segment, (y, x), image[y, x])

                cv.imshow("segment", segment.astype(np.uint8) * 255)
                while cv.waitKey(0) != 27:
                    continue
Here is the desired result:
At the end of the run I expect 13 separate extracted segments (for this particular image). Instead I get RecursionError: maximum recursion depth exceeded, which is not surprising, since expand_segment_recursive() can be called for every pixel of the image; even at a small resolution of 600x500 I got up to 300K calls.
My question is how can I get rid of recursion in this case and possibly optimize the algorithm with Numpy or OpenCV algorithms?
You can actually use a thresholded (binary) image and connectedComponents to do this job in a couple of steps. You could also use findContours or other methods.
Here is the code:
import numpy as np
import cv2

# load image as greyscale
img = cv2.imread("hp.png", 0)

# puts 0 to the white (background) and 255 in other places (greyscale value < 250)
_, thresholded = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY_INV)

# gets the labels and the amount of labels, label 0 is the background
amount, labels = cv2.connectedComponents(thresholded)

# lets draw it for visualization purposes
preview = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)

print(amount)  # should be 3 -> two components + background

# draw label 1 blue and label 2 green
preview[labels == 1] = (255, 0, 0)
preview[labels == 2] = (0, 255, 0)

cv2.imshow("frame", preview)
cv2.waitKey(0)
At the end, the thresholded image will look like this:
and the preview image (the one with the colored segments) will look like this:
With the mask you can always use numpy functions to get things like the coordinates of the segments you want, or to color them (like I did with preview).
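As a quick sketch of that, the pixel coordinates and the bounding box of one label can be pulled straight out of labels with numpy (label 1 here is just an example):

# (row, col) coordinates of all pixels belonging to label 1:
coords = np.argwhere(labels == 1)
# Bounding box of that segment, then crop it from the original:
(y0, x0), (y1, x1) = coords.min(axis=0), coords.max(axis=0)
segment_crop = img[y0:y1 + 1, x0:x1 + 1]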
UPDATE
To get the differently colored segments, you may try to create a "border" between the segments. Since they are plain colors and not gradients, you can run an edge detector like Canny and then paint the detected edges black in the image:
import numpy as np
import cv2

img = cv2.imread("total.png", 0)

# background to black
img[img >= 200] = 0

# get edges
canny = cv2.Canny(img, 60, 180)

# make them thicker
kernel = np.ones((3, 3), np.uint8)
canny = cv2.morphologyEx(canny, cv2.MORPH_DILATE, kernel)

# apply edges as border in the image
img[canny == 255] = 0

# same as before
amount, labels = cv2.connectedComponents(img)
preview = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)

print(amount)  # should be 14 -> 13 components + background

# color them randomly
for i in range(1, amount):
    preview[labels == i] = np.random.randint(0, 255, size=3, dtype=np.uint8)

cv2.imshow("frame", preview)
cv2.waitKey(0)
The result is:
I am looking to display some images in OpenCV Python with titles and borders around each subplot, something like this (courtesy of the following Stack Overflow post: OpenCV (Python) video subplots):
WHAT I WANT:
But I only managed to get the following with an adaptation of that code:
import cv2
im1 = cv2.imread('Lenna.png')
final_frame = cv2.hconcat((im1, im1))
cv2.imshow('lena', final_frame)
WHAT I HAVE
Is it possible to obtain this using OpenCV?
I know a workaround would be to put text on the images, but that's not what I want because it will cover important information that way.
UPDATE
My bad, I didn't specify initially: I have 4 subplots (so 4 different images), not two as in the example. Also, I want the solution to be as fast as possible, since I am working with video (time restrictions).
I have a pretty quick and dirty solution. You can refine it to suit your needs. I have the explanation alongside the code as well:
import cv2
import numpy as np
img1 = cv2.imread('lena.jpg')
#--- Here I am creating the border---
black = [0,0,0] #---Color of the border---
constant=cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_CONSTANT,value=black )
cv2.imshow('constant',constant)
You can find many other options for different borders in the OpenCV documentation for copyMakeBorder.
#--- Here I created a violet background to include the text ---
violet= np.zeros((100, constant.shape[1], 3), np.uint8)
violet[:] = (255, 0, 180)
#--- I then concatenated it vertically to the image with the border ---
vcat = cv2.vconcat((violet, constant))
cv2.imshow('vcat', vcat)
#--- Now I included some text ---
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(vcat,'FRAME',(30,50), font, 2,(0,0,0), 3, 0)
cv2.imshow('Text', vcat)
#--- I finally concatenated both the above images horizontally---
final_img = cv2.hconcat((vcat, vcat))
cv2.imshow('Final', final_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
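For the four-image case from the update, the same building blocks compose into a 2x2 grid. A minimal sketch, assuming four labelled tiles of equal size built the same way as vcat above:

# tile1..tile4 are placeholders for four labelled images like vcat:
top = cv2.hconcat((tile1, tile2))
bottom = cv2.hconcat((tile3, tile4))
grid = cv2.vconcat((top, bottom))
cv2.imshow('Grid', grid)

Since this is only array concatenation, it is cheap enough to run per frame on video.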
The general idea would be to create a new image with width += width/10 and height += height/20, write some text as a heading, and place the input image along the center:
import cv2
import numpy as np

img = cv2.imread("/Users/anmoluppal/Downloads/Lenna.png")

height, width, ch = img.shape
new_width, new_height = width + width // 20, height + height // 8

# Create a new canvas with the new width and height.
canvas = np.ones((new_height, new_width, ch), dtype=np.uint8) * 125

# Now replace the center of the canvas with the original image
padding_top, padding_left = 60, 10
if padding_top + height < new_height and padding_left + width < new_width:
    canvas[padding_top:padding_top + height, padding_left:padding_left + width] = img
else:
    print("The given padding exceeds the limits.")

text1 = "Sample Image 1"
text2 = "Sample Image 2"
img1 = cv2.putText(canvas.copy(), text1, (int(0.25 * width), 30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0))
img2 = cv2.putText(canvas.copy(), text2, (int(0.25 * width), 30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0))

final = cv2.hconcat((img1, img2))
cv2.imwrite("./debug.png", final)
I used the other answers to make a generalizable function which works for an arbitrary number of rows and columns:
import cv2
import numpy as np

def cvSubplot(imgs,       # 2d np array of imgs (each img an np array of depth 1 or 3)
              pad=10,     # number of pixels of padding between images; must be even
              titles=None,           # (optional) np array of subplot titles
              win_name='CV Subplot'  # name of cv2 window
              ):
    '''
    Makes cv2-based subplots. Useful to plot images in actual pixel size.
    '''
    rows, cols = imgs.shape

    subplot_shapes = np.array([list(map(np.shape, x)) for x in imgs])
    sp_height, sp_width, depth = np.max(np.max(subplot_shapes, axis=0), axis=0)

    title_pad = 30
    if titles is not None:
        pad_top = pad + title_pad
    else:
        pad_top = pad

    frame = np.zeros((rows * (sp_height + pad_top), cols * (sp_width + pad), depth))

    for r in range(rows):
        for c in range(cols):
            img = imgs[r, c]
            h, w, _ = img.shape
            y0 = r * (sp_height + pad_top) + pad_top // 2
            x0 = c * (sp_width + pad) + pad // 2
            frame[y0:y0 + h, x0:x0 + w, :] = img

            if titles is not None:
                frame = cv2.putText(frame, titles[r, c], (x0, y0 - title_pad // 4), cv2.FONT_HERSHEY_COMPLEX, .5, (255, 255, 255))

    cv2.imshow(win_name, frame)
    cv2.waitKey(0)
Below is an example usage:
import cv2
import numpy as np
a1 = np.random.random((40,400,1))
a2 = np.random.random((200,200,1))
a3 = np.random.random((100,100,1))
a4 = np.random.random((300,150,1))
a5 = np.random.random((100,150,1))
filler = np.zeros((0,0,1))
titles = np.array([['A', 'B', 'C'], ['D', 'E', 'Filler']])
# dtype=object is needed because the sub-images have different shapes
imgs = np.array([[a1, a2, a3], [a4, a5, filler]], dtype=object)
cvSubplot(imgs, pad=20, titles=titles)
That script produces the following cv2 image: