I am trying to classify an image by selecting a pixel at random, then finding all pixels in the image that lie within a certain Euclidean distance, in color space, of that original pixel. My current script takes a prohibitively long time. I wonder whether I can use the following inequality to generate a boolean matrix that would allow quicker manipulation of the image.
$(x - c_x)^2 + (y - c_y)^2 + (z - c_z)^2 < r^2$
Here is the code I am using now:
import PIL, glob, numpy, random, math, time
def zone_map(picture, threshold, output=None):
    """
    Flatten an image into zones of similar colour.

    Repeatedly picks a random pixel that has not been assigned to a zone yet,
    collects every pixel whose colour is within ``threshold`` of it (as
    measured by ``euclid_dist``) and overwrites all of them with their
    average colour.  Stops once every pixel belongs to a zone.

    picture   -- path or file object handed to PIL.Image.open
    threshold -- maximum colour distance for a pixel to join a zone
    output    -- optional path; when given, the result is saved there

    Returns the modified PIL image.
    """
    import PIL.Image  # a bare "import PIL" does not load the Image submodule

    im = PIL.Image.open(picture)
    pix = im.load()
    width, height = im.size
    mask = numpy.zeros((width, height))
    while 0 in mask:
        # randrange excludes the upper bound; randint(0, width) could return
        # width itself and index one past the last column.
        x = random.randrange(width)
        y = random.randrange(height)
        if mask[x, y] != 0:
            continue
        point = pix[x, y]
        to_average = {(x, y): point}
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        for row in range(width):
            for column in range(height):
                if euclid_dist(point, pix[row, column]) <= threshold:
                    to_average[(row, column)] = pix[row, column]
        end = time.perf_counter()
        print(end - start)
        to_average_sum = (0, 0, 0)
        for value in to_average.values():
            to_average_sum = tuple_sum(to_average_sum, value)
        average = tuple_divide(to_average_sum, len(to_average))
        for coordinate in to_average:
            pix[coordinate] = average
            mask[coordinate] = 1
        # Share of pixels assigned so far.  The mask only holds 0s and 1s, so
        # its mean is that share; this also works when no 0 is left.
        print(mask.mean() * 100, '%')
    if output is not None:
        im.save(output)
    return im
def euclid_dist(tuple1, tuple2):
    """
    Finds the Euclidean distance between two points in n-dimensional space.

    The points are given as equal-length sequences of numbers; extra
    components of the longer sequence are ignored by zip.
    """
    # The distance is built from component *differences*; summing the
    # components instead would make a point non-zero distance from itself.
    tot_sq = sum((num1 - num2) ** 2 for num1, num2 in zip(tuple1, tuple2))
    return math.sqrt(tot_sq)
def tuple_sum(tuple1, tuple2):
    """
    Adds two tuples component by component and returns the result as a tuple.

    Components are paired with zip, so the result is as long as the shorter
    input.
    """
    return tuple(left + right for left, right in zip(tuple1, tuple2))
def tuple_divide(tuple1, divisor):
    """
    Divides every value of the tuple by divisor and rounds each quotient to
    an integer, returning the results as a tuple.
    """
    return tuple(int(round(component / divisor)) for component in tuple1)
Any information on how to incorporate the described boolean matrix, or any other ideas on how to speed this up, would be greatly appreciated.
Just load the image as a numpy array, and then use array operations instead of looping over pixels:
import numpy as np
import matplotlib.pyplot as plt
import PIL
def zone_map(picture, threshold, show=True):
    """Return a boolean mask of the pixels whose colour is close to a random pixel's.

    picture   -- path or file object handed to PIL.Image.open; the loaded
                 array must have three dimensions (rows, columns, channels)
    threshold -- positive radius in colour space; a pixel is selected when
                 its Euclidean colour distance to the random pixel is smaller
    show      -- when true, draw the image and the mask side by side on a new
                 matplotlib figure (the caller still has to call plt.show())

    Raises ValueError for a non-positive threshold: no pixel, not even the
    chosen one, could satisfy ``distance < threshold``.
    """
    import PIL.Image  # a bare "import PIL" does not load the Image submodule

    if threshold <= 0:
        raise ValueError("threshold must be positive")
    with PIL.Image.open(picture) as img:
        # np.float was only an alias of the builtin and has been removed
        # from NumPy; the builtin float gives the same float64 array.
        rgb = np.array(img, dtype=float)
    height, width, _ = rgb.shape
    # get random pixel
    position = np.random.randint(height), np.random.randint(width)
    color = rgb[position]
    # get euclidean distance of all pixels in colour space
    distance = np.sqrt(np.sum((rgb - color) ** 2, axis=-1))
    # threshold; the chosen pixel has distance 0, so the mask is never empty
    mask = distance < threshold
    if show:  # show output
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.imshow(rgb.astype(np.uint8))
        ax2.imshow(mask, cmap='gray')
        fig.suptitle('Random color: {}'.format(color))
    return mask
def test():
    """Run zone_map on the sample picture and display the resulting figure."""
    sample = "Lenna.jpg"
    zone_map(sample, threshold=20)
    plt.show()
Related
Not sure if that is the correct terminology. Basically trying to take a black and white image and first transform it such that all the white pixels that border black-pixels remain white, else turn black. That part of the program works fine, and is done in find_edges. Next I need to calculate the distance from each element in the image to the closest white-pixel. Right now I am doing it by using a for-loop that is insanely slow. Is there a way to make the find_nearest_edge function written solely with numpy without the need for a for-loop to call it on each element? Thanks.
####
from PIL import Image
import numpy as np
from scipy.ndimage import binary_erosion
####
def find_nearest_edge(arr, point):
    """Return the distance from point to the nearest non-zero element of arr.

    arr   -- 2-D array indexed as arr[row, column]
    point -- (x, y) pair, where x is a column index and y a row index

    A non-zero element located exactly at point is ignored (distance 0 is
    excluded).  Returns np.inf when no other non-zero element exists.
    """
    # arr.shape is (rows, columns), i.e. (height, width).  Unpacking it as
    # (w, h) produced coordinate grids of the transposed shape, which only
    # lined up with arr for square images.
    h, w = arr.shape
    x, y = point
    xcoords, ycoords = np.meshgrid(np.arange(w), np.arange(h))
    target = np.sqrt((xcoords - x) ** 2 + (ycoords - y) ** 2)
    target[arr == 0] = np.inf
    # initial=np.inf keeps np.min from raising when nothing is selected
    shortest_distance = np.min(target[target > 0.0], initial=np.inf)
    return shortest_distance
def find_edges(img):
    """Return the image's 'L'-mode pixels minus their 3x3 binary erosion.

    img must offer ``convert('L')``; the converted image is turned into an
    array, eroded with a 3x3 structuring element, and the eroded mask
    (scaled by 255) is subtracted from the array.
    """
    gray = np.array(img.convert('L'))
    structure = np.ones((3, 3))
    eroded = binary_erosion(gray, structure)
    return gray - eroded * 255
# Load the input, extract its edges and store them as an 8-bit image.
a = Image.open('a.png')
x, y = a.size
edges = find_edges(a)
out = Image.fromarray(edges.astype('uint8'), 'L')
out.save('b.png')

# Distance to the nearest edge for every (x, y) position, x-major order.
dists = [
    find_nearest_edge(edges, (_x, _y))
    for _x in range(x)
    for _y in range(y)
]
print(dists)
Images:
You can use KDTree to compute distances fast.
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import binary_erosion
from scipy.spatial import KDTree
def find_edges(img):
    """Return img (as an array) minus its 3x3 binary erosion scaled by 255."""
    pixels = np.array(img)
    structure = np.ones((3, 3))
    eroded = binary_erosion(pixels, structure)
    return pixels - eroded * 255
def find_closest_distance(img):
    """Return, for every pixel, the distance to the nearest non-zero pixel.

    The result has the shape of img's first two dimensions.
    """
    # NOTE: assuming input is binary image and white is any non-zero value!
    white_points = np.argwhere(img)
    tree = KDTree(white_points)
    # (rows, cols, 2) array whose entry [i, j] is the coordinate pair (i, j)
    every_pixel = np.moveaxis(np.indices(img.shape[:2]), 0, -1)
    distances, _ = tree.query(every_pixel)
    return distances
# Synthetic test picture: four white rectangles on a black 200x200 canvas.
test_image = np.zeros((200, 200))
for rows, cols in (
    (slice(20, 50), slice(60, 140)),
    (slice(150, 180), slice(60, 140)),
    (slice(60, 140), slice(20, 50)),
    (slice(60, 140), slice(150, 180)),
):
    test_image[rows, cols] = 255.0

edge_image = find_edges(test_image)
distance_image = find_closest_distance(edge_image)

# Show the input, its edges and the distance map side by side.
fig, axes = plt.subplots(1, 3, figsize=(12, 5))
for axis, panel in zip(axes, (test_image, edge_image, distance_image)):
    axis.imshow(panel, cmap='Greys_r')
plt.show()
You can make your code 25X faster by just changing find_nearest_edge as follows. Many other optimizations are possible, but this is the biggest bottleneck in your code.
from numba import njit
# The decorator is what compiles the loops below; written as "#njit" it was
# only a comment and the function ran as plain Python.
@njit
def find_nearest_edge(arr, point):
    """Return the distance from point to the nearest non-zero element of arr.

    point is compared against the (i, j) indices of arr, i.e. its first
    component is matched with the first array axis.  Returns np.inf when
    arr has no non-zero element.
    """
    x, y = point
    shortest_distance = np.inf
    for i in range(arr.shape[0]):
        for j in range(arr.shape[1]):
            if arr[i, j] == 0:
                continue
            # compare squared distances; the square root is taken once at the end
            shortest_distance = min(shortest_distance, (i - x)**2 + (j - y)**2)
    return np.sqrt(shortest_distance)
I am reading an image, getting objects that have a certain brightness value, and then plotting the X and Y coords to the image.
But, there is a huge group of outliers, which are all located in a rectangular part of the image, Its X and Y coords are 1110-1977 (width) and 1069-1905 (height). From here, I'm looping through this little square portion of the image, and from my pre-created x and y arrays any values that have the same coords as shown there are removed.
However, this removes a lot more coords, which, for example, have X in the range 1110-1977. So the end result is a cross pattern filtering when I only want the square in the center to be filtered. How would I do this?
Code
from PIL import Image, ImageDraw
import numpy as np
from math import sqrt
# Main image whose pixels are classified further down.
imag = Image.open("Centaurus_A-DeNoiseAI-denoise.jpg")
imag = imag.convert ('RGB')
# Coordinates of the accepted pixels; kept as two parallel lists.
x=[]
y=[]
# Cropped reference image used to compute an average brightness.
imag2=Image.open("Cen_A_cropped.jpg")
imag2=imag2.convert('RGB')
# Per-channel values of every pixel of the reference image.
r=[]
g=[]
b=[]
width2, height2=imag2.size
for count2 in range(width2):
    for i2 in range(height2):
        X,Y=count2,i2
        (R,G,B)=imag2.getpixel((X,Y))
        r.append(R)
        g.append(G)
        b.append(B)
# Mean of each channel, then a weighted root-sum-of-squares of the three means.
average_r=sum(r)/len(r)
average_g=sum(g)/len(g)
average_b=sum(b)/len(b)
brightness_average=sqrt(0.299*(average_r**2) + 0.587*(average_g**2) + 0.114*(average_b**2))
print("Avg. brightness "+str(brightness_average))
def calculate_brightness(galaxy,ref_clus,clus_mag):
    """Return clus_mag plus the base-2.512 logarithm of (galaxy / ref_clus) squared."""
    ratio_squared = (galaxy / ref_clus) ** 2
    return np.log(ratio_squared) / np.log(2.512) + clus_mag
# Probe a single pixel; the brightness computed here is overwritten by the
# scan below before it is read.
X, Y = 1556, 1568
(R, G, B) = imag.getpixel((X, Y))
width, height = imag.size
brightness = sqrt(0.299*(R**2) + 0.587*(G**2) + 0.114*(B**2))
print("Magnitude: "+str((calculate_brightness(13050, 15.79,3.7))))
reference = brightness_average/(calculate_brightness(13050, 15.79,3.7)/6.84)
print("Reference: "+str(reference))

# Collect the coordinates of every pixel whose brightness lies in
# [reference, reference + 3].
for count in range(width):
    for i in range(height):
        X, Y = count, i
        (R, G, B) = imag.getpixel((X, Y))
        brightness = sqrt(0.299*(R**2) + 0.587*(G**2) + 0.114*(B**2))
        if reference <= brightness <= reference + 3:
            x.append(X)
            y.append(Y)

# Post processing: drop the points inside the central box.  x and y are
# parallel lists, so each point has to be tested as an (x, y) PAIR; removing
# values from the two lists independently deletes unrelated points and
# leaves a cross-shaped hole instead of a rectangle.
kept = [
    (px, py)
    for px, py in zip(x, y)
    if not (1110 <= px < 1977 and 1069 <= py < 1905)
]
x = [px for px, _ in kept]
y = [py for _, py in kept]

# Mark every remaining point in green and save the result.
with imag as im:
    draw = ImageDraw.Draw(im)
    for px, py in kept:
        draw.rectangle([px, py, px, py], fill=(0, 255, 0))
    im.save("your_image.png")
Centaurus_A-DeNoiseAI-denoise.jpg
Cen_A_cropped.jpg
Your post-processing logic is flawed. You remove a bunch of X values in the range 1110-1977, without checking whether its corresponding Y value is also in the range of the box. Remove this code section instead and add that logic the first time you loop to gather your x and y coords.
# Fragment of the pixel-collection loop: pixels whose X and Y both fall
# inside the box are skipped before their colour is read.
for count in range(width):
    for i in range(height):
        X,Y = count,i
        if 1110 <= X < 1977 and 1069 <= Y < 1905: # add these
            continue # two lines
        (R,G,B) = imag.getpixel((X,Y))
However, there is a better way of doing the exact same thing by using numpy arrays. Instead of writing explicit loops, you can vectorise a lot of your computations.
import numpy as np
from PIL import Image, ImageDraw
# Load both pictures as RGB arrays.
image = Image.open('Centaurus_A-DeNoiseAI-denoise.jpg').convert('RGB')
img1 = np.array(image)
img2 = np.array(Image.open('Cen_A_cropped.jpg').convert('RGB'))

# Weighted root-sum-of-squares of the reference picture's mean colour.
coeffs = np.array([.299, .587, .114])
average = img2.mean(axis=(0, 1))
brightness_average = np.sqrt((average**2 * coeffs).sum())
reference = brightness_average / (calculate_brightness(13050, 15.79,3.7) / 6.84)
print(f'Avg. brightness: {brightness_average}')
print(f'Reference: {reference}')

# Same brightness measure for every pixel of the main picture.
squared = img1.astype(int)**2
brightness = np.sqrt((squared * coeffs).sum(axis=-1))
accepted_brightness = (reference <= brightness) & (brightness <= reference + 3)

# Every pixel counts except those inside the central box.
pixels_used = np.ones(img1.shape[:2], dtype=bool)
pixels_used[1069:1905, 1110:1977] = False

rows, cols = np.where(accepted_brightness * pixels_used)
with image as im:
    draw = ImageDraw.Draw(im)
    draw.point(list(zip(cols, rows)), fill=(0, 255, 0))
image.save('out.png')
The main trick used here is in the line
rows, cols = np.where(accepted_brightness * pixels_used)
accepted_brightness is a 2d boolean array that says, for each pixel, whether its brightness is within your preferred range. pixels_used is another 2d boolean array, in which every pixel is True except for the pixels in the box near the centre that you want to ignore. Combining the two gives you the coordinates of the pixels that have the correct brightness and are not in the square in the centre.
I am trying to write a program which fades an image in radial direction. which means as we move away from the centre of the image, the pixels fade to black. For this, I have written five different functions:
center: returns coordinate pair (center_y, center_x) of the image center.
radial_distance: returns for image with width w and height h an array with shape (h,w), where the number at index (i,j) gives the euclidean distance from the point (i,j) to the center of the image.
scale: returns a copy of the array 'a' (or image) with its elements scaled to be in the given range.
radial_mask: takes an image as a parameter and returns an array with same height and width filled with values between 0.0 and 1.0.
radial_fade: returns the image multiplied by its radial mask.
The program is:
import numpy as np
import matplotlib.pyplot as plt
def center(a):
    """Return the centre of array a as the pair (center_y, center_x)."""
    height, width = a.shape[:2]
    center_y = (height - 1) / 2
    center_x = (width - 1) / 2
    # note the order: (center_y, center_x)
    return (center_y, center_x)
def radial_distance(b):
    """Return an (h, w) float array of distances to the centre of image b.

    Element (i, j) holds the Euclidean distance from index (i, j) to the
    centre ((h - 1) / 2, (w - 1) / 2).  b itself is left untouched.
    """
    h, w = b.shape[:2]
    y, x = (h - 1) / 2, (w - 1) / 2  # (center_y, center_x)
    # Build the result in a fresh float array.  Writing into a slice of b
    # overwrote the image's first channel and truncated the distances to
    # the image's dtype.
    rows = np.arange(h)[:, np.newaxis]
    cols = np.arange(w)[np.newaxis, :]
    return np.sqrt((y - rows) ** 2 + (x - cols) ** 2)
def scale(c, tmin=0.0, tmax=1.0):
    """Returns a copy of array 'c' with its values scaled linearly to be in
    the range [tmin,tmax].

    A constant array has no spread to stretch; every element is then mapped
    to tmin.
    """
    c = np.asarray(c, dtype=float)
    mini, maxi = c.min(), c.max()
    # Test for a zero spread rather than for maxi == 0: that check returned
    # a bare scalar, mishandled arrays whose maximum happens to be 0, and
    # still divided by zero for any other constant array.
    if maxi == mini:
        return np.full(c.shape, tmin, dtype=float)
    m = (tmax - tmin) / (maxi - mini)
    f = tmin - m * mini
    return c * m + f
def radial_mask(d):
    """Return one minus the radial distances of image d scaled to [0.0, 1.0]."""
    distances = radial_distance(d)
    normalised = scale(distances, tmin=0.0, tmax=1.0)
    return 1.0 - normalised
def radial_fade(l):
    """Return channel 0 of image l multiplied by the image's radial mask."""
    # NOTE(review): only the first channel is faded — confirm whether all
    # channels were meant to be multiplied by the mask.
    first_channel = l[:, :, 0]
    mask = radial_mask(l)
    return first_channel * mask
# Load the painting and show it next to its radial mask and the faded result.
# The picture is bound to one name throughout; the previous mix of "image"
# and "ima" raised a NameError.
image = plt.imread("src/painting.png")
fig, ax = plt.subplots(3)
masked = radial_mask(image)
faded = radial_fade(image)
ax[0].imshow(image)
ax[1].imshow(masked)
ax[2].imshow(faded)
plt.show()
there is something wrong somewhere in the code as it does not do the expected job.
One problem is that in
o = b[:h,:w,0]
you're using the same precision as the image, which may be an integer type (e.g. uint8).
You should use for example
o = np.zeros((h, w), np.float32)
I am implementing Selective Search, but some of the images I use give a weird error. I am attaching the code and the output.
import skimage.io
import skimage.feature
import skimage.color
import skimage.transform
import skimage.util
import skimage.segmentation
import numpy
import cv2
im_orig = img = cv2.imread("image.jpeg")
# "Selective Search for Object Recognition" by J.R.R. Uijlings et al.
#
# - Modified version with LBP extractor for texture vectorization
def _generate_segments(im_orig, scale, sigma, min_size):
    """
    segment smallest regions by the algorithm of Felzenswalb and
    Huttenlocher

    Returns the image with one extra channel (index 3) that holds the
    segment label of every pixel.
    """
    labels = skimage.segmentation.felzenszwalb(
        skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma,
        min_size=min_size)
    # attach an empty 4th channel to the image, then fill it with the labels
    height, width = im_orig.shape[:2]
    label_plane = numpy.zeros((height, width, 1))
    segmented = numpy.concatenate((im_orig, label_plane), axis=2)
    segmented[:, :, 3] = labels
    return segmented
def _sim_colour(r1, r2):
    """
    histogram intersection of the two regions' colour histograms:
    the sum of the bin-wise minima of r1["hist_c"] and r2["hist_c"]
    """
    return sum(min(left, right)
               for left, right in zip(r1["hist_c"], r2["hist_c"]))
def _sim_texture(r1, r2):
    """
    histogram intersection of the two regions' texture histograms:
    the sum of the bin-wise minima of r1["hist_t"] and r2["hist_t"]
    """
    return sum(min(left, right)
               for left, right in zip(r1["hist_t"], r2["hist_t"]))
def _sim_size(r1, r2, imsize):
    """
    size similarity: one minus the share of the image (imsize) covered by
    the two regions' sizes together
    """
    combined = r1["size"] + r2["size"]
    return 1.0 - combined / imsize
def _sim_fill(r1, r2, imsize):
    """
    fill similarity: one minus the share of the image (imsize) taken by the
    part of the common bounding box that neither region's size accounts for
    """
    box_width = max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"])
    box_height = max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"])
    bbsize = box_width * box_height
    return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize
def _calc_sim(r1, r2, imsize):
    """Return the sum of the colour, texture, size and fill similarities."""
    colour = _sim_colour(r1, r2)
    texture = _sim_texture(r1, r2)
    size = _sim_size(r1, r2, imsize)
    fill = _sim_fill(r1, r2, imsize)
    return colour + texture + size + fill
def _calc_colour_hist(img):
    """
    calculate the colour histogram of one region
    img is indexed as img[:, channel]; the first three channels are used
    the output is the concatenation of one BINS-bin histogram per channel
    (BINS * 3 values), each taken over the range 0.0 .. 255.0 and divided
    by len(img)
    number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf]
    """
    BINS = 25
    per_channel = [
        numpy.histogram(img[:, colour_channel], BINS, (0.0, 255.0))[0]
        for colour_channel in (0, 1, 2)
    ]
    # the leading empty float array makes the joined histogram floating point
    hist = numpy.concatenate([numpy.array([])] + per_channel)
    # L1 normalize
    return hist / len(img)
def _calc_texture_gradient(img):
    """
    calculate texture gradient for entire image
    The original SelectiveSearch algorithm proposed Gaussian derivative
    for 8 orientations, but we use LBP instead.
    output has the same three dimensions as img; channels 0-2 hold the
    local binary pattern of the matching input channel
    """
    height, width, depth = img.shape[0], img.shape[1], img.shape[2]
    gradient = numpy.zeros((height, width, depth))
    for colour_channel in (0, 1, 2):
        channel = img[:, :, colour_channel]
        gradient[:, :, colour_channel] = skimage.feature.local_binary_pattern(
            channel, 8, 1.0)
    return gradient
def _calc_texture_hist(img):
    """
    calculate the texture histogram of one region
    img is indexed as img[:, channel]; the first three channels are used
    the output is the concatenation of one BINS-bin histogram per channel
    (BINS * 3 values), each taken over the range 0.0 .. 1.0 and divided
    by len(img)
    """
    BINS = 10
    per_channel = [
        numpy.histogram(img[:, colour_channel], BINS, (0.0, 1.0))[0]
        for colour_channel in (0, 1, 2)
    ]
    # the leading empty float array makes the joined histogram floating point
    hist = numpy.concatenate([numpy.array([])] + per_channel)
    # L1 Normalize
    return hist / len(img)
def _extract_regions(img):
    """
    build the initial region table from a labelled image

    img holds four values per pixel; the 4th one is used as the region
    label.  Returns a dict that maps each label to a region dict with the
    keys min_x, min_y, max_x, max_y, labels, size, hist_c and hist_t.
    """
    R = {}
    # get hsv image
    # NOTE(review): rgb2hsv presumably yields values in [0, 1] while
    # _calc_colour_hist bins over (0.0, 255.0) — confirm the intended range.
    hsv = skimage.color.rgb2hsv(img[:, :, :3])
    # pass 1: count pixel positions
    for y, i in enumerate(img):
        for x, (r, g, b, l) in enumerate(i):
            # initialize a new region
            if l not in R:
                R[l] = {
                    "min_x": 0xffff, "min_y": 0xffff,
                    "max_x": 0, "max_y": 0, "labels": [l]}
            # bounding box
            if R[l]["min_x"] > x:
                R[l]["min_x"] = x
            if R[l]["min_y"] > y:
                R[l]["min_y"] = y
            if R[l]["max_x"] < x:
                R[l]["max_x"] = x
            if R[l]["max_y"] < y:
                R[l]["max_y"] = y
    # pass 2: calculate texture gradient
    tex_grad = _calc_texture_gradient(img)
    # pass 3: calculate colour histogram of each region
    for k, v in list(R.items()):
        # colour histogram
        masked_pixels = hsv[:, :, :][img[:, :, 3] == k]
        # the division does not change the length: size is simply the
        # number of pixels selected for label k
        R[k]["size"] = len(masked_pixels / 4)
        R[k]["hist_c"] = _calc_colour_hist(masked_pixels)
        # texture histogram
        R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k])
    return R
def _extract_neighbours(regions):
    """
    list every pair of regions whose bounding boxes intersect

    regions maps a key to a region dict; each pair is returned as
    ((key_a, region_a), (key_b, region_b)), with a before b in the dict.
    """
    def intersect(a, b):
        # does one of b's horizontal / vertical bounds fall strictly inside a?
        x_inside = (a["min_x"] < b["min_x"] < a["max_x"]
                    or a["min_x"] < b["max_x"] < a["max_x"])
        y_inside = (a["min_y"] < b["min_y"] < a["max_y"]
                    or a["min_y"] < b["max_y"] < a["max_y"])
        # true when any corner of b's box lies strictly inside a's box
        return x_inside and y_inside

    items = list(regions.items())
    neighbours = []
    for position, first in enumerate(items):
        for second in items[position + 1:]:
            if intersect(first[1], second[1]):
                neighbours.append((first, second))
    return neighbours
def _merge_regions(r1, r2):
    """
    combine two regions into a new region dict

    The bounding box is the union of both boxes, the size is the sum of
    both sizes, each histogram is the size-weighted mean of the two, and
    the label lists are concatenated.
    """
    size1, size2 = r1["size"], r2["size"]
    new_size = size1 + size2
    merged = {}
    for key in ("min_x", "min_y"):
        merged[key] = min(r1[key], r2[key])
    for key in ("max_x", "max_y"):
        merged[key] = max(r1[key], r2[key])
    merged["size"] = new_size
    for key in ("hist_c", "hist_t"):
        merged[key] = (r1[key] * size1 + r2[key] * size2) / new_size
    merged["labels"] = r1["labels"] + r2["labels"]
    return merged
def selective_search(
        im_orig, scale=1.0, sigma=0.8, min_size=500):
    '''Selective Search

    Segments the image into small regions, then repeatedly merges the most
    similar pair of neighbouring regions until no candidate pair is left.
    Every initial and every merged region is reported.

    Parameters
    ----------
    im_orig : ndarray
        Input image
    scale : int
        Free parameter. Higher means larger clusters in felzenszwalb segmentation.
    sigma : float
        Width of Gaussian kernel for felzenszwalb segmentation.
    min_size : int
        Minimum component size for felzenszwalb segmentation.
    Returns
    -------
    img : ndarray
        image with region label
        region label is stored in the 4th value of each pixel [r,g,b,(region)]
    regions : array of dict
        [
            {
                'rect': (left, top, width, height),
                'labels': [...],
                'size': component_size
            },
            ...
        ]
        'labels' lists the initial segment labels that make up the region:
        a single label for an initial segment, the concatenation of both
        parents' lists for a merged region.
    '''
    assert im_orig.shape[2] == 3, "3ch image is expected"
    # load image and get smallest regions
    # region label is stored in the 4th value of each pixel [r,g,b,(region)]
    img = _generate_segments(im_orig, scale, sigma, min_size)
    if img is None:
        return None, {}
    imsize = img.shape[0] * img.shape[1]
    R = _extract_regions(img)
    # extract neighbouring information
    neighbours = _extract_neighbours(R)
    # calculate initial similarities
    S = {}
    for (ai, ar), (bi, br) in neighbours:
        S[(ai, bi)] = _calc_sim(ar, br, imsize)
    # hierarchal search
    while S != {}:
        # get highest similarity
        # (a stable sort followed by [-1]: among equal scores the pair
        # inserted last into S is chosen)
        i, j = sorted(S.items(), key=lambda i: i[1])[-1][0]
        # merge corresponding regions
        # the merged region gets a fresh key one above the current maximum
        t = max(R.keys()) + 1.0
        R[t] = _merge_regions(R[i], R[j])
        # mark similarities for regions to be removed
        key_to_delete = []
        for k, v in list(S.items()):
            if (i in k) or (j in k):
                key_to_delete.append(k)
        # remove old similarities of related regions
        for k in key_to_delete:
            del S[k]
        # calculate similarity set with the new region
        for k in [a for a in key_to_delete if a != (i, j)]:
            # n is the member of the old pair that is neither i nor j
            n = k[1] if k[0] in (i, j) else k[0]
            S[(t, n)] = _calc_sim(R[t], R[n], imsize)
    # report every region, initial and merged, in the documented format
    regions = []
    for k, r in list(R.items()):
        regions.append({
            'labels': r['labels'],
            'rect': (
                r['min_x'], r['min_y'],
                r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
            'size': r['size']
        })
    return img, regions
# Run the search on the image loaded above and print every proposed region.
img_ou, region = selective_search(im_orig)
for r in region:
    print(r)
# NOTE(review): img_ou is the labelled image returned by selective_search
# (image plus a region-label channel), presumably a 4-channel float array
# that cv2.imshow may not display as intended — confirm whether im_orig was
# meant here.
cv2.imshow('image', img_ou)
cv2.waitKey(0)
cv2.destroyAllWindows()
Error Message
Also, can you please explain what labels is? I am having a tough time understanding whether it is useful or not: for most regions it is a list with a single element, but later on it gets more elements. The major problem, however, is the image error mentioned above. Any help will be deeply appreciated.
Hi I know I am late but still i thought to answer your question in comment. You have to give cropped image as an input to your CNN.
I have tried the dHash algorithm which is applied on each image, then a hamming_distance is calculated on both hashes, the lower the number, the higher the similarity.
from PIL import Image
import os
import shutil
import glob
from plotData import *
def hamming_distance(s1, s2):
    """Return the number of positions at which two equal-length sequences differ.

    Raises ValueError when the sequences have different lengths.
    """
    if len(s1) != len(s2):
        raise ValueError("Undefined for sequences of unequal length")
    mismatches = 0
    for left, right in zip(s1, s2):
        if left != right:
            mismatches += 1
    return mismatches
def dhash(image, hash_size=8):
    """Return the difference hash of a PIL image as a hexadecimal string.

    The image is converted to greyscale and shrunk to
    (hash_size + 1) x hash_size pixels.  Each bit of the hash records
    whether a pixel is brighter than its right-hand neighbour; every group
    of 8 bits becomes two hex digits.  Bits left over after the last full
    group of 8 are dropped.
    """
    # Grayscale and shrink the image in one step.
    # LANCZOS is the same filter as the removed ANTIALIAS alias.
    image = image.convert('L').resize(
        (hash_size + 1, hash_size),
        Image.LANCZOS,
    )
    # Compare adjacent pixels.  range() works on Python 2 and 3 alike.
    difference = []
    for row in range(hash_size):
        for col in range(hash_size):
            pixel_left = image.getpixel((col, row))
            pixel_right = image.getpixel((col + 1, row))
            difference.append(pixel_left > pixel_right)
    # Convert the binary array to a hexadecimal string.
    decimal_value = 0
    hex_string = []
    for index, value in enumerate(difference):
        if value:
            decimal_value += 2**(index % 8)
        if (index % 8) == 7:
            hex_string.append(hex(decimal_value)[2:].rjust(2, '0'))
            decimal_value = 0
    return ''.join(hex_string)
# Hash both pictures and report how many hash characters differ.
# NOTE(review): both names open the same file, so the distance is always 0 —
# confirm which picture "modif" was meant to load.
orig = Image.open('imageA.png')
modif = Image.open('imageA.png')
hammingDistanceValue = hamming_distance(dhash(orig), dhash(modif))
# Call syntax prints the same text on Python 2 and also parses on Python 3.
print(hammingDistanceValue)
Unfortunately, this approach produces false positives because it does not really look at the line chart shapes as primary similarity feature. I guess, I'd need some kind of machine learning approach maybe from openCV or so. Can anyone guide me into the right direction to something that compares with high precision?
this is the initial image to compare against a collection of similar images.
this is a positive match
this is a false match
Update: I added some OpenCV magic to jme's suggestion below. I try to detect significant features first. However, it still produces false positives, since the overall indicator of similarity is the cumulative value over all features and does not take into account differences that can give a line chart a totally different meaning.
False Positive example
Example of preprocessed image with significant features marked as red dots
from PIL import Image
import os
import numpy as np
from scipy.interpolate import interp1d
import os.path
import shutil
import glob
from plotData import *
import cv2
from matplotlib import pyplot as plt
def load_image(path):
    """Load an image, mark its strongest corners and return a 2-D score map.

    Up to 25 corners found by cv2.goodFeaturesToTrack are drawn onto the
    image as filled circles; the result is the per-pixel mean over the
    channels of (255 - value) ** 2.

    Raises IOError when the file cannot be read as an image.
    """
    img = cv2.imread(path)
    if img is None:  # cv2.imread reports failure by returning None
        raise IOError("could not read image: " + path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    corners = cv2.goodFeaturesToTrack(gray, 25, 0.01, 10)
    if corners is not None:  # None when no corner was detected
        # np.int0 has been removed from NumPy; astype(np.intp) is the same cast
        for corner in corners.astype(np.intp):
            x, y = corner.ravel()
            # plain ints: newer OpenCV builds reject NumPy scalars here
            cv2.circle(img, (int(x), int(y)), 3, 255, -1)
    # NOTE(review): if img is uint8 this arithmetic wraps modulo 256 —
    # confirm whether a wider dtype was intended.
    return np.mean((255 - np.array(img))**2, axis=2)
symbol = "PBYI"
x = np.arange(1000)

# Start from an empty output folder for this symbol.
if os.path.exists('clusters1DSignal/'+symbol+'/'):
    shutil.rmtree('clusters1DSignal/'+symbol+'/')
os.mkdir('clusters1DSignal/'+symbol+'/')
shutil.copyfile('rendered/'+symbol+'.png', "clusters1DSignal/"+symbol+"/"+symbol+'.png')

# 1-D signal of the reference chart, resampled to 1000 points.
img1 = load_image('rendered/'+symbol+'.png')
y1 = np.argmax(img1, axis=0)
f1 = interp1d(np.linspace(0, 1000, len(y1)), y1)
z1 = f1(x)

for filename in glob.iglob('rendered/*.png'):
    try:
        img2 = load_image(filename)
    except Exception:
        # Skip files that cannot be processed, but let KeyboardInterrupt
        # and SystemExit through (a bare except swallowed those too).
        continue
    y2 = np.argmax(img2, axis=0)
    f2 = interp1d(np.linspace(0, 1000, len(y2)), y2)
    z2 = f2(x)
    result = np.linalg.norm(z1 - z2)
    if result < 2100:
        # Call syntax prints the same text on Python 2 and 3.
        print(str(result) + ": " + filename)
        # basename does not depend on which path separator glob returned
        symbolCompare = os.path.basename(filename).replace(".png", "")
        shutil.copyfile('rendered/'+symbolCompare+'.png', "clusters1DSignal/"+symbol+"/"+str(result)+"_"+symbolCompare+".png")
The approach I'd take is this: first, convert each image to a 1d signal by finding for each x pixel, a representative y pixel where the image is red. You can take the mean of the y pixels, but for simplicity, I'll just take the first that isn't white:
def load_image(path):
    """Return a 2-D score map of the image at path.

    The score of a pixel is the mean over its channels of
    (255 - value) ** 2.
    """
    # The context manager closes the file once the pixels have been copied
    # into the array.
    with Image.open(path) as data:
        pixels = np.array(data)
    # NOTE(review): if the pixels load as uint8 this arithmetic wraps
    # modulo 256 — confirm whether a wider dtype was intended.
    return np.mean((255 - pixels)**2, axis=2)
# Score maps of the three charts ...
img1, img2, img3 = (
    load_image(name) for name in ("one.png", "two.png", "three.png")
)
# ... and, for every column, the row index of its highest score.
y1, y2, y3 = (np.argmax(img, axis=0) for img in (img1, img2, img3))
y1, y2, and y3 are 1d arrays which represent the functions in the first, second, and third images. Now we simply treat each array as a vector, and find the l2 distance between them. We prefer the l2 distance because the Hamming distance will be somewhat sensitive for this task.
We have a slight problem: the images have different widths, so the y arrays aren't of compatible size. A quick-and-dirty fix is to interpolate them to a longer length (we'll use 1000):
# Resample every signal onto the same 1000 sample positions.
x = np.arange(1000)
f1, f2, f3 = (
    interp1d(np.linspace(0, 1000, len(y)), y) for y in (y1, y2, y3)
)
z1, z2, z3 = f1(x), f2(x), f3(x)
Now we can find the distance between the images:
>>> np.linalg.norm(z1 - z2)
2608.5368359281415
>>> np.linalg.norm(z1 - z3)
5071.1340610709549
>>> np.linalg.norm(z2 - z3)
5397.379183811714