How to detect if a 2D array is inside another 2D array? - Python

So with the help of a Stack Overflow member, I have the following code:

import time
import cStringIO
from PIL import Image, ImageGrab

data = "needle's (which is a png image) base64 code goes here"
decoded = data.decode('base64')
f = cStringIO.StringIO(decoded)
image = Image.open(f)
needle = image.load()

while True:
    screenshot = ImageGrab.grab()
    haystack = screenshot.load()
    if detectImage(haystack, needle):
        break
    else:
        time.sleep(5)
I've written the following code to check if the needle is in the haystack:
def detectImage(haystack, needle):
    counter = 0
    for hayrow in haystack:
        for haypix in hayrow:
            for needlerow in needle:
                for needlepix in needlerow:
                    if haypix == needlepix:
                        counter += 1
    if counter == 980:  # the needle has 980 pixels
        return True
    else:
        return False
The issue is that I get this error for line 3 (the for hayrow in haystack: loop): 'PixelAccess' object is not iterable
It was suggested to me that it would be easier to copy both needle and haystack into a numpy/scipy array. And then I can just use a function that checks to see if the 2D array needle is inside the 2D array haystack.
I need help with:
1) converting those arrays to numpy arrays.
2) a function that checks to see if the 2D array needle is inside the 2D array haystack. My function doesn't work.
These are the images:
Needle:
Haystack:

To convert the image into a numpy array, you should be able to simply do this:
import numpy as np
from PIL import Image
needle = Image.open('needle.png')
haystack = Image.open('haystack.jpg')
needle = np.asarray(needle)
haystack = np.asarray(haystack)
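For reference (my note, not part of the answer), np.asarray on an RGB(A) PIL image yields a (height, width, channels) uint8 array, so a quick shape check confirms the conversion:

# quick sanity check on the converted arrays (assumes the files above exist)
print needle.shape    # e.g. (rows, cols, 4) for an RGBA PNG
print haystack.shape  # e.g. (rows, cols, 3) for an RGB JPEG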
To get you started with finding the needle, note that this will give you a list of all the places where the corner matches:
haystack = np.array([[1,2,3],[3,2,1],[2,1,3]])
needle = np.array([[2,1],[1,3]])
np.where(haystack == needle[0,0])
# (array([0, 1, 2]),   <- row values
#  array([1, 1, 0]))   <- col values
Then, you can look at all the corner matches, and see if the subhaystack there matches:
h, w = needle.shape
rows, cols = np.where(haystack == needle[0,0])
for row, col in zip(rows, cols):
    if np.all(haystack[row:row+h, col:col+w] == needle):
        print "found it at row = %i, col = %i" % (row, col)
        break
else:
    print "no needle in haystack"
Below is a more robust version that finds the best match, and if it matches better than some threshold, considers the needle found. It returns the (difference, row, column) triple if found, None if not.

def find_needle(needle, haystack, tolerance=.80):
    """ input: PIL.Image objects
        output: (diff, row, col) of the found needle, else None """
    # convert to grayscale ("L"uminosity) for simplicity,
    # and to float so pixel differences below don't wrap around in uint8
    needle = np.asarray(needle.convert('L'), dtype=np.float32)
    haystack = np.asarray(haystack.convert('L'), dtype=np.float32)
    h, w = needle.shape
    H, W = haystack.shape
    L = haystack.max()
    best = (1., None, None)  # (normalized difference, row, col)
    rows, cols = np.where(np.abs(haystack - needle[0,0])/L < tolerance)
    for row, col in zip(rows, cols):
        if row+h > H or col+w > W:
            continue  # needle would run off the edge of the haystack
        diff = np.mean(np.abs(haystack[row:row+h, col:col+w] - needle))/L
        if diff < best[0]:
            best = (diff, row, col)
    return best if best[0] < tolerance else None
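A usage sketch for the function above (my example, assuming needle.png and haystack.jpg exist as in the question):

from PIL import Image

needle_img = Image.open('needle.png')
haystack_img = Image.open('haystack.jpg')
hit = find_needle(needle_img, haystack_img)
if hit is not None:
    diff, row, col = hit
    print "needle found at row = %i, col = %i (mean diff %.3f)" % (row, col, diff)
else:
    print "no needle in haystack"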

I finally managed to make a numpy-only implementation of a cross correlation search work... The cross-correlation is calculated using the cross-correlation theorem and FFTs.
from __future__ import division
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

def cross_corr(a, b):
    a_rows, a_cols = a.shape[:2]
    b_rows, b_cols = b.shape[:2]
    rows, cols = max(a_rows, b_rows), max(a_cols, b_cols)
    a_f = np.fft.fft2(a, s=(rows, cols), axes=(0, 1))
    b_f = np.fft.fft2(b, s=(rows, cols), axes=(0, 1))
    corr_ab = np.fft.fft2(a_f.conj()*b_f, axes=(0, 1))
    return np.rint(corr_ab / rows / cols)

def find_needle(haystack, needle, n=10):
    # convert to float and subtract 128 for better matching
    haystack = haystack.astype(np.float) - 128
    needle = needle.astype(np.float) - 128
    target = np.sum(np.sum(needle*needle, axis=0), axis=0)
    corr_hn = cross_corr(haystack, needle)
    delta = np.sum(np.abs(corr_hn - target), axis=-1)
    return np.unravel_index(np.argsort(delta, axis=None)[:n],
                            dims=haystack.shape[:2])

haystack = np.array(Image.open('haystack.jpg'))
needle = np.array(Image.open('needle.png'))[..., :3]

plt.imshow(haystack, interpolation='nearest')
dy, dx = needle.shape[:2]
candidates = find_needle(haystack, needle, 1)
for y, x in zip(*candidates):
    plt.plot([x, x+dx, x+dx, x, x], [y, y, y+dy, y+dy, y], 'g-', lw=2)
plt.show()
So the highest scoring point is the real needle:
>>> print candidates
(array([553], dtype=int64), array([821], dtype=int64))
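As a sanity check on the cross-correlation theorem that cross_corr relies on, here is a tiny 1D toy example (my own, with made-up data) comparing the FFT identity against a direct circular correlation:

# check the identity corr(a, b) = IFFT(conj(FFT(a)) * FFT(b)),
# where corr[k] = sum_n a[n] * b[(n+k) % N]
a = np.array([1., 2., 3., 4.])
b = np.array([0., 1., 0., 0.])
fft_corr = np.fft.ifft(np.fft.fft(a).conj() * np.fft.fft(b)).real
direct = np.array([np.dot(a, np.roll(b, -k)) for k in range(len(a))])
assert np.allclose(fft_corr, direct)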

You can use matchTemplate in opencv to detect the position:
import cv2
import numpy as np
import pylab as pl
needle = cv2.imread("needle.png")
haystack = cv2.imread("haystack.jpg")
diff = cv2.matchTemplate(haystack, needle, cv2.TM_CCORR_NORMED)
x, y = np.unravel_index(np.argmax(diff), diff.shape)
pl.figure(figsize=(12, 8))
im = pl.imshow(haystack[:,:, ::-1])
ax = pl.gca()
ax.add_artist(pl.Rectangle((y, x), needle.shape[1], needle.shape[0], transform=ax.transData, alpha=0.6))
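As a variant (not part of the answer above), cv2.minMaxLoc is the usual way to read the best-match position straight off the matchTemplate response map:

# read off the best match; for TM_CCORR_NORMED the maximum is the best score
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(diff)
x0, y0 = max_loc  # minMaxLoc returns (x, y) locations
print("best match at x=%d, y=%d, score=%.3f" % (x0, y0, max_val))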
Here is the output:

Related

ValueError: need at least one array to concatenate

I am having issues with
ValueError: need at least one array to concatenate
Below is the whole error message.
Training mode
Traceback (most recent call last):
  File "bcf.py", line 342, in <module>
    bcf.train()
  File "bcf.py", line 321, in train
    self._learn_codebook()
  File "bcf.py", line 142, in _learn_codebook
    feats_sc = np.concatenate(feats_sc, axis=1).transpose()
ValueError: need at least one array to concatenate
Below is the area of the problem.
def _learn_codebook(self):
    MAX_CFS = 800  # max number of contour fragments per image; if above, sample randomly
    CLUSTERING_CENTERS = 1500
    feats_sc = []
    for image in self.data.values():
        feats = image['cfs']
        feat_sc = feats[1]
        if feat_sc.shape[1] > MAX_CFS:
            # Sample MAX_CFS from contour fragments
            rand_indices = np.random.permutation(feat_sc.shape[1])
            feat_sc = feat_sc[:, rand_indices[:MAX_CFS]]
        feats_sc.append(feat_sc)
    feats_sc = np.concatenate(feats_sc, axis=1).transpose()
    print("Running KMeans...")
    self.kmeans = sklearn.cluster.KMeans(min(CLUSTERING_CENTERS, feats_sc.shape[0]), n_jobs=-1, algorithm='elkan').fit(feats_sc)
    print("Saving codebook...")
    self._save_kmeans(self.kmeans)
    return self.kmeans
Below is the complete CLASS
class BCF():
    def __init__(self):
        self.DATA_DIR = "/Users/minniemouse/TRAIN/bcf-master5/data/cuauv/"
        self.PERC_TRAINING_PER_CLASS = 0.5
        self.CODEBOOK_FILE = "codebook.data"
        self.CLASSIFIER_FILE = "classifier"
        self.LABEL_TO_CLASS_MAPPING_FILE = "labels_to_classes.data"
        self.classes = defaultdict(list)
        self.data = defaultdict(dict)
        self.counter = defaultdict(int)
        self.kmeans = None
        self.clf = None
        self.label_to_class_mapping = None

    def _load_classes(self):
        for dir_name, subdir_list, file_list in os.walk(self.DATA_DIR):
            if subdir_list:
                continue
            for f in sorted(file_list, key=hash):
                self.classes[dir_name.split('/')[-1]].append(os.path.join(dir_name, f))

    def _load_training(self):
        for cls in self.classes:
            images = self.classes[cls]
            for image in images[:int(len(images) * self.PERC_TRAINING_PER_CLASS)]:
                image_id = self._get_image_identifier(cls)
                self.data[image_id]['image'] = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
                if self.data[image_id]['image'] is None:
                    print("Failed to load " + image)

    def _load_testing(self):
        for cls in self.classes:
            images = self.classes[cls]
            for image in images[int(len(images) * self.PERC_TRAINING_PER_CLASS):]:
                image_id = self._get_image_identifier(cls)
                self.data[image_id]['image'] = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
                if self.data[image_id]['image'] is None:
                    print("Failed to load " + image)

    def _load_single(self, image):
        # Load single image data
        self.data.clear()
        image_id = self._get_image_identifier(None)
        self.data[image_id]['image'] = image

    def _save_label_to_class_mapping(self):
        self.label_to_class_mapping = {hash(cls): cls for cls in self.classes}
        with open(self.LABEL_TO_CLASS_MAPPING_FILE, 'wb') as out_file:
            pickle.dump(self.label_to_class_mapping, out_file, -1)

    def _load_label_to_class_mapping(self):
        if self.label_to_class_mapping is None:
            with open(self.LABEL_TO_CLASS_MAPPING_FILE, 'rb') as in_file:
                self.label_to_class_mapping = pickle.load(in_file)
        return self.label_to_class_mapping

    def _normalize_shapes(self):
        for (cls, idx) in self.data.keys():
            image = self.data[(cls, idx)]['image']
            # Remove void space
            y, x = np.where(image > 50)
            max_y = y.max()
            min_y = y.min()
            max_x = x.max()
            min_x = x.min()
            trimmed = image[min_y:max_y, min_x:max_x] > 50
            trimmed = trimmed.astype('uint8')
            trimmed[trimmed > 0] = 255
            self.data[(cls, idx)]['normalized_image'] = trimmed

    def _extract_cf(self):
        for (cls, idx) in self.data.keys():
            image = self.data[(cls, idx)]['normalized_image']
            images, contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            contour = sorted(contours, key=len)[-1]
            mat = np.zeros(image.shape, np.int8)
            cv2.drawContours(mat, [contour], -1, (255, 255, 255))
            #self.show(mat)
            MAX_CURVATURE = 1.5
            N_CONTSAMP = 50
            N_PNTSAMP = 10
            C = None
            for pnt in contour:
                if C is None:
                    C = np.array([[pnt[0][0], pnt[0][1]]])
                else:
                    C = np.append(C, [[pnt[0][0], pnt[0][1]]], axis=0)
            cfs = self._extr_raw_points(C, MAX_CURVATURE, N_CONTSAMP, N_PNTSAMP)
            tmp = mat.copy()
            for cf in cfs:
                for pnt in cf:
                    cv2.circle(tmp, (pnt[0], pnt[1]), 2, (255, 0, 0))
            #self.show(tmp)
            num_cfs = len(cfs)
            print("Extracted %s points" % (num_cfs))
            feat_sc = np.zeros((300, num_cfs))
            xy = np.zeros((num_cfs, 2))
            for i in range(num_cfs):
                cf = cfs[i]
                sc, _, _, _ = shape_context(cf)
                # shape context is 60x5 (60 bins at 5 reference points)
                sc = sc.flatten(order='F')
                sc /= np.sum(sc)  # normalize
                feat_sc[:, i] = sc
                # shape context descriptor sc for each cf is 300x1
                # save a point at the midpoint of the contour fragment
                xy[i, 0:2] = cf[np.round(len(cf) / 2. - 1).astype('int32'), :]
            sz = image.shape
            self.data[(cls, idx)]['cfs'] = (cfs, feat_sc, xy, sz)

    def _learn_codebook(self):
        MAX_CFS = 800  # max number of contour fragments per image; if above, sample randomly
        CLUSTERING_CENTERS = 1500
        feats_sc = []
        for image in self.data.values():
            feats = image['cfs']
            feat_sc = feats[1]
            if feat_sc.shape[1] > MAX_CFS:
                # Sample MAX_CFS from contour fragments
                rand_indices = np.random.permutation(feat_sc.shape[1])
                feat_sc = feat_sc[:, rand_indices[:MAX_CFS]]
            feats_sc.append(feat_sc)
        feats_sc = np.concatenate(feats_sc, axis=1).transpose()
        print("Running KMeans...")
        self.kmeans = sklearn.cluster.KMeans(min(CLUSTERING_CENTERS, feats_sc.shape[0]), n_jobs=-1, algorithm='elkan').fit(feats_sc)
        print("Saving codebook...")
        self._save_kmeans(self.kmeans)
        return self.kmeans
I have read through the various posts already describing this ValueError, but I am not having much luck figuring it out. I have now attached the class and the full error message information.
Please, can someone point out what I am missing?
Thank you
The problem comes from the length of your array. Check whether your array/list is longer than 0: print(len(feats_sc)).
Don't forget to check out the documentation: numpy.concatenate — NumPy v1.16 Manual
The problem seems to be in np.concatenate where it expects an array of arrays and it's not receiving that.
Refer: Scipy docs
numpy.concatenate((a1, a2, ...), axis=0, out=None)
Join a sequence of arrays along an existing axis.
Parameters:
    a1, a2, ... : sequence of array_like
        The arrays must have the same shape, except in the dimension
        corresponding to axis (the first, by default).
    axis : int, optional
        The axis along which the arrays will be joined. If axis is None,
        arrays are flattened before use. Default is 0.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what concatenate would have returned if no
        out argument were specified.
Returns:
    res : ndarray
        The concatenated array.
In your case, check what feats_sc contains.
You can debug using pdb:

python -m pdb <your-code>.py
(Pdb) b fullpath/to/your-code.py:line-number-to-break
(Pdb) c

c continues until a breakpoint is encountered
n moves to the next line
b sets a breakpoint
q quits
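Equivalently (my suggestion, not the answer's), you can drop a breakpoint directly into the source just before the failing line instead of setting it from the pdb prompt:

# hypothetical placement inside _learn_codebook, right before the failing call
import pdb; pdb.set_trace()
feats_sc = np.concatenate(feats_sc, axis=1).transpose()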
Just to make it clearer, running the following piece of code throws the same ValueError: need at least one array to concatenate error.
import numpy as np
feats_sc = np.array([])
feats_sc = np.concatenate(feats_sc, axis=1)
whereas the following code does not.
import numpy as np
feats_sc = np.array(([[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [1 ,2 ,3]]))
feats_sc = np.concatenate(feats_sc, axis=1)
The reason is that in the former, the numpy array is empty, and in the latter, it is not.
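A defensive guard along these lines (my sketch, not the poster's code) makes the failure mode explicit:

# sketch: fail with a clearer message when no features were collected
if not feats_sc:
    raise ValueError("no contour-fragment features were collected; "
                     "check that self.data was populated")
feats_sc = np.concatenate(feats_sc, axis=1).transpose()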

How to get the index of a list items in another list?

Consider I have these lists:
l = [5,6,7,8,9,10,5,15,20]
m = [10,5]
I want to get the index of m in l. I used list comprehension to do that:
[(i,i+1) for i,j in enumerate(l) if m[0] == l[i] and m[1] == l[i+1]]
Output : [(5,6)]
But if I have more numbers in m, I feel it's not the right way to do it. So is there any easy approach in Python or with NumPy?
Another example:
l = [5,6,7,8,9,10,5,15,20,50,16,18]
m = [10,5,15,20]
The output should be:
[(5,6,7,8)]
The easiest way (using pure Python) would be to iterate over the items and first only check if the first item matches. This avoids doing sublist comparisons when not needed. Depending on the contents of your l this could outperform even NumPy broadcasting solutions:
def func(haystack, needle):  # obviously needs a better name ...
    if not needle:
        return
    # just an optimization
    lengthneedle = len(needle)
    firstneedle = needle[0]
    for idx, item in enumerate(haystack):
        if item == firstneedle:
            if haystack[idx:idx+lengthneedle] == needle:
                yield tuple(range(idx, idx+lengthneedle))
>>> list(func(l, m))
[(5, 6, 7, 8)]
In case you're interested in speed, I checked the performance of the approaches (borrowing from my setup here):
import random
import numpy as np

# strided_app is from https://stackoverflow.com/a/40085052/
def strided_app(a, L, S):  # Window len = L, Stride len/stepsize = S
    nrows = ((a.size-L)//S)+1
    n = a.strides[0]
    return np.lib.stride_tricks.as_strided(a, shape=(nrows,L), strides=(S*n,n))

def pattern_index_broadcasting(all_data, search_data):
    n = len(search_data)
    all_data = np.asarray(all_data)
    all_data_2D = strided_app(np.asarray(all_data), n, S=1)
    return np.flatnonzero((all_data_2D == search_data).all(1))

# view1D is from https://stackoverflow.com/a/45313353/
def view1D(a, b):  # a, b are arrays
    a = np.ascontiguousarray(a)
    void_dt = np.dtype((np.void, a.dtype.itemsize * a.shape[1]))
    return a.view(void_dt).ravel(), b.view(void_dt).ravel()

def pattern_index_view1D(all_data, search_data):
    a = strided_app(np.asarray(all_data), L=len(search_data), S=1)
    a0v, b0v = view1D(np.asarray(a), np.asarray(search_data))
    return np.flatnonzero(np.in1d(a0v, b0v))

def find_sublist_indices(haystack, needle):
    if not needle:
        return
    # just an optimization
    lengthneedle = len(needle)
    firstneedle = needle[0]
    restneedle = needle[1:]
    for idx, item in enumerate(haystack):
        if item == firstneedle:
            if haystack[idx+1:idx+lengthneedle] == restneedle:
                yield tuple(range(idx, idx+lengthneedle))

def Divakar1(l, m):
    return np.squeeze(pattern_index_broadcasting(l, m)[:,None] + np.arange(len(m)))

def Divakar2(l, m):
    return np.squeeze(pattern_index_view1D(l, m)[:,None] + np.arange(len(m)))

def MSeifert(l, m):
    return list(find_sublist_indices(l, m))

# Timing setup
timings = {Divakar1: [], Divakar2: [], MSeifert: []}
sizes = [2**i for i in range(5, 20, 2)]

# Timing
for size in sizes:
    l = [random.randint(0, 50) for _ in range(size)]
    m = [random.randint(0, 50) for _ in range(10)]
    larr = np.asarray(l)
    marr = np.asarray(m)
    for func in timings:
        # first timings:
        # res = %timeit -o func(l, m)
        # second timings:
        if func is MSeifert:
            res = %timeit -o func(l, m)
        else:
            res = %timeit -o func(larr, marr)
        timings[func].append(res)

%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(1)
ax = plt.subplot(111)

for func in timings:
    ax.plot(sizes,
            [time.best for time in timings[func]],
            label=str(func.__name__))

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('size')
ax.set_ylabel('time [seconds]')
ax.grid(which='both')
ax.legend()
plt.tight_layout()
In case your l and m are lists, my function outperforms the NumPy solutions for all sizes:
But in case you have these as NumPy arrays, you'll get faster results for large arrays (size > 1000 elements) when using Divakar's NumPy solutions:
You are basically looking for the starting indices of a list in another list.
Approach #1 : One approach would be to create sliding windows of the elements in the list in which we are searching, giving us a 2D array, and then simply use NumPy broadcasting to compare the search list against each row of that 2D sliding-window array. Thus, one method would be -
# strided_app is from https://stackoverflow.com/a/40085052/
def strided_app(a, L, S):  # Window len = L, Stride len/stepsize = S
    nrows = ((a.size-L)//S)+1
    n = a.strides[0]
    return np.lib.stride_tricks.as_strided(a, shape=(nrows,L), strides=(S*n,n))

def pattern_index_broadcasting(all_data, search_data):
    n = len(search_data)
    all_data = np.asarray(all_data)
    all_data_2D = strided_app(np.asarray(all_data), n, S=1)
    return np.flatnonzero((all_data_2D == search_data).all(1))

out = np.squeeze(pattern_index_broadcasting(l, m)[:,None] + np.arange(len(m)))
Sample runs -

In [340]: l = [5,6,7,8,9,10,5,15,20,50,16,18]
     ...: m = [10,5,15,20]
     ...:

In [341]: np.squeeze(pattern_index_broadcasting(l, m)[:,None] + np.arange(len(m)))
Out[341]: array([5, 6, 7, 8])

In [342]: l = [5,6,7,8,9,10,5,15,20,50,16,18,10,5,15,20]
     ...: m = [10,5,15,20]
     ...:

In [343]: np.squeeze(pattern_index_broadcasting(l, m)[:,None] + np.arange(len(m)))
Out[343]:
array([[ 5,  6,  7,  8],
       [12, 13, 14, 15]])
Approach #2 : Another method would be to get the sliding windows and then take a row-wise scalar view into both the data to be searched and the data to search for, giving us 1D data to work with, like so -
# view1D is from https://stackoverflow.com/a/45313353/
def view1D(a, b):  # a, b are arrays
    a = np.ascontiguousarray(a)
    void_dt = np.dtype((np.void, a.dtype.itemsize * a.shape[1]))
    return a.view(void_dt).ravel(), b.view(void_dt).ravel()

def pattern_index_view1D(all_data, search_data):
    a = strided_app(np.asarray(all_data), L=len(search_data), S=1)
    a0v, b0v = view1D(np.asarray(a), np.asarray(search_data))
    return np.flatnonzero(np.in1d(a0v, b0v))

out = np.squeeze(pattern_index_view1D(l, m)[:,None] + np.arange(len(m)))
2020 Versions
In search of easier/more compact approaches, we could look into scikit-image's view_as_windows for getting sliding windows with a built-in. I am assuming arrays as inputs for less messy code. For lists as input, we have to use np.asarray() as shown earlier.
Approach #3 : Basically a derivative of pattern_index_broadcasting with view_as_windows for a one-liner, with a as the larger data and b as the array to be searched for -
from skimage.util import view_as_windows
np.flatnonzero((view_as_windows(a,len(b))==b).all(1))[:,None]+np.arange(len(b))
Approach #4 : For a small number of matches from b in a, we could optimize, by looking for first element match from b to reduce the dataset size for searches -
mask = a[:-len(b)+1]==b[0]
mask[mask] = (view_as_windows(a,len(b))[mask]).all(1)
out = np.flatnonzero(mask)[:,None]+np.arange(len(b))
Approach #5 : For a small sized b, we could simply run a loop for each of the elements in b and perform bitwise and-reduction -
mask = np.bitwise_and.reduce([a[i:len(a)-len(b)+1+i]==b[i] for i in range(len(b))])
out = np.flatnonzero(mask)[:,None]+np.arange(len(b))
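A quick check of Approach #3 on the question's sample data (my run, assuming scikit-image is installed):

import numpy as np
from skimage.util import view_as_windows

a = np.array([5, 6, 7, 8, 9, 10, 5, 15, 20, 50, 16, 18])
b = np.array([10, 5, 15, 20])
out = np.flatnonzero((view_as_windows(a, len(b)) == b).all(1))[:, None] + np.arange(len(b))
print(out)  # [[5 6 7 8]]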
Just making the point that MSeifert's approach can, of course, also be implemented in NumPy:
def pp(h, n):
    nn = len(n)
    NN = len(h)
    c = (h[:NN-nn+1]==n[0]).nonzero()[0]
    if c.size==0: return
    for i, l in enumerate(n[1:].tolist(), 1):
        c = c[h[i:][c]==l]
        if c.size==0: return
    return np.arange(c[0], c[0]+nn)
from collections import defaultdict

def get_data(l1, l2):
    d = defaultdict(list)
    [d[item].append(index) for index, item in enumerate(l1)]
    print(d)
Using defaultdict to store indices of elements from other list.
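One way to finish this idea (my sketch, building on the snippet above): use the stored indices of m[0] as candidate start positions and verify the slice at each one:

from collections import defaultdict

def find_with_index_map(l, m):
    # map each value to the positions where it occurs in l
    d = defaultdict(list)
    for index, item in enumerate(l):
        d[item].append(index)
    # only positions where m[0] occurs can start a match
    return [tuple(range(i, i + len(m)))
            for i in d[m[0]] if l[i:i + len(m)] == m]

l = [5, 6, 7, 8, 9, 10, 5, 15, 20, 50, 16, 18]
m = [10, 5, 15, 20]
print(find_with_index_map(l, m))  # [(5, 6, 7, 8)]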

How to vectorize a code with python numpy.bincount, using apply along axis

I'm trying to vectorize some code with NumPy, to run it using multiprocessing, but I can't understand how numpy.apply_along_axis works. This is an example of the code, vectorized using map:
import numpy
from scipy import sparse
import multiprocessing
from matplotlib import pyplot

# first I build a matrix of some x positions vs time data in a sparse format
matrix = numpy.random.randint(2, size=100).astype(float).reshape(10, 10)
x = numpy.nonzero(matrix)[0]
times = numpy.nonzero(matrix)[1]
weights = numpy.random.rand(x.size)

# then I define an array of y positions
nStepsY = 5
y = numpy.arange(1, nStepsY+1)

# now I build an image using x-y-times coordinates and x-times weights
def mapIt(ithStep):
    ncolumns = 80
    image = numpy.zeros(ncolumns)
    yTimed = y[ithStep]*times
    positions = (numpy.round(x-yTimed)+50).astype(int)
    values = numpy.bincount(positions, weights)
    values = values[numpy.nonzero(values)]
    positions = numpy.unique(positions)
    image[positions] = values
    return image

image = list(map(mapIt, range(nStepsY)))
image = numpy.array(image)

a = pyplot.imshow(image, aspect=10)
Here is the output plot:
I tried to use numpy.apply_along_axis, but this function allows me to iterate only along the rows of image, while I need to iterate along the ithStep index too. E.g.:
# now I build an image using x-y-times coordinates and x-times weights
nrows = nStepsY
ncolumns = 80
matrix = numpy.zeros(nrows*ncolumns).reshape(nrows, ncolumns)

def applyIt(image):
    image = numpy.zeros(ncolumns)
    yTimed = y[ithStep]*times
    positions = (numpy.round(x-yTimed)+50).astype(int)
    values = numpy.bincount(positions, weights)
    values = values[numpy.nonzero(values)]
    positions = numpy.unique(positions)
    image[positions] = values
    return image

imageApplied = numpy.apply_along_axis(applyIt, 1, matrix)
a = pyplot.imshow(imageApplied, aspect=10)
It obviously returns only the first row nrows times, since nothing iterates ithStep:
And here is the wrong plot:
Is there a way to iterate over an index, or to use an index while numpy.apply_along_axis iterates?
Here is the code with only matrix operations: it's quite a bit faster than map or apply_along_axis, but uses much more memory.
(In this function I use a trick with scipy.sparse, which works more intuitively than numpy arrays when you try to sum numbers onto the same element.)

def fullmatrix(nRows, nColumns):
    y = numpy.arange(1, nStepsY+1)
    image = numpy.zeros((nRows, nColumns))
    yTimed = numpy.outer(y, times)
    x3d = numpy.outer(numpy.ones(nStepsY), x)
    weights3d = numpy.outer(numpy.ones(nStepsY), weights)
    y3d = numpy.outer(y, numpy.ones(x.size))
    positions = (numpy.round(x3d-yTimed)+50).astype(int)
    matrix = sparse.coo_matrix((numpy.ravel(weights3d), (numpy.ravel(y3d), numpy.ravel(positions)))).todense()
    return matrix

image = fullmatrix(nStepsY, 80)
a = pyplot.imshow(image, aspect=10)
This way is simpler and very fast! Thank you so much.

nStepsY = 5
nRows = nStepsY
nColumns = 80
y = numpy.arange(1, nStepsY+1)
image = numpy.zeros((nRows, nColumns))
fakeRow = numpy.zeros(positions.size)

def itermatrix(ithStep):
    yTimed = y[ithStep]*times
    positions = (numpy.round(x-yTimed)+50).astype(int)
    matrix = sparse.coo_matrix((weights, (fakeRow, positions))).todense()
    matrix = numpy.ravel(matrix)
    missColumns = (nColumns-matrix.size)
    zeros = numpy.zeros(missColumns)
    matrix = numpy.concatenate((matrix, zeros))
    return matrix

for i in numpy.arange(nStepsY):
    image[i] = itermatrix(i)

# or, without initialization of image:
imageMapped = list(map(itermatrix, range(nStepsY)))
imageMapped = numpy.array(imageMapped)
It feels like attempting to use map or apply_along_axis is obscuring the essentially iterative nature of the problem.
I rewrote your code as an explicit loop on y:
nStepsY = 5
y = numpy.arange(1, nStepsY+1)
image = numpy.zeros((nStepsY, 80))
for i, yi in enumerate(y):
    yTimed = yi*times
    positions = (numpy.round(x-yTimed)+50).astype(int)
    values = numpy.bincount(positions, weights)
    values = values[numpy.nonzero(values)]
    positions = numpy.unique(positions)
    image[i, positions] = values

a = pyplot.imshow(image, aspect=10)
pyplot.show()
Looking at the code, I think I could calculate positions for all y values making a (y.shape[0],times.shape[0]) array. But the rest, the bincount and unique still have to work row by row.
apply_along_axis, when working with a 2d array and axis=1, essentially does:

res = np.zeros_like(arr)
for i in range(arr.shape[0]):
    res[i, :] = func1d(arr[i, :])

If the input array has more dimensions it constructs a more elaborate indexing object [i,j,k,:]. And it can handle cases where func1d returns a different size array than the input. But in any case it is just a generalized iteration tool; a minimal illustration is below.
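A minimal toy illustration of that equivalence (my example, with made-up data):

import numpy as np

arr = np.arange(12).reshape(3, 4)
via_apply = np.apply_along_axis(np.cumsum, 1, arr)    # row-wise func1d
via_loop = np.array([np.cumsum(row) for row in arr])  # explicit loop
assert (via_apply == via_loop).all()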
Moving the initial positions creation outside the loop:
yTimed = y[:,None]*times
positions = (numpy.round(x-yTimed)+50).astype(int)
image = numpy.zeros((positions.shape[0], 80))
for i, pos in enumerate(positions):
    values = numpy.bincount(pos, weights)
    values = values[numpy.nonzero(values)]
    pos = numpy.unique(pos)
    image[i, pos] = values
Now I can cast this as an apply_along_axis problem, with an applyIt that takes a positions vector (with all the yTimed information) rather than blank image vector.
def applyIt(pos, size, weights):
    acolumn = numpy.zeros(size)
    values = numpy.bincount(pos, weights)
    values = values[numpy.nonzero(values)]
    pos = numpy.unique(pos)
    acolumn[pos] = values
    return acolumn

image = numpy.apply_along_axis(applyIt, 1, positions, 80, weights)
Timing-wise I expect it to be a bit slower than my explicit iteration. It has to do more setup work, including a test call applyIt(positions[0,:],...) to determine the size of its return array (i.e. image has a different shape than positions).
def csrmatrix(y, times, x, weights):
    yTimed = numpy.outer(y, times)
    n = y.shape[0]
    x3d = numpy.outer(numpy.ones(n), x)
    weights3d = numpy.outer(numpy.ones(n), weights)
    y3d = numpy.outer(y, numpy.ones(x.size))
    positions = (numpy.round(x3d-yTimed)+50).astype(int)
    #print(y.shape, weights3d.shape, y3d.shape, positions.shape)
    matrix = sparse.csr_matrix((numpy.ravel(weights3d), (numpy.ravel(y3d), numpy.ravel(positions))))
    #print(repr(matrix))
    return matrix

# one call
image = csrmatrix(y, times, x, weights)

# iterative call
alist = []
for yi in numpy.arange(1, nStepsY+1):
    alist.append(csrmatrix(numpy.array([yi]), times, x, weights))

def mystack(alist):
    # concatenate without offset
    row, col, data = [], [], []
    for A in alist:
        A = A.tocoo()
        row.extend(A.row)
        col.extend(A.col)
        data.extend(A.data)
    print(len(row), len(col), len(data))
    return sparse.csr_matrix((data, (row, col)))

vimage = mystack(alist)

getting elements in an array1 that are not in array2

Main Problem
What is a better/more Pythonic way of retrieving elements in one array that are not found in a different array? This is what I have:
idata = [np.column_stack(data[k]) for k in range(len(data)) if data[k] not in final]
idata = np.vstack(idata)
My interest is in performance. My data is an (X, Y, Z) array of size 7000 x 3 and my gdata is an (X, Y) array of size 11000 x 2.
Preamble
I am working on an octant search to find the n number (e.g. 8) of points (+) closest to my circular point (o) in each octant. This would mean that my points (+) are reduced to only 64 (8 per octant). Then, for each gdata point, I would save the elements that are not found in data.
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from collections import defaultdict

root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
data = pd.read_excel(file_path)
data = np.array(data, dtype=np.float)
nrow, cols = data.shape

file_path1 = filedialog.askopenfilename()
gdata = pd.read_excel(file_path1)
gdata = np.array(gdata, dtype=np.float)
gnrow, gcols = gdata.shape

npoint_per_octant = 8  # number of nearest points kept per octant
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf  # handle edge case

for j in range(gnrow):
    delta = gdata[j, ::] - data[:, :2]
    angles = np.arctan2(delta[:, 1], delta[:, 0])
    octantsort = []
    for i in range(8):
        data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
        if data_i.size > 0:
            dist_order = np.argsort(cdist(data_i[:, :2], gdata[j, ::][np.newaxis]), axis=0)
            if dist_order.size < npoint_per_octant+1:
                [octantsort.append(data_i[dist_order[:npoint_per_octant][j]]) for j in range(dist_order.size)]
            else:
                [octantsort.append(data_i[dist_order[:npoint_per_octant][j]]) for j in range(npoint_per_octant)]
    final = np.vstack(octantsort)
    idata = [np.column_stack(data[k]) for k in range(len(data)) if data[k] not in final]
    idata = np.vstack(idata)
Is there an efficient and Pythonic way of doing this to increase performance in the last two lines of the code?
If I understand your code correctly, then I see the following potential savings:
dedent the final = ... line
don't use arctan it's expensive; since you only want octants compare the coordinates to zero and to each other
don't do a full argsort, use argpartition instead
make your octantsort an "octantargsort", i.e. store the indices into data, not the data points themselves; this would save you the search in the last but one line and allow you to use np.delete for removing
don't use append inside a list comprehension. This will produce a list of Nones that is immediately discarded. You can use list.extend outside the comprehension instead
besides, these list comprehensions look like a convoluted way of converting data_i[dist_order[:npoint_per_octant]] into a list, why not simply cast, or even keep as an array, since you want to vstack in the end?
Here is some sample code illustrating these ideas:
import numpy as np

def discard_nearest_in_each_octant(eater, eaten, n_eaten_p_eater):
    # build octants
    # start with quadrants ...
    top, left = (eaten < eater).T
    quadrants = [np.where(v&h)[0] for v in (top, ~top) for h in (left, ~left)]
    dcoord2 = (eaten - eater)**2
    dc2quadrant = [dcoord2[q] for q in quadrants]
    # ... and split them
    oct4158 = [q[:, 0] < q[:, 1] for q in dc2quadrant]
    # main loop
    dc2octants = [[q[o], q[~o]] for q, o in zip(dc2quadrant, oct4158)]
    reloap = [[
        np.argpartition(o.sum(-1), n_eaten_p_eater)[:n_eaten_p_eater]
        if o.shape[0] > n_eaten_p_eater else None
        for o in opair] for opair in dc2octants]
    # translate indices
    octantargpartition = [q[so] if oap is None else q[np.where(so)[0][oap]]
                          for q, o, oaps in zip(quadrants, oct4158, reloap)
                          for so, oap in zip([o, ~o], oaps)]
    octantargpartition = np.concatenate(octantargpartition)
    return np.delete(eaten, octantargpartition, axis=0)
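A usage sketch with random stand-in data (my example; eater is one query point, eaten are the candidate points, and 8 plays the role of the points kept per octant):

import numpy as np

rng = np.random.RandomState(0)
eaten = rng.rand(7000, 2)   # (X, Y) candidate points
eater = rng.rand(2)         # one circular point (o)
remaining = discard_nearest_in_each_octant(eater, eaten, 8)
print(remaining.shape)      # up to 64 points fewer than eaten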

Full Frequency Array Reconstruction after numpy.fft.rfftn

I have a real valued grayscale 3D image with resolution rows x cols x deps. I take the dft of the image using freq = numpy.fft.rfftn(myImage)
The returned array, freq, is resolution: rows x cols x deps/2 + 1. I want to reconstruct freq as if it were the output of numpy.fft.fftn(myImage), that is, I want the dimensions of freq to be rows x cols x deps.
I know that the correspondence for the DFT of real-valued data is X_(k1,k2,k3) = X*_(N1-k1, N2-k2, N3-k3), where * denotes complex conjugation (indices taken modulo the respective Ni).
I could reconstruct the full freq array using a loop, but that would be too slow; I'm having trouble figuring out the correct way of doing it with array slicing.
Thanks!
FYI, I need the full array because I'll be element wise multiplying it with another array of full size rows x cols x deps, I cannot assume that array has any structure (like symmetry) that would make it unnecessary for me to reconstruct the full freq array.
I got it!
import numpy as np
import time

rows = 181
cols = 217
deps = 181

jac_k = np.random.rand(rows, cols, deps)*5

prev = time.time()
fft1 = np.fft.fftn(jac_k)
print time.time() - prev

prev = time.time()
fft2 = np.fft.rfftn(jac_k)
if deps % 2 == 0:
    fft2Star = np.conj(fft2[:, :, -2:0:-1])
else:
    fft2Star = np.conj(fft2[:, :, -1:0:-1])
fft2Star[1::, :, :] = fft2Star[:0:-1, :, :]
fft2Star[:, 1::, :] = fft2Star[:, :0:-1, :]
fft2 = np.concatenate((fft2, fft2Star), axis=2)
print time.time() - prev

print np.linalg.norm(fft1 - fft2)
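An alternative cross-check (my addition, not part of the answer above): invert the half-spectrum with irfftn and take a full complex FFT of the result. This is slower and uses more memory, but is trivially correct:

# round-trip: half-spectrum -> spatial domain -> full complex spectrum
half = np.fft.rfftn(jac_k)
full = np.fft.fftn(np.fft.irfftn(half, s=jac_k.shape))
print np.linalg.norm(fft1 - full)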
