How to use ArrayFire batched 2D convolution - python

Reading through the ArrayFire documentation, I noticed that the library supports batched operations for 2D convolution, and I need to apply N filters to an image using the C++ API.
For easy testing, I decided to write a simple Python script to verify the convolution results. However, I couldn't get correct results when using more than one filter and comparing each output to OpenCV's 2D convolution applied separately. Here is my Python script:
import arrayfire as af
import cv2
import numpy as np
np.random.seed(1)
np.set_printoptions(precision=3)
af.set_backend('cuda')
n_kernels = 2
image = np.random.randn(512,512).astype(np.float32)
kernels_list = [np.random.randn(7,7).astype(np.float32) for _ in range(n_kernels)]
conv_cv_list = [cv2.filter2D(image, -1, cv2.flip(kernel,-1), borderType=cv2.BORDER_CONSTANT) for kernel in kernels_list]
image_gpu = af.array.Array(image.ctypes.data, image.shape, image.dtype.char)
kernels = np.stack(kernels_list, axis=-1) if n_kernels > 1 else kernels_list[0]
kernels_gpu = af.array.Array(kernels.ctypes.data, kernels.shape, kernels.dtype.char)
conv_af_gpu = af.convolve2(image_gpu, kernels_gpu)
conv_af = conv_af_gpu.to_ndarray()
if n_kernels == 1:
    conv_af = conv_af[..., None]
for kernel_idx in range(n_kernels):
    print("CV conv:", conv_cv_list[kernel_idx][0, 0])
    print("AF conv:", conv_af[0, 0, kernel_idx])
That said, I would like to know how to properly use ArrayFire's batched support.

ArrayFire is column-major, whereas OpenCV and NumPy are row-major, which can cause issues when raw buffers are passed between them. We provide an interop function that handles this for you, as follows.
import arrayfire as af
import cv2
import numpy as np
np.random.seed(1)
np.set_printoptions(precision=3)
af.set_backend('cuda')
n_kernels = 2
image = np.random.randn(512,512).astype(np.float32)
kernels_list = [np.random.randn(7,7).astype(np.float32) for _ in range(n_kernels)]
conv_cv_list = [cv2.filter2D(image, -1, cv2.flip(kernel,-1), borderType=cv2.BORDER_CONSTANT) for kernel in kernels_list]
image_gpu = af.interop.from_ndarray(image)  # note the interop helper here
kernels = np.stack(kernels_list, axis=-1) if n_kernels > 1 else kernels_list[0]
kernels_gpu = af.interop.from_ndarray(kernels)  # note the interop helper here
conv_af_gpu = af.convolve2(image_gpu, kernels_gpu)
conv_af = conv_af_gpu.to_ndarray()
if n_kernels == 1:
    conv_af = conv_af[..., None]
for kernel_idx in range(n_kernels):
    print("CV conv:", conv_cv_list[kernel_idx][0, 0])
    print("AF conv:", conv_af[0, 0, kernel_idx])
We are working on a newer version of ArrayFire Python which will address this issue.
Good luck!

Related

Implementing STFT with PyTorch gives a slightly different result than the STFT with Librosa

I am trying to implement STFT with PyTorch, but the output from the PyTorch implementation is slightly off when compared with the output from Librosa.
Librosa version
import numpy as np
from librosa.core import stft
import matplotlib.pyplot as plt
np.random.seed(3)
y = np.sin(2*np.pi*50*np.linspace(0,10,2048))+np.sin(2*np.pi*20*np.linspace(0,10,2048)) + np.random.normal(scale=1,size=2048)
S_stft = np.abs(stft(y, hop_length=512, n_fft=2048,center=False))
plt.plot(S_stft)
PyTorch version
import numpy as np
import torch
from torch.autograd import Variable
from torch.nn.functional import conv1d
from scipy.signal.windows import hann
stride = 512
def create_filters(d, k, low=50, high=6000):
    x = np.arange(0, d, 1)
    wsin = np.empty((k, 1, d), dtype=np.float32)
    wcos = np.empty((k, 1, d), dtype=np.float32)
    start_freq = low
    end_freq = high
    # num_cycles = start_freq*d/44000.
    # scaling_ind = np.log(end_freq/start_freq)/k
    window_mask = hann(2048, sym=False)  # same as 0.5-0.5*np.cos(2*np.pi*x/(k))
    for ind in range(k):
        wsin[ind, 0, :] = window_mask * np.sin(2*np.pi*ind/k*x)
        wcos[ind, 0, :] = window_mask * np.cos(2*np.pi*ind/k*x)
    return wsin, wcos
wsin, wcos = create_filters(2048,2048)
wsin_var = Variable(torch.from_numpy(wsin), requires_grad=False)
wcos_var = Variable(torch.from_numpy(wcos),requires_grad=False)
network_input = torch.from_numpy(y).float()
network_input = network_input.reshape(1,-1)
zx = torch.sqrt(conv1d(network_input[:,None,:], wsin_var, stride=stride).pow(2)
                + conv1d(network_input[:,None,:], wcos_var, stride=stride).pow(2))
pytorch_Xs = zx.cpu().numpy()
plt.plot(pytorch_Xs[0,:1025,0])
My Question
The two graphs might look the same, but if I check the two outputs with np.allclose, I can see that they are slightly different.
np.allclose(S_stft, pytorch_Xs[0,:1025,0].reshape(1025,1))
output >>> False
Only when I raise the tolerance to 1e-5 does it give me a True result:
np.allclose(S_stft, pytorch_Xs[0,:1025,0].reshape(1025,1),atol=1e-5)
output >>> True
What causes the difference in values? Is it because of the data conversion by using torch.from_numpy(y).float()?
I would like the difference in values to be less than 1e-7; 1e-8 would be even better.
The difference comes from the two libraries' default floating-point precision:
NumPy's default float is 64-bit.
PyTorch's default float is 32-bit.
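A quick way to confirm this, and to push the error below 1e-7, is to run the whole PyTorch pipeline in float64. A minimal sketch, reusing y, wsin, wcos, and stride from the question (and assuming create_filters is also changed to dtype=np.float64 so the filters are not rounded to 32-bit first):
wsin64 = torch.from_numpy(wsin.astype(np.float64))
wcos64 = torch.from_numpy(wcos.astype(np.float64))
x64 = torch.from_numpy(y).reshape(1, 1, -1)  # y is float64 already
# same computation as before, but every operand is now 64-bit
zx64 = torch.sqrt(conv1d(x64, wsin64, stride=stride).pow(2)
                  + conv1d(x64, wcos64, stride=stride).pow(2))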

n-dimensional sliding window operation in python using gpu accelerated libraries, preferably Tensorflow?

I am looking for a GPU-accelerated n-dimensional sliding window operation implementation in Python using Tensorflow. You can post your implementation in Torch, Caffe or Theano, but I'll choose the Tensorflow implementation as the accepted answer. Please post a working code snippet that performs a 2D median filter operation (and that, hopefully, can be applied to n-dimensional images with no or minimal code changes).
With my limited knowledge of Tensorflow, I believe the two potential modules to start with are sliding_window_batch or extract_image_patches, combined with some map/apply/reshape magic?
My failed attempt is posted below, for entertainment. Please note that I posted a similar question two years ago asking for a Theano implementation; nowadays most people are using tf/keras or torch.
import time
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tensorflow as tf
from tensorflow.contrib.data.python.ops import sliding
from skimage import img_as_float, data
from scipy.signal import medfilt
imgs = img_as_float(data.camera())
### SCIPY median ###
stime = time.time()
scipysmoothed = medfilt(imgs,(9,9))
etime = time.time()
print('scipy smoothed: {:1.4f} seconds'.format(etime-stime))
### Failed attempt of TF median ###
method = 'Tensorflow'
stime = time.time()
window_func = lambda x: tf.contrib.distributions.percentile(x, 50.0)
# create TensorFlow Dataset object
data = tf.data.Dataset.from_tensor_slices(imgs)
# sliding window - only 1d is allowed?
window = 3
stride = 1
data = data.apply(sliding.sliding_window_batch(window, stride)).map(lambda x: window_func(x))
# create TensorFlow Iterator object
iterator = tf.data.Iterator.from_structure(data.output_types)
next_element = iterator.get_next()
# create initialization ops
init_op = iterator.make_initializer(data)
c=0
smoothed = np.zeros(imgs.shape)
with tf.Session() as sess:
    # initialize the iterator on the data
    sess.run(init_op)
    while True:
        try:
            elem = sess.run(next_element)
            smoothed[c,:] = elem
            # obviously WRONG.
            c += 1
        except tf.errors.OutOfRangeError:
            #print("End of dataset.")
            break
#print(c)
etime = time.time()
print('tf smoothed: {:1.4f} seconds'.format(etime-stime))
plt.figure(figsize=(20,20))
plt.subplot(131)
plt.imshow(imgs,cmap='gray',interpolation='none')
plt.title('original')
plt.subplot(132)
plt.imshow(smoothed,cmap='gray',interpolation='none')
plt.title('actual smoothed\nwith {}'.format(method))
plt.subplot(133)
plt.imshow(scipysmoothed,cmap='gray',interpolation='none')
_=plt.title('expected smoothed')
scipy smoothed: 1.1899 seconds
tf smoothed: 0.7485 seconds
Proposal 1: my attempt is below; since it just uses tf.image.extract_image_patches and tf.extract_volume_patches, the implementation supports only 2D and 3D images.
Proposal 2: one could just format the data as a preprocessing step (via tf.data.Dataset.map); however, this also takes a lot of time, and I am not sure why yet (example: https://gist.github.com/pangyuteng/ca5cb07fe383ebe59b521c832f2e2918; a rough sketch follows the Proposal 1 code below).
Proposal 3: use convolutional blocks to parallelize processing, see "Hypercolumns for Object Segmentation and Fine-grained Localization" https://arxiv.org/abs/1411.5752 .
--
Proposal 1 code:
import time
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tensorflow as tf
from tensorflow.contrib.data.python.ops import sliding
from skimage import img_as_float, data
from scipy.signal import medfilt
dtype = 2
if dtype == 2:
    imgs = img_as_float(data.camera())
elif dtype == 3:
    imgs = np.random.rand(28,28,28)
### SCIPY median ###
stime = time.time()
scipysmoothed = medfilt(imgs,(9,9))
etime = time.time()
print('scipy smoothed: {:1.4f} seconds'.format(etime-stime))
### TF median ###
method = 'Tensorflow'
imgs = np.expand_dims(imgs,axis=-1)
imgs = np.expand_dims(imgs,axis=0)
print('imgs.shape:{}'.format(imgs.shape))
imgs = tf.cast(imgs,tf.float32)
stime = time.time()
if len(imgs.shape) == 4:
    kernel = (1,9,9,1)
    stride = (1,1,1,1)
    rates = (1,1,1,1)
    padding = 'SAME'
    patches = tf.image.extract_image_patches(
        imgs, kernel, stride, rates, padding,
    )
    _, x, y, n = patches.shape
    _, sx, sy, _ = kernel
    window_func = lambda x: tf.contrib.distributions.percentile(x, 50.0)
    patches = tf.reshape(patches, [x*y, sx, sy])
    smoothed = tf.map_fn(lambda x: window_func(patches[x,:,:]), tf.range(x*y), dtype=tf.float32)
    smoothed = tf.reshape(smoothed, [x, y])
elif len(imgs.shape) == 5:
    kernel = (1,12,12,12,1)
    stride = (1,1,1,1,1)
    padding = 'SAME'
    patches = tf.extract_volume_patches(
        imgs, kernel, stride, padding,
    )
    _, x, y, z, n = patches.shape
    _, sx, sy, sz, _ = kernel
    window_func = lambda x: tf.contrib.distributions.percentile(x, 50.0)
    patches = tf.reshape(patches, [x*y*z, sx, sy, sz])
    smoothed = tf.map_fn(lambda x: window_func(patches[x,:,:]), tf.range(x*y*z), dtype=tf.float32)
    smoothed = tf.reshape(smoothed, [x, y, z])
else:
    raise NotImplementedError()
with tf.Session() as sess:
    output = sess.run(smoothed)
etime = time.time()
print('tf smoothed: {:1.4f} seconds'.format(etime-stime))
print(output.shape)
plt.figure(figsize=(20,20))
plt.subplot(131)
imgs = img_as_float(data.camera())
plt.imshow(imgs.squeeze(),cmap='gray',interpolation='none')
plt.title('original')
plt.subplot(132)
plt.imshow(output.squeeze(),cmap='gray',interpolation='none')
plt.title('actual smoothed\nwith {}'.format(method))
plt.subplot(133)
plt.imshow(scipysmoothed,cmap='gray',interpolation='none')
_=plt.title('expected smoothed')
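For Proposal 2, here is a rough sketch (assuming TF 1.x and the same 9x9 window; not benchmarked) of extracting patches per image inside Dataset.map and reducing them with the percentile function:
import numpy as np
import tensorflow as tf
from skimage import img_as_float, data
img = img_as_float(data.camera()).astype(np.float32)
def to_patches(im):
    # one 2D image in; extract 9x9 patches, then take the median over each patch
    p = tf.image.extract_image_patches(im[None, :, :, None],
                                       (1,9,9,1), (1,1,1,1), (1,1,1,1), 'SAME')
    return tf.contrib.distributions.percentile(p, 50.0, axis=-1)[0]
dataset = tf.data.Dataset.from_tensor_slices(img[None, :, :])  # one-element dataset
dataset = dataset.map(to_patches)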

Matrix multiplication in Keras

I am trying to multiply two matrices in a Python program using Keras.
import keras.backend as K
import numpy as np
A = np.random.rand(10,500)
B = np.random.rand(500,6000)
x = K.placeholder(shape=A.shape)
y = K.placeholder(shape=B.shape)
xy = K.dot(x, y)
xy.eval(A,B)
I know this cannot work, but I also don't know how I can make it work.
You need to use a variable instead of a placeholder.
import keras.backend as K
import numpy as np
A = np.random.rand(10,500)
B = np.random.rand(500,6000)
x = K.variable(value=A)
y = K.variable(value=B)
z = K.dot(x,y)
# Here you need to use K.eval() instead of z.eval() because this uses the backend session
K.eval(z)
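If you do want to keep placeholders (for example, to feed different matrices through the same graph), a minimal sketch is to compile the graph with K.function and pass the NumPy arrays at call time:
import keras.backend as K
import numpy as np
A = np.random.rand(10,500)
B = np.random.rand(500,6000)
x = K.placeholder(shape=A.shape)
y = K.placeholder(shape=B.shape)
xy = K.dot(x, y)
# K.function compiles the graph; the placeholder values are supplied at call time
matmul = K.function(inputs=[x, y], outputs=[xy])
result = matmul([A, B])[0]  # NumPy array of shape (10, 6000)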

How to display the image in the Mandelbrot TensorFlow program? Current output is <IPython.core.display.Image object>

'''Import libraries for simulation'''
import tensorflow as tf
import numpy as np
'''Imports for visualization'''
import PIL.Image
from io import BytesIO
from IPython.display import Image, display
'''Now we'll define a function to actually display the image once we have iteration counts'''
def DisplayFractal(a, fmt='jpeg'):
    a_cyclic = (6.28*a/20.0).reshape(list(a.shape)+[1])
    img = np.concatenate([10+20*np.cos(a_cyclic),
                          30+50*np.sin(a_cyclic),
                          155-80*np.cos(a_cyclic)], 2)
    img[a == a.max()] = 0
    a = img
    a = np.uint8(np.clip(a, 0, 255))
    f = BytesIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))
sess = tf.InteractiveSession()
# Use NumPy to create a 2D array of complex numbers
Y, X = np.mgrid[-1.3:1.3:0.005, -2:1:0.005]
Z = X+1j*Y
print(Z)
#Now we define and initialize TensorFlow tensors.
xs = tf.constant(Z.astype(np.complex64))
zs = tf.Variable(xs)
ns = tf.Variable(tf.zeros_like(xs, tf.float32))
tf.global_variables_initializer().run()
zs_ = zs*zs + xs
print(zs)
# Have we diverged with this new value?
not_diverged = tf.abs(zs_) < 4
'''
Operation to update the zs and the iteration count.
Note: We keep computing zs after they diverge! This
is very wasteful! There are better, if a little
less simple, ways to do this.
'''
step = tf.group(zs.assign(zs_), ns.assign_add(tf.cast(not_diverged, tf.float32)))
for i in range(200): step.run()
DisplayFractal(ns.eval())
I had the same problem. You have to run the TensorFlow example in a Jupyter notebook:
http://jupyter.org/
If you run it from another IDE (such as Spyder), all you will see is <IPython.core.display.Image object> in the console.
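If you cannot switch to Jupyter, a small workaround is to write the frame to disk instead of going through IPython.display. A sketch (SaveFractal is a hypothetical helper reusing the colour mapping of DisplayFractal above):
import numpy as np
import PIL.Image
def SaveFractal(a, path='fractal.jpeg'):
    # same colour mapping as DisplayFractal, but saved to a file
    a_cyclic = (6.28*a/20.0).reshape(list(a.shape)+[1])
    img = np.concatenate([10+20*np.cos(a_cyclic),
                          30+50*np.sin(a_cyclic),
                          155-80*np.cos(a_cyclic)], 2)
    img[a == a.max()] = 0
    img = np.uint8(np.clip(img, 0, 255))
    PIL.Image.fromarray(img).save(path)
SaveFractal(ns.eval())  # then open fractal.jpeg with any image viewer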
Hmm, I have solved this problem; you can take a look at my function:
def displayFractal(a, fmt='jpeg'):
    a_cyclic = (6.28*a/200.0).reshape(list(a.shape)+[1])
    # I have changed the numbers; you can just keep your own
    img = np.concatenate([5+10*np.cos(a_cyclic), 15+25*np.sin(a_cyclic), 70-40*np.cos(a_cyclic)], 2)
    img[a == a.max()] = 0
    a = img
    a = np.uint8(np.clip(a, 0, 255))
    plt.imshow(PIL.Image.fromarray(a))
    plt.show()
Of course, you should import matplotlib.pyplot as plt first.

Dataset API does not pass dimensionality information for its output tensor when using py_func

To reproduce my problem, try this first (mapping with py_func):
import tensorflow as tf
import numpy as np
def image_parser(image_name):
    a = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    return a
images = [[1,2,3],[4,5,6]]
im_dataset = tf.data.Dataset.from_tensor_slices(images)
im_dataset = im_dataset.map(lambda image:tuple(tf.py_func(image_parser, [image], [tf.float32])), num_parallel_calls = 2)
im_dataset = im_dataset.prefetch(4)
iterator = im_dataset.make_initializable_iterator()
print(im_dataset.output_shapes)
It will give you (TensorShape(None),)
However, if you try this (using direct tensorflow mapping instead of py_func):
import tensorflow as tf
import numpy as np
def image_parser(image_name):
    return image_name
images = [[1,2,3],[4,5,6]]
im_dataset = tf.data.Dataset.from_tensor_slices(images)
im_dataset = im_dataset.map(image_parser)
im_dataset = im_dataset.prefetch(4)
iterator = im_dataset.make_initializable_iterator()
print(im_dataset.output_shapes)
It will give you the exact tensor dimension (3,)
This is a general, intended limitation of tf.py_func: since the wrapped function is arbitrary Python code, TensorFlow cannot infer the output shape itself; see for instance this answer.
You could set the shape yourself if you need to, by moving the tf.py_func inside the parse function:
def parser(x):
    a = np.array([1.0, 2.0, 3.0], dtype=np.float32)  # dtype must match the declared tf.float32 output
    y = tf.py_func(lambda: a, [], tf.float32)
    y.set_shape((3,))
    return y
dataset = tf.data.Dataset.range(10)
dataset = dataset.map(parser)
print(dataset.output_shapes)  # will correctly print (3,)
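Alternatively, here is a sketch that keeps the question's py_func map as-is and restores the static shape in a second map (reusing im_dataset and image_parser from the question):
def restore_shape(t):
    t.set_shape((3,))  # we know image_parser always returns 3 floats
    return t
im_dataset = im_dataset.map(
    lambda image: tuple(tf.py_func(image_parser, [image], [tf.float32])))
im_dataset = im_dataset.map(restore_shape)
print(im_dataset.output_shapes)  # now prints (3,)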
