Saving image from numpy array gives errors - python

This function shows the adversarial image and its probability; I only want to download the image.
def visualize(x, x_adv, x_grad, epsilon, clean_pred, adv_pred, clean_prob, adv_prob):
    x = x.squeeze(0)  # remove batch dimension: B x C x H x W ==> C x H x W
    x = x.mul(torch.FloatTensor(std).view(3,1,1)).add(torch.FloatTensor(mean).view(3,1,1)).numpy()  # reverse of normalization op - "unnormalize"
    x = np.transpose(x, (1,2,0))  # C x H x W ==> H x W x C
    x = np.clip(x, 0, 1)

    x_adv = x_adv.squeeze(0)
    x_adv = x_adv.mul(torch.FloatTensor(std).view(3,1,1)).add(torch.FloatTensor(mean).view(3,1,1)).numpy()  # reverse of normalization op
    x_adv = np.transpose(x_adv, (1,2,0))  # C x H x W ==> H x W x C
    x_adv = np.clip(x_adv, 0, 1)

    x_grad = x_grad.squeeze(0).numpy()
    x_grad = np.transpose(x_grad, (1,2,0))
    x_grad = np.clip(x_grad, 0, 1)

    figure, ax = plt.subplots(1, 3, figsize=(80, 80))
    ax[0].imshow(x_adv)

    im = Image.fromarray(x_adv)
    im.save("car.jpeg")
    files.download('car.jpeg')

    plt.show()
I am getting this error:
TypeError: Cannot handle this data type: (1, 1, 3), <f4

Try changing this:
im = Image.fromarray(x_adv)
to this:
im = Image.fromarray((x_adv * 255).astype(np.uint8))
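The error comes from handing Image.fromarray a float32 array: PIL cannot infer an image mode from a 3-channel <f4 array, whereas a 3-channel uint8 array is read as RGB. A minimal, self-contained sketch of the fix (the dummy array here stands in for x_adv):

import numpy as np
from PIL import Image

x_adv = np.random.rand(224, 224, 3).astype(np.float32)  # dummy H x W x C image, values in [0, 1]

# Image.fromarray(x_adv) would raise: TypeError: Cannot handle this data type: (1, 1, 3), <f4
im = Image.fromarray((x_adv * 255).astype(np.uint8))  # scale to [0, 255] and cast to uint8 -> RGB
im.save("car.jpeg")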


How to redistort a single image point with a map? python opencv

I have used this method to create an inverse mapping to redistort an image and it works fine. Here's what it looks like in code:
# invert the mapping
combined_map_inverted = invert_map(combined_map, shape)
# apply mapping to image
frame = cv2.remap(img, combined_map_inverted, None, cv2.INTER_LINEAR)
Notice that it's a combined map, not separated into x and y. How can I take a single (x,y) point in the undistorted image and find the corresponding distorted point? I see this answer but I'm unsure how to apply it to my case.
The combined map is a simple look-up table, mapping from (u, v) to x and from (u, v) to y.
Assume (u, v) is the column, row coordinate of the undistorted image.
Then the coordinate in the distorted image is:
x = combined_map_inverted[v, u, 0]
y = combined_map_inverted[v, u, 1]
In more compact form:
x, y = combined_map_inverted[v, u].tolist()
In case we want to get the value at the (x, y) coordinate, we may use bi-linear interpolation as described in my following answer (or another kind of interpolation).
I tried testing it using the code from your previous post:
import cv2
import glob
import numpy as np
import math
import os

if os.path.isfile('xymap_inverted.npy'):
    xymap_inverted = np.load('xymap_inverted.npy')
else:
    A = -1010
    B = -3.931
    C = 5.258
    D = 978.3
    M = -193.8
    N = 1740

    def get_tan_func_value(x):
        return A * math.tan((((x-N)/M)+B)/C) + D

    def get_inverse_tan_func_value(x):
        return M * (C*math.atan((x-D)/A) - B) + N

    # answer from linked post
    #def invert_map(F, shape):
    #    I = np.zeros_like(F)
    #    I[:,:,1], I[:,:,0] = np.indices(shape)
    #    P = np.copy(I)
    #    for i in range(10):
    #        P += I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
    #    return P

    # https://stackoverflow.com/a/72649764/4926757
    def invert_map(F):
        (h, w) = F.shape[:2]  # (h, w, 2), "xymap"
        I = np.zeros_like(F)
        I[:,:,1], I[:,:,0] = np.indices((h, w))  # identity map
        P = np.copy(I)
        for i in range(10):
            correction = I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
            P += correction * 0.5
        return P

    # import image
    #images = glob.glob('*.jpg')
    img = cv2.imread('image1.jpg')  #img = cv2.imread(images[0])
    h, w = img.shape[:2]

    map_x_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
    map_x_inverse_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
    map_y = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)

    # x tan function map
    for i in range(map_x_tan.shape[0]):
        map_x_tan[i,:] = [get_tan_func_value(x) for x in range(map_x_tan.shape[1])]

    # x inverse tan function map
    for i in range(map_x_inverse_tan.shape[0]):
        map_x_inverse_tan[i,:] = [get_inverse_tan_func_value(x) for x in range(map_x_inverse_tan.shape[1])]

    # default y map
    for j in range(map_y.shape[1]):
        map_y[:,j] = [y for y in range(map_y.shape[0])]

    # convert x tan map to 2 channel (x,y) map
    (xymap_tan, _) = cv2.convertMaps(map1=map_x_tan, map2=map_y, dstmap1type=cv2.CV_32FC2)

    # invert the 2 channel x tan map
    xymap_inverted = invert_map(xymap_tan)

    np.save('xymap_inverted.npy', xymap_inverted)

combined_map_inverted = xymap_inverted

u = 150
v = 120
x, y = combined_map_inverted[v, u].tolist()
The output is:
x = 278.2418212890625
y = 120.0
Bi-linear interpolation example:
x0 = int(x)
y0 = int(y)
x1 = int(x0 + 1)
y1 = int(y0 + 1)
dx = x - x0
dy = y - y0
new_pixel = np.round(img[y0,x0]*(1-dx)*(1-dy) + img[y1,x0]*(1-dx)*dy + img[y0,x1]*dx*(1-dy) + img[y1,x1]*dx*dy)
Testing by remapping an entire image, and comparing with cv2.remap:
def bilinear_interp(img, x, y):
    x0 = int(x)
    y0 = int(y)
    x1 = int(x0 + 1)
    y1 = int(y0 + 1)
    dx = x - x0
    dy = y - y0
    new_pixel = np.round(img[y0,x0]*(1-dx)*(1-dy) + img[y1,x0]*(1-dx)*dy + img[y0,x1]*dx*(1-dy) + img[y1,x1]*dx*dy)
    return new_pixel.astype(np.uint8)

img = cv2.imread('image1.jpg')
ref_img = cv2.remap(img, xymap_inverted, None, cv2.INTER_LINEAR)
cv2.imwrite('ref_img.jpg', ref_img)

new_img = np.zeros_like(img)
for v in range(img.shape[0]):
    for u in range(img.shape[1]):
        x, y = combined_map_inverted[v, u].tolist()
        if (x >= 0) and (y >= 0) and (x < img.shape[1]-1) and (y < img.shape[0]-1):
            new_img[v, u] = bilinear_interp(img, x, y)
cv2.imwrite('new_img.jpg', new_img)

abs_diff = cv2.absdiff(ref_img, new_img)
cv2.imshow('abs_diff', abs_diff)  # Display the absolute difference for testing
cv2.waitKey()
cv2.destroyAllWindows()
ref_img and new_img are almost the same.

torch gather using two index arrays

The goal is to extract a random 2x5 patch from a 5x10 image, and to do so randomly for every image in a batch. I'm looking to write a faster implementation that avoids for loops, but I haven't been able to figure out how to use the torch.gather operation with two index arrays (idx_h and idx_w in the code example).
Naive for loop:
import torch

b = 3   # batch size
h = 5   # height
w = 10  # width
crop_border = (3, 5)  # number of pixels (height, width) to crop

x = torch.arange(b * h * w).reshape(b, h, w)
print(x)

dh_ = torch.randint(0, crop_border[0], size=(b,))
dw_ = torch.randint(0, crop_border[1], size=(b,))
_dh = h - (crop_border[0] - dh_)
_dw = w - (crop_border[1] - dw_)
idx_h = torch.stack([torch.arange(d_, _d) for d_, _d in zip(dh_, _dh)])
idx_w = torch.stack([torch.arange(d_, _d) for d_, _d in zip(dw_, _dw)])
print(idx_h, idx_w)

new_shape = (b, idx_h.shape[1], idx_w.shape[1])
cropped_x = torch.empty(new_shape)
for batch in range(b):
    for height in range(idx_h.shape[1]):
        for width in range(idx_w.shape[1]):
            cropped_x[batch, height, width] = x[
                batch, idx_h[batch, height], idx_w[batch, width]
            ]
print(cropped_x)
The index arrays need to be repeated and reshaped to work with the gather operation. The fast_crop code is based on this PyTorch discussion: https://discuss.pytorch.org/t/similar-to-torch-gather-over-two-dimensions/118827
def fast_crop(x, idx1, idx2):
    """
    Compute
    x: N x B x V
    idx1: N x K matrix where idx1[i, j] is between [0, B)
    idx2: N x K matrix where idx2[i, j] is between [0, V)
    Return:
    cropped: N x K matrix where y[i, j] = x[i, idx1[i,j], idx2[i,j]]
    """
    x = x.contiguous()
    assert idx1.shape == idx2.shape
    lin_idx = idx2 + x.size(-1) * idx1
    x = x.view(-1, x.size(1) * x.size(2))
    lin_idx = lin_idx.view(-1, lin_idx.shape[1] * lin_idx.shape[2])
    cropped = x.gather(-1, lin_idx)
    return cropped.reshape(idx1.shape)

idx1 = torch.repeat_interleave(idx_h, idx_w.shape[1]).reshape(new_shape)
idx2 = torch.repeat_interleave(idx_w, idx_h.shape[1], dim=0).reshape(new_shape)
cropped = fast_crop(x, idx1, idx2)
(cropped == cropped_x).all()
Using realistic numbers (b = 100, h = 100, w = 130, crop_border = (40, 95)), a 10-trial run takes 32 s with the for loop and only 0.043 s with fast_crop.
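For reference, a rough sketch of how the pieces above could be wrapped into a reusable helper; the function name and structure are my own, not from the post:

def random_batch_crop(x, crop_border):
    # Crop a (h - crop_border[0]) x (w - crop_border[1]) patch from each image in the
    # batch, with an independently drawn random offset per batch element.
    b, h, w = x.shape
    dh_ = torch.randint(0, crop_border[0], size=(b,))
    dw_ = torch.randint(0, crop_border[1], size=(b,))
    idx_h = torch.stack([torch.arange(int(d), int(d) + h - crop_border[0]) for d in dh_])
    idx_w = torch.stack([torch.arange(int(d), int(d) + w - crop_border[1]) for d in dw_])
    new_shape = (b, idx_h.shape[1], idx_w.shape[1])
    idx1 = torch.repeat_interleave(idx_h, idx_w.shape[1]).reshape(new_shape)
    idx2 = torch.repeat_interleave(idx_w, idx_h.shape[1], dim=0).reshape(new_shape)
    return fast_crop(x, idx1, idx2)

# e.g. random_batch_crop(torch.arange(100 * 100 * 130).reshape(100, 100, 130), (40, 95))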

Overlay a number of colormaps with matplotlib

I have a total of 16 color maps which look as follows:
Is there a way to overlay all the maps while retaining their color? That is, I want to obtain a final image that consists of 16 distributions in different colors. I've been searching a lot but unfortunately haven't found anything good yet. Thanks a lot!
For reproduction, the code looks as follows:
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors

def softmax(logit_map):
    bn, kn, h, w = logit_map.shape
    map_norm = F.softmax(logit_map.reshape(bn, kn, -1), dim=2).reshape(bn, kn, h, w)
    return map_norm

def get_mu_and_prec(part_maps, device, scal):
    """
    Calculate mean for each channel of part_maps
    :param part_maps: tensor of part map activations [bn, n_part, h, w]
    :return: mean calculated on a grid of scale [-1, 1]
    """
    bn, nk, h, w = part_maps.shape
    y_t = torch.linspace(-1., 1., h).reshape(h, 1).repeat(1, w).unsqueeze(-1)
    x_t = torch.linspace(-1., 1., w).reshape(1, w).repeat(h, 1).unsqueeze(-1)
    meshgrid = torch.cat((y_t, x_t), dim=-1).to(device)  # 64 x 64 x 2

    mu = torch.einsum('ijl, akij -> akl', meshgrid, part_maps)  # bn x nk x 2
    mu_out_prod = torch.einsum('akm,akn->akmn', mu, mu)
    mesh_out_prod = torch.einsum('ijm,ijn->ijmn', meshgrid, meshgrid)
    stddev = torch.einsum('ijmn,akij->akmn', mesh_out_prod, part_maps) - mu_out_prod

    a_sq = stddev[:, :, 0, 0]
    a_b = stddev[:, :, 0, 1]
    b_sq_add_c_sq = stddev[:, :, 1, 1]
    eps = 1e-12

    a = torch.sqrt(a_sq + eps)  # Σ = L L^T, Prec = Σ^-1 = L^T^-1 * L^-1 -> looking for L^-1, but first L = [[a, 0], [b, c]]
    b = a_b / (a + eps)
    c = torch.sqrt(b_sq_add_c_sq - b ** 2 + eps)
    z = torch.zeros_like(a)

    det = (a * c).unsqueeze(-1).unsqueeze(-1)
    row_1 = torch.cat((c.unsqueeze(-1), z.unsqueeze(-1)), dim=-1).unsqueeze(-2)
    row_2 = torch.cat((-b.unsqueeze(-1), a.unsqueeze(-1)), dim=-1).unsqueeze(-2)
    L_inv = scal / (det + eps) * torch.cat((row_1, row_2), dim=-2)  # L^-1 = 1/(ac) * [[c, 0], [-b, a]]

    return mu, L_inv

def get_heat_map(mu, L_inv, device):
    h, w, nk = 64, 64, L_inv.shape[1]

    y_t = torch.linspace(-1., 1., h).reshape(h, 1).repeat(1, w).unsqueeze(-1)
    x_t = torch.linspace(-1., 1., w).reshape(1, w).repeat(h, 1).unsqueeze(-1)
    y_t_flat = y_t.reshape(1, 1, 1, -1)
    x_t_flat = x_t.reshape(1, 1, 1, -1)
    mesh = torch.cat((y_t_flat, x_t_flat), dim=-2).to(device)
    dist = mesh - mu.unsqueeze(-1)

    proj_precision = torch.einsum('bnik, bnkf -> bnif', L_inv, dist) ** 2  # tf.matmul(precision, dist)**2
    proj_precision = torch.sum(proj_precision, -2)  # sum x and y axis

    heat = 1 / (1 + proj_precision)
    heat = heat.reshape(-1, nk, h, w)  # bn number parts width height

    return heat

color_list = ['black', 'gray', 'brown', 'chocolate', 'orange', 'gold', 'olive', 'lawngreen', 'aquamarine',
              'dodgerblue', 'midnightblue', 'mediumpurple', 'indigo', 'magenta', 'pink', 'springgreen']

fmap = torch.randn(1, 16, 64, 64)
fmap_norm = softmax(fmap)
mu, L_inv = get_mu_and_prec(fmap_norm, 'cpu', scal=5.)
heat_map = get_heat_map(mu, L_inv, "cpu")

for i in range(16):
    cmap = colors.LinearSegmentedColormap.from_list('my_colormap',
                                                    ['white', color_list[i]],
                                                    256)
    plt.imshow(heat_map[0][i].numpy(), cmap=cmap)
    plt.show()
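One possible way to overlay all 16 maps while keeping each color (a sketch of my own, not from the original thread) is to render each heat map through its own white-to-color colormap and keep, per pixel, the color of the strongest map, reusing heat_map and color_list from the code above:

overlay = np.ones((64, 64, 3))   # start from a white canvas
strength = np.zeros((64, 64))    # strongest activation seen so far at each pixel

for i in range(16):
    cmap = colors.LinearSegmentedColormap.from_list('cm_' + color_list[i],
                                                    ['white', color_list[i]], 256)
    hm = heat_map[0][i].numpy()
    hm = (hm - hm.min()) / (hm.max() - hm.min() + 1e-8)  # normalize each map to [0, 1]
    rgb = cmap(hm)[..., :3]                              # map activations to RGB
    mask = hm > strength                                 # pixels where this map dominates
    overlay[mask] = rgb[mask]
    strength = np.maximum(strength, hm)

plt.imshow(overlay)
plt.show()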

pytorch gesv gives different result than scipy sparse solve

I'm trying to implement baseline ALS subtraction in PyTorch so that I can run it on my GPU, but I am running into problems because torch.gesv gives a different result than scipy.sparse.linalg.spsolve. Here is my code for scipy:
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

def baseline_als(y, lam, p, niter=10):
    L = len(y)
    D = sparse.diags([1, -2, 1], [0, -1, -2], shape=(L, L-2))
    w = np.ones(L)
    for i in range(niter):
        W = sparse.spdiags(w, 0, L, L)
        Z = W + lam * D.dot(D.transpose())
        z = spsolve(Z, w*y)
        w = p * (y > z) + (1-p) * (y < z)
    return z
and here is my code for PyTorch:
def baseline_als_pytorch(y, lam, p, niter=10):
    # L (the length of y) is assumed to be defined in the enclosing scope
    diag = torch.tensor(np.repeat(1, L))
    diag = torch.diag(diag, 0)
    diag_minus_one = torch.tensor(np.repeat(-2, L - 1))
    diag_minus_one = torch.diag(diag_minus_one, -1)
    diag_minus_two = torch.tensor(np.repeat(1, L - 2))
    diag_minus_two = torch.diag(diag_minus_two, -2)
    D = diag + diag_minus_one + diag_minus_two
    D = D[:, :L - 2].double()
    w = torch.tensor(np.repeat(1, L)).double()
    for i in range(10):
        W = diag.double()
        Z = W + lam * torch.mm(D, D.permute(1, 0))
        z = torch.gesv(w * y, Z)
        z = z[0].squeeze()
        w = p * (y > z).double() + (1 - p) * (y < z).double()
    return z
Sorry that the PyTorch code looks so bad; I'm just starting out with it.
I've confirmed that Z, w, and y are all the same going into both scipy and pytorch, and that z is different between them right after I try to solve the system of equations.
Thanks for the comment, here is an example:
I use 100000 for lam and 0.001 for p.
Using the dummy input: y = (5,5,5,5,5,10,10,5,5,5,10,10,10,5,5,5,5,5,5,5),
I get (3.68010263, 4.90344214, 6.12679489, 7.35022406, 8.57384278, 9.79774074, 11.02197199, 12.2465927 , 13.47164891, 14.69711435,15.92287813, 17.14873257, 18.37456982, 19.60038184, 20.82626043,22.05215157, 23.27805103, 24.50400438, 25.73010693, 26.95625922) from scipy and
(6.4938312 , 6.46912395, 6.44440175, 6.41963499, 6.39477958,6.36977727, 6.34455582, 6.31907933, 6.29334844, 6.26735058, 6.24106029, 6.21443939, 6.18748732, 6.16024137, 6.13277694,6.10515785, 6.07743658, 6.04965455, 6.02184242, 5.99402035) from pytorch.
This is with just one iteration of the loop. Scipy is correct, pytorch is not.
Interestingly, if I use a shorter dummy input (5,5,5,5,5,10,10,5,5,5), I get the same answer from both. My real input is 1011 dimensional.
Your PyTorch function is wrong (you never update W in the first line inside the for loop); moreover, the result you say you got from PyTorch is what I get from SciPy too.
Scipy version
def baseline_als(y, lam=100000, p=1e-3, niter=1):
    L = len(y)
    D = sparse.diags([1, -2, 1], [0, -1, -2], shape=(L, L-2))
    w = np.ones(L)
    for i in range(niter):
        W = sparse.spdiags(w, 0, L, L)
        Z = W + lam * D.dot(D.transpose())
        z = spsolve(Z, w*y)
        w = p * (y > z) + (1-p) * (y < z)
    return z
Equivalent in PyTorch:
def baseline_als_pytorch(y, lam=100000, p=1e-3, niter=1):
    L = len(y)
    D = torch.diag(torch.ones(L), 0) + torch.diag(-2 * torch.ones(L-1), -1) + torch.diag(torch.ones(L-2), -2)
    D = D[:, :L-2].double()
    w = torch.ones(L).double()
    for i in range(niter):
        W = torch.diag(w)
        Z = W + lam * torch.mm(D, D.permute(1, 0))
        z = torch.gesv(w * y, Z)
        z = z[0].squeeze()
        w = p * (y > z).double() + (1 - p) * (y < z).double()
    return z
When I feed both with y = np.array([5,5,5,5,5,10,10,5,5,5,10,10,10,5,5,5,5,5,5,5], dtype='float64'):
scipy:
array([6.4938312 , 6.46912395, 6.44440175, 6.41963499, 6.39477958,
6.36977727, 6.34455582, 6.31907933, 6.29334844, 6.26735058,
6.24106029, 6.21443939, 6.18748732, 6.16024137, 6.13277694,
6.10515785, 6.07743658, 6.04965455, 6.02184242, 5.99402035])
pytorch:
tensor([6.4938, 6.4691, 6.4444, 6.4196, 6.3948, 6.3698, 6.3446, 6.3191, 6.2933,
6.2674, 6.2411, 6.2144, 6.1875, 6.1602, 6.1328, 6.1052, 6.0774, 6.0497,
6.0218, 5.9940], dtype=torch.float64)
If I increase niter to 10:
scipy:
array([5.00202571, 5.00199038, 5.00195504, 5.00191963, 5.0018841 ,
5.00184837, 5.00181235, 5.00177598, 5.00173927, 5.00170221,
5.00166475, 5.00162685, 5.00158851, 5.00154979, 5.00151077,
5.00147155, 5.0014322 , 5.00139276, 5.00135329, 5.0013138 ])
pytorch:
tensor([5.0020, 5.0020, 5.0020, 5.0019, 5.0019, 5.0018, 5.0018, 5.0018, 5.0017,
5.0017, 5.0017, 5.0016, 5.0016, 5.0015, 5.0015, 5.0015, 5.0014, 5.0014,
5.0014, 5.0013], dtype=torch.float64)
And it checks out with the baseline ALS code you linked to in your question.
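A side note that is not in the original answer: torch.gesv has since been removed from PyTorch. On current releases the same solve can be written with torch.linalg.solve, which takes the matrix first and returns only the solution:

# Old API: z, LU = torch.gesv(w * y, Z); z = z.squeeze()
# Newer API (roughly PyTorch >= 1.9): solves Z z = w * y directly, no LU factor returned.
z = torch.linalg.solve(Z, w * y)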

Adding an offset to one dimension of a tensor, the offset being the entry of another tensor

I am looking for an efficient way to write the sample below with TensorFlow. Below, it is coded naively with numpy.
The idea is to offset a tensor of size (batch_size, height, width, channels) on the height dimension using another tensor value. In other terms:
tensor_2[i, j, k, l] = tensor_1[i, j + tensor_offset[i, j, k, l], k, l]
Here's the code I'm using:
import numpy as np
import time

begin = time.time()

b, h, w, c = 5, 256, 512, 20
offset = np.random.rand(b, h, w, c).astype(int)
image = np.ones((b, h, w, c))
label = np.ones((b, h, w, c))
label_offset = np.zeros((b, h, w, c))

loss = 0
count = 0
for i in range(b):
    for j in range(h):
        for k in range(w):
            for l in range(c):
                offset_ = j + int(offset[i, j, k, l])
                if offset_ > 255:
                    pass
                else:
                    label_offset[i, j, k, l] = label[i, offset_, k, l]
                    loss += label_offset[i, j, k, l] * np.log(image[i, j, k, l])
                    count += 1

loss = loss / count
end = time.time()
print('duration:', end - begin)
You can do it using gather_nd, with indices[i, j, k, l] = [i, j + tensor_offset[i, j, k, l], k, l] (so indices would be of dimension 5).
You can build it like that:
import numpy as np
import tensorflow as tf
b, h, w ,c = 2, 11, 13, 7
final_shape = [b, h, w, c]
offset = np.random.randint(low=0, high=h, size=final_shape)
image = np.random.randint(low=0, high=1000, size=final_shape)
input_tensor = tf.constant(image)
m1 = tf.transpose(tf.reshape(tf.tile(tf.range(b), multiples=[h * w * c]), [h, w, c, b]), perm=[3, 0, 1, 2])
m2 = tf.transpose(tf.reshape(tf.tile(tf.range(h), multiples=[b * w * c]), [b, w, c, h]), perm=[0, 3, 1, 2]) + offset
not_too_big = tf.less(m2, h)
m2_safe = tf.mod(m2, h) # Makes sure we don't go too far in the original array
m3 = tf.transpose(tf.reshape(tf.tile(tf.range(w), multiples=[b * h * c]), [b, h, c, w]), perm=[0, 1, 3, 2])
m4 = tf.reshape(tf.tile(tf.range(c), multiples=[b * h * w]), [b, h, w, c]) # No transposition needed here
indices = tf.stack([m1, m2_safe, m3, m4], axis=-1)
tmp = tf.gather_nd(input_tensor, indices)
output = tf.multiply(tmp, tf.cast(not_too_big, tmp.dtype)) # Sets all the values corresponding to j+offset>h to 0
EDIT: this works for me with the transposition.
@gdelab, I have made the following improvement to your code:
def tensor_offset(input_tensor, offset_tensor, batch, nbcl):
    b, h, w, c = batch, 256, 512, nbcl
    m = tf.reshape(tf.tile(tf.range(b), multiples=[w*h*c]), [h, w, c, b])
    m1 = tf.reshape(tf.tile(tf.range(h), multiples=[w*b*c]), [b, w, c, h])
    m2 = tf.reshape(tf.tile(tf.range(w), multiples=[h*b*c]), [b, h, c, w])
    m2 = m2 + tf.transpose(tf.cast(offset_tensor, tf.int32), perm=[0, 1, 3, 2])
    m3 = tf.reshape(tf.tile(tf.range(c), multiples=[h*b*w]), [b, h, w, c])
    indices = tf.stack([tf.transpose(m, perm=[3, 0, 1, 2]),
                        tf.transpose(m1, perm=[0, 3, 1, 2]),
                        tf.transpose(m2, perm=[0, 1, 3, 2]),
                        m3], axis=-1)
    paddings = tf.constant([[0, 0], [0, 0], [0, 100], [0, 0]])
    output = tf.gather_nd(tf.pad(input_tensor, paddings), indices)
    return output
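For completeness, a hypothetical usage sketch of the helper above (the dummy tensors and shapes are my own; h=256 and w=512 are hard-coded in the function). Note that in this variant the offset is applied along the width axis, i.e. output[i, j, k, l] = padded_input[i, j, k + offset[i, j, k, l], l]:

import numpy as np
import tensorflow as tf

batch, nbcl = 2, 20
label = tf.constant(np.random.randint(0, 255, size=(batch, 256, 512, nbcl)), dtype=tf.int32)
offset = tf.constant(np.random.randint(0, 50, size=(batch, 256, 512, nbcl)), dtype=tf.int32)

shifted = tensor_offset(label, offset, batch, nbcl)  # same shape as label, entries gathered at offset positions
print(shifted.shape)  # (2, 256, 512, 20)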
