I'm not sure what i did wrong with this code:
import cv2
from matplotlib import image as img
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.pyplot import axis
img = cv2.imread('popi.png', 0)
cv2.imshow('lel', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
a = np.shape(img)
iloscpixeli = a[0] * a[1]
print(iloscpixeli)
b = np.zeros((256, 1))
H = np.zeros((a[0], a[1]))
# czest = np.zeros((256, 1))
# probf = np.zeros((256, 1))
# probc = np.zeros((256, 1))
# cum = np.zeros((256, 1))
dim = np.shape(img)
wyjscie = np.zeros(dim)
H = np.zeros(dim)
print("dim", dim)
czest = np.zeros(dim)
probc = np.zeros(dim)
# print("r",czest)
probf = np.zeros(dim)
cum = np.zeros(dim)
for i in range(1, a[0]):
for j in range(1, a[1]):
wartosc = img[i, j]
czest[wartosc + 1] = (czest[wartosc + 1] + 1)
probf[wartosc + 1] = czest[wartosc + 1] / iloscpixeli
suma = 0
nobins = 255
d = np.zeros((256, 1))
d1 = np.shape(d)
d11 = d1[0]
for i in range(1, d11):
suma = suma + czest[i]
cum[i] = suma
probc[i] = cum[i] / iloscpixeli
wyjscie[i] = np.around(probc[i] * nobins)
wyjscie=wyjscie.tolist()
for i in range(1, a[0]):
for j in range(1, a[1]):
H[i, j] = wyjscie[img[i,j] + 1]
cv2.imshow('wyrownany', H)
And this line(yeah last :C) :
H[i, j] = wyjscie[img[i,j] + 1]
Gives me error ValueError: setting an array element with a sequence. Trying to repair checked about change the 'wyjscie' from array to list.. but doesnt work well.
I looking for any help. It's great when you look for code, probably I do something stupid and...but there is line czest[wartosc + 1] = (czest[wartosc + 1] + 1) and it works well...
H is a numpy-array with dtype=float as it's default. It's shape is 2d
You want to insert wyjscie[img[i,j] + 1]
wyjscie itself is a numpy-array with shape 2d
you convert wyjscie to a list, but this list will be a nested list because original dim is 2d
you index in nested list, therefore obtain a list and put this list into a cell which holds a float = putting a sequence/list into array element ERROR
(you are polish :-D)
Related
The goal is to extract a random 2x5 patch from a 5x10 image, and do so randomly for all images in a batch. Looking to write a faster implementation that avoids for loops. Haven't been able to figure out how to use the torch .gather operation with two index arrays (idx_h and idx_w in code example).
Naive for loop:
import torch
b = 3 # batch size
h = 5 # height
w = 10 # width
crop_border = (3, 5) # number of pixels (height, width) to crop
x = torch.arange(b * h * w).reshape(b, h, w)
print(x)
dh_ = torch.randint(0, crop_border[0], size=(b,))
dw_ = torch.randint(0, crop_border[1], size=(b,))
_dh = h - (crop_border[0] - dh_)
_dw = w - (crop_border[1] - dw_)
idx_h = torch.stack([torch.arange(d_, _d) for d_, _d in zip(dh_, _dh)])
idx_w = torch.stack([torch.arange(d_, _d) for d_, _d in zip(dw_, _dw)])
print(idx_h, idx_w)
new_shape = (b, idx_h.shape[1], idx_w.shape[1])
cropped_x = torch.empty(new_shape)
for batch in range(b):
for height in range(idx_h.shape[1]):
for width in range(idx_w.shape[1]):
cropped_x[batch, height, width] = x[
batch, idx_h[batch, height], idx_w[batch, width]
]
print(cropped_x)
Index arrays needed to be repeated and reshaped to work with gather operation. Fast_crop code based pytorch discussion: https://discuss.pytorch.org/t/similar-to-torch-gather-over-two-dimensions/118827
def fast_crop(x, idx1, idx2):
"""
Compute
x: N x B x V
idx1: N x K matrix where idx1[i, j] is between [0, B)
idx2: N x K matrix where idx2[i, j] is between [0, V)
Return:
cropped: N x K matrix where y[i, j] = x[i, idx1[i,j], idx2[i,j]]
"""
x = x.contiguous()
assert idx1.shape == idx2.shape
lin_idx = idx2 + x.size(-1) * idx1
x = x.view(-1, x.size(1) * x.size(2))
lin_idx = lin_idx.view(-1, lin_idx.shape[1] * lin_idx.shape[2])
cropped = x.gather(-1, lin_idx)
return cropped.reshape(idx1.shape)
idx1 = torch.repeat_interleave(idx_h, idx_w.shape[1]).reshape(new_shape)
idx2 = torch.repeat_interleave(idx_w, idx_h.shape[1], dim=0).reshape(new_shape)
cropped = fast_crop(x, idx1, idx2)
(cropped == cropped_x).all()
Using realistic numbers for b = 100, h = 100, w = 130 and crop_border = (40, 95), a 10 trial run takes the for loop 32s while fast_crop only 0.043s.
I'm trying to achieve linear interpolation, where the data points are N images of shape: HxWx3 (stored in buf (NxHxWx3)), and the points to interpolate are specified in another (2D) grid (interp_values).
Non-vectorizable approach:
In principle I have made interp_values a HxW grid with values 0..N-1 indicating for each i,j element from which image (in buf) to read it from, including fractional values meaning interpolation.
E.g.: a value of 3.6 means blend 40% (1-0.6) of image 3 with 60% (0.6) of image 4. However with this approach it is quite impossible to vectorize the code, and performance was poor.
One vectorization approach:
So I changed interp_values to be a NxHxWx3 grid with values 0..1. Each column :,i,j,c would specify blend coefficients for the N images, where only 1 or 2 elements are non-zero, e.g. for 3.6 we have: [0, 0, 0, 0.6, 0.4, 0, 0, ...]. I can convert interp_values from HxW to NxHxWx3 with:
def expand_interp_values(interp_values):
r = np.zeros((N,) + interp_values.shape + (3,))
for i in range(interp_values.shape[0]):
for j in range(interp_values.shape[1]):
v = interp_values[i, j]
a, b, x = math.floor(v), math.ceil(v), math.fmod(v, 1)
if int(a) == int(b):
r[a, i, j, :] = 3 * [1]
else:
r[a, i, j, :] = 3 * [1 - x]
r[b, i, j, :] = 3 * [x]
return r
This representation is more sparse (many zeros) but now interpolation can be computed as element-wise multiplication between buf and interp_values (the multiplication part of the linear interpolation) followed by a sum(..., axis=0) (i.e. the addition part of the linear interpolation):
def linear_interp(data, interp_values):
return np.sum(data * interp_values, axis=0)
With this approach, there is some performance improvement, however it seems with this approach the CPU will be most of the times busy computing x1*0, x2*0, ... or 0 + 0 + 0...
Can this be improved any better?
Additionally, the creation of the expanded interp_values grid is not vectorized, so perhaps performance would be bad if that grid has to be updated continuously.
Complete python+opencv code:
import cv2
import numpy as np
import math
vid = cv2.VideoCapture(0)
vid.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
vid.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
# store last N images into a NxHxWx3 grid (circular buffer):
N = 25
buf = None
interp_values = None
DOWNSAMPLING = 6
def linear_interp(data, interp_values):
return np.sum(data * interp_values / 256, axis=0)
def expand_interp_values(interp_values):
r = np.zeros((N,) + interp_values.shape + (3,))
for i in range(interp_values.shape[0]):
for j in range(interp_values.shape[1]):
v = interp_values[i, j]
a, b, x = math.floor(v), math.ceil(v), math.fmod(v, 1)
if int(a) == int(b):
r[a, i, j, :] = 3 * [1]
else:
r[a, i, j, :] = 3 * [1 - x]
r[b, i, j, :] = 3 * [x]
return r
while True:
ret, frame = vid.read()
H, W, Ch = frame.shape
frame = cv2.resize(frame, dsize=(W//DOWNSAMPLING, H//DOWNSAMPLING), interpolation=cv2.INTER_LINEAR)
# circular buffer:
if buf is None:
buf = np.zeros((N,) + frame.shape, dtype=np.uint8)
# there should be a simpler way to a FIFO-grid...
for i in reversed(range(1, N)):
buf[i] = buf[i - 1]
buf[0] = frame
if interp_values is None:
# create a lookup pattern here:
interp_values = np.zeros(frame.shape[:2])
for i in range(frame.shape[0]):
for j in range(frame.shape[1]):
y = i / (frame.shape[0] - 1) * 2 - 1
x = j / (frame.shape[1] - 1) * 2 - 1
#interp_values[i, j] = (N - 1) * min(1, math.hypot(x, y))
interp_values[i, j] = (N - 1) * (y + 1) / 2
interp_values = expand_interp_values(interp_values)
im = linear_interp(buf, interp_values)
im = cv2.resize(im, dsize=(W, H), interpolation=cv2.INTER_LANCZOS4)
cv2.imshow('image', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
vid.release()
cv2.destroyAllWindows()
I am writing a function that scales the input image into times of
its input size. The function Resize(Mat I, float s) first fills in the and Mat’s
that contained the query point coordinates. Then I calculate the query value by
using bilinear interpolation.
The output image seems to be alright except it has an unexpected # shape grid on it. Can you provide any hint for the resolution?
Output image:
Code:
import numpy as np
import cv2 as cv
import math
import matplotlib.pyplot as plt
#Mat I, float s
def Resize(I, s):
orig_x = I.shape[0];
orig_y = I.shape[1];
tar_x = int (orig_x * s) #int tar_x and tar_y
tar_y = int (orig_y * s);
#print(tar_x)
# Query points
X = np.empty((tar_y, tar_x), np.float32)
Y = np.empty((tar_y, tar_x), np.float32)
# calc interval between output points
interval = (orig_x-1) / (tar_x-1)
# Setting the query points
for i in range(0, tar_y):
for j in range(0, tar_x):
#set X[i, j] and Y[i,j]
X[i][j] = j * interval
Y[i][j] = i * interval
# Output image
output = np.empty((tar_y, tar_x), np.uint8)
# Performing the interpolation
for i in range(0, tar_y):
for j in range(0, tar_x):
#set output[i,j] using X[i, j] and Y[i,j]
x = X[i][j]
y = Y[i][j]
x1 = math.floor(x)
x2 = math.ceil(x)
y1 = math.floor(y)
y2 = math.ceil(y)
vq1= (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1]
vq2= (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1]
output[i,j] = (y-y1)*vq2 + (y2-y)*vq1
return output
s= 640 / 256
I = cv.imread("aerial_256.png", cv.IMREAD_GRAYSCALE)
output = Resize(I,s)
output = cv.cvtColor(output, cv.COLOR_BGR2RGB)
plt.imshow(output)
plt.savefig("aerial_640.png",bbox_inches='tight',transparent=True, pad_inches=0)
plt.show()
You are getting a black pixel where x is an integer and where y is an integer.
Take a look at the following code:
x1 = math.floor(x)
x2 = math.ceil(x)
vq1= (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1]
vq2= (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1]
Assume: x = 85.0
x1 = floor(x) = 85
x2 = ceil(x) = 85
(x-x1) = (85-85) = 0
(x2-x) = (85-85) = 0
vq1 = (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1] = 0*I[y1,x2] + 0*I[y1,x1] = 0
vq2 = (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1] = 0*I[y2,x2] + 0*I[y2,x1] = 0
output[i,j] = (y-y1)*vq2 + (y2-y)*vq1 = (y-y1)*0 + (y2-y)*0 = 0
Result:
In the entire column where x = 85.0 the value of output[i,j] is zero (we are getting a black column).
Same result applied to y = 85.0 - we are getting a black row.
When does x value is an integer?
Take a look at the following code:
# calc interval between output points
interval = (orig_x-1) / (tar_x-1)
# Setting the query points
for i in range(0, tar_y):
for j in range(0, tar_x):
#set X[i, j] and Y[i,j]
X[i][j] = j * interval
interval = (orig_x-1) / (tar_x-1) = 255/639 = (3*5*17/(3*3*71) = 85/213
j * interval = j * 85/213
Each time j is a multiple of 213, j * interval is an integer (we are getting a black column).
It happens when j=0, j=213, j=426, j=639, so there are two black columns (beside margins).
There are also two visible black rows (beside margins).
Suggested solution:
Replace x2 = math.ceil(x) with x2 = min(x1 + 1, orig_x-1).
Replace y2 = math.ceil(y) with y2 = min(y1 + 1, orig_y-1).
Corrected loop:
for i in range(0, tar_y):
for j in range(0, tar_x):
#set output[i,j] using X[i, j] and Y[i,j]
x = X[i][j]
y = Y[i][j]
x1 = math.floor(x)
x2 = min(x1 + 1, orig_x-1)
y1 = math.floor(y)
y2 = min(y1 + 1, orig_y-1)
vq1= (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1]
vq2= (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1]
output[i,j] = (y-y1)*vq2 + (y2-y)*vq1
Result:
The code below shows error "ValueError: shapes (400,16,1) and (16,16) not aligned: 1 (dim 2) != 16 (dim 0)". How can I solve this problem? I want to create an image recognition algorithm using numpy only. Test images are 20*20 px sized. (sorry for my English, I speak Russian)
from numpy import exp, array, random, dot, squeeze, asarray
from PIL import Image
images = []
for k in range(8):
im = Image.open(f'learn\\yes\\{k + 1}.png', 'r')
a = list(im.getdata())
pixel_values = []
for i in a:
pixel_values.append((i[0] + i[1] + i[2] / 3) / 1000)
images.append(pixel_values)
im = Image.open(f'learn\\no\\{k + 1}.png', 'r')
a = list(im.getdata())
pixel_values = []
for i in a:
pixel_values.append((i[0] + i[1] + i[2] / 3) / 1000)
images.append(pixel_values)
im = Image.open(f'test\\1.png', 'r')
a = list(im.getdata())
pixel_values = []
for i in a:
pixel_values.append((i[0] + i[1] + i[2] / 3) / 1000)
print(*images, sep='\n', end='\n\n')
print(pixel_values)
# print(pixel_values3)
training_set_inputs = array([images])
training_set_outputs = array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]]).T
random.seed(1)
print('processing...')
synaptic_weights = squeeze(asarray(3 * random.random((400, 1)) - 1))
for iteration in range(2):
print(f'starting iteration {iteration + 1}')
output = 1 / (1 + exp(-(dot(training_set_inputs, synaptic_weights))))
synaptic_weights += dot(training_set_inputs.T, (training_set_outputs - output) * output * (1 - output))
print('done!')
a = 1 / (1 + exp(-(dot(array(pixel_values), synaptic_weights))))[0]
print(a)
if a > 0.6:
print('yes')
else:
print('no')
I've solved the problem. The problem was here:
training_set_inputs = array([images])
instead of
training_set_inputs = array(images)
I have large lists containing RGB values for pictures.
I use numpy to create patterns and/or pillow to load pictures and convert them to 3D-numpy arrays (int32). Now i want to restructure my array into hex-strings in a weird way:
Three hex-strings for R, G, B in this structure: '0000FFFF', where the first 4 characters always have to be zero, then 2 characters represent pixel n+1 and the last 2 cahracters represent pixel n.
I already have done this with a code which takes too long for larger images and i require some improvement. What i got so far:
import numpy
import numpy.matlib
#from matplotlib.colors import rgb2hex
import time
def pairwise(iterable):
"""Create a paired-list from a list."""
a = iter(iterable)
return zip(a, a)
def test(imgSize=[480,640], brightness=[255,255,255]):
#generate pattern
startPattern = time.time()
patternDescription = 'Stripe Test'
pattern = numpy.zeros((imgSize[0], imgSize[1], 3))
line = (numpy.r_[:imgSize[1]]%255)/255
colorChR = numpy.matlib.repmat(line, imgSize[0], 1)
colorChG = numpy.matlib.repmat(line, imgSize[0], 1)
colorChB = numpy.matlib.repmat(line, imgSize[0], 1)
colorChR[:, :] = 0
colorChR[:, 0:60] = 1
colorChG[:, :] = 0
colorChG[:, 0:60] = 1
colorChB[:, :] = 0
colorChB[:, 0:60] = 1
pattern[:, :, 0] = colorChR
pattern[:, :, 1] = colorChG
pattern[:, :, 2] = colorChB
stopPattern = time.time()
print('TIME: Pattern generation: ' + str(round(stopPattern-startPattern,3)) + ' s. ')
# first reshape
startReshape = time.time()
pattern[:, :, 0] = pattern[:, :, 0]*brightness[0] # red brightness multiplicator
pattern[:, :, 1] = pattern[:, :, 1]*brightness[1] # green brightness multiplicator
pattern[:, :, 2] = pattern[:, :, 2]*brightness[2] # blue brightness multiplicator
img = pattern.astype(int)
# IDEALLY I WANT TO CHANGE THE CODE ONLY FROM HERE ON
# redValues = pattern[:,:,0].astype(int)
# greenValues = pattern[:,:,1].astype(int)
# blueValues = pattern[:,:,2].astype(int)
# test = ("0000" + ("{:0>2X}" * len(redValues))).format(*tuple(redValues[::-1]))
# numpy.set_printoptions(formatter={'int':hex})
# #test = [ rgb2hex(img[i,:]) for i in range(img.shape[0]) ]
# rgb2hex = lambda r,g,b: '%02X%02X%02X' %(r,g,b)
# test = [ rgb2hex(*img[i,:]) for i in range(img.shape[0]) ]
# # img = numpy.array2string(img, formatter = {'int':lambda img: hex(img)})
imgReshape = numpy.reshape(img, (1, imgSize[0]*imgSize[1]*3)) #necessary?
redValues = imgReshape[0][0::3] #red values (0, 3, 6, ..)
greenValues = imgReshape[0][1::3] #green values (1, 4, 7, ..)
blueValues = imgReshape[0][2::3] #blue values (2, 5, 8, ..)
stopReshape = time.time()
print('TIME: Reshape into colors: ' + str(round(stopReshape-startReshape,3)) + ' s. ')
redString = ''
greenString = ''
blueString = ''
outData = dict()
startString = time.time()
for i, j in pairwise(redValues):
tempRed = "0000%02X%02X" % (int(j), int(i))
redString += tempRed
for i, j in pairwise(greenValues):
tempGreen = "0000%02X%02X" % (int(j), int(i))
greenString += tempGreen
for i, j in pairwise(blueValues):
tempBlue = "0000%02X%02X" % (int(j), int(i))
blueString += tempBlue
outData['red'] = redString
outData['green'] = greenString
outData['blue'] = blueString
stopString = time.time()
print('TIME: String formatting: ' + str(round(stopString-startString, 3)) + ' s')
print('DATATEST: First 200 red chars: ' + str(outData['red'][0:200]))
print('DATATEST: First 200 green chars: ' + str(outData['green'][0:200]))
print('DATATEST: First 200 blue chars: ' + str(outData['blue'][0:200]))
#return outData
Try to use numpy array instead:
redValues = np.random.randint(0, 255, (10, 2))
red = np.array(redValues).reshape(-1, 2)
red_channel = (red[:, 1] << 8) + red[:, 0]
redString = ''.join(map(lambda val: f'0000{val:04x}', red_channel))