Elementwise subtraction in numpy arrays - python

I have two numpy arrays of different dimensions:
x.shape = (1,1,M) and Y.shape = (N,N).
How do I perform Z = x - Y efficiently in python, such that Z.shape = (N,N,M), where - is an elementwise subtraction operation.
For example, M=10
x = array([[[1, 2, 3, 4, 5 , 6, 7, 8, 9, 10]]])
and N=8
Y = array([[11, 12, 13, 14, 15, 16, 17, 18],
[21, 22, 23, 24, 25, 26, 27, 28],
[31, 32, 33, 34, 35, 36, 37, 38],
[41, 42, 43, 44, 45, 46, 47, 48],
[51, 52, 53, 54, 55, 56, 57, 58],
[61, 62, 63, 64, 65, 66, 67, 68],
[71, 72, 73, 74, 75, 76, 77, 78],
[81, 82, 83, 84, 85, 86, 87, 88]])
Now the idea is to get a Z such that
Z[:,:,0] = array([[1-11, 1-12, 1-13, 1-14, 1-15, 1-16, 1-17, 1-18],
[1-21, 1-22, 1-23, 1-24, 1-25, 1-26, 1-27, 1-28],
[1-31, 1-32, 1-33, 1-34, 1-35, 1-36, 1-37, 1-38],
[1-41, 1-42, 1-43, 1-44, 1-45, 1-46, 1-47, 1-48],
[1-51, 1-52, 1-53, 1-54, 1-55, 1-56, 1-57, 1-58],
[1-61, 1-62, 1-63, 1-64, 1-65, 1-66, 1-67, 1-68],
[1-71, 1-72, 1-73, 1-74, 1-75, 1-76, 1-77, 1-78],
[1-81, 1-82, 1-83, 1-84, 1-85, 1-86, 1-87, 1-88]])
and
Z[:,:,9] = array([[10-11, 10-12, 10-13, 10-14, 10-15, 10-16, 10-17, 10-18],
[10-21, 10-22, 10-23, 10-24, 10-25, 10-26, 10-27, 10-28],
[10-31, 10-32, 10-33, 10-34, 10-35, 10-36, 10-37, 10-38],
[10-41, 10-42, 10-43, 10-44, 10-45, 10-46, 10-47, 10-48],
[10-51, 10-52, 10-53, 10-54, 10-55, 10-56, 10-57, 10-58],
[10-61, 10-62, 10-63, 10-64, 10-65, 10-66, 10-67, 10-68],
[10-71, 10-72, 10-73, 10-74, 10-75, 10-76, 10-77, 10-78],
[10-81, 10-82, 10-83, 10-84, 10-85, 10-86, 10-87, 10-88]])
and so on.
It is easy to do in MATLAB using just - operation. But Python does not support it.

The answer is: use different shape of y:
>>> y = y.reshape((8, 8, 1))
>>> (x-y).shape
(8, 8, 10)
This is a vizualization for better understanding with smaller dimensions:

You can compute your result without explicit creation of a reshaped array,
but using Numpy broadcasting.
The key to success is to add a new dimension to Y, using np.newaxis:
Z = x - Y[:, :, np.newaxis]

Related

Modifying alternate indices of 3d numpy array

I have a numpy array with shape (140, 23, 2) being 140 frames, 23 objects, and x,y locations. The data has been generated by a GAN and when I animate the movement it's very jittery. I want to smooth it by converting the coordinates for each object so every odd number index to be the mid-point between the even numbered indices either side of it. e.g.
x[1] = (x[0] + x[2]) / 2
x[3] = (x[2] + x[4]) / 2
Below is my code:
def smooth_coordinates(df):
# df shape is (140, 23, 2)
# iterate through each object (23)
for j in range(len(df[0])):
# iterate through 140 frames
for i in range(len(df)):
# if it's an even number and index allows at least 1 index after it
if (i%2 != 0) and (i < (len(df[0])-2)):
df[i][j][0] = ( (df[i-1][j][0]+df[i+1][j][0]) /2 )
df[i][j][1] = ( (df[i-1][j][1]+df[i+1][j][1]) /2 )
return df
Aside from it being very inefficient my input df and output df are identical. Any suggestions for how to achieve this more efficiently?
import numpy as np
a = np.random.randint(100, size= [140, 23, 2]) # input array
b = a.copy()
i = np.ogrid[1: a.shape[0]-1: 2] # odd indicies
i
>>> [ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25,
27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51,
53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77,
79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103,
105, 107, 109, 111, 113, 115, 117, 119, 121, 123, 125, 127, 129,
131, 133, 135, 137]
(a == b).all() # testing for equality
>>> True
a[i] = (a[i-1] + a[i+1]) / 2 # averaging positions across frames
(a == b).all() # testing for equality again
>>> False

Problem with adding elements from functions to list (too much memory is using?)

I replace in this code
import matplotlib.pyplot as plt
#parametry dla romeo i julii, zeby byly niezmienne w uczuciach musza byc wieksze od 0
aR = 0.5
aL = 0.7
#pR pL odpowiedzi Romea/Julii na miłość
pR = 0.2
pL = 0.5
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
rom = []
jul = []
def Romeo(n):
if n == 0:
return 1
return Romeo(n - 1)*aR
def Julia(n):
if n == 0:
return 1
return Julia(n - 1)*aL
def alfa(n):
if n == 0:
return 1
return aR*Romeo(n - 1) + pR*Julia(n - 1)
def beta(n):
if n == 0:
return 1
return aL*Julia(n - 1) + pL*Romeo(n - 1)
j = 0
while j < 100:
rom.append(alfa(j))
j+=1
j = 0
while j < 100:
jul.append(beta(j))
j+=1
plt.plot(x, rom, label = "Romeo love")
plt.plot(x, jul, label = "Julia love")
plt.xlabel("Days")
plt.ylabel("Romeo love")
plt.title("Some graph")
plt.legend()
plt.show()
only alfa and beta functions byt this:
import matplotlib.pyplot as plt
#parametry dla romeo i julii, zeby byly niezmienne w uczuciach musza byc wieksze od 0
aR = 0.5
aL = 0.7
#pR pL odpowiedzi Romea/Julii na miłość
pR = 0.2
pL = 0.5
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
rom = []
jul = []
def Romeo(n):
if n == 0:
return 1
return Romeo(n - 1)*aR
def Julia(n):
if n == 0:
return 1
return Julia(n - 1)*aL
def alfa(n):
if n == 0:
return 1
return round(aR*alfa(n - 1) + pR*beta(n - 1), 3)
def beta(n):
if n == 0:
return 1
return round(aL*beta(n-1) + pL*alfa(n - 1), 3)
j = 0
while j < 100:
rom.append(alfa(j))
j+=1
j = 0
while j < 100:
jul.append(beta(j))
j+=1
plt.plot(x, rom, label = "Romeo love")
plt.plot(x, jul, label = "Julia love")
plt.xlabel("Days")
plt.ylabel("Romeo love")
plt.title("Some graph")
plt.legend()
plt.show()
And Pycharm does not want to compilate (does not draw this graph) or it will take a lot of time. Ealier it was not a problem. \
I thought that a lot of numbers after point can be a reason and i round every number from list, but it didnt solve the problem.
What I changed by replacing this functions? How can I fix that?
Im pretty sure that the problem is in assigning elements from functions to list [2 while]. But i do not know why.
The current recursive approach is wasteful.
For example, when computing alfa(1) would require alfa(0), beta(0).
When you move on to alfa(2), the code will first compute alfa(1) and beta(1). Then alfa(1) would call alfa(0) and beta(0), while beta(1) would separately call alfa(0), beta(0) again, without recycling what we have computed before. So you need 6 calls for alfa(2).
At alfa(3), you would compute alfa(2) and beta(2), each of which needs 6 calls; so you need 14 calls (if my math is not off).
Imagine how many computations you would need at n == 100; the answer is 2535301200456458802993406410750. Cumulatively, i.e., since you want to plot alfa(1), ..., alfa(100), you need 5070602400912917605986812821300
computations in total, only to produce a single list rom.
You can use memoization to remember the previously calculated results and recycle them.
In python, you can achieve this by using functools.lru_cache (python doc); put
from functools import lru_cache
at the beginning of your code and then put
#lru_cache()
before each function; e.g.,
#lru_cache()
def Romeo(n):
if n == 0:
return 1
return Romeo(n - 1)*aR
You will see the graph almost immediately now.

Finding average?

I am new to python, and I wondered if anyone could help me find the average in the following marks list of students?. Please advise as to what I am doing wrong here.
student_marks = [['Name', ['A','B','C','D','E']],
['Ankit', [41, 34, 45, 55, 63]],
['Aravind',[42, 23, 34, 44, 53]],
['Lakshay',[32, 23, 13, 54, 67]],
['Gyan', [23, 82, 23, 63, 34]],
['Pranav', [21, 23, 25, 56, 56]]
]
for student in student_marks[1:]:
marks_in_C = student[1][2]
print(marks_in_C, end=" ")
total = sum(marks_in_C)
TypeError: 'int' object is not iterable
You can find the average of C in this two ways:
student_marks = [['Name', ['A','B','C','D','E']],
['Ankit', [41, 34, 45, 55, 63]],
['Aravind',[42, 23, 34, 44, 53]],
['Lakshay',[32, 23, 13, 54, 67]],
['Gyan', [23, 82, 23, 63, 34]],
['Pranav', [21, 23, 25, 56, 56]]
]
total_C = 0
for [name, [A, B, C, D, E]] in student_marks[1:]:
total_C += C
avg_of_C = total_C / len(student_marks[1:])
print(avg_of_C)
OR
student_marks = [['Name', ['A','B','C','D','E']],
['Ankit', [41, 34, 45, 55, 63]],
['Aravind',[42, 23, 34, 44, 53]],
['Lakshay',[32, 23, 13, 54, 67]],
['Gyan', [23, 82, 23, 63, 34]],
['Pranav', [21, 23, 25, 56, 56]]
]
marks_in_C = list()
for [name, [A, B, C, D, E]] in student_marks[1:]:
marks_in_C.append(C)
avg_of_C = sum(marks_in_C) / len(marks_in_C)
print(avg_of_C)
Welcome to Python world! The error occurred because you are using int type data to be iterated. sum() method expect iterable data but marks_in_C has only one integer which cannot be iterated
I think you are trying to use marks_in_C as a list. why don't you use your loop looks like this?
...
marks_in_C = list()
for student in student_marks[1:]:
marks_in_C.append(student[1][2])
print(marks_in_C, end=" ")
total = sum(marks_in_C)
hope it to help you.

Is there a fast way to shuffle numpy image in segments?

I want to write a function that can take small images and return a permutation of them, block-wise.
Basically I want to turn this:
Into this:
There was an excellent answer in Is there a function in Python that shuffle data by data blocks? that helped me write a solution. However for ~50,000 28x28 images this takes a long time to run.
# blocks of 7x7 shuffling
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
print([[x[i*block_size:(i+1)*block_size].shape] for i in range1])
for x in x1:
np.random.shuffle(range1)
x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
for a in x:
np.random.shuffle(range2)
a[:] = np.block([a[i*block_size:(i+1)*block_size] for i in range2])
print("x1", time.time() - begin)
begin = time.time()
Here's one approach based on this post -
def randomize_tiles_3D(x1, H, W):
# W,H are width and height of blocks
m,n,p = x1.shape
l1,l2 = n//H,p//W
combs = np.random.rand(m,l1*l2).argsort(axis=1)
r,c = np.unravel_index(combs,(l1,l2))
x1cr = x1.reshape(-1,l1,H,l2,W)
out = x1cr[np.arange(m)[:,None],r,:,c]
return out.reshape(-1,l1,l2,H,W).swapaxes(2,3).reshape(-1,n,p)
Sample run -
In [46]: x1
Out[46]:
array([[[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]],
[[36, 37, 38, 39, 40, 41],
[42, 43, 44, 45, 46, 47],
[48, 49, 50, 51, 52, 53],
[54, 55, 56, 57, 58, 59],
[60, 61, 62, 63, 64, 65],
[66, 67, 68, 69, 70, 71]]])
In [47]: np.random.seed(0)
In [48]: randomize_tiles_3D(x1, H=3, W=3)
Out[48]:
array([[[21, 22, 23, 0, 1, 2],
[27, 28, 29, 6, 7, 8],
[33, 34, 35, 12, 13, 14],
[18, 19, 20, 3, 4, 5],
[24, 25, 26, 9, 10, 11],
[30, 31, 32, 15, 16, 17]],
[[36, 37, 38, 54, 55, 56],
[42, 43, 44, 60, 61, 62],
[48, 49, 50, 66, 67, 68],
[39, 40, 41, 57, 58, 59],
[45, 46, 47, 63, 64, 65],
[51, 52, 53, 69, 70, 71]]])
I already found a solution that runs much faster. I feel silly because I didn't really need a double for loop, just two separate shuffle indexes. Leaving this solution here in case anyone wants to shuffle an image block-wise in numpy.
If anyone comes up with another good solution, let me know.
# blocks of 7x7 shuffling
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
for x in x1:
np.random.shuffle(range1)
np.random.shuffle(range2)
x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
x[:] = np.block([x[:,i*block_size:(i+1)*block_size] for i in range2])
It will be more efficient to use numpy.lib.stride_tricks.as_strided to break 2D matrices into blocks.
import numpy as np
img_width, block_width = 12, 3
n = img_width // block_width
a = np.arange(img_width * img_width).reshape(img_width, img_width)
print(a)
blocks = np.lib.stride_tricks.as_strided(a, \
shape=(n, n, block_width, block_width), \
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, n * block_width, 1])))
print(blocks)
blocks = blocks.reshape((n * n, block_width, block_width)) # flatten for better shuffle
np.random.shuffle(blocks)
print(blocks)
blocks = np.lib.stride_tricks.as_strided(blocks, \
shape=(n, block_width, n, block_width), \
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, block_width ** 2, 1])))
shuffled = np.reshape(blocks, (img_width, img_width))
print(shuffled)
Output can be found here: blocks_shuffle_example.ipynb
Document: numpy.lib.stride_tricks.as_strided
Here's one approach:
Assume that the original image has shape (m, n), and each block has shape (w, h).
import numpy as np
# split image into tiles of w*h blocks with shape = ((m * n) / (w * h), w, h)
tiles = np.array([img_pad[x : x+w, y : y+h] for x in range(0, m, w) for y in range(0, n, h)])
np.random.shuffle(tiles)
# merge back to shape = (m, n)
mb, nb = m // w, n // h
res = np.vstack(np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb))
Update:
res = np.vstack(np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb))
may cause "FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future." while running.
Use
res = np.block([[np.hstack(tiles[i*nb : (i+1)*nb])] for i in range(mb)])
instead and there're no warnings.

Save numpy array as binary to read from FORTRAN

I have a series of numpy array, i need to save these numpy array in a loop as a raw binary float32 (without any header information) which need to be read from FORTRAN.
import numpy as np
f=open('test.bin','wb+')
for i in range(0,10):
np_data=np.random.rand(10,5)
fortran_data=np.asfortranarray(np_data,'float32')
fortran_data.tofile(f)
f.close()
Is this the correct way so that I can read this binary file created in python from FORTRAN correctly. Your suggestions will be highly apprecitaed
The code you wrote is almost right, but the .tofile method always write the vector in C order. I don't know why the np.asfortranarray() avoids this when writing in the binary file, but I tested and unfortunately we need to transpose the matrix before writing to correct read in Fortran without any other concern (this means in Fortran you can give the actual matrix dimension without needing any transpose).
The code below is to illustrate with a 3D matrix (which I ussually need to use) what I am saying:
a = np.arange(1,10*3*4+1)
b = a.reshape(10,12,order='F')
array([[ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 111],
[ 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102, 112],
[ 3, 13, 23, 33, 43, 53, 63, 73, 83, 93, 103, 113],
[ 4, 14, 24, 34, 44, 54, 64, 74, 84, 94, 104, 114],
[ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 105, 115],
[ 6, 16, 26, 36, 46, 56, 66, 76, 86, 96, 106, 116],
[ 7, 17, 27, 37, 47, 57, 67, 77, 87, 97, 107, 117],
[ 8, 18, 28, 38, 48, 58, 68, 78, 88, 98, 108, 118],
[ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99, 109, 119],
[ 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120]])
b is already in Fortran order
c=b.reshape(10,3,4, order='F')
print(c[:,:,0])
[[ 1 11 21]
[ 2 12 22]
[ 3 13 23]
[ 4 14 24]
[ 5 15 25]
[ 6 16 26]
[ 7 17 27]
[ 8 18 28]
[ 9 19 29]
[10 20 30]]
Then I save the matrix c in a binary file:
c.T.tofile('test_c.bin')
So, using this Fortran code I am able to read the binary data in the correct order I created the c matrix in Python:
PROGRAM read_saved_python
IMPLICIT NONE
INTEGER(KIND=8),ALLOCATABLE :: matrix(:,:,:)
INTEGER :: Nx, Ny, Nz
Nx = 10
Ny = 3
Nz = 4
ALLOCATE(matrix(Nx, Ny, Nz))
OPEN(33, FILE="/home/victor/test_c.bin",&
FORM="UNFORMATTED", STATUS="UNKNOWN", ACTION="READ", ACCESS='STREAM')
READ(33) matrix
write(*,*) matrix(:,1,1)
CLOSE(33)
DEALLOCATE(matrix)
END PROGRAM read_saved_python
Notice in Fortran the indexes start in 1 and the print shows in column order (in this case: print the first column, the second and then the third). If you don't transpose the matrix here c.T.tofile('test_c.bin') when reading in Fortran you'll notice that the matrix is not as you wanted, even if you use function np.asfortranarray as you did ( I even tried np.asfortranarray(c).T.tofile('/home/victor/teste_d.bin') (just to make sure) but the matrix is written in c order in the binary file.
You will need the meta data of the array to read it in FORTRAN. This website (https://scipy.github.io/old-wiki/pages/Cookbook/InputOutput.html) has some information on using libnpy to write and an example code fex.f95 to read the binary file.

Categories