Python Dynamic Array allocation, Matlab style

I'm trying to move a few Matlab libraries that I've built to the Python environment. So far, the biggest issue I've faced is the dynamic allocation of arrays based on index specification. For example, using Matlab, typing the following:
x = [1 2];
x(5) = 3;
would result in:
x = [ 1 2 0 0 3]
In other words, I don't know beforehand the size of x, nor its contents. The array must be defined on the fly, based on the indices that I'm providing.
In Python, trying the following:
from numpy import *
x = array([1,2])
x[4] = 3
would result in the following error: IndexError: index out of bounds. One workaround is growing the array in a loop and then assigning the desired value:
from numpy import *
x = array([1,2])
idx = 4
for i in range(size(x),idx+1):
    x = append(x,0)
x[idx] = 3
print x
It works, but it's not very convenient, and it might become very cumbersome for n-dimensional arrays. I thought about subclassing ndarray to achieve my goal, but I'm not sure whether it would work. Does anybody know of a better approach?
Thanks for the quick reply. I didn't know about the __setitem__ method (I'm fairly new to Python). I simply subclassed ndarray and overrode it as follows:
import numpy as np

class marray(np.ndarray):
    def __setitem__(self, key, value):
        # Array properties
        nDim = np.ndim(self)
        dims = list(np.shape(self))
        # Requested index
        if type(key) == int: key = key,
        nDim_rq = len(key)
        dims_rq = list(key)
        for i in range(nDim_rq): dims_rq[i] += 1
        # Provided indices match current array number of dimensions
        if nDim_rq == nDim:
            # Define new dimensions
            newdims = []
            for iDim in range(nDim):
                v = max([dims[iDim], dims_rq[iDim]])
                newdims.append(v)
            # Resize if necessary
            if newdims != dims:
                self.resize(newdims, refcheck=False)
        return super(marray, self).__setitem__(key, value)
And it works like a charm! However, I need to modify the above code so that __setitem__ also allows changing the number of dimensions, to handle a request like this:
a = marray([0,0])
a[3,1,0] = 0
Unfortunately, when I try to use numpy functions such as
self = np.expand_dims(self,2)
the returned type is numpy.ndarray instead of __main__.marray. Any idea how I could enforce that numpy functions output an marray if an marray is provided as input? I think it should be doable using __array_wrap__, but I could never find out exactly how. Any help would be appreciated.
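For reference, here is a minimal sketch (my own, not tested beyond the basics) of the subclassing hooks numpy documents for exactly this: a __new__ that views existing data as the subclass, plus __array_finalize__ and __array_wrap__. It only shows where the re-wrapping is supposed to happen, not a finished marray:
import numpy as np

class marray(np.ndarray):
    def __new__(cls, data):
        # Build from existing data and re-view it as the subclass, so
        # marray([0, 0]) means "an array with these values", not a shape.
        return np.asarray(data).view(cls)

    def __array_finalize__(self, obj):
        # Runs for explicit construction, views and templates alike;
        # per-instance attributes would be propagated here.
        pass

    def __array_wrap__(self, out_arr, context=None):
        # Results of ufuncs applied to an marray come back as marray.
        return super(marray, self).__array_wrap__(out_arr, context)

print(type(marray([1, 2]) + 1))   # <class '__main__.marray'>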

I took the liberty of updating my old answer from "Dynamic list that automatically expands". I think this should do most of what you need/want:
class matlab_list(list):
    def __init__(self):
        def zero():
            while 1:
                yield 0
        self._num_gen = zero()

    def __setitem__(self, index, value):
        if isinstance(index, int):
            self.expandfor(index)
            return super(matlab_list, self).__setitem__(index, value)
        elif isinstance(index, slice):
            if index.stop < index.start:
                return super(matlab_list, self).__setitem__(index, value)
            else:
                self.expandfor(index.stop if abs(index.stop) > abs(index.start) else index.start)
                return super(matlab_list, self).__setitem__(index, value)

    def expandfor(self, index):
        rng = []
        if abs(index) > len(self) - 1:
            if index < 0:
                rng = xrange(abs(index) - len(self))
                for i in rng:
                    self.insert(0, self._num_gen.next())
            else:
                rng = xrange(abs(index) - len(self) + 1)
                for i in rng:
                    self.append(self._num_gen.next())
# Usage
spec_list = matlab_list()
spec_list[5] = 14

This isn't quite what you want, but...
x = np.array([1, 2])
try:
    x[index] = value
except IndexError:
    oldsize = len(x)  # will be trickier for multidimensional arrays; you'll need to
                      # use x.shape or something and take advantage of numpy's
                      # advanced slicing ability
    x = np.resize(x, index+1)  # Python uses C-style 0-based indices
    x[oldsize:index] = 0  # You could also do x[oldsize:] = 0, but that would mean
                          # you'd be assigning to the final position twice.
    x[index] = value
>>> x = np.array([1, 2])
>>> x = np.resize(x, 5)
>>> x[2:5] = 0
>>> x[4] = 3
>>> x
array([1, 2, 0, 0, 3])
Due to how numpy stores the data linearly under the hood (though whether it stores as row-major or column-major can be specified when creating arrays), multidimensional arrays are pretty tricky here.
>>> x = np.array([[1, 2, 3], [4, 5, 6]])
>>> np.resize(x, (6, 4))
array([[1, 2, 3, 4],
       [5, 6, 1, 2],
       [3, 4, 5, 6],
       [1, 2, 3, 4],
       [5, 6, 1, 2],
       [3, 4, 5, 6]])
You'd need to do this or something similar:
>>> y = np.zeros((6, 4))
>>> y[:x.shape[0], :x.shape[1]] = x
>>> y
array([[ 1.,  2.,  3.,  0.],
       [ 4.,  5.,  6.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

A Python dict will work well as a sparse array. The main issue is that the syntax for initializing the sparse array will not be as pretty:
listarray = [100,200,300]
dictarray = {0:100, 1:200, 2:300}
but after that the syntax for inserting or retrieving elements is the same
dictarray[5] = 2345
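For example, a small sketch: missing entries read back as 0 via dict.get, and a dense numpy array can be built once the final size is known.
import numpy as np

dictarray = {0: 100, 1: 200, 2: 300}
dictarray[5] = 2345

print(dictarray.get(4, 0))   # 0 -- unset indices behave like zeros

n = max(dictarray) + 1       # densify once the required length is known
dense = np.zeros(n, dtype=int)
for i, v in dictarray.items():
    dense[i] = v
print(dense)                 # [ 100  200  300    0    0 2345]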

Related

How to create a numpy array with an extra dimension depending on a where clause?

Problem
I need to create an array based on the argmax: the position of each row's maximum should be filled with [1,0], while all the other positions, which are not the maximum, should be filled with [0,1].
Example:
Given the vector a:
a.shape = (3,2)
a = np.array([[1,0],[1,2],[1,3]])
Return the vector b:
b.shape = (3,2,2)
b = np.array([[[1,0],[0,1]],[[0,1],[1,0]],[[0,1],[1,0]]])
c = np.argmax(a, axis=1)
b = np.empty(tuple(list(a.shape) + [2]))
b[range(len(c)), c, :] = [1, 0]
b[range(len(c)), ~c, :] = [0, 1]
b
>>> array([[[1., 0.],
        [0., 1.]],

       [[0., 1.],
        [1., 0.]],

       [[0., 1.],
        [1., 0.]]])
Note this only works in this example since the argmax will only ever be 0 or 1. If the second dimension of a is greater than 2, I don't think this solution will work.
I was able to create a function that returns the desired result, but it will only work for two classes. It could be adapted for multiple classes:
a = np.array([[1,0],[1,2],[1,3]])

def create_dist_prob_target(arr):
    p_ = arr  # arr is already 2-D here, so no squeezing is needed
    a = np.expand_dims(np.where((p_ == np.amax(p_, axis=1)[:, None]), 1, 0), axis=-1)
    b = np.expand_dims(np.where((p_ == np.amax(p_, axis=1)[:, None]), 0, 1), axis=-1)
    return np.concatenate((a, b), axis=2)

b = create_dist_prob_target(a)
print(b)
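A sketch of one way to generalize to more than two classes (my own suggestion, not part of the original answer): build a boolean mask of each row's argmax and broadcast the two fill values over a new last axis.
import numpy as np

a = np.array([[1, 0], [1, 2], [1, 3]])
c = np.argmax(a, axis=1)
mask = np.arange(a.shape[1]) == c[:, None]      # True only at each row's argmax
b = np.where(mask[..., None], [1, 0], [0, 1])   # shape (rows, cols, 2)
print(b)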

Create a numpy array and update its values in every iteration

I'm using a video processing tool and need to put the processing data from each frame into an array.
for p in det.read(frame, fac):
    point_values = np.array([])
    for j, (x, y) in enumerate(p):  # iteration through points
        point_values = np.append(point_values, y)
        point_values = np.append(point_values, x)
This code runs again for each frame. I'm expecting point_values = np.array([]) to reset the array and then start filling it again.
I'm not sure whether my logic is wrong or it's a syntax issue.
Your code does:
In [77]: p = [(0,0),(0,2),(1,0),(1,2)]
In [78]: arr = np.array([])
In [79]: for j,(x,y) in enumerate(p):
    ...:     arr = np.append(arr,y)
    ...:     arr = np.append(arr,x)
    ...:
In [80]: arr
Out[80]: array([0., 0., 2., 0., 0., 1., 2., 1.])
No syntax error. The list equivalent is faster and cleaner:
In [85]: alist =[]
In [86]: for x,y in p: alist.extend((y,x))
In [87]: alist
Out[87]: [0, 0, 2, 0, 0, 1, 2, 1]
But you don't give any indication of how this action is supposed to fit within a larger context. You create a new point_values for each p, but then don't do anything with it.
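If the goal is simply one flat array of (y, x) values per frame, a minimal sketch (with made-up point data standing in for det.read(frame, fac)) could look like this:
import numpy as np

frames = [
    [(0, 0), (0, 2), (1, 0), (1, 2)],   # points for frame 0
    [(5, 5), (6, 7)],                   # points for frame 1
]

per_frame = []
for points in frames:
    coords = []
    for x, y in points:
        coords.extend((y, x))           # same (y, x) order as the question
    per_frame.append(np.array(coords))

print(per_frame[0])   # [0 0 2 0 0 1 2 1]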

Assign values to different index positions in Numpy array

Say I have an array
np.zeros((4,2))
I have a list of values [4,3,2,1], which I want to assign to the following positions:
[(0,0),(1,1),(2,1),(3,0)]
How can I do that without using the for loop or flattening the array?
I can use fancy indexing to retrieve values, but not to assign them.
======Update=========
Thanks to @hpaulj, I realized what the bug in my original code is: when I use zeros_like to initialize the array, it inherits the int dtype and truncates the values I assign. Therefore, it looks like I did not assign anything!
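A small sketch of that pitfall: zeros_like inherits the dtype of its argument, so an int template silently truncates float assignments unless the dtype is overridden.
import numpy as np

template = np.array([[1, 2], [3, 4]])      # int dtype
a = np.zeros_like(template)
a[0, 0] = 0.7
print(a[0, 0])                             # 0 -- truncated to int

a = np.zeros_like(template, dtype=float)   # override the inherited dtype
a[0, 0] = 0.7
print(a[0, 0])                             # 0.7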
You can use tuple indexing:
>>> import numpy as np
>>> a = np.zeros((4,2))
>>> vals = [4,3,2,1]
>>> pos = [(0,0),(1,1),(2,0),(3,1)]
>>> rows, cols = zip(*pos)
>>> a[rows, cols] = vals
>>> a
array([[ 4.,  0.],
       [ 0.,  3.],
       [ 2.,  0.],
       [ 0.,  1.]])
Here is a streamlined version of @wim's answer based on @hpaulj's comment. np.transpose automatically converts the Python list of tuples into a NumPy array and transposes it. tuple casts the index coordinates to a tuple, which works because a[rows, cols] is equivalent to a[(rows, cols)] in NumPy.
import numpy as np
a = np.zeros((4, 2))
vals = range(4)
indices = [(0, 0), (1, 1), (2, 0), (3, 1)]
a[tuple(np.transpose(indices))] = vals
print(a)

Python: Parse string to array

I am currently having a problem parsing a string into a numpy array.
The string look like this:
input = '{{13,1},{2,1},{4,4},{1,7},{9,1}}'
The string represents a sparse vector, where the vector itself is delimited by curly brackets. Each entry, itself delimited by curly brackets, indicates which indices have which entries. The first entry in the list encodes the dimensions of the vector.
In the above example, the vector has a length of 13 and 4 entries that are different from 0.
output = np.array([0,7,1,0,4,0,0,0,0,1,0,0,0])
After parsing it to an array, I have to parse to back to a string in its dense format, with the format:
stringoutput = '{0,7,1,0,4,0,0,0,0,1,0,0,0}'
While I managed to parse the numpy array to a string, I ran into the problem of having the wrong brackets (i.e. the built-in array2string function uses [], while I need {}).
I am open to any suggestions that help solve this efficiently (even for large sparse vectors).
Thank you.
Edit: The given vector is always one-dimensional, i.e. the second number within the first {} will always be 1 (and you only need one index to locate the position of an element).
Here is a numpythonic way:
In [132]: inp = '{{13,1},{2,1},{4,4},{1,7},{9,1}}'
# Replace the brackets with parentheses in order to convert the string to a valid Python object
# (this assumes `import ast` and `import numpy as np` have already been done).
In [133]: inp = ast.literal_eval(inp.replace('{', '(').replace('}', ')'))
# Unpack the dimension and the rest of the values from the input object
In [134]: dim, *rest = inp
# Create the zero array based on the extracted dimension
In [135]: arr = np.zeros(dim)
# Use `zip` to collect the indices and values separately so they can be used in `np.put`
In [136]: indices, values = zip(*rest)
In [137]: np.put(arr, indices, values)
In [138]: arr
Out[138]:
array([[ 0.],
       [ 7.],
       [ 1.],
       [ 0.],
       [ 4.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 0.],
       [ 0.]])
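To produce the dense string format the question asks for, a short sketch on top of this arr (flattening first, since arr has shape (13, 1) here):
stringoutput = '{' + ','.join(str(int(v)) for v in arr.ravel()) + '}'
print(stringoutput)   # {0,7,1,0,4,0,0,0,0,1,0,0,0}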
I like @Kasramvd's approach, but figured I'd put this one out there as well:
In [116]: r = (list(map(int, a.split(','))) for a in input[2:-2].split('},{'))
In [118]: l = np.zeros(next(r)[0], int)
In [119]: for a in r:
     ...:     l[a[0]] = a[1]
     ...:
In [122]: s = '{' + ','.join(map(str, l)) + '}'
In [123]: s
Out[123]: '{0,7,1,0,4,0,0,0,0,1,0,0,0}'
This is based on @Kasramvd's answer. I adjusted how the other values are populated.
from @Kasramvd:
import numpy as np
import ast
inp = '{{13,1},{2,1},{4,4},{1,7},{9,1}}'
inp = ast.literal_eval(inp.replace('{', '(').replace('}', ')'))
dim, *rest = inp
my adjustments
a = np.zeros(dim, dtype=int)
r = np.array(rest)
a[r[:, 0], 0] = r[:, 1]
a
array([[0],
       [7],
       [1],
       [0],
       [4],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0]])
in one dimension
a = np.zeros(dim[0], dtype=int)
r = np.array(rest)
a[r[:, 0]] = r[:, 1]
a
array([0, 7, 1, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0])

Better way to shuffle two numpy arrays in unison

I have two numpy arrays of different shapes, but with the same length (leading dimension). I want to shuffle each of them, such that corresponding elements continue to correspond -- i.e. shuffle them in unison with respect to their leading indices.
This code works, and illustrates my goals:
def shuffle_in_unison(a, b):
    assert len(a) == len(b)
    shuffled_a = numpy.empty(a.shape, dtype=a.dtype)
    shuffled_b = numpy.empty(b.shape, dtype=b.dtype)
    permutation = numpy.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b
For example:
>>> a = numpy.asarray([[1, 1], [2, 2], [3, 3]])
>>> b = numpy.asarray([1, 2, 3])
>>> shuffle_in_unison(a, b)
(array([[2, 2],
        [1, 1],
        [3, 3]]), array([2, 1, 3]))
However, this feels clunky, inefficient, and slow, and it requires making a copy of the arrays -- I'd rather shuffle them in-place, since they'll be quite large.
Is there a better way to go about this? Faster execution and lower memory usage are my primary goals, but elegant code would be nice, too.
One other thought I had was this:
def shuffle_in_unison_scary(a, b):
rng_state = numpy.random.get_state()
numpy.random.shuffle(a)
numpy.random.set_state(rng_state)
numpy.random.shuffle(b)
This works... but it's a little scary, as I see little guarantee it'll continue to work -- it doesn't look like the sort of thing that's guaranteed to survive across numpy versions, for example.
You can use NumPy's array indexing:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = numpy.random.permutation(len(a))
    return a[p], b[p]
This will result in the creation of separate, unison-shuffled arrays.
X = np.array([[1., 0.], [2., 1.], [0., 0.]])
y = np.array([0, 1, 2])
from sklearn.utils import shuffle
X, y = shuffle(X, y, random_state=0)
To learn more, see http://scikit-learn.org/stable/modules/generated/sklearn.utils.shuffle.html
Your "scary" solution does not appear scary to me. Calling shuffle() for two sequences of the same length results in the same number of calls to the random number generator, and these are the only "random" elements in the shuffle algorithm. By resetting the state, you ensure that the calls to the random number generator will give the same results in the second call to shuffle(), so the whole algorithm will generate the same permutation.
If you don't like this, a different solution would be to store your data in one array instead of two right from the beginning, and create two views into this single array simulating the two arrays you have now. You can use the single array for shuffling and the views for all other purposes.
Example: Let's assume the arrays a and b look like this:
a = numpy.array([[[  0.,   1.,   2.],
                  [  3.,   4.,   5.]],
                 [[  6.,   7.,   8.],
                  [  9.,  10.,  11.]],
                 [[ 12.,  13.,  14.],
                  [ 15.,  16.,  17.]]])
b = numpy.array([[ 0.,  1.],
                 [ 2.,  3.],
                 [ 4.,  5.]])
We can now construct a single array containing all the data:
c = numpy.c_[a.reshape(len(a), -1), b.reshape(len(b), -1)]
# array([[  0.,   1.,   2.,   3.,   4.,   5.,   0.,   1.],
#        [  6.,   7.,   8.,   9.,  10.,  11.,   2.,   3.],
#        [ 12.,  13.,  14.,  15.,  16.,  17.,   4.,   5.]])
Now we create views simulating the original a and b:
a2 = c[:, :a.size//len(a)].reshape(a.shape)
b2 = c[:, a.size//len(a):].reshape(b.shape)
The data of a2 and b2 is shared with c. To shuffle both arrays simultaneously, use numpy.random.shuffle(c).
In production code, you would of course try to avoid creating the original a and b at all and right away create c, a2 and b2.
This solution could be adapted to the case that a and b have different dtypes.
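For the different-dtype case, one possible adaptation (a sketch of my own, not from the answer above) is a structured array with one field per original array, so a single shuffle still moves corresponding rows together:
import numpy as np

a = np.arange(6, dtype=np.float64).reshape(3, 2)
b = np.array([10, 20, 30], dtype=np.int32)

# One record per row: field 'a' holds a row of a, field 'b' the matching entry of b.
c = np.empty(len(a), dtype=[('a', a.dtype, a.shape[1:]), ('b', b.dtype)])
c['a'] = a
c['b'] = b

np.random.shuffle(c)   # shuffles whole records, keeping the pairs intact
print(c['a'])
print(c['b'])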
Very simple solution:
randomize = np.arange(len(x))
np.random.shuffle(randomize)
x = x[randomize]
y = y[randomize]
The two arrays x and y are now both randomly shuffled in the same way.
James wrote a helpful sklearn solution in 2015, but the random_state variable he added is not needed. In the code below, the random state from numpy is used automatically.
X = np.array([[1., 0.], [2., 1.], [0., 0.]])
y = np.array([0, 1, 2])
from sklearn.utils import shuffle
X, y = shuffle(X, y)
from numpy.random import permutation
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data #numpy array
y = iris.target #numpy array
# Data is currently unshuffled; we should shuffle
# each X[i] with its corresponding y[i]
perm = permutation(len(X))
X = X[perm]
y = y[perm]
Shuffle any number of arrays together, in-place, using only NumPy.
import numpy as np
def shuffle_arrays(arrays, set_seed=-1):
"""Shuffles arrays in-place, in the same order, along axis=0
Parameters:
-----------
arrays : List of NumPy arrays.
set_seed : Seed value if int >= 0, else seed is random.
"""
assert all(len(arr) == len(arrays[0]) for arr in arrays)
seed = np.random.randint(0, 2**(32 - 1) - 1) if set_seed < 0 else set_seed
for arr in arrays:
rstate = np.random.RandomState(seed)
rstate.shuffle(arr)
And it can be used like this:
a = np.array([1, 2, 3, 4, 5])
b = np.array([10,20,30,40,50])
c = np.array([[1,10,11], [2,20,22], [3,30,33], [4,40,44], [5,50,55]])
shuffle_arrays([a, b, c])
A few things to note:
The assert ensures that all input arrays have the same length along their first dimension.
Arrays are shuffled in place along their first dimension; nothing is returned.
The random seed is kept within the positive int32 range.
If a repeatable shuffle is needed, seed value can be set.
After the shuffle, the data can be split using np.split or referenced using slices - depending on the application.
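For instance, a minimal sketch of the np.split step mentioned in the last note, reusing shuffle_arrays from above (the 8/2 split point is arbitrary):
import numpy as np

a = np.arange(10)
b = a * 10
shuffle_arrays([a, b], set_seed=0)

a_train, a_val = np.split(a, [8])   # first 8 elements vs. the rest
b_train, b_val = np.split(b, [8])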
You can make an index array like:
s = np.arange(0, len(a), 1)
then shuffle it:
np.random.shuffle(s)
Now use this s to index your arrays; the same shuffled indices return the arrays shuffled in the same way.
x_data = x_data[s]
x_label = x_label[s]
There is a well-known function that can handle this:
from sklearn.model_selection import train_test_split
X, _, Y, _ = train_test_split(X,Y, test_size=0.0)
Just setting test_size to 0 will avoid splitting and give you shuffled data.
Though it is usually used to split train and test data, it does shuffle them too.
From documentation
Split arrays or matrices into random train and test subsets
Quick utility that wraps input validation and
next(ShuffleSplit().split(X, y)) and application to input data into a
single call for splitting (and optionally subsampling) data in a
oneliner.
This seems like a very simple solution:
import numpy as np
def shuffle_in_unison(a,b):
    assert len(a)==len(b)
    c = np.arange(len(a))
    np.random.shuffle(c)
    return a[c],b[c]
a = np.asarray([[1, 1], [2, 2], [3, 3]])
b = np.asarray([11, 22, 33])
shuffle_in_unison(a,b)
Out[94]:
(array([[3, 3],
        [2, 2],
        [1, 1]]),
 array([33, 22, 11]))
One way to do in-place shuffling for connected lists is to use a seed (it could be random) and use numpy.random.shuffle to do the shuffling.
# Set seed to a random number if you want the shuffling to be non-deterministic.
def shuffle(a, b, seed):
    np.random.seed(seed)
    np.random.shuffle(a)
    np.random.seed(seed)
    np.random.shuffle(b)
That's it. This will shuffle both a and b in the exact same way. This is also done in-place which is always a plus.
EDIT: don't use np.random.seed(); use np.random.RandomState instead:
def shuffle(a, b, seed):
    rand_state = np.random.RandomState(seed)
    rand_state.shuffle(a)
    rand_state.seed(seed)
    rand_state.shuffle(b)
When calling it just pass in any seed to feed the random state:
a = [1,2,3,4]
b = [11, 22, 33, 44]
shuffle(a, b, 12345)
Output:
>>> a
[1, 4, 2, 3]
>>> b
[11, 44, 22, 33]
Edit: Fixed code to re-seed the random state
Say we have two arrays: a and b.
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
b = np.array([[9,1,1],[6,6,6],[4,2,0]])
We can first obtain row indices by permuting the first dimension:
indices = np.random.permutation(a.shape[0])
[1 2 0]
Then use advanced indexing.
Here we are using the same indices to shuffle both arrays in unison.
a_shuffled = a[indices[:,np.newaxis], np.arange(a.shape[1])]
b_shuffled = b[indices[:,np.newaxis], np.arange(b.shape[1])]
This is equivalent to
np.take(a, indices, axis=0)
[[4 5 6]
 [7 8 9]
 [1 2 3]]
np.take(b, indices, axis=0)
[[6 6 6]
 [4 2 0]
 [9 1 1]]
If you want to avoid copying arrays, then I would suggest that instead of generating a permutation list, you go through every element in the array, and randomly swap it to another position in the array
for old_index in range(len(a)):
    new_index = numpy.random.randint(old_index + 1)
    a[old_index], a[new_index] = a[new_index], a[old_index]
    b[old_index], b[new_index] = b[new_index], b[old_index]
This implements the Knuth-Fisher-Yates shuffle algorithm.
The shortest and easiest way, in my opinion, is to use a seed:
import random

random.seed(seed)
random.shuffle(x_data)
# reset the same seed to get the identical random sequence and shuffle the y
random.seed(seed)
random.shuffle(y_data)
Most solutions above work; however, if you have column vectors you have to transpose them first. Here is an example:
def shuffle(self) -> None:
"""
Shuffles X and Y
"""
x = self.X.T
y = self.Y.T
p = np.random.permutation(len(x))
self.X = x[p].T
self.Y = y[p].T
As an example, this is what I'm doing:
from random import shuffle

combo = []
for i in range(60000):
    combo.append((images[i], labels[i]))

shuffle(combo)

im = []
lab = []
for c in combo:
    im.append(c[0])
    lab.append(c[1])
images = np.asarray(im)
labels = np.asarray(lab)
I extended python's random.shuffle() to take a second arg:
import random

def shuffle_together(x, y):
    assert len(x) == len(y)
    for i in reversed(xrange(1, len(x))):
        # pick an element in x[:i+1] with which to exchange x[i]
        j = int(random.random() * (i + 1))
        x[i], x[j] = x[j], x[i]
        y[i], y[j] = y[j], y[i]
That way I can be sure that the shuffling happens in-place, and the function is not all too long or complicated.
Just use numpy. First merge the two input arrays (the 1-D array is the labels y and the 2-D array is the data x) and shuffle them with NumPy's shuffle method. Finally split them again and return.
import numpy as np
def shuffle_2d(a, b):
    rows = a.shape[0]
    if b.shape != (rows, 1):
        b = b.reshape((rows, 1))
    S = np.hstack((b, a))
    np.random.shuffle(S)
    b, a = S[:, 0], S[:, 1:]
    return a, b
features, samples = 2, 5
x, y = np.random.random((samples, features)), np.arange(samples)
x, y = shuffle_2d(x, y)
