import numpy as np

x = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
xt = np.array([1, 2, 3])
L = len(xt)
# Subtract each element of xt from every entry of x in turn.  s is rebound on
# every pass, so once the loop ends it holds only the last difference,
# x - xt[L - 1].
# Loop-free form that keeps every pass: x - xt[:, None, None] has shape
# (L, 3, 3) and its i-th slice equals x - xt[i].
for i in range(0, L):
    s = x - xt[i]
Is there another way to get the same results without using a for loop? Thanks.
I have a numpy array of booleans:
import numpy as np

# Boolean mask of 100 entries, all False.  Built with the builtin ``bool``
# dtype because the ``np.bool`` alias is deprecated and is missing from some
# NumPy releases.
x = np.zeros(100, dtype=bool)
x[20] = True  # say
When I try to insert this (one element per document) as part of an OrderedDict into mongodb, I get the following error:
InvalidDocument: cannot encode object: False, of type: <class 'numpy.bool_'>
This is a serialization issue I have encountered before for singleton numpy booleans.
How do I convert the numpy array into an array of python booleans for serialization?
The following did not work:
y = x.astype(bool)
You can use numpy.ndarray.tolist here.
import numpy as np

# Builtin bool dtype: the np.bool alias is deprecated.
x = np.zeros(100, dtype=bool)
# tolist() converts the whole array to a list of Python objects in one call.
y = x.tolist()
print(type(x))
# numpy.ndarray
print(type(x[0]))
# numpy.bool_
print(type(y))
# list
print(type(y[0]))
# bool
You can try numpy.asscalar (note: it is deprecated in recent NumPy versions and has been removed from the newest ones):
import numpy as np
x = np.zeros(100).astype(np.bool)
# Convert element by element with np.asscalar, collecting the results in a list.
# NOTE(review): asscalar is reported as deprecated in favour of item() --
# confirm it still exists in the NumPy version in use.
z = [np.asscalar(x_i) for x_i in x]
print(type(z))
You can also use item(), which is a better option since asscalar is deprecated.
import numpy as np

# Builtin bool dtype: the np.bool alias is deprecated.
x = np.zeros(100, dtype=bool)
# item() hands back each element as a plain Python bool.
z = [x_i.item() for x_i in x]
print(type(z))
print(z)
For a longer list, tolist() is the better option.
import time

import numpy as np

# Builtin bool dtype: the np.bool alias is deprecated.
x = np.zeros(100000, dtype=bool)

# Element-wise conversion: one Python-level item() call per element.
# perf_counter() is the clock intended for measuring short intervals.
t1 = time.perf_counter()
z = [x_i.item() for x_i in x]
t2 = time.perf_counter()
print(t2 - t1)

# Whole-array conversion in a single tolist() call.
t1 = time.perf_counter()
z = x.tolist()
t2 = time.perf_counter()
print(t2 - t1)
0.0519254207611084
0.0015206336975097656
So, I have just this week come across a solution to this (albeit my own) question from two years ago... Thanks SO!
I am going to invoke the brilliant numpyencoder (https://pypi.org/project/numpyencoder) as follows:
# Set up the problem
import numpy as np
x = np.zeros(100).astype(bool) # Note: bool <- np.bool is now deprecated!
x[20] = True
# Let's roll
import json
from numpyencoder import NumpyEncoder
# cls=NumpyEncoder tells json.dumps to use that encoder class when serializing x.
sanitized_json_string = json.dumps(x, cls=NumpyEncoder)
# One could stop there since the payload is now ready to go - but just to confirm:
# parse the JSON string back and print the decoded value.
x_sanitized=json.loads(sanitized_json_string)
print(x_sanitized)
I'm trying to compute the bootstrap statistic of the total of an array and I'm wondering if this can be improved in terms of speed, please?
from numpy import asarray, ndarray, sum
from numpy.random import choice


def bootstrap(observed_array: ndarray, number_of_bootstraps: int = 10000) -> ndarray:
    """Return bootstrap estimates of the total of ``observed_array``.

    Each estimate is the sum of a resample, drawn with replacement, that has
    as many entries as ``observed_array``.

    Args:
        observed_array: The observed values; resampled along its first axis.
        number_of_bootstraps: How many resamples (and estimates) to produce.

    Returns:
        A 1-D array holding one total per bootstrap resample.

    Note:
        All resampling indices are drawn in a single call, so the function
        temporarily holds ``number_of_bootstraps * len(observed_array)``
        indices and resampled values in memory.
    """
    data = asarray(observed_array)
    number_of_elements = len(data)
    # One row of indices per bootstrap resample, instead of one call per loop pass.
    indices = choice(
        number_of_elements,
        size=(number_of_bootstraps, number_of_elements),
        replace=True,
    )
    samples = data[indices]
    # Collapse everything except the resample axis, so each estimate is the
    # total over the entire resample.
    return samples.sum(axis=tuple(range(1, samples.ndim)))
Thanks for any suggestions here.
I've the following numpy ndarray.
[ -0.54761371 17.04850603 4.86054302]
I want to apply this function to all elements of the array
def sigmoid(x):
return 1 / (1 + math.exp(-x))
probabilities = np.apply_along_axis(sigmoid, -1, scores)
This is the error that I get.
TypeError: only length-1 arrays can be converted to Python scalars
What am I doing wrong.
Function numpy.apply_along_axis is not good for this purpose.
Try to use numpy.vectorize to vectorize your function: https://docs.scipy.org/doc/numpy/reference/generated/numpy.vectorize.html
This function defines a vectorized function which takes a nested sequence of objects or numpy arrays as inputs and returns a single numpy array or a tuple of numpy arrays as output.
import math

import numpy as np


# custom function
def sigmoid(x):
    """Return the logistic function of the scalar ``x``."""
    return 1 / (1 + math.exp(-x))


# define vectorized sigmoid
sigmoid_v = np.vectorize(sigmoid)

# test
scores = np.array([-0.54761371, 17.04850603, 4.86054302])
print(sigmoid_v(scores))
Output: [ 0.36641822 0.99999996 0.99231327]
A performance test showing that scipy.special.expit is the fastest way to calculate the logistic function and that the np.vectorize variant is the slowest:
import math
import timeit

import numpy as np
from scipy.special import expit


def sigmoid_(x):
    """Return the logistic function of the scalar ``x`` using math.exp."""
    return 1 / (1 + math.exp(-x))


# Element-by-element wrapper around the scalar version.
sigmoidv = np.vectorize(sigmoid_)


def sigmoid(x):
    """Return the logistic function of ``x`` (scalar or array) using np.exp."""
    # exp(-x), not exp(x): the latter evaluates to 1 - sigmoid(x).
    return 1 / (1 + np.exp(-x))


# Time the three implementations, 25 calls each, for every array size.
# Passing globals= hands the callables to timeit directly, so the benchmark
# does not depend on this file being executed as __main__.
for size in (100, 1000, 10000):
    namespace = {
        "sigmoidv": sigmoidv,
        "sigmoid": sigmoid,
        "expit": expit,
        "scores": np.random.randn(size),
    }
    print(
        timeit.timeit("sigmoidv(scores)", globals=namespace, number=25),
        timeit.timeit("sigmoid(scores)", globals=namespace, number=25),
        timeit.timeit("expit(scores)", globals=namespace, number=25),
    )
Results:
size vectorized numpy expit
N=100: 0.00179314613342 0.000460863113403 0.000132083892822
N=1000: 0.0122890472412 0.00084114074707 0.000464916229248
N=10000: 0.109477043152 0.00530695915222 0.00424313545227
Use np.exp and that will work on numpy arrays in a vectorized fashion:
>>> def sigmoid(x):
... return 1 / (1 + np.exp(-x))
...
>>> sigmoid(scores)
array([ 0.36641822,  0.99999996,  0.99231327])
>>>
You will likely not get any faster than this. Consider:
>>> def sigmoid(x):
... return 1 / (1 + np.exp(-x))
...
And:
>>> def sigmoidv(x):
... return 1 / (1 + math.exp(-x))
...
>>> vsigmoid = np.vectorize(sigmoidv)
Now, to compare the timings. With a small (size 100) array:
>>> t = timeit.timeit("vsigmoid(arr)", "from __main__ import vsigmoid, np; arr = np.random.randn(100)", number=100)
>>> t
0.006894525984534994
>>> t = timeit.timeit("sigmoid(arr)", "from __main__ import sigmoid, np; arr = np.random.randn(100)", number=100)
>>> t
0.0007238480029627681
So, there is still an order-of-magnitude difference with small arrays. This performance difference stays relatively constant with a size-10,000 array:
>>> t = timeit.timeit("vsigmoid(arr)", "from __main__ import vsigmoid, np; arr = np.random.randn(10000)", number=100)
>>> t
0.3823414359940216
>>> t = timeit.timeit("sigmoid(arr)", "from __main__ import sigmoid, np; arr = np.random.randn(10000)", number=100)
>>> t
0.011259705002885312
And finally with a size 100,000 array:
>>> t = timeit.timeit("vsigmoid(arr)", "from __main__ import vsigmoid, np; arr = np.random.randn(100000)", number=100)
>>> t
3.7680041620042175
>>> t = timeit.timeit("sigmoid(arr)", "from __main__ import sigmoid, np; arr = np.random.randn(100000)", number=100)
>>> t
0.09544878199812956
Just to clarify what apply_along_axis is doing, or not doing.
def sigmoid(x):
print(x) # show the argument
return 1 / (1 + math.exp(-x))
In [313]: np.apply_along_axis(sigmoid, -1,np.array([ -0.54761371 ,17.04850603 ,4.86054302]))
[ -0.54761371 17.04850603 4.86054302] # the whole array
...
TypeError: only length-1 arrays can be converted to Python scalars
The reason you get the error is that apply_along_axis passes a whole 1d array to your function. I.e. the axis. For your 1d array this is the same as
sigmoid(np.array([ -0.54761371 ,17.04850603 ,4.86054302]))
The apply_along_axis does nothing for you.
As others noted, switching to np.exp allows sigmoid to work with the array (with or without the apply_along_axis wrapper).
scipy already implements the function
Luckily, Python allows us to rename things upon import:
from scipy.special import expit as sigmoid
I need to create a 2D array.
import numpy as np
self.col = 10
self.row = 5
...
matrix = np.array(self.row, self.col) # NOT WORKING
What is the right syntax please
i also need to fill it with random binary data
Generate a random matrix with binary values:
import numpy as np

# Matrix dimensions.
row = 10
col = 5
# Integers drawn from the half-open range [0, 2), so every entry is 0 or 1.
matrix = np.random.randint(0, 2, size=(row, col))
import numpy as np


def toBit(x):
    """Map a number to a bit: 0 when ``x <= 0``, otherwise 1."""
    return 0 if x <= 0 else 1


# Element-wise version of toBit for use on arrays.
VtoBit = np.vectorize(toBit)

arr1 = np.random.randn(6, 10)
arr2 = VtoBit(arr1)
print(arr2)