Usually when I mock, I have the following type of setup
# my_script.py
import numpy as np
def my_func(x):
    """Return ``x`` squared element-wise.

    The computation goes through ``np.power`` so that a test can patch
    ``np`` in this module and assert on the call.
    """
    out = np.power(x, 2)
    return out
then to test the numpy power call in my_script:
# test_myscript.py
import numpy as np
import unittest
import mock
from my_script import my_func
class TestMyScript(unittest.TestCase):
    """Tests for ``my_script.my_func``."""

    # Patch the name ``np`` as looked up inside ``my_script``; the mock is
    # injected as the extra positional argument of the test method.
    @mock.patch("my_script.np")
    def test_my_func(self, mock_os):
        """Test that numpy.power was called"""
        a = np.array([1, 2, 3])
        my_func(a)
        mock_os.power.assert_called_with(a, 2)


if __name__ == '__main__':
    unittest.main()
This works fine.
But now if the situation changes, and say I give the numpy module as an argument into my_func; I don't know how to mock numpy in this case.
How would I mock numpy in the function below in the same way as it was mocked in test_myscript above?
Note that numpy will not be imported in my_script.py but will instead be imported in a separate script that runs functions from my_script.py.
# my_script.py
# numpy NOT imported in this script!
def my_func(x, numpy):
    """Return ``x`` squared element-wise using the supplied module.

    ``numpy`` is whatever object the caller passes in; only its ``power``
    attribute is used, and it is called as ``numpy.power(x, 2)``.
    """
    out = numpy.power(x, 2)
    return out
EDIT:
Based on @Daniel Roseman's comment, I am including some more code to be explicit about how the functions are called:
# main_script.py
import numpy as np
from my_script import my_func
def main():
    """Build a small sample array and run ``my_func`` on it, injecting numpy."""
    a = np.array([1, 2, 3])
    my_func(a, np)  # numpy is passed into `my_func`
Then to test, I am trying the below
# test_myscript.py
import numpy as np
import unittest
import mock
from my_script import my_func
class TestMyScript(unittest.TestCase):
    """Attempt to test ``my_func`` by patching numpy where it is imported."""

    # import from main_script since numpy is imported here
    # NOTE: this only replaces the name ``np`` inside ``main_script``. The
    # test below hands its own, real ``np`` to ``my_func``, so the mock's
    # ``power`` is never called and the assertion fails.
    @mock.patch("main_script.np")
    def test_my_func(self, mock_os):
        """Test that numpy.power was called"""
        a = np.array([1, 2, 3])
        # ``my_func`` requires the module as its second argument; omitting it
        # would raise TypeError instead of the reported AssertionError.
        my_func(a, np)
        mock_os.power.assert_called_with(a, 2)


if __name__ == '__main__':
    unittest.main()
But this fails with
Ran 1 test in 0.154s
>>> FAILED (failures=1)
>>> AssertionError: Expected 'power' to have been called.
I found that using the unittest.mock.Mock object here worked best.
So if we have:
# my_script.py
def my_func(x, numpy):
    """Square ``x`` element-wise via ``numpy.power``.

    The module is received as an argument rather than imported here, so a
    test can pass in a stand-in object and inspect the ``power`` call.
    """
    out = numpy.power(x, 2)
    return out
Then to test it we have:
# test_myscript.py
import numpy as np
import unittest
from unittest.mock import Mock
from my_script import my_func
# Stand-in for the numpy module that is handed to ``my_func``.
numpy_mock = Mock()


class TestMyScript(unittest.TestCase):
    """Tests for ``my_func`` using an injected mock in place of numpy."""

    def test_my_func(self):
        """Test that numpy.power was called"""
        a = np.array([1, 2, 3])
        _ = my_func(a, numpy_mock)  # pass the mocked object here
        numpy_mock.power.assert_called_once_with(a, 2)


if __name__ == '__main__':
    unittest.main()
Which passes the test
Related
I'm trying to pass a jitclass instance to a function that uses guvectorize, but I don't understand what the signature should be. For instance, in the example below, what should the signature for the C parameter be? I tried typeof(myClass()), but it doesn't work.
import numpy as np
from numba import guvectorize, float64,typeof
from numba.experimental import jitclass
# Field layout of the jitclass: one 1-D float64 array named ``a``.
spec = [('a', float64[:])]


@jitclass(spec)
class myClass():
    # Minimal jitclass holding a length-4 array of ones in ``a``.
    def __init__(self):
        self.a = np.ones(4,)
# This is the form that raises the NotImplementedError quoted below: the
# jitclass instance type cannot be represented as a NumPy dtype.
@guvectorize([(typeof(myClass()),  # the njit class
               float64[:],         # dummy for size of b
               float64[:],         # the result
               )], '(),(n)->(n)')
def compute(C, c, b):
    # NOTE(review): this rebinds the local name ``b`` rather than writing
    # into the output buffer (``b[:] = ...``) -- confirm intent.
    b = C.a + C.a
# Create the jitclass instance and show it.
C = myClass()
print(C)

# Same-sized dummy input and output buffer for the gufunc call.
c = np.zeros(4)
b = np.zeros(4)
compute(C, c, b)
print(b)
error
NotImplementedError: instance.jitclass.myClass#22dfcdfb280<a:array(float64, 1d, A)> cannot be represented as a Numpy dtype
Can anyone help me understand why this simple example of trying to speed up a for loop using python's multiprocessing module produces unstable results? I use a Manager.List to store the values from the child processes.
Clearly I'm doing at least one thing wrong. What would be the correct way to do this?
import numpy as np
import multiprocessing
from matplotlib import pyplot as plt
from functools import partial
from multiprocessing import Manager
def run_parallel(x_val, result):
    """Compute ``arctan(x_val)`` and append it to ``result``.

    Nothing is returned; the value is only recorded through ``result.append``.
    """
    val = np.arctan(x_val)
    result.append(val)
def my_func(x_array, parallel=False):
    """Return ``arctan`` of every element of ``x_array`` as a list.

    With ``parallel=False`` the values are computed in a plain loop, in input
    order. With ``parallel=True`` four worker processes append their results
    to a shared manager list; the list is returned in whatever order the
    appends happened, and the return value of ``pool.map`` is ignored.
    """
    if not parallel:
        result = []
        for k in x_array:
            result.append(np.arctan(k))
        return result
    else:
        manager = Manager()
        m_result = manager.list()
        pool = multiprocessing.Pool(4)
        pool.map(partial(run_parallel, result=m_result), x_array)
        return list(m_result)
# Evaluate both code paths on the same 50 sample points.
test_x = np.linspace(0.1, 1, 50)
serial = my_func(test_x, parallel=False)
parallel = my_func(test_x, parallel=True)

# Overlay the two result curves for visual comparison.
plt.figure()
plt.plot(test_x, serial, label='serial')
plt.plot(test_x, parallel, label='parallel')
plt.legend(loc='best')
plt.show()
The output I'm getting looks like this
and it looks different every time this runs.
I added some print functions and it turned out that the order of elements from x_array is arbitrary... That's why it looks so weird. I think you should keep argument and value of arctan pairs and then order it by argument value
EDIT
I read more and it turned out that map returns values in order... This works as you wanted:
import numpy as np
import multiprocessing
from matplotlib import pyplot as plt
from functools import partial
from multiprocessing import Manager
def run_parallel(x_val, result=None):
    """Return ``arctan(x_val)``.

    ``result`` is accepted for compatibility with existing callers but is
    not used; the value is handed back through the return value.
    """
    val = np.arctan(x_val)
    return val
def my_func(x_array, parallel=False):
    """Return ``arctan`` of every element of ``x_array`` as a list.

    With ``parallel=True`` the work is spread over four worker processes via
    ``Pool.map``, whose result list follows the order of ``x_array``.
    """
    if not parallel:
        return [np.arctan(k) for k in x_array]

    # ``run_parallel`` returns its value, so no shared manager list is needed;
    # ``result`` is still passed by keyword because the worker's signature
    # expects it. The ``with`` block shuts the pool down once map() is done.
    with multiprocessing.Pool(4) as pool:
        return pool.map(partial(run_parallel, result=None), x_array)
# Sample points and their arctan values computed through the pool.
test_x = np.linspace(0.1, 1, 50)
parallel = my_func(test_x, parallel=True)

# Plot the parallel result on its own.
plt.figure()
plt.plot(test_x, parallel, label='parallel')
plt.legend(loc='best')
plt.show()
I want to store some data in ASDF files using extensions that are not implemented yet. Since I want to extend existing extensions, I started with the Astropy extensions.
I know how to write a working extension for ASDF. The key issue, however, is that the ASDF file should always look like an ASDF file created from an astropy polynomial. Creating a new extension in which numpy polynomials are stored is not my goal.
On the other hand, reading the ASDF file should always return a numpy polynomial.
This is what i started working on:
import asdf
from astropy.modeling import models, fitting
from numpy.polynomial import Polynomial as P
# Two representations of the same (all-zero, degree-2) polynomial.
poly_np = P([0, 0, 0])
poly_astropy = models.Polynomial1D(degree=2)

# Usual way of saving an astropy polynomial.
# Inline array storage is used only to keep the output readable.
target = asdf.AsdfFile({'astropy_poly': poly_astropy})
target.write_to('poly_astropy.yaml', all_array_storage='inline')

# Does not work: asdf does not know how to handle numpy polynomials.
target = asdf.AsdfFile({'numpy_poly': poly_np})
target.write_to('poly_np.yaml', all_array_storage='inline')
I tried to change the class PolynomialType in astropy's polynomial.py so that it accepts the type 'numpy.polynomial.polynomial.Polynomial', but the object still could not be represented. So where do I need to make a change to get my polynomial.py working? Or was my way of overriding the astropy class wrong?
import numpy as np
from numpy.polynomial import Polynomial as P
from numpy.testing import assert_array_equal
from asdf import yamlutil
from astropy import modeling
from astropy.io.misc.asdf.tags.transform.basic import TransformType
class PolynomialType_np(TransformType):
    """ASDF type for the ``transform/polynomial`` tag that accepts numpy polynomials.

    Astropy ``Polynomial1D``/``Polynomial2D`` models and numpy ``Polynomial``
    objects are all written as a node with a single ``coefficients`` entry.
    Reading a node always produces a numpy ``Polynomial``.
    """
    name = "transform/polynomial"
    types = ['astropy.modeling.models.Polynomial1D',
             'astropy.modeling.models.Polynomial2D',
             'numpy.polynomial.polynomial.Polynomial']

    @staticmethod
    def _coefficients(poly):
        """Return the coefficient array of a numpy or astropy polynomial."""
        # numpy polynomials expose ``coef``; astropy models expose ``parameters``.
        if isinstance(poly, P):
            return poly.coef
        return poly.parameters

    # from asdf file to np polynomial
    @classmethod
    def from_tree_transform(cls, node, ctx):
        """Build a numpy ``Polynomial`` from the node's ``coefficients``."""
        coefficients = np.asarray(node['coefficients'])
        return P(coefficients)

    # from any polynomial to asdf
    @classmethod
    def to_tree_transform(cls, model, ctx):
        """Convert a supported polynomial into a tagged ``coefficients`` node.

        Raises:
            TypeError: if ``model`` is none of the supported polynomial types.
        """
        # np.polynomial added
        if isinstance(model, P):
            coefficients = model.coef
        elif isinstance(model, modeling.models.Polynomial1D):
            coefficients = np.array(model.parameters)
        elif isinstance(model, modeling.models.Polynomial2D):
            degree = model.degree
            coefficients = np.zeros((degree + 1, degree + 1))
            for i in range(degree + 1):
                # Only terms with i + j <= degree exist on the model.
                for j in range(degree + 1 - i):
                    name = 'c' + str(i) + '_' + str(j)
                    coefficients[i, j] = getattr(model, name).value
        else:
            raise TypeError(
                "unsupported polynomial type: " + type(model).__name__)
        node = {'coefficients': coefficients}
        return yamlutil.custom_tree_to_tagged_tree(node, ctx)

    # astropy classmethod updated with np.arrays
    @classmethod
    def assert_equal(cls, a, b):
        """Assert that two supported polynomials have equal coefficients."""
        # TODO: If models become comparable themselves, remove this.
        TransformType.assert_equal(a, b)
        supported = (modeling.models.Polynomial1D,
                     modeling.models.Polynomial2D,
                     P)
        assert isinstance(a, supported) and isinstance(b, supported)
        assert_array_equal(cls._coefficients(a), cls._coefficients(b))
Here are the two solutions @Iguananaut suggested:
Solution 1
This is the solution where you override the PolynomialType registry by force.
# the code from above and then the following
from astropy.io.misc.asdf.extension import AstropyAsdfExtension
from astropy.io.misc.asdf.tags.transform.polynomial import PolynomialType
from astropy.io.misc.asdf.types import _astropy_asdf_types

# Remove astropy's own polynomial type from its type registry. The class is
# imported by name because ``astropy`` itself is never imported here.
_astropy_asdf_types.remove(PolynomialType)

# this will work now
target = asdf.AsdfFile({'numpy_poly': poly_np},
                       extensions=AstropyAsdfExtension())
target.write_to('poly_np.yaml', all_array_storage='inline')
Solution 2
This is the solution where you create a subclass of PolynomialType that adds support for writing numpy polynomials. Since it's not really necessary to read them back as numpy polynomials, they are read as astropy polynomials.
import numpy as np
from numpy.polynomial import Polynomial as P
from numpy.testing import assert_array_equal
import asdf
from asdf import yamlutil
from astropy import modeling
from astropy.io.misc.asdf.tags.transform.polynomial import PolynomialType
from astropy.io.misc.asdf.extension import AstropyAsdfExtension
class PolynomialTypeNumpy(PolynomialType):
    """``PolynomialType`` subclass whose ``to_tree`` writes ``model.coef``."""

    @classmethod
    def to_tree(cls, model, ctx):
        """Serialize ``model`` as a tagged node holding its ``coef`` array."""
        coefficients = model.coef
        node = {'coefficients': coefficients}
        return yamlutil.custom_tree_to_tagged_tree(node, ctx)

    # could/should add assert_equal from above
# And then this works.
target = asdf.AsdfFile(
    {'numpy_poly': P([0, 0, 0])},
    extensions=AstropyAsdfExtension(),
)
target.write_to('poly_np.yaml', all_array_storage='inline')
I want to integrate a function that has no closed form solution with an unknown variable and then plot vs the unknown variable. To try a simpler test, I tried to use the integral of f(x,c) = (x^2+c), integrated with respect to x and plot with different values of c. However, the code below gets the error
only size-1 arrays can be converted to Python scalars
even though the integral of a number, e.g. integral(5), seems to return the correct scalar value.
import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate
def f(x, c):
    """Integrand: return ``x**2 + c``."""
    return x**2 + c
def integral(c):
    """Return the integral of ``f(x, c)`` over ``x`` from 0 to 10.

    ``c`` is forwarded unchanged to ``f`` as its extra argument. Only the
    value from ``quad`` is returned; the error estimate is dropped.
    """
    return integrate.quad(f, 0, 10, args=(c,))[0]
# 200 candidate values of the parameter, passed to integral() all at once.
y = np.linspace(0, 20, 200)
plt.plot(y, integral(y))
You pass a numpy array as the argument c while you wanted to integrate over x for all the items of c. Therefore you can use this:
def f(x, c):
    """Return ``x**2 + c``, the function being integrated over ``x``."""
    return x**2 + c
def integrate_f(c):
    """Integrate ``f(x, item)`` over ``x`` in [0, 10] for every ``item`` in ``c``.

    Returns a float array with one integral per element of ``c``, in the
    same order.
    """
    # ``args`` is given as a real one-element tuple; ``(item)`` is just ``item``.
    return np.array(
        [integrate.quad(f, 0, 10, args=(item,))[0] for item in c],
        dtype=float,
    )
# Evaluate the integral for 200 values of c between 0 and 1 and plot it.
c_array = np.linspace(0, 1, 200)
plt.plot(c_array, integrate_f(c_array))
onno was a bit faster. But here is my similar solution. You need to loop over all the different c:
import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate
def f(x, c):
    """Return the integrand value ``x**2 + c``."""
    return x**2 + c
def getIntegral(c_list):
    """Return a list with the integral of ``f(x, c)`` on [0, 10] for each ``c``.

    The results follow the order of ``c_list``; only the integral value from
    ``quad`` is kept, not its error estimate.
    """
    # Pass the extra argument as an explicit one-element tuple.
    return [integrate.quad(f, 0, 10, args=(c,))[0] for c in c_list]
if __name__ == "__main__":
    # Plot the integral against 200 values of c between 0 and 20.
    c_list = np.linspace(0, 20, 200)
    plt.plot(c_list, getIntegral(c_list))
    plt.show()
I've the following numpy ndarray.
[ -0.54761371 17.04850603 4.86054302]
I want to apply this function to all elements of the array
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar ``x``.

    Uses ``math.exp``, so ``x`` must be convertible to a single Python float.
    """
    return 1 / (1 + math.exp(-x))
# Hand ``scores`` to ``sigmoid`` along the last axis.
probabilities = np.apply_along_axis(sigmoid, axis=-1, arr=scores)
This is the error that I get.
TypeError: only length-1 arrays can be converted to Python scalars
What am I doing wrong?
Function numpy.apply_along_axis is not good for this purpose.
Try to use numpy.vectorize to vectorize your function: https://docs.scipy.org/doc/numpy/reference/generated/numpy.vectorize.html
This function defines a vectorized function which takes a nested sequence of objects or numpy arrays as inputs and returns a single numpy array or a tuple of numpy arrays as output.
import numpy as np
import math
# custom function
def sigmoid(x):
    """Return ``1 / (1 + e**-x)`` for a single scalar ``x`` (uses ``math.exp``)."""
    return 1 / (1 + math.exp(-x))
# define vectorized sigmoid
sigmoid_v = np.vectorize(sigmoid)

# test
scores = np.array([-0.54761371, 17.04850603, 4.86054302])
# print() call form; the bare ``print x`` statement is a syntax error on Python 3.
print(sigmoid_v(scores))
Output: [ 0.36641822 0.99999996 0.99231327]
Performance test which shows that scipy.special.expit is the fastest way to calculate the logistic function, while the vectorized variant is the slowest:
import numpy as np
import math
import timeit
def sigmoid_(x):
    """Scalar logistic function ``1 / (1 + e**-x)`` built on ``math.exp``."""
    return 1 / (1 + math.exp(-x))


# Element-wise wrapper around the scalar implementation.
sigmoidv = np.vectorize(sigmoid_)
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` element-wise.

    Uses ``np.exp``, so ``x`` may be a scalar or a numpy array.
    """
    # The exponent must be negated; ``np.exp(x)`` would compute sigmoid(-x).
    return 1 / (1 + np.exp(-x))
# Time the three implementations on arrays of growing size. Each printed row
# holds, in order: vectorized (math.exp), numpy (np.exp), scipy expit.
for size in (100, 1000, 10000):
    data_setup = "scores = np.random.randn(%d)" % size
    print(
        timeit.timeit("sigmoidv(scores)",
                      "from __main__ import sigmoidv, np; " + data_setup,
                      number=25),
        timeit.timeit("sigmoid(scores)",
                      "from __main__ import sigmoid, np; " + data_setup,
                      number=25),
        timeit.timeit("expit(scores)",
                      "from scipy.special import expit; import numpy as np; "
                      + data_setup,
                      number=25),
    )
Results:
size vectorized numpy expit
N=100: 0.00179314613342 0.000460863113403 0.000132083892822
N=1000: 0.0122890472412 0.00084114074707 0.000464916229248
N=10000: 0.109477043152 0.00530695915222 0.00424313545227
Use np.exp and that will work on numpy arrays in a vectorized fashion:
>>> def sigmoid(x):
... return 1 / (1 + np.exp(-x))
...
>>> sigmoid(scores)
array([ 0.36641822,  0.99999996,  0.99231327])
>>>
You will likely not get any faster than this. Consider:
>>> def sigmoid(x):
... return 1 / (1 + np.exp(-x))
...
And:
>>> def sigmoidv(x):
... return 1 / (1 + math.exp(-x))
...
>>> vsigmoid = np.vectorize(sigmoidv)
Now, to compare the timings. With a small (size 100) array:
>>> t = timeit.timeit("vsigmoid(arr)", "from __main__ import vsigmoid, np; arr = np.random.randn(100)", number=100)
>>> t
0.006894525984534994
>>> t = timeit.timeit("sigmoid(arr)", "from __main__ import sigmoid, np; arr = np.random.randn(100)", number=100)
>>> t
0.0007238480029627681
So there is still an order-of-magnitude difference with small arrays. This performance difference stays relatively constant with a size-10,000 array:
>>> t = timeit.timeit("vsigmoid(arr)", "from __main__ import vsigmoid, np; arr = np.random.randn(10000)", number=100)
>>> t
0.3823414359940216
>>> t = timeit.timeit("sigmoid(arr)", "from __main__ import sigmoid, np; arr = np.random.randn(10000)", number=100)
>>> t
0.011259705002885312
And finally with a size 100,000 array:
>>> t = timeit.timeit("vsigmoid(arr)", "from __main__ import vsigmoid, np; arr = np.random.randn(100000)", number=100)
>>> t
3.7680041620042175
>>> t = timeit.timeit("sigmoid(arr)", "from __main__ import sigmoid, np; arr = np.random.randn(100000)", number=100)
>>> t
0.09544878199812956
Just to clarify what apply_along_axis is doing, or not doing.
def sigmoid(x):
    """Print the received argument, then return ``1 / (1 + e**-x)``.

    The print makes visible exactly what the caller passes in; ``math.exp``
    only accepts a value convertible to a single float.
    """
    print(x)  # show the argument
    return 1 / (1 + math.exp(-x))
In [313]: np.apply_along_axis(sigmoid, -1,np.array([ -0.54761371 ,17.04850603 ,4.86054302]))
[ -0.54761371 17.04850603 4.86054302] # the whole array
...
TypeError: only length-1 arrays can be converted to Python scalars
The reason you get the error is that apply_along_axis passes a whole 1d array to your function. I.e. the axis. For your 1d array this is the same as
sigmoid(np.array([ -0.54761371 ,17.04850603 ,4.86054302]))
The apply_along_axis does nothing for you.
As others noted, switching to np.exp allows sigmoid to work with the array (with or without the apply_along_axis wrapper).
scipy already implements the function
Luckily, Python allows us to rename things upon import:
from scipy.special import expit as sigmoid