I have a class with the following property, clusters:
import numpy as np

class ClustererKmeans(object):
    def __init__(self):
        self.clustering = None

    @property
    def clusters(self):
        assert self.clustering is not None, 'A clustering shall be set before obtaining clusters'
        return np.unique(self.clustering)
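For reference, using the property directly works as expected (a quick sketch, using the same clustering as in the test below):

c = ClustererKmeans()
c.clustering = np.array([0, 0, 1, 1, 3, 3, 3, 4, 5, 5])
print(c.clusters)  # -> [0 1 3 4 5]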
I now want to write a unittest for this simple property. I start off with:
from unittest import TestCase, main
from unittest.mock import Mock
class Test_clusters(TestCase):
    def test_gw_01(self):
        sut = Mock()
        sut.clustering = np.array([0, 0, 1, 1, 3, 3, 3, 4, 5, 5])
        r = ClustererKmeans.clusters(sut)
        e = np.array([0, 1, 3, 4, 5])
        # The following line checks that the two numpy arrays r and e are equal,
        # and gives a detailed error message if they are not.
        TestUtils.equal_np_matrix(self, r, e, 'clusters')

if __name__ == "__main__":
    main()
However, this does not run.
TypeError: 'property' object is not callable
I next change the line r = ClustererKmeans.clusters(sut) to the following:
r = sut.clusters
But again, I get an unexpected error.
AssertionError: False is not true : r shall be a <class 'numpy.ndarray'> (is now a <class 'unittest.mock.Mock'>)
Is there an easy way to test the implementation of a property in Python using the unittest framework?
To call the property directly, you can replace ClustererKmeans.clusters(sut) in your original code with ClustererKmeans.clusters.__get__(sut).
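For illustration, the fixed line then looks like this (property objects also expose the raw getter as fget):

r = ClustererKmeans.clusters.__get__(sut)  # runs the real getter with the mock as self
# equivalently: r = ClustererKmeans.clusters.fget(sut)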
Even though I'm a mocking enthusiast, IMHO this case is not a good example for applying it. Mocks are useful for removing dependencies on classes and resources. In your case, ClustererKmeans has an empty constructor and there is no dependency to break. You can do it like this:
class Test_clusters(TestCase):
    def test_gw_01(self):
        sut = ClustererKmeans()
        sut.clustering = np.array([0, 0, 1, 1, 3, 3, 3, 4, 5, 5])
        np.testing.assert_array_equal(np.array([0, 1, 3, 4, 5]), sut.clusters)
If you would rather use mocking, you can patch the clustering attribute of the ClustererKmeans object by using unittest.mock.patch.object:
def test_gw_01(self):
    sut = ClustererKmeans()
    # requires: from unittest.mock import patch
    with patch.object(sut, "clustering", new=np.array([0, 0, 1, 1, 3, 3, 3, 4, 5, 5])):
        e = np.array([0, 1, 3, 4, 5])
        np.testing.assert_array_equal(e, sut.clusters)
...but why use patch when Python gives you a simple and direct way to do it?
Another way to use the mock framework would be to trust numpy.unique and check that the property does the right work:
@patch("numpy.unique")
def test_gw_01(self, mock_unique):
    # requires: from unittest.mock import patch, Mock
    sut = ClustererKmeans()
    sut.clustering = Mock()
    v = sut.clusters
    # Check that numpy.unique was called with the clustering ...
    mock_unique.assert_called_with(sut.clustering)
    # ... and that its return value was passed through
    self.assertIs(v, mock_unique.return_value)
    # Moreover, we can test the exception
    sut.clustering = None
    self.assertRaises(Exception, lambda s: s.clusters, sut)
I apologize for any errors, but I haven't tested the code. If you notify me, I will fix everything as soon as possible.
I'm not quite sure I'm using the right wording in my searches -- if that's the case, please let me know, as I may have missed obvious answers just because of that -- but I'd like to serialize (i.e. convert to a dictionary or JSON structure) both the main (outer) and inner class attributes of a class.
Here's an example:
class Outer(object):
    def __init__(self, idx, array1, array2):
        self.idx = idx

        # flatten individual values:
        ## unpack first array
        self.prop_a = array1[0]
        self.prop_b = array1[1]
        self.prop_c = array1[2]
        ## unpack second array
        self.prop_d = array2[0]
        self.prop_e = array2[1]
        self.prop_f = array2[2]

        # Nest elements to fit a wanted JSON schema
        class inner1(object):
            def __init__(self, outer):
                self.prop_a = outer.prop_a
                self.prop_b = outer.prop_b
                self.prop_c = outer.prop_c

        class inner2(object):
            def __init__(self, outer):
                self.prop_d = outer.prop_d
                self.prop_e = outer.prop_e
                self.prop_f = outer.prop_f

        self.inner_first = inner1(self)
        self.inner_second = inner2(self)

    def serialize(self):
        return vars(self)
Now I can call both:
import numpy as np
obj = Outer(10, np.array([1,2,3]), np.array([4,5,6]))
obj.prop_a # returns 1, or
obj.inner_first.prop_a # also returns 1
But when I try to serialize it, it prints:
vars(obj) # prints:
{'idx': 10,
'prop_a': 1,
'prop_b': 2,
'prop_c': 3,
'prop_d': 4,
'prop_e': 5,
'prop_f': 6,
'inner_first': <__main__.Outer.__init__.<locals>.inner1 at 0x7f231a4fe3b0>,
'inner_second': <__main__.Outer.__init__.<locals>.inner2 at 0x7f231a4febc0>}
where I want it to print:
vars(obj) # prints:
{'idx': 10,
'prop_a': 1,
'prop_b': 2,
'prop_c': 3,
'prop_d': 4,
'prop_e': 5,
'prop_f': 6,
'inner_first': {'prop_a': 1, 'prop_b': 2, 'prop_c': 3},
'inner_second': {'prop_d': 4, 'prop_e': 5, 'prop_f': 6}}
with the 'inner_first' key being the actual results of vars(obj.inner_first), and same thing for the 'inner_second' key.
Ideally I'd like to call the serialize() method to convert my object to the desired output: obj.serialize()
I feel I'm close to the result, but I simply cannot see what I'm missing to solve this task.
In the end, I wish I could simply do:
obj = Outer(10, np.array([1,2,3]), np.array([4,5,6]))
obj.serialize()
{
'inner_first': {
'prop_a': 1,
'prop_b': 2,
'prop_c': 3
},
'inner_second': {
'prop_d': 4,
'prop_e': 5,
'prop_f': 6
}
}
in order to basically fit a given JSON structure that I have.
Info: this thread helped me to build the inner classes.
Also note that this question only embeds two "layers" or "levels" of the final structure, but I may have more than 2.
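A minimal sketch of one possible approach (my addition for illustration, not from the original post): recurse into any attribute that itself carries a __dict__, so inner instances collapse into plain dicts at any depth. Whether the top-level idx and flattened prop_* keys should additionally be dropped to match the exact target schema is left aside here.

def serialize(obj):
    # hypothetical helper: replace every attribute that has its own __dict__
    # (i.e. a nested instance) with its vars(), recursively
    out = {}
    for key, value in vars(obj).items():
        if hasattr(value, '__dict__'):  # a nested (inner) instance
            out[key] = serialize(value)
        else:                           # a plain value (int, numpy scalar, ...)
            out[key] = value
    return out

With this, Outer.serialize can simply return serialize(self), and the nesting depth no longer matters.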
Whilst using Flask can one return a set as a json response from a rest endpoint?
For example:
@app.route('/test')
def test():
    list = [1, 1, 1, 1, 2, 2, 2, 2, 3, 4, 4]
    unique_list = set(list)
    return json.dumps(unique_list)
I've tried this and get the following error:
TypeError: unhashable type: 'list'
I've also tried turning the set back into a list and returning that instead. However I am faced with the same error as above.
Any ideas?
Use Flask's jsonify to return a JSON response. Also, don't use list as a variable name, and convert the unique set back to a list:
from flask import jsonify

@app.route('/test')
def test():
    my_list = [1, 1, 1, 1, 2, 2, 2, 2, 3, 4, 4]
    unique_list = list(set(my_list))
    return jsonify(results=unique_list)
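For reference, you can check the endpoint with Flask's test client (a sketch assuming app is the Flask application object from the question; response.get_json() needs a reasonably recent Flask):

with app.test_client() as client:
    print(client.get('/test').get_json())
    # {'results': [1, 2, 3, 4]}  -- element order may vary, since sets are unordered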
I wish to create a diff array in python as follows
>>> a = [1,5,3,8,2,4,7,6]
>>> diff = []
>>> a = sorted(a,reverse=True)
>>> for i in xrange(len(a)-1):
...     diff.append(a[i]-a[i+1])
But I wanted to refactor the above code. I tried to achieve it using lambda functions but failed to get the result.
>>> [i for i in lambda x,y:y-x,sorted(a,reverse=True)]
The above code returns
[<function <lambda> at 0x00000000023B9C18>, [1, 2, 3, 4, 5, 6, 7, 8]]
I wanted to know: can the required functionality be achieved using lambda functions or any other technique?
Thanks in advance for any help!
NOTES:
1) Array 'a' can be huge. Just for the sake of example I have taken a small array.
2) The result must be achieved in minimum time.
If you can use numpy:
import numpy as np
a = [1,5,3,8,2,4,7,6]
j = np.diff(np.sort(a)) # array([1, 1, 1, 1, 1, 1, 1])
print list(j)
# [1, 1, 1, 1, 1, 1, 1]
k = np.diff(a) # array([ 4, -2, 5, -6, 2, 3, -1])
print list(k)
# [4, -2, 5, -6, 2, 3, -1]
Timing comparisons with one-hundred-thousand random ints - numpy is faster if the data needs to be sorted:
import random
from timeit import Timer

a = [random.randint(0, 1000000) for _ in xrange(100000)]
##print a[:100]
def foo(a):
    a = sorted(a, reverse=True)
    return [a[i]-a[i+1] for i in xrange(len(a)-1)]

def bar(a):
    return np.diff(np.sort(a))
t = Timer('foo(a)', 'from __main__ import foo, bar, np, a')
print t.timeit(10)
# 0.86916993838
t = Timer('bar(a)', 'from __main__ import foo, bar, np, a')
print t.timeit(10)
# 0.28586356791
You can use list comprehension, as follows:
>>> a = sorted([1,5,3,8,2,4,7,6], reverse=True)
>>> diff = [a[i]-a[i+1] for i in xrange(len(a)-1)]
>>> diff
[1, 1, 1, 1, 1, 1, 1]
>>>
You said "or any other technique", so I take this to be valid. However, I haven't found a working lambda solution yet :)
Comparing the time of this answer with all of the below:
Mine:
1.59740447998e-05 seconds
@Marcin's:
0.00110197067261 seconds
@roippi's:
0.000382900238037 seconds
@wwii's:
0.00154685974121 seconds
Therefore, mine was clearly the fastest (by more than an order of magnitude), followed by @roippi's, followed by @Marcin's, followed by @wwii's.
P.S. I was completely unbiased here; my timing method was taking the current time.time() minus the previous time.time().
a = [1,5,3,8,2,4,7,6]
a = sorted(a,reverse=True)
Can't really improve these lines. You need to transform your data by sorting it, no sense changing what you've done.
from itertools import islice, izip, starmap
from operator import sub

list(starmap(sub, izip(a, a[1:])))
Out[12]: [1, 1, 1, 1, 1, 1, 1]
If a is really massive, you can replace the a[1:] slice with islice to save on memory overhead:
list(starmap(sub, izip(a, islice(a, 1, None))))
Though if it is really that massive, you should probably be using numpy anyway.
np.diff(a) * -1
Out[24]: array([1, 1, 1, 1, 1, 1, 1])
You could do as follows:
diff = [v[0] - v[1] for v in zip(sorted(a,reverse=True)[0:-1], sorted(a,reverse=True)[1:])]
#gives: diff = [1, 1, 1, 1, 1, 1, 1]
Though here you use sorting twice. Not sure if this matters to you or not.
As @aj8uppal suggested, it's better to sort a beforehand, so in this case you do:
a = sorted([1,5,3,8,2,4,7,6], reverse=True)
diff = [v[0] - v[1] for v in zip(a[0:-1], a[1:])]
#gives: diff = [1, 1, 1, 1, 1, 1, 1]
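For completeness, since the question asked whether a lambda could do it, here is a sketch (Python 2, like the rest of this thread) using itertools.imap, which pairs the two sequences element-wise and stops at the shorter one (unlike Python 2's built-in map, which pads with None):

from itertools import imap

a = sorted([1, 5, 3, 8, 2, 4, 7, 6], reverse=True)
diff = list(imap(lambda x, y: x - y, a, a[1:]))
# [1, 1, 1, 1, 1, 1, 1]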
I have a module A that does a basic map/reduce by taking data and sending it to modules B, C, D etc for analysis and then joining their results together.
But it appears that modules B, C, D etc cannot themselves create a multiprocessing pool, or else I get
AssertionError: daemonic processes are not allowed to have children
Is it possible to parallelize these jobs some other way?
For clarity, here's an (admittedly bad) baby example. (I would normally use try/except, but you get the gist.)
A.py:
import B
from multiprocessing import Pool

def main():
    p = Pool()
    results = p.map(B.foo, range(10))
    p.close()
    p.join()
    return results
B.py:
from multiprocessing import Pool

def foo(x):
    p = Pool()
    results = p.map(str, range(x))  # map needs an iterable; x by itself is an int
    p.close()
    p.join()
    return results
Is it possible to have a pool inside of a pool?
Yes, it is possible though it might not be a good idea unless you want to raise an army of zombies. From Python Process Pool non-daemonic?:
import multiprocessing.pool
from contextlib import closing
from functools import partial

class NoDaemonProcess(multiprocessing.Process):
    # make 'daemon' attribute always return False
    def _get_daemon(self):
        return False
    def _set_daemon(self, value):
        pass
    daemon = property(_get_daemon, _set_daemon)

# We sub-class multiprocessing.pool.Pool instead of multiprocessing.Pool
# because the latter is only a wrapper function, not a proper class.
class Pool(multiprocessing.pool.Pool):
    Process = NoDaemonProcess

def foo(x, depth=0):
    if depth == 0:
        return x
    else:
        with closing(Pool()) as p:
            return p.map(partial(foo, depth=depth-1), range(x + 1))

if __name__ == "__main__":
    from pprint import pprint
    pprint(foo(10, depth=2))
Output
[[0],
[0, 1],
[0, 1, 2],
[0, 1, 2, 3],
[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5, 6],
[0, 1, 2, 3, 4, 5, 6, 7],
[0, 1, 2, 3, 4, 5, 6, 7, 8],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
concurrent.futures supports it by default:
# $ pip install futures # on Python 2
from concurrent.futures import ProcessPoolExecutor as Pool
from functools import partial

def foo(x, depth=0):
    if depth == 0:
        return x
    else:
        with Pool() as p:
            return list(p.map(partial(foo, depth=depth-1), range(x + 1)))

if __name__ == "__main__":
    from pprint import pprint
    pprint(foo(10, depth=2))
It produces the same output.
Is it possible to parallelize these jobs some other way?
Yes. For example, look at how celery allows you to create a complex workflow.
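A rough sketch of the same map/reduce shape expressed as a celery canvas (this assumes an already-configured celery app named app; the task bodies are illustrative, not from the original post):

from celery import chord

@app.task
def analyze(x):
    # stands in for the B/C/D-style analysis step
    return str(x)

@app.task
def join(results):
    # the reduce step, called once with all collected results
    return results

# chord runs the group of analyze tasks in parallel,
# then feeds the list of their results to join:
chord(analyze.s(i) for i in range(10))(join.s())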
from copy import *

a = [1, 2, 3, 4]
c = {'a': 'aaa'}
print c
# {'a': 'aaa'}

b = deepcopy(a, c)
print b
print c
# {'a': 'aaa', 10310992: 3, 10310980: 4, 10311016: 1, 11588784: [1, 2, 3, 4, [1, 2, 3, 4]], 11566456: [1, 2, 3, 4], 10311004: 2}
Why does c print that?
Please try to answer with code rather than text, because my English is not very good. Thank you.
In django.utils.tree.py:
def __deepcopy__(self, memodict):
    """
    Utility method used by copy.deepcopy().
    """
    obj = Node(connector=self.connector, negated=self.negated)
    obj.__class__ = self.__class__
    obj.children = deepcopy(self.children, memodict)
    obj.subtree_parents = deepcopy(self.subtree_parents, memodict)
    return obj
import copy

memo = {}
x1 = range(5)
x2 = range(6, 9)
x3 = [2, 3, 4, 11]
y1 = copy.deepcopy(x1, memo)
y2 = copy.deepcopy(x2, memo)
y3 = copy.deepcopy(x3, memo)
print memo
print id(y1), id(y2), id(y3)
y1[0] = 'www'
print y1, y2, y3
print memo
This prints:
{10310992: 3, 10310980: 4, 10311016: 1, 11588784: [0, 1, 2, 3, 4, [0, 1, 2, 3, 4]], 10311028: 0, 11566456: [0, 1, 2, 3, 4], 10311004: 2}
{11572448: [6, 7, 8], 10310992: 3, 10310980: 4, 10311016: 1, 11572368: [2, 3, 4, 11], 10310956: 6, 10310896: 11, 10310944: 7, 11588784: [0, 1, 2, 3, 4, [0, 1, 2, 3, 4], 6, 7, 8, [6, 7, 8], 11, [2, 3, 4, 11]], 10311028: 0, 11566456: [0, 1, 2, 3, 4], 10310932: 8, 10311004: 2}
11572408 11581280 11580960
['www', 1, 2, 3, 4] [6, 7, 8] [2, 3, 4, 11]
{11572448: [6, 7, 8], 10310992: 3, 10310980: 4, 10311016: 1, 11572368: [2, 3, 4, 11], 10310956: 6, 10310896: 11, 10310944: 7, 11588784: [0, 1, 2, 3, 4, [0, 1, 2, 3, 4], 6, 7, 8, [6, 7, 8], 11, [2, 3, 4, 11]], 10311028: 0, 11566456: ['www', 1, 2, 3, 4], 10310932: 8, 10311004: 2}
No one above gave a good example of how to use it.
Here's what I do:
def __deepcopy__(self, memo):
    copy = type(self)()
    memo[id(self)] = copy
    copy._member1 = self._member1
    copy._member2 = deepcopy(self._member2, memo)
    return copy
Where member1 is an object not requiring deepcopy (like a string or integer), and member2 is one that does, like another custom type or a list or dict.
I've used the above code on highly tangled object graphs and it works very well.
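To make the pattern concrete, here is a minimal self-contained version (class and member names are illustrative, not from the answer above):

from copy import deepcopy

class Thing(object):
    def __init__(self):
        self._member1 = "name"       # immutable, safe to share
        self._member2 = [1, 2, 3]    # container that needs its own copy

    def __deepcopy__(self, memo):
        copy = type(self)()
        memo[id(self)] = copy        # register before recursing, so cycles resolve
        copy._member1 = self._member1
        copy._member2 = deepcopy(self._member2, memo)
        return copy

t = Thing()
u = deepcopy(t)
assert u._member2 == t._member2 and u._member2 is not t._member2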
If you also want to make your classes pickleable (for file save / load), there is no analogous memo param for __getstate__ / __setstate__; the pickle system keeps track of already-referenced objects itself, so you don't need to worry.
The above works on PyQt5 classes that you inherit from (as well as pickling - for instance I can deepcopy or pickle a custom QMainWindow, QWidget, QGraphicsItem, etc.)
If there is some initialization code in your constructor that creates new objects, for instance a CustomWidget(QWidget) that creates a new CustomScene(QGraphicsScene), but you'd like to pickle or copy the scene from one CustomWidget to a new one, then one way is to make a new=True parameter in your __init__ and say:
def __init__(..., new=True):
    ....
    if new:
        self._scene = CustomScene()

def __deepcopy__(self, memo):
    copy = type(self)(..., new=False)
    ....
    copy._scene = deepcopy(self._scene, memo)
    ....
That ensures you don't create a CustomScene (or some big class that does a lot of initializing) twice! You should also use the same setting (new=False) in your __setstate__ method, e.g.:
def __setstate__(self, data):
    self.__init__(...., new=False)
    self._member1 = data['member 1']
    .....
There are other ways to get around the above, but this is the one I converged to and use frequently.
Why did I talk about pickling as well? Because you will typically want both in any application, and you maintain them at the same time. If you add a member to your class, you add it to the __setstate__, __getstate__, and __deepcopy__ code. I would make it a rule that for any new class you make, you create the above three methods if you plan on doing copy / paste and file save / load in your app. The alternative is JSON and saving / loading yourself, but then there's a lot more work for you to do, including memoization.
So to support all the above, you need __deepcopy__, __setstate__, and __getstate__ methods and to import deepcopy:
from copy import deepcopy
When you write your pickle loader / saver functions (where you call pickle.load() / pickle.dump() to load / save your object hierarchy / graph), do import _pickle as pickle for the best speed (_pickle is a faster C implementation which is usually compatible with your app requirements).
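For completeness, a sketch of the __getstate__ / __setstate__ pair that matches the convention above (key and member names are illustrative):

def __getstate__(self):
    # collect everything __setstate__ needs to rebuild the object
    return {'member 1': self._member1,
            'member 2': self._member2}

def __setstate__(self, data):
    self.__init__(new=False)   # skip the heavy construction, as described above
    self._member1 = data['member 1']
    self._member2 = data['member 2']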
It's the memo dict, where id-to-object correspondence is kept to reconstruct complex object graphs perfectly. It's hard to "use the code", but let's try:
>>> import copy
>>> memo = {}
>>> x = range(5)
>>> y = copy.deepcopy(x, memo)
>>> memo
{399680: [0, 1, 2, 3, 4], 16790896: 3, 16790884: 4, 16790920: 1,
438608: [0, 1, 2, 3, 4, [0, 1, 2, 3, 4]], 16790932: 0, 16790908: 2}
>>>
and
>>> id(x)
399680
>>> for j in x: print j, id(j)
...
0 16790932
1 16790920
2 16790908
3 16790896
4 16790884
so as you see the IDs are exactly right. Also:
>>> for k, v in memo.items(): print k, id(v)
...
399680 435264
16790896 16790896
16790884 16790884
16790920 16790920
438608 435464
16790932 16790932
16790908 16790908
you see the identity for the (immutable) integers.
So here's a graph:
>>> z = [x, x]
>>> t = copy.deepcopy(z, memo)
>>> print id(t[0]), id(t[1]), id(y)
435264 435264 435264
so you see all the subcopies are the same objects as y (since we reused the memo).
You can read more by checking the Python online documentation:
http://docs.python.org/library/copy.html
The deepcopy() function is recursive, and it will work its way down through a deeply nested object. It uses a dictionary to detect objects it has seen before, in order to avoid infinite recursion. You should just ignore this dictionary.
class A(object):
    def __init__(self, *args):
        self.lst = args

class B(object):
    def __init__(self):
        self.x = self

def my_deepcopy(arg):
    try:
        obj = type(arg)() # get new, empty instance of type arg
        for key in arg.__dict__:
            obj.__dict__[key] = my_deepcopy(arg.__dict__[key])
        return obj
    except AttributeError:
        return type(arg)(arg) # return new instance of a simple type such as str
a = A(1, 2, 3)
b = B()
b.x is b # evaluates to True
c = my_deepcopy(a) # works fine
c = my_deepcopy(b) # stack overflow, recurses forever
from copy import deepcopy
c = deepcopy(b) # this works because of the second, hidden, dict argument
Just ignore the second, hidden, dict argument. Do not try to use it.
Here's a quick illustration I used for explaining this to myself:
import copy

a = [1, 2, 3]
memo = {}
b = copy.deepcopy(a, memo)
# now memo = {139907464678864: [1, 2, 3], 9357408: 1, 9357440: 2, 9357472: 3, 28258000: [1, 2, 3, [1, 2, 3]]}
key = 139907464678864
print(id(a) == key) #True
print(id(b) == key) #False
print(id(a) == id(memo[key])) #False
print(id(b) == id(memo[key])) #True
in other words:
memo[id_of_initial_object] = copy_of_initial_object