python cache dictionary - counting number of hits - python

I'm implementing a caching service in Python. I'm using a simple dictionary so far. What I'd like to do is count the number of hits (the number of times a stored value was retrieved by its key). Python's builtin dict has no such feature (as far as I know). I searched for 'python dictionary count' and found Counter (also on Stack Overflow), but I don't think it satisfies my requirements: I don't need to count what already exists, I need to increment something that comes from the outside. And I think that storing another dictionary just for hit counting is not the best data structure I can get :)
Do you have any ideas how to do it efficiently?

For an alternative method, if you're using Python 3 (or are willing to add this module to your Python 2 project, which has a slightly different interface), I strongly recommend the lru_cache decorator.
See the docs here. For example, this code :
from functools import lru_cache
@lru_cache(maxsize=32)
def meth(a, b):
    """Return ``a + b``.

    The ``print`` makes it visible when the body really executes: calls
    answered from the ``lru_cache`` do not print anything.
    """
    print("Taking some time", a, b)
    return a + b


print(meth(2, 3))
print(meth(2, 4))
print(meth(2, 3))  # same arguments as the first call: served from the cache
...will output :
Taking some time 2 3
5
Taking some time 2 4
6
5 <--- Notice that this function result is cached
As per the documentation, you can get the number of hits and misses with meth.cache_info(), and clear the cache with meth.cache_clear().

You can subclass a built-in dict class:
class CustomDict(dict):
    """A ``dict`` that counts successful ``d[key]`` reads per key.

    The counters are kept in ``self.hits``, a plain dict mapping a key to
    the number of times it was read through ``__getitem__``.
    """

    def __init__(self, *args, **kwargs):
        self.hits = {}
        super(CustomDict, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        # Look the value up first: a missing key raises KeyError here, so
        # it never receives a phantom entry in ``hits``.
        value = super(CustomDict, self).__getitem__(key)
        self.hits[key] = self.hits.get(key, 0) + 1
        return value
usage:
>>> d = CustomDict()
>>> d["test"] = "test"
>>> d["test"]
'test'
>>> d["test"]
'test'
>>> d.hits["test"]
2

Having another dictionary to store the hit counts is probably not a bad option, but you could also do something like:
class CacheService(object):
    """Key/value store that remembers how often each key has been read."""

    def __init__(self):
        # Maps key -> [stored item, number of reads].
        self.data = {}

    def __setitem__(self, key, item):
        # Storing a key (again) starts its read counter at zero.
        entry = [item, 0]
        self.data[key] = entry

    def __getitem__(self, key):
        entry = self.data[key]
        entry[1] = entry[1] + 1
        return entry[0]

    def getcount(self, key):
        """Return how many times ``key`` has been read."""
        entry = self.data[key]
        return entry[1]
You can use it something like this:
>>> cs = CacheService()
>>> cs[1] = 'one'
>>> cs[2] = 'two'
>>> print cs.getcount(1)
0
>>> cs[1]
'one'
>>> print cs.getcount(1)
1

It will be much easier to just overload the built-in dict data type. This will solve your problem.
class CountDict(dict):
    """A ``dict`` subclass that counts how often each key is read.

    The counters live in the per-instance ``count`` dict, so two
    ``CountDict`` objects never share their statistics.
    """

    def __init__(self, *args, **kwargs):
        super(CountDict, self).__init__(*args, **kwargs)
        self.count = {}

    def __getitem__(self, key):
        # ``super()`` already binds ``self``; passing it again would shift
        # the arguments and raise TypeError.
        value = super(CountDict, self).__getitem__(key)
        # Only count reads that actually returned a value.
        self.count[key] = self.count.get(key, 0) + 1
        return value

    def __setitem__(self, key, value):
        return super(CountDict, self).__setitem__(key, value)

    def get_count(self, key):
        """Return the number of successful reads of ``key`` (0 if never read)."""
        return self.count.get(key, 0)
This will give you a lot more flexibility. For example, you can keep two counts, one for the number of reads and another for the number of writes, without much added complexity. To learn more about super, see here.
Edited to meet OP's need of keeping a count for reading a key. The output can be obtained by calling get_count method.
>>>my_dict = CountDict()
>>>my_dict["a"] = 1
>>>my_dict["a"]
>>>1
>>>my_dict["a"]
>>>1
>>>my_dict.get_count("a")
>>>2

You could try this approach.
class AccessCounter(object):
    '''A class that contains a value and implements an access counter.
    The counter increments each time the value is changed.'''

    def __init__(self, val):
        # Go through object.__setattr__ directly so that the initial
        # assignment of ``value`` is not counted as a change.
        super(AccessCounter, self).__setattr__('counter', 0)
        super(AccessCounter, self).__setattr__('value', val)

    def __setattr__(self, name, value):
        if name == 'value':
            super(AccessCounter, self).__setattr__('counter', self.counter + 1)
        # Make this unconditional.
        # If you want to prevent other attributes to be set, raise AttributeError(name)
        super(AccessCounter, self).__setattr__(name, value)

    def __delattr__(self, name):
        # Delete first: if the attribute does not exist this raises
        # AttributeError and the failed attempt is not counted as a change.
        super(AccessCounter, self).__delattr__(name)
        if name == 'value':
            super(AccessCounter, self).__setattr__('counter', self.counter + 1)

Related

How to find Dictionary Key(s) from Value in a large nested dictionary of variable depth?

Say that I have a large dictionary full of nested values such as this:
large_dic ={
...
"key":{"sub-key1" :{"sub-key2": "Test"}},
"0key":{"0sub-key1": "0Test"},
"1key":{"1sub-key1":{"1sub-key2":{"1sub-key3":"1Test"}}}
...
}
What I would like to do is to be able to get for example from the final value:
"1Test"
the key(s) to access it, such as in this case:
large_dic["1key"]["1sub-key1"]["1sub-key2"]["1sub-key3"]
Thanks for the support.
Edit to add more infos: The dictionary trees I'm talking about are linear(YAML files converted into a python dictionary structure), there is never more than one key, the ending leaf values may not be unique.
Since OP is looking for hierarchical keys instead
I made this class :
class PointingSlice:
    """Pairs an object with a chain of subscripts to be applied to it."""

    def __init__(self, obj, *slices) -> None:
        self.obj = obj
        self.slices = slices

    def __str__(self):
        # Render as "<obj>[s1][s2]...".
        subscripts = "".join(self._repr_slice(item) for item in self.slices)
        return f"{str(self.obj)}{subscripts}"

    def _repr_slice(self, sliced: slice):
        """Return ``sliced`` formatted as a bracketed subscript."""
        if isinstance(sliced, slice):
            bounds = (sliced.start, sliced.stop, sliced.step)
            # A bound of None is rendered as an empty field.
            fields = [str() if bound is None else str(bound) for bound in bounds]
            return "[{}]".format(":".join(fields))
        return "[{}]".format(repr(sliced))

    def resolve(self):
        """Apply every stored subscript in order and return the result."""
        target = self.obj
        for subscript in self.slices:
            target = target.__getitem__(subscript)
        return target
and this function for instantiation :
def find_longest(mapping, key):
    """Follow the single-key chain under ``mapping[key]`` down to its leaf.

    Returns a ``PointingSlice`` over ``mapping`` holding every key on the
    way. Each nested dict must contain exactly one item; otherwise the
    unpacking below raises ``ValueError``.
    """
    path = [key]
    node = mapping[key]
    while isinstance(node, dict):
        [(child_key, node)] = node.items()
        path.append(child_key)
    return PointingSlice(mapping, *path)
Example use:
print(find_longest(large_dic, "1key"))
# output:
# {'key': {'sub-key1': {'sub-key2': 'Test'}}, '0key': {'0sub-key1': '0Test'}, '1key': {'1sub-key1': {'1sub-key2': {'1sub-key3': '1Test'}}}}['1key']['1sub-key1']['1sub-key2']['1sub-key3']
# do note that it is the same thing as large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(find_longest(large_dic, "1key").resolve()) # 1Test
So I made some changes and now it supports additional repr options matching your exact use case :
class PointingSlice:
    """An object plus a chain of subscripts, with an optional display name."""

    def __init__(self, obj, *slices, object_name=None) -> None:
        self.obj = obj
        self.slices = slices
        self.object_name = object_name

    def __str__(self):
        # ``object_name`` (when truthy) replaces the object's own str().
        rendered = [self._repr_slice(item) for item in self.slices]
        return f"{self.object_name or str(self.obj)}{''.join(rendered)}"

    def _repr_slice(self, sliced: slice):
        """Return ``sliced`` formatted as a bracketed subscript."""
        template = "[{}]"
        if isinstance(sliced, slice):
            # A bound of None is rendered as an empty field.
            fields = [
                str() if bound is None else str(bound)
                for bound in (sliced.start, sliced.stop, sliced.step)
            ]
            return template.format(":".join(fields))
        return template.format(repr(sliced))

    def resolve(self):
        """Apply every stored subscript in order and return the result."""
        current = self.obj
        for subscript in self.slices:
            current = current.__getitem__(subscript)
        return current
large_dic = {
"key": {"sub-key1": {"sub-key2": "Test"}},
"0key": {"0sub-key1": "0Test"},
"1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "1Test"}}},
}
def find_longest(mapping, key):
    """Build a ``PointingSlice`` addressing the leaf below ``mapping[key]``.

    Every nested dict on the way must hold exactly one item; anything else
    makes the single-item unpacking raise ``ValueError``.
    """
    current = mapping[key]
    trail = [key]
    while isinstance(current, dict):
        ((next_key, current),) = current.items()
        trail += [next_key]
    return PointingSlice(mapping, *trail)
f = find_longest(large_dic, "1key")
f.object_name = "large_dic" # for representational purposes, it works without this
print(f) # large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(f.resolve()) # 1Test
There are numerous ways to achieve this. You might want to look up "prefix tree traversal" (or "trie traversal").
A simple recursive solution with poor memory efficiency could look like this:
def find_trie_leaf_path(trie: dict, leaf_value, trie_path: "list[str] | None" = None):
    """Yield the key path of every leaf in ``trie`` equal to ``leaf_value``.

    Nested dicts are walked depth-first in insertion order. Each yielded
    path is a new list starting with the keys in ``trie_path`` (if given).

    The walk uses an explicit stack instead of recursion, so the height of
    the trie is not limited by the interpreter's recursion limit.
    """
    prefix = [] if trie_path is None else list(trie_path)
    # Each stack frame is (path to this dict, iterator over its items).
    stack = [(prefix, iter(trie.items()))]
    while stack:
        path, items = stack[-1]
        for key, value in items:
            if isinstance(value, dict):
                # Descend now; the parent's iterator resumes afterwards.
                stack.append((path + [key], iter(value.items())))
                break
            if value == leaf_value:
                yield path + [key]
        else:
            # Iterator exhausted: this dict is fully explored.
            stack.pop()
large_dic = {
"key": {"sub-key1": {"sub-key2": "Test"}},
"0key": {"0sub-key1": "0Test"},
"1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "Test"}}},
}
first_match = next(find_trie_leaf_path(large_dic, "Test"))
all_matches = list(find_trie_leaf_path(large_dic, "Test"))
This should work even if your trie is very wide. If it is very high, I'd rather use an iterative algorithm.
I want to point out, though, that prefix trees are usually used the other way round. If you find yourself needing this search a lot, you should consider a different data structure.
Yes, it's totally possible. Here's the function to get the deeply nested value:
def get_final_value(mapping, key):
    """Return the leaf reached by following single-value dicts from ``mapping[key]``.

    Each nested dict must hold exactly one value; otherwise the unpacking
    raises ``ValueError``.
    """
    node = mapping[key]
    while True:
        if not isinstance(node, dict):
            return node
        [node] = node.values()
Example use:
>>> get_final_value(large_dic, "key")
'Test'
>>> get_final_value(large_dic, "0key")
'0Test'
>>> get_final_value(large_dic, "1key")
'1Test'
>>>
Can the parent keys be deduced from your final value in any way or is the tree structure rather random? If latter is the case then you'll probably just end up searching your tree until you find your value, what path search algorithm you choose for that again depends on the tree structure you have. As already asked in the comments, does each node only have one other node or is it binary or can it have many child nodes?

How do I clear the cache from the @cached_property decorator?

I have a function called "value" that performs a heavy calculation...
The result of the function is always the same if the dataset is not changed for the identifier.
Once the dataset is changed for some identifier, I want to clear the cache, and let the function calculate it again.
You can better understand me by looking at this code:
from functools import cached_property
class Test:
identifiers = {}
dataset = an empty object of dataset type
def __init__(self, identifier, ...)
self.identifier = identifier
...
Test.identifiers[identifier] = self
...
#cached_property
def value(self):
result = None
# heavy calculate based on dataset
return result
#classmethod
def get(cls, identifier):
if identifier in cls.identifiers:
return cls.identifiers[identifier]
else:
return cls(identifier, ...)
#classmethod
def update(cls, dataset):
for block in dataset:
# assume there is block['identifier'] in each block
# here i want to clear the cache of value() function
instance = cls.get(block['identifier'])
# clear #cached_property of instance
cls.dataset.append(block)
As you can read in the CPython source, the value for a cached_property in Python 3.8 is stored in an instance variable of the same name. This is not documented, so it may be an implementation detail that you should not rely upon.
But if you just want to get it done without regards to compatibility, you can remove the cache with del instance.value.
As of Python 3.9, this is documented.
(In addition to @Blckknght's answer)
In case you have a mutable object and you need to refresh all the @cached_property values (because the object has been mutated), you can delete the properties that are already cached in the self.__dict__ dictionary (that's where the cached values are stored):
from functools import cached_property
class Test:
    """Example object whose cached ``value`` is dropped when its data changes."""

    # The list the calculation is based on; it must be assigned before
    # ``add_element`` is called (the example never initialises it).
    datalist: "list[int]"

    @cached_property
    def value(self):
        """Placeholder for the heavy calculation; the result is cached per instance."""
        result = None
        # heavy calculate based on datalist
        return result

    def add_element(self, new: int) -> None:
        """Append ``new`` to ``datalist`` and invalidate the cached ``value``."""
        # restore cache if calculated
        self.__dict__.pop('value', None) # this will delete the cached val if already cached, otherwise do nothing
        self.datalist.append(new)
or in case you want to do it more elegant you can directly edit the __setattr__ method
from functools import cached_property
class Test:
    """Example object that drops its cached ``value`` on every attribute assignment."""

    # The list the calculation is based on.
    datalist: "list[int]"

    @cached_property
    def value(self):
        """Placeholder for the heavy calculation; the result is cached per instance."""
        result = None
        # heavy calculate based on datalist
        return result

    def __setattr__(self, name, val):
        # Store the attribute, then discard the cached ``value`` (if any)
        # so that it is recomputed on the next access.
        self.__dict__[name] = val
        self.__dict__.pop('value', None)
I offer an alternative approach, which might be useful in some cases.
If the type of the dataset you need to do the computation on is hashable, you can make use of the regular functools.cache or lru_cache decorator, applied to a static method that takes the dataset as input.
Here is an example of what I mean:
from functools import lru_cache
class MyClass():
    """Holds ``data`` and exposes a slow derived value that is cached by input.

    The computation is memoised on the dataset itself (which therefore has
    to be hashable), so no explicit cache invalidation is needed when
    ``data`` is replaced.
    """

    def __init__(self, data):
        self.data = data

    @property
    def slow_attribute(self):
        """Return the result of the slow computation for the current ``data``."""
        return self._slow_attribute(self.data)

    @staticmethod
    @lru_cache
    def _slow_attribute(data):
        # long computation, using data,
        # here is just an example
        return sum(data)
Here there is no need to concern yourself with when to clear the cache: if the underlying dataset changes, the staticmethod automatically knows it cannot use the cached value anymore.
This has the additional perk that, if the dataset were to be restored to a previously-used state, the lookup may still be able to use a cached value.
Here is a demo of the code above working:
from time import perf_counter_ns
def print_time_and_value_of_computation(c):
    """Read ``c.slow_attribute`` once and print the elapsed time and the value."""
    started = perf_counter_ns()
    val = c.slow_attribute
    elapsed_ns = perf_counter_ns() - started
    print(f'Time taken: {(elapsed_ns)/1000} microseconds')
    print(f'Value: {val}')
c = MyClass(range(10_000))
print_time_and_value_of_computation(c)
print_time_and_value_of_computation(c)
print('Changing the dataset!')
c.data = range(20_000)
print_time_and_value_of_computation(c)
print_time_and_value_of_computation(c)
print('Going back to the original dataset!')
c.data = range(10_000)
print_time_and_value_of_computation(c)
which returns:
Time taken: 162.074 microseconds
Value: 49995000
Time taken: 2.152 microseconds
Value: 49995000
Changing the dataset!
Time taken: 264.121 microseconds
Value: 199990000
Time taken: 1.989 microseconds
Value: 199990000
Going back to the original dataset!
Time taken: 1.144 microseconds
Value: 49995000
I ran across this problem and came across this thread as I was trying to solve it. The data in my case is effectively immutable, except that the setup of this object in some cases involves using the properties, with the properties being out of date after the setup. @Pablo's answer was helpful, but I wanted that process to dynamically reset everything cached.
Here's a generic example:
Setup and broken thing:
from functools import cached_property
class BaseThing:
    """Base class holding a list of numbers; subclasses supply ``numbers_as_strings``."""

    def __init__(self, *starting_numbers: int):
        self.numbers = []
        self.numbers.extend(starting_numbers)

    @property
    def numbers_as_strings(self) -> dict[int, str]:
        """This property method will be referenced repeatedly"""

    def process_arbitrary_numbers(self, *arbitrary_numbers: int) -> list[str]:
        """Look each number up in ``numbers_as_strings`` (``None`` when absent)."""
        return [self.numbers_as_strings.get(number) for number in arbitrary_numbers]

    def extend_numbers(self, *additional_numbers: int):
        """Append ``additional_numbers`` to ``self.numbers``."""
        self.numbers.extend(additional_numbers)
class BrokenThing(BaseThing):
    """Deliberately broken variant: the mapping is cached once and never refreshed."""

    @cached_property
    def numbers_as_strings(self) -> dict[int, str]:
        # Computed on first access only; numbers added later via
        # ``extend_numbers`` are missing from the cached mapping.
        print("Working on:", " ".join(map(str, self.numbers)))
        return {number: str(number) for number in self.numbers}
output:
>>> thing = BrokenThing(1, 2, 3, 4)
>>> thing.process_arbitrary_numbers(1, 3) == ["1", "3"]
Working on: 1 2 3 4
True
>>> thing.extend_numbers(4, 5, 6)
>>> thing.process_arbitrary_numbers(5, 6) == ["5", "6"]
False
@cached_property replaced with @property to make it work, leaving it inefficient:
class InefficientThing(BaseThing):
    """Correct but slow variant: the mapping is rebuilt on every access."""

    @property
    def numbers_as_strings(self) -> dict[int, str]:
        print("Working on:", " ".join(map(str, self.numbers)))
        return {number: str(number) for number in self.numbers}
output:
>>> thing = InefficientThing(1, 2, 3)
>>> thing.process_arbitrary_numbers(1, 3) == ["1", "3"]
Working on: 1 2 3
Working on: 1 2 3
True
>>> thing.extend_numbers(4, 5, 6)
>>> thing.process_arbitrary_numbers(5, 6) == ["5", "6"]
Working on: 1 2 3 4 5 6
Working on: 1 2 3 4 5 6
True
Solution:
class EfficientThing(BaseThing):
    """Cached variant that drops its cached properties whenever the numbers change."""

    def _clear_cached_properties(self):
        """Remove every cached ``cached_property`` value from this instance."""
        for name in dir(type(self)):
            # Inspect the class attribute so the descriptor itself is
            # examined rather than a value cached on the instance.
            if isinstance(getattr(type(self), name), cached_property):
                print(f"Clearing self.{name}")
                # ``None`` default: the property may not be cached yet.
                vars(self).pop(name, None)

    def extend_numbers(self, *additional_numbers: int):
        """Invalidate the caches, then extend ``self.numbers``."""
        self._clear_cached_properties()
        return super().extend_numbers(*additional_numbers)

    @cached_property
    def numbers_as_strings(self) -> dict[int, str]:
        print("Working on:", " ".join(map(str, self.numbers)))
        return {number: str(number) for number in self.numbers}
output:
>>> thing = EfficientThing(1, 2, 3, 4)
>>> thing.process_arbitrary_numbers(1, 3) == ["1", "3"]
Working on: 1 2 3 4
True
>>> thing.extend_numbers(4, 5, 6)
Clearing self.numbers_as_strings
>>> thing.process_arbitrary_numbers(5, 6) == ["5", "6"]
Working on: 1 2 3 4 4 5 6
True
This loops through all attributes of the object's parent class. If the value of the attribute is an instance of cached_property, it's most likely a cached_property. The attribute is then popped from the instance dictionary. None is passed to pop in case the property hadn't been cached yet.

How to get and set data attributes of a class?

I want to implement 3D vector in Python that stores its components in a numpy array (or another container). And I want to access the components as x, y and z in order to get and set them. What is the best way to do it?
I implemented it like this:
import numpy as np
class Vector3d:
    """3D vector storing its components in a numpy array, exposed as x, y, z."""

    components = ['x', 'y', 'z']
    # Component name -> position in ``_data``; built once so attribute
    # access does not search ``components`` linearly on every call.
    _index = {name: position for position, name in enumerate(components)}

    def __init__(self):
        self._data = np.array([0.0, 0.0, 0.0])

    def __getattr__(self, key):
        # Only invoked when normal attribute lookup fails.
        index = self._index.get(key)
        if index is None:
            # ``object`` defines no ``__getattr__`` to delegate to, so
            # report the missing attribute directly.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {key!r}")
        return self._data[index]

    def __setattr__(self, key, value):
        index = self._index.get(key)
        if index is None:
            # Not a component (e.g. ``_data``): store it normally.
            return super().__setattr__(key, value)
        self._data[index] = value

    def __repr__(self):
        return repr(self._data)

    def norm(self):
        """Return the Euclidean norm of the vector."""
        return np.linalg.norm(self._data)
a = Vector3d()
a.x = 1.2
a.y = 2.3
a.z = 3.4
print(a.x, a.y, a.z)
print(a)
print(a.norm())
Here is what I dislike about it. First, I duplicated the code `if key in self.components: index = self.components.index(key)`. Second, searching for the index on every access seems suboptimal in terms of running time. I believe there's a better way to implement it. Please suggest your approaches.
I'm searching for a solution for Python 3.
Okay, the comment section seems constricting, so I'll move here...
Here is what I made out, ordered by priority:
You do not like how if key in self.components is linearly searching for the component x (or others).
You do not like that repeating piece of code.
Maybe this is something that can work:
import numpy as np
class Vector3d:
    """3D vector storing its components in a list, exposed as x, y, z."""

    # Component name -> position in ``_data``.
    components = {'x':0, 'y':1, 'z':2}

    def __init__(self):
        self._data = [0.0, 0.0, 0.0]

    def __getattr__(self, key):
        # Only invoked when normal attribute lookup fails.
        try:
            index = self.components[key]
        except KeyError:
            # Unknown names must surface as AttributeError, not KeyError,
            # so that hasattr()/getattr(..., default) keep working.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {key!r}") from None
        return self._data[index]

    def __setattr__(self, key, value):
        index = self.components.get(key)
        if index is None:
            # Not a component (e.g. ``_data`` in ``__init__``): set normally.
            super().__setattr__(key, value)
        else:
            self._data[index] = value

    def __repr__(self):
        return repr(self._data)

    def norm(self):
        """Return the Euclidean norm of the vector."""
        return np.linalg.norm(self._data)
a = Vector3d()
a.x = 1.2
a.y = 2.3
a.z = 3.4
print(a.x, a.y, a.z)
print(a)
print(a.norm())
The search is now not linear, so it's a little more optimal than what you wrote.
The multiple lines of repeating code is now gone, but that self._data[components[key]] is something we got to live with! :D
You can add a try and catch to access data from super() in case the return self._data[components[key]] fails!

Generic arguments in recursive functions: terrible habit?

I catch myself doing this a lot. The example is simple, but, in practice, there are a lot of complex assignments to update data structures and conditions under which the second recursion is not called.
I'm working with mesh data. Points, Edges, and Faces are stored in separate dictionaries and "pointers" (dict keys) are heavily used.
import itertools
class Demo(object):
    """Two dictionaries sharing a single key generator."""

    def __init__(self):
        self.a = {}
        self.b = {}
        self.keygen = itertools.count()

    def add_to_b(self, val):
        """Store ``val`` in ``b`` under a fresh key and return that key."""
        fresh_key = next(self.keygen)
        self.b.update({fresh_key: val})
        return fresh_key

    def recur_method(self, arg, argisval=True):
        """Register ``arg`` in ``a``.

        With ``argisval`` being exactly ``True``, ``arg`` is a value: it is
        stored in ``b`` first and its key is then registered a second time
        through a recursive call. Otherwise ``arg`` is already a key.
        """
        a_key = next(self.keygen)
        if argisval is not True:
            # arg is a key
            self.a[a_key] = arg
            return
        # arg is a value
        b_key = self.add_to_b(arg)
        self.a[a_key] = b_key
        self.recur_method(b_key, argisval=False)
demo = Demo()
demo.recur_method(2.2)
Is there a better way? short of cutting up all of my assignment code into seven different methods? Should I be worried about this anyway?
Try
def recur_method(self, key=None, val=None):
    """Skeleton of the method taking either a key or a value.

    Raises:
        ValueError: if neither ``key`` nor ``val`` is supplied.
    """
    if key is None and val is None:
        # ``exception`` is not a defined name; raise a real exception type.
        raise ValueError("You fail it")
If None is a valid input, then use a guard value:
# Unique marker meaning "argument not supplied"; unlike None it can never
# collide with a legitimate caller-provided value.
sentinel = object()


def recur_method(self, key=sentinel, val=sentinel):
    """Skeleton of the method taking either a key or a value (``None`` allowed).

    Raises:
        ValueError: if neither ``key`` nor ``val`` is supplied.
    """
    if key is sentinel and val is sentinel:
        # ``exception`` is not a defined name; raise a real exception type.
        raise ValueError("You fail it")

Lazy data-flow (spreadsheet like) properties with dependencies in Python

My problem is the following: I have some python classes that have properties that are derived from other properties; and those should be cached once they are calculated, and the cached results should be invalidated each time the base properties are changed.
I could do it manually, but it seems quite difficult to maintain if the number of properties grows. So I would like to have something like Makefile rules inside my objects to automatically keep track of what needs to be recalculated.
The desired syntax and behaviour should be something like that:
# this does dirty magic, like generating the reverse dependency graph,
# and preparing the setters that invalidate the cached values
#dataflow_class
class Test(object):
def calc_a(self):
return self.b + self.c
def calc_c(self):
return self.d * 2
a = managed_property(calculate=calc_a, depends_on=('b', 'c'))
b = managed_property(default=0)
c = managed_property(calculate=calc_c, depends_on=('d',))
d = managed_property(default=0)
t = Test()
print t.a
# a has not been initialized, so it calls calc_a
# gets b value
# c has not been initialized, so it calls calc_c
# c value is calculated and stored in t.__c
# a value is calculated and stored in t.__a
t.b = 1
# invalidates the calculated value stored in self.__a
print t.a
# a has been invalidated, so it calls calc_a
# gets b value
# gets c value, from t.__c
# a value is calculated and stored in t.__a
print t.a
# gets value from t.__a
t.d = 2
# invalidates the calculated values stored in t.__a and t.__c
So, is there something like this already available or should I start implementing my own? In the second case, suggestions are welcome :-)
Here, this should do the trick.
The descriptor mechanism (through which the language implements "property") is
more than enough for what you want.
If the code below does not work in some corner cases, just let me know.
class DependentProperty(object):
    """Descriptor for a value that is either a fixed default or calculated.

    A calculated value is cached on the instance as ``_<name>`` and the
    last assigned input as ``_<name>_last_value``.
    """

    def __init__(self, calculate=None, default=None, depends_on=()):
        # "name" and "dependence_tree" properties are attributes
        # set up by the metaclass of the owner class
        if not calculate:
            self.default = default
        else:
            self.calculate = calculate
        self.depends_on = set(depends_on)

    def __get__(self, instance, owner):
        if hasattr(self, "default"):
            return self.default
        cache_attr = "_" + self.name
        if not hasattr(instance, cache_attr):
            # No cached value: recompute it from the last assigned input.
            compute = self.calculate
            last_input = getattr(instance, cache_attr + "_last_value")
            setattr(instance, cache_attr, compute(instance, last_input))
        return getattr(instance, cache_attr)

    def __set__(self, instance, value):
        cache_attr = "_" + self.name
        setattr(instance, cache_attr + "_last_value", value)
        setattr(instance, cache_attr, self.calculate(instance, value))
        # Deleting a dependent attribute drops its cached value.
        for dependent in self.dependence_tree[self.name]:
            delattr(instance, dependent)

    def __delete__(self, instance):
        cache_attr = "_" + self.name
        try:
            delattr(instance, cache_attr)
        except AttributeError:
            # Nothing was cached; deleting is a no-op.
            pass
def assemble_tree(name, dict_, all_deps = None):
    """Return the set of all direct and indirect dependencies of ``name``.

    ``dict_`` maps a property name to an object exposing ``depends_on``.
    ``all_deps`` is the accumulator shared by the recursive calls; callers
    normally leave it unset.
    """
    if all_deps is None:
        all_deps = set()
    for dependance in dict_[name].depends_on:
        if dependance in all_deps:
            # Already expanded (or being expanded): skipping it avoids
            # repeated work and infinite recursion on cyclic dependencies.
            continue
        all_deps.add(dependance)
        assemble_tree(dependance, dict_, all_deps)
    return all_deps
def invert_tree(tree):
    """Reverse a ``{key: iterable of dependencies}`` mapping.

    The result maps every dependency to the set of keys that listed it.
    """
    inverted = {}
    for dependent, requirements in tree.items():
        for requirement in requirements:
            inverted.setdefault(requirement, set()).add(dependent)
    return inverted
class DependenceMeta(type):
    """Metaclass that names each ``DependentProperty`` and wires up the shared
    dependence tree used for invalidation."""

    def __new__(cls, name, bases, dict_):
        # A single dict object is shared by every property of the class.
        dependence_tree = {}
        found = []
        for attr_name, attr in dict_.items():
            if isinstance(attr, DependentProperty):
                attr.name = attr_name
                attr.dependence_tree = dependence_tree
                dependence_tree[attr_name] = set()
                found.append(attr)
        # Property name -> everything it (transitively) depends on ...
        requirements = {}
        for prop in found:
            requirements[prop.name] = assemble_tree(prop.name, dict_)
        # ... inverted into: property name -> everything depending on it.
        dependence_tree.update(invert_tree(requirements))
        return type.__new__(cls, name, bases, dict_)
if __name__ == "__main__":
# Example and visual test:
class Bla:
__metaclass__ = DependenceMeta
def calc_b(self, x):
print "Calculating b"
return x + self.a
def calc_c(self, x):
print "Calculating c"
return x + self.b
a = DependentProperty(default=10)
b = DependentProperty(depends_on=("a",), calculate=calc_b)
c = DependentProperty(depends_on=("b",), calculate=calc_c)
bla = Bla()
bla.b = 5
bla.c = 10
print bla.a, bla.b, bla.c
bla.b = 10
print bla.b
print bla.c
I would like to have something like Makefile rules
then use one! You may consider this model:
one rule = one python file
one result = one *.data file
the pipe is implemented as a makefile or with another dependency analysis tool (cmake, scons)
The hardware test team in our company use such a framework for intensive exploratory tests:
you can integrate other languages and tools easily
you get a stable and proven solution
computations may be distributed across multiple CPUs/computers
you track dependencies on values and rules
debug of intermediate values is easy
the (big) downside to this method is that you have to give up python import keyword because it creates an implicit (and untracked) dependency (there are workarounds for this).
import collections
# Marks "no default supplied", so that None remains usable as a default.
sentinel=object()


class ManagedProperty(object):
    '''
    Descriptor caching its value on the instance under ``'_' + property_name``.

    If deptree = {'a': {'b', 'c'}}, then ManagedProperties `b` and
    `c` will be reset whenever `a` is modified.
    '''
    def __init__(self,property_name,calculate=None,depends_on=tuple(),
                 default=sentinel):
        self.property_name=property_name
        self.private_name='_'+property_name
        # Name of the method on the owner instance that computes the value.
        self.calculate=calculate
        self.depends_on=depends_on
        self.default=default

    def __get__(self,obj,objtype):
        if obj is None:
            # Allows getattr(cls,mprop) to return the ManagedProperty instance
            return self
        try:
            return getattr(obj,self.private_name)
        except AttributeError:
            # Nothing cached yet: use the default if one was given,
            # otherwise call the named calculate method; cache the result.
            result=(getattr(obj,self.calculate)()
                    if self.default is sentinel else self.default)
            setattr(obj,self.private_name,result)
            return result

    def __set__(self,obj,value):
        # obj._dependencies is defined by @register
        dependents=getattr(obj,'_dependencies').get(self.property_name,tuple())
        # Explicit loop: on Python 3 ``map`` is lazy, so map(obj.__delattr__, ...)
        # would never run and stale dependent values would stay cached.
        for dependent in dependents:
            delattr(obj,dependent)
        setattr(obj,self.private_name,value)

    def __delete__(self,obj):
        if hasattr(obj,self.private_name):
            delattr(obj,self.private_name)
def register(*mproperties):
    """Class decorator factory attaching ``mproperties`` to a class and
    storing the flattened dependency map on it as ``_dependencies``."""

    def flatten_dependencies(name, deptree, all_deps=None):
        '''
        A deptree such as {'c': set(['a']), 'd': set(['c'])} means
        'a' depends on 'c' and 'c' depends on 'd'.
        Given such a deptree, flatten_dependencies('d', deptree) returns the set
        of all property_names that depend on 'd' (i.e. set(['a','c']) in the
        above case).
        '''
        collected = set() if all_deps is None else all_deps
        # ``.get`` (not indexing) so the defaultdict is not grown by lookups.
        for dep in deptree.get(name,tuple()):
            collected.add(dep)
            flatten_dependencies(dep, deptree, collected)
        return collected

    def classdecorator(cls):
        deptree=collections.defaultdict(set)
        for mprop in mproperties:
            setattr(cls,mprop.property_name,mprop)
        # Find all ManagedProperties in dir(cls). Note that some of these may be
        # inherited from bases of cls; they may not be listed in mproperties.
        # Doing it this way allows ManagedProperties to be overridden by subclasses.
        for propname in dir(cls):
            candidate=getattr(cls,propname)
            if isinstance(candidate,ManagedProperty):
                for underlying_prop in candidate.depends_on:
                    deptree[underlying_prop].add(candidate.property_name)
        # Flatten the dependency tree so no recursion is necessary. If one were
        # to use recursion instead, then a naive algorithm would make duplicate
        # calls to __delete__. By flattening the tree, there are no duplicate
        # calls to __delete__.
        dependencies={}
        for key in deptree.keys():
            dependencies[key]=flatten_dependencies(key,deptree)
        cls._dependencies=dependencies
        return cls

    return classdecorator
These are the unit tests I used to verify its behavior.
if __name__ == "__main__":
import unittest
import sys
def count(meth):
    """Decorate a method so each call increments ``self.<method name>_count``."""
    def wrapper(self,*args,**kwargs):
        # ``__name__`` works on Python 2.6+ and 3; ``func_name`` is Python 2 only.
        countname=meth.__name__+'_count'
        # The counter starts at 0 the first time the method is called.
        setattr(self,countname,getattr(self,countname,0)+1)
        return meth(self,*args,**kwargs)
    return wrapper
class Test(unittest.TestCase):
    """Unit tests for ``ManagedProperty`` and the ``register`` decorator."""

    def setUp(self):
        # Foo: c is derived from d, a is derived from b and c.
        @register(
            ManagedProperty('d',default=0),
            ManagedProperty('b',default=0),
            ManagedProperty('c',calculate='calc_c',depends_on=('d',)),
            ManagedProperty('a',calculate='calc_a',depends_on=('b','c')))
        class Foo(object):
            @count
            def calc_a(self):
                return self.b + self.c
            @count
            def calc_c(self):
                return self.d * 2

        # Bar overrides c so that it is derived from b instead of d.
        @register(ManagedProperty('c',calculate='calc_c',depends_on=('b',)),
                  ManagedProperty('a',calculate='calc_a',depends_on=('b','c')))
        class Bar(Foo):
            @count
            def calc_c(self):
                return self.b * 3

        self.Foo=Foo
        self.Bar=Bar
        self.foo=Foo()
        self.foo2=Foo()
        self.bar=Bar()

    def test_two_instances(self):
        self.foo.b = 1
        self.assertEqual(self.foo.a,1)
        self.assertEqual(self.foo.b,1)
        self.assertEqual(self.foo.c,0)
        self.assertEqual(self.foo.d,0)
        self.assertEqual(self.foo2.a,0)
        self.assertEqual(self.foo2.b,0)
        self.assertEqual(self.foo2.c,0)
        self.assertEqual(self.foo2.d,0)

    def test_initialization(self):
        self.assertEqual(self.foo.a,0)
        self.assertEqual(self.foo.calc_a_count,1)
        self.assertEqual(self.foo.a,0)
        self.assertEqual(self.foo.calc_a_count,1)
        self.assertEqual(self.foo.b,0)
        self.assertEqual(self.foo.c,0)
        self.assertEqual(self.foo.d,0)
        self.assertEqual(self.bar.a,0)
        self.assertEqual(self.bar.b,0)
        self.assertEqual(self.bar.c,0)
        self.assertEqual(self.bar.d,0)

    def test_dependence(self):
        self.assertEqual(self.Foo._dependencies,
                         {'c': set(['a']), 'b': set(['a']), 'd': set(['a', 'c'])})
        self.assertEqual(self.Bar._dependencies,
                         {'c': set(['a']), 'b': set(['a', 'c'])})

    def test_setting_property_updates_dependent(self):
        self.assertEqual(self.foo.a,0)
        self.assertEqual(self.foo.calc_a_count,1)
        self.foo.b = 1
        # invalidates the calculated value stored in foo.a
        self.assertEqual(self.foo.a,1)
        self.assertEqual(self.foo.calc_a_count,2)
        self.assertEqual(self.foo.b,1)
        self.assertEqual(self.foo.c,0)
        self.assertEqual(self.foo.d,0)
        self.foo.d = 2
        # invalidates the calculated values stored in foo.a and foo.c
        self.assertEqual(self.foo.a,5)
        self.assertEqual(self.foo.calc_a_count,3)
        self.assertEqual(self.foo.b,1)
        self.assertEqual(self.foo.c,4)
        self.assertEqual(self.foo.d,2)
        self.assertEqual(self.bar.a,0)
        self.assertEqual(self.bar.calc_a_count,1)
        self.assertEqual(self.bar.b,0)
        self.assertEqual(self.bar.c,0)
        self.assertEqual(self.bar.calc_c_count,1)
        self.assertEqual(self.bar.d,0)
        self.bar.b = 2
        self.assertEqual(self.bar.a,8)
        self.assertEqual(self.bar.calc_a_count,2)
        self.assertEqual(self.bar.b,2)
        self.assertEqual(self.bar.c,6)
        self.assertEqual(self.bar.calc_c_count,2)
        self.assertEqual(self.bar.d,0)
        self.bar.d = 2
        self.assertEqual(self.bar.a,8)
        self.assertEqual(self.bar.calc_a_count,2)
        self.assertEqual(self.bar.b,2)
        self.assertEqual(self.bar.c,6)
        self.assertEqual(self.bar.calc_c_count,2)
        self.assertEqual(self.bar.d,2)
sys.argv.insert(1,'--verbose')
unittest.main(argv=sys.argv)

Categories