In Clojure I can do something like this:
(-> path
clojure.java.io/resource
slurp
read-string)
instead of doing this:
(read-string (slurp (clojure.java.io/resource path)))
This is called threading in Clojure terminology, and it helps get rid of a lot of parentheses.
In Python, if I try to use functional constructs like map, any, or filter, I have to nest them inside each other. Is there a construct in Python with which I can do something similar to threading (or piping) in Clojure?
I'm not looking for a fully featured version since there are no macros in Python, I just want to do away with a lot of parentheses when I'm doing functional programming in Python.
Edit: I ended up using toolz, which supports piping.
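For reference, a minimal sketch of what that looks like with toolz.pipe, which threads a value through a sequence of single-argument functions:

from toolz import pipe

# pipe(x, f, g) evaluates g(f(x)), i.e. it threads x through f and then g
pipe(5, lambda x: 2 * x + 1, lambda x: x ** 2)  # 121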
Here is a simple implementation of @deceze's idea (although, as @Carcigenicate points out, it is at best a partial solution):
import functools

def apply(x, f):
    return f(x)

def thread(*args):
    return functools.reduce(apply, args)
For example:
def f(x):
    return 2 * x + 1

def g(x):
    return x ** 2

thread(5, f, g)  # evaluates to 121
I wanted to take this to the extreme and do it all dynamically.
Basically, the Chain class below lets you chain functions together, similar to Clojure's -> and ->> macros. It supports threading into either the first or the last argument.
Functions are resolved in this order:

1. A method on the wrapped object
2. A module-level name
3. A built-in
The code:
class Chain(object):
    def __init__(self, value, index=0):
        self.value = value
        self.index = index

    def __getattr__(self, item):
        append_arg = True
        try:
            # 1. Try a method on the wrapped value itself.
            prop = getattr(self.value, item)
            append_arg = False
        except AttributeError:
            try:
                # 2. Fall back to a name defined at module level.
                prop = globals()[item]
            except KeyError:
                # 3. Finally, fall back to a built-in.
                prop = getattr(__builtins__, item)
        if callable(prop):
            def fn(*args, **kwargs):
                orig = list(args)
                if append_arg:
                    if self.index == -1:
                        orig.append(self.value)
                    else:
                        orig.insert(self.index, self.value)
                return Chain(prop(*orig, **kwargs), index=self.index)
            return fn
        else:
            return Chain(prop, index=self.index)
Thread each result as the first argument:
file = Chain(__file__).open('r').readlines().value
Thread each result as the last argument:
result = Chain(range(0, 100), index=-1).map(lambda x: x * x).reduce(lambda x, y: x + y).value
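Note that this last example assumes Python 2, where reduce is still a built-in; in Python 3 the .reduce step would fail, and you would need to make functools.reduce reachable from Chain's name lookup (for example by importing it at module level in the module where Chain is defined).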
I am writing a class, but when I test it I get very strange error messages.
This is my code:
class Kaart:
    def __init__(self, rows=10, colls=10):
        self.rows = rows
        self.colls = colls
        self.matrixRC = []
        for _ in range(rows):
            self.matrixRC.append([2 for _ in range(colls)])

    def __str__(self):
        retVal = '\n'.join(''.join(map(str, row)) for row in self.matrixRC)
        for index, new in enumerate((' ', '#', '?')):
            retVal = retVal.replace(str(index), new)
        return retVal

    def __getitem__(self, key):
        r, c = key
        return self.matrixRC[r][c]

    def __setitem__(self, key, value):
        r, c = key
        self.matrixRC[r][c] = value

    def __iter__(self):
        self.matrixRC.__iter__()
And my test code is here:
import math
import kaart
map = kaart.Kaart()
print(map[1, 2])
but I get the error message AttributeError: Kaart instance has no attribute '__getitem__'
What am I doing wrong? I do have a __getitem__ method, so why isn't it recognized?
I have the same problem with __setitem__ and __iter__, but not with __str__.
I think your bug is a syntactic one: the methods you are defining are not indented, so they belong to the module's namespace rather than to the class. Try indenting all of them, and please let me know if that was the problem.
The problem was indeed the indentation: I had mixed tabs and four-space indents, which confused Python so that it could not associate the methods with the class.
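One way to catch this early: Python 3 raises a TabError when tabs and spaces are mixed inconsistently in indentation, and Python 2 can be run as python -tt script.py to turn such mixing into an error. Configuring your editor to expand tabs to spaces avoids the problem entirely.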
I'm implementing a caching service in Python. I'm using a simple dictionary so far. What I'd like to do is to count hits (the number of times a stored value was retrieved by its key). Python's built-in dict has no such facility, as far as I know. I searched for 'python dictionary count' and found Counter (also on Stack Overflow), but I don't think it satisfies my requirements: I don't need to count what already exists, I need to increment a count each time a value is accessed from the outside. And keeping a second dictionary just for the hit counts doesn't feel like the best data structure I could use :)
Do you have any ideas for how to do this efficiently?
For an alternative approach, if you're using Python 3 (or are willing to add a backport of this module to your Python 2 project, with a slightly different interface), I strongly recommend the lru_cache decorator.
See the docs here. For example, this code:
from functools import lru_cache

@lru_cache(maxsize=32)
def meth(a, b):
    print("Taking some time", a, b)
    return a + b

print(meth(2, 3))
print(meth(2, 4))
print(meth(2, 3))
...will output:
Taking some time 2 3
5
Taking some time 2 4
6
5 <--- Notice that this function result is cached
As per the documentation, you can get the number of hits and misses with meth.cache_info(), and clear the cache with meth.cache_clear().
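For instance, after the three calls in the example above, the cache should report one hit and two misses, along these lines:

print(meth.cache_info())
# CacheInfo(hits=1, misses=2, maxsize=32, currsize=2)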
You can subclass the built-in dict class:
class CustomDict(dict):
    def __init__(self, *args, **kwargs):
        self.hits = {}
        super(CustomDict, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        if key not in self.hits:
            self.hits[key] = 0
        self.hits[key] += 1
        return super(CustomDict, self).__getitem__(key)
usage:
>>> d = CustomDict()
>>> d["test"] = "test"
>>> d["test"]
'test'
>>> d["test"]
'test'
>>> d.hits["test"]
2
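A small variant, in case you want to drop the explicit zero-initialization: collections.Counter treats missing keys as 0, so __getitem__ shrinks to a single increment. A sketch with the same assumed behavior (the name CountingDict is mine, to avoid clashing with the class above):

from collections import Counter

class CountingDict(dict):
    def __init__(self, *args, **kwargs):
        self.hits = Counter()  # missing keys count as 0
        super(CountingDict, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        self.hits[key] += 1
        return super(CountingDict, self).__getitem__(key)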
Having another dictionary to store the hit counts is probably not a bad option, but you could also do something like:
class CacheService(object):
    def __init__(self):
        self.data = {}

    def __setitem__(self, key, item):
        self.data[key] = [item, 0]

    def __getitem__(self, key):
        value = self.data[key]
        value[1] += 1
        return value[0]

    def getcount(self, key):
        return self.data[key][1]
You can use it something like this:
>>> cs = CacheService()
>>> cs[1] = 'one'
>>> cs[2] = 'two'
>>> print cs.getcount(1)
0
>>> cs[1]
'one'
>>> print cs.getcount(1)
1
It will be much easier to just subclass the built-in dict type. This will solve your problem.
class CountDict(dict):
    count = {}  # class attribute: shared across all instances

    def __getitem__(self, key):
        CountDict.count[key] = CountDict.count.get(key, 0) + 1
        return super(CountDict, self).__getitem__(key)

    def __setitem__(self, key, value):
        return super(CountDict, self).__setitem__(key, value)

    def get_count(self, key):
        return CountDict.count.get(key, 0)
This will give you a lot more flexibility. For example, you could keep two counts, one for reads and another for writes, without much added complexity. To learn more about super, see here.
Edited to meet the OP's need of keeping a count of reads per key. The count can be obtained by calling the get_count method.
>>> my_dict = CountDict()
>>> my_dict["a"] = 1
>>> my_dict["a"]
1
>>> my_dict["a"]
1
>>> my_dict.get_count("a")
2
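One caveat worth noting: count here is a class attribute, so all CountDict instances share the same counters. If you want per-instance counts, initialize self.count = {} inside an __init__ method instead.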
You could try this approach.
class AccessCounter(object):
    '''A class that contains a value and implements an access counter.
    The counter increments each time the value is changed.'''

    def __init__(self, val):
        super(AccessCounter, self).__setattr__('counter', 0)
        super(AccessCounter, self).__setattr__('value', val)

    def __setattr__(self, name, value):
        if name == 'value':
            super(AccessCounter, self).__setattr__('counter', self.counter + 1)
        # Make this unconditional.
        # If you want to prevent other attributes from being set, raise AttributeError(name)
        super(AccessCounter, self).__setattr__(name, value)

    def __delattr__(self, name):
        if name == 'value':
            super(AccessCounter, self).__setattr__('counter', self.counter + 1)
        super(AccessCounter, self).__delattr__(name)
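A quick usage sketch (my example, assuming the class above): every assignment to .value goes through __setattr__ and bumps the counter, while __init__ bypasses it via super(), so the counter starts at 0:

c = AccessCounter(10)
c.value = 20
c.value = 30
print(c.counter)  # 2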
I catch myself doing this a lot. The example is simple, but, in practice, there are a lot of complex assignments to update data structures and conditions under which the second recursion is not called.
I'm working with mesh data. Points, Edges, and Faces are stored in separate dictionaries and "pointers" (dict keys) are heavily used.
import itertools

class Demo(object):
    def __init__(self):
        self.a = {}
        self.b = {}
        self.keygen = itertools.count()

    def add_to_b(self, val):
        new_key = next(self.keygen)
        self.b[new_key] = val
        return new_key

    def recur_method(self, arg, argisval=True):
        a_key = next(self.keygen)
        if argisval is True:
            # arg is a value
            b_key = self.add_to_b(arg)
            self.a[a_key] = b_key
            self.recur_method(b_key, argisval=False)
        else:
            # arg is a key
            self.a[a_key] = arg

demo = Demo()
demo.recur_method(2.2)
Is there a better way, short of cutting up all of my assignment code into seven different methods? Should I be worried about this anyway?
Try:

def recur_method(self, key=None, val=None):
    if key is None and val is None:
        raise Exception("You fail it")
If None is a valid input, then use a guard value:

sentinel = object()

def recur_method(self, key=sentinel, val=sentinel):
    if key is sentinel and val is sentinel:
        raise Exception("You fail it")
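To make the pattern concrete, a minimal self-contained sketch (my example, not from the answer above): the sentinel lets callers pass None as a legitimate value while the no-argument case is still caught:

_sentinel = object()

def lookup(key=_sentinel):
    if key is _sentinel:
        raise TypeError("lookup() requires a key argument")
    return key  # None is a perfectly valid argument here

lookup(None)  # fine, returns None
# lookup()    # raises TypeError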
My problem is the following: I have some Python classes with properties that are derived from other properties. These should be cached once they are calculated, and the cached results should be invalidated each time the base properties change.
I could do it manually, but it seems quite difficult to maintain if the number of properties grows. So I would like to have something like Makefile rules inside my objects to automatically keep track of what needs to be recalculated.
The desired syntax and behaviour should be something like this:
# this does dirty magic, like generating the reverse dependency graph,
# and preparing the setters that invalidate the cached values
@dataflow_class
class Test(object):

    def calc_a(self):
        return self.b + self.c

    def calc_c(self):
        return self.d * 2

    a = managed_property(calculate=calc_a, depends_on=('b', 'c'))
    b = managed_property(default=0)
    c = managed_property(calculate=calc_c, depends_on=('d',))
    d = managed_property(default=0)
t = Test()
print t.a
# a has not been initialized, so it calls calc_a
# gets b value
# c has not been initialized, so it calls calc_c
# c value is calculated and stored in t.__c
# a value is calculated and stored in t.__a
t.b = 1
# invalidates the calculated value stored in self.__a
print t.a
# a has been invalidated, so it calls calc_a
# gets b value
# gets c value, from t.__c
# a value is calculated and stored in t.__a
print t.a
# gets value from t.__a
t.d = 2
# invalidates the calculated values stored in t.__a and t.__c
So, is there something like this already available or should I start implementing my own? In the second case, suggestions are welcome :-)
Here, this should do the trick.
The descriptor mechanism (through which the language implements property) is more than enough for what you want.
If the code below does not work in some corner cases, just write me.
class DependentProperty(object):
    def __init__(self, calculate=None, default=None, depends_on=()):
        # "name" and "dependence_tree" attributes are
        # set up by the metaclass of the owner class
        if calculate:
            self.calculate = calculate
        else:
            self.default = default
        self.depends_on = set(depends_on)

    def __get__(self, instance, owner):
        if hasattr(self, "default"):
            return self.default
        if not hasattr(instance, "_" + self.name):
            setattr(instance, "_" + self.name,
                    self.calculate(instance, getattr(instance, "_" + self.name + "_last_value")))
        return getattr(instance, "_" + self.name)

    def __set__(self, instance, value):
        setattr(instance, "_" + self.name + "_last_value", value)
        setattr(instance, "_" + self.name, self.calculate(instance, value))
        for attr in self.dependence_tree[self.name]:
            delattr(instance, attr)

    def __delete__(self, instance):
        try:
            delattr(instance, "_" + self.name)
        except AttributeError:
            pass


def assemble_tree(name, dict_, all_deps=None):
    if all_deps is None:
        all_deps = set()
    for dependence in dict_[name].depends_on:
        all_deps.add(dependence)
        assemble_tree(dependence, dict_, all_deps)
    return all_deps


def invert_tree(tree):
    new_tree = {}
    for key, val in tree.items():
        for dependence in val:
            if dependence not in new_tree:
                new_tree[dependence] = set()
            new_tree[dependence].add(key)
    return new_tree


class DependenceMeta(type):
    def __new__(cls, name, bases, dict_):
        dependence_tree = {}
        properties = []
        for key, val in dict_.items():
            if not isinstance(val, DependentProperty):
                continue
            val.name = key
            val.dependence_tree = dependence_tree
            dependence_tree[key] = set()
            properties.append(val)
        inverted_tree = {}
        for property in properties:
            inverted_tree[property.name] = assemble_tree(property.name, dict_)
        dependence_tree.update(invert_tree(inverted_tree))
        return type.__new__(cls, name, bases, dict_)
if __name__ == "__main__":
    # Example and visual test:

    class Bla:
        __metaclass__ = DependenceMeta

        def calc_b(self, x):
            print "Calculating b"
            return x + self.a

        def calc_c(self, x):
            print "Calculating c"
            return x + self.b

        a = DependentProperty(default=10)
        b = DependentProperty(depends_on=("a",), calculate=calc_b)
        c = DependentProperty(depends_on=("b",), calculate=calc_c)

    bla = Bla()
    bla.b = 5
    bla.c = 10
    print bla.a, bla.b, bla.c
    bla.b = 10
    print bla.b
    print bla.c
"I would like to have something like Makefile rules"

Then use one! You may consider this model:

- one rule = one Python file
- one result = one *.data file
- the pipeline is implemented as a makefile or with another dependency-analysis tool (cmake, scons)

The hardware test team in our company uses such a framework for intensive exploratory tests:

- you can integrate other languages and tools easily
- you get a stable and proven solution
- computations may be distributed over multiple CPUs/computers
- you can track dependencies on values and rules
- debugging intermediate values is easy

The (big) downside of this method is that you have to give up the Python import keyword, because it creates an implicit (and untracked) dependency (there are workarounds for this).
import collections

sentinel = object()

class ManagedProperty(object):
    '''
    If deptree = {'a': set(['b', 'c'])}, then ManagedProperties `b` and
    `c` will be reset whenever `a` is modified.
    '''
    def __init__(self, property_name, calculate=None, depends_on=tuple(),
                 default=sentinel):
        self.property_name = property_name
        self.private_name = '_' + property_name
        self.calculate = calculate
        self.depends_on = depends_on
        self.default = default

    def __get__(self, obj, objtype):
        if obj is None:
            # Allows getattr(cls, mprop) to return the ManagedProperty instance
            return self
        try:
            return getattr(obj, self.private_name)
        except AttributeError:
            result = (getattr(obj, self.calculate)()
                      if self.default is sentinel else self.default)
            setattr(obj, self.private_name, result)
            return result

    def __set__(self, obj, value):
        # obj._dependencies is defined by @register
        map(obj.__delattr__, getattr(obj, '_dependencies').get(self.property_name, tuple()))
        setattr(obj, self.private_name, value)

    def __delete__(self, obj):
        if hasattr(obj, self.private_name):
            delattr(obj, self.private_name)
def register(*mproperties):
    def flatten_dependencies(name, deptree, all_deps=None):
        '''
        A deptree such as {'c': set(['a']), 'd': set(['c'])} means
        'a' depends on 'c', and 'c' depends on 'd'.

        Given such a deptree, flatten_dependencies('d', deptree) returns the set
        of all property_names that depend on 'd' (i.e. set(['a', 'c']) in the
        above case).
        '''
        if all_deps is None:
            all_deps = set()
        for dep in deptree.get(name, tuple()):
            all_deps.add(dep)
            flatten_dependencies(dep, deptree, all_deps)
        return all_deps

    def classdecorator(cls):
        deptree = collections.defaultdict(set)
        for mprop in mproperties:
            setattr(cls, mprop.property_name, mprop)
        # Find all ManagedProperties in dir(cls). Note that some of these may be
        # inherited from bases of cls; they may not be listed in mproperties.
        # Doing it this way allows ManagedProperties to be overridden by subclasses.
        for propname in dir(cls):
            mprop = getattr(cls, propname)
            if not isinstance(mprop, ManagedProperty):
                continue
            for underlying_prop in mprop.depends_on:
                deptree[underlying_prop].add(mprop.property_name)
        # Flatten the dependency tree so no recursion is necessary. If one were
        # to use recursion instead, then a naive algorithm would make duplicate
        # calls to __delete__. By flattening the tree, there are no duplicate
        # calls to __delete__.
        dependencies = {key: flatten_dependencies(key, deptree)
                        for key in deptree.keys()}
        setattr(cls, '_dependencies', dependencies)
        return cls
    return classdecorator
These are the unit tests I used to verify its behavior.
if __name__ == "__main__":
    import unittest
    import sys

    def count(meth):
        def wrapper(self, *args):
            countname = meth.func_name + '_count'
            setattr(self, countname, getattr(self, countname, 0) + 1)
            return meth(self, *args)
        return wrapper

    class Test(unittest.TestCase):
        def setUp(self):
            @register(
                ManagedProperty('d', default=0),
                ManagedProperty('b', default=0),
                ManagedProperty('c', calculate='calc_c', depends_on=('d',)),
                ManagedProperty('a', calculate='calc_a', depends_on=('b', 'c')))
            class Foo(object):
                @count
                def calc_a(self):
                    return self.b + self.c

                @count
                def calc_c(self):
                    return self.d * 2

            @register(ManagedProperty('c', calculate='calc_c', depends_on=('b',)),
                      ManagedProperty('a', calculate='calc_a', depends_on=('b', 'c')))
            class Bar(Foo):
                @count
                def calc_c(self):
                    return self.b * 3

            self.Foo = Foo
            self.Bar = Bar
            self.foo = Foo()
            self.foo2 = Foo()
            self.bar = Bar()
        def test_two_instances(self):
            self.foo.b = 1
            self.assertEqual(self.foo.a, 1)
            self.assertEqual(self.foo.b, 1)
            self.assertEqual(self.foo.c, 0)
            self.assertEqual(self.foo.d, 0)
            self.assertEqual(self.foo2.a, 0)
            self.assertEqual(self.foo2.b, 0)
            self.assertEqual(self.foo2.c, 0)
            self.assertEqual(self.foo2.d, 0)

        def test_initialization(self):
            self.assertEqual(self.foo.a, 0)
            self.assertEqual(self.foo.calc_a_count, 1)
            self.assertEqual(self.foo.a, 0)
            self.assertEqual(self.foo.calc_a_count, 1)
            self.assertEqual(self.foo.b, 0)
            self.assertEqual(self.foo.c, 0)
            self.assertEqual(self.foo.d, 0)
            self.assertEqual(self.bar.a, 0)
            self.assertEqual(self.bar.b, 0)
            self.assertEqual(self.bar.c, 0)
            self.assertEqual(self.bar.d, 0)

        def test_dependence(self):
            self.assertEqual(self.Foo._dependencies,
                             {'c': set(['a']), 'b': set(['a']), 'd': set(['a', 'c'])})
            self.assertEqual(self.Bar._dependencies,
                             {'c': set(['a']), 'b': set(['a', 'c'])})

        def test_setting_property_updates_dependent(self):
            self.assertEqual(self.foo.a, 0)
            self.assertEqual(self.foo.calc_a_count, 1)
            self.foo.b = 1
            # invalidates the calculated value stored in foo.a
            self.assertEqual(self.foo.a, 1)
            self.assertEqual(self.foo.calc_a_count, 2)
            self.assertEqual(self.foo.b, 1)
            self.assertEqual(self.foo.c, 0)
            self.assertEqual(self.foo.d, 0)
            self.foo.d = 2
            # invalidates the calculated values stored in foo.a and foo.c
            self.assertEqual(self.foo.a, 5)
            self.assertEqual(self.foo.calc_a_count, 3)
            self.assertEqual(self.foo.b, 1)
            self.assertEqual(self.foo.c, 4)
            self.assertEqual(self.foo.d, 2)
            self.assertEqual(self.bar.a, 0)
            self.assertEqual(self.bar.calc_a_count, 1)
            self.assertEqual(self.bar.b, 0)
            self.assertEqual(self.bar.c, 0)
            self.assertEqual(self.bar.calc_c_count, 1)
            self.assertEqual(self.bar.d, 0)
            self.bar.b = 2
            self.assertEqual(self.bar.a, 8)
            self.assertEqual(self.bar.calc_a_count, 2)
            self.assertEqual(self.bar.b, 2)
            self.assertEqual(self.bar.c, 6)
            self.assertEqual(self.bar.calc_c_count, 2)
            self.assertEqual(self.bar.d, 0)
            self.bar.d = 2
            self.assertEqual(self.bar.a, 8)
            self.assertEqual(self.bar.calc_a_count, 2)
            self.assertEqual(self.bar.b, 2)
            self.assertEqual(self.bar.c, 6)
            self.assertEqual(self.bar.calc_c_count, 2)
            self.assertEqual(self.bar.d, 2)

    sys.argv.insert(1, '--verbose')
    unittest.main(argv=sys.argv)