Identifying pure functions in python

Identifying pure functions in python - python

I have a decorator #pure that registers a function as pure, for example:
#pure
def rectangle_area(a,b):
return a*b
#pure
def triangle_area(a,b,c):
return ((a+(b+c))(c-(a-b))(c+(a-b))(a+(b-c)))**0.5/4
Next, I want to identify a newly defined pure function
def house_area(a,b,c):
return rectangle_area(a,b) + triangle_area(a,b,c)
Obviously house_area is pure, since it only calls pure functions.
How can I discover all pure functions automatically (perhaps by using ast)

Assuming operators are all pure, then essentially you only need to check all the functions calls. This can indeed be done with the ast module.
First I defined the pure decorator as:
def pure(f):
f.pure = True
return f
Adding an attribute telling that it's pure, allows skipping early or "forcing" a function to identify as pure. This is useful if you'd need a function like math.sin to identify as pure. Additionally since you can't add attributes to builtin functions.
#pure
def sin(x):
return math.sin(x)
All in all. Use the ast module to visit all the nodes. Then for each Call node check whether the function being called is pure.
import ast
class PureVisitor(ast.NodeVisitor):
def __init__(self, visited):
super().__init__()
self.pure = True
self.visited = visited
def visit_Name(self, node):
return node.id
def visit_Attribute(self, node):
name = [node.attr]
child = node.value
while child is not None:
if isinstance(child, ast.Attribute):
name.append(child.attr)
child = child.value
else:
name.append(child.id)
break
name = ".".join(reversed(name))
return name
def visit_Call(self, node):
if not self.pure:
return
name = self.visit(node.func)
if name not in self.visited:
self.visited.append(name)
try:
callee = eval(name)
if not is_pure(callee, self.visited):
self.pure = False
except NameError:
self.pure = False
Then check whether the function has the pure attribute. If not get code and check if all the functions calls can be classified as pure.
import inspect, textwrap
def is_pure(f, _visited=None):
try:
return f.pure
except AttributeError:
pass
try:
code = inspect.getsource(f.__code__)
except AttributeError:
return False
code = textwrap.dedent(code)
node = compile(code, "<unknown>", "exec", ast.PyCF_ONLY_AST)
if _visited is None:
_visited = []
visitor = PureVisitor(_visited)
visitor.visit(node)
return visitor.pure
Note that print(is_pure(lambda x: math.sin(x))) doesn't work since inspect.getsource(f.__code__) returns code on a line by line basis. So the source returned by getsource would include the print and is_pure call, thus yielding False. Unless those functions are overridden.
To verify that it works, test it by doing:
print(house_area) # Prints: True
To list through all the functions in the current module:
import sys, types
for k in dir(sys.modules[__name__]):
v = globals()[k]
if isinstance(v, types.FunctionType):
print(k, is_pure(v))
The visited list keeps track of which functions have already been verified pure. This help circumvent problems related to recursion. Since the code isn't executed, the evaluation would recursively visit factorial.
#pure
def factorial(n):
return 1 if n == 1 else n * factorial(n - 1)
Note that you might need to revise the following code. Choosing another way to obtain a function from its name.
try:
callee = eval(name)
if not is_pure(callee, self.visited):
self.pure = False
except NameError:
self.pure = False

Related

Using AST to change function names from CamelCase to snake_case

Here in this question, I was asking for a way to convert function names from CamelCase to snake_case, one of the comments suggested using AST.
I found a code snippet to find all function calls in a script
import ast
from collections import deque
class FuncCallVisitor(ast.NodeVisitor):
def __init__(self):
self._name = deque()
#property
def name(self):
return '.'.join(self._name)
#name.deleter
def name(self):
self._name.clear()
def visit_Name(self, node):
self._name.appendleft(node.id)
def visit_Attribute(self, node):
try:
self._name.appendleft(node.attr)
self._name.appendleft(node.value.id)
except AttributeError:
self.generic_visit(node)
def get_func_calls(tree):
func_calls = []
for node in ast.walk(tree):
if isinstance(node, ast.Call):
callvisitor = FuncCallVisitor()
callvisitor.visit(node.func)
func_calls.append(callvisitor.name)
return func_calls
if __name__ == '__main__':
tree = ast.parse(open("some_dir").read())
print(get_func_calls(tree))
using this code I have all function calls in my script, now I want to write a code that converts this name to snake_case.
I found this code snippet to modify a node in AST tree
class RewriteName(ast.NodeTransformer):
def visit_Name(self, node):
return ast.copy_location(ast.Subscript(
value=ast.Name(id='data', ctx=ast.Load()),
slice=ast.Index(value=ast.Str(s=node.id)),
ctx=node.ctx
), node)
tree = RewriteName().visit(tree)
I didn't understand how to use it to serve my purpose. Any explanation or other pieces of advice?

I am kind of late to this, but maybe it will be found in the future.
Anyway, here is a quick hack at it. Actually, you were almost there with your solution. The name method returns your name, then you can arbitrarily change that. So in your def get_func_calls(tree) call you can manipulate the string and re-assign the new name to the Call object.
ccName = callvisitor.name # work with some local var
new_name = '' # the new func name
for char_i in range(len(ccName)): # go over the name
if ccName[char_i].isupper(): # check if the current char is with uppercase
if ccName[char_i - 1] == '.': # check if the previous character is a dot
new_name += ccName[char_i].lower() # if it is, make the char to lowercase
else:
new_name += '_' + ccName[char_i].lower() # otherwise add the snake_
else:
new_name += ccName[char_i] # just add the rest of the lower chars
callvisitor._name = new_name # just re-asign the new name
func_calls.append(callvisitor._name)
This is definitely not a pretty solution and it also depends if you want to change only function definitions or every single function call in a file, but this should give you an idea on how to change the ast.

How to extract functions used in a python code file?

I would like to create a list of all the functions used in a code file. For example if we have following code in a file named 'add_random.py'
`
import numpy as np
from numpy import linalg
def foo():
print np.random.rand(4) + np.random.randn(4)
print linalg.norm(np.random.rand(4))
`
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual name in the form of 'module.submodule.function'. Is there something built in python language that can help me do this?

You can extract all call expressions with:
import ast
class CallCollector(ast.NodeVisitor):
def __init__(self):
self.calls = []
self.current = None
def visit_Call(self, node):
# new call, trace the function expression
self.current = ''
self.visit(node.func)
self.calls.append(self.current)
self.current = None
def generic_visit(self, node):
if self.current is not None:
print "warning: {} node in function expression not supported".format(
node.__class__.__name__)
super(CallCollector, self).generic_visit(node)
# record the func expression
def visit_Name(self, node):
if self.current is None:
return
self.current += node.id
def visit_Attribute(self, node):
if self.current is None:
self.generic_visit(node)
self.visit(node.value)
self.current += '.' + node.attr
Use this with a ast parse tree:
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task. Any indirection would not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.

In general, this problem is undecidable, consider for example getattribute(random, "random")().
If you want static analysis, the best there is now is jedi
If you accept dynamic solutions, then cover coverage is your best friend. It will show all used functions, rather than only directly referenced though.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging
class Proxy(object):
def __getattr__(self, name):
logging.debug("tried to use random.%s", name)
return getattribute(_random, name)
_random = random
random = Proxy()

Search for recursive functions in a Python project

I need to find all functions in a Python project which are recursive (i.e. call themselves).
Any ideas how to approach this?

It's hard to say whether function recursive or not before it runs. I would personally use this one with inspect.getclosurevars (added in Python 3.3):
import sys
if sys.version_info >= (3, 3, 0):
from inspect import getclosurevars
def is_recursive(func):
if sys.version_info >= (3, 3, 0):
return getclosurevars(func).globals.get(func.__name__) is func
else:
# We can implement part of it if it's not in our standard library
def global_vars_in_closure(func):
vars = {x: func.__globals__.get(x) for x in func.__code__.co_names}
return vars
return global_vars_in_closure(func).get(func.__name__) is func
It will work correctly in most use cases, just remember to use func_X instead of __X__ as function methods on Python 2. It will fail only if a function contain a reference to itself without call:
def false_recursive():
false_recursive
def true_recursive():
true_recursive()
assert is_recursive(true_recursive), 'Must not fail'
assert not is_recursive(false_recursive), 'See? It fails' # AssertionError: See? It fails

You can parse the source code with ast:
code = """
def f(x):
f(x)
def g(x):
pass
"""
import ast
class FindRecursiveFunctions(ast.NodeVisitor):
def __init__(self):
self._current_func = None
self.recursive_funcs = set()
def generic_visit(self, node):
if node.__class__ is ast.FunctionDef:
self._current_func = node.name
if node.__class__ is ast.Call and node.func.id == self._current_func:
self.recursive_funcs.add(self._current_func)
super(FindRecursiveFunctions, self).generic_visit(node)
>>> tree = ast.parse(code)
>>> finder = FindRecursiveFunctions()
>>> finder.visit(tree)
>>> finder.recursive_funcs
set(['f'])

Python decorator with multiprocessing fails

I would like to use a decorator on a function that I will subsequently pass to a multiprocessing pool. However, the code fails with "PicklingError: Can't pickle : attribute lookup __builtin__.function failed". I don't quite see why it fails here. I feel certain that it's something simple, but I can't find it. Below is a minimal "working" example. I thought that using the functools function would be enough to let this work.
If I comment out the function decoration, it works without an issue. What is it about multiprocessing that I'm misunderstanding here? Is there any way to make this work?
Edit: After adding both a callable class decorator and a function decorator, it turns out that the function decorator works as expected. The callable class decorator continues to fail. What is it about the callable class version that keeps it from being pickled?
import random
import multiprocessing
import functools
class my_decorator_class(object):
def __init__(self, target):
self.target = target
try:
functools.update_wrapper(self, target)
except:
pass
def __call__(self, elements):
f = []
for element in elements:
f.append(self.target([element])[0])
return f
def my_decorator_function(target):
#functools.wraps(target)
def inner(elements):
f = []
for element in elements:
f.append(target([element])[0])
return f
return inner
#my_decorator_function
def my_func(elements):
f = []
for element in elements:
f.append(sum(element))
return f
if __name__ == '__main__':
elements = [[random.randint(0, 9) for _ in range(5)] for _ in range(10)]
pool = multiprocessing.Pool(processes=4)
results = [pool.apply_async(my_func, ([e],)) for e in elements]
pool.close()
f = [r.get()[0] for r in results]
print(f)

The problem is that pickle needs to have some way to reassemble everything that you pickle. See here for a list of what can be pickled:
http://docs.python.org/library/pickle.html#what-can-be-pickled-and-unpickled
When pickling my_func, the following components need to be pickled:
An instance of my_decorator_class, called my_func.
This is fine. Pickle will store the name of the class and pickle its __dict__ contents. When unpickling, it uses the name to find the class, then creates an instance and fills in the __dict__ contents. However, the __dict__ contents present a problem...
The instance of the original my_func that's stored in my_func.target.
This isn't so good. It's a function at the top-level, and normally these can be pickled. Pickle will store the name of the function. The problem, however, is that the name "my_func" is no longer bound to the undecorated function, it's bound to the decorated function. This means that pickle won't be able to look up the undecorated function to recreate the object. Sadly, pickle doesn't have any way to know that object it's trying to pickle can always be found under the name __main__.my_func.
You can change it like this and it will work:
import random
import multiprocessing
import functools
class my_decorator(object):
def __init__(self, target):
self.target = target
try:
functools.update_wrapper(self, target)
except:
pass
def __call__(self, candidates, args):
f = []
for candidate in candidates:
f.append(self.target([candidate], args)[0])
return f
def old_my_func(candidates, args):
f = []
for c in candidates:
f.append(sum(c))
return f
my_func = my_decorator(old_my_func)
if __name__ == '__main__':
candidates = [[random.randint(0, 9) for _ in range(5)] for _ in range(10)]
pool = multiprocessing.Pool(processes=4)
results = [pool.apply_async(my_func, ([c], {})) for c in candidates]
pool.close()
f = [r.get()[0] for r in results]
print(f)
You have observed that the decorator function works when the class does not. I believe this is because functools.wraps modifies the decorated function so that it has the name and other properties of the function it wraps. As far as the pickle module can tell, it is indistinguishable from a normal top-level function, so it pickles it by storing its name. Upon unpickling, the name is bound to the decorated function so everything works out.

I also had some problem using decorators in multiprocessing. I'm not sure if it's the same problem as yours:
My code looked like this:
from multiprocessing import Pool
def decorate_func(f):
def _decorate_func(*args, **kwargs):
print "I'm decorating"
return f(*args, **kwargs)
return _decorate_func
#decorate_func
def actual_func(x):
return x ** 2
my_swimming_pool = Pool()
result = my_swimming_pool.apply_async(actual_func,(2,))
print result.get()
and when I run the code I get this:
Traceback (most recent call last):
File "test.py", line 15, in <module>
print result.get()
File "somedirectory_too_lengthy_to_put_here/lib/python2.7/multiprocessing/pool.py", line 572, in get
raise self._value
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
I fixed it by defining a new function to wrap the function in the decorator function, instead of using the decorator syntax
from multiprocessing import Pool
def decorate_func(f):
def _decorate_func(*args, **kwargs):
print "I'm decorating"
return f(*args, **kwargs)
return _decorate_func
def actual_func(x):
return x ** 2
def wrapped_func(*args, **kwargs):
return decorate_func(actual_func)(*args, **kwargs)
my_swimming_pool = Pool()
result = my_swimming_pool.apply_async(wrapped_func,(2,))
print result.get()
The code ran perfectly and I got:
I'm decorating
4
I'm not very experienced at Python, but this solution solved my problem for me

If you want the decorators too bad (like me), you can also use the exec() command on the function string, to circumvent the mentioned pickling.
I wanted to be able to pass all the arguments to an original function and then use them successively. The following is my code for it.
At first, I made a make_functext() function to convert the target function object to a string. For that, I used the getsource() function from the inspect module (see doctumentation here and note that it can't retrieve source code from compiled code etc.). Here it is:
from inspect import getsource
def make_functext(func):
ft = '\n'.join(getsource(func).split('\n')[1:]) # Removing the decorator, of course
ft = ft.replace(func.__name__, 'func') # Making function callable with 'func'
ft = ft.replace('#§ ', '').replace('#§', '') # For using commented code starting with '#§'
ft = ft.strip() # In case the function code was indented
return ft
It is used in the following _worker() function that will be the target of the processes:
def _worker(functext, args):
scope = {} # This is needed to keep executed definitions
exec(functext, scope)
scope['func'](args) # Using func from scope
And finally, here's my decorator:
from multiprocessing import Process
def parallel(num_processes, **kwargs):
def parallel_decorator(func, num_processes=num_processes):
functext = make_functext(func)
print('This is the parallelized function:\n', functext)
def function_wrapper(funcargs, num_processes=num_processes):
workers = []
print('Launching processes...')
for k in range(num_processes):
p = Process(target=_worker, args=(functext, funcargs[k])) # use args here
p.start()
workers.append(p)
return function_wrapper
return parallel_decorator
The code can finally be used by defining a function like this:
#parallel(4)
def hello(args):
#§ from time import sleep # use '#§' to avoid unnecessary (re)imports in main program
name, seconds = tuple(args) # unpack args-list here
sleep(seconds)
print('Hi', name)
... which can now be called like this:
hello([['Marty', 0.5],
['Catherine', 0.9],
['Tyler', 0.7],
['Pavel', 0.3]])
... which outputs:
This is the parallelized function:
def func(args):
from time import sleep
name, seconds = tuple(args)
sleep(seconds)
print('Hi', name)
Launching processes...
Hi Pavel
Hi Marty
Hi Tyler
Hi Catherine
Thanks for reading, this is my very first post. If you find any mistakes or bad practices, feel free to leave a comment. I know that these string conversions are quite dirty, though...

If you use this code for your decorator:
import multiprocessing
from types import MethodType
DEFAULT_POOL = []
def run_parallel(_func=None, *, name: str = None, context_pool: list = DEFAULT_POOL):
class RunParallel:
def __init__(self, func):
self.func = func
def __call__(self, *args, **kwargs):
process = multiprocessing.Process(target=self.func, name=name, args=args, kwargs=kwargs)
context_pool.append(process)
process.start()
def __get__(self, instance, owner):
return self if instance is None else MethodType(self, instance)
if _func is None:
return RunParallel
else:
return RunParallel(_func)
def wait_context(context_pool: list = DEFAULT_POOL, kill_others_if_one_fails: bool = False):
finished = []
for process in context_pool:
process.join()
finished.append(process)
if kill_others_if_one_fails and process.exitcode != 0:
break
if kill_others_if_one_fails:
# kill unfinished processes
for process in context_pool:
if process not in finished:
process.kill()
# wait for every process to be dead
for process in context_pool:
process.join()
Then you can use it like this, in these 4 examples:
#run_parallel
def m1(a, b="b"):
print(f"m1 -- {a=} {b=}")
#run_parallel(name="mym2", context_pool=DEFAULT_POOL)
def m2(d, cc="cc"):
print(f"m2 -- {d} {cc=}")
a = 1/0
class M:
#run_parallel
def c3(self, k, n="n"):
print(f"c3 -- {k=} {n=}")
#run_parallel(name="Mc4", context_pool=DEFAULT_POOL)
def c4(self, x, y="y"):
print(f"c4 -- {x=} {y=}")
if __name__ == "__main__":
m1(11)
m2(22)
M().c3(33)
M().c4(44)
wait_context(kill_others_if_one_fails=True)
The output will be:
m1 -- a=11 b='b'
m2 -- 22 cc='cc'
c3 -- k=33 n='n'
(followed by the exception raised in method m2)

Passing 'None' as function parameter (where parameter is a function)

I am writing a small app that has to perform some 'sanity checks' before entering execution. (eg. of a sanity check: test if a certain path is readable / writable / exists)
The code:
import logging
import os
import shutil
import sys
from paths import PATH
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger('sf.core.sanity')
def sanity_access(path, mode):
ret = os.access(path, mode)
logfunc = log.debug if ret else log.warning
loginfo = (os.access.__name__, path, mode, ret)
logfunc('%s(\'%s\', %s)==%s' % loginfo)
return ret
def sanity_check(bool_func, true_func, false_func):
ret = bool_func()
(logfunc, execfunc) = (log.debug, true_func) if ret else \
(log.warning, false_func)
logfunc('exec: %s', execfunc.__name__)
execfunc()
def sanity_checks():
sanity_check(lambda: sanity_access(PATH['userhome'], os.F_OK), \
lambda: None, sys.exit)
My question is related to the sanity_check function.
This function takes 3 parameters (bool_func, true_func, false_func). If the bool_func (which is the test function, returning a boolean value) fails, true_func gets executed, else the false_func gets executed.
1) lambda: None is a little lame , because for example if the sanity_access returns True, lambda: None gets executed, and the output printed will be:
DEBUG:sf.core.sanity:access('/home/nomemory', 0)==True
DEBUG:sf.core.sanity:exec: <lambda>
So it won't be very clear in the logs what function got executed. The log will only contain <lambda> . Is there a default function that does nothing and can be passed as a parameter ? Is it a way to return the name of the first function that is being executed inside a lambda ?
Or a way not to log that "exec" if 'nothing' is sent as a paramter ?
What's the none / do-nothing equivalent for functions ?
sanity_check(lambda: sanity_access(PATH['userhome'], os.F_OK), \
<do nothing, but show something more useful than <lambda>>, sys.exit)
Additional question, why is lambda: pass instead of lambda: None not working ?

What's with all the lambdas that serve no purpose? Well, maybe optional arguments will help you a bit:
def sanity_check( test, name='undefined', ontrue=None, onfalse=None ):
if test:
log.debug(name)
if ontrue is not None:
ontrue()
else:
log.warn( name )
if onfalse is not None:
onfalse()
def sanity_checks():
sanity_check(sanity_access(PATH['userhome'], os.F_OK), 'test home',
onfalse=sys.exit)
But you are really overcomplicating things.

update
I would normally delete this post because THC4k saw through all the complexity and rewrote your function correctly. However in a different context, the K combinator trick might come in handy, so I'll leave it up.
There is no builtin that does what you want AFIK. I believe that you want the K combinator (the link came up on another question) which can be encoded as
def K_combinator(x, name):
def f():
return x
f.__name__ = name
return f
none_function = K_combinator(None, 'none_function')
print none_function()
of course if this is just a one off then you could just do
def none_function():
return None
But then you don't get to say "K combinator". Another advantage of the 'K_combinator' approach is that you can pass it to functions, for example,
foo(call_back1, K_combinator(None, 'name_for_logging'))
as for your second statement, only expressions are allowed in lambda. pass is a statement. Hence, lambda: pass fails.
You can slightly simplify your call to sanity check by removing the lambda around the first argument.
def sanity_check(b, true_func, false_func):
if b:
logfunc = log.debug
execfunc = true_func
else:
logfunc = log.warning
execfunc = false_func
logfunc('exec: %s', execfunc.__name__)
execfunc()
def sanity_checks():
sanity_check(sanity_access(PATH['userhome'], os.F_OK),
K_combinator(None, 'none_func'), sys.exit)
This is more readable (largely from expanding the ternary operator into an if). the boolfunc wasn't doing anything because sanity_check wasn't adding any arguments to the call. Might as well just call instead of wrapping it in a lambda.

You might want to rethink this.
class SanityCheck( object ):
def __call__( self ):
if self.check():
logger.debug(...)
self.ok()
else:
logger.warning(...)
self.not_ok()
def check( self ):
return True
def ok( self ):
pass
def not_ok( self ):
sys.exit(1)
class PathSanityCheck(SanityCheck):
path = "/path/to/resource"
def check( self ):
return os.access( path, os.F_OK )
class AnotherPathSanityCheck(SanityCheck):
path = "/another/path"
def startup():
checks = ( PathSanityCheck(), AnotherPathSanityCheck() )
for c in checks:
c()
Callable objects can simplify your life.

>>> import dis
>>> f = lambda: None
>>> dis.dis(f)
1 0 LOAD_CONST 0 (None)
3 RETURN_VALUE
>>> g = lambda: Pass
>>>
>>>
>>> dis.dis(g)
1 0 LOAD_GLOBAL 0 (Pass)
3 RETURN_VALUE
>>> g = lambda: pass
File "<stdin>", line 1
g = lambda: pass
^
SyntaxError: invalid syntax

Actually, what you want is a function which does nothing, but has a __name__ which is useful to the log. The lambda function is doing exactly what you want, but execfunc.__name__ is giving "<lambda>". Try one of these:
def nothing_func():
return
def ThisAppearsInTheLog():
return
You can also put your own attributes on functions:
def log_nothing():
return
log_nothing.log_info = "nothing interesting"
Then change execfunc.__name__ to getattr(execfunc,'log_info', '')

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Identifying pure functions in python - python

Related

Using AST to change function names from CamelCase to snake_case

How to extract functions used in a python code file?

Search for recursive functions in a Python project

Python decorator with multiprocessing fails

Passing 'None' as function parameter (where parameter is a function)

Categories

Resources