Search for recursive functions in a Python project - python

I need to find all functions in a Python project which are recursive (i.e. call themselves).
Any ideas how to approach this?

It's hard to say whether a function is recursive or not before it runs. I would personally use this one with inspect.getclosurevars (added in Python 3.3):
import sys

if sys.version_info >= (3, 3, 0):
    from inspect import getclosurevars


def is_recursive(func):
    """Report whether *func* refers to itself through its own global name.

    The check is static: it only tests whether the global name
    ``func.__name__`` used inside the function body is bound to *func*
    itself.  A bare reference that is never called is therefore reported
    as recursive too.

    :param func: a Python function object.
    :return: ``True`` if the function's code refers to the function itself.
    """
    if sys.version_info >= (3, 3, 0):
        return getclosurevars(func).globals.get(func.__name__) is func
    # inspect.getclosurevars is not available: rebuild the part of it that
    # is needed here, i.e. the globals named by the function's code object.
    global_refs = {
        name: func.__globals__.get(name) for name in func.__code__.co_names
    }
    return global_refs.get(func.__name__) is func
It will work correctly in most use cases; just remember to use the func_X names instead of __X__ for the function attributes on Python 2. It will fail only if a function contains a reference to itself without calling it:
def false_recursive():
    # References itself but never performs the call.
    false_recursive


def true_recursive():
    true_recursive()


assert is_recursive(true_recursive), 'Must not fail'
# This second assertion is expected to raise: it demonstrates the false positive.
assert not is_recursive(false_recursive), 'See? It fails' # AssertionError: See? It fails

You can parse the source code with ast:
code = """
def f(x):
    f(x)
def g(x):
    pass
"""

import ast


class FindRecursiveFunctions(ast.NodeVisitor):
    """Collect the names of functions that call themselves by plain name.

    After ``visit(tree)`` the attribute ``recursive_funcs`` holds the names
    of every function whose body (including nested functions) contains a
    call of the form ``name(...)`` where ``name`` is that function's name.
    Calls made through attributes (``self.method()``, ``module.func()``)
    and indirect recursion are not detected.
    """

    def __init__(self):
        # Names of the function definitions currently being traversed,
        # outermost first.
        self._func_stack = []
        self.recursive_funcs = set()

    @property
    def _current_func(self):
        """Name of the innermost function being visited, or ``None``."""
        return self._func_stack[-1] if self._func_stack else None

    def visit_FunctionDef(self, node):
        self._func_stack.append(node.name)
        try:
            self.generic_visit(node)
        finally:
            # Leave the function scope so that later module-level calls
            # are not attributed to it.
            self._func_stack.pop()

    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_Call(self, node):
        callee = node.func
        # Only plain names carry an ``id``; attribute or subscript callees
        # are skipped instead of raising AttributeError.
        if isinstance(callee, ast.Name) and callee.id in self._func_stack:
            self.recursive_funcs.add(callee.id)
        self.generic_visit(node)
>>> tree = ast.parse(code)
>>> finder = FindRecursiveFunctions()
>>> finder.visit(tree)
>>> finder.recursive_funcs
set(['f'])

Related

Cache Size Decorators in Python

I am building my own decorator function, but I can't seem to be able to update the func.cache_length attribute of the function.
The code below simply uses an OrderedDict to store the items from all the dataframes loaded in pandas, with 5 dataframes maximum stored in cache.
I want the user to also be able to find out how many items the function currently has loaded using cache_length, but every time I run it I get 0.
from functools import wraps
from collections import OrderedDict
def cache(func, max_length=5):
    """Cache results of *func* keyed by its ``df_name`` keyword argument.

    This is the code from the question, kept with its original logic so
    that the reported behaviour can be reproduced.

    :param func: function to wrap; it must be called with ``df_name=...``.
    :param max_length: maximum number of results kept in ``func.cache_dict``.
    :return: the wrapping function.
    """
    func.cache_dict = OrderedDict()
    func.cache_length = 0

    # functools.wraps copies the entries of func.__dict__ onto the wrapper
    # at this point: the wrapper shares the same cache_dict object but gets
    # its own cache_length, which stays 0 while func.cache_length increases.
    @wraps(func)
    def wrapper(*args, **kwargs):
        if kwargs['df_name'] in func.cache_dict:
            return func.cache_dict[kwargs['df_name']]
        elif len(func.cache_dict) < max_length:
            print('Running function...')
            df = func(*args, **kwargs)
            func.cache_dict[kwargs['df_name']] = df
            func.cache_length += 1
            return df
        else:
            # Cache is full: drop the most recently inserted entry.
            func.cache_dict.popitem(last=True)
            df = func(*args, **kwargs)
            func.cache_dict[kwargs['df_name']] = df
            return df

    # Assigned after wraps() ran, so this attribute exists on func only.
    func.cache_reset = lambda: func.cache_dict.clear()
    return wrapper
import pandas as pd
@cache
def data_reader(*, df_name: pd.DataFrame, file: str):
    """Read the CSV at *file* into a DataFrame.

    ``df_name`` is not used here; the ``cache`` decorator uses it as the
    cache key.  NOTE(review): it is annotated as ``pd.DataFrame`` but the
    example call passes a string -- confirm the intended type.
    """
    df = pd.read_csv(file)
    return df
This is the output vs. expected (I should get 1),
data_reader(df_name='test_dataframe', file="parsed_data.csv")
>>
Running function...
....
>>
data_reader.cache_length
>>
0
Based on what you described, here is a more general implementation: (details below)
from collections import OrderedDict
from functools import wraps
def cache(function=None, *, max_length=5):
    """Decorator caching results in an ``OrderedDict``, oldest entry evicted first.

    Usable both as ``@cache`` and as ``@cache(max_length=...)``.  The cache
    key is the string ``f"args={args}, kwargs={kwargs}"``, so arguments with
    identical representations share an entry.  The dictionary is exposed as
    the ``cache`` attribute of the decorated function.

    :param function: the function to decorate when used without parentheses.
    :param max_length: maximum number of cached results; values <= 0
        disable caching.
    :return: the decorated function, or a decorator when *function* is None.
    """
    def decorator(func):
        cache_dict = OrderedDict()

        @wraps(func)
        def wrapper(*args, **kwargs):
            call_repr = f"args={args}, kwargs={kwargs}"
            try:
                return cache_dict[call_repr]
            except KeyError:
                pass
            print(f"Running function {func.__name__}...")
            # Call first: if func raises, no cached entry has been evicted.
            output = func(*args, **kwargs)
            if max_length > 0:
                if len(cache_dict) >= max_length:
                    cache_dict.popitem(last=False)
                cache_dict[call_repr] = output
            return output

        wrapper.cache = cache_dict
        return wrapper

    return decorator if function is None else decorator(function)
@cache(max_length=3)
def add(x, y):
    return x + y


def main():
    """Exercise the cached ``add`` and show the cache contents."""
    print(f"{add(1, 1)=}")
    print(f"{add(2, 1)=}")
    print(f"{add(1, 1)=}")
    print(f"{add(3, 1)=}")
    print(f"{add(4, 1)=}")
    print(f"{add(1, 1)=}")
    print(f"{add.cache=}")
    add.cache.clear()
    print(f"{len(add.cache)=}")
    print(f"{add.cache=}")


if __name__ == "__main__":
    main()
Output:
Running function add...
add(1, 1)=2
Running function add...
add(2, 1)=3
add(1, 1)=2
Running function add...
add(3, 1)=4
Running function add...
add(4, 1)=5
Running function add...
add(1, 1)=2
add.cache=OrderedDict([('args=(3, 1), kwargs={}', 4), ('args=(4, 1), kwargs={}', 5), ('args=(1, 1), kwargs={}', 2)])
len(add.cache)=0
add.cache=OrderedDict()
Notice the cache was used for the second add(1, 1) call, but not the third.
Details
Uses the pattern allowing the decorator to be used with or without parentheses
Resulting wrapper function has the cache attribute to allow direct access to the underlying OrderedDict
Caching based on the string representation of all function arguments (positional and keyword)
Caveats
Not completely general by any stretch
Works as expected only with argument types that have a deterministic __repr__ without side effects (which is what one would expect, to be fair)
Cannot differentiate between arguments with identical string representations
Clean type annotations may be a bit more involved
Hope this helps.

get the lists of functions used/called within a function in python

Is there any tool/library through which the list of methods/functions called within another methods/functions can be listed?
For example:
If that tool or library runs for below method
def calculate(a: int, b: int, operator: Operator):
    if operator == Operator.add:
        add(a, b)
    elif operator == Operator.subtract:
        subtract(a, b)
then it should return
1. add
2. subtract
This question is almost same as this one but it's for Java.
This is basically same as what PyCharm does for Find Usage.
Thanks!
This seems to do the work:
import dis
def list_func_calls(fn):
    """List the names loaded for each ``CALL_FUNCTION`` instruction in *fn*.

    Only the opcode named exactly ``CALL_FUNCTION`` is recognised; on
    interpreters that use other call opcodes nothing is reported.  The
    lookup assumes every argument is pushed by a single instruction.

    :param fn: function (or other object accepted by ``dis.Bytecode``).
    :return: list of strings such as ``"1. add"``, in call order.
    """
    funcs = []
    # Walk backwards: the instruction loading the callee precedes its call.
    instrs = list(dis.Bytecode(fn))[::-1]
    for ix, instr in enumerate(instrs):
        if instr.opname == "CALL_FUNCTION":
            # Skip the instr.arg argument loads to reach the callee load.
            load_func_instr = instrs[ix + instr.arg + 1]
            funcs.append(load_func_instr.argval)
    return ["%d. %s" % (pos, funcname) for (pos, funcname) in enumerate(reversed(funcs), 1)]
Example:
>>> list_func_calls(calculate)
['1. add', '2. subtract']
What's happening here is:
we make a Bytecode object of the function
we reverse the list of instructions, since the function name will
follow the function call
we step through the list, and for each CALL_FUNCTION instruction,
we use the instructions arg parameter to tell us how many
arguments we're getting
we look one past that to find the instruction that loads the function
we're calling
we add that function's name (instr.argval) to a list which we then
reverse, enumerate, and return in the requested format
Note that since Python 3.6, there are three CALL_FUNCTION instructions, so you'll have to check the documentation to extend this example to be fully functional with current python
Update: added compatibility for Python2.7
Tested and confirmed working with Python2.7, Python3.5 and Python3.6
Credit for pointing out dis goes to Patrick Haugh¹ Implementation (parsing of the dis output) is my own:
Setup:
import dis
import sys
from contextlib import contextmanager
# setup test environment
def a(_, __):
    pass


def b(_, __, ___):
    pass


def c(_):
    pass


def g():
    pass


d = 4
def test(flag):
    # Sample body passed to get_function_calls() in the examples below;
    # note that g(1, x=2) does not match g's signature.
    e = c
    if flag:
        a(a(b,c), [l for l in g(1, x=2)])
    else:
        b(a, int(flag), c(e))
    d = d + 1
def calculate(a, b, operator):
    if operator == Operator.add:
        add(a, b)
    elif operator == Operator.subtract:
        subtract(a, b)


class Operator(object):
    add = "add"
    subtract = "subtract"
Python 2/3 compatibility:
class AttrDict(dict):
    """Dictionary whose items are also reachable as attributes."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Use the dict itself as the instance namespace.
        self.__dict__ = self
@contextmanager # https://stackoverflow.com/a/12111817/2422125
def captureStdOut(output):
    """Temporarily replace ``sys.stdout`` with *output*.

    The previous ``sys.stdout`` is restored on exit, even on error.
    """
    stdout = sys.stdout
    sys.stdout = output
    try:
        yield
    finally:
        sys.stdout = stdout
""" for Python <3.4 """
def get_instructions(func):
    """Fallback for ``dis.get_instructions`` built from the text printed by ``dis.dis``.

    Each line of the captured output becomes an ``AttrDict`` with the keys
    ``opname``, ``arg`` and ``argval``, cut from fixed column ranges.
    """
    import StringIO

    out = StringIO.StringIO()
    with captureStdOut(out):
        dis.dis(func)

    return [AttrDict({
               'opname': i[16:36].strip(),
               'arg': int(i[37:42].strip() or 0),
               'argval': i[44:-1].strip()
           }) for i in out.getvalue().split("\n")]


if sys.version_info < (3, 4):
    dis.get_instructions = get_instructions
    import __builtin__ as builtin
else:
    import builtins as builtin
Code:
def get_function_calls(func, built_ins=False):
    """Return the sorted names of global functions called by *func*.

    Instructions whose opname starts with ``CALL_FUNCTION`` are located and
    the instruction that loaded the callee is looked up by counting back
    over the arguments.

    :param func: object accepted by ``dis.get_instructions``.
    :param built_ins: when true, names found in the builtins module are
        reported as well.
    :return: sorted list of unique function names.
    """
    # the used instructions
    ins = list(dis.get_instructions(func))[::-1]

    # dict for function names (so they are unique)
    names = {}

    # go through call stack
    for i, inst in list(enumerate(ins))[::-1]:
        # find last CALL_FUNCTION
        if inst.opname[:13] == "CALL_FUNCTION":

            # function takes ins[i].arg number of arguments
            ep = i + inst.arg + (2 if inst.opname[13:16] == "_KW" else 1)

            # parse argument list (Python2)
            if inst.arg == 257:
                k = i+1
                while k < len(ins) and ins[k].opname != "BUILD_LIST":
                    k += 1

                ep = k-1

            # LOAD that loaded this function
            entry = ins[ep]

            # ignore list comprehensions / ...
            name = str(entry.argval)
            if "." not in name and entry.opname == "LOAD_GLOBAL" and (built_ins or not hasattr(builtin, name)):
                # save name of this function
                names[name] = True

            # reduce this CALL_FUNCTION and all its paramters to one entry
            ins = ins[:i] + [entry] + ins[ep + 1:]

    return sorted(list(names.keys()))
Output:
> print(get_function_calls(test))
> ['a', 'b', 'c', 'g']
> print(get_function_calls(test, built_ins=True))
> ['a', 'b', 'c', 'g', 'int']
> print(get_function_calls(calculate))
> ['add', 'subtract']
¹As Patrick Haugh's comment about dis is over 2h old I consider this one free for taking...

Identifying pure functions in python

I have a decorator #pure that registers a function as pure, for example:
@pure
def rectangle_area(a,b):
    return a*b

@pure
def triangle_area(a,b,c):
    # Heron's formula; the factors are multiplied together.
    return ((a+(b+c))*(c-(a-b))*(c+(a-b))*(a+(b-c)))**0.5/4
Next, I want to identify a newly defined pure function
def house_area(a,b,c):
    return rectangle_area(a,b) + triangle_area(a,b,c)
Obviously house_area is pure, since it only calls pure functions.
How can I discover all pure functions automatically (perhaps by using ast)
Assuming operators are all pure, then essentially you only need to check all the functions calls. This can indeed be done with the ast module.
First I defined the pure decorator as:
def pure(f):
    """Mark *f* as pure by setting ``f.pure = True`` and return it unchanged."""
    f.pure = True
    return f
Adding an attribute telling that it's pure allows skipping early or "forcing" a function to identify as pure. This is useful if you need a function like math.sin to identify as pure. Since you can't add attributes to builtin functions, wrap it in a function of your own:
@pure
def sin(x):
    return math.sin(x)
All in all. Use the ast module to visit all the nodes. Then for each Call node check whether the function being called is pure.
import ast
class PureVisitor(ast.NodeVisitor):
    """Walk a syntax tree and record whether every call targets a pure function.

    ``pure`` starts as ``True`` and is set to ``False`` as soon as a call
    is found whose callee cannot be resolved or is not pure according to
    ``is_pure``.  ``visited`` is a shared list of callee names already
    examined; it prevents endless recursion on recursive functions.
    """

    def __init__(self, visited):
        super().__init__()
        self.pure = True
        self.visited = visited

    def visit_Name(self, node):
        return node.id

    def visit_Attribute(self, node):
        """Return the dotted name of *node*, or ``None`` if its base is not a name."""
        parts = [node.attr]
        child = node.value
        while isinstance(child, ast.Attribute):
            parts.append(child.attr)
            child = child.value
        if not isinstance(child, ast.Name):
            # e.g. ``f().attr`` or ``items[0].attr``: no dotted name exists.
            return None
        parts.append(child.id)
        return ".".join(reversed(parts))

    def visit_Call(self, node):
        if not self.pure:
            return
        name = self.visit(node.func)
        if name is None:
            # The callee is not a plain or dotted name, so it cannot be
            # looked up; treat it as impure.
            self.pure = False
            return
        if name not in self.visited:
            self.visited.append(name)
            try:
                # NOTE(review): eval() resolves the name in this module's
                # namespace and executes arbitrary expressions; only use it
                # on trusted source code.
                callee = eval(name)
            except (NameError, AttributeError):
                self.pure = False
                return
            if not is_pure(callee, self.visited):
                self.pure = False
                return
        # Calls nested in the arguments must be pure as well.
        for arg in node.args:
            self.visit(arg)
        for keyword in node.keywords:
            self.visit(keyword.value)
Then check whether the function has the pure attribute. If not get code and check if all the functions calls can be classified as pure.
import inspect, textwrap
def is_pure(f, _visited=None):
    """Return whether *f* is pure according to its ``pure`` mark or its calls.

    An object carrying a ``pure`` attribute is answered from that attribute.
    Otherwise the source of ``f.__code__`` is parsed and every call in it
    is checked with ``PureVisitor``.  Objects whose source cannot be
    obtained or parsed are reported as not pure.

    :param f: function to examine.
    :param _visited: internal list of callee names already examined.
    :return: the ``pure`` attribute if present, else a bool.
    """
    try:
        return f.pure
    except AttributeError:
        pass

    try:
        code = inspect.getsource(f.__code__)
    except (AttributeError, OSError, TypeError):
        # No __code__ (e.g. builtins), or no source available for it.
        return False

    code = textwrap.dedent(code)
    try:
        node = ast.parse(code)
    except SyntaxError:
        # getsource works line by line, so the text may not parse alone.
        return False

    if _visited is None:
        _visited = []

    visitor = PureVisitor(_visited)
    visitor.visit(node)
    return visitor.pure
Note that print(is_pure(lambda x: math.sin(x))) doesn't work since inspect.getsource(f.__code__) returns code on a line by line basis. So the source returned by getsource would include the print and is_pure call, thus yielding False. Unless those functions are overridden.
To verify that it works, test it by doing:
print(is_pure(house_area)) # Prints: True
To list through all the functions in the current module:
import sys, types
for k in dir(sys.modules[__name__]):
    v = globals()[k]
    if isinstance(v, types.FunctionType):
        print(k, is_pure(v))
The visited list keeps track of which functions have already been examined. This helps circumvent problems related to recursion: since the code is analysed rather than executed, the evaluation would otherwise keep revisiting a recursive function such as factorial.
@pure
def factorial(n):
    return 1 if n == 1 else n * factorial(n - 1)
Note that you might need to revise the following code. Choosing another way to obtain a function from its name.
try:
callee = eval(name)
if not is_pure(callee, self.visited):
self.pure = False
except NameError:
self.pure = False

How to extract functions used in a python code file?

I would like to create a list of all the functions used in a code file. For example if we have following code in a file named 'add_random.py'
`
import numpy as np
from numpy import linalg
def foo():
print np.random.rand(4) + np.random.randn(4)
print linalg.norm(np.random.rand(4))
`
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual name in the form of 'module.submodule.function'. Is there something built in python language that can help me do this?
You can extract all call expressions with:
import ast
class CallCollector(ast.NodeVisitor):
    """Collect the dotted names of the functions called in a syntax tree.

    After ``visit(tree)`` the list ``calls`` holds one string per call
    expression, e.g. ``'np.random.rand'``.  Only names and attribute chains
    are supported in the function expression; other node types trigger a
    printed warning.

    :param include_nested: when true, calls appearing in the arguments of
        another call are collected too.  The default (``False``) skips call
        arguments.
    """

    def __init__(self, include_nested=False):
        self.calls = []
        # Name being assembled for the call under inspection; None while
        # no function expression is being traced.
        self.current = None
        self.include_nested = include_nested

    def visit_Call(self, node):
        # new call, trace the function expression
        outer = self.current
        self.current = ''
        self.visit(node.func)
        name = self.current
        self.calls.append(name)
        if self.include_nested:
            # Arguments are ordinary expressions, not part of a name.
            self.current = None
            for arg in node.args:
                self.visit(arg)
            for keyword in node.keywords:
                self.visit(keyword.value)
        # When this call is itself part of an enclosing function
        # expression (``foo().bar()``), continue that name.
        self.current = None if outer is None else outer + name + '()'

    def generic_visit(self, node):
        if self.current is not None:
            print("warning: {} node in function expression not supported".format(
                node.__class__.__name__))
        super(CallCollector, self).generic_visit(node)

    # record the func expression
    def visit_Name(self, node):
        if self.current is None:
            return
        self.current += node.id

    def visit_Attribute(self, node):
        if self.current is None:
            # Plain attribute access outside a call: just keep walking.
            self.generic_visit(node)
            return
        self.visit(node.value)
        self.current += '.' + node.attr
Use this with a ast parse tree:
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task. Any indirection would not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.
In general, this problem is undecidable; consider for example getattr(random, "random")().
If you want static analysis, the best there is now is jedi
If you accept dynamic solutions, then coverage is your best friend. It will show all used functions, rather than only the directly referenced ones, though.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging


class Proxy(object):
    """Stand-in for the ``random`` module that logs every attribute lookup."""

    def __getattr__(self, name):
        logging.debug("tried to use random.%s", name)
        return getattr(_random, name)


# Keep a reference to the real module, then shadow its name with the proxy.
_random = random
random = Proxy()

How do I extract the names from a simple function?

I've got this piece of code:
import inspect
import ast
def func(foo):
    return foo.bar - foo.baz


s = inspect.getsource(func)
xx = ast.parse(s)


class VisitCalls(ast.NodeVisitor):
    def visit_Name(self, what):
        if what.id == 'foo':
            print(ast.dump(what.ctx))


VisitCalls().visit(xx)
From function 'func' I'd like to extract:
['foo.bar', 'foo.baz']
or something like:
(('foo', 'bar'), ('foo', 'baz'))
edited
Some background to explain why I think I need to do this
I want to convert the code of a trivial python function to a spreadsheet formula.
So I need to convert:
foo.bar - foo.baz
to:
=A1-B1
sample spreadsheet http://img441.imageshack.us/img441/1451/84516405.png
**edited again**
What I've got so far.
The program below outputs:
('A1', 5)
('B1', 3)
('C1', '= A1 - B1')
The code:
import ast, inspect
import codegen # by Armin Ronacher
from collections import OrderedDict
class SpreadSheetFormulaTransformer(ast.NodeTransformer):
    """Replace attribute expressions by the cell name registered for their value.

    :param sym: mapping from ``id(value)`` to a cell name such as ``'A1'``.
    """

    def __init__(self, sym):
        self.sym = sym

    def visit_Attribute(self, node):
        # NOTE(review): the attribute expression is turned back into source
        # and evaluated with eval() to obtain the object whose id() was
        # registered; only use this on trusted code.
        name = self.sym[id(eval(codegen.to_source(node)))]
        return ast.Name(id=name, ctx=ast.Load())
def create(**kwargs):
    """Return a fresh object whose attributes are the given keyword arguments."""
    class Foo(object):
        pass

    x = Foo()
    x.__dict__.update(kwargs)
    return x
def register(x, y):
    """Store value *x* in cell *y* and remember the cell name under ``id(x)``."""
    cell[y] = x
    sym[id(x)] = y


def func(foo):
    return foo.bar - foo.baz


foo = create(bar=5, baz=3)
cell = OrderedDict()
sym = {}

register(foo.bar, 'A1')
register(foo.baz, 'B1')

source = inspect.getsource(func)
tree = ast.parse(source)
# The expression returned by func: first statement of the first definition.
guts = tree.body[0].body[0].value
SpreadSheetFormulaTransformer(sym).visit(guts)

code = '= ' + codegen.to_source(guts)
cell['C1'] = code

for x in cell.items():
    print(x)
I found some resources here: Python internals: Working with Python ASTs
I grabbed a working codegen module here.
import ast, inspect
import codegen # by Armin Ronacher
def func(foo):
    return foo.bar - foo.baz


names = []


class CollectAttributes(ast.NodeVisitor):
    def visit_Attribute(self, node):
        # Record the source text of every attribute expression.
        names.append(codegen.to_source(node))


source = inspect.getsource(func)

tree = ast.parse(source)
guts = tree.body[0].body[0].value
CollectAttributes().visit(guts)
print(names)
output:
['foo.bar', 'foo.baz']
I am not sure why you need to retrieve the names; a very crude way to get all names and dots in a function is
import inspect
import parser
import symbol
import token
import pprint
def func(foo):
return foo.bar - foo.baz
s = inspect.getsource(func)
st = parser.suite(s)
def search(st):
if not isinstance(st, list):
return
if st[0] in [token.NAME, token.DOT]:
print st[1],
else:
for s in st[1:]:
search(s)
search(parser.ast2list(st))
output:
def func foo return foo . bar foo . baz
Maybe you can improve upon that by reading the syntax tree more elegantly. I am using parser instead of the ast module because I am on Python 2.5
I haven't used the new ast module yet, but I've working code that uses the older compiler.ast to achieve something similar:
def visitGetattr(self, node):
    # Build the dotted name of an attribute chain, innermost attribute first.
    full_name = [node.attrname]
    parent = node.expr
    while isinstance(parent, compiler.ast.Getattr):
        full_name.append(parent.attrname)
        parent = parent.expr
    # NOTE(review): nesting reconstructed from a flattened snippet; the
    # join is assumed to happen only when the chain starts at a plain name.
    if isinstance(parent, compiler.ast.Name):
        full_name.append(parent.name)
        full_name = ".".join(reversed(full_name))
        # do something with full_name
    for c in node.getChildNodes():
        self.visit(c)
Code slightly paraphrased, I may have introduced inadvertent bugs. I hope this gives you the general idea: you need to visit both Name and Getattr nodes and construct dotted names, and also deal with the fact that you'll see all the intermediate values too (e.g. 'foo' and 'foo.bar').

Categories