How do I extract the names from a simple function? - python

I've got this piece of code:
import inspect
import ast
def func(foo):
return foo.bar - foo.baz
s = inspect.getsource(func)
xx = ast.parse(s)
class VisitCalls(ast.NodeVisitor):
def visit_Name(self, what):
if what.id == 'foo':
print ast.dump(what.ctx)
VisitCalls().visit(xx)
From function 'func' I'd like to extract:
['foo.bar', 'foo.baz']
or something like:
(('foo', 'bar'), ('foo', 'baz'))
edited
Some background to explain why I think I need to do this
I want to convert the code of a trivial python function to a spreadsheet formula.
So I need to convert:
foo.bar - foo.baz
to:
=A1-B1
sample spreadsheet http://img441.imageshack.us/img441/1451/84516405.png
**edited again**
What I've got so far.
The program below outputs:
('A1', 5)
('B1', 3)
('C1', '= A1 - B1')
The code:
import ast, inspect
import codegen # by Armin Ronacher
from collections import OrderedDict
class SpreadSheetFormulaTransformer(ast.NodeTransformer):
def __init__(self, sym):
self.sym = sym
def visit_Attribute(self, node):
name = self.sym[id(eval(codegen.to_source(node)))]
return ast.Name(id=name, ctx=ast.Load())
def create(**kwargs):
class Foo(object): pass
x = Foo()
x.__dict__.update(kwargs)
return x
def register(x,y):
cell[y] = x
sym[id(x)] = y
def func(foo):
return foo.bar - foo.baz
foo = create(bar=5, baz=3)
cell = OrderedDict()
sym = {}
register(foo.bar, 'A1')
register(foo.baz, 'B1')
source = inspect.getsource(func)
tree = ast.parse(source)
guts = tree.body[0].body[0].value
SpreadSheetFormulaTransformer(sym).visit(guts)
code = '= ' + codegen.to_source(guts)
cell['C1'] = code
for x in cell.iteritems():
print x
I found some resources here: Python internals: Working with Python ASTs
I grabbed a working codegen module here.

import ast, inspect
import codegen # by Armin Ronacher
def func(foo):
return foo.bar - foo.baz
names = []
class CollectAttributes(ast.NodeVisitor):
def visit_Attribute(self, node):
names.append(codegen.to_source(node))
source = inspect.getsource(func)
tree = ast.parse(source)
guts = tree.body[0].body[0].value
CollectAttributes().visit(guts)
print names
output:
['foo.bar', 'foo.baz']

I am not sure why you need to retrieve the names, but a very crude way to get all names and dots in a function is:
import inspect
import parser
import symbol
import token
import pprint
def func(foo):
return foo.bar - foo.baz
s = inspect.getsource(func)
st = parser.suite(s)
def search(st):
if not isinstance(st, list):
return
if st[0] in [token.NAME, token.DOT]:
print st[1],
else:
for s in st[1:]:
search(s)
search(parser.ast2list(st))
output:
def func foo return foo . bar foo . baz
Maybe you can improve upon that by reading the syntax tree more elegantly. I am using parser instead of the ast module because I am on Python 2.5.

I haven't used the new ast module yet, but I've working code that uses the older compiler.ast to achieve something similar:
def visitGetattr(self, node):
full_name = [node.attrname]
parent = node.expr
while isinstance(parent, compiler.ast.Getattr):
full_name.append(parent.attrname)
parent = parent.expr
if isinstance(parent, compiler.ast.Name):
full_name.append(parent.name)
full_name = ".".join(reversed(full_name))
# do something with full_name
for c in node.getChildNodes():
self.visit(c)
Code slightly paraphrased, I may have introduced inadvertent bugs. I hope this gives you the general idea: you need to visit both Name and Getattr nodes and construct dotted names, and also deal with the fact that you'll see all the intermediate values too (e.g. 'foo' and 'foo.bar').

Related

How to get a list of all non imported names in a Python module?

Given a module containing :
import stuff
from foo import Foo
from bar import *
CST = True
def func(): pass
How can I define a function get_defined_objects so that I can do:
print(get_defined_objects('path.to.module'))
{'CST': True, 'func': <function path.to.module.func>}
Right now the only solution I can imagine is to read the original module file, extract defined names with re.search(r'^(?:def|class )?(\w+)(?:\s*=)?'), then import the module, and find the intersection with __dict__.
Is there something cleaner ?
Here is something for you to start with using ast. Note that this code does not cover all possible cases, although it should handle e.g. multiple assignment properly. Consider investigating ast's data structures and API more closely if you would like to get access to compiled code, for example.
import ast
with open('module.py') as f:
data = f.read()
tree = ast.parse(data)
elements = [el for el in tree.body if type(el) in (ast.Assign, ast.FunctionDef, ast.ClassDef)]
result = {}
for el in elements:
if type(el) == ast.Assign:
for t in el.targets:
if type(el.value) == ast.Call:
result[t.id] = el.value.func.id + '()'
else:
for attr in ['id', 'i', 's']:
try:
result[t.id] = getattr(el.value, attr)
break
except Exception as e:
pass
elif type(el) == ast.FunctionDef:
result[el.name] = '<function %s>' % el.name
else:
result[el.name] = '<class %s>' % el.name
print result
#
mod = "foo"
import ast, inspect
import importlib
mod = importlib.import_module(mod)
p = ast.parse(inspect.getsource(mod))
from collections import defaultdict
data = defaultdict(defaultdict)
for node in p.body:
if isinstance(node, (ast.ImportFrom, ast.Import)):
continue
if isinstance(node, (ast.ClassDef, ast.FunctionDef)):
data["classes"][node.name] = mod.__dict__[node.name]
elif isinstance(node, ast.Assign):
for trg in node.targets:
if isinstance(node.value, ast.Num):
data["assignments"][trg.id] = node.value.n
elif isinstance(node.value, ast.Str):
data["assignments"][trg.id] = node.value.s
else:
data["assignments"][trg.id] = mod.__dict__[trg.id]
Output:
There is a nice explanation here that lists what the different types do and their attributes which this is based on:
class Nodes(ast.NodeVisitor):
    """Walk a module AST, printing what is visited and collecting defined values.

    Function definitions, class definitions and assignments whose value is a
    literal-like node are stored in ``self.data`` as ``name -> object``, the
    object being looked up in the already imported module ``mod``.  All other
    ``visit_*`` hooks only print a description of the node they receive.
    """

    def __init__(self):
        # defaultdict() without a factory behaves like a plain dict; it is kept
        # so that printing ``data`` looks the same as before.
        self.data = defaultdict()
        super(Nodes, self).__init__()

    def visit_FunctionDef(self, node):
        """Record the function object; the function body is not descended into."""
        self.data[node.name] = mod.__dict__[node.name]
        print("In FunctionDef with funcion {}".format(node.name))

    def visit_ClassDef(self, node):
        """Record the class object; the class body is not descended into."""
        self.data[node.name] = mod.__dict__[node.name]

    def visit_Assign(self, node):
        """Record every name bound by an assignment of a literal-like value."""
        literal_types = (ast.Str, ast.Num, ast.Dict, ast.List, ast.ListComp,
                         ast.NameConstant)
        if isinstance(node.value, literal_types):
            for trg in node.targets:
                # A target may be a tuple/list of names or an attribute or
                # subscript; only plain names that are being stored to have
                # an ``id`` that exists in the module namespace.
                for sub in ast.walk(trg):
                    if isinstance(sub, ast.Name) and isinstance(sub.ctx, ast.Store):
                        self.data[sub.id] = mod.__dict__[sub.id]
        self.generic_visit(node)

    def visit_Name(self, node):
        """
        class Name(id, ctx)
        A variable name. id holds the name as a string
        and ctx is one of Load, Store or Del.
        """
        print("In Name with {}\n".format(node.id))

    def visit_Dict(self, node):
        """
        class Dict(keys, values)
        A dictionary. keys and values
        hold lists of nodes with matching order
        """
        print("In Dict keys = {}, values = {}\n".format(node.keys,node.values))

    def visit_Set(self,node):
        """
        class Set(elts)
        A set. elts holds a list of
        nodes representing the elements.
        """
        print("In Set elts = {}\n".format(node.elts))

    def visit_List(self, node):
        """
        class List(elts, ctx)
        elts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x,y)=pt), and Load otherwise.
        """
        print("In List elts = {}\nctx = {}\n".format(node.elts,node.ctx))

    def visit_Tuple(self, node):
        """
        class Tuple(elts, ctx)
        elts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x,y)=pt), and Load otherwise.
        """
        print("In Tuple elts = {}\nctx = {}\n".format(node.elts,node.ctx))

    def visit_NameConstant(self, node):
        """
        class NameConstant(value)
        True, False or None. "value" holds one of those constants.
        """
        print("In NameConstant getting value {}\n".format(node.value))

    def visit_Load(self, node):
        """Print the Load context marker reached through attribute/subscript nodes."""
        # A Load node carries no fields (in particular no ``func``), so the
        # node itself is what gets shown.
        print("In Load with node {}\n".format(ast.dump(node)))

    def visit_Call(self, node):
        """
        class Call(func, args, keywords, starargs, kwargs)
        A function call. func is the function,
        which will often be a Name or Attribute object. Of the arguments:
        args holds a list of the arguments passed by position.
        keywords holds a list of keyword objects representing arguments
        passed by keyword. starargs and kwargs each hold a single node,
        for arguments passed as *args and **kwargs.
        """
        print("In Call with node {}\n".format(node.func))

    def visit_Num(self, node):
        """Print the value of a numeric literal."""
        print("In Num getting value {}\n".format(node.n))

    def visit_Str(self, node):
        """Print the value of a string literal."""
        print("In Str getting value {}\n".format(node.s))
f = Nodes()
f.visit(p)
print(f.data)
A bytecode hack for Python 3.4+. Possible due to dis.get_instructions.
import dis
import importlib
from itertools import islice
import marshal
import os
def consume_iterator(it, n=1):
    """Advance the iterator ``it`` by ``n`` items in place, discarding them.

    Fewer items are consumed if ``it`` runs out first.  Returns None.
    """
    # Exhausting a length-n islice pulls exactly n items from ``it``.
    for _ in islice(it, n):
        pass
def get_defined_names(module_path):
    """Return ``{name: value}`` for names stored at a module's top level, ignoring imports.

    The module is imported by name to obtain the live values, so its
    directory must already be importable.  Its source is compiled and the
    top-level bytecode is scanned for ``STORE_NAME`` instructions; stores that
    belong to an import statement are skipped by consuming the instructions
    that follow ``IMPORT_NAME`` / ``IMPORT_FROM``.

    :param module_path: Path to the module's ``.py`` source file.
    :return: A dict mapping each stored name to ``getattr(module, name)``.
    """
    module_name = os.path.splitext(os.path.basename(module_path))[0]
    module_object = importlib.import_module(module_name)

    # Compile the source rather than unmarshalling the cached .pyc: the cache
    # file name ("cpython-34") and the header length to skip are specific to
    # one interpreter version.  Reading bytes lets compile() honour any
    # encoding declaration in the file.
    with open(module_path, 'rb') as source_file:
        code = compile(source_file.read(), module_path, 'exec')
    # dis.disassemble(code) # see the byte code

    instructions = dis.get_instructions(code)
    objects = {}
    for instruction in instructions:
        if instruction.opname == 'STORE_NAME':
            objects[instruction.argval] = getattr(module_object,
                                                  instruction.argval)
        # NOTE(review): the skip counts below follow the original recipe and
        # depend on how the interpreter lays out import bytecode; confirm on
        # the target interpreter.
        elif instruction.opname == 'IMPORT_NAME':
            consume_iterator(instructions, 2)
        elif instruction.opname == 'IMPORT_FROM':
            consume_iterator(instructions, 1)
    return objects
print(get_defined_names('/Users/ashwini/py/so.py'))
For a file like:
#/Users/ashwini/py/so.py
import os
from sys import argv, modules
from math import *
from itertools import product
CST = True
from itertools import permutations, combinations
from itertools import chain
E = 100
from itertools import starmap
def func(): pass
for x in range(10):
pass
class C:
a = 100
d = 1
The output will be:
{'d': 1, 'E': 100, 'CST': True, 'x': 9, 'func': <function func at 0x10efd0510>, 'C': <class 'so.C'>}
A much better way, as someone already mentioned in the comments, would be to parse the source code using the ast module and find the variable names from there.
While I accepted an answer, it can't hurt to post the solution I ended up using. It's a mix between the other proposals :
import ast
import inspect
import importlib
from types import ModuleType
def extract_definitions(module):
    """ Returns the name and value of objects defined at the top level of the given module.

    Only top-level ``class``, ``def`` and plain assignment statements are
    considered; values are read from the imported module with ``getattr``.

    :param module: A module object or the name of the module to import.
    :return: A dict {'classes': {}, 'functions': {}, 'assignments': {}} containing defined objects in the module.
    """
    if not isinstance(module, ModuleType):
        module = importlib.import_module(module)
    tree = ast.parse(inspect.getsource(module))
    definitions = {'classes': {}, 'functions': {}, 'assignments': {}}
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            definitions["classes"][node.name] = getattr(module, node.name)
        elif isinstance(node, ast.FunctionDef):
            definitions["functions"][node.name] = getattr(module, node.name)
        elif isinstance(node, ast.Assign):
            for target in node.targets:
                # A target can be a bare name, a (nested) tuple/list used for
                # unpacking, or an attribute/subscript.  Only names that are
                # stored to become module attributes, so walk the target and
                # keep those.
                for sub in ast.walk(target):
                    if isinstance(sub, ast.Name) and isinstance(sub.ctx, ast.Store):
                        definitions["assignments"][sub.id] = getattr(module, sub.id)
    return definitions
I added the ability to import from a string or a module object, then removed the parsing of values and replaced it by a simple getattr from the original module.
Untested
def unexported_names(module):
    """Return the names in ``module``'s namespace that are not exported.

    When the module defines ``__all__``, every name missing from it is
    returned.  Otherwise the names starting with an underscore are returned.

    :param module: An imported module object.
    :return: A list of attribute names.
    """
    namespace = module.__dict__
    # Keep the try body to the one lookup that signals "no __all__".
    try:
        exported = set(module.__all__)
    except AttributeError:
        return [name for name in namespace if name.startswith('_')]
    return [name for name in namespace if name not in exported]

Get function object of caller in Python 2.7?

In playing with inspect and reading the other questions here, I still cannot figure out how to get the function object of the caller more cleanly than to load the module by its path and then find the function within that.
In other words, how would you complete the following so that caller() returns a method object?
import inspect
def caller():
frame = inspect.stack()[2]
code = frame[0]
path = frame[1]
line = frame[2]
name = frame[3] # function NAME string
# TODO: now what?
return func
def cry_wolf():
func = caller()
print "%s cried 'WOLF!'" % (func.__name__,)
def peter():
cry_wolf()
Remember, I already know the function name but what I'm trying to access is the function object that the calling code is running in. The result desired is:
peter cried 'WOLF!'
DONE! Thanks to user 61612, I have completed this code:
import imp, inspect, sys
def caller():
    """Return the function object of the function that called our caller.

    The lookup goes through the globals of the calling frame, so it only
    finds functions bound at module level under their own name; anything
    else raises KeyError.
    """
    # stack()[0] is this frame, [1] is the function asking, and [2] is the
    # function that called it.
    record = inspect.stack()[2]
    frame = record[0]
    name = record[3]  # function NAME string
    return frame.f_globals[name]
def cry_wolf():
func = caller()
print "%s cried 'WOLF!'" % (func.__name__,)
def peter():
cry_wolf()
Awesome!
Frame objects have the f_globals attribute:
import inspect
def caller():
    """Look up the caller's caller by name in the globals of its frame."""
    # Each stack record starts with (frame, filename, lineno, function name).
    frame, _filename, _lineno, func_name = inspect.stack()[2][:4]
    return frame.f_globals[func_name]  # <function peter at address>
def cry_wolf():
func = caller()
print("%s cried 'WOLF!'" % (func.__name__,)) # peter cried 'WOLF!'
def peter():
cry_wolf()

How to extract functions used in a python code file?

I would like to create a list of all the functions used in a code file. For example if we have following code in a file named 'add_random.py'
`
import numpy as np
from numpy import linalg
def foo():
print np.random.rand(4) + np.random.randn(4)
print linalg.norm(np.random.rand(4))
`
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual name in the form of 'module.submodule.function'. Is there something built in python language that can help me do this?
You can extract all call expressions with:
import ast
class CallCollector(ast.NodeVisitor):
    """Collect the dotted name of the function expression of call nodes.

    After ``visit(tree)``, ``calls`` holds one string per collected call,
    e.g. ``'np.random.rand'``.  Only names and attribute access are
    understood inside a function expression; other node types trigger a
    printed warning.  Call arguments are not traversed, so calls nested in
    arguments are not collected.
    """

    def __init__(self):
        self.calls = []
        # Name being assembled while inside a function expression, else None.
        self.current = None

    def visit_Call(self, node):
        # New call: trace the function expression.  The enclosing state is
        # saved because a call may itself sit inside another function
        # expression, as in ``f().g()``.
        enclosing = self.current
        self.current = ''
        self.visit(node.func)
        name = self.current
        self.calls.append(name)
        if enclosing is None:
            self.current = None
        else:
            # Let the outer expression continue from the call result.
            self.current = enclosing + name + '()'

    def generic_visit(self, node):
        if self.current is not None:
            print("warning: {} node in function expression not supported".format(
                node.__class__.__name__))
        super(CallCollector, self).generic_visit(node)

    # record the func expression
    def visit_Name(self, node):
        if self.current is None:
            return
        self.current += node.id

    def visit_Attribute(self, node):
        if self.current is None:
            # Not inside a function expression: keep walking, since the
            # attribute's value may contain calls, but do not build a name.
            self.generic_visit(node)
            return
        self.visit(node.value)
        self.current += '.' + node.attr
Use this with a ast parse tree:
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task. Any indirection would not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.
In general, this problem is undecidable; consider for example getattr(random, "random")().
If you want static analysis, the best there is now is jedi
If you accept dynamic solutions, then coverage is your best friend. It will show all used functions, rather than only the directly referenced ones, though.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging
class Proxy(object):
    """Stand-in for the ``random`` module that logs each attribute lookup.

    Any attribute not found on the proxy itself is logged at DEBUG level and
    then fetched from the real module kept in ``_random``.
    """

    def __getattr__(self, name):
        logging.debug("tried to use random.%s", name)
        # getattr is the builtin for dynamic lookup; there is no builtin
        # called ``getattribute``.
        return getattr(_random, name)


# Keep a reference to the real module, then shadow its name with the proxy.
_random = random
random = Proxy()

Search for recursive functions in a Python project

I need to find all functions in a Python project which are recursive (i.e. call themselves).
Any ideas how to approach this?
It's hard to say whether a function is recursive or not before it runs. I would personally use this one with inspect.getclosurevars (added in Python 3.3):
import sys
if sys.version_info >= (3, 3, 0):
from inspect import getclosurevars
def is_recursive(func):
    """Tell whether ``func`` refers to itself through its global namespace.

    Returns True when the global name ``func.__name__`` referenced by the
    function's code is bound to ``func`` itself.  A bare self-reference
    without a call counts as well.
    """
    own_name = func.__name__
    if sys.version_info < (3, 3, 0):
        # We can implement part of it if it's not in our standard library:
        # map every name used by the code to its value in the globals.
        module_globals = func.__globals__
        referenced = dict(
            (name, module_globals.get(name)) for name in func.__code__.co_names
        )
        return referenced.get(own_name) is func
    return getclosurevars(func).globals.get(own_name) is func
It will work correctly in most use cases; just remember to use func_X instead of __X__ for the function attributes on Python 2. It will fail only if a function contains a reference to itself without a call:
def false_recursive():
false_recursive
def true_recursive():
true_recursive()
assert is_recursive(true_recursive), 'Must not fail'
assert not is_recursive(false_recursive), 'See? It fails' # AssertionError: See? It fails
You can parse the source code with ast:
code = """
def f(x):
f(x)
def g(x):
pass
"""
import ast
class FindRecursiveFunctions(ast.NodeVisitor):
    """Collect the names of functions that call themselves by name.

    After ``visit(tree)``, ``recursive_funcs`` is the set of function names
    whose body contains a call ``name(...)`` to the innermost enclosing
    function.  Calls through attributes (``self.f()``, ``mod.f()``) are not
    counted.
    """

    def __init__(self):
        self._current_func = None
        self.recursive_funcs = set()

    def visit_FunctionDef(self, node):
        # Track the innermost enclosing function and restore the previous one
        # on the way out, so that code following the definition is not
        # attributed to it.
        enclosing = self._current_func
        self._current_func = node.name
        try:
            self.generic_visit(node)
        finally:
            self._current_func = enclosing

    # ``async def`` nodes carry the same fields and get the same treatment.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_Call(self, node):
        callee = node.func
        # Only a plain name has an ``id``; attribute, subscript or call
        # expressions used as the callee are skipped.
        if (self._current_func is not None
                and isinstance(callee, ast.Name)
                and callee.id == self._current_func):
            self.recursive_funcs.add(self._current_func)
        self.generic_visit(node)
>>> tree = ast.parse(code)
>>> finder = FindRecursiveFunctions()
>>> finder.visit(tree)
>>> finder.recursive_funcs
set(['f'])

Generate graph of the imports

I'm getting close to my final goal, which is to generate a nice graph between modules and other imported modules.
For example if x imports from y and z, and y imports from t and v I would like to have:
x -> y, z
y -> t, v
Now I already have my import hook defined as below, but running it on a simple file I don't get what I would expect:
python study_imports.py CollectImports simple.py
('study_imports.py', 'study_imports')
Where simple.py actually imports from study_imports.
The problem is that I want to see "simple.py" instead of "study_imports.py", is there a way to get the path of the file actually importing the other module?
class CollectImports(object):
    """
    Import hook, adds each import request to the loaded set and dumps
    them to file
    """

    def __init__(self, output_file):
        self.loaded = set()
        self.output_file = output_file

    def __str__(self):
        return str(self.loaded)

    def cleanup(self):
        """Dump the loaded set to file, one entry per line
        """
        dumped_str = '\n'.join(self.loaded)
        # Close the file deterministically instead of relying on garbage
        # collection to flush it.
        with open(self.output_file, 'w') as out:
            out.write(dumped_str)

    def find_module(self, module_name, package=None):
        """Record the request; returns None."""
        #TODO: try to find the name of the package which is actually
        #importing something else, and how it's doing it
        #use a defaultdict with empty sets as the storage for this job
        # NOTE: __file__ is the file defining this hook, not the importer.
        entry = (__file__, module_name)
        self.loaded.add(str(entry))
Maybe with the inspect module.
Module a.py
import inspect
print inspect.stack()
Module b.py
import a
when running b.py, I got :
[
(<frame object at 0x28a9b70>, '/path/a.py', 5, '<module>', ['print inspect.stack()\n'], 0),
(<frame object at 0x28a9660>, 'b.py', 2, '<module>', ['import to_import\n'], 0)
]
Looks like the second frame contains what you need.
So I looked in snakefood a bit better and I ended up rewriting my code using the AST.
Snakefood still uses the compiler, which is deprecated and much slower than using the ast.
The result is great, for example this is a visitor:
from ast import parse, NodeVisitor
class ImportVisitor(NodeVisitor):
    """Collect the names of the modules imported anywhere in an AST.

    After ``visit(tree)``, ``imported`` is a set of module names:
    ``import a.b`` and ``from a.b import c`` both contribute ``'a.b'``.
    The relative-import level is not recorded.
    """

    def __init__(self):
        self.imported = set()
        super(ImportVisitor, self).__init__()

    def __str__(self):
        return '\n'.join(self.imported)

    def visit_Import(self, node):
        for alias in node.names:
            self.imported.add(alias.name)

    def visit_ImportFrom(self, node):
        if node.module is None:
            # "from . import x": there is no module part, and the imported
            # names are what gets pulled in.  Storing None would also break
            # __str__.
            self.imported.update(alias.name for alias in node.names)
        else:
            self.imported.add(node.module)
Which can be used, for example, as:
def gen_module_imports(mod):
    """Return the set of module names imported by the source file ``mod``.

    :param mod: Path of the Python source file to analyse.
    :return: The visitor's ``imported`` set, or an empty list when the file
        does not parse (a message is printed in that case).
    """
    # Read through a context manager so the handle is closed right away.
    with open(mod) as source_file:
        source = source_file.read()
    try:
        at = parse(source)
    except SyntaxError:
        print("file %s has a syntax error, please fix it" % mod)
        return []
    v = ImportVisitor()
    v.visit(at)
    return v.imported
The inspect trick seems to work fine :)
I get something like simple.py: set(['study_imports']) in the imports.log.
class CollectImports(object):
    """
    Import hook, adds each import request to the loaded set and dumps
    them to file
    """

    def __init__(self, output_file):
        # Maps the file name of the requesting frame to the set of module
        # names it asked for.
        self.loaded = defaultdict(set)
        self.output_file = output_file

    def __str__(self):
        return str(self.loaded)

    def cleanup(self):
        """Dump the loaded mapping to file, one "file: modules" line per entry
        """
        dumped_str = '\n'.join(
            '%s: %s' % (k, v) for k, v in self.loaded.items())
        # Close the file deterministically instead of relying on garbage
        # collection to flush it.
        with open(self.output_file, 'w') as out:
            out.write(dumped_str)

    def find_module(self, module_name, package=None):
        """Record which file requested ``module_name``; returns None."""
        # stack()[0] is this method; [1] is the frame that called it, and
        # item 1 of a stack record is that frame's file name.
        importer_path = inspect.stack()[1][1]
        self.loaded[importer_path].add(module_name)

Categories