Preface
There is a typed_ast library which is used for cross-Python AST parsing & processing (e.g. in mypy project1).
Problem
I wonder if there is a way to compile nodes the same way as it is for standard ast module?
Because this works
import ast
code = compile(ast.parse('print("Hello World!")'), '<ast>', 'exec')
eval(code) # Hello World!
but this
from typed_ast import ast3
code = compile(ast3.parse('print("Hello World!")'), '<ast>', 'exec') # raises exception
eval(code)
gives me
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: compile() arg 1 must be a string, bytes or AST object
Analysis
I know there is a helper class for converting between typed_ast.ast27 and typed_ast.ast3, but couldn't find a similar one for typed_ast.ast3 -> ast conversion.
Also I'm aware of typed-astunparse package, but it creates source code as string, which is not an option because I'm using some hacks that keep AST compile'able, but not unparse-parse'able.
And finally there is ast3.dump function which docs say that
... if evaluation is wanted *annotate_fields* must be set to False...
so it looks like there may be a way to evaluate generated dump string? Or maybe there is a way to load this string from ast?
Or should I write my own ast3.NodeTransformer class that performs this kind of conversion?
1: proof
My solution so far with custom ast3.NodeTransformer (tested on Python3.5)
import ast
from functools import partial
from itertools import chain
from typed_ast import ast3
def to_visitor(cls):
    """Build a ``visit_<Name>`` method converting a ``typed_ast.ast3`` node of
    type *cls* into the equivalent stdlib ``ast`` node.

    Returns a visitor that always yields ``None`` when *cls* has no stdlib
    counterpart, which makes the transformer drop such nodes.
    """
    def none(_):
        return None
    try:
        plain_cls = getattr(ast, cls.__name__)
    except AttributeError:
        # node type is not found in `ast` module, skipping
        return none
    def visit(self, node):
        # Convert the children first, then rebuild this node from exactly the
        # fields the stdlib class declares (typed_ast-only fields such as
        # type comments are thereby dropped).
        node = self.generic_visit(node)
        result = plain_cls(*map(partial(getattr, node), plain_cls._fields))
        # Preserve lineno/col_offset so the result stays compile()-able.
        return ast3.copy_location(result, node)
    return visit
def to_subclasses(cls,
                  *,
                  deep=True):
    """Yield the subclasses of *cls*.

    Direct subclasses are yielded first; when *deep* is true, each direct
    subclass is then expanded recursively (pre-order).
    """
    direct = cls.__subclasses__()
    yield from direct
    if not deep:
        return
    for subclass in direct:
        yield from to_subclasses(subclass, deep=deep)
class TypedToPlain(ast3.NodeTransformer):
    """Transformer converting a ``typed_ast.ast3`` tree into a stdlib ``ast``
    tree so the result can be passed to ``compile()``.

    Visitor methods are generated eagerly for every ast3 node class and
    looked up through ``__getattr__``.
    """

    # One synthesized visitor per concrete ast3 node class.
    visitors = {'visit_' + cls.__name__: to_visitor(cls)
                for cls in set(to_subclasses(ast3.AST))}

    def __getattr__(self, name):
        # Bind the synthesized visitor to this instance on demand.
        return partial(self.visitors[name], self)

    def generic_visit(self, node):
        # Mirrors stdlib NodeTransformer.generic_visit, but the isinstance
        # checks distinguish ast3 children (to be converted) from already
        # converted stdlib ``ast`` nodes (to be kept as-is).
        for field, old_value in ast3.iter_fields(node):
            if isinstance(old_value, list):
                new_values = []
                for value in old_value:
                    if isinstance(value, ast3.AST):
                        value = self.visit(value)
                        if value is None:
                            continue
                        elif not isinstance(value, ast.AST):
                            # Visitor returned a list of nodes: splice it in.
                            new_values.extend(value)
                            continue
                    new_values.append(value)
                old_value[:] = new_values
            elif isinstance(old_value, ast3.AST):
                new_node = self.visit(old_value)
                if new_node is None:
                    delattr(node, field)
                else:
                    setattr(node, field, new_node)
        return node
Test
from typed_ast import ast3
code = compile(TypedToPlain().visit(ast3.parse('print("Hello World!")')),
'<ast>', 'exec')
eval(code) # Hello World!
Related
I am writing a function build_hierarchy_config within a class 'P', that calls recursively recursive_build_hierarchy_config .
This function called on instance P1 iterates over complex dict structure ('config' attribute) and compares all items with same 'config' attribute from another 'P' instance (called P2), that is parent to P1.
The comparison logic is split into a few functions:
recursive call is recursive_build_hierarchy_config
comparison of dicts (iterating over complex structure) is in
get_more_strict_config
comparing single item is in cmp_config_item
So the code looks like below:
import decimal
from typing import Optional
from attrdict import AttrDict
class Config(AttrDict):
    """Represent P config."""

    def __init__(self, some_dict):
        # Thin wrapper: all behaviour comes from attrdict.AttrDict.
        # Zero-argument super() is equivalent to super(Config, self) here.
        super().__init__(some_dict)
class P(dict):
    """Represents P object."""

    def build_hierarchy_config(self,
                               hierarchy_config_type: str):
        """Fill the hierarchy_config or parent_hierarchy_config attribute.

        :param hierarchy_config_type: 'hierarchy_config' fills
            ``self.hierarchyConfig`` starting from ``self.id``;
            'parent_hierarchy_config' fills ``self.parenthierarchyConfig``
            starting from ``self.parentPieceId``.
        """
        def cmp_config_item(slave: Optional[decimal.Decimal],
                            master: Optional[decimal.Decimal],
                            cmp_func: str) -> Optional[decimal.Decimal]:
            """Compare two numbers with custom 'None' handling."""
            # Bug fix: dispatch straight to the builtin instead of eval().
            # eval() round-tripped the operands through str(), silently
            # converting Decimal to int/float, and executed arbitrary text;
            # a dict lookup is safe and type-preserving.
            comparators = {'min': min, 'max': max}
            if cmp_func not in comparators:
                # ValueError subclasses Exception, so existing callers that
                # caught Exception keep working.
                raise ValueError(
                    f'comparison function {cmp_func} not supported')
            if slave is None:
                return master  # both None -> None; one None -> the other
            if master is None:
                return slave
            return comparators[cmp_func](slave, master)

        def get_more_strict_config(config, parent_config) -> Config:
            """Combine configs and return most strict combination of values."""
            # some logic with few for and if/else
            return config

        def recursive_build_hierarchy_config(pieceId: Optional[str]):
            # Walk up the parent chain; a None id terminates the recursion.
            if pieceId is None:
                return None
            # NOTE(review): a fresh P() has no parentPieceId/config set here;
            # presumably the real code loads the piece by id -- confirm.
            p = P()
            parent_config = recursive_build_hierarchy_config(
                p.parentPieceId)
            return get_more_strict_config(p.config, parent_config)

        if hierarchy_config_type == 'hierarchy_config':
            self.hierarchyConfig = recursive_build_hierarchy_config(
                self.id)
        elif hierarchy_config_type == 'parent_hierarchy_config':
            self.parenthierarchyConfig = recursive_build_hierarchy_config(
                self.parentPieceId)
but then running flake8 gives me the error:
C901 'P.build_hierarchy_config' is too complex (12)
Which means that complexity for build_hierarchy_config sums complexity for all sub-functions.
Option 1
I can easily fix it by moving function cmp_config_item out of build_hierarchy_config namespace (not sure if this is proper wording) and locating in the main scope:
import decimal
from typing import Optional
from attrdict import AttrDict
class Config(AttrDict):
    """Represent P config."""

    def __init__(self, some_dict):
        # Thin wrapper: all behaviour comes from attrdict.AttrDict.
        super(Config, self).__init__(some_dict)
class P(dict):
    """Represents P object."""

    def build_hierarchy_config(self,
                               hierarchy_config_type: str):
        """Fill the hierarchy_config or parent_hierarchy_config attribute.

        Relies on the module-level ``cmp_config_item`` helper (moved out of
        this method to reduce cyclomatic complexity).
        """
        def get_more_strict_config(config, parent_config) -> Config:
            """Combine configs and return most strict combination of values."""
            # some logic with few for and if/else
            return config

        def recursive_build_hierarchy_config(pieceId: Optional[str]):
            # Walk up the parent chain; a None id terminates the recursion.
            if pieceId is None:
                return None
            else:
                # NOTE(review): a fresh P() has no parentPieceId/config set
                # here; presumably the real code loads the piece by its id
                # -- confirm against the full implementation.
                p = P()
                parent_config = recursive_build_hierarchy_config(
                    p.parentPieceId)
                return get_more_strict_config(p.config, parent_config)

        if hierarchy_config_type == 'hierarchy_config':
            self.hierarchyConfig = recursive_build_hierarchy_config(
                self.id)
        elif hierarchy_config_type == 'parent_hierarchy_config':
            self.parenthierarchyConfig = recursive_build_hierarchy_config(
                self.parentPieceId)
def cmp_config_item(slave: Optional[decimal.Decimal],
                    master: Optional[decimal.Decimal],
                    cmp_func: str) -> Optional[decimal.Decimal]:
    """Compare two numbers with custom 'None' handling.

    :param slave: first operand, or None.
    :param master: second operand, or None.
    :param cmp_func: either 'min' or 'max'.
    :return: min/max of both operands when both are set, the single
        non-None operand otherwise, or None when both are None.
    :raises ValueError: if *cmp_func* is neither 'min' nor 'max'.
    """
    # Bug fix: dispatch to the builtin via a dict instead of eval().
    # eval() stringified the operands, silently turning Decimal into
    # int/float, and executed arbitrary text in cmp_func's place.
    comparators = {'min': min, 'max': max}
    if cmp_func not in comparators:
        # ValueError subclasses Exception, so existing callers that
        # caught Exception keep working.
        raise ValueError(
            f'comparison function {cmp_func} not supported')
    if slave is None:
        return master  # both None -> None; one None -> the other
    if master is None:
        return slave
    return comparators[cmp_func](slave, master)
Now flake8 does not complain anymore. But this makes the function cmp_config_item available to the whole scope of the module, and makes the code less structured (there is no indication that this function is designated for use in a selected scope).
Option 2
I can silence this message for function by adding # noqa: ignore=C901 after def build_hierarchy_config as described in another thread.
But this suppress checking the function build_hierarchy_config itself, so will not be accepted.
Both solutions are trade offs.
I am not an OOP expert, so the question is:
Is there any proper/better way to organise those subroutines that would be
in line with object-oriented programming rules?
I have this file pluralizer.py containing functions and a class which use the re module:
from re import *
def pluralize(noun, funcs):
    """Return *noun* pluralized by the first rule in *funcs* that matches.

    *funcs* is an iterable of ``(matches_rule, apply_rule)`` pairs; raises
    ValueError when no rule matches.
    """
    applicable = (apply_rule
                  for matches_rule, apply_rule in funcs
                  if matches_rule(noun))
    for apply_rule in applicable:
        return apply_rule(noun)
    raise ValueError("no matching rule for {0}".format(noun))
def build_match_and_apply_functions(pattern, search, replace):
    """Build ``(matches_rule, apply_rule)`` closures for one pluralization rule.

    ``matches_rule(word)`` tests *word* against *pattern*;
    ``apply_rule(word)`` substitutes *replace* for *search* in *word*.

    Bug fix: this module uses ``from re import *`` (star-import), which does
    NOT bind the name ``re`` itself, so ``re.search``/``re.sub`` raised
    ``NameError: name 're' is not defined``.  A local import of the module
    makes the closures self-contained.
    """
    import re  # the top-level star-import does not bind the module name

    def matches_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (matches_rule, apply_rule)
class LazyRules:
    """Lazily parse the rules file into (match, apply) function pairs.

    Iterating an instance yields one rule pair per line of the rules file;
    lines are read on demand and memoized in ``self.cache`` so the file is
    read at most once, even across multiple iterations.
    """
    rules_filename = 'rules.txt' #a class variable - shared across all instances of the LazyRules class

    def __init__(self):
        # The file stays open between __next__ calls and is closed once
        # it has been read to the end.
        self.pattern_file = open(self.rules_filename, encoding="utf-8")
        self.cache=[]

    def __iter__(self):
        # Restart iteration; already-built rules are served from the cache.
        self.cache_index=0
        return self #returning self signals that this class defines a __next__ method

    def __next__(self):
        self.cache_index += 1
        # Serve from the cache first so earlier rules never re-read the file.
        if len(self.cache) >= self.cache_index:
            return self.cache[self.cache_index-1]
        if self.pattern_file.closed:
            raise StopIteration
        line = self.pattern_file.readline()
        if not line: #if there's a line to read, it will not be an empty string (even if new row, it will be "\n")
            self.pattern_file.close()
            raise StopIteration
        # Whitespace-separated columns: pattern, search, replace.
        pattern,search,replace= line.split(None,3)
        funcs = build_match_and_apply_functions(pattern,search,replace)
        self.cache.append(funcs) # before returning the match&apply functions, we save them in the list self.cache
        return funcs
There's also the data file rules.txt:
[sxz]$ $ es
[^aeioudgkprt]h$ $ es
[^aeiou]y$ y$ ies
$ $ s
The way it's supposed to work is:
import pluralizer
funcs = pluralizer.LazyRules()
p = pluralizer.pluralize("baby", funcs)
from which the expected output is "babies", but I get:
NameError: name 're' is not defined
Placing import re inside pluralize function didn't work either. How come the re module 'refuses' to import? I searched old questions but didn't find an answer, sorry if I overlooked it. Thanks!
P.S. Code is from 'Dive Into Python 3' by Mark Pilgrim
works for me as follows, before running it, I make sure to change the working directory within the python shell
import os
os.chdir('whatever your working directory and files are')
The code in my 'lazyrules.py' file looks like
import re
def build_match_and_apply_functions(pattern, search, replace):
    """Return a ``(matcher, transformer)`` closure pair for one rule.

    The first closure tests a word against *pattern*; the second rewrites
    the word by substituting *replace* for *search*.
    """
    def rule_matches(word):
        # closes over `pattern`
        return re.search(pattern, word)

    def rule_applies(word):
        # closes over `search` and `replace`
        return re.sub(search, replace, word)

    return (rule_matches, rule_applies)
def plural(noun, funcs):
    """Return the plural form of *noun*.

    *funcs* is an iterable of ``(predicate, transform)`` pairs; the first
    pair whose predicate accepts *noun* produces the result.  Raises
    ValueError when no rule matches.
    """
    for rule in funcs:
        predicate, transform = rule
        if not predicate(noun):
            continue
        return transform(noun)
    raise ValueError('no matching rule for {0}'.format(noun))
class LazyRules:
    """Iterable that lazily builds (match, apply) rule pairs from a file.

    Each line of the rules file becomes one pair, created on demand by
    ``build_match_and_apply_functions`` and memoized in ``self.cache`` so
    the file is read at most once across all iterations.
    """
    # Shared across all instances; resolved relative to the process CWD.
    rules_filename = 'plural6-rules.txt'

    def __init__(self):
        # The file stays open between __next__ calls; it is closed when
        # fully consumed.
        self.pattern_file = open(self.rules_filename, encoding='utf-8')
        self.cache = []

    def __iter__(self):
        # Restart iteration; cached rules are replayed before reading more.
        self.cache_index = 0
        return self

    def __next__(self):
        self.cache_index += 1
        # Serve from the cache first so earlier rules never re-read the file.
        if len(self.cache) >= self.cache_index:
            return self.cache[self.cache_index - 1]
        if self.pattern_file.closed:
            raise StopIteration
        line = self.pattern_file.readline()
        # An empty string means EOF (a blank row still contains "\n").
        if not line:
            self.pattern_file.close()
            raise StopIteration
        # Whitespace-separated columns: pattern, search, replace.
        pattern, search, replace = line.split(None, 3)
        funcs = build_match_and_apply_functions(pattern, search, replace)
        # Memoize before returning so the next pass is served from cache.
        self.cache.append(funcs)
        return funcs
rules = LazyRules()
I have a decorator #pure that registers a function as pure, for example:
#pure  # NOTE(review): almost certainly a garbled "@pure" decorator (the "@" was lost in formatting) -- confirm
def rectangle_area(a,b):
    """Return the area of an a-by-b rectangle."""
    return a*b
#pure  # NOTE(review): almost certainly a garbled "@pure" decorator (the "@" was lost in formatting) -- confirm
def triangle_area(a, b, c):
    """Return the area of a triangle with side lengths a, b, c.

    Uses the numerically stable (Kahan-style) grouping of Heron's formula.
    Bug fix: the multiplication operators between the four factors had been
    swallowed by markdown italics, leaving tuple "calls" such as
    ``(a+(b+c))(c-(a-b))`` that raise TypeError at runtime.
    """
    return ((a + (b + c)) * (c - (a - b)) * (c + (a - b)) * (a + (b - c))) ** 0.5 / 4
Next, I want to identify a newly defined pure function
def house_area(a,b,c):
    """Area of a "house": an a-by-b rectangle topped by an (a, b, c) triangle.

    Pure by composition: it only calls the pure functions rectangle_area
    and triangle_area.
    """
    return rectangle_area(a,b) + triangle_area(a,b,c)
Obviously house_area is pure, since it only calls pure functions.
How can I discover all pure functions automatically (perhaps by using ast)
Assuming operators are all pure, then essentially you only need to check all the functions calls. This can indeed be done with the ast module.
First I defined the pure decorator as:
def pure(f):
    """Decorator: mark *f* as pure by tagging it with ``f.pure = True``.

    The tag lets ``is_pure`` short-circuit, and allows forcing functions
    (e.g. thin wrappers over builtins) to be treated as pure.
    """
    setattr(f, "pure", True)
    return f
Adding an attribute telling that it's pure, allows skipping early or "forcing" a function to identify as pure. This is useful if you'd need a function like math.sin to identify as pure. Additionally since you can't add attributes to builtin functions.
#pure  # NOTE(review): almost certainly a garbled "@pure" decorator (the "@" was lost in formatting) -- confirm
def sin(x):
    """Wrapper marking math.sin as pure (attributes cannot be set on builtins).

    Assumes ``math`` is imported at module level -- not visible here, confirm.
    """
    return math.sin(x)
All in all. Use the ast module to visit all the nodes. Then for each Call node check whether the function being called is pure.
import ast
class PureVisitor(ast.NodeVisitor):
    """AST visitor that checks whether every call targets a pure function.

    ``self.pure`` starts True and is latched to False as soon as a call to
    a non-pure (or unresolvable) name is found.  *visited* is shared across
    recursive ``is_pure`` checks so cycles (e.g. recursive functions) do
    not loop forever.
    """
    def __init__(self, visited):
        super().__init__()
        self.pure = True        # verdict so far; latches to False
        self.visited = visited  # names already under examination (cycle guard)

    def visit_Name(self, node):
        # Return the plain identifier, e.g. "sin".
        return node.id

    def visit_Attribute(self, node):
        # Rebuild a dotted name such as "math.sin" from the attribute chain.
        name = [node.attr]
        child = node.value
        while child is not None:
            if isinstance(child, ast.Attribute):
                name.append(child.attr)
                child = child.value
            else:
                # assumes the chain bottoms out in a Name node -- TODO confirm
                name.append(child.id)
                break
        name = ".".join(reversed(name))
        return name

    def visit_Call(self, node):
        if not self.pure:
            return  # already proven impure; skip further work
        name = self.visit(node.func)
        if name not in self.visited:
            self.visited.append(name)
            try:
                # SECURITY NOTE(review): eval() resolves the dotted name in
                # this module's globals; it is fragile and unsafe on
                # untrusted input (see the caveat in the surrounding text).
                callee = eval(name)
                if not is_pure(callee, self.visited):
                    self.pure = False
            except NameError:
                # Name cannot be resolved here: conservatively impure.
                self.pure = False
Then check whether the function has the pure attribute. If not get code and check if all the functions calls can be classified as pure.
import inspect, textwrap
def is_pure(f, _visited=None):
    """Return True if *f* is (recursively) classified as a pure function.

    A function is pure when it carries the ``pure`` tag set by the decorator,
    or when every call in its source is itself pure.  *_visited* is the
    shared cycle guard threaded through recursive checks.
    """
    try:
        # Fast path: explicitly tagged by the @pure decorator.
        return f.pure
    except AttributeError:
        pass
    try:
        code = inspect.getsource(f.__code__)
    except AttributeError:
        # Builtins / C functions expose no __code__: cannot analyse them.
        return False
    # Dedent so method bodies parse as a standalone module.
    code = textwrap.dedent(code)
    node = compile(code, "<unknown>", "exec", ast.PyCF_ONLY_AST)

    if _visited is None:
        _visited = []
    visitor = PureVisitor(_visited)
    visitor.visit(node)
    return visitor.pure
Note that print(is_pure(lambda x: math.sin(x))) doesn't work since inspect.getsource(f.__code__) returns code on a line by line basis. So the source returned by getsource would include the print and is_pure call, thus yielding False. Unless those functions are overridden.
To verify that it works, test it by doing:
print(is_pure(house_area)) # Prints: True
To list through all the functions in the current module:
import sys, types
for k in dir(sys.modules[__name__]):
v = globals()[k]
if isinstance(v, types.FunctionType):
print(k, is_pure(v))
The visited list keeps track of which functions have already been verified pure. This help circumvent problems related to recursion. Since the code isn't executed, the evaluation would recursively visit factorial.
# NOTE(review): the leading "#pure" in the original is likely a garbled
# "@pure" decorator (the "@" was lost in formatting).
#pure
def factorial(n):
    """Return n! for n >= 1, computed recursively (n == 0 is not handled)."""
    if n == 1:
        return 1
    return n * factorial(n - 1)
Note that you might need to revise the following code. Choosing another way to obtain a function from its name.
try:
callee = eval(name)
if not is_pure(callee, self.visited):
self.pure = False
except NameError:
self.pure = False
Given a module containing :
import stuff
from foo import Foo
from bar import *
CST = True
def func(): pass
How can I define a function get_defined_objects so that I can do:
print(get_defined_objects('path.to.module'))
{'CST': True, 'func', <function path.to.module.func>}
Right now the only solution I can imagine is to read the original module file, extract defined names with re.search(r'^(?:def|class )?(\w+)(?:\s*=)?' then import the module, and find the intersection with __dict__.
Is there something cleaner ?
Here is something for you to start with using ast. Note that this code does not cover all possible cases, although it should handle e.g. multiple assignment properly. Consider investigating ast's data structures and API more closely if you would like to get access to compiled code, for example.
# Summarise the top-level assignments, functions and classes of module.py by
# statically walking its AST.  Python 2 syntax: note the print statement.
import ast

with open('module.py') as f:
    data = f.read()
tree = ast.parse(data)
# Keep only the top-level statement kinds we know how to summarise.
elements = [el for el in tree.body if type(el) in (ast.Assign, ast.FunctionDef, ast.ClassDef)]

result = {}
for el in elements:
    if type(el) == ast.Assign:
        for t in el.targets:
            if type(el.value) == ast.Call:
                # Represent calls as "funcname()" instead of evaluating them.
                result[t.id] = el.value.func.id + '()'
            else:
                # Probe the literal-value attributes in turn: 'id' (Name),
                # 's' (Str).  NOTE(review): 'i' is not an ast attribute --
                # it always falls through, harmlessly, to the next candidate.
                for attr in ['id', 'i', 's']:
                    try:
                        result[t.id] = getattr(el.value, attr)
                        break
                    except Exception as e:
                        pass
    elif type(el) == ast.FunctionDef:
        result[el.name] = '<function %s>' % el.name
    else:
        # Remaining kind after the filter above: ast.ClassDef.
        result[el.name] = '<class %s>' % el.name

print result
#
mod = "foo"
import ast, inspect
import importlib
mod = importlib.import_module(mod)
p = ast.parse(inspect.getsource(mod))
from collections import defaultdict
data = defaultdict(defaultdict)
for node in p.body:
if isinstance(node, (ast.ImportFrom, ast.Import)):
continue
if isinstance(node, (ast.ClassDef, ast.FunctionDef)):
data["classes"][node.name] = mod.__dict__[node.name]
elif isinstance(node, ast.Assign):
for trg in node.targets:
if isinstance(node.value, ast.Num):
data["assignments"][trg.id] = node.value.n
elif isinstance(node.value, ast.Str):
data["assignments"][trg.id] = node.value.s
else:
data["assignments"][trg.id] = mod.__dict__[trg.id]
Output:
There is a nice explanation here that lists what the different node types do and what attributes they have; the following is based on it:
class Nodes(ast.NodeVisitor):
    """NodeVisitor that records top-level definitions and prints node info.

    NOTE(review): relies on a module-level global ``mod`` (the imported
    module object) for looking up runtime values -- confirm it is in scope.
    """
    def __init__(self):
        # name -> runtime object, filled in by the visit_* methods below.
        self.data = defaultdict()
        super(Nodes, self).__init__()

    def visit_FunctionDef(self, node):
        # The function object is fetched from the live module by name.
        self.data[node.name] = mod.__dict__[node.name]
        print("In FunctionDef with funcion {}".format(node.name))

    def visit_ClassDef(self, node):
        self.data[node.name] = mod.__dict__[node.name]

    def visit_Assign(self, node):
        # Only record assignments whose value is a simple literal/display.
        for trg in node.targets:
            if isinstance(node.value, (ast.Str, ast.Num, ast.Dict, ast.List, ast.ListComp, ast.NameConstant)):
                self.data[trg.id] = mod.__dict__[trg.id]
        self.generic_visit(node)

    def visit_Name(self, node):
        """
        class Name(idctx)
        A variable name. id holds the name as a string
        and ctx is either class Load class Store class Del.
        """
        print("In Name with {}\n".format(node.id))
    #

    def visit_Dict(self, node):
        """
        class Dict(keys, values)
        A dictionary. keys and values
        hold lists of nodes with matching order
        """
        print("In Dict keys = {}, values = {}\n".format(node.keys,node.values))

    def visit_Set(self,node):
        """
        class Set(elts)
        A set. elts holds a list of
        nodes representing the elements.
        """
        print("In Set elts = {}\n".format(node.elts))

    def visit_List(self, node):
        """
        class List(eltsctx)
        lts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x,y)=pt), and Load otherwise.
        """
        print("In List elts = {}\nctx = {}\n".format(node.elts,node.ctx))

    def visit_Tuple(self, node):
        """
        class Tuple(eltsctx)
        lts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x,y)=pt), and Load otherwise.
        """
        print("In Tuple elts = {}\nctx = {}\n".format(node.elts,node.ctx))

    def visit_NameConstant(self, node):
        """
        class NameConstant(value)
        True, False or None. "value" holds one of those constants.
        """
        print("In NameConstant getting value {}\n".format(node.value))

    def visit_Load(self, node):
        # NOTE(review): ast.Load has no ``func`` attribute; this would raise
        # AttributeError if it ever fired -- verify before relying on it.
        print("In Load with node {}\n".format(node.func))

    def visit_Call(self, node):
        """
        class Call(func, args, keywords, starargs, kwargs)
        A function call. func is the function,
        which will often be a Name or Attribute object. Of the arguments:
        args holds a list of the arguments passed by position.
        keywords holds a list of keyword objects representing arguments
        passed by keyword.starargs and kwargs each hold a single node,
        for arguments passed as *args and **kwargs.
        """
        print("In Call with node {}\n".format(node.func))

    def visit_Num(self, node):
        print("In Num getting value {}\n".format(node.n))

    def visit_Str(self, node):
        print("In Str getting value {}\n".format(node.s))
f = Nodes()
f.visit(p)
print(f.data)
A bytecode hack for Python 3.4+. Possible due to dis.get_instructions.
import dis
import importlib
from itertools import islice
import marshal
import os
def consume_iterator(it, n=1):
    """Advance iterator *it* by up to *n* items, discarding them.

    Stops silently if *it* is exhausted before *n* items were consumed.
    """
    for _ in islice(it, n):
        pass
def get_defined_names(module_path):
    """Map names defined at the top level of a module to their values, by
    scanning STORE_NAME instructions in the module's compiled .pyc file.

    Imported names are excluded by advancing past the instructions that
    follow IMPORT_NAME / IMPORT_FROM, so their STORE_NAMEs are never seen.

    NOTE(review): hard-codes the CPython 3.4 pyc naming scheme and its
    12-byte header; both are interpreter-version specific.

    :param module_path: filesystem path to the module's .py file.
    :return: dict mapping defined names to their runtime values.
    """
    path, module_name = os.path.split(module_path)
    module_name = module_name[:-3]  # strip the '.py' suffix
    # The module must be importable so values can be fetched by name.
    module_object = importlib.import_module(module_name)
    pyc_name = '{}.cpython-34.pyc'.format(module_name)
    pyc_path = os.path.join(path, '__pycache__/', pyc_name)
    with open(pyc_path, 'rb') as f:
        f.read(12) # drop the first 12 bytes
        code = marshal.load(f)
    # dis.disassemble(code) # see the byte code
    instructions = dis.get_instructions(code)
    objects = {}
    for instruction in instructions:
        if instruction.opname == 'STORE_NAME':
            objects[instruction.argval] = getattr(module_object,
                                                  instruction.argval)
        elif instruction.opname == 'IMPORT_NAME':
            # Skip the two follow-up instructions of an import statement
            # so the imported name's STORE_NAME is not recorded.
            consume_iterator(instructions, 2)
        elif instruction.opname == 'IMPORT_FROM':
            consume_iterator(instructions, 1)
    return objects
print(get_defined_names('/Users/ashwini/py/so.py'))
For a file like:
#/Users/ashwini/py/so.py
import os
from sys import argv, modules
from math import *
from itertools import product
CST = True
from itertools import permutations, combinations
from itertools import chain
E = 100
from itertools import starmap
def func(): pass
for x in range(10):
pass
class C:
a = 100
d = 1
The output will be:
{'d': 1, 'E': 100, 'CST': True, 'x': 9, 'func': <function func at 0x10efd0510>, 'C': <class 'so.C'>}
A much better way, as someone already mentioned in the comments, would be to parse the source code using the ast module and find the variable names from there.
While I accepted an answer, it can't hurt to post the solution I ended up using. It's a mix between the other proposals :
import ast
import inspect
import importlib
from types import ModuleType
def extract_definitions(module):
    """ Returns the name and value of objects defined at the top level of the given module.

    :param module: A module object or the name of the module to import.
    :return: A dict {'classes': {}, 'functions': {}, 'assignments': {}} containing defined objects in the module.
    """
    if not isinstance(module, ModuleType):
        module = importlib.import_module(module)

    tree = ast.parse(inspect.getsource(module))

    definitions = {'classes': {}, 'functions': {}, 'assignments': {}}
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            definitions["classes"][node.name] = getattr(module, node.name)
        elif isinstance(node, ast.FunctionDef):
            definitions["functions"][node.name] = getattr(module, node.name)
        elif isinstance(node, ast.Assign):
            # Bug fix: walk each target so tuple/list unpacking targets
            # (e.g. ``a, b = 1, 2``) are handled too; the previous
            # ``target.id`` raised AttributeError on ast.Tuple targets.
            for target in node.targets:
                for name_node in ast.walk(target):
                    if isinstance(name_node, ast.Name):
                        definitions["assignments"][name_node.id] = getattr(
                            module, name_node.id)

    return definitions
I added the ability to import from a string or a module object, then removed the parsing of values and replaced it by a simple getattr from the original module.
Untested
def unexported_names(module):
    """Return the names in *module* that are not part of its public API.

    With ``__all__`` defined, that is every name not listed there;
    otherwise, fall back to the underscore-prefix convention.
    """
    names = module.__dict__
    try:
        exported = module.__all__
    except AttributeError:
        return [name for name in names if name.startswith('_')]
    return [name for name in names if name not in exported]
I would like to create a list of all the functions used in a code file. For example if we have following code in a file named 'add_random.py'
`
import numpy as np
from numpy import linalg
def foo():
print np.random.rand(4) + np.random.randn(4)
print linalg.norm(np.random.rand(4))
`
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual name in the form of 'module.submodule.function'. Is there something built in python language that can help me do this?
You can extract all call expressions with:
import ast
class CallCollector(ast.NodeVisitor):
    """Collect the dotted names of all function-call expressions in a tree.

    ``self.current`` is the dotted name being assembled while walking a
    call's ``func`` expression, or ``None`` when not inside one.  Only
    Name and Attribute expressions are supported; anything else triggers
    a warning.  Note that arguments of a call are deliberately not
    descended into, so nested calls are not recorded.
    """
    def __init__(self):
        self.calls = []
        self.current = None

    def visit_Call(self, node):
        # new call, trace the function expression
        self.current = ''
        self.visit(node.func)
        self.calls.append(self.current)
        self.current = None

    def generic_visit(self, node):
        if self.current is not None:
            # Inside a func expression but hit a node kind we can't stringify.
            print("warning: {} node in function expression not supported".format(
                node.__class__.__name__))
        super(CallCollector, self).generic_visit(node)

    # record the func expression
    def visit_Name(self, node):
        if self.current is None:
            return
        self.current += node.id

    def visit_Attribute(self, node):
        if self.current is None:
            # Not inside a call's func expression: keep walking normally.
            # Bug fix: the original fell through to the name-building code
            # below and crashed with "unsupported operand" on None + str.
            self.generic_visit(node)
            return
        self.visit(node.value)
        self.current += '.' + node.attr
Use this with a ast parse tree:
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task. Any indirection would not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.
In general, this problem is undecidable; consider for example getattr(random, "random")().
If you want static analysis, the best there is now is jedi
If you accept dynamic solutions, then coverage is your best friend. It will show all used functions, though, rather than only the directly referenced ones.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging
class Proxy(object):
    """Logging stand-in for the ``random`` module.

    Every attribute access is logged, then delegated to the real module
    (kept in the module-level ``_random`` alias below).
    """
    def __getattr__(self, name):
        logging.debug("tried to use random.%s", name)
        # Bug fix: ``getattribute`` is not a builtin -- the original raised
        # NameError on first use; delegate with getattr().
        return getattr(_random, name)

# Keep a handle to the real module, then shadow its name with the proxy.
_random = random
random = Proxy()