python - importing module from within a module - python

I have this file pluralizer.py containing functions and a class which use the re module:
from re import *
def pluralize(noun, funcs):
for matches_rule, apply_rule in funcs:
if matches_rule(noun):
return apply_rule(noun)
raise ValueError("no matching rule for {0}".format(noun))
def build_match_and_apply_functions(pattern, search, replace):
def matches_rule(word):
return re.search(pattern, word)
def apply_rule(word):
return re.sub(search, replace, word)
return (matches_rule, apply_rule)
class LazyRules:
rules_filename = 'rules.txt' #a class variable - shared across all instances of the LazyRules class
def __init__(self):
self.pattern_file = open(self.rules_filename, encoding="utf-8")
self.cache=[]
def __iter__(self):
self.cache_index=0
return self #returning self signals that this class defines a __next__ method
def __next__(self):
self.cache_index += 1
if len(self.cache) >= self.cache_index:
return self.cache[self.cache_index-1]
if self.pattern_file.closed:
raise StopIteration
line = self.pattern_file.readline()
if not line: #if there's a line to read, it will not be an empty string (even if new row, it will be "\n")
self.pattern_file.close()
raise StopIteration
pattern,search,replace= line.split(None,3)
funcs = build_match_and_apply_functions(pattern,search,replace)
self.cache.append(funcs) # before returning the match&apply functions, we save them in the list self.cache
return funcs
There's also the data file rules.txt:
[sxz]$ $ es
[^aeioudgkprt]h$ $ es
[^aeiou]y$ y$ ies
$ $ s
The way it's supposed to work is:
import pluralizer
funcs = pluralizer.LazyRules()
p = pluralizer.pluralize("baby", funcs)
from which the expected output is "babies", but I get:
NameError: name 're' is not defined
Placing import re inside pluralize function didn't work either. How come the re module 'refuses' to import? I searched old questions but didn't find an answer, sorry if I overlooked it. Thanks!
P.S. Code is from 'Dive Into Python 3' by Mark Pilgrim

works for me as follows, before running it, I make sure to change the working directory within the python shell
import os
os.chdir('whatever your working directory and files are')
The code in my 'lazyrules.py' file looks like
import re
def build_match_and_apply_functions(pattern, search, replace):
def matches_rule(word):
return re.search(pattern, word)
def apply_rule(word):
return re.sub(search, replace, word)
return (matches_rule, apply_rule)
def plural(noun, funcs):
for matches_rule, apply_rule in funcs:
if matches_rule(noun):
return apply_rule(noun)
raise ValueError('no matching rule for {0}'.format(noun))
class LazyRules:
rules_filename = 'plural6-rules.txt'
def __init__(self):
self.pattern_file = open(self.rules_filename, encoding='utf-8')
self.cache = []
def __iter__(self):
self.cache_index = 0
return self
def __next__(self):
self.cache_index += 1
if len(self.cache) >= self.cache_index:
return self.cache[self.cache_index - 1]
if self.pattern_file.closed:
raise StopIteration
line = self.pattern_file.readline()
if not line:
self.pattern_file.close()
raise StopIteration
pattern, search, replace = line.split(None, 3)
funcs = build_match_and_apply_functions(pattern, search, replace)
self.cache.append(funcs)
return funcs
rules = LazyRules()

Related

typed-ast nodes evaluation

Preface
There is a typed_ast library which is used for cross-Python AST parsing & processing (e.g. in mypy project1).
Problem
I wonder if there is a way to compile nodes the same way as it is for standard ast module?
Because this works
import ast
code = compile(ast.parse('print("Hello World!")'), '<ast>', 'exec')
eval(code) # Hello World!
but this
from typed_ast import ast3
code = compile(ast3.parse('print("Hello World!")'), '<ast>', 'exec') # raises exception
eval(code)
gives me
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: compile() arg 1 must be a string, bytes or AST object
Analysis
I know there is a helper class for converting between typed_ast.ast27 and typed_ast.ast3, but couldn't found similar for typed_ast.ast3 -> ast conversion.
Also I'm aware of typed-astunparse package, but it creates source code as string, which is not an option because I'm using some hacks that keep AST compile'able, but not unparse-parse'able.
And finally there is ast3.dump function which docs say that
... if evaluation is wanted *annotate_fields* must be set to False...
so it looks like there may be a way to evaluate generated dump string? Or maybe there is a way to load this string from ast?
Or should I write my own ast3.NodeTransformer class that performs this kind of conversion?
1: proof
My solution so far with custom ast3.NodeTransformer (tested on Python3.5)
import ast
from functools import partial
from itertools import chain
from typed_ast import ast3
def to_visitor(cls):
def none(_):
return None
try:
plain_cls = getattr(ast, cls.__name__)
except AttributeError:
# node type is not found in `ast` module, skipping
return none
def visit(self, node):
node = self.generic_visit(node)
result = plain_cls(*map(partial(getattr, node), plain_cls._fields))
return ast3.copy_location(result, node)
return visit
def to_subclasses(cls,
*,
deep=True):
result = cls.__subclasses__()
yield from result
if not deep:
return
subclasses_factory = partial(to_subclasses,
deep=deep)
yield from chain.from_iterable(map(subclasses_factory, result))
class TypedToPlain(ast3.NodeTransformer):
visitors = {'visit_' + cls.__name__: to_visitor(cls)
for cls in set(to_subclasses(ast3.AST))}
def __getattr__(self, name):
return partial(self.visitors[name], self)
def generic_visit(self, node):
for field, old_value in ast3.iter_fields(node):
if isinstance(old_value, list):
new_values = []
for value in old_value:
if isinstance(value, ast3.AST):
value = self.visit(value)
if value is None:
continue
elif not isinstance(value, ast.AST):
new_values.extend(value)
continue
new_values.append(value)
old_value[:] = new_values
elif isinstance(old_value, ast3.AST):
new_node = self.visit(old_value)
if new_node is None:
delattr(node, field)
else:
setattr(node, field, new_node)
return node
Test
from typed_ast import ast3
code = compile(TypedToPlain().visit(ast3.parse('print("Hello World!")')),
'<ast>', 'exec')
eval(code) # Hello World!

decorator that add variable to closure

I want to write a decorator that inject custom local variable into function.
interface may like this.
def enclose(name, value):
...
def decorator(func):
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
return wrapper
return decorator
expectation:
#enclose('param1', 1)
def f():
param1 += 1
print param1
f() will compile and run without error
output:
2
Is it possible to do this in python? why?
I thought I'd try this out just to see how hard it would be. Pretty hard as it turns out.
First thing was how do you implement this? Is the extra parameter an injected local variable, an additional argument to the function or a nonlocal variable. An injected local variable will be a fresh object each time, but how to create more complicated objects... An additional argument will record mutations to the object, but assignments to the name will be forgotten between function invocations. Additionally, this will require either parsing of the source to find where to place the argument, or directly manipulating code objects. Finally, declaring the variables nonlocal will record mutations to the object and assignments to the name. Effectively a nonlocal is global, but only reachable by the decorated function. Again, using a nonlocal will requiring parsing the source and finding where to place the nonlocal declaration or direct manipulation of a code object.
In the end I decided with using a nonlocal variable and parsing the function source. Originally I was going to manipulate code objects, but it seemed too complicated.
Here is the code for the decorator:
import re
import types
import inspect
class DummyInject:
def __call__(self, **kwargs):
return lambda func: func
def __getattr__(self, name):
return self
class Inject:
function_end = re.compile(r"\)\s*:\s*\n")
indent = re.compile("\s+")
decorator = re.compile("#([a-zA-Z0-9_]+)[.a-zA-Z0-9_]*")
exec_source = """
def create_new_func({closure_names}):
{func_source}
{indent}return {func_name}"""
nonlocal_declaration = "{indent}nonlocal {closure_names};"
def __init__(self, **closure_vars):
self.closure_vars = closure_vars
def __call__(self, func):
lines, line_number = inspect.getsourcelines(func)
self.inject_nonlocal_declaration(lines)
new_func = self.create_new_function(lines, func)
return new_func
def inject_nonlocal_declaration(self, lines):
"""hides nonlocal declaration in first line of function."""
function_body_start = self.get_function_body_start(lines)
nonlocals = self.nonlocal_declaration.format(
indent=self.indent.match(lines[function_body_start]).group(),
closure_names=", ".join(self.closure_vars)
)
lines[function_body_start] = nonlocals + lines[function_body_start]
return lines
def get_function_body_start(self, lines):
line_iter = enumerate(lines)
found_function_header = False
for i, line in line_iter:
if self.function_end.search(line):
found_function_header = True
break
assert found_function_header
for i, line in line_iter:
if not line.strip().startswith("#"):
break
return i
def create_new_function(self, lines, func):
# prepares source -- eg. making sure indenting is correct
declaration_indent, body_indent = self.get_indent(lines)
if not declaration_indent:
lines = [body_indent + line for line in lines]
exec_code = self.exec_source.format(
closure_names=", ".join(self.closure_vars),
func_source="".join(lines),
indent=declaration_indent if declaration_indent else body_indent,
func_name=func.__name__
)
# create new func -- mainly only want code object contained by new func
lvars = {"closure_vars": self.closure_vars}
gvars = self.get_decorators(exec_code, func.__globals__)
exec(exec_code, gvars, lvars)
new_func = eval("create_new_func(**closure_vars)", gvars, lvars)
# add back bits that enable function to work well
# includes original global references and
new_func = self.readd_old_references(new_func, func)
return new_func
def readd_old_references(self, new_func, old_func):
"""Adds back globals, function name and source reference."""
func = types.FunctionType(
code=self.add_src_ref(new_func.__code__, old_func.__code__),
globals=old_func.__globals__,
name=old_func.__name__,
argdefs=old_func.__defaults__,
closure=new_func.__closure__
)
func.__doc__ = old_func.__doc__
return func
def add_src_ref(self, new_code, old_code):
return types.CodeType(
new_code.co_argcount,
new_code.co_kwonlyargcount,
new_code.co_nlocals,
new_code.co_stacksize,
new_code.co_flags,
new_code.co_code,
new_code.co_consts,
new_code.co_names,
new_code.co_varnames,
old_code.co_filename, # reuse filename
new_code.co_name,
old_code.co_firstlineno, # reuse line number
new_code.co_lnotab,
new_code.co_freevars,
new_code.co_cellvars
)
def get_decorators(self, source, global_vars):
"""Creates a namespace for exec function creation in. Must remove
any reference to Inject decorator to prevent infinite recursion."""
namespace = {}
for match in self.decorator.finditer(source):
decorator = eval(match.group()[1:], global_vars)
basename = match.group(1)
if decorator is Inject:
namespace[basename] = DummyInject()
else:
namespace[basename] = global_vars[basename]
return namespace
def get_indent(self, lines):
"""Takes a set of lines used to create a function and returns the
outer indentation that the function is declared in and the inner
indentation of the body of the function."""
body_indent = None
function_body_start = self.get_function_body_start(lines)
for line in lines[function_body_start:]:
match = self.indent.match(line)
if match:
body_indent = match.group()
break
assert body_indent
match = self.indent.match(lines[0])
if not match:
declaration_indent = ""
else:
declaration_indent = match.group()
return declaration_indent, body_indent
if __name__ == "__main__":
a = 1
#Inject(b=10)
def f(c, d=1000):
"f uses injected variables"
return a + b + c + d
#Inject(var=None)
def g():
"""Purposefully generate exception to show stacktraces are still
meaningful."""
create_name_error # line number 164
print(f(100)) # prints 1111
assert f(100) == 1111
assert f.__doc__ == "f uses injected variables" # show doc is retained
try:
g()
except NameError:
raise
else:
assert False
# stack trace shows NameError on line 164
Which outputs the following:
1111
Traceback (most recent call last):
File "inject.py", line 171, in <module>
g()
File "inject.py", line 164, in g
create_name_error # line number 164
NameError: name 'create_name_error' is not defined
The whole thing is hideously ugly, but it works. It's also worth noting that if Inject is used for method, then any injected values are shared between all instances of the class.
You can do it using globals but I don't recommend this approach.
def enclose(name, value):
globals()[name] = value
def decorator(func):
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
return wrapper
return decorator
#enclose('param1', 1)
def f():
global param1
param1 += 1
print(param1)
f()

How to extract functions used in a python code file?

I would like to create a list of all the functions used in a code file. For example if we have following code in a file named 'add_random.py'
`
import numpy as np
from numpy import linalg
def foo():
print np.random.rand(4) + np.random.randn(4)
print linalg.norm(np.random.rand(4))
`
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual name in the form of 'module.submodule.function'. Is there something built in python language that can help me do this?
You can extract all call expressions with:
import ast
class CallCollector(ast.NodeVisitor):
def __init__(self):
self.calls = []
self.current = None
def visit_Call(self, node):
# new call, trace the function expression
self.current = ''
self.visit(node.func)
self.calls.append(self.current)
self.current = None
def generic_visit(self, node):
if self.current is not None:
print "warning: {} node in function expression not supported".format(
node.__class__.__name__)
super(CallCollector, self).generic_visit(node)
# record the func expression
def visit_Name(self, node):
if self.current is None:
return
self.current += node.id
def visit_Attribute(self, node):
if self.current is None:
self.generic_visit(node)
self.visit(node.value)
self.current += '.' + node.attr
Use this with a ast parse tree:
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task. Any indirection would not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.
In general, this problem is undecidable, consider for example getattribute(random, "random")().
If you want static analysis, the best there is now is jedi
If you accept dynamic solutions, then cover coverage is your best friend. It will show all used functions, rather than only directly referenced though.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging
class Proxy(object):
def __getattr__(self, name):
logging.debug("tried to use random.%s", name)
return getattribute(_random, name)
_random = random
random = Proxy()

Using one class from another class

I wrote a simple program to read through a log and to parse through and obtain the lowest beginning number (the head) and to print it. I am now editing that program and combining it with a class I wrote to parse an actual logfile. Essentially, as opposed to sorting based off of the simple number from the log from my previous program, I now need to reference the parsed information from one class into another class. I was wondering what the most convenient way to do this. I am a beginner programmer in python and don't know if I can explicitly reference the class.
Here are the classes.
Parser
class LogLine:
SEVERITIES = ['EMERG','ALERT','CRIT','ERR','WARNING','NOTICE','INFO','DEBUG']
severity = 1
def __init__(self, line):
try:
m = re.match(r"^(\d{4}-\d{2}-\d{2}\s*\d{2}:\d{2}:\d{2}),?(\d{3}),?(\s+\[(?:[^\]]+)\])+\s+[A-Z]+\s+(\s?[a-zA-Z0-9\.])+\s?(\((?:\s?\w)+\))\s?(\s?.)+", line)
timestr, msstr, sevstr, self.filename, linestr, self.message = m.groups()
self.line = int(linestr)
self.sev = self.SEVERITIES.index(sevstr)
self.time = float(calendar.timegm(time.strptime(timestr, "%Y-%m-%d %H:%M:%S,%f"))) + float(msstr)/1000.0
dt = datetime.strptime(t, "%Y-%m-%d %H:%M:%S,%f")
except Exception:
print 'error',self.filename
def get_time(self):
return self.time
def get_severity(self):
return self.sev
def get_message(self):
return self.message
def get_filename(self):
return self.filename
def get_line(self):
return self.line
Sorter
class LogFile:
def __init__(self,filepath):
self.logfile = open(filepath, "r")
self.head = None
def __str__(self):
return "x=" + str(self.x) + "y="+str(self.y)
def readline(self):
if self.head != None:
h = self.head
self.head = None
return h
else:
return self.logfile.readline().rstrip(' ')
def get_line(self):
if self.head == None:
self.head = self.readline().rstrip(' ')
return self.head.get.line()
else:
return self.head.get.line()
def close (self):
self.logfile.close()
I have begun to edit my second class by adding the get_line function. Don't know if I'm on the right track.
In simpler terms, I need the head to become "LogLine"
It is okay to use one class from another class. You have one class that parses a single line from a log file and builds an object that represents the line; and you have another class that reads lines from a log file. It would be very natural for the second class to call the first class.
Here is a very simple class that reads all lines from a log file and builds a list:
class LogFile(object):
def __init__(self,filepath):
with open(filepath, "r") as f:
self.lst = [LogLine(line) for line in f]
You can see that self.lst is being set to a list of lines from the input log file, but not just the text of the line; the code is calling LogLine(line) to store instances of LogLine. If you want, you can sort the list after you build it:
self.lst.sort(key=LogLine.get_line)
If the log files are very large, it might not be practical to build the list. You have a .get_line() method function, and we can use that:
class LogFile(object):
def __init__(self,filepath):
self.logfile = open(filepath, "r")
def get_line(self):
try:
line = next(self.logfile) # get next line from open file object
return LogLine(line)
except StopIteration: # next() raises this when you reach the end of the file
return None # return
def close(self):
self.logfile.close()
An open file object (returned by the open() function) can be iterated. We can call next() on this object and it will give us the next input line. When the end of file is reached, Python will raise StopIteration to signal the end of the file.
Here the code will catch the StopIteration exception and return None when the end of the log file is reached. But I think this isn't the best way to handle this problem. Let's make the LogFile class work in for loops and such:
class LogFile(object):
def __init__(self,filepath):
self.f = open(filepath)
def __next__(self): # Python 3.x needs this to be named "__next__"
try:
line = next(self.f)
return LogLine(line)
except StopIteration:
# when we reach the end of input, close the file object
self.f.close()
# re-raise the exception
raise
next = __next__ # Python 2.x needs this to be named "next"
A for loop in Python will repeatedly call the .__next__() method function (Python 3.x) or else the .next() method function (Python 2.x) until the StopIteration exception is raised. Here we have defined both method function names so this code should work in Python 2.x or in Python 3.x.
Now you can do this:
for ll in LogFile("some_log_file"):
... # do something with ll, which will always be a LogLine instance

Generate graph of the imports

I'm getting close to my final goal, which is to generate a nice graph between modules and other imported modules.
For example if x imports from y and z, and y imports from t and v I would like to have:
x -> y, z
y -> t, v
Now I already have my import hook defined as below, but running it on a simple file I don't get what I would expect:
python study_imports.py CollectImports simple.py
('study_imports.py', 'study_imports')
Where simple.py actually imports from study_imports.
The problem is that I want to see "simple.py" instead of "study_imports.py", is there a way to get the path of the file actually importing the other module?
class CollectImports(object):
"""
Import hook, adds each import request to the loaded set and dumps
them to file
"""
def __init__(self, output_file):
self.loaded = set()
self.output_file = output_file
def __str__(self):
return str(self.loaded)
def cleanup(self):
"""Dump the loaded set to file
"""
dumped_str = '\n'.join(x for x in self.loaded)
open(self.output_file, 'w').write(dumped_str)
def find_module(self, module_name, package=None):
#TODO: try to find the name of the package which is actually
#importing something else, and how it's doing it
#use a defualtdict with empty sets as the storage for this job
entry = (__file__, module_name)
self.loaded.add(str(entry))
Maybe with the inspect module.
Module a.py
import inspect
print inspect.stack()
Module b.py
import a
when running b.py, I got :
[
(<frame object at 0x28a9b70>, '/path/a.py', 5, '<module>', ['print inspect.stack()\n'], 0),
(<frame object at 0x28a9660>, 'b.py', 2, '<module>', ['import to_import\n'], 0)
]
Looks like the second frame contains what you need.
So I looked in snakefood a bit better and I ended up rewriting my code using the AST.
Snakefood still uses the compiler, which is deprecated and much slower than using the ast.
The result is great, for example this is a visitor:
from ast import parse, NodeVisitor
class ImportVisitor(NodeVisitor):
def __init__(self):
self.imported = set()
super(ImportVisitor, self).__init__()
def __str__(self):
return '\n'.join(x for x in self.imported)
def visit_Import(self, node):
for n in node.names:
self.imported.add(n.name)
#that we are using
def visit_ImportFrom(self, node):
self.imported.add(node.module)
Which can be usef for example as:
def gen_module_imports(mod):
try:
at = parse(open(mod).read())
except SyntaxError:
print("file %s has a syntax error, please fix it" % mod)
return []
else:
v = ImportVisitor()
v.visit(at)
return v.imported
The inspect trick seems to work fine :)
I get something like simple.py: set(['study_imports']) in the imports.log.
Class CollectImports(object):
"""
Import hook, adds each import request to the loaded set and dumps
them to file
"""
def __init__(self, output_file):
self.loaded = defaultdict(lambda: set())
self.output_file = output_file
def __str__(self):
return str(self.loaded)
def cleanup(self):
"""Dump the loaded set to file
"""
dumped_str = '\n'.join(('%s: %s' % (k, v)) for k, v in self.loaded.items())
open(self.output_file, 'w').write(dumped_str)
def find_module(self, module_name, package=None):
st = inspect.stack()
self.loaded[st[1][1]].add(module_name)

Categories