Python ast: how to find connections between methods in different files

I want to index all methods and the connections between them in an entire application (a directory with sub-directories and files, eventually). I'm using ast: I loop over the directories down to individual files and then load each one into an ast object like so: ast.parse(self.file_content)
The index that I'm trying to create is a graph of the connections between methods.
Here is my code, if it's relevant.
def scan(self):
    '''
    Scans a file line by line while keeping context of location and classes.
    Indexes the file buffer's content into an AST (Abstract Syntax Tree, https://en.wikipedia.org/wiki/AST),
    then iterates over the elements, finds the relevant ones to index, and indexes them into the db.
    '''
    parsed_result = ast.parse(self.file_content)
    for element in parsed_result.body:
        results = self.index_element(element)

def index_element(self, element, class_name=None):
    '''
    If element is relevant, meaning a method -> index it.
    If element is a class -> recursively call itself.
    :param element:
    :param class_name:
    :return: [{'insert_result': <db_insert_result>, 'structured_data': <method object>}, ...]
    '''
    # find classes
    # find methods inside classes
    # find hanging functions
    # validation on element type
    if self.should_index_element(element):
        if self.is_class_definition(element):
            class_element = element
            indexed_items = []
            for inner_element in class_element.body:
                # recursive call
                results = self.index_element(inner_element, class_name=class_element.name)
                indexed_items += results
            return indexed_items
        else:
            structured_data = self.structure_method_into_an_object(element, class_name=class_name)
            result_graph = self.dal_client.methods_graph.insert_or_update(structured_data)
            return "WhatEver"
    return "WhatEver"
My question is: is it possible to create this graph using ast? If yes, how?
From my understanding I currently can't, since I'm loading one file at a time into the ast object, so it is not aware of methods defined in other files.
Here is an example of 2 files that I want to link between:
sample_a.py
from sample_class_b import SampleClassB

sample_b = SampleClassB()

class SampleClassA(object):
    def __init__(self):
        self.a = 1

    def test_call_to_another_function(self):
        return sample_b.test()
sample_class_b.py
class SampleClassB(object):
    def __init__(self):
        self.b = 1

    def test(self):
        return True

You can traverse the ast.AST tree and at each recursive call do one of four things:
If the tree is a class definition, store the class name with its associated methods, and then apply Connections.walk to each of the methods, storing the class and method name in the scope.
If the tree is an import statement, load the module and recursively run Connections.walk on it.
If an attribute lookup is being made and Connections.walk is within a method, check if the attribute name is a method of any class currently loaded. If so, add an edge to edges that links the current scope with this newly discovered method.
If none of the above occurs, continue to traverse the tree.
import ast, itertools
import re, importlib

class Connections:
    def __init__(self):
        self._classes, self.edges = {}, []

    def walk(self, tree, scope=None):
        t_obj = None
        if isinstance(tree, ast.ClassDef):
            self._classes[tree.name] = [i for i in tree.body if isinstance(i, ast.FunctionDef) and not re.findall('__[a-z]+__', i.name)]
            _ = [self.walk(i, [tree.name, i.name]) for i in self._classes[tree.name]]
            t_obj = [i for i in tree.body if i not in self._classes[tree.name]]
        elif isinstance(tree, (ast.Import, ast.ImportFrom)):
            for p in [tree.module] if hasattr(tree, 'module') else [i.name for i in tree.names]:
                with open(importlib.import_module(p).__file__) as f:
                    t_obj = ast.parse(f.read())
        elif isinstance(tree, ast.Attribute) and scope is not None:
            if (c := [a for a, b in self._classes.items() if any(i.name == tree.attr for i in b)]):
                self.edges.append((scope, [c[0], tree.attr]))
            t_obj = tree.value
        if isinstance(t_obj := (tree if t_obj is None else t_obj), list):
            for i in t_obj:
                self.walk(i, scope=scope)
        else:
            for i in getattr(t_obj, '_fields', []):
                self.walk(getattr(t_obj, i), scope=scope)

with open('sample_a.py') as f:
    c = Connections()
    c.walk(ast.parse(f.read()))

print(c.edges)
Output:
[(['SampleClassA', 'test_call_to_another_function'], ['SampleClassB', 'test'])]
Important note: depending on the complexity of the files you are running Connections.walk on, a RecursionError might occur. To circumvent this, here is a Gist that contains an iterative version of Connections.walk.
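For reference, here is a minimal sketch of the same traversal done iteratively with an explicit stack (this is a sketch, not the Gist itself: it reuses a Connections instance's _classes and edges, and omits the import handling and dunder filtering of the recursive version):
import ast

def walk_iterative(conn, tree):
    # a stack of (node-or-list, scope) pairs replaces the recursive calls
    stack = [(tree, None)]
    while stack:
        node, scope = stack.pop()
        if isinstance(node, list):
            stack.extend((i, scope) for i in node)
            continue
        if isinstance(node, ast.ClassDef):
            # register the class's methods, then walk each method in its own scope
            methods = [i for i in node.body if isinstance(i, ast.FunctionDef)]
            conn._classes[node.name] = methods
            stack.extend((i, scope) for i in node.body if i not in methods)
            stack.extend((m, [node.name, m.name]) for m in methods)
            continue
        if isinstance(node, ast.Attribute) and scope is not None:
            owners = [a for a, b in conn._classes.items()
                      if any(i.name == node.attr for i in b)]
            if owners:
                conn.edges.append((scope, [owners[0], node.attr]))
        # generic descent over all child fields; non-node fields have no _fields
        for field in getattr(node, '_fields', []):
            stack.append((getattr(node, field), scope))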
Creating a graph from edges:
import networkx as nx
import matplotlib.pyplot as plt

g, labels, c1 = nx.DiGraph(), {}, itertools.count(1)
for m1, m2 in c.edges:
    if (p1 := '.'.join(m1)) not in labels:
        labels[p1] = next(c1)
    if (p2 := '.'.join(m2)) not in labels:
        labels[p2] = next(c1)
    g.add_node(labels[p1])
    g.add_node(labels[p2])
    g.add_edge(labels[p1], labels[p2])

pos = nx.spring_layout(g)  # compute node positions for drawing
nx.draw(g, pos, labels={b: a for a, b in labels.items()}, with_labels=True)
plt.show()
Output: a plot of the directed graph, with a single edge from SampleClassA.test_call_to_another_function to SampleClassB.test.
Related

pickle, dill and cloudpickle returning field as empty dict on custom class after process termination

I have an object of a custom class that I am trying to serialize and permanently store.
When I serialize it, store it, load it and use it in the same run, it works fine. It only messes up when I've ended the process and then try to load it again from the pickle file. This is the code that works fine:
first_model = NgramModel(3, name="debug")
for paragraph in text:
    first_model.train(paragraph_to_sentences(text))
    # paragraph_to_sentences just uses regex to do the equivalent of splitting by punctuation
print(first_model.context_options)
# context_options is a dict (counter)
first_model = NgramModel.load_existing_model("debug")
# load_existing_model loads the pickle file. Look in the class code
print(first_model.context_options)
However, when I run this alone, it prints an empty counter:
first_model = NgramModel.load_existing_model("debug")
print(first_model.context_options)
This is a shortened version of the class file (the only two methods that touch the pickle/dill are update_pickle_state and load_existing_model):
import os
import dill
from itertools import count
from collections import Counter
from os import path

class NgramModel:
    context_options: dict[tuple, set[str]] = {}
    ngram_count: Counter[tuple] = Counter()
    n = 0
    pickle_path: str = None
    num_paragraphs = 0
    num_sentences = 0

    def __init__(self, n: int, **kwargs):
        self.n = n
        self.pickle_path = NgramModel.pathify(kwargs.get('name', NgramModel.gen_pickle_name()))  # use name if it exists, else generate a random name

    def train(self, paragraph_as_list: list[str]):
        '''really the central method that coordinates everything else. Takes a list of sentences, generates data (n-grams) from each, updates the fields, and saves the instance (self) to a pickle file'''
        self.num_paragraphs += 1
        for sentence in paragraph_as_list:
            self.num_sentences += 1
            generated = self.generate_Ngrams(sentence)
            self.ngram_count.update(generated)
            for ngram in generated:
                self.add_to_set(ngram)
        self.update_pickle_state()

    def update_pickle_state(self):
        '''saves instance to pickle file'''
        file = open(self.pickle_path, "wb")
        dill.dump(self, file)
        file.close()

    @staticmethod
    def load_existing_model(name: str):
        '''returns object from pickle file'''
        path = NgramModel.pathify(name)
        file = open(path, "rb")
        obj: NgramModel = dill.load(file)
        return obj

    def generate_Ngrams(self, string: str):
        '''ref: https://www.analyticsvidhya.com/blog/2021/09/what-are-n-grams-and-how-to-implement-them-in-python/'''
        words = string.split(" ")
        words = ["<start>"] * (self.n - 1) + words + ["<end>"] * (self.n - 1)
        list_of_tup = []
        for i in range(len(words) + 1 - self.n):
            list_of_tup.append((tuple(words[i + j] for j in range(self.n - 1)), words[i + self.n - 1]))
        return list_of_tup

    def add_to_set(self, ngram: tuple[tuple[str, ...], str]):
        if ngram[0] not in self.context_options:
            self.context_options[ngram[0]] = set()
        self.context_options[ngram[0]].add(ngram[1])

    @staticmethod
    def pathify(name):
        '''converts name to path'''
        return f"models/{name}.pickle"

    @staticmethod
    def gen_pickle_name():
        for i in count():
            new_name = f"unnamed-pickle-{i}"
            if not path.exists(NgramModel.pathify(new_name)):
                return new_name
All the other fields print properly and are complete and correct, except the two dicts.
The problem is that context_options is a mutable class member, not an instance member. If I had to guess, dill is only pickling instance members, since the class definition holds the class members. That would account for why you see a "filled-out" context_options when you're working in the same shell but not when you load fresh: in the former case you're using the dirtied class member.
It's for stuff like this that you generally don't want to use mutable class members (or, similarly, mutable default values in function signatures). More typical is to use something like context_options: dict[tuple, set[str]] = None and then check if it's None in the __init__ to set it to a default value, e.g. an empty dict. Alternatively, you could use a @dataclass and provide a field initializer, i.e.
@dataclasses.dataclass
class NgramModel:
    context_options: dict[tuple, set[str]] = dataclasses.field(default_factory=dict)
    ...
You can observe what I mean about it being a mutable class member with, for instance...
if __name__ == '__main__':
    ng = NgramModel(3, name="debug")
    print(ng.context_options)   # {}
    ng.context_options[("foo", "bar")] = {"baz", "qux"}
    print(ng.context_options)   # {('foo', 'bar'): {'baz', 'qux'}}
    ng2 = NgramModel(3, name="debug")
    print(ng2.context_options)  # {('foo', 'bar'): {'baz', 'qux'}}
I would expect a brand new ng2 to have the same context that the brand new ng had - empty (or whatever an appropriate default is).
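For completeness, here is a minimal sketch of moving the mutable members into __init__ (my sketch, not the original class: each instance now gets, and therefore pickles, its own copies):
from collections import Counter

class NgramModel:
    def __init__(self, n: int, **kwargs):
        self.n = n
        # instance members now, so dill serializes them with the object
        self.context_options: dict[tuple, set[str]] = {}
        self.ngram_count: Counter[tuple] = Counter()
        self.num_paragraphs = 0
        self.num_sentences = 0
        self.pickle_path = NgramModel.pathify(kwargs.get('name', NgramModel.gen_pickle_name()))

    # ... train, update_pickle_state, load_existing_model, etc. unchanged ...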

Identifying pure functions in python

I have a decorator @pure that registers a function as pure, for example:
@pure
def rectangle_area(a, b):
    return a*b

@pure
def triangle_area(a, b, c):
    return ((a+(b+c))*(c-(a-b))*(c+(a-b))*(a+(b-c)))**0.5/4
Next, I want to identify a newly defined pure function:
def house_area(a, b, c):
    return rectangle_area(a, b) + triangle_area(a, b, c)
Obviously house_area is pure, since it only calls pure functions.
How can I discover all pure functions automatically (perhaps by using ast)?
Assuming operators are all pure, you essentially only need to check all the function calls. This can indeed be done with the ast module.
First I defined the pure decorator as:
def pure(f):
    f.pure = True
    return f
Adding an attribute telling that it's pure allows skipping early, or "forcing" a function to identify as pure. This is useful if you need a function like math.sin to identify as pure, especially since you can't add attributes to built-in functions.
@pure
def sin(x):
    return math.sin(x)
All in all: use the ast module to visit all the nodes, then for each Call node check whether the function being called is pure.
import ast

class PureVisitor(ast.NodeVisitor):
    def __init__(self, visited):
        super().__init__()
        self.pure = True
        self.visited = visited

    def visit_Name(self, node):
        return node.id

    def visit_Attribute(self, node):
        name = [node.attr]
        child = node.value
        while child is not None:
            if isinstance(child, ast.Attribute):
                name.append(child.attr)
                child = child.value
            else:
                name.append(child.id)
                break
        name = ".".join(reversed(name))
        return name

    def visit_Call(self, node):
        if not self.pure:
            return
        name = self.visit(node.func)
        if name not in self.visited:
            self.visited.append(name)
            try:
                callee = eval(name)
                if not is_pure(callee, self.visited):
                    self.pure = False
            except NameError:
                self.pure = False
Then check whether the function has the pure attribute. If not, get its source and check whether all the function calls in it can be classified as pure.
import inspect, textwrap

def is_pure(f, _visited=None):
    try:
        return f.pure
    except AttributeError:
        pass
    try:
        code = inspect.getsource(f.__code__)
    except AttributeError:
        return False
    code = textwrap.dedent(code)
    node = compile(code, "<unknown>", "exec", ast.PyCF_ONLY_AST)
    if _visited is None:
        _visited = []
    visitor = PureVisitor(_visited)
    visitor.visit(node)
    return visitor.pure
Note that print(is_pure(lambda x: math.sin(x))) doesn't work, since inspect.getsource(f.__code__) returns code on a line-by-line basis. So the source returned by getsource would include the print and is_pure calls, thus yielding False, unless those functions are overridden.
To verify that it works, test it by doing:
print(is_pure(house_area)) # Prints: True
To list all the functions in the current module:
import sys, types

for k in dir(sys.modules[__name__]):
    v = globals()[k]
    if isinstance(v, types.FunctionType):
        print(k, is_pure(v))
The visited list keeps track of which functions have already been checked. This helps circumvent problems related to recursion; since the code isn't executed, the evaluation would otherwise recursively visit factorial forever.
@pure
def factorial(n):
    return 1 if n == 1 else n * factorial(n - 1)
Note that you might need to revise the following code, choosing another way to obtain a function from its name:
try:
    callee = eval(name)
    if not is_pure(callee, self.visited):
        self.pure = False
except NameError:
    self.pure = False

How to get a list of all non imported names in a Python module?

Given a module containing:
import stuff
from foo import Foo
from bar import *
CST = True
def func(): pass
How can I define a function get_defined_objects so that I can do:
print(get_defined_objects('path.to.module'))
{'CST': True, 'func': <function path.to.module.func>}
Right now the only solution I can imagine is to read the original module file, extract the defined names with re.search(r'^(?:def|class )?(\w+)(?:\s*=)?'), then import the module and find the intersection with __dict__.
Is there something cleaner?
Here is something for you to start with using ast. Note that this code does not cover all possible cases, although it should handle e.g. multiple assignment properly. Consider investigating ast's data structures and API more closely if you would like to get access to compiled code, for example.
import ast

with open('module.py') as f:
    data = f.read()

tree = ast.parse(data)
elements = [el for el in tree.body if type(el) in (ast.Assign, ast.FunctionDef, ast.ClassDef)]

result = {}
for el in elements:
    if type(el) == ast.Assign:
        for t in el.targets:
            if type(el.value) == ast.Call:
                result[t.id] = el.value.func.id + '()'
            else:
                for attr in ['id', 'n', 's']:
                    try:
                        result[t.id] = getattr(el.value, attr)
                        break
                    except AttributeError:
                        pass
    elif type(el) == ast.FunctionDef:
        result[el.name] = '<function %s>' % el.name
    else:
        result[el.name] = '<class %s>' % el.name

print(result)
mod = "foo"
import ast, inspect
import importlib
mod = importlib.import_module(mod)
p = ast.parse(inspect.getsource(mod))
from collections import defaultdict
data = defaultdict(defaultdict)
for node in p.body:
if isinstance(node, (ast.ImportFrom, ast.Import)):
continue
if isinstance(node, (ast.ClassDef, ast.FunctionDef)):
data["classes"][node.name] = mod.__dict__[node.name]
elif isinstance(node, ast.Assign):
for trg in node.targets:
if isinstance(node.value, ast.Num):
data["assignments"][trg.id] = node.value.n
elif isinstance(node.value, ast.Str):
data["assignments"][trg.id] = node.value.s
else:
data["assignments"][trg.id] = mod.__dict__[trg.id]
There is a nice explanation here that lists what the different node types do and their attributes, which the following NodeVisitor subclass is based on:
class Nodes(ast.NodeVisitor):
    def __init__(self):
        self.data = defaultdict()
        super(Nodes, self).__init__()

    def visit_FunctionDef(self, node):
        self.data[node.name] = mod.__dict__[node.name]
        print("In FunctionDef with function {}".format(node.name))

    def visit_ClassDef(self, node):
        self.data[node.name] = mod.__dict__[node.name]

    def visit_Assign(self, node):
        for trg in node.targets:
            if isinstance(node.value, (ast.Str, ast.Num, ast.Dict, ast.List, ast.ListComp, ast.NameConstant)):
                self.data[trg.id] = mod.__dict__[trg.id]
        self.generic_visit(node)

    def visit_Name(self, node):
        """
        class Name(id, ctx)
        A variable name. id holds the name as a string,
        and ctx is either class Load, class Store or class Del.
        """
        print("In Name with {}\n".format(node.id))

    def visit_Dict(self, node):
        """
        class Dict(keys, values)
        A dictionary. keys and values
        hold lists of nodes with matching order.
        """
        print("In Dict keys = {}, values = {}\n".format(node.keys, node.values))

    def visit_Set(self, node):
        """
        class Set(elts)
        A set. elts holds a list of
        nodes representing the elements.
        """
        print("In Set elts = {}\n".format(node.elts))

    def visit_List(self, node):
        """
        class List(elts, ctx)
        elts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x,y)=pt), and Load otherwise.
        """
        print("In List elts = {}\nctx = {}\n".format(node.elts, node.ctx))

    def visit_Tuple(self, node):
        """
        class Tuple(elts, ctx)
        elts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x,y)=pt), and Load otherwise.
        """
        print("In Tuple elts = {}\nctx = {}\n".format(node.elts, node.ctx))

    def visit_NameConstant(self, node):
        """
        class NameConstant(value)
        True, False or None. "value" holds one of those constants.
        """
        print("In NameConstant getting value {}\n".format(node.value))

    def visit_Load(self, node):
        print("In Load with node {}\n".format(node))

    def visit_Call(self, node):
        """
        class Call(func, args, keywords, starargs, kwargs)
        A function call. func is the function,
        which will often be a Name or Attribute object. Of the arguments:
        args holds a list of the arguments passed by position.
        keywords holds a list of keyword objects representing arguments
        passed by keyword. starargs and kwargs each hold a single node,
        for arguments passed as *args and **kwargs.
        """
        print("In Call with node {}\n".format(node.func))

    def visit_Num(self, node):
        print("In Num getting value {}\n".format(node.n))

    def visit_Str(self, node):
        print("In Str getting value {}\n".format(node.s))

f = Nodes()
f.visit(p)
print(f.data)
A bytecode hack for Python 3.4+, made possible by dis.get_instructions.
import dis
import importlib
from itertools import islice
import marshal
import os

def consume_iterator(it, n=1):
    next(islice(it, n, n), None)

def get_defined_names(module_path):
    path, module_name = os.path.split(module_path)
    module_name = module_name[:-3]
    module_object = importlib.import_module(module_name)
    pyc_name = '{}.cpython-34.pyc'.format(module_name)
    pyc_path = os.path.join(path, '__pycache__/', pyc_name)
    with open(pyc_path, 'rb') as f:
        f.read(12)  # drop the 12-byte pyc header (16 bytes on CPython 3.7+)
        code = marshal.load(f)
    # dis.disassemble(code)  # see the byte code
    instructions = dis.get_instructions(code)
    objects = {}
    for instruction in instructions:
        if instruction.opname == 'STORE_NAME':
            objects[instruction.argval] = getattr(module_object,
                                                  instruction.argval)
        elif instruction.opname == 'IMPORT_NAME':
            consume_iterator(instructions, 2)
        elif instruction.opname == 'IMPORT_FROM':
            consume_iterator(instructions, 1)
    return objects

print(get_defined_names('/Users/ashwini/py/so.py'))
For a file like:
# /Users/ashwini/py/so.py
import os
from sys import argv, modules
from math import *
from itertools import product

CST = True

from itertools import permutations, combinations
from itertools import chain

E = 100

from itertools import starmap

def func(): pass

for x in range(10):
    pass

class C:
    a = 100

d = 1
The output will be:
{'d': 1, 'E': 100, 'CST': True, 'x': 9, 'func': <function func at 0x10efd0510>, 'C': <class 'so.C'>}
A much better way, as someone already mentioned in the comments, is to parse the source code using the ast module and find the variable names from there.
While I accepted an answer, it can't hurt to post the solution I ended up using. It's a mix of the other proposals:
import ast
import inspect
import importlib
from types import ModuleType

def extract_definitions(module):
    """ Returns the name and value of objects defined at the top level of the given module.

    :param module: A module object or the name of the module to import.
    :return: A dict {'classes': {}, 'functions': {}, 'assignments': {}} containing defined objects in the module.
    """
    if not isinstance(module, ModuleType):
        module = importlib.import_module(module)
    tree = ast.parse(inspect.getsource(module))
    definitions = {'classes': {}, 'functions': {}, 'assignments': {}}
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            definitions["classes"][node.name] = getattr(module, node.name)
        elif isinstance(node, ast.FunctionDef):
            definitions["functions"][node.name] = getattr(module, node.name)
        elif isinstance(node, ast.Assign):
            # for unpacking, you need to loop over all names
            for target in node.targets:
                definitions["assignments"][target.id] = getattr(module, target.id)
    return definitions
I added the ability to import from a string or a module object, then removed the parsing of values and replaced it with a simple getattr on the original module.
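Usage then looks like this ('path.to.module' is a placeholder; the expected output corresponds to the example module from the question):
from pprint import pprint

defs = extract_definitions('path.to.module')  # a module object works too
pprint(defs)
# {'assignments': {'CST': True},
#  'classes': {},
#  'functions': {'func': <function func at 0x...>}}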
Untested
def unexported_names(module):
    try:
        return [name for name in module.__dict__ if name not in module.__all__]
    except AttributeError:
        return [name for name in module.__dict__ if name.startswith('_')]
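A quick smoke test against a stdlib module that defines __all__ (output varies by Python version):
import json
print(unexported_names(json))  # names in json.__dict__ that json does not export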

How to extract functions used in a python code file?

I would like to create a list of all the functions used in a code file. For example, if we have the following code in a file named 'add_random.py':
import numpy as np
from numpy import linalg

def foo():
    print np.random.rand(4) + np.random.randn(4)
    print linalg.norm(np.random.rand(4))
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual names in the form 'module.submodule.function'. Is there something built into the Python language that can help me do this?
You can extract all call expressions with:
import ast

class CallCollector(ast.NodeVisitor):
    def __init__(self):
        self.calls = []
        self.current = None

    def visit_Call(self, node):
        # new call, trace the function expression
        self.current = ''
        self.visit(node.func)
        self.calls.append(self.current)
        self.current = None

    def generic_visit(self, node):
        if self.current is not None:
            print "warning: {} node in function expression not supported".format(
                node.__class__.__name__)
        super(CallCollector, self).generic_visit(node)

    # record the func expression
    def visit_Name(self, node):
        if self.current is None:
            return
        self.current += node.id

    def visit_Attribute(self, node):
        if self.current is None:
            self.generic_visit(node)
            return
        self.visit(node.value)
        self.current += '.' + node.attr
Use this with an ast parse tree:
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task: any indirection will not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.
In general, this problem is undecidable; consider for example getattr(random, "random")().
If you want static analysis, the best there is now is jedi.
If you accept dynamic solutions, then coverage is your best friend. It will show all used functions, though, rather than only directly referenced ones.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging

class Proxy(object):
    def __getattr__(self, name):
        logging.debug("tried to use random.%s", name)
        return getattr(_random, name)

_random = random
random = Proxy()
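A usage sketch, assuming logging is configured to show DEBUG output:
import logging
logging.basicConfig(level=logging.DEBUG)

print(random.random())  # the proxy logs "tried to use random.random", then delegates to the real module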

Python / YAML: How to initialize additional objects not just from the YAML file, within loadConfig?

I have what I think is a small misconception with loading some YAML objects. I defined the class below.
What I want to do is load some objects with the overridden loadConfig function for YAMLObjects. Some of these come from my .yaml file, but others should be built out of objects loaded from the YAML file.
For instance, in the class below, I load a member object named "keep", which is a string naming some items to keep in the region. But I also want to parse this into a list and have the list stored as a member object too. And I don't want the user to have to give both the string and list versions of this parameter in the YAML.
My current workaround has been to override the __getattr__ function inside Region and make it create the defaults if a lookup doesn't find them. But this is clunky and more complicated than should be needed just to initialize objects.
What convention am I misunderstanding here? Why doesn't the loadConfig method create the additional things not found in the YAML?
import yaml, pdb

class Region(yaml.YAMLObject):
    yaml_tag = u'!Region'

    def __init__(self, name, keep, drop):
        self.name = name
        self.keep = keep
        self.drop = drop
        self.keep_list = self.keep.split("+")
        self.drop_list = self.drop.split("+")
        self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
    ###

    def loadConfig(self, yamlConfig):
        yml = yaml.load_all(file(yamlConfig))
        for data in yml:
            # These get created fine
            self.name = data["name"]
            self.keep = data["keep"]
            self.drop = data["drop"]
            # These do not get created.
            self.keep_list = self.keep.split("+")
            self.drop_list = self.drop.split("+")
            self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
    ###
### End Region

if __name__ == "__main__":
    my_yaml = "/home/path/to/test.yaml"
    region_iterator = yaml.load_all(file(my_yaml))
    # Set a debug breakpoint to play with region_iterator and
    # confirm the extra stuff isn't created.
    pdb.set_trace()
And here is test.yaml so you can run all of this and see what I mean:
Regions:
    # Note: the string conventions below are for an
    # existing system. This is a shortened, representative
    # example.
    Market1: !Region
        name: USAndGB
        keep: US+GB
        drop: !!null
    Market2: !Region
        name: CanadaAndAustralia
        keep: CA+AU
        drop: !!null
And here, for example, is what it looks like for me when I run this in an IPython shell and explore the loaded object:
In [57]: %run "/home/espears/testWorkspace/testRegions.py"
--Return--
> /home/espears/testWorkspace/testRegions.py(38)<module>()->None
-> pdb.set_trace()
(Pdb) region_iterator
<generator object load_all at 0x1139d820>
(Pdb) tmp = region_iterator.next()
(Pdb) tmp
{'Regions': {'Market2': <__main__.Region object at 0x1f858550>, 'Market1': <__main__.Region object at 0x11a91e50>}}
(Pdb) us = tmp['Regions']['Market1']
(Pdb) us
<__main__.Region object at 0x11a91e50>
(Pdb) us.name
'USAndGB'
(Pdb) us.keep
'US+GB'
(Pdb) us.keep_list
*** AttributeError: 'Region' object has no attribute 'keep_list'
A pattern I have found useful for working with yaml for classes that are basically storage is to have the loader use the constructor so that objects are created in the same way as when you make them normally. If I understand what you are attempting to do correctly, this kind of structure might be useful:
import inspect
import yaml
import numpy as np  # the _dict property checks for 1-D ndarrays below
from collections import OrderedDict

class Serializable(yaml.YAMLObject):
    __metaclass__ = yaml.YAMLObjectMetaclass

    @property
    def _dict(self):
        dump_dict = OrderedDict()
        for var in inspect.getargspec(self.__init__).args[1:]:
            if getattr(self, var, None) is not None:
                item = getattr(self, var)
                if isinstance(item, np.ndarray) and item.ndim == 1:
                    item = list(item)
                dump_dict[var] = item
        return dump_dict

    @classmethod
    def to_yaml(cls, dumper, data):
        return ordered_dump(dumper, '!{0}'.format(data.__class__.__name__),
                            data._dict)

    @classmethod
    def from_yaml(cls, loader, node):
        fields = loader.construct_mapping(node, deep=True)
        return cls(**fields)

def ordered_dump(dumper, tag, data):
    value = []
    node = yaml.nodes.MappingNode(tag, value)
    for key, item in data.iteritems():
        node_key = dumper.represent_data(key)
        node_value = dumper.represent_data(item)
        value.append((node_key, node_value))
    return node
You would then want to have your Region class inherit from Serializable, and remove the loadConfig stuff. The code I posted inspects the constructor to see what data to save to the yaml file, and then when loading a yaml file calls the constructor with that same set of data. That way you just have to get the logic right in your constructor and the yaml loading should get it for free.
That code was ripped from one of my projects, apologies in advance if it doesn't quite work. It is also slightly more complicated than it needs to be because I wanted to control the order of output by using OrderedDict. You could replace my ordered_dump function with a call to dumper.represent_dict.
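For illustration, here is a sketch of what Region might reduce to (my adaptation, untested: note the added guard for the !!null drop, which the original __init__ would choke on):
class Region(Serializable):
    yaml_tag = u'!Region'

    def __init__(self, name, keep, drop=None):
        self.name = name
        self.keep = keep
        self.drop = drop
        # derived members are rebuilt every time the constructor runs,
        # including when from_yaml calls it with the loaded fields
        self.keep_list = self.keep.split("+")
        self.drop_list = self.drop.split("+") if self.drop else []
        self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)

for doc in yaml.load_all(file("test.yaml")):
    region = doc["Regions"]["Market1"]
    print region.keep_list  # ['US', 'GB'], built by __init__ rather than loadConfig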
