Dynamically subclass an Enum base class - python

I've set up a metaclass and base class pair for creating the line specifications of several different file types I have to parse.
I have decided to go with using enumerations because many of the individual parts of the different lines in the same file often have the same name. Enums make it easy to tell them apart. Additionally, the specification is rigid and there will be no need to add more members, or extend the line specifications later.
The specification classes work as expected. However, I am having some trouble dynamically creating them:
>>> C1 = LineMakerMeta('C1', (LineMakerBase,), dict(a = 0))
AttributeError: 'dict' object has no attribute '_member_names'
Is there a way around this? The example below works just fine:
class A1(LineMakerBase):
Mode = 0, dict(fill=' ', align='>', type='s')
Level = 8, dict(fill=' ', align='>', type='d')
Method = 10, dict(fill=' ', align='>', type='d')
_dummy = 20 # so that Method has a known length
A1.format(**dict(Mode='DESIGN', Level=3, Method=1))
# produces ' DESIGN 3 1'
The metaclass is based on enum.EnumMeta, and looks like this:
import enum
class LineMakerMeta(enum.EnumMeta):
"Metaclass to produce formattable LineMaker child classes."
def _iter_format(cls):
"Iteratively generate formatters for the class members."
for member in cls:
yield member.formatter
def __str__(cls):
"Returns string line with all default values."
return cls.format()
def format(cls, **kwargs):
"Create formatted version of the line populated by the kwargs members."
# build resulting string by iterating through members
result = ''
for member in cls:
# determine value to be injected into member
try:
try:
value = kwargs[member]
except KeyError:
value = kwargs[member.name]
except KeyError:
value = member.default
value_str = member.populate(value)
result = result + value_str
return result
And the base class is as follows:
class LineMakerBase(enum.Enum, metaclass=LineMakerMeta):
"""A base class for creating Enum subclasses used for populating lines of a file.
Usage:
class LineMaker(LineMakerBase):
a = 0, dict(align='>', fill=' ', type='f'), 3.14
b = 10, dict(align='>', fill=' ', type='d'), 1
b = 15, dict(align='>', fill=' ', type='s'), 'foo'
# ^-start ^---spec dictionary ^--default
"""
def __init__(member, start, spec={}, default=None):
member.start = start
member.spec = spec
if default is not None:
member.default = default
else:
# assume value is numerical for all provided types other than 's' (string)
default_or_set_type = member.spec.get('type','s')
default = {'s': ''}.get(default_or_set_type, 0)
member.default = default
#property
def formatter(member):
"""Produces a formatter in form of '{0:<format>}' based on the member.spec
dictionary. The member.spec dictionary makes use of these keys ONLY (see
the string.format docs):
fill align sign width grouping_option precision type"""
try:
# get cached value
return '{{0:{}}}'.format(member._formatter)
except AttributeError:
# add width to format spec if not there
member.spec.setdefault('width', member.length if member.length != 0 else '')
# build formatter using the available parts in the member.spec dictionary
# any missing parts will simply not be present in the formatter
formatter = ''
for part in 'fill align sign width grouping_option precision type'.split():
try:
spec_value = member.spec[part]
except KeyError:
# missing part
continue
else:
# add part
sub_formatter = '{!s}'.format(spec_value)
formatter = formatter + sub_formatter
member._formatter = formatter
return '{{0:{}}}'.format(formatter)
def populate(member, value=None):
"Injects the value into the member's formatter and returns the formatted string."
formatter = member.formatter
if value is not None:
value_str = formatter.format(value)
else:
value_str = formatter.format(member.default)
if len(value_str) > len(member) and len(member) != 0:
raise ValueError(
'Length of object string {} ({}) exceeds available'
' field length for {} ({}).'
.format(value_str, len(value_str), member.name, len(member)))
return value_str
#property
def length(member):
return len(member)
def __len__(member):
"""Returns the length of the member field. The last member has no length.
Length are based on simple subtraction of starting positions."""
# get cached value
try:
return member._length
# calculate member length
except AttributeError:
# compare by member values because member could be an alias
members = list(type(member))
try:
next_index = next(
i+1
for i,m in enumerate(type(member))
if m.value == member.value
)
except StopIteration:
raise TypeError(
'The member value {} was not located in the {}.'
.format(member.value, type(member).__name__)
)
try:
next_member = members[next_index]
except IndexError:
# last member defaults to no length
length = 0
else:
length = next_member.start - member.start
member._length = length
return length

This line:
C1 = enum.EnumMeta('C1', (), dict(a = 0))
fails with exactly the same error message. The __new__ method of EnumMeta expects an instance of enum._EnumDict as its last argument. _EnumDict is a subclass of dict and provides an instance variable named _member_names, which of course a regular dict doesn't have. When you go through the standard mechanism of enum creation, this all happens correctly behind the scenes. That's why your other example works just fine.
This line:
C1 = enum.EnumMeta('C1', (), enum._EnumDict())
runs with no error. Unfortunately, the constructor of _EnumDict is defined as taking no arguments, so you can't initialize it with keywords as you apparently want to do.
In the implementation of enum that's backported to Python3.3, the following block of code appears in the constructor of EnumMeta. You could do something similar in your LineMakerMeta class:
def __new__(metacls, cls, bases, classdict):
if type(classdict) is dict:
original_dict = classdict
classdict = _EnumDict()
for k, v in original_dict.items():
classdict[k] = v
In the official implementation, in Python3.5, the if statement and the subsequent block of code is gone for some reason. Therefore classdict must be an honest-to-god _EnumDict, and I don't see why this was done. In any case the implementation of Enum is extremely complicated and handles a lot of corner cases.
I realize this is not a cut-and-dried answer to your question but I hope it will point you to a solution.

Create your LineMakerBase class, and then use it like so:
C1 = LineMakerBase('C1', dict(a=0))
The metaclass was not meant to be used the way you are trying to use it. Check out this answer for advice on when metaclass subclasses are needed.
Some suggestions for your code:
the double try/except in format seems clearer as:
for member in cls:
if member in kwargs:
value = kwargs[member]
elif member.name in kwargs:
value = kwargs[member.name]
else:
value = member.default
this code:
# compare by member values because member could be an alias
members = list(type(member))
would be clearer with list(member.__class__)
has a false comment: listing an Enum class will never include the aliases (unless you have overridden that part of EnumMeta)
instead of the complicated __len__ code you have now, and as long as you are subclassing EnumMeta you should extend __new__ to automatically calculate the lengths once:
# untested
def __new__(metacls, cls, bases, clsdict):
# let the main EnumMeta code do the heavy lifting
enum_cls = super(LineMakerMeta, metacls).__new__(cls, bases, clsdict)
# go through the members and calculate the lengths
canonical_members = [
member
for name, member in enum_cls.__members__.items()
if name == member.name
]
last_member = None
for next_member in canonical_members:
next_member.length = 0
if last_member is not None:
last_member.length = next_member.start - last_member.start

The simplest way to create Enum subclasses on the fly is using Enum itself:
>>> from enum import Enum
>>> MyEnum = Enum('MyEnum', {'a': 0})
>>> MyEnum
<enum 'MyEnum'>
>>> MyEnum.a
<MyEnum.a: 0>
>>> type(MyEnum)
<class 'enum.EnumMeta'>
As for your custom methods, it might be simpler if you used regular functions, precisely because Enum implementation is so special.

Related

Is possible to make mypy accept dynamic generated classes as valid types?

I have this code to encode ADTs in python
from dataclasses import make_dataclass
def adt(datatype, *ctrs: str):
basecls = type(datatype, (), {})
klass = lambda x: x.split()[0]
fields = lambda x: x.split()[1:]
clss = (make_dataclass(klass(cls),
bases=(basecls,),
fields=fields(cls))
for cls in ctrs)
return (basecls, *clss)
# Just call the adt function passing the
# constructors. the first is the type constructor
# and the others are data constructors. It will
# return a tuple of the constructors in the same
# order.
Maybe, Just, None_ = adt("Maybe", "Just x", "None_")
# Here's how to use the maybe datatype
just1 = Just(1)
none = None_()
# __repr__ is provided by the dataclass
print(just1) # Just(x=1)
# isinstance relation is preserved
print(isinstance(just1, Maybe)) # True
print(type(just1) is Just) # True
print(type(none) is None_) # True
It works fine, BUT, if I use one of the generated types as a hint, mypy will complain about it not being a valid type
def foo(maybe: Maybe): ... # Variable "adt.Maybe" is not valid as a type
```
I tried to make `adt` returns `-> Iterable[type]` but it din't changed nothing. Not sure if there is a solution to this.

A python function that return a list of function with a for loop

I am trying to implement a function (make_q) that returns a list of functions(Q) that are generated using the argument that make_q gets (P). Q is a variable dependent to n(=len(P)) and making the Q functions are similar, so it can be done in a for loop but here is the catch if I name the function in the loop, they will all have the same address so I only get the last Q, Is there to bypass this?
Here is my code,
def make_q(self):
Temp_P=[p for p in self.P]
Q=()
for i in range(self.n-1):
p=min(Temp_P)
q=max(Temp_P)
index_p=Temp_P.index(p)
index_q=Temp_P.index(q)
def tempQ():
condition=random.random()
if condition<=(p*self.n):
return index_p
else:
return index_q
Temp_Q=list(Q)
Temp_Q.append(tempQ)
Q=tuple(Temp_Q)
q-=(1-p*self.n)/self.n
Temp_P[index_q]=q
Temp_P.pop(index_p)
return Q
test.Q
(<function __main__.Test.make_q.<locals>.tempQ()>,
<function __main__.Test.make_q.<locals>.tempQ()>,
<function __main__.Test.make_q.<locals>.tempQ()>,
<function __main__.Test.make_q.<locals>.tempQ()>,
<function __main__.Test.make_q.<locals>.tempQ()>)
I also tried to make them a tuple so they have different addresses but it didn't work.
Is there a way to name functions(tempQ) dynamic like tempQi
jasonharper's observation and solution in comments is correct(and should be the accepted answer). But since you asked about metaclasses, I am posting this anyway.
In python, each class is a type , with "name", "bases" (base classes) and "attrs"(all members of a class). Essentially, a metaclass defines a behaviour of a class, you can read more about it at https://www.python-course.eu/python3_metaclasses.php and various other online tutorials.
The __new__ method runs when a class is set up. Note the usage of attrs where your class member self.n is accessed by attrs['n'] (as attrs is a dict of all class members). I am defining functions tempQ_0, tempQ_1... dynamically. As you can see, we can also add docstrings to this dynamically defined class members.
import random
class MyMetaClass(type):
def __new__(cls, name, bases, attrs):
Temp_P = [p for p in attrs['P']]
for i in range(attrs['n'] - 1):
p = min(Temp_P)
q = max(Temp_P)
index_p = Temp_P.index(p)
index_q = Temp_P.index(q)
def fget(self, index_p=index_p, index_q=index_q): # this is an unbound method
condition = random.random()
return index_p if condition <= (p * self.n) else index_q
attrs['tempQ_{}'.format(i)] = property(fget, doc="""
This function returns {} or {} randomly""".format(index_p, index_q))
q -= (1 - p * attrs['n']) / attrs['n']
Temp_P[index_q] = q
Temp_P.pop(index_p)
return super(MyMetaClass, cls).__new__(cls, name, bases, attrs)
# PY2
# class MyClass(object):
# __metaclass__ = MyMetaClass
# n = 3
# P = [3, 6, 8]
# PY3
class MyClass(metaclass=MyMetaClass):
n = 3
P = [3, 6, 8]
# or use with_metaclass from future.utils for both Py2 and Py3
# print(dir(MyClass))
print(MyClass.tempQ_0, MyClass.tempQ_1)
output
<property object at 0x10e5fbd18> <property object at 0x10eaad0e8>
So your list of functions is [MyClass.tempQ_0, MyClass.tempQ_1]
Please try via formatted strings, for eg: "function_{}.format(name)" also, how do you want your output to look like?

Any way to bypass namedtuple 255 arguments limitation?

I'm using a namedtuple to hold sets of strings and their corresponding values.
I'm not using a dictionary, because I want the strings accessible as attributes.
Here's my code:
from collections import namedtuple
# Shortened for readability :-)
strings = namedtuple("strings", ['a0', 'a1', 'a2', ..., 'a400'])
my_strings = strings(value0, value1, value2, ..., value400)
Ideally, once my_strings is initialized, I should be able to do this:
print(my_strings.a1)
and get value1 printed back.
However, I get the following error instead:
strings(value0, value1, value2, ...value400)
^SyntaxError: more than 255 arguments
It seems python functions (including namedtuple's init()), do not accept more than 255 arguments when called.
Is there any way to bypass this issue and have named tuples with more than 255 items? Why is there a 255 arguments limit anyway?
This is a limit to CPython function definitions; in versions before Python 3.7, you cannot specify more than 255 explicit arguments to a callable. This applies to any function definition, not just named tuples.
Note that this limit has been lifted in Python 3.7 and newer, where the new limit is sys.maxint. See What is a maximum number of arguments in a Python function?
It is the generated code for the class that is hitting this limit. You cannot define a function with more than 255 arguments; the __new__ class method of the resulting class is thus not achievable in the CPython implementation.
You'll have to ask yourself, however, if you really should be using a different structure instead. It looks like you have a list-like piece of data to me; 400 numbered names is a sure sign of your data bleeding into your names.
You can work around this by creating your own subclass, manually:
from operator import itemgetter
from collections import OrderedDict
class strings(tuple):
__slots__ = ()
_fields = tuple('a{}'.format(i) for i in range(400))
def __new__(cls, *args, **kwargs):
req = len(cls._fields)
if len(args) + len(kwargs) > req:
raise TypeError(
'__new__() takes {} positional arguments but {} were given'.format(
req, len(args) + len(kwargs)))
if kwargs.keys() > set(cls._fields):
raise TypeError(
'__new__() got an unexpected keyword argument {!r}'.format(
(kwargs.keys() - set(cls._fields)).pop()))
missing = req - len(args)
if kwargs.keys() & set(cls._fields[:-missing]):
raise TypeError(
'__new__() got multiple values for argument {!r}'.format(
(kwargs.keys() & set(cls._fields[:-missing])).pop()))
try:
for field in cls._fields[-missing:]:
args += (kwargs[field],)
missing -= 1
except KeyError:
pass
if len(args) < req:
raise TypeError('__new__() missing {} positional argument{}: {}'.format(
missing, 's' if missing > 1 else '',
' and '.join(filter(None, [', '.join(map(repr, cls._fields[-missing:-1])), repr(cls._fields[-1])]))))
return tuple.__new__(cls, args)
#classmethod
def _make(cls, iterable, new=tuple.__new__, len=len):
'Make a new strings object from a sequence or iterable'
result = new(cls, iterable)
if len(result) != len(cls._fields):
raise TypeError('Expected %d arguments, got %d' % (len(cls._fields), len(result)))
return result
def __repr__(self):
'Return a nicely formatted representation string'
format = '{}({})'.format(self.__class__.__name__, ', '.join('{}=%r'.format(n) for n in self._fields))
return format % self
def _asdict(self):
'Return a new OrderedDict which maps field names to their values'
return OrderedDict(zip(self._fields, self))
__dict__ = property(_asdict)
def _replace(self, **kwds):
'Return a new strings object replacing specified fields with new values'
result = self._make(map(kwds.pop, self._fields, self))
if kwds:
raise ValueError('Got unexpected field names: %r' % list(kwds))
return result
def __getnewargs__(self):
'Return self as a plain tuple. Used by copy and pickle.'
return tuple(self)
def __getstate__(self):
'Exclude the OrderedDict from pickling'
return None
for i, name in enumerate(strings._fields):
setattr(strings, name,
property(itemgetter(i), doc='Alias for field number {}'.format(i)))
This version of the named tuple avoids the long argument lists altogether, but otherwise behaves exactly like the original. The somewhat verbose __new__ method is not strictly needed but does closely emulate the original behaviour when arguments are incomplete. Note the construction of the _fields attribute; replace this with your own to name your tuple fields.
Pass in a generator expression to set your arguments:
s = strings(i for i in range(400))
or if you have a list of values:
s = strings(iter(list_of_values))
Either technique bypasses the limits on function signatures and function call argument counts.
Demo:
>>> s = strings(i for i in range(400))
>>> s
strings(a0=0, a1=1, a2=2, a3=3, a4=4, a5=5, a6=6, a7=7, a8=8, a9=9, a10=10, a11=11, a12=12, a13=13, a14=14, a15=15, a16=16, a17=17, a18=18, a19=19, a20=20, a21=21, a22=22, a23=23, a24=24, a25=25, a26=26, a27=27, a28=28, a29=29, a30=30, a31=31, a32=32, a33=33, a34=34, a35=35, a36=36, a37=37, a38=38, a39=39, a40=40, a41=41, a42=42, a43=43, a44=44, a45=45, a46=46, a47=47, a48=48, a49=49, a50=50, a51=51, a52=52, a53=53, a54=54, a55=55, a56=56, a57=57, a58=58, a59=59, a60=60, a61=61, a62=62, a63=63, a64=64, a65=65, a66=66, a67=67, a68=68, a69=69, a70=70, a71=71, a72=72, a73=73, a74=74, a75=75, a76=76, a77=77, a78=78, a79=79, a80=80, a81=81, a82=82, a83=83, a84=84, a85=85, a86=86, a87=87, a88=88, a89=89, a90=90, a91=91, a92=92, a93=93, a94=94, a95=95, a96=96, a97=97, a98=98, a99=99, a100=100, a101=101, a102=102, a103=103, a104=104, a105=105, a106=106, a107=107, a108=108, a109=109, a110=110, a111=111, a112=112, a113=113, a114=114, a115=115, a116=116, a117=117, a118=118, a119=119, a120=120, a121=121, a122=122, a123=123, a124=124, a125=125, a126=126, a127=127, a128=128, a129=129, a130=130, a131=131, a132=132, a133=133, a134=134, a135=135, a136=136, a137=137, a138=138, a139=139, a140=140, a141=141, a142=142, a143=143, a144=144, a145=145, a146=146, a147=147, a148=148, a149=149, a150=150, a151=151, a152=152, a153=153, a154=154, a155=155, a156=156, a157=157, a158=158, a159=159, a160=160, a161=161, a162=162, a163=163, a164=164, a165=165, a166=166, a167=167, a168=168, a169=169, a170=170, a171=171, a172=172, a173=173, a174=174, a175=175, a176=176, a177=177, a178=178, a179=179, a180=180, a181=181, a182=182, a183=183, a184=184, a185=185, a186=186, a187=187, a188=188, a189=189, a190=190, a191=191, a192=192, a193=193, a194=194, a195=195, a196=196, a197=197, a198=198, a199=199, a200=200, a201=201, a202=202, a203=203, a204=204, a205=205, a206=206, a207=207, a208=208, a209=209, a210=210, a211=211, a212=212, a213=213, a214=214, a215=215, a216=216, a217=217, a218=218, a219=219, a220=220, a221=221, a222=222, a223=223, a224=224, a225=225, a226=226, a227=227, a228=228, a229=229, a230=230, a231=231, a232=232, a233=233, a234=234, a235=235, a236=236, a237=237, a238=238, a239=239, a240=240, a241=241, a242=242, a243=243, a244=244, a245=245, a246=246, a247=247, a248=248, a249=249, a250=250, a251=251, a252=252, a253=253, a254=254, a255=255, a256=256, a257=257, a258=258, a259=259, a260=260, a261=261, a262=262, a263=263, a264=264, a265=265, a266=266, a267=267, a268=268, a269=269, a270=270, a271=271, a272=272, a273=273, a274=274, a275=275, a276=276, a277=277, a278=278, a279=279, a280=280, a281=281, a282=282, a283=283, a284=284, a285=285, a286=286, a287=287, a288=288, a289=289, a290=290, a291=291, a292=292, a293=293, a294=294, a295=295, a296=296, a297=297, a298=298, a299=299, a300=300, a301=301, a302=302, a303=303, a304=304, a305=305, a306=306, a307=307, a308=308, a309=309, a310=310, a311=311, a312=312, a313=313, a314=314, a315=315, a316=316, a317=317, a318=318, a319=319, a320=320, a321=321, a322=322, a323=323, a324=324, a325=325, a326=326, a327=327, a328=328, a329=329, a330=330, a331=331, a332=332, a333=333, a334=334, a335=335, a336=336, a337=337, a338=338, a339=339, a340=340, a341=341, a342=342, a343=343, a344=344, a345=345, a346=346, a347=347, a348=348, a349=349, a350=350, a351=351, a352=352, a353=353, a354=354, a355=355, a356=356, a357=357, a358=358, a359=359, a360=360, a361=361, a362=362, a363=363, a364=364, a365=365, a366=366, a367=367, a368=368, a369=369, a370=370, a371=371, a372=372, a373=373, a374=374, a375=375, a376=376, a377=377, a378=378, a379=379, a380=380, a381=381, a382=382, a383=383, a384=384, a385=385, a386=386, a387=387, a388=388, a389=389, a390=390, a391=391, a392=392, a393=393, a394=394, a395=395, a396=396, a397=397, a398=398, a399=399)
>>> s.a391
391
namedtuple out of the box doesn't support what you are trying to do.
So the following might achieve the goal, which might change from 400 to 450 arguments, or lesser and saner.
def customtuple(*keys):
class string:
_keys = keys
_dict = {}
def __init__(self, *args):
args = list(args)
if len(args) != len(self._keys):
raise Exception("No go forward")
for key in range(len(args)):
self._dict[self._keys[key]] = args[key]
def __setattr__(self, *args):
raise BaseException("Not allowed")
def __getattr__(self, arg):
try:
return self._dict[arg]
except:
raise BaseException("Name not defined")
def __repr__(self):
return ("string(%s)"
%(", ".join(["%s=%r"
%(self._keys[key],
self._dict[self._keys[key]])
for key in range(len(self._dict))])))
return string
>>> strings = customtuple(*['a'+str(x) for x in range(1, 401)])
>>> s = strings(*['a'+str(x) for x in range(2, 402)])
>>> s.a1
'a2'
>>> s.a1 = 1
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/hus787/p.py", line 15, in __setattr__
def __setattr__(self, *args):
BaseException: Not allowed
For more light on the subject.
Here is my version of a replacement for namedtuple that supports more than 255 arguments. The idea was not to be functionally equivalent but rather to improve on some aspects (in my opinion). This is for Python 3.4+ only:
class SequenceAttrReader(object):
""" Class to function similar to collections.namedtuple but allowing more than 255 keys.
Initialize with attribute string (space separated), then load in data via a sequence, then access the list keys as properties
i.e.
csv_line = SequenceAttrReader('a b c')
csv_line = csv_line.load([1, 2, 3])
print(csv_line.b)
>> 2
"""
_attr_string = None
_attr_list = []
_data_list = []
def __init__(self, attr_string):
if not attr_string:
raise AttributeError('SequenceAttrReader not properly initialized, please use a non-empty string')
self._attr_string = attr_string
self._attr_list = attr_string.split(' ')
def __getattr__(self, name):
if not self._attr_string or not self._attr_list or not self._data_list:
raise AttributeError('SequenceAttrReader not properly initialized or loaded')
try:
index = self._attr_list.index(name)
except ValueError:
raise AttributeError("'{name}'' not in attribute string".format(name=name)) from None
try:
value = self._data_list[index]
except IndexError:
raise AttributeError("No attribute named '{name}'' in".format(name=name)) from None
return value
def __str__(self):
return str(self._data_list)
def __repr__(self):
return 'SequenceAttrReader("{attr_string}")'.format(attr_string=self._attr_string)
def load_data(self, data_list):
if not self._attr_list:
raise AttributeError('SequenceAttrReader not properly initialized')
if not data_list:
raise AttributeError('SequenceAttrReader needs to load a non-empty sequence')
self._data_list = data_list
This is probably not the most efficient way if you are doing a lot of individual lookups, converting it internally to a dict may be better. I'll work on an optimized version once I have more time or at least see what the performance difference is.

Python / YAML: How to initialize additional objects not just from the YAML file, within loadConfig?

I have what I think is a small misconception with loading some YAML objects. I defined the class below.
What I want to do is load some objects with the overridden loadConfig function for YAMLObjects. Some of these come from my .yaml file, but others should be built out of objects loaded from the YAML file.
For instance, in the class below, I load a member object named "keep" which is a string naming some items to keep in the region. But I want to also parse this into a list and have the list stored as a member object too. And I don't want the user to have to give both the string and list version of this parameter in the YAML.
My current work around has been to override the __getattr__ function inside Region and make it create the defaults if it looks and doesn't find them. But this is clunky and more complicated than needed for just initializing objects.
What convention am I misunderstanding here. Why doesn't the loadConfig method create additional things not found in the YAML?
import yaml, pdb
class Region(yaml.YAMLObject):
yaml_tag = u'!Region'
def __init__(self, name, keep, drop):
self.name = name
self.keep = keep
self.drop = drop
self.keep_list = self.keep.split("+")
self.drop_list = self.drop.split("+")
self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
###
def loadConfig(self, yamlConfig):
yml = yaml.load_all(file(yamlConfig))
for data in yml:
# These get created fine
self.name = data["name"]
self.keep = data["keep"]
self.drop = data["drop"]
# These do not get created.
self.keep_list = self.keep.split("+")
self.drop_list = self.drop.split("+")
self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
###
### End Region
if __name__ == "__main__":
my_yaml = "/home/path/to/test.yaml"
region_iterator = yaml.load_all(file(my_yaml))
# Set a debug breakpoint to play with region_iterator and
# confirm the extra stuff isn't created.
pdb.set_trace()
And here is test.yaml so you can run all of this and see what I mean:
Regions:
# Note: the string conventions below are for an
# existing system. This is a shortened, representative
# example.
Market1:
!Region
name: USAndGB
keep: US+GB
drop: !!null
Market2:
!Region
name: CanadaAndAustralia
keep: CA+AU
drop: !!null
And here, for example, is what it looks like for me when I run this in an IPython shell and explore the loaded object:
In [57]: %run "/home/espears/testWorkspace/testRegions.py"
--Return--
> /home/espears/testWorkspace/testRegions.py(38)<module>()->None
-> pdb.set_trace()
(Pdb) region_iterator
<generator object load_all at 0x1139d820>
(Pdb) tmp = region_iterator.next()
(Pdb) tmp
{'Regions': {'Market2': <__main__.Region object at 0x1f858550>, 'Market1': <__main__.Region object at 0x11a91e50>}}
(Pdb) us = tmp['Regions']['Market1']
(Pdb) us
<__main__.Region object at 0x11a91e50>
(Pdb) us.name
'USAndGB'
(Pdb) us.keep
'US+GB'
(Pdb) us.keep_list
*** AttributeError: 'Region' object has no attribute 'keep_list'
A pattern I have found useful for working with yaml for classes that are basically storage is to have the loader use the constructor so that objects are created in the same way as when you make them normally. If I understand what you are attempting to do correctly, this kind of structure might be useful:
import inspect
import yaml
from collections import OrderedDict
class Serializable(yaml.YAMLObject):
__metaclass__ = yaml.YAMLObjectMetaclass
#property
def _dict(self):
dump_dict = OrderedDict()
for var in inspect.getargspec(self.__init__).args[1:]:
if getattr(self, var, None) is not None:
item = getattr(self, var)
if isinstance(item, np.ndarray) and item.ndim == 1:
item = list(item)
dump_dict[var] = item
return dump_dict
#classmethod
def to_yaml(cls, dumper, data):
return ordered_dump(dumper, '!{0}'.format(data.__class__.__name__),
data._dict)
#classmethod
def from_yaml(cls, loader, node):
fields = loader.construct_mapping(node, deep=True)
return cls(**fields)
def ordered_dump(dumper, tag, data):
value = []
node = yaml.nodes.MappingNode(tag, value)
for key, item in data.iteritems():
node_key = dumper.represent_data(key)
node_value = dumper.represent_data(item)
value.append((node_key, node_value))
return node
You would then want to have your Region class inherit from Serializable, and remove the loadConfig stuff. The code I posted inspects the constructor to see what data to save to the yaml file, and then when loading a yaml file calls the constructor with that same set of data. That way you just have to get the logic right in your constructor and the yaml loading should get it for free.
That code was ripped from one of my projects, apologies in advance if it doesn't quite work. It is also slightly more complicated than it needs to be because I wanted to control the order of output by using OrderedDict. You could replace my ordered_dump function with a call to dumper.represent_dict.

Elegant pattern for mutually exclusive keyword args?

Sometimes in my code I have a function which can take an argument in one of two ways. Something like:
def func(objname=None, objtype=None):
if objname is not None and objtype is not None:
raise ValueError("only 1 of the ways at a time")
if objname is not None:
obj = getObjByName(objname)
elif objtype is not None:
obj = getObjByType(objtype)
else:
raise ValueError("not given any of the ways")
doStuffWithObj(obj)
Is there any more elegant way to do this? What if the arg could come in one of three ways? If the types are distinct I could do:
def func(objnameOrType):
if type(objnameOrType) is str:
getObjByName(objnameOrType)
elif type(objnameOrType) is type:
getObjByType(objnameOrType)
else:
raise ValueError("unk arg type: %s" % type(objnameOrType))
But what if they are not? This alternative seems silly:
def func(objnameOrType, isName=True):
if isName:
getObjByName(objnameOrType)
else:
getObjByType(objnameOrType)
cause then you have to call it like func(mytype, isName=False) which is weird.
How about using something like a command dispatch pattern:
def funct(objnameOrType):
dispatcher = {str: getObjByName,
type1: getObjByType1,
type2: getObjByType2}
t = type(objnameOrType)
obj = dispatcher[t](objnameOrType)
doStuffWithObj(obj)
where type1,type2, etc are actual python types (e.g. int, float, etc).
Sounds like it should go to https://codereview.stackexchange.com/
Anyway, keeping the same interface, I may try
arg_parsers = {
'objname': getObjByName,
'objtype': getObjByType,
...
}
def func(**kwargs):
assert len(kwargs) == 1 # replace this with your favorite exception
(argtypename, argval) = next(kwargs.items())
obj = arg_parsers[argtypename](argval)
doStuffWithObj(obj)
or simply create 2 functions?
def funcByName(name): ...
def funcByType(type_): ...
One way to make it slightly shorter is
def func(objname=None, objtype=None):
if [objname, objtype].count(None) != 1:
raise TypeError("Exactly 1 of the ways must be used.")
if objname is not None:
obj = getObjByName(objname)
else:
obj = getObjByType(objtype)
I have not yet decided if I would call this "elegant".
Note that you should raise a TypeError if the wrong number of arguments was given, not a ValueError.
For whatever it's worth, similar kinds of things happen in the Standard Libraries; see, for example, the beginning of GzipFile in gzip.py (shown here with docstrings removed):
class GzipFile:
myfileobj = None
max_read_chunk = 10 * 1024 * 1024 # 10Mb
def __init__(self, filename=None, mode=None,
compresslevel=9, fileobj=None):
if mode and 'b' not in mode:
mode += 'b'
if fileobj is None:
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
if filename is None:
if hasattr(fileobj, 'name'): filename = fileobj.name
else: filename = ''
if mode is None:
if hasattr(fileobj, 'mode'): mode = fileobj.mode
else: mode = 'rb'
Of course this accepts both filename and fileobj keywords and defines a particular behavior in the case that it receives both; but the general approach seems pretty much identical.
I use a decorator:
from functools import wraps
def one_of(kwarg_names):
# assert that one and only one of the given kwarg names are passed to the decorated function
def inner(f):
#wraps(f)
def wrapped(*args, **kwargs):
count = 0
for kw in kwargs:
if kw in kwarg_names and kwargs[kw] is not None:
count += 1
assert count == 1, f'exactly one of {kwarg_names} required, got {kwargs}'
return f(*args, **kwargs)
return wrapped
return inner
Used as:
#one_of(['kwarg1', 'kwarg2'])
def my_func(kwarg1='default', kwarg2='default'):
pass
Note that this only accounts for non- None values that are passed as keyword arguments. E.g. multiple of the kwarg_names may still be passed if all but one of them have a value of None.
To allow for passing none of the kwargs simply assert that the count is <= 1.
It sounds like you're looking for function overloading, which isn't implemented in Python 2. In Python 2, your solution is nearly as good as you can expect to get.
You could probably bypass the extra argument problem by allowing your function to process multiple objects and return a generator:
import types
all_types = set([getattr(types, t) for t in dir(types) if t.endswith('Type')])
def func(*args):
for arg in args:
if arg in all_types:
yield getObjByType(arg)
else:
yield getObjByName(arg)
Test:
>>> getObjByName = lambda a: {'Name': a}
>>> getObjByType = lambda a: {'Type': a}
>>> list(func('IntType'))
[{'Name': 'IntType'}]
>>> list(func(types.IntType))
[{'Type': <type 'int'>}]
The built-in sum() can be used to on a list of boolean expressions. In Python, bool is a subclass of int, and in arithmetic operations, True behaves as 1, and False behaves as 0.
This means that this rather short code will test mutual exclusivity for any number of arguments:
def do_something(a=None, b=None, c=None):
if sum([a is not None, b is not None, c is not None]) != 1:
raise TypeError("specify exactly one of 'a', 'b', or 'c'")
Variations are also possible:
def do_something(a=None, b=None, c=None):
if sum([a is not None, b is not None, c is not None]) > 1:
raise TypeError("specify at most one of 'a', 'b', or 'c'")
I occasionally run into this problem as well, and it is hard to find an easily generalisable solution. Say I have more complex combinations of arguments that are delineated by a set of mutually exclusive arguments and want to support additional arguments for each (some of which may be required and some optional), as in the following signatures:
def func(mutex1: str, arg1: bool): ...
def func(mutex2: str): ...
def func(mutex3: int, arg1: Optional[bool] = None): ...
I would use object orientation to wrap the arguments in a set of descriptors (with names depending on the business meaning of the arguments), which can then be validated by something like pydantic:
from typing import Optional
from pydantic import BaseModel, Extra
# Extra.forbid ensures validation error if superfluous arguments are provided
class BaseDescription(BaseModel, extra=Extra.forbid):
pass # Arguments common to all descriptions go here
class Description1(BaseDescription):
mutex1: str
arg1: bool
class Description2(BaseDescription):
mutex2: str
class Description3(BaseDescription):
mutex3: int
arg1: Optional[bool]
You could instantiate these descriptions with a factory:
class DescriptionFactory:
_class_map = {
'mutex1': Description1,
'mutex2': Description2,
'mutex3': Description3
}
#classmethod
def from_kwargs(cls, **kwargs) -> BaseDescription:
kwargs = {k: v for k, v in kwargs.items() if v is not None}
set_fields = kwargs.keys() & cls._class_map.keys()
try:
[set_field] = set_fields
except ValueError:
raise ValueError(f"exactly one of {list(cls._class_map.keys())} must be provided")
return cls._class_map[set_field](**kwargs)
#classmethod
def validate_kwargs(cls, func):
def wrapped(**kwargs):
return func(cls.from_kwargs(**kwargs))
return wrapped
Then you can wrap your actual function implementation like this and use type checking to see which arguments were provided:
#DescriptionFactory.validate_kwargs
def func(desc: BaseDescription):
if isinstance(desc, Description1):
... # use desc.mutex1 and desc.arg1
elif isinstance(desc, Description2):
... # use desc.mutex2
... # etc.
and call as func(mutex1='', arg1=True), func(mutex2=''), func(mutex3=123) and so on.
This is not overall shorter code, but it performs argument validation in a very descriptive way according to your specification, raises useful pydantic errors when validation fails, and results in accurate static types in each branch of the function implementation.
Note that if you're using Python 3.10+, structural pattern matching could simplify some parts of this.

Categories