I am trying to declare a base class with certain attributes whose (very expensive) calculation differs depending on the subclass, but which accepts injecting a value that was previously calculated:
class Test:
_value1: int | None = None
_value2: str | None = None
_value3: list | None = None
_value4: dict | None = None
@property
def value1(self) -> int:
if self._value1 is None:
self._value1 = self._get_value1()
return self._value1
@value1.setter
def value1(self, value1: int) -> None:
self._value1 = value1
def _get_value1(self) -> int:
raise NotImplementedError
class SubClass(Test):
def _get_value1(self) -> int:
time.sleep(1000000)
return 1
instance = SubClass()
instance.value1 = 1
print(instance.value1) # doesn't wait
As you can see, it becomes very verbose, with every property having three different functions associated with it.
Is there a way to dynamically declare at the very least the setter, so that mypy knows it's always the same function but with proper typing? Or in general, is there a more concise way to declare this kind of writable property, whose underlying computation must be supplied by each subclass, in bulk?
Declaring __setattr__ doesn't seem to be viable: just having __setattr__ declared tricks mypy into thinking I can assign any value to any attribute that isn't overloaded, while I still want errors to show up when I assign to the wrong attributes. It also doesn't remove the need to declare setters; without them, mypy thinks the value is immutable.
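To illustrate with a minimal sketch (a hypothetical class, not my real code): once __setattr__ is declared, mypy only checks assignments to undeclared attributes against its value parameter:
class Loose:
    value1: int
    def __setattr__(self, name: str, value: object) -> None:
        super().__setattr__(name, value)

obj = Loose()
obj.anything = [1, 2]  # mypy accepts this silently: the assignment only has to
                       # match the `value: object` parameter of __setattr__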
Instead of inheriting a bunch of pre-defined properties from a base class, I would move all the logic surrounding each property into a custom descriptor class. (The following assumes Python 3.11 and mypy version 1.0.0.)
from typing import TypeVar, Generic, Callable, Type, Optional, Self, Union, overload
T = TypeVar('T')
C = TypeVar('C')
class Descriptor(Generic[C, T]):
def __init__(self, f: Callable[[C], T]):
self.getter = f
def __set_name__(self, owner: Type[C], name: str):
self.private_name = "_" + name
self.public_name = name
@overload
def __get__(self: Self, obj: C, objtype: Optional[Type[C]]) -> T:
...
@overload
def __get__(self: Self, obj: None, objtype: Type[C]) -> Self:
...
def __get__(self: Self, obj: Optional[C], owner: Optional[Type[C]] = None) -> Union[Self, T]:
if obj is None:
return self
if getattr(obj, self.private_name, None) is None:
init_value = self.getter(obj)
self.__set__(obj, init_value)
return getattr(obj, self.private_name)
def __set__(self, obj: C, value: T):
setattr(obj, self.private_name, value)
Then you can define each descriptor similar to how you would define a property, by decorating the function that returns the initial value to use when none has been set yet.
class Test:
@Descriptor
def value1(self) -> int:
time.sleep(10000000)
return 1
@Descriptor
def value2(self) -> str:
return "foo"
@Descriptor
def value3(self) -> list:
return [1, 2, 3]
@Descriptor
def value4(self) -> dict:
return dict(foo=9)
The descriptor class is generic in both the class it will be used in and the type of the wrapped value.
x = Test()
reveal_type(x.value1) # int
reveal_type(Test.value1) # Descriptor[Test, int]
x.value1 = 3 # OK
x.value1 = "foo" # error, x.__set__ expects an int, not a str
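Since the getter is just the decorated function, a subclass can supply its own computation by shadowing the inherited descriptor. Here is a runtime sketch mirroring the question's SubClass pattern (not part of the original answer, and whether mypy accepts the shadowing override as-is is not verified here):
import time

class SubTest(Test):
    @Descriptor
    def value1(self) -> int:
        time.sleep(1000000)  # stand-in for the expensive calculation
        return 1

y = SubTest()
y.value1 = 1     # inject a precomputed value via __set__
print(y.value1)  # prints 1 immediately; the expensive getter never runs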
If you wanted to simply omit writing the @property setter (this part)
@value1.setter
def value1(self, value1: int) -> None:
self._value1 = value1
one possible implementation would be to subclass property to automatically implement a __set__ method which matches the behaviour specified in your example:
from __future__ import annotations
import typing as t
if t.TYPE_CHECKING:
import collections.abc as cx
_ValueT = t.TypeVar("_ValueT")
class settable(property, t.Generic[_ValueT]):
fget: cx.Callable[[t.Any], _ValueT]
def __init__(self, fget: cx.Callable[[t.Any], _ValueT], /) -> None:
super().__init__(fget)
if t.TYPE_CHECKING:
# Type-safe descriptor protocol for property retrieval methods (`__get__`)
# see https://docs.python.org/3/howto/descriptor.html
# These are under `typing.TYPE_CHECKING` because we don't need
# to modify their implementation from `builtins.property`, but
# just need to add type-safety.
@t.overload  # type: ignore[override, no-overload-impl]
def __get__(self, instance: None, Class: type, /) -> settable[_ValueT]:
"""
Retrieving a property from a class (`instance: None`) retrieves the
property object (`settable[_ValueT]`)
"""
@t.overload
def __get__(self, instance: object, Class: type, /) -> _ValueT:
"""
Retrieving a property from the instance (all other `typing.overload` cases)
retrieves the value
"""
def __set__(self, instance: t.Any, value: _ValueT) -> None:
"""
Type-safe setter method. Grabs the name of the function first decorated with
`@settable`, then calls `setattr` on the given value with an attribute name of
'_<function name>'.
"""
setattr(instance, f"_{self.fget.__name__}", value)
Here's a demonstration of type-safety:
import time
class Test:
_value1: int | None = None
_value2: str | None = None
_value3: list | None = None
_value4: dict | None = None
@settable
def value1(self) -> int:
if self._value1 is None:
self._value1 = self._get_value1()
return self._value1
def _get_value1(self) -> int:
raise NotImplementedError
class SubClass(Test):
def _get_value1(self) -> int:
time.sleep(1000000)
return 1
>>> instance: SubClass = SubClass()
>>> instance.value1 = 1 # OK
>>>
>>> if t.TYPE_CHECKING:
... reveal_type(instance.value1) # mypy: Revealed type is "builtins.int"
...
>>> print(instance.value1)
1
>>> instance.value1 = "1" # mypy: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment]
>>> SubClass.value1 = 1 # mypy: Cannot assign to a method [assignment]
... # mypy: Incompatible types in assignment (expression has type "int", variable has type "settable[int]") [assignment]
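The same decorator covers the remaining fields without writing any setters; for example, value2 would follow the same pattern (a sketch, assuming a _get_value2 helper analogous to _get_value1):
    @settable
    def value2(self) -> str:
        if self._value2 is None:
            self._value2 = self._get_value2()  # hypothetical helper
        return self._value2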
Basically I need the following. I have a Python 3 dataclass or NamedTuple, with only enum and bool fields. E.g.:
from enum import Enum, auto
from typing import NamedTuple
class MyEnum(Enum):
v1 = auto()
v2 = auto()
v3 = auto()
class MyStateDefinition(NamedTuple):
a: MyEnum
b: bool
Is there any well-known solution to enumerate all possible non-equal instances of such a dataclass? (The example above has 6 possible non-equal instances.)
Perhaps it is not a dataclass I should use, but something else. Or should I work with things like dataclasses.fields directly?
I imagine it as some table generator which accepts a namedtuple or dataclass as an input parameter and produces all possible values.
table = DataTable(MyStateDefinition)
for item in table:
# Use items somehow
print(item.a)
print(item.b)
Why do I need it? I just have some state definition that consists of enums and bools. I believe it could be implemented as a bitmask, but when it comes to extending your bitmask with new values, it turns out to be a nightmare. After all, bitmasks seem to be a non-Pythonic way of doing things.
Currently I have to use an implementation of my own. But perhaps I'm reinventing the wheel.
Thanks!
You can do this using enums, with the data-tuples as the enum-members' value (an Enum/NamedTuple hybrid, if you will). The _ignore_ attribute is used to prevent certain names in the class namespace from being converted into enum members.
from itertools import product
from enum import Enum
class Data(Enum):
_ignore_ = "Data", "myenum_member", "truthiness"
@property
def a(self):
return self.value[0]
@property
def b(self):
return self.value[1]
def __repr__(self):
return f'Data(a={self.a!r}, b={self.b!r})'
Data = vars()
for myenum_member, truthiness in product(MyEnum, (True, False)):
Data[f'{myenum_member.name}_{truthiness}'] = (myenum_member, truthiness)
You should be able to iterate through the resulting enum class just as you desire.
This use of enums is similar to the "time period" example in the Enum HOWTO section of the docs.
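For instance, iterating and accessing the generated members looks roughly like this (a sketch; the member order follows the product() loop above):
>>> list(Data)
[Data(a=<MyEnum.v1: 1>, b=True), Data(a=<MyEnum.v1: 1>, b=False), Data(a=<MyEnum.v2: 2>, b=True), Data(a=<MyEnum.v2: 2>, b=False), Data(a=<MyEnum.v3: 3>, b=True), Data(a=<MyEnum.v3: 3>, b=False)]
>>> Data.v2_False.a
<MyEnum.v2: 2>
>>> Data.v2_False.b
False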
Generating this kind of table dynamically
If you want to generate this kind of table dynamically, you could do something like this, (ab)using metaclasses. I've shown example usages for how you would use this DataTable class in the docstrings. (For some reason, using typing.get_type_hints in a doctest seems to cause the doctest module to error out, but the examples do work if you try them yourself in an interactive terminal.) Rather than special-casing bool, as you did in your answer, I decided to special-case typing.Literal, as it seemed like a more extensible option (and bool can just be spelled as typing.Literal[True, False]).
from __future__ import annotations
from itertools import product
from enum import Enum, EnumMeta
from typing import (
Iterable,
Mapping,
cast,
Protocol,
get_type_hints,
Any,
get_args,
get_origin,
Literal,
TypeVar,
Union,
Optional
)
D = TypeVar('D')
T = TypeVar('T')
class DataTableFactory(EnumMeta):
"""A helper class for making data tables (an implementation detail of `DataTable`)."""
_CLS_BASES = (Enum,)
@classmethod
def __prepare__( # type: ignore[override]
metacls,
cls_name: str,
fields: Mapping[str, Iterable[Any]]
) -> dict[str, Any]:
cls_dict = cast(
dict[str, Any],
super().__prepare__(cls_name, metacls._CLS_BASES)
)
for i, field in enumerate(fields.keys()):
cls_dict[field] = property(fget=lambda self, i=i: self.value[i]) # type: ignore[misc]
for p in product(*fields.values()):
cls_dict['_'.join(map(str, p))] = p
def __repr__(self: Enum) -> str:
contents = ', '.join(
f'{field}={getattr(self, field)!r}'
for field in fields
)
return f'{cls_name}Member({contents})'
cls_dict['__repr__'] = __repr__
return cls_dict
@classmethod
def make_datatable(
metacls,
cls_name: str,
*,
fields: Mapping[str, Iterable[Any]],
doc: Optional[str] = None
) -> type[Enum]:
"""Create a new data table"""
cls_dict = metacls.__prepare__(cls_name, fields)
new_cls = metacls.__new__(metacls, cls_name, metacls._CLS_BASES, cls_dict)
new_cls.__module__ = __name__
if doc is None:
all_attrs = '\n'.join(
f' {f"{attr_name}: ":<{(max(map(len, fields)) + 3)}}one of {attr_val!r}'
for attr_name, attr_val in fields.items()
)
fields_len = len(fields)
doc = (
f'An enum-like data table.\n\n'
f'All members of this data table have {fields_len} '
f'read-only attribute{"s" if fields_len > 1 else ""}:\n'
f'{all_attrs}\n\n'
f'----------------------------------------------------------------------'
)
new_cls.__doc__ = doc
return cast(type[Enum], new_cls)
def __repr__(cls) -> str:
return f"<Data table '{cls.__name__}'>"
def index_of(cls: Iterable[D], member: D) -> int:
"""Get the index of a member in the list of members."""
return list(cls).index(member)
def get(
cls: Iterable[D],
/,
*,
default_: Optional[T] = None,
**kwargs: Any
) -> Union[D, T, None]:
"""Return instance for given arguments set.
Return `default_` if no member matches those arguments.
"""
it = (
member for member in cls
if all((getattr(member, key) == val) for key, val in kwargs.items())
)
return next(it, default_)
def __dir__(cls) -> list[str]:
# By defining __dir__, we make methods defined in this class
# discoverable by the interactive help() function in the REPL
return cast(list[str], super().__dir__()) + ['index_of', 'get']
class TypedStructProto(Protocol):
"""In order to satisfy this interface, a type must have an __annotations__ dict."""
__annotations__: dict[str, Union[Iterable[Any], type[Literal[True]]]]
class DataTableMeta(type):
"""Metaclass for `DataTable`."""
__call__ = DataTableFactory.make_datatable # type: ignore[assignment]
class DataTable(metaclass=DataTableMeta):
"""A mechanism to create 'data table enumerations' -- not really a class at all!
Example usage
-------------
>>> Cars = DataTable('Cars', fields={'make': ('Toyota', 'Audi'), 'colour': ('Red', 'Blue')})
>>> Cars
<Data table 'Cars'>
>>> list(Cars)
[CarsMember(make='Toyota', colour='Red'), CarsMember(make='Toyota', colour='Blue'), CarsMember(make='Audi', colour='Red'), CarsMember(make='Audi', colour='Blue')]
>>> Cars.get(make='Audi', colour='Red')
CarsMember(make='Audi', colour='Red')
>>> Cars.index_of(_)
2
"""
@classmethod
def from_struct(cls, cls_name: str, *, struct: type[TypedStructProto], doc: Optional[str] = None) -> type[Enum]:
"""Make a DataTable from a "typed struct" -- e.g. a dataclass, NamedTuple or TypedDict.
Example usage (works the same way with dataclasses and TypedDicts)
-------------------------------------------------------------------
>>> from enum import Enum, auto
>>> from typing import NamedTuple, Literal
>>> class E(Enum):
... v1 = auto()
... v2 = auto()
... v3 = auto()
...
>>> class BoolsEndEnums(NamedTuple):
... a: E
... b: Literal[True, False]
...
>>> BoolsEndEnumsTable = DataTable.from_struct('BoolsEndEnumsTable', struct=BoolsEndEnums)
>>> list(BoolsEndEnumsTable)
[BoolsEndEnumsTableMember(a=<E.v1: 1>, b=True), BoolsEndEnumsTableMember(a=<E.v1: 1>, b=False), BoolsEndEnumsTableMember(a=<E.v2: 2>, b=True), BoolsEndEnumsTableMember(a=<E.v2: 2>, b=False), BoolsEndEnumsTableMember(a=<E.v3: 3>, b=True), BoolsEndEnumsTableMember(a=<E.v3: 3>, b=False)]
"""
fields = get_type_hints(struct)
for field_name, field_val in fields.items():
if get_origin(field_val) is Literal:
fields[field_name] = get_args(field_val)
return cast(type[Enum], cls(cls_name, fields=fields, doc=doc)) # type: ignore[call-arg]
I've had to do some "interesting" things with the type hints, but MyPy is sort of happy with all this.
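As the from_struct docstring claims, the same call works with a dataclass too; a quick sketch (assuming the E enum from the docstring above; BoolsEndEnumsDC is a made-up name):
from dataclasses import dataclass

@dataclass
class BoolsEndEnumsDC:
    a: E
    b: Literal[True, False]

BoolsEndEnumsDCTable = DataTable.from_struct('BoolsEndEnumsDCTable', struct=BoolsEndEnumsDC)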
Also posting an implementation of my own. It's not ideal; I had to use some protected members.
Usage:
from typing import NamedTuple
from datatable import DataTable
class BoolsEndEnums(NamedTuple):
a: E
b: bool
tbl = DataTable(BoolsEndEnums)
item = tbl[0]
print(item.a) # a is v1
print(item.b) # b is False
See test_datatable.py, _test_cls for more usage examples.
datatable.py
import collections
import dataclasses
from collections.abc import Iterable, Sequence
from enum import Enum
from typing import Union, Any, Tuple, Iterator, get_type_hints, NamedTuple
def is_cls_namedtuple(cls):
return issubclass(cls, tuple) and hasattr(cls, "_fields")
class DataTable(Iterable):
def __init__(self, data_cls):
self._table = []
self._index = {}
self._rindex = {}
self._named_tuple_cls = None
fields = None
if dataclasses.is_dataclass(data_cls):
fields = [f.name for f in dataclasses.fields(data_cls)]
self._named_tuple_cls = collections.namedtuple(
f"{data_cls.__name__}_immutable",
fields
)
elif is_cls_namedtuple(data_cls):
self._named_tuple_cls = data_cls
fields = data_cls._fields
else:
raise ValueError(
"Only dataclasses and NamedTuple subclasses are supported."
)
hints = get_type_hints(data_cls)
self._build_table([], [(f, hints[f]) for f in fields])
def index_of(self, instance):
"""
Returns record index of given instance in table.
:param instance:
:return:
"""
index = self._as_index(instance)
return self._rindex.get(index)
def get(self, **kw):
"""
Returns instance for given arguments set
:param kw:
:return:
"""
index = self._as_index(kw)
return self._table[self._rindex[index]]
def __len__(self):
return len(self._table)
def __getitem__(self, i: Union[int, slice]):
return self._table[i]
def __iter__(self) -> Iterator:
return self._table.__iter__()
def _build_table(self, defined_fields, remained_fields):
if not remained_fields:
instance = self._named_tuple_cls(**dict(defined_fields))
item_id = len(self._table)
self._index[item_id] = instance
self._rindex[self._as_index(defined_fields)] = item_id
self._table.append(instance)
return
next_name, next_type = remained_fields[0]
remained_fields = remained_fields[1:]
if issubclass(next_type, Enum):
for v in next_type:
self._build_table(
defined_fields + [(next_name, v)],
remained_fields
)
return
if next_type is bool:
self._build_table(
defined_fields + [(next_name, False)],
remained_fields
)
self._build_table(
defined_fields + [(next_name, True)],
remained_fields
)
return
raise ValueError(f"Got unexpected dataclass field type: {next_type}")
@staticmethod
def _as_index(v: Union[Any, Tuple[str, Any]]):
items = None
if dataclasses.is_dataclass(v):
items = dataclasses.asdict(v).items()
elif is_cls_namedtuple(type(v)):
items = v._asdict().items()
elif isinstance(v, dict):
items = v.items()
else:
assert isinstance(v, Sequence)
items = v
return tuple(sorted(items, key=lambda x: x[0]))
test_datatable.py
import dataclasses
from enum import Enum, auto
from typing import NamedTuple
import pytest
from datatable import DataTable
class E(Enum):
v1 = auto()
v2 = auto()
v3 = auto()
@dataclasses.dataclass
class BoolsEndEnums:
a: E
b: bool
class BoolsEndEnumsNamedTuple(NamedTuple):
a: E
b: bool
@dataclasses.dataclass
class HugeSetOfValues:
a: int
b: bool
class NotSupportedCls:
pass
def _test_cls(cls):
tbl = DataTable(cls)
first = cls(E.v1, False)
last = cls(E.v3, True)
expected_num_entries = 6
assert tbl.index_of(first) == 0
assert tbl.index_of(last) == (expected_num_entries - 1)
assert len(tbl) == expected_num_entries
actual_third = tbl.get(a=E.v2, b=False)
assert actual_third.a == E.v2
assert actual_third.b is False
actual_fourth = tbl[3]
assert actual_fourth.a == E.v2
assert actual_fourth.b is True
items = [item for item in tbl]
actual_fifth = items[4]
assert actual_fifth.a == E.v3
assert actual_fifth.b is False
# Test that we can't change result
with pytest.raises(AttributeError):
tbl[0].a = E.v2
def test_dataclass():
_test_cls(BoolsEndEnums)
def test_namedtuple():
_test_cls(BoolsEndEnumsNamedTuple)
def test_datatable_neg():
"""
Generic negative tests
"""
with pytest.raises(ValueError):
DataTable(HugeSetOfValues)
with pytest.raises(ValueError):
DataTable(NotSupportedCls)
Suppose I have a class hierarchy like this:
class SerializableWidget(object):
# some code
class WidgetA(SerializableWidget):
# some code
class WidgetB(SerializableWidget):
# some code
I want to be able to serialize instances of WidgetA and WidgetB (and potentially other widgets) to text files as json. Then, I want to be able to deserialize those, without knowing beforehand their specific class:
some_widget = deserialize_from_file(file_path) # pseudocode, doesn't have to be exactly a method like this
and some_widget needs to be constructed from the precise subclass of SerializableWidget. How do I do this? What methods exactly do I need to override/implement in each of the classes of my hierarchy?
Assume all fields of the above classes are primitive types. How do I override some __to_json__ and __from_json__ methods, something like that?
You can solve this with many methods. One example is to use the object_hook and default parameters to json.load and json.dump respectively.
All you need is to store the class together with the serialized version of the object, then when loading you have to use a mapping of which class goes with which name.
The example below uses a dispatcher class decorator to store the class name and object when serializing, and look it up later when deserializing. All you need is a _as_dict method on each class to convert the data to a dict:
import json
@dispatcher
class Parent(object):
def __init__(self, name):
self.name = name
def _as_dict(self):
return {'name': self.name}
@dispatcher
class Child1(Parent):
def __init__(self, name, n=0):
super().__init__(name)
self.n = n
def _as_dict(self):
d = super()._as_dict()
d['n'] = self.n
return d
@dispatcher
class Child2(Parent):
def __init__(self, name, k='ok'):
super().__init__(name)
self.k = k
def _as_dict(self):
d = super()._as_dict()
d['k'] = self.k
return d
Now for the tests. First let's create a list with 3 objects of different types.
>>> obj = [Parent('foo'), Child1('bar', 15), Child2('baz', 'works')]
Serializing it will yield the data with the class name in each object:
>>> s = json.dumps(obj, default=dispatcher.encoder_default)
>>> print(s)
[
{"__class__": "Parent", "name": "foo"},
{"__class__": "Child1", "name": "bar", "n": 15},
{"__class__": "Child2", "name": "baz", "k": "works"}
]
And loading it back generates the correct objects:
obj2 = json.loads(s, object_hook=dispatcher.decoder_hook)
print(obj2)
[
<__main__.Parent object at 0x7fb6cd561cf8>,
<__main__.Child1 object at 0x7fb6cd561d68>,
<__main__.Child2 object at 0x7fb6cd561e10>
]
Finally, here's the implementation of dispatcher:
class _Dispatcher:
def __init__(self, classname_key='__class__'):
self._key = classname_key
self._classes = {} # to keep a reference to the classes used
def __call__(self, class_): # decorate a class
self._classes[class_.__name__] = class_
return class_
def decoder_hook(self, d):
classname = d.pop(self._key, None)
if classname:
return self._classes[classname](**d)
return d
def encoder_default(self, obj):
d = obj._as_dict()
d[self._key] = type(obj).__name__
return d
dispatcher = _Dispatcher()
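Since the question mentions text files: the same hooks plug directly into json.dump and json.load; a small sketch ('widgets.json' is an arbitrary example path):
with open('widgets.json', 'w') as f:
    json.dump(obj, f, default=dispatcher.encoder_default)

with open('widgets.json') as f:
    obj2 = json.load(f, object_hook=dispatcher.decoder_hook)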
I really liked @nosklo's answer, but I wanted to customize the property name and value used when saving the model type, so I extended his code a little to add a sub-annotation.
(I know this isn't directly related to the question, but you can use this to serialize to json too, since it produces dict objects. Note that your base class must use the @dataclass annotation to serialize correctly; otherwise you could adjust this code to define the _as_dict method like @nosklo's answer.)
data.csv:
model_type, prop1
sub1, testfor1
sub2, testfor2
test.py:
import csv
from abc import ABC
from dataclasses import dataclass
from polymorphic import polymorphic
@polymorphic(keyname="model_type")
@dataclass
class BaseModel(ABC):
prop1: str
@polymorphic.subtype_when_(keyval="sub1")
class SubModel1(BaseModel):
pass
@polymorphic.subtype_when_(keyval="sub2")
class SubModel2(BaseModel):
pass
with open('data.csv') as csvfile:
reader = csv.DictReader(csvfile, skipinitialspace=True)
for row_data_dict in reader:
price_req = BaseModel.deserialize(row_data_dict)
print(price_req, '\n\tre-serialized: ', price_req.serialize())
polymorphic.py:
import dataclasses
import functools
from abc import ABC
from typing import Type
# https://stackoverflow.com/a/51976115
class _Polymorphic:
def __init__(self, keyname='__class__'):
self._key = keyname
self._class_mapping = {}
def __call__(self, abc: Type[ABC]):
functools.update_wrapper(self, abc)
setattr(abc, '_register_subtype', self._register_subtype)
setattr(abc, 'serialize', lambda self_subclass: self.serialize(self_subclass))
setattr(abc, 'deserialize', self.deserialize)
return abc
def _register_subtype(self, keyval, cls):
self._class_mapping[keyval] = cls
def serialize(self, self_subclass) -> dict:
my_dict = dataclasses.asdict(self_subclass)
my_dict[self._key] = next(keyval for keyval, cls in self._class_mapping.items() if cls == type(self_subclass))
return my_dict
def deserialize(self, data: dict):
classname = data.pop(self._key, None)
if classname:
return self._class_mapping[classname](**data)
raise ValueError(f'Invalid data: {self._key} was not found or it referred to an unrecognized class')
@staticmethod
def subtype_when_(*, keyval: str):
def register_subtype_for(_cls: Type[ABC]):
nonlocal keyval
if not keyval:
keyval = _cls.__name__
_cls._register_subtype(keyval, _cls)
@functools.wraps(_cls)
def construct_original_subclass(*args, **kwargs):
return _cls(*args, **kwargs)
return construct_original_subclass
return register_subtype_for
polymorphic = _Polymorphic
Sample console output:
SubModel1(prop1='testfor1')
re-serialized: {'prop1': 'testfor1', 'model_type': 'sub1'}
SubModel2(prop1='testfor2')
re-serialized: {'prop1': 'testfor2', 'model_type': 'sub2'}
I read the documentation and am not sure how to simplify the following code with Python properties:
class PatientRecordJson:
def __init__(self):
self.json = {}
def set_raw_data_field(self, string):
self.json['raw_data'] = string
def get_raw_data_field(self):
return self.json.get('raw_data', None)
def set_data_type(self, string):
self.json['data_type'] = string
def get_data_type(self):
return self.json.get('data_type', None)
def set_type_of_record(self, string):
self.json['type_of_record'] = string
def get_type_of_record(self):
return self.json.get('type_of_record', None)
def set_npi(self, string):
self.json['npi'] = string
def get_npi(self):
return self.json.get('npi', None)
You could override __getattr__ and __setattr__, which hook into attribute access of the form obj.prop (__getattr__ is only called when normal lookup fails).
class PatientRecordJson:
properties = ['raw_data', 'data_type', 'type_of_record', 'npi']
def __init__(self):
self.json = {}
def __getattr__(self, name):
if name in PatientRecordJson.properties:
return self.json.get(name)
raise AttributeError(name)
def __setattr__(self, name, value):
if name in PatientRecordJson.properties:
self.json[name] = value
else:
super().__setattr__(name, value)
Usage example:
pr = PatientRecordJson()
pr.raw_data #=> None
pr.raw_data = 'raw data'
pr.raw_data #=> 'raw data'
pr.json #=> {'raw_data': 'raw data'}
pr.z #=> AttributeError
pr.z = 2
pr.z #=> 2
pr.json #=> {'raw_data': 'raw data'}
A note: you've defined json on the class; if you want it to be an instance variable, create it on self in __init__.
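To see why that matters, here is a minimal illustration (a hypothetical class): a mutable attribute defined on the class is shared by every instance:
class Shared:
    json = {}  # class attribute: one dict shared by all instances

a = Shared()
b = Shared()
a.json['raw_data'] = 'oops'
print(b.json)  # {'raw_data': 'oops'} -- b sees a's data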
If you're just learning Python, this may be too advanced; however, you can largely automate the process of creating any number of classes like this by using a metaclass (a class whose instances are other classes).
Although doing so requires some non-trivial code, it makes defining the target classes extremely simple. Plus, as a bonus I added optional type-checking.
def typed_property(field_name, expected_type=None):
""" Helper function which creates and returns a property with the given
name with optional type-checking. Each property retrieves or stores
values from/to an instance-defined "json" dictionary attribute.
"""
@property
def prop(self):
return self.json[field_name]
@prop.setter
def prop(self, value):
if expected_type and not isinstance(value, expected_type):
raise TypeError('Only {} values may be assigned to {}'.format(
expected_type.__name__, field_name))
self.json[field_name] = value
return prop
class PatientRecordMeta(type):
""" Metaclass to define properties based on a class-level defined "fields"
dictionary.
"""
def __new__(metaclass, classname, bases, classdict):
cls = super().__new__(metaclass, classname, bases, classdict)
fields = classdict.get('fields')
if not fields or not isinstance(fields, dict):
raise TypeError('Class {} did not define required "fields" '
'instance dictionary'.format(classname))
# Create the properties.
for field, expected_type in fields.items():
setattr(cls, field, typed_property(field, expected_type))
return cls
The defined metaclass makes it very easy to create a class with exactly the desired properties:
class PatientRecordJson(metaclass=PatientRecordMeta):
fields = {'raw_data': str,
'data_type': str,
'type_of_record': str,
'npi': int} # Note: changed to "int" to test type-checking.
def __init__(self):
self.json = {} # define required instance attribute
# Other methods could be defined here, too, if desired.
# ...
patient_rec = PatientRecordJson()
patient_rec.raw_data = 'something'
patient_rec.bogus = 'something else' # OK, but not saved in "self.json" dict.
try:
patient_rec.npi = 'spam' # -> Should cause a TypeError
except TypeError:
pass # expected TypeError occurred
else:
print('Error: a TypeError did not occur as expected')
patient_rec.npi = 42 # Integer value is OK.
patient_rec.json['raw_data'] = 'eggs' # can still do this
print(patient_rec.raw_data) # -> eggs
print(patient_rec.npi) # -> 42
print(patient_rec.json) # -> {'raw_data': 'something', 'npi': 42}
You can use __getattr__ and __setattr__ to treat your dynamic fields as if they are properties of the object itself, rather than of the internal json object.
class PatientRecordJson:
# Class-level list, so __getattr__/__setattr__ can consult it safely
fields = ['raw_data', 'data_type', 'type_of_record', 'npi']
def __init__(self):
# bypass our own __setattr__, which only allows names listed in fields
object.__setattr__(self, 'json', {})
def __getattr__(self, key):
if key not in self.fields:
raise AttributeError(key)
return self.json.get(key, None)
def __setattr__(self, key, data):
if key not in self.fields:
raise AttributeError(key)
self.json[key] = data
The sample above will allow you to access the properties like so.
patient = PatientRecordJson()
patient.data_type = 'something'
patient.npi = 12345
patient.raw_data = 'whatever you want here'
print(patient.data_type) # 'something'
print(patient.doesntexist) # AttributeError
patient.notinfields = True # AttributeError
I want to parse strings into Python enums. Normally one would implement a parse method to do so. A few days ago I spotted the __new__ method, which is capable of returning different instances based on a given parameter.
Here is my code, which will not work:
import enum
class Types(enum.Enum):
Unknown = 0
Source = 1
NetList = 2
def __new__(cls, value):
if (value == "src"): return Types.Source
# elif (value == "nl"): return Types.NetList
# else: raise Exception()
def __str__(self):
if (self == Types.Unknown): return "??"
elif (self == Types.Source): return "src"
elif (self == Types.NetList): return "nl"
When I execute my Python script, I get this message:
[...]
class Types(enum.Enum):
File "C:\Program Files\Python\Python 3.4.0\lib\enum.py", line 154, in __new__
enum_member._value_ = member_type(*args)
TypeError: object() takes no parameters
How can I return a proper instance of an enum value?
Edit 1:
This Enum is used in URI parsing, in particular for parsing the schema. So my URI would look like this:
nl:PoC.common.config
<schema>:<namespace>[.<subnamespace>*].entity
So after a simple string.split operation I would pass the first part of the URI to the enum creation.
type = Types(splitList[0])
type should now contain a value of the enum Types with 3 possible values (Unknown, Source, NetList)
If I allowed aliases in the enum's member list, it wouldn't be possible to iterate over the enum's values alias-free.
The __new__ method on your enum.Enum type is used for creating new instances of the enum values, i.e. the Types.Unknown, Types.Source, etc. singleton instances. The enum call (e.g. Types('nl')) is handled by EnumMeta.__call__, which you could subclass.
Using name aliases fits your use case
Overriding __call__ is perhaps overkill for this situation. Instead, you can easily use name aliases:
class Types(enum.Enum):
Unknown = 0
Source = 1
src = 1
NetList = 2
nl = 2
Here Types.nl is an alias and will return the same object as Types.NetList. You then access members by name (using Types[..] index access); so Types['nl'] works and returns Types.NetList.
Your assertion that it won't be possible to iterate the enum's values alias-free is incorrect. Iteration explicitly doesn't include aliases:
Iterating over the members of an enum does not provide the aliases
Aliases are part of the Enum.__members__ ordered dictionary, if you still need access to these.
A demo:
>>> import enum
>>> class Types(enum.Enum):
... Unknown = 0
... Source = 1
... src = 1
... NetList = 2
... nl = 2
... def __str__(self):
... if self is Types.Unknown: return '??'
... if self is Types.Source: return 'src'
... if self is Types.NetList: return 'nl'
...
>>> list(Types)
[<Types.Unknown: 0>, <Types.Source: 1>, <Types.NetList: 2>]
>>> list(Types.__members__)
['Unknown', 'Source', 'src', 'NetList', 'nl']
>>> Types.Source
<Types.Source: 1>
>>> str(Types.Source)
'src'
>>> Types.src
<Types.Source: 1>
>>> str(Types.src)
'src'
>>> Types['src']
<Types.Source: 1>
>>> Types.Source is Types.src
True
The only thing missing here is translating unknown schemas to Types.Unknown; I'd use exception handling for that:
try:
scheme = Types[scheme]
except KeyError:
scheme = Types.Unknown
Overriding __call__
If you want to treat your strings as values, and use calling instead of item access, this is how you override the __call__ method of the metaclass:
class TypesEnumMeta(enum.EnumMeta):
def __call__(cls, value, *args, **kw):
if isinstance(value, str):
# map strings to enum values, defaults to Unknown
value = {'nl': 2, 'src': 1}.get(value, 0)
return super().__call__(value, *args, **kw)
class Types(enum.Enum, metaclass=TypesEnumMeta):
Unknown = 0
Source = 1
NetList = 2
Demo:
>>> class TypesEnumMeta(enum.EnumMeta):
... def __call__(cls, value, *args, **kw):
... if isinstance(value, str):
... value = {'nl': 2, 'src': 1}.get(value, 0)
... return super().__call__(value, *args, **kw)
...
>>> class Types(enum.Enum, metaclass=TypesEnumMeta):
... Unknown = 0
... Source = 1
... NetList = 2
...
>>> Types('nl')
<Types.NetList: 2>
>>> Types('?????')
<Types.Unknown: 0>
Note that we translate the string value to integers here and leave the rest to the original Enum logic.
Fully supporting value aliases
So, enum.Enum supports name aliases; you appear to want value aliases. Overriding __call__ can offer a facsimile, but we can do better still by putting the definition of the value aliases into the enum class itself. What if specifying duplicate names gave you value aliases, for example?
You'll have to provide a subclass of enum._EnumDict too, as it is that class that prevents names from being re-used. We'll assume that the first enum value is a default:
class ValueAliasEnumDict(enum._EnumDict):
def __init__(self):
super().__init__()
self._value_aliases = {}
def __setitem__(self, key, value):
if key in self:
# register a value alias
self._value_aliases[value] = self[key]
else:
super().__setitem__(key, value)
class ValueAliasEnumMeta(enum.EnumMeta):
@classmethod
def __prepare__(metacls, cls, bases):
return ValueAliasEnumDict()
def __new__(metacls, cls, bases, classdict):
enum_class = super().__new__(metacls, cls, bases, classdict)
enum_class._value_aliases_ = classdict._value_aliases
return enum_class
def __call__(cls, value, *args, **kw):
if value not in cls._value2member_map_:
value = cls._value_aliases_.get(value, next(iter(cls)).value)
return super().__call__(value, *args, **kw)
This then lets you define aliases and a default in the enum class:
class Types(enum.Enum, metaclass=ValueAliasEnumMeta):
Unknown = 0
Source = 1
Source = 'src'
NetList = 2
NetList = 'nl'
Demo:
>>> class Types(enum.Enum, metaclass=ValueAliasEnumMeta):
... Unknown = 0
... Source = 1
... Source = 'src'
... NetList = 2
... NetList = 'nl'
...
>>> Types.Source
<Types.Source: 1>
>>> Types('src')
<Types.Source: 1>
>>> Types('?????')
<Types.Unknown: 0>
Yes, you can override the __new__() method of an enum subclass to implement a parse method if you're careful, but in order to avoid specifying the integer encoding in two places, you'll need to define the method separately, after the class, so you can reference the symbolic names defined by the enumeration.
Here's what I mean:
import enum
class Types(enum.Enum):
Unknown = 0
Source = 1
NetList = 2
def __str__(self):
if (self == Types.Unknown): return "??"
elif (self == Types.Source): return "src"
elif (self == Types.NetList): return "nl"
else: raise TypeError(self)
def _Types_parser(cls, value):
if not isinstance(value, str):
# forward call to Types' superclass (enum.Enum)
return super(Types, cls).__new__(cls, value)
else:
# map strings to enum values, default to Unknown
return { 'nl': Types.NetList,
'ntl': Types.NetList, # alias
'src': Types.Source,}.get(value, Types.Unknown)
setattr(Types, '__new__', _Types_parser)
if __name__ == '__main__':
print("Types('nl') ->", Types('nl')) # Types('nl') -> nl
print("Types('ntl') ->", Types('ntl')) # Types('ntl') -> nl
print("Types('wtf') ->", Types('wtf')) # Types('wtf') -> ??
print("Types(1) ->", Types(1)) # Types(1) -> src
Update
Here's a more table-driven version that eliminates some of the repetitious coding that would otherwise be involved:
from collections import OrderedDict
import enum
class Types(enum.Enum):
Unknown = 0
Source = 1
NetList = 2
__str__ = lambda self: Types._value_to_str.get(self)
# Define after Types class.
Types.__new__ = lambda cls, value: (cls._str_to_value.get(value, Types.Unknown)
if isinstance(value, str) else
super(Types, cls).__new__(cls, value))
# Define look-up table and its inverse.
Types._str_to_value = OrderedDict((( '??', Types.Unknown),
('src', Types.Source),
('ntl', Types.NetList), # alias
( 'nl', Types.NetList),))
Types._value_to_str = {val: key for key, val in Types._str_to_value.items()}
if __name__ == '__main__':
print("Types('nl') ->", Types('nl')) # Types('nl') -> nl
print("Types('ntl') ->", Types('ntl')) # Types('ntl') -> nl
print("Types('wtf') ->", Types('wtf')) # Types('wtf') -> ??
print("Types(1) ->", Types(1)) # Types(1) -> src
print(list(Types)) # -> [<Types.Unknown: 0>, <Types.Source: 1>, <Types.NetList: 2>]
import pickle # Demonstrate picklability
print(pickle.loads(pickle.dumps(Types.NetList)) == Types.NetList) # -> True
Note that in Python 3.7+ regular dictionaries are ordered, so the use of OrderedDict in the code above would not be needed and it could be simplified to just:
# Define look-up table and its inverse.
Types._str_to_value = {'??': Types.Unknown,
'src': Types.Source,
'ntl': Types.NetList, # alias
'nl': Types.NetList}
Types._value_to_str = {val: key for key, val in Types._str_to_value.items()}
Is it possible to override __new__ in a Python enum to parse strings to an instance?
In a word, yes. As martineau illustrates, you can replace the __new__ method after the class has been instantiated (his original code):
class Types(enum.Enum):
Unknown = 0
Source = 1
NetList = 2
def __str__(self):
if (self == Types.Unknown): return "??"
elif (self == Types.Source): return "src"
elif (self == Types.NetList): return "nl"
else: raise TypeError(self) # completely unnecessary
def _Types_parser(cls, value):
if not isinstance(value, str):
raise TypeError(value)
else:
# map strings to enum values, default to Unknown
return { 'nl': Types.NetList,
'ntl': Types.NetList, # alias
'src': Types.Source,}.get(value, Types.Unknown)
setattr(Types, '__new__', _Types_parser)
and also, as his demo code illustrates, if you are not extremely careful you will break other things, such as pickling and even basic member-by-value lookup:
--> print("Types(1) ->", Types(1)) # doesn't work
Traceback (most recent call last):
...
TypeError: 1
--> import pickle
--> pickle.loads(pickle.dumps(Types.NetList))
Traceback (most recent call last):
...
TypeError: 2
Martijn showed a clever way of enhancing EnumMeta to get what we want:
class TypesEnumMeta(enum.EnumMeta):
def __call__(cls, value, *args, **kw):
if isinstance(value, str):
# map strings to enum values, defaults to Unknown
value = {'nl': 2, 'src': 1}.get(value, 0)
return super().__call__(value, *args, **kw)
class Types(enum.Enum, metaclass=TypesEnumMeta):
...
but this leaves us with duplicate code, and it works against the Enum type.
The only thing lacking in basic Enum support for your use-case is the ability to have one member be the default, but even that can be handled gracefully in a normal Enum subclass by creating a new class method.
The class that you want is:
class Types(enum.Enum):
Unknown = 0
Source = 1
src = 1
NetList = 2
nl = 2
def __str__(self):
if self is Types.Unknown:
return "??"
elif self is Types.Source:
return "src"
elif self is Types.NetList:
return "nl"
@classmethod
def get(cls, name):
try:
return cls[name]
except KeyError:
return cls.Unknown
and in action:
--> for obj in Types:
... print(obj)
...
??
src
nl
--> Types.get('PoC')
<Types.Unknown: 0>
If you really need value aliases, even that can be handled without resorting to metaclass hacking:
class Types(Enum):
Unknown = 0,
Source = 1, 'src'
NetList = 2, 'nl'
def __new__(cls, int_value, *value_aliases):
obj = object.__new__(cls)
obj._value_ = int_value
for alias in value_aliases:
cls._value2member_map_[alias] = obj
return obj
print(list(Types))
print(Types(1))
print(Types('src'))
which gives us:
[<Types.Unknown: 0>, <Types.Source: 1>, <Types.NetList: 2>]
Types.Source
Types.Source
I think by far the easiest solution to your problem is to use the functional API of the Enum class, which gives more freedom when it comes to choosing names, since we specify them as strings:
from enum import Enum
Types = Enum(
value='Types',
names=[
('??', 0),
('Unknown', 0),
('src', 1),
('Source', 1),
('nl', 2),
('NetList', 2),
]
)
This creates an enum with name aliases. Mind the order of the entries in the names list: the first one will be chosen as the canonical member (and is what .name returns); further ones are considered aliases, but both can be used:
>>> Types.src
<Types.src: 1>
>>> Types.Source
<Types.src: 1>
To use the name property as a return value for str(Types.src) we replace the default version from Enum:
>>> Types.__str__ = lambda self: self.name
>>> Types.__format__ = lambda self, _: self.name
>>> str(Types.Unknown)
'??'
>>> '{}'.format(Types.Source)
'src'
>>> Types['src']
<Types.src: 1>
Note that we also replace the __format__ method which is called by str.format().
I don't have enough rep to comment on the accepted answer, but in Python 2.7 with the enum34 package the following error occurs at run-time:
"unbound method <lambda>() must be called with instance MyEnum as first argument (got EnumMeta instance instead)"
I was able to correct this by changing:
# define after Types class
Types.__new__ = lambda cls, value: (cls._str_to_value.get(value, Types.Unknown)
if isinstance(value, str) else
super(Types, cls).__new__(cls, value))
to the following, wrapping the lambda with staticmethod():
# define after Types class
Types.__new__ = staticmethod(
lambda cls, value: (cls._str_to_value.get(value, Types.Unknown)
if isinstance(value, str) else
super(Types, cls).__new__(cls, value)))
This code tested correctly in both Python 2.7 and 3.6.