Suppose I have a class hierarchy like this:
class SerializableWidget(object):
    # some code

class WidgetA(SerializableWidget):
    # some code

class WidgetB(SerializableWidget):
    # some code
I want to be able to serialize instances of WidgetA and WidgetB (and potentially other widgets) to text files as JSON. Then I want to be able to deserialize them, without knowing their specific class beforehand:

some_widget = deserialize_from_file(file_path)  # pseudocode, doesn't have to be exactly a method like this

and some_widget needs to be constructed as an instance of the precise subclass of SerializableWidget. How do I do this? What methods exactly do I need to override/implement in each class of my hierarchy?
Assume all fields of the above classes are of primitive types. Do I need to override some __to_json__ and __from_json__ methods, or something like that?
You can solve this in many ways. One example is to use the object_hook and default parameters of json.load and json.dump respectively.
All you need to do is store the class name together with the serialized version of the object; when loading, use a mapping from each class name back to its class.
The example below uses a dispatcher class decorator to record the class name alongside the object's data when serializing, and to look the class up again when deserializing. All you need is an _as_dict method on each class to convert the data to a dict:
import json

@dispatcher
class Parent(object):
    def __init__(self, name):
        self.name = name

    def _as_dict(self):
        return {'name': self.name}

@dispatcher
class Child1(Parent):
    def __init__(self, name, n=0):
        super().__init__(name)
        self.n = n

    def _as_dict(self):
        d = super()._as_dict()
        d['n'] = self.n
        return d

@dispatcher
class Child2(Parent):
    def __init__(self, name, k='ok'):
        super().__init__(name)
        self.k = k

    def _as_dict(self):
        d = super()._as_dict()
        d['k'] = self.k
        return d
Now for the tests. First let's create a list with 3 objects of different types.
>>> obj = [Parent('foo'), Child1('bar', 15), Child2('baz', 'works')]
Serializing it will yield the data with the class name in each object:
>>> s = json.dumps(obj, default=dispatcher.encoder_default)
>>> print(s)
[
{"__class__": "Parent", "name": "foo"},
{"__class__": "Child1", "name": "bar", "n": 15},
{"__class__": "Child2", "name": "baz", "k": "works"}
]
And loading it back generates the correct objects:

>>> obj2 = json.loads(s, object_hook=dispatcher.decoder_hook)
>>> print(obj2)
[
<__main__.Parent object at 0x7fb6cd561cf8>,
<__main__.Child1 object at 0x7fb6cd561d68>,
<__main__.Child2 object at 0x7fb6cd561e10>
]
Finally, here's the implementation of dispatcher:
class _Dispatcher:
    def __init__(self, classname_key='__class__'):
        self._key = classname_key
        self._classes = {}  # to keep a reference to the classes used

    def __call__(self, class_):  # decorate a class
        self._classes[class_.__name__] = class_
        return class_

    def decoder_hook(self, d):
        classname = d.pop(self._key, None)
        if classname:
            return self._classes[classname](**d)
        return d

    def encoder_default(self, obj):
        d = obj._as_dict()
        d[self._key] = type(obj).__name__
        return d

dispatcher = _Dispatcher()
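As a quick sanity check, here's a sketch of a full round trip, assuming the classes and the obj list from above (the classes define no __eq__, so we compare dicts and types instead of the objects themselves):

s = json.dumps(obj, default=dispatcher.encoder_default)
obj2 = json.loads(s, object_hook=dispatcher.decoder_hook)

# the reconstructed objects have the same types and the same data
assert [type(o) for o in obj] == [type(o) for o in obj2]
assert [o._as_dict() for o in obj] == [o._as_dict() for o in obj2]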
I really liked @nosklo's answer, but I wanted to customize which property value records the model type when it gets saved, so I extended his code a little to add a subtype decorator.
(I know this isn't directly related to the question, but you can use this for JSON serialization too, since it produces dict objects. Note that your base class must use the @dataclass decorator to serialize correctly; otherwise you could adjust this code to define an _as_dict method as in @nosklo's answer.)
data.csv:
model_type, prop1
sub1, testfor1
sub2, testfor2
test.py:
import csv
from abc import ABC
from dataclasses import dataclass

from polymorphic import polymorphic

@polymorphic(keyname="model_type")
@dataclass
class BaseModel(ABC):
    prop1: str

@polymorphic.subtype_when_(keyval="sub1")
class SubModel1(BaseModel):
    pass

@polymorphic.subtype_when_(keyval="sub2")
class SubModel2(BaseModel):
    pass

with open('data.csv') as csvfile:
    reader = csv.DictReader(csvfile, skipinitialspace=True)
    for row_data_dict in reader:
        price_req = BaseModel.deserialize(row_data_dict)
        print(price_req, '\n\tre-serialized: ', price_req.serialize())
polymorphic.py:
import dataclasses
import functools
from abc import ABC
from typing import Type

# https://stackoverflow.com/a/51976115
class _Polymorphic:
    def __init__(self, keyname='__class__'):
        self._key = keyname
        self._class_mapping = {}

    def __call__(self, abc: Type[ABC]):
        functools.update_wrapper(self, abc)
        setattr(abc, '_register_subtype', self._register_subtype)
        setattr(abc, 'serialize', lambda self_subclass: self.serialize(self_subclass))
        setattr(abc, 'deserialize', self.deserialize)
        return abc

    def _register_subtype(self, keyval, cls):
        self._class_mapping[keyval] = cls

    def serialize(self, self_subclass) -> dict:
        my_dict = dataclasses.asdict(self_subclass)
        my_dict[self._key] = next(
            keyval for keyval, cls in self._class_mapping.items()
            if cls == type(self_subclass)
        )
        return my_dict

    def deserialize(self, data: dict):
        classname = data.pop(self._key, None)
        if classname:
            return self._class_mapping[classname](**data)
        raise ValueError(f'Invalid data: {self._key} was not found or it referred to an unrecognized class')

    @staticmethod
    def subtype_when_(*, keyval: str):
        def register_subtype_for(_cls):
            nonlocal keyval
            if not keyval:
                keyval = _cls.__name__
            _cls._register_subtype(keyval, _cls)

            @functools.wraps(_cls)
            def construct_original_subclass(*args, **kwargs):
                return _cls(*args, **kwargs)

            return construct_original_subclass
        return register_subtype_for

polymorphic = _Polymorphic
Sample console output:
SubModel1(prop1='testfor1')
	re-serialized:  {'prop1': 'testfor1', 'model_type': 'sub1'}
SubModel2(prop1='testfor2')
	re-serialized:  {'prop1': 'testfor2', 'model_type': 'sub2'}
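Since serialize() returns a plain dict, the JSON note above is a one-liner in practice; here's a small sketch assuming the classes from test.py:

import json

m = SubModel1(prop1='testfor1')
print(json.dumps(m.serialize()))  # {"prop1": "testfor1", "model_type": "sub1"}

m2 = BaseModel.deserialize(json.loads('{"prop1": "x", "model_type": "sub2"}'))
print(m2)  # SubModel2(prop1='x')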
I have some classes FooA and FooB which are basically a collection of "static" methods. They operate on data - let's say it is a DataItem object:
# Base class with common behavior
class FooBase:
    @classmethod
    def method1(cls, arg, data: DataItem):
        # res = ...
        return res

    @classmethod
    def method2(cls, arg1, arg2, data: DataItem):
        # res = ...  # using method1
        return res

# specialized classes
class FooA(FooBase):
    # define extra methods
    pass

class FooB(FooBase):
    # define extra methods
    pass

# usage 1: as "static methods"
res = FooA.method1(arg, data)
res2 = FooB.method2(args, data)
Now, I'd like to use these classes as attributes of a "managing" class (MyApp) which also has access to a datasource and should implicitly supply DataItems to the static methods of FooA and FooB. Moreover, the datasource supplies a list of DataItem objects.
# usage 2: as part of an "App" class
# here, the "data" argument should be supplied implicitly by MyApp
# also: MyApp contains a list of "data" objects

class MyApp:
    def __init__(self, datasrc):
        self.datasrc = datasrc

    # this could be a generator
    def get_data(self, key) -> List[DataItem]:
        return self.datasrc.get_data(key)

    # FooA, FooB as class / instance level attributes, descriptors, ???

# usage
my_app = MyApp("datasrc")
res_list = my_app.foo_a.method1(arg)  # foo_a is a FooA obj, "data" arg is supplied automatically

# optionally, but not necessarily, call as a static attribute:
res = MyApp.foo_a.method1(arg, data)  # same as FooA.method1(arg, data)
I have tried different things but found no satisfactory solution.
So... I am not sure this can be done in a nice way; I thought about it, and every approach has serious drawbacks. One of the problems is that we actually want a method that returns either a list or a single item depending on its input parameters, which is bad design.
One way could be to store datasrc in FooBase, but that violates the single-responsibility principle (SRP):
class FooBase:
    def __init__(self, datasrc):
        FooBase.datasrc = datasrc

    @classmethod
    def method1(cls, arg, data=None):
        if data is None:
            # no data passed: fan out over every item in the stored datasource
            return [cls.method1(arg, d) for d in cls.datasrc]
        return data  # placeholder for the actual single-item computation
Or use isinstance:

@classmethod
def method1(cls, arg, data):
    if isinstance(data, list):
        return [cls.method1(arg, d) for d in data]
    return data  # placeholder for the actual single-item computation
But it forces us to adjust every method (which could be automated with a decorator or metaclass; see the sketch below).
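For illustration, here is a sketch of such a method decorator, assuming the (cls, arg, data) signature used above; each method then only implements the single-item case:

from functools import wraps

def accepts_list(method):
    # fan a list of data items out into repeated single-item calls
    @wraps(method)
    def wrapper(cls, arg, data):
        if isinstance(data, list):
            return [method(cls, arg, d) for d in data]
        return method(cls, arg, data)
    return wrapper

class FooBase:
    @classmethod
    @accepts_list
    def method1(cls, arg, data):
        return data  # single-item logic only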
Another way could be to use some intermediate layer:
def decorator(datasrc):
    def wrapper(foo):
        def f(*args, **kwargs):
            # We could catch TypeError here to serve the case when data is passed
            return [foo(*args, **kwargs, data=data) for data in datasrc]
        return f
    return wrapper

class FooAdapter:
    def __init__(self, datasrc, foo_cls):
        self.datasrc = datasrc
        methods = [
            getattr(foo_cls, m)
            for m in dir(foo_cls)
            if callable(getattr(foo_cls, m)) and not m.startswith("__")
        ]  # all methods of our Foo class
        for method in methods:
            setattr(self, method.__name__, decorator(datasrc)(method))

class MyApp:
    def __init__(self, datasrc):
        self.datasrc = datasrc
        self.foo_a = FooAdapter(datasrc, FooA)
        self.foo_b = FooAdapter(datasrc, FooB)
But a solution with dynamically added functions breaks IDE support (no autocompletion or static checking).
The cleanest solution, in my opinion, would be to have an Enum for the Foo methods and an Enum for the Foo classes; then in MyApp you could write:

def get_bulk(self, m: MethodEnum, f: FooEnum, *args):
    return [getattr(enum_to_cls_mapping[f], m.value)(*args, data=d) for d in self.datasrc]
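A minimal, self-contained sketch of how those pieces could fit together; the enum members and the mapping are illustrative, FooA and FooB are the classes from the question, and datasrc is assumed to be a simple iterable of data items:

from enum import Enum

class FooEnum(Enum):
    A = "a"
    B = "b"

class MethodEnum(Enum):
    METHOD1 = "method1"
    METHOD2 = "method2"

enum_to_cls_mapping = {FooEnum.A: FooA, FooEnum.B: FooB}

class MyApp:
    def __init__(self, datasrc):
        self.datasrc = datasrc

    def get_bulk(self, m: MethodEnum, f: FooEnum, *args):
        # resolve the class from the enum, then fan the call out over the datasource
        return [getattr(enum_to_cls_mapping[f], m.value)(*args, data=d) for d in self.datasrc]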
class NiceClass():
    some_value = SomeObject(...)
    some_other_value = SomeOtherObject(...)

    @classmethod
    def get_all_vars(cls):
        ...
I want get_all_vars() to return [SomeObject(...), SomeOtherObject(...)], or more specifically, the values of the variables in cls.
Solutions I tried that didn't work out for me:
- return [cls.some_value, cls.some_other_value, ...] (requires listing the variables manually)
- subclassing Enum and using list(cls) (requires using some_value.value to access the value elsewhere in the program; type hinting would also be a mess)
- namedtuples (nope, not touching that subject; heard it was much more complicated than Enum)
- [value for key, value in vars(cls).items() if not callable(value) and not key.startswith("__")] (too hacky due to using vars(cls); for some reason it also includes get_all_vars, due to it being a classmethod)
There are two ways. This is a straight answer to your question:
class Foo:
    pass

class Bar:
    x: int = 1
    y: str = 'hello'
    z: Foo = Foo()

    @classmethod
    def get_all(cls):
        xs = []
        for name, value in vars(cls).items():
            if not (name.startswith('__') or isinstance(value, classmethod)):
                xs.append(value)
        return xs
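A quick check of this version (the Foo instance shows up with its default repr):

print(Bar.get_all())
# [1, 'hello', <__main__.Foo object at 0x...>]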
This is what I suggest:
from dataclasses import dataclass, fields

class Foo:
    pass

@dataclass
class Bar:
    x: int = 1
    y: str = 'hello'
    z: Foo = Foo()

    @classmethod
    def get_defaults(cls):
        return [f.default for f in fields(cls)]

    @classmethod
    def get_all(cls):
        return [getattr(cls, f.name) for f in fields(cls)]
Results:

Bar.get_defaults() == Bar.get_all()
# True -> both are [1, 'hello', <__main__.Foo object at 0x...>]

Bar.x = 10
Bar.get_defaults() == Bar.get_all()
# False -> [1, 'hello', <Foo>] != [10, 'hello', <Foo>]
You can create a list of values and define individual attributes at the same time with minimal boilerplate.
class NiceClass():
    (some_value,
     some_other_value,
    ) = _all_values = [
        SomeObject(...),
        SomeOtherObject(...),
    ]

    @classmethod
    def get_all_vars(cls):
        return cls._all_values
The most obvious drawback to this is that it's easy to get the order of names and values out of sync.
Ideally, you might like to do something like
class NiceClass:
    _attributes = {
        'some_value': SomeObject(...),
        'some_other_value': SomeOtherObject(...),
    }

    @classmethod
    def get_all_vars(cls):
        return cls._attributes.values()
and have some way to "inject" the contents of _attributes into the class namespace directly. The simplest way to do this is with a mix-in class that defines __init_subclass__:
class AddAttributes:
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # cls.__dict__ is a read-only mappingproxy, so inject via setattr
        for name, value in cls._attributes.items():
            setattr(cls, name, value)

class NiceClass(AddAttributes):
    # As above
    ...
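Here is a minimal, self-contained check of the mix-in idea, using plain strings to stand in for SomeObject(...) and SomeOtherObject(...):

class AddAttributes:
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # runs after the subclass body, so _attributes is already defined
        for name, value in cls._attributes.items():
            setattr(cls, name, value)

class NiceClass(AddAttributes):
    _attributes = {
        'some_value': 'object-1',
        'some_other_value': 'object-2',
    }

    @classmethod
    def get_all_vars(cls):
        return list(cls._attributes.values())

print(NiceClass.some_value)      # object-1
print(NiceClass.get_all_vars())  # ['object-1', 'object-2']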
This might sound like an XY problem (https://xyproblem.info/), but my solution might work in the other case as well. You can get the fields of an object by using __dict__ or vars (the latter is considered more Pythonic; see "Python dictionary from an object's fields").
You could do something like:

class ClassTest:
    def __init__(self):
        self.one = 1
        self.two = 2

list(vars(ClassTest()).values())  # -> [1, 2]

But you will see that it has some limitations: it only picks up variables that are defined via self.variable_name, not class-level attributes like the ones you used above. It might help you nonetheless, because you can simply define them in __init__.
I have a class:
class DataReader:
    def get_data(self, name):
        ...  # get data of given name
It's OK if I use it as follows:
reader = DataReader()
a = reader.get_data('a')
b = reader.get_data('b')
c = reader.get_data('c')
...
Is it possible to write code like the following instead?
a = reader.get_a()
b = reader.get_b()
c = reader.get_c()
With the current code this fails, since DataReader has no method get_a(). What I want is to make DataReader support methods like get_a and automatically convert such a call to self.get_data('a'), without actually defining the get_xxx methods one by one.
Here, a, b, c can be any string, and I cannot know all of them when defining the DataReader class. So let me ask my question another way: is there some shortcut to let DataReader support all (infinitely many) get_xxx methods (where xxx can be any string), as if I had defined them one by one:
class DataReader:
    def get_a(self): return self.get_data('a')
    def get_b(self): return self.get_data('b')
    ...
    def get_z(self): return self.get_data('z')
    def get_aa(self): return self.get_data('aa')
    ...
    def get_asdf(self): return self.get_data('asdf')
    ...
    def get_okjgoke(self): return self.get_data('okjgoke')
    ...
One approach is to have DataReader define the __getattr__ special method (which is invoked when an attribute is not found on the object through the normal means):
class DataReader:
    def __init__(self, data):
        self.items = data.copy()

    def __getattr__(self, attr):
        if attr.startswith('get_'):
            return lambda: self.items[attr.split('get_')[-1]]
        raise AttributeError('Attribute "{}" not found.'.format(attr))

d = DataReader({'a': 1, 'b': 2})
print(d.get_a())
print(d.get_b())
Prints:
1
2
Your approach of passing the name to get_data seems pretty reasonable to me. But if you insist on attribute-based lookups, you can override __getattr__ and delegate to get_data in there, e.g.:
class DataReader:
    def __getattr__(self, attr):
        parts = attr.partition('_')
        if parts[0] == 'get' and parts[-1] != 'data':
            return self.get_data(parts[-1])
        # object has no __getattr__ to delegate to, so fail explicitly
        raise AttributeError(attr)

    def get_data(self, name):
        return name
Now you can use DataReader().get_a to get DataReader().get_data('a').
If you want to get the value from a callable, i.e. DataReader().get_a() instead of DataReader().get_a, you can tuck in a lambda:
class DataReader:
    def __getattr__(self, attr):
        parts = attr.partition('_')
        if parts[0] == 'get' and parts[-1] != 'data':
            return lambda: self.get_data(parts[-1])
        raise AttributeError(attr)

    def get_data(self, name):
        return name
Now you can do DataReader().get_a().
I have a class like this:
class User:
    def __init__(self, uid):
        userinfo = json.load(urlopen(passportapi + 'user/' + uid))
This class should load user information from a remote API and set corresponding attributes on the instance, so that I can access those attributes with:
print user.username
print user.group_id
Is there any way to implement this?
Thanks
import json

api_result = '{"username":"wawa","age":20}'

class User(object):
    def __init__(self, api_result):
        userinfo = json.loads(api_result)
        self.__dict__.update(userinfo)

import unittest

class DefaultTestCase(unittest.TestCase):
    def test_default(self):
        user = User(api_result)
        self.assertEqual(user.username, 'wawa')
        self.assertEqual(user.age, 20)

if __name__ == '__main__':
    unittest.main()
You can do this sort of thing using the setattr function:
>>> class A(object):
...     pass
...
>>> a = A()
>>> setattr(a, 'val', 4)
>>> a.val
4
In your case, assuming your parsed JSON provides key-value pairs (like a dict), you can just iterate through those and call setattr on self; something like this (assuming userinfo is a dict):
class User:
    def __init__(self, uid):
        userinfo = json.load(urlopen(passportapi + 'user/' + uid))
        for key, value in userinfo.items():
            setattr(self, key, value)
Assuming your JSON request returns a Python dictionary:
class User:
    def __init__(self, uid):
        self.__dict__.update(json.load(urlopen(passportapi + 'user/' + uid)))
Sometimes I like to have a little more encapsulation and control than what 'update' would offer. I would probably accomplish what you are trying to do like this:
class User(object):
    def __init__(self, api_result):
        self.userinfo = json.loads(api_result)

    def __getattr__(self, name):
        if name in self.userinfo:
            return self.userinfo[name]
        raise AttributeError(name)
I think this method will allow you to do other things like filter certain keywords and raise custom exceptions for accessing your data.
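For example, here is a hypothetical variant that hides keys starting with an underscore and raises a custom exception type (MissingField is an illustrative name, not part of the answer above):

import json

class MissingField(AttributeError):
    pass

class User(object):
    def __init__(self, api_result):
        self.userinfo = json.loads(api_result)

    def __getattr__(self, name):
        # filter out "private" keys coming from the API payload
        if not name.startswith('_') and name in self.userinfo:
            return self.userinfo[name]
        raise MissingField('user record has no field {!r}'.format(name))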
How do I create object instances at runtime in Python?
Say I have 2 classes:
class MyClassA(object):
    def __init__(self, prop):
        self.prop = prop
        self.name = "CLASS A"

    def println(self):
        print(self.name)

class MyClassB(object):
    def __init__(self, prop):
        self.prop = prop
        self.name = "CLASS B"

    def println(self):
        print(self.name)
and a dict:

{'a': MyClassA, 'b': MyClassB}
How can I dynamically create an instance of one of my two classes, depending on whether I choose 'a' or 'b'?
Kind of like this:

def somefunc(s):
    if s == 'a': return MyClassA(...)
    if s == 'b': return MyClassB(...)

to get "CLASS B" when calling somefunc('b').println(), but in a more elegant and dynamic way (say I add more classes to the dict at runtime).
You might create a dispatcher, which is a dictionary with your keys mapping to classes.
dispatch = {
"a": MyClassA,
"b": MyClassB,
}
instance = dispatch[which_one]("some prop")  # notice the second pair of parens: calling the class creates the instance
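Since the dispatch table is just a dict, registering more classes at runtime (as the question asks) is a plain assignment; MyClassC here stands in for any new class:

class MyClassC(object):
    def __init__(self, prop):
        self.prop = prop
        self.name = "CLASS C"

    def println(self):
        print(self.name)

dispatch["c"] = MyClassC               # register a new class at runtime
dispatch["c"]("some prop").println()   # prints: CLASS C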
You create a class instance by calling the class. Your dict {'a': MyClassA, 'b': MyClassB} maps keys to classes, so you need only look the class up and call it:

classes['a']("some prop")
But I get the sense you want something more specific. Here's a subclass of dict that, when called with a key, looks up the associated item and calls it:
>>> class ClassMap(dict):
...     def __call__(self, key, *args, **kwargs):
...         return self.__getitem__(key)(*args, **kwargs)
...
>>> c = ClassMap()
>>> c['a'] = A
>>> c['b'] = B
>>> c('a')
<__main__.A object at 0x1004cc7d0>