I'm trying to replicate mongoengine functionality that lets you define field objects that can be used like normal python objects in the code.
My idea is to create a FieldHolder class that contains the value and (de)serialization logic, and a Document object with overridden __setattr__ and __getattribute__ methods.
In my code draft, if I set x.h to some value, this value gets correctly assigned to x.h._value. When I get x.h, I correctly get x.h._value.
However, I would also like get h as a FieldHolder object and not as its value. I have tried using object.__getattribute__ (inside serialize method), but I'm still getting h._value (object.__getattribute__(self, 'h') returns abc). What am I doing wrong? Thanks
class FieldHolder:
_value = None
# Some serialization and deserialization methods
class Document(object):
h = FieldHolder()
def __setattr__(self, key, value):
attr = getattr(self, key, None)
if attr is not None and isinstance(attr, FieldHolder):
attr._value = value
else:
super().__setattr__(key, value)
def __getattribute__(self, key):
val = super().__getattribute__(key)
if isinstance(val, FieldHolder):
return val._value
else:
return val
def serialize(self):
res = {}
for name, value in vars(self).items():
obj = object.__getattribute__(self, name) # not working as expected
if isinstance(obj, FieldHolder):
res[name] = value
return res
x = Document()
x.h = "abc" # h._value is now "abc"
print(x.h) # prints "abc"
s = x.serialize() # should return {'h': 'abc'} but returns {}
print(s)
Related
say I have two (simple toy) nested data structure like this:
d = dict(zip(list('abc'), list(range(3))))
nested_dict = {k:d.copy() for k in d}
nested_listof_dict = {k:[d.copy() for _ in range(3)] for k in d}
Now I want to make this behave more like a 'regular' class-like object (meaning dot-indexable)
class dictobj(dict):
def __init__(self, data: dict, name):
data['_name'] = name
super().__init__(data)
for name, item in data.items():
if isinstance(item, (list, tuple)):
setattr(self, name, [dictobj(x, name) if isinstance(x, dict) else x for x in item])
else:
setattr(self, name, dictobj(item, name) if isinstance(item, dict) else item)
def __repr__(self):
return f"{self['_name']}"
data_dictobj = dictobj(data, 'test') # size 1185 bytes
which works nicely for both the nested dict and nested_listof_dict
assert nested_listof_dict.a[0].b == nested_listof_dict['a'][0]['b']
but, since both attributes and dictionaries are mutable, this might happen
nested_listof_dict['a'][0]['b'] = 2
assert nested_listof_dict.a[0].b != nested_listof_dict['a'][0]['b'] # unwanted behavior
So, therefore it would be a good idea to implement the attributes as properties. I figured it would probably be a good idea to avoid using lambda functions because of closure scoping. First looking at getting the getter implemented, I focused on the nested_dict, since it's a simpler structure.
class dictobj(dict):
def __init__(self, data: dict, name):
def make_property(self, name, item):
def getter(self):
return dictobj(item, name) if isinstance(item, dict) else item
setattr(self.__class__, name, property(getter))
# def setter(self, value):
# if not isinstance(value, type(item)):
# raise ValueError(f'cannot change the data structure, expected '+
# f'{type(item).__name__} got {type(value).__name__}')
# self[name] = value
# setattr(self.__class__, name, property(getter, setter))
data['_name'] = name
super().__init__(data)
for name, item in data.items():
if isinstance(item, (list, tuple)):
setattr(self, name, [dictobj(x, name) if isinstance(x, dict) else x for x in item])
else:
make_property(self, name, item)
def __repr__(self):
return f"{self['_name']}"
then test if the the attribute can no longer be set
d = dictobj(d, 'test')
# d.a = 1 # fails as should: "AttributeError: can't set attribute"
# d.a.a = 1 # fails as should: "AttributeError: can't set attribute"
But somehow I am still messing up, the following behavior is observed:
print(d.a) # returns object "a" - as desired
print(d.a) # returns 0 - second call returns the nested value
I don't know how to avoid this behavior from occurring.
Apart from that, I would also like to generate a setter that enforces the data structure to be maintained. Un-out-commenting the setter I wrote above, not surprisingly, also yields unintended behavior
d.a = {1} # ValueError: cannot change the data structure, expected dict got set - as desired
d.a.a = 2 # AttributeError: 'int' object has no attribute 'a'
d.a = 2
assert d.a == 0 and d['a'] == 2 # again unintended
I would like to understand what I'm doing wrong, and to make this work. It should also be noted that I have not even yet considered generating properties for the nested_listof_dict, which would also be needed.
munch does exactly what I need
I read the documentation and am not sure how to simplify the following code with Python properties:
class PatientRecordJson:
def __init__(self):
self.json = {}
def set_raw_data_field(self, string):
self.json['raw_data'] = string
def get_raw_data_field(self):
return self.json.get('raw_data', None)
def set_data_type(self, string):
self.json['data_type'] = string
def get_data_type(self):
return self.json.get('data_type', None)
def set_type_of_record(self, string):
self.json['type_of_record'] = string
def get_type_of_record(self):
return self.json.get('type_of_record', None)
def set_npi(self, string):
self.json['npi'] = string
def get_npi(self):
return self.json.get('npi', None)
You could override __getattr__ and __setattr__ which are called when you access a property with obj.prop.
class PatientRecordJson:
properties = ['raw_data', 'data_type', 'type_of_record', 'npi']
def __init__(self):
self.json = {}
def __getattr__(self, name):
if name in PatientRecordJson.properties:
return self.json.get(name)
return super().__getattr__(name)
def __setattr__(self, name, value):
if name in PatientRecordJson.properties:
self.json[name] = value
return super().__setattr__(name, value)
Usage example:
pr = PatientRecordJson()
pr.raw_data #=> None
pr.raw_data = 'raw data'
pr.raw_data #=> 'raw data'
pr.json #=> {'raw_data': 'raw data'}
pr.z #=> AttributeError
pr.z = 2
pr.z #=> 2
pr.json #=> {'raw_data': 'raw data'}
A note: you've defined json on the class, if you want it to be instance variable create it on self in __init__.
If you're just learning Python, this may be too advanced—however by using it you can largely automate the process of creating any number of classes like this by using a metaclass (a class whose instances are other classes).
Although doing so requires some non-trivial code, it makes defining the target classes extremely simple. Plus, as a bonus I added optional type-checking.
def typed_property(field_name, expected_type=None):
""" Helper function which creates and returns a property with the given
name with optional type-checking. Each property retrieves or stores
values from/to an instance-defined "json" dictionary attribute.
"""
#property
def prop(self):
return self.json[field_name]
#prop.setter
def prop(self, value):
if expected_type and not isinstance(value, expected_type):
raise TypeError('Only {} values may be assigned to {}'.format(
expected_type.__name__, field_name))
self.json[field_name] = value
return prop
class PatientRecordMeta(type):
""" Metaclass to define properties based on a class-level defined "fields"
dictionary.
"""
def __new__(metaclass, classname, bases, classdict):
cls = super().__new__(metaclass, classname, bases, classdict)
fields = classdict.get('fields')
if not fields or not isinstance(fields, dict):
raise TypeError('Class {} did not define required "fields" '
'instance dictionary'.format(classname))
# Create the properties.
for field, expected_type in fields.items():
setattr(cls, field, typed_property(field, expected_type))
return cls
The defined metaclass makes it very easy to create a class with exactly the desired properties:
class PatientRecordJson(metaclass=PatientRecordMeta):
fields = {'raw_data': str,
'data_type': str,
'type_of_record': str,
'npi': int} # Note changed to "int" to test type-checking,
def __init__(self):
self.json = {} # define required instance attribute
# Other methods could be defined here, too, if desired.
# ...
patient_rec = PatientRecordJson()
patient_rec.raw_data = 'something'
patient_rec.bogus = 'something else' # OK, but not saved in "self.json" dict.
try:
patient_rec.npi = 'spam' # -> Should cause a TypeError
except TypeError:
pass # expected TypeError occurred
else:
print('Error: a TypeError did not occur as expected')
patient_rec.npi = 42 # Integer value is OK.
patient_rec.json['raw_data'] = 'eggs' # can still do this
print(patient_rec.raw_data) # -> eggs
print(patient_rec.npi) # -> 42
print(patient_rec.json) # -> {'raw_data': 'something', 'npi': 42}
You can use __getattr__ and __setattr__ to treat your dynamic fields as if they are properties of the object itself, rather than of the internal json object.
class PatientRecordJson:
def __init__(self):
self.fields = ['raw_data', 'data_type', 'type_of_record', 'npi']
self.json = {}
def __getattr__(self, key):
if key not in self.fields:
raise AttributeError
return self.json.get(key, None)
def __setattr__(self, key, data):
if key not in self.fields
raise AttributeError
self.json[key] = data
The sample above will allow you to access the properties like so.
patient = PatientRecordJson()
patient.data_type = 'something'
patient.npi = 12345
patient.raw_data = 'whatever you want here'
print(patient.data_type) # 'something'
print(patient.doesntexist) # AttributeError
patient.notinfields = True # AttributeError
So I have this example code, where Foo is very dynamic data structure.
def fetch():
# simulate api request
return {'param1':1, 'param2':{'description':'param2', 'value':2}}
class Foo(object):
def __init__(self):
self._rawdata = fetch()
#set attributes accordingly to the downloaded data
for key, val in self._rawdata.items():
setattr(self, key, val)
def __str__(self):
# just an example
out = ''
for key, val in self._rawdata.items():
out += key + ': ' + str(val) + '\n'
return out
A user might want to try do this:
f = Foo()
print(f.param3)
The user doesn't know whether the 'param3' exists or not (API might not have any data avaliable, in which case it won't provide the key at all)
Naturally, this will result in AttributeError being raised.
print(f.param3)
AttributeError: 'Foo' object has no attribute 'param3'
My question is this: Is there some metaprogramming magic way to wrap the Foo() class into something that will make 'f.nonexistent_attribute' return 'None' instead of traceback? I would really like to avoid hardcoding expected properties (what if API changes?).
Implement the __getattr__ method; it'll be called for all nonexistent attributes:
def __getattr__(self, name):
# all non-existing attributes produce None
return None
From the documentation:
Called when an attribute lookup has not found the attribute in the usual places (i.e. it is not an instance attribute nor is it found in the class tree for self). name is the attribute name. This method should return the (computed) attribute value or raise an AttributeError exception.
Demo:
>>> def fetch():
... # simulate api request
... return {'param1':1, 'param2':{'description':'param2', 'value':2}}
...
>>> class Foo(object):
... def __init__(self):
... self._rawdata = fetch()
... #set attributes accordingly to the downloaded data
... for key, val in self._rawdata.items():
... setattr(self, key, val)
... def __getattr__(self, name):
... # all non-existing attributes produce None
... return None
...
>>> print(f.param1)
1
>>> f = Foo()
>>> print(f.param3)
None
I have this (Py2.7.2):
class MyClass(object):
def __init__(self, dict_values):
self.values = dict_values
self.changed_values = {} #this should track changes done to the values{}
....
I can use it like this:
var = MyClass()
var.values['age'] = 21
var.changed_values['age'] = 21
But I want to use it like this:
var.age = 21
print var.changed_values #prints {'age':21}
I suspect I can use properties to do that, but how?
UPDATE:
I don't know the dict contents at the design time. It will be known at run-time only. And it will likely to be not empty
You can create a class that inherits from a dict and override the needed functions
class D(dict):
def __init__(self):
self.changed_values = {}
self.__initialized = True
def __setitem__(self, key, value):
self.changed_values[key] = value
super(D, self).__setitem__(key, value)
def __getattr__(self, item):
"""Maps values to attributes.
Only called if there *isn't* an attribute with this name
"""
try:
return self.__getitem__(item)
except KeyError:
raise AttributeError(item)
def __setattr__(self, item, value):
"""Maps attributes to values.
Only if we are initialised
"""
if not self.__dict__.has_key('_D__initialized'): # this test allows attributes to be set in the __init__ method
return dict.__setattr__(self, item, value)
elif self.__dict__.has_key(item): # any normal attributes are handled normally
dict.__setattr__(self, item, value)
else:
self.__setitem__(item, value)
a = D()
a['hi'] = 'hello'
print a.hi
print a.changed_values
a.hi = 'wow'
print a.hi
print a.changed_values
a.test = 'test1'
print a.test
print a.changed_values
output
>>hello
>>{'hi': 'hello'}
>>wow
>>{'hi': 'wow'}
>>test1
>>{'hi': 'wow', 'test': 'test1'}
Properties (descriptors, really) will only help if the set of attributes to monitor is bounded. Simply file the new value away in the __set__() method of the descriptor.
If the set of attributes is arbitrary or unbounded then you will need to overrive MyClass.__setattr__() instead.
You can use the property() built-in function.
This is preferred to overriding __getattr__ and __setattr__, as explained here.
class MyClass:
def __init__(self):
self.values = {}
self.changed_values = {}
def set_age( nr ):
self.values['age'] = nr
self.changed_values['age'] = nr
def get_age():
return self.values['age']
age = property(get_age,set_age)
Sorry for the confusing title.
I would like to do the following: (Similar to defstruct in Lisp)
def mkstruct(structname, field_dict):
# create a function called "structname" and get/set functions
# called "structname_get(s, field_name)" and "structname_set(s, field_name, value)"
# Create a struct "lstnode"
mkstruct("lstnode", {ndkey : 0, nxt: None})
# Make a new struct
node = lstnode()
node_set(node, "ndkey", 5)
v = node_get(node, "ndkey") # v should be 5
This can be done in C with a macro define. The reason I am not using a class is because The "struct" I am creating will be "tied" to a database (just a text file in some format in this case). And I don't want to take up any memory associated with an object - I will represent the struct as a number (an object ID if you will)
This should be a step in the direction of what you want:
def mkstruct(name, attrs):
def init(self):
self.id = # not sure how you want to get the id
def getattr(self, attr):
if attr not in attrs:
raise AttributeError(attr)
# put your database lookup statement here
def setattr(self, attr, value):
if attr not in attrs:
raise AttributeError(attr)
# put your database update statement here
return type(
name,
(object,),
__init__=init,
__getattr__=getattr,
__setattr__=setattr)
lstnode = mkstruct("lstnode", ("ndkey", "nxt"))
Looks to me that what you're looking for is already provided by the type built-in:
def mkstruct(structname, field_dict):
return type(structname, (object,), field_dict)
lstnode = mkstruct("lstnode", {'ndkey' : 0, 'nxt': None})
node = lstnode()
node.ndkey = 5
v = node.ndkey
If you need just the keys in field_dict to be members of the structure, you can add '__slots__' to field_dict.
Note: This doesn't implement any setter or getter, but as pointed out already by the comments, this is not really needed when using classes.
It looks like that this isn't easy to do in python - after some research. The only way to add a inner function to the global namespace is to modify the globals() dict, which is rather awkward.
>>> def mkfunc(funcname):
... def func():
... print "my name is %s" % funcname
... func.__name__ = funcname
... return func
...
>>> mkfunc("abc")
<function abc at 0xb773ae64>
>>> globals()["abc"] = mkfunc("abc")
>>> abc()
my name is abc
As for my own problem, I am content to do the following:
def mkstruct(fields):
def maker(args):
# validate #args against #fields
oid = db_insert_row(fields)
return oid
def getter(oid, fieldname):
rec = db_retrieve(oid)
return rec[fieldname]
def setter(oid, fieldname, value):
db_update(oid, fieldname, value)
return (maker, getter, setter,)
lstnode, lstnode_get, lstnode_set = mkstruct({nodekey: 0, nxt: None})
n = lstnode(nodekey=5)