python OOP class method retaining variable. Weird scoping thing - python

I have two classes, a main class which creates instances of my other class.
class Builder:
def __init__(self, id):
self.id = id
def build_thing(self, main_ftr, main_attr, attrs = {}):
# note the main ftr/attrs gets added to attrs no matter what
attrs[main_ftr] = attrs.get(main_ftr, []) + [main_attr]
return Thing(main_ftr, main_attr, attrs)
class Thing:
def __init__(self, main_ftr, main_attr, attrs):
self.main_ftr = main_ftr
self.main_attr = main_attr
self.attrs = attrs
The issue I'm having has to do with the attrs dictionary that gets passed to the Thing class. The problem is that each time I use the Builder class to create a Thing class, the attrs argument retains it's previous values
b = Builder('123')
t = b.build_thing('name', 'john')
print(t.attrs) # {'name': ['john'] }
# Goal is this creates a new "Thing" with only attrs = {'name':['mike']}
t2 = b.build_thing('name', 'mike')
print(t2.attrs) # {'name': ['john', 'mike']}
My Question is 2 part:
Why is this happening?
How do I fix it?

Functions' optional arguments are initialized once. Since attrs is mutable, each time you call the function, you add new key-value pair to this dictionary and it is kept for further calls. If you need a mutable data structure as a default parameter, use something like:
def build_thing(self, main_ftr, main_attr, attrs=None):
if attrs is None:
attrs = {}
attrs[main_ftr] = attrs.get(main_ftr, []) + [main_attr]
return Thing(main_ftr, main_attr, attrs)

Related

Adding properties dynamically to a Python class that point to items in a dictionary

I am trying to attach properties dynamically to a class (Registry) for the sake of easy access to values in a dict. I am using defaultdict to define the dictionary, with the default value as an empty list.
But because of the way I am accessing the list values in the dictionary while defining the property, I end up with all properties pointing to the same list object.
Gist: https://gist.github.com/subhashb/adb75a3a05a611c3d9193da695d46dd4
from collections import defaultdict
from enum import Enum
class ElementTypes(Enum):
PHONE = "PHONE"
CAR = "CAR"
class Registry:
def __new__(cls, *args, **kwargs):
cls.setup_properties()
instance = super(Registry, cls).__new__(cls, *args, **kwargs)
return instance
def __init__(self):
self._elements = {}
def register(self, element_type, item):
if element_type.value not in self._elements:
self._elements[element_type.value] = []
self._elements[element_type.value].append(item)
def get(self, element_type):
return self._elements[element_type.value]
#classmethod
def setup_properties(cls):
for item in ElementTypes:
prop_name = item.value
prop = property(lambda self: getattr(self, "get")(item))
setattr(Registry, prop_name.lower(), prop)
registry = Registry()
registry.register(ElementTypes.PHONE, "phone1")
registry.register(ElementTypes.PHONE, "phone2")
registry.register(ElementTypes.CAR, "car1")
registry.register(ElementTypes.CAR, "car2")
assert dict(registry._elements) == {
"CAR": ["car1", "car2"],
"PHONE": ["phone1", "phone2"],
}
assert hasattr(registry, "phone")
assert hasattr(registry, "car")
assert registry.car == ["car1", "car2"]
assert registry.phone == ["phone1", "phone2"] # This fails
How do I define the code withing the property to be truly dynamic and get access to the individual list values in the dict?
First, don't setup properties in __new__, that gets called for every Registry object created! Instead, just assign the properties outside the class definition.
Secondly, this trips a lot of people up, but if you use a lambda inside a for-loop and you want to use the item variable, you need to make sure that you add an argument called item with the default value of item, otherwise all the properties will refer to the last item of the loop.
class Registry:
def __init__(self):
self._elements = defaultdict(list)
def register(self, element_type, item):
self._elements[element_type.value].append(item)
def get(self, element_type):
return self._elements[element_type.value]
for item in ElementTypes:
prop_name = item.value
prop = property(lambda self, item=item: self.get(item))
setattr(Registry, prop_name.lower(), prop)

How to define a class with dynamic attributes?

In my project, I need to create a class with attributes passed by a dict, something like this:
class_attributes = {"sensor": Nested(Sensor),
"serial_interface": Nested(SerialInterface)}
class MSchema(marshmallow.ModelSchema):
class Meta:
model = cls
attr = class_attributes
I need that "sensor" and "serial_interface" to be in the class, and can be access using MSchema.sensor or MSchema.serial_interface.
You can call the metaclass of ModelSchema directly, rather than defining the class declaratively using a class statement.
m = marshmallow.ModelSchema
class_attributes = {
"sensor": Nested(Sensor),
"serial_interface": Nested(SerialInterface)
}
m = marshmallow.ModelSchema
mc = type(m)
MSchema = mc('MSchema', (m,), {
'Meta': type('Meta', (), {'model': cls}),
**class_attributes
})
In case you aren't aware, a class statement is just a declarative syntax for calling type (or some other metaclass) with 3 arguments: the name of the class, a tuple of parent classes, and a dict of class attributes. The class statement evaluates its body to produce the dict, then calls type (or another given metaclass), and binds the return value to the name. Some simpler examples:
# Foo = type('Foo', (), {})
class Foo:
pass
# Foo = Bar('Foo', (), {})
class Foo(metaclass=Bar):
pass
# Foo = Bar('Foo', (Parent,), {'x': 3})
class Foo(Parent, metaclass=Bar):
x = 3
# def foo_init(self, x):
# self.x = x
# Foo = Bar('Foo', (), {'__init__': foo_init})
class Foo(metaclass=Bar):
def __init__(self, x):
self.x = x
Not entirely sure I understand the question to 100%, but have you tried using setattr()?
Example code would look like the following:
m_schema = MSchema()
for key, value in class_attributes.items():
setattr(m_schema, key, value)
setattr(object, string, value) takes an object to set attributes on, a string for the attribute name, and an arbitrary value as the attribute value.

numpy array subclass unexpedly shares attributes across instances

I am having a weird subclass numpy.ndarray issue that feels like
Values of instance variables of superclass persist across instances of subclass
But I have not been able to understand fully or make it work for for my example.
Reading through
Slightly more realistic example - attribute added to existing array I am trying to do pretty much exactly this. I want to add an attrs attribute to an array to hold information such as units in a dictionary.
Here is what I have:
import numpy
class dmarray(numpy.ndarray):
def __new__(cls, input_array, attrs={}):
obj = numpy.asarray(input_array).view(cls)
obj.attrs = attrs
return obj
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None:
return
self.attrs = getattr(obj, 'attrs', {})
So then to use it and demonstrate the issue
a = dmarray([1,2,3,4])
b = dmarray([1,2,3,4])
a.attrs['foo'] = 'bar'
print(b.attrs)
#{'foo': 'bar'}
b.attrs is a.attrs
# True # hmm....
So b is picking up attrs that I don't want it to. Annoyingly it works fine if you do this:
from datamodel import *
a = dmarray([1,2,3,4], attrs={'foo':'bar'})
b = dmarray([1,2,3,4])
b.attrs
# {}
So how in the world do I make this dmarray work how I want it to?
Edit:
OK so this seems to fix the problem but I don't understand why. So lets change the question to what this is doing and why it works?
class dmarray(numpy.ndarray):
def __new__(cls, input_array, attrs=None):
obj = numpy.asarray(input_array).view(cls)
return obj
def __init__(self, input_array, attrs=None):
if attrs == None:
attrs = {}
self.attrs = attrs
So by removing the kwarg from __new__() and putting it in __init__() it works. I just tried this as a "well it might work"
a = dmarray([1,2,3,4])
b = dmarray([1,2,3,4])
a.attrs['foo'] = 'bar'
b.attrs
# {}
The problem is here:
def __new__(cls, input_array, attrs={})
Never do this attrs={} in a function header. The expected result is (probably) not what you think it is. This is a common Python Pitfall. See here Default Parameter Values in Python
The right way how to do this:
def __new__(cls, input_array, attrs=None):
if attrs is None:
attrs = {}

Python data structure for a collection of objects with random access based on an attribute

I need a collection of objects which can be looked up by a certain (unique) attribute common to each of the objects. Right now I am using a dicitionary assigning the dictionary key to the attribute.
Here is an example of what I have now:
class Item():
def __init__(self, uniq_key, title=None):
self.key = uniq_key
self.title = title
item_instance_1 = Item("unique_key1", title="foo")
item_instance_2 = Item("unique_key3", title="foo")
item_instance_3 = Item("unique_key2", title="foo")
item_collection = {
item_instance_1.key: item_instance_1,
item_instance_2.key: item_instance_2,
item_instance_3.key: item_instance_3
}
item_instance_1.key = "new_key"
Now this seems a rather cumbersome solution, as the key is not a reference to the attribute but takes the value of the key-attribute on assignment, meaning that:
the keys of the dictionary duplicate information already present in form of the object attribute and
when the object attribute is changed the dictionary key is not updated.
Using a list and iterating through the object seems even more inefficient.
So, is there more fitting data structure than dict for this particular case, a collection of objects giving me random access based on a certain object attribute?
This would need to work with Python 2.4 as that's what I am stuck with (at work).
If it hasn't been obvious, I'm new to Python.
There is actually no duplication of information as you fear: the dict's key, and the object's .key attribute, are just two references to exactly the same object.
The only real problem is "what if the .key gets reassigned". Well then, clearly you must use a property that updates all the relevant dicts as well as the instance's attribute; so each object must know all the dicts in which it may be enregistered. Ideally one would want to use weak references for the purpose, to avoid circular dependencies, but, alas, you can't take a weakref.ref (or proxy) to a dict. So, I'm using normal references here, instead (the alternative is not to use dict instances but e.g. some special subclass -- not handy).
def enregister(d, obj):
obj.ds.append(d)
d[obj.key] = obj
class Item(object):
def __init__(self, uniq_key, title=None):
self._key = uniq_key
self.title = title
self.ds = []
def adjust_key(self, newkey):
newds = [d for d in self.ds if self._key in d]
for d in newds:
del d[self._key]
d[newkey] = self
self.ds = newds
self._key = newkey
def get_key(self):
return self._key
key = property(get_key, adjust_key)
Edit: if you want a single collection with ALL the instances of Item, that's even easier, as you can make the collection a class-level attribute; indeed it can be a WeakValueDictionary to avoid erroneously keeping items alive, if that's what you need. I.e.:
class Item(object):
all = weakref.WeakValueDictionary()
def __init__(self, uniq_key, title=None):
self._key = uniq_key
self.title = title
# here, if needed, you could check that the key
# is not ALREADY present in self.all
self.all[self._key] = self
def adjust_key(self, newkey):
# "key non-uniqueness" could be checked here too
del self.all[self._key]
self.all[newkey] = self
self._key = newkey
def get_key(self):
return self._key
key = property(get_key, adjust_key)
Now you can use Item.all['akey'], Item.all.get('akey'), for akey in Item.all:, and so forth -- all the rich functionality of dicts.
There are a number of great things you can do here. One example would be to let the class keep track of everything:
class Item():
_member_dict = {}
#classmethod
def get_by_key(cls,key):
return cls._member_dict[key]
def __init__(self, uniq_key, title=None):
self.key = uniq_key
self.__class__._member_dict[key] = self
self.title = title
>>> i = Item('foo')
>>> i == Item.get_by_key('foo')
True
Note you will retain the update problem: if key changes, the _member_dict falls out of sync. This is where encapsulation will come in handy: make it (practically) impossible to change key without updating the dictionary. For a good tutorial on how to do that, see this tutorial.
Well, dict really is what you want. What may be cumbersome is not the dict itself, but the way you are building it. Here is a slight enhancement to your example, showing how to use a list expression and the dict constructor to easily create your lookup dict. This also shows how to create a multimap kind of dict, to look up matching items given a field value that might be duplicated across items:
class Item(object):
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
def __str__(self):
return str(self.__dict__)
def __repr__(self):
return str(self)
allitems = [
Item(key="red", title="foo"),
Item(key="green", title="foo"),
Item(key="blue", title="foofoo"),
]
# if fields are unique
itemByKey = dict([(i.key,i) for i in allitems])
# if field value can be duplicated across items
# (for Python 2.5 and higher, you could use a defaultdict from
# the collections module)
itemsByTitle = {}
for i in allitems:
if i.title in itemsByTitle:
itemsByTitle[i.title].append(i)
else:
itemsByTitle[i.title] = [i]
print itemByKey["red"]
print itemsByTitle["foo"]
Prints:
{'key': 'red', 'title': 'foo'}
[{'key': 'red', 'title': 'foo'}, {'key': 'green', 'title': 'foo'}]
Editing to correct the problem I had - which was due to my "collection = dict()" default parameter (*bonk*). Now, each call to the function will return a class with its own collection as intended - this for convenience in case more than one such collection should be needed. Also am putting the collection in the class and just returning the class instead of the two separately in a tuple as before. (Leaving the default container here as dict(), but that could be changed to Alex's WeakValueDictionary, which is of course very cool.)
def make_item_collection(container = None):
''' Create a class designed to be collected in a specific collection. '''
container = dict() if container is None else container
class CollectedItem(object):
collection = container
def __init__(self, key, title=None):
self.key = key
CollectedItem.collection[key] = self
self.title = title
def update_key(self, new_key):
CollectedItem.collection[
new_key] = CollectedItem.collection.pop(self.key)
self.key = new_key
return CollectedItem
# Usage Demo...
Item = make_item_collection()
my_collection = Item.collection
item_instance_1 = Item("unique_key1", title="foo1")
item_instance_2 = Item("unique_key2", title="foo2")
item_instance_3 = Item("unique_key3", title="foo3")
for k,v in my_collection.iteritems():
print k, v.title
item_instance_1.update_key("new_unique_key")
print '****'
for k,v in my_collection.iteritems():
print k, v.title
And here's the output in Python 2.5.2:
unique_key1 foo1
unique_key2 foo2
unique_key3 foo3
****
new_unique_key foo1
unique_key2 foo2
unique_key3 foo3

Finding the static attributes of a class in Python

This is an unusual question, but I'd like to dynamically generate the __slots__ attribute of the class based on whatever attributes I happened to have added to the class.
For example, if I have a class:
class A(object):
one = 1
two = 2
__slots__ = ['one', 'two']
I'd like to do this dynamically rather than specifying the arguments by hand, how would I do this?
At the point you're trying to define slots, the class hasn't been built yet, so you cannot define it dynamically from within the A class.
To get the behaviour you want, use a metaclass to introspect the definition of A and add a slots attribute.
class MakeSlots(type):
def __new__(cls, name, bases, attrs):
attrs['__slots__'] = attrs.keys()
return super(MakeSlots, cls).__new__(cls, name, bases, attrs)
class A(object):
one = 1
two = 2
__metaclass__ = MakeSlots
One very important thing to be aware of -- if those attributes stay in the class, the __slots__ generation will be useless... okay, maybe not useless -- it will make the class attributes read-only; probably not what you want.
The easy way is to say, "Okay, I'll initialize them to None, then let them disappear." Excellent! Here's one way to do that:
class B(object):
three = None
four = None
temp = vars() # get the local namespace as a dict()
__slots__ = temp.keys() # put their names into __slots__
__slots__.remove('temp') # remove non-__slots__ names
__slots__.remove('__module__') # now remove the names from the local
for name in __slots__: # namespace so we don't get read-only
del temp[name] # class attributes
del temp # and get rid of temp
If you want to keep those initial values it takes a bit more work... here's one possible solution:
class B(object):
three = 3
four = 4
def __init__(self):
for key, value in self.__init__.defaults.items():
setattr(self, key, value)
temp = vars()
__slots__ = temp.keys()
__slots__.remove('temp')
__slots__.remove('__module__')
__slots__.remove('__init__')
__init__.defaults = dict()
for name in __slots__:
__init__.defaults[name] = temp[name]
del temp[name]
del temp
As you can see, it is possible to do this without a metaclass -- but who wants all that boilerplate? A metaclass could definitely help us clean this up:
class MakeSlots(type):
def __new__(cls, name, bases, attrs):
new_attrs = {}
new_attrs['__slots__'] = slots = attrs.keys()
slots.remove('__module__')
slots.remove('__metaclass__')
new_attrs['__weakref__'] = None
new_attrs['__init__'] = init = new_init
init.defaults = dict()
for name in slots:
init.defaults[name] = attrs[name]
return super(MakeSlots, cls).__new__(cls, name, bases, new_attrs)
def new_init(self):
for key, value in self.__init__.defaults.items():
setattr(self, key, value)
class A(object):
__metaclass__ = MakeSlots
one = 1
two = 2
class B(object):
__metaclass__ = MakeSlots
three = 3
four = 4
Now all the tediousness is kept in the metaclass, and the actual class is easy to read and (hopefully!) understand.
If you need to have anything else in these classes besides attributes I strongly suggest you put whatever it is in a mixin class -- having them directly in the final class would complicate the metaclass even more.

Categories