Comparison of hashable objects - python

I have a tuple of python objects, from which I need a list of objects with no duplicates, using set() (this check for duplicate objects is to be done on an attribute.). This code will give a simple illustration:
class test:
def __init__(self, t):
self.t = t
def __repr__(self):
return repr(self.t)
def __hash__(self):
return self.t
l = (test(1), test(2), test(-1), test(1), test(3), test(2))
print l
print set(l)
However, it did not work. I can do it on an iteration over l, but any idea why set() is not working? Here is the official documentation.

From the documentation you linked to:
The set classes are implemented using dictionaries. Accordingly, the
requirements for set elements are the same as those for dictionary
keys; namely, that the element defines both __eq__() and __hash__().
To be more specific, if a == b then your implementation must be such that hash(a) == hash(b). The reverse is not required.
Also, you should probably call hash in __hash__ to handle long integers
class Test:
def __init__(self, t):
self.t = t
def __repr__(self):
return repr(self.t)
def __hash__(self):
return hash(self.t)
def __eq__(self, other):
return isinstance(other, Test) and self.t == other.t

Small nit picks:
Your implementation of __eq__ doesn't give the other object a chance to run its own __eq__. The class must also consider its members as immutable as the hash must stay constant. You don't want to break your dicts, do you?
class Test:
def __init__(self, t):
self._t = t
#property
def t(self):
return self._t
def __repr__(self):
return repr(self._t)
def __hash__(self):
return hash(self._t)
def __eq__(self, other):
if not isinstance(other, Test):
return NotImplemented # don't know how to handle `other`
return self.t == other.t

Related

Generalized __eq__() method in Python

I'd like to create a generalized __eq__() method for the following Class. Basically I'd like to be able to add another property (nick) without having to change __eq__()
I imagine I can do this somehow by iterating over dir() but I wonder if there is a way to create a comprehension that just delivers the properties.
class Person:
def __init__(self, first, last):
self.first=first
self.last=last
#property
def first(self):
assert(self._first != None)
return self._first
#first.setter
def first(self,fn):
assert(isinstance(fn,str))
self._first=fn
#property
def last(self):
assert(self._last != None)
return self._last
#last.setter
def last(self,ln):
assert(isinstance(ln,str))
self._last=ln
#property
def full(self):
return f'{self.first} {self.last}'
def __eq__(self, other):
return self.first==other.first and self.last==other.last
p = Person('Raymond', 'Salemi')
p2= Person('Ray', 'Salemi')
You could use __dict__ to check if everything is the same, which scales for all attributes:
If the objects are not matching types, I simply return False.
class Person:
def __init__(self, first, last, nick):
self.first = first
self.last = last
self.nick = nick
def __eq__(self, other):
return self.__dict__ == other.__dict__ if type(self) == type(other) else False
>>> p = Person('Ray', 'Salemi', 'Ray')
>>> p2= Person('Ray', 'Salemi', 'Ray')
>>> p3 = Person('Jared', 'Salemi', 'Jarbear')
>>> p == p2
True
>>> p3 == p2
False
>>> p == 1
False
You can get all the properties of a Class with a construct like this:
from itertools import chain
#classmethod
def _properties(cls):
type_dict = dict(chain.from_iterable(typ.__dict__.items() for typ in reversed(cls.mro())))
return {k for k, v in type_dict.items() if 'property' in str(v)}
The __eq__ would become something like this:
def __eq__(self, other):
properties = self._properties() & other._properties()
if other._properties() > properties and self._properties() > properties:
# types are not comparable
return False
try:
return all(getattr(self, prop) == getattr(other, prop) for prop in properties)
except AttributeError:
return False
The reason to work with the reversed(cls.mro()) is so something like this also works:
class Worker(Person):
#property
def wage(self):
return 0
p4 = Worker('Raymond', 'Salemi')
print(p4 == p3)
True
you can try to do this, it will also work if you want eq inside dict and set
def __eq__(self, other):
"""Overrides the default implementation"""
if isinstance(self, other.__class__):
return self.__hash__() == other.__hash__()
return NotImplemented
def __hash__(self):
"""Overrides the default implementation,
and set which fieds to use for hash generation
"""
__make_hash = [
self.first
]
return hash(tuple(sorted(list(filter(None, __make_hash)))))

What is the correct way to use objects as keys in a ZOBD OOBTree?

In ZOBD (in Python 3.x) I would like to be able to store objects as keys in an BTrees.OOBTree.OOBTree(). Example of the error I get when I try (see the comment):
from BTrees.OOBTree import OOBTree as Btree
class Test:
pass
bt=Btree()
t=Test()
bt[t]=None #TypeError: Object has default comparison
So, I read somewhere that __eq__ may need to be defined to remove that error, but although that seemed to fix the previous problem it seems to cause more problems. Example:
[EDIT: It should be noted that I've found some problems with inheriting OOBTree (and TreeSet) as I do here. Apparently, they don't save properly; so, it's not the same as inheriting Persistent, even though they inherit Persistent.]
from BTrees.OOBTree import OOBTree as Btree
class Test:
def __eq__(self, other): #Maybe this isn't the way to define the method
return self==other
bt=Btree()
t=Test()
bt[t]=None
t in bt #TypeError: unorderable types: Test() < Test()
What is the correct way to use objects as keys in a BTree or OOBTree? I do need to test whether the key exists, too.
For those who don't know, BTrees in ZODB are pretty much like scalable Python dictionaries (they should be workable with more key-value pairs than a regular Python dictionary) designed for persistence.
I think this answer can help with your problem.
Bascically, you have to reimplent three methods on your object:
__eq__ (equality check)
__ne__ (non equality check)
__hash__ to make the object really serializable as a dictionary key
Although Eliot Berriot's answer led me to the answers I needed, I figured I would post the full answer that helped me so others don't have to spend extra time figuring stuff out. (I'm going to speak to myself in the second person.)
First of all (I didn't really ask about it, but it's something you might be tempted to do), don't inherit OOBTree or OOTreeSet (this causes problems). Make your own classes that inherit Persistent, and put an OOBTree or an OOTreeSet inside, if you want something like an inherited OOBTree (also, define the methods needed to make it seem like a dictionary or a set if you want that).
Next of all, you need to create a Persistent ID system (for every object that you put in the OOBTree or OOTreeSet, because objects cause OOBTrees and OOTreeSets to malfunction if you don't have a unique integer that ZOBD can keep track of your objects with. You need to define the methods that Eliot mentioned, as well as some other similar ones (and these need to compare that integer ID—not the object itself); i.e. define these methods of your classes that produce objects that will be keys of an OOBTree or contained in an OOTreeSet: __eq__, __ne__, __hash__, __lt__, __le__, __gt__, and __ge__. However, in order to have a persistent ID, you're going to have to make an ID counter class or something (because it won't save plain integers as values in an OOBTree for some odd reason, unless I did it wrong), and that counter class will have to have an ID, too.
Next of all, you need to make sure that if you're making objects keys, then you better not make things like strings be keys, too, in the same OOBTree, or else you'll have mysterious issues (due to strings not having the same sort of ID system as your objects). It'll be comparing the string keys to your object keys, and cause an error, because they're not designed to compare.
Here is a working example of Python 3.x code that allows you to use objects as keys in an OOBTree, and it will allow you to iterate over persistent objects in an OOBTree (and use them as keys). It also shows you how it can save and load the objects.
Sorry it's kind of long, but it should give you a good idea of how this can work:
import transaction, ZODB, ZODB.FileStorage
from persistent import Persistent
from BTrees.OOBTree import OOBTree as OOBTree
from BTrees.OOBTree import OOTreeSet as OOTreeSet
class Btree(Persistent):
def __init__(self, ID=None, **attr):
#I like to use entirely uppercase variables to represent ones you aren't supposed to access outside of the class (because it doesn't have the restrictions that adding _ and __ to the beginning do, and because you don't really need all caps for constants in Python)
Persistent.__init__(self)
self.DS=OOBTree() #DS stands for data structure
self.DS.update(attr)
if ID==None:
self.ID=-1 #To give each object a unique id. The value, -1, is replaced.
self.ID_SET=False
else:
self.ID=ID #You should remember what you’re putting here, and it should be negative.
self.ID_SET=True
def clear(self):
self.DS.clear()
def __delitem__(self, key):
del self.DS[key]
def __getitem__(self, key):
return self.DS[key]
def __len__(self):
return len(self.DS)
def __iadd__(self, other):
self.DS.update(other)
def __isub__(self, other):
for x in other:
try:
del self.DS[x]
except KeyError:
pass
def __contains__(self, key):
return self.DS.has_key(key)
def __setitem__(self, key, value):
self.DS[key]=value
def __iter__(self):
return iter(self.DS)
def __eq__(self, other):
return self.id==other.id
def __ne__(self, other):
return self.id!=other.id
def __hash__(self):
return self.id
def __lt__(self, other):
return self.id<other.id
def __le__(self, other):
return self.id<=other.id
def __gt__(self, other):
return self.id>other.id
def __ge__(self, other):
return self.id>=other.id
#property
def id(self):
if self.ID_SET==False:
print("Warning. self.id_set is False. You are accessing an id that has not been set.")
return self.ID
#id.setter
def id(self, num):
if self.ID_SET==True:
raise ValueError("Once set, the id value may not be changed.")
else:
self.ID=num
self.ID_SET=True
def save(self, manager, commit=True):
if self.ID_SET==False:
self.id=manager.inc()
manager.root.other_set.add(self)
if commit==True:
transaction.commit()
class Set(Persistent):
def __init__(self, ID=None, *items):
Persistent.__init__(self)
self.DS=OOTreeSet()
if ID==None:
self.ID=-1 #To give each object a unique id. The value, -1, is replaced automatically when saved by the project for the first time (which should be done right after the object is created).
self.ID_SET=False
else:
if ID>=0:
raise ValueError("Manual values should be negative.")
self.ID=ID #You should remember what you’re putting here, and it should be negative.
self.ID_SET=True
self.update(items)
def update(self, items):
self.DS.update(items)
def add(self, *items):
self.DS.update(items)
def remove(self, *items):
for x in items:
self.DS.remove(x)
def has(self, *items):
for x in items:
if not self.DS.has_key(x):
return False
return True
def __len__(self):
return len(self.DS)
def __iadd__(self, other):
self.DS.update(other)
def __isub__(self, other):
self.remove(*other)
def __contains__(self, other):
return self.DS.has_key(other)
def __iter__(self):
return iter(self.DS)
def __eq__(self, other):
return self.id==other.id
def __ne__(self, other):
return self.id!=other.id
def __hash__(self):
return self.id
def __lt__(self, other):
return self.id<other.id
def __le__(self, other):
return self.id<=other.id
def __gt__(self, other):
return self.id>other.id
def __ge__(self, other):
return self.id>=other.id
#property
def id(self):
if self.ID_SET==False:
print("Warning. self.id_set is False. You are accessing an id that has not been set.")
return self.ID
#id.setter
def id(self, num):
if self.ID_SET==True:
raise ValueError("Once set, the id value may not be changed.")
else:
self.ID=num
self.ID_SET=True
def save(self, manager, commit=True):
if self.ID_SET==False:
self.id=manager.inc()
manager.root.other_set.add(self)
if commit==True:
transaction.commit()
class Counter(Persistent):
#This is for creating a persistent id count object (using a plain integer outside of a class doesn't seem to work).
def __init__(self, value=0):
self.value=value
self.ID_SET=False
self.id=value
#The following methods are so it will fit fine in a BTree (they don't have anything to do with self.value)
def __eq__(self, other):
return self.id==other.id
def __ne__(self, other):
return self.id!=other.id
def __hash__(self):
return self.id
def __lt__(self, other):
return self.id<other.id
def __le__(self, other):
return self.id<=other.id
def __gt__(self, other):
return self.id>other.id
def __ge__(self, other):
return self.id>=other.id
#property
def id(self):
if self.ID_SET==False:
print("Warning. self.id_set is False. You are accessing an id that has not been set.")
return self.ID
#id.setter
def id(self, num):
if self.ID_SET==True:
raise ValueError("Once set, the id value may not be changed.")
else:
self.ID=num
self.ID_SET=True
class Manager:
def __init__(self, filepath):
self.filepath=filepath
self.storage = ZODB.FileStorage.FileStorage(filepath)
self.db = ZODB.DB(self.storage)
self.conn = self.db.open()
self.root = self.conn.root
print("Database opened.\n")
try:
self.root.other_dict #This holds arbitrary stuff, like the Counter. String keys.
except AttributeError:
self.root.other_dict=OOBTree()
self.root.other_dict["id_count"]=Counter()
try:
self.root.other_set #set other
except AttributeError:
self.root.other_set=OOTreeSet() #This holds all our Btree and Set objects (they are put here when saved to help them be persistent).
def inc(self): #This increments our Counter and returns the new value to become the integer id of a new object.
self.root.other_dict["id_count"].value+=1
return self.root.other_dict["id_count"].value
def close(self):
self.db.pack()
self.db.close()
print("\nDatabase closed.")
class Btree2(Btree):
#To prove that we can inherit our own classes we created that inherit Persistent (but inheriting OOBTree or OOTreeSet causes issues)
def __init__(self, ID=None, **attr):
Btree.__init__(self, ID, **attr)
m=Manager("/path/to/database/test.fs")
try:
m.root.tree #Causes an AttributeError if this is the first time you ran the program, because it doesn't exist.
print("OOBTree loaded.")
except AttributeError:
print("Creating OOBTree.")
m.root.tree=OOBTree()
for i in range(5):
key=Btree2()
key.save(m, commit=False) #Saving without committing adds it to the manager's OOBTree and gives it an integer ID. This needs to be done right after creating an object (whether or not you commit).
value=Btree2()
value.save(m, commit=False)
m.root.tree[key]=value #Assigning key and value (which are both objects) to the OOBTree
transaction.commit() #Commit the transactions
try:
m.root.set
print("OOTreeSet loaded.")
except AttributeError:
print("Creating OOTreeSet")
m.root.set=OOTreeSet()
for i in range(5):
item=Set()
item.save(m, commit=False)
m.root.set.add(item)
transaction.commit()
#Doing the same with an OOTreeSet (since objects in them suffered from the same problem as objects as keys in an OOBTree)
for x in m.root.tree:
print("Key: "+str(x.id))
print("Value: "+str(m.root.tree[x].id))
if x in m.root.tree:
print("Comparison works for "+str(x.id))
print("\nOn to OOTreeSet.\n")
for x in m.root.set:
if x in m.root.set:
print("Comparison works for "+str(x.id))
m.close()

Is there a comparison key for set objects?

Is there a way to give a comparator to set() so when adding items it checks an attribute of that item for likeness rather than if the item is the same? For example, I want to use objects in a set that can contain the same value for one attribute.
class TestObj(object):
def __init__(self, value, *args, **kwargs):
self.value = value
super().__init__(*args, **kwargs)
values = set()
a = TestObj('a')
b = TestObj('b')
a2 = TestObj('a')
values.add(a) # Ok
values.add(b) # Ok
values.add(a2) # Not ok but still gets added
# Hypothetical code
values = set(lambda x, y: x.value != y.value)
values.add(a) # Ok
values.add(b) # Ok
values.add(a2) # Not added
I have implemented my own sorta thing that does similar functionality but wanted to know if there was a builtin way.
from Queue import Queue
class UniqueByAttrQueue(Queue):
def __init__(self, attr, *args, **kwargs):
Queue.__init__(self, *args, **kwargs)
self.attr = attr
def _init(self, maxsize):
self.queue = set()
def _put(self, item):
# Potential race condition, worst case message gets put in twice
if hasattr(item, self.attr) and item not in self:
self.queue.add(item)
def __contains__(self, item):
item_attr = getattr(item, self.attr)
for x in self.queue:
x_attr = getattr(x, self.attr)
if x_attr == item_attr:
return True
return False
def _get(self):
return self.queue.pop()
Just define __hash__ and __eq__ on the object in terms of the attribute in question and it will work with sets. For example:
class TestObj(object):
def __init__(self, value, *args, **kwargs):
self.value = value
super().__init__(*args, **kwargs)
def __eq__(self, other):
if not instance(other, TestObj):
return NotImplemented
return self.value == other.value
def __hash__(self):
return hash(self.value)
If you can't change the object (or don't want to, say, because other things are important to equality), then use a dict instead. You can either do:
mydict[obj.value] = obj
so new objects replace old, or
mydict.setdefault(obj.value, obj)
so old objects are maintained if the value in question is already in the keys. Just make sure to iterate using .viewvalues() (Python 2) or .values() (Python 3) instead of iterating directly (which would get the keys, not the values). You could actually use this approach to make a custom set-like object with a key as you describe (though you'd need to implement many more methods than I show to make it efficient, the default methods are usually fairly slow):
from collections.abc import MutableSet # On Py2, collections without .abc
class keyedset(MutableSet):
def __init__(self, it=(), key=lambda x: x):
self.key = key
self.contents = {}
for x in it:
self.add(x)
def __contains__(self, x):
# Use anonymous object() as default so all arguments handled properly
sentinel = object()
getval = self.contents.get(self.key(x), sentinel)
return getval is not sentinel and getval == x
def __iter__(self):
return iter(self.contents.values()) # itervalues or viewvalues on Py2
def __len__(self):
return len(self.contents)
def add(self, x):
self.contents.setdefault(self.key(x), x)
def discard(self, x):
self.contents.pop(self.key(x), None)

sorting list of objects python

I wrote a class that stores a list of objects which I have also defined.
I would like to be able to call obj_list.sort(), and have the results sorted in ascending order, but it isn't working out exactly how I want it.
If I get the obj data and call sort() three times this is the behavior with my current implementation:
class MyClass():
def __init__(self):
self.obj_list = self.set_obj_list()
def set_obj_list(self):
data = []
for x in range(20):
obj = MyObjClass(x)
data.append( obj )
data.sort()
return data
class MyObjClass():
def __init__(self, number):
self.number = number # number is an integer
def __lt__(self, other):
return cmp(self.number, other.number)
def __repr__(self):
return str(self.number)
a = MyClass()
print a.obj_list
a.obj_list.sort()
print a.obj_list
a.obj_list.sort()
print a.obj_list
a.obj_list.sort()
print a.obj_list
Thank you.
I want it sorted in ascending order, but for sort() to do nothing if already sorted.
__lt__ should return a true value if & only if self is less than other, but cmp(self, other) will return a true value (1 or -1) if self does not equal other. You need to change this:
def __lt__(self, other):
return cmp(self.number, other.number)
to this:
def __lt__(self, other):
return cmp(self.number, other.number) < 0

How does a Python set([]) check if two objects are equal? What methods does an object need to define to customise this?

I need to create a 'container' object or class in Python, which keeps a record of other objects which I also define. One requirement of this container is that if two objects are deemed to be identical, one (either one) is removed. My first thought was to use a set([]) as the containing object, to complete this requirement.
However, the set does not remove one of the two identical object instances. What must I define to create one?
Here is the Python code.
class Item(object):
def __init__(self, foo, bar):
self.foo = foo
self.bar = bar
def __repr__(self):
return "Item(%s, %s)" % (self.foo, self.bar)
def __eq__(self, other):
if isinstance(other, Item):
return ((self.foo == other.foo) and (self.bar == other.bar))
else:
return False
def __ne__(self, other):
return (not self.__eq__(other))
Interpreter
>>> set([Item(1,2), Item(1,2)])
set([Item(1, 2), Item(1, 2)])
It is clear that __eq__(), which is called by x == y, is not the method called by the set. What is called? What other method must I define?
Note: The Items must remain mutable, and can change, so I cannot provide a __hash__() method. If this is the only way of doing it, then I will rewrite for use of immutable Items.
Yes, you need a __hash__()-method AND the comparing-operator which you already provided.
class Item(object):
def __init__(self, foo, bar):
self.foo = foo
self.bar = bar
def __repr__(self):
return "Item(%s, %s)" % (self.foo, self.bar)
def __eq__(self, other):
if isinstance(other, Item):
return ((self.foo == other.foo) and (self.bar == other.bar))
else:
return False
def __ne__(self, other):
return (not self.__eq__(other))
def __hash__(self):
return hash(self.__repr__())
I am afraid you will have to provide a __hash__() method. But you can code it the way, that it does not depend on the mutable attributes of your Item.

Categories