pickle a dictionary whose values are instances of a custom class - python

This is my first question here.
I am facing the following issue:
I am using the pickle module to dump a large dictionary and then load it back from disk.
The problem is that after unpickling, the two objects are not the same.
Specifically, this is what I am doing:
In the file A.py I define my class, which has some attributes and methods.
In file B.py I build a dictionary whose values are instances of the class in A.py.
Also in file B.py, I pickle this dictionary and unpickle it again.
The two dicts are not the same. I checked the keys and they are the same. The problem lies with the values.
Any ideas?

Two different dictionaries with the same keys & values would not be considered identical:
>>> d1 = {'k1': 12345}
>>> d2 = {'k1': 12345}
>>> d1 is d2
False
>>> d1['k1'] is d2['k1']
False
Even if you actually copy the values from one to the other:
>>> d1['k1'] = d2['k1']
>>> d1['k1'] is d2['k1']
True
>>> d1 is d2
False
That's because each one is a separate container. On the other hand, you could check whether all the keys and values are equal, rather than literally being identical objects, using an expression like this:
(set(d1) == set(d2)) and all(d1[k] == d2[k] for k in d1)
The first subexpression makes sure both have the same keys, and the second that the values associated with each of those keys are equal. (For plain dicts, d1 == d2 performs the same check.)
When you reconstitute the dictionary from the saved data a new one is created. It won't be the same dictionary but all its keys and values should be equal to the original. If that is indeed not the case, please provide us with at least the code you're using to determine they are not the same.
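To see the distinction on a round trip (a minimal sketch; any picklable dict behaves this way):
import pickle

d = {'k1': 12345}
restored = pickle.loads(pickle.dumps(d))

print(restored is d)  # False -- unpickling builds a brand-new dictionary
print(restored == d)  # True  -- but its keys and values are equal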

The instances of the class that are the values of the dictionary look like:
class SchemaObject:
    def __init__(self):
        self.type = ''
        self.name = ''
        self.parentdn = ''
        self.dn = ''
        self.oclass = ''

    def initWithXMLNode(self, xmlnode, parentdn):
        self.type = ''
        self.name = ''
        self.parentdn = parentdn
        if xmlnode.nodeName == 'fragments':
            self.dn = parentdn
        if xmlnode.nodeName == 'fragment':
            self.initFragment(xmlnode)
        elif xmlnode.nodeName == 'configGroupLdap':
            self.initGroup(xmlnode)
        elif xmlnode.nodeName == 'configObjectLdap':
            self.initObject(xmlnode)

    def initWithFragment(self, dn, parentdn, name):
        self.type = 'object'
        self.name = name
        self.parentdn = parentdn
        self.dn = dn
        self.oclass = name

    def initFragment(self, xmlnode):
        self.type = 'fragment'
        self.dn = 'fsFragmentId=' + xmlnode.firstChild.nodeValue + ',' + self.parentdn
        self.oclass = 'FSFragment'

    def initGroup(self, xmlnode):
        self.type = 'group'
        self.name = 'group-' + xmlnode.getAttribute('name')
        self.dn = xmlnode.getAttribute('dn')
        self.oclass = 'FSFragment'

    def initObject(self, xmlnode):
        self.type = 'object'
        self.name = xmlnode.getAttribute('name')
        self.oclass = self.name
        if not xmlnode.hasAttribute('rdnname'):
            self.type = 'no_rdnname'
            return
        else:
            rdnname = xmlnode.getAttribute('rdnname')
            parts = rdnname.split(',')
            if xmlnode.getAttribute('multiple') != 'true':
                dn = self.parentdn
                for part in reversed(parts):
                    dn = 'fsFragmentId=' + part + ',' + dn
                self.dn = dn
            else:
                self.type = ''
                self.dn = 'fsFragmentId=' + parts[len(parts)-1] + ',' + self.parentdn
                dynamicStatics.append(self.oclass)
In file B.py I create a dictionary whose values are instances of this class.
The dict is my_dict.
I have also checked that the keys are equal.
Only when I try to compare two values between these dictionaries does it fail.
So I pickle it with:
with open('my_dumped.pkl', 'wb') as schema:
    pickle.dump(my_dict, schema)
and restore it from disk with:
with open('my_dumped.pkl', 'rb') as schema:
    b = pickle.load(schema)
If I now issue my_dict == b, shouldn't this return True? I do not care about identity, just equality. As marteanu said, the keys are all there, and each key has the correct value.
Unfortunately, this equality returns False.
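A likely culprit: SchemaObject never defines __eq__, so its instances compare by identity, and the freshly unpickled values can never be equal to the originals even when every attribute matches. A minimal sketch of the fix, assuming attribute-by-attribute equality is what you want:
class SchemaObject:
    # ... methods as above ...
    def __eq__(self, other):
        # compare by attributes instead of by object identity
        return isinstance(other, SchemaObject) and self.__dict__ == other.__dict__
With that in place, my_dict == b compares the values field by field and should return True after a pickle round trip.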


pickle, dill and cloudpickle returning field as empty dict on custom class after process termination

I have an object of a custom class that I am trying to serialize and permanently store.
When I serialize it, store it, load it and use it in the same run, it works fine. It only messes up when I've ended the process and then try to load it again from the pickle file. This is the code that works fine:
first_model = NgramModel(3, name="debug")
for paragraph in text:
    first_model.train(paragraph_to_sentences(text))
    # paragraph_to_sentences just uses regex to do the equivalent of splitting by punctuation
print(first_model.context_options)
# context_options is a dict (counter)
first_model = NgramModel.load_existing_model("debug")
# load_existing_model loads the pickle file. Look in the class code
print(first_model.context_options)
However, when I run this alone, it prints an empty counter:
first_model = NgramModel.load_existing_model("debug")
print(first_model.context_options)
This is a shortened version of the class file (the only two methods that touch the pickle/dill are update_pickle_state and load_existing_model):
import os
import dill
from itertools import count
from collections import Counter
from os import path

class NgramModel:
    context_options: dict[tuple, set[str]] = {}
    ngram_count: Counter[tuple] = Counter()
    n = 0
    pickle_path: str = None
    num_paragraphs = 0
    num_sentences = 0

    def __init__(self, n: int, **kwargs):
        self.n = n
        self.pickle_path = NgramModel.pathify(kwargs.get('name', NgramModel.gen_pickle_name()))  # use name if it exists, else generate a random name

    def train(self, paragraph_as_list: list[str]):
        '''really the central method that coordinates everything else. Takes a list of sentences, generates data (n-grams) from each, updates the fields, and saves the instance (self) to a pickle file'''
        self.num_paragraphs += 1
        for sentence in paragraph_as_list:
            self.num_sentences += 1
            generated = self.generate_Ngrams(sentence)
            self.ngram_count.update(generated)
            for ngram in generated:
                self.add_to_set(ngram)
        self.update_pickle_state()

    def update_pickle_state(self):
        '''saves instance to pickle file'''
        file = open(self.pickle_path, "wb")
        dill.dump(self, file)
        file.close()

    @staticmethod
    def load_existing_model(name: str):
        '''returns object from pickle file'''
        path = NgramModel.pathify(name)
        file = open(path, "rb")
        obj: NgramModel = dill.load(file)
        return obj

    def generate_Ngrams(self, string: str):
        '''ref: https://www.analyticsvidhya.com/blog/2021/09/what-are-n-grams-and-how-to-implement-them-in-python/'''
        words = string.split(" ")
        words = ["<start>"] * (self.n - 1) + words + ["<end>"] * (self.n - 1)
        list_of_tup = []
        for i in range(len(words) + 1 - self.n):
            list_of_tup.append((tuple(words[i + j] for j in range(self.n - 1)), words[i + self.n - 1]))
        return list_of_tup

    def add_to_set(self, ngram: tuple[tuple[str, ...], str]):
        if ngram[0] not in self.context_options:
            self.context_options[ngram[0]] = set()
        self.context_options[ngram[0]].add(ngram[1])

    @staticmethod
    def pathify(name):
        '''converts name to path'''
        return f"models/{name}.pickle"

    @staticmethod
    def gen_pickle_name():
        for i in count():
            new_name = f"unnamed-pickle-{i}"
            if not path.exists(NgramModel.pathify(new_name)):
                return new_name
All the other fields print properly and are complete and correct, except the two dicts.
The problem is that context_options is a mutable class member, not an instance member. If I had to guess, dill is only pickling instance members, since the class definition holds class members. That would account for why you see a "filled-out" context_options when you're working in the same shell but not when you load fresh: you're using the dirtied class member in the former case.
It's for stuff like this that you generally don't want to use mutable class members (or, similarly, mutable default values in function signatures). More typical is to use something like context_options: dict[tuple, set[str]] = None and then check for None in __init__ to set it to a default value, e.g. an empty dict. Alternatively, you could use a @dataclass and provide a field initializer, i.e.
@dataclasses.dataclass
class NgramModel:
    context_options: dict[tuple, set[str]] = dataclasses.field(default_factory=dict)
    ...
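For the None-check pattern mentioned above, a minimal sketch (the parameter is illustrative; the point is that each instance gets its own fresh dict):
class NgramModel:
    def __init__(self, n: int, context_options: dict[tuple, set[str]] = None, **kwargs):
        self.n = n
        # a new dict per instance instead of one shared class-level dict
        self.context_options = context_options if context_options is not None else {}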
You can observe what I mean about it being a mutable class member with, for instance...
if __name__ == '__main__':
    ng = NgramModel(3, name="debug")
    print(ng.context_options)  # {}
    ng.context_options[("foo", "bar")] = {"baz", "qux"}
    print(ng.context_options)  # {('foo', 'bar'): {'baz', 'qux'}}
    ng2 = NgramModel(3, name="debug")
    print(ng2.context_options)  # {('foo', 'bar'): {'baz', 'qux'}}
I would expect a brand new ng2 to have the same context that the brand new ng had - empty (or whatever an appropriate default is).

How to convert an object back into the code used to create it?

For example, if I have a custom Python object like this:
#!/usr/bin/env python3
import os
base_dir = os.path.abspath(".")

class MyFile(dict):
    def __init__(self, name, size=None, dir=base_dir):
        self.name = name
        self.path = os.path.join(dir, name)
        self.bytes = size
and somewhere in my program I create an instance of the class:
a = MyFile(name = "foo", size = 10)
I want to be able to return the code used to create the object in the first place. For example:
print(a)
# <__main__.MyFile object at 0x102b84470>
# should instead print:
# MyFile(name = "foo", size = 10)
But since my object has some default attribute values, I only want those to show up in the output if they were explicitly included when the object was initialized:
b = MyFile(name = "bar", dir = "/home")
print(b)
# <__main__.MyFile object at 0x102b845c0>
# should instead print:
# MyFile(name = "bar", dir = "/home")
And to be clear, I am not trying to pull this from the source code, because a lot of my objects will be created dynamically, and I want to be able to return the same thing for them as well:
l = [ ("baz", 4), ("buzz", 12) ]
f = [ MyFile(name = n, size = s) for n, s in l ]
print(f)
# [<__main__.MyFile object at 0x1023844a8>, <__main__.MyFile object at 0x102384828>]
# should instead print:
# [ MyFile(name = "baz", size = 4), MyFile(name = "buzz", size = 12) ]
I saw the inspect library (https://docs.python.org/3/library/inspect.html) but it does not seem to have anything that does this. What am I missing? This functionality would be pretty analogous to R's dput function.
At a very basic level you can do this:
class MyClass:
    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __repr__(self):
        return f'{self.__class__.__name__}({self.a}, {self.b})'

class MyOtherClass(MyClass):
    def method(self):
        pass

c = MyClass(1, 2)
oc = MyOtherClass(3, 4)
print(c, oc)
Result:
MyClass(1, 2) MyOtherClass(3, 4)
This does what you ask, as well as taking subclassing into account to provide the correct class name. But of course things can get complicated for several reasons:
class MyClass:
    def __init__(self, a, b):
        self.a = a + 1
        self.b = b if b < 10 else a
        self.c = 0

    def inc_c(self):
        self.c += 1

    def __repr__(self):
        return f'{self.__class__.__name__}({self.a - 1}, {self.b})'
The value of c isn't covered by the constructor, so the proposed call would set it to 0. And although you could compensate for the + 1 applied to a, recovering b is more complicated - even more so when you realise someone could have changed the value later.
And then you need to consider that subclasses can override behaviour, etc. So doing something like this only makes sense in very limited use cases.
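One middle-ground approach (a sketch, not from the answers here; it assumes every constructor parameter is stored unchanged under an attribute of the same name) uses inspect.signature to echo back only the arguments that differ from their defaults:
import inspect

class MyFile:
    def __init__(self, name, size=None, dir='.'):
        self.name = name
        self.size = size
        self.dir = dir

    def __repr__(self):
        # reconstruct the call from parameters whose current values
        # differ from the declared defaults
        sig = inspect.signature(type(self).__init__)
        args = []
        for pname, param in sig.parameters.items():
            if pname == 'self':
                continue
            value = getattr(self, pname)  # assumes attribute name == parameter name
            if param.default is inspect.Parameter.empty or value != param.default:
                args.append(f'{pname}={value!r}')
        return f'{type(self).__name__}({", ".join(args)})'

print(MyFile('foo', size=10))  # MyFile(name='foo', size=10)
This breaks down exactly where the answer above says it does: as soon as __init__ transforms its arguments, or the attributes are mutated later, the reconstructed call no longer matches.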
As simple as replacing your code snippet with the following:
import os
base_dir = os.path.abspath(".")

class MyFile(object):
    def __init__(self, name, size=None, dir=base_dir):
        self.name = name
        self.path = os.path.join(dir, name)
        self.bytes = size
        self.remember(name, size, dir)

    def remember(self, name, size, dir):
        self.s = '{}(name = \'{}\'{}{})'.format(
            self.__class__.__name__, name,
            ", size=" + str(size) if size != None else "",
            ', dir="' + dir + '"' if dir != base_dir else "")

    def __repr__(self):
        return self.s
a) for a it returns:
MyFile(name = 'foo', size=10)
b) for b it returns:
MyFile(name = 'bar', dir="/home")
c) for f it returns:
[MyFile(name = 'baz', size=4), MyFile(name = 'buzz', size=12)]
Thanks to everyone who commented and answered. Ultimately, I incorporated their ideas and feedback into the following method, which allowed me to preserve the object's native __repr__ while still getting the behaviors I wanted.
#!/usr/bin/env python3
import os
base_dir = os.path.abspath(".")

class MyFile(dict):
    """
    A custom dict class that auto-populates some keys based on simple input args;
    compatible with unittest.TestCase.assertDictEqual
    """
    def __init__(self, name, size=None, dir=base_dir):
        """
        standard init method
        """
        self.name = name
        self.path = os.path.join(dir, name)
        self.bytes = size
        # auto-populate this key
        self['somekey'] = self.path + ' ' + str(self.bytes)
        # more logic for more complex keys goes here...
        # used later by `init` and `repr`
        self.args = None
        self.kwargs = None

    @classmethod
    def init(cls, *args, **kwargs):
        """
        alternative method to initialize the object while retaining the args passed
        """
        obj = cls(*args, **kwargs)
        obj.args = args
        obj.kwargs = kwargs
        return obj

    def repr(self):
        """
        returns a text representation of the object that can be used to
        create a new copy of an identical object, displaying only the
        args that were originally used to create the current object instance
        (do not show args that were not passed, e.g. default-value args)
        """
        n = 'MyFile('
        if self.args:
            for i, arg in enumerate(self.args):
                n += arg.__repr__()
                if i < len(self.args) - 1 or self.kwargs:
                    n += ', '
        if self.kwargs:
            for i, (k, v) in enumerate(self.kwargs.items()):
                n += str(k) + '=' + v.__repr__()
                if i < len(self.kwargs.items()) - 1:
                    n += ', '
        n += ')'
        return n
Usage:
# normal object initialization
obj1 = MyFile('foo', size=10)
print(obj1) # {'somekey': '/Users/me/test/foo 10'}
# initialize with classmethod instead to preserve args
obj2 = MyFile.init("foo", size = 10)
print(obj2) # {'somekey': '/Users/me/test/foo 10'}
# view the text representation
repr = obj2.repr()
print(repr) # MyFile('foo', size=10)
# re-load a copy of the object from the text representation
obj3 = eval(repr)
print(obj3) # {'somekey': '/Users/me/test/foo 10'}
The use case for this is representing large, simple data structures (dicts) in my Python code (integration tests), where the data values are dynamically generated from a smaller set of variables. When I have many hundreds of such data structures to include in the test cases, it becomes infeasible to write the code for e.g. MyFile(...) out hundreds of times. This method allows me to use a script to ingest the data and then print out the compact Python code needed to recreate it with my custom object class, which I can then just copy/paste into my test cases.

Class duplicates input objects

I have two Python classes that I use to extract data from Oracle to ArcMap. The various activities in the process cause me to start with a list of 'column' objects and build a pyTable object. The pyTable object has a list of insert Fields by name. During __init__ I use the getSelect() function to populate the list of insert Fields.
I have added a bunch of statements to make sure that each time I call pyTable I get a newly created object, but I'm still seeing a strange result. The first time I use the class, all is fine. The second time I issue the same statement, the colList is new, but the field list is duplicated. My apologies for not cleaning out the extraneous code sections.
Where am I messing up my object references?
Here are the execution results. myList has 8 column objects.
>>> arcTable = pyTable(myList)
>>> len(arcTable.getTuple())
8
>>> arcTable = pyTable(myList)
>>> len(arcTable.getTuple())
16
>>> arcTable = pyTable(myList)
>>> len(arcTable.getTuple())
8
>>> arcTable = pyTable(myList)
>>> len(arcTable.getTuple())
8
>>> newTable = pyTable(myList)
>>> len(newTable.getTuple())
8
>>> thirdTable = pyTable(myList)
>>> len(thirdTable.getTuple())
16
>>> thirdTable = pyTable(myList)
>>> len(thirdTable.getTuple())
24
>>> thirdTable = pyTable(myList)
>>> len(thirdTable.getTuple())
8
>>>
Here are the two classes:
import arcpy, cx_Oracle, sys

class column:
    # Add to the arcType and cxType functions to support more Oracle data types.
    # BLOB and CLOB fields will need additional support in Read and Write fx's.
    name = ''
    dataType = ''
    dataLen = 1
    dataPrecision = 0
    dataScale = 0
    query = ''
    isShape = False
    isLOB = False

    def __init__(self, Name, DataType, DataLen, DataPrecision, DataScale):
        self.name = Name
        self.dataType = DataType
        self.dataLen = DataLen
        self.dataPrecision = DataPrecision
        self.dataScale = DataScale
        if DataType == 'WKT':
            self.query = 'sdo_util.to_wktgeometry(t.' + Name + ') wkb, '
        else:
            self.query = 't.' + Name
        if DataType == 'SDO_GEOMETRY':
            self.isShape = True
        if DataType == 'BLOB' or DataType == 'CLOB' or DataType == 'WKT':
            self.isLOB = True

    def getArcType(self, *args):  # Data type translation 'Oracle_type':'ESRI_type'
        return {
            # 'BINARY_DOUBLE':'DOUBLE',
            # 'BINARY_FLOAT':'FLOAT',
            # 'BLOB':'BLOB',
            'CHAR': 'STRING',
            'CLOB': 'CLOB',
            'DATE': 'DATE',
            # 'FLOAT':'FLOAT',
            # 'LONG':'LONG',
            # 'LONG RAW':'BLOB',
            'NUMBER': 'DOUBLE',
            # 'RAW':'BLOB',
            # 'ROWID':'SHORT',
            'SDO_GEOMETRY': 'GEOMETRY',
            'VARCHAR2': 'STRING',
            'WKT': 'WKT',
        }.get(self.dataType, "undefined")

    def getCxType(self, *args):  # Data type translation 'Oracle_type':'cx_Oracle.type'
        return {
            'BLOB': cx_Oracle.BLOB,
            'CHAR': cx_Oracle.STRING,
            'CLOB': cx_Oracle.CLOB,
            'DATE': cx_Oracle.DATETIME,
            'NUMBER': cx_Oracle.NUMBER,
            'SDO_GEOMETRY': cx_Oracle.CLOB,
            'VARCHAR2': cx_Oracle.STRING,
        }.get(self.dataType, "undefined")

class pyTable:
    # Create an object to track columns for read and write operations.
    # BLOB, CLOB and SDO_GEOMETRY types will need additional support in Read and Write fx's.
    length = 0
    # colList = [] # The original list of columns is coming from an Oracle query.
    # These two lists are different because of the way I treat shape.
    # I create a FC and then add attribute columns. This puts the Shape column first in the list.
    __insCols = []  # I use insCols as a list of column type objects to write to ArcMap.
    __insertFields = []
    __colTuple = None
    __myData = []
    __pKey = 'P_KEY'  # The name of the primary key field should be <table>_CN
    __insBlobCols = []  # A list of column positions that contain BLOB data types.
    __insKeyCol = -1  # The position of the primary key column.

    def __init__(self, ColList):
        self.colList = ColList[:]
        self.length = len(ColList)
        self.isFC = self.__getShape()
        self.__select = self.getSelect()
        arcpy.AddMessage('New table class created with ' + str(self.length) + ' columns.')

    def __del__(self):
        self.colList = []
        del self.__insCols[:]
        del self.__insertFields[:]
        del self.__myData[:]
        del self.__insBlobCols[:]

    def addDataRow(self, inDataRow):
        self.__myData.append(inDataRow)

    def getInsCols(self):
        return self.__insCols

    def getTuple(self):
        return self.__colTuple

    def getPK(self):
        return self.__pKey

    def getInsBlobCols(self):
        return self.__insBlobCols

    def clearData(self):
        self.__myData = []

    def getData(self):
        return self.__myData

    def getKeyCol(self):
        return self.__insKeyCol

    def __getShape(self):
        isFeature = False
        featureName = ''
        for col in self.colList:
            if col.isShape:
                isFeature = True
                featureName = col.name
        if isFeature:
            wktShape = column(featureName, 'WKT', 0, 0, 0)
            self.__insCols.append(wktShape)
        for col in self.colList:
            if not col.isShape:
                self.__insCols.append(col)
        return isFeature

    def getSelect(self):
        # Build the select statement
        # Build the list of insert Field names
        # Build the Tuple of insert Field names
        # Identify the LOB columns by index number
        statement = 'select '
        del self.__insertFields[:]
        indx = 0
        # print ('Table has ', len(self.__insCols), ' insert columns.')
        for col in self.__insCols:
            if col.dataType == 'WKT':
                statement += 'sdo_util.to_wktgeometry(t.shape) wkb, '
                self.__insertFields.append('SHAPE@WKT')
            else:
                statement += 't.' + col.name + ', '
                self.__insertFields.append(col.name)
            if col.dataType == 'BLOB':
                self.__insBlobCols.append(indx)
            #
            # ToDo: The key column should be <table>_CN
            # But, the logic needs to work for views with different names.
            #
            if col.name == self.__pKey:
                self.__insKeyCol = indx
            indx += 1
        statement = statement[:statement.rfind(',')]  # Trim off the trailing comma
        # print ('Insert is composed of ', len(self.__insertFields), ' fields.')
        self.__colTuple = tuple(self.__insertFields)
        return statement

    def createTemp(self, WorkSpace, tempFC):
        success = False
        insertCols = self.__insCols
        arcpy.AddMessage('Adding ' + tempFC + ' with ' + str(len(insertCols)) + ' columns.')
        try:
            if self.isFC:
                arcpy.CreateFeatureclass_management(WorkSpace, tempFC, 'POINT')
                arcpy.AddMessage(tempFC + ' feature class was successfully created.')
            else:
                arcpy.CreateTable_management(WorkSpace, tempFC)
                arcpy.AddMessage(tempFC + ' table was successfully created.')
            for col in insertCols:
                esriType = col.getArcType()
                if esriType == "undefined":
                    arcpy.AddError('Data type not currently supported, ' + col.dataType)
                    return success
                if col.dataType != 'WKT':
                    arcpy.AddField_management(tempFC, col.name, esriType, col.dataPrecision, col.dataScale, col.dataLen)
                    arcpy.AddMessage('Created column: ' + col.name)
            success = True
        except:
            e = sys.exc_info()[1]
            arcpy.AddError('Create of ' + tempFC + ' failed with ' + str(e.args[0]))
        return success
You are making a shallow copy of the list passed to your class in the __init__ function.
See Shallow and deep copy operations in the Python documentation for some basic information.
self.colList = ColList[:] makes a new LIST, but inside that new list are references to the same objects that were in the original list (a shallow copy).
You need a deep copy:
import copy
...
self.colList = copy.deepcopy(ColList)
A deep copy creates a new list, as well as new objects initialized to match the objects in the original list. So if the objects in one instance's list change, they don't change in every instance.
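A quick illustration of the difference (a standalone sketch, independent of arcpy):
import copy

class Cell:
    def __init__(self, value):
        self.value = value

original = [Cell(1), Cell(2)]
shallow = original[:]            # new list, but the same Cell objects
deep = copy.deepcopy(original)   # new list and new Cell objects

shallow[0].value = 99
print(original[0].value)  # 99 -- the shallow copy shares its elements
deep[1].value = 42
print(original[1].value)  # 2  -- the deep copy is fully independent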

Elegant way to avoid .put() on unchanged entities

A recurring pattern in my Python programming on GAE is getting some entity from the data store, then possibly changing that entity based on various conditions. In the end I need to .put() the entity back to the data store to ensure that any changes that might have been made to it get saved.
However, often no changes were actually made, and the final .put() is just a waste of money. How can I easily make sure that I only put an entity if it has really changed?
The code might look something like
def handle_get_request():
    entity = Entity.get_by_key_name("foobar")
    if phase_of_moon() == "full":
        entity.werewolf = True
    if random.choice([True, False]):
        entity.lucky = True
    if some_complicated_condition:
        entity.answer = 42
    entity.put()
I could maintain a "changed" flag which I set if any condition changed the entity, but that seems very brittle. If I forget to set it somewhere, then changes would be lost.
What I ended up using
def handle_get_request():
    entity = Entity.get_by_key_name("foobar")
    original_xml = entity.to_xml()
    if phase_of_moon() == "full":
        entity.werewolf = True
    if random.choice([True, False]):
        entity.lucky = True
    if some_complicated_condition:
        entity.answer = 42
    if entity.to_xml() != original_xml:
        entity.put()
I would not call this "elegant". Elegant would be if the object just saved itself automatically in the end, but I felt this was simple and readable enough to do for now.
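If you wanted the object to effectively save itself, one way to package the same to_xml() snapshot idea is a context manager (a sketch; autosave is a hypothetical helper, not a GAE API):
from contextlib import contextmanager

@contextmanager
def autosave(entity):
    # snapshot the serialized form on entry, put() on exit only if it changed
    before = entity.to_xml()
    yield entity
    if entity.to_xml() != before:
        entity.put()

def handle_get_request():
    with autosave(Entity.get_by_key_name("foobar")) as entity:
        if phase_of_moon() == "full":
            entity.werewolf = True
        # ... other conditions ...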
Why not check whether the result equals (==) the original, and use that to decide whether to save it? This depends on a correctly implemented __eq__; a field-by-field comparison based on __dict__ should usually do it:
def __eq__(self, other):
    return self.__dict__ == other.__dict__
(Be sure that the other rich comparison and hash operators work correctly if you do this. See here.)
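For instance, with a plain class standing in for the datastore model (illustrative only):
class Entity(object):
    def __init__(self, werewolf=False, lucky=False):
        self.werewolf = werewolf
        self.lucky = lucky

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

print Entity(werewolf=True) == Entity(werewolf=True)  # True
print Entity(werewolf=True) == Entity()               # False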
One possible solution is using a wrapper that tracks any attribute change:
class Wrapper(object):
    def __init__(self, x):
        self._x = x
        self._changed = False

    def __setattr__(self, name, value):
        if name[:1] == "_":
            object.__setattr__(self, name, value)
        else:
            if getattr(self._x, name) != value:
                setattr(self._x, name, value)
                self._changed = True

    def __getattribute__(self, name):
        if name[:1] == "_":
            return object.__getattribute__(self, name)
        return getattr(self._x, name)

class Contact:
    def __init__(self, name, address):
        self.name = name
        self.address = address

c = Contact("Me", "Here")
w = Wrapper(c)
print w.name              # --> Me
w.name = w.name
print w.name, w._changed  # --> Me False
w.name = "6502"
print w.name, w._changed  # --> 6502 True
This answer is part of a question I posted about a Python checksum of a dict. Using the answers to that question, I developed a method to generate a checksum from a db.Model.
This is an example:
>>> class Actor(db.Model):
...     name = db.StringProperty()
...     age = db.IntegerProperty()
...
>>> u = Actor(name="John Doe", age=26)
>>> util.checksum_from_model(u, Actor)
'-42156217'
>>> u.age = 47
>>> checksum_from_model(u, Actor)
'-63393076'
I defined these methods:
def checksum_from_model(ref, model, exclude_keys=[], exclude_properties=[]):
    """Returns the checksum of a db.Model.

    Attributes:
        ref: The reference of the db.Model
        model: The model type instance of db.Model.
        exclude_keys: To exclude a list of property names like 'updated'
        exclude_properties: To exclude a list of property types like 'db.DateTimeProperty'
    Returns:
        A checksum as a signed integer.
    """
    l = []
    for key, prop in model.properties().iteritems():
        if not (key in exclude_keys) and \
           not any([True for x in exclude_properties if isinstance(prop, x)]):
            l.append(getattr(ref, key))
    return checksum_from_list(l)

def checksum_from_list(l):
    """Returns a checksum from a list of data as an int."""
    return reduce(lambda x, y: x ^ y, [hash(repr(x)) for x in l])
Note:
For the base36 implementation: http://en.wikipedia.org/wiki/Base_36#Python_implementation
Edit:
I removed the return in base36, so these functions now run without dependencies. (Advice from @Skirmantas.)
I didn't work with GAE, but in the same situation I'd use something like:
from copy import deepcopy

entity = Entity.get_by_key_name("foobar")
prev_entity_state = deepcopy(entity.__dict__)
if phase_of_moon() == "full":
    entity.werewolf = True
if random.choice([True, False]):
    entity.lucky = True
if some_complicated_condition:
    entity.answer = 42
if entity.__dict__ != prev_entity_state:
    entity.put()

Python serializable objects json [duplicate]

class gpagelet:
    """
    Holds 1) the pagelet xpath, which is a string
          2) the list of pagelet shingles, list
    """
    def __init__(self, parent):
        if not isinstance(parent, gwebpage):
            raise Exception("Parent must be an instance of gwebpage")
        self.parent = parent  # This must be a gwebpage instance
        self.xpath = None  # String
        self.visibleShingles = []  # list of tuples
        self.invisibleShingles = []  # list of tuples
        self.urls = []  # list of string

class gwebpage:
    """
    Holds all the datastructure after the results have been parsed
    holds: 1) lists of gpagelets
           2) loc, string, location of the file that represents it
    """
    def __init__(self, url):
        self.url = url  # Str
        self.netloc = False  # Str
        self.gpagelets = []  # gpagelets instance
        self.page_key = ""  # str
Is there a way for me to make my classes JSON serializable? The thing I am worried about is the recursive reference.
Write your own encoder and decoder, which can be very simple, e.g. just returning __dict__.
For example, here is an encoder to dump a totally recursive tree structure; you can enhance it or use it as-is for your own purposes:
import json

class Tree(object):
    def __init__(self, name, childTrees=None):
        self.name = name
        if childTrees is None:
            childTrees = []
        self.childTrees = childTrees

class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if not isinstance(obj, Tree):
            return super(MyEncoder, self).default(obj)
        return obj.__dict__

c1 = Tree("c1")
c2 = Tree("c2")
t = Tree("t", [c1, c2])
print json.dumps(t, cls=MyEncoder)
it prints
{"childTrees": [{"childTrees": [], "name": "c1"}, {"childTrees": [], "name": "c2"}], "name": "t"}
You can similarly write a decoder, but there you will somehow need to identify whether a JSON object is one of yours or not, so you may want to add a type tag if needed.
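A minimal decoder sketch along those lines (it assumes any JSON object with exactly the keys name and childTrees came from a Tree; an explicit type tag would make this robust):
import json

def tree_decoder(d):
    # object_hook runs bottom-up, so child dicts are already Trees here
    if set(d) == {"name", "childTrees"}:
        return Tree(d["name"], d["childTrees"])
    return d

s = '{"childTrees": [{"childTrees": [], "name": "c1"}, {"childTrees": [], "name": "c2"}], "name": "t"}'
t2 = json.loads(s, object_hook=tree_decoder)
print t2.name, [c.name for c in t2.childTrees]  # t ['c1', 'c2']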
Indirect answer: instead of using JSON, you could use YAML, which has no problem doing what you want. (JSON is essentially a subset of YAML.)
Example:
import yaml
o1 = gwebpage("url")
o2 = gpagelet(o1)
o1.gpagelets = [o2]
print yaml.dump(o1)
In fact, YAML nicely handles cyclic references for you.
I implemented a very simple todict method with the help of https://stackoverflow.com/a/11637457/1766716:
1) Iterate over properties that do not start with __
2) Eliminate methods
3) Eliminate some properties manually which are not necessary (in my case, coming from SQLAlchemy)
4) Use getattr to build the dictionary.
class User(Base):
    id = Column(Integer, primary_key=True)
    firstname = Column(String(50))
    lastname = Column(String(50))
    password = Column(String(20))

    def props(self):
        return filter(
            lambda a:
                not a.startswith('__')
                and a not in ['_decl_class_registry', '_sa_instance_state', '_sa_class_manager', 'metadata']
                and not callable(getattr(self, a)),
            dir(self))

    def todict(self):
        return {k: self.__getattribute__(k) for k in self.props()}
My solution for this was to extend the dict class and perform checks around required/allowed attributes by overriding the __init__, update, and __setitem__ methods.
class StrictDict(dict):
    required = set()
    at_least_one_required = set()
    cannot_coexist = set()
    allowed = set()

    def __init__(self, iterable={}, **kwargs):
        super(StrictDict, self).__init__({})
        keys = set(iterable.keys()).union(set(kwargs.keys()))
        if not keys.issuperset(self.required):
            msg = str(self.__class__.__name__) + " requires: " + str([str(key) for key in self.required])
            raise AttributeError(msg)
        if len(list(self.at_least_one_required)) and len(list(keys.intersection(self.at_least_one_required))) < 1:
            msg = str(self.__class__.__name__) + " requires at least one: " + str([str(key) for key in self.at_least_one_required])
            raise AttributeError(msg)
        for key, val in iterable.iteritems():
            self.__setitem__(key, val)
        for key, val in kwargs.iteritems():
            self.__setitem__(key, val)

    def update(self, E=None, **F):
        for key, val in E.iteritems():
            self.__setitem__(key, val)
        for key, val in F.iteritems():
            self.__setitem__(key, val)
        super(StrictDict, self).update({})

    def __setitem__(self, key, value):
        all_allowed = self.allowed.union(self.required).union(self.at_least_one_required).union(self.cannot_coexist)
        if key not in list(all_allowed):
            msg = str(self.__class__.__name__) + " does not allow member '" + key + "'"
            raise AttributeError(msg)
        if key in list(self.cannot_coexist):
            for item in list(self.cannot_coexist):
                if key != item and item in self.keys():
                    msg = str(self.__class__.__name__) + " does not allow members '" + key + "' and '" + item + "' to coexist"
                    raise AttributeError(msg)
        super(StrictDict, self).__setitem__(key, value)
Example usage:
class JSONDoc(StrictDict):
    """
    Class corresponding to the JSON API top-level document structure
    http://jsonapi.org/format/#document-top-level
    """
    at_least_one_required = {'data', 'errors', 'meta'}
    allowed = {"jsonapi", "links", "included"}
    cannot_coexist = {"data", "errors"}

    def __setitem__(self, key, value):
        if key == "included" and "data" not in self.keys():
            msg = str(self.__class__.__name__) + " does not allow 'included' member if 'data' member is not present"
            raise AttributeError(msg)
        super(JSONDoc, self).__setitem__(key, value)

json_doc = JSONDoc(
    data={
        "id": 5,
        "type": "movies"
    },
    links={
        "self": "http://url.com"
    }
)
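Continuing the example, the guard rails in action (data and errors are declared cannot_coexist, so combining them raises):
try:
    bad = JSONDoc(data={"id": 5}, errors=[{"status": "400"}])
except AttributeError as e:
    print e  # JSONDoc does not allow members 'data' and 'errors' to coexist (order may vary)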
