Python StructuredProperty to dictionary - python

My models all have a method which converts the model to a dictionary:
def to_dict(model):
    """Convert *model* into a plain dictionary keyed by property name.

    Properties whose value is None are left out of the result.  Raises
    ValueError for a value of any type that is not handled below.
    """
    output = {}
    # Values of these types are copied into the output unchanged.
    # NOTE: `list` is included, so the elements of a list are never converted.
    SIMPLE_TYPES = (int, long, float, bool, dict, basestring, list)
    for key, prop in model._properties.iteritems():
        value = getattr(model, key)
        if value is None:
            continue
        if isinstance(value, SIMPLE_TYPES):
            output[key] = value
        elif isinstance(value, datetime.date):
            # Format with microseconds, then drop the last three digits so
            # that only milliseconds remain.
            dateString = value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
            output[key] = dateString
        elif isinstance(value, ndb.Model):
            # Nested model: convert it recursively.
            output[key] = to_dict(value)
        else:
            raise ValueError('cannot encode ' + repr(prop))
    return output
Now, one of my models, X, has a LocalStructuredProperty:
metaData = ndb.LocalStructuredProperty(MetaData, repeated=True)
So, repeated=True means this will be a list of MetaData objects. MetaData is another model, and it also has the same to_dict method.
However, when I call json.dumps(xInstance.to_dict()), I get an exception:
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: MetaData(count=0, date=datetime.datetime(2012, 9, 19, 2, 46, 56, 660000), unique_id=u'8E2C3B07A06547C78AB00DD73B574B8C') is not JSON serializable
How can I handle this?

If you want to handle this in to_dict() and before the level of serializing to JSON, you'll just need a few more cases in your to_dict(). Firstly, you said the to_dict definition above is a method. I would have it delegate to a function or staticmethod so you have something you can call on ints and such without checking the type first. The code will just come out better that way.
def coerce(value):
    """Recursively convert *value* into something ``json.dumps`` can encode.

    None and simple scalars are returned unchanged, dates are formatted as
    ``YYYY-MM-DD HH:MM:SS.mmm`` strings, objects exposing ``to_dict()`` are
    converted through it, dicts are rebuilt with coerced keys and values, and
    any other iterable becomes a list of coerced items.

    Raises:
        ValueError: if *value* matches none of the cases above.
    """
    try:
        simple_types = (int, long, float, bool, basestring)  # Python 2
    except NameError:
        # `long` and `basestring` do not exist on Python 3.
        simple_types = (int, float, bool, str)
    if value is None or isinstance(value, simple_types):
        return value
    elif isinstance(value, datetime.date):
        # Drop the last three microsecond digits to keep milliseconds.
        return value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    elif hasattr(value, 'to_dict'):  # hooray for duck typing!
        return value.to_dict()
    elif isinstance(value, dict):
        return dict((coerce(k), coerce(v)) for (k, v) in value.items())
    elif hasattr(value, '__iter__'):  # iterable, not string
        # A real list on every Python version; map() is lazy on Python 3 and
        # json.dumps cannot encode a map object.
        return [coerce(item) for item in value]
    else:
        raise ValueError('cannot encode %r' % value)
Then just plug that into your to_dict method itself:
def to_dict(model):
    """Return a dictionary of *model*'s properties, coerced for JSON encoding.

    Each property value is passed through ``coerce``; properties whose coerced
    value is None are left out of the result.
    """
    output = {}
    # Iterating the mapping yields its keys on both Python 2 and Python 3;
    # the property objects themselves are not needed here.
    for key in model._properties:
        value = coerce(getattr(model, key))
        if value is not None:
            output[key] = value
    return output

All you need to do to serialize is to implement a function
def default_encode(obj):
    """``default=`` hook for ``json.dumps``: encode *obj* through ``obj.to_dict()``.

    Raises:
        TypeError: if *obj* has no callable ``to_dict``.  This is the error a
            ``default`` hook is expected to raise for unsupported objects.
    """
    to_dict = getattr(obj, 'to_dict', None)
    if not callable(to_dict):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return to_dict()
and then encode your JSON with
json.dumps(X.to_dict(), default=default_encode)

I figured out how to solve the issue: in the X class, add this to the to_dict() method:
...
if value is None:
continue
if key == 'metaData':
array = list()
for data in value:
array.append(data.to_dict())
output[key] = array
elif isinstance(value, SIMPLE_TYPES):
output[key] = value
...
Though I'm not really sure how to automate this so that it doesn't depend on the key: whenever to_dict() encounters a list of custom objects, it should first convert each object in the list with to_dict().

Related

How to find Dictionary Key(s) from Value in a large nested dictionary of variable depth?

Say that I have a large dictionary full of nested values such as this:
large_dic ={
...
"key":{"sub-key1" :{"sub-key2": "Test"}},
"0key":{"0sub-key1": "0Test"},
"1key":{"1sub-key1":{"1sub-key2":{"1sub-key3":"1Test"}}}
...
}
What I would like to do is to be able to get for example from the final value:
"1Test"
the key(s) to access it, such as in this case:
large_dic["1key"]["1sub-key1"]["1sub-key2"]["1sub-key3"]
Thanks for the support.
Edit to add more infos: The dictionary trees I'm talking about are linear(YAML files converted into a python dictionary structure), there is never more than one key, the ending leaf values may not be unique.
Since the OP is looking for the hierarchical keys rather than the final value, I made this class:
class PointingSlice:
    """A base object plus a chain of subscripts to apply to it.

    ``str()`` renders the chain as subscript syntax appended to ``str(obj)``;
    ``resolve()`` applies the subscripts one after another and returns the
    result.
    """

    def __init__(self, obj, *slices) -> None:
        self.obj = obj
        self.slices = slices

    def __str__(self):
        return f"{str(self.obj)}{''.join(map(self._repr_slice, self.slices))}"

    def _repr_slice(self, sliced):
        """Render one subscript (a key, an index or a ``slice``) as ``[...]``."""
        if not isinstance(sliced, slice):
            return "[{}]".format(repr(sliced))
        parts = (sliced.start, sliced.stop, sliced.step)
        # Missing slice components are rendered as empty strings.
        return "[{}]".format(":".join("" if part is None else str(part) for part in parts))

    def resolve(self):
        """Apply every stored subscript in order and return the final object."""
        obj = self.obj
        for sliced in self.slices:
            obj = obj[sliced]
        return obj
and this function for instantiation :
def find_longest(mapping, key):
    """Follow the single-key chain below ``mapping[key]`` down to its leaf.

    Returns a ``PointingSlice`` over *mapping* holding every key on the way.
    An empty dict is treated as a leaf.

    Raises:
        KeyError: if *key* is not in *mapping*.
        ValueError: if a nested dict has more than one key, since the path
            would be ambiguous.
    """
    keys = [key]
    value = mapping[key]
    while isinstance(value, dict) and value:
        if len(value) != 1:
            raise ValueError(
                "expected exactly one key per level, found %d" % len(value)
            )
        ((k, value),) = value.items()
        keys.append(k)
    return PointingSlice(mapping, *keys)
Example use:
print(find_longest(large_dic, "1key"))
# output:
# {'key': {'sub-key1': {'sub-key2': 'Test'}}, '0key': {'0sub-key1': '0Test'}, '1key': {'1sub-key1': {'1sub-key2': {'1sub-key3': '1Test'}}}}['1key']['1sub-key1']['1sub-key2']['1sub-key3']
# do note that it is the same thing as large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(find_longest(large_dic, "1key").resolve()) # 1Test
So I made some changes and now it supports additional repr options matching your exact use case :
class PointingSlice:
    """A base object plus a chain of subscripts to apply to it.

    ``str()`` renders the chain as subscript syntax appended to
    ``object_name`` when one is set (and non-empty), otherwise to
    ``str(obj)``.  ``resolve()`` applies the subscripts one after another and
    returns the result.
    """

    def __init__(self, obj, *slices, object_name=None) -> None:
        self.obj = obj
        self.slices = slices
        self.object_name = object_name

    def __str__(self):
        return f"{self.object_name or str(self.obj)}{''.join(map(self._repr_slice, self.slices))}"

    def _repr_slice(self, sliced):
        """Render one subscript (a key, an index or a ``slice``) as ``[...]``."""
        if not isinstance(sliced, slice):
            return "[{}]".format(repr(sliced))
        parts = (sliced.start, sliced.stop, sliced.step)
        # Missing slice components are rendered as empty strings.
        return "[{}]".format(":".join("" if part is None else str(part) for part in parts))

    def resolve(self):
        """Apply every stored subscript in order and return the final object."""
        obj = self.obj
        for sliced in self.slices:
            obj = obj[sliced]
        return obj
large_dic = {
"key": {"sub-key1": {"sub-key2": "Test"}},
"0key": {"0sub-key1": "0Test"},
"1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "1Test"}}},
}
def find_longest(mapping, key, object_name=None):
    """Follow the single-key chain below ``mapping[key]`` down to its leaf.

    Returns a ``PointingSlice`` over *mapping* holding every key on the way.
    An empty dict is treated as a leaf.  *object_name*, when given, is passed
    on to the ``PointingSlice`` for display purposes.

    Raises:
        KeyError: if *key* is not in *mapping*.
        ValueError: if a nested dict has more than one key, since the path
            would be ambiguous.
    """
    keys = [key]
    value = mapping[key]
    while isinstance(value, dict) and value:
        if len(value) != 1:
            raise ValueError(
                "expected exactly one key per level, found %d" % len(value)
            )
        ((k, value),) = value.items()
        keys.append(k)
    return PointingSlice(mapping, *keys, object_name=object_name)
f = find_longest(large_dic, "1key")
f.object_name = "large_dic" # for representational purposes, it works without this
print(f) # large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(f.resolve()) # 1Test
There are numerous ways to achieve this. You might want to look up "prefix tree traversal" (or "trie traversal").
A simple recursive solution with poor memory efficiency could look like this:
def find_trie_leaf_path(trie: dict, leaf_value, trie_path: "list[str] | None" = None):
    """Yield the key path to every leaf of *trie* equal to *leaf_value*.

    Args:
        trie: nested dictionaries; any non-dict value is a leaf.
        leaf_value: the value to look for.
        trie_path: keys already walked to reach *trie*; they are prepended to
            every yielded path.  Defaults to no prefix.

    Yields:
        One list of keys per matching leaf, in dictionary iteration order.
    """
    # A None default avoids sharing one mutable list object between calls.
    prefix = [] if trie_path is None else trie_path
    for key, value in trie.items():
        if isinstance(value, dict):
            yield from find_trie_leaf_path(value, leaf_value, prefix + [key])
        elif value == leaf_value:
            yield prefix + [key]
large_dic = {
"key": {"sub-key1": {"sub-key2": "Test"}},
"0key": {"0sub-key1": "0Test"},
"1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "Test"}}},
}
first_match = next(find_trie_leaf_path(large_dic, "Test"))
all_matches = list(find_trie_leaf_path(large_dic, "Test"))
This should work even if your trie is very wide. If it is very deep, I'd rather use an iterative algorithm, because every level of nesting adds another recursive call.
I want to point out, though, that prefix trees are usually used the other way round. If you find yourself needing this search a lot, you should consider a different data structure.
Yes, it's totally possible. Here's the function to get the deeply nested value:
def get_final_value(mapping, key):
    """Return the leaf at the end of the single-key chain below ``mapping[key]``.

    An empty dict is treated as a leaf and returned as is.

    Raises:
        KeyError: if *key* is not in *mapping*.
        ValueError: if a nested dict has more than one key, since the chain
            would be ambiguous.
    """
    value = mapping[key]
    while isinstance(value, dict) and value:
        if len(value) != 1:
            raise ValueError(
                "expected exactly one key per level, found %d" % len(value)
            )
        (value,) = value.values()
    return value
Example use:
>>> get_final_value(large_dic, "key")
'Test'
>>> get_final_value(large_dic, "0key")
'0Test'
>>> get_final_value(large_dic, "1key")
'1Test'
>>>
Can the parent keys be deduced from your final value in any way, or is the tree structure rather random? If the latter is the case, you'll probably just end up searching your tree until you find your value; which path-search algorithm you choose for that depends, again, on the tree structure you have. As already asked in the comments: does each node have only one child node, is the tree binary, or can a node have many child nodes?

Deserializing API response method - returning a data frame and handling exception

The API class has a deserialize_response method which turns a dictionary into a data frame. Apparently there is one exception: the data for one particular key ('liquidity') has a different shape and deserializes to an empty data frame. I'd like to detect that case and call a different custom method to deserialize it, but I'm not quite sure how to approach this properly with the current setup:
@staticmethod
def deserialize_response(data):
    """Turn an API response into data frame(s).

    * Anything that is not a dict is returned unchanged.
    * A dict whose keys are exactly ``"data"`` and ``"schema"`` is
      deserialized into a single data frame.
    * Any other dict is treated as a mapping of name -> frame payload, and
      each payload is deserialized.
    """
    # Check the type first: calling .keys() on a non-dict raised
    # AttributeError, so the pass-through case could never be reached.
    if not isinstance(data, dict):
        return data
    if set(data) == {"data", "schema"}:
        return Answers.deserialize_dataframe(data)
    return {
        key: Answers.deserialize_dataframe(frame)
        for key, frame in data.items()
    }
@staticmethod
def deserialize_dataframe(data):
    """Build a data frame from a JSON-compatible payload.

    The payload is first read in pandas' ``"table"`` orientation; if that
    fails, ``data["data"]`` is read in ``"records"`` orientation instead.
    Column or index labels of the form ``"a::b"`` are then expanded into a
    ``MultiIndex``.  Only the first label is inspected to decide this.
    """
    # Local import so the snippet does not depend on a file-level import.
    from io import StringIO

    # read_json is given a file-like object because passing a literal JSON
    # string is deprecated in recent pandas releases.
    try:
        df = pd.read_json(StringIO(json.dumps(data)), orient="table")
    except Exception:
        # Best-effort fallback, but without swallowing KeyboardInterrupt or
        # SystemExit as a bare `except:` would.
        df = pd.read_json(StringIO(json.dumps(data["data"])), orient="records")
    columns = [x.split("::") if isinstance(x, str) else x for x in df.columns]
    if columns and isinstance(columns[0], list) and len(columns[0]) > 1:
        df.columns = pd.MultiIndex.from_tuples(columns)
    index = [x.split("::") if isinstance(x, str) else x for x in df.index]
    if index and isinstance(index[0], list) and len(index[0]) > 1:
        df.index = pd.MultiIndex.from_tuples(index)
    return df
I cannot do it this way:
return {key: Answers.deserialize_dataframe(frame) for key, frame in data.items() if key != 'liquidity'}
Obviously, that just drops the 'liquidity' key from the result: nothing is returned for it, and no deserializing method is called for that frame.

Check multiple keys in a dictionary for existence and value

I would like to do something only if an object has two keys with given values:
tel = ...
nam = ...
for obj in listofobjs:
for key, val in obj.items():
if (key == 'tel' and val == tel) and \
(key == 'nam' and val == name):
# do something...
This won't work, since key and val can't each hold two different values at the same time.
Here's one way to do it without having to use .items():
for obj in listofobjs:
if 'tel' in obj and 'nam' in obj and obj['tel']==tel and obj['nam']==nam:
...
Or you could ask for forgiveness provided all dictionary access in the if block are safe:
for obj in listofobjs:
try:
if obj['tel']==tel and obj['nam']==nam:
...
except KeyError:
pass
You don't need to loop over the .items() to do this.
for obj in listofobjs:
    # .get() returns the given default instead of raising KeyError when the
    # key is missing.
    if (obj.get('tel', None) == tel) and (obj.get('nam', None) == nam):
        ...
Just use .get to get the key, so that you don't get a KeyError if the key doesn't exist.
.get returns None by default, but I'm specifying it here to highlight the ability to use a different default value. If you want to use None as the default, you can leave out the second parameter from the .get call.
Replace None with a value that you know will never be a valid value for tel or nam.

How to convert a nested namedtuple to a dict?

I am trying to convert the below mentioned nested namedtuple to a dict.
I am using Python 3.4.2 and psd-tools-1.2
TypeToolObjectSetting(version=1, xx=0.0, xy=-1.55729984301413, yx=1.6070307595731337, yy=0.0, tx=628.1016949152543, ty=516.5, text_version=50, descriptor1_version=16, text_data=Descriptor(name='', classID=b'TxLr', items=[(b'Txt ', String(value='34px')), (b'textGridding', Enum(type=b'textGridding', value=b'None')), (b'Ornt', Enum(type=b'Ornt', value=b'Hrzn')), (b'AntA', Enum(type=b'Annt', value=b'Anno')), (b'bounds', Descriptor(name='', classID=b'bounds', items=[(b'Left', UnitFloat(unit='POINTS', value=-10.0)), (b'Top ', UnitFloat(unit='POINTS', value=-6.908203125)), (b'Rght', UnitFloat(unit='POINTS', value=10.0)), (b'Btom', UnitFloat(unit='POINTS', value=2.42578125))])), (b'boundingBox', Descriptor(name='', classID=b'boundingBox', items=[(b'Left', UnitFloat(unit='POINTS', value=-9.34375)), (b'Top ', UnitFloat(unit='POINTS', value=-5.9375)), (b'Rght', UnitFloat(unit='POINTS', value=9.5)), (b'Btom', UnitFloat(unit='POINTS', value=1.609375))])), (b'TextIndex', Integer(value=0)), (b'EngineData', RawData(value=b'\n\n<<\n\t/EngineDict\n\t<<\n\t\t/Edito ... =8205'))]), warp_version=1, descriptor2_version=16, warp_data=Descriptor(name='', classID=b'warp', items=[(b'warpStyle', Enum(type=b'warpStyle', value=b'warpNone')), (b'warpValue', Double(value=0.0)), (b'warpPerspective', Double(value=0.0)), (b'warpPerspectiveOther', Double(value=0.0)), (b'warpRotate', Enum(type=b'Ornt', value=b'Hrzn'))]), left=0, top=0, right=0, bottom=0)
I have tried _asdict(), but it's of no use since the data is being streamed, and I don't want to change anything in the library itself.
You can unnest namedtuples by recursively unpacking ._asdict().items(), checking the instance of the value type and unpacking the nested values accordingly. The instance checking of a namedtuple takes a little extra effort to make sure the nested namedtuples aren't seen by the unpacker as pure tuples.
def isnamedtupleinstance(x):
    """Return True if *x* looks like a namedtuple instance.

    A namedtuple is recognised as a tuple whose type carries a ``_fields``
    tuple made up only of strings.  Subclasses of a namedtuple class are
    accepted as well; plain tuples are not.
    """
    if not isinstance(x, tuple):
        return False
    fields = getattr(type(x), '_fields', None)
    if not isinstance(fields, tuple):
        return False
    return all(isinstance(name, str) for name in fields)
def unpack(obj):
    """Recursively replace namedtuples inside *obj* with plain dictionaries.

    Dicts and lists are rebuilt with their contents unpacked, namedtuples
    become dicts keyed by field name, and ordinary tuples stay tuples with
    their items unpacked.  Anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        return {name: unpack(item) for name, item in obj.items()}
    if isinstance(obj, list):
        return [unpack(item) for item in obj]
    # Must be tested before the plain-tuple case: a namedtuple is a tuple too.
    if isnamedtupleinstance(obj):
        return {name: unpack(item) for name, item in obj._asdict().items()}
    if isinstance(obj, tuple):
        return tuple(unpack(item) for item in obj)
    return obj
# data = TypeToolObjectSetting(version=1, xx=0.0, ..
unpacked_data = unpack(data)

What is an elegant way to select all non-None elements from parameters and place them in a python dictionary?

def function(varone=None, vartwo=None, varthree=None):
    """Collect the arguments that are not None under the keys var1..var3.

    Returns:
        The dictionary of collected values.

    Raises:
        Exception: if every argument is None.
    """
    values = {}
    # The checks must test the parameters themselves; `var1`, `var2` and
    # `var3` are only dictionary keys, not variables.
    if varone is not None:
        values['var1'] = varone
    if vartwo is not None:
        values['var2'] = vartwo
    if varthree is not None:
        values['var3'] = varthree
    if not values:
        raise Exception("No values provided")
    return values
Can someone suggest a more elegant, Pythonic way to take the non-None named variables and place them in a dictionary? I do not want the values to be passed in as a dictionary. The key names of "values" are important and must be as they are: the value of "varone" must go into var1, "vartwo" must go into var2, and so on. Thanks.
You could use kwargs:
def function(*args, **kwargs):
    """Return the keyword arguments whose value is not None.

    Positional arguments are accepted but ignored.

    Raises:
        Exception: if no keyword argument has a non-None value.
    """
    values = {k: v for k, v in kwargs.items() if v is not None}
    if not values:
        raise Exception("No values provided")
    return values
>>> function(varone=None, vartwo="fish", varthree=None)
{'vartwo': 'fish'}
With this syntax, Python removes the need to explicitly specify any argument list, and allows functions to handle any old keyword arguments they want.
If you're specifically looking for keys var1 etc instead of varone you just modify the function call:
>>> function(var1=None, var2="fish", var3=None)
{'var2': 'fish'}
If you want to be REALLY slick, you can use list comprehensions:
def function(**kwargs):
    """Return the keyword arguments whose value is not None.

    Raises:
        Exception: if no keyword argument has a non-None value.
    """
    # .items() works on both Python 2 and 3, and `is not None` is the
    # identity test that cannot be fooled by a custom __ne__.
    values = dict([(k, v) for k, v in kwargs.items() if v is not None])
    if not values:
        raise Exception("foo")
    return values
Again, you'll have to alter your parameter names to be consistent with your output keys.
Use **kwargs. Example:
def function(**kwargs):
    """Print every keyword argument as ``name: repr(value)``.

    Raises:
        Exception: if no keyword arguments are given.
    """
    if not kwargs:
        raise Exception("No values provided")
    for k, v in kwargs.items():
        # Format first, then print.  Applying % to the result of print()
        # fails on Python 3, where print() returns None.
        print("%s: %r" % (k, v))
If you really are going to call function with None arguments, you can strip them out:
def function(**kwargs):
    """Print every keyword argument that is not None as ``name: repr(value)``.

    Raises:
        Exception: if no keyword argument has a non-None value.
    """
    # Build a filtered copy rather than deleting while iterating: on Python 3
    # that raises "dictionary changed size during iteration".
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
    if not kwargs:
        raise Exception("No values provided")
    for k, v in kwargs.items():
        # Format first, then print; print() returns None on Python 3.
        print("%s: %r" % (k, v))
Obviously you could call the dict values instead, but kwargs is the conventional name, and will make your code more intelligible to other people.
Well, you can pass all those values inside a keyword argument: -
def function(*nkwargs, **kwargs):
    """Print the dictionary of keyword arguments whose value is not None.

    Positional arguments are accepted but ignored.

    Raises:
        Exception: if no keyword argument has a non-None value.
    """
    values = {}
    for k in kwargs:
        if kwargs[k] is not None:
            values[k] = kwargs[k]
    if not values:
        raise Exception("No values")
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(values)


try:
    function()
except Exception as e:  # `as` is valid on Python 2.6+ and Python 3
    print(e)
function(varOne=123, varTwo=None)
function(varOne=123, varTwo=234)
OUTPUT: -
No values
{'varOne': 123}
{'varOne': 123, 'varTwo': 234}
Call your function as usual, but accept as **kwargs. Then filter them:
def fn(**kwargs):
    """Map keyword arguments to var1, var2, ... by position, dropping None values.

    The number in each key is the 1-based position of the argument in the
    call, so a dropped None value leaves a gap in the numbering.
    """
    # Enumerate the keyword arguments themselves.  The original read `items`
    # before it was assigned, which raises UnboundLocalError.
    items = {
        'var%s' % i: v
        for i, (k, v) in enumerate(kwargs.items(), 1)
        if v is not None
    }
    return items
fn(a=1, b=2, c=3)
if you need a specific set of names, then make a dict of names:
names = dict(zip('varOne varTwo varThree'.split(), range(1, 4)))
walk over this dict and check if the var is in kwargs:
# `names` maps each argument name to its number, so the name (k) is what is
# looked up in kwargs and the number (v) is what goes into the output key.
items = {'var%s' % v: kwargs[k] for k, v in names.items() if k in kwargs}

Categories