Given an object that can be a nested structure of dict-like and array-like containers, what is the Pythonic way to set a value on that object, given a dotted path as a string?
Example:
obj = [
    {'a': [1, 2]},
    {'b': {
        'c': [3, 4],
    }},
]
path = '1.b.c.0'
# operation that sets a value at the given path, e.g.
obj[path] = 5
# or
set_value(obj, path, 5)
The above call/assignment should replace the 3 in the example with a 5.
Note: The path can contain list indices as well as keys. Every level of the object can be an array or a dict or something that behaves like that.
The solution should behave roughly like the npm package object-path does in JavaScript.
A non-recursive version, which also works for numeric dictionary keys:
from collections.abc import MutableMapping

def set_value_at_path(obj, path, value):
    *parts, last = path.split('.')
    for part in parts:
        if isinstance(obj, MutableMapping):
            obj = obj[part]
        else:
            obj = obj[int(part)]
    if isinstance(obj, MutableMapping):
        obj[last] = value
    else:
        obj[int(last)] = value
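A quick sanity check against the example from the question:

obj = [{'a': [1, 2]}, {'b': {'c': [3, 4]}}]
set_value_at_path(obj, '1.b.c.0', 5)
print(obj)  # [{'a': [1, 2]}, {'b': {'c': [5, 4]}}]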
Sounds like a job for recursion and str.partition:
def set_value(obj, path, val):
    first, sep, rest = path.partition(".")
    if first.isnumeric():
        first = int(first)
    if rest:
        new_obj = obj[first]
        set_value(new_obj, rest, val)
    else:
        obj[first] = val
Because 1 is not the same as "1", I'm making a compromise here: If something looks like a number, it's treated as a number.
You could get this to behave on custom objects, but not really on builtin types without awful hackery, because Python doesn't have an equivalent to object.prototype.
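To make the compromise concrete (my example, not from the answer): a numeric-looking string is always coerced to an int, so a dict that really uses the string key "1" is not updated in place; a new int key is created instead:

d = {'1': 'a'}
set_value(d, '1', 5)
print(d)  # {'1': 'a', 1: 5} -- the existing string key is untouched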
I have to update a nested JSON object.
If I knew the specifics of which items were to be updated I could do :
json_object['basket']['items']['apple'] = 'new value'
However, my list of elements to target is dynamic.
> basket.items.apple = 'green'
> name = 'my shopping'
> basket.cost = '15.43'
I could do this by looping through elements.
Find 'basket' > then find 'items' > then find 'apple' > set value
Find 'name' > set value
However, I was hoping that there was a way to just reference it directly/dynamically.
i.e. from a string 'basket.cost', build the expression :
json_object['basket']['cost']
P.S. it has to cope with lists of dictionaries too!
Any guidance appreciated :)
Once you have the string "basket.cost", you can split it on "." and it's pretty easy to drill down into json_object['basket']['cost'] using a loop. Functionally, there is no difference between doing this and doing it "directly": you are still getting the 'basket' key first, and then getting the 'cost' key from the value of json_object['basket'].
def get_element(d, path):
    # This function can take the string "basket.cost", or the list ["basket", "cost"]
    if isinstance(path, str):
        path = path.split(".")
    for p in path:
        d = d[p]
    return d

def set_element(d, path, value):
    path = path.split(".")
    dict_to_set = get_element(d, path[:-1])
    key_to_set = path[-1]
    dict_to_set[key_to_set] = value

set_element(json_object, "basket.items.apple", 100)
Now, this assumes all elements of your path already exist, so let's say you create a dictionary that looks like so:
json_object = {"basket": {"items": dict()}}
set_element(json_object, "basket.items.apple", 100)
set_element(json_object, "basket.cost", 10)
print(json_object)
# Output: {'basket': {'items': {'apple': 100}, 'cost': 10}}
print(get_element(json_object, "basket.cost"))
# Output: 10
If you try to access an element that doesn't already exist, you get a KeyError:
get_element(json_object, "basket.date")
# KeyError: 'date'
This also happens if you try to set a value in an element that doesn't exist:
set_element(json_object, "basket.date.day", 1)
# KeyError: 'date'
If we want to allow your function to create the dictionaries when they don't exist, we can modify the get_element function to account for this situation and add the key:
def get_element(d, path, create_missing=False):
    # This function can take the string "basket.cost", or an iterable containing the elements "basket" and "cost"
    if isinstance(path, str):
        path = path.split(".")
    for p in path:
        if create_missing and p not in d:
            d[p] = dict()
        d = d[p]
    return d

def set_element(d, path, value, create_missing=True):
    path = path.split(".")
    dict_to_set = get_element(d, path[:-1], create_missing)
    key_to_set = path[-1]
    dict_to_set[key_to_set] = value

set_element(json_object, "basket.date.day", 1)
print(json_object)
# Output: {'basket': {'items': {'apple': 100}, 'cost': 10, 'date': {'day': 1}}}
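The question also asks about lists of dictionaries, which the functions above don't handle. A possible extension (my sketch, not part of the answer above) is to treat a path part as a list index whenever the current level is a list:

def get_element(d, path, create_missing=False):
    # Hypothetical extension: numeric parts index into lists
    if isinstance(path, str):
        path = path.split(".")
    for p in path:
        if isinstance(d, list):
            d = d[int(p)]
        else:
            if create_missing and p not in d:
                d[p] = dict()
            d = d[p]
    return d

def set_element(d, path, value, create_missing=True):
    path = path.split(".")
    container = get_element(d, path[:-1], create_missing)
    last = path[-1]
    if isinstance(container, list):
        container[int(last)] = value
    else:
        container[last] = value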
If using a third-party package is an option, you can try python-box. It comes with lots of options and utilities to load from JSON and YAML files. The implementation is optimized for speed using Cython.
from box import Box
test_data = {
    "basket": {
        "products": [
            {"name": "apple", "colour": "green"}
        ],
    }
}
a = Box(test_data)
a.basket.cost = 12.3
a.basket.products[0].colour = "pink"
a.basket.products.append({"name": "pineapple", "taste": "sweet"})
print(a.basket.products[1].taste)
You can get exactly what you want by overloading some Python magic methods: __getattr__ and __setattr__. I'll show an example of the API to whet the appetite, and then the full code:
test_data = {'basket': {'items': [{'name': 'apple', 'colour': 'green'},
                                  {'name': 'pineapple', 'taste': 'sweet'},
                                  ],
                        'cost': 12.3,
                        },
             'name': 'Other'}
o = wrap(test_data)  # This wraps with the correct class, depending on whether it is a dict or a list
print(o.name) # Prints 'Other'
print(o.basket.items) # Prints the list of items
print(o.basket.cost) # Prints 12.3
o.basket.cost = 10.0 # Changes the cost
assert o.basket.cost == 10.0
assert len(o) == 2
assert len(o.basket.items) == 2
o.basket.items.append({'name': 'orange'})
o.basket.items[2].colour = 'yellow' # It works with lists!
assert o.basket.items[2].name == 'orange'
assert o.basket.items[2].colour == 'yellow'
# You can get a part of it and it holds a reference to the original
b = o.basket
b.type = 'groceries'
assert o.basket.type == 'groceries'
# It is also possible to create a separate wrapped part and then join:
employees = wrap({})
employees.Clara.id = 101
employees.Clara.age = 23
employees.Lucia.id = 102
employees.Lucia.age = 29
o.employees = employees
The implementation is based on special wrapper classes, one for dicts, another for lists. They all inherit from a base class. Note that super().__setattr__ is needed instead of simply assigning self._data because we will override the __getattr__ and __setattr__ methods to look for the data inside _data; a plain assignment would trigger those overrides and cause an infinite loop when defining _data.
from collections.abc import Mapping, Sequence, MutableSequence

class BaseWrapper:
    __slots__ = ('_data',)

    def __init__(self, data):
        super().__setattr__('_data', data)

    def __repr__(self):
        return f'{self.__class__.__name__}({repr(self._data)})'
The wrapper for dictionaries is the most interesting: it uses __getattr__ to look for a key in the wrapped dictionary. This allows for a very natural API: if o is a wrapped dictionary, o.entry will give the same result as o['entry']. Most of the code should be self-explanatory, there are only two tricks: the first is that __getattr__ checks if the output is a dict or list and wraps it. This allows for chaining of calls like o.basket.cost. The downside is that a new wrapper is created every call. The second trick is when setting an attribute: it checks if what is being set is a wrapped instance and un-wraps it. Thus, wrapped dictionaries can be combined and the underlying dictionary is always "clean".
class MappingWrapper(BaseWrapper):
    """Wraps a dictionary and provides the keys of the dictionary as class members.
    Creates new keys when they do not exist."""

    def __getattr__(self, name):
        # Note: these two lines allow automatic creation of attributes, e.g. in an object 'obj'
        # that doesn't have an attribute 'car', the following is possible:
        # >> o.car.colour = 'blue'
        # And all the missing levels will be automatically created
        if name not in self._data and not name.startswith('_'):
            self._data[name] = {}
        return wrap(self._data[name])

    def __setattr__(self, name, value):
        self._data[name] = unwrap(value)

    # Implements standard dictionary access
    def __getitem__(self, name):
        return wrap(self._data[name])

    def __setitem__(self, name, value):
        self._data[name] = unwrap(value)

    def __delitem__(self, name):
        del self._data[name]

    def __len__(self):
        return len(self._data)
The list wrapper is simpler, no need to mess around with attribute access. The only special care we have to take is to wrap and unwrap the list elements when one is requested/set. Note that, just like with the dictionary wrapper, the same wrap and unwrap functions are used (in __getitem__/__setitem__/insert).
class ListWrapper(BaseWrapper, MutableSequence):
    """Wraps a list. Essentially, provides wrapping of elements of the list."""

    def __getitem__(self, idx):
        return wrap(self._data[idx])

    def __setitem__(self, idx, value):
        self._data[idx] = unwrap(value)

    def __delitem__(self, idx):
        del self._data[idx]

    def __len__(self):
        return len(self._data)

    def insert(self, index, obj):
        self._data.insert(index, unwrap(obj))
Finally, the definition of wrap, which just selects the correct wrapper based on the type of the input, and unwrap, which extracts the raw data:
def wrap(obj):
    if isinstance(obj, dict):
        return MappingWrapper(obj)
    if isinstance(obj, list):
        return ListWrapper(obj)
    return obj

def unwrap(obj):
    if isinstance(obj, BaseWrapper):
        return obj._data
    return obj
The full code can be found in this gist.
An important caveat: to keep the implementation simple, wrapper objects are created at every access. Thus using this method inside large loops may cause performance issues (per my measurements, this method of access is between 12 and 30 times slower).
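A rough way to check that figure yourself (a sketch; the exact ratio depends on the machine and the data shape):

import timeit

raw = {'basket': {'cost': 12.3}}
wrapped = wrap(raw)

t_raw = timeit.timeit(lambda: raw['basket']['cost'], number=100_000)
t_wrapped = timeit.timeit(lambda: wrapped.basket.cost, number=100_000)
print(f'wrapped access is roughly {t_wrapped / t_raw:.0f}x slower')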
I'm going to assume that you already know how to handle the value errors that will probably come up with this nested collection accessing, so I won't focus on it in my approach.
I would split this in two parts:
Traversing a nested collection according to a list of keys for each level
Getting a list of keys out of a string
The first one is quite trivial: as you said, simply looping through the keys and getting to the end of those gives you access to the collection element in question. A simple implementation could look something like this:
def get_nested(collection, key):
    for part in key:
        collection = collection[part]
    return collection

def set_nested(collection, key, value):
    for part in key[:-1]:
        collection = collection[part]
    collection[key[-1]] = value
Here the key is expected to be some iterable of keys, such as a tuple or list.
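For example (a quick check, using the kind of nested structure from the question):

obj = {"basket": {"items": [{"name": "apple"}]}}
set_nested(obj, ("basket", "items", 0, "colour"), "green")
print(get_nested(obj, ("basket", "items", 0, "colour")))  # green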
Of course that means there is an expectation that your string representing a path along the collection is already parsed. We can get to that next.
This step would also be very trivial, since one could simply expression.split("."). However, since you also want to be able to index nested lists along with dicts, it gets a little more complicated.
There is a tradeoff to be made here. One could simply say: "Any time one of the items in expression.split(".") can be parsed to an int, we will do just that, and assume that it was meant as an index into a list." However, that isn't necessarily the case: there is nothing preventing you from using a number in string form as a key in a dict. If you think that is never going to be the case for you, perhaps you can just call it like this:
set_nested(
    collection,
    # a list rather than a generator, since set_nested slices the key
    [int(part) if part.isdigit() else part for part in expression.split(".")],
    "target value",
)
(or of course wrap it in another function like this).
However if the consideration of using digit keys in dicts is important for you, there is another solution:
Whenever traversing the nested collection downward, we check if the collection we are currently looking at is a list. Only if it is a list do we actually try to parse the path part as an int.
This would be the respective set_nested and get_nested functions for that:
def get_nested(collection, key: str):
    for part in key.split("."):
        if type(collection) == list:
            part = int(part)
        collection = collection[part]
    return collection

def set_nested(collection, key: str, val):
    key = key.split(".")
    for i, part in enumerate(key):
        if type(collection) == list:
            part = int(part)
        if i == len(key) - 1:
            collection[part] = val
        else:
            collection = collection[part]
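For example, with a structure like the one from the question (a quick check):

json_object = {"basket": {"items": [{"name": "apple"}], "cost": 12.3}}
set_nested(json_object, "basket.items.0.colour", "green")
print(get_nested(json_object, "basket.items.0.colour"))  # green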
I believe that's the simplest solution to your problem, though of course it's important to keep in mind:
There is no error handling in this code, and indexing on dynamic paths is a topic where you are bound to run into errors. Depending on where and how you want to handle those it's going to be easy or very tedious.
There is no checking for setting values in dicts that don't exist yet, or for expanding arrays to a specific size, but since you didn't mention those as a requirement I'm presuming they're not an issue. They might be for others reading this.
This is tricky and I would discourage it unless necessary, as it is an easy thing to design and implement badly.
First: it's easy to split on path separator and follow the object tree to the desired key.
But after a while questions will start to appear. E.g.: what separator to split on?
A slash? It can appear in the JSON dictionary key... A dot? Same.
We'll need to either restrict legal / handled paths or implement some kind of escaping mechanism.
How do you handle empty strings?
Another goal: handle lists... Ok. So how do we interpret a path a.0? Is it ['a'][0] or ['a']['0'] ?
It seems that we'll have to complicate the language or drop the requirement.
So, in general, I'd avoid it. Ultimately, here's a quick implementation whose design choices may or may not satisfy you:
there's basic backslash escaping of path separator
empty string is allowed as a key
lists are not handled due to ambiguity
def deep_set(root: dict, path: str, value):
    segments = [*iter_segments(path, '.')]
    for k in segments[:-1]:
        root = root[k]
    root[segments[-1]] = value

def iter_segments(path: str, separator: str = '.'):
    segment = ''
    path_iter = iter(path)
    while True:
        c = next(path_iter, '')
        if c in (separator, ''):
            yield segment
            segment = ''
            if c == '':
                break
            continue
        elif '\\' == c:
            c = next(path_iter, '')
        segment += c
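A quick demonstration of the escaping (note the doubled backslash in the Python string literal):

d = {'a.b': {'c': 1}}
deep_set(d, 'a\\.b.c', 42)  # escaped dot: the segments are 'a.b' and 'c'
print(d)  # {'a.b': {'c': 42}}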
Is it possible to create a dynamically linked variable, so that changes to original_VAR will automatically take effect in copied_VAR? Like so:
original_VAR = 'original_VAL'
copied_VAR = original_VAR
original_VAR = 'modified_VAL'
print(copied_VAR)
#desired output:
>>>> 'modified_VAL'
A similar behavior can be created for lists under a few conditions:
original_DICT_ARR = [{'key': 'original_VAL'}]
# 1 - does not create a dynamic link
copied_DICT_ARR = [value for value in original_DICT_ARR]
# 2 - does create a dynamic link
copied_DICT_ARR = original_DICT_ARR
# 3 - does create a dynamic link, if the copied element is a list or dict, but not if string, boolean, int, float
copied_DICT_ARR = []
copied_DICT_ARR.append(original_DICT_ARR[0])
# MODIFICATION:
original_DICT_ARR[0]['key'] = 'modified_VAL'
# RESULT for 2,3
print(copied_DICT_ARR[0])
>>>> {'key': 'modified_VAL'}
Why would I want to do this?
I am building a list, the list is full of dict objects. I need to assign a value to a certain dict key.
Later, that value might change - I don't want to loop through all dictionaries in the list again. I want to change the original variable, and have the effect taken place in all dictionaries automatically.
You can keep a reference to the specific dict you want to modify later. Since it refers to the same underlying dict, changes will be reflected in your reference. Like so:
original_DICT_ARR = [{'key': 'original_VAL'}, {'key': 'another_val'}]
target_dict = original_DICT_ARR[0]
# MODIFICATION:
original_DICT_ARR[0]['key'] = 'modified_VAL'
# RESULT for 2,3
print(target_dict)
Gives:
{'key': 'modified_VAL'}
You could achieve this and keep things abstract by using a mutable object and subclassing UserDict and overriding __getitem__:
from collections import UserDict

class ChangingVal:
    def __init__(self, val):
        self.val = val

class ChangingValsDict(UserDict):
    def __getitem__(self, key):
        ret = self.data[key]
        if isinstance(ret, ChangingVal):
            ret = ret.val
        return ret
my_dict = ChangingValsDict()
changing_val = ChangingVal(3)
my_dict["changing_value"] = changing_val
print(my_dict["changing_value"]) # -- outputs "3"
# change your value
changing_val.val = 6
print(my_dict["changing_value"]) # -- outputs "6"
I am trying to create a multi-level dict with variable depth and with list and int type.
Data structure is like below
A
--B1
-----C1=1
-----C2=[1]
--B2=[3]
D
--E
----F
------G=4
In the case of above data structure, the last value can be an int or list.
If the above data structure had only ints, this could easily be achieved using the code below:
from collections import defaultdict
f = lambda: defaultdict(f)
d = f()
d['A']['B1']['C1'] = 1
But as the last value has both list and int, it becomes a bit problematic for me.
Now we can insert data into a list in two ways:
d['A']['B1']['C2'] = [1]
d['A']['B1']['C2'].append([2])
But when I use only the append method, it causes an error.
Error is:
AttributeError: 'collections.defaultdict' object has no attribute 'append'
So is there any way to use only the append method for a list?
There's no way you can use your current defaultdict-based structure to make d['A']['B1']['C2'].append(1) work properly if the 'C2' key doesn't already exist, since the data structure can't tell that the unknown key should correspond to a list rather than another layer of dictionary. It doesn't know what method you're going to call on the value it returns, so it can't know it shouldn't return a dictionary (like it did when it first looked up 'A' and 'B').
This isn't an issue for bare integers, since for those you're assigning directly to a new key (and all the earlier levels are dictionaries). When you're assigning, the data structure isn't creating the value, you are, so you can use any type you want.
Now, if your keys are distinctive in some way, so that given a key like 'C2' you can know for sure that it should correspond to a list, you may have a chance. You can write your own dict subclass, defining a __missing__ method to handle lookups of keys that don't exist yet in your own special way:
class Tree(dict):
    def __missing__(self, key):
        if key_corresponds_to_list(key):  # magic from somewhere
            result = self[key] = []
        else:
            result = self[key] = Tree()
        return result

    # you might also want a custom __repr__
Here's an example run with a magic key function that makes any even-length key default to a list, while an odd-length key defaults to a dict:
>>> def key_corresponds_to_list(key):
...     return len(key) % 2 == 0
>>> t = Tree()
>>> t["A"]["B"]["C2"].append(1)  # the default value for C2 is a list because it has even length
>>> t
{'A': {'B': {'C2': [1]}}}
>>> t["A"]["B"]["C10"]["D"] = 2  # C10 is another layer of dict, since its length is odd
>>> t
{'A': {'B': {'C10': {'D': 2}, 'C2': [1]}}}  # it didn't matter what length D was, though
You probably won't actually want to use a global function to control the class like this, I just did that as an example. If you go with this approach, I'd suggest putting the logic directly into the __missing__ method (or maybe passing a function as a parameter, like defaultdict does with its factory function).
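For instance, a minimal sketch of the parameter-passing variant suggested above (the is_list_key name is my own):

class Tree(dict):
    # dict whose missing keys default to a list or a sub-Tree,
    # decided by a predicate supplied at construction time
    def __init__(self, is_list_key=None):
        super().__init__()
        self.is_list_key = is_list_key or (lambda key: False)

    def __missing__(self, key):
        if self.is_list_key(key):
            result = self[key] = []
        else:
            result = self[key] = Tree(self.is_list_key)
        return result

t = Tree(lambda key: len(key) % 2 == 0)
t["A"]["B"]["C2"].append(1)
print(t)  # {'A': {'B': {'C2': [1]}}}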
I see this pattern in some code that I'm writing:
e = {...} # a dictionary
e["table"] = "users"
e["timestamp"] = time.time()
queue.push(e)
del e["table"]
del e["timestamp"]
[...]
e["table"] = "events"
queue2.push(e)
del e["table"]
# etc..
I'm demultiplexing an event over some queues but each queue has a slightly different format. I've started doing this:
queue.push(dict(e.items() + [("table", "users"), ("timestamp", time.time())]))
but it looks ugly and it kind of slows down the code. What else can I do?
Assuming queue.push only needs read access, you could try something like this:
class MergedDicts(dict):
def __init__(self, *dicts, **kw):
self.dicts = dicts + (kw,)
def __getitem__(self, key):
for d in self.dicts:
if key in d: return d[key]
raise KeyError(key)
This would give you a dictionary returning items from both sources, but avoid the overhead of building another actual copy from the originals (you may need to implement more than just __getitem__ though, depending on what push needs).
Usage:
other = {"table": "users", "timestamp": time.time()}
queue.push(MergedDicts(e, other))
or:
queue.push(MergedDicts(e, table="users", timestamp=time.time()))
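If push does more than subscript access (say it iterates or checks key membership), a couple more methods in the same spirit would be needed. A sketch of what that might look like (a hypothetical extension, not part of the original answer):

class MergedDicts(dict):
    def __init__(self, *dicts, **kw):
        self.dicts = dicts + (kw,)

    def __getitem__(self, key):
        for d in self.dicts:
            if key in d:
                return d[key]
        raise KeyError(key)

    def __contains__(self, key):
        return any(key in d for d in self.dicts)

    def __iter__(self):
        # yield each key once; earlier sources win, matching __getitem__
        seen = set()
        for d in self.dicts:
            for key in d:
                if key not in seen:
                    seen.add(key)
                    yield key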
If the number of modifications to the dictionary is relatively small compared to the size of the dictionary itself, you can avoid making a copy of it each time by creating a context manager function and using it as shown. This will ensure that any changes made to the dictionary are temporary, even if an exception is thrown while using it inside the block.
from contextlib import contextmanager

@contextmanager
def contextdict(adict, **kwargs):
    # modify dictionary
    changed = {}
    added = []
    for key in kwargs:
        if key in adict:
            changed[key] = adict[key]
        else:
            added.append(key)
        adict[key] = kwargs[key]
    try:
        yield adict
    finally:
        # restore dictionary, even if an exception was raised in the block
        adict.update(changed)
        for key in added:
            del adict[key]
e = dict(...)  # some dictionary

with contextdict(e, table="users", timestamp=time.time()) as context:
    queue.push(context)

with contextdict(e, table="events") as context:
    queue.push(context)

# e will be unchanged at this point
You can create a new dictionary with the new fields you want and use dict.update on it with the base fields
e = {...} # a dictionary
d={"table":"users", "timestamp":time.time()}
d.update(e)
queue.push(d)
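On Python 3.5+ the same idea fits in a single expression with dict unpacking; as with d.update(e) above, e's values win over the defaults:

queue.push({"table": "users", "timestamp": time.time(), **e})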
You could also create a new dict from the base fields plus the extras as a list of pairs (in Python 3, dict.items() must be converted to a list before concatenation):
e = {...} # a dictionary
queue.push(dict(list(e.items()) + [("table", "users"), ("timestamp", time.time())]))
If you do this a lot on large dictionaries, and don't want to create a copy, you can use a Context Manager that modifies the dictionary temporarily, automating what you're doing right now.
Another option, instead of the context manager, is performing the modification in a function, passing the operations you want to do as a function:
def modify_dict_and_call(d, newfields, f):
    for k, v in newfields.items():
        d[k] = v
    f(d)
    for k in newfields:
        del d[k]

e = {...} # a dictionary
modify_dict_and_call(e, {"table": "users", "timestamp": time.time()}, queue.push)
If you initially define e with only those keys that are common to each use case, you can use the mock library. mock.patch.dict allows you to temporarily add keys to a dictionary (for the duration of the with statement), although you cannot temporarily remove keys.
e = { ... }

with mock.patch.dict(e, table="users", timestamp=time.time()):
    queue.push(e)

with mock.patch.dict(e, table="events"):
    queue2.push(e)
mock is a third-party module for Python 2.x and prior to Python 3.4, where it was added to the standard library as unittest.mock.
I think it might be covered by the following code:
a = {'val': 2, 'val2': -5, "name": 'Vladimir'}
b = {"asdf": 1, "b2": 2}
queue.push( dict( **a, **b) )
I'm having trouble getting my data in the form that I'd like in python.
Basically I have a program that reads in binary data and provides functions for plotting and analysis on said data.
My data has main headings and then subheadings that could be any number of varied datatypes.
I'd like to be able to access my data like for example:
>>> a = myDatafile.readit()
>>> a.elements.hydrogen.distributionfunction
(a big array)
>>> a.elements.hydrogen.mass
1
>>> a.elements.carbon.mass
12
but I don't know the names of the atoms until runtime.
I've tried using namedtuple, for example after I've read in all the atom names:
self.elements = namedtuple('elements',elementlist)
Where elementlist is a list of strings, for example ('hydrogen', 'carbon'). But the problem is that I can't nest these using, for example:
for i in range(0, self.nelements):
    self.elements[i] = namedtuple('details', ['ux','uy','uz','mass','distributionfunction'])
and then be able to access the values through for example
self.elements.electron.distributionfunction.
Maybe I'm doing this completely wrong. I'm fairly inexperienced with python. I know this would be easy to do if I wasn't bothered about naming the variables dynamically.
I hope I've made myself clear with what I'm trying to achieve!
Without knowing your data, we can only give a generic solution.
Considering that the first two lines contain the headings and sub-headings, reading them somehow lets you determine the hierarchy. All you have to do is create a hierarchical dictionary.
For example, extending your example
data.elements.hydrogen.distributionfunction
data.elements.nitrogen.xyzfunction
data.elements.nitrogen.distributionfunction
data.compound.water.distributionfunction
data.compound.hcl.xyzfunction
So we have to create a dictionary as such
{'data': {'elements': {'hydrogen': {'distributionfunction': <something>},
                       'nitrogen': {'xyzfunction': <something>,
                                    'distributionfunction': <something>}
                       },
          'compound': {'water': {'distributionfunction': <something>},
                       'hcl': {'xyzfunction': <something>}
                       }
          }
 }
How you populate the dictionary depends on the data, which is difficult to say now.
But you should populate the dictionary's keys from the headers, and somehow map the data to the respective values in the empty slots of the dictionary.
Once the map is populated, you can access it as
yourDict['data']['compound']['hcl']['xyzfunction']
If your element names are dynamic and obtained from the data at runtime, you can assign them to a dict and access them like this:
elements['hydrogen'].mass
but if you want dotted notation you can create attributes at run time e.g.
from collections import namedtuple

class Elements(object):
    def add_element(self, elementname, element):
        setattr(self, elementname, element)

Element = namedtuple('Element', ['ux','uy','uz','mass','distributionfunction'])

elements = Elements()
for data in [('hydrogen',1,1,1,1,1), ('helium',2,2,2,2,2), ('carbon',3,3,3,3,3)]:
    elementname = data[0]
    element = Element._make(data[1:])
    elements.add_element(elementname, element)

print(elements.hydrogen.mass)
print(elements.carbon.distributionfunction)
Here I am assuming the data you have, but with data in any other format you can do similar tricks
Here's a method for recursively creating namedtuples from nested data.
from collections import namedtuple
from collections.abc import Mapping

def namedtuplify(mapping, name='NT'):  # thank you https://gist.github.com/hangtwenty/5960435
    """Convert mappings to namedtuples recursively."""
    if isinstance(mapping, Mapping):
        for key, value in list(mapping.items()):
            mapping[key] = namedtuplify(value)
        return namedtuple_wrapper(name, **mapping)
    elif isinstance(mapping, list):
        return [namedtuplify(item) for item in mapping]
    return mapping

def namedtuple_wrapper(name, **kwargs):
    wrap = namedtuple(name, kwargs)
    return wrap(**kwargs)
stuff = {'data': {'elements': {'hydrogen': {'distributionfunction': 'foo'},
                               'nitrogen': {'xyzfunction': 'bar',
                                            'distributionfunction': 'baz'}
                               },
                  'compound': {'water': {'distributionfunction': 'lorem'},
                               'hcl': {'xyzfunction': 'ipsum'}}}
         }
example = namedtuplify(stuff)
example.data.elements.hydrogen.distributionfunction # 'foo'
I had the same issue with nested JSON but needed to be able to serialise the output with pickle, which doesn't like you creating objects on the fly.
I've taken @bren's answer and enhanced it so that the resulting structure will be serialisable with pickle. You have to save references to each of the structures you create to globals so that pickle can keep tabs on them.
from collections import namedtuple
from collections.abc import Mapping

##############################################
class Json2Struct:
    '''
    Convert mappings to nested namedtuples
    Usage:
        jStruct = Json2Struct('JS').json2Struct(json)
    '''
    ##############################################
    def __init__(self, name):
        self.namePrefix = name
        self.nameSuffix = 0

    def json2Struct(self, jsonObj):  # thank you https://gist.github.com/hangtwenty/5960435
        """
        Convert mappings to namedtuples recursively.
        """
        if isinstance(jsonObj, Mapping):
            for key, value in list(jsonObj.items()):
                jsonObj[key] = self.json2Struct(value)
            return self.namedtuple_wrapper(**jsonObj)
        elif isinstance(jsonObj, list):
            return [self.json2Struct(item) for item in jsonObj]
        return jsonObj

    def namedtuple_wrapper(self, **kwargs):
        self.nameSuffix += 1
        name = self.namePrefix + str(self.nameSuffix)
        Jstruct = namedtuple(name, kwargs)
        globals()[name] = Jstruct
        return Jstruct(**kwargs)
The example below should work as follows and also be serialisable:
stuff = {'data': {'elements': {'hydrogen': {'distributionfunction': 'foo'},
                               'nitrogen': {'xyzfunction': 'bar',
                                            'distributionfunction': 'baz'}
                               },
                  'compound': {'water': {'distributionfunction': 'lorem'},
                               'hcl': {'xyzfunction': 'ipsum'}}}
         }
example = Json2Struct('JS').json2Struct(stuff)
example.data.elements.hydrogen.distributionfunction # 'foo'
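For instance, a quick same-process round-trip check of the serialisability claim (a sketch; unpickling in another process would require the generated JS* classes to exist there too):

import pickle

example = Json2Struct('JS').json2Struct(stuff)
restored = pickle.loads(pickle.dumps(example))
assert restored.data.elements.hydrogen.distributionfunction == 'foo'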