Is there a pythonic way to process tree-structured dict keys?

Is there a pythonic way to process tree-structured dict keys? - python

I'm looking for a pythonic idiom to turn a list of keys and a value into a dict with those keys nested. For example:
dtree(["a", "b", "c"]) = 42
or
dtree("a/b/c".split(sep='/')) = 42
would return the nested dict:
{"a": {"b": {"c": 42}}}
This could be used to turn a set of values with hierarchical keys into a tree:
dtree({
"a/b/c": 10,
"a/b/d": 20,
"a/e": "foo",
"a/f": False,
"g": 30 })
would result in:
{ "a": {
"b": {
"c": 10,
"d": 20 },
"e": foo",
"f": False },
"g": 30 }
I could write some FORTRANish code to do the conversion using brute force and multiple loops and maybe collections.defaultdict, but it seems like a language with splits and joins and slices and comprehensions should have a primitive that turns a list of strings ["a","b","c"] into nested dict keys ["a"]["b"]["c"]. What is the shortest way to do this without using eval on a dict expression string?

I'm looking for a pythonic idiom to turn a list of keys and a value into a dict with those keys nested.
reduce(lambda v, k: {k: v}, reversed("a/b/c".split("/")), 42)
This could be used to turn a set of values with hierarchical keys into a tree
def hdict(keys, value, sep="/"):
    """Wrap ``value`` in nested single-key dicts, one level per path segment.

    ``keys`` is a ``sep``-delimited path string, so ``hdict("a/b/c", 42)``
    returns ``{"a": {"b": {"c": 42}}}``.
    """
    # reduce is not a builtin on Python 3, so it has to be imported.
    from functools import reduce

    # Fold from the innermost key outwards: each step wraps the previous result.
    return reduce(lambda v, k: {k: v}, reversed(keys.split(sep)), value)
def merge_dict(trg, src):
    """Recursively merge ``src`` into ``trg`` in place.

    Keys present in both are merged level by level when both values are
    dicts; in every other case the value from ``src`` is stored in ``trg``.
    Returns ``None``; ``trg`` is mutated.
    """
    for k, v in src.items():
        if k in trg and isinstance(trg[k], dict) and isinstance(v, dict):
            merge_dict(trg[k], v)
        else:
            # New key, or a leaf/subtree conflict: src's value wins instead
            # of crashing on a non-dict operand.
            trg[k] = v
def hdict_from_dict(src):
    """Build a nested dict from a flat mapping of "/"-separated paths to values."""
    result = {}
    for path, value in src.items():
        # Expand each flat entry into its own single-branch tree, then fold
        # that branch into the accumulated result.
        merge_dict(result, hdict(path, value))
    return result
data = {
"a/b/c": 10,
"a/b/d": 20,
"a/e": "foo",
"a/f": False,
"g": 30 }
print(hdict_from_dict(data))
Another overall solution using collections.defaultdict
import collections
def recursive_dict():
    """Return a defaultdict whose missing keys become recursive dicts too."""
    return collections.defaultdict(recursive_dict)


def dtree(inp):
    """Turn a flat mapping of "/"-separated paths into a nested tree.

    The result (and every inner level) is a ``collections.defaultdict``,
    which compares equal to the equivalent plain nested dict.
    """
    result = recursive_dict()
    for path, value in inp.items():
        *parents, leaf = path.split("/")
        node = result
        # Indexing a recursive defaultdict creates missing levels on the way down.
        for key in parents:
            node = node[key]
        node[leaf] = value
    return result
import json
print(json.dumps(dtree({
"a/b/c": 10,
"a/b/d": 20,
"a/e": "foo",
"a/f": False,
"g": 30 }), indent=4))

Or just for grins since reduce is the coolest thing since sliced bread, you could save one SLOC by using it twice :-)
def dmerge(x, y):
    """Return a new dict that is ``x`` with ``y`` merged in recursively.

    Neither argument is mutated. When a key exists in both and both values
    are dicts they are merged level by level; otherwise ``y``'s value wins.
    Every key of ``y`` is merged, and an empty ``y`` returns a copy of ``x``.
    """
    result = x.copy()
    for key, value in y.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = dmerge(result[key], value)
        else:
            result[key] = value
    return result
def hdict(keys, value, sep="/"):
    """Expand a ``sep``-delimited key path into a chain of nested dicts.

    Example: ``hdict("a/b", 1)`` returns ``{"a": {"b": 1}}``.
    """
    # Python 3 moved reduce into functools; import it where it is used.
    from functools import reduce

    # Walk the path right-to-left so the value ends up at the deepest level.
    return reduce(lambda v, k: {k: v}, reversed(keys.split(sep)), value)
def hdict_from_dict(src):
    """Fold every ``path -> value`` entry of ``src`` into one nested dict."""
    # reduce lives in functools on Python 3.
    from functools import reduce

    # Starting from {} means an empty ``src`` yields {} instead of raising
    # TypeError, and dmerge can be passed directly without a lambda wrapper.
    return reduce(dmerge, (hdict(k, v) for k, v in src.items()), {})
data = {
"a/b/c": 10,
"a/b/d": 20,
"a/e": "foo",
"a/f": False,
"g": 30 }
print("flat:", data)
print("tree:", hdict_from_dict(data))

Related

From a given nested dictionary, find all the nested keys sequences

I have a nested dictionary which looks like this:
dct = {"A": {"AA": "aa", "BB": {"BBB": "bbb", "CCC": "ccc"}}}
I want to extract all the key sequences in the list format till I reach the deepest key:value pair.
The expected output is something like this:
["A->AA", "A->BB->BBB", "A->BB->CCC"]
The solution I tried is:
for k, v in dct.items():
if isinstance(v, dict):
# traverse nested dict
for x in find_keys(v):
yield "{}_{}".format(k, x)
print("{}_{}".format(k, x))
else:
yield k
print(k)
but it does not seem to work as expected.

I guess you are almost there (or omitted some parts by mistake):
def find_keys(dct):
    """Yield one "->"-joined key path for every leaf value of a nested dict."""
    for k, v in dct.items():
        if isinstance(v, dict):
            # Prefix each path found below this level with the current key.
            yield from (f"{k}->{x}" for x in find_keys(v))
        else:
            yield k
dct = {"A": {"AA": "aa", "BB": {"BBB": "bbb", "CCC": "ccc"}}}
print(*find_keys(dct)) # A->AA A->BB->BBB A->BB->CCC
If you want to use return instead, then:
def find_keys(dct):
    """Return a list with one "->"-joined key path per leaf of a nested dict."""
    result = []
    for k, v in dct.items():
        if isinstance(v, dict):
            # Paths from the subtree are prefixed with the current key.
            result.extend(f"{k}->{x}" for x in find_keys(v))
        else:
            result.append(k)
    return result

How to return a dict with lists as values but it only returns me the keys

I've got a dict1 that looks like the following:
{"A":["a","b","c"],"B":["b","d","e"],"C":["a","e"]}
My goal is to write a function that gets listA as input.
listA could be ["a","b","c","e"] for example.
I now want to return a dict2 with all elements from dict1 which have all their elements included in listA.
Output should look like this:
{'A':["a","b","c"],"C":["a","e"]}
My code looks like this:
def func(listA: list) -> dict:
return set(x for x in dict1 if all(x in listA for x in dict1[x]))
My output only returns the keys, what do I have to do?

dict1 = {"A":["a","b","c"],"B":["b","d","e"],"C":["a","e"]}
listA = ["a","b","c","e"]
dict2 = {key: l for key, l in dict1.items() if all(val in listA for val in l)}

If the original dictionary is fixed, and only listA is taken as an input to the function, then you should change the dictionary to contain sets instead of lists. Sets are faster to test membership of, so they are faster to test subsets of; this can be done using the overloaded <= operator, which makes the code cleaner, too.
dict_of_lists = {
    "A": ["a", "b", "c"],
    "B": ["b", "d", "e"],
    "C": ["a", "e"],
}
# Precomputed once so each call only pays for set comparisons.
dict_of_sets = {k: set(v) for k, v in dict_of_lists.items()}


def func(listA):
    """Return the entries of ``dict_of_lists`` whose items all occur in ``listA``."""
    setA = set(listA)
    # ``<=`` on sets is the subset test.
    return {k: v for k, v in dict_of_lists.items() if dict_of_sets[k] <= setA}
Example:
>>> func(['a', 'b', 'c', 'e'])
{'A': ['a', 'b', 'c'], 'C': ['a', 'e']}
Consider also whether it would make sense for your inputs and outputs to use sets instead of lists. If you don't really need to maintain the ordering or allow duplicates, then you could just use dict_of_sets in the first place, and func wouldn't need to convert the list to a set either:
dict_of_sets = {
    "A": {"a", "b", "c"},
    "B": {"b", "d", "e"},
    "C": {"a", "e"},
}


def func(setA):
    """Return the entries of ``dict_of_sets`` whose value is a subset of ``setA``."""
    return {k: v for k, v in dict_of_sets.items() if v <= setA}

instead of
dict1[x]
use`
dict1.values[x]

Remove some field from nested dictionary?

For example I have a dict:
{
"a" : 123,
"b" : {
"a" : 24324,
"c" : 9
},
"c" : {
"a" : 123,
"b" : 64
}
}
What is the best way to remove all a fields from this dictionary? I understand that I can iterate over this dict, remove the a field, then iterate over the values that are also dicts, and so on.
But maybe there is more elegant way to do it?
EDIT
Thanks for your answers! But what about a situation, when keys can be lists, like "b" : [{"a" : 1, "c" : 2}]

Well, internally you have to process the items one by one, either by manual iteration or recursion.
Here's an attempt using recursion:
def remove_keys(d, to_remove):
    """Return a copy of ``d`` without any key in ``to_remove``, at every depth.

    Non-dict values are returned unchanged; the input is not mutated.
    ``to_remove`` is any container supporting ``in`` (a set is preferable).
    """
    if not isinstance(d, dict):
        return d
    return {k: remove_keys(v, to_remove)
            for k, v in d.items() if k not in to_remove}
You would simply pass the dictionary to process as the first argument and the name of the keys to remove (as a set, preferably) as the second argument. In your case:
remove_keys(d, {"a"})
Whether it's more elegant or not, it depends on your taste, I would say :).
If you want to actually mutate the dictionary:
def remove_keys_m(d, to_remove):
    """Delete every key in ``to_remove`` from ``d`` and its nested dicts, in place.

    Returns ``d`` itself (or the argument unchanged when it is not a dict).
    """
    if not isinstance(d, dict):
        return d
    for k in to_remove:
        d.pop(k, None)
    # Recurse with the mutating variant so nested dicts are edited in place
    # rather than being replaced by filtered copies.
    for value in d.values():
        remove_keys_m(value, to_remove)
    return d

def remove_entries(d, k):
    """Delete key ``k`` from ``d`` and from every dict nested inside it, in place."""
    d.pop(k, None)
    for value in d.values():
        if isinstance(value, dict):
            remove_entries(value, k)
Here's a pretty basic recursive function for doing it
Edit:
If you also want to handle lists nested in dicts, nested in lists, etc., something like the below will work.
def remove_from_dict_in_list(l, k):
    """Strip key ``k`` from every dict reachable through the (nested) list ``l``."""
    for i in l:
        if isinstance(i, list):
            remove_from_dict_in_list(i, k)
        elif isinstance(i, dict):
            remove_entries(i, k)


def remove_entries(d, k):
    """Delete key ``k`` from ``d`` in place, descending into nested dicts and lists."""
    d.pop(k, None)
    for value in d.values():
        if isinstance(value, dict):
            remove_entries(value, k)
        elif isinstance(value, list):
            remove_from_dict_in_list(value, k)

While a recursive solution is more robust, here is a simpler solution that will work for the posted example and any other dictionary with only one nested layer:
s = {
"a" : 123,
"b" : {
"a" : 24324,
"c" : 9
},
"c" : {
"a" : 123,
"b" : 64
}
}
final_data = {a:{c:d for c, d in b.items() if c != "a"} for a, b in s.items() if a != "a"}
Output:
{'c': {'b': 64}, 'b': {'c': 9}}

I think this example will answer your question.
dic = {"a": 1,
       "b": {"a": 2,
             "c": {"a": 3}},
       "d": 4}
key_to_del = "a"


def get_key(dic):
    """Remove every ``key_to_del`` entry from ``dic`` and its nested dicts.

    The dict is modified in place and also returned.
    """
    # Iterate over a snapshot: deleting from a dict while iterating over it
    # raises RuntimeError on Python 3.
    for key, value in list(dic.items()):
        if key == key_to_del:
            del dic[key]
        elif isinstance(value, dict):
            get_key(value)
    return dic


print("BEFOR", dic)
print("AFTER", get_key(dic))
enter image description here

How can I construct a nested dictionary?

I have a list of strings, from which I have to construct a dict. So, for example, I have:
foo.bar:10
foo.hello.world:30
xyz.abc:40
pqr:100
This is represented as a dict:
{
"foo": {
"bar": 10,
"hello": {
"world": 30
}
},
"xyz": {
"abc": 40
},
"pqr": 100
}
This question is based on the same premise, but the answers discuss hardcoded depths such as:
mydict = ...
mydict['foo']['bar'] = 30
Since the dot-separated strings on the left may be of any depth, I can't figure out a way to build the dict. How should I parse the dot-separated string and build the dict?

Building upon the solution in the links, you could
iterate over each line
for each line, extract a list of keys, and its value
recurse into a dictionary with each key using setdefault
assign the value at the bottom
lines = \
'''
foo.bar:10
foo.hello.world:30
xyz.abc:40
pqr:100
'''.splitlines()
d = {}
for l in lines:
    # The literal starts with a newline, so the first entry is an empty
    # string; skip blanks instead of failing on the unpacking below.
    if not l.strip():
        continue
    k, v = l.split(':')
    *f, l = k.split('.')
    t = d
    for k in f:
        t = t.setdefault(k, {})
    t[l] = int(v) # don't perform a conversion if your values aren't numeric
print(d)
{
"pqr": 100,
"foo": {
"bar": 10,
"hello": {
"world": 30
}
},
"xyz": {
"abc": 40
}
}
Recursive setdefault traversal learned from here.
Breaking down each step -
Split on :, extract the key-list string and the value
k, v = l.split(':')
Split the key-string on . to get a list of keys. I take the opportunity to partition the keys as well, so I have a separate reference to the last key that will be the key to v.
*f, l = k.split('.')
*f is the catch-all assignment, and f is a list of any number of values (possibly 0 values, if there's only one key in the key-string!)
For each key k in the key list f, recurse down into the "tree" using setdefault. This is similar to recursively traversing a linked list.
for k in f:
t = t.setdefault(k, {})
At the end, the last key value pair comes from l and v.
t[l] = v

What's wrong with incrementally building it?
import pprint

# Build the target structure one level at a time; each intermediate dict
# must exist before a key can be assigned inside it.
mydict = {}
mydict["foo"] = {}
mydict["foo"]["bar"] = 10
mydict["foo"]["hello"] = {}
mydict["foo"]["hello"]["world"] = 30
mydict["xyz"] = {}
mydict["xyz"]["abc"] = 40
mydict["pqr"] = 100
# ...
pprint.pprint(mydict) # {'foo': {'bar': 10, 'hello': {'world': 30}}, 'pqr': 100, 'xyz': {'abc': 40}}
Including the parsing, you could use something like this:
import pprint

inp = """foo.bar:10
foo.hello.world:30
xyz.abc:40
pqr:100
"""
mydict = {}
for line in inp.splitlines():
    if not line.strip():
        continue  # tolerate blank lines in the input
    s, v = line.split(':')
    parts = s.split(".")
    d = mydict
    for i in parts[:-1]:
        # Create the intermediate level on first use, then descend into it.
        d = d.setdefault(i, {})
    d[parts[-1]] = v  # values are stored as strings, not converted
pprint.pprint(mydict) # {'foo': {'bar': '10', 'hello': {'world': '30'}}, 'pqr': '100', 'xyz': {'abc': '40'}}

One key point to consider in your case is that the value you store in a parent dictionary is either another dictionary or an integer:
import pprint

x = """
foo.bar:10
foo.hello.world:30
xyz.abc:40
pqr.a:100
"""
tree = {}
# str.split() with no argument drops the blank first/last entries.
for item in x.split():
    # Separate the value first so a dot inside the value cannot be mistaken
    # for a path separator.
    path, value = item.split(":")
    *parents, leaf = path.split(".")
    t = tree
    for part in parents:
        t = t.setdefault(part, {})
    # Leaf: the value is stored as a string; an existing entry is kept.
    t.setdefault(leaf, value)
pprint.pprint(tree)
Result:
{'foo': {'bar': '10', 'hello': {'world': '30'}},
'pqr': {'a': '100'},
'xyz': {'abc': '40'}}

Filtering dictionaries and creating sub-dictionaries based on keys/values in Python?

Ok, I'm stuck, need some help from here on...
If I've got a main dictionary like this:
data = [ {"key1": "value1", "key2": "value2", "key1": "value3"},
{"key1": "value4", "key2": "value5", "key1": "value6"},
{"key1": "value1", "key2": "value8", "key1": "value9"} ]
Now, I need to go through that dictionary already to format some of the data, ie:
for datadict in data:
for key, value in datadict.items():
...filter the data...
Now, how would I in that same loop somehow (if possible... if not, suggest alternatives please) check for values of certain keys, and if those values match my presets then I would add that whole list to another dictionary, thus effectively creating smaller dictionaries as I go along out of this main dictionary based on certain keys and values?
So, let's say I want to create a sub-dictionary with all the lists in which key1 has value of "value1", which for the above list would give me something like this:
subdata = [ {"key1": "value1", "key2": "value2", "key1": "value3"},
{"key1": "value1", "key2": "value8", "key1": "value9"} ]

Here is a not so pretty way of doing it. The result is a generator, but if you really want a list you can surround it with a call to list(). Mostly it doesn't matter.
The predicate is a function which decides for each key/value pair if a dictionary in the list is going to cut it. The default one accepts all. If no k/v-pair in the dictionary matches it is rejected.
def filter_data(data, predicate=lambda k, v: True):
    """Yield each dict in ``data`` that has at least one pair accepted by ``predicate``.

    ``predicate`` is called as ``predicate(key, value)``. A dict is yielded
    at most once, even if several of its pairs match; a dict with no
    matching pair (including an empty dict) is skipped.
    """
    for d in data:
        # any() stops at the first match, so a dict is never yielded twice.
        if any(predicate(k, v) for k, v in d.items()):
            yield d
test_data = [{"key1":"value1", "key2":"value2"}, {"key1":"blabla"}, {"key1":"value1", "eh":"uh"}]
list(filter_data(test_data, lambda k, v: k == "key1" and v == "value1"))
# [{'key2': 'value2', 'key1': 'value1'}, {'key1': 'value1', 'eh': 'uh'}]

Net of the issues already pointed out in other comments and answers (multiple identical keys can't be in a dict, etc etc), here's how I'd do it:
def select_sublist(list_of_dicts, **kwargs):
    """Return the dicts whose entries equal every given keyword argument.

    Lookup uses ``dict.get``, so a missing key compares as ``None``.
    """
    return [d for d in list_of_dicts
            if all(d.get(k) == v for k, v in kwargs.items())]
subdata = select_sublist(data, key1='value1')

The answer is too simple, so I guess we are missing some information. Anyway:
result = []
for datadict in data:
for key, value in datadict.items():
thefiltering()
if datadict.get('matchkey') == 'matchvalue':
result.append(datadict)
Also, your "main dictionary" is not a dictionary but a list. Just wanted to clear that up.

It's an old question, but for some reason there is no one-liner syntax answer:
{ k: v for k, v in <SOURCE_DICTIONARY>.items() if <CONDITION> }
For example:
src_dict = {1: 'a', 2: 'b', 3: 'c', 4: 'd'}


def predicate(k, v):
    """Keep only the entries whose key is even."""
    return k % 2 == 0


# Python 3: dict.items() replaces Python 2's iteritems(), and print is a function.
filtered_dict = {k: v for k, v in src_dict.items() if predicate(k, v)}
print("Source dictionary:", src_dict)
print("Filtered dictionary:", filtered_dict)
Will produce the following output:
Source dictionary: {1: 'a', 2: 'b', 3: 'c', 4: 'd'}
Filtered dictionary: {2: 'b', 4: 'd'}

Inspired by the answer of Skurmedal, I split this into a recursive scheme to work with a database of nested dictionaries. In this case, a "record" is the subdictionary at the trunk. The predicate defines which records we are after -- those that match some (key,value) pair where these pairs may be deeply nested.
def filter_dict(the_dict, predicate=lambda k, v: True):
    """Yield ``(key, record)`` for each dict-valued entry containing a match.

    A record matches when ``predicate(k, v)`` is true for any key/value
    pair at any depth inside it. Top-level values that are not dicts are
    never yielded.
    """
    for k, v in the_dict.items():
        if isinstance(v, dict) and _filter_dict_sub(predicate, v):
            yield k, v


def _filter_dict_sub(predicate, the_dict):
    """Return True if any pair in ``the_dict``, at any depth, satisfies ``predicate``."""
    for k, v in the_dict.items():
        # Search nested dicts first, then test the pair itself.
        if isinstance(v, dict) and _filter_dict_sub(predicate, v):
            return True
        if predicate(k, v):
            return True
    return False
Since this is a generator, you may need to wrap with dict(filter_dict(the_dict)) to obtain a filtered dictionary.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Is there a pythonic way to process tree-structured dict keys? - python

Related

From a given nested dictionary, find all the nested keys sequences

How to return a dict with lists as values but it only returns me the keys

Remove some field from nested dictionary?

How can I construct a nested dictionary?

Filtering dictionaries and creating sub-dictionaries based on keys/values in Python?

Categories

Resources