Manipulating deep values of a dictionary - python

I would like to go from
first =
{'a' :
{'b' :
{'c' : ['d', 'e'],
'f' : ['g']
}
}
}
to
z = 0
second =
{'a' :
{'b' :
{'c' :
{'d' : z,
'e' : z
},
'f' :
{'g' : z}
}
}
}
I'm looking for the pythonic way to do this.
I want to change every 3rd level elements of first (c and f) from a list to a dictionary where keys are the elements of the list (d, e for example).

>>> {k1: {k2: {k3: {k4:0 for k4 in v3} for k3, v3 in v2.items()} for k2, v2 in v1.items()} for k1, v1 in first.items()}
{'a': {'b': {'c': {'e': 0, 'd': 0}, 'f': {'g': 0}}}}
=)
(If you don't want to get shot in code review, stick to the advice by 9000 in the comments, please. I.e. get all third level dicts and manipulate those directly.)

Recursion is your friend! It allows you to generalize the operation to the last level of a dictionary, which can be arbitrarily long and even not symmetric (e.g. having a branch with 5 levels and one with 3 levels).
first = \
{'a' :
{'b' :
{'c' : ['d', 'e'],
'f' : ['g']
}
}
}
def rec_list_to_dict(inp, sub_value = 0):
if isinstance(inp, list):
return dict(zip(inp, [sub_value ]*len(inp)))
elif isinstance(inp, dict):
return {key : rec_list_to_dict(el, sub_value) for key, el in inp.items()}
else:
raise AttributeError
second = rec_list_to_dict(first, sub_value = 0)

This is a nice place to use the visitor pattern (not sure if it is considered a pattern, of course):
def visit_dict(d, new_value=None):
new_d = {}
for k, v in d.items():
if isinstance(v, dict):
new_d[k] = visit_dict(v, new_value)
elif isinstance(v, list):
new_d[k] = visit_list(v, new_value)
else:
new_d[k] = v
return new_d
def visit_list(l, new_value=None):
return {item: new_value for item in l}
first = {
'a' : {
'b' : {
'c' : ['d', 'e'],
'f' : ['g']
}
}
}
z = 0
print(visit_dict(first, z))
# {
# 'a' : {
# 'b' : {
# 'c' : {
# 'd': 0,
# 'e': 0
# }
# 'f' : {
# 'g': 0
# }
# }
# }
# }
You customize each step according to the type you visit each time. This code assumes lists have hashable values, but you can of course extend it.

Related

Get key values for certain fields in JSON response

My json data would look like this:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Is there a way I can get values for certain fields in the response along with their keys. So from this response, the fields for which I expect values are a, c,e,g,i,j along with the respective keys.
Eg: [a:1,c:2,e:3,g:4,i:5,j:6]. Could this be done?
My response contained something like:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4,
"k":[
"l","m"]
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Which resulted in the error. I have made the following fix for it.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
if isinstance(d,dict):
get_key_value(d, res_dct, lst)
else:
lst.append(f'{k}:{v}')
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(staging_dict, res_dct, lst)
You can use a recursive function and store key & value if only value not list or dict.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
get_key_value(d, res_dct, lst)
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(dct, res_dct, lst)
print(res_dct)
print(lst)
Output:
# res_dct
{'a': 1, 'c': 2, 'e': 3, 'g': 4, 'i': 5, 'j': 6}
# lst
['a:1', 'c:2', 'e:3', 'g:4', 'i:5', 'j:6']

Flatten nested dictionary to key and joined string value

I need help with a function to flatten a nested dictionary in the following format:
dict_test = {
"id" : "5d4c2c0fd89234260ec81",
"Reference Number" : "JA-L800D-191",
"entities_discovered" : {
"OTHER_ID" : [
"L800DFAG02191"
],
"CODE_ID" : [
"160472708",
"276954773"
]
},
"label_field" : [
"ELECTRONICS",
"HDMI"
],
"numeric_field" : [
491,
492
],
}
The function I was working with, flattens the dictionary to one dimension (key:value) as I want, but doesn´t join the values within the same key iteration.
def flatten(d):
agg = {}
def _flatten(d, prev_key=''):
if isinstance(d, list):
for i, item in enumerate(d):
new_k = '%s.%s' % (prev_key, i) if prev_key else i
_flatten(item, prev_key=new_k)
elif isinstance(d, dict):
for k, v in d.items():
new_k = '%s.%s' % (prev_key, k) if prev_key else k
_flatten(v, prev_key=new_k)
else:
agg[prev_key] = d
_flatten(d)
return agg
My current output is:
{
"id" : "5d4c2c0fd89234260ec81",
"Reference Number" : "JA-L800D-191",
"entities_discovered.OTHER_ID.0" : "L800DFAG02191",
"entities_discovered.CODE_ID.0" : "160472708",
"entities_discovered.CODE_ID.1" : "276954773",
"label_field.0" : "ELECTRONICS",
"label_field.1" : "HDMI",
"numeric_field.0" : 491,
"numeric_field.1" : 492
}
But actually I´m looking for something like (joining the values into the same string and separated by , or |):
{
"id" : "5d4c2c0fd89234260ec81",
"Reference Number" : "JA-L800D-191",
"OTHER_ID" : "L800DFAG02191",
"CODE_ID" : "160472708, 276954773",
"label_field" : "ELECTRONICS, HDMI",
"numeric_field" : ¨491, 492¨
}
You can use join() built-in method to join values together.
def do():
dict_test = {
"id": "5d4c2c0fd89234260ec81",
"Reference Number": "JA-L800D-191",
"entities_discovered": {
"OTHER_ID": [
"L800DFAG02191"
],
"CODE_ID": [
"160472708",
"276954773"
]
},
"label_field": [
"ELECTRONICS",
"HDMI"
],
"numeric_field": [
491,
492
],
}
new_dict = {}
for key, value in dict_test.items():
if isinstance(value, dict):
for _key, _value in value.items():
if isinstance(_value, list):
new_dict.update({_key: ', '.join([str(item) for item in _value])})
elif isinstance(value, list):
new_dict.update({key: ', '.join([str(item) for item in value])})
else:
new_dict.update({key: value})
return new_dict
if __name__ == '__main__':
print(do())
Output:
{
'id': '5d4c2c0fd89234260ec81',
'Reference Number': 'JA-L800D-191',
'OTHER_ID': 'L800DFAG02191',
'CODE_ID': '160472708, 276954773',
'label_field': 'ELECTRONICS, HDMI',
'numeric_field': '491, 492'
}
def recursive_flatten_dict(tmp, dict_test):
for i,v in dict_test.items():
if type(v) == type({}):
recursive_flatten_dict(tmp,v)
else:
tmp[i] = v
return tmp
recursive_flatten_dict({},dict_test)
Simple recursion using a generator:
def flatten(d):
for a, b in d.items():
if isinstance(b, dict):
yield from flatten(b)
else:
yield (a, b if not isinstance(b, list) else ', '.join(map(str, b)))
print(dict(flatten(dict_test)))
Output:
{
'id': '5d4c2c0fd89234260ec81',
'Reference Number': 'JA-L800D-191',
'OTHER_ID': 'L800DFAG02191',
'CODE_ID': '160472708, 276954773',
'label_field': 'ELECTRONICS, HDMI',
'numeric_field': '491, 492'
}
def flatten(dict_test):
for key in ['label_field', 'numeric_field']:
dict_test[key]= ', '.join([str(c) for c in dict_test[key]])
for c in dict_test['entities_discovered'].keys():
dict_test[c]= ', '.join(dict_test['entities_discovered'][c])
return dict_test
The above function does the job. I hope this what you are looking for?

Sort based Dictionary of Dictionary of Dictionary Values?

I have a dictionary like this
d = {
'Benefits': {
1: {
'BEN1': {
'D': [{'description': 'D1'}],
'C': [{'description': 'C1'}]
}
},
2: {
'BEN2': {
'D': [{'description': 'D2'}],
'C': [{'description': 'C2'}]
}
}
}
}
I am trying to sort dictionary based on KEY OF LAST VALUES(LIST).
FOR EXAMPLE
I am looking for get dictionary value like 'C' IN first and 'D' in second
I'm trying to get correct order. Here is code:
d1 = collections.OrderedDict(sorted(d.items()))
Unfortunately didn't get correct result
This is my expected output
{'Benefits':
{1:
{'BEN1':
{'C':[{'description': 'C1'}], 'D': [{'description': 'D1'}]
}
},
2:
{'BEN2':
{'C': [{'description': 'C2'}], 'D': [{'description': 'D2'}]
}
}
}
}
I am using python 3.5 . I am trying to get order like this
{'C':[{'description': 'C1'}], 'D': [{'description': 'D1'}]}
The following code will sort any dictionary by its key and recursively sort any dictionary value that is also a dictionary by its key and makes no assumption as to the content of the dictionary being sorted. It uses an OrderedDict but if you can be sure it will always run on Python 3.6 or greater, a simple change can be made to use a dict.
from collections import OrderedDict
d = {
'Benefits': {
1: {
'BEN1': {
'D': [{'description': 'D1'}],
'C': [{'description': 'C1'}]
}
},
2: {
'BEN2': {
'D': [{'description': 'D2'}],
'C': [{'description': 'C2'}]
}
}
}
}
def sort_dict(d):
items = [[k, v] for k, v in sorted(d.items(), key=lambda x: x[0])]
for item in items:
if isinstance(item[1], dict):
item[1] = sort_dict(item[1])
return OrderedDict(items)
#return dict(items)
print(sort_dict(d))
See demo
d1 = collections.OrderedDict(sorted(d.items()))
This is not working because it is sorting only on the Benefits item. Here you want to sort inner items, so we have to reach the inner items and sort them.
d1 = {'Benefits': {}}
for a_benefit in d['Benefits']:
d1['Benefits'][a_benefit] = {}
for a_ben in d['Benefits'][a_benefit]:
d1['Benefits'][a_benefit][a_ben] = dict(collections.OrderedDict(sorted(d['Benefits'][a_benefit][a_ben].items())))

Better way to filter the rows with python

import pprint
full_key_list = set(["F1", "F2", "F3", "F4", "F5"]) # all expected field
filt_key_list = set(["F2", "F5"]) # fields should not be included
cont_list = [] # stores all filtered documents
read_in_cont1 = { "F1" : 1, "F2" : True, "F3" : 'abc', "F4" : 130, "F5" : 'X1Z'} # document1
read_in_cont2 = { "F1" : 2, "F2" : False, "F3" : 'efg', "F4" : 100, "F5" : 'X4Z'} # document1
read_in_cont3 = { "F1" : 3, "F2" : True, "F3" : 'acd', "F4" : 400, "F5" : 'X2Z'} # document1
# assume that read_in_conts contains list of documents
read_in_conts = [read_in_cont1, read_in_cont2, read_in_cont3]
for one_item in read_in_conts: # for each document in the list
cont_dict = {}
for key, value in one_item.iteritems():
if key not in filt_key_list: # if the field should be included
cont_dict[key] = value # add this field to the temporary document
cont_list.append(cont_dict)
pprint.pprint(cont_list)
Output:
[{'F1': 1, 'F3': 'abc', 'F4': 130},
{'F1': 2, 'F3': 'efg', 'F4': 100},
{'F1': 3, 'F3': 'acd', 'F4': 400}]
Here is what I want to achieve:
Given an original raw collection of documents (i.e. read_in_conts for simulation),
I need to filter the fields so that they are not included in further process. Above
is my implementation in Python. However, I think it is too heavy and expect to see
a clean solution for this task.
Thank you
cont_list = [dict((k,v) for k,v in d.iteritems() if k not in filt_key_list)
for d in read_in_conts]
or if you want a slightly more factored version:
filter_out_keys = lambda d, x: dict((k,v) for k,v in d.iteritems() if k not in x)
cont_list = [filter_out_keys(d, filt_key_list) for d in read_in_conts]
P.S. I'd suggest making filt_key_list a set() instead - it will make in checks faster.
def filter_dict(d, keys):
return dict((key, value) for key, value in d.iteritems() if key not in filt_key_list))
cont_list = [filter_dict(d, filt_key_list) for d in read_in_conts]
You code is fine. You can make it slightly shorter:
# sets can be faster if `ignored_keys` is actually much longer
ignored_keys = set(["F2", "F5"])
# the inline version of your loop
# a dict comprehension inside a list comprehension
filtered = [{k : v for k,v in row.iteritems() if k not in ignored_keys}
for row in read_in_conts]

Python - Unflatten dict

I have this multi-dimensional dict:
a = {'a' : 'b', 'c' : {'d' : 'e'}}
And written simple function to flatten that dict:
def __flatten(self, dictionary, level = []):
tmp_dict = {}
for key, val in dictionary.items():
if type(val) == dict:
tmp_dict.update(self.__flatten(val, level + [key]))
else:
tmp_dict['.'.join(level + [key])] = val
return tmp_dict
After call this function with dict a i get in result:
{'a' : 'b', 'c.d' : 'e'}
Now, after making few instructions on this flattened dict i need to build new, multi-dimensional dict from that flattened. Example:
>> unflatten({'a' : 0, 'c.d' : 1))
{'a' : 0, 'c' : {'d' : 1}}
The only problem I have is that i do not have a function unflatten :)
Can anyone help with this? I have no idea how to do it.
EDIT:
Another example:
{'a' : 'b', 'c.d.e.f.g.h.i.j.k.l.m.n.o.p.r.s.t.u.w' : 'z'}
Should be after unflatten:
{'a': 'b', 'c': {'d': {'e': {'f': {'g': {'h': {'i': {'j': {'k': {'l': {'m': {'n': {'o': {'p': {'r': {'s': {'t': {'u': {'w': 'z'}}}}}}}}}}}}}}}}}}}
And another:
{'a' : 'b', 'c.d' : 'z', 'c.e' : 1}
To:
{'a' : 'b', 'c' : {'d' : 'z', 'e' : 1}}
This greatly increases the difficulty of the task, i know. Thats why i had problem with this and found no solution in hours..
def unflatten(dictionary):
resultDict = dict()
for key, value in dictionary.items():
parts = key.split(".")
d = resultDict
for part in parts[:-1]:
if part not in d:
d[part] = dict()
d = d[part]
d[parts[-1]] = value
return resultDict
from collections import defaultdict
def unflatten(d):
ret = defaultdict(dict)
for k,v in d.items():
k1,delim,k2 = k.partition('.')
if delim:
ret[k1].update({k2:v})
else:
ret[k1] = v
return ret
Here's one utilizing Python 3.5+ features, like typing and destructuring assignments. Try the tests out on repl.it.
from typing import Any, Dict
def unflatten(
d: Dict[str, Any],
base: Dict[str, Any] = None,
) -> Dict[str, Any]:
"""Convert any keys containing dotted paths to nested dicts
>>> unflatten({'a': 12, 'b': 13, 'c': 14}) # no expansion
{'a': 12, 'b': 13, 'c': 14}
>>> unflatten({'a.b.c': 12}) # dotted path expansion
{'a': {'b': {'c': 12}}}
>>> unflatten({'a.b.c': 12, 'a': {'b.d': 13}}) # merging
{'a': {'b': {'c': 12, 'd': 13}}}
>>> unflatten({'a.b': 12, 'a': {'b': 13}}) # insertion-order overwrites
{'a': {'b': 13}}
>>> unflatten({'a': {}}) # insertion-order overwrites
{'a': {}}
"""
if base is None:
base = {}
for key, value in d.items():
root = base
###
# If a dotted path is encountered, create nested dicts for all but
# the last level, then change root to that last level, and key to
# the final key in the path.
#
# This allows one final setitem at the bottom of the loop.
#
if '.' in key:
*parts, key = key.split('.')
for part in parts:
root.setdefault(part, {})
root = root[part]
if isinstance(value, dict):
value = unflatten(value, root.get(key, {}))
root[key] = value
return base
I wrote one years ago in Python 2 and 3 which I've adapted below. It was for making it easier to check if a given dictionary is a subset of a larger dictionary irrespective of whether provided in flattened or scaffolded form.
A bonus feature: Should there be consecutive integer indexes (as in 0, 1, 2, 3, 4 etc.), this will also convert them back into lists as well.
def unflatten_dictionary(field_dict):
field_dict = dict(field_dict)
new_field_dict = dict()
field_keys = list(field_dict)
field_keys.sort()
for each_key in field_keys:
field_value = field_dict[each_key]
processed_key = str(each_key)
current_key = None
current_subkey = None
for i in range(len(processed_key)):
if processed_key[i] == "[":
current_key = processed_key[:i]
start_subscript_index = i + 1
end_subscript_index = processed_key.index("]")
current_subkey = int(processed_key[start_subscript_index : end_subscript_index])
# reserve the remainder descendant keys to be processed later in a recursive call
if len(processed_key[end_subscript_index:]) > 1:
current_subkey = "{}.{}".format(current_subkey, processed_key[end_subscript_index + 2:])
break
# next child key is a dictionary
elif processed_key[i] == ".":
split_work = processed_key.split(".", 1)
if len(split_work) > 1:
current_key, current_subkey = split_work
else:
current_key = split_work[0]
break
if current_subkey is not None:
if current_key.isdigit():
current_key = int(current_key)
if current_key not in new_field_dict:
new_field_dict[current_key] = dict()
new_field_dict[current_key][current_subkey] = field_value
else:
new_field_dict[each_key] = field_value
# Recursively unflatten each dictionary on each depth before returning back to the caller.
all_digits = True
highest_digit = -1
for each_key, each_item in new_field_dict.items():
if isinstance(each_item, dict):
new_field_dict[each_key] = unflatten_dictionary(each_item)
# validate the keys can safely converted to a sequential list.
all_digits &= str(each_key).isdigit()
if all_digits:
next_digit = int(each_key)
if next_digit > highest_digit:
highest_digit = next_digit
# If all digits and can be sequential order, convert to list.
if all_digits and highest_digit == (len(new_field_dict) - 1):
digit_keys = list(new_field_dict)
digit_keys.sort()
new_list = []
for k in digit_keys:
i = int(k)
if len(new_list) <= i:
# Pre-populate missing list elements if the array index keys are out of order
# and the current element is ahead of the current length boundary.
while len(new_list) <= i:
new_list.append(None)
new_list[i] = new_field_dict[k]
new_field_dict = new_list
return new_field_dict
# Test
if __name__ == '__main__':
input_dict = {'a[0]': 1,
'a[1]': 10,
'a[2]': 5,
'b': 10,
'c.test.0': "hi",
'c.test.1': "bye",
"c.head.shoulders": "richard",
"c.head.knees": 'toes',
"z.trick.or[0]": "treat",
"z.trick.or[1]": "halloween",
"z.trick.and.then[0]": "he",
"z.trick.and.then[1]": "it",
"some[0].nested.field[0]": 42,
"some[0].nested.field[1]": 43,
"some[2].nested.field[0]": 44,
"mixed": {
"statement": "test",
"break[0]": True,
"break[1]": False,
}}
expected_dict = {'a': [1, 10, 5],
'b': 10,
'c': {
'test': ['hi', 'bye'],
'head': {
'shoulders': 'richard',
'knees' : 'toes'
}
},
'z': {
'trick': {
'or': ["treat", "halloween"],
'and': {
'then': ["he", "it"]
}
}
},
'some': {
0: {
'nested': {
'field': [42, 43]
}
},
2: {
'nested': {
'field': [44]
}
}
},
"mixed": {
"statement": "test",
"break": [True, False]
}}
# test
print("Input:")
print(input_dict)
print("====================================")
print("Output:")
actual_dict = unflatten_dictionary(input_dict)
print(actual_dict)
print("====================================")
print(f"Test passed? {expected_dict==actual_dict}")
As a rough-draft (could use a little improvement in variable name choice, and perhaps robustness, but it works for the example given):
def unflatten(d):
result = {}
for k,v in d.iteritems():
if '.' in k:
k1, k2 = k.split('.', 1)
v = {k2: v}
k = k1
result[k] = v
return result

Categories