How to change all the values in a nested dictionary in python? - python

I have sample data that looks like this (nested and is dynamic so it does change on the fly):
...
"counts":{
"1":{
"1":21082,
"2":14999
},
"2":{
"1":9180,
"2":10023
}
},
...
I need to recursively go through and replace all the values with -1, so the result will be something like:
...
"counts":{
"1":{
"1":-1,
"2":-1
},
"2":{
"1":-1,
"2":-1
}
},
...
I can do it if the dictionary is only one level deep like so:
result['counts'] = {x: '-1' for x in result['counts']}
How do I do it for 2 or more levels on the fly?
Also, sometimes the keys can be like `(1, 2)` or other things, like so:
...
"counts":{
"(1, 2)":{
"1":21082,
"2":14999
},
"(2, 1)":{
"1":9180,
"2":10023
}
},
...
Is there any easy way to do this?

Use a recursive function that calls itself on each nested dict:
>>> def replace_value(d, new_val):
...     return {
...         k: replace_value(v, new_val) if isinstance(v, dict) else new_val
...         for k, v in d.items()
...     }
...
>>> replace_value({"foo": {"foo": "bar"}}, -1)
{'foo': {'foo': -1}}
Alternate version with the base case outside the comprehension:
>>> def replace_value(d, new_val):
...     if not isinstance(d, dict):
...         return new_val
...     return {k: replace_value(v, new_val) for k, v in d.items()}
...
>>> replace_value({"foo": {"foo": "bar"}}, -1)
{'foo': {'foo': -1}}

My choice for similar tasks is remap from boltons:
>>> def visitor(path, key, value):
...     if not isinstance(value, dict):
...         return key, -1
...     return True
...
>>> from boltons.iterutils import remap # pip install boltons
>>> d
{'counts': {'1': {'1': 21082, '2': 14999}, '2': {'1': 9180, '2': 10023}}}
>>> remap(d, visit=visitor)
{'counts': {'1': {'1': -1, '2': -1}, '2': {'1': -1, '2': -1}}}
The visit callable accepts a path, key, and value, and should return the new key and value (or return True as a shorthand to keep old item unmodified).
I prefer this to a recursive function, because it uses a stack-based iterative approach. It is too easy to blow past the recursion limit in Python when dealing with nested data structures.

Related

Get key values for certain fields in JSON response

My json data would look like this:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Is there a way I can get the values for certain fields in the response along with their keys? From this response, the fields for which I expect values are a, c, e, g, i, j, along with their respective keys.
Eg: [a:1,c:2,e:3,g:4,i:5,j:6]. Could this be done?
My response contained something like:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4,
"k":[
"l","m"]
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Which resulted in the error. I have made the following fix for it.
def get_key_value(dct, res_dct, lst):
    """Collect scalar key/value pairs from a nested dict/list structure.

    Args:
        dct: The (possibly nested) dictionary to walk.
        res_dct: Output dict; receives ``key -> value`` for every value that
            is neither a list nor a dict.
        lst: Output list; receives ``"key:value"`` strings for the same
            pairs, plus one ``"key:element"`` string for every non-dict
            element found directly inside a list value.

    Both output containers are mutated in place; nothing is returned.
    """
    for k, v in dct.items():
        if isinstance(v, list):
            for d in v:
                if isinstance(d, dict):
                    get_key_value(d, res_dct, lst)
                else:
                    # Record the individual element. Formatting the whole
                    # list here would add one identical entry per element.
                    lst.append(f'{k}:{d}')
        elif isinstance(v, dict):
            get_key_value(v, res_dct, lst)
        else:
            res_dct[k] = v
            # If you want to store in 'list' you can store as string
            lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(staging_dict, res_dct, lst)
You can use a recursive function and store the key and value only if the value is not a list or dict.
def get_key_value(dct, res_dct, lst):
    """Collect every scalar key/value pair from a nested dict/list structure.

    Args:
        dct: The (possibly nested) dictionary to walk.
        res_dct: Output dict; receives ``key -> value`` for every value that
            is neither a list nor a dict.
        lst: Output list; receives the same pairs as ``"key:value"`` strings.

    Both output containers are mutated in place; nothing is returned.
    """
    for k, v in dct.items():
        if isinstance(v, list):
            for d in v:
                # Lists may mix dicts with plain values (e.g. ["l", "m"]).
                # Only dicts carry key/value pairs, so anything else is
                # skipped instead of failing on the missing .items().
                if isinstance(d, dict):
                    get_key_value(d, res_dct, lst)
        elif isinstance(v, dict):
            get_key_value(v, res_dct, lst)
        else:
            res_dct[k] = v
            # If you want to store in 'list' you can store as string
            lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(dct, res_dct, lst)
print(res_dct)
print(lst)
Output:
# res_dct
{'a': 1, 'c': 2, 'e': 3, 'g': 4, 'i': 5, 'j': 6}
# lst
['a:1', 'c:2', 'e:3', 'g:4', 'i:5', 'j:6']

Get keys of all child dictionaries

I have a dictionary of this kind, where the values are dictionaries as well, and those dictionaries can have nested dictionaries in them, like this:
# Sample nested dictionary: every value is either None or another dict.
data = {
    'key1': {
        'keya': {
            'keyc': None,
        },
        'keyb': None,
    },
    'key2': {
        'keyi': None,
        'keyii': None,
    },
}
The dictionaries can be many (we don't know how many dictionaries can be there inside the values). How can I get all keys in all values like this?
['key1', 'key2', 'keya', 'keyb', 'keyi', 'keyii']
you could get all the keys using recursion
def get_all_keys_rec(dic):
    """Return all keys of ``dic`` and of every dict nested in its values.

    The keys of the current level come first, followed by the keys found in
    each nested dict (in value order), recursively.

    Args:
        dic: A dictionary whose values may themselves be dictionaries.

    Returns:
        A list of keys; duplicates are kept.
    """
    keys = list(dic)
    for val in dic.values():
        # isinstance (rather than an exact type comparison) so that dict
        # subclasses such as OrderedDict or defaultdict are descended into.
        if isinstance(val, dict):
            keys.extend(get_all_keys_rec(val))
    return keys
print(get_all_keys_rec(data))
output:
['key1', 'key2', 'keya', 'keyb', 'keyc', 'keyi', 'keyii']
# Iterative walk over `data`, collecting keys without recursion.
# NOTE: at each level the walk descends into the FIRST nested dict it meets
# and never returns to that level, so sibling keys that come after a nested
# dict are not collected.
keys = []
for key, val in data.items():
    keys.append(key)
    item = val if isinstance(val, dict) else None
    while item is not None:
        deeper = None
        for k, v in item.items():
            keys.append(k)
            if isinstance(v, dict):
                # Stop scanning this level and continue one level down.
                deeper = v
                break
        item = deeper
print(keys)
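This outputs (note that 'keyb' is missing: once the loop descends into a nested dict, the remaining sibling keys at that level are never visited):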
['key1', 'keya', 'keyc', 'key2', 'keyi', 'keyii']
Recursive generation, yield from should be your partner:
>>> data = {'key1': {
... 'keya': {
... 'keyc': None
... },
... 'keyb': None
...
... },
... 'key2': {
... 'keyi': None,
... 'keyii': None
... }
... }
>>> def get_all_keys(dct):
...     def gen_all_keys(d):
...         if isinstance(d, dict):
...             yield from d
...             for v in d.values():
...                 yield from gen_all_keys(v)
...     return list(gen_all_keys(dct))
...
>>> get_all_keys(data)
['key1', 'key2', 'keya', 'keyb', 'keyc', 'keyi', 'keyii']

python itertools groupby return tuple

I need to parse a flattened structure and create a nested structure using the provided list of keys. I have solved the problem, but I am looking for improvements and would like to learn what I can change in my code. Can somebody review it and refactor it?
src_data = [
{
"key1": "XX",
"key2": "X111",
"key3": "1aa",
"key4": 1
},
{
"key1": "YY",
"key2": "Y111",
"key3": "1bb",
"key4": 11
},
{
"key1": "ZZ",
"key2": "Z111",
"key3": "1cc",
"key4": 2.4
},
{
"key1": "AA",
"key2": "A111",
"key3": "1cc",
"key4": 33333.2122
},
{
"key1": "BB",
"key2": "B111",
"key3": "1bb",
"key4": 2
},
]
this is my code I developed so far creating the final result.
def plant_tree(ll):
    """Build a tree of nested dicts from an iterable of key paths.

    Each element of ``ll`` is a sequence of keys; every key becomes a dict
    nested under the previous one. Paths sharing a prefix share the
    corresponding branch. Leaves are empty dicts.
    """
    root = {}
    for path in ll:
        branch = root
        for step in path:
            # Reuse the existing child or create an empty one, then descend.
            branch = branch.setdefault(step, {})
    return root
def make_nested_object(tt, var):
    """Wrap ``var`` in nested single-key dicts, one per key in ``tt``.

    Returns a dict with the path under ``'n_path'`` and the nested structure
    (outermost key is ``tt[0]``, innermost value is ``var``) under
    ``'n_structure'``.
    """
    nested = var
    # Build from the inside out: the last key wraps the value first.
    for key in tt[::-1]:
        nested = {key: nested}
    return {'n_path': tt, 'n_structure': nested}
def getFromDict(dataDict, mapList):
    """Return the value reached by indexing ``dataDict`` with each key in ``mapList`` in turn."""
    node = dataDict
    for step in mapList:
        node = node[step]
    return node
def set_nested_item(dataDict, mapList, val):
    """Set item in nested dictionary.

    Walks ``dataDict`` along every key of ``mapList`` except the last, then
    assigns ``val`` under the last key of the container reached.

    Args:
        dataDict: The nested dictionary to modify in place.
        mapList: Non-empty sequence of keys leading to the target slot.
        val: The value to store.

    Returns:
        The same ``dataDict`` object, after modification.

    Raises:
        KeyError: If an intermediate key does not exist.
        IndexError: If ``mapList`` is empty.
    """
    # An explicit walk avoids depending on an unqualified ``getitem`` name
    # being imported somewhere else in the module.
    parent = dataDict
    for step in mapList[:-1]:
        parent = parent[step]
    parent[mapList[-1]] = val
    return dataDict
def update_tree(data_tree):
    """Fill ``data_tree`` with the grouped rows held in the module-level ``res_out``.

    For every ``(path, rows)`` pair in ``res_out`` a nested object is built,
    its leaf is read back, and that leaf is stored in ``data_tree`` at
    ``path``. The updated tree is returned.
    """
    for path, rows in res_out.items():
        # MAKE NESTED OBJECT
        nested = make_nested_object(path, rows)
        leaf_path = nested['n_path']
        leaf_value = getFromDict(nested['n_structure'], leaf_path)
        data_tree = set_nested_item(data_tree, leaf_path, leaf_value)
    return data_tree
This is the customized itemgetter function from this question:
def customed_itemgetter(*args):
    """Return an ``itemgetter`` for ``args`` whose result is always a tuple.

    ``operator.itemgetter`` returns a bare value when given exactly one key
    and a tuple when given two or more. This wrapper puts the single-key
    result in a one-element tuple so callers can always treat it as a tuple.
    """
    f = itemgetter(*args)
    # Two or more keys already yield a tuple; only the single-key case needs
    # wrapping. (A threshold of 2 would wrap two-key results as ((a, b),).)
    if len(args) > 1:
        return f
    return lambda obj: (f(obj),)
define the nesting level
nesting_keys = ['key1', 'key3', 'key2']
grouper = customed_itemgetter(*nesting_keys)
ii = groupby(sorted(src_data, key=grouper), grouper)
res_out = {key: [{k:v for k,v in i.items() if k not in nesting_keys} for i in group] for key,group in ii}
#
ll = ([dd[x] for x in nesting_keys] for dd in src_data)
data_tree = plant_tree(ll)
get results
result = update_tree(data_tree)
How can I improve my code?
If the itemgetter [Python-doc] is given a single element, it returns that single element, and does not wrap it in a singleton-tuple.
We can however construct a function for that, like:
from operator import itemgetter
def itemgetter2(*args):
    """Return an ``itemgetter`` for ``args`` whose result is always a tuple.

    With a single key, ``operator.itemgetter`` returns the bare element, so
    that result is wrapped in a one-element tuple. With two or more keys the
    plain ``itemgetter`` already returns a tuple and is used as is.
    """
    f = itemgetter(*args)
    # Only the single-key case needs wrapping; two keys already give a tuple.
    if len(args) > 1:
        return f
    return lambda obj: (f(obj),)
then we can thus use the new itemgetter2, like:
grouper = itemgetter2(*ll)
ii = groupby(sorted(src_data, key=grouper), grouper)
EDIT: Based on your question however, you want to perform multilevel grouping, we can make a function for that, like:
def multigroup(groups, iterable, index=0):
    """Group ``iterable`` into nested dicts, one nesting level per key in ``groups``.

    Items are sorted and grouped by ``groups[index]``; each group is then
    grouped by the next key, and so on. Once every key has been used, the
    remaining items are returned as a list.
    """
    if index >= len(groups):
        return list(iterable)
    getter = itemgetter(groups[index])
    nested = {}
    # groupby only merges adjacent items, hence the sort on the same key.
    for bucket, members in groupby(sorted(iterable, key=getter), getter):
        nested[bucket] = multigroup(groups, members, index=index + 1)
    return nested
For the src_data in the question, this then generates:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'a': 1, 'b': 2, 'z': 3}]}, 2: {3: [{'a': 2, 'b': 3, 'e': 2}]}, 4: {3: [{'a': 4, 'x': 3, 'b': 3}]}}
You can post-process the values in the list(..) call however. We can for example generate dictionaries without the elements in the grouping columns:
def multigroup(groups, iterable):
    """Group ``iterable`` into nested dicts keyed by each name in ``groups``.

    One nesting level is created per grouping key. The innermost value is a
    list of the grouped items, each copied without the grouping keys.
    """
    excluded = set(groups)
    getters = [itemgetter(name) for name in groups]

    def descend(items, level=0):
        # All grouping keys consumed: emit the rows minus the grouping keys.
        if level >= len(groups):
            return [
                {field: val for field, val in row.items() if field not in excluded}
                for row in items
            ]
        getter = getters[level]
        nested = {}
        # groupby only merges adjacent items, hence the sort on the same key.
        for bucket, members in groupby(sorted(items, key=getter), getter):
            nested[bucket] = descend(members, level + 1)
        return nested

    return descend(iterable)
For the given sample input, we get:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'z': 3}]}, 2: {3: [{'e': 2}]}, 4: {3: [{'x': 3}]}}
or for the new sample data:
>>> pprint(multigroup(['key1', 'key3', 'key2'], src_data))
{'AA': {'1cc': {'A111': [{'key4': 33333.2122}]}},
'BB': {'1bb': {'B111': [{'key4': 2}]}},
'XX': {'1aa': {'X111': [{'key4': 1}]}},
'YY': {'1bb': {'Y111': [{'key4': 11}]}},
'ZZ': {'1cc': {'Z111': [{'key4': 2.4}]}}}

Lift up all occurrences of a type in a nested dictionary to a top level key

I have a need in a project to find all of a given type in a nested dictionary and move them all to a top level key in the same dictionary.
So far I have the below code, which seems to work. In the example I'm looking for all the items that are integers and moving them to a 'numbers' key.
I'd prefer it if the lift_numbers_to_top function made and returned a copy of the dictionary rather than editing it in place, but I haven't been able to work out a nice way to pass the copy and the numbers back from the recursive function to itself, if that makes sense.
a_dictionary = {
"one": 1,
"two": 2,
"text": "Hello",
"more_text": "Hi",
"internal_dictionary": {
"three": 3,
"two": 2,
"even_more_text": "Hey",
"another_internal_dictionary": {
"four": 4,
"five": 5,
"last_text": "howdy"
}
}
}
def extract_integers(dictionary, level_key=None):
    """Return the entries of ``dictionary`` whose value is exactly an ``int``.

    Only the top level is inspected. When ``level_key`` is truthy, each
    returned key is prefixed with ``level_key + "__"``. Values of other
    types, including ``bool``, are ignored.
    """
    return {
        (level_key + "__" + name if level_key else name): value
        for name, value in dictionary.items()
        if type(value) is int
    }
def lift_numbers_to_top(dictionary, level_key=None):
    """Remove every integer value from a nested dict and return them flat.

    Integer entries are deleted from ``dictionary`` (and from every nested
    dict) IN PLACE and collected into a new flat dict. An integer found in a
    nested dict is stored under ``"<parent key>__<key>"``; integers at the
    top level keep their own key.

    Args:
        dictionary: The structure to process. Anything that is not a dict
            yields an empty result.
        level_key: Key under which ``dictionary`` is stored in its parent;
            used as the prefix for lifted keys. Falsy means no prefix.

    Returns:
        A dict mapping the (possibly prefixed) keys to the lifted integers.
    """
    numbers = {}
    if not isinstance(dictionary, dict):
        return numbers
    # Snapshot the items so entries can be deleted while looping. Deleting by
    # the original key (instead of re-deriving it by splitting the prefixed
    # key on "__") keeps this correct for keys that contain "__" themselves.
    for key, value in list(dictionary.items()):
        # Exact type check on purpose: bool is a subclass of int.
        if type(value) is int:
            numbers[level_key + "__" + key if level_key else key] = value
            del dictionary[key]
    for key, value in dictionary.items():
        numbers.update(lift_numbers_to_top(value, key))
    return numbers
a_dictionary['numbers'] = lift_numbers_to_top(a_dictionary)
print(a_dictionary)
Result:
{
'text': 'Hello',
'more_text': 'Hi',
'internal_dictionary': {
'even_more_text': 'Hey',
'another_internal_dictionary': {
'last_text': 'howdy'
},
},
'numbers': {
'one': 1,
'two': 2,
'internal_dictionary__two': 2,
'internal_dictionary__three': 3,
'another_internal_dictionary__four': 4,
'another_internal_dictionary__five': 5,
}
}
Use a match function to determine what to lift, and pass along the target object where you move key-value pairs to to recursive calls. If that target is missing, you know the current call is for the top-level. The match function should return the new key for the new dictionary.
To produce a new dictionary, just produce a new dictionary and put recursion results into that object.
I prefer to use @singledispatch to handle different types when recursing:
from functools import singledispatch


@singledispatch
def lift_values(obj, match, targetname=None, **kwargs):
    """Lift key-value pairs from a nested structure to the top

    For key-value pairs anywhere in the nested structure, if
    match(path, value) returns a value other than `None`, the
    key-value pair is moved to the top-level dictionary when targetname
    is None, or to a new dictionary stored under targetname when it is
    not None, using the return value of the match function as the key.
    path is the tuple of all keys and indices leading to the value.

    The input is not modified; a new structure is returned.

    For example, for an input

        {'foo': True, 'bar': [{'spam': False, 'ham': 42}]}

    and the match function lambda p, v: p if isinstance(v, bool) else None
    and targetname "flags", this function returns

        {'flags': {('foo',): True, ('bar', 0, 'spam'): False}, 'bar': [{'ham': 42}]}

    """
    # leaf nodes, no match testing needed, no moving of values
    return obj


@lift_values.register(list)
def _handle_list(obj, match, targetname=None, _path=(), **kwargs):
    # list values, no lifting, just passing on the recursive call.
    # targetname is accepted explicitly so that a positional targetname on a
    # top-level list is not mistaken for _path.
    return [lift_values(v, match, targetname, _path=_path + (i,), **kwargs)
            for i, v in enumerate(obj)]


@lift_values.register(dict)
def _handle_dict(obj, match, targetname=None, _path=(), _target=None):
    result = {}
    if _target is None:
        # this is the top-level object, key-value pairs are lifted to
        # a new dictionary stored at this level:
        if targetname is not None:
            _target = result[targetname] = {}
        else:
            # no target name? Lift key-value pairs into the top-level
            # object rather than a separate sub-object.
            _target = result
    for key, value in obj.items():
        new_path = _path + (key,)
        new_key = match(new_path, value)
        if new_key is not None:
            _target[new_key] = value
        else:
            result[key] = lift_values(
                value, match, _path=new_path, _target=_target)
    return result
I included a dispatch function for lists; your sample doesn't use lists, but these are common in JSON data structures so I anticipate you probably want it anyway.
The match function must accept two arguments, the path to the object this key-value pair was found in, and the value. It should return a new key to use or None if not to lift the value.
For your case, the match function would be:
def lift_integers(path, value):
    """Match function: return a lifted key for integer values, else ``None``.

    The key is the last two elements of ``path`` joined with ``"__"``.
    """
    if not isinstance(value, int):
        return None
    return '__'.join(path[-2:])
result = lift_values(a_dictionary, lift_integers, 'numbers')
Demo on your sample input dictionary:
>>> from pprint import pprint
>>> def lift_integers(path, value):
...     if isinstance(value, int):
...         return '__'.join(path[-2:])
...
>>> lift_values(a_dictionary, lift_integers, 'numbers')
{'numbers': {'one': 1, 'two': 2, 'internal_dictionary__three': 3, 'internal_dictionary__two': 2, 'another_internal_dictionary__four': 4, 'another_internal_dictionary__five': 5}, 'text': 'Hello', 'more_text': 'Hi', 'internal_dictionary': {'even_more_text': 'Hey', 'another_internal_dictionary': {'last_text': 'howdy'}}}
>>> pprint(_)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {'another_internal_dictionary__five': 5,
'another_internal_dictionary__four': 4,
'internal_dictionary__three': 3,
'internal_dictionary__two': 2,
'one': 1,
'two': 2},
'text': 'Hello'}
Personally, I'd use the full path as the key in the lifted dictionary to avoid name clashes; either by joining the full path into a new string key with some unique delimiter, or just by making the path tuple itself the new key:
>>> lift_values(a_dictionary, lambda p, v: p if isinstance(v, int) else None, 'numbers')
{'numbers': {('one',): 1, ('two',): 2, ('internal_dictionary', 'three'): 3, ('internal_dictionary', 'two'): 2, ('internal_dictionary', 'another_internal_dictionary', 'four'): 4, ('internal_dictionary', 'another_internal_dictionary', 'five'): 5}, 'text': 'Hello', 'more_text': 'Hi', 'internal_dictionary': {'even_more_text': 'Hey', 'another_internal_dictionary': {'last_text': 'howdy'}}}
>>> pprint(_)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {('internal_dictionary', 'another_internal_dictionary', 'five'): 5,
('internal_dictionary', 'another_internal_dictionary', 'four'): 4,
('internal_dictionary', 'three'): 3,
('internal_dictionary', 'two'): 2,
('one',): 1,
('two',): 2},
'text': 'Hello'}
You can walk through the dict recursively and pop all elements with int values to create a new dict:
>>> def extract(d):
...     new_d = {}
...     for k,v in d.items():
...         if type(v) == int:
...             new_d[k] = d[k]
...         elif type(v) == dict:
...             for k2,v2 in extract(v).items():
...                 new_d[k2 if '__' in k2 else k+'__'+k2] = v2
...     return new_d
...
>>> a_dictionary['numbers'] = extract(a_dictionary)
>>> pprint(a_dictionary)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {'another_internal_dictionary__five': 5,
'another_internal_dictionary__four': 4,
'internal_dictionary__three': 3,
'internal_dictionary__two': 2,
'one': 1,
'two': 2},
'text': 'Hello'}

Fastest way to convert a dict's keys & values from `unicode` to `str`?

I'm receiving a dict from one "layer" of code upon which some calculations/modifications are performed before passing it onto another "layer". The original dict's keys & "string" values are unicode, but the layer they're being passed onto only accepts str.
This is going to be called often, so I'd like to know what would be the fastest way to convert something like:
{ u'spam': u'eggs', u'foo': True, u'bar': { u'baz': 97 } }
...to:
{ 'spam': 'eggs', 'foo': True, 'bar': { 'baz': 97 } }
...bearing in mind the non-"string" values need to stay as their original type.
Any thoughts?
DATA = { u'spam': u'eggs', u'foo': frozenset([u'Gah!']), u'bar': { u'baz': 97 },
u'list': [u'list', (True, u'Maybe'), set([u'and', u'a', u'set', 1])]}
def convert(data):
    """Recursively convert the strings inside ``data`` with ``str()``.

    Strings are passed through ``str()``, mappings are rebuilt as plain
    ``dict`` objects, any other iterable is rebuilt with its own type, and
    everything else is returned unchanged.
    """
    # NOTE: basestring and dict.iteritems() are Python 2 names.
    if isinstance(data, basestring):
        return str(data)
    elif isinstance(data, collections.Mapping):
        # Each (key, value) tuple is itself converted by the Iterable branch.
        return dict(map(convert, data.iteritems()))
    elif isinstance(data, collections.Iterable):
        # Rebuild the container using the same type as the input.
        return type(data)(map(convert, data))
    else:
        return data
print DATA
print convert(DATA)
# Prints:
# {u'list': [u'list', (True, u'Maybe'), set([u'and', u'a', u'set', 1])], u'foo': frozenset([u'Gah!']), u'bar': {u'baz': 97}, u'spam': u'eggs'}
# {'bar': {'baz': 97}, 'foo': frozenset(['Gah!']), 'list': ['list', (True, 'Maybe'), set(['and', 'a', 'set', 1])], 'spam': 'eggs'}
Assumptions:
You've imported the collections module and can make use of the abstract base classes it provides
You're happy to convert using the default encoding (use data.encode('utf-8') rather than str(data) if you need an explicit encoding).
If you need to support other container types, hopefully it's obvious how to follow the pattern and add cases for them.
I know I'm late on this one:
def convert_keys_to_string(dictionary):
    """Return a copy of ``dictionary`` with the keys at every nesting level converted to ``str``.

    Values that are not dicts (including a non-dict top-level argument) are
    returned unchanged.
    """
    if isinstance(dictionary, dict):
        return {
            str(key): convert_keys_to_string(value)
            for key, value in dictionary.items()
        }
    return dictionary
If you wanted to do this inline and didn't need recursive descent, this might work:
DATA = { u'spam': u'eggs', u'foo': True, u'bar': { u'baz': 97 } }
print DATA
# "{ u'spam': u'eggs', u'foo': True, u'bar': { u'baz': 97 } }"
# Only the top-level keys are converted; values (including nested dicts) are
# left untouched. The source dict defined above is named DATA, not data.
STRING_DATA = {str(k): v for k, v in DATA.items()}
print STRING_DATA
# "{ 'spam': 'eggs', 'foo': True, 'bar': { u'baz': 97 } }"
for a non-nested dict (since the title does not mention that case, it might be interesting for other people)
{str(k): str(v) for k, v in my_dict.items()}
def to_str(key, value):
    """Return ``(key, value)`` with any ``unicode`` member converted via ``str()``.

    Members that are not ``unicode`` instances are returned unchanged.
    """
    # NOTE: ``unicode`` is a Python 2 builtin.
    if isinstance(key, unicode):
        key = str(key)
    if isinstance(value, unicode):
        value = str(value)
    return key, value
pass key and value to it, and add recursion to your code to account for inner dictionary.
To make it all inline (non-recursive):
{str(k):(str(v) if isinstance(v, unicode) else v) for k,v in my_dict.items()}
>>> d = {u"a": u"b", u"c": u"d"}
>>> d
{u'a': u'b', u'c': u'd'}
>>> import json
>>> import yaml
>>> d = {u"a": u"b", u"c": u"d"}
>>> yaml.safe_load(json.dumps(d))
{'a': 'b', 'c': 'd'}
Just use print(*(dict.keys()))
The * can be used for unpacking containers e.g. lists. For more info on * check this SO answer.

Categories