How to recursively replace character in keys of a nested dictionary? - python

I'm trying to create a generic function that replaces dots in keys of a nested dictionary. I have a non-generic function that goes 3 levels deep, but there must be a way to do this generic. Any help is appreciated! My code so far:
output = {'key1': {'key2': 'value2', 'key3': {'key4 with a .': 'value4', 'key5 with a .': 'value5'}}}
def print_dict(d):
new = {}
for key,value in d.items():
new[key.replace(".", "-")] = {}
if isinstance(value, dict):
for key2, value2 in value.items():
new[key][key2] = {}
if isinstance(value2, dict):
for key3, value3 in value2.items():
new[key][key2][key3.replace(".", "-")] = value3
else:
new[key][key2.replace(".", "-")] = value2
else:
new[key] = value
return new
print print_dict(output)
UPDATE: to answer my own question, I made a solution using json object_hooks:
import json
def remove_dots(obj):
for key in obj.keys():
new_key = key.replace(".","-")
if new_key != key:
obj[new_key] = obj[key]
del obj[key]
return obj
output = {'key1': {'key2': 'value2', 'key3': {'key4 with a .': 'value4', 'key5 with a .': 'value5'}}}
new_json = json.loads(json.dumps(output), object_hook=remove_dots)
print new_json

Yes, there exists better way:
def print_dict(d):
new = {}
for k, v in d.iteritems():
if isinstance(v, dict):
v = print_dict(v)
new[k.replace('.', '-')] = v
return new
(Edit: It's recursion, more on Wikipedia.)

Actually all of the answers contain a mistake that may lead to wrong typing in the result.
I'd take the answer of #ngenain and improve it a bit below.
My solution will take care about the types derived from dict (OrderedDict, defaultdict, etc) and also about not only list, but set and tuple types.
I also do a simple type check in the beginning of the function for the most common types to reduce the comparisons count (may give a bit of speed in the large amounts of the data).
Works for Python 3. Replace obj.items() with obj.iteritems() for Py2.
def change_keys(obj, convert):
"""
Recursively goes through the dictionary obj and replaces keys with the convert function.
"""
if isinstance(obj, (str, int, float)):
return obj
if isinstance(obj, dict):
new = obj.__class__()
for k, v in obj.items():
new[convert(k)] = change_keys(v, convert)
elif isinstance(obj, (list, set, tuple)):
new = obj.__class__(change_keys(v, convert) for v in obj)
else:
return obj
return new
If I understand the needs right, most of users want to convert the keys to use them with mongoDB that does not allow dots in key names.

I used the code by #horejsek, but I adapted it to accept nested dictionaries with lists and a function that replaces the string.
I had a similar problem to solve: I wanted to replace keys in underscore lowercase convention for camel case convention and vice versa.
def change_dict_naming_convention(d, convert_function):
"""
Convert a nested dictionary from one convention to another.
Args:
d (dict): dictionary (nested or not) to be converted.
convert_function (func): function that takes the string in one convention and returns it in the other one.
Returns:
Dictionary with the new keys.
"""
new = {}
for k, v in d.iteritems():
new_v = v
if isinstance(v, dict):
new_v = change_dict_naming_convention(v, convert_function)
elif isinstance(v, list):
new_v = list()
for x in v:
new_v.append(change_dict_naming_convention(x, convert_function))
new[convert_function(k)] = new_v
return new

Here's a simple recursive solution that deals with nested lists and dictionnaries.
def change_keys(obj, convert):
"""
Recursivly goes through the dictionnary obj and replaces keys with the convert function.
"""
if isinstance(obj, dict):
new = {}
for k, v in obj.iteritems():
new[convert(k)] = change_keys(v, convert)
elif isinstance(obj, list):
new = []
for v in obj:
new.append(change_keys(v, convert))
else:
return obj
return new

You have to remove the original key, but you can't do it in the body of the loop because it will throw RunTimeError: dictionary changed size during iteration.
To solve this, iterate through a copy of the original object, but modify the original object:
def change_keys(obj):
new_obj = obj
for k in new_obj:
if hasattr(obj[k], '__getitem__'):
change_keys(obj[k])
if '.' in k:
obj[k.replace('.', '$')] = obj[k]
del obj[k]
>>> foo = {'foo': {'bar': {'baz.121': 1}}}
>>> change_keys(foo)
>>> foo
{'foo': {'bar': {'baz$121': 1}}}

You can dump everything to a JSON
replace through the whole string and load the JSON back
def nested_replace(data, old, new):
json_string = json.dumps(data)
replaced = json_string.replace(old, new)
fixed_json = json.loads(replaced)
return fixed_json
Or use a one-liner
def short_replace(data, old, new):
return json.loads(json.dumps(data).replace(old, new))

While jllopezpino's answer works but only limited to the start with the dictionary, here is mine that works with original variable is either list or dict.
def fix_camel_cases(data):
def convert(name):
# https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
if isinstance(data, dict):
new_dict = {}
for key, value in data.items():
value = fix_camel_cases(value)
snake_key = convert(key)
new_dict[snake_key] = value
return new_dict
if isinstance(data, list):
new_list = []
for value in data:
new_list.append(fix_camel_cases(value))
return new_list
return data

Here's a 1-liner variant of #horejsek 's answer using dict comprehension for those who prefer:
def print_dict(d):
return {k.replace('.', '-'): print_dict(v) for k, v in d.items()} if isinstance(d, dict) else d
I've only tested this in Python 2.7

I am guessing you have the same issue as I have, inserting dictionaries into a MongoDB collection, encountering exceptions when trying to insert dictionaries that have keys with dots (.) in them.
This solution is essentially the same as most other answers here, but it is slightly more compact, and perhaps less readable in that it uses a single statement and calls itself recursively. For Python 3.
def replace_keys(my_dict):
return { k.replace('.', '(dot)'): replace_keys(v) if type(v) == dict else v for k, v in my_dict.items() }

Related

Flatten Python Dict for All Values Down to True/False

Let's say I have a Python dict that may contain other dicts nested to an arbitrary level. Also, some of the keys refer to boolean choices while others don't. Something like this:
{'Key1': 'none',
'Key2': {'Key2a': True, 'Key2b': False},
'Key3': {'Key3a': {'Key3a1': 'some', 'Key3a2': 'many'}, 'Key3b': True}}
What I'd like to do is transform it into this:
{'Key1_none': 1,
'Key2_Key2a': 1,
'Key2_Key2b': 0,
'Key3_Key3a_Key3a1_some': 1,
'Key3_Key3a_Key3a2_many': 1,
'Key3b': 1}
Now now only is the dict flattened, all of the keys now have boolean answers. This solution is a great start, but I'm not that familiar with Python. The solution I linked to handles most of the cases, but it doesn't drill-down to the value level in all cases. With the example above, it would leave the first part as:
{'Key1': 'none',
'Key2_Key2a': 1,
'Key2_Key2b': 0,
...}
Obviously replacing the True/False with 1/0 is trivial. My question is more about how to flatten down to the additional level when the value of the key is not True or False.
I think this recursive function should do it:
def flatten(d, prefix=''):
prefix = prefix + '_' if prefix else ''
new = {}
for k, v in d.items():
if isinstance(v, bool):
new[prefix + k] = int(v)
elif isinstance(v, dict):
new.update(flatten(v, prefix=prefix + k))
elif isinstance(v, basestring): # python3 -- str
new[prefix + k + '_' + v] = 1
else:
raise TypeError('Unknown item type.')
return new
the recursion happens if the value is a dict and the "prefix" for the keys is appended to whatever the previous level of nesting's prefix was.
Of course, you can do better by using proper ABCs in the isinstance checking... e.g.
bool -> numbers.Integral
dict -> collections.Mapping
One more solution, based on solution you're provided
import collections
def flatten(d, parent_key='', sep='_'):
items = []
for k, v in d.items():
new_key = parent_key + sep + k if parent_key else k
if isinstance(v, collections.MutableMapping):
items.extend(flatten(v, new_key, sep=sep).items())
else:
if isinstance(v, bool):
items.append((new_key, int(v)))
else:
items.append((new_key, 1))
return dict(items

Sorting a nested OrderedDict by key, recursively

Say orig is an OrderedDict which contains normal string:string key value pairs, but sometimes the value could be another, nested OrderedDict.
I want to sort orig by key, alphabetically (ascending), and do it recursively.
Rules:
Assume key strings are unpredictable
Assume nesting can take place infinitely, e.g. level 1-50 all have both strings, OrderedDicts, etc as values.
Need an assist with the sorted algorithm:
import string
from random import choice
orig = OrderedDict((
('a', choice(string.digits)),
('b', choice(string.digits)),
('c', choice(string.digits)),
('special', OrderedDict((
('a', choice(string.digits)),
('b', choice(string.digits)),
('c', choice(string.digits)),
)))
))
sorted_copy = OrderedDict(sorted(orig.iteritems(), ...))
self.assertEqual(orig, sorted_copy)
EDIT: for python 3.6+, #pelson's answer is better
something like:
def sortOD(od):
res = OrderedDict()
for k, v in sorted(od.items()):
if isinstance(v, dict):
res[k] = sortOD(v)
else:
res[k] = v
return res
#acushner's solution can now be simplified in python3.6+ as dictionaries now preserve their insertion order.
Given we can now use the standard dictionary, the code now looks like:
def order_dict(dictionary):
result = {}
for k, v in sorted(dictionary.items()):
if isinstance(v, dict):
result[k] = order_dict(v)
else:
result[k] = v
return result
Because we can use standard dictionaries, we can also use standard dictionary comprehensions, so the code boils down to:
def order_dict(dictionary):
return {k: order_dict(v) if isinstance(v, dict) else v
for k, v in sorted(dictionary.items())}
See also https://mail.python.org/pipermail/python-dev/2016-September/146327.html for detail on python's ordered dictionary implementation. Also, the pronouncement that this will be a language feature as of python 3.7: https://mail.python.org/pipermail/python-dev/2017-December/151283.html
I faced a very similar issue with getting a stable object so I could get a stable hash, except I had objects with a mix of lists and dictionaries, so I had to sort all the dictionaries, depth first, and then sort the lists. This extends #acushner's answer:
def deep_sort(obj):
if isinstance(obj, dict):
obj = OrderedDict(sorted(obj.items()))
for k, v in obj.items():
if isinstance(v, dict) or isinstance(v, list):
obj[k] = deep_sort(v)
if isinstance(obj, list):
for i, v in enumerate(obj):
if isinstance(v, dict) or isinstance(v, list):
obj[i] = deep_sort(v)
obj = sorted(obj, key=lambda x: json.dumps(x))
return obj
As a side point, if you find yourself with classes in your objects that you need to sort, you can jsonpickle.dumps() them, then json.loads() them, then deep_sort() them. If it matters, then you can always json.dumps() and jsonpickle.loads() to get back to where you started, except sorted (well, only sorted in Python 3.6+). For cases of a stable hash, that wouldn't be necessary though.
Very similar to #acushner's solution, but class-based:
from collections import OrderedDict
class SortedDict(OrderedDict):
def __init__(self, **kwargs):
super(SortedDict, self).__init__()
for key, value in sorted(kwargs.items()):
if isinstance(value, dict):
self[key] = SortedDict(**value)
else:
self[key] = value
Usage:
sorted_dict = SortedDict(**unsorted_dict)
A combination of #pelson's answer and #cjbarth's answer, with key and reverse parameters:
def deep_sorted(obj, *, key=None, reverse=False):
if isinstance(obj, dict):
return {k: deep_sorted(v, key=key, reverse=reverse) for k, v in sorted(obj.items(), key=key, reverse=reverse)}
if isinstance(obj, list):
return [deep_sorted(v, key=key, reverse=reverse) for i, v in sorted(enumerate(obj), key=key, reverse=reverse)]
return obj
If you're still living in py2...like me....this is the 3.6+ version with lambda to pre-sort the keys.
def __sort_od(od):
res = OrderedDict()
for k, v in sorted(od.items(), key=lambda k: k[0]):
if isinstance(v, dict):
res[k] = __sort_od(v)
else:
res[k] = v
return res
sorted_OrderedDict = __sort_od(unsorted_dict)

change key to lower case for dict or OrderedDict

Following works for a dictionary, but not OrderedDict. For od it seems to form an infinite loop. Can you tell me why?
If the function input is dict it has to return dict, if input is OrderedDict it has to return od.
def key_lower(d):
"""returns d for d or od for od with keys changed to lower case
"""
for k in d.iterkeys():
v = d.pop(k)
if (type(k) == str) and (not k.islower()):
k = k.lower()
d[k] = v
return d
It forms an infinite loop because of the way ordered dictionaries add new members (to the end)
Since you are using iterkeys, it is using a generator. When you assign d[k] = v you are adding the new key/value to the end of the dictionary. Because you are using a generator, that will continue to generate keys as you continue adding them.
You could fix this in a few ways. One would be to create a new ordered dict from the previous.
def key_lower(d):
newDict = OrderedDict()
for k, v in d.iteritems():
if (isinstance(k, (str, basestring))):
k = k.lower()
newDict[k] = v
return newDict
The other way would be to not use a generator and use keys instead of iterkeys
As sberry mentioned, the infinite loop is essentially as you are modifying and reading the dict at the same time.
Probably the simplest solution is to use OrderedDict.keys() instead of OrderedDict.iterkeys():
for k in d.keys():
v = d.pop(k)
if (type(k) == str) and (not k.islower()):
k = k.lower()
d[k] = v
as the keys are captured directly at the start, they won't get updated as items are changed in the dict.

Efficient way to remove keys with empty strings from a dict

I have a dict and would like to remove all the keys for which there are empty value strings.
metadata = {u'Composite:PreviewImage': u'(Binary data 101973 bytes)',
u'EXIF:CFAPattern2': u''}
What is the best way to do this?
Python 2.X
dict((k, v) for k, v in metadata.iteritems() if v)
Python 2.7 - 3.X
{k: v for k, v in metadata.items() if v}
Note that all of your keys have values. It's just that some of those values are the empty string. There's no such thing as a key in a dict without a value; if it didn't have a value, it wouldn't be in the dict.
It can get even shorter than BrenBarn's solution (and more readable I think)
{k: v for k, v in metadata.items() if v}
Tested with Python 2.7.3.
If you really need to modify the original dictionary:
empty_keys = [k for k,v in metadata.iteritems() if not v]
for k in empty_keys:
del metadata[k]
Note that we have to make a list of the empty keys because we can't modify a dictionary while iterating through it (as you may have noticed). This is less expensive (memory-wise) than creating a brand-new dictionary, though, unless there are a lot of entries with empty values.
If you want a full-featured, yet succinct approach to handling real-world data structures which are often nested, and can even contain cycles, I recommend looking at the remap utility from the boltons utility package.
After pip install boltons or copying iterutils.py into your project, just do:
from boltons.iterutils import remap
drop_falsey = lambda path, key, value: bool(value)
clean = remap(metadata, visit=drop_falsey)
This page has many more examples, including ones working with much larger objects from Github's API.
It's pure-Python, so it works everywhere, and is fully tested in Python 2.7 and 3.3+. Best of all, I wrote it for exactly cases like this, so if you find a case it doesn't handle, you can bug me to fix it right here.
Based on Ryan's solution, if you also have lists and nested dictionaries:
For Python 2:
def remove_empty_from_dict(d):
if type(d) is dict:
return dict((k, remove_empty_from_dict(v)) for k, v in d.iteritems() if v and remove_empty_from_dict(v))
elif type(d) is list:
return [remove_empty_from_dict(v) for v in d if v and remove_empty_from_dict(v)]
else:
return d
For Python 3:
def remove_empty_from_dict(d):
if type(d) is dict:
return dict((k, remove_empty_from_dict(v)) for k, v in d.items() if v and remove_empty_from_dict(v))
elif type(d) is list:
return [remove_empty_from_dict(v) for v in d if v and remove_empty_from_dict(v)]
else:
return d
BrenBarn's solution is ideal (and pythonic, I might add). Here is another (fp) solution, however:
from operator import itemgetter
dict(filter(itemgetter(1), metadata.items()))
If you have a nested dictionary, and you want this to work even for empty sub-elements, you can use a recursive variant of BrenBarn's suggestion:
def scrub_dict(d):
if type(d) is dict:
return dict((k, scrub_dict(v)) for k, v in d.iteritems() if v and scrub_dict(v))
else:
return d
For python 3
dict((k, v) for k, v in metadata.items() if v)
Quick Answer (TL;DR)
Example01
### example01 -------------------
mydict = { "alpha":0,
"bravo":"0",
"charlie":"three",
"delta":[],
"echo":False,
"foxy":"False",
"golf":"",
"hotel":" ",
}
newdict = dict([(vkey, vdata) for vkey, vdata in mydict.iteritems() if(vdata) ])
print newdict
### result01 -------------------
result01 ='''
{'foxy': 'False', 'charlie': 'three', 'bravo': '0'}
'''
Detailed Answer
Problem
Context: Python 2.x
Scenario: Developer wishes modify a dictionary to exclude blank values
aka remove empty values from a dictionary
aka delete keys with blank values
aka filter dictionary for non-blank values over each key-value pair
Solution
example01 use python list-comprehension syntax with simple conditional to remove "empty" values
Pitfalls
example01 only operates on a copy of the original dictionary (does not modify in place)
example01 may produce unexpected results depending on what developer means by "empty"
Does developer mean to keep values that are falsy?
If the values in the dictionary are not gauranteed to be strings, developer may have unexpected data loss.
result01 shows that only three key-value pairs were preserved from the original set
Alternate example
example02 helps deal with potential pitfalls
The approach is to use a more precise definition of "empty" by changing the conditional.
Here we only want to filter out values that evaluate to blank strings.
Here we also use .strip() to filter out values that consist of only whitespace.
Example02
### example02 -------------------
mydict = { "alpha":0,
"bravo":"0",
"charlie":"three",
"delta":[],
"echo":False,
"foxy":"False",
"golf":"",
"hotel":" ",
}
newdict = dict([(vkey, vdata) for vkey, vdata in mydict.iteritems() if(str(vdata).strip()) ])
print newdict
### result02 -------------------
result02 ='''
{'alpha': 0,
'bravo': '0',
'charlie': 'three',
'delta': [],
'echo': False,
'foxy': 'False'
}
'''
See also
list-comprehension
falsy
checking for empty string
modifying original dictionary in place
dictionary comprehensions
pitfalls of checking for empty string
Building on the answers from patriciasz and nneonneo, and accounting for the possibility that you might want to delete keys that have only certain falsy things (e.g. '') but not others (e.g. 0), or perhaps you even want to include some truthy things (e.g. 'SPAM'), then you could make a highly specific hitlist:
unwanted = ['', u'', None, False, [], 'SPAM']
Unfortunately, this doesn't quite work, because for example 0 in unwanted evaluates to True. We need to discriminate between 0 and other falsy things, so we have to use is:
any([0 is i for i in unwanted])
...evaluates to False.
Now use it to del the unwanted things:
unwanted_keys = [k for k, v in metadata.items() if any([v is i for i in unwanted])]
for k in unwanted_keys: del metadata[k]
If you want a new dictionary, instead of modifying metadata in place:
newdict = {k: v for k, v in metadata.items() if not any([v is i for i in unwanted])}
I read all replies in this thread and some referred also to this thread:
Remove empty dicts in nested dictionary with recursive function
I originally used solution here and it worked great:
Attempt 1: Too Hot (not performant or future-proof):
def scrub_dict(d):
if type(d) is dict:
return dict((k, scrub_dict(v)) for k, v in d.iteritems() if v and scrub_dict(v))
else:
return d
But some performance and compatibility concerns were raised in Python 2.7 world:
use isinstance instead of type
unroll the list comp into for loop for efficiency
use python3 safe items instead of iteritems
Attempt 2: Too Cold (Lacks Memoization):
def scrub_dict(d):
new_dict = {}
for k, v in d.items():
if isinstance(v,dict):
v = scrub_dict(v)
if not v in (u'', None, {}):
new_dict[k] = v
return new_dict
DOH! This is not recursive and not at all memoizant.
Attempt 3: Just Right (so far):
def scrub_dict(d):
new_dict = {}
for k, v in d.items():
if isinstance(v,dict):
v = scrub_dict(v)
if not v in (u'', None, {}):
new_dict[k] = v
return new_dict
To preserve 0 and False values but get rid of empty values you could use:
{k: v for k, v in metadata.items() if v or v == 0 or v is False}
For a nested dict with mixed types of values you could use:
def remove_empty_from_dict(d):
if isinstance(d, dict):
return dict((k, remove_empty_from_dict(v)) for k, v in d.items() \
if v or v == 0 or v is False and remove_empty_from_dict(v) is not None)
elif isinstance(d, list):
return [remove_empty_from_dict(v) for v in d
if v or v == 0 or v is False and remove_empty_from_dict(v) is not None]
else:
if d or d == 0 or d is False:
return d
"As I also currently write a desktop application for my work with Python, I found in data-entry application when there is lots of entry and which some are not mandatory thus user can left it blank, for validation purpose, it is easy to grab all entries and then discard empty key or value of a dictionary. So my code above a show how we can easy take them out, using dictionary comprehension and keep dictionary value element which is not blank. I use Python 3.8.3
data = {'':'', '20':'', '50':'', '100':'1.1', '200':'1.2'}
dic = {key:value for key,value in data.items() if value != ''}
print(dic)
{'100': '1.1', '200': '1.2'}
Dicts mixed with Arrays
The answer at Attempt 3: Just Right (so far) from BlissRage's answer does not properly handle arrays elements. I'm including a patch in case anyone needs it. The method is handles list with the statement block of if isinstance(v, list):, which scrubs the list using the original scrub_dict(d) implementation.
#staticmethod
def scrub_dict(d):
new_dict = {}
for k, v in d.items():
if isinstance(v, dict):
v = scrub_dict(v)
if isinstance(v, list):
v = scrub_list(v)
if not v in (u'', None, {}, []):
new_dict[k] = v
return new_dict
#staticmethod
def scrub_list(d):
scrubbed_list = []
for i in d:
if isinstance(i, dict):
i = scrub_dict(i)
scrubbed_list.append(i)
return scrubbed_list
An alternative way you can do this, is using dictionary comprehension. This should be compatible with 2.7+
result = {
key: value for key, value in
{"foo": "bar", "lorem": None}.items()
if value
}
Here is an option if you are using pandas:
import pandas as pd
d = dict.fromkeys(['a', 'b', 'c', 'd'])
d['b'] = 'not null'
d['c'] = '' # empty string
print(d)
# convert `dict` to `Series` and replace any blank strings with `None`;
# use the `.dropna()` method and
# then convert back to a `dict`
d_ = pd.Series(d).replace('', None).dropna().to_dict()
print(d_)
Some of Methods mentioned above ignores if there are any integers and float with values 0 & 0.0
If someone wants to avoid the above can use below code(removes empty strings and None values from nested dictionary and nested list):
def remove_empty_from_dict(d):
if type(d) is dict:
_temp = {}
for k,v in d.items():
if v == None or v == "":
pass
elif type(v) is int or type(v) is float:
_temp[k] = remove_empty_from_dict(v)
elif (v or remove_empty_from_dict(v)):
_temp[k] = remove_empty_from_dict(v)
return _temp
elif type(d) is list:
return [remove_empty_from_dict(v) for v in d if( (str(v).strip() or str(remove_empty_from_dict(v)).strip()) and (v != None or remove_empty_from_dict(v) != None))]
else:
return d
metadata ={'src':'1921','dest':'1337','email':'','movile':''}
ot = {k: v for k, v in metadata.items() if v != ''}
print(f"Final {ot}")
You also have an option with filter method:
filtered_metadata = dict( filter(lambda val: val[1] != u'', metadata.items()) )
Some benchmarking:
1. List comprehension recreate dict
In [7]: %%timeit dic = {str(i):i for i in xrange(10)}; dic['10'] = None; dic['5'] = None
...: dic = {k: v for k, v in dic.items() if v is not None}
1000000 loops, best of 7: 375 ns per loop
2. List comprehension recreate dict using dict()
In [8]: %%timeit dic = {str(i):i for i in xrange(10)}; dic['10'] = None; dic['5'] = None
...: dic = dict((k, v) for k, v in dic.items() if v is not None)
1000000 loops, best of 7: 681 ns per loop
3. Loop and delete key if v is None
In [10]: %%timeit dic = {str(i):i for i in xrange(10)}; dic['10'] = None; dic['5'] = None
...: for k, v in dic.items():
...: if v is None:
...: del dic[k]
...:
10000000 loops, best of 7: 160 ns per loop
so loop and delete is the fastest at 160ns, list comprehension is half as slow at ~375ns and with a call to dict() is half as slow again ~680ns.
Wrapping 3 into a function brings it back down again to about 275ns. Also for me PyPy was about twice as fast as neet python.

Loop through all nested dictionary values?

for k, v in d.iteritems():
if type(v) is dict:
for t, c in v.iteritems():
print "{0} : {1}".format(t, c)
I'm trying to loop through a dictionary and print out all key value pairs where the value is not a nested dictionary. If the value is a dictionary I want to go into it and print out its key value pairs...etc. Any help?
EDIT
How about this? It still only prints one thing.
def printDict(d):
for k, v in d.iteritems():
if type(v) is dict:
printDict(v)
else:
print "{0} : {1}".format(k, v)
Full Test Case
Dictionary:
{u'xml': {u'config': {u'portstatus': {u'status': u'good'}, u'target': u'1'},
u'port': u'11'}}
Result:
xml : {u'config': {u'portstatus': {u'status': u'good'}, u'target': u'1'}, u'port': u'11'}
As said by Niklas, you need recursion, i.e. you want to define a function to print your dict, and if the value is a dict, you want to call your print function using this new dict.
Something like :
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
else:
print("{0} : {1}".format(k, v))
There are potential problems if you write your own recursive implementation or the iterative equivalent with stack. See this example:
dic = {}
dic["key1"] = {}
dic["key1"]["key1.1"] = "value1"
dic["key2"] = {}
dic["key2"]["key2.1"] = "value2"
dic["key2"]["key2.2"] = dic["key1"]
dic["key2"]["key2.3"] = dic
In the normal sense, nested dictionary will be a n-nary tree like data structure. But the definition doesn't exclude the possibility of a cross edge or even a back edge (thus no longer a tree). For instance, here key2.2 holds to the dictionary from key1, key2.3 points to the entire dictionary(back edge/cycle). When there is a back edge(cycle), the stack/recursion will run infinitely.
root<-------back edge
/ \ |
_key1 __key2__ |
/ / \ \ |
|->key1.1 key2.1 key2.2 key2.3
| / | |
| value1 value2 |
| |
cross edge----------|
If you print this dictionary with this implementation from Scharron
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
else:
print "{0} : {1}".format(k, v)
You would see this error:
> RuntimeError: maximum recursion depth exceeded while calling a Python object
The same goes with the implementation from senderle.
Similarly, you get an infinite loop with this implementation from Fred Foo:
def myprint(d):
stack = list(d.items())
while stack:
k, v = stack.pop()
if isinstance(v, dict):
stack.extend(v.items())
else:
print("%s: %s" % (k, v))
However, Python actually detects cycles in nested dictionary:
print dic
{'key2': {'key2.1': 'value2', 'key2.3': {...},
'key2.2': {'key1.1': 'value1'}}, 'key1': {'key1.1': 'value1'}}
"{...}" is where a cycle is detected.
As requested by Moondra this is a way to avoid cycles (DFS):
def myprint(d):
stack = list(d.items())
visited = set()
while stack:
k, v = stack.pop()
if isinstance(v, dict):
if k not in visited:
stack.extend(v.items())
else:
print("%s: %s" % (k, v))
visited.add(k)
Since a dict is iterable, you can apply the classic nested container iterable formula to this problem with only a couple of minor changes. Here's a Python 2 version (see below for 3):
import collections
def nested_dict_iter(nested):
for key, value in nested.iteritems():
if isinstance(value, collections.Mapping):
for inner_key, inner_value in nested_dict_iter(value):
yield inner_key, inner_value
else:
yield key, value
Test:
list(nested_dict_iter({'a':{'b':{'c':1, 'd':2},
'e':{'f':3, 'g':4}},
'h':{'i':5, 'j':6}}))
# output: [('g', 4), ('f', 3), ('c', 1), ('d', 2), ('i', 5), ('j', 6)]
In Python 2, It might be possible to create a custom Mapping that qualifies as a Mapping but doesn't contain iteritems, in which case this will fail. The docs don't indicate that iteritems is required for a Mapping; on the other hand, the source gives Mapping types an iteritems method. So for custom Mappings, inherit from collections.Mapping explicitly just in case.
In Python 3, there are a number of improvements to be made. As of Python 3.3, abstract base classes live in collections.abc. They remain in collections too for backwards compatibility, but it's nicer having our abstract base classes together in one namespace. So this imports abc from collections. Python 3.3 also adds yield from, which is designed for just these sorts of situations. This is not empty syntactic sugar; it may lead to faster code and more sensible interactions with coroutines.
from collections import abc
def nested_dict_iter(nested):
for key, value in nested.items():
if isinstance(value, abc.Mapping):
yield from nested_dict_iter(value)
else:
yield key, value
Alternative iterative solution:
def myprint(d):
stack = d.items()
while stack:
k, v = stack.pop()
if isinstance(v, dict):
stack.extend(v.iteritems())
else:
print("%s: %s" % (k, v))
Slightly different version I wrote that keeps track of the keys along the way to get there
def print_dict(v, prefix=''):
if isinstance(v, dict):
for k, v2 in v.items():
p2 = "{}['{}']".format(prefix, k)
print_dict(v2, p2)
elif isinstance(v, list):
for i, v2 in enumerate(v):
p2 = "{}[{}]".format(prefix, i)
print_dict(v2, p2)
else:
print('{} = {}'.format(prefix, repr(v)))
On your data, it'll print
data['xml']['config']['portstatus']['status'] = u'good'
data['xml']['config']['target'] = u'1'
data['xml']['port'] = u'11'
It's also easy to modify it to track the prefix as a tuple of keys rather than a string if you need it that way.
Here is pythonic way to do it. This function will allow you to loop through key-value pair in all the levels. It does not save the whole thing to the memory but rather walks through the dict as you loop through it
def recursive_items(dictionary):
for key, value in dictionary.items():
if type(value) is dict:
yield (key, value)
yield from recursive_items(value)
else:
yield (key, value)
a = {'a': {1: {1: 2, 3: 4}, 2: {5: 6}}}
for key, value in recursive_items(a):
print(key, value)
Prints
a {1: {1: 2, 3: 4}, 2: {5: 6}}
1 {1: 2, 3: 4}
1 2
3 4
2 {5: 6}
5 6
A alternative solution to work with lists based on Scharron's solution
def myprint(d):
my_list = d.iteritems() if isinstance(d, dict) else enumerate(d)
for k, v in my_list:
if isinstance(v, dict) or isinstance(v, list):
myprint(v)
else:
print u"{0} : {1}".format(k, v)
I am using the following code to print all the values of a nested dictionary, taking into account where the value could be a list containing dictionaries. This was useful to me when parsing a JSON file into a dictionary and needing to quickly check whether any of its values are None.
d = {
"user": 10,
"time": "2017-03-15T14:02:49.301000",
"metadata": [
{"foo": "bar"},
"some_string"
]
}
def print_nested(d):
if isinstance(d, dict):
for k, v in d.items():
print_nested(v)
elif hasattr(d, '__iter__') and not isinstance(d, str):
for item in d:
print_nested(item)
elif isinstance(d, str):
print(d)
else:
print(d)
print_nested(d)
Output:
10
2017-03-15T14:02:49.301000
bar
some_string
Your question already has been answered well, but I recommend using isinstance(d, collections.Mapping) instead of isinstance(d, dict). It works for dict(), collections.OrderedDict(), and collections.UserDict().
The generally correct version is:
def myprint(d):
for k, v in d.items():
if isinstance(v, collections.Mapping):
myprint(v)
else:
print("{0} : {1}".format(k, v))
Iterative solution as an alternative:
def traverse_nested_dict(d):
iters = [d.iteritems()]
while iters:
it = iters.pop()
try:
k, v = it.next()
except StopIteration:
continue
iters.append(it)
if isinstance(v, dict):
iters.append(v.iteritems())
else:
yield k, v
d = {"a": 1, "b": 2, "c": {"d": 3, "e": {"f": 4}}}
for k, v in traverse_nested_dict(d):
print k, v
Here's a modified version of Fred Foo's answer for Python 2. In the original response, only the deepest level of nesting is output. If you output the keys as lists, you can keep the keys for all levels, although to reference them you need to reference a list of lists.
Here's the function:
def NestIter(nested):
for key, value in nested.iteritems():
if isinstance(value, collections.Mapping):
for inner_key, inner_value in NestIter(value):
yield [key, inner_key], inner_value
else:
yield [key],value
To reference the keys:
for keys, vals in mynested:
print(mynested[keys[0]][keys[1][0]][keys[1][1][0]])
for a three-level dictionary.
You need to know the number of levels before to access multiple keys and the number of levels should be constant (it may be possible to add a small bit of script to check the number of nesting levels when iterating through values, but I haven't yet looked at this).
I find this approach a bit more flexible, here you just providing generator function that emits key, value pairs and can be easily extended to also iterate over lists.
def traverse(value, key=None):
if isinstance(value, dict):
for k, v in value.items():
yield from traverse(v, k)
else:
yield key, value
Then you can write your own myprint function, then would print those key value pairs.
def myprint(d):
for k, v in traverse(d):
print(f"{k} : {v}")
A test:
myprint({
'xml': {
'config': {
'portstatus': {
'status': 'good',
},
'target': '1',
},
'port': '11',
},
})
Output:
status : good
target : 1
port : 11
I tested this on Python 3.6.
Nested dictionaries looping using isinstance() and yield function.
**isinstance is afunction that returns the given input and reference is true or false as in below case dict is true so it go for iteration.
**Yield is used to return from a function without destroying the states of its local variable and when the function is called, the execution starts from the last yield statement. Any function that contains a yield keyword is termed a generator.
students= {'emp1': {'name': 'Bob', 'job': 'Mgr'},
'emp2': {'name': 'Kim', 'job': 'Dev','emp3': {'namee': 'Saam', 'j0ob': 'Deev'}},
'emp4': {'name': 'Sam', 'job': 'Dev'}}
def nested_dict_pairs_iterator(dict_obj):
for key, value in dict_obj.items():
# Check if value is of dict type
if isinstance(value, dict):
# If value is dict then iterate over all its values
for pair in nested_dict_pairs_iterator(value):
yield (key, *pair)
else:
# If value is not dict type then yield the value
yield (key, value)
for pair in nested_dict_pairs_iterator(students):
print(pair)
For a ready-made solution install ndicts
pip install ndicts
Import a NestedDict in your script
from ndicts.ndicts import NestedDict
Initialize
dictionary = {
u'xml': {
u'config': {
u'portstatus': {u'status': u'good'},
u'target': u'1'
},
u'port': u'11'
}
}
nd = NestedDict(dictionary)
Iterate
for key, value in nd.items():
print(key, value)
While the original solution from #Scharron is beautiful and simple, it cannot handle the list very well:
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
else:
print("{0} : {1}".format(k, v))
So this code can be slightly modified like this to handle list in elements:
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
elif isinstance(v, list):
for i in v:
myprint(i)
else:
print("{0} : {1}".format(k, v))
These answers work for only 2 levels of sub-dictionaries. For more try this:
nested_dict = {'dictA': {'key_1': 'value_1', 'key_1A': 'value_1A','key_1Asub1': {'Asub1': 'Asub1_val', 'sub_subA1': {'sub_subA1_key':'sub_subA1_val'}}},
'dictB': {'key_2': 'value_2'},
1: {'key_3': 'value_3', 'key_3A': 'value_3A'}}
def print_dict(dictionary):
dictionary_array = [dictionary]
for sub_dictionary in dictionary_array:
if type(sub_dictionary) is dict:
for key, value in sub_dictionary.items():
print("key=", key)
print("value", value)
if type(value) is dict:
dictionary_array.append(value)
print_dict(nested_dict)
You can print recursively with a dictionary comprehension:
def print_key_pairs(d):
{k: print_key_pairs(v) if isinstance(v, dict) else print(f'{k}: {v}') for k, v in d.items()}
For your test case this is the output:
>>> print_key_pairs({u'xml': {u'config': {u'portstatus': {u'status': u'good'}, u'target': u'1'}, u'port': u'11'}})
status: good
target: 1
port: 11
Returns a tuple of each key and value and the key contains the full path
from typing import Mapping, Tuple, Iterator
def traverse_dict(nested: Mapping, parent_key="", keys_to_not_traverse_further=tuple()) -> Iterator[Tuple[str, str]]:
"""Each key is joined with it's parent using dot as a separator.
Once a `parent_key` matches `keys_to_not_traverse_further`
it will no longer find its child dicts.
"""
for key, value in nested.items():
if isinstance(value, abc.Mapping) and key not in keys_to_not_traverse_further:
yield from traverse_dict(value, f"{parent_key}.{key}", keys_to_not_traverse_further)
else:
yield f"{parent_key}.{key}", value
Let's test it
my_dict = {
"isbn": "123-456-222",
"author": {"lastname": "Doe", "firstname": "Jane"},
"editor": {"lastname": "Smith", "firstname": "Jane"},
"title": "The Ultimate Database Study Guide",
"category": ["Non-Fiction", "Technology"],
"first": {
"second": {"third": {"fourth": {"blah": "yadda"}}},
"fifth": {"sixth": "seventh"},
},
}
for k, v in traverse_dict(my_dict):
print(k, v)
Returns
.isbn 123-456-222
.author.lastname Doe
.author.firstname Jane
.editor.lastname Smith
.editor.firstname Jane
.title The Ultimate Database Study Guide
.category ['Non-Fiction', 'Technology']
.first.second.third.fourth.blah yadda
.first.fifth.sixth seventh
If you don't care about some child dicts e.g names in this case then
use the keys_to_not_traverse_further
for k, v in traverse_dict(my_dict, parent_key="", keys_to_not_traverse_further=("author","editor")):
print(k, v)
Returns
.isbn 123-456-222
.author {'lastname': 'Doe', 'firstname': 'Jane'}
.editor {'lastname': 'Smith', 'firstname': 'Jane'}
.title The Ultimate Database Study Guide
.category ['Non-Fiction', 'Technology']
.first.second.third.fourth.blah yadda
.first.fifth.sixth seventh

Categories