Merge nested dictionaries and update keys [duplicate] - python

I need to merge multiple dictionaries, here's what I have for instance:
dict1 = {1:{"a":{A}}, 2:{"b":{B}}}
dict2 = {2:{"c":{C}}, 3:{"d":{D}}}
With A B C and D being leaves of the tree, like {"info1":"value", "info2":"value2"}
There is an unknown level(depth) of dictionaries, it could be {2:{"c":{"z":{"y":{C}}}}}
In my case it represents a directory/files structure with nodes being docs and leaves being files.
I want to merge them to obtain:
dict3 = {1:{"a":{A}}, 2:{"b":{B},"c":{C}}, 3:{"d":{D}}}
I'm not sure how I could do that easily with Python.

This is actually quite tricky - particularly if you want a useful error message when things are inconsistent, while correctly accepting duplicate but consistent entries (something no other answer here does..)
Assuming you don't have huge numbers of entries, a recursive function is easiest:
from functools import reduce
def merge(a, b, path=None):
    """Recursively merge ``b`` into ``a`` and return ``a``.

    ``a`` is mutated in place; ``b`` is never modified. Keys found only in
    ``b`` are added to ``a``. Keys found in both are merged recursively when
    both values are dicts, and must otherwise compare equal.

    Args:
        a: Destination dict, modified in place.
        b: Source dict.
        path: Keys (as strings) leading to the current level. Only used to
            build the conflict message; callers normally omit it.

    Returns:
        The same object that was passed as ``a``.

    Raises:
        Exception: If the two dicts hold different values under the same key
            and those values are not both dicts.
    """
    if path is None:
        path = []
    for key in b:
        if key not in a:
            # Rebuild nested dicts instead of storing b's own objects, so that
            # later merges into ``a`` cannot leak back into ``b``.
            a[key] = merge({}, b[key]) if isinstance(b[key], dict) else b[key]
        elif isinstance(a[key], dict) and isinstance(b[key], dict):
            merge(a[key], b[key], path + [str(key)])
        elif a[key] == b[key]:
            continue  # same leaf value on both sides
        else:
            raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
    return a
# works: the inputs only share key 2, and its sub-dicts have disjoint keys
print(merge({1:{"a":"A"},2:{"b":"B"}}, {2:{"c":"C"},3:{"d":"D"}}))
# has conflict: both inputs define 2 -> "b" with different leaves ("B" vs "C"),
# so this call raises Exception('Conflict at 2.b')
merge({1:{"a":"A"},2:{"b":"B"}}, {1:{"a":"A"},2:{"b":"C"}})
note that this mutates a - the contents of b are added to a (which is also returned). If you want to keep a you could call it like merge(dict(a), b).
agf pointed out (below) that you may have more than two dicts, in which case you can use:
reduce(merge, [dict1, dict2, dict3...])
where everything will be added to dict1.
Note: I edited my initial answer to mutate the first argument; that makes the "reduce" easier to explain

You could try mergedeep.
Installation
$ pip3 install mergedeep
Usage
from mergedeep import merge
# Sample inputs: b and c both define "keyB", each with a different sub-key.
a = {"keyA": 1}
b = {"keyB": {"sub1": 10}}
c = {"keyB": {"sub2": 20}}
# NOTE(review): `a` is printed afterwards and shows the combined content, so
# the call presumably merges b and c into a in place - confirm in the mergedeep docs.
merge(a, b, c)
print(a)
# {"keyA": 1, "keyB": {"sub1": 10, "sub2": 20}}
For a full list of options, check out the docs!

Here's an easy way to do it using generators:
def mergedicts(dict1, dict2):
    """Lazily yield the ``(key, value)`` pairs of a deep merge of two dicts.

    Wrap the result in ``dict(...)`` to materialise it. Neither input is
    modified. For a key present in both inputs, two dict values are merged
    recursively; any other pair is resolved in favour of ``dict2``.
    """
    every_key = set(dict1.keys()).union(dict2.keys())
    for key in every_key:
        if key not in dict2:
            # Only the first dict knows this key.
            yield (key, dict1[key])
        elif key not in dict1:
            # Only the second dict knows this key.
            yield (key, dict2[key])
        elif isinstance(dict1[key], dict) and isinstance(dict2[key], dict):
            yield (key, dict(mergedicts(dict1[key], dict2[key])))
        else:
            # At least one side is not a dict, so there is nothing to recurse
            # into: the second dict's value overrides the first. Raise here
            # instead if value conflicts should be reported.
            yield (key, dict2[key])
# Sample inputs: the dicts overlap only on key 2, whose sub-dicts are disjoint.
dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
# mergedicts() is a generator, so materialise it with dict() before printing.
# print() is called as a function so the snippet also runs on Python 3.
print(dict(mergedicts(dict1, dict2)))
This prints:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}

One issue with this question is that the values of the dict can be arbitrarily complex pieces of data. Based upon these and other answers I came up with this code:
class YamlReaderError(Exception):
    """Error raised by ``data_merge`` when two values cannot be merged."""
def data_merge(a, b):
    """Merge ``b`` into ``a`` and return the merged result.

    Rules, chosen by the type of ``a``:

    * ``None`` or a scalar (``str``, ``bytes``, ``int``, ``float``): replaced
      by ``b``.
    * ``list``: extended with ``b`` if ``b`` is a list, otherwise ``b`` is
      appended. The list is modified in place.
    * ``dict``: ``b`` must be a dict. Missing keys are added and existing
      keys are merged recursively. The dict is modified in place.

    NOTE: tuples and arbitrary objects are not handled, as it is ambiguous
    what should happen to them.

    Raises:
        YamlReaderError: If ``a`` has an unsupported type, if a non-dict is
            merged into a dict, or if a ``TypeError`` occurs while merging.
    """
    key = None
    try:
        # Python 3 port: ``unicode``/``long`` no longer exist. ``bytes`` stands
        # in for the Python 2 ``str`` type.
        if a is None or isinstance(a, (str, bytes, int, float)):
            # border case for first run or if a is a primitive
            a = b
        elif isinstance(a, list):
            # lists can only be appended to
            if isinstance(b, list):
                a.extend(b)
            else:
                a.append(b)
        elif isinstance(a, dict):
            # dicts must be merged
            if isinstance(b, dict):
                for key in b:
                    if key in a:
                        a[key] = data_merge(a[key], b[key])
                    else:
                        a[key] = b[key]
            else:
                raise YamlReaderError('Cannot merge non-dict "%s" into dict "%s"' % (b, a))
        else:
            raise YamlReaderError('NOT IMPLEMENTED "%s" into "%s"' % (b, a))
    except TypeError as e:
        raise YamlReaderError('TypeError "%s" in key "%s" when merging "%s" into "%s"' % (e, key, b, a)) from e
    return a
My use case is merging YAML files where I only have to deal with a subset of possible data types. Hence I can ignore tuples and other objects. For me a sensible merge logic means
replace scalars
append lists
merge dicts by adding missing keys and updating existing keys
Everything else and the unforeseens results in an error.

Dictionaries of dictionaries merge
As this is the canonical question (in spite of certain non-generalities) I'm providing the canonical Pythonic approach to solving this issue.
Simplest Case: "leaves are nested dicts that end in empty dicts":
# Sample inputs for the simplest case: every leaf is an empty dict.
d1 = {'a': {1: {'foo': {}}, 2: {}}}
d2 = {'a': {1: {}, 2: {'bar': {}}}}
d3 = {'b': {3: {'baz': {}}}}
d4 = {'a': {1: {'quux': {}}}}
This is the simplest case for recursion, and I would recommend two naive approaches:
def rec_merge1(d1, d2):
    """Return a new dict that is the recursive merge of two dicts of dicts.

    Neither argument is modified. Every value is expected to be a dict
    itself (the "leaves are empty dicts" case). Sub-dicts that exist on only
    one side are reused in the result, not copied.
    """
    merged = d1.copy()
    for key, value in d2.items():
        # Build the merged value separately instead of writing it back into
        # d2, which would silently alter the caller's second argument.
        merged[key] = rec_merge1(d1[key], value) if key in d1 else value
    return merged
def rec_merge2(d1, d2):
    """Recursively update ``d1`` with ``d2`` in place and return ``d1``.

    Every value is expected to be a dict itself. ``d2`` is left unmodified;
    sub-dicts that exist only in ``d2`` are stored in ``d1`` as-is (shared,
    not copied).
    """
    for key, value in d2.items():
        # Merge into d1's own sub-dict rather than rewriting d2[key] first.
        d1[key] = rec_merge2(d1[key], value) if key in d1 else value
    return d1
I believe I would prefer the second to the first, but keep in mind that the original state of the first would have to be rebuilt from its origin. Here's the usage:
>>> from functools import reduce # only required for Python 3.
>>> reduce(rec_merge1, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
>>> reduce(rec_merge2, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
Complex Case: "leaves are of any other type:"
So if they end in dicts, it's a simple case of merging the end empty dicts. If not, it's not so trivial. If strings, how do you merge them? Sets can be updated similarly, so we could give that treatment, but we lose the order in which they were merged. So does order matter?
So in lieu of more information, the simplest approach will be to give them the standard update treatment if both values are not dicts: i.e. the second dict's value will overwrite the first, even if the second dict's value is None and the first's value is a dict with a lot of info.
# Sample inputs for the complex case: leaves are strings or None, not dicts.
d1 = {'a': {1: 'foo', 2: None}}
d2 = {'a': {1: None, 2: 'bar'}}
d3 = {'b': {3: 'baz'}}
d4 = {'a': {1: 'quux'}}
from collections.abc import MutableMapping
def rec_merge(d1, d2):
    """Return a new mapping that is the recursive merge of ``d1`` and ``d2``.

    When both sides hold a mutable mapping under the same key, the two are
    merged recursively. In every other case the value from ``d2`` overwrites
    the one from ``d1``, exactly as ``dict.update`` would. Neither argument
    is modified.
    """
    merged = d1.copy()
    for key, value in d2.items():
        if key in d1 and all(isinstance(e, MutableMapping) for e in (d1[key], value)):
            # Both sides are mappings: merge them without writing into d2.
            merged[key] = rec_merge(d1[key], value)
        else:
            # Non-mapping leaf on either side: the second one wins.
            # Further type-specific merging could be added here.
            merged[key] = value
    return merged
And now
from functools import reduce
reduce(rec_merge, (d1, d2, d3, d4))
returns
{'a': {1: 'quux', 2: 'bar'}, 'b': {3: 'baz'}}
Application to the original question:
I've had to remove the curly braces around the letters and put them in single quotes for this to be legit Python (else they would be set literals in Python 2.7+) as well as append a missing brace:
dict1 = {1:{"a":'A'}, 2:{"b":'B'}}
dict2 = {2:{"c":'C'}, 3:{"d":'D'}}
and rec_merge(dict1, dict2) now returns:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
Which matches the desired outcome of the original question (after changing, e.g. the {A} to 'A'.)

Based on @andrew cooke's answer. This version handles nested lists of dicts and also allows the option to update the values.
def _merge_values(old, new, path, update):
    """Merge one pair of values found under the same key or list index."""
    if isinstance(old, dict) and isinstance(new, dict):
        return merge(old, new, path, update)
    if old == new:
        return old  # same leaf value
    if isinstance(old, list) and isinstance(new, list):
        for idx, item in enumerate(new):
            if idx < len(old):
                old[idx] = _merge_values(old[idx], item, path + [str(idx)], update)
            else:
                # The incoming list is longer: keep its extra items.
                old.append(item)
        return old
    if update:
        return new
    raise Exception('Conflict at %s' % '.'.join(path))


def merge(a, b, path=None, update=True):
    """Recursively merge ``b`` into ``a`` in place and return ``a``.

    Based on http://stackoverflow.com/questions/7204805/python-dictionaries-of-dictionaries-merge

    Dicts are merged key by key and lists are merged position by position
    (extra items of the incoming list are appended).

    Args:
        a: Destination dict, modified in place.
        b: Source dict.
        path: Keys leading to the current level, used in conflict messages.
        update: When true, a conflicting value from ``b`` replaces the one in
            ``a``. When false, a conflict raises instead. The flag applies at
            every nesting level.

    Returns:
        The same object that was passed as ``a``.

    Raises:
        Exception: On a value conflict while ``update`` is false.
    """
    if path is None:
        path = []
    for key in b:
        if key in a:
            a[key] = _merge_values(a[key], b[key], path + [str(key)], update)
        else:
            a[key] = b[key]
    return a

This simple recursive procedure will merge one dictionary into another while overriding conflicting keys:
#!/usr/bin/env python2.7
def merge_dicts(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1``.

    When both arguments are dicts, ``dict1`` is updated in place and
    returned; conflicting keys are resolved in favour of ``dict2``. When
    either argument is not a dict, ``dict2`` is returned unchanged.
    """
    if isinstance(dict1, dict) and isinstance(dict2, dict):
        for key, incoming in dict2.items():
            if key in dict1:
                dict1[key] = merge_dicts(dict1[key], incoming)
            else:
                dict1[key] = incoming
        return dict1
    return dict2
# Disjoint sub-keys under 2: prints {1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {2:{"c":"C"}, 3:{"d":"D"}}))
# Conflicting leaf at 2 -> "b": the second dict wins, prints {1: {'a': 'A'}, 2: {'b': 'C'}}
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {1:{"a":"A"}, 2:{"b":"C"}}))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
{1: {'a': 'A'}, 2: {'b': 'C'}}

Based on the answer from @andrew cooke.
It takes care of nested lists in a better way.
def deep_merge_lists(original, incoming):
    """
    Deep merge two lists position by position. Modifies ``original``.

    For each index present in both lists:
    a. two dicts: merged with deep_merge_dicts.
    b. two lists: merged recursively with deep_merge_lists.
    c. anything else (including mismatched types): the incoming item
       replaces the original one.
    If the incoming list is longer than the original, the extra items are
    appended. Returns None.
    """
    shared = min(len(original), len(incoming))
    for position, (current, new) in enumerate(zip(original, incoming)):
        if isinstance(current, dict) and isinstance(new, dict):
            deep_merge_dicts(current, new)
        elif isinstance(current, list) and isinstance(new, list):
            deep_merge_lists(current, new)
        else:
            original[position] = new
    # Whatever the incoming list has beyond the common prefix is new content.
    original.extend(incoming[shared:])
def deep_merge_dicts(original, incoming):
    """
    Deep merge two dictionaries. Modifies ``original``.

    Keys found only in ``incoming`` are added. For keys found in both:
    a. two dicts: merged recursively with deep_merge_dicts.
    b. two lists: merged with deep_merge_lists.
    c. anything else (including mismatched types): the incoming value
       replaces the original one.
    Returns None.
    """
    for key, new_value in incoming.items():
        if key not in original:
            original[key] = new_value
            continue
        current = original[key]
        if isinstance(current, dict) and isinstance(new_value, dict):
            deep_merge_dicts(current, new_value)
        elif isinstance(current, list) and isinstance(new_value, list):
            deep_merge_lists(current, new_value)
        else:
            original[key] = new_value

If you have an unknown level of dictionaries, then I would suggest a recursive function:
def combineDicts(dictionary1, dictionary2):
    """Return a new dict that recursively combines two nested dicts.

    Keys present in only one input are carried over unchanged. For keys
    present in both, two dict values are combined recursively; otherwise the
    value from ``dictionary2`` wins. Neither input is modified.
    """
    output = dict(dictionary1)
    for item, value in dictionary2.items():
        if isinstance(value, dict) and isinstance(output.get(item), dict):
            output[item] = combineDicts(output[item], value)
        else:
            # New key, or at least one side is not a dict: take dictionary2's
            # value instead of recursing into something that is not a dict.
            output[item] = value
    return output

In case someone wants yet another approach to this problem, here's my solution.
Virtues: short, declarative, and functional in style (recursive, does no mutation).
Potential Drawback: This might not be the merge you're looking for. Consult the docstring for semantics.
def deep_merge(a, b):
    """
    Merge two values without mutating either; `b` takes precedence over `a`.

    Semantics:
    - Two dictionaries produce a new dictionary holding every key of both.
      The value stored for each key is the merge (same rules) of the two
      sides' values, where a key missing on one side counts as `None`.
    - In every other case `b` is returned, unless `b` is `None`, in which
      case `a` is returned.
    """
    if isinstance(a, dict) and isinstance(b, dict):
        merged = {}
        for key in set(a.keys()) | set(b.keys()):
            # .get() yields None for an absent key, which the leaf rule below
            # treats as "no value on this side".
            merged[key] = deep_merge(a.get(key), b.get(key))
        return merged
    if b is None:
        return a
    return b

Overview
The following approach subdivides the problem of a deep merge of dicts into:
A parameterized shallow merge function merge(f)(a,b) that uses a
function f to merge two dicts a and b
A recursive merger function f to be used together with merge
Implementation
A function for merging two (non nested) dicts can be written in a lot of ways. I personally like
def merge(f):
    """Build a shallow dict merger that combines values with ``f``.

    The returned function takes two dicts and returns a new dict over the
    union of their keys. Each value is ``f(a_value, b_value)``, where a key
    missing on one side contributes ``None``.
    """
    def _merge(a, b):
        combined = {}
        for key in a.keys() | b.keys():
            combined[key] = f(a.get(key), b.get(key))
        return combined
    return _merge
A nice way of defining an appropriate recursive merger function f is using multipledispatch which allows to define functions that evaluate along different paths depending on the type of their arguments.
from multipledispatch import dispatch
# Fallback overload for anything that is not a pair of dicts: keep b unless
# it is None, in which case fall back to a.
@dispatch(object, object)
def f(a, b):
    return b if b is not None else a


# Overload for two dicts: recurse by merging them key by key, using f itself
# as the value merger.
@dispatch(dict, dict)
def f(a, b):
    return merge(f)(a, b)
Example
To merge two nested dicts simply use merge(f) e.g.:
# Sample inputs: the dicts overlap only on key 2, whose sub-dicts are disjoint.
dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
# merge(f) builds the merger; calling it with both dicts performs the merge.
merge(f)(dict1, dict2)
#returns {1: {'a': 'A'}, 2: {'b': 'B', 'c': 'C'}, 3: {'d': 'D'}}
Notes:
The advantages of this approach are:
The function is built from smaller functions that each do a single thing
which makes the code simpler to reason about and test
The behaviour is not hard-coded but can be changed and extended as needed which improves code reuse (see example below).
Customization
Some answers also considered dicts that contain lists, e.g. of other (potentially nested) dicts. In this case one might want to map over the lists and merge them based on position. This can be done by adding another definition to the merger function f:
import itertools
# Overload for two lists: merge them position by position. zip_longest pads
# the shorter list with None, so each pair goes back through f (not straight
# into merge(f)): the object/object overload then handles the padding and any
# non-dict items.
@dispatch(list, list)
def f(a, b):
    return [f(*pair) for pair in itertools.zip_longest(a, b)]

There's a slight problem with andrew cookes answer: In some cases it modifies the second argument b when you modify the returned dict. Specifically it's because of this line:
if key in a:
...
else:
a[key] = b[key]
If b[key] is a dict, it will simply be assigned to a, meaning any subsequent modifications to that dict will affect both a and b.
# Demonstrates the aliasing problem: after the first merge, a['1'] and b['1']
# are the same dict object, so merging c into a also changes b.
a={}
b={'1':{'2':'b'}}
c={'1':{'3':'c'}}
merge(merge(a,b), c) # {'1': {'3': 'c', '2': 'b'}}
a # {'1': {'3': 'c', '2': 'b'}} (as expected)
b # {'1': {'3': 'c', '2': 'b'}} <----
c # {'1': {'3': 'c'}} (unmodified)
To fix this, the line would have to be substituted with this:
if isinstance(b[key], dict):
a[key] = clone_dict(b[key])
else:
a[key] = b[key]
Where clone_dict is:
def clone_dict(obj):
    """Return a copy of ``obj`` in which every nested dict is copied too.

    Only dicts are duplicated; any other value (lists, sets, objects, ...)
    is shared between the original and the clone.
    """
    clone = {}
    for key, value in obj.items():
        if isinstance(value, dict):
            clone[key] = clone_dict(value)
        else:
            clone[key] = value
    # Return the copy: a bare ``return`` would hand None back to the caller.
    return clone
Still. This obviously doesn't account for list, set and other stuff, but I hope it illustrates the pitfalls when trying to merge dicts.
And for completeness sake, here is my version, where you can pass it multiple dicts:
def merge_dicts(*args):
    """Merge any number of dicts into a brand-new dict and return it.

    None of the arguments is modified, and nested dicts are cloned before
    being stored, so the result shares no dict objects with the inputs.
    Calling it with no arguments returns an empty dict.

    Raises:
        Exception: If two inputs hold different values under the same key
            path and those values are not both dicts.
    """
    # Imported locally: ``reduce`` is a builtin only on Python 2.
    from functools import reduce

    def clone_dict(obj):
        # Copy nested dicts recursively; other values are shared.
        return {
            key: clone_dict(value) if isinstance(value, dict) else value
            for key, value in obj.items()
        }

    def merge(a, b, path=None):
        if path is None:
            path = []
        for key in b:
            if key in a:
                if isinstance(a[key], dict) and isinstance(b[key], dict):
                    merge(a[key], b[key], path + [str(key)])
                elif a[key] == b[key]:
                    pass  # same leaf value
                else:
                    raise Exception('Conflict at `{path}\''.format(path='.'.join(path + [str(key)])))
            elif isinstance(b[key], dict):
                a[key] = clone_dict(b[key])
            else:
                a[key] = b[key]
        return a

    # Start from a fresh dict so that even the first argument stays untouched.
    return reduce(merge, args, {})

You can use the merge function from the toolz package, for example:
>>> import toolz
>>> dict1 = {1: {'a': 'A'}, 2: {'b': 'B'}}
>>> dict2 = {2: {'c': 'C'}, 3: {'d': 'D'}}
>>> toolz.merge_with(toolz.merge, dict1, dict2)
{1: {'a': 'A'}, 2: {'c': 'C'}, 3: {'d': 'D'}}

This version of the function will account for N number of dictionaries, and only dictionaries -- no improper parameters can be passed, or it will raise a TypeError. The merge itself accounts for key conflicts, and instead of overwriting data from a dictionary further down the merge chain, it creates a set of values and appends to that; no data is lost.
It might not be the most efficient on the page, but it's the most thorough and you're not going to lose any information when you merge your 2 to N dicts.
def merge_dicts(*dicts):
    """Merge two or more dicts into a new dict without discarding values.

    Nested dicts are merged recursively. When two inputs hold different
    non-dict values of the same type under one key, the result stores both
    as a sorted list; equal values are stored once, unchanged.

    Note that a list produced this way has a different type than a scalar
    found under the same key in a later dict, so that combination raises.

    Raises:
        TypeError: If an argument is not a dict, or if the same key maps to
            values of different types.
        ValueError: If fewer than two dicts are given.
    """
    # Imported locally: ``reduce`` is a builtin only on Python 2.
    from functools import reduce

    if not all(isinstance(d, dict) for d in dicts):
        raise TypeError("Object in *dicts not of type dict")
    if len(dicts) < 2:
        raise ValueError("Requires 2 or more dict objects")

    def merge(a, b):
        for d in set(a.keys()).union(b.keys()):
            if d in a and d in b:
                if type(a[d]) is not type(b[d]):
                    raise TypeError("Conflicting key:value type assignment")
                if isinstance(a[d], dict):
                    yield (d, dict(merge(a[d], b[d])))
                elif a[d] == b[d]:
                    # Identical on both sides: keep the single value as-is
                    # rather than passing a scalar to sorted().
                    yield (d, a[d])
                else:
                    yield (d, sorted([a[d], b[d]]))
            elif d in a:
                yield (d, a[d])
            else:
                yield (d, b[d])

    return reduce(lambda x, y: dict(merge(x, y)), dicts[1:], dicts[0])
# Key 1 conflicts (1 vs 2) and is collected into a list; key 2 is merged recursively.
# print() is called as a function so the line also runs on Python 3.
print(merge_dicts({1:1,2:{1:2}},{1:2,2:{3:1}},{4:4}))
output: {1: [1, 2], 2: {1: 2, 3: 1}, 4: 4}

Since dictviews support set operations, I was able to greatly simplify jterrace's answer.
def merge(dict1, dict2):
    """Lazily yield the ``(key, value)`` pairs of a deep merge of two dicts.

    Pairs are produced in three groups: keys only in ``dict1``, keys only in
    ``dict2``, then shared keys, whose values are merged recursively. A
    shared key whose values lack a ``keys`` method raises AttributeError.
    """
    first_keys, second_keys = dict1.keys(), dict2.keys()
    yield from ((key, dict1[key]) for key in first_keys - second_keys)
    yield from ((key, dict2[key]) for key in second_keys - first_keys)
    for key in first_keys & second_keys:
        yield (key, dict(merge(dict1[key], dict2[key])))
Any attempt to combine a dict with a non dict (technically, an object with a 'keys' method and an object without a 'keys' method) will raise an AttributeError. This includes both the initial call to the function and recursive calls. This is exactly what I wanted so I left it. You could easily catch an AttributeErrors thrown by the recursive call and then yield any value you please.

Short-n-sweet:
from collections.abc import MutableMapping as Map
def nested_update(d, v):
    """
    Update dict-like 'd' in place with dict-like 'v', descending into nested
    mappings. Values that are not mappings on both sides are overwritten by
    the value from 'v'. Returns None.
    """
    for key in v:
        incoming = v[key]
        both_mappings = (
            key in d and isinstance(d[key], Map) and isinstance(incoming, Map)
        )
        if not both_mappings:
            d[key] = incoming
            continue
        nested_update(d[key], incoming)
This works like (and is built on) Python's dict.update method. It returns None (you can always add return d if you prefer) as it updates dict d in-place. Keys in v will overwrite any existing keys in d (it does not try to interpret the dict's contents).
It will also work for other ("dict-like") mappings.

I have an iterative solution - works much much better with big dicts & a lot of them (for example jsons etc):
import collections
def merge_dict_with_subdicts(dict1: dict, dict2: dict) -> dict:
    """
    Update dict1 in place with dict2, like dict.update, but merge nested
    dicts instead of replacing them. Works iteratively with an explicit
    stack rather than recursion. Returns dict1.
    """
    pending = [(dict1, dict2)]
    while pending:
        target, source = pending.pop()
        for key, value in source.items():
            both_dicts = isinstance(value, dict) and isinstance(target.get(key), dict)
            if both_dicts:
                # Defer the nested pair; it is merged on a later iteration.
                pending.append((target[key], value))
            else:
                target[key] = value
    return dict1
note that this will use the value in d2 to override d1, in case they are not both dicts. (same as python's dict.update())
some tests:
def test_deep_update():
    """Exercise merge_dict_with_subdicts on one dict through four updates."""
    merged = {}

    # Flat key into an empty dict.
    merge_dict_with_subdicts(merged, {"a": 4})
    assert merged == {"a": 4}

    # A whole nested branch that does not exist yet.
    merge_dict_with_subdicts(merged, {"b": {"c": {"d": 6}}})
    assert merged == {"a": 4, "b": {"c": {"d": 6}}}

    # Overwrite a scalar and extend an existing nested dict.
    merge_dict_with_subdicts(merged, {"a": 3, "b": {"f": 7}})
    assert merged == {"a": 3, "b": {"c": {"d": 6}, "f": 7}}

    # One side holds a dict where the other holds something else.
    merge_dict_with_subdicts(merged, {'a': {'b': 4}})
    assert merged['a']['b'] == 4
I've tested with around ~1200 dicts - this method took 0.4 seconds, while the recursive solution took ~2.5 seconds.

As noted in many other answers, a recursive algorithm makes the most sense here. In general, when working with recursion, it is preferable to create new values rather than trying to modify any input data structure.
We need to define what happens at each merge step. If both inputs are dictionaries, this is easy: we copy across unique keys from each side, and recursively merge the values of the duplicated keys. It's the base cases that cause a problem. It will be easier to understand the logic if we pull out a separate function for that. As a placeholder, we could just wrap the two values in a tuple:
def merge_leaves(x, y):
    """Placeholder leaf rule: keep both values, paired in a tuple."""
    pair = x, y
    return pair
Now the core of our logic looks like:
def merge(x, y):
    """Return a new value that is the recursive merge of ``x`` and ``y``.

    Two dicts are merged key by key into a new dict: shared keys are merged
    recursively and the remaining keys are copied over from whichever side
    has them. Any other pair is handed to ``merge_leaves``.
    """
    both_dicts = isinstance(x, dict) and isinstance(y, dict)
    if not both_dicts:
        return merge_leaves(x, y)
    x_keys, y_keys = x.keys(), y.keys()
    result = {}
    for key in x_keys & y_keys:
        result[key] = merge(x[key], y[key])
    for key in x_keys - y_keys:
        result[key] = x[key]
    for key in y_keys - x_keys:
        result[key] = y[key]
    return result
Let's test it:
>>> x = {'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y = {'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
>>> merge(x, y)
{'f': (1, {'b': 'c'}), 'g': ({'h', 'i'}, 1), 'a': {'d': ('e', 'e'), 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
>>> x # The originals are unmodified.
{'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y
{'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
We can easily modify the leaf-merging rule, for example:
def merge_leaves(x, y):
    """Leaf rule: combine with ``+``, or give Ellipsis if that is unsupported."""
    try:
        combined = x + y
    except TypeError:
        combined = Ellipsis
    return combined
and observe the effects:
>>> merge(x, y)
{'f': Ellipsis, 'g': Ellipsis, 'a': {'d': 'ee', 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
We could also potentially clean this up by using a third-party library to dispatch based on the type of the inputs. For example, using multipledispatch, we could do things like:
# Two dicts: merge key by key. Shared keys recurse through the dispatcher;
# the remaining keys are copied from whichever side has them.
@dispatch(dict, dict)
def merge(x, y):
    x_keys, y_keys = x.keys(), y.keys()
    result = { k: merge(x[k], y[k]) for k in x_keys & y_keys }
    result.update({k: x[k] for k in x_keys - y_keys})
    result.update({k: y[k] for k in y_keys - x_keys})
    return result


# Two strings: concatenate.
@dispatch(str, str)
def merge(x, y):
    return x + y


# Two tuples: concatenate.
@dispatch(tuple, tuple)
def merge(x, y):
    return x + y


# Two lists: concatenate.
@dispatch(list, list)
def merge(x, y):
    return x + y


# Two integers: treated as an irreconcilable conflict.
@dispatch(int, int)
def merge(x, y):
    raise ValueError("integer value conflict")


# Fallback for every other pair of types: keep both values in a tuple.
@dispatch(object, object)
def merge(x, y):
    return (x, y)
This allows us to handle various combinations of leaf-type special cases without writing our own type checking, and also replaces the type check in the main recursive function.

The code will depend on your rules for resolving merge conflicts, of course. Here's a version which can take an arbitrary number of arguments and merges them recursively to an arbitrary depth, without using any object mutation. It uses the following rules to resolve merge conflicts:
dictionaries take precedence over non-dict values ({"foo": {...}} takes precedence over {"foo": "bar"})
later arguments take precedence over earlier arguments (if you merge {"a": 1}, {"a": 2}, and {"a": 3} in order, the result will be {"a": 3})
# Resolve the abstract ``Mapping`` type across Python versions. It lives in
# ``collections.abc`` on Python 3; the ``collections`` alias is Python 2 only
# and no longer exists on Python 3.10+. Trying the old location first would
# make newer interpreters silently fall back to plain ``dict``.
try:
    from collections.abc import Mapping
except ImportError:
    try:
        from collections import Mapping
    except ImportError:
        Mapping = dict
def merge_dicts(*dicts):
    """
    Build and return a new dictionary by merging all arguments recursively.

    For each key, if any argument holds a mapping under it, only those
    mappings are merged (non-mapping values for that key are dropped).
    Otherwise the value from the last argument that has the key is used.
    The arguments themselves are not modified.
    """
    # Collect every key that appears in any argument.
    keys = set()
    for source in dicts:
        keys = keys | set(source)

    updated = {}
    for key in keys:
        candidates = [source[key] for source in dicts if key in source]
        nested = [value for value in candidates if isinstance(value, Mapping)]
        # Mappings take precedence and are merged recursively. Without any,
        # later arguments win, so the last candidate is the answer.
        updated[key] = merge_dicts(*nested) if nested else candidates[-1]
    return updated

I had two dictionaries (a and b) which could each contain any number of nested dictionaries. I wanted to recursively merge them, with b taking precedence over a.
Considering the nested dictionaries as trees, what I wanted was:
To update a so that every path to every leaf in b would be represented in a
To overwrite subtrees of a if a leaf is found in the corresponding path in b
Maintain the invariant that all b leaf nodes remain leafs.
The existing answers were a little complicated for my taste and left some details on the shelf. I hacked together the following, which passes unit tests for my data set.
def merge_map(a, b):
    """Recursively merge ``b`` into ``a``, with ``b`` taking precedence.

    Two dicts are merged in place into ``a``, which is returned. If either
    argument is not a dict, ``b`` is returned as-is, so a leaf in ``b``
    replaces whatever subtree ``a`` had at that position.
    """
    if isinstance(a, dict) and isinstance(b, dict):
        for key, value in b.items():
            if key in a:
                a[key] = merge_map(a[key], value)
            else:
                a[key] = value
        return a
    return b
Example (formatted for clarity):
# Base tree: mixes nested dicts (under 1 -> 'b' and 1 -> 'c') with plain leaves.
a = {
1 : {'a': 'red',
'b': {'blue': 'fish', 'yellow': 'bear' },
'c': { 'orange': 'dog'},
},
2 : {'d': 'green'},
3: 'e'
}
# Overriding tree: 1 -> 'b' is a leaf here, while it is a dict in `a`.
b = {
1 : {'b': 'white'},
2 : {'d': 'black'},
3: 'e'
}
>>> merge_map(a, b)
{1: {'a': 'red',
'b': 'white',
'c': {'orange': 'dog'},},
2: {'d': 'black'},
3: 'e'}
The paths in b that needed to be maintained were:
1 -> 'b' -> 'white'
2 -> 'd' -> 'black'
3 -> 'e'.
a had the unique and non-conflicting paths of:
1 -> 'a' -> 'red'
1 -> 'c' -> 'orange' -> 'dog'
so they are still represented in the merged map.

And just another slight variation:
Here is a pure python3 set based deep update function. It updates nested dictionaries by looping through one level at a time and calls itself to update each next level of dictionary values:
def deep_update(dict_original, dict_update):
    """Return a new dict: ``dict_original`` deeply updated by ``dict_update``.

    Keys present in both inputs are updated recursively, keys present only
    in ``dict_update`` are added, and all other keys are kept. If either
    argument is not a dict, ``dict_update`` is returned as-is. Neither
    input is modified.
    """
    if not (isinstance(dict_original, dict) and isinstance(dict_update, dict)):
        return dict_update
    output = dict(dict_original)
    # Keys on both sides: descend one level and update recursively.
    for key in dict_original.keys() & dict_update.keys():
        output[key] = deep_update(dict_original[key], dict_update[key])
    # Keys only in the update: copy them over.
    for key in dict_update.keys() - dict_original.keys():
        output[key] = dict_update[key]
    return output
A simple example:
# x and y share the path a -> b; 'd' exists on both sides and is overridden by y.
x={'a':{'b':{'c':1, 'd':1}}}
y={'a':{'b':{'d':2, 'e':2}}, 'f':2}
print(deep_update(x, y))
>>> {'a': {'b': {'c': 1, 'd': 2, 'e': 2}}, 'f': 2}

How about another answer?!? This one also avoids mutation/side effects:
def merge(dict1, dict2):
    """Return a new dict that deeply merges ``dict1`` and ``dict2``.

    Neither input is modified. Keys found on one side only are carried
    over. For keys found on both sides, two dict values are merged
    recursively; in every other case the value from ``dict1`` wins.
    """
    output = {}
    # Union of both key sets (dict2's keys first). Where a key exists on
    # both sides, the value seen here is dict1's.
    union = {**dict2, **dict1}
    for key, value in union.items():
        if key in dict1 and key in dict2 and isinstance(value, dict) and isinstance(dict2[key], dict):
            output[key] = merge(value, dict2[key])
        else:
            # Either only one side has the key, or the two values cannot be
            # merged as dicts (dict1 then takes precedence). Recursing on a
            # non-dict would fail.
            output[key] = value
    return output
# The example from the question, with one-element sets standing in for the leaves.
dict1 = {1:{"a":{"A"}}, 2:{"b":{"B"}}}
dict2 = {2:{"c":{"C"}}, 3:{"d":{"D"}}}
# Expected result of merging dict1 and dict2.
dict3 = {1:{"a":{"A"}}, 2:{"b":{"B"},"c":{"C"}}, 3:{"d":{"D"}}}
assert dict3 == merge(dict1, dict2)

This is a solution I made that recursively merges dictionaries to an infinite depth. The first dictionary passed to the function is the master dictionary - values in it will overwrite the values in the same key in the second dictionary.
def merge(dict1: dict, dict2: dict) -> dict:
    """Recursively fill ``dict1`` with whatever it lacks from ``dict2``.

    ``dict1`` is the master: values it already holds are never overwritten.
    Nested dicts are completed key by key. ``dict1`` is modified in place and
    returned; ``dict2`` is not modified, and dicts taken from it are rebuilt
    rather than shared.
    """
    merged = dict1
    for key, value in dict2.items():
        if isinstance(value, dict):
            if key not in dict1:
                # Rebuild the sub-dict so the result does not alias dict2.
                merged[key] = merge({}, value)
            elif isinstance(dict1[key], dict):
                merge(dict1[key], value)
            # Otherwise dict1 holds a non-dict here; as the master it wins,
            # and recursing into it would fail.
        elif key not in dict1:
            merged[key] = value
    return merged

This should help in merging all items from dict2 into dict1:
# Merge dict2 into dict1 one level deep: entries of a shared key are copied
# leaf by leaf, and any other key is taken over wholesale.
for item in dict2:
    if item not in dict1:
        dict1[item] = dict2[item]
        continue
    for leaf in dict2[item]:
        dict1[item][leaf] = dict2[item][leaf]
Please test it and tell us whether this is what you wanted.
EDIT:
The above mentioned solution merges only one level, but correctly solves the example given by OP. To merge multiple levels, the recursion should be used.

I've been testing your solutions and decided to use this one in my project:
def mergedicts(dict1, dict2, conflict, no_conflict):
    """Lazily yield merged ``(key, value)`` pairs using caller-supplied rules.

    For a key present in both dicts the value is
    ``conflict(dict1[key], dict2[key])``; for a key present in only one
    dict it is ``no_conflict(value)``.
    """
    for key in set(dict1.keys()).union(dict2.keys()):
        if key not in dict2:
            yield (key, no_conflict(dict1[key]))
        elif key not in dict1:
            yield (key, no_conflict(dict2[key]))
        else:
            yield (key, conflict(dict1[key], dict2[key]))
# ``reduce`` is a builtin only on Python 2; import it so this also runs on Python 3.
from functools import reduce

dict1 = {1:{"a":"A"}, 2:{"b":"B"}}
dict2 = {2:{"c":"C"}, 3:{"d":"D"}}


# This helper allows for recursion and the use of reduce: it passes itself as
# the conflict handler, so values found under a shared key are merged again.
def f2(x, y):
    return dict(mergedicts(x, y, f2, lambda x: x))


# print() is called as a function so the demo is valid on Python 3 as well.
print(dict(mergedicts(dict1, dict2, f2, lambda x: x)))
print(dict(reduce(f2, [dict1, dict2])))
Passing functions as parameters is the key to extending jterrace's solution so that it behaves like all the other recursive solutions.

Easiest way i can think of is :
#!/usr/bin/python
from copy import deepcopy
def dict_merge(a, b):
    """Return a deep merge of ``b`` into a copy of ``a``.

    ``a`` is deep-copied first, so neither input is modified. If ``b`` is not
    a dict it is returned as-is. For each key of ``b``: when the copy already
    holds a dict under that key, the two values are merged recursively;
    otherwise a deep copy of ``b``'s value is stored.
    """
    if not isinstance(b, dict):
        return b
    result = deepcopy(a)
    # .items() instead of the Python 2-only .iteritems().
    for k, v in b.items():
        if k in result and isinstance(result[k], dict):
            result[k] = dict_merge(result[k], v)
        else:
            result[k] = deepcopy(v)
    return result
# Sample inputs: the dicts overlap only on key 2, whose sub-dicts are disjoint.
a = {1:{"a":'A'}, 2:{"b":'B'}}
b = {2:{"c":'C'}, 3:{"d":'D'}}
# print() is called as a function so the demo also runs on Python 3.
print(dict_merge(a,b))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}

I have another slightly different solution here:
def deepMerge(d1, d2, inconflict=lambda v1, v2: v2):
    """Merge d2 into d1 in place and return d1.

    Nested dicts are merged recursively. When both dicts hold different
    values under the same key (and they are not both dicts), the stored
    value becomes ``inconflict(d1_value, d2_value)``; by default d2 wins.
    """
    for key, theirs in d2.items():
        if key not in d1:
            d1[key] = theirs
            continue
        ours = d1[key]
        if isinstance(ours, dict) and isinstance(theirs, dict):
            deepMerge(ours, theirs, inconflict)
        elif ours != theirs:
            d1[key] = inconflict(ours, theirs)
    return d1
By default it resolves conflicts in favor of values from the second dict, but you can easily override this, with some witchery you may be able to even throw exceptions out of it. :).

class Utils(object):
    """
    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> Utils.merge_dict(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    >>> main = {'a': {'b': {'test': 'bug'}, 'c': 'C'}}
    >>> suply = {'a': {'b': 2, 'd': 'D', 'c': {'test': 'bug2'}}}
    >>> Utils.merge_dict(main, suply) == {'a': {'b': {'test': 'bug'}, 'c': 'C', 'd': 'D'}}
    True
    """

    @staticmethod
    def merge_dict(main, suply):
        """
        Merge ``suply`` into ``main`` and return ``main``.

        ``main`` is the primary dict and ``suply`` only supplements it: keys
        missing from ``main`` are added, nested dicts are completed
        recursively, and on any conflict the value already in ``main`` is
        kept. ``main`` is modified in place.

        :return: the same object that was passed as ``main``
        """
        for key, value in suply.items():
            if key not in main:
                main[key] = value
            elif isinstance(main[key], dict) and isinstance(value, dict):
                Utils.merge_dict(main[key], value)
            # Otherwise the key exists in main and the two values cannot be
            # merged as dicts: main takes precedence, so nothing changes.
        return main
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    from doctest import testmod
    testmod()

Hey there, I had the same problem and thought of a solution, which I will post here in case it is useful for others. It merges nested dictionaries and also adds up the values of matching keys; I needed this to calculate some probabilities, so it worked great for me:
#used to copy a nested dict to a nested dict
def deepupdate(target, src):
    """Merge the two-level dict ``src`` into ``target`` in place.

    An outer key missing from ``target`` receives a deep copy of ``src``'s
    inner dict. For an outer key present in both, each inner value is added
    (``+=``) to the existing one, or inserted if the inner key is new.
    Returns None.
    """
    for outer_key, inner in src.items():
        if outer_key not in target:
            target[outer_key] = copy.deepcopy(inner)
            continue
        bucket = target[outer_key]
        for inner_key, amount in inner.items():
            if inner_key in bucket:
                bucket[inner_key] += amount
            else:
                bucket[inner_key] = amount
by using the above method we can merge:
target = {'6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1}, '6,63': {'63,4': 1}}
src = {'5,4': {'4,4': 1}, '5,5': {'5,4': 1}, '4,4': {'4,3': 1}}
and this will become:
{'5,5': {'5,4': 1}, '5,4': {'4,4': 1}, '6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 2}, '6,63': {'63,4': 1}}
also notice the changes here:
target = {'6,6': {'6,63': 1}, '6,63': {'63,4': 1}, '4,4': {'4,3': 1}, '63,4': {'4,4': 1}}
src = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '4,4': {'4,9': 1}, '3,4': {'4,4': 1}, '5,5': {'5,4': 1}}
merge = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '6,63': {'63,4': 1}, '5,5': {'5,4': 1}, '6,6': {'6,63': 1}, '3,4': {'4,4': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1, '4,9': 1}}
Don't forget to also add the import for copy:
import copy

from collections import defaultdict
from itertools import chain
class DictHelper:
    """Namespace for dictionary helper functions."""

    @staticmethod
    def merge_dictionaries(*dictionaries, override=True):
        """Merge any number of dicts into a new dict, recursing into nested dicts.

        A key whose value is ``None`` is treated like a missing key. For each
        key, all remaining values must share one exact type:

        * ``list``: the items of all lists are collected into one ``set``.
        * ``dict``: the dicts are merged recursively with the same
          ``override`` setting.
        * anything else: the value from the last dict wins when ``override``
          is true; otherwise all values are returned as a list.

        Raises:
            Exception: If the values under one key do not share a single
                type (this includes a key that is only ever ``None``).
        """
        merged_dict = {}
        # Every key that appears in at least one of the dictionaries.
        all_unique_keys = set(chain.from_iterable(dictionaries))
        for key in all_unique_keys:
            # Values actually present for this key, in argument order.
            values = [
                dictionary[key]
                for dictionary in dictionaries
                if dictionary.get(key) is not None
            ]
            keys_value_type = list({type(value) for value in values})
            if len(keys_value_type) != 1:
                raise Exception("Different objects type for same key: {keys_value_type}".format(keys_value_type=keys_value_type))
            if keys_value_type[0] is list:
                merged_dict[key] = set(chain.from_iterable(values))
            elif keys_value_type[0] is dict:
                # Pass ``override`` down so that nested levels follow the
                # same conflict policy as the top level.
                merged_dict[key] = DictHelper.merge_dictionaries(*values, override=override)
            else:
                merged_dict[key] = values[-1] if override else values
        return merged_dict
if __name__ == '__main__':
    # Lists stored under the same key are combined into one set per key.
    d1 = {'aaaaaaaaa': ['to short', 'to long'], 'bbbbb': ['to short', 'to long'], "cccccc": ["the is a test"]}
    d2 = {'aaaaaaaaa': ['field is not a bool'], 'bbbbb': ['field is not a bool']}
    d3 = {'aaaaaaaaa': ['filed is not a string', "to short"], 'bbbbb': ['field is not an integer']}
    print(DictHelper.merge_dictionaries(d1, d2, d3))
    # Nested dicts are merged recursively; for scalars the last value wins.
    d4 = {"a": {"x": 1, "y": 2, "z": 3, "d": {"x1": 10}}}
    d5 = {"a": {"x": 10, "y": 20, "d": {"x2": 20}}}
    print(DictHelper.merge_dictionaries(d4, d5))
Output:
{'bbbbb': {'to long', 'field is not an integer', 'to short', 'field is not a bool'},
'aaaaaaaaa': {'to long', 'to short', 'filed is not a string', 'field is not a bool'},
'cccccc': {'the is a test'}}
{'a': {'y': 20, 'd': {'x1': 10, 'x2': 20}, 'z': 3, 'x': 10}}

Related

Bug in recursive dict-merging function

I want to merge inner dictionaries under a specific key (let's say 'x') into the outer dictionary. Lists, dicts and tuples should be traversed recursively.
For example:
>>> key_upmerge({'x': {1: 2, 3: 4}}, 'x')
{1: 2, 3: 4}
>>> key_upmerge({'x': {1: 2, 'x': {3: 4}}}, 'x')
{1: 2, 3: 4}
>>> key_upmerge({'x': {1: 2}, 2: 3}, 'x')
{1: 2, 2: 3}
>>> key_upmerge({'x': {1: 2, 'x': {1: 3}}}, 'x')
[...]
ValueError (clashing keys)
>>> key_upmerge([{'x': {1: 2}}, {'x': {3: 4}}], 'x')
[{1: 2}, {3: 4}]
My function already works for all the test cases above.
def key_upmerge(d, key):
    """Lift every dict stored under ``key`` into the dict that contains it.

    Lists and tuples are rebuilt with each element processed recursively;
    dict values are processed recursively before being inspected.

    :param d: a dict, list or tuple to process.
    :param key: the key whose dict value is merged into its parent.
    :return: a new object of the same kind as ``d``.
    :raises ValueError: if lifting a dict would overwrite a key of the
        parent, regardless of which of the two appears first.
    :raises TypeError: if ``d`` is not a dict, list or tuple.
    """
    if isinstance(d, (list, tuple)):
        return type(d)(key_upmerge(x, key) for x in d)
    if not isinstance(d, dict):
        raise TypeError('expected dict, list or tuple')

    result = {}
    for k, v in d.items():
        # NOTE: only the value needs checking; keys cannot contain dicts.
        if isinstance(v, (dict, list, tuple)):
            v = key_upmerge(v, key)
        if k == key and isinstance(v, dict):
            if both := (result.keys() & v.keys()):
                msg = f'Cannot merge dict into upper dict: clashing keys {both}'
                raise ValueError(msg)
            result.update(v)
        elif k in result:
            # A previously lifted dict already provided this key.
            raise ValueError(f'clashing key {k!r}')
        else:
            result[k] = v
    return result
The problem is that I also expected a ValueError for the following test cases, but the function returns a result.
>>> key_upmerge({'x': {1: 2}, 1: 3}, 'x')
{1: 3} # expected ValueError due to clashing keys
>>> key_upmerge({'x': {1: 2}, 1: {'x': {3: 4}}}, 'x')
{1: {3: 4}} # expected ValueError due to clashing keys
You build the result dict in order. You only check for key clashes when merging in a dict, not when assigning individual values to a key. Since your test cases always have the merged dict appear first ('x': {1, 2} precedes 1: 3), the clashing key doesn't exist in the result yet, and no clash is detected. Your test works only when the clashing individual key precedes the dict to be merged (key_upmerge({1: 3, 'x': {1: 2}}, 'x') dies as you expect).
To catch cases where a clashing individual key-value pair is inserted after a merge, replace:
else:
result[k] = v
with:
elif k in result:
raise ValueError(f'clashing key {k!r}')
else:
result[k] = v
verifying that k is not in result when you insert individual key-value pairs, not just when you perform bulk merges.

Enrich Python dictionary with data from second dictionary [duplicate]

I need to merge multiple dictionaries, here's what I have for instance:
dict1 = {1:{"a":{A}}, 2:{"b":{B}}}
dict2 = {2:{"c":{C}}, 3:{"d":{D}}}
With A B C and D being leaves of the tree, like {"info1":"value", "info2":"value2"}
There is an unknown level(depth) of dictionaries, it could be {2:{"c":{"z":{"y":{C}}}}}
In my case it represents a directory/files structure with nodes being docs and leaves being files.
I want to merge them to obtain:
dict3 = {1:{"a":{A}}, 2:{"b":{B},"c":{C}}, 3:{"d":{D}}}
I'm not sure how I could do that easily with Python.
This is actually quite tricky - particularly if you want a useful error message when things are inconsistent, while correctly accepting duplicate but consistent entries (something no other answer here does..)
Assuming you don't have huge numbers of entries, a recursive function is easiest:
from functools import reduce
def merge(a, b, path=None):
    """Merge ``b`` into ``a`` in place and return ``a``.

    Nested dicts are merged recursively. Identical leaf values are accepted;
    differing values under the same key are a conflict. Values copied from
    ``b`` are stored by reference, not cloned.

    :param a: dictionary that is updated.
    :param b: dictionary whose entries are added to ``a``.
    :param path: keys leading to the current level, as strings; only used
        to build the error message.
    :return: ``a``.
    :raises Exception: ``'Conflict at <dotted path>'`` on a conflict.
    """
    if path is None:
        path = []
    for key in b:
        if key not in a:
            a[key] = b[key]
        elif isinstance(a[key], dict) and isinstance(b[key], dict):
            merge(a[key], b[key], path + [str(key)])
        elif a[key] == b[key]:
            pass  # same leaf value
        else:
            raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
    return a
# Works: the two trees share key 2 but have no conflicting leaves.
print(merge({1:{"a":"A"},2:{"b":"B"}}, {2:{"c":"C"},3:{"d":"D"}}))
# Has a conflict: leaf 2 -> "b" is "B" on the left but "C" on the right, so merge() raises.
merge({1:{"a":"A"},2:{"b":"B"}}, {1:{"a":"A"},2:{"b":"C"}})
note that this mutates a - the contents of b are added to a (which is also returned). If you want to keep a you could call it like merge(dict(a), b).
agf pointed out (below) that you may have more than two dicts, in which case you can use:
reduce(merge, [dict1, dict2, dict3...])
where everything will be added to dict1.
Note: I edited my initial answer to mutate the first argument; that makes the "reduce" easier to explain
You could try mergedeep.
Installation
$ pip3 install mergedeep
Usage
from mergedeep import merge
a = {"keyA": 1}
b = {"keyB": {"sub1": 10}}
c = {"keyB": {"sub2": 20}}
# The result ends up in the first argument, as the printed value of `a` below shows.
merge(a, b, c)
print(a)
# {"keyA": 1, "keyB": {"sub1": 10, "sub2": 20}}
For a full list of options, check out the docs!
Here's an easy way to do it using generators:
def mergedicts(dict1, dict2):
    """Yield the ``(key, value)`` pairs of the deep merge of two dicts.

    Wrap the generator in ``dict(...)`` to obtain the merged dictionary.
    Neither input is modified.

    :param dict1: first dictionary.
    :param dict2: second dictionary; wins when values cannot be merged.
    :return: a generator of ``(key, merged_value)`` tuples in no
        particular order.
    """
    for k in dict1.keys() | dict2.keys():
        if k in dict1 and k in dict2:
            if isinstance(dict1[k], dict) and isinstance(dict2[k], dict):
                yield (k, dict(mergedicts(dict1[k], dict2[k])))
            else:
                # If one of the values is not a dict, merging cannot go on:
                # the value from the second dict overrides the first.
                # Alternatively, raise here to be alerted of value conflicts.
                yield (k, dict2[k])
        elif k in dict1:
            yield (k, dict1[k])
        else:
            yield (k, dict2[k])
dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
# mergedicts() is a generator of (key, value) pairs; dict() materializes it.
print(dict(mergedicts(dict1, dict2)))
This prints:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
One issue with this question is that the values of the dict can be arbitrarily complex pieces of data. Based upon these and other answers I came up with this code:
class YamlReaderError(Exception):
    """Raised by ``data_merge`` when two values cannot be merged."""
def data_merge(a, b):
    """Merge ``b`` into ``a`` and return the merged result.

    Rules, chosen by the type of ``a``:

    * ``None``, ``str``, ``int`` or ``float``: replaced by ``b``;
    * ``list``: extended by ``b`` if it is a list, else ``b`` is appended;
    * ``dict``: ``b`` must be a dict; shared keys are merged recursively
      and new keys are added.

    Lists and dicts are modified in place.

    NOTE: tuples and arbitrary objects are not handled, as it is ambiguous
    what should happen to them.

    :raises YamlReaderError: for unsupported type combinations, or when a
        ``TypeError`` occurs while merging.
    """
    key = None
    try:
        if a is None or isinstance(a, (str, int, float)):
            # Border case for the first run, or ``a`` is a primitive.
            a = b
        elif isinstance(a, list):
            # Lists can only grow.
            if isinstance(b, list):
                a.extend(b)
            else:
                a.append(b)
        elif isinstance(a, dict):
            # Dicts must be merged key by key.
            if isinstance(b, dict):
                for key in b:
                    if key in a:
                        a[key] = data_merge(a[key], b[key])
                    else:
                        a[key] = b[key]
            else:
                raise YamlReaderError('Cannot merge non-dict "%s" into dict "%s"' % (b, a))
        else:
            raise YamlReaderError('NOT IMPLEMENTED "%s" into "%s"' % (b, a))
    except TypeError as e:
        raise YamlReaderError('TypeError "%s" in key "%s" when merging "%s" into "%s"' % (e, key, b, a)) from e
    return a
My use case is merging YAML files where I only have to deal with a subset of possible data types. Hence I can ignore tuples and other objects. For me a sensible merge logic means
replace scalars
append lists
merge dicts by adding missing keys and updating existing keys
Everything else and the unforeseens results in an error.
Dictionaries of dictionaries merge
As this is the canonical question (in spite of certain non-generalities) I'm providing the canonical Pythonic approach to solving this issue.
Simplest Case: "leaves are nested dicts that end in empty dicts":
d1 = {'a': {1: {'foo': {}}, 2: {}}}
d2 = {'a': {1: {}, 2: {'bar': {}}}}
d3 = {'b': {3: {'baz': {}}}}
d4 = {'a': {1: {'quux': {}}}}
This is the simplest case for recursion, and I would recommend two naive approaches:
def rec_merge1(d1, d2):
    """Return a new dict of dicts that merges ``d2`` into ``d1``.

    Intended for trees whose values are all dicts. Neither argument is
    modified; sub-dicts present in only one input are shared with the
    result rather than copied.

    :param d1: first dict of dicts.
    :param d2: second dict of dicts.
    :return: the merged dict.
    """
    d3 = d1.copy()
    for k, v in d2.items():
        # Shared keys are merged recursively; new keys are taken as they are.
        d3[k] = rec_merge1(d1[k], v) if k in d1 else v
    return d3
def rec_merge2(d1, d2):
    """Update ``d1`` recursively with ``d2`` and return ``d1``.

    Intended for trees whose values are all dicts. ``d1`` is modified in
    place; ``d2`` itself is left untouched, although sub-dicts that exist
    only in ``d2`` are inserted into ``d1`` by reference.

    :param d1: dict of dicts that receives the update.
    :param d2: dict of dicts that is merged in.
    :return: ``d1``.
    """
    for k, v in d2.items():
        if k in d1:
            rec_merge2(d1[k], v)
        else:
            d1[k] = v
    return d1
I believe I would prefer the second to the first, but keep in mind that the original state of the first would have to be rebuilt from its origin. Here's the usage:
>>> from functools import reduce # only required for Python 3.
>>> reduce(rec_merge1, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
>>> reduce(rec_merge2, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
Complex Case: "leaves are of any other type:"
So if they end in dicts, it's a simple case of merging the end empty dicts. If not, it's not so trivial. If strings, how do you merge them? Sets can be updated similarly, so we could give that treatment, but we lose the order in which they were merged. So does order matter?
So in lieu of more information, the simplest approach will be to give them the standard update treatment if both values are not dicts: i.e. the second dict's value will overwrite the first, even if the second dict's value is None and the first's value is a dict with a lot of info.
d1 = {'a': {1: 'foo', 2: None}}
d2 = {'a': {1: None, 2: 'bar'}}
d3 = {'b': {3: 'baz'}}
d4 = {'a': {1: 'quux'}}
from collections.abc import MutableMapping
def rec_merge(d1, d2):
    """Return a new mapping that recursively merges ``d2`` into ``d1``.

    When both sides hold a ``MutableMapping`` under a key, the two are merged
    recursively. In every other case the value from ``d2`` overwrites the
    one from ``d1``, even if it is ``None``. Neither argument is modified.

    :param d1: first mapping; must provide ``copy()``.
    :param d2: second mapping; its leaves take precedence.
    :return: the merged mapping (a shallow copy of ``d1``, updated).
    """
    d3 = d1.copy()
    for k, v in d2.items():
        # This check is the only difference to the dict-only variants.
        if k in d1 and all(isinstance(e, MutableMapping) for e in (d1[k], v)):
            d3[k] = rec_merge(d1[k], v)
        else:
            # Types could be inspected further here to merge as appropriate.
            d3[k] = v
    return d3
And now
from functools import reduce
reduce(rec_merge, (d1, d2, d3, d4))
returns
{'a': {1: 'quux', 2: 'bar'}, 'b': {3: 'baz'}}
Application to the original question:
I've had to remove the curly braces around the letters and put them in single quotes for this to be legit Python (else they would be set literals in Python 2.7+) as well as append a missing brace:
dict1 = {1:{"a":'A'}, 2:{"b":'B'}}
dict2 = {2:{"c":'C'}, 3:{"d":'D'}}
and rec_merge(dict1, dict2) now returns:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
Which matches the desired outcome of the original question (after changing, e.g. the {A} to 'A'.)
Based on @andrew cooke's answer. This version handles nested lists of dicts and also allows the option to update the values
def merge(a, b, path=None, update=True):
    """Merge ``b`` into ``a`` in place and return ``a``.

    See http://stackoverflow.com/questions/7204805/python-dictionaries-of-dictionaries-merge

    * Dicts under the same key are merged recursively.
    * Lists under the same key are merged position by position: dict
      elements are merged recursively, other differing elements follow the
      ``update`` rule, and surplus elements of ``b``'s list are appended.
    * Any other differing value is replaced by the one from ``b`` when
      ``update`` is true; otherwise it is a conflict.

    :param a: dictionary that is updated.
    :param b: dictionary that is merged in.
    :param path: keys leading to the current level, as strings (used for
        the error message only).
    :param update: overwrite conflicting values instead of raising. The
        setting applies at every nesting level.
    :return: ``a``.
    :raises Exception: ``'Conflict at <dotted path>'`` when ``update`` is
        false and two values differ.
    """
    if path is None:
        path = []
    for key in b:
        here = path + [str(key)]
        if key not in a:
            a[key] = b[key]
        elif isinstance(a[key], dict) and isinstance(b[key], dict):
            merge(a[key], b[key], here, update=update)
        elif a[key] == b[key]:
            pass  # same leaf value
        elif isinstance(a[key], list) and isinstance(b[key], list):
            target = a[key]
            for idx, val in enumerate(b[key]):
                if idx >= len(target):
                    # ``b`` has more elements than ``a``: keep the extras.
                    target.append(val)
                elif isinstance(target[idx], dict) and isinstance(val, dict):
                    merge(target[idx], val, here + [str(idx)], update=update)
                elif target[idx] == val:
                    pass
                elif update:
                    target[idx] = val
                else:
                    raise Exception('Conflict at %s' % '.'.join(here + [str(idx)]))
        elif update:
            a[key] = b[key]
        else:
            raise Exception('Conflict at %s' % '.'.join(here))
    return a
This simple recursive procedure will merge one dictionary into another while overriding conflicting keys:
#!/usr/bin/env python2.7
def merge_dicts(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1`` and return the result.

    If either argument is not a dict, ``dict2`` is returned, so on any
    conflict the value from ``dict2`` wins. Otherwise ``dict1`` is updated
    in place and returned.

    :param dict1: dictionary that is updated (or any value, when recursing).
    :param dict2: dictionary that is merged in (or any value).
    :return: ``dict1`` after merging, or ``dict2`` when either is not a dict.
    """
    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        return dict2
    for k, incoming in dict2.items():
        dict1[k] = merge_dicts(dict1[k], incoming) if k in dict1 else incoming
    return dict1
# No conflicting leaves: the nested dicts under key 2 are combined.
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {2:{"c":"C"}, 3:{"d":"D"}}))
# Conflicting leaf 2 -> "b": the value from the second dict ("C") wins.
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {1:{"a":"A"}, 2:{"b":"C"}}))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
{1: {'a': 'A'}, 2: {'b': 'C'}}
Based on the answer from @andrew cooke.
It takes care of nested lists in a better way.
def deep_merge_lists(original, incoming):
    """Deep merge two lists position by position. Modifies ``original``.

    For each index present in both lists, if both elements are

    a. dicts: ``deep_merge_dicts`` is called on them;
    b. lists: ``deep_merge_lists`` is called on them recursively;
    c. anything else, or of conflicting types: the element is overridden
       by the incoming one.

    If ``incoming`` is longer than ``original``, the extra elements are
    appended.

    :param original: list that is updated in place.
    :param incoming: list whose elements are merged in.
    :return: ``None``.
    """
    common_length = min(len(original), len(incoming))
    for idx in range(common_length):
        current, new = original[idx], incoming[idx]
        if isinstance(current, dict) and isinstance(new, dict):
            deep_merge_dicts(current, new)
        elif isinstance(current, list) and isinstance(new, list):
            deep_merge_lists(current, new)
        else:
            original[idx] = new
    original.extend(incoming[common_length:])
def deep_merge_dicts(original, incoming):
    """Deep merge two dictionaries. Modifies ``original``.

    For a key present in both, if both values are

    a. dicts: ``deep_merge_dicts`` is called on them recursively;
    b. lists: ``deep_merge_lists`` is called on them;
    c. anything else, or of conflicting types: the value is overridden by
       the incoming one.

    Keys present only in ``incoming`` are added.

    :param original: dictionary that is updated in place.
    :param incoming: dictionary whose entries are merged in.
    :return: ``None``.
    """
    for key, new in incoming.items():
        if key not in original:
            original[key] = new
            continue
        current = original[key]
        if isinstance(current, dict) and isinstance(new, dict):
            deep_merge_dicts(current, new)
        elif isinstance(current, list) and isinstance(new, list):
            deep_merge_lists(current, new)
        else:
            original[key] = new
If you have an unknown level of dictionaries, then I would suggest a recursive function:
def combineDicts(dictionary1, dictionary2):
    """Return a new dict that recursively combines the two dictionaries.

    When both values under a key are dicts they are combined recursively;
    otherwise the value from ``dictionary2`` wins. Neither argument is
    modified.

    :param dictionary1: first dictionary.
    :param dictionary2: second dictionary; takes precedence on conflicts.
    :return: the combined dictionary.
    """
    output = dict(dictionary1)
    for item, value in dictionary2.items():
        if isinstance(value, dict) and isinstance(output.get(item), dict):
            output[item] = combineDicts(output[item], value)
        else:
            output[item] = value
    return output
In case someone wants yet another approach to this problem, here's my solution.
Virtues: short, declarative, and functional in style (recursive, does no mutation).
Potential Drawback: This might not be the merge you're looking for. Consult the docstring for semantics.
def deep_merge(a, b):
    """Merge two values, with ``b`` taking precedence over ``a``.

    Semantics:

    - If either ``a`` or ``b`` is not a dictionary, ``a`` is returned only
      if ``b`` is ``None``; otherwise ``b`` is returned.
    - If both values are dictionaries, a new dictionary is built:

      * each key found only in ``a`` or only in ``b`` is included with its
        value intact;
      * for any key common to both, the two values are merged with these
        same semantics.

    ``None`` therefore means "no value"; a ``None`` in ``b`` never
    overrides a value in ``a``. Neither input is modified.
    """
    if not isinstance(a, dict) or not isinstance(b, dict):
        return a if b is None else b
    # Both are dictionaries (or subtypes): merge over the union of keys.
    # ``dict.get`` returns ``None`` for an absent key, which the base case
    # above treats as the absence of a value.
    return {
        key: deep_merge(a.get(key), b.get(key))
        for key in a.keys() | b.keys()
    }
Overview
The following approach subdivides the problem of a deep merge of dicts into:
A parameterized shallow merge function merge(f)(a,b) that uses a
function f to merge two dicts a and b
A recursive merger function f to be used together with merge
Implementation
A function for merging two (non nested) dicts can be written in a lot of ways. I personally like
def merge(f):
    """Return a shallow dict-merging function that combines values with ``f``.

    The returned function takes two dicts ``a`` and ``b`` and builds a new
    dict over the union of their keys, each value being
    ``f(a.get(key), b.get(key))``; a missing key is passed as ``None``.

    :param f: callable taking the two values stored under one key.
    :return: a function ``(a, b) -> dict``.
    """
    def merger(a, b):
        return {key: f(a.get(key), b.get(key)) for key in a.keys() | b.keys()}
    return merger
A nice way of defining an appropriate recursive merger function f is using multipledispatch which allows to define functions that evaluate along different paths depending on the type of their arguments.
from multipledispatch import dispatch
# For anything that is not a pair of dicts: the second value wins unless
# it is None (merge passes None for a missing key).
@dispatch(object, object)
def f(a, b):
    """Return ``b`` unless it is ``None``, in which case return ``a``."""
    return b if b is not None else a


# For two dicts: recurse by merging them key by key with f itself.
@dispatch(dict, dict)
def f(a, b):
    """Deep-merge two dicts by applying ``merge(f)`` to them."""
    return merge(f)(a, b)
Example
To merge two nested dicts simply use merge(f) e.g.:
dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
merge(f)(dict1, dict2)
#returns {1: {'a': 'A'}, 2: {'b': 'B', 'c': 'C'}, 3: {'d': 'D'}}
Notes:
The advantages of this approach are:
The function is built from smaller functions that each do a single thing
which makes the code simpler to reason about and test
The behaviour is not hard-coded but can be changed and extended as needed which improves code reuse (see example below).
Customization
Some answers also considered dicts that contain lists e.g. of other (potentially nested) dicts. In this case one might want map over the lists and merge them based on position. This can be done by adding another definition to the merger function f:
import itertools
@dispatch(list, list)
def f(a, b):
    """Merge two lists position by position.

    Each pair of elements is merged with ``f`` itself, so every overload
    applies. ``zip_longest`` pads the shorter list with ``None``.
    """
    return [f(*pair) for pair in itertools.zip_longest(a, b)]
There's a slight problem with andrew cooke's answer: in some cases it modifies the second argument b when you modify the returned dict. Specifically, it's because of this line:
if key in a:
...
else:
a[key] = b[key]
If b[key] is a dict, it will simply be assigned to a, meaning any subsequent modifications to that dict will affect both a and b.
a={}
b={'1':{'2':'b'}}
c={'1':{'3':'c'}}
merge(merge(a,b), c) # {'1': {'3': 'c', '2': 'b'}}
a # {'1': {'3': 'c', '2': 'b'}} (as expected)
b # {'1': {'3': 'c', '2': 'b'}} <----
c # {'1': {'3': 'c'}} (unmodified)
To fix this, the line would have to be substituted with this:
if isinstance(b[key], dict):
a[key] = clone_dict(b[key])
else:
a[key] = b[key]
Where clone_dict is:
def clone_dict(obj):
    """Return a copy of ``obj`` in which every nested dict is copied too.

    Only dicts are cloned; lists, sets and other containers stored as
    values are shared with the original.

    :param obj: the dictionary to clone.
    :return: the new dictionary.
    """
    clone = {}
    for key, value in obj.items():
        if isinstance(value, dict):
            clone[key] = clone_dict(value)
        else:
            clone[key] = value
    return clone
Still. This obviously doesn't account for list, set and other stuff, but I hope it illustrates the pitfalls when trying to merge dicts.
And for completeness sake, here is my version, where you can pass it multiple dicts:
def merge_dicts(*args):
    """Merge any number of dicts into a new dict without touching the inputs.

    Nested dicts are merged recursively. Equal values under the same key are
    accepted; differing values are a conflict. Dicts taken over from an
    input are cloned, so the result shares no dict with the arguments.

    :param args: the dictionaries to merge, in order.
    :return: a new merged dictionary (``{}`` when called without arguments).
    :raises Exception: on conflicting values, naming the dotted key path.
    """
    # Local import: the function must not depend on a module-level import.
    from functools import reduce

    def clone_dict(obj):
        """Copy ``obj`` and, recursively, every dict nested inside it."""
        clone = {}
        for key, value in obj.items():
            if isinstance(value, dict):
                clone[key] = clone_dict(value)
            else:
                clone[key] = value
        return clone

    def merge(a, b, path=None):
        """Merge ``b`` into ``a`` in place; ``path`` is for error messages."""
        if path is None:
            path = []
        for key in b:
            if key in a:
                if isinstance(a[key], dict) and isinstance(b[key], dict):
                    merge(a[key], b[key], path + [str(key)])
                elif a[key] == b[key]:
                    pass
                else:
                    raise Exception('Conflict at `{path}\''.format(path='.'.join(path + [str(key)])))
            else:
                if isinstance(b[key], dict):
                    a[key] = clone_dict(b[key])
                else:
                    a[key] = b[key]
        return a

    # Start from a fresh dict so that even the first argument is untouched.
    return reduce(merge, args, {})
You can use the merge function from the toolz package, for example:
>>> import toolz
>>> dict1 = {1: {'a': 'A'}, 2: {'b': 'B'}}
>>> dict2 = {2: {'c': 'C'}, 3: {'d': 'D'}}
>>> toolz.merge_with(toolz.merge, dict1, dict2)
{1: {'a': 'A'}, 2: {'c': 'C'}, 3: {'d': 'D'}}
This version of the function will account for N number of dictionaries, and only dictionaries -- no improper parameters can be passed, or it will raise a TypeError. The merge itself accounts for key conflicts, and instead of overwriting data from a dictionary further down the merge chain, it creates a set of values and appends to that; no data is lost.
It might not be the most efficient on the page, but it's the most thorough and you're not going to lose any information when you merge your 2 to N dicts.
def merge_dicts(*dicts):
    """Merge two or more dicts into a new dict, keeping conflicting values.

    The dicts are merged pairwise from left to right. For a key present on
    both sides the values must have the same type:

    * dicts are merged recursively;
    * equal values are kept as they are;
    * different values are returned together as a sorted two-element list.

    :param dicts: two or more dictionaries.
    :return: the merged dictionary.
    :raises TypeError: if an argument is not a dict, or two values under
        the same key have different types.
    :raises ValueError: if fewer than two dicts are given.
    """
    if not all(isinstance(d, dict) for d in dicts):
        raise TypeError("Object in *dicts not of type dict")
    if len(dicts) < 2:
        raise ValueError("Requires 2 or more dict objects")

    def merge(a, b):
        """Yield the merged ``(key, value)`` pairs of ``a`` and ``b``."""
        for d in a.keys() | b.keys():
            if d in a and d in b:
                if type(a[d]) != type(b[d]):
                    raise TypeError("Conflicting key:value type assignment")
                if isinstance(a[d], dict):
                    yield (d, dict(merge(a[d], b[d])))
                elif a[d] == b[d]:
                    # A single distinct value stays a scalar.
                    yield (d, a[d])
                else:
                    yield (d, sorted([a[d], b[d]]))
            elif d in a:
                yield (d, a[d])
            else:
                yield (d, b[d])

    result = dicts[0]
    for other in dicts[1:]:
        result = dict(merge(result, other))
    return result
print(merge_dicts({1:1,2:{1:2}},{1:2,2:{3:1}},{4:4}))
output: {1: [1, 2], 2: {1: 2, 3: 1}, 4: 4}
Since dictviews support set operations, I was able to greatly simplify jterrace's answer.
def merge(dict1, dict2):
    """Yield the ``(key, value)`` pairs of the deep merge of two dicts.

    Keys found in only one dict keep their value; for keys found in both,
    the two values are merged recursively and must both provide ``keys()``.
    Wrap the generator in ``dict(...)`` to build the merged dict.

    :raises AttributeError: if a value under a shared key (or an argument)
        has no ``keys`` method, i.e. a dict is combined with a non-dict.
    """
    # dict views support set operations, so the three cases are three sets.
    for k in dict1.keys() - dict2.keys():
        yield (k, dict1[k])
    for k in dict2.keys() - dict1.keys():
        yield (k, dict2[k])
    for k in dict1.keys() & dict2.keys():
        yield (k, dict(merge(dict1[k], dict2[k])))
Any attempt to combine a dict with a non dict (technically, an object with a 'keys' method and an object without a 'keys' method) will raise an AttributeError. This includes both the initial call to the function and recursive calls. This is exactly what I wanted so I left it. You could easily catch an AttributeErrors thrown by the recursive call and then yield any value you please.
Short-n-sweet:
from collections.abc import MutableMapping as Map
def nested_update(d, v):
    """Nested update of dict-like ``d`` with dict-like ``v``, in place.

    When a key exists in ``d`` and both values are mutable mappings, they
    are updated recursively; in every other case the value from ``v``
    replaces or creates the entry in ``d``.

    :param d: mapping that is updated.
    :param v: mapping providing the new values.
    :return: ``None``, like ``dict.update``.
    """
    for key, new in v.items():
        if key in d and isinstance(d[key], Map) and isinstance(new, Map):
            nested_update(d[key], new)
        else:
            d[key] = new
This works like (and is built on) Python's dict.update method. It returns None (you can always add return d if you prefer) as it updates dict d in-place. Keys in v will overwrite any existing keys in d (it does not try to interpret the dict's contents).
It will also work for other ("dict-like") mappings.
I have an iterative solution - works much much better with big dicts & a lot of them (for example jsons etc):
import collections
def merge_dict_with_subdicts(dict1: dict, dict2: dict) -> dict:
    """Update ``dict1`` with ``dict2`` like ``dict.update``, but merge nested dicts.

    The merge is iterative: pairs of dicts still to be merged are kept on
    an explicit work list instead of the call stack. When both values under
    a key are dicts they are merged; otherwise the value from ``dict2``
    overrides the one in ``dict1``.

    :param dict1: dictionary that is updated in place.
    :param dict2: dictionary that is merged in.
    :return: ``dict1``.
    """
    q = collections.deque([(dict1, dict2)])
    while q:
        d1, d2 = q.pop()
        for k, v in d2.items():
            if k in d1 and isinstance(d1[k], dict) and isinstance(v, dict):
                # Defer the nested pair instead of recursing into it.
                q.append((d1[k], v))
            else:
                d1[k] = v
    return dict1
note that this will use the value in d2 to override d1, in case they are not both dicts. (same as python's dict.update())
some tests:
def test_deep_update():
    """Exercise ``merge_dict_with_subdicts`` on a dict that grows step by step."""
    d = dict()
    merge_dict_with_subdicts(d, {"a": 4})
    assert d == {"a": 4}

    # A nested dict under a new key is added as it is.
    new_dict = {
        "b": {
            "c": {
                "d": 6
            }
        }
    }
    merge_dict_with_subdicts(d, new_dict)
    assert d == {
        "a": 4,
        "b": {
            "c": {
                "d": 6
            }
        }
    }

    # Scalars are overridden; nested dicts are merged, not replaced.
    new_dict = {
        "a": 3,
        "b": {
            "f": 7
        }
    }
    merge_dict_with_subdicts(d, new_dict)
    assert d == {
        "a": 3,
        "b": {
            "c": {
                "d": 6
            },
            "f": 7
        }
    }

    # test a case where one of the dicts has dict as value and the other has something else
    new_dict = {
        'a': {
            'b': 4
        }
    }
    merge_dict_with_subdicts(d, new_dict)
    assert d['a']['b'] == 4
I've tested with around ~1200 dicts - this method took 0.4 seconds, while the recursive solution took ~2.5 seconds.
As noted in many other answers, a recursive algorithm makes the most sense here. In general, when working with recursion, it is preferable to create new values rather than trying to modify any input data structure.
We need to define what happens at each merge step. If both inputs are dictionaries, this is easy: we copy across unique keys from each side, and recursively merge the values of the duplicated keys. It's the base cases that cause a problem. It will be easier to understand the logic if we pull out a separate function for that. As a placeholder, we could just wrap the two values in a tuple:
def merge_leaves(x, y):
    """Placeholder leaf rule: return both conflicting values as a tuple."""
    return (x, y)
Now the core of our logic looks like:
def merge(x, y):
    """Return a new value that recursively merges ``x`` and ``y``.

    If both are dicts, keys unique to one side are copied across and the
    values of shared keys are merged recursively. In every other case the
    result is ``merge_leaves(x, y)``. Neither input is modified.
    """
    if not (isinstance(x, dict) and isinstance(y, dict)):
        return merge_leaves(x, y)
    x_keys, y_keys = x.keys(), y.keys()
    result = {k: merge(x[k], y[k]) for k in x_keys & y_keys}
    result.update({k: x[k] for k in x_keys - y_keys})
    result.update({k: y[k] for k in y_keys - x_keys})
    return result
Let's test it:
>>> x = {'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y = {'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
>>> merge(x, y)
{'f': (1, {'b': 'c'}), 'g': ({'h', 'i'}, 1), 'a': {'d': ('e', 'e'), 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
>>> x # The originals are unmodified.
{'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y
{'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
We can easily modify the leaf-merging rule, for example:
def merge_leaves(x, y):
    """Leaf rule: return ``x + y``, or ``Ellipsis`` if they cannot be added."""
    try:
        return x + y
    except TypeError:
        # The two values do not support ``+`` with each other.
        return Ellipsis
and observe the effects:
>>> merge(x, y)
{'f': Ellipsis, 'g': Ellipsis, 'a': {'d': 'ee', 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
We could also potentially clean this up by using a third-party library to dispatch based on the type of the inputs. For example, using multipledispatch, we could do things like:
@dispatch(dict, dict)
def merge(x, y):
    """Merge two dicts: copy unique keys, merge the values of shared keys."""
    x_keys, y_keys = x.keys(), y.keys()
    result = {k: merge(x[k], y[k]) for k in x_keys & y_keys}
    result.update({k: x[k] for k in x_keys - y_keys})
    result.update({k: y[k] for k in y_keys - x_keys})
    return result


@dispatch(str, str)
def merge(x, y):
    """Two strings are concatenated."""
    return x + y


@dispatch(tuple, tuple)
def merge(x, y):
    """Two tuples are concatenated."""
    return x + y


@dispatch(list, list)
def merge(x, y):
    """Two lists are concatenated."""
    return x + y


@dispatch(int, int)
def merge(x, y):
    """Two integers under the same key are treated as a conflict."""
    raise ValueError("integer value conflict")


@dispatch(object, object)
def merge(x, y):
    """Fallback for any other pair: return both values as a tuple."""
    return (x, y)
This allows us to handle various combinations of leaf-type special cases without writing our own type checking, and also replaces the type check in the main recursive function.
The code will depend on your rules for resolving merge conflicts, of course. Here's a version which can take an arbitrary number of arguments and merges them recursively to an arbitrary depth, without using any object mutation. It uses the following rules to resolve merge conflicts:
dictionaries take precedence over non-dict values ({"foo": {...}} takes precedence over {"foo": "bar"})
later arguments take precedence over earlier arguments (if you merge {"a": 1}, {"a": 2}, and {"a": 3} in order, the result will be {"a": 3})
try:
    # Mapping lives in collections.abc; the old alias in ``collections``
    # was removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:
    Mapping = dict


def merge_dicts(*dicts):
    """
    Return a new dictionary that is the result of merging the arguments together.
    In case of conflicts, later arguments take precedence over earlier arguments.

    For each key, if at least one argument holds a mapping under it, all the
    mappings found under that key are merged recursively and any non-mapping
    values are ignored. Otherwise the value from the last argument that has
    the key is used. None of the arguments is modified.
    """
    updated = {}
    # grab all keys
    keys = set()
    for d in dicts:
        keys.update(d)

    for key in keys:
        values = [d[key] for d in dicts if key in d]
        # which ones are mapping types? (aka dict)
        maps = [value for value in values if isinstance(value, Mapping)]
        if maps:
            # if we have any mapping types, call recursively to merge them
            updated[key] = merge_dicts(*maps)
        else:
            # otherwise, just grab the last value we have, since later
            # arguments take precedence over earlier arguments
            updated[key] = values[-1]
    return updated
I had two dictionaries (a and b) which could each contain any number of nested dictionaries. I wanted to recursively merge them, with b taking precedence over a.
Considering the nested dictionaries as trees, what I wanted was:
To update a so that every path to every leaf in b would be represented in a
To overwrite subtrees of a if a leaf is found in the corresponding path in b
Maintain the invariant that all b leaf nodes remain leafs.
The existing answers were a little complicated for my taste and left some details on the shelf. I hacked together the following, which passes unit tests for my data set.
def merge_map(a, b):
    """Recursively merge ``b`` into ``a``, with ``b`` taking precedence.

    If either argument is not a dict, ``b`` is returned; a leaf in ``b``
    therefore replaces whatever ``a`` holds at that position, including a
    whole subtree. Otherwise ``a`` is updated in place and returned.

    :param a: dictionary that is updated (or any value, when recursing).
    :param b: dictionary that is merged in (or any value).
    :return: ``a`` after merging, or ``b`` when either is not a dict.
    """
    if not isinstance(a, dict) or not isinstance(b, dict):
        return b
    for key, value in b.items():
        a[key] = merge_map(a[key], value) if key in a else value
    return a
Example (formatted for clarity):
a = {
1 : {'a': 'red',
'b': {'blue': 'fish', 'yellow': 'bear' },
'c': { 'orange': 'dog'},
},
2 : {'d': 'green'},
3: 'e'
}
b = {
1 : {'b': 'white'},
2 : {'d': 'black'},
3: 'e'
}
>>> merge_map(a, b)
{1: {'a': 'red',
'b': 'white',
'c': {'orange': 'dog'},},
2: {'d': 'black'},
3: 'e'}
The paths in b that needed to be maintained were:
1 -> 'b' -> 'white'
2 -> 'd' -> 'black'
3 -> 'e'.
a had the unique and non-conflicting paths of:
1 -> 'a' -> 'red'
1 -> 'c' -> 'orange' -> 'dog'
so they are still represented in the merged map.
And just another slight variation:
Here is a pure python3 set based deep update function. It updates nested dictionaries by looping through one level at a time and calls itself to update each next level of dictionary values:
def deep_update(dict_original, dict_update):
    """Return ``dict_original`` deeply updated with ``dict_update``.

    If both arguments are dicts, a new dict is returned: keys only in the
    original keep their value, keys only in the update are added, and keys
    in both are updated recursively. If either argument is not a dict,
    ``dict_update`` is returned. The inputs are not modified.

    :param dict_original: the base dictionary (or any value, when recursing).
    :param dict_update: the dictionary with the new values (or any value).
    :return: the updated copy, or ``dict_update`` itself.
    """
    if not (isinstance(dict_original, dict) and isinstance(dict_update, dict)):
        return dict_update
    output = dict(dict_original)
    for key, new_value in dict_update.items():
        if key in dict_original:
            output[key] = deep_update(dict_original[key], new_value)
        else:
            output[key] = new_value
    return output
A simple example:
x={'a':{'b':{'c':1, 'd':1}}}
y={'a':{'b':{'d':2, 'e':2}}, 'f':2}
print(deep_update(x, y))
>>> {'a': {'b': {'c': 1, 'd': 2, 'e': 2}}, 'f': 2}
How about another answer?!? This one also avoids mutation/side effects:
def merge(dict1, dict2):
    """Return a new dict that deeply merges ``dict2`` into ``dict1``.

    Keys present in only one dict are taken over as they are. When both
    dicts hold a dict under a key, the two are merged recursively. In any
    other conflict the value from ``dict1`` wins. The inputs are not
    modified.

    :param dict1: first dictionary; takes precedence on conflicts.
    :param dict2: second dictionary.
    :return: the merged dictionary.
    """
    output = {}
    # Union of keys, in the order of ``dict2`` followed by new keys of ``dict1``.
    for key in {**dict2, **dict1}:
        if key not in dict1:
            output[key] = dict2[key]
        elif (
            key in dict2
            and isinstance(dict1[key], dict)
            and isinstance(dict2[key], dict)
        ):
            # Recurse only when both sides really are dicts.
            output[key] = merge(dict1[key], dict2[key])
        else:
            output[key] = dict1[key]
    return output
dict1 = {1:{"a":{"A"}}, 2:{"b":{"B"}}}
dict2 = {2:{"c":{"C"}}, 3:{"d":{"D"}}}
dict3 = {1:{"a":{"A"}}, 2:{"b":{"B"},"c":{"C"}}, 3:{"d":{"D"}}}
assert dict3 == merge(dict1, dict2)
This is a solution I made that recursively merges dictionaries to an infinite depth. The first dictionary passed to the function is the master dictionary - values in it will overwrite the values in the same key in the second dictionary.
def merge(dict1: dict, dict2: dict) -> dict:
    """Recursively merge ``dict2`` into ``dict1``, the master, and return it.

    ``dict1`` is updated in place. A value already present in ``dict1`` is
    never overwritten; only nested dicts present on both sides are merged
    further. Dicts taken over from ``dict2`` are rebuilt, so the result does
    not share them with ``dict2``.

    :param dict1: master dictionary; its values win on conflicts.
    :param dict2: dictionary providing the missing entries.
    :return: ``dict1``.
    """
    for key, value in dict2.items():
        if key not in dict1:
            dict1[key] = merge({}, value) if isinstance(value, dict) else value
        elif isinstance(dict1[key], dict) and isinstance(value, dict):
            merge(dict1[key], value)
        # Otherwise the master already has a value for this key and keeps it.
    return dict1
This should help in merging all items from dict2 into dict1:
# Merge dict2 into dict1 one level deep: shared top-level keys have their
# inner dicts updated, new top-level keys are added as they are.
for item in dict2:
    if item in dict1:
        dict1[item].update(dict2[item])
    else:
        dict1[item] = dict2[item]
Please test it and tell us whether this is what you wanted.
EDIT:
The above mentioned solution merges only one level, but correctly solves the example given by OP. To merge multiple levels, the recursion should be used.
I've been testing your solutions and decided to use this one in my project:
def mergedicts(dict1, dict2, conflict, no_conflict):
    """Yield ``(key, value)`` pairs for the union of both dicts' keys.

    ``conflict(v1, v2)`` produces the value for keys present in both dicts;
    ``no_conflict(v)`` produces the value for keys present in only one.
    Neither input is modified.
    """
    for k in set(dict1.keys()).union(dict2.keys()):
        if k in dict1 and k in dict2:
            yield (k, conflict(dict1[k], dict2[k]))
        elif k in dict1:
            yield (k, no_conflict(dict1[k]))
        else:
            yield (k, no_conflict(dict2[k]))


dict1 = {1:{"a":"A"}, 2:{"b":"B"}}
dict2 = {2:{"c":"C"}, 3:{"d":"D"}}


# this helper function allows for recursion and the use of reduce
def f2(x, y):
    """Deep-merge two dicts by using itself as the conflict resolver."""
    return dict(mergedicts(x, y, f2, lambda x: x))


# print() with a single argument behaves the same on Python 2 and 3.
print(dict(mergedicts(dict1, dict2, f2, lambda x: x)))
print(dict(reduce(f2, [dict1, dict2])))
Passing functions as parameters is the key to extending jterrace's solution so that it behaves like all the other recursive solutions.
The easiest way I can think of is:
#!/usr/bin/python
from copy import deepcopy
def dict_merge(a, b):
    """Return a deep-merged copy of ``a`` updated with ``b``.

    Neither input is modified and the result shares no containers with them.
    If ``b`` (or ``a``) is not a dict, a deep copy of ``b`` is returned.
    Otherwise nested dicts are merged recursively and every other value from
    ``b`` replaces the one in ``a``.
    """
    if not isinstance(b, dict) or not isinstance(a, dict):
        # Nothing to merge into: b simply replaces a. Copy it so the result
        # never aliases the caller's data.
        return deepcopy(b)
    result = deepcopy(a)
    # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only).
    for k, v in b.items():
        if k in result and isinstance(result[k], dict):
            result[k] = dict_merge(result[k], v)
        else:
            result[k] = deepcopy(v)
    return result


a = {1:{"a":'A'}, 2:{"b":'B'}}
b = {2:{"c":'C'}, 3:{"d":'D'}}
print(dict_merge(a, b))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
I have another slightly different solution here:
def deepMerge(d1, d2, inconflict=lambda v1, v2: v2):
    """Merge ``d2`` into ``d1`` in place and return ``d1``.

    Nested dicts present on both sides are merged recursively. When two
    non-mergeable values for the same key differ, ``inconflict(v1, v2)``
    decides the result; by default the value from ``d2`` wins.
    """
    for k in d2:
        if k in d1:
            if isinstance(d1[k], dict) and isinstance(d2[k], dict):
                deepMerge(d1[k], d2[k], inconflict)
            elif d1[k] != d2[k]:
                d1[k] = inconflict(d1[k], d2[k])
        else:
            d1[k] = d2[k]
    return d1
By default it resolves conflicts in favor of values from the second dict, but you can easily override this, with some witchery you may be able to even throw exceptions out of it. :).
class Utils(object):
    """
    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> Utils.merge_dict(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    >>> main = {'a': {'b': {'test': 'bug'}, 'c': 'C'}}
    >>> suply = {'a': {'b': 2, 'd': 'D', 'c': {'test': 'bug2'}}}
    >>> Utils.merge_dict(main, suply) == {'a': {'b': {'test': 'bug'}, 'c': 'C', 'd': 'D'}}
    True
    """

    @staticmethod
    def merge_dict(main, suply):
        """
        Merge ``suply`` into ``main`` in place and return ``main``.

        ``main`` is authoritative and ``suply`` only supplements it: keys
        missing from ``main`` are added, nested dicts present on both sides
        are merged recursively, and on any other conflict the value already
        in ``main`` is kept.

        :return: ``main`` (the same object, updated)
        """
        for key, value in suply.items():
            if key not in main:
                main[key] = value
            elif isinstance(main[key], dict) and isinstance(value, dict):
                Utils.merge_dict(main[key], value)
            # Any other conflict: keep main's value untouched.
        return main


if __name__ == '__main__':
    import doctest
    doctest.testmod()
Hey there, I also had the same problem, but I thought of a solution and I will post it here in case it is also useful for others. It merges nested dictionaries and also adds up the values of matching leaves; I needed it to calculate some probabilities, and it worked great:
#used to copy a nested dict to a nested dict
def deepupdate(target, src):
    """Merge the two-level dict ``src`` into ``target`` in place, summing leaves.

    Both arguments are dicts of dicts (exactly two levels deep). For an outer
    key present in both, inner values with the same key are added together
    with ``+=`` and new inner keys are inserted. An outer key missing from
    ``target`` receives a deep copy of ``src``'s sub-dict. Returns ``None``.
    """
    for k, v in src.items():
        if k in target:
            for k2, v2 in v.items():
                if k2 in target[k]:
                    target[k][k2] += v2
                else:
                    target[k][k2] = v2
        else:
            # Copy so later in-place sums never modify src.
            target[k] = copy.deepcopy(v)
by using the above method we can merge:
target = {'6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1}, '6,63': {'63,4': 1}}
src = {'5,4': {'4,4': 1}, '5,5': {'5,4': 1}, '4,4': {'4,3': 1}}
and this will become:
{'5,5': {'5,4': 1}, '5,4': {'4,4': 1}, '6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 2}, '6,63': {'63,4': 1}}
also notice the changes here:
target = {'6,6': {'6,63': 1}, '6,63': {'63,4': 1}, '4,4': {'4,3': 1}, '63,4': {'4,4': 1}}
src = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '4,4': {'4,9': 1}, '3,4': {'4,4': 1}, '5,5': {'5,4': 1}}
merge = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '6,63': {'63,4': 1}, '5,5': {'5,4': 1}, '6,6': {'6,63': 1}, '3,4': {'4,4': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1, '4,9': 1}}
Don't forget to also add the import for copy:
import copy
from collections import defaultdict
from itertools import chain
class DictHelper:
    """Helpers for merging any number of (possibly nested) dictionaries."""

    @staticmethod
    def merge_dictionaries(*dictionaries, override=True):
        """Return a new dict merging all ``dictionaries`` key by key.

        For each key, missing entries and ``None`` values are ignored; the
        remaining values must all be of the same type:

        * lists are combined into one ``set`` of all their items,
        * dicts are merged recursively (``override`` is passed down),
        * anything else yields the last value when ``override`` is true,
          otherwise the list of all values in argument order.

        A key whose value is ``None`` everywhere maps to ``None``.

        :raises Exception: if a key holds values of different types.
        """
        merged_dict = {}
        # Iterating a dict yields its keys, so this is the union of all keys.
        all_unique_keys = set(chain.from_iterable(dictionaries))
        for key in all_unique_keys:
            # Values for this key in argument order, skipping absent/None.
            values = [d[key] for d in dictionaries if d.get(key) is not None]
            if not values:
                merged_dict[key] = None
                continue
            keys_value_type = list({type(value) for value in values})
            if len(keys_value_type) != 1:
                raise Exception("Different objects type for same key: {keys_value_type}".format(keys_value_type=keys_value_type))
            if keys_value_type[0] == list:
                merged_dict[key] = set(chain.from_iterable(values))
            elif keys_value_type[0] == dict:
                # Forward `override` so nested levels follow the same policy.
                merged_dict[key] = DictHelper.merge_dictionaries(*values, override=override)
            else:
                # if override => get value from last dictionary else make a list of all values
                merged_dict[key] = values[-1] if override else values
        return merged_dict


if __name__ == '__main__':
    d1 = {'aaaaaaaaa': ['to short', 'to long'], 'bbbbb': ['to short', 'to long'], "cccccc": ["the is a test"]}
    d2 = {'aaaaaaaaa': ['field is not a bool'], 'bbbbb': ['field is not a bool']}
    d3 = {'aaaaaaaaa': ['filed is not a string', "to short"], 'bbbbb': ['field is not an integer']}
    print(DictHelper.merge_dictionaries(d1, d2, d3))
    d4 = {"a": {"x": 1, "y": 2, "z": 3, "d": {"x1": 10}}}
    d5 = {"a": {"x": 10, "y": 20, "d": {"x2": 20}}}
    print(DictHelper.merge_dictionaries(d4, d5))
Output:
{'bbbbb': {'to long', 'field is not an integer', 'to short', 'field is not a bool'},
'aaaaaaaaa': {'to long', 'to short', 'filed is not a string', 'field is not a bool'},
'cccccc': {'the is a test'}}
{'a': {'y': 20, 'd': {'x1': 10, 'x2': 20}, 'z': 3, 'x': 10}}

How do I merge two multi-dictionary dictionaries? [duplicate]

I need to merge multiple dictionaries, here's what I have for instance:
dict1 = {1:{"a":{A}}, 2:{"b":{B}}}
dict2 = {2:{"c":{C}}, 3:{"d":{D}}}
With A B C and D being leaves of the tree, like {"info1":"value", "info2":"value2"}
There is an unknown level(depth) of dictionaries, it could be {2:{"c":{"z":{"y":{C}}}}}
In my case it represents a directory/files structure with nodes being docs and leaves being files.
I want to merge them to obtain:
dict3 = {1:{"a":{A}}, 2:{"b":{B},"c":{C}}, 3:{"d":{D}}}
I'm not sure how I could do that easily with Python.
This is actually quite tricky - particularly if you want a useful error message when things are inconsistent, while correctly accepting duplicate but consistent entries (something no other answer here does..)
Assuming you don't have huge numbers of entries, a recursive function is easiest:
from functools import reduce
def merge(a, b, path=None):
    """Merge ``b`` into ``a`` in place and return ``a``.

    Nested dicts are merged recursively. Identical leaf values on both sides
    are accepted; differing ones raise an error naming the conflicting path.

    :param path: list of stringified keys leading to the current level; used
        only to build the error message. Callers normally omit it.
    :raises Exception: ``'Conflict at <dotted.path>'`` on a leaf mismatch.
    """
    if path is None:
        path = []
    for key in b:
        if key in a:
            if isinstance(a[key], dict) and isinstance(b[key], dict):
                merge(a[key], b[key], path + [str(key)])
            elif a[key] == b[key]:
                pass  # same leaf value
            else:
                raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
        else:
            a[key] = b[key]
    return a
# works
print(merge({1:{"a":"A"},2:{"b":"B"}}, {2:{"c":"C"},3:{"d":"D"}}))
# has conflict
merge({1:{"a":"A"},2:{"b":"B"}}, {1:{"a":"A"},2:{"b":"C"}})
note that this mutates a - the contents of b are added to a (which is also returned). If you want to keep a you could call it like merge(dict(a), b).
agf pointed out (below) that you may have more than two dicts, in which case you can use:
reduce(merge, [dict1, dict2, dict3...])
where everything will be added to dict1.
Note: I edited my initial answer to mutate the first argument; that makes the "reduce" easier to explain
You could try mergedeep.
Installation
$ pip3 install mergedeep
Usage
from mergedeep import merge
a = {"keyA": 1}
b = {"keyB": {"sub1": 10}}
c = {"keyB": {"sub2": 20}}
merge(a, b, c)
print(a)
# {"keyA": 1, "keyB": {"sub1": 10, "sub2": 20}}
For a full list of options, check out the docs!
Here's an easy way to do it using generators:
def mergedicts(dict1, dict2):
    """Yield ``(key, value)`` pairs of the deep merge of two dicts.

    Wrap the call in ``dict(...)`` to obtain the merged dictionary. The
    inputs are not modified.
    """
    for k in set(dict1.keys()).union(dict2.keys()):
        if k in dict1 and k in dict2:
            if isinstance(dict1[k], dict) and isinstance(dict2[k], dict):
                yield (k, dict(mergedicts(dict1[k], dict2[k])))
            else:
                # If one of the values is not a dict, you can't continue merging it.
                # Value from second dict overrides one in first and we move on.
                yield (k, dict2[k])
                # Alternatively, replace this with exception raiser to alert you of value conflicts
        elif k in dict1:
            yield (k, dict1[k])
        else:
            yield (k, dict2[k])


dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
# print() with a single argument behaves the same on Python 2 and 3.
print(dict(mergedicts(dict1, dict2)))
This prints:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
One issue with this question is that the values of the dict can be arbitrarily complex pieces of data. Based upon these and other answers I came up with this code:
class YamlReaderError(Exception):
    """Raised when two values cannot be merged by :func:`data_merge`."""


def data_merge(a, b):
    """Merge ``b`` into ``a`` and return the merged result.

    Rules, chosen by the type of ``a``:

    * ``None`` or a scalar (str, bytes, int, float): replaced by ``b``;
    * list: extended with ``b`` if ``b`` is a list, else ``b`` is appended;
    * dict: ``b`` must be a dict; shared keys are merged recursively and
      new keys are added.

    Lists and dicts are modified in place. Tuples and arbitrary objects are
    not handled, as it is ambiguous what should happen to them.

    :raises YamlReaderError: for unsupported types, for a non-dict merged
        into a dict, or when a ``TypeError`` occurs while merging.
    """
    key = None
    try:
        if a is None or isinstance(a, (str, bytes, int, float)):
            # border case for first run or if a is a primitive
            a = b
        elif isinstance(a, list):
            # lists can be only appended
            if isinstance(b, list):
                a.extend(b)
            else:
                a.append(b)
        elif isinstance(a, dict):
            # dicts must be merged
            if isinstance(b, dict):
                for key in b:
                    if key in a:
                        a[key] = data_merge(a[key], b[key])
                    else:
                        a[key] = b[key]
            else:
                raise YamlReaderError('Cannot merge non-dict "%s" into dict "%s"' % (b, a))
        else:
            raise YamlReaderError('NOT IMPLEMENTED "%s" into "%s"' % (b, a))
    except TypeError as e:
        raise YamlReaderError('TypeError "%s" in key "%s" when merging "%s" into "%s"' % (e, key, b, a)) from e
    return a
My use case is merging YAML files where I only have to deal with a subset of possible data types. Hence I can ignore tuples and other objects. For me a sensible merge logic means
replace scalars
append lists
merge dicts by adding missing keys and updating existing keys
Everything else and the unforeseens results in an error.
Dictionaries of dictionaries merge
As this is the canonical question (in spite of certain non-generalities) I'm providing the canonical Pythonic approach to solving this issue.
Simplest Case: "leaves are nested dicts that end in empty dicts":
d1 = {'a': {1: {'foo': {}}, 2: {}}}
d2 = {'a': {1: {}, 2: {'bar': {}}}}
d3 = {'b': {3: {'baz': {}}}}
d4 = {'a': {1: {'quux': {}}}}
This is the simplest case for recursion, and I would recommend two naive approaches:
def rec_merge1(d1, d2):
    '''Return a new merged dict of dicts; neither ``d1`` nor ``d2`` is changed.

    Every value is assumed to be a dict (leaves are empty dicts). Keys found
    in both inputs are merged recursively; keys found in only one input keep
    that input's sub-dict (shared, not copied).
    '''
    d3 = d1.copy()
    for k, v in d2.items():  # in Python 2, use .iteritems()!
        # Build the merged value in d3 instead of writing it back into d2,
        # so the second argument is left untouched.
        d3[k] = rec_merge1(d1[k], v) if k in d1 else v
    return d3
def rec_merge2(d1, d2):
    '''Update ``d1`` with ``d2`` recursively, in place, and return ``d1``.

    Every value is assumed to be a dict (leaves are empty dicts). ``d2``
    itself is never written to, but sub-dicts copied over for new keys are
    shared between ``d1`` and ``d2``.
    '''
    for k, v in d2.items():  # in Python 2, use .iteritems()!
        if k in d1:
            # d1[k] is updated in place; no need to write anything into d2.
            rec_merge2(d1[k], v)
        else:
            d1[k] = v
    return d1
I believe I would prefer the second to the first, but keep in mind that the original state of the first would have to be rebuilt from its origin. Here's the usage:
>>> from functools import reduce # only required for Python 3.
>>> reduce(rec_merge1, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
>>> reduce(rec_merge2, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
Complex Case: "leaves are of any other type:"
So if they end in dicts, it's a simple case of merging the end empty dicts. If not, it's not so trivial. If strings, how do you merge them? Sets can be updated similarly, so we could give that treatment, but we lose the order in which they were merged. So does order matter?
So in lieu of more information, the simplest approach will be to give them the standard update treatment if both values are not dicts: i.e. the second dict's value will overwrite the first, even if the second dict's value is None and the first's value is a dict with a lot of info.
d1 = {'a': {1: 'foo', 2: None}}
d2 = {'a': {1: None, 2: 'bar'}}
d3 = {'b': {3: 'baz'}}
d4 = {'a': {1: 'quux'}}
from collections.abc import MutableMapping
def rec_merge(d1, d2):
    '''
    Return a new mapping that merges two dicts of dicts recursively.

    Values are merged recursively only when both sides are mutable mappings;
    if either mapping has leaves that are non-dicts, the second's leaf
    overwrites the first's. Neither ``d1`` nor ``d2`` is modified.
    '''
    d3 = d1.copy()
    for k, v in d2.items():
        # this next check is the only difference!
        if k in d1 and all(isinstance(e, MutableMapping) for e in (d1[k], v)):
            d3[k] = rec_merge(d1[k], v)
        else:
            # we could further check types and merge as appropriate here.
            d3[k] = v
    return d3
And now
from functools import reduce
reduce(rec_merge, (d1, d2, d3, d4))
returns
{'a': {1: 'quux', 2: 'bar'}, 'b': {3: 'baz'}}
Application to the original question:
I've had to remove the curly braces around the letters and put them in single quotes for this to be legit Python (else they would be set literals in Python 2.7+) as well as append a missing brace:
dict1 = {1:{"a":'A'}, 2:{"b":'B'}}
dict2 = {2:{"c":'C'}, 3:{"d":'D'}}
and rec_merge(dict1, dict2) now returns:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
Which matches the desired outcome of the original question (after changing, e.g. the {A} to 'A'.)
Based on @andrew cooke. This version handles nested lists of dicts and also allows the option to update the values
def merge(a, b, path=None, update=True):
    """Merge ``b`` into ``a`` in place and return ``a``.

    See http://stackoverflow.com/questions/7204805/python-dictionaries-of-dictionaries-merge

    Nested dicts are merged recursively and lists are merged position by
    position (extra items of ``b``'s list are appended). For any other
    differing values, ``b``'s value replaces ``a``'s when ``update`` is true;
    otherwise an error naming the conflicting path is raised.

    :param path: keys leading to the current level, used for error messages.
    :param update: overwrite on conflict (True) or raise (False); applies at
        every nesting level.
    :raises Exception: ``'Conflict at <dotted.path>'`` if ``update`` is false
        and two values differ.
    """
    if path is None:
        path = []
    for key in b:
        if key in a:
            a[key] = _merge_values(a[key], b[key], path + [str(key)], update)
        else:
            a[key] = b[key]
    return a


def _merge_values(old, new, path, update):
    """Return the merge of one pair of values located at ``path``."""
    if isinstance(old, dict) and isinstance(new, dict):
        return merge(old, new, path, update)
    if old == new:
        return old  # same leaf value
    if isinstance(old, list) and isinstance(new, list):
        for idx, val in enumerate(new):
            if idx < len(old):
                old[idx] = _merge_values(old[idx], val, path + [str(idx)], update)
            else:
                # b's list is longer than a's: keep the extra items.
                old.append(val)
        return old
    if update:
        return new
    raise Exception('Conflict at %s' % '.'.join(path))
This simple recursive procedure will merge one dictionary into another while overriding conflicting keys:
#!/usr/bin/env python2.7
def merge_dicts(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1`` and return the result.

    If either argument is not a dict, ``dict2`` is returned unchanged (it
    overrides ``dict1``). Otherwise ``dict1`` is updated in place: shared
    keys are merged recursively and new keys are added.
    """
    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        return dict2
    for k in dict2:
        if k in dict1:
            dict1[k] = merge_dicts(dict1[k], dict2[k])
        else:
            dict1[k] = dict2[k]
    return dict1
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {2:{"c":"C"}, 3:{"d":"D"}}))
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {1:{"a":"A"}, 2:{"b":"C"}}))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
{1: {'a': 'A'}, 2: {'b': 'C'}}
Based on answers from @andrew cooke.
It takes care of nested lists in a better way.
def deep_merge_lists(original, incoming):
    """
    Deep merge two lists. Modifies original.
    Recursively call deep merge on each correlated element of list.
    If item type in both elements are
     a. dict: Call deep_merge_dicts on both values.
     b. list: Recursively call deep_merge_lists on both values.
     c. any other type: Value is overridden.
     d. conflicting types: Value is overridden.

    If the incoming list is longer than the original, the extra values are
    appended. Returns None.
    """
    common_length = min(len(original), len(incoming))
    for idx in range(common_length):
        if isinstance(original[idx], dict) and isinstance(incoming[idx], dict):
            deep_merge_dicts(original[idx], incoming[idx])
        elif isinstance(original[idx], list) and isinstance(incoming[idx], list):
            deep_merge_lists(original[idx], incoming[idx])
        else:
            original[idx] = incoming[idx]

    for idx in range(common_length, len(incoming)):
        original.append(incoming[idx])


def deep_merge_dicts(original, incoming):
    """
    Deep merge two dictionaries. Modifies original.
    For key conflicts if both values are:
     a. dict: Recursively call deep_merge_dicts on both values.
     b. list: Call deep_merge_lists on both values.
     c. any other type: Value is overridden.
     d. conflicting types: Value is overridden.

    Keys missing from original are added. Returns None.
    """
    for key in incoming:
        if key in original:
            if isinstance(original[key], dict) and isinstance(incoming[key], dict):
                deep_merge_dicts(original[key], incoming[key])
            elif isinstance(original[key], list) and isinstance(incoming[key], list):
                deep_merge_lists(original[key], incoming[key])
            else:
                original[key] = incoming[key]
        else:
            original[key] = incoming[key]
If you have an unknown level of dictionaries, then I would suggest a recursive function:
def combineDicts(dictionary1, dictionary2):
    """Return a new dict that recursively combines the two dictionaries.

    Keys found in only one input keep that input's value. For shared keys,
    two dicts are combined recursively; otherwise ``dictionary2``'s value
    wins. Neither input is modified.
    """
    output = dict(dictionary1)
    # .items() / `in` replace the Python-2-only .iteritems() / .has_key().
    for item, value in dictionary2.items():
        if (item in dictionary1
                and isinstance(dictionary1[item], dict)
                and isinstance(value, dict)):
            output[item] = combineDicts(dictionary1[item], value)
        else:
            output[item] = value
    return output
In case someone wants yet another approach to this problem, here's my solution.
Virtues: short, declarative, and functional in style (recursive, does no mutation).
Potential Drawback: This might not be the merge you're looking for. Consult the docstring for semantics.
def deep_merge(a, b):
    """
    Merge two values, with `b` taking precedence over `a`.

    Semantics:
    - If either `a` or `b` is not a dictionary, `a` will be returned only if
      `b` is `None`. Otherwise `b` will be returned.
    - If both values are dictionaries, they are merged as follows:
        * Each key that is found only in `a` or only in `b` will be included in
          the output collection with its value intact.
        * For any key in common between `a` and `b`, the corresponding values
          will be merged with the same semantics.

    No input is mutated; a new dictionary is built at every merged level.
    """
    if not isinstance(a, dict) or not isinstance(b, dict):
        return a if b is None else b
    # Both a and b are dictionaries (or subtypes thereof) from here on.
    keys = set(a.keys()) | set(b.keys())
    # `.get` returns None for a missing key, which the base case above
    # treats as "no value on this side".
    return {key: deep_merge(a.get(key), b.get(key)) for key in keys}
Overview
The following approach subdivides the problem of a deep merge of dicts into:
A parameterized shallow merge function merge(f)(a,b) that uses a
function f to merge two dicts a and b
A recursive merger function f to be used together with merge
Implementation
A function for merging two (non nested) dicts can be written in a lot of ways. I personally like
def merge(f):
    """Return a shallow two-dict merger that combines values with ``f``.

    The returned function builds a new dict over the union of both inputs'
    keys; each value is ``f(a.get(key), b.get(key))``, so ``f`` receives
    ``None`` for a key missing on one side.
    """
    def merge(a, b):
        keys = a.keys() | b.keys()
        return {key: f(a.get(key), b.get(key)) for key in keys}
    return merge
A nice way of defining an appropriate recursive merger function f is using multipledispatch which allows to define functions that evaluate along different paths depending on the type of their arguments.
from multipledispatch import dispatch
# for anything that is not a dict return the second value, unless it is None
@dispatch(object, object)
def f(a, b):
    """Leaf rule: ``b`` wins unless it is ``None`` (i.e. missing)."""
    return b if b is not None else a


# for dicts recurse
@dispatch(dict, dict)
def f(a, b):
    """Dict rule: merge the two dicts key by key using ``f`` itself."""
    return merge(f)(a, b)
Example
To merge two nested dicts simply use merge(f) e.g.:
dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
merge(f)(dict1, dict2)
#returns {1: {'a': 'A'}, 2: {'b': 'B', 'c': 'C'}, 3: {'d': 'D'}}
Notes:
The advantages of this approach are:
The function is built from smaller functions that each do a single thing
which makes the code simpler to reason about and test
The behaviour is not hard-coded but can be changed and extended as needed which improves code reuse (see example below).
Customization
Some answers also considered dicts that contain lists, e.g. of other (potentially nested) dicts. In this case one might want to map over the lists and merge them based on position. This can be done by adding another definition to the merger function f:
import itertools
# for lists merge element-wise by position
@dispatch(list, list)
def f(a, b):
    """List rule: merge items pairwise; the shorter list is padded with None."""
    # Dispatch through f (not merge(f)) so that non-dict items and the None
    # padding from zip_longest are handled by the matching rule.
    return [f(*arg) for arg in itertools.zip_longest(a, b)]
There's a slight problem with andrew cooke's answer: in some cases it modifies the second argument b when you modify the returned dict. Specifically, it's because of this line:
if key in a:
...
else:
a[key] = b[key]
If b[key] is a dict, it will simply be assigned to a, meaning any subsequent modifications to that dict will affect both a and b.
a={}
b={'1':{'2':'b'}}
c={'1':{'3':'c'}}
merge(merge(a,b), c) # {'1': {'3': 'c', '2': 'b'}}
a # {'1': {'3': 'c', '2': 'b'}} (as expected)
b # {'1': {'3': 'c', '2': 'b'}} <----
c # {'1': {'3': 'c'}} (unmodified)
To fix this, the line would have to be substituted with this:
if isinstance(b[key], dict):
a[key] = clone_dict(b[key])
else:
a[key] = b[key]
Where clone_dict is:
def clone_dict(obj):
    """Return a copy of ``obj`` in which every nested dict is copied too.

    Non-dict values (lists, sets, ...) are shared with the original.
    """
    clone = {}
    for key, value in obj.items():  # .iteritems() is Python 2 only
        if isinstance(value, dict):
            clone[key] = clone_dict(value)
        else:
            clone[key] = value
    # The bare `return` here used to hand back None instead of the copy.
    return clone
Still. This obviously doesn't account for list, set and other stuff, but I hope it illustrates the pitfalls when trying to merge dicts.
And for completeness sake, here is my version, where you can pass it multiple dicts:
def merge_dicts(*args):
    """Return a new dict that deep-merges all the given dicts, left to right.

    No argument is modified and the result shares no dict with any of them
    (non-dict values are shared). Equal leaf values are accepted.

    :raises Exception: if two dicts hold different leaf values at the same
        path; the message names the dotted path.
    """
    def clone_dict(obj):
        # Copy every nested dict so the result never aliases an input dict.
        clone = {}
        for key, value in obj.items():
            if isinstance(value, dict):
                clone[key] = clone_dict(value)
            else:
                clone[key] = value
        return clone

    def merge(a, b, path=None):
        if path is None:
            path = []
        for key in b:
            if key in a:
                if isinstance(a[key], dict) and isinstance(b[key], dict):
                    merge(a[key], b[key], path + [str(key)])
                elif a[key] == b[key]:
                    pass
                else:
                    raise Exception('Conflict at `{path}\''.format(path='.'.join(path + [str(key)])))
            else:
                if isinstance(b[key], dict):
                    a[key] = clone_dict(b[key])
                else:
                    a[key] = b[key]
        return a

    # Fold every argument into a fresh dict (same as reduce(merge, args, {})).
    merged = {}
    for arg in args:
        merge(merged, arg)
    return merged
You can use the merge function from the toolz package, for example:
>>> import toolz
>>> dict1 = {1: {'a': 'A'}, 2: {'b': 'B'}}
>>> dict2 = {2: {'c': 'C'}, 3: {'d': 'D'}}
>>> toolz.merge_with(toolz.merge, dict1, dict2)
{1: {'a': 'A'}, 2: {'c': 'C'}, 3: {'d': 'D'}}
This version of the function will account for N number of dictionaries, and only dictionaries -- no improper parameters can be passed, or it will raise a TypeError. The merge itself accounts for key conflicts, and instead of overwriting data from a dictionary further down the merge chain, it creates a set of values and appends to that; no data is lost.
It might not be the most efficient on the page, but it's the most thorough and you're not going to lose any information when you merge your 2 to N dicts.
def merge_dicts(*dicts):
    """Deep-merge two or more dicts, left to right, without losing values.

    Nested dicts are merged recursively. When two non-dict values of the
    same type share a key, equal values are kept as-is and differing values
    become a sorted list of both. The inputs are not modified.

    Note: once a key has turned into a list, merging a further scalar for
    that key counts as a type conflict.

    :raises TypeError: if an argument is not a dict, or if two values for
        the same key have different types.
    :raises ValueError: if fewer than two dicts are given.
    """
    if not all(isinstance(d, dict) for d in dicts):
        raise TypeError("Object in *dicts not of type dict")
    if len(dicts) < 2:
        raise ValueError("Requires 2 or more dict objects")

    def merge(a, b):
        for d in set(a.keys()).union(b.keys()):
            if d in a and d in b:
                if type(a[d]) != type(b[d]):
                    raise TypeError("Conflicting key:value type assignment")
                if isinstance(a[d], dict):
                    yield (d, dict(merge(a[d], b[d])))
                elif a[d] == b[d]:
                    # Equal values: keep the value itself (sorting a scalar
                    # would fail or split a string into characters).
                    yield (d, a[d])
                else:
                    yield (d, sorted({a[d], b[d]}))
            elif d in a:
                yield (d, a[d])
            else:
                yield (d, b[d])

    merged = dicts[0]
    for other in dicts[1:]:
        merged = dict(merge(merged, other))
    return merged


print(merge_dicts({1:1,2:{1:2}},{1:2,2:{3:1}},{4:4}))
output: {1: [1, 2], 2: {1: 2, 3: 1}, 4: 4}
Since dictviews support set operations, I was able to greatly simplify jterrace's answer.
def merge(dict1, dict2):
    """Yield ``(key, value)`` pairs of the deep merge of two dicts.

    Relies on the set operations supported by dict key views. Values under a
    shared key must both have a ``keys`` method; otherwise the recursive
    call raises ``AttributeError``. The inputs are not modified.
    """
    for k in dict1.keys() - dict2.keys():
        yield (k, dict1[k])
    for k in dict2.keys() - dict1.keys():
        yield (k, dict2[k])
    for k in dict1.keys() & dict2.keys():
        yield (k, dict(merge(dict1[k], dict2[k])))
Any attempt to combine a dict with a non dict (technically, an object with a 'keys' method and an object without a 'keys' method) will raise an AttributeError. This includes both the initial call to the function and recursive calls. This is exactly what I wanted so I left it. You could easily catch an AttributeErrors thrown by the recursive call and then yield any value you please.
Short-n-sweet:
from collections.abc import MutableMapping as Map
def nested_update(d, v):
    """
    Nested update of dict-like 'd' with dict-like 'v'.

    Updates 'd' in place and returns None, like dict.update. Where both
    sides hold a mutable mapping under the same key the update recurses;
    otherwise the value from 'v' replaces (or adds) the entry in 'd'.
    """
    for key in v:
        if key in d and isinstance(d[key], Map) and isinstance(v[key], Map):
            nested_update(d[key], v[key])
        else:
            d[key] = v[key]
This works like (and is built on) Python's dict.update method. It returns None (you can always add return d if you prefer) as it updates dict d in-place. Keys in v will overwrite any existing keys in d (it does not try to interpret the dict's contents).
It will also work for other ("dict-like") mappings.
I have an iterative solution - works much much better with big dicts & a lot of them (for example jsons etc):
import collections
def merge_dict_with_subdicts(dict1: dict, dict2: dict) -> dict:
    """
    similar behaviour to builtin dict.update - but knows how to handle nested dicts

    Iterative (no recursion): pairs of dicts still to be merged are kept on
    a work stack. ``dict1`` is updated in place and returned. Where both
    sides hold a dict under the same key the pair is merged as well;
    otherwise the value from ``dict2`` overrides.
    """
    q = collections.deque([(dict1, dict2)])
    while len(q) > 0:
        d1, d2 = q.pop()
        for k, v in d2.items():
            if k in d1 and isinstance(d1[k], dict) and isinstance(v, dict):
                # Defer the nested pair instead of recursing.
                q.append((d1[k], v))
            else:
                d1[k] = v
    return dict1
note that this will use the value in d2 to override d1, in case they are not both dicts. (same as python's dict.update())
some tests:
def test_deep_update():
    """Exercise merge_dict_with_subdicts on flat, nested and mixed updates."""
    d = dict()
    merge_dict_with_subdicts(d, {"a": 4})
    assert d == {"a": 4}

    new_dict = {
        "b": {
            "c": {
                "d": 6
            }
        }
    }
    merge_dict_with_subdicts(d, new_dict)
    assert d == {
        "a": 4,
        "b": {
            "c": {
                "d": 6
            }
        }
    }

    new_dict = {
        "a": 3,
        "b": {
            "f": 7
        }
    }
    merge_dict_with_subdicts(d, new_dict)
    assert d == {
        "a": 3,
        "b": {
            "c": {
                "d": 6
            },
            "f": 7
        }
    }

    # test a case where one of the dicts has dict as value and the other has something else
    new_dict = {
        'a': {
            'b': 4
        }
    }
    merge_dict_with_subdicts(d, new_dict)
    assert d['a']['b'] == 4
I've tested with around ~1200 dicts - this method took 0.4 seconds, while the recursive solution took ~2.5 seconds.
As noted in many other answers, a recursive algorithm makes the most sense here. In general, when working with recursion, it is preferable to create new values rather than trying to modify any input data structure.
We need to define what happens at each merge step. If both inputs are dictionaries, this is easy: we copy across unique keys from each side, and recursively merge the values of the duplicated keys. It's the base cases that cause a problem. It will be easier to understand the logic if we pull out a separate function for that. As a placeholder, we could just wrap the two values in a tuple:
def merge_leaves(x, y):
    """Placeholder leaf rule: keep both conflicting values as a tuple."""
    return (x, y)
Now the core of our logic looks like:
def merge(x, y):
    """Return a new deep merge of ``x`` and ``y`` without modifying either.

    If both values are dicts, keys unique to one side are copied across and
    values under shared keys are merged recursively. In every other case
    the pair is handed to ``merge_leaves``.
    """
    if not (isinstance(x, dict) and isinstance(y, dict)):
        return merge_leaves(x, y)
    x_keys, y_keys = x.keys(), y.keys()
    result = {k: merge(x[k], y[k]) for k in x_keys & y_keys}
    result.update({k: x[k] for k in x_keys - y_keys})
    result.update({k: y[k] for k in y_keys - x_keys})
    return result
Let's test it:
>>> x = {'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y = {'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
>>> merge(x, y)
{'f': (1, {'b': 'c'}), 'g': ({'h', 'i'}, 1), 'a': {'d': ('e', 'e'), 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
>>> x # The originals are unmodified.
{'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y
{'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
We can easily modify the leaf-merging rule, for example:
def merge_leaves(x, y):
    """Leaf rule: add the two values; yield ``Ellipsis`` if they cannot be added."""
    try:
        return x + y
    except TypeError:
        return Ellipsis
and observe the effects:
>>> merge(x, y)
{'f': Ellipsis, 'g': Ellipsis, 'a': {'d': 'ee', 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
We could also potentially clean this up by using a third-party library to dispatch based on the type of the inputs. For example, using multipledispatch, we could do things like:
@dispatch(dict, dict)
def merge(x, y):
    """Dicts: merge shared keys recursively, copy the unique ones across."""
    x_keys, y_keys = x.keys(), y.keys()
    result = {k: merge(x[k], y[k]) for k in x_keys & y_keys}
    result.update({k: x[k] for k in x_keys - y_keys})
    result.update({k: y[k] for k in y_keys - x_keys})
    return result


@dispatch(str, str)
def merge(x, y):
    """Strings: concatenate."""
    return x + y


@dispatch(tuple, tuple)
def merge(x, y):
    """Tuples: concatenate."""
    return x + y


@dispatch(list, list)
def merge(x, y):
    """Lists: concatenate."""
    return x + y


@dispatch(int, int)
def merge(x, y):
    """Integers: any pair of integers is reported as a conflict."""
    raise ValueError("integer value conflict")


@dispatch(object, object)
def merge(x, y):
    """Fallback for every other combination: keep both values as a tuple."""
    return (x, y)
This allows us to handle various combinations of leaf-type special cases without writing our own type checking, and also replaces the type check in the main recursive function.
The code will depend on your rules for resolving merge conflicts, of course. Here's a version which can take an arbitrary number of arguments and merges them recursively to an arbitrary depth, without using any object mutation. It uses the following rules to resolve merge conflicts:
dictionaries take precedence over non-dict values ({"foo": {...}} takes precedence over {"foo": "bar"})
later arguments take precedence over earlier arguments (if you merge {"a": 1}, {"a": 2}, and {"a": 3} in order, the result will be {"a": 3})
try:
    # The ABCs live in collections.abc on Python 3; the old
    # `collections.Mapping` alias was removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:
    try:
        from collections import Mapping
    except ImportError:
        Mapping = dict


def merge_dicts(*dicts):
    """
    Return a new dictionary that is the result of merging the arguments together.
    In case of conflicts, later arguments take precedence over earlier arguments.

    Mapping values take precedence over non-mapping values for the same key:
    if any argument holds a mapping under a key, only the mappings for that
    key are merged (recursively) and the other values are dropped. No
    argument is mutated.
    """
    updated = {}
    # grab all keys
    keys = set()
    for d in dicts:
        keys = keys.union(set(d))

    for key in keys:
        values = [d[key] for d in dicts if key in d]
        # which ones are mapping types? (aka dict)
        maps = [value for value in values if isinstance(value, Mapping)]
        if maps:
            # if we have any mapping types, call recursively to merge them
            updated[key] = merge_dicts(*maps)
        else:
            # otherwise, just grab the last value we have, since later arguments
            # take precedence over earlier arguments
            updated[key] = values[-1]
    return updated
I had two dictionaries (a and b) which could each contain any number of nested dictionaries. I wanted to recursively merge them, with b taking precedence over a.
Considering the nested dictionaries as trees, what I wanted was:
To update a so that every path to every leaf in b would be represented in a
To overwrite subtrees of a if a leaf is found in the corresponding path in b
Maintain the invariant that all b leaf nodes remain leafs.
The existing answers were a little complicated for my taste and left some details on the shelf. I hacked together the following, which passes unit tests for my data set.
def merge_map(a, b):
    """Recursively merge ``b`` into ``a``, with ``b`` taking precedence.

    If either argument is not a dict, ``b`` is returned as-is, so a leaf in
    ``b`` replaces whatever ``a`` had at that path (including a whole
    subtree). Otherwise ``a`` is updated in place and returned.
    """
    if not isinstance(a, dict) or not isinstance(b, dict):
        return b
    for key in b.keys():
        a[key] = merge_map(a[key], b[key]) if key in a else b[key]
    return a
Example (formatted for clarity):
a = {
1 : {'a': 'red',
'b': {'blue': 'fish', 'yellow': 'bear' },
'c': { 'orange': 'dog'},
},
2 : {'d': 'green'},
3: 'e'
}
b = {
1 : {'b': 'white'},
2 : {'d': 'black'},
3: 'e'
}
>>> merge_map(a, b)
{1: {'a': 'red',
'b': 'white',
'c': {'orange': 'dog'},},
2: {'d': 'black'},
3: 'e'}
The paths in b that needed to be maintained were:
1 -> 'b' -> 'white'
2 -> 'd' -> 'black'
3 -> 'e'.
a had the unique and non-conflicting paths of:
1 -> 'a' -> 'red'
1 -> 'c' -> 'orange' -> 'dog'
so they are still represented in the merged map.
And just another slight variation:
Here is a pure python3 set based deep update function. It updates nested dictionaries by looping through one level at a time and calls itself to update each next level of dictionary values:
def deep_update(dict_original, dict_update):
    """Return ``dict_original`` deep-updated with ``dict_update``.

    If either argument is not a dict, ``dict_update`` is returned. Otherwise
    a new dict is built level by level: shared keys are updated recursively,
    keys only in the update are added, and keys only in the original are
    kept. Neither input is modified.
    """
    if not (isinstance(dict_original, dict) and isinstance(dict_update, dict)):
        return dict_update
    output = dict(dict_original)
    for key, value in dict_update.items():
        if key in dict_original:
            output[key] = deep_update(dict_original[key], value)
        else:
            output[key] = value
    return output
A simple example:
x={'a':{'b':{'c':1, 'd':1}}}
y={'a':{'b':{'d':2, 'e':2}}, 'f':2}
print(deep_update(x, y))
>>> {'a': {'b': {'c': 1, 'd': 2, 'e': 2}}, 'f': 2}
How about another answer?!? This one also avoids mutation/side effects:
def merge(dict1, dict2):
    """Return a new dict combining ``dict1`` and ``dict2`` recursively.

    Neither input dict is modified (values are not copied, so the result
    may share leaf objects with the inputs).

    - A key present in only one input keeps that input's value.
    - A key present in both whose values are *both* dicts is merged
      recursively.
    - Any other key present in both keeps ``dict1``'s value.

    The last rule also covers a dict on one side and a non-dict on the
    other; previously that case recursed into a non-dict and raised
    ``TypeError``.
    """
    output = {}
    # Union of all keys; on a clash the value seen here is dict1's.
    all_items = {**dict2, **dict1}
    for key, value in all_items.items():
        if key not in dict1:
            output[key] = dict2[key]
        elif key not in dict2:
            output[key] = value
        elif isinstance(value, dict) and isinstance(dict2[key], dict):
            output[key] = merge(value, dict2[key])
        else:
            # Leaf conflict or dict/non-dict mismatch: dict1 wins.
            output[key] = value
    return output
dict1 = {1:{"a":{"A"}}, 2:{"b":{"B"}}}
dict2 = {2:{"c":{"C"}}, 3:{"d":{"D"}}}
dict3 = {1:{"a":{"A"}}, 2:{"b":{"B"},"c":{"C"}}, 3:{"d":{"D"}}}
assert dict3 == merge(dict1, dict2)
This is a solution I made that recursively merges dictionaries to an infinite depth. The first dictionary passed to the function is the master dictionary - values in it will overwrite the values in the same key in the second dictionary.
def merge(dict1: dict, dict2: dict) -> dict:
    """Recursively fill ``dict1`` with the entries it is missing from ``dict2``.

    ``dict1`` is the master: it is updated in place and returned, and any
    value it already holds is kept.  Only keys absent from ``dict1`` (at
    any depth) are taken from ``dict2``.  Nested dicts taken from
    ``dict2`` are rebuilt as fresh dicts rather than shared.

    If ``dict1`` holds a non-dict where ``dict2`` holds a dict, the
    master's value is kept; previously this case raised ``TypeError``.
    """
    for key, value in dict2.items():
        if isinstance(value, dict):
            if key not in dict1:
                # Build a fresh nested dict so dict1 does not alias dict2.
                dict1[key] = merge({}, value)
            elif isinstance(dict1[key], dict):
                merge(dict1[key], value)
            # else: the master holds a leaf here, and the master wins.
        elif key not in dict1:
            dict1[key] = value
    return dict1
This should help in merging all items from dict2 into dict1:
# Shallow (one level deep) merge of dict2 into dict1, in place.
for item, leaves in dict2.items():
    if item not in dict1:
        # New top-level key: take dict2's subtree as it is.
        dict1[item] = leaves
        continue
    # Shared top-level key: copy every leaf across, overwriting on a clash.
    for leaf in leaves:
        dict1[item][leaf] = leaves[leaf]
Please test it and tell us whether this is what you wanted.
EDIT:
The solution above merges only one level, but it correctly solves the example given by the OP. To merge multiple levels, recursion should be used.
I've been testing your solutions and decided to use this one in my project:
def mergedicts(dict1, dict2, conflict, no_conflict):
    """Lazily yield ``(key, value)`` pairs for the union of both dicts' keys.

    ``conflict(v1, v2)`` produces the value for a key present in both
    dicts; ``no_conflict(v)`` produces the value for a key present in only
    one of them.  Wrap the generator in ``dict(...)`` to get a dict.
    """
    every_key = set(dict1.keys()).union(dict2.keys())
    for k in every_key:
        in_first = k in dict1
        if in_first and k in dict2:
            value = conflict(dict1[k], dict2[k])
        elif in_first:
            value = no_conflict(dict1[k])
        else:
            value = no_conflict(dict2[k])
        yield (k, value)
from functools import reduce  # reduce() is no longer a builtin in Python 3

dict1 = {1: {"a": "A"}, 2: {"b": "B"}}
dict2 = {2: {"c": "C"}, 3: {"d": "D"}}


# This helper function allows for recursion and the use of reduce.
def f2(x, y):
    """Resolve a key conflict by recursively merging the two values.

    Passed to ``mergedicts`` as the ``conflict`` callback, and also usable
    directly as the two-argument function given to ``reduce``.
    """
    return dict(mergedicts(x, y, f2, lambda x: x))


print(dict(mergedicts(dict1, dict2, f2, lambda x: x)))
print(dict(reduce(f2, [dict1, dict2])))
Passing functions as parameters is the key to extending jterrace's solution so that it behaves like all the other recursive solutions.
The easiest way I can think of is:
#!/usr/bin/python
from copy import deepcopy
def dict_merge(a, b):
    """Return a deep copy of ``a`` with ``b`` merged into it.

    If ``b`` is not a dict it simply replaces ``a`` and is returned as-is.
    Otherwise ``a`` is deep-copied, and for every key of ``b``:

    - if the copy already holds a dict under that key, the two values are
      merged recursively;
    - otherwise a deep copy of ``b``'s value is stored.

    Neither ``a`` nor ``b`` is modified.
    """
    if not isinstance(b, dict):
        return b
    result = deepcopy(a)
    # dict.iteritems() only exists on Python 2; items() works on both.
    for k, v in b.items():
        if k in result and isinstance(result[k], dict):
            result[k] = dict_merge(result[k], v)
        else:
            result[k] = deepcopy(v)
    return result
# Example: key 2 exists on both sides, so its two sub-dicts are combined.
a = {1: {"a": 'A'}, 2: {"b": 'B'}}
b = {2: {"c": 'C'}, 3: {"d": 'D'}}
print(dict_merge(a, b))  # print is a function in Python 3
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
I have another slightly different solution here:
def deepMerge(d1, d2, inconflict=lambda v1, v2: v2):
    """Merge ``d2`` into ``d1`` in place and return ``d1``.

    Keys missing from ``d1`` are copied over.  When a key exists in both
    and both values are dicts, they are merged recursively.  Otherwise, if
    the two values differ, ``inconflict(d1_value, d2_value)`` decides what
    to store; by default the value from ``d2`` wins.
    """
    for k in d2:
        if k not in d1:
            d1[k] = d2[k]
            continue
        if isinstance(d1[k], dict) and isinstance(d2[k], dict):
            deepMerge(d1[k], d2[k], inconflict)
            continue
        if d1[k] != d2[k]:
            # Only differing values are handed to the resolver.
            d1[k] = inconflict(d1[k], d2[k])
    return d1
By default it resolves conflicts in favor of values from the second dict, but you can easily override this, with some witchery you may be able to even throw exceptions out of it. :).
class Utils(object):
    """
    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> Utils.merge_dict(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    >>> main = {'a': {'b': {'test': 'bug'}, 'c': 'C'}}
    >>> suply = {'a': {'b': 2, 'd': 'D', 'c': {'test': 'bug2'}}}
    >>> Utils.merge_dict(main, suply) == {'a': {'b': {'test': 'bug'}, 'c': 'C', 'd': 'D'}}
    True
    """

    @staticmethod
    def merge_dict(main, suply):
        """
        Merge ``suply`` into ``main`` in place.

        ``main`` is authoritative and ``suply`` only supplements it: keys
        missing from ``main`` are added, nested dicts present on both sides
        are merged recursively, and on any conflict the value already in
        ``main`` is kept.

        :return: ``main``, updated in place
        """
        for key, value in suply.items():
            if key not in main:
                main[key] = value
            elif isinstance(main[key], dict) and isinstance(value, dict):
                Utils.merge_dict(main[key], value)
            # Any other clash (leaf vs leaf, dict vs leaf): main wins.
        return main
if __name__ == '__main__':
import doctest
doctest.testmod()
Hey there, I also had the same problem, but I thought of a solution and I will post it here in case it is also useful for others. It merges nested dictionaries and also adds up the values; I needed to calculate some probabilities, so this one worked great:
#used to copy a nested dict to a nested dict
def deepupdate(target, src):
    """Merge the two-level dict ``src`` into ``target`` in place, summing values.

    For a top-level key missing from ``target``, a deep copy of ``src``'s
    sub-dict is stored.  For a top-level key present in both, each inner
    value from ``src`` is added (``+=``) to the matching inner value of
    ``target``, or inserted if ``target`` has no such inner key.
    Returns ``None``.
    """
    for k, v in src.items():
        if k not in target:
            target[k] = copy.deepcopy(v)
            continue
        bucket = target[k]
        for k2, v2 in v.items():
            if k2 in bucket:
                bucket[k2] += v2
            else:
                bucket[k2] = v2
by using the above method we can merge:
target = {'6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1}, '6,63': {'63,4': 1}}
src = {'5,4': {'4,4': 1}, '5,5': {'5,4': 1}, '4,4': {'4,3': 1}}
and this will become:
{'5,5': {'5,4': 1}, '5,4': {'4,4': 1}, '6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 2}, '6,63': {'63,4': 1}}
also notice the changes here:
target = {'6,6': {'6,63': 1}, '6,63': {'63,4': 1}, '4,4': {'4,3': 1}, '63,4': {'4,4': 1}}
src = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '4,4': {'4,9': 1}, '3,4': {'4,4': 1}, '5,5': {'5,4': 1}}
merge = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '6,63': {'63,4': 1}, '5,5': {'5,4': 1}, '6,6': {'6,63': 1}, '3,4': {'4,4': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1, '4,9': 1}}
Don't forget to also add the import for copy:
import copy
from collections import defaultdict
from itertools import chain
class DictHelper:
    """Namespace for dictionary helper functions."""

    @staticmethod
    def merge_dictionaries(*dictionaries, override=True):
        """Merge any number of dicts into a new dict, recursing into sub-dicts.

        For each key, all values that are not ``None`` must share one exact
        type:

        - ``list`` values are combined into a ``set`` of their elements;
        - ``dict`` values are merged recursively with the same ``override``
          setting;
        - any other value is taken from the last dict that defines it when
          ``override`` is true, otherwise all values are collected in a
          list in argument order.

        :param dictionaries: the dicts to merge, lowest precedence first
        :param override: keep only the last scalar value instead of
            collecting every value in a list
        :return: a new merged ``dict``
        :raises Exception: if the non-``None`` values of a key do not all
            have the same type (or the key has no non-``None`` value)
        """
        merged_dict = {}
        # Build a set using all dict keys.
        all_unique_keys = set(chain.from_iterable(dictionaries))
        for key in all_unique_keys:
            # A missing key and an explicit None both mean "no value here".
            values = [d[key] for d in dictionaries if d.get(key) is not None]
            keys_value_type = list({type(value) for value in values})
            if len(keys_value_type) != 1:
                raise Exception("Different objects type for same key: {keys_value_type}".format(keys_value_type=keys_value_type))
            value_type = keys_value_type[0]
            if value_type == list:
                merged_dict[key] = set(chain.from_iterable(values))
            elif value_type == dict:
                # Propagate ``override`` so nested levels follow the same rule.
                merged_dict[key] = DictHelper.merge_dictionaries(*values, override=override)
            else:
                merged_dict[key] = values[-1] if override else values
        return merged_dict
if __name__ == '__main__':
d1 = {'aaaaaaaaa': ['to short', 'to long'], 'bbbbb': ['to short', 'to long'], "cccccc": ["the is a test"]}
d2 = {'aaaaaaaaa': ['field is not a bool'], 'bbbbb': ['field is not a bool']}
d3 = {'aaaaaaaaa': ['filed is not a string', "to short"], 'bbbbb': ['field is not an integer']}
print(DictHelper.merge_dictionaries(d1, d2, d3))
d4 = {"a": {"x": 1, "y": 2, "z": 3, "d": {"x1": 10}}}
d5 = {"a": {"x": 10, "y": 20, "d": {"x2": 20}}}
print(DictHelper.merge_dictionaries(d4, d5))
Output:
{'bbbbb': {'to long', 'field is not an integer', 'to short', 'field is not a bool'},
'aaaaaaaaa': {'to long', 'to short', 'filed is not a string', 'field is not a bool'},
'cccccc': {'the is a test'}}
{'a': {'y': 20, 'd': {'x1': 10, 'x2': 20}, 'z': 3, 'x': 10}}

How to merge dictionaries of dictionaries?

I need to merge multiple dictionaries, here's what I have for instance:
dict1 = {1:{"a":{A}}, 2:{"b":{B}}}
dict2 = {2:{"c":{C}}, 3:{"d":{D}}}
With A B C and D being leaves of the tree, like {"info1":"value", "info2":"value2"}
There is an unknown level(depth) of dictionaries, it could be {2:{"c":{"z":{"y":{C}}}}}
In my case it represents a directory/files structure with nodes being docs and leaves being files.
I want to merge them to obtain:
dict3 = {1:{"a":{A}}, 2:{"b":{B},"c":{C}}, 3:{"d":{D}}}
I'm not sure how I could do that easily with Python.
This is actually quite tricky - particularly if you want a useful error message when things are inconsistent, while correctly accepting duplicate but consistent entries (something no other answer here does..)
Assuming you don't have huge numbers of entries, a recursive function is easiest:
from functools import reduce
def merge(a, b, path=None):
    """Merge ``b`` into ``a`` in place and return ``a``.

    Nested dicts are merged recursively.  Identical leaf values on both
    sides are accepted; differing values raise ``Exception`` with the
    dotted key path of the conflict.  ``path`` is the list of keys walked
    so far and is only used to build that message.
    """
    path = [] if path is None else path
    for key in b:
        if key not in a:
            a[key] = b[key]
            continue
        if isinstance(a[key], dict) and isinstance(b[key], dict):
            merge(a[key], b[key], path + [str(key)])
            continue
        same_leaf = a[key] == b[key]
        if not same_leaf:
            raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
    return a
# works
print(merge({1:{"a":"A"},2:{"b":"B"}}, {2:{"c":"C"},3:{"d":"D"}}))
# has conflict
merge({1:{"a":"A"},2:{"b":"B"}}, {1:{"a":"A"},2:{"b":"C"}})
note that this mutates a - the contents of b are added to a (which is also returned). If you want to keep a you could call it like merge(dict(a), b).
agf pointed out (below) that you may have more than two dicts, in which case you can use:
reduce(merge, [dict1, dict2, dict3...])
where everything will be added to dict1.
Note: I edited my initial answer to mutate the first argument; that makes the "reduce" easier to explain
You could try mergedeep.
Installation
$ pip3 install mergedeep
Usage
from mergedeep import merge
a = {"keyA": 1}
b = {"keyB": {"sub1": 10}}
c = {"keyB": {"sub2": 20}}
merge(a, b, c)
print(a)
# {"keyA": 1, "keyB": {"sub1": 10, "sub2": 20}}
For a full list of options, check out the docs!
Here's an easy way to do it using generators:
def mergedicts(dict1, dict2):
    """Lazily yield ``(key, value)`` pairs of the deep merge of two dicts.

    Keys present on one side only keep that side's value.  For keys on
    both sides, two dict values are merged recursively; in every other
    case the value from ``dict2`` overrides the one from ``dict1``.
    """
    for k in set(dict1.keys()).union(dict2.keys()):
        if k not in dict2:
            yield (k, dict1[k])
        elif k not in dict1:
            yield (k, dict2[k])
        elif isinstance(dict1[k], dict) and isinstance(dict2[k], dict):
            yield (k, dict(mergedicts(dict1[k], dict2[k])))
        else:
            # At least one value is not a dict, so merging cannot go any
            # deeper: the second dict's value wins.  Raise here instead if
            # value conflicts should be reported.
            yield (k, dict2[k])
# Example: key 2 exists on both sides, so its two sub-dicts are combined.
dict1 = {1: {"a": "A"}, 2: {"b": "B"}}
dict2 = {2: {"c": "C"}, 3: {"d": "D"}}
print(dict(mergedicts(dict1, dict2)))  # print is a function in Python 3
This prints:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
One issue with this question is that the values of the dict can be arbitrarily complex pieces of data. Based upon these and other answers I came up with this code:
class YamlReaderError(Exception):
    """Raised when two values cannot be merged."""
def data_merge(a, b):
    """Merge ``b`` into ``a`` and return the merged result.

    Merge rules, chosen by the type of ``a``:

    - ``None`` or a scalar (``str``, ``bytes``, ``int``, ``float``): the
      result is ``b`` (scalars are replaced).
    - ``list``: ``b`` is appended, or its items are, if ``b`` is a list.
      ``a`` is modified in place.
    - ``dict``: ``b`` must be a dict; missing keys are added and existing
      keys are merged recursively.  ``a`` is modified in place.

    NOTE: tuples and arbitrary objects are not handled, as it is totally
    ambiguous what should happen.

    :raises YamlReaderError: for an unsupported type of ``a``, for a
        non-dict merged into a dict, or if a ``TypeError`` occurs while
        merging.
    """
    key = None
    try:
        # Python 3 port: ``unicode`` and ``long`` are gone (folded into
        # ``str`` and ``int``), and Python 2's ``str`` was a byte string,
        # hence ``bytes`` here.
        if a is None or isinstance(a, (str, bytes, int, float)):
            # border case for first run or if a is a primitive
            a = b
        elif isinstance(a, list):
            # lists can only be appended to
            if isinstance(b, list):
                a.extend(b)
            else:
                a.append(b)
        elif isinstance(a, dict):
            # dicts must be merged
            if isinstance(b, dict):
                for key in b:
                    if key in a:
                        a[key] = data_merge(a[key], b[key])
                    else:
                        a[key] = b[key]
            else:
                raise YamlReaderError('Cannot merge non-dict "%s" into dict "%s"' % (b, a))
        else:
            raise YamlReaderError('NOT IMPLEMENTED "%s" into "%s"' % (b, a))
    except TypeError as e:
        raise YamlReaderError('TypeError "%s" in key "%s" when merging "%s" into "%s"' % (e, key, b, a)) from e
    return a
My use case is merging YAML files where I only have to deal with a subset of possible data types. Hence I can ignore tuples and other objects. For me a sensible merge logic means
replace scalars
append lists
merge dicts by adding missing keys and updating existing keys
Everything else and the unforeseens results in an error.
Dictionaries of dictionaries merge
As this is the canonical question (in spite of certain non-generalities) I'm providing the canonical Pythonic approach to solving this issue.
Simplest Case: "leaves are nested dicts that end in empty dicts":
d1 = {'a': {1: {'foo': {}}, 2: {}}}
d2 = {'a': {1: {}, 2: {'bar': {}}}}
d3 = {'b': {3: {'baz': {}}}}
d4 = {'a': {1: {'quux': {}}}}
This is the simplest case for recursion, and I would recommend two naive approaches:
def rec_merge1(d1, d2):
    '''Return a new merged dict of dicts; neither input is modified.

    Every value is assumed to be a dict (leaves are empty dicts).  Keys
    present in both inputs are merged recursively; all other keys are
    carried over from whichever side has them.

    The earlier version wrote the merged sub-dicts back into ``d2`` before
    copying, silently mutating the caller's second argument.
    '''
    d3 = d1.copy()
    for k, v in d2.items():  # in Python 2, use .iteritems()!
        d3[k] = rec_merge1(d1[k], v) if k in d1 else v
    return d3
def rec_merge2(d1, d2):
    '''Update the first dict with the second recursively; return ``d1``.

    Every value is assumed to be a dict (leaves are empty dicts).  ``d1``
    is modified in place.  ``d2`` is only read, but sub-dicts for keys that
    are new to ``d1`` are shared with ``d2`` rather than copied.

    The earlier version wrote merged sub-dicts back into ``d2``, so the
    second argument was mutated as well.
    '''
    for k, v in d2.items():  # in Python 2, use .iteritems()!
        if k in d1:
            rec_merge2(d1[k], v)
        else:
            d1[k] = v
    return d1
I believe I would prefer the second to the first, but keep in mind that the original state of the first would have to be rebuilt from its origin. Here's the usage:
>>> from functools import reduce # only required for Python 3.
>>> reduce(rec_merge1, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
>>> reduce(rec_merge2, (d1, d2, d3, d4))
{'a': {1: {'quux': {}, 'foo': {}}, 2: {'bar': {}}}, 'b': {3: {'baz': {}}}}
Complex Case: "leaves are of any other type:"
So if they end in dicts, it's a simple case of merging the end empty dicts. If not, it's not so trivial. If strings, how do you merge them? Sets can be updated similarly, so we could give that treatment, but we lose the order in which they were merged. So does order matter?
So in lieu of more information, the simplest approach will be to give them the standard update treatment if both values are not dicts: i.e. the second dict's value will overwrite the first, even if the second dict's value is None and the first's value is a dict with a lot of info.
d1 = {'a': {1: 'foo', 2: None}}
d2 = {'a': {1: None, 2: 'bar'}}
d3 = {'b': {3: 'baz'}}
d4 = {'a': {1: 'quux'}}
from collections.abc import MutableMapping
def rec_merge(d1, d2):
    '''
    Merge two dicts of dicts recursively into a new dict.

    Keys whose values are mutable mappings on both sides are merged
    recursively.  For any other key present in both, the second dict's
    value overwrites the first's (even if it is ``None``).  Neither input
    is modified; the earlier version wrote merged sub-dicts back into
    ``d2``.
    '''
    d3 = d1.copy()
    for k, v in d2.items():
        # this next check is the only difference from the all-dicts case!
        if k in d1 and all(isinstance(e, MutableMapping) for e in (d1[k], v)):
            d3[k] = rec_merge(d1[k], v)
        else:
            # we could further check types and merge as appropriate here.
            d3[k] = v
    return d3
And now
from functools import reduce
reduce(rec_merge, (d1, d2, d3, d4))
returns
{'a': {1: 'quux', 2: 'bar'}, 'b': {3: 'baz'}}
Application to the original question:
I've had to remove the curly braces around the letters and put them in single quotes for this to be legit Python (else they would be set literals in Python 2.7+) as well as append a missing brace:
dict1 = {1:{"a":'A'}, 2:{"b":'B'}}
dict2 = {2:{"c":'C'}, 3:{"d":'D'}}
and rec_merge(dict1, dict2) now returns:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
Which matches the desired outcome of the original question (after changing, e.g. the {A} to 'A'.)
Based on @andrew cooke's answer. This version handles nested lists of dicts and also adds an option to update (overwrite) conflicting values.
def merge(a, b, path=None, update=True):
    """Merge ``b`` into ``a`` in place and return ``a``.

    See http://stackoverflow.com/questions/7204805/python-dictionaries-of-dictionaries-merge

    - Nested dicts are merged recursively.
    - Lists present on both sides are merged position by position: dict
      elements are merged recursively, other differing elements are
      treated as conflicts, and surplus elements of ``b``'s list are
      appended.
    - A conflict (two differing non-mergeable values) is resolved in
      favour of ``b`` when ``update`` is true; otherwise ``Exception`` is
      raised with the dotted path of the conflict.

    ``path`` is the list of keys walked so far (used for error messages).

    Fixes over the earlier version: ``update`` is now honoured at every
    nesting level, a longer list in ``b`` no longer raises ``IndexError``,
    and non-dict list elements no longer crash the recursion.
    """
    if path is None:
        path = []
    for key in b:
        if key not in a:
            a[key] = b[key]
            continue
        here = path + [str(key)]
        if isinstance(a[key], dict) and isinstance(b[key], dict):
            merge(a[key], b[key], here, update=update)
        elif a[key] == b[key]:
            pass  # same leaf value
        elif isinstance(a[key], list) and isinstance(b[key], list):
            for idx, val in enumerate(b[key]):
                if idx >= len(a[key]):
                    # b's list is longer: surplus items are new data.
                    a[key].append(val)
                elif isinstance(a[key][idx], dict) and isinstance(val, dict):
                    merge(a[key][idx], val, here + [str(idx)], update=update)
                elif a[key][idx] == val:
                    pass  # same element
                elif update:
                    a[key][idx] = val
                else:
                    raise Exception('Conflict at %s' % '.'.join(here + [str(idx)]))
        elif update:
            a[key] = b[key]
        else:
            raise Exception('Conflict at %s' % '.'.join(here))
    return a
This simple recursive procedure will merge one dictionary into another while overriding conflicting keys:
#!/usr/bin/env python2.7
def merge_dicts(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1``, overriding on conflict.

    If both arguments are dicts, ``dict1`` is updated in place and
    returned.  If either one is not a dict, ``dict2`` is returned as the
    winning value.
    """
    if isinstance(dict1, dict) and isinstance(dict2, dict):
        for k, incoming in dict2.items():
            dict1[k] = merge_dicts(dict1[k], incoming) if k in dict1 else incoming
        return dict1
    return dict2
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {2:{"c":"C"}, 3:{"d":"D"}}))
print (merge_dicts({1:{"a":"A"}, 2:{"b":"B"}}, {1:{"a":"A"}, 2:{"b":"C"}}))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
{1: {'a': 'A'}, 2: {'b': 'C'}}
Based on the answer from @andrew cooke.
It takes care of nested lists in a better way.
def deep_merge_lists(original, incoming):
    """
    Deep merge ``incoming`` into ``original``, position by position.

    ``original`` is modified in place; nothing is returned.  For each
    index present in both lists:
        a. two dicts are merged with deep_merge_dicts;
        b. two lists are merged with a recursive deep_merge_lists call;
        c. anything else (including mismatched types) is overridden by
           the incoming element.
    If ``incoming`` is longer than ``original``, the extra elements are
    appended.
    """
    common_length = min(len(original), len(incoming))
    for idx, new in enumerate(incoming[:common_length]):
        old = original[idx]
        if isinstance(old, dict) and isinstance(new, dict):
            deep_merge_dicts(old, new)
        elif isinstance(old, list) and isinstance(new, list):
            deep_merge_lists(old, new)
        else:
            original[idx] = new
    original.extend(incoming[common_length:])
def deep_merge_dicts(original, incoming):
    """
    Deep merge ``incoming`` into ``original``.

    ``original`` is modified in place; nothing is returned.  Keys missing
    from ``original`` are added.  For keys present in both:
        a. two dicts are merged with a recursive deep_merge_dicts call;
        b. two lists are merged with deep_merge_lists;
        c. anything else (including mismatched types) is overridden by
           the incoming value.
    """
    for key in incoming:
        new = incoming[key]
        if key not in original:
            original[key] = new
            continue
        old = original[key]
        if isinstance(old, dict) and isinstance(new, dict):
            deep_merge_dicts(old, new)
        elif isinstance(old, list) and isinstance(new, list):
            deep_merge_lists(old, new)
        else:
            original[key] = new
If you have an unknown level of dictionaries, then I would suggest a recursive function:
def combineDicts(dictionary1, dictionary2):
    """Return a new dict that recursively combines the two inputs.

    Keys present in only one input keep that input's value.  For keys
    present in both, two dict values are combined recursively; in every
    other case the value from ``dictionary2`` wins.  Neither input is
    modified.

    Changes from the earlier version: ported to Python 3
    (``iteritems``/``has_key`` no longer exist), no longer pops entries
    out of the caller's ``dictionary2``, and no longer crashes when
    ``dictionary2`` holds a dict where ``dictionary1`` holds a non-dict.
    """
    output = dict(dictionary1)
    for item, value in dictionary2.items():
        existing = dictionary1.get(item)
        if isinstance(existing, dict) and isinstance(value, dict):
            output[item] = combineDicts(existing, value)
        else:
            output[item] = value
    return output
In case someone wants yet another approach to this problem, here's my solution.
Virtues: short, declarative, and functional in style (recursive, does no mutation).
Potential Drawback: This might not be the merge you're looking for. Consult the docstring for semantics.
def deep_merge(a, b):
    """
    Merge two values, with `b` taking precedence over `a`.

    Semantics:
    - If both values are dictionaries, a new dictionary is returned:
      * keys found in only one of them keep their value;
      * keys found in both have their values merged with these same
        semantics.
    - Otherwise `b` is returned, unless `b` is `None`, in which case `a`
      is returned (`None` stands for "no value").
    """
    if isinstance(a, dict) and isinstance(b, dict):
        merged = {}
        # Union of the keys of both dictionaries; a key missing on one
        # side reads as None there, which lets the other side win.
        for key in set(a.keys()) | set(b.keys()):
            merged[key] = deep_merge(a.get(key), b.get(key))
        return merged
    if b is None:
        return a
    return b
Overview
The following approach subdivides the problem of a deep merge of dicts into:
A parameterized shallow merge function merge(f)(a,b) that uses a
function f to merge two dicts a and b
A recursive merger function f to be used together with merge
Implementation
A function for merging two (non nested) dicts can be written in a lot of ways. I personally like
def merge(f):
    """Build a shallow two-dict merge function driven by ``f``.

    The returned function takes dicts ``a`` and ``b`` and returns a new
    dict over the union of their keys, where each value is
    ``f(a.get(key), b.get(key))`` (a missing key is passed as ``None``).
    """
    def merge(a, b):
        combined = {}
        for key in a.keys() | b.keys():
            combined[key] = f(a.get(key), b.get(key))
        return combined
    return merge
A nice way of defining an appropriate recursive merger function f is using multipledispatch which allows to define functions that evaluate along different paths depending on the type of their arguments.
from multipledispatch import dispatch
# For anything that is not a pair of dicts: prefer b, fall back to a.
@dispatch(object, object)
def f(a, b):
    """Return ``b`` unless it is ``None`` (meaning "missing"), else ``a``."""
    return b if b is not None else a


# For two dicts: recurse, using this same function to merge the values.
@dispatch(dict, dict)
def f(a, b):
    """Merge two dicts key by key, applying ``f`` to each pair of values."""
    return merge(f)(a, b)
Example
To merge two nested dicts simply use merge(f) e.g.:
dict1 = {1:{"a":"A"},2:{"b":"B"}}
dict2 = {2:{"c":"C"},3:{"d":"D"}}
merge(f)(dict1, dict2)
#returns {1: {'a': 'A'}, 2: {'b': 'B', 'c': 'C'}, 3: {'d': 'D'}}
Notes:
The advantages of this approach are:
The function is built from smaller functions that each do a single thing
which makes the code simpler to reason about and test
The behaviour is not hard-coded but can be changed and extended as needed which improves code reuse (see example below).
Customization
Some answers also considered dicts that contain lists, e.g. of other (potentially nested) dicts. In this case one might want to map over the lists and merge them based on position. This can be done by adding another definition to the merger function f:
import itertools
@dispatch(list, list)
def f(a, b):
    """Merge two lists position by position.

    Each pair of elements is merged with ``f`` itself, so dict elements
    are merged recursively, while scalars and the ``None`` padding that
    ``zip_longest`` adds for the shorter list go through the generic
    "prefer b, fall back to a" overload instead of crashing inside the
    dict-only ``merge(f)``.
    """
    return [f(*arg) for arg in itertools.zip_longest(a, b)]
There's a slight problem with andrew cooke's answer: in some cases it modifies the second argument b when you modify the returned dict. Specifically, it's because of this line:
if key in a:
...
else:
a[key] = b[key]
If b[key] is a dict, it will simply be assigned to a, meaning any subsequent modifications to that dict will affect both a and b.
a={}
b={'1':{'2':'b'}}
c={'1':{'3':'c'}}
merge(merge(a,b), c) # {'1': {'3': 'c', '2': 'b'}}
a # {'1': {'3': 'c', '2': 'b'}} (as expected)
b # {'1': {'3': 'c', '2': 'b'}} <----
c # {'1': {'3': 'c'}} (unmodified)
To fix this, the line would have to be substituted with this:
if isinstance(b[key], dict):
a[key] = clone_dict(b[key])
else:
a[key] = b[key]
Where clone_dict is:
def clone_dict(obj):
    """Return a copy of ``obj`` in which every nested dict is copied too.

    Only dicts are cloned; any other value (lists, sets, objects, ...) is
    shared between the original and the clone.

    The earlier version ended in a bare ``return`` and so always returned
    ``None``; it also used the Python 2-only ``iteritems()``.
    """
    clone = {}
    for key, value in obj.items():
        if isinstance(value, dict):
            clone[key] = clone_dict(value)
        else:
            clone[key] = value
    return clone
Still. This obviously doesn't account for list, set and other stuff, but I hope it illustrates the pitfalls when trying to merge dicts.
And for completeness sake, here is my version, where you can pass it multiple dicts:
def merge_dicts(*args):
    """Deep-merge any number of dicts into a new dict.

    The inputs are never modified and the result shares no dict objects
    with them (non-dict values are shared).  Nested dicts are merged
    recursively; equal leaf values are accepted.

    :raises Exception: if two inputs hold different non-dict values (or a
        dict and a non-dict) under the same key path.
    """
    from functools import reduce  # reduce() is not a builtin on Python 3

    def clone_dict(obj):
        # Copy nested dicts so the result never aliases an input dict.
        # (This previously ended in a bare ``return``, yielding None.)
        return {
            key: clone_dict(value) if isinstance(value, dict) else value
            for key, value in obj.items()
        }

    def merge(a, b, path=None):
        path = [] if path is None else path
        for key in b:
            if key not in a:
                a[key] = clone_dict(b[key]) if isinstance(b[key], dict) else b[key]
            elif isinstance(a[key], dict) and isinstance(b[key], dict):
                merge(a[key], b[key], path + [str(key)])
            elif a[key] == b[key]:
                pass  # same leaf value
            else:
                raise Exception('Conflict at `{path}\''.format(path='.'.join(path + [str(key)])))
        return a

    # Start from an empty dict so the first argument is copied too.
    return reduce(merge, args, {})
You can use the merge function from the toolz package, for example:
>>> import toolz
>>> dict1 = {1: {'a': 'A'}, 2: {'b': 'B'}}
>>> dict2 = {2: {'c': 'C'}, 3: {'d': 'D'}}
>>> toolz.merge_with(toolz.merge, dict1, dict2)
{1: {'a': 'A'}, 2: {'c': 'C'}, 3: {'d': 'D'}}
This version of the function will account for N number of dictionaries, and only dictionaries -- no improper parameters can be passed, or it will raise a TypeError. The merge itself accounts for key conflicts, and instead of overwriting data from a dictionary further down the merge chain, it creates a set of values and appends to that; no data is lost.
It might not be the most efficient on the page, but it's the most thorough and you're not going to lose any information when you merge your 2 to N dicts.
def merge_dicts(*dicts):
    """Merge two or more dicts into a new dict without dropping values.

    Nested dicts are merged recursively.  When the same key holds two
    different non-dict values of the same type, both are kept as a sorted
    two-element list; when the values are equal, that single value is
    kept.

    :raises TypeError: if an argument is not a dict, or if one key holds
        values of different types in two inputs.
    :raises ValueError: if fewer than two dicts are given.

    Changes from the earlier version: ported from Python 2 ``raise X, msg``
    syntax; equal values are returned as-is instead of being passed
    through ``sorted()``; unhashable values such as lists no longer fail.
    """
    from functools import reduce  # reduce() is not a builtin on Python 3

    if not all(isinstance(d, dict) for d in dicts):
        raise TypeError("Object in *dicts not of type dict")
    if len(dicts) < 2:
        raise ValueError("Requires 2 or more dict objects")

    def merge(a, b):
        for d in set(a.keys()).union(b.keys()):
            if d in a and d in b:
                # Exact type match is intended: int vs float is a conflict.
                if type(a[d]) != type(b[d]):
                    raise TypeError("Conflicting key:value type assignment")
                if isinstance(a[d], dict):
                    yield (d, dict(merge(a[d], b[d])))
                elif a[d] == b[d]:
                    yield (d, a[d])
                else:
                    yield (d, sorted([a[d], b[d]]))
            elif d in a:
                yield (d, a[d])
            else:
                yield (d, b[d])

    return reduce(lambda x, y: dict(merge(x, y)), dicts[1:], dicts[0])
# Key 1 collides with two different ints, so both are kept as a list.
print(merge_dicts({1: 1, 2: {1: 2}}, {1: 2, 2: {3: 1}}, {4: 4}))
output: {1: [1, 2], 2: {1: 2, 3: 1}, 4: 4}
Since dictviews support set operations, I was able to greatly simplify jterrace's answer.
def merge(dict1, dict2):
    """Lazily yield ``(key, value)`` pairs of the deep merge of two dicts.

    Pairs come in three groups: keys only in ``dict1``, keys only in
    ``dict2``, then keys in both, whose values are merged recursively into
    a new dict.  A shared key whose values lack a ``keys`` method raises
    ``AttributeError`` when the generator is consumed.
    """
    keys1, keys2 = dict1.keys(), dict2.keys()
    yield from ((k, dict1[k]) for k in keys1 - keys2)
    yield from ((k, dict2[k]) for k in keys2 - keys1)
    yield from ((k, dict(merge(dict1[k], dict2[k]))) for k in keys1 & keys2)
Any attempt to combine a dict with a non-dict (technically, an object with a 'keys' method and an object without a 'keys' method) will raise an AttributeError. This includes both the initial call to the function and recursive calls. This is exactly what I wanted, so I left it. You could easily catch any AttributeError thrown by the recursive call and then yield any value you please.
Short-n-sweet:
from collections.abc import MutableMapping as Map
def nested_update(d, v):
    """
    Update dict-like 'd' in place with dict-like 'v', descending into
    nested mappings.

    When a key holds a mutable mapping on both sides, the two are merged
    recursively; in every other case the value from 'v' replaces (or
    creates) the entry in 'd'.  Returns None.
    """
    for key in v:
        incoming = v[key]
        mergeable = key in d and isinstance(d[key], Map) and isinstance(incoming, Map)
        if mergeable:
            nested_update(d[key], incoming)
        else:
            d[key] = incoming
This works like (and is built on) Python's dict.update method. It returns None (you can always add return d if you prefer) as it updates dict d in-place. Keys in v will overwrite any existing keys in d (it does not try to interpret the dict's contents).
It will also work for other ("dict-like") mappings.
I have an iterative solution - works much much better with big dicts & a lot of them (for example jsons etc):
import collections
def merge_dict_with_subdicts(dict1: dict, dict2: dict) -> dict:
    """
    Iteratively merge ``dict2`` into ``dict1`` in place and return ``dict1``.

    Behaves like the builtin dict.update, except that when both sides hold
    a dict under the same key the two are merged instead of replaced.  In
    every other case the value from ``dict2`` overrides.
    """
    # Explicit stack of (target, source) pairs that still need merging.
    pending = [(dict1, dict2)]
    while pending:
        target, source = pending.pop()
        for k, v in source.items():
            both_dicts = k in target and isinstance(target[k], dict) and isinstance(v, dict)
            if both_dicts:
                pending.append((target[k], v))
            else:
                target[k] = v
    return dict1
note that this will use the value in d2 to override d1, in case they are not both dicts. (same as python's dict.update())
some tests:
def test_deep_update():
    """Exercise merge_dict_with_subdicts on flat, nested and mixed inputs."""
    d = {}

    # Merging into an empty dict copies a flat key across.
    merge_dict_with_subdicts(d, {"a": 4})
    assert d == {"a": 4}

    # A brand-new nested branch is added next to the existing key.
    merge_dict_with_subdicts(d, {"b": {"c": {"d": 6}}})
    assert d == {"a": 4, "b": {"c": {"d": 6}}}

    # A flat key is overwritten while a nested branch gains a sibling.
    merge_dict_with_subdicts(d, {"a": 3, "b": {"f": 7}})
    assert d == {"a": 3, "b": {"c": {"d": 6}, "f": 7}}

    # test a case where one of the dicts has dict as value and the other has something else
    merge_dict_with_subdicts(d, {'a': {'b': 4}})
    assert d['a']['b'] == 4
I've tested with around ~1200 dicts - this method took 0.4 seconds, while the recursive solution took ~2.5 seconds.
As noted in many other answers, a recursive algorithm makes the most sense here. In general, when working with recursion, it is preferable to create new values rather than trying to modify any input data structure.
We need to define what happens at each merge step. If both inputs are dictionaries, this is easy: we copy across unique keys from each side, and recursively merge the values of the duplicated keys. It's the base cases that cause a problem. It will be easier to understand the logic if we pull out a separate function for that. As a placeholder, we could just wrap the two values in a tuple:
def merge_leaves(x, y):
    """Placeholder leaf rule: keep both values, paired up in a tuple."""
    pair = x, y
    return pair
Now the core of our logic looks like:
def merge(x, y):
    """Return a new deep merge of ``x`` and ``y`` without modifying either.

    If both are dicts, shared keys are merged recursively and keys unique
    to one side are copied over.  Otherwise the pair is handed to
    ``merge_leaves`` to decide the result.
    """
    if not isinstance(x, dict) or not isinstance(y, dict):
        return merge_leaves(x, y)
    x_keys, y_keys = x.keys(), y.keys()
    result = {}
    for k in x_keys & y_keys:
        result[k] = merge(x[k], y[k])
    for k in x_keys - y_keys:
        result[k] = x[k]
    for k in y_keys - x_keys:
        result[k] = y[k]
    return result
Let's test it:
>>> x = {'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y = {'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
>>> merge(x, y)
{'f': (1, {'b': 'c'}), 'g': ({'h', 'i'}, 1), 'a': {'d': ('e', 'e'), 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
>>> x # The originals are unmodified.
{'a': {'b': 'c', 'd': 'e'}, 'f': 1, 'g': {'h', 'i'}, 'j': None}
>>> y
{'a': {'d': 'e', 'h': 'i'}, 'f': {'b': 'c'}, 'g': 1, 'k': None}
We can easily modify the leaf-merging rule, for example:
def merge_leaves(x, y):
    """Leaf rule: combine with ``+`` where supported, else return ``Ellipsis``."""
    try:
        combined = x + y
    except TypeError:
        return Ellipsis
    else:
        return combined
and observe the effects:
>>> merge(x, y)
{'f': Ellipsis, 'g': Ellipsis, 'a': {'d': 'ee', 'b': 'c', 'h': 'i'}, 'j': None, 'k': None}
We could also potentially clean this up by using a third-party library to dispatch based on the type of the inputs. For example, using multipledispatch, we could do things like:
@dispatch(dict, dict)
def merge(x, y):
    """Merge two dicts: recurse on shared keys, copy the unique ones."""
    x_keys, y_keys = x.keys(), y.keys()
    result = { k: merge(x[k], y[k]) for k in x_keys & y_keys }
    result.update({k: x[k] for k in x_keys - y_keys})
    result.update({k: y[k] for k in y_keys - x_keys})
    return result


@dispatch(str, str)
def merge(x, y):
    """Two strings are concatenated."""
    return x + y


@dispatch(tuple, tuple)
def merge(x, y):
    """Two tuples are concatenated."""
    return x + y


@dispatch(list, list)
def merge(x, y):
    """Two lists are concatenated."""
    return x + y


@dispatch(int, int)
def merge(x, y):
    """Two integers under the same key are treated as a conflict."""
    raise ValueError("integer value conflict")


@dispatch(object, object)
def merge(x, y):
    """Fallback for any other combination: keep both values as a pair."""
    return (x, y)
This allows us to handle various combinations of leaf-type special cases without writing our own type checking, and also replaces the type check in the main recursive function.
The code will depend on your rules for resolving merge conflicts, of course. Here's a version which can take an arbitrary number of arguments and merges them recursively to an arbitrary depth, without using any object mutation. It uses the following rules to resolve merge conflicts:
dictionaries take precedence over non-dict values ({"foo": {...}} takes precedence over {"foo": "bar"})
later arguments take precedence over earlier arguments (if you merge {"a": 1}, {"a": 2}, and {"a": 3} in order, the result will be {"a": 3})
try:
    # Python 3.3+: the ABCs live in collections.abc.  The aliases in
    # ``collections`` were removed in Python 3.10, so try this one first.
    from collections.abc import Mapping
except ImportError:
    try:
        from collections import Mapping  # Python 2
    except ImportError:
        # Last resort: only plain dicts are recognised as mappings.
        Mapping = dict
def merge_dicts(*dicts):
    """
    Return a new dictionary that is the result of merging the arguments together.

    For every key, if any argument holds a mapping under it, all mappings
    found there are merged recursively and non-mapping values are ignored
    (mappings take precedence).  Otherwise the value from the last
    argument that has the key is used, since later arguments take
    precedence over earlier ones.  The inputs are not modified.
    """
    # grab all keys
    keys = set()
    for d in dicts:
        keys = keys.union(set(d))

    updated = {}
    for key in keys:
        values = [d[key] for d in dicts if key in d]
        # which ones are mapping types? (aka dict)
        maps = [value for value in values if isinstance(value, Mapping)]
        updated[key] = merge_dicts(*maps) if maps else values[-1]
    return updated
I had two dictionaries (a and b) which could each contain any number of nested dictionaries. I wanted to recursively merge them, with b taking precedence over a.
Considering the nested dictionaries as trees, what I wanted was:
To update a so that every path to every leaf in b would be represented in a
To overwrite subtrees of a if a leaf is found in the corresponding path in b
Maintain the invariant that all b leaf nodes remain leafs.
The existing answers were a little complicated for my taste and left some details on the shelf. I hacked together the following, which passes unit tests for my data set.
def merge_map(a, b):
    """Recursively merge ``b`` into ``a`` in place, with ``b`` taking precedence.

    If either argument is not a dict, ``b`` is returned as-is (so a leaf in
    ``b`` replaces whatever ``a`` had at that position). Otherwise every key
    of ``b`` is merged into ``a`` and ``a`` itself is returned.
    """
    if not (isinstance(a, dict) and isinstance(b, dict)):
        return b
    for key, incoming in b.items():
        if key in a:
            # Shared key: descend; a non-dict on either side makes b win.
            a[key] = merge_map(a[key], incoming)
        else:
            a[key] = incoming
    return a
Example (formatted for clarity):
a = {
1 : {'a': 'red',
'b': {'blue': 'fish', 'yellow': 'bear' },
'c': { 'orange': 'dog'},
},
2 : {'d': 'green'},
3: 'e'
}
b = {
1 : {'b': 'white'},
2 : {'d': 'black'},
3: 'e'
}
>>> merge_map(a, b)
{1: {'a': 'red',
'b': 'white',
'c': {'orange': 'dog'},},
2: {'d': 'black'},
3: 'e'}
The paths in b that needed to be maintained were:
1 -> 'b' -> 'white'
2 -> 'd' -> 'black'
3 -> 'e'.
a had the unique and non-conflicting paths of:
1 -> 'a' -> 'red'
1 -> 'c' -> 'orange' -> 'dog'
so they are still represented in the merged map.
And just another slight variation:
Here is a pure python3 set based deep update function. It updates nested dictionaries by looping through one level at a time and calls itself to update each next level of dictionary values:
def deep_update(dict_original, dict_update):
    """Return a copy of ``dict_original`` deep-updated with ``dict_update``.

    When both arguments are dicts, a new dict is built: keys present in both
    are updated recursively, keys only in ``dict_update`` are added, and keys
    only in ``dict_original`` are kept. If either argument is not a dict,
    ``dict_update`` is returned unchanged.
    """
    if not (isinstance(dict_original, dict) and isinstance(dict_update, dict)):
        return dict_update

    base_keys = set(dict_original.keys())
    patch_keys = set(dict_update.keys())

    # Start from a shallow copy so the caller's dict is left untouched.
    output = dict(dict_original)
    for key in base_keys.intersection(patch_keys):
        output[key] = deep_update(dict_original[key], dict_update[key])
    for key in patch_keys.difference(base_keys):
        output[key] = dict_update[key]
    return output
A simple example:
x={'a':{'b':{'c':1, 'd':1}}}
y={'a':{'b':{'d':2, 'e':2}}, 'f':2}
print(deep_update(x, y))
>>> {'a': {'b': {'c': 1, 'd': 2, 'e': 2}}, 'f': 2}
How about another answer?!? This one also avoids mutation/side effects:
def merge(dict1, dict2):
    """Return a new dict that recursively merges ``dict1`` and ``dict2``.

    Neither input is modified. Keys present in only one input are carried
    over with their value (the value object itself is shared, not copied).
    For keys present in both, two dict values are merged recursively; in any
    other conflict the value from ``dict1`` wins.
    """
    output = {}
    # Union of both inputs; on shared keys the value from `dict1` is the one
    # that ends up in `combined`.
    combined = {**dict2, **dict1}
    for key, value in combined.items():
        both_dicts = (
            key in dict1
            and key in dict2
            and isinstance(value, dict)
            and isinstance(dict2[key], dict)
        )
        if both_dicts:
            output[key] = merge(value, dict2[key])
        else:
            # Either the key is unique to one side, or the values cannot be
            # merged structurally; `value` already holds the right choice.
            output[key] = value
    return output


dict1 = {1:{"a":{"A"}}, 2:{"b":{"B"}}}
dict2 = {2:{"c":{"C"}}, 3:{"d":{"D"}}}
dict3 = {1:{"a":{"A"}}, 2:{"b":{"B"},"c":{"C"}}, 3:{"d":{"D"}}}
assert dict3 == merge(dict1, dict2)
This is a solution I made that recursively merges dictionaries to an infinite depth. The first dictionary passed to the function is the master dictionary - values in it will overwrite the values in the same key in the second dictionary.
def merge(dict1: dict, dict2: dict) -> dict:
    """Recursively merge ``dict2`` into ``dict1`` and return ``dict1``.

    ``dict1`` is the master dictionary: it is updated in place, and whenever
    both dictionaries define the same key its value is kept. Nested dicts
    present on both sides are merged recursively under the same rule. Nested
    dicts that exist only in ``dict2`` are rebuilt as new dicts, so later
    changes to the result do not write into ``dict2``'s nested dicts.
    """
    merged = dict1
    for key, value in dict2.items():
        if key not in dict1:
            # New key: take it from dict2, rebuilding dicts level by level.
            merged[key] = merge({}, value) if isinstance(value, dict) else value
        elif isinstance(dict1[key], dict) and isinstance(value, dict):
            # Both sides are dicts: fill in whatever the master is missing.
            merge(dict1[key], value)
        # Otherwise the master already has a value for this key and keeps it.
    return merged
This should help in merging all items from dict2 into dict1:
# Merge dict2 into dict1 in place, one level deep: entries of a shared
# top-level key are copied over individually, new top-level keys are
# attached as-is.
for item, incoming in dict2.items():
    if item not in dict1:
        dict1[item] = incoming
        continue
    existing = dict1[item]
    for leaf in incoming:
        existing[leaf] = incoming[leaf]
Please test it and tell us whether this is what you wanted.
EDIT:
The above mentioned solution merges only one level, but correctly solves the example given by OP. To merge multiple levels, the recursion should be used.
I've been testing your solutions and decided to use this one in my project:
from functools import reduce  # a builtin on Python 2, must be imported on Python 3


def mergedicts(dict1, dict2, conflict, no_conflict):
    """Yield ``(key, value)`` pairs for the merge of ``dict1`` and ``dict2``.

    Args:
        dict1, dict2: the dictionaries to merge (neither is modified here).
        conflict: called as ``conflict(dict1[k], dict2[k])`` for every key
            present in both inputs; its return value becomes the merged value.
        no_conflict: called with the single value of a key present in only
            one input; its return value becomes the merged value.

    Keys are visited in set-iteration order, so no particular order is
    guaranteed.
    """
    for k in set(dict1.keys()).union(dict2.keys()):
        if k in dict1 and k in dict2:
            yield (k, conflict(dict1[k], dict2[k]))
        elif k in dict1:
            yield (k, no_conflict(dict1[k]))
        else:
            yield (k, no_conflict(dict2[k]))


dict1 = {1:{"a":"A"}, 2:{"b":"B"}}
dict2 = {2:{"c":"C"}, 3:{"d":"D"}}


# This helper allows for recursion and the use of reduce. It treats both
# arguments as dicts, so conflicting keys must hold dicts at every level.
def f2(x, y):
    return dict(mergedicts(x, y, f2, lambda x: x))


print(dict(mergedicts(dict1, dict2, f2, lambda x: x)))
print(dict(reduce(f2, [dict1, dict2])))
Passing functions as parameters is the key to extending jterrace's solution so that it behaves like all the other recursive solutions.
Easiest way i can think of is :
#!/usr/bin/python
from copy import deepcopy
def dict_merge(a, b):
    """Return a new object that is ``a`` recursively updated with ``b``.

    If ``b`` is not a dict it is returned as-is (it replaces ``a``).
    Otherwise ``a`` is deep-copied and each entry of ``b`` is merged in:
    where the copy already holds a dict under that key the merge recurses,
    in every other case a deep copy of ``b``'s value is stored. Neither
    ``a`` nor ``b`` is modified.
    """
    if not isinstance(b, dict):
        return b
    result = deepcopy(a)
    # items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
    for k, v in b.items():
        if k in result and isinstance(result[k], dict):
            result[k] = dict_merge(result[k], v)
        else:
            result[k] = deepcopy(v)
    return result


a = {1:{"a":'A'}, 2:{"b":'B'}}
b = {2:{"c":'C'}, 3:{"d":'D'}}
print(dict_merge(a, b))
Output:
{1: {'a': 'A'}, 2: {'c': 'C', 'b': 'B'}, 3: {'d': 'D'}}
I have another slightly different solution here:
def deepMerge(d1, d2, inconflict=lambda v1, v2: v2):
    """Merge d2 into d1 in place and return d1.

    Keys missing from d1 are copied over from d2. Where both sides hold a
    dict the merge recurses. Where both sides hold differing values that are
    not both dicts, ``inconflict(d1_value, d2_value)`` decides what is
    stored; by default the value from d2 wins.
    """
    for key in d2:
        incoming = d2[key]
        if key not in d1:
            d1[key] = incoming
            continue
        current = d1[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            deepMerge(current, incoming, inconflict)
        elif current != incoming:
            d1[key] = inconflict(current, incoming)
    return d1
By default it resolves conflicts in favor of values from the second dict, but you can easily override this, with some witchery you may be able to even throw exceptions out of it. :).
class Utils(object):
    """
    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> Utils.merge_dict(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    >>> main = {'a': {'b': {'test': 'bug'}, 'c': 'C'}}
    >>> suply = {'a': {'b': 2, 'd': 'D', 'c': {'test': 'bug2'}}}
    >>> Utils.merge_dict(main, suply) == {'a': {'b': {'test': 'bug'}, 'c': 'C', 'd': 'D'}}
    True
    """

    @staticmethod
    def merge_dict(main, suply):
        """
        Merge ``suply`` into ``main`` in place and return ``main``.

        ``main`` is the primary dictionary and ``suply`` only supplements it:
        keys missing from ``main`` are added, nested dicts present on both
        sides are merged recursively, and on any other conflict the value
        already in ``main`` is kept.

        :return: ``main``, after the merge
        """
        for key, value in suply.items():
            if key not in main:
                main[key] = value
            elif isinstance(main[key], dict) and isinstance(value, dict):
                Utils.merge_dict(main[key], value)
            # Any other conflict: the value already in ``main`` wins.
        return main


if __name__ == '__main__':
    import doctest
    doctest.testmod()
I had the same problem and came up with a solution, which I am posting here in case it is useful to others. It merges nested dictionaries and also adds together the values stored under matching keys. I needed this to calculate some probabilities, and it worked great:
#used to copy a nested dict to a nested dict
def deepupdate(target, src):
    """Merge the two-level dict ``src`` into ``target`` in place.

    Top-level keys missing from ``target`` receive a deep copy of the value
    from ``src``. For top-level keys present in both, each inner entry of
    ``src`` is added (``+=``) to the matching inner entry of ``target``, or
    inserted if ``target`` has no such inner key. Nothing is returned.
    """
    for outer_key, inner in src.items():
        if outer_key not in target:
            target[outer_key] = copy.deepcopy(inner)
            continue
        bucket = target[outer_key]
        for inner_key, amount in inner.items():
            if inner_key in bucket:
                bucket[inner_key] += amount
            else:
                bucket[inner_key] = amount
by using the above method we can merge:
target = {'6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1}, '6,63': {'63,4': 1}}
src = {'5,4': {'4,4': 1}, '5,5': {'5,4': 1}, '4,4': {'4,3': 1}}
and this will become:
{'5,5': {'5,4': 1}, '5,4': {'4,4': 1}, '6,6': {'6,63': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 2}, '6,63': {'63,4': 1}}
also notice the changes here:
target = {'6,6': {'6,63': 1}, '6,63': {'63,4': 1}, '4,4': {'4,3': 1}, '63,4': {'4,4': 1}}
src = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '4,4': {'4,9': 1}, '3,4': {'4,4': 1}, '5,5': {'5,4': 1}}
merge = {'5,4': {'4,4': 1}, '4,3': {'3,4': 1}, '6,63': {'63,4': 1}, '5,5': {'5,4': 1}, '6,6': {'6,63': 1}, '3,4': {'4,4': 1}, '63,4': {'4,4': 1}, '4,4': {'4,3': 1, '4,9': 1}}
dont forget to also add the import for copy:
import copy
from collections import defaultdict
from itertools import chain
class DictHelper:
    """Helpers for combining several dictionaries into one."""

    @staticmethod
    def merge_dictionaries(*dictionaries, override=True):
        """Recursively merge any number of dictionaries into a new dict.

        For every key, the values found across ``dictionaries`` are collected
        in argument order, ignoring missing keys and ``None`` values. They
        must all have the same exact type, which decides how they are merged:

        * ``list`` values are flattened into a single ``set``;
        * ``dict`` values are merged recursively with the same ``override``;
        * any other type: the last value wins when ``override`` is true,
          otherwise the result is the list of all values.

        Raises:
            Exception: if the non-``None`` values of a key do not share
                exactly one type (this includes a key whose value is ``None``
                in every dictionary).
        """
        merged_dict = {}
        # Every key that appears in at least one input.
        all_unique_keys = set(chain.from_iterable(dictionaries))
        for key in all_unique_keys:
            # Values for this key in argument order; None means "no value".
            values = [
                value
                for value in (dictionary.get(key) for dictionary in dictionaries)
                if value is not None
            ]
            keys_value_type = list({type(value) for value in values})
            if len(keys_value_type) != 1:
                raise Exception("Different objects type for same key: {keys_value_type}".format(keys_value_type=keys_value_type))
            value_type = keys_value_type[0]
            if value_type is list:
                merged_dict[key] = set(chain.from_iterable(values))
            elif value_type is dict:
                # Pass ``override`` down so nested levels follow the same rule.
                merged_dict[key] = DictHelper.merge_dictionaries(*values, override=override)
            else:
                merged_dict[key] = values[-1] if override else values
        return merged_dict


if __name__ == '__main__':
    d1 = {'aaaaaaaaa': ['to short', 'to long'], 'bbbbb': ['to short', 'to long'], "cccccc": ["the is a test"]}
    d2 = {'aaaaaaaaa': ['field is not a bool'], 'bbbbb': ['field is not a bool']}
    d3 = {'aaaaaaaaa': ['filed is not a string', "to short"], 'bbbbb': ['field is not an integer']}
    print(DictHelper.merge_dictionaries(d1, d2, d3))
    d4 = {"a": {"x": 1, "y": 2, "z": 3, "d": {"x1": 10}}}
    d5 = {"a": {"x": 10, "y": 20, "d": {"x2": 20}}}
    print(DictHelper.merge_dictionaries(d4, d5))
Output:
{'bbbbb': {'to long', 'field is not an integer', 'to short', 'field is not a bool'},
'aaaaaaaaa': {'to long', 'to short', 'filed is not a string', 'field is not a bool'},
'cccccc': {'the is a test'}}
{'a': {'y': 20, 'd': {'x1': 10, 'x2': 20}, 'z': 3, 'x': 10}}

How I can get rid of None values in dictionary?

Something like:
for (a,b) in kwargs.iteritems():
if not b : del kwargs[a]
This code raises an exception because the dictionary is modified while it is being iterated over.
The only solution I have found is a not very pretty one that uses another dictionary:
res ={}
res.update((a,b) for a,b in kwargs.iteritems() if b is not None)
Thanks
Another way to write it is
res = dict((k,v) for k,v in kwargs.iteritems() if v is not None)
In Python3, this becomes
res = {k:v for k,v in kwargs.items() if v is not None}
You can also use filter:
d = dict(a = 1, b = None, c = 3)
filtered = dict(filter(lambda item: item[1] is not None, d.items()))
print(filtered)
{'a': 1, 'c': 3}
d = {'a': None, 'b': 'myname', 'c': 122}
print dict(filter(lambda x:x[1], d.items()))
{'b': 'myname', 'c': 122}
I like the variation of your second method:
res = dict((a, b) for (a, b) in kwargs.iteritems() if b is not None)
it's Pythonic and I don't think that ugly. A variation of your first is:
for (a, b) in list(kwargs.iteritems()):
if b is None:
del kwargs[a]
If you need to handle nested dicts, then you can leverage a simple recursive approach:
# Python 2
from collections import Mapping
def filter_none(d):
    # Recursively build a copy of a Mapping without its None-valued entries;
    # anything that is not a Mapping is returned unchanged.
    if isinstance(d, Mapping):
        # Python 2 only: dict.iteritems() does not exist on Python 3.
        return dict((k, filter_none(v)) for k, v, in d.iteritems() if v is not None)
    else:
        return d
# Python 3
from collections.abc import Mapping
def filter_none(d):
    """Return ``d`` with all ``None``-valued entries removed, recursively.

    A ``Mapping`` is rebuilt as a plain dict whose values are filtered the
    same way; any other object is returned unchanged.
    """
    if not isinstance(d, Mapping):
        return d
    kept = {}
    for key, value in d.items():
        if value is not None:
            kept[key] = filter_none(value)
    return kept
For anybody who may be interested, here's another way to get rid of None values. Instead of deleting the key, I replace each None value with a placeholder under the same key.
One use case is applying with Spark RDD.map onto null valued JSON.
def filter_null(data, placeholder="[spark]nonexists"):
    """Return a copy of ``data`` with every ``None`` value replaced.

    Keys are kept; each ``None`` value becomes ``placeholder``. Nested dicts
    are processed recursively with the same placeholder, and all other
    values are carried over unchanged.
    """
    cleaned = {}
    # items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
    for key, value in data.items():
        if isinstance(value, dict):
            cleaned[key] = filter_null(value, placeholder)
        elif value is None:
            cleaned[key] = placeholder
        else:
            cleaned[key] = value
    return cleaned
Sample output:
>>> filter_null({'a':None,'b':"nul", "c": {'a':None,'b':"nul"}})
{'a': '[spark]nonexists', 'c': {'a': '[spark]nonexists', 'b': 'nul'}, 'b': 'nul'}
For python3, change the iteritems() to items().
The recursive approach to also filter nested lists of dicts in the dictionary:
def filter_none(d):
    """Recursively drop ``None``-valued dict entries, descending into lists.

    Dicts are rebuilt without entries whose value is ``None``; lists are
    rebuilt with each element filtered the same way (``None`` list elements
    themselves are kept); anything else is returned unchanged.
    """
    if isinstance(d, dict):
        cleaned = {}
        for key, value in d.items():
            if value is None:
                continue
            cleaned[key] = filter_none(value)
        return cleaned
    if isinstance(d, list):
        return [filter_none(item) for item in d]
    return d
Sample output:
data = {'a': 'b', 'c': None, 'd':{'e': 'f', 'h': None, 'i':[{'j': 'k', 'l': None}]}}
print(filter_none(data))
>>> {'a': 'b', 'd': {'e': 'f', 'i': [{'j': 'k'}]}}

Categories