I have a nested dictionary {1: {2: {3: None}}} and a dictionary that maps keys of the nested dictionary to a set of values such as {1: x, 2: y, 3: z}. I want to transform the nested dictionary to this form {x: {y: {z: None}}}. I have tried a couple of recursive functions but I keep going in circles and confusing myself. What is the best way to achieve this?
The level of nesting is arbitrary. The above is a simple example.
You need to recurse through the dictionary while building a new one with new keys. Note that if you have a list or tuple in there somewhere that has other dictionaries in it, they won't be processed - you'd have to add some code to do that. You can actually do this without building a new dictionary, but I think this way is simpler.
od = {1: {2: {3: None}}}
kd = {1: 'x', 2: 'y', 3: 'z'}


def replace_keys(old_dict, key_dict):
    """Return a new dict equal to ``old_dict`` with keys renamed via ``key_dict``.

    Keys that have no entry in ``key_dict`` are kept unchanged.  Values that
    are dicts are processed recursively; every other value (including lists
    or tuples that may themselves hold dicts) is stored as-is.
    """
    new_dict = {}
    for key, value in old_dict.items():
        new_key = key_dict.get(key, key)
        if isinstance(value, dict):
            new_dict[new_key] = replace_keys(value, key_dict)
        else:
            new_dict[new_key] = value
    return new_dict


nd = replace_keys(od, kd)
print(nd)
outputs:
{'x': {'y': {'z': None}}}
The accepted answer does not handle lists of dicts nested inside the dictionary; the version below adds that.
#bilentor,
od = {'name': 'John', '1': [{'name': 'innername'}]}
kd = {'name': 'cname', '1': '2', 3: 'z'}


def replace_keys(data_dict, key_dict):
    """Return a copy of ``data_dict`` with dict keys renamed via ``key_dict``.

    ``data_dict`` may be a dict or a list.  Lists are rebuilt element by
    element and dicts are rebuilt with each key looked up in ``key_dict``
    (keys without an entry are kept).  Anything else is returned unchanged,
    so lists may freely mix dicts with plain values.
    """
    if isinstance(data_dict, list):
        return [replace_keys(item, key_dict) for item in data_dict]
    if not isinstance(data_dict, dict):
        # Plain values (str, int, None, ...) have no keys to rename; without
        # this guard a list such as ['a', 1] would fail on ``.keys()``.
        return data_dict
    return {
        key_dict.get(key, key): replace_keys(value, key_dict)
        for key, value in data_dict.items()
    }


nd = replace_keys(od, kd)
print(nd)
You can use a NestedDict
from ndicts import NestedDict
d = {1: {2: {3: None}}}
replace = {1: 'x', 2: 'y', 3: 'z'}
def ndict_replace(ndict: dict, map: dict):
    """Return a plain dict equal to ``ndict`` with its keys renamed via ``map``.

    ``ndict`` is wrapped in a ``NestedDict`` and every (key, value) pair it
    yields is copied into a fresh ``NestedDict`` under a translated key.
    Key components without an entry in ``map`` are kept unchanged.

    NOTE: the parameter name ``map`` shadows the builtin inside this
    function; it is kept so existing keyword callers continue to work.
    """
    nd = NestedDict(ndict)
    new_nd = NestedDict()
    for key, value in nd.items():
        # ``key`` is iterated component by component; presumably it is the
        # tuple path to a leaf value -- confirm against the ndicts docs.
        new_key = tuple(map.get(k, k) for k in key)
        new_nd[new_key] = value
    return new_nd.to_dict()
>>> ndict_replace(d, replace)
{'x': {'y': {'z': None}}}
The solution is robust and works with any nested dictionary
>>> d = {
1: {2: {3: None}},
3: {4: None},
5: None
}
>>> ndict_replace(d, replace)
{'x': {'y': {'z': None}}, 'z': {4: None}, 5: None}
To install ndicts pip install ndicts
Related
I have a list:
List_ = ["Peter", "Peter", "Susan"]
I want to make a dictionary like this:
Dict_ = {"Name": "Peter", "Count": 2, "Name": "Susan", "Count": 1}
# Tally how many times each name occurs in ``List_``.
Dict_ = {}
for text in List_:
    Dict_[text] = Dict_.get(text, 0) + 1

# NOTE: ``Dict_new`` is one single dict, so every pass of this loop
# overwrites the "Name" and "Count" entries written by the previous pass.
# Only the last pair survives, which is the behaviour being asked about.
Dict_new = {}
for key, values in Dict_.items():
    Dict_new["Name"] = key
    Dict_new["Count"] = values
print(Dict_new)
It prints only the last entry:
{"Name": "Susan", "Count": 1}
Here is the implementation that you can use according to what you would like :
from collections import Counter
# Your data
my_list = ["Peter", "Peter", "Susan"]

# Count the occurrences
counted = Counter(my_list)

# Your format: one {"Name": ..., "Count": ...} dict per distinct name
counted_list = [
    {"Name": key, "Count": value} for key, value in counted.items()
]
print(counted_list)
And output will be :
[{'Name': 'Peter', 'Count': 2}, {'Name': 'Susan', 'Count': 1}]
As noted in comments, a dictionary can only have each key once.
You may want a list of dictionaries, built with help from collections.Counter and a list comprehension.
>>> from collections import Counter
>>> List_ = ["Peter", "Peter", "Susan"]
>>> [{'name': k, 'count': v} for k, v in Counter(List_).items()]
[{'name': 'Peter', 'count': 2}, {'name': 'Susan', 'count': 1}]
In addition to using collections.Counter you could use a defaultdict.
>>> from collections import defaultdict
>>> d = defaultdict(int)
>>> for n in List_:
... d[n] += 1
...
>>> d
defaultdict(<class 'int'>, {'Peter': 2, 'Susan': 1})
>>> [{'name': k, 'count': v} for k, v in d.items()]
[{'name': 'Peter', 'count': 2}, {'name': 'Susan', 'count': 1}]
You can use the following code to achieve what you are trying to do.
List_ = ["Peter", "Peter", "Susan"]

# Map each name to the number of times it occurs in ``List_``.
dict_ = {}
for name in List_:
    dict_[name] = dict_.get(name, 0) + 1
print(dict_)
Generates the following output where key is the name and value is the count.
{'Peter': 2, 'Susan': 1}
Suppose you have the following dictionaries
dict1 = {'a': 'a'}
dict2 = {'a':'b', 'c':'d' , 'f':'w', 'r':' not unique' }
dict3 = {'a': 'c', 'c':'e'}
dict4 = {'a': 'd', 'c':'e', 'f':'r'}
dict5 = {'r': 'x', 'y':'only unique', 'f':'r'}
The intended result is as follows:
{'y': 'only unique', 'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex'}
You want to merge them in a particular order as they appear. If the key is repeated, one wants to concatenate the results. The use case is in 10K submissions. Often you find this
Column X Column X+1
(123)
when the number is (123)
I tried this and it works
# helper functions
def string_comprehension(array_like, separator, separate_chars=False):
    """Join the items of ``array_like`` into a single string.

    Args:
        array_like: a list of items; any non-list value is treated as a
            one-item list.
        separator: string placed between the joined pieces.
        separate_chars: when false, ``str()`` of each item is joined with
            ``separator``.  When true, each item must itself be iterable:
            its elements are joined with ``separator`` and the per-item
            results are then concatenated with nothing in between.

    Returns:
        The joined string.
    """
    if not isinstance(array_like, list):
        # Wrap a lone value here rather than through a helper, so the
        # function does not depend on any other definition in the file.
        array_like = [array_like]
    if separate_chars:
        return ''.join(separator.join(map(str, x)) for x in array_like)
    return separator.join(str(x) for x in array_like)
def to_list(var):
    """Return ``var`` itself if it is a list, otherwise a new list holding it."""
    if isinstance(var, list):
        return var
    return [var]
def merge_concatenate_dicts(dict_list, separator=''):
    """Merge dicts, concatenating the values of keys that occur more than once.

    Args:
        dict_list: the dicts to merge, in the order their values should be
            concatenated.
        separator: string placed between concatenated values.

    Returns:
        A new dict.  A key found in exactly one input dict keeps its value
        unchanged.  A key found in several dicts maps to ``str()`` of each
        of its values, in ``dict_list`` order, joined with ``separator``.
    """
    # Collect every value per key in one ordered pass.  This needs no
    # pairwise combinations and no ``list.index`` lookups, so equal dicts
    # and single-element lists are handled like any other input.
    collected = {}
    for d in dict_list:
        for key, value in d.items():
            collected.setdefault(key, []).append(value)

    merged_dicts = {}
    for key, values in collected.items():
        if len(values) == 1:
            # Unique keys are passed through without string conversion.
            merged_dicts[key] = values[0]
        else:
            merged_dicts[key] = separator.join(str(v) for v in values)
    return merged_dicts
dict1 = {'a': 'a'}
dict2 = {'a':'b', 'c':'d' , 'f':'w', 'r':' not unique' }
dict3 = {'a': 'c', 'c':'e'}
dict4 = {'a': 'd', 'c':'e', 'f':'r'}
dict5 = {'r': 'x', 'y':'only unique', 'f':'r'}
result = merge_concatenate_dicts([dict1,dict2,dict3,dict4,dict5])
print(result)
Yields
{'y': 'only unique', 'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex'}
You can use collections.defaultdict here.
from collections import defaultdict
dict1 = {'a': 'a'}
dict2 = {'a':'b', 'c':'d' , 'f':'w', 'r':' not unique' }
dict3 = {'a': 'c', 'c':'e'}
dict4 = {'a': 'd', 'c':'e', 'f':'r'}
dict5 = {'r': 'x', 'y':'only unique', 'f':'r'}
def merge(*dicts):
    """Merge ``dicts`` left to right, concatenating values that share a key.

    Values are combined with ``+=`` starting from an empty string, so they
    are expected to be strings.  Returns a ``defaultdict(str)``.
    """
    out = defaultdict(str)
    for d in dicts:
        for k, v in d.items():
            out[k] += v
    return out  # The return type is defaultdict; use `return dict(out)` if you want a plain dict
merge(dict1,dict2,dict3,dict4,dict5)
# defaultdict(<class 'str'>, {'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex', 'y': 'only unique'})
#When `return dict(out)`
# {'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex', 'y': 'only unique'}
I want to replace the values (formatted as strings) with the same values as integers, whenever the key is 'current_value'.
d = {'id': '10', 'datastreams': [{'current_value': '5'}, {'current_value': '4'}]}
Desired Output:
d = {'id': '10', 'datastreams': [{'current_value': 5}, {'current_value': 4}]}
The following piece of code replaces (substrings of) values in a dictionary. It works for nested json structures and copes with json, list and string types. You can easily add other types if needed.
def dict_replace_value(d: dict, old: str, new: str) -> dict:
    """Return a copy of ``d`` with ``old`` replaced by ``new`` in string values.

    Nested dicts are handled recursively, lists are delegated to
    ``list_replace_value``, and values of any other type are copied over
    unchanged.  Keys are never modified and ``d`` itself is not mutated.
    """
    x = {}
    for k, v in d.items():
        if isinstance(v, dict):
            v = dict_replace_value(v, old, new)
        elif isinstance(v, list):
            v = list_replace_value(v, old, new)
        elif isinstance(v, str):
            v = v.replace(old, new)
        x[k] = v
    return x
def list_replace_value(l: list, old: str, new: str) -> list:
    """Return a copy of ``l`` with ``old`` replaced by ``new`` in string items.

    Nested lists are handled recursively, dicts are delegated to
    ``dict_replace_value``, and items of any other type are copied over
    unchanged.  ``l`` itself is not mutated.
    """
    x = []
    for e in l:
        if isinstance(e, list):
            e = list_replace_value(e, old, new)
        elif isinstance(e, dict):
            e = dict_replace_value(e, old, new)
        elif isinstance(e, str):
            e = e.replace(old, new)
        x.append(e)
    return x
# See input and output below
output = dict_replace_value(input, 'string', 'something')
Input:
input = {
'key1': 'a string',
'key2': 'another string',
'key3': [
'a string',
'another string',
[1, 2, 3],
{
'key1': 'a string',
'key2': 'another string'
}
],
'key4': {
'key1': 'a string',
'key2': 'another string',
'key3': [
'a string',
'another string',
500,
1000
]
},
'key5': {
'key1': [
{
'key1': 'a string'
}
]
}
}
Output:
print(output)
{
"key1":"a something",
"key2":"another something",
"key3":[
"a something",
"another something",
[
1,
2,
3
],
{
"key1":"a something",
"key2":"another something"
}
],
"key4":{
"key1":"a something",
"key2":"another something",
"key3":[
"a something",
"another something",
500,
1000
]
},
"key5":{
"key1":[
{
"key1":"a something"
}
]
}
}
d = {'id': '10', 'datastreams': [{'current_value': '5'}, {'current_value': '4'}]}
for elem in d['datastreams']:  # each elem is one dict of the datastreams list
    # Match the key exactly: a substring test would also convert any other
    # key that merely contains the text 'current_value'.
    if 'current_value' in elem:
        elem['current_value'] = int(elem['current_value'])  # cast it to int
print(d)
OUTPUT:
{'id': '10', 'datastreams': [{'current_value': 5}, {'current_value': 4}]}
A general approach (assuming you don't know in advance which key of the dict is pointing to a list) would be to iterate over the dict and check the type of its values and then iterate again into each value if needed.
In your case, your dictionary may contain a list of dictionaries as values, so it is enough to check if a value is of type list, if so, iterate over the list and change the dicts you need.
It can be done recursively with a function like the following:
def f(d):
    """Convert every 'current_value' entry to ``int``, in place.

    Walks ``d`` and, for each list value, recurses into the dicts that the
    list contains.  Dicts stored directly as values are not visited.
    Returns ``None``; ``d`` is modified.
    """
    for k, v in d.items():
        if k == 'current_value':
            # Rebinding an existing key does not resize the dict, so it is
            # safe while iterating over its items.
            d[k] = int(v)
        elif isinstance(v, list):
            for item in v:
                if isinstance(item, dict):
                    f(item)
>>> d = {'id': '10', 'datastreams': [{'current_value': '5'}, {'current_value': '4'}]}
>>> f(d)
>>> d
{'id': '10', 'datastreams': [{'current_value': 5}, {'current_value': 4}]}
Can be done with list comprehension:
# Copy each datastream dict with 'current_value' cast to int.  dict(ds, ...)
# keeps every other key of ``ds`` instead of dropping it.
d['datastreams'] = [
    dict(ds, current_value=int(ds['current_value'])) if 'current_value' in ds else ds
    for ds in d['datastreams']
]
You can use ast.literal_eval to evaluate the underlying value for items with current_value key in the d['datastreams'] list. Then check whether the type is an int using isinstance for such values. Finally, type cast such values to int.
import ast
d = {'id': '10', 'datastreams': [{'current_value': '5'}, {'current_value': '4'}]}
for i in d['datastreams']:
    for k, v in i.items():
        if k != 'current_value':
            continue
        try:
            parsed = ast.literal_eval(v)
        except (ValueError, TypeError, SyntaxError):
            # Not a Python literal (e.g. 'n/a'): leave the value untouched.
            continue
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(parsed, int) and not isinstance(parsed, bool):
            i[k] = parsed
#Output:
print(d)
{'id': '10', 'datastreams': [{'current_value': 5}, {'current_value': 4}]}
You could use this approach,
which loops over the values of d, checks each list element for a 'current_value' key, and converts that value to an integer by passing it through int():
for value in d.values():
    # Only list values can hold datastream dicts.  Skipping everything else
    # avoids iterating the characters of strings and failing on values that
    # are not iterable at all.
    if not isinstance(value, list):
        continue
    for element in value:
        if isinstance(element, dict) and 'current_value' in element:
            element['current_value'] = int(element['current_value'])
Taking alec_djinn's solution a little further so that it also handles nested dicts:
def f(d):
    """Convert every 'current_value' entry to ``int``, in place.

    Recurses into dicts stored directly as values and into dicts held in
    list values.  Returns ``None``; ``d`` is modified.
    """
    for k, v in d.items():
        if k == 'current_value':
            # Rebinding an existing key does not resize the dict, so it is
            # safe while iterating over its items.
            d[k] = int(v)
        elif isinstance(v, list):
            for item in v:
                if isinstance(item, dict):
                    f(item)
        elif isinstance(v, dict):
            f(v)
I happen to have a complex dictionary (having lists, dicts within lists etc). The values for some of the keys are set as None
Is there a way I can replace this None with some default value of my own irrespective of the complex structure of the dictionary?
You can do it using object_pairs_hook from json module:
def dict_clean(items):
    """Build a dict from ``items``, replacing ``None`` values with 'default'.

    ``items`` is an iterable of ``(key, value)`` pairs.  Only the values of
    the pairs passed in are inspected, so when used as a JSON
    ``object_pairs_hook`` a ``null`` that sits directly inside an array is
    not replaced.
    """
    return {
        key: 'default' if value is None else value
        for key, value in items
    }
dict_str = json.dumps(my_dict)
my_dict = json.loads(dict_str, object_pairs_hook=dict_clean)
# replace_none_with_empty_str_in_dict.py
raw = {'place': 'coffee shop', 'time': 'noon', 'day': None}
def replace_none_with_empty_str(some_dict):
    """Return a shallow copy of ``some_dict`` with ``None`` values set to ''.

    Only top-level values are examined; nested containers are copied by
    reference and left untouched.
    """
    return {k: ('' if v is None else v) for k, v in some_dict.items()}
print(replace_none_with_empty_str(raw))
Here's a recursive solution that also replaces Nones inside lists.
First we define a simple class, Null, to act as the replacement for None.
class Null(object):
    """Placeholder type whose instances print as ``Null``."""

    def __repr__(self):
        return 'Null'


# Shared replacement object written in place of every None.
NULL = Null()


def replace_none(data):
    """Replace every ``None`` in ``data`` with ``NULL``, in place.

    ``data`` is a dict or a list.  Nested dicts and lists are processed
    recursively; other containers (e.g. tuples) are left alone.
    Returns ``None``.
    """
    # dict -> (key, value) pairs, list -> (index, value) pairs, so the same
    # ``data[k] = ...`` assignment works for both container types.
    for k, v in data.items() if isinstance(data, dict) else enumerate(data):
        if v is None:
            data[k] = NULL
        elif isinstance(v, (dict, list)):
            replace_none(v)
# Test
data = {
1: 'one',
2: ['two', 2, None],
3: None,
4: {4: None, 44: 'four'},
5: {
5: [55, 56, None],
6: {66: None, 67: None},
8: [88, {9:'nine', 99:None}, 100]
}
}
print(data)
replace_none(data)
print(data)
output
{1: 'one', 2: ['two', 2, None], 3: None, 4: {44: 'four', 4: None}, 5: {8: [88, {9: 'nine', 99: None}, 100], 5: [55, 56, None], 6: {66: None, 67: None}}}
{1: 'one', 2: ['two', 2, Null], 3: Null, 4: {44: 'four', 4: Null}, 5: {8: [88, {9: 'nine', 99: Null}, 100], 5: [55, 56, Null], 6: {66: Null, 67: Null}}}
# Replace top-level None values only; nested containers are not visited.
for k, v in my_dict.items():
    if v is None:
        my_dict[k] = "my default value"
Recursive solution from Lutz:
def replace(any_dict):
    """Replace ``None`` values with "my default value", in place.

    Recurses into values that are dicts.  Lists and other containers are
    not visited.  Returns ``None``.
    """
    for k, v in any_dict.items():
        if v is None:
            any_dict[k] = "my default value"
        elif isinstance(v, dict):
            # isinstance (rather than comparing exact types) also descends
            # when parent and child are different dict classes.
            replace(v)
replace(my_dict)
You could do it with recursive function that iterates over all dicts and lists:
def convert(obj):
    """Replace every ``None`` inside ``obj`` with 'DEFAULT', in place.

    ``obj`` may be a list or a dict; nested lists and dicts are processed
    recursively.  Anything else is ignored.  Returns ``None``.
    """
    if isinstance(obj, list):
        # Go by index so that a None sitting directly in the list can be
        # overwritten, not just None values inside nested dicts.
        for index, item in enumerate(obj):
            if item is None:
                obj[index] = 'DEFAULT'
            else:
                convert(item)
    elif isinstance(obj, dict):
        for k, v in obj.items():
            if v is None:
                obj[k] = 'DEFAULT'
            else:
                convert(v)


data = {1: 'foo', 2: None, 3: [{1: 'foo', 2: None}]}
convert(data)
print(data)  # -> {1: 'foo', 2: 'DEFAULT', 3: [{1: 'foo', 2: 'DEFAULT'}]}
exhaustive:
- all keys in the dictionary, even if the keys are in a nested dictionary that is a value to a previous-level dictionary key.
sorted:
- this is to ensure the keys are always returned in the same order
The nesting is arbitrarily deep. A non-recursive algorithm is preferred.
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
Note: dictionary values can include lists (which can have dictionaries as elements), e.g.
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
def _auxallkeys(aset, adict):
    """Add the keys of ``adict`` and of every dict nested in it to ``aset``."""
    aset.update(adict)
    # ``values()`` works on both Python 2 and 3 (``itervalues`` is 2-only).
    for d in adict.values():
        if isinstance(d, dict):
            _auxallkeys(aset, d)


def allkeys(adict):
    """Return a sorted list of the distinct keys of ``adict`` at any depth.

    Only dicts stored directly as values are followed; dicts held inside
    lists or other containers are not.
    """
    aset = set()
    _auxallkeys(aset, adict)
    return sorted(aset)
is the obvious (recursive) solution. To eliminate recursion:
def allkeys(adict):
    """Return a sorted list of the distinct keys of ``adict`` at any depth.

    Iterative version: dicts still to be visited are kept on an explicit
    stack.  Only dicts stored directly as values are followed.
    """
    aset = set()
    pending = [adict]
    while pending:
        d = pending.pop()
        aset.update(d)
        # ``values()`` works on both Python 2 and 3 (``itervalues`` is 2-only).
        pending.extend(dd for dd in d.values() if isinstance(dd, dict))
    return sorted(aset)
since the order of processing of the various nested dicts does not matter for this purpose.
Edit: the OP comments whining that it doesn't work if a dict is not nested, but rather in a list (and I replied that it could also be in a tuple, an object with attributes per-instance or per-class [maybe a base class thereof], a shelf, and many other ways to hide dicts around the house;-). If the OP will deign to define precisely what he means by "nested" (obviously not the same meaning as ordinary mortals apply to the word in question), it will probably be easier to help him. Meanwhile, here's a version that covers lists (and tuples, but not generators, instances of many itertools classes, shelves, etc, etc);
def allkeys(adict):
    """Return a sorted list of the distinct keys of ``adict`` at any depth.

    Iterative version that also looks inside lists and tuples: dicts found
    as values, or as elements of (possibly nested) lists/tuples, are all
    visited.
    """
    aset = set()
    pending = [adict]   # dicts still to be visited
    pendlis = []        # lists/tuples still to be scanned

    def do_seq(seq):
        """Queue the dicts and lists/tuples found among the items of ``seq``."""
        for dd in seq:
            if isinstance(dd, dict):
                pending.append(dd)
            elif isinstance(dd, (list, tuple)):
                pendlis.append(dd)

    while pending or pendlis:
        while pending:
            d = pending.pop()
            aset.update(d)
            # ``values()`` works on both Python 2 and 3.
            do_seq(d.values())
        while pendlis:
            do_seq(pendlis.pop())
    return sorted(aset)
A non-recursive method isn't obvious to me right now. The following works on your original example. Edit: It will now handle dicts within a list within a dict, at least the one within the tricky example cited in the comment to Alex Martelli's answer.
#!/usr/bin/env python
import types
def get_key_list(the_dict, key_list):
    """Append every key found in ``the_dict`` to ``key_list``, in place.

    Recurses into dict values and into dicts that are direct elements of
    list values.  Keys occurring more than once are appended each time.
    Returns ``None``.
    """
    # ``items()`` and ``isinstance`` replace the Python-2-only
    # ``iteritems()`` and ``types.DictType`` / ``types.ListType``.
    for k, v in the_dict.items():
        key_list.append(k)
        if isinstance(v, dict):
            get_key_list(v, key_list)
        elif isinstance(v, list):
            for lv in v:
                if isinstance(lv, dict):
                    get_key_list(lv, key_list)
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
# Collect, sort and show the keys of each sample structure.
key_list = []
get_key_list(level1, key_list)
key_list.sort()
print(key_list)
key_list = []
get_key_list(tricky, key_list)
key_list.sort()
print(key_list)
Output:
['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z']
['category', 'content', 'content']
Here's a non-recursive solution which processes generators as well as lists, tuples and dicts and adds all successive keys if a key appears more than once:
def get_iterator(i):
    """Return an iterator over ``i``, or ``None`` if it should not be iterated.

    Objects that already are iterators are returned as-is.  Strings are
    deliberately treated as non-iterable so they are not split into
    characters.
    """
    try:
        string_types = (basestring,)  # Python 2
    except NameError:
        string_types = (str, bytes)   # Python 3
    if hasattr(i, '__next__') or hasattr(i, 'next'):
        # already an iterator - use it as-is!
        return i
    if hasattr(i, '__iter__') and not isinstance(i, string_types):
        # an iterable type that isn't a string
        return iter(i)
    # Can't iterate most other types!
    return None
def get_dict_keys(D):
    """Return a sorted list of all dict keys found anywhere inside ``D``.

    Non-recursive: a stack of ``(container, iterator)`` pairs is walked
    depth first.  Whatever ``get_iterator`` agrees to iterate is followed.
    A key that occurs several times is listed once per occurrence.
    """
    LRtn = []
    root_iter = get_iterator(D)
    if root_iter is None:
        # Nothing to walk (e.g. a number or a string was passed in).
        return LRtn
    L = [(D, root_iter)]
    while L:
        cur, _iter = L[-1]
        # Get the next item; the builtin next() works on Python 2 and 3.
        try:
            i = next(_iter)
        except StopIteration:
            del L[-1]
            continue
        if isinstance(cur, dict):
            # Iterating a dict yields keys: record the key, descend into its value.
            LRtn.append(i)
            child = cur[i]
        else:
            # Generators, lists, tuples: descend into the item itself.
            child = i
        child_iter = get_iterator(child)
        if child_iter is not None:
            L.append((child, child_iter))
    # Sort and return
    LRtn.sort()
    return LRtn
D = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd', 'e': 134, 'f': [{'blah': 553}]} },
'level2_2' : { 'z': 'zzzzzzz' },
'blah2': iter([{'blah3': None}]),
}
print(get_dict_keys(D))
EDIT: Increased the speed a bit and made the code shorter.
I also prefer a recursive approach...
#!/usr/bin/env python
def extract_all_keys(structure):
    """Return a sorted list of every key in ``structure`` and its nested dicts.

    Anything without a ``keys()`` method (strings, numbers, ...) yields an
    empty list.  A key that occurs several times is listed once per
    occurrence.
    """
    try:
        # Copy into a real list: on Python 3 ``keys()`` is a view that
        # cannot be extended.
        list_of_keys = list(structure.keys())
    except AttributeError:
        return []
    for value in structure.values():
        add_all_keys_in_value_to_list(value, list_of_keys)
    # list.sort() sorts in place and returns None, so return the list itself.
    list_of_keys.sort()
    return list_of_keys


def add_all_keys_in_value_to_list(value, list_of_keys):
    """Extend ``list_of_keys`` in place with the keys found inside ``value``.

    ``value`` may be a dict, or a list/tuple whose elements are examined
    with ``extract_all_keys``; any other value adds nothing.
    """
    if isinstance(value, dict):
        list_of_keys += extract_all_keys(value)
    elif isinstance(value, (list, tuple)):
        for element in value:
            list_of_keys += extract_all_keys(element)
import unittest
class TestKeys(unittest.TestCase):
    """Given/when/then style tests for ``extract_all_keys``."""

    def given_a_structure_of(self, structure):
        """Remember the structure under test."""
        self.structure = structure

    def when_keys_are_extracted(self):
        """Run ``extract_all_keys`` on the remembered structure."""
        self.list_of_keys = extract_all_keys(self.structure)

    # ``assertEqual`` is used throughout: the ``assertEquals`` alias is
    # deprecated and no longer exists in Python 3.12.

    def testEmptyDict(self):
        self.given_a_structure_of({})
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, [])

    def testOneElement(self):
        self.given_a_structure_of({'a': 'aaaa'})
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a'])

    def testTwoElementsSorted(self):
        self.given_a_structure_of({
            'z': 'zzzz',
            'a': 'aaaa',
        })
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a', 'z'])

    def testNestedElements(self):
        self.given_a_structure_of({
            'a': {'aaaa': 'A',},
            'b': {'bbbb': 'B',},
        })
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a', 'aaaa', 'b', 'bbbb'])

    def testDoublyNestedElements(self):
        self.given_a_structure_of({
            'level2': {'aaaa': 'A',
                       'level3': {'bbbb': 'B'}
                       }
        })
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['aaaa', 'bbbb', 'level2', 'level3'])

    def testNestedExampleOnStackOverflow(self):
        level1 = {
            'a' : 'aaaa',
            'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
            'level2_2' : { 'z': 'zzzzzzz' }
        }
        self.given_a_structure_of(level1)
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z'])

    def testListExampleOnStackOverflow(self):
        tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
        self.given_a_structure_of(tricky)
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['category', 'content', 'content'])

    def testTuplesTreatedLikeLists(self):
        tricky_tuple = {'category': ({'content': 'aaaaa'}, {'content': 'bbbbbb'})}
        self.given_a_structure_of(tricky_tuple)
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['category', 'content', 'content'])

    def testCanHandleString(self):
        self.given_a_structure_of('keys')
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, [])
if __name__ == '__main__':
unittest.main()
I think it is better to use a recursive method.
My code is as follows.
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
all_keys = []  # a global list to store all the keys in level1


def depth(dict):
    """Append every key of ``dict`` and of its nested dicts to ``all_keys``.

    Keys are recorded in the order they are visited, parents before their
    children.  Returns ``None``; the result accumulates in the module-level
    ``all_keys`` list.

    NOTE: the parameter name shadows the builtin ``dict``; it is kept for
    backward compatibility, hence the ``type({})`` spelling below.
    """
    for k in dict:
        # Record the key whether or not its value is a nested dict, so
        # intermediate keys are not lost.
        all_keys.append(k)
        if isinstance(dict[k], type({})):  # value is a nested dictionary
            depth(dict[k])  # recursive
depth(level1)
print(all_keys)