I want to flatten a list of dicts, but I'm having issues.
let's say i have a list of dict as,
d = [{'val': 454,'c': {'name': 'ss'}, 'r': {'name1': 'ff'}},{'val': 'ss', 'c': {'name': 'ww'}, 'r': {'name1': 'ff'}}, {'val': 22,'c': {'name': 'dd'}, 'r': {'name1': 'aa'}}]
And the output I'm trying to get is,
d = [{'val': 454,'name': 'ss', 'name1': 'ff'},{'val': 'ss','name': 'ww', 'name1': 'ff'},{'val': 22, 'name': 'dd', 'name1': 'aa'}]
For which I'm using the following function,
def flatten(structure, key="", flattened=None):
    """Collapse a nested dict/list structure into a single flat dict.

    Scalars are stored in ``flattened`` under ``key``. Dict entries are
    stored under their own key; list entries are stored under their index
    rendered as a string.

    NOTE: every level writes into the same ``flattened`` dict, so entries
    that share a key overwrite one another. For a list of similarly shaped
    dicts only one set of values survives.

    Args:
        structure: dict, list, or scalar to flatten.
        key: key used when ``structure`` itself is a scalar.
        flattened: accumulator dict; a new one is created when omitted.

    Returns:
        The accumulator dict.
    """
    if flattened is None:
        flattened = {}
    # isinstance (not an exact type() match) so that dict/list subclasses
    # such as OrderedDict are walked instead of being stored as scalars.
    if not isinstance(structure, (dict, list)):
        flattened[key] = structure
    elif isinstance(structure, list):
        for i, item in enumerate(structure):
            flatten(item, "%d" % i, flattened)
    else:
        for new_key, value in structure.items():
            flatten(value, new_key, flattened)
    return flattened
Now, the issue I have is that it only generates a single element instead of one flattened dict per item in the list.
You are probably initializing something in the wrong place. Take a look at the code below:
d = [{'val': 454, 'c': {'name': 'ss'}, 'r': {'name1': 'ff'}}, {'val': 55, 'c': {'name': 'ww'}, 'r': {'name1': 'ff'}}, {'val': 22, 'c': {'name': 'dd'}, 'r': {'name1': 'aa'}}]
# ^ typo here
def flatten(my_dict):
    """Flatten one level of nesting in every dict of a list.

    For each dict in ``my_dict`` a new dict is built: values that are
    themselves dicts contribute their own key/value pairs, all other
    values are copied under their original key.

    Args:
        my_dict: iterable of dicts (despite the name, not a single dict).

    Returns:
        A list with one flattened dict per input dict.
    """
    res = []
    for sub in my_dict:
        flat = {}
        for k, v in sub.items():
            if isinstance(v, dict):
                # Hoist the nested pairs; the outer key is dropped.
                flat.update(v)
            else:
                flat[k] = v
        res.append(flat)
    return res
result = flatten(d)
print(result) # [{'name': 'ss', 'name1': 'ff', 'val': 454}, {'name': 'ww', 'name1': 'ff', 'val': 55}, {'name': 'dd', 'name1': 'aa', 'val': 22}]
You should initialize flattened to the same type as structure if it's None, and pass None when recursing at the list case:
def flatten_2(structure, key="", flattened=None):
    """Flatten nested dicts, producing one flat dict per top-level list item.

    When called without an accumulator, a list input yields a list of
    flattened items and any other input yields a flat dict.

    Args:
        structure: dict, list, or scalar to flatten.
        key: key used when ``structure`` itself is a scalar.
        flattened: accumulator (dict or list); created when omitted.

    Returns:
        The accumulator: a list for list input, otherwise a dict.
    """
    if flattened is None:
        flattened = [] if isinstance(structure, list) else {}
    if not isinstance(structure, (dict, list)):
        flattened[key] = structure
    elif isinstance(structure, dict):
        for new_key, value in structure.items():
            flatten_2(value, new_key, flattened)
    elif isinstance(flattened, list):
        # Each list item gets a fresh accumulator so items don't overwrite
        # one another.
        for i, item in enumerate(structure):
            flattened.append(flatten_2(item, "%d" % i))
    else:
        # A list nested inside a dict: fold its items into the enclosing
        # dict, using the index as key for scalar items.
        for i, item in enumerate(structure):
            flatten_2(item, "%d" % i, flattened)
    return flattened
In [13]: flatten_2(d)
Out[13]:
[{'name': 'ss', 'name1': 'ff', 'val': 454},
{'name': 'ww', 'name1': 'ff', 'val': 'ss'},
{'name': 'dd', 'name1': 'aa', 'val': 22}]
This of course only works for a limited type of data.
Related
I have a dictionary with missing values (the key is there, but the associated value is empty). For example I want the dictionary below:
dct = {'ID': '', 'gender': 'male', 'age': '20', 'weight': '', 'height': '5.7'}
to be changed to this form:
dct = {'ID': {'link': '','value': ''}, 'gender': 'male', 'age': '20', 'weight': {'link': '','value': ''}, 'height': '5.7'}
I want the values of the 'ID' and 'weight' keys to be replaced with a nested dictionary whenever they are empty.
How can I write that in the most time-efficient way?
I have tried the solutions from the links below, but they didn't work:
def update(orignal, addition):
    """Fill ``orignal`` in place with entries from ``addition``.

    Keys missing from ``orignal`` are copied over. For keys present in
    both, dict values are merged recursively, list values are merged
    element by element, and any other value replaces the existing one only
    when the existing one is falsy.

    NOTE: when ``orignal[k]`` is a string (for example ``''``) and
    ``addition[k]`` is a dict, the recursive call tries to assign into the
    string and raises ``TypeError: 'str' object does not support item
    assignment``.

    Args:
        orignal: dict to update in place.
        addition: dict providing the missing or default values.
    """
    for k, v in addition.items():
        if k not in orignal:
            orignal[k] = v
        elif isinstance(v, dict):
            update(orignal[k], v)
        elif isinstance(v, list):
            for i, item in enumerate(v):
                update(orignal[k][i], item)
        elif not orignal[k]:
            orignal[k] = v
Error: TypeError: 'str' object does not support item assignment
Fill missing keys by comparing example json in python
Adding missing keys in dictionary in Python
It seems similar with this issue https://stackoverflow.com/a/3233356/6396981
import collections.abc
def update(d, u):
    """Recursively merge mapping ``u`` into ``d`` in place.

    Mapping values in ``u`` are merged into the corresponding mapping in
    ``d``. If ``d`` holds no mutable mapping under that key (missing key,
    empty string, any scalar), a new dict is started instead. All other
    values in ``u`` overwrite the value in ``d``.

    Args:
        d: mutable mapping to update.
        u: mapping with the values to merge in.

    Returns:
        ``d``, to allow the recursive assignment.
    """
    for k, v in u.items():
        if isinstance(v, collections.abc.Mapping):
            current = d.get(k)
            # A scalar cannot be merged into; replace it with a new dict
            # rather than recursing into it and crashing.
            if not isinstance(current, collections.abc.MutableMapping):
                current = {}
            d[k] = update(current, v)
        else:
            d[k] = v
    return d
For example in your case:
>>> dict1 = {'ID':'', 'gender':'male', 'age':'20', 'weight':'', 'height':'5.7'}
>>> dict2 = {'ID': {'link':'','value':''}, 'weight': {'link':'','value':''}}
>>>
>>> update(dict1, dict2)
{'ID': {'link': '', 'value': ''}, 'gender': 'male', 'age': '20', 'weight': {'link': '', 'value': ''}, 'height': '5.7'}
>>>
You can iterate through the dictionary and check whether each value is an empty string (''); if it is, replace it with a copy of the default value. Here's a small snippet which does it -
dct = {'ID':'', 'gender':'male', 'age':'20', 'weight':'', 'height':'5.7'}
def update(d, default):
    """Replace every empty-string value of ``d`` with a copy of ``default``.

    The dict is modified in place; nothing is returned. Each replaced
    value gets its own shallow copy of ``default``.

    Args:
        d: dict to update.
        default: object with a ``copy()`` method used as the replacement.
    """
    for k, v in d.items():
        if v == '':
            d[k] = default.copy()
update(dct, {'link':'','value':''})
print(dct)
Output :
{'ID': {'link': '', 'value': ''}, 'gender': 'male', 'age': '20', 'weight': {'link': '', 'value': ''}, 'height': '5.7'}
Note that the dict is passed by reference to the function, so any updates made there will be reflected in the original dictionary as well as seen in the above example.
If your dict is nested and you want the replacement to be done for nested items as well then you can use this function -
def nested_update(d, default):
    """Replace empty-string values with copies of ``default``, recursively.

    Walks ``d`` in place. Values equal to ``''`` are replaced, nested
    dicts are processed recursively, and dicts found inside list values
    are processed too. List items that are not dicts are left untouched.

    Args:
        d: dict to update in place.
        default: object with a ``copy()`` method used as the replacement.
    """
    for k, v in d.items():
        if v == '':
            d[k] = default.copy()
        elif isinstance(v, dict):
            nested_update(v, default)
        elif isinstance(v, list):
            for item in v:
                # Only dicts can be walked; scalars in the list are skipped
                # instead of raising AttributeError.
                if isinstance(item, dict):
                    nested_update(item, default)
here's a small example with list of dictionaries and nested dictionary -
dct = {'ID':'', 'gender':'male', 'age':'20', 'weight':'', 'height':'5.7', "list_data":[{'empty': ''}, {'non-empty': 'value'}], "nested_dict": {"key1": "val1", "missing_nested": ""}}
nested_update(dct, {'key1': 'val1-added', 'key2': 'val2-added'})
print(dct)
Output :
{'ID': {'key1': 'val1-added', 'key2': 'val2-added'}, 'gender': 'male', 'age': '20', 'weight': {'key1': 'val1-added', 'key2': 'val2-added'}, 'height': '5.7', 'list_data': [{'empty': {'key1': 'val1-added', 'key2': 'val2-added'}}, {'non-empty': 'value'}], 'nested_dict': {'key1': 'val1', 'missing_nested': {'key1': 'val1-added', 'key2': 'val2-added'}}}
For "this default dictionary to only specified keys like ID and Weight and not for other keys", you can update the condition of when we replace the value -
def nested_update(d, default, keys=('ID', 'weight')):
    """Replace empty-string values of selected keys with copies of ``default``.

    Walks ``d`` in place, including nested dicts and dicts inside list
    values. A value is replaced only when it equals ``''`` and its key is
    listed in ``keys``. List items that are not dicts are left untouched.

    Args:
        d: dict to update in place.
        default: object with a ``copy()`` method used as the replacement.
        keys: collection of keys whose empty values are replaced.
    """
    for k, v in d.items():
        if k in keys and v == '':
            d[k] = default.copy()
        elif isinstance(v, dict):
            nested_update(v, default, keys)
        elif isinstance(v, list):
            for item in v:
                # Only dicts can be walked; scalars in the list are skipped
                # instead of raising AttributeError.
                if isinstance(item, dict):
                    nested_update(item, default, keys)
I have a dictionary with some values that are type list, i need to convert each list in another dictionary and insert this new dictionary at the place of the list.
Basically, I have this dictionary
Dic = {
'name': 'P1',
'srcintf': 'IntA',
'dstintf': 'IntB',
'srcaddr': 'IP1',
'dstaddr': ['IP2', 'IP3', 'IP4'],
'service': ['P_9100', 'SNMP'],
'schedule' : 'always',
}
I need to replace the values that are lists with lists of dictionaries.
Expected output:
Dic = {
'name': 'P1',
'srcintf': 'IntA',
'dstintf': 'IntB',
'srcaddr': 'IP1',
'dstaddr': [
{'name': 'IP2'},
{'name': 'IP3'},
{'name': 'IP4'}
],
'service': [
{'name': 'P_9100'},
{'name': 'SNMP'}
],
'schedule' : 'always',
}
So far I have come up with this code:
for k,v in Dic.items():
if not isinstance(v, list):
NewDic = [k,v]
print(NewDic)
else:
values = v
keys = ["name"]*len(values)
for item in range(len(values)):
key = keys[item]
value = values[item]
SmallDic = {key : value}
liste.append(SmallDic)
NewDic = [k,liste]
which print this
['name', 'P1']
['srcintf', 'IntA']
['dstintf', 'IntB']
['srcaddr', 'IP1']
['schedule', 'always']
['schedule', 'always']
I think the problem is in the for loop, but so far I haven't been able to figure it out.
You need to re-create the dictionary. With some modifications to your existing code so that it generates a new dictionary & fixing the else clause:
# Build a new dictionary instead of editing Dic while iterating over it.
NewDic = {}
for k, v in Dic.items():
    if isinstance(v, list):
        # loop through the list values & generate a dict for each
        NewDic[k] = [{"name": e} for e in v]
    else:
        NewDic[k] = v
print(NewDic)
Result:
{'name': 'P1', 'srcintf': 'IntA', 'dstintf': 'IntB', 'srcaddr': 'IP1', 'dstaddr': [{'name': 'IP2'}, {'name': 'IP3'}, {'name': 'IP4'}], 'service': [{'name': 'P_9100'}, {'name': 'SNMP'}], 'schedule': 'always'}
Suppose you have the following dictionaries
dict1 = {'a': 'a'}
dict2 = {'a':'b', 'c':'d' , 'f':'w', 'r':' not unique' }
dict3 = {'a': 'c', 'c':'e'}
dict4 = {'a': 'd', 'c':'e', 'f':'r'}
dict5 = {'r': 'x', 'y':'only unique', 'f':'r'}
The intended result is as follows:
{'y': 'only unique', 'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex'}
You want to merge them in a particular order as they appear. If the key is repeated, one wants to concatenate the results. The use case is in 10K submissions. Often you find this
Column X Column X+1
(123)
when the number is (123)
I tried this and it works
# helper functions
def string_comprehension(array_like, separator, separate_chars=False):
    """Join the string forms of ``array_like``'s items.

    Args:
        array_like: list of items; any other value is treated as a
            one-element list.
        separator: string placed between the joined pieces.
        separate_chars: when true, each item is iterated and its elements
            are joined with ``separator``; the per-item results are then
            concatenated with no separator between them.

    Returns:
        The joined string.
    """
    if not isinstance(array_like, list):
        # Wrap directly rather than via to_list(), which depends on an
        # undefined ``islist`` helper.
        array_like = [array_like]
    if separate_chars:
        return ''.join(separator.join(map(str, x)) for x in array_like)
    return separator.join(str(x) for x in array_like)
def to_list(var):
    """Return ``var`` unchanged if it is a list, else wrap it in a list."""
    if isinstance(var, list):
        return var
    return [var]
def merge_concatenate_dicts(dict_list, separator=''):
    """Merge dicts, concatenating the values of keys that occur repeatedly.

    Dicts are processed in list order. A key found in exactly one dict
    keeps its value unchanged. A key found in several dicts gets the
    string forms of all its values joined with ``separator``, in the order
    the dicts appear.

    Args:
        dict_list: list of dicts to merge.
        separator: string inserted between concatenated values.

    Returns:
        A new dict with the merged values.
    """
    # One pass collecting each key's values in order. This also handles a
    # single-dict list and equal dicts, which a pairwise comparison based
    # on list.index() gets wrong.
    collected = {}
    for d in dict_list:
        for key, value in d.items():
            collected.setdefault(key, []).append(value)

    merged_dicts = {}
    for key, values in collected.items():
        if len(values) == 1:
            merged_dicts[key] = values[0]
        else:
            merged_dicts[key] = separator.join(str(v) for v in values)
    return merged_dicts
dict1 = {'a': 'a'}
dict2 = {'a':'b', 'c':'d' , 'f':'w', 'r':' not unique' }
dict3 = {'a': 'c', 'c':'e'}
dict4 = {'a': 'd', 'c':'e', 'f':'r'}
dict5 = {'r': 'x', 'y':'only unique', 'f':'r'}
result = merge_concatenate_dicts([dict1,dict2,dict3,dict4,dict5])
print(result)
Yields
{'y': 'only unique', 'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex'}
You can use collections.defaultdict here.
from collections import defaultdict
dict1 = {'a': 'a'}
dict2 = {'a':'b', 'c':'d' , 'f':'w', 'r':' not unique' }
dict3 = {'a': 'c', 'c':'e'}
dict4 = {'a': 'd', 'c':'e', 'f':'r'}
dict5 = {'r': 'x', 'y':'only unique', 'f':'r'}
def merge(*dicts):
    """Merge dicts of strings, concatenating values that share a key.

    Values are appended in argument order.

    Args:
        *dicts: dicts whose values are strings.

    Returns:
        A ``defaultdict(str)`` holding the concatenated values; wrap it in
        ``dict(...)`` if a plain dict is needed.
    """
    out = defaultdict(str)
    for d in dicts:
        for k, v in d.items():
            out[k] += v
    return out
merge(dict1,dict2,dict3,dict4,dict5)
# defaultdict(<class 'str'>, {'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex', 'y': 'only unique'})
#When `return dict(out)`
# {'a': 'abcd', 'c': 'dee', 'f': 'wrr', 'r': ' not uniquex', 'y': 'only unique'}
I attempted to flatten a disorganized dictionary (that in turn was taken from a json file) to ease extracting info. Below is an example of how the dictionary is structured and my attempt at flattening it:
data = {'horse':{'speed':{"walk": 40, "run":50}}, 'dog':{'run':30}, 'human':{'gait':{'normal':{'run': 25, 'walk': 30}}}}
flat_dict = []
for items in list(data.items()):
flat_list = []
flat_list.append(items[0])
try:
for item in list(items[1].items())[0]:
if type(item) is not dict:
flat_list.append(item)
else:
flat_list.append(list(item.keys())[0])
flat_list.append(list(item.values())[0])
except:
flat_list.append(items[0])
flat_dict.append(flat_list)
print(flat_dict)
However the above code does not flatten the entire dictionary and some information is lost, here's the output of the above code:
[['horse', 'speed', 'walk', 40], ['dog', 'run', 30], ['human', 'gait', 'normal', {'run': 25, 'walk': 30}]]
What I wanted was:
[['horse', 'speed', 'walk', 40, 'run', 50], ['dog', 'run', 30], ['human', 'gait', 'normal', 'run', 25, 'walk', 30]]
What do I do?
you can use a recursive approach with a list comprehension:
def gen(d):
    """Yield the keys and leaf values of a nested dict, depth first.

    For a dict, each key is yielded followed by everything produced for
    its value. Any non-dict value is yielded as is.
    """
    if isinstance(d, dict):
        for k, v in d.items():
            yield k
            yield from gen(v)
    else:
        yield d
[[k, *gen(v)] for k, v in data.items()]
output:
[['horse', 'speed', 'walk', 40, 'run', 50],
['dog', 'run', 30],
['human', 'gait', 'normal', 'run', 25, 'walk', 30]]
As you don't know the structure inside the dict, you cannot use simple loops to handle each case; you need recursion. I'd suggest a utility function that recursively flattens whatever structure it is given, and then using it to build lists of the form [key, *flatten(value)].
def flatten(values) -> list:
    """Recursively flatten nested lists and dicts into one flat list.

    List items are flattened in order. For a dict, each key is emitted
    immediately followed by its flattened value, so keys stay next to the
    values they belong to. Any other value becomes a one-element list.
    """
    if isinstance(values, list):
        return [v for value in values for v in flatten(value)]
    if isinstance(values, dict):
        return [v for key, value in values.items() for v in (key, *flatten(value))]
    return [values]


def flatten_dict(values: dict) -> list:
    """Return one ``[key, *flattened value]`` list per top-level entry."""
    return [[key, *flatten(value)] for key, value in values.items()]


if __name__ == '__main__':
    # ['foo']
    print(flatten('foo'))
    # ['foo', 'bar', 'uio', 1, 2, 3, 'k1', 'v1', 'k2', 'kk1', '9', 5, 9, 8, 7]
    print(flatten(['foo', ['bar', 'uio', [1, 2, 3]], {'k1': 'v1', 'k2': {'kk1': ['9', 5, 9, 8, 7, ]}}]))
    data = {'horse': {'speed': {"walk": 40, "run": 50}}, 'dog': {'run': 30},
            'human': {'gait': {'normal': {'run': 25, 'walk': 30}}}}
    # [['horse', 'speed', 'walk', 40, 'run', 50], ['dog', 'run', 30], ['human', 'gait', 'normal', 'run', 25, 'walk', 30]]
    print(flatten_dict(data))
Answered as asked:
data = {
'horse': {
'speed': {
"walk": 40, "run": 50}},
'dog': {
'run': 30},
'human': {
'gait': {
'normal': {
'run': 25, 'walk': 30}}}}
def my_flatten(ddict, mylist):
    """Append the keys and leaf values of a nested dict to ``mylist``.

    Keys whose value is a dict are appended and the nested dict is then
    walked recursively; other entries contribute their key followed by
    their value.

    Args:
        ddict: (possibly nested) dict to walk.
        mylist: list extended in place.

    Returns:
        ``mylist``, for convenient use in expressions.
    """
    for k, v in ddict.items():
        if isinstance(v, dict):
            mylist.append(k)
            my_flatten(v, mylist)
        else:
            mylist.extend([k, v])
    return mylist
flist = [my_flatten(v, [k]) for k, v in data.items()]
print(flist)
exhaustive:
- all keys in the dictionary, even if the keys are in a nested dictionary that is a value to a previous-level dictionary key.
sorted:
- this is to ensure the keys are always returned in the same order
The nesting is arbitrarily deep. A non-recursive algorithm is preferred.
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
Note: dictionary values can include lists (which can have dictionaries as elements), e.g.
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
def _auxallkeys(aset, adict):
    """Add the keys of ``adict`` and of all dicts nested in it to ``aset``."""
    aset.update(adict)
    # values() instead of the Python 2-only itervalues().
    for d in adict.values():
        if isinstance(d, dict):
            _auxallkeys(aset, d)


def allkeys(adict):
    """Return the sorted, de-duplicated keys of ``adict`` and its nested dicts.

    Only dicts stored directly as values are followed; dicts inside lists
    or other containers are not visited.
    """
    aset = set()
    _auxallkeys(aset, adict)
    return sorted(aset)
is the obvious (recursive) solution. To eliminate recursion:
def allkeys(adict):
    """Return the sorted, de-duplicated keys of ``adict`` and its nested dicts.

    Iterative version: dicts still to be visited are kept on an explicit
    stack. Only dicts stored directly as values are followed.
    """
    aset = set()
    pending = [adict]
    while pending:
        d = pending.pop()
        aset.update(d)
        # values() instead of the Python 2-only itervalues(). Visit order
        # is irrelevant because the keys end up in a set.
        pending.extend(dd for dd in d.values() if isinstance(dd, dict))
    return sorted(aset)
since the order of processing of the various nested dicts does not matter for this purpose.
Edit: the OP comments whining that it doesn't work if a dict is not nested, but rather in a list (and I replied that it could also be in a tuple, an object with attributes per-instance or per-class [maybe a base class thereof], a shelf, and many other ways to hide dicts around the house;-). If the OP will deign to define precisely what he means by "nested" (obviously not the same meaning as ordinary mortals apply to the word in question), it will probably be easier to help him. Meanwhile, here's a version that covers lists (and tuples, but not generators, instances of many itertools classes, shelves, etc, etc);
def allkeys(adict):
    """Return the sorted, de-duplicated keys found anywhere in ``adict``.

    Dicts are followed through dict values as well as through lists and
    tuples, at any depth. Other containers (generators, sets, ...) are not
    inspected.
    """
    aset = set()
    # A single stack holds every object still to be inspected.
    pending = [adict]
    while pending:
        item = pending.pop()
        if isinstance(item, dict):
            aset.update(item)
            pending.extend(item.values())
        elif isinstance(item, (list, tuple)):
            pending.extend(item)
    return sorted(aset)
A non-recursive method isn't obvious to me right now. The following works on your original example. Edit: It will now handle dicts within a list within a dict, at least the one within the tricky example cited in the comment to Alex Martelli's answer.
#!/usr/bin/env python
import types
def get_key_list(the_dict, key_list):
    """Append every key of ``the_dict`` and its nested dicts to ``key_list``.

    Nested dicts are followed when they are stored directly as a value or
    as an item of a list value. Duplicate keys are kept, and the list is
    extended in place (nothing is returned).

    Args:
        the_dict: dict to walk.
        key_list: list receiving the keys.
    """
    # items()/isinstance replace the Python 2-only iteritems() and
    # types.DictType / types.ListType.
    for k, v in the_dict.items():
        key_list.append(k)
        if isinstance(v, dict):
            get_key_list(v, key_list)
        elif isinstance(v, list):
            for lv in v:
                if isinstance(lv, dict):
                    get_key_list(lv, key_list)
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
# Collect, sort and show the keys of both example structures.
key_list = []
get_key_list(level1, key_list)
key_list.sort()
print(key_list)
key_list = []
get_key_list(tricky, key_list)
key_list.sort()
print(key_list)
Output:
['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z']
['category', 'content', 'content']
Here's a non-recursive solution which processes generators as well as lists, tuples and dicts and adds all successive keys if a key appears more than once:
def get_iterator(i):
    """Return an iterator over ``i``, or None if ``i`` should not be walked.

    Iterators are returned unchanged, other iterables are wrapped with
    ``iter()``, and strings/bytes are deliberately treated as leaves.
    """
    if hasattr(i, '__next__'):
        # already an iterator - use it as-is!
        return i
    if hasattr(i, '__iter__') and not isinstance(i, (str, bytes)):
        # an iterable type that isn't a string
        return iter(i)
    # Can't iterate most other types!
    return None


def get_dict_keys(D):
    """Return the sorted keys of every dict reachable from ``D``.

    Non-recursive: a stack of ``(container, iterator)`` pairs is advanced
    one item at a time. Dicts contribute their keys and have their values
    explored; other iterables (lists, tuples, generators, ...) only have
    their items explored. A key that occurs several times is listed
    several times. A non-iterable ``D`` yields an empty list.
    """
    keys = []
    root_iter = get_iterator(D)
    # Pushing a non-iterable root would leave the loop below spinning
    # forever, so start with an empty stack in that case.
    stack = [(D, root_iter)] if root_iter is not None else []
    while stack:
        cur, cur_iter = stack[-1]
        try:
            item = next(cur_iter)
        except StopIteration:
            stack.pop()
            continue
        if isinstance(cur, dict):
            # Iterating a dict yields its keys; descend into the value.
            keys.append(item)
            child = cur[item]
        else:
            # Generators, lists, tuples: descend into the item itself.
            child = item
        child_iter = get_iterator(child)
        if child_iter is not None:
            stack.append((child, child_iter))
    # Sort and return
    keys.sort()
    return keys
D = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd', 'e': 134, 'f': [{'blah': 553}]} },
'level2_2' : { 'z': 'zzzzzzz' },
'blah2': iter([{'blah3': None}]),
}
print get_dict_keys(D)
EDIT: Increased the speed a bit and made the code shorter.
I also prefer a recursive approach...
#!/usr/bin/env python
def extract_all_keys(structure):
    """Return a sorted list of all keys in ``structure`` and nested dicts.

    Dicts nested as values, or as items of list/tuple values, are
    included. Duplicate keys are kept. Anything without ``keys()`` and
    ``values()`` (strings, numbers, ...) yields an empty list.
    """
    try:
        # Copy into a real list: on Python 3 keys() is a view that cannot
        # be extended.
        list_of_keys = list(structure.keys())
        values = list(structure.values())
    except AttributeError:
        return []
    for value in values:
        add_all_keys_in_value_to_list(value, list_of_keys)
    # sorted() returns the list; list.sort() would return None.
    return sorted(list_of_keys)


def add_all_keys_in_value_to_list(value, list_of_keys):
    """Extend ``list_of_keys`` in place with the keys found inside ``value``."""
    if isinstance(value, dict):
        list_of_keys.extend(extract_all_keys(value))
    elif isinstance(value, (list, tuple)):
        for element in value:
            list_of_keys.extend(extract_all_keys(element))
import unittest
class TestKeys(unittest.TestCase):
    """Given/when/then style tests for ``extract_all_keys``."""

    def given_a_structure_of(self, structure):
        """Store the structure under test."""
        self.structure = structure

    def when_keys_are_extracted(self):
        """Run ``extract_all_keys`` on the stored structure."""
        self.list_of_keys = extract_all_keys(self.structure)

    # assertEqual is used throughout: the assertEquals alias was removed
    # in Python 3.12.

    def testEmptyDict(self):
        self.given_a_structure_of({})
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, [])

    def testOneElement(self):
        self.given_a_structure_of({'a': 'aaaa'})
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a'])

    def testTwoElementsSorted(self):
        self.given_a_structure_of({
            'z': 'zzzz',
            'a': 'aaaa',
        })
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a', 'z'])

    def testNestedElements(self):
        self.given_a_structure_of({
            'a': {'aaaa': 'A',},
            'b': {'bbbb': 'B',},
        })
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a', 'aaaa', 'b', 'bbbb'])

    def testDoublyNestedElements(self):
        self.given_a_structure_of({
            'level2': {'aaaa': 'A',
                       'level3': {'bbbb': 'B'}
                       }
        })
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['aaaa', 'bbbb', 'level2', 'level3'])

    def testNestedExampleOnStackOverflow(self):
        level1 = {
            'a' : 'aaaa',
            'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
            'level2_2' : { 'z': 'zzzzzzz' }
        }
        self.given_a_structure_of(level1)
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z'])

    def testListExampleOnStackOverflow(self):
        tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
        self.given_a_structure_of(tricky)
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['category', 'content', 'content'])

    def testTuplesTreatedLikeLists(self):
        tricky_tuple = {'category': ({'content': 'aaaaa'}, {'content': 'bbbbbb'})}
        self.given_a_structure_of(tricky_tuple)
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, ['category', 'content', 'content'])

    def testCanHandleString(self):
        self.given_a_structure_of('keys')
        self.when_keys_are_extracted()
        self.assertEqual(self.list_of_keys, [])
if __name__ == '__main__':
unittest.main()
I think it is better to use a recursive method.
My code is in the following.
level1 = {
    'a' : 'aaaa',
    'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
    'level2_2' : { 'z': 'zzzzzzz' }
}

all_keys = []  # a global list to store all the keys in level1


def depth(dict):
    """Append every key of ``dict`` and of its nested dicts to ``all_keys``.

    Keys whose value is itself a dictionary are recorded too, before the
    nested dictionary is walked. Results accumulate in the module-level
    ``all_keys`` list.
    """
    # The parameter shadows the builtin ``dict`` (name kept so existing
    # callers keep working), so take the mapping type from the argument.
    mapping_type = type(dict)
    for k, v in dict.items():
        all_keys.append(k)
        if isinstance(v, mapping_type):  # judge the type of elements in dictionary
            depth(v)  # recursive


depth(level1)
print(all_keys)