I have this multi-dimensional dict:
a = {'a' : 'b', 'c' : {'d' : 'e'}}
And I have written a simple function to flatten that dict:
def __flatten(self, dictionary, level=None):
    """Flatten a nested dict into a single-level dict with dotted keys.

    Args:
        dictionary: The (possibly nested) dict to flatten.
        level: Keys of the enclosing dicts, outermost first. Callers
            normally omit it; it is supplied by the recursive calls.

    Returns:
        A new dict in which every non-dict value is stored under its key
        path joined with ".", e.g. ``{'c': {'d': 'e'}}`` -> ``{'c.d': 'e'}``.
    """
    # A None sentinel avoids sharing one list object between calls.
    if level is None:
        level = []
    tmp_dict = {}
    for key, val in dictionary.items():
        # isinstance also descends into dict subclasses.
        if isinstance(val, dict):
            tmp_dict.update(self.__flatten(val, level + [key]))
        else:
            tmp_dict['.'.join(level + [key])] = val
    return tmp_dict
After calling this function with dict a, I get this result:
{'a' : 'b', 'c.d' : 'e'}
Now, after performing a few operations on this flattened dict, I need to build a new multi-dimensional dict from it. Example:
>>> unflatten({'a' : 0, 'c.d' : 1})
{'a' : 0, 'c' : {'d' : 1}}
The only problem I have is that I do not have an unflatten function :)
Can anyone help with this? I have no idea how to do it.
EDIT:
Another example:
{'a' : 'b', 'c.d.e.f.g.h.i.j.k.l.m.n.o.p.r.s.t.u.w' : 'z'}
Should be after unflatten:
{'a': 'b', 'c': {'d': {'e': {'f': {'g': {'h': {'i': {'j': {'k': {'l': {'m': {'n': {'o': {'p': {'r': {'s': {'t': {'u': {'w': 'z'}}}}}}}}}}}}}}}}}}}
And another:
{'a' : 'b', 'c.d' : 'z', 'c.e' : 1}
To:
{'a' : 'b', 'c' : {'d' : 'z', 'e' : 1}}
This greatly increases the difficulty of the task, I know. That's why I had a problem with this and found no solution in hours.
def unflatten(dictionary):
    """Expand dotted keys of a flat dict into nested dicts.

    Every key is split on "."; all segments but the last name the chain of
    nested dicts (created on demand), and the last segment receives the value.
    """
    nested = {}
    for dotted_key, value in dictionary.items():
        *ancestors, leaf = dotted_key.split(".")
        node = nested
        # Walk down to the parent of the leaf, creating levels as needed.
        for name in ancestors:
            node = node.setdefault(name, {})
        node[leaf] = value
    return nested
from collections import defaultdict
def unflatten(d):
    """Expand dotted keys of a flat dict into nested dicts, at any depth.

    Each key is split at its first "."; entries sharing the same head are
    grouped, and every group is unflattened recursively so that paths such
    as ``'c.d.e'`` become ``{'c': {'d': {'e': ...}}}``.

    Args:
        d: Mapping from (possibly dotted) string keys to values.

    Returns:
        A ``defaultdict(dict)``; nested levels built from dotted keys are
        ``defaultdict`` instances too and compare equal to plain dicts.
    """
    ret = defaultdict(dict)
    # Heads whose current value was assembled from dotted keys and may
    # therefore still contain dotted remainders.
    nested_heads = set()
    for k, v in d.items():
        head, delim, tail = k.partition('.')
        if delim:
            ret[head].update({tail: v})
            nested_heads.add(head)
        else:
            # A plain key replaces whatever was grouped under it so far.
            ret[head] = v
            nested_heads.discard(head)
    for head in nested_heads:
        ret[head] = unflatten(ret[head])
    return ret
Here's one utilizing Python 3.5+ features, like typing and destructuring assignments. Try the tests out on repl.it.
from typing import Any, Dict
def unflatten(
    d: Dict[str, Any],
    base: Dict[str, Any] = None,
) -> Dict[str, Any]:
    """Expand dotted-path keys into nested dicts, merging into *base*.

    >>> unflatten({'a': 12, 'b': 13, 'c': 14})  # nothing to expand
    {'a': 12, 'b': 13, 'c': 14}

    >>> unflatten({'a.b.c': 12})  # dotted path expansion
    {'a': {'b': {'c': 12}}}

    >>> unflatten({'a.b.c': 12, 'a': {'b.d': 13}})  # merging
    {'a': {'b': {'c': 12, 'd': 13}}}

    >>> unflatten({'a.b': 12, 'a': {'b': 13}})  # later entries overwrite
    {'a': {'b': 13}}

    >>> unflatten({'a': {}})  # empty dicts are kept
    {'a': {}}
    """
    if base is None:
        base = {}

    for dotted, value in d.items():
        target = base
        leaf = dotted

        # For a dotted path, descend (creating levels on demand) to the dict
        # that owns the final segment, so a single assignment below suffices.
        if '.' in dotted:
            path, _, leaf = dotted.rpartition('.')
            for segment in path.split('.'):
                target = target.setdefault(segment, {})

        # Dict values are unflattened too and merged into what is already
        # stored under the same key.
        if isinstance(value, dict):
            value = unflatten(value, target.get(leaf, {}))

        target[leaf] = value

    return base
I wrote one years ago in Python 2 and 3 which I've adapted below. It was for making it easier to check if a given dictionary is a subset of a larger dictionary irrespective of whether provided in flattened or scaffolded form.
A bonus feature: Should there be consecutive integer indexes (as in 0, 1, 2, 3, 4 etc.), this will also convert them back into lists as well.
def unflatten_dictionary(field_dict):
    """Rebuild a nested structure from a dict with flattened keys.

    Keys may address children with dots (``"a.b"``) or with bracketed integer
    subscripts (``"a[0]"``). Each call peels the first path segment off every
    key, groups the remainders under that segment and recurses into the
    groups.

    Args:
        field_dict: Mapping whose keys are flattened paths. Values that are
            dicts are unflattened as well.

    Returns:
        A dict, or a list when the level is non-empty, all of its keys are
        digits and the highest one equals ``len - 1``. An empty level stays
        an empty dict.
    """
    field_dict = dict(field_dict)
    new_field_dict = dict()
    field_keys = list(field_dict)
    field_keys.sort()

    for each_key in field_keys:
        field_value = field_dict[each_key]
        processed_key = str(each_key)
        current_key = None
        current_subkey = None

        # Only the first "[" or "." matters; everything after it is kept in
        # current_subkey and handled by the recursive call below.
        for i, char in enumerate(processed_key):
            if char == "[":
                current_key = processed_key[:i]
                start_subscript_index = i + 1
                end_subscript_index = processed_key.index("]")
                current_subkey = int(processed_key[start_subscript_index:end_subscript_index])

                # Text after "]" (e.g. "].rest"): keep "index.rest" as the
                # subkey; the "+ 2" skips the "]" and the separator after it.
                if len(processed_key[end_subscript_index:]) > 1:
                    current_subkey = "{}.{}".format(current_subkey, processed_key[end_subscript_index + 2:])
                break
            elif char == ".":
                # next child key is a dictionary
                current_key, current_subkey = processed_key.split(".", 1)
                break

        if current_subkey is not None:
            if current_key.isdigit():
                current_key = int(current_key)
            if current_key not in new_field_dict:
                new_field_dict[current_key] = dict()
            new_field_dict[current_key][current_subkey] = field_value
        else:
            new_field_dict[each_key] = field_value

    # Recursively unflatten each dictionary on each depth before returning
    # to the caller, and check whether the keys could be list indexes.
    all_digits = True
    highest_digit = -1
    for each_key, each_item in new_field_dict.items():
        if isinstance(each_item, dict):
            new_field_dict[each_key] = unflatten_dictionary(each_item)

        all_digits &= str(each_key).isdigit()
        if all_digits:
            next_digit = int(each_key)
            if next_digit > highest_digit:
                highest_digit = next_digit

    # Convert to a list only when there is at least one key; otherwise an
    # empty dict (highest_digit == -1 == len - 1) would turn into [].
    if new_field_dict and all_digits and highest_digit == (len(new_field_dict) - 1):
        digit_keys = list(new_field_dict)
        digit_keys.sort()
        new_list = []
        for k in digit_keys:
            i = int(k)
            # String keys sort lexicographically ("10" < "2"), so pad with
            # None when an index is ahead of the current list length.
            while len(new_list) <= i:
                new_list.append(None)
            new_list[i] = new_field_dict[k]
        new_field_dict = new_list

    return new_field_dict
# Test
if __name__ == '__main__':
    # Flattened fixture mixing dotted paths, bracketed indexes and an
    # already-nested dict value.
    input_dict = {
        'a[0]': 1,
        'a[1]': 10,
        'a[2]': 5,
        'b': 10,
        'c.test.0': "hi",
        'c.test.1': "bye",
        "c.head.shoulders": "richard",
        "c.head.knees": 'toes',
        "z.trick.or[0]": "treat",
        "z.trick.or[1]": "halloween",
        "z.trick.and.then[0]": "he",
        "z.trick.and.then[1]": "it",
        "some[0].nested.field[0]": 42,
        "some[0].nested.field[1]": 43,
        "some[2].nested.field[0]": 44,
        "mixed": {
            "statement": "test",
            "break[0]": True,
            "break[1]": False,
        },
    }

    # Structure unflatten_dictionary is expected to produce. "some" stays a
    # dict because its indexes (0 and 2) are not consecutive.
    expected_dict = {
        'a': [1, 10, 5],
        'b': 10,
        'c': {
            'test': ['hi', 'bye'],
            'head': {'shoulders': 'richard', 'knees': 'toes'},
        },
        'z': {
            'trick': {
                'or': ["treat", "halloween"],
                'and': {'then': ["he", "it"]},
            },
        },
        'some': {
            0: {'nested': {'field': [42, 43]}},
            2: {'nested': {'field': [44]}},
        },
        "mixed": {
            "statement": "test",
            "break": [True, False],
        },
    }

    separator = "===================================="

    print("Input:")
    print(input_dict)
    print(separator)
    print("Output:")
    actual_dict = unflatten_dictionary(input_dict)
    print(actual_dict)
    print(separator)
    print(f"Test passed? {expected_dict==actual_dict}")
As a rough-draft (could use a little improvement in variable name choice, and perhaps robustness, but it works for the example given):
def unflatten(d):
    """Expand dotted keys of a flat dict into nested dicts.

    Unlike a single ``split('.', 1)``, this handles paths of any depth and
    merges keys that share a prefix (``'c.d'`` and ``'c.e'`` both end up
    inside the same ``'c'`` dict instead of overwriting each other).

    Args:
        d: Mapping from (possibly dotted) string keys to values.

    Returns:
        A new nested dict; *d* is left untouched.
    """
    result = {}
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for k, v in d.items():
        parts = k.split('.')
        node = result
        # Descend to the dict that owns the last path segment.
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = v
    return result
Related
My json data would look like this:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Is there a way I can get the values for certain fields in the response along with their keys? From this response, the fields for which I expect values are a, c, e, g, i, j, along with their respective keys.
E.g.: [a:1,c:2,e:3,g:4,i:5,j:6]. Could this be done?
My response contained something like:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4,
"k":[
"l","m"]
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
This resulted in an error, because the list under "k" contains plain strings rather than dicts. I have made the following fix for it.
def get_key_value(dct, res_dct, lst):
    """Collect every scalar field of a nested dict/list structure.

    Args:
        dct: The dict to walk. Dict values and dicts inside lists are
            visited recursively.
        res_dct: Output dict; receives ``key -> value`` for every value that
            is neither a list nor a dict.
        lst: Output list; receives ``"key:value"`` strings for the same
            fields, plus one ``"key:item"`` entry per non-dict list element.

    Returns:
        None. Both output containers are filled in place.
    """
    for k, v in dct.items():
        if isinstance(v, list):
            for item in v:
                if isinstance(item, dict):
                    get_key_value(item, res_dct, lst)
                else:
                    # Record the element itself; formatting the whole list
                    # here would append the same string once per element.
                    lst.append(f'{k}:{item}')
        elif isinstance(v, dict):
            get_key_value(v, res_dct, lst)
        else:
            res_dct[k] = v
            # If you want to store in 'list' you can store as string
            lst.append(f'{k}:{v}')
# Output containers; get_key_value fills both of them in place.
res_dct = {}
lst = []
# NOTE(review): staging_dict is not defined in this snippet - presumably the
# parsed JSON response shown above; confirm against the caller.
get_key_value(staging_dict, res_dct, lst)
You can use a recursive function and store a key and its value only if the value is not a list or a dict.
def get_key_value(dct, res_dct, lst):
    """Collect every scalar field of a nested dict/list structure.

    Args:
        dct: The dict to walk. Dict values and dicts inside lists are
            visited recursively.
        res_dct: Output dict; receives ``key -> value`` for every value that
            is neither a list nor a dict.
        lst: Output list; receives ``"key:value"`` strings for the same
            fields, plus one ``"key:item"`` entry per non-dict list element.

    Returns:
        None. Both output containers are filled in place.
    """
    for k, v in dct.items():
        if isinstance(v, list):
            for item in v:
                if isinstance(item, dict):
                    get_key_value(item, res_dct, lst)
                else:
                    # Lists may hold plain values (e.g. ["l", "m"]); they
                    # have no .items(), so record them instead of recursing.
                    lst.append(f'{k}:{item}')
        elif isinstance(v, dict):
            get_key_value(v, res_dct, lst)
        else:
            res_dct[k] = v
            # If you want to store in 'list' you can store as string
            lst.append(f'{k}:{v}')
# Output containers; get_key_value fills both of them in place.
res_dct = {}
lst = []
# NOTE(review): dct is not defined in this snippet - presumably the parsed
# JSON data from the question; confirm against the caller.
get_key_value(dct, res_dct, lst)
print(res_dct)
print(lst)
Output:
# res_dct
{'a': 1, 'c': 2, 'e': 3, 'g': 4, 'i': 5, 'j': 6}
# lst
['a:1', 'c:2', 'e:3', 'g:4', 'i:5', 'j:6']
I need to parse a flattened structure and create a nested structure using the list of keys provided. I have solved the problem, but I am looking for improvements and would like to learn what I can change in my code. Can somebody review it and suggest a refactoring?
src_data = [
{
"key1": "XX",
"key2": "X111",
"key3": "1aa",
"key4": 1
},
{
"key1": "YY",
"key2": "Y111",
"key3": "1bb",
"key4": 11
},
{
"key1": "ZZ",
"key2": "Z111",
"key3": "1cc",
"key4": 2.4
},
{
"key1": "AA",
"key2": "A111",
"key3": "1cc",
"key4": 33333.2122
},
{
"key1": "BB",
"key2": "B111",
"key3": "1bb",
"key4": 2
},
]
This is the code I have developed so far to create the final result.
def plant_tree(ll):
    """Build a tree of nested dicts from an iterable of key paths.

    Each path is a sequence of keys; every key becomes a dict nested inside
    the dict of the key before it. Leaves are empty dicts.
    """
    master_tree = {}
    for path in ll:
        node = master_tree
        for key in path:
            # Reuse the existing branch or start a new empty one.
            node = node.setdefault(key, {})
    return master_tree
def make_nested_object(tt, var):
    """Wrap *var* in one dict per key of the path *tt*.

    Returns a dict with the path under 'n_path' and, under 'n_structure',
    the chain of single-key dicts that leads to *var* along that path.
    """
    nested = var
    # Build from the innermost key outwards.
    for key in tt[::-1]:
        nested = {key: nested}
    return {'n_path': tt, 'n_structure': nested}
def getFromDict(dataDict, mapList):
    """Return the value reached by indexing *dataDict* with each key of *mapList* in turn."""
    node = dataDict
    for key in mapList:
        node = node[key]
    return node
def set_nested_item(dataDict, mapList, val):
    """Store *val* at the key path *mapList* inside *dataDict* and return *dataDict*.

    All levels except the last must already exist.
    """
    parent = dataDict
    for key in mapList[:-1]:
        parent = parent[key]
    parent[mapList[-1]] = val
    return dataDict
def update_tree(data_tree):
    """Write every entry of the module-level ``res_out`` into *data_tree*.

    For each ``path -> payload`` pair a nested object is built, the payload
    is read back from it along the path, and stored at the same path of
    *data_tree*. Returns the updated tree.
    """
    for path, payload in res_out.items():
        nested = make_nested_object(path, payload)
        leaf_path = nested['n_path']
        leaf_value = getFromDict(nested['n_structure'], leaf_path)
        data_tree = set_nested_item(data_tree, leaf_path, leaf_value)
    return data_tree
This is the customized itemgetter function from this question:
def customed_itemgetter(*args):
    """Return an itemgetter-like callable that always yields a tuple.

    ``operator.itemgetter`` returns the bare item when given a single key
    and a tuple when given two or more; this wrapper puts the single-key
    result into a 1-tuple so callers can treat every result the same way.

    Args:
        *args: One or more keys to look up.

    Returns:
        A callable mapping an object to a tuple with one value per key.
    """
    f = itemgetter(*args)
    # With two or more keys itemgetter already returns a tuple; only the
    # single-key case needs wrapping.
    if len(args) > 1:
        return f
    return lambda obj: (f(obj),)
define the nesting level
# Keys to nest by, outermost level first.
nesting_keys = ['key1', 'key3', 'key2']
# Key function that extracts the nesting-key values from one record.
grouper = customed_itemgetter(*nesting_keys)
# groupby only merges adjacent items, so the records are sorted by the same
# key function first.
ii = groupby(sorted(src_data, key=grouper), grouper)
# Map each group key to its records with the nesting keys stripped out.
res_out = {key: [{k:v for k,v in i.items() if k not in nesting_keys} for i in group] for key,group in ii}
#
# One path of nesting-key values per record (a generator, consumed by plant_tree).
ll = ([dd[x] for x in nesting_keys] for dd in src_data)
data_tree = plant_tree(ll)
get results
result = update_tree(data_tree)
How can I improve my code?
If the itemgetter [Python-doc] is given a single element, it returns that single element, and does not wrap it in a singleton-tuple.
We can however construct a function for that, like:
from operator import itemgetter
def itemgetter2(*args):
    """Return an itemgetter-like callable that always yields a tuple.

    ``operator.itemgetter`` returns the bare item for a single key and a
    tuple for two or more keys; the single-key result is wrapped in a
    1-tuple here so that every result has the same shape.

    Args:
        *args: One or more keys to look up.

    Returns:
        A callable mapping an object to a tuple with one value per key.
    """
    f = itemgetter(*args)
    # Two or more keys already produce a tuple; wrap only the single-key case.
    if len(args) > 1:
        return f
    return lambda obj: (f(obj),)
Then we can use the new itemgetter2 like this:
grouper = itemgetter2(*ll)
ii = groupby(sorted(src_data, key=grouper), grouper)
EDIT: Based on your question however, you want to perform multilevel grouping, we can make a function for that, like:
def multigroup(groups, iterable, index=0):
    """Group *iterable* into nested dicts, one level per key in *groups*.

    Level ``index`` is keyed by the value each item has for
    ``groups[index]``; once all keys are used up, the remaining items are
    returned as a list.
    """
    if index >= len(groups):
        return list(iterable)

    key_func = itemgetter(groups[index])
    # groupby only merges adjacent items, hence the sort on the same key.
    ordered = sorted(iterable, key=key_func)
    nested = {}
    for group_key, members in groupby(ordered, key_func):
        nested[group_key] = multigroup(groups, members, index=index + 1)
    return nested
For the src_data in the question, this then generates:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'a': 1, 'b': 2, 'z': 3}]}, 2: {3: [{'a': 2, 'b': 3, 'e': 2}]}, 4: {3: [{'a': 4, 'x': 3, 'b': 3}]}}
You can post-process the values in the list(..) call however. We can for example generate dictionaries without the elements in the grouping columns:
def multigroup(groups, iterable):
    """Group *iterable* into nested dicts and drop the grouping keys from the leaves.

    One dict level is created per key in *groups*. The innermost level
    holds a list of the grouped items, each copied without the keys that
    were used for grouping.
    """
    grouping_keys = set(groups)
    getters = [itemgetter(name) for name in groups]

    def build(items, depth=0):
        if depth >= len(groups):
            return [
                {k: v for k, v in item.items() if k not in grouping_keys}
                for item in items
            ]
        getter = getters[depth]
        level = {}
        # groupby only merges adjacent items, hence the sort on the same key.
        for group_key, members in groupby(sorted(items, key=getter), getter):
            level[group_key] = build(members, depth=depth + 1)
        return level

    return build(iterable)
For the given sample input, we get:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'z': 3}]}, 2: {3: [{'e': 2}]}, 4: {3: [{'x': 3}]}}
or for the new sample data:
>>> pprint(multigroup(['key1', 'key3', 'key2'], src_data))
{'AA': {'1cc': {'A111': [{'key4': 33333.2122}]}},
'BB': {'1bb': {'B111': [{'key4': 2}]}},
'XX': {'1aa': {'X111': [{'key4': 1}]}},
'YY': {'1bb': {'Y111': [{'key4': 11}]}},
'ZZ': {'1cc': {'Z111': [{'key4': 2.4}]}}}
I have a need in a project to find all of a given type in a nested dictionary and move them all to a top level key in the same dictionary.
So far I have the below code, which seems to work. In the example I'm looking for all the items that are integers and moving them to a 'numbers' key.
I'd prefer it if the lift_numbers_to_top function made and returned a copy of the dictionary rather than editing it in place, but I haven't been able to work out a nice way to pass the copy and the numbers back from the recursive function to itself, if that makes sense.
a_dictionary = {
"one": 1,
"two": 2,
"text": "Hello",
"more_text": "Hi",
"internal_dictionary": {
"three": 3,
"two": 2,
"even_more_text": "Hey",
"another_internal_dictionary": {
"four": 4,
"five": 5,
"last_text": "howdy"
}
}
}
def extract_integers(dictionary, level_key=None):
    """Return the entries of *dictionary* whose value is exactly an int.

    When *level_key* is truthy, each returned key is prefixed with
    ``level_key + "__"``; otherwise keys are returned unchanged. The exact
    type comparison means bools are not picked up.
    """
    def lifted_name(key):
        return level_key + "__" + key if level_key else key

    return {
        lifted_name(key): dictionary[key]
        for key in dictionary
        if type(dictionary[key]) == int
    }
def lift_numbers_to_top(dictionary, level_key=None):
    """Remove all int values from a nested dict and return them in one flat dict.

    Args:
        dictionary: The structure to process. Dicts are modified in place
            (their int entries are deleted); any other object is ignored.
        level_key: Key under which *dictionary* is stored in its parent, or
            None for the top level. When truthy, lifted keys are named
            ``level_key + "__" + key``.

    Returns:
        A dict of the removed entries from this level and all nested dicts.
        On a name clash the entry found later wins.
    """
    numbers = {}
    if isinstance(dictionary, dict):
        # Exact type check so that bools (a subclass of int) stay in place.
        # Collect the keys first: the dict must not change size while it is
        # being iterated.
        int_keys = [key for key, value in dictionary.items() if type(value) is int]
        for key in int_keys:
            lifted_key = level_key + "__" + key if level_key else key
            # Pop by the original key rather than re-deriving it from the
            # lifted name, which breaks for keys that contain "__".
            numbers[lifted_key] = dictionary.pop(key)
        for key, value in dictionary.items():
            numbers.update(lift_numbers_to_top(value, key))
    return numbers
a_dictionary['numbers'] = lift_numbers_to_top(a_dictionary)
print(a_dictionary)
Result:
{
'text': 'Hello',
'more_text': 'Hi',
'internal_dictionary': {
'even_more_text': 'Hey',
'another_internal_dictionary': {
'last_text': 'howdy'
},
},
'numbers': {
'one': 1,
'two': 2,
'internal_dictionary__two': 2,
'internal_dictionary__three': 3,
'another_internal_dictionary__four': 4,
'another_internal_dictionary__five': 5,
}
}
Use a match function to determine what to lift, and pass the target object that key-value pairs are moved into along to the recursive calls. If that target is missing, you know the current call is for the top level. The match function should return the new key for the new dictionary.
To produce a new dictionary, just produce a new dictionary and put recursion results into that object.
I prefer to use @singledispatch to handle different types when recursing:
from functools import singledispatch
@singledispatch
def lift_values(obj, match, targetname=None, **kwargs):
    """Lift key-value pairs from a nested structure to the top.

    For key-value pairs anywhere in the nested structure, if
    match(path, value) returns a value other than `None`, the
    key-value pair is moved to the top-level dictionary when targetname
    is None, or to a new dictionary stored under targetname when it is
    not None, using the return value of the match function as the key.
    path is the tuple of all keys and indices leading to the value.

    The input is not modified; a new structure is returned.

    For example, for an input

        {'foo': True, 'bar': [{'spam': False, 'ham': 42}]}

    and the match function lambda p, v: p if isinstance(v, bool) else None
    and targetname "flags", this function returns

        {'flags': {('foo',): True, ('bar', 0, 'spam'): False}, 'bar': [{'ham': 42}]}

    """
    # leaf nodes, no match testing needed, no moving of values
    return obj


@lift_values.register(list)
def _handle_list(obj, match, targetname=None, _path=(), **kwargs):
    # list values, no lifting, just passing on the recursive call.
    # targetname is accepted explicitly so that a positional targetname is
    # not mistaken for _path when the top-level object is a list.
    return [lift_values(v, match, targetname, _path=_path + (i,), **kwargs)
            for i, v in enumerate(obj)]


@lift_values.register(dict)
def _handle_dict(obj, match, targetname=None, _path=(), _target=None):
    result = {}
    if _target is None:
        # this is the top-level object, key-value pairs are lifted to
        # a new dictionary stored at this level:
        if targetname is not None:
            _target = result[targetname] = {}
        else:
            # no target name? Lift key-value pairs into the top-level
            # object rather than a separate sub-object.
            _target = result
    for key, value in obj.items():
        new_path = _path + (key,)
        new_key = match(new_path, value)
        if new_key is not None:
            _target[new_key] = value
        else:
            result[key] = lift_values(
                value, match, _path=new_path, _target=_target)
    return result
I included a dispatch function for lists; your sample doesn't use lists, but these are common in JSON data structures so I anticipate you probably want it anyway.
The match function must accept two arguments, the path to the object this key-value pair was found in, and the value. It should return a new key to use or None if not to lift the value.
For your case, the match function would be:
def lift_integers(path, value):
    """Match function: for int values return the last two path items joined by "__".

    Returns None for any other value, which means the pair is not lifted.
    """
    if not isinstance(value, int):
        return None
    return '__'.join(path[-2:])
result = lift_values(a_dictionary, lift_integers, 'numbers')
Demo on your sample input dictionary:
>>> from pprint import pprint
>>> def lift_integers(path, value):
... if isinstance(value, int):
... return '__'.join(path[-2:])
...
>>> lift_values(a_dictionary, lift_integers, 'numbers')
{'numbers': {'one': 1, 'two': 2, 'internal_dictionary__three': 3, 'internal_dictionary__two': 2, 'another_internal_dictionary__four': 4, 'another_internal_dictionary__five': 5}, 'text': 'Hello', 'more_text': 'Hi', 'internal_dictionary': {'even_more_text': 'Hey', 'another_internal_dictionary': {'last_text': 'howdy'}}}
>>> pprint(_)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {'another_internal_dictionary__five': 5,
'another_internal_dictionary__four': 4,
'internal_dictionary__three': 3,
'internal_dictionary__two': 2,
'one': 1,
'two': 2},
'text': 'Hello'}
Personally, I'd use the full path as the key in the lifted dictionary to avoid name clashes; either by joining the full path into a new string key with some unique delimiter, or just by making the path tuple itself the new key:
>>> lift_values(a_dictionary, lambda p, v: p if isinstance(v, int) else None, 'numbers')
{'numbers': {('one',): 1, ('two',): 2, ('internal_dictionary', 'three'): 3, ('internal_dictionary', 'two'): 2, ('internal_dictionary', 'another_internal_dictionary', 'four'): 4, ('internal_dictionary', 'another_internal_dictionary', 'five'): 5}, 'text': 'Hello', 'more_text': 'Hi', 'internal_dictionary': {'even_more_text': 'Hey', 'another_internal_dictionary': {'last_text': 'howdy'}}}
>>> pprint(_)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {('internal_dictionary', 'another_internal_dictionary', 'five'): 5,
('internal_dictionary', 'another_internal_dictionary', 'four'): 4,
('internal_dictionary', 'three'): 3,
('internal_dictionary', 'two'): 2,
('one',): 1,
('two',): 2},
'text': 'Hello'}
You can walk through the dict recursively and pop all elements whose values are an int to create a new dict:
>>> def extract(d):
... new_d = {}
... for k,v in d.items():
... if type(v) == int:
... new_d[k] = d[k]
... elif type(v) == dict:
... for k2,v2 in extract(v).items():
... new_d[k2 if '__' in k2 else k+'__'+k2] = v2
... return new_d
...
>>> a_dictionary['numbers'] = extract(a_dictionary)
>>> pprint(a_dictionary)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {'another_internal_dictionary__five': 5,
'another_internal_dictionary__four': 4,
'internal_dictionary__three': 3,
'internal_dictionary__two': 2,
'one': 1,
'two': 2},
'text': 'Hello'}
I would like to go from
first =
{'a' :
{'b' :
{'c' : ['d', 'e'],
'f' : ['g']
}
}
}
to
z = 0
second =
{'a' :
{'b' :
{'c' :
{'d' : z,
'e' : z
},
'f' :
{'g' : z}
}
}
}
I'm looking for the pythonic way to do this.
I want to change every 3rd level elements of first (c and f) from a list to a dictionary where keys are the elements of the list (d, e for example).
>>> {k1: {k2: {k3: {k4:0 for k4 in v3} for k3, v3 in v2.items()} for k2, v2 in v1.items()} for k1, v1 in first.items()}
{'a': {'b': {'c': {'e': 0, 'd': 0}, 'f': {'g': 0}}}}
=)
(If you don't want to get shot in code review, stick to the advice by 9000 in the comments, please. I.e. get all third level dicts and manipulate those directly.)
Recursion is your friend! It allows you to generalize the operation to the last level of a dictionary, which can be arbitrarily long and even not symmetric (e.g. having a branch with 5 levels and one with 3 levels).
first = \
{'a' :
{'b' :
{'c' : ['d', 'e'],
'f' : ['g']
}
}
}
def rec_list_to_dict(inp, sub_value = 0):
    """Recursively replace every list in a nested dict by a dict.

    A list becomes a dict whose keys are the list elements, each mapped to
    *sub_value*; a dict is rebuilt with all of its values converted.
    Any other input raises AttributeError.
    """
    if isinstance(inp, list):
        return dict.fromkeys(inp, sub_value)
    if isinstance(inp, dict):
        converted = {}
        for key, el in inp.items():
            converted[key] = rec_list_to_dict(el, sub_value)
        return converted
    raise AttributeError
second = rec_list_to_dict(first, sub_value = 0)
This is a nice place to use the visitor pattern (not sure if it is considered a pattern, of course):
def visit_dict(d, new_value=None):
    """Return a copy of *d* in which every list is replaced by a dict.

    Nested dicts are visited recursively, lists are converted by
    visit_list, and all other values are carried over unchanged.
    """
    def convert(value):
        if isinstance(value, dict):
            return visit_dict(value, new_value)
        if isinstance(value, list):
            return visit_list(value, new_value)
        return value

    return {key: convert(value) for key, value in d.items()}


def visit_list(l, new_value=None):
    """Return a dict mapping every element of *l* to *new_value*."""
    return dict.fromkeys(l, new_value)
first = {
'a' : {
'b' : {
'c' : ['d', 'e'],
'f' : ['g']
}
}
}
z = 0
print(visit_dict(first, z))
# {
# 'a' : {
# 'b' : {
# 'c' : {
# 'd': 0,
# 'e': 0
# }
# 'f' : {
# 'g': 0
# }
# }
# }
# }
You customize each step according to the type you visit each time. This code assumes lists have hashable values, but you can of course extend it.
import pprint
full_key_list = {"F1", "F2", "F3", "F4", "F5"}  # all expected fields
filt_key_list = {"F2", "F5"}  # fields that should not be included
cont_list = []  # stores all filtered documents

read_in_cont1 = {"F1": 1, "F2": True, "F3": 'abc', "F4": 130, "F5": 'X1Z'}  # document 1
read_in_cont2 = {"F1": 2, "F2": False, "F3": 'efg', "F4": 100, "F5": 'X4Z'}  # document 2
read_in_cont3 = {"F1": 3, "F2": True, "F3": 'acd', "F4": 400, "F5": 'X2Z'}  # document 3

# assume that read_in_conts contains the list of documents
read_in_conts = [read_in_cont1, read_in_cont2, read_in_cont3]

for one_item in read_in_conts:  # for each document in the list
    # Keep only the fields that are not filtered out. items() works on both
    # Python 2 and 3, whereas iteritems() exists on Python 2 only.
    cont_dict = {
        key: value
        for key, value in one_item.items()
        if key not in filt_key_list
    }
    cont_list.append(cont_dict)

pprint.pprint(cont_list)
Output:
[{'F1': 1, 'F3': 'abc', 'F4': 130},
{'F1': 2, 'F3': 'efg', 'F4': 100},
{'F1': 3, 'F3': 'acd', 'F4': 400}]
Here is what I want to achieve:
Given an original raw collection of documents (i.e. read_in_conts for simulation),
I need to filter the fields so that they are not included in further process. Above
is my implementation in Python. However, I think it is too heavy and expect to see
a clean solution for this task.
Thank you
# One filtered copy per document, without the keys listed in filt_key_list.
# items() is used instead of the Python 2-only iteritems().
cont_list = [
    {k: v for k, v in d.items() if k not in filt_key_list}
    for d in read_in_conts
]
or if you want a slightly more factored version:
def filter_out_keys(d, x):
    """Return a copy of dict *d* without the keys contained in *x*."""
    # items() instead of the Python 2-only iteritems().
    return {k: v for k, v in d.items() if k not in x}
cont_list = [filter_out_keys(d, filt_key_list) for d in read_in_conts]
P.S. I'd suggest making filt_key_list a set() instead - it will make `in` checks faster.
def filter_dict(d, keys):
    """Return a copy of dict *d* without the entries whose key is in *keys*.

    Args:
        d: The dict to filter; it is not modified.
        keys: Container of keys to leave out (a set gives the fastest
            membership tests).

    Returns:
        A new dict with the remaining key-value pairs.
    """
    # Filter on the *keys* argument rather than a module-level name, and use
    # items() instead of the Python 2-only iteritems().
    return {key: value for key, value in d.items() if key not in keys}
cont_list = [filter_dict(d, filt_key_list) for d in read_in_conts]
Your code is fine. You can make it slightly shorter:
# sets can be faster if `ignored_keys` is actually much longer
ignored_keys = {"F2", "F5"}

# the inline version of your loop:
# a dict comprehension inside a list comprehension.
# items() is used instead of the Python 2-only iteritems().
filtered = [
    {k: v for k, v in row.items() if k not in ignored_keys}
    for row in read_in_conts
]