Flattening a dictionary of dictionaries that contain lists - python

I have a dictionary of dictionaries that looks like this:
data = {
    'data': 'input',
    'test': {
        'and': {
            'range': {'month': [{'start': 'Jan', 'end': 'July'}]},
            'Student': {'Name': ['ABC'], 'Class': ['10']}
        }
    }
}
I need to flatten this dict into a dataframe. I tried to use json_normalize() to flatten the dictionary, but the output did not match what I want. My desired output is one row per leaf value, with the dot-joined key path alongside the value.
This can be done in R by using as.data.frame(unlist(data)), but I want to do the same flattening in Python. I am a novice in Python, so I don't have much idea about how to do this.
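For reference, a minimal sketch of how json_normalize behaves on this data with a recent pandas (my own illustration, not part of the question): the nested dicts become dotted columns, but the list values stay bundled in single cells, which is why it does not give the unlist-style result.
import pandas as pd

print(pd.json_normalize(data))
# (approximately)
#     data            test.and.range.month test.and.Student.Name test.and.Student.Class
# 0  input  [{'start': 'Jan', 'end': ...}]                 [ABC]                   [10]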

I have made an attempt to normalize your json object by writing a recursive function as follows:
data = {
    'data': 'input',
    'test': {
        'and': {
            'range': {'month': [{'start': 'Jan', 'end': 'July'}]},
            'Student': {'Name': ['ABC'], 'Class': ['10']}
        }
    }
}
sequence = ""
subDicts = []
def findAllSubDicts(data):
global subDicts
global sequence
for key, value in data.items():
sequence += key
#print(sequence)
if isinstance(value, str):
subDicts.append([sequence,value])
sequence = sequence[:sequence.rfind(".")+1]
#print(sequence)
elif isinstance(value, dict):
tempSequence = sequence[:sequence.rfind(".")+1]
sequence += "."
#print(sequence)
findAllSubDicts(value)
sequence = tempSequence
elif isinstance(value, list) and isinstance(value[0], dict):
sequence += "."
tempSequence = sequence[:sequence.rfind(".")+1]
#print(sequence)
findAllSubDicts(value[0])
sequence = tempSequence
elif isinstance(value, list) and len(value)==1:
tempSequence = sequence[:sequence.rfind(".")+1]
subDicts.append([sequence,value[0]])
sequence = tempSequence
return subDicts
outDict = findAllSubDicts(data)
for i in outDict:
    print(i[0].ljust(40, " "), end=" ")
    print(i[1])
Printing the results will give you:
data input
test.and.range.month.start Jan
test.and.range.month.end July
test.and.Student.Name ABC
test.and.Student.Class 10
Notify me if you need any clarification or any modification in my code.
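If you also need the flattened pairs as an actual DataFrame, as the question asks, here is a minimal sketch building on the list returned above (the pandas part and the column names are my own addition, not from the original answer):
import pandas as pd

# outDict is the list of [dotted_path, value] pairs built above
df = pd.DataFrame(outDict, columns=["path", "value"]).set_index("path")
print(df)
#                             value
# path
# data                        input
# test.and.range.month.start    Jan
# ...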


Parsing through nested JSON keys

I have a JSON file that looks like this:
data = {
    "x": {
        "y": {
            "key": {},
            "w": {}
        }
    }
}
And have converted it into a dict in Python to then parse through it to look for keys, using the following code:
entry = input("Search JSON for the following: ")  # search for "key"
if entry in data:
    print(entry)
else:
    print("Not found.")
However, even when I input "key" as entry, it still returns "Not found." Do I need to control the depth of data? What if I don't know the location of "key" but still want to search for it?
Your method is not working because key is not a key in data. data has one key: x. So you need to look at the dictionary and see if the key is in it. If not, you can pass the next level dictionaries back to the function recursively. This will find the first matching key:
data = {
    "x": {
        "y": {
            "key": "some value",
            "w": {}
        }
    }
}
key = "key"
def findValue(key, d):
if key in d:
return d[key]
for v in d.values():
if isinstance(v, dict):
found = findValue(key, v)
if found is not None:
return found
findValue(key, data)
# 'some value'
It will return None if your key is not found.
Here's an approach which allows you to collect all the values from a nested dict, if the keys are repeated at different levels of nesting. It's very similar to the above answer, just wrapped in a function with a nonlocal list to hold the results:
def foo(mydict, mykey):
    result = []
    num_recursive_calls = 0
    def explore(mydict, mykey):
        # nonlocal result  # allow successive recursive calls to write to the list
        # actually this is unnecessary in this case! Here
        # is where we would need it, for a call counter:
        nonlocal num_recursive_calls
        num_recursive_calls += 1
        for key in mydict.keys():  # get all keys from that level of nesting
            if mykey == key:
                print(f"Found {key}")
                result.append({key: mydict[key]})
            elif isinstance(mydict.get(key), dict):
                print(f"Found nested dict under {key}, exploring")
                explore(mydict[key], mykey)
    explore(mydict, mykey)
    print(f"explore called {num_recursive_calls} times")  # see above
    return result
For example, with
data = {'x': {'y': {'key': {}, 'w': {}}}, 'key': 'duplicate'}
This will return:
[{'key': {}}, {'key': 'duplicate'}]

How to replace a nested Python dictionary value given a key in dot-separated string format?

a = {
    'user': {
        'username': 'mic_jack',
        'name': {
            'first': 'Micheal',
            'last': 'Jackson'
        },
        'email': 'micheal@domain.com',
        #...
        #... Infinite level of another nested dict
    }
}
str_key_1 = 'user.username=john'
str_key_2 = 'user.name.last=henry'
#...
#str_key_n = 'user.level2.level3...leveln=XXX'
Consider that this 'str_key' string can go any number of dots/levels deep.
Expected Output:
a = {
    'user': {
        'username': 'john',  # username should be replaced
        'name': {
            'first': 'Micheal',
            'last': 'henry'  # last name should be replaced
        },
        'email': 'micheal@domain.com',
        #...
        #... Infinite level of another nested dict
    }
}
I'm expecting answers that handle an 'n'-level nested key string, rather than simply replacing statically with a['user']['username'] = 'John'. Answers must work for any number of 'dotted' string values.
Thanks in advance!
There are three steps:
Separate the key-value pair string into a fully-qualified key and value.
Split the key into path components.
Traverse the dictionary to find the relevant value to update.
Here's an example of what the code might look like:
# Split by the delimiter, making sure to split once only
# to prevent splitting when the delimiter appears in the value
key, value = str_key_n.split("=", 1)

# Break the dot-joined key into parts that form a path
key_parts = key.split(".")

# The last part is required to update the dictionary
last_part = key_parts.pop()

# Traverse the dictionary using the parts
current = a
while key_parts:
    current = current[key_parts.pop(0)]

# Update the value
current[last_part] = value
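Wrapped into a reusable function (the name set_by_path and this packaging are my own, not part of the answer), the same steps might look like this:
def set_by_path(d, assignment):
    # assignment is a dot-separated path plus value, e.g. 'user.name.last=henry'
    key, value = assignment.split("=", 1)
    *parents, last = key.split(".")
    current = d
    for part in parents:
        current = current[part]
    current[last] = value

set_by_path(a, 'user.username=john')
set_by_path(a, 'user.name.last=henry')
# a['user']['username'] == 'john' and a['user']['name']['last'] == 'henry'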
I'd go with a recursive function to accomplish this, assuming your key value strings are all valid:
def assign_value(sample_dict, str_keys, value):
    access_key = str_keys[0]
    if len(str_keys) == 1:
        sample_dict[access_key] = value
    else:
        sample_dict[access_key] = assign_value(sample_dict[access_key], str_keys[1:], value)
    return sample_dict
The idea is to traverse your dict until you hit the lowest key, and then assign the new value to that last key:
if __name__ == "__main__":
sample_dict = {
'user': {
'username': 'mic_jack',
'name': {
'first': 'Micheal',
'last': 'Jackson'
},
'email': 'micheal#domain.com'
}
}
str_key_1 = 'user.username=john'
str_keys_1, value_1 = str_key_1.split('=')
sample_dict = assign_value(sample_dict, str_keys_1.split('.'), value_1)
print("result: {} ".format(sample_dict))
str_key_2 = 'user.name.last=henry'
str_keys_2, value_2 = str_key_2.split('=')
sample_dict = assign_value(sample_dict, str_keys_2.split('.'), value_2)
print("result: {}".format(sample_dict))
To use assign_value you need to split your original string into the keys and the value, as seen above.
If you're okay with using exec() and modifying your str_key(s), you could do something like:
def get_keys_value(string):
    keys, value = string.split("=")
    return keys, value

def get_exec_string(dict_name, keys):
    exec_string = dict_name
    for key in keys.split("."):
        exec_string = exec_string + "[" + key + "]"
    exec_string = exec_string + "=" + "value"
    return exec_string

str_key_1 = "'user'.'username'=john"
str_key_2 = "'user'.'name'.'last'=henry"
str_key_list = [str_key_1, str_key_2]

for str_key in str_key_list:
    keys, value = get_keys_value(str_key)     # split into key-string and value
    exec_string = get_exec_string("a", keys)  # build e.g. a['user']['username']=value
    exec(exec_string)

print(a)
# prints {'user': {'email': 'micheal@domain.com', 'name': {'last': 'henry', 'first': 'Micheal'}, 'username': 'john'}}
str_key_1 = 'user.username=john'
str_key_2 = 'user.name.last=henry'

a = {
    'user': {
        'username': 'mic_jack',
        'name': {
            'first': 'Micheal',
            'last': 'Jackson'
        },
        'email': 'micheal@domain.com',
        #...
        #... Infinite level of another nested dict
    }
}
def MutateDict(key):
    strkey, strval = key.split('=')[0], key.split('=')[1]
    strkeys = strkey.split('.')
    print("strkeys = ", strkeys)
    target = a
    k = ""
    for k in strkeys:
        print(target.keys())
        if k in target.keys():
            prevTarget = target
            target = target[k]
        else:
            print("Invalid key specified")
            return
    prevTarget[k] = strval
MutateDict(str_key_1)
print(a)
MutateDict(str_key_2)
print(a)

python json.dumps and json.loads before DynamoDB insertion

Is there any way to populate an empty string at any position (without knowing the JSON's structure) in a JSON document received from a certain endpoint, before inserting it into DynamoDB? As you may know, DynamoDB has issues with floats, which you must transform into Decimals, but I can't seem to figure out an easy way to populate an empty string such as "full_name": "" with a value like "N/A".
I'm looking for something like json.loads(json.dumps(data), parse_float=Decimal), like the parse_float hook but for empty strings. Something clean and easy to use. I've seen you can use a custom cls class for that, but I don't quite get how to do it properly, especially without knowing the structure of the JSON, which might vary.
JSON example:
{
    "campaign_id": "9c1c6cd7-fd4d-480b-8c80-07091cdd4103",
    "creation_date": 1530804132,
    "objects": [
        {
            "id": 12345,
            "full_name": ""
        },
        ...
    ],
    ...
}
You can do this by defining an object_hook to pass to json.loads.
From the docs:
object_hook is an optional function that will be called with the result of any object literal decoded (a dict). The return value of object_hook will be used instead of the dict.
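As a minimal sketch of that route (my own example, not from the original answer; note that object_hook only sees JSON objects, so empty strings sitting directly inside arrays are not covered by it):
import json
from decimal import Decimal

def empty_to_na(obj):
    # json.loads calls this for every decoded JSON object (dict), innermost first
    return {k: ("N/A" if v == "" else v) for k, v in obj.items()}

raw = '{"id": 12345, "full_name": "", "price": 1.25}'
print(json.loads(raw, parse_float=Decimal, object_hook=empty_to_na))
# {'id': 12345, 'full_name': 'N/A', 'price': Decimal('1.25')}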
Given this dict:
>>> pprint(d)
{'campaign_id': '9c1c6cd7-fd4d-480b-8c80-07091cdd4103',
'creation_date': 1530804132,
'float': 1.2345,
'objects': [{'full_name': '', 'id': 12345}],
'strs': ['', 'abc', {'a': ''}],
'top_str': ''}
This pair of functions will recurse over the result of json.loads and change instances of the empty string to 'N/A'.
def transform_dict(mapping=None):
    if mapping is None:
        mapping = {}
    for k, v in mapping.items():
        if v == '':
            mapping[k] = 'N/A'
        elif isinstance(v, dict):
            mapping[k] = transform_dict(v)
        elif isinstance(v, list):
            mapping[k] = transform_list(v)
        else:
            # Make it obvious that we aren't changing other values
            pass
    return mapping

def transform_list(lst):
    for i, x in enumerate(lst):
        if x == '':
            lst[i] = 'N/A'
        elif isinstance(x, dict):
            lst[i] = transform_dict(x)
        elif isinstance(x, list):
            lst[i] = transform_list(x)
        else:
            # Make it obvious that we aren't changing other values
            pass
    return lst
>>> res = transform_dict(json.loads(json.dumps(d), parse_float=decimal.Decimal))
>>> pprint(res)
{'campaign_id': '9c1c6cd7-fd4d-480b-8c80-07091cdd4103',
'creation_date': 1530804132,
'float': Decimal('1.2345'),
'objects': [{'full_name': 'N/A', 'id': 12345}],
'strs': ['N/A', 'abc', {'a': 'N/A'}],
'top_str': 'N/A'}
Note that this approach depends on the input json being a json object ({...}).

How to mask a Python 3 nested dictionary to return a new dictionary with only certain items?

I am consuming several endpoints of an API that is very verbose in the data it returns. I would like to provide a subset of this data to another piece of code elsewhere.
Suppose I am given several dictionaries like this (which I plan to loop through and filter):
asset = {
    'id': 1,
    'name': 'MY-PC',
    'owner': 'me',
    'location': 'New York City',
    'model': {
        'id': 1,
        'name': 'Surface',
        'manufacturer': {
            'id': 1,
            'name': 'Microsoft'
        }
    }
}
I want to create a function that will take that dictionary in, along with a "mask" which will be used to create a new dictionary of only the allowed items. This might be an example mask (though, I can work with whatever format makes the resulting code the most concise):
mask = {
    'id': True,
    'name': True,
    'model': {
        'id': True,
        'name': True,
        'manufacturer': {
            'name': True
        }
    }
}
The function should then return this:
mask = {
    'id': 1,
    'name': 'MY-PC',
    'model': {
        'id': 1,
        'name': 'Surface',
        'manufacturer': {
            'name': 'Microsoft'
        }
    }
}
Is there something already built into Python 3 that would help with this? It looks like if I have to do this manually, it's going to get quite ugly quickly. I found itertools.compress, but that seems to be for lists and won't handle the complexity of dictionaries.
You can recursively build a new dict from the mask by selecting only the corresponding values from the main dict:
def prune_dict(dct, mask):
    result = {}
    for k, v in mask.items():
        if isinstance(v, dict):
            value = prune_dict(dct[k], v)
            if value:  # check that dict is non-empty
                result[k] = value
        elif v:
            result[k] = dct[k]
    return result
print(prune_dict(asset, mask))
{'id': 1,
'model': {'id': 1, 'manufacturer': {'name': 'Microsoft'}, 'name': 'Surface'},
'name': 'MY-PC'}
This would be a good chance to use recursion; here is some sample code I haven't tested:
def copy(asset, result, mask):
    for key_name, value in mask.items():
        if value == True:
            result[key_name] = asset[key_name]
        else:
            result[key_name] = x = {}
            copy(asset[key_name], x, value)

y = {}
copy(asset, y, mask)
This would probably be a recursive function. Also, for the mask, I recommend this format: mask = ["id", "name", "model.id", "model.name", "model.manufacturer.name"]
Then, you'd first keep only entries that are named in the mask:
def filterstage1(dictionary, mask):
    result = {}
    for key in dictionary:
        if isinstance(dictionary[key], dict):
            newmask = [maskname[maskname.find(".") + 1:] for maskname in mask if maskname.startswith(key + ".")]
            result[key] = filterstage1(dictionary[key], newmask)
        elif key in mask:
            result[key] = dictionary[key]
    return result
Then, depending on whether or not you want to remove sub-dictionaries that were not in the mask and had no subelements, you can include the second stage:
def filterstage2(dictionary, mask):
result = {}
for key in dictionary:
if not (isinstance(dictionary[key], dict) and dictionary[key] == {} and key not in mask):
result[key] = dictionary[key]
Final code: filterstage2(filterstage1(dictionary, mask), mask). You can combine the two stages together if you wish.
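A quick check against the asset dict from the question, using the dotted mask format recommended above (this usage example and the expected output are my own reading of the code, not from the answer):
mask = ["id", "name", "model.id", "model.name", "model.manufacturer.name"]
filtered = filterstage2(filterstage1(asset, mask), mask)
print(filtered)
# {'id': 1, 'name': 'MY-PC',
#  'model': {'id': 1, 'name': 'Surface', 'manufacturer': {'name': 'Microsoft'}}}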

How to merge two nested dictionaries in Python, appending list values?

For example I have two dicts:
schema = {
    'type': 'object',
    'properties': {
        'reseller_name': {
            'type': 'string',
        },
        'timestamp': {
            'type': 'integer',
        },
    },
    'required': ['reseller_name', 'timestamp'],
}
and
schema_add = {
    'properties': {
        'user_login': {
            'type': 'string',
        },
    },
    'required': ['user_login'],
}
How can I get the following merged (with appending) result dict:
schema_result = {
    'type': 'object',
    'properties': {
        'reseller_name': {
            'type': 'string',
        },
        'timestamp': {
            'type': 'integer',
        },
        'user_login': {
            'type': 'string',
        },
    },
    'required': ['reseller_name', 'timestamp', 'user_login'],
}
Rules:
In the example, the paths shared by schema and schema_add are properties and required.
If both dicts have dicts at the same path, they are merged using these same rules.
If both dicts have lists at the same path, the second list is appended to the first.
If both dicts have simple values (or a dict and a non-dict, or a list and a non-list) at the same path, the first value is overridden by the second.
If only one dict has a key at some path, that key and value are kept.
Not sure where the problem lies, but the way you're writing it down is almost like a computer program, and the example is like a test case. Why don't you start from this?
def add_dict(d1, d2):
    newdict = {}
    for (key, value) in d1.items():
        if key in d2:
            ...  # apply rules, add to newdict
        else:
            ...  # simply add
    for (key, value) in d2.items():
        if key not in d1:
            ...  # simply add
    return newdict
This can probably be written more tightly, but might be easier like that to edit.
Edit: after writing the last comment, I couldn't help but write a nicer implementation:
def merge_values(a, b):
    if a is None or b is None:
        return a or b
    # now handle cases where both have values
    if type(a) == dict:
        return add_dict(a, b)
    if type(a) == list:
        ...

def add_dict(d1, d2):
    return dict(
        [
            (key,
             merge_values(
                 d1.get(key, None),
                 d2.get(key, None)))
            for key in set(d1.keys()).union(d2.keys())
        ])
My own solution, with @Nicolas78's help:
def merge(obj_1, obj_2):
    if type(obj_1) == dict and type(obj_2) == dict:
        result = {}
        for key, value in obj_1.items():
            if key not in obj_2:
                result[key] = value
            else:
                result[key] = merge(value, obj_2[key])
        for key, value in obj_2.items():
            if key not in obj_1:
                result[key] = value
        return result
    if type(obj_1) == list and type(obj_2) == list:
        return obj_1 + obj_2
    return obj_2
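A quick check against the schema/schema_add pair from the question (this usage example is my own addition, not part of the answer):
schema_result = merge(schema, schema_add)
print(schema_result)
# {'type': 'object',
#  'properties': {'reseller_name': {'type': 'string'},
#                 'timestamp': {'type': 'integer'},
#                 'user_login': {'type': 'string'}},
#  'required': ['reseller_name', 'timestamp', 'user_login']}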
I am adding a simple solution to this problem, assuming that the sample data will not change.
def merge_nested_dicts(schema, schema_add):
    new_schema = schema
    for k in schema:
        if k in schema_add.keys():
            if isinstance(schema_add[k], dict):
                new_schema[k].update(schema_add[k])
            if isinstance(schema_add[k], list):
                new_schema[k] = new_schema[k] + schema_add[k]
    return new_schema
Try this if you know the keys exactly.
schema['properties'].update(schema_add['properties'])
schema['required'].extend(schema_add['required'])
The result is merged into schema.
If you do not know the keys exactly, then one loop is required to find the inner lists and dictionaries.
for key in schema:
    if isinstance(schema[key], dict):
        if key in schema_add and isinstance(schema_add[key], dict):
            schema[key].update(schema_add[key])
    elif isinstance(schema[key], list):
        if key in schema_add and isinstance(schema_add[key], list):
            schema[key].extend(schema_add[key])
The result can be merged into a different dict as well.
