In the following example I am trying to replace the value of one key with the value of another key. I have tried multiple approaches, but none of them seems to work.
The following is my code:
# Sample input: a service description containing a nested dict ("clusterRef")
# and a list of dicts ("healthChecks"), each with a "name" and a "summary".
d = {
"name" : "ABC",
"type" : "Service",
"clusterRef" : {
"clusterName" : "ABCSTUFF"
},
"serviceState" : "STARTED",
"healthChecks" : [ {
"name" : "STORAGE",
"summary" : "GOOD"
}, {
"name" : "CPU UTILIZATION",
"summary" : "GOOD"
} ],
"maintenanceMode" : "false"
}
########################
## Get Key Value
def get_key_values(d, key):
    """Print each leaf of a nested dict as ``<dotted path> : <value>``.

    The path starts at ``key``. An entry whose key is ``"name"`` contributes
    its *value* to the path instead of the literal word "name". Dicts are
    walked recursively; every element of a list is walked as a dict under the
    list's own path.
    """
    for name, value in d.items():
        label = value if name == "name" else name
        path = key + "." + label
        if isinstance(value, dict):
            get_key_values(value, path)
            continue
        if isinstance(value, list):
            for element in value:
                get_key_values(element, path)
            continue
        print("{0} : {1}".format(path, value))
# Walk the sample dict and print every leaf under the "TTS" prefix.
get_key_values(d, "TTS")
The result comes out like the following:
TTS.serviceState : STARTED
TTS.type : Service
TTS.ABC : ABC
TTS.clusterRef.clusterName : ABCSTUFF
TTS.healthChecks.summary : GOOD <<< remove this line and use its value ("GOOD") as the value of "TTS.healthChecks.STORAGE"
TTS.healthChecks.STORAGE : STORAGE
TTS.healthChecks.summary : GOOD <<< remove this line and use its value ("GOOD") as the value of "TTS.healthChecks.CPU UTILIZATION"
TTS.healthChecks.CPU UTILIZATION : CPU UTILIZATION
TTS.maintenanceMode : false
But I want the result to be the following:
TTS.serviceState : STARTED
TTS.type : Service
TTS.ABC : ABC
TTS.clusterRef.clusterName : ABCSTUFF
TTS.healthChecks.STORAGE : GOOD <<<
TTS.healthChecks.CPU UTILIZATION : GOOD <<<
TTS.maintenanceMode : false
Any help is much appreciated
Here's a non-generic solution which works for your question:
# Sample input: a service description containing a nested dict ("clusterRef")
# and a list of dicts ("healthChecks"), each with a "name" and a "summary".
d = {
"name": "ABC",
"type": "Service",
"clusterRef": {
"clusterName": "ABCSTUFF"
},
"serviceState": "STARTED",
"healthChecks": [{
"name": "STORAGE",
"summary": "GOOD"
}, {
"name": "CPU UTILIZATION",
"summary": "GOOD"
}],
"maintenanceMode": "false"
}
########################
# Get Key Value
def get_key_values(d, key):
    """Print each leaf of a nested dict as ``<dotted path> : <value>``.

    The path starts at ``key``. An entry whose key is ``"name"`` contributes
    its *value* to the path instead of the literal word "name".

    List handling:
      * a dict element that has a ``"name"`` key is printed as
        ``<path>.<name> : <summary>`` (``summary`` is ``None`` if absent);
      * a dict element without ``"name"`` is walked recursively;
      * any other element is printed as ``<path> : <element>``.

    Args:
        d: The dict to walk.
        key: Prefix for every printed path.
    """
    for k, v in d.items():
        label = v if k == "name" else k
        path = "{0}.{1}".format(key, label)
        if isinstance(v, dict):
            get_key_values(v, path)
        elif isinstance(v, list):
            for item in v:
                if isinstance(item, dict) and "name" in item:
                    # Collapse the name/summary pair into a single line.
                    print("{0}.{1} : {2}".format(path, item["name"], item.get("summary")))
                elif isinstance(item, dict):
                    # No "name" to label the entry with: walk it normally
                    # instead of failing on a missing key.
                    get_key_values(item, path)
                else:
                    # Scalars have no .get(); print them under the list path.
                    print("{0} : {1}".format(path, item))
        else:
            print("{0} : {1}".format(path, v))
# Walk the sample dict and print every leaf under the "TTS" prefix.
get_key_values(d, "TTS")
Related
I have a Python script, which uses a function from a previous Stack Overflow solution.
from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, parent_key=False, separator='.'):
    """Flatten a nested mapping into a single-level dict with joined keys.

    Nested mappings contribute ``<parent><separator><key>``; list elements
    contribute ``<parent><separator><index>``.

    Args:
        dictionary: The mapping to flatten.
        parent_key: Key prefix for this level; a falsy value means "no
            prefix" (top level).
        separator: String placed between key segments.

    Returns:
        A new dict mapping joined keys to leaf values.
    """
    items = []
    for key, value in dictionary.items():
        new_key = str(parent_key) + separator + key if parent_key else key
        if isinstance(value, mm):
            items.extend(flatten(value, new_key, separator).items())
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # Wrap the element in a one-key dict so the recursion names it
                # by its list index. The separator must be forwarded here too,
                # otherwise list indices would always be joined with '.'.
                items.extend(flatten({str(index): element}, new_key, separator).items())
        else:
            items.append((new_key, value))
    return dict(items)
# Sample document: a scalar, a list of scalars, and a list of nested dicts.
d = {
"_id" : 1,
"labelId" : [
6422
],
"levels" : [
{
"active" : "true",
"level" : 3,
"actions" : [
{
"isActive" : "true"
}]
}]
}
# Flatten to a single-level dict, then hand it to pandas' json_normalize
# and print the result.
x = flatten(d)
x = json_normalize(x)
print(x)
Current Output:
_id labelId.0 levels.0.active levels.0.level levels.0.actions.0.isActive
0 1 6422 true 3 true
The issue I am having is that the numeric list indices get included in the column names. Is there a way I can amend my code to achieve my desired output?
Desired Output:
_id labelId levels.active levels.level levels.actions.isActive
0 1 6422 true 3 true
First of all, using parent_key as a bool and then assigning it a value of another type is not best practice. It works, but it can become messy. I modified the code a bit, adding a separate argument to track the parent_key status as a bool, and p_key, which carries the string you wanted. Here is the snippet:
from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, p_key=None, parent_key=False, separator='.'):
    """Flatten a nested mapping, leaving list indices out of the keys.

    Args:
        dictionary: The mapping to flatten.
        p_key: Key path accumulated so far (``None`` at the top level).
        parent_key: When true, the current key is appended to ``p_key``;
            when false, ``p_key`` itself (if set) is used as the key, which is
            how list indices are kept out of the path.
        separator: String placed between key segments.

    Returns:
        A new single-level dict. Because indices are dropped, elements of a
        multi-element list share one key and the last one wins.
    """
    flat = {}
    for key, value in dictionary.items():
        if parent_key:
            path = f"{str(p_key)}{separator}{key}"
        elif p_key:
            path = p_key
        else:
            path = key

        if isinstance(value, mm):
            nested = flatten(
                dictionary=value,
                p_key=path,
                parent_key=True,
                separator=separator,
            )
            flat.update(nested)
        elif isinstance(value, list):
            for index, element in enumerate(value):
                # The index only serves as a throw-away key for the wrapper.
                wrapped = {str(index): element}
                flat.update(flatten(
                    dictionary=wrapped,
                    p_key=path,
                    parent_key=False,
                    separator=separator,
                ))
        else:
            flat[path] = value
    return flat
# Sample document: a scalar, a list of scalars, and a list of nested dicts.
d = {
"_id" : 1,
"labelId" : [
6422
],
"levels" : [
{
"active" : "true",
"level" : 3,
"actions" : [
{
"isActive" : "true"
}]
}]
}
# Flatten to a single-level dict, then hand it to pandas' json_normalize
# and print the result.
x = flatten(d)
x = json_normalize(x)
print(x)
I have tried to use the online Jsonify It tool, which can create nested JSON from my data, but I can't seem to get it to work. I have also tried the Python code from other posts, but that does not seem to work either. If you know of an easier method than using Python, that would be good.
Here is my .CSV data:
ID,Name,Date,Subject,Start,Finish
0,Ladybridge High School,01/11/2019,Maths,05:28,0
0,Ladybridge High School,02/11/2019,Maths,05:30,06:45
0,Ladybridge High School,01/11/2019,Economics,11:58,12:40
0,Ladybridge High School,02/11/2019,Economics,11:58,12:40
1,Loreto Sixth Form,01/11/2019,Maths,05:28,06:45
1,Loreto Sixth Form,02/11/2019,Maths,05:30,06:45
1,Loreto Sixth Form,01/11/2019,Economics,11:58,12:40
1,Loreto Sixth Form,02/11/2019,Economics,11:58,12:40
This is the nested JSON structure I would like:
{
"Timetable" : [ {
"Date" : {
"01-11-2019" : {
"Maths" : {
"Start" : "05:28",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
},
"02-11-2019" : {
"Maths" : {
"Start" : "05:30",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
}
},
"Name" : "Ladybridge High School"
}, {
"Date" : {
"01-11-2019" : {
"Maths" : {
"Start" : "05:28",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
},
"02-11-2019" : {
"Maths" : {
"Start" : "05:30",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
}
},
"Name" : "Loreto Sixth Form"
} ]
}
Something like this?
[EDIT]
I refactored it to handle arbitrary top-level keys for each entry in the timetable. I also made it first create a dict and then convert the dict to a list so that it can run in O(N) time, in case the input is very large.
import csv
# Build {"Timetable": [...]} from data.csv: one entry per ID, with the
# subjects' start/finish times nested under each date.
timetable = {}
with open('data.csv', newline='') as f:
    csv_data = [dict(row) for row in csv.DictReader(f, skipinitialspace=True)]

for row in csv_data:
    # Keyed by ID so each row finds its entry with a single dict lookup.
    entry = timetable.setdefault(row["ID"], {"ID": row["ID"], "Date": {}})
    # setdefault keeps the subjects already stored for this date; assigning a
    # fresh {} per row would leave only the last subject of every date.
    entry["Date"].setdefault(row["Date"], {})[row["Subject"]] = {
        "Start": row["Start"],
        "Finish": row["Finish"]
    }
    # Every other column (e.g. "Name") is copied onto the entry as-is.
    for k, v in row.items():
        if k not in ("Date", "Subject", "Start", "Finish"):
            entry[k] = v

timetable = {"Timetable": list(timetable.values())}
An improvement to the above answer to nest the ID before the name and date:
import csv
# Build {"Timetable": [...]} from the CSV: one entry per school name, with
# the subjects' start/finish times nested under each date.
timetable = {"Timetable": []}
print(timetable)
with open("C:/Users/kspv914/Downloads/data.csv", newline='') as f:
    csv_data = [dict(row) for row in csv.DictReader(f, skipinitialspace=True)]

# Index the entries by name: gives first-seen (deterministic) order and
# avoids scanning the whole timetable for every row.
entries_by_name = {}
for row in csv_data:
    entry = entries_by_name.get(row["Name"])
    if entry is None:
        entry = {"Name": row["Name"], "Date": {}}
        entries_by_name[row["Name"]] = entry
        timetable["Timetable"].append(entry)
    # setdefault keeps the subjects already stored for this date; assigning a
    # fresh {} per row would leave only the last subject of every date.
    entry["Date"].setdefault(row["Date"], {})[row["Subject"]] = {
        "Start": row["Start"],
        "Finish": row["Finish"]
    }
print(timetable)
I need help with a function to flatten a nested dictionary in the following format:
# Sample record: scalar fields, a nested dict whose values are lists
# ("entities_discovered"), and two top-level lists (strings and ints).
dict_test = {
"id" : "5d4c2c0fd89234260ec81",
"Reference Number" : "JA-L800D-191",
"entities_discovered" : {
"OTHER_ID" : [
"L800DFAG02191"
],
"CODE_ID" : [
"160472708",
"276954773"
]
},
"label_field" : [
"ELECTRONICS",
"HDMI"
],
"numeric_field" : [
491,
492
],
}
The function I was working with flattens the dictionary to one dimension (key: value) as I want, but it doesn't join the values that belong to the same key.
def flatten(d):
    """Flatten nested dicts/lists into one dict keyed by dotted paths.

    Dict keys and list indices are joined with ``.``. A top-level key or index
    is used unchanged (so top-level list indices stay ints). Empty dicts and
    lists contribute nothing. A scalar ``d`` is returned under the key ``''``.

    Args:
        d: A dict, list or scalar.

    Returns:
        A new dict mapping each path to its leaf value.
    """
    agg = {}

    def _flatten(node, prev_key=None):
        # ``None`` (not falsiness) marks "no prefix yet": index 0 and the
        # empty-string key are valid prefixes and must not be discarded,
        # otherwise nested paths collide with their siblings.
        if isinstance(node, list):
            children = enumerate(node)
        elif isinstance(node, dict):
            children = node.items()
        else:
            agg['' if prev_key is None else prev_key] = node
            return
        for k, v in children:
            new_k = k if prev_key is None else '%s.%s' % (prev_key, k)
            _flatten(v, prev_key=new_k)

    _flatten(d)
    return agg
My current output is:
{
"id" : "5d4c2c0fd89234260ec81",
"Reference Number" : "JA-L800D-191",
"entities_discovered.OTHER_ID.0" : "L800DFAG02191",
"entities_discovered.CODE_ID.0" : "160472708",
"entities_discovered.CODE_ID.1" : "276954773",
"label_field.0" : "ELECTRONICS",
"label_field.1" : "HDMI",
"numeric_field.0" : 491,
"numeric_field.1" : 492
}
But actually I'm looking for something like this (joining the values into a single string, separated by , or |):
{
"id" : "5d4c2c0fd89234260ec81",
"Reference Number" : "JA-L800D-191",
"OTHER_ID" : "L800DFAG02191",
"CODE_ID" : "160472708, 276954773",
"label_field" : "ELECTRONICS, HDMI",
"numeric_field" : "491, 492"
}
You can use the built-in str.join() method to join the values together.
def do(data=None):
    """Flatten a record: lift nested-dict keys to the top, join list values.

    Keys of nested dicts are promoted to the top level (the nesting key itself
    is dropped), and every list value becomes a single ``', '``-joined string.
    Scalar values are kept unchanged, including those inside nested dicts.

    Args:
        data: The dict to flatten. When omitted, the built-in sample record
            is used.

    Returns:
        A new single-level dict.
    """
    if data is None:
        data = {
            "id": "5d4c2c0fd89234260ec81",
            "Reference Number": "JA-L800D-191",
            "entities_discovered": {
                "OTHER_ID": [
                    "L800DFAG02191"
                ],
                "CODE_ID": [
                    "160472708",
                    "276954773"
                ]
            },
            "label_field": [
                "ELECTRONICS",
                "HDMI"
            ],
            "numeric_field": [
                491,
                492
            ],
        }

    new_dict = {}

    def _collect(mapping):
        # Recurse into dicts so their keys land at the top level; anything
        # that is not a dict or list is stored as-is rather than dropped.
        for key, value in mapping.items():
            if isinstance(value, dict):
                _collect(value)
            elif isinstance(value, list):
                new_dict[key] = ', '.join(str(item) for item in value)
            else:
                new_dict[key] = value

    _collect(data)
    return new_dict
# Print the flattened sample only when the file is run as a script.
if __name__ == '__main__':
    print(do())
Output:
{
'id': '5d4c2c0fd89234260ec81',
'Reference Number': 'JA-L800D-191',
'OTHER_ID': 'L800DFAG02191',
'CODE_ID': '160472708, 276954773',
'label_field': 'ELECTRONICS, HDMI',
'numeric_field': '491, 492'
}
def recursive_flatten_dict(tmp, dict_test):
    """Copy every non-dict value of a nested dict into ``tmp``.

    Nested dicts are walked recursively and their own keys are discarded, so
    only leaf keys appear in the result; a leaf key that occurs more than once
    keeps the value visited last. Lists are stored unchanged.

    Args:
        tmp: Dict that receives the leaves; it is modified in place.
        dict_test: The (possibly nested) dict to read from.

    Returns:
        ``tmp`` itself.
    """
    for i, v in dict_test.items():
        # isinstance also recognises dict subclasses such as OrderedDict,
        # which an exact type comparison would store as opaque values.
        if isinstance(v, dict):
            recursive_flatten_dict(tmp, v)
        else:
            tmp[i] = v
    return tmp
# Flatten dict_test into a fresh dict (the return value is not stored here).
recursive_flatten_dict({},dict_test)
Simple recursion using a generator:
def flatten(d):
    """Yield ``(key, value)`` pairs for every non-dict value in ``d``.

    Nested dicts are walked recursively and contribute only their own pairs
    (the nesting key is not emitted). List values are emitted as one string
    with the items converted via ``str`` and joined by ``', '``.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            for pair in flatten(value):
                yield pair
        elif isinstance(value, list):
            yield key, ', '.join(str(element) for element in value)
        else:
            yield key, value
# Collect the generated pairs into a dict and print it.
print(dict(flatten(dict_test)))
Output:
{
'id': '5d4c2c0fd89234260ec81',
'Reference Number': 'JA-L800D-191',
'OTHER_ID': 'L800DFAG02191',
'CODE_ID': '160472708, 276954773',
'label_field': 'ELECTRONICS, HDMI',
'numeric_field': '491, 492'
}
def flatten(dict_test):
    """Flatten the sample record in place and return it.

    ``label_field`` and ``numeric_field`` lists are replaced by ``', '``-joined
    strings. Each list under ``entities_discovered`` is joined the same way
    and stored under its own key at the top level; the
    ``entities_discovered`` key itself is then removed.

    Args:
        dict_test: The record to flatten; it is modified in place.

    Returns:
        The same ``dict_test`` object.

    Raises:
        KeyError: If ``label_field`` or ``numeric_field`` is missing.
    """
    for key in ('label_field', 'numeric_field'):
        value = dict_test[key]
        # Only join real lists, so a second call does not split an
        # already-joined string into characters.
        if isinstance(value, list):
            dict_test[key] = ', '.join(str(c) for c in value)
    # pop() removes the nested dict from the result; the default keeps a
    # repeated call from failing once it is gone.
    for c, values in dict_test.pop('entities_discovered', {}).items():
        # str() so that non-string identifiers can be joined as well.
        dict_test[c] = ', '.join(str(v) for v in values)
    return dict_test
The above function does the job. I hope this is what you are looking for.
My function below is calculating the depth of a nested dict.
#!/usr/bin/env python3
def get_dict_depth(d, depth=0):
    """Return the nesting depth of ``d``, not descending into ``'id'`` keys.

    A non-dict or empty dict has the depth it was reached at. Otherwise each
    value is measured one level deeper, except the value stored under ``'id'``,
    which counts as the current depth; the maximum is returned.
    """
    if isinstance(d, dict) and d:
        child_depths = []
        for name, child in d.items():
            if name == 'id':
                child_depths.append(depth)
            else:
                child_depths.append(get_dict_depth(child, depth + 1))
        return max(child_depths)
    return depth
# Sample nested dict; several of its levels carry an "id" key.
foobar = {
"key1" : "val1",
"key2" : {
"id" : "val2"
},
"new_d" : {
"key" : "val",
"key2" : {
"id" : "blabla",
"key" : {
"id" : "blabla",
}
},
}
}
# Compute the depth of the sample dict and print it.
depth = get_dict_depth(foobar)
print("Depth %d" % depth)
I would like to modify it so that keys named id are not included in the depth calculation. The program works if I use a ternary in the generator expression:
return max(get_dict_depth(v, depth+1) if k != 'id' else depth for k, v in d.items())
But I can't seem to make it work by filtering:
return max(get_dict_depth(v, depth+1) for k, v in d.items() if k != 'id')
Why isn't my filter working and how do I make it work?
Move the check to your get_dict_depth function:
def get_dict_depth(d, depth=0):
    """Return the nesting depth of ``d``, treating dicts with ``'id'`` as leaves.

    A value that is not a dict, is an empty dict, or is a dict containing the
    key ``'id'`` has the depth it was reached at. Any other dict is as deep as
    its deepest value, each value being measured one level further down.
    """
    is_branch = isinstance(d, dict) and bool(d) and 'id' not in d
    if not is_branch:
        return depth
    deepest = depth
    for child in d.values():
        deepest = max(deepest, get_dict_depth(child, depth + 1))
    return deepest
This gives me the result:
Depth 2
I am looking to write a recursive function:
arguments: d, dictionary
result: list of dictionaries
def expand_dictionary(d):
    """Placeholder for the function being asked about.

    It currently ignores ``d`` and returns an empty list.
    """
    return []
The function recursively goes through a dictionary and flattens nested objects using an _, in addition it expands out nested lists into the array, and includes the parent label.
Think of creating a relational model from a document.
Here is an example input and output:
# Example input document: a scalar id, a nested dict ("name"), a list of
# flat dicts ("cities") and a list of dicts that themselves hold a list
# ("teachers" -> "classes").
original_object = {
    "id" : 1,
    "name" : {
        "first" : "Alice",
        "last" : "Sample"
    },
    "cities" : [
        {
            "id" : 55,
            "name" : "New York"
        },
        {
            "id" : 60,
            "name" : "Chicago"
        }
    ],
    "teachers" : [
        {
            "id" : 2,
            "name" : "Bob",
            "classes" : [
                {
                    "id" : 13,
                    "name" : "math"
                },
                {
                    "id" : 16,
                    "name" : "spanish"
                }
            ]
        }
    ]
}
# Expected result for the example input: the root row first, then one row per
# nested list element, each tagged with the name of the list it came from
# under the "_parent_object" key.
expected_output = [
    {
        "id" : 1,
        "name_first" : "Alice",
        "name_last" : "Sample"
    },
    {
        "_parent_object" : "cities",
        "id" : 55,
        "name" : "New York"
    },
    {
        "_parent_object" : "cities",
        "id" : 60,
        "name" : "Chicago"
    },
    {
        "_parent_object" : "teachers",
        "id" : 2,
        "name" : "Bob"
    },
    {
        "_parent_object" : "teachers_classes",
        "id" : 13,
        "name" : "math"
    },
    {
        "_parent_object" : "teachers_classes",
        "id" : 16,
        "name" : "spanish"
    }
]
the code currently being used for flattening is:
def flatten_dictionary(d):
    """Flatten nested dicts into one level, joining keys with ``_``.

    Values equal to ``None``, ``[]`` or ``''`` are left out of the result.
    Lists and all other values are kept unchanged under their key.
    """
    flat = {}
    for key, value in d.items():
        if isinstance(value, dict):
            # Prefix every already-flattened child key with this key.
            for sub_key, sub_value in flatten_dictionary(value).items():
                flat[key + '_' + sub_key] = sub_value
        elif not (value is None or value == [] or value == ''):
            flat[key] = value
    return flat
That will do
def expand_dictionary(d,name=None,l=None):
    """Split a nested document into a list of flat rows.

    The row for ``d`` holds its scalar values plus the flattened contents of
    its nested dicts (via ``flatten_dictionary``). Every element of a list
    value becomes its own row, tagged with ``'_parent_object'`` set to the
    ``_``-joined path of list names leading to it. List elements are passed
    straight back into this function, so they are expected to be dicts.

    Args:
        d: The dict to expand.
        name: Parent path for this row; ``None`` for the root.
        l: List collecting the rows; created on the outermost call.

    Returns:
        The list of rows: the row for ``d`` first, nested rows after it in
        traversal order.
    """
    obj = {}
    if l is None:
        l = [obj]
    else:
        l.append(obj)
    prefix = name + '_' if name else ''
    if prefix:
        obj['_parent_object'] = name
    # .items() and an explicit loop: dict.iteritems() is Python 2 only, and
    # map() is lazy in Python 3, so it would never run the recursive calls.
    for i, v in d.items():
        if isinstance(v, list):
            for x in v:
                expand_dictionary(x, prefix + i, l)
        elif isinstance(v, dict):
            obj.update(flatten_dictionary({i: v}))
        else:
            obj[i] = v
    return l
After working through it a bit, here is what I have come up with. It can probably be optimized significantly. Based on @paulo-scardine's comment, I added the parent's primary key to keep the relational model. I would love to hear thoughts on optimization.
def expand_dictionary(original_object, object_name, objects=None):
    """Split a nested document into a list of flat, relational-style rows.

    Nested dicts are flattened into the row with ``_``-joined keys (values
    equal to ``None``, ``[]`` or ``''`` are omitted). Every list whose
    elements are all dicts is removed from the row and each element becomes
    its own row, carrying ``'_meta_parent_foreign_key'`` (the parent's
    ``'id'``) and ``'_meta_object_name'`` (``<object_name>_<list key>``).
    Lists containing non-dict elements stay on the row as ordinary values.
    The input objects are not modified.

    Args:
        original_object: The dict to expand.
        object_name: Name stored in the row's ``'_meta_object_name'``.
        objects: List collecting the rows; created when omitted.

    Returns:
        ``objects``, with child rows appended before their parent row.

    Raises:
        KeyError: If a row that has child rows lacks an ``'id'`` value.
    """
    if objects is None:
        objects = []

    def flatten_dictionary(dictionary):
        # Flatten nested dicts into one level; drop None / [] / '' values.
        flat = {}
        for key, value in dictionary.items():
            if isinstance(value, dict):
                for sub_key, sub_value in flatten_dictionary(value).items():
                    flat[key + '_' + sub_key] = sub_value
            elif not (value is None or value == [] or value == ''):
                flat[key] = value
        return flat

    original_object_root = flatten_dictionary(original_object)
    original_object_root['_meta_object_name'] = object_name
    # No dict values can remain after flattening, so only lists need handling.
    # Iterate over a snapshot because keys are removed inside the loop.
    for key, value in list(original_object_root.items()):
        if not (isinstance(value, list) and all(isinstance(item, dict) for item in value)):
            continue
        del original_object_root[key]
        child_name = object_name + "_" + key
        for nested_object in value:
            # Work on a shallow copy so the caller's data is not polluted
            # with the bookkeeping key.
            child = dict(nested_object)
            child['_meta_parent_foreign_key'] = original_object_root['id']
            expand_dictionary(child, child_name, objects)
    objects.append(original_object_root)
    return objects