I have a nested dict in Python containing YAML structures like
- id: left_time
type: u2
doc: Time left
and I want to obtain pairs like {id: doc}. For this example I want it to be: {"left_time": "Time left"}. The problem is I need to walk through them recursively.
My attempt is
def get_dict_recursively(search_dict, field):
    """Collect ``{id: value}`` pairs for every dict that contains ``field``.

    The structure is walked depth-first through nested dicts and through
    dicts stored inside lists.

    Args:
        search_dict: The (possibly nested) dictionary to search.
        field: The key whose value should be paired with the "id" of the
            dict it sits in (for example ``"doc"``).

    Returns:
        A flat list of single-entry dicts ``{id_value: field_value}``, in
        traversal order. A dict that has ``field`` but no "id" key is
        reported under the empty string.
    """
    fields_found = []
    for key, value in search_dict.items():
        if key == field:
            # Look the id up directly instead of relying on "id" having been
            # iterated before ``field``.
            fields_found.append({search_dict.get("id", ""): value})
        elif isinstance(value, dict):
            # Nested results are already complete {id: value} pairs; extend
            # rather than wrapping them in another dict at every level.
            fields_found.extend(get_dict_recursively(value, field))
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    fields_found.extend(get_dict_recursively(item, field))
    return fields_found
calling it like
get_dict_recursively(dict, "doc")
where
dict = {
meta:
id: foo
title: Foo
types:
data:
seq:
- id: left_time
type: u2
doc: Time left
gps:
seq:
- id: gps_st
type: b2
- id: sats
type: b6
doc: Number of satellites
}
There's a mistake somewhere, but I can't figure out what it is.
Let's first state your example data as a dict:
data = {
"meta": {
"id": "foo",
"title": "Foo"
},
"types": {
"data": {
"seq": [
{
"id": "left_time",
"type": "u2",
"doc": "Time left"
}
]
},
"gps": {
"seq": [
{
"id": "gps_st",
"type": "b2"
},
{
"id": "sats",
"type": "b6",
"doc": "Number of satellites"
}
]
}
}
}
Next, we can simplify your recursive function to look like this:
def extract_docs(data):
    """Return ``(id, doc)`` tuples for every dict holding both keys.

    Lists and dicts are searched recursively. A dict that has both "id" and
    "doc" yields one pair and is not searched any further; any other value
    contributes nothing.
    """
    if isinstance(data, dict):
        if "id" in data and "doc" in data:
            return [(data["id"], data["doc"])]
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Scalars (strings, numbers, None, ...) carry no documentation.
        return []

    pairs = []
    for child in children:
        pairs.extend(extract_docs(child))
    return pairs
With this you get
>>> dict(extract_docs(data))
{'sats': 'Number of satellites', 'left_time': 'Time left'}
Related
I have an object on dynamo db saved in the following way:
{
"name": "nameA",
...,
"properties": {
"prop1": "a",
...
}
}
If I pass the following object:
{
"name": "nameUpdate"
}
I would like to eventually get the following result:
{
"name": "nameUpdate",
...,
"properties": {
"prop1": "a",
...
}
}
The problem is that I get the object without the nested field:
{
"name": "nameUpdate",
...,
"properties": {}
}
MY APPROACH
To perform the update operation I am proceeding as follows:
def build_ddb_update_expression(data):
    """Build the pieces of a ``SET`` update expression from a flat mapping.

    Each key of ``data`` becomes one ``#pf_<key> = :<key>`` assignment.

    Args:
        data: Mapping of attribute name to the new value.

    Returns:
        A tuple ``(vals, exp, attr_names)``: the ``:<key>`` -> value mapping,
        the expression string, and the ``#pf_<key>`` -> key mapping.
    """
    prefix = '#pf'
    vals = {f':{key}': value for key, value in data.items()}
    attr_names = {f'#pf_{key}': key for key in data}
    assignments = ''.join(f'{prefix}_{key} = :{key}, ' for key in data)
    # Strip the separator left after the last assignment (and the blank after
    # "SET" when there are no assignments at all).
    exp = ('SET ' + assignments).rstrip(", ")
    return vals, exp, attr_names
...
# Round-trip the payload through JSON so that floats are re-parsed as
# decimal.Decimal before the update expression is built.
vals, exp, attr_names = build_ddb_update_expression(
json.loads(json.dumps(object_to_update), parse_float=decimal.Decimal))
# NOTE(review): build_ddb_update_expression emits one SET assignment per
# top-level key, so a nested dict value (even an empty one) is assigned as a
# whole. Presumably object_to_update carries "properties": {} here, which
# would explain the emptied map -- confirm what the payload contains.
response = table.update_item(
Key={'object_id': object_id},
# The condition names object_id via attribute_exists, so the update is
# conditional on that attribute being present.
ConditionExpression='attribute_exists(object_id)',
UpdateExpression=exp,
ExpressionAttributeValues=vals,
ExpressionAttributeNames=attr_names,
ReturnValues="ALL_NEW"
)
Has this ever happened to anyone?
Thanks in advance
I want to update the values in the dictionary dict with the values from the dictionary inp, using recursion or a loop.
The structure of dict must not change: only the values should be replaced, and the nesting must stay as it is.
Please suggest a solution that works for any level of nesting, not just for this particular case.
dict={
"name": "john",
"quality":
{
"type1":"honest",
"type2":"clever"
},
"marks":
[
{
"english":34
},
{
"math":90
}
]
}
inp = {
"name" : "jack",
"type1" : "dumb",
"type2" : "liar",
"english" : 28,
"math" : 89
}
Another solution, changing the dict in-place:
dct = {
"name": "john",
"quality": {"type1": "honest", "type2": "clever"},
"marks": [{"english": 34}, {"math": 90}],
}
inp = {
"name": "jack",
"type1": "dumb",
"type2": "liar",
"english": 28,
"math": 89,
}
def change(d, inp):
    """Overwrite leaf values of ``d`` in place with same-named values from ``inp``.

    Dicts and lists are walked recursively. For every dict entry whose value
    is neither a list nor a dict, the value is replaced by ``inp[key]`` when
    ``inp`` has that key and left unchanged otherwise. Returns nothing.
    """
    if isinstance(d, dict):
        for key in d:
            current = d[key]
            if isinstance(current, (list, dict)):
                change(current, inp)
            else:
                # Only existing keys are reassigned, so the dict's size does
                # not change while it is being iterated.
                d[key] = inp.get(key, current)
    elif isinstance(d, list):
        for element in d:
            change(element, inp)
change(dct, inp)
print(dct)
Prints:
{
"name": "jack",
"quality": {"type1": "dumb", "type2": "liar"},
"marks": [{"english": 28}, {"math": 89}],
}
First, make sure you rename the first dictionary, say to myDict: dict is the name of a built-in type in Python, and reusing it as a variable name shadows that built-in.
The below function will do what you are looking for, in a recursive manner.
def recursive_swipe(input_var, updates):
    """Return a copy of ``input_var`` with leaf values replaced from ``updates``.

    Lists and dicts are rebuilt recursively, so the nesting of the input is
    preserved and the input itself is not modified.

    Args:
        input_var: A dict, a list, or a scalar value.
        updates: Flat mapping of key -> replacement value.

    Returns:
        A new structure of the same shape. A dict entry whose value is not a
        list or dict takes ``updates[key]`` when that key is present and keeps
        its original value otherwise. Scalars are returned unchanged.
    """
    if isinstance(input_var, list):
        return [recursive_swipe(entry, updates) for entry in input_var]
    if isinstance(input_var, dict):
        output_var = {}
        for label, value in input_var.items():
            if isinstance(value, (list, dict)):
                output_var[label] = recursive_swipe(value, updates)
            else:
                # Keep leaves that have no replacement instead of dropping
                # them from the result.
                output_var[label] = updates.get(label, value)
        return output_var
    # Scalars (for example plain numbers inside a list) pass through as-is.
    return input_var
myDict = recursive_swipe(myDict, inp)
You may look for more optimal solutions if there are some limits to the formatting of the two dictionaries that were not stated in your question.
I am trying to convert a nested JSON into a CSV file with three columns: the level 0 key, the branch, and the lowest level leaf.
For example, in the JSON below:
{
"protein": {
"meat": {
"chicken": {},
"beef": {},
"pork": {}
},
"powder": {
"^ISOPURE": {},
"substitute": {}
}
},
"carbs": {
"_vegetables": {
"veggies": {
"lettuce": {},
"carrots": {},
"corn": {}
}
},
"bread": {
"white": {},
"multigrain": {
"whole wheat": {}
},
"other": {}
}
},
"fat": {
"healthy": {
"avocado": {}
},
"unhealthy": {}
}
}
I want to create an output like this (didn't include entire tree example just to get point across):
level 0
branch
leaf
protein
protein.meat
chicken
protein
protein.meat
beef
I tried using json normalize but the actual file will not have paths that I can use to identify the nested fields as each dictionary is unique.
This returns the level 0 field but I need to have these as rows, not columns. Any help would be very much appreciated.
I created a function that can unnest the JSON based on key values, like this:
import json
with open('path/to/json') as m:
my_json = json.load(m)
def unnest_json(data):
    """Print ``<key>.<value>`` for every entry of a nested dict.

    Dict values are descended into, as are dicts found directly inside list
    values. Other list elements (strings, nested lists, numbers, None, ...)
    are skipped.

    Args:
        data: The dictionary to walk.
    """
    for key, value in data.items():
        print(str(key)+'.'+str(value))
        if isinstance(value, dict):
            unnest_json(value)
        elif isinstance(value, list):
            for val in value:
                # Only dicts have .items(); recursing into a number or None
                # would raise AttributeError.
                if isinstance(val, dict):
                    unnest_json(val)
unnest_json(my_json)
Probably not the cleanest approach but I think you can use some sort of recursive function (traverse in below code) to convert the dictionary into a list of column values and then convert them to pandas DataFrame.
data = {
"protein": {
"meat": {
"chicken": {},
"beef": {},
"pork": {}
},
"powder": {
"^ISOPURE": {},
"substitute": {}
}
},
"carbs": {
"_vegetables": {
"veggies": {
"lettuce": {},
"carrots": {},
"corn": {}
}
},
"bread": {
"white": {},
"multigrain": {
"whole wheat": {}
},
"other": {}
}
},
"fat": {
"healthy": {
"avocado": {}
},
"unhealthy": {}
}
}
def traverse(col_values, dictionary, rows):
    """Append one ``[level 0, branch, leaf]`` row per leaf of ``dictionary``.

    Args:
        col_values: The row built so far; index 1 holds the dotted branch
            path and index 2 receives the leaf name. It is copied, never
            modified.
        dictionary: The subtree to walk. A key with an empty (falsy) value
            is treated as a leaf.
        rows: Output list that receives the finished rows.
    """
    for name, subtree in dictionary.items():
        row = list(col_values)
        if not subtree:
            # Leaf: record its name and emit the row.
            row[2] = name
            rows.append(row)
            continue
        # Inner node: extend the branch path and keep descending.
        row[1] = row[1] + '.' + name
        traverse(row, subtree, rows)
rows = []
for key in data:
traverse([key, str(key), None], data[key], rows)
import pandas as pd
df = pd.DataFrame(rows, columns=["level 0", "branch", "leaf"])
print(df)
Is there a way to filter a nested dict in Python, so I can see only the keys I'd specified ?
Example:
x = {
"field": [
{
"nm_field": "ch_origem_sistema_chave",
"inf_tabelado": {
"dropdown_value": "",
"dropdown_key": "",
"url_lista": "",
"chave_relacional": ""
},
},
{
"nm_field": "ax_andamento_data",
"inf_tabelado": {
"dropdown_value": "",
"dropdown_key": "",
"url_lista": "",
"chave_relacional": ""
},
}
],
"_metadata": {
"dt_reg": "22/01/2014 16:17:16",
"dt_last_up": "10/04/2014 16:30:44",
},
"url_detalhes": "/DetalhesDocsPro.aspx",
"url_app": "/docspro",
}
y = filter(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
Then var y would be something like:
{
"field": [
{
"nm_field": "ch_origem_sistema_chave",
"inf_tabelado": {
"dropdown_value": "",
},
},
{
"nm_field": "ax_andamento_data",
"inf_tabelado": {
"dropdown_value": "",
},
}
],
"_metadata": {
"dt_reg": "22/01/2014 16:17:16",
},
"url_app": "/docspro",
}
I've tried to do something using defaultdict, but had no success with lists at any level of recursion. I also found it difficult to work with the different data structures.
Here's a modified version of 2rs2ts's answer that returns a new object rather than modifying the old one (and handles filtering on non-leaf nodes):
import copy
def fltr(node, vals):
    """Return a filtered copy of ``node`` keeping only the keys in ``vals``.

    A dict entry whose key is in ``vals`` is deep-copied whole. Other entries
    survive only if they are a list or dict that still contains something
    after filtering. The input is not modified.

    Returns:
        The filtered dict or list, or ``None`` when nothing is left (this
        includes any ``node`` that is neither a dict nor a list).
    """
    if isinstance(node, dict):
        kept = {}
        for key, value in node.items():
            if key in vals:
                kept[key] = copy.deepcopy(value)
                continue
            if isinstance(value, (list, dict)):
                sub = fltr(value, vals)
                if sub:
                    kept[key] = sub
        return kept or None
    if isinstance(node, list):
        filtered = (fltr(entry, vals) for entry in node)
        kept = [sub for sub in filtered if sub]
        return kept or None
    return None
With this, you will call
y = fltr(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
and get
{
"field": [
{
"inf_tabelado": {
"dropdown_value": ""
},
"nm_field": "ch_origem_sistema_chave"
},
{
"inf_tabelado": {
"dropdown_value": ""
},
"nm_field": "ax_andamento_data"
}
],
"url_app": "/docspro",
"_metadata": {
"dt_reg": "22/01/2014 16:17:16"
}
}
Note that this will return None if everything is filtered. For example,
fltr(x, [])
will always return None, no matter what is in x.
Here's a solution which walks the structure in a depth-first manner to find the "leaf" nodes and checks whether each one's key is in your list of elements to preserve. When it finds a leaf whose key is not in that list, it removes it from the dictionary with del. (So this is done in-place.)
def fltr(d, vals):
    """Delete, in place, every leaf entry of ``d`` whose key is not in ``vals``.

    Dicts and lists are walked depth-first. An entry whose key is in ``vals``
    is kept untouched, including anything nested under it. Containers are
    never removed, even if filtering leaves them empty. Returns nothing.
    """
    if isinstance(d, list):
        for element in d:
            fltr(element, vals)
        return
    if not isinstance(d, dict):
        return

    doomed = []
    for key, value in d.items():
        if key in vals:
            continue
        if isinstance(value, (list, dict)):
            fltr(value, vals)
        else:
            # Deletion is deferred so the dict is not resized mid-iteration.
            doomed.append(key)
    for key in doomed:
        del d[key]
Note that I didn't define a function called filter, because it's a built-in one and you don't want to shadow it.
>>> fltr(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
>>> x
{'field': [{'inf_tabelado': {'dropdown_value': ''}, 'nm_field': 'ch_origem_sistema_chave'}, {'inf_tabelado': {'dropdown_value': ''}, 'nm_field': 'ax_andamento_data'}], 'url_app': '/docspro', '_metadata': {'dt_reg': '22/01/2014 16:17:16'}}
I'm trying to get the value of each json elements. I am expecting the type to be an array or list but instead, I get type unicode.
Here's my sample json file:
{
"accounts": [
{
"account": {
"basicDetails": {
"accountId": {
"acctName": "Test A",
"acctNumber": "Test B"
},
"accountBranchId": {
"branchName": "Test C",
"brancNumber": "Test D"
},
"cusName": "Test E"
},
"otherDetails": {
"dateCreated": "1999-10-01",
"dateClosed": "2000-10-01"
}
}
}
],
"userExtension": {
"testId": null,
"version": null
},
"status": {
"overallStatus": "S",
"messages": null
},
"_links": null
}
Here is the code I am currently trying
def extract_key(self, obj):
    """Print and collect the keys of all leaf entries in a parsed JSON value.

    Dicts (including OrderedDict) and lists are walked recursively. For each
    dict entry whose value is not itself a dict or list, the key's type and
    the key are printed and the key is recorded.

    Args:
        obj: The parsed JSON value (dict, list or scalar).

    Returns:
        The list of leaf keys in traversal order.
    """
    results = []

    def extract(node):
        if isinstance(node, dict):
            for k, v in node.items():
                if isinstance(v, (dict, list)):
                    extract(v)
                else:
                    print(type(k))
                    print(k)
                    results.append(k)
        elif isinstance(node, list):
            # Lists have no .items(); visit each element instead.
            for element in node:
                extract(element)

    extract(obj)
    return results
def print_keys(self):
    """Load C:\\Account.json and return what ``extract_key`` yields for it.

    The file is parsed with ``object_pairs_hook=OrderedDict`` so the key
    order of the document is preserved.
    """
    # The file is only read, so open it read-only; "r+" would also demand
    # write permission.
    with open("C:\\Account.json", "r") as jsonFile:
        data = json.load(jsonFile, object_pairs_hook=OrderedDict)
    names = self.extract_key(data)
    return names
I'm expecting to get the elements after "accounts": [ but it won't go through, because it treats "accounts" as unicode instead of as a list or array.
Your code first checks this condition:
if type(obj)== type(OrderedDict()) or isinstance(obj, list):
and so it enters the top-level object that contains "accounts", which is fine.
Then all you need is:
for k, v in obj.items():
if obj.items():
results.append(v)
Results got loaded with:
[OrderedDict([('account', OrderedDict([('basicDetails', OrderedDict([('accountId', OrderedDict([('acctName', 'Test A'), ('acctNumber', 'Test B')])), ('accountBranchId', OrderedDict([('branchName', 'Test C'), ('brancNumber', 'Test D')])), ('cusName', 'Test E')])), ('otherDetails', OrderedDict([('dateCreated', '1999-10-01'), ('dateClosed', '2000-10-01')]))]))])], OrderedDict([('testId', None), ('version', None)]), OrderedDict([('overallStatus', 'S'), ('messages', None)]), None]
Of course results must be declared before that, like:
results = []