Converting JSON to nested dictionary(python) - python

I have a json object like this:
{
parser_config:
[{"element_name": "jobs","type": "class", "attributes": ["title is-5", "company", "location"]},
{"element_name": "page-details","type":"class","attributes":["subtitle is-3", "title is-1"]}]
}
I want to create a nested dictionary out of the attributes like this:
{"jobs": {"class": "title is-5", "class_2": "location", "class_3": "company"},
"page-details": {"class": "subtitle is-3", "class_2": "title is-1"}}
I have created a function like this:
def AttributeConstructor(json_data: dict, attribute_type: str) -> dict:
specific_attributes = []
config_data = json_data['parser_config']
for i in range(len(config_data)):
for target in config_data[i]['attributes']:
target_type = config_data[i]['type']
if target_type == attribute_type:
element_name = config_data[i]['element_name']
specific_attributes.append(
{element_name: {target_type: target}})
return specific_attributes
I am not sure how to get the desired output of keeping 1 element name for all {target_type: target}. I am setting attribute_type to "class".
The goal is to convert the JSON into a nested dictionary using a function, I cannot seem to get my function to output everything so I am left with:
{"jobs": {"class": "company"},
"page-details": {"class": "title is-1"}}
instead of the whole nested dictionary. I also would like this to work on much larger JSON objects.

You only add singleton dictionaries {element_name: {target_type: target}} to the list, one per attribute.
You could try
def attribute_constructor(json_data, attribute_type):
    """Build one ``{element_name: {...}}`` dict per matching config record.

    Args:
        json_data: Mapping with a ``'parser_config'`` list. Each record is
            read for its ``"element_name"``, ``"type"`` and ``"attributes"``
            keys.
        attribute_type: Only records whose ``"type"`` equals this value are
            included.

    Returns:
        A list with one single-key dict per matching record. The inner dict
        maps the first attribute to the bare type name and every later
        attribute to ``"<type>_<position>"`` (positions start at 1).
    """
    return [
        {
            rec["element_name"]: {
                # The first attribute keeps the plain type name; later ones
                # get a numeric suffix so the keys stay unique.
                rec["type"] if i == 1 else f'{rec["type"]}_{i}': attr
                for i, attr in enumerate(rec["attributes"], start=1)
            }
        }
        for rec in json_data['parser_config']
        if rec["type"] == attribute_type
    ]
or, closer to your original attempt,
def attribute_constructor(json_data, attribute_type):
    """Collect one ``{element_name: {...}}`` dict per matching config record.

    Args:
        json_data: Mapping with a ``'parser_config'`` list. Each record is
            read for its ``"element_name"``, ``"type"`` and ``"attributes"``
            keys.
        attribute_type: Only records whose ``"type"`` equals this value are
            included.

    Returns:
        A list with one single-key dict per matching record. The inner dict
        maps the first attribute to the bare type name and every later
        attribute to ``"<type>_<position>"`` (positions start at 1).
    """
    specific_attributes = []
    for rec in json_data['parser_config']:
        target_type = rec["type"]
        if target_type != attribute_type:
            continue
        # Build the whole inner dict for this record at once, so all of its
        # attributes end up under a single element name.
        specific_attributes.append(
            {
                rec["element_name"]: {
                    target_type if i == 1 else f'{target_type}_{i}': attr
                    for i, attr in enumerate(rec["attributes"], start=1)
                }
            }
        )
    return specific_attributes
instead.
Result for
json_data = {
"parser_config": [
{"element_name": "jobs", "type": "class", "attributes": ["title is-5", "company", "location"]},
{"element_name": "page-details", "type": "class","attributes": ["subtitle is-3", "title is-1"]}
]
}
print(attribute_constructor(json_data, "class"))
is
[{'jobs': {'class': 'title is-5', 'class_2': 'company', 'class_3': 'location'}},
{'page-details': {'class': 'subtitle is-3', 'class_2': 'title is-1'}}]

Related

Why is my for loop filling in values into the next json object if it has an empty array?

I'm working with a large number of large JSON files. I've written the code below (which is extremely inelegant) in order to generate two dictionaries with which I can create a dataframe to work with. However, in instances where the JSON has values with empty arrays, my code is propagating the last 'valid' values into the subsequent objects with empty arrays. I've tried replacing empty arrays with blanks, but that doesn't seem to work either. (I know my code is very bad - I'm still learning, so please keep that in mind.)
dicts = []
fid = []
x=0
while x < 1:
for i in files:
n=[]
k = []
t = []
op = open(i)
data = op.read()
js = json.loads(data)
items = js['metadata']['items']
#items = json.dumps(items).replace('[]','""')
#items = json.loads(items)
fileid = js['id']
fid.append(fileid)
##Everything after this point is what's throwing me off##
for a in items:
b = json.loads(json.dumps(a, sort_keys =True))
key = b['name']
k.append(key)
val = b['values']
values = []
for c in val:
j=json.dumps(c['value'])
if isinstance(c, list) == False:
continue
values.append(j)
j = ';'.join(values) #<-- For objects with more than one value
t.append(j)
output_dict = dict(zip([key], [j]))
n.append(output_dict)
dicts.append(n)
x = x+1
Here is an example section of the json where I'm observing this behavior:
x = {"metadata": {
"items": [
{
"values": [
{ "attribute1": "attribute", #<-- NOT IMPORTANT
"value": "VALUE 1" #<----VALUE I'M AFTER
},
{"attribute2": "attribute",#<-- NOT IMPORTANT
"value2": "VALUE 2"#<----VALUE I'M AFTER
}
],
"name": "NAME 1" #<--NAME I'M AFTER
},
{
"values": [
{
"value": []#<-- EMPTY ARRAY
}
],
"name": "NAME 2"}
]
}
}
In the above snippet, my ideal output is a list of dictionary pairings that looks like:
[{"NAME 1": "VALUE 1; VALUE 2", "NAME 2": " "...}]
But what I'm getting is:
[{"NAME 1": "VALUE 1; VALUE 2"}, {"NAME 2": "VALUE 1; VALUE 2"}...}]
I've tried deconstructing my work, and can't figure out why. I've re-indented and done a walk through a couple times and I don't understand why it would behave like this. What about the way my loop is constructed is causing this?

Pydantic create model for list with nested dictionary

I have a request body that looks like this:
{
"data": [
{
"my_api": {
"label": "First name",
"value": "Micheal"
}
},
{
"my_api": {
"label": "Last name",
"value": [
"Jackson"
]
}
},
{
"my_api": {
"label": "Favourite colour",
"value": "I don't have any"
}
}
]
}
This is my model.py so far:
class DictParameter(BaseModel): # pylint: disable=R0903
"""
`my_api` children
"""
label: Optional[str]
value: Optional[str]
class DataParameter(BaseModel): # pylint: disable=R0903
"""
`data` children
"""
my_api: Optional[dict] # NOTE: Future readers, this incorrect reference is part of the OP's Q
class InputParameter(BaseModel): # pylint: disable=R0903
"""
Takes predefined params
"""
data: Optional[List[DataParameter]]
In main.py:
from model import InputParameter
#router.post("/v2/workflow", status_code=200)
def get_parameter(user_input: InputParameter):
"""
Version 2 : No decoding & retrieve workflow params
"""
data = user_input.data
print(data)
Output:
[DataParameter(my_api={'label': 'First name', 'value': 'Micheal'}), DataParameter(my_api={'label': 'Last name', 'value': ['Jackson']}), DataParameter(my_api={'label': 'Favourite colour', 'value': "I don't have any"})]
I want to access the value inside my_api key. But I keep getting type error. I'm not sure how to access List of dictionary with nested child. Plus, the value of value can be str or array. It is dynamic.
Is there any other way of doing this?
Plus, the value of value can be str or array. It is dynamic.
What you currently have will cast single element lists to strs, which is probably what you want. If you want lists to stay as lists, use:
from typing import Union  # the module name is lowercase: `typing`


class DictParameter(BaseModel):
    # The field name must match the JSON key ("value", lowercase).
    # Accept either a plain string or a list of strings.
    value: Union[str, list[str]]
Unless you have the good luck to be running Python 3.10 or later, in which case str | list[str] is equivalent.
However, you do not actually use this model! You have my_api: Optional[dict] not my_api: Optional[DictParameter], so your current output is a plain old dict, and you need to do data[0].my_api["value"]. Currently this returns a str or a list, which is probably the problem. I suspect, though, that you meant to use the pydantic schema.
Note that data is a list: if you want all the values you need to iterate, something like
apis = [x.my_api for x in data]
Assuming you fix the issue in DictParameter (as pointed out by other answer by #2e0byo):
class DictParameter(BaseModel):
label: Optional[str]
value: Optional[Union[str, List[str]]]
And you fix the issue in DataParameter:
class DataParameter(BaseModel):
# my_api: Optional[dict] <-- prev value
my_api: Optional[DictParameter]
You can access the values in your object the following way:
def get_value_from_data_param(param_obj: "InputParameter", key: str):
    """Return the ``value`` of the first entry whose label equals ``key``.

    Args:
        param_obj: Object whose ``data`` attribute is an iterable of items,
            each with a ``my_api`` attribute exposing ``label`` and
            ``value`` (or ``None``).
        key: Label to look for.

    Returns:
        The matching entry's ``value``, or ``None`` if no entry has that
        label (or if ``data`` is empty/``None``).
    """
    # `data` is declared Optional on the model, so tolerate None here.
    for item in param_obj.data or []:
        # Skip entries that carry no `my_api` payload.
        if not item.my_api:
            continue
        # This assumes there are no duplicate labels; if there are,
        # collect the values in a list instead of returning the first hit.
        if item.my_api.label == key:
            return item.my_api.value
    # Nothing matched.
    return None
Now let's test it:
input_data = {
    "data": [
        {"my_api": {"label": "First name", "value": "Micheal"}},
        {"my_api": {"label": "Last name", "value": ["Jordan"]}},
        {"my_api": {"label": "Favourite colour", "value": "I don't have any"}},
        # Entry needed by the "Numbers" assertion below.
        {"my_api": {"label": "Numbers", "value": ["1", "2", "3"]}},
    ]
}
# Create an object
input_param_obj = InputParameter.parse_obj(input_data)
# Let's see if we can get values:
f_name = get_value_from_data_param(input_param_obj, "First name")
# Must match the spelling used in input_data ("Micheal").
assert f_name == 'Micheal'
l_name = get_value_from_data_param(input_param_obj, "Last name")
assert l_name == ['Jordan']
nums = get_value_from_data_param(input_param_obj, "Numbers")
assert nums == ["1", "2", "3"]
erroneous = get_value_from_data_param(input_param_obj, "KEY DOES NOT EXIST")
assert erroneous is None

Python parse JSON file

{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information",
"Microsoft.AspNetCore": "Warning",
"System.Net.Http.HttpClient.Default.ClientHandler": "Warning",
"System.Net.Http.HttpClient.Default.LogicalHandler": "Warning"
}
},
"AllowedHosts": "*",
"AutomaticTransferOptions": {
"DateOffsetForDirectoriesInDays": -1,
"DateOffsetForPortfoliosInDays": -3,
"Clause": {
"Item1": "1"
}
},
"Authentication": {
"ApiKeys": [
{
"Key": "AB8E5976-2A7C-4EEE-92C1-7B0B4DC840F6",
"OwnerName": "Cron job",
"Claims": [
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "StressTestManager"
}
]
},
{
"Key": "B11D4F27-483A-4234-8EC7-CA121712D5BE",
"OwnerName": "Test admin",
"Claims": [
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "StressTestAdmin"
},
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "TestManager"
}
]
},
{
"Key": "EBF98F2E-555E-4E66-9D77-5667E0AA1B54",
"OwnerName": "Test manager",
"Claims": [
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "TestManager"
}
]
}
],
"LDAP": {
"Domain": "domain.local",
"MachineAccountName": "Soft13",
"MachineAccountPassword": "vixuUEY7884*",
"EnableLdapClaimResolution": true
}
},
"Authorization": {
"Permissions": {
"Roles": [
{
"Role": "TestAdmin",
"Permissions": [
"transfers.create",
"bindings.create"
]
},
{
"Role": "TestManager",
"Permissions": [
"transfers.create"
]
}
]
}
}
}
I have JSON above and need to parse it with output like this
Logging__LogLevel__Default
Authentication__ApiKeys__0__Claims__0__Type
Everything is ok, but I always get some strings with this output
Authentication__ApiKeys__0__Key
Authentication__ApiKeys__0__OwnerName
Authentication__ApiKeys__0__Claims__0__Type
Authentication__ApiKeys__0__Claims__0__Value
Authentication__ApiKeys__0__Claims__0
Authentication__ApiKeys__2
Authorization__Permissions__Roles__0__Role
Authorization__Permissions__Roles__0__Permissions__1
Authorization__Permissions__Roles__1__Role
Authorization__Permissions__Roles__1__Permissions__0
Authorization__Permissions__Roles__1
Why does my code add incomplete strings like
Authentication__ApiKeys__0__Claims__0
Authentication__ApiKeys__2
Authorization__Permissions__Roles__1
And why doesn't it print every value from
Authorization__Permissions__Roles__0__Permissions__*
and from
Authorization__Permissions__Roles__1__Permissions__*
I have this code in python3:
def checkdepth(sub_key, variable):
delmt = '__'
for item in sub_key:
try:
if isinstance(sub_key[item], dict):
sub_variable = variable + delmt + item
checkdepth(sub_key[item], sub_variable)
except TypeError:
continue
if isinstance(sub_key[item], list):
sub_variable = variable + delmt + item
for it in sub_key[item]:
sub_variable = variable + delmt + item + delmt + str(sub_key[item].index(it))
checkdepth(it, sub_variable)
print(sub_variable)
if isinstance(sub_key[item], int) or isinstance(sub_key[item], str):
sub_variable = variable + delmt + item
print (sub_variable)
for key in data:
if type(data[key]) is str:
print(key + '=' +str(data[key]))
else:
variable = key
checkdepth(data[key], variable)
I know that the problem is in the block where I process the list data type, but I don't know exactly where the problem is.
Use a recursive generator:
import json
with open('input.json') as f:
data = json.load(f)
def strkeys(data):
    """Yield a ``'__'``-joined key path for every leaf in nested JSON data.

    Dict keys and list indices along the way to each non-dict, non-list
    value are joined with ``'__'``. Empty dicts and lists contribute no
    paths.

    Args:
        data: A dict, a list, or a leaf value.

    Yields:
        One path string per leaf. When ``data`` itself is a leaf, a single
        ``None`` is yielded; the recursive callers use that as the signal
        that the path ends at their own key or index.
    """
    if isinstance(data, dict):
        for k, v in data.items():
            for item in strkeys(v):
                # Test for None explicitly so an empty-string key segment
                # from a deeper level is not mistaken for "end of path".
                yield k if item is None else f'{k}__{item}'
    elif isinstance(data, list):
        for i, v in enumerate(data):
            for item in strkeys(v):
                yield str(i) if item is None else f'{i}__{item}'
    else:
        yield None  # termination condition, not a list or dict
for s in strkeys(data):
print(s)
Output:
Logging__LogLevel__Default
Logging__LogLevel__Microsoft
Logging__LogLevel__Microsoft.Hosting.Lifetime
Logging__LogLevel__Microsoft.AspNetCore
Logging__LogLevel__System.Net.Http.HttpClient.Default.ClientHandler
Logging__LogLevel__System.Net.Http.HttpClient.Default.LogicalHandler
AllowedHosts
AutomaticTransferOptions__DateOffsetForDirectoriesInDays
AutomaticTransferOptions__DateOffsetForPortfoliosInDays
AutomaticTransferOptions__Clause__Item1
Authentication__ApiKeys__0__Key
Authentication__ApiKeys__0__OwnerName
Authentication__ApiKeys__0__Claims__0__Type
Authentication__ApiKeys__0__Claims__0__Value
Authentication__ApiKeys__1__Key
Authentication__ApiKeys__1__OwnerName
Authentication__ApiKeys__1__Claims__0__Type
Authentication__ApiKeys__1__Claims__0__Value
Authentication__ApiKeys__1__Claims__1__Type
Authentication__ApiKeys__1__Claims__1__Value
Authentication__ApiKeys__2__Key
Authentication__ApiKeys__2__OwnerName
Authentication__ApiKeys__2__Claims__0__Type
Authentication__ApiKeys__2__Claims__0__Value
Authentication__LDAP__Domain
Authentication__LDAP__MachineAccountName
Authentication__LDAP__MachineAccountPassword
Authentication__LDAP__EnableLdapClaimResolution
Authorization__Permissions__Roles__0__Role
Authorization__Permissions__Roles__0__Permissions__0
Authorization__Permissions__Roles__0__Permissions__1
Authorization__Permissions__Roles__1__Role
Authorization__Permissions__Roles__1__Permissions__0
Using flatten_json, this can be converted to a pandas DataFrame, but it's not clear if that's what you want. Once converted, you can use df.iloc[0] to see the value stored under each flattened key (each key becomes a column).
Note: you need to pass a list so I just wrapped your json above in [].
# https://github.com/amirziai/flatten
dic = your json from above
dic =[dic] # put it in a list
dic_flattened = (flatten(d, '__') for d in dic) # add your delimiter
df = pd.DataFrame(dic_flattened)
df.iloc[0]
Logging__LogLevel__Default Information
Logging__LogLevel__Microsoft Warning
Logging__LogLevel__Microsoft.Hosting.Lifetime Information
Logging__LogLevel__Microsoft.AspNetCore Warning
Logging__LogLevel__System.Net.Http.HttpClient.Default.ClientHandler Warning
Logging__LogLevel__System.Net.Http.HttpClient.Default.LogicalHandler Warning
AllowedHosts *
AutomaticTransferOptions__DateOffsetForDirectoriesInDays -1
AutomaticTransferOptions__DateOffsetForPortfoliosInDays -3
AutomaticTransferOptions__Clause__Item1 1
Authentication__ApiKeys__0__Key AB8E5976-2A7C-4EEE-92C1-7B0B4DC840F6
Authentication__ApiKeys__0__OwnerName Cron job
Authentication__ApiKeys__0__Claims__0__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__0__Claims__0__Value StressTestManager
Authentication__ApiKeys__1__Key B11D4F27-483A-4234-8EC7-CA121712D5BE
Authentication__ApiKeys__1__OwnerName Test admin
Authentication__ApiKeys__1__Claims__0__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__1__Claims__0__Value StressTestAdmin
Authentication__ApiKeys__1__Claims__1__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__1__Claims__1__Value TestManager
Authentication__ApiKeys__2__Key EBF98F2E-555E-4E66-9D77-5667E0AA1B54
Authentication__ApiKeys__2__OwnerName Test manager
Authentication__ApiKeys__2__Claims__0__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__2__Claims__0__Value TestManager
Authentication__LDAP__Domain domain.local
Authentication__LDAP__MachineAccountName Soft13
Authentication__LDAP__MachineAccountPassword vixuUEY7884*
Authentication__LDAP__EnableLdapClaimResolution true
Authorization__Permissions__Roles__0__Role TestAdmin
Authorization__Permissions__Roles__0__Permissions__0 transfers.create
Authorization__Permissions__Roles__0__Permissions__1 bindings.create
Authorization__Permissions__Roles__1__Role TestManager
Authorization__Permissions__Roles__1__Permissions__0 transfers.create
OK, I looked at your code and it's hard to follow. Your variable and function names don't make their purpose clear. That's fine, because everyone has to learn best practices and all the little tips and tricks in Python. So hopefully I can help you out.
You have a recursive-ish function, which is definitely the best way to handle a situation like this. However, your code is part recursive and part not. If you go recursive to solve a problem, you have to go 100% recursive.
Also the only time you should print in a recursive function is for debugging. Recursive functions should have an object that is passed down the function and gets appended to or altered and then passed back once it gets to the end of the recursion.
When you get a problem like this, think about which data you actually need or care about. In this problem we don't care about the values that are stored in the object, we just care about the keys. So we should write code that doesn't even bother looking at the value of something except to determine its type.
Here is some code I wrote up that should work for what you're wanting to do. But take note that because I did purely a recursive function my code base is small. Also my function uses a list that is passed around and added to and then at the end I return it so that we can use it for whatever we need. If you have questions just comment on this question and I'll answer the best I can.
def convert_to_delimited_keys(obj, parent_key='', delimiter='__', keys_list=None):
    """Return the delimiter-joined key path of every leaf in ``obj``.

    Args:
        obj: A dict, a list, or a leaf value. Dicts and lists are walked
            recursively; anything else counts as a leaf.
        parent_key: Optional prefix placed in front of every path. When
            empty (the default), paths start at the first key or index.
        delimiter: String placed between path segments.
        keys_list: Optional list to append the paths to. A fresh list is
            created when omitted.

    Returns:
        ``keys_list`` (or the new list) with one path string appended per
        leaf, in traversal order.
    """
    if keys_list is None:
        keys_list = []
    # Track the path as a list of segments and join only at the leaves.
    # The original joined eagerly and then sliced off len(delimiter)
    # leading characters, which mangled any non-empty parent_key.
    prefix = [parent_key] if parent_key else []
    _collect_key_paths(obj, prefix, delimiter, keys_list)
    return keys_list


def _collect_key_paths(obj, path, delimiter, keys_list):
    """Append the joined path of every leaf under ``obj`` to ``keys_list``."""
    if isinstance(obj, dict):
        for k, child in obj.items():
            _collect_key_paths(child, path + [str(k)], delimiter, keys_list)
    elif isinstance(obj, list):
        for i, child in enumerate(obj):
            _collect_key_paths(child, path + [str(i)], delimiter, keys_list)
    else:
        keys_list.append(delimiter.join(path))
for item in convert_to_delimited_keys(data):
print(item)

Python Dict Key Error. How do I loop through nested dict and check for a key

I have the following nested dict below which I'm trying to loop through. Well, not necessarily loop through but I just want to check that the label has a value "EDD". If it does then I want to trigger some other action.
My problem is that I keep getting an error for labels key error.
Please how do I do this.
message.data = {
"messages": [{
"to": "wa-id",
"from": "another-wa-id",
"type": "text",
"_vnd": {
"v1": {
"direction": "outbound",
"in_reply_to": "an-earlier-inbound-external-id",
"author": {
"name": "the name of the author",
"type": "SYSTEM | OPERATOR",
},
"labels": [{
"uuid": "the-uuid",
"value": "EDD"
}]
}
}
}, ]
}
My code looks like so:
whatsapp_contact_id = message.data
print(whatsapp_contact_id.keys())
list_data = whatsapp_contact_id["messages"]
print(list_data)
for dictionary_data in list_data:
print(dictionary_data)
dictionary_keys = dictionary_data.items()
print(dictionary_keys)
"""
EDD_label = dictionary_data["labels"]
"""
EDD_label = dictionary_data.get('labels', 'could not find')
print("The label is below")
print(EDD_label)
Assuming the structure stays constant, what you want is:
whatsapp_contact_id = message.data
# Use empty defaults at every level so a message without "_vnd", "v1" or
# "labels" is skipped quietly instead of raising on a missing key.
list_data = whatsapp_contact_id.get("messages", [])
for dictionary_data in list_data:
    dictionary_data_2 = dictionary_data.get("_vnd", {}).get("v1", {})
    labels_data = dictionary_data_2.get("labels", [])
    print(labels_data)
    for EDD in labels_data:
        EDD_string = EDD.get("value", "EDD label not present")
        print(EDD_string)
Also, you appear to have triple-pasted your code.
Edited to include final code from OP
Thanks everyone especially #tennoshi.
This works:
whatsapp_contact_id = message.data
list_data = whatsapp_contact_id.get("messages")
for dictionary_data in list_data:
    # "labels" lives two levels down, under "_vnd" -> "v1".
    dictionary_data_2 = dictionary_data.get("_vnd").get("v1")
    labels_data = dictionary_data_2.get("labels")
    print(labels_data)
    for EDD in labels_data:
        EDD_string = EDD.get("value", "EDD label not present")
        print(EDD_string)

How do I extract a list item from nested json in Python?

I have a JSON object and I'm trying to extract a couple of values from a nested list, then print them in markup. I'm getting an error - AttributeError: 'list' object has no attribute 'get'
I understand that it's a list and I can't perform a get on it. I've been searching for the proper method for a few hours now and I'm running out of steam. I'm able to get the Event, but not Value1 and Value2.
This is the json object
{
"resource": {
"data": {
"event": "qwertyuiop",
"eventVersion": "1.05",
"parameters": {
"name": "sometext",
"othername": [
""
],
"thing": {
"something": {
"blah": "whatever"
},
"abc": "123",
"def": {
"xzy": "value"
}
},
"something": [
"else"
]
},
"whatineed": [{
"value1": "text.i.need",
"value2": "text.i.need.also"
}]
}
}
}
And this is my function
def parse_json(json_data: dict) -> Info:
some_data = json_data.get('resource', {})
specific_data = some_data.get('data', {})
whatineed_data = specific_data.get('whatineed', {})
formatted_json = json.dumps(json_data, indent=2)
description = f'''
h3. Details
*Event:* {some_data.get('event')}
*Value1:* {whatineed_data('value1')}
*Value2:* {whatineed_data('value2')}
'''
From the data structure, whatineed is a list with a single item, which in turn is a dictionary. So, one way to access it would be:
whatineed_list = specific_data.get('whatineed', [])
whatineed_dict = whatineed_list[0]
At this point you can do:
value1 = whatineed_dict.get('value1')
value2 = whatineed_dict.get('value2')
You can change your function to the following:
def parse_json(json_data: dict) -> Info:
some_data = json_data.get('resource')
specific_data = some_data.get('data', {})
whatineed_data = specific_data.get('whatineed', {})
formatted_json = json.dumps(json_data, indent=2)
description = '''
h3. Details
*Event:* {}
*Value1:* {}
*Value2:* {}
'''.format(some_data.get('data').get('event'),whatineed_data[0]['value1'], whatineed_data[0]['value2'])
Since whatineed_data is a list, you need to index the element first
Python handles json as strings unless they are coming directly from a file. This could be the source for some of your problems. Also this article might help.
Assuming that the "whatineed" attribute really is a list and its elements are dicts, you can't call whatineed.get asking for value1 or value2 as if they were keys of the list itself, because a list has no get method.
So, you have two options:
If the whatineed list only ever has a single element, you can access this element directly and then access the element's keys:
element = whatineed[0]
v1 = element.get('value1', {})
v2 = element.get('value2', {})
Or, if whatineed list can have more items, so, you will need to iterate over this list and access those elements:
for element in whatineed:
v1 = element.get('value1', {})
v2 = element.get('value2', {})
## Do something with values

Categories