I have a json file, which when loaded in python using json.loads() becomes a dictionary. The json data is a nested dictionary which can contain a 'groups' key inside another 'groups' key. The values inside the 'groups' key are a 'name' key and a 'properties' key.
Each 'properties' key has a unique 'name' and a 'value' key.
My objective is to search for a 'groups' key having its 'name' key value as "SportCar", which has a properties key having a name key value as "BMW", and only when these conditions are satisfied, update the 'data' key from 'data':value1 to 'data':value2.
An example of the json is as follows
{
"groups": [
{
"name": "SportCar",
"properties": [
{
"name": "BMW",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "value1"
}
},
{
"name": "Audi",
"value": {
"type": "Boolean",
"data": true
}
}
],
"groups": [
{
"name": "Trucks",
"properties": [
{
"name": "Volvo",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "value1"
}
}
]
}
]
},
{
"name": "MotorCycle",
"properties": [
{
"name": "Yamaha",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "value1"
}
}
],
"groups": [
{
"name": "Speeders",
"properties": [
{
"name": "prop2",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "value1"
}
}
]
}
]
}
]
}
The above json is contained in myjson22.json. Here is what I have tried so far:
import json
from pprint import pprint
json_data=open('myjson22.json', 'r')
data = json.load(json_data)
#print(data)
def get_recursively(search_dict, field):
"""
To read the json data as type dict and search all 'groups' keys for the 'name' key value value provided.
"""
fields_found = []
for key, value in search_dict.items():
if key == field:
fields_found.append(value)
elif isinstance(value, dict):
results = get_recursively(value, field)
for result in results:
fields_found.append(result)
elif isinstance(value, list):
for item in value:
if isinstance(item, dict):
more_results = get_recursively(item, field)
for another_result in more_results:
fields_found.append(another_result)
return fields_found
get_recursively(data, ["properties"][0])
and the output was:
[[{'name': 'BMW',
'value': {'data': 'value1', 'encoding': 'utf-8', 'type': 'String'}},
{'name': 'Audi', 'value': {'data': True, 'type': 'Boolean'}}],
[{'name': 'Volvo',
'value': {'data': 'value1', 'encoding': 'utf-8', 'type': 'String'}}],
[{'name': 'Yamaha',
'value': {'data': 'value1', 'encoding': 'utf-8', 'type': 'String'}}],
[{'name': 'prop2',
'value': {'data': 'value1', 'encoding': 'utf-8', 'type': 'String'}}]]
A way to implement this recursive solution is with backtracking. When no more 'groups' keys are are found nested inside the root key, the 'name' key value is matched with groups_name parameter, which is 'SportCar' in our case. If this condition is satisfied check for the values inside same 'groups' key's (i.e. 'SportCar' key's) 'properties' key and match its 'name' key value with the properties_name parameter (which is 'BMW' in our case). If this second condition is also true, then update the 'data' key value inside the same 'properties' key, as per requirements, or else return (for backtracking).
import json
json_data = open('myjson22.json', 'r')
data = json.load(json_data)
def get_recursively( myJson, groups_name, properties_name, value2):
if 'groups' in myJson.keys():
# As there are multiple values inside 'groups' key
for jsonInsideGroupsKey in myJson['groups']:
get_recursively( jsonInsideGroupsKey, groups_name, properties_name, value2)
if 'name' in myJson.keys():
# check for groups name
if myJson['name'] == groups_name:
# check for properties name
if myJson['properties'][0]['name'] == properties_name:
# Update value. The changes will persist as we backtrack because
# we are making the update at the original memory location
# and not on a copy. For more info see deep and shallow copy.
myJson['properties'][0]['value']['data'] = value2
return
get_recursively(data,'SportCar','BMW','changedValue1')
get_recursively(data,'Speeders','prop2','changedValue2')
print data
my output:
{u'groups': [{u'name': u'SportCar', u'groups': [{u'name': u'Trucks', u'properties': [{u'name': u'Volvo', u'value': {u'data': u'value1', u'type': u'String', u'encoding': u'utf-8'}}]}], u'properties': [{u'name': u'BMW', u'value': {u'data': 'changedValue1', u'type': u'String', u'encoding': u'utf-8'}}, {u'name': u'Audi', u'value': {u'data': True, u'type': u'Boolean'}}]}, {u'name': u'MotorCycle', u'groups': [{u'name': u'Speeders', u'properties': [{u'name': u'prop2', u'value': {u'data': 'changedValue2', u'type': u'String', u'encoding': u'utf-8'}}]}], u'properties': [{u'name': u'Yamaha', u'value': {u'data': u'value1', u'type': u'String', u'encoding': u'utf-8'}}]}]}
prettified it will look as:
{
"groups": [
{
"name": "SportCar",
"properties": [
{
"name": "BMW",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "ChangedValue1"
}
},
{
"name": "Audi",
"value": {
"type": "Boolean",
"data": true
}
}
],
"groups": [
{
"name": "Trucks",
"properties": [
{
"name": "Volvo",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "value1"
}
}
]
}
]
},
{
"name": "MotorCycle",
"properties": [
{
"name": "Yamaha",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "value1"
}
}
],
"groups": [
{
"name": "Speeders",
"properties": [
{
"name": "prop2",
"value": {
"type": "String",
"encoding": "utf-8",
"data": "ChangedValue2"
}
}
]
}
]
}
]
}
Related
I've been successful in reading in and parsing a json string using python including normalising and exploding arrays. However I have a second json format which I'm struggling with and making little head way.
I need to pull out the 'Entities' and 'Transitions' within the 'data' object.
My json structure is:
{
"model": {
"id": "639b2970ac4d16767484b2bd",
"name": "TestImport",
"description": "",
"type": "LineageModel",
"referenceModelType": null,
"owner": {
"id": "639b2904ac4d167674849e1e",
"name": "xyz",
"firstName": null,
"lastName": null
},
"members": {
"totalUsers": 1,
"totalGroups": 0,
"users": {
"Owner": [
{
"joinTime": "2022-12-15T14:04:32.076Z",
"modificationTime": "2022-12-15T14:04:32.076Z",
"email": "sxyz",
"username": null,
"hasPendingSiteInvite": false,
"isDisabled": false,
"id": "639b2904ac4d167674849e1e",
"name": "xyz",
"firstName": null,
"lastName": null
}
]
},
"groups": {}
},
"$type": "ModelInformation"
},
"data": {
"version": "simple",
"roots": [
"8a44e4d6-4062-4e1c-a46c-c7787cab4405",
"d1494635-9005-4337-8eab-227265b29332"
],
"entities": {
"29380f60-620e-4314-9969-4ad6fe5bbea6": {
"name": "Element",
"children": [],
"id": "29380f60-620e-4314-9969-4ad6fe5bbea6",
"properties": {}
},
"86361ab4-6002-4f3b-b6ca-7e35acd69f9b": {
"name": "Application",
"children": [
"29380f60-620e-4314-9969-4ad6fe5bbea6"
],
"id": "86361ab4-6002-4f3b-b6ca-7e35acd69f9b",
"properties": {}
},
"223d9749-feb2-425d-b512-17b5322cda96": {
"name": "S_Group",
"children": [
"86361ab4-6002-4f3b-b6ca-7e35acd69f9b"
],
"id": "223d9749-feb2-425d-b512-17b5322cda96",
"properties": {}
}
},
"transitions": {
"c4e2026a-9c57-4bb0-b2e2-f7068d9c6fe5": {
"source": "29380f60-620e-4314-9969-4ad6fe5bbea6",
"target": "040677a5-820f-4d17-ae50-1296c0e36273",
"id": "c4e2026a-9c57-4bb0-b2e2-f7068d9c6fe5",
"properties": {}
}
},
"metadata": {
"queries": "{\"version\":\"3\"}",
"views": "{\"views\":[{\"name\":\"MainView\",\"description\":\"\",\"applyOnLoad\":true,\"view\":{\"version\":20,\"zoom\":{\"scale\":1,\"layerWidth\":250,\"layerSpacing\":40},\"collapsed\":{\"expanded\":[\"961847ad-1693-47b5-aa29-88ee07700b5e\",\"0da88727-e272-44a4-adeb-5a0465970490\",\"223d9749-feb2-425d-b512-17b5322cda96\",\"86361ab4-6002-4f3b-b6ca-7e35acd69f9b\"],\"collapsed\":[]},\"trace\":{\"enabled\":true,\"lock\":null,\"highlightedTraceDepth\":1,\"isHighlightedTraceDepthAll\":true,\"isTraversable\":true},\"selection\":[],\"queries\":{\"styled\":[],\"filtered\":[],\"filterType\":{\"6eecd266-2c2e-46d1-a00a-ceec18a87cdb\":\"show\"},\"expandedModules\":[]},\"settings\":{\"hideEmptyContainers\":false,\"hideFilteredLayers\":false,\"expandFilteredEntities\":false,\"portHintsEnabled\":true,\"autoBundleTransitions\":1,\"autoStyleTransitions\":1,\"autoHideTransitions\":1,\"maxSpanningTransitionDepth\":10,\"rootEntityType\":\"Layer\"}},\"options\":{\"zoom\":true,\"collapsed\":true,\"trace\":true,\"selection\":true,\"queries\":true,\"settings\":true},\"id\":\"VIEW-tcPIi3jP\"}]}"
},
"queries": [
{
"id": "9eb5c7fe-8d4c-4d94-b1a7-fee08bb2f663",
"name": "TestID",
"description": "",
"displayRules": "[{\"id\":\"e119af2d-5061-43f3-901a-4c70f402bb5d\",\"type\":\"PROPERTY\",\"staticColour\":\"#E47833\",\"dynamicColouring\":false,\"cls\":\"dr-19\",\"property\":\"TestID\",\"prefix\":\"\",\"suffix\":\"\",\"center\":false,\"left\":false}]",
"querySource": "not isEmpty(TestID)",
"modulePath": "Uncategorised",
"importedFromModel": null
}
],
"propertyDefinitions": {
"propertyDefinitionsId": "2ab6a0c3-8d24-4235-9783-e241437bf860",
"modelId": "639b2970ac4d16767484b2bd",
"properties": {
"TestID": {
"type": "Number",
"defaultValue": null,
"options": [],
"optionInfos": {}
}
}
}
},
"importedModels": {},
"importedModelsForQueries": {},
"propertyDefinitionsForImports": {},
"templateCollections": {}
}
I have been using the following to convert a json to data frame:
fInput = 'filepath to json file'
with open(fInput, encoding='utf-16') as inputfile:
df = pd.read_json(inputfile)
fOutput = 'output file path'
df.to_csv(fOutput, encoding='utf-16, index = false)
I then normalise columns using which for other json formats works
pd.json_normalize(df['column'])
and I explode arrays in the columns using:
df2 = pd.DataFrame([d, tup.id) for df.intertuples() for d in tup.columnName])
What I can't work out is how to pull into a data frame the 'entities' object from the 'data' object. Once I can get to that then I should be able to parse the content.
I had got to:
df = df["data"]
df = df["entities"]
When I print that it looks promising, but if I try to output to csv it fails with "'dict' object has no attributes" so I'm going wrong somewhere. The traceback for the error is:
AttributeError
Input [48], in <cell line: 14>()
12 df = df["entities"]
13 print(df)
14 df.to_csv(fOutput, encoding='utf-16', index=false)
AttributeError: 'dict' object has no attribute 'to_csv'
Any pointers appreciated.
Couldn't you just do this?
I saved the json data you gave in a file named data.txt
import json
with open('data.txt','r') as file:
data = json.load(file)
entities = data['data']['entities']
transitions = data['data']['transitions']
print(f'Entities: {entities}\nTransitions: {transitions}')
Output
Entities: {'29380f60-620e-4314-9969-4ad6fe5bbea6': {'name': 'Element', 'children': [], 'id': '29380f60-620e-4314-9969-4ad6fe5bbea6', 'properties': {}}, '86361ab4-6002-4f3b-b6ca-7e35acd69f9b': {'name': 'Application', 'children': ['29380f60-620e-4314-9969-4ad6fe5bbea6'], 'id': '86361ab4-6002-4f3b-b6ca-7e35acd69f9b', 'properties': {}}, '223d9749-feb2-425d-b512-17b5322cda96': {'name': 'S_Group', 'children': ['86361ab4-6002-4f3b-b6ca-7e35acd69f9b'], 'id': '223d9749-feb2-425d-b512-17b5322cda96', 'properties': {}}}
Transitions: {'c4e2026a-9c57-4bb0-b2e2-f7068d9c6fe5': {'source': '29380f60-620e-4314-9969-4ad6fe5bbea6', 'target': '040677a5-820f-4d17-ae50-1296c0e36273', 'id': 'c4e2026a-9c57-4bb0-b2e2-f7068d9c6fe5', 'properties': {}}}
I want to extract code from JSON format.
import json
json_data = '''
{
"Body": {
"stkCallback": {
"MerchantRequestID": "22531-976234-1",
"CheckoutRequestID": "ws_CO_DMZ_250600506_23022019144745852",
"ResultCode": 0,
"ResultDesc": "The service request is processed successfully.",
"CallbackMetadata": {
"Item": [
{
"Name": "Amount",
"Value": 1.0
},
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
},
{
"Name": "Balance"
},
{
"Name": "TransactionDate",
"Value": 20190223144807
},
{
"Name": "PhoneNumber",
"Value": 254725696042
}
]
}
}
}
}
'''
json_da = data['Body']
list_data = data['Body']['MpesaReceiptNumber']
print (json_da)
print (list_data)
I want to print this: NBN52K8A1J
The problem is that you have a list of dicts you need to search first:
>>> for obj in data['Body']['stkCallback']['CallbackMetadata']['Item']:
... print(obj)
...
{'Name': 'Amount', 'Value': 1.0}
{'Name': 'MpesaReceiptNumber', 'Value': 'NBN52K8A1J'}
{'Name': 'Balance'}
{'Name': 'TransactionDate', 'Value': 20190223144807}
{'Name': 'PhoneNumber', 'Value': 254725696042}
One possibility is
>>> [x['Value'] for x in data['Body']['stkCallback']['CallbackMetadata']['Item'] if x['Name'] == 'MpesaReceiptNumber'][0]
'NBN52K8A1J'
Just use the library json. Then you can print its inner elements
import json
json_data = '{"Body":{"stkCallback":{"MerchantRequestID":"22531-976234-1","CheckoutRequestID":"ws_CO_DMZ_250600506_23022019144745852","ResultCode":0,"ResultDesc":"The service request is processed successfully.","CallbackMetadata":{"Item":[{"Name":"Amount","Value":1.00},{"Name":"MpesaReceiptNumber","Value":"NBN52K8A1J"},{"Name":"Balance"},{"Name":"TransactionDate","Value":20190223144807},{"Name":"PhoneNumber","Value":254725696042}]}}}}'
a = json.loads(json_data)
print(a["Body"]["stkCallback"]["CallbackMetadata"]["Item"][1]["Value"])
You were almost there just have to get the the key value pair itself from the dicts and check if it is the name you wanted:
data = json.loads(json_data)
list_data = data['Body']["stkCallback"]['CallbackMetadata']['Item']
var: str
for x in list_data:
if x['Name'] == 'MpesaReceiptNumber':
var = x['Value']
break
print(var)
You can use this in the future easily by replacing the if check with the name of something else so you can grab the value depending on a variable.
I find using pprint to get the shape of the data structure is helpful when you're learning how to navigate it all out.
import json
import pprint
json_data = '{"Body":{"stkCallback":{"MerchantRequestID":"22531-976234-1","CheckoutRequestID":"ws_CO_DMZ_250600506_23022019144745852","ResultCode":0,"ResultDesc":"The service request is processed successfully.","CallbackMetadata":{"Item":[{"Name":"Amount","Value":1.00},{"Name":"MpesaReceiptNumber","Value":"NBN52K8A1J"},{"Name":"Balance"},{"Name":"TransactionDate","Value":20190223144807},{"Name":"PhoneNumber","Value":254725696042}]}}}}'
data = json.loads(json_data)
pprint.pprint(data)
Results in:
{'Body': {'stkCallback': {'CallbackMetadata': {'Item': [{'Name': 'Amount', 'Value': 1.0},
{'Name': 'MpesaReceiptNumber', 'Value': 'NBN52K8A1J'},
{'Name': 'Balance'},
{'Name': 'TransactionDate', 'Value': 20190223144807},
{'Name': 'PhoneNumber', 'Value': 254725696042}]},
'CheckoutRequestID': 'ws_CO_DMZ_250600506_23022019144745852',
'MerchantRequestID': '22531-976234-1',
'ResultCode': 0,
'ResultDesc': 'The service request is processed successfully.'}}}
So you should be able to see that data["Body"]["stkCallback"]["CallbackMetadata"]["Item"] gets to to the depth you need for your data.
>>> pprint.pprint(data["Body"]["stkCallback"]["CallbackMetadata"]["Item"])
[{'Name': 'Amount', 'Value': 1.0},
{'Name': 'MpesaReceiptNumber', 'Value': 'NBN52K8A1J'},
{'Name': 'Balance'},
{'Name': 'TransactionDate', 'Value': 20190223144807},
{'Name': 'PhoneNumber', 'Value': 254725696042}]
So next you need to iterate through that list and find a match (if one exists) for the MpesaReceiptNumber key.
receipt_no = None
for item in data["Body"]["stkCallback"]["CallbackMetadata"]["Item"]:
if item.get('Name') == 'MpesaReceiptNumber':
receipt_no = item.get('Value')
print(f"The receipt # is: {receipt_no}")
If you parse the json you will notice that the path to the data is not simply ['Body']['MpesaReceiptNumber']. In fact you have a list of dicts inside ['Item'] that needs to be searched.
Parsed data tree
One suggestion is to run the following code to find the data you are looking for:
import json
json_data = '{"Body":{"stkCallback":{"MerchantRequestID":"22531-976234-1","CheckoutRequestID":"ws_CO_DMZ_250600506_23022019144745852","ResultCode":0,"ResultDesc":"The service request is processed successfully.","CallbackMetadata":{"Item":[{"Name":"Amount","Value":1.00},{"Name":"MpesaReceiptNumber","Value":"NBN52K8A1J"},{"Name":"Balance"},{"Name":"TransactionDate","Value":20190223144807},{"Name":"PhoneNumber","Value":254725696042}]}}}}'
data = (json.loads(json_data))
list_data = data['Body']['stkCallback']['CallbackMetadata']['Item']
# Returns:
# [{'Name': 'Amount', 'Value': 1.0}, {'Name': 'MpesaReceiptNumber', 'Value':'NBN52K8A1J'}, {'Name': 'Balance'}, {'Name': 'TransactionDate', 'Value': 20190223144807}, {'Name': 'PhoneNumber', 'Value': 254725696042}]
# Now find Name: 'MpesaReceiptNumber' inside the dict list
find_it = next(item for item in list_data if item["Name"] == "MpesaReceiptNumber")
find_it = find_it['Value']
print (find_it)
Result
NBN52K8A1J
Use jq.
First off, it can "pretty print" any JSON data.
Put the value of json_data into a file test.json, and then show the formatted output of the JSON data with:
$ jq <test.json
{
"Body": {
"stkCallback": {
"MerchantRequestID": "22531-976234-1",
"CheckoutRequestID": "ws_CO_DMZ_250600506_23022019144745852",
"ResultCode": 0,
"ResultDesc": "The service request is processed successfully.",
"CallbackMetadata": {
"Item": [
{
"Name": "Amount",
"Value": 1
},
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
},
{
"Name": "Balance"
},
{
"Name": "TransactionDate",
"Value": 20190223144807
},
{
"Name": "PhoneNumber",
"Value": 254725696042
}
]
}
}
}
}
Next, to extract values, a selector path needs to be given on the jq command line:
jq '.Body.stkCallback.CallbackMetadata.Item|.[]|select(.Name == "MpesaReceiptNumber")|.Value' test.json
"NBN52K8A1J"
Now to make this sequence easier to understand, let's break it down component by component.
To extract and return only the .Body:
$ jq '.Body' <test.json
{
"stkCallback": {
"MerchantRequestID": "22531-976234-1",
"CheckoutRequestID": "ws_CO_DMZ_250600506_23022019144745852",
"ResultCode": 0,
"ResultDesc": "The service request is processed successfully.",
"CallbackMetadata": {
"Item": [
{
"Name": "Amount",
"Value": 1
},
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
},
{
"Name": "Balance"
},
{
"Name": "TransactionDate",
"Value": 20190223144807
},
{
"Name": "PhoneNumber",
"Value": 254725696042
}
]
}
}
}
Now let's fetch the stkCallback component:
$ jq '.Body.stkCallback' <test.json
{
"MerchantRequestID": "22531-976234-1",
"CheckoutRequestID": "ws_CO_DMZ_250600506_23022019144745852",
"ResultCode": 0,
"ResultDesc": "The service request is processed successfully.",
"CallbackMetadata": {
"Item": [
{
"Name": "Amount",
"Value": 1
},
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
},
{
"Name": "Balance"
},
{
"Name": "TransactionDate",
"Value": 20190223144807
},
{
"Name": "PhoneNumber",
"Value": 254725696042
}
]
}
}
Ok, now the callbackMetadata:
$ jq '.Body.stkCallback.CallbackMetadata' <test.json
{
"Item": [
{
"Name": "Amount",
"Value": 1
},
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
},
{
"Name": "Balance"
},
{
"Name": "TransactionDate",
"Value": 20190223144807
},
{
"Name": "PhoneNumber",
"Value": 254725696042
}
]
}
Next, the Item part:
$ jq '.Body.stkCallback.CallbackMetadata.Item' <test.json
[
{
"Name": "Amount",
"Value": 1
},
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
},
{
"Name": "Balance"
},
{
"Name": "TransactionDate",
"Value": 20190223144807
},
{
"Name": "PhoneNumber",
"Value": 254725696042
}
]
Notice that the result is an array of list items? Let's filter the data out of the array:
$ jq '.Body.stkCallback.CallbackMetadata.Item|.[]' <test.json
{
"Name": "Amount",
"Value": 1
}
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
}
{
"Name": "Balance"
}
{
"Name": "TransactionDate",
"Value": 20190223144807
}
{
"Name": "PhoneNumber",
"Value": 254725696042
}
Now the result is just the list of tuples, each with a "Name" and "Value". So, let's select just the one we (you) wanted:
$ jq '.Body.stkCallback.CallbackMetadata.Item|.[]|select(.Name == "MpesaReceiptNumber")' <test.json
{
"Name": "MpesaReceiptNumber",
"Value": "NBN52K8A1J"
}
Cool. We've got the tuple we wanted. Let's extract just the value now:
$ jq '.Body.stkCallback.CallbackMetadata.Item|.[]|select(.Name == "MpesaReceiptNumber")|.Value' <test.json
"NBN52K8A1J"
And, there you go.
Simplest way to get the value associated with a specific CallbackMetadata item name:
json_string = '''
{
"Body": {
"stkCallback": {
...
}
'''
json_data = json.loads(json_string)
for item in json_data["Body"]["stkCallback"]["CallbackMetadata"]["Item"]:
if item["Name"] == "MpesaReceiptNumber":
print(item["Value"]) # -> NBN52K8A1J
Origin data as below show, every item has a type mark, such as interests, family, behaviors, etc and I want to group by this type field.
return_data = [
{
"id": "112",
"name": "name_112",
"type": "interests",
},
{
"id": "113",
"name": "name_113",
"type": "interests",
},
{
"id": "114",
"name": "name_114",
"type": "interests",
},
{
"id": "115",
"name": "name_115",
"type": "behaviors",
},
{
"id": "116",
"name": "name_116",
"type": "family",
},
{
"id": "117",
"name": "name_117",
"type": "interests",
},
...
]
And expected ouput data format like:
output_data = [
{"interests":[
{
"id": "112",
"name": "name_112"
},
{
"id": "113",
"name": "name_113"
},
...
]
},
{
"behaviors": [
{
"id": "115",
"name": "name_115"
},
...
]
},
{
"family": [
{
"id": "116",
"name": "name_116"
},
...
]
},
...
]
And here is my trial:
type_list = []
for item in return_data:
if item['type'] not in type_list:
type_list.append(item['type'])
interests_list = []
for type in type_list:
temp_list = []
for item in return_data:
if item['type'] == type:
temp_list.append({"id": item['id'], "name": item['name']})
interests_list.append({type: temp_list})
Obviously my trial is low efficient as it is O(n*m), but I cannot find the more effective way to solve the problem.
Is there more effective way to get the result? any commentary is great welcome, thanks.
Use a defaultdict to store a list of items for each type:
from collections import defaultdict
# group by type
temp_dict = defaultdict(list)
for item in return_data:
temp_dict[item["type"]].append({"id": item["id"], "name": item["name"]})
# convert back into a list with the desired format
output_data = [{k: v} for k, v in temp_dict.items()]
Output:
[
{
'behaviors': [
{'name': 'name_115', 'id': '115'}
]
},
{
'family': [
{'name': 'name_116', 'id': '116'}
]
},
{
'interests': [
{'name': 'name_112', 'id': '112'},
{'name': 'name_113', 'id': '113'},
{'name': 'name_114', 'id': '114'},
{'name': 'name_117', 'id': '117'}
]
},
...
]
If you don't want to import defaultdict, you could use a vanilla dictionary with setdefault:
# temp_dict = {}
temp_dict.setdefault(item["type"], []).append(...)
Behaves in exactly the same way, if a little less efficient.
please see Python dictionary for map.
for item in return_data:
typeMap[item['type']] = typeMap[item['type']] + delimiter + item['name']
I have the following json document:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data": {
"list": {
"name": "Sorji for QA",
"id": "5b0a2543b89acdbdb85f7b42"
},
"board": {
"shortLink": "iyCzZ5jx",
"name": "FlicksIO",
"id": "5b0a251f68a9e74b8ec3b3ac"
},
"card": {
"shortLink": "vOt2vO7v",
"idShort": 92,
"name": "New column in main for Storefront provider correlation.",
"id": "5b9c0023533f7c26424ea4ed",
"closed": true
},
"old": {
"closed": false
}
},
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator": {
"id": "5b203bc7e47d817a8138bc37",
"avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"fullName": "Marie Bond",
"idMemberReferrer": null,
"initials": "MB",
"username": "mb"
}
}
I would like to expand this out to be a single level with dot notation. That is, it should look like:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42"
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac"
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true
"data.old.closed": false
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
Would it be possible to do this with a generator object? I've been working a lot on recursion today, and have been trying to move from while loops to using generator objects and yields, etc.
You can keep a parameter in the signature of the recursive function to store the paths:
data = {'id': '5c26321bd8f4113d43b91141', 'idMemberCreator': '5b203bc7e47d817a8138bc37', 'data': {'list': {'name': 'Sorji for QA', 'id': '5b0a2543b89acdbdb85f7b42'}, 'board': {'shortLink': 'iyCzZ5jx', 'name': 'FlicksIO', 'id': '5b0a251f68a9e74b8ec3b3ac'}, 'card': {'shortLink': 'vOt2vO7v', 'idShort': 92, 'name': 'New column in main for Storefront provider correlation.', 'id': '5b9c0023533f7c26424ea4ed', 'closed': True}, 'old': {'closed': False}}, 'type': 'updateCard', 'date': '2018-12-28T14:24:27.455Z', 'limits': {}, 'memberCreator': {'id': '5b203bc7e47d817a8138bc37', 'avatarHash': '73bfa48c76c3c92615fe89ff79a6c5ae', 'avatarUrl': 'https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae', 'fullName': 'Marie Bond', 'idMemberReferrer': None, 'initials': 'MB', 'username': 'mb'}}
def dot_paths(d, _paths = []):
for a, b in d.items():
if not b or not isinstance(b, dict):
yield ['.'.join(_paths+[a]), b]
else:
yield from dot_paths(b, _paths+[a])
import json
print(json.dumps(dict(dot_paths(data)), indent=4))
Output:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42",
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac",
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true,
"data.old.closed": false,
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
Python newbie:
Default.json
{
"name": {
"provide": ""
},
"test": {
"Fail": {
"centers": None,
"Nearest": 0
},
"far": "",
"Meta": null,
"Only": false,
"Tags": null
},
"Session": "",
"conf": {
"check": "",
"Reg": ""
},
"Token": ""
}
Remote.json
[ {
'name': {
'provide': ''
},
'Name': 'abc',
'test': {
'Service': 'redis',
'Tags': [
'stage'
],
'Fail': {
'centers': None,
'Nearest': 3
},
'Only': false,
'far': '',
'Meta': null
},
'Token': '',
'Session': '',
'conf': {
'Reg': '',
'check': 'name_prefix_match'
},
} ]
I have a default.json and remote .json .Task I am trying to achieve is remove all the json elements from the remote.json for whom the values of remote.json matches with the default.json.For example since the key,value of name:{provider=""} from default.json matches the name:{provider=""} from remote.json.It should get removed from remote.json
with open(remote.json) as f:
with open(default.json) as m:
file=json.load(f)
default=json.load(m)
for i in xrange(len(file)):
for key,value in default.items():
#default[key]=value
#a=filter(lambda x: x[""],file.keys())
1.I am not getting the idea here how to get key, value from default and compare it with file?Any help would be appreciated.
The reason I need to remove element from remote.json is because I need to compare the resultant json with other json file "local.json".If i dont' remove the key ,values with value "" or null or None then the comparison between remote.json and local.json would never be equal.
2.Is there any better way of going for this problem?
local.json
{
"Name": "",
"conf": {
"check": "name_prefix_match",
},
"test": {
"Service": "redis",
"Fail": {
"Near": 3
},
"Tags": ""
}
}
There are some problems with JSON examples because of None & False are not valid JSON objects (and so are single-quoted string literals), so let's pretend that we've already parsed files and got something like
default_json = {
"name": {
"provide": ""
},
"test": {
"Fail": {
"centers": None,
"Nearest": 0
},
"far": "",
"Meta": None,
"Only": False,
"Tags": None
},
"Session": "",
"conf": {
"check": "",
"Reg": ""
},
"Token": ""
}
remote_json = [{
"name": {
"provide": ""
},
"Name": "abc",
"test": {
"Service": "redis",
"Tags": [
"stage"
],
"Fail": {
"centers": None,
"Nearest": 3
},
"Only": False,
"far": "",
"Meta": None
},
"Token": "",
"Session": "",
"conf": {
"Reg": "",
"check": "name_prefix_match"
},
}]
Assuming that remote.json is the list of dictionaries & each one of them should be filtered out using default.json:
filtered_remote_json = [dict(item
for item in dictionary.items()
if item not in default_json.items())
for dictionary in remote_json]
will give us
filtered_remote_json == [{"Name": "abc",
"test": {"Service": "redis", "Tags": ["stage"],
"Fail": {"centers": None,
"Nearest": 3}, "Only": False,
"far": "", "Meta": None},
"conf": {"Reg": "",
"check": "name_prefix_match"}}]
EDIT
if we need to filter sub-dictionaries as well, then next a bit nasty utility function should help
def filter_defaults(json_object, default_json_object):
result = {}
for key, value in json_object.items():
try:
default_value = default_json_object[key]
except KeyError:
# key not in defaults, adding to result
result[key] = value
continue
# we need to process sub-dictionaries as well
if isinstance(value, dict):
value = filter_defaults(value, default_value)
# we are not interested in empty filtered sub-dictionaries
if not value:
continue
# value should differ from default
elif value == default_value:
continue
result[key] = value
return result
then just write
filtered_remote_json = [filter_defaults(dictionary, default_json)
for dictionary in remote_json]
this will give us
filtered_remote_json == [{"Name": "abc",
"test": {"Service": "redis", "Tags": ["stage"],
"Fail": {"Nearest": 3}},
"conf": {"check": "name_prefix_match"}}]