How to get values of keys for changing Json - python

I am using python2.7
I pull a JSON document whose structure changes every time I request it.
I need to pull out Animal_Target_DisplayName under Term7 Under Relation6 in my dict.
The problem is sometimes the object Relation6 is in another part of the Json, it could be leveled deeper or in another order.
I am trying to write code that simply collects the values of the key Animal_Target_DisplayName, but nothing is working. It won't even loop down through the nested dict.
Now this can work if i just pull it out using something like ['view']['Term0'][0]['Relation6'] but remember the JSON is never returned in the same structure.
Code i am using to get the values of the key Animal_Target_DisplayName but it doesnt seem to loop through my dict and find all the values with that key.
array = []
for d in dict.values():
row = d['Animal_Target_DisplayName']
array.append(row)
JSON Below:
dict = {
"view":{
"Term0":[
{
"Id":"b0987b91-af12-4fe3-a56f-152ac7a4d84d",
"DisplayName":"Dog",
"FullName":"Dog",
"AssetType1":[
{
"AssetType_Id":"00000000-0000-0000-0000-000000031131",
}
]
},
{
"Id":"ee74a59d-fb74-4052-97ba-9752154f015d",
"DisplayName":"Dog2",
"FullName":"Dog",
"AssetType1":[
{
"AssetType_Id":"00000000-0000-0000-0000-000000031131",
}
]
},
{
"Id":"eb548eae-da6f-41e8-80ea-7e9984f56af6",
"DisplayName":"Dog3",
"FullName":"Dog3",
"AssetType1":[
{
"AssetType_Id":"00000000-0000-0000-0000-000000031131",
}
]
},
{
"Id":"cfac6dd4-0efa-4417-a2bf-0333204f8a42",
"DisplayName":"Animal Set",
"FullName":"Animal Set",
"AssetType1":[
{
"AssetType_Id":"00000000-0000-0000-0001-000400000001",
}
],
"StringAttribute2":[
{
"StringAttribute_00000000-0000-0000-0000-000000003114_Id":"00a701a8-be4c-4b76-a6e5-3b0a4085bcc8",
"StringAttribute_00000000-0000-0000-0000-000000003114_Value":"Desc"
}
],
"StringAttribute3":[
{
"StringAttribute_00000000-0000-0000-0000-000000000262_Id":"a81adfb4-7528-4673-8c95-953888f3b43a",
"StringAttribute_00000000-0000-0000-0000-000000000262_Value":"meow"
}
],
"BooleanAttribute4":[
{
"BooleanAttribute_00000000-0000-0000-0001-000500000001_Id":"932c5f97-c03f-4a1a-a0c5-a518f5edef5e",
"BooleanAttribute_00000000-0000-0000-0001-000500000001_Value":"true"
}
],
"SingleValueListAttribute5":[
{
"SingleValueListAttribute_00000000-0000-0000-0001-000500000031_Id":"ef51dedd-6f25-4408-99a6-5a6cfa13e198",
"SingleValueListAttribute_00000000-0000-0000-0001-000500000031_Value":"Blah"
}
],
"Relation6":[
{
"Animal_Id":"2715ca09-3ced-4b74-a418-cef4a95dddf1",
"Term7":[
{
"Animal_Target_Id":"88fd0090-4ea8-4ae6-b7f0-1b13e5cf3d74",
"Animal_Target_DisplayName":"Animaltheater",
"Animal_Target_FullName":"Animaltheater"
}
]
},
{
"Animal_Id":"6068fe78-fc8e-4542-9aee-7b4b68760dcd",
"Term7":[
{
"Animal_Target_Id":"4e87a614-2a8b-46c0-90f3-8a0cf9bda66c",
"Animal_Target_DisplayName":"Animaltitle",
"Animal_Target_FullName":"Animaltitle"
}
]
},
{
"Animal_Id":"754ec0e6-19b6-4b6b-8ba1-573393268257",
"Term7":[
{
"Animal_Target_Id":"a8986ed5-3ec8-44f3-954c-71cacb280ace",
"Animal_Target_DisplayName":"Animalcustomer",
"Animal_Target_FullName":"Animalcustomer"
}
]
},
{
"Animal_Id":"86b3ffd1-4d54-4a98-b25b-369060651bd6",
"Term7":[
{
"Animal_Target_Id":"89d02067-ebe8-4b87-9a1f-a6a0bdd40ec4",
"Animal_Target_DisplayName":"Animalfact_transaction",
"Animal_Target_FullName":"Animalfact_transaction"
}
]
},
{
"Animal_Id":"ea2e1b76-f8bc-46d9-8ebc-44ffdd60f213",
"Term7":[
{
"Animal_Target_Id":"e398cd32-1e73-46bd-8b8f-d039986d6de0",
"Animal_Target_DisplayName":"Animalfact_transaction",
"Animal_Target_FullName":"Animalfact_transaction"
}
]
}
],
"Relation10":[
{
"TargetRelation_b8b178ff-e957-47db-a4e7-6e5b789d6f03_Id":"aff80bd0-a282-4cf5-bdcc-2bad35ddec1d",
"Term11":[
{
"AnimalId":"3ac22167-eb91-469a-9d94-315aa301f55a",
"AnimalDisplayName":"Animal",
"AnimalFullName":"Animal"
}
]
}
],
"Tag12":[
{
"Tag_Id":"75968ea6-4c9f-43c9-80f7-dfc41b24ec8f",
"Tag_Name":"AnimalAnimaltitle"
},
{
"Tag_Id":"b1adbc00-aeef-415b-82b6-a3159145c60d",
"Tag_Name":"Animal2"
},
{
"Tag_Id":"5f78e4dc-2b37-41e0-a0d3-cec773af2397",
"Tag_Name":"AnimalDisplayName"
}
]
}
]
}
}
The output i am trying to get is a list of all the values from key Animal_Target_DisplayName like this ['Animaltheater','Animaltitle', 'Animalcustomer', 'Animalfact_transaction', 'Animalfact_transaction'] but we need to remember the nested structure of this json always changes but the keys for it are always the same.

I guess your only option is running through the entire dict and get the values of Animal_Target_DisplayName key, I propose the following recursive solution:
def run_json(dict_, target_key="Animal_Target_DisplayName"):
    """Collect every value stored under ``target_key`` in nested JSON data.

    The structure is walked recursively through dicts and lists, so the
    position or nesting depth of the key does not matter.

    Args:
        dict_: Parsed JSON data -- a dict, a list, or a scalar leaf.
        target_key: Key whose values are collected. Defaults to
            ``"Animal_Target_DisplayName"``.

    Returns:
        A flat list of the matching values in traversal order. Scalar
        leaves and empty containers contribute nothing.
    """
    animal_target_sons = []
    # isinstance() also accepts dict/list subclasses (e.g. OrderedDict).
    if isinstance(dict_, list):
        for element in dict_:
            animal_target_sons.extend(run_json(element, target_key))
    elif isinstance(dict_, dict):
        for key, value in dict_.items():
            if key == target_key:
                # A match is taken as-is; its value is not searched further.
                animal_target_sons.append(value)
            else:
                animal_target_sons.extend(run_json(value, target_key))
    return animal_target_sons
run_json(dict_)
Then calling run_json returns a list with what you want. By the way, I recommend renaming your JSON variable from dict to, for example, dict_: dict is not a reserved word, but it is the name of Python's built-in dictionary type, and assigning to it shadows that built-in for the rest of the module.

Since you're getting JSON, why not make use of the json module? That will do the parsing for you and allow you to use dictionary functions+features to get the information you need.
#!/usr/bin/python2.7
from __future__ import print_function
import json
# _somehow_ get your JSON in as a string. I'm calling it "jstr" for this
# example.
# Use the module to parse it
jdict = json.loads(jstr)
# our dict has keys...
# view -> Term0 -> keys-we're-interested-in
results = {}
for term in jdict["view"]["Term0"]:
    if term["FullName"] != "Animal Set":
        continue
    # this is the one we're interested in - "Relation6" is another list
    for relation in term["Relation6"]:
        term7 = relation["Term7"][0]
        displayName = term7["Animal_Target_DisplayName"]
        fullName = term7["Animal_Target_FullName"]
        # keyed by full name, so entries sharing a full name collapse into one
        results[fullName] = displayName
print("{0}".format(results))
Then you can dump the results dict plain, or with pretty-printing:
>>> print(json.dumps(results, indent=4))
{
"Animaltitle2": "Animaltitle2",
"Animalcustomer3": "Animalcustomer3",
"Animalfact_transaction4": "Animalfact_transaction4",
"Animaltheater1": "Animaltheater1"
}

Related

Convert nested dictionary inside dictionary into relational and add missing keys using Python

I am trying to convert below json records into relational but I am not getting the expected output,
Filename.json:-
{
"SampleRecord":{
"SampleRules":[
{
"Scaler_id":"1",
"family_min_samples_percentage":5,
"original_number_of_clusters":4,
"Results":[
{
"eps_value":0.1,
"min_samples":5,
"number_of_clusters":9,
"number_of_noise_samples":72,
"scores":{
"adjusted_rand_index":0.001,
"adjusted_mutual_info_score":0.009
}
}
],
"isnegative":"False",
"comment":[
"#Comment"
],
"enable":"enabled",
"additional_value":{
"type":[
{
"value":"AAA"
}
],
"uid":[
{
"value":"BBB"
}
],
"options":[
{
"value":"CCC"
},
{
"value":"DDD"
}
],
"scope":[
{
"value":"EEE"
}
]
}
},
{
"Scaler_id":"2",
"family_min_samples_percentage":5,
"original_number_of_clusters":4,
"Results":[
{
"eps_value":0.1,
"min_samples":5,
"number_of_clusters":9,
"number_of_noise_samples":72,
"scores":{
"adjusted_rand_index":0.001,
"adjusted_mutual_info_score":0.009
}
}
],
"isnegative":"False",
"comment":[
"#Comment"
],
"enable":"enabled",
"additional_value":{
"type":[
{
"value":"AAA"
}
],
"uid":[
{
"value":"BBB"
}
],
"options":[
{
"value":"CCC"
}
]
}
}
]
}
}
Expected output:
Scaler_id~original_number_of_clusters~Results_eps_value~Results_Scores_adjusted_rand_index~Results_Scores_avies_bouldin_score~isnegative~comment~additional_value_type~additional_value_uid~additional_value_options
1~4~0.1~0.001~1.70~False~#comment~AAA~BBB~CCC~EEE
1~4~0.1~0.001~1.70~False~#comment~AAA~BBB~DDD~EEE
2~4~0.1~0.001~1.70~False~#comment~AAA~BBB~CCC~Null
with open(Filename.json) as inputfile:
content = inputfile.read()
data=json.loads(json.dumps(content,ensure_ascii=False))
df1=pd.json_normalize(data['SampleRecord'],'SampleRules',sep='_')
df23.to_csv('Sample1.txt',encoding='utf-8',index=False,sep'~',na_rep='')
output(Sample1.txt):
Scaler_id~original_number_of_clusters~Results~isnegative~comment~additional_value_type~additional_value_uid~additional_value_options
1~4~[{0.1},{0.001},{1.70}]~False~#comment~[{AAA}]~[{BBB}]~[{CCC},{DDD}]~[{EEE}]
2~4~[{0.1},{0.001},{1.70}]~False~#comment~[{AAA}]~[{BBB}]~[{CCC}]~
df2=pd.json_normalize(data['SampleRecord'],['SampleRules','Results'],[['SampleRules','Scaler_id'],['SampleRules','original_number_of_clusters'],['SampleRules','isnegative'],['SampleRules','comment']],record_prefix='Results',sep='_',max_level=None,errors='ignore')
df3=pd.json_normalize(data['SampleRecord'],['SampleRules','additional_value','type'],['SampleRules','Scaler_id'],record_prefix='additional_value',sep='_',max_level=None,errors='ignore')
df23=pd.merge(df2,df3,how='inner',left_on=('SampleRules_Scaler_id'),right_on=('SampleRules_Scaler_id'))
df23.to_csv('Sample2.txt',encoding='utf-8',index=False,sep'~',na_rep='')
Current output(Sample2.txt):
1~4~0.1~0.001~1.70~False~#comment~AAA~BBB~CCC
1~4~0.1~0.001~1.70~False~#comment~AAA~BBB~DDD
2~4~0.1~0.001~1.70~False~#comment~AAA~BBB~CCC
df4=pd.json_normalize(data['SampleRecord'],['SampleRules','additional_value','scope'],['SampleRules','Scaler_id'],record_prefix='additional_value',sep='_',max_level=None,errors='ignore') #This throws KeyError='Scope' (since this key is missing in few records)
I tried to use get() since it gives default value None but it didnt work,
df4=pd.json_normalize(data['SampleRecord']['SampleRules'],['additional_value'],['scope'].get('value'),['SampleRules','Scaler_id'],record_prefix='additional_value',sep='_',max_level=None,errors='ignore')
#TypeError : list indices must be integers or slices,not str
Problems:
1)How to get nested dictionary (additional_value) values in single normalize python code like without explicitly defining df2,df3,df4 for each sub dictionaries?
2)How to get missing key as Null if key itself missing in Json record and avoid keyError
I have already referred the below,but no luck
How to fill missing json keys with key and null value?
If key not in JSON then set value to null and insert into dataframe
Python JSON TypeError list indices must be integers or slices, not str
python dictionary keyError
I am a beginner to Python. Any suggestions would be of great help.
Thanks in advance!

How to update/change both keys and values separately (not dedicated key-value pair) in a deeply nested JSON in python 3.x

I have a JSON file where I need to replace the UUID and update it with another one. I'm having trouble replacing the deeply nested keys and values.
Below is my JSON file that I need to read in python, replace the keys and values and update the file.
JSON file - myfile.json
{
"name": "Shipping box"
"company":"Detla shipping"
"description":"---"
"details" : {
"boxes":[
{
"box_name":"alpha",
"id":"a3954710-5075-4f52-8eb4-1137be51bf14"
},
{
"box_name":"beta",
"id":"31be3763-3d63-4e70-a9b6-d197b5cb6929"
} ​
​ ]
​}
"container": [
"a3954710-5075-4f52-8eb4-1137be51bf14":[],
"31be3763-3d63-4e70-a9b6-d197b5cb6929":[] ​
​]
​"data":[
{
"data_series":[],
"other":50
},
{
"data_series":[],
"other":40
},
{
"data_series":
{
"a3954710-5075-4f52-8eb4-1137be51bf14":
{
{
"dimentions":[2,10,12]
}
},
"31be3763-3d63-4e70-a9b6-d197b5cb6929":
{
{
"dimentions":[3,9,12]
}
}
},
"other":50
}
]
}
I want achieve something like the following-
"details" : {
"boxes":[
{
"box_name":"alpha"
"id":"replace_uuid"
},
}
.
.
.
​ "data":[ {
"data_series":
{
"replace_uuid":
{
{
"dimentions":[2,10,12]
}
}
]
In such a type of deeply nested dictionary, how can we replace all the occurrence of keys and values with another string, here replace_uuid?
I tried with pop() and dotty_dict but I wasn't able to replace the nested list.
I was able to achieve it in the following way-
def uuid_change():
    """Generate a random UUID (via ``uuid.uuid4``) and return it as a string."""
    return str(uuid.uuid4())
# NOTE(review): the name `dict` shadows the built-in type; it is kept here only
# so that any later code referring to it keeps working -- consider renaming.
dict = json.load(f)
for uid in dict['details']['boxes']:
    old_id = uid['id']
    replace_id = uuid_change()
    uid['id'] = replace_id
    # Rename the matching key in each container entry. Testing membership
    # first avoids adding/removing keys while iterating over the same dict.
    for i in range(n):
        container_entry = dict['container'][i]
        if old_id in container_entry:
            container_entry[replace_id] = container_entry.pop(old_id)  # replace the key
    # Same key rename inside the data_series mapping of the third data entry.
    data_series = dict['data'][2]['data_series']
    if old_id in data_series:
        data_series[replace_id] = data_series.pop(old_id)  # replace the key

Update json data with context in Python using jsonpath-ng

following Update json nodes in Python using jsonpath, would like to know how one might update the JSON data given a certain context.
So, say we pick the exact same JSON example:
{
"SchemeId": 10,
"nominations": [
{
"nominationId": 1
}
]
}
But this time, would like to double the value of the original value, hence some lambda function is needed which takes into account the current node value.
No need for lambdas; for example, to double SchemeId, something like this should work:
data = json.loads("""the json string above""")
jsonpath_expr = parse('$.SchemeId')
jsonpath_expr.find(data)
val = jsonpath_expr.find(data)[0].value
jsonpath_expr.update(data, val*2)
print(json.dumps(data, indent=2))
Output:
{
"SchemeId": 20,
"nominations": [
{
"nominationId": 1
}
]
}
Here is example with lambda expression:
import json
from jsonpath_ng import parse
settings = '''{
"choices": {
"atm": {
"cs": "Strom",
"en": "Tree"
},
"bar": {
"cs": "Dům",
"en": "House"
},
"sea": {
"cs": "Moře",
"en": "Sea"
}
}
}'''
json_data = json.loads(settings)
pattern = parse('$.choices.*')
def magic(f: dict, to_lang='cs'):
    """Return the entry of ``f`` stored under the key ``to_lang``.

    Args:
        f: Mapping to look the key up in.
        to_lang: Key to select. Defaults to ``'cs'``.

    Raises:
        KeyError: If ``f`` has no entry for ``to_lang``.
    """
    return f[to_lang]
pattern.update(json_data,
lambda data_field, data, field: data.update({field: magic(data[field])}))
json_data
returns
{
'choices': {
'atm': 'Strom',
'bar': 'Dům',
'sea': 'Moře'
}
}

Elegant way of iterating list of dict python

I have a list of dictionaries as below. I need to iterate over the list and, for every entry in each sections list, discard the contents of parameters and set it to an empty dictionary.
input = [
{
"category":"Configuration",
"sections":[
{
"section_name":"Global",
"parameters":{
"name":"first",
"age":"second"
}
},
{
"section_name":"Operator",
"parameters":{
"adrress":"first",
"city":"first"
}
}
]
},
{
"category":"Module",
"sections":[
{
"section_name":"Global",
"parameters":{
"name":"first",
"age":"second"
}
}
]
}
]
Expected Output:
[
{
"category":"Configuration",
"sections":[
{
"section_name":"Global",
"parameters":{}
},
{
"section_name":"Operator",
"parameters":{}
}
]
},
{
"category":"Module",
"sections":[
{
"section_name":"Global",
"parameters":{}
}
]
}
]
My current code looks like below:
category_list = []
for categories in input:
sections_list = []
category_name_dict = {"category": categories["category"]}
for sections_dict in categories["sections"]:
section = {}
section["section_name"] = sections_dict['section_name']
section["parameters"] = {}
sections_list.append(section)
category_name_dict["sections"] = sections_list
category_list.append(category_name_dict)
Is there a more elegant and more performant way to compute this? The keys category, sections, section_name, and parameters are constant.
The easier way is not to rebuild the dictionary without the parameters, just clear it in every section:
# `input` is the list of category dictionaries from the question; each
# section's "parameters" entry is replaced in place with an empty dict.
for value in input:
    for section in value['sections']:
        section['parameters'] = {}
Code demo
Elegance is in the eye of the beholder, but rather than creating empty lists and dictionaries then filling them why not do it in one go with a list comprehension:
category_list = [
{
**category,
"sections": [
{
**section,
"parameters": {},
}
for section in category["sections"]
],
}
for category in input
]
This is more efficient and (in my opinion) makes it clearer that the intention is to change a single key.

nested json to csv using pandas normalize

With given script I am able to get output as I showed in a screenshot,
but there is a column named as cve.description.description_data which is again in json format. I want to extract that data as well.
import json
import pandas as pd
from pandas.io.json import json_normalize
#load json object
with open('nvdcve-1.0-modified.json') as f:
d = json.load(f)
#tells us parent node is 'programs'
nycphil = json_normalize(d['CVE_Items'])
nycphil.head(3)
works_data = json_normalize(data=d['CVE_Items'], record_path='cve')
works_data.head(3)
nycphil.to_csv("test4.csv")
If I change works_data = json_normalize(data=d['CVE_Items'], record_path='cve.descr') it gives this error:
"result = result[spec] KeyError: 'cve.description'"
JSON format as follows:
{
"CVE_data_type":"CVE",
"CVE_data_format":"MITRE",
"CVE_data_version":"4.0",
"CVE_data_numberOfCVEs":"1000",
"CVE_data_timestamp":"2018-04-04T00:00Z",
"CVE_Items":[
{
"cve":{
"data_type":"CVE",
"data_format":"MITRE",
"data_version":"4.0",
"CVE_data_meta":{
"ID":"CVE-2001-1594",
"ASSIGNER":"cve#mitre.org"
},
"affects":{
"vendor":{
"vendor_data":[
{
"vendor_name":"gehealthcare",
"product":{
"product_data":[
{
"product_name":"entegra_p&r",
"version":{
"version_data":[
{
"version_value":"*"
}
]
}
}
]
}
}
]
}
},
"problemtype":{
"problemtype_data":[
{
"description":[
{
"lang":"en",
"value":"CWE-255"
}
]
}
]
},
"references":{
"reference_data":[
{
"url":"http://apps.gehealthcare.com/servlet/ClientServlet/2263784.pdf?DOCCLASS=A&REQ=RAC&DIRECTION=2263784-100&FILENAME=2263784.pdf&FILEREV=5&DOCREV_ORG=5&SUBMIT=+ ACCEPT+"
},
{
"url":"http://www.forbes.com/sites/thomasbrewster/2015/07/10/vulnerable- "
},
{
"url":"https://ics-cert.us-cert.gov/advisories/ICSMA-18-037-02"
},
{
"url":"https://twitter.com/digitalbond/status/619250429751222277"
}
]
},
"description":{
"description_data":[
{
"lang":"en",
"value":"GE Healthcare eNTEGRA P&R has a password of (1) value."
}
]
}
},
"configurations":{
"CVE_data_version":"4.0",
"nodes":[
{
"operator":"OR",
"cpe":[
{
"vulnerable":true,
"cpe22Uri":"cpe:/a:gehealthcare:entegra_p%26r",
"cpe23Uri":"cpe:2.3:a:gehealthcare:entegra_p\\&r:*:*:*:*:*:*:*:*"
}
]
}
]
},
"impact":{
"baseMetricV2":{
"cvssV2":{
"version":"2.0",
"vectorString":"(AV:N/AC:L/Au:N/C:C/I:C/A:C)",
"accessVector":"NETWORK",
"accessComplexity":"LOW",
"authentication":"NONE",
"confidentialityImpact":"COMPLETE",
"integrityImpact":"COMPLETE",
"availabilityImpact":"COMPLETE",
"baseScore":10.0
},
"severity":"HIGH",
"exploitabilityScore":10.0,
"impactScore":10.0,
"obtainAllPrivilege":false,
"obtainUserPrivilege":false,
"obtainOtherPrivilege":false,
"userInteractionRequired":false
}
},
"publishedDate":"2015-08-04T14:59Z",
"lastModifiedDate":"2018-03-28T01:29Z"
}
]
}
I want to flatten all data.
Assuming the multiple URLs delineate between rows and all else meta data repeats, consider a recursive function call to extract every key-value pair in nested json object, d.
The recursive function uses the global statement to update two module-level objects, multi and inner, which are afterwards combined into a list of dictionaries for the pd.DataFrame() call. The final loop copies the recursive function's dictionary, inner, once per entry of multi (which holds the different URLs) and merges that entry into the copy.
import json
import pandas as pd
# load json object
with open('nvdcve-1.0-modified.json') as f:
d = json.load(f)
multi = []
inner = {}


def recursive_extract(i):
    """Flatten nested JSON data into the module-level ``inner`` and ``multi``.

    Scalar values found in dicts are stored in ``inner`` under their own key
    (a later key with the same name overwrites an earlier one). A list with
    exactly one element is treated as that element. A list with more than one
    element is not descended into; it is stored whole in ``multi``, replacing
    any list stored there before. Empty lists and scalar list elements are
    ignored.

    Args:
        i: Parsed JSON data -- a dict, a list, or a scalar.

    Returns:
        None. Results are left in the globals ``inner`` and ``multi``.
    """
    global multi, inner
    if isinstance(i, list):
        if len(i) == 1:
            # Recursing handles a dict, a nested list, or a scalar element
            # alike, instead of assuming the element has .items().
            recursive_extract(i[0])
        elif len(i) > 1:
            multi = i
    elif isinstance(i, dict):
        for k, v in i.items():
            if isinstance(v, (list, dict)):
                recursive_extract(v)
            else:
                inner[k] = v
recursive_extract(d['CVE_Items'])
# Build one output row per entry of `multi`: start from a copy of the shared
# fields in `inner`, then add that entry's own keys (which win on a clash).
data_dict = []
for i in multi:
    tmp = inner.copy()
    tmp.update(i)
    data_dict.append(tmp)
df = pd.DataFrame(data_dict)
df.to_csv('Output.csv')
Output (all columns the same except for URL, widened for emphasis)

Categories