I am working on a script to generate some test data based on a JSON spec. The intention of this script is to construct a JSON object / Python dict record.
To simplify things, I am using a list, `items`, that holds my source items; each item is also the dotted path where its value should be inserted.
Here's my intended output -
{
"access": {
"device": {
"java": {
"version": "Test Data"
},
"python": {
"version": "Test Data"
}
},
"type": "Test Data"
},
"item1": 1,
"item2": 0
}
I am able to build the nested objects but they are all getting inserted at first level of the dictionary instead.
How can I use dest_path to store the result in the intended location?
Source:
import json
import random
def get_nested_obj(items: list, value='Test Data'):
    """
    Construct a nested json object.

    Wraps ``value`` in one single-key dict per entry of ``items``; the first
    entry becomes the outermost key and the last entry holds ``value``.

    :param items: keys ordered from outermost to innermost.
    :param value: leaf stored under the innermost key (defaults to
        ``'Test Data'``).
    :return: the nested dict, or ``value`` itself when ``items`` is empty.
    """
    res = value
    # Build inside-out: start at the leaf and wrap it once per key.
    for item in reversed(items):
        res = {item: res}
    return res
def get_dest_path(source_fields, root='record'):
    """
    Construct dest path where result from `get_nested_obj` should go.

    The result is a subscript expression rendered as text, e.g.
    ``record['access']['device']`` for ``['access', 'device']``.

    :param source_fields: keys leading to the destination, outermost first.
    :param root: name placed in front of the subscripts (defaults to
        ``'record'``).
    :return: ``root`` followed by one ``['key']`` subscript per field; just
        ``root`` when ``source_fields`` is empty.
    """
    # join() builds the string in one pass instead of repeated +=.
    return root + ''.join(f"['{x}']" for x in source_fields)
# Target dict into which every dotted path from `items` is merged.
record = {}
items = ['access.device.java.version', 'access.device.python.version', 'access.type', 'item1', 'item2']
for item in items:
if '.' in item:
source_fields = item.split('.')
# `temp` walks down the part of the path that already exists in `record`.
temp = record
for i, source_field in enumerate(source_fields):
if source_field in temp:
temp = temp[source_field]
continue
# First key missing at the current level: build the sub-tree that hangs below it.
res = get_nested_obj(source_fields[i+1:])
# `dest_path` is only a textual rendering of the keys walked so far; it is printed, not used for the write.
dest_path = get_dest_path(source_fields[:i])
print(dest_path)
# NOTE(review): the write targets `record` (the top level) rather than `temp` (the level reached by the walk above).
record[source_field] = res # Here's the problem. How to use dest_path here?
break
else:
# Presumably pairs with the `'.' in item` check: items without a dot get a random 0/1 at the top level.
record[item] = random.randint(0, 1)
print(json.dumps(record))
My output:
{
"access": {
"device": {
"java": {
"version": "Test Data"
}
}
},
"python": {
"version": "Test Data"
},
"type": "Test Data",
"item1": 1,
"item2": 0
}
To construct the record dictionary from the items list, you can use the following example:
import random
items = [
    "access.device.java.version",
    "access.device.python.version",
    "access.type",
    "item1",
    "item2",
]
record = {}
for dotted_path in items:
    # Everything before the last segment is a chain of parent dicts.
    *parents, leaf = dotted_path.split(".")
    if not parents:
        # Plain (undotted) items get a random 0/1 flag at the top level.
        record[leaf] = random.randint(0, 1)
        continue
    node = record
    for name in parents:
        # Reuse the existing sub-dict or create it on first visit.
        node = node.setdefault(name, {})
    node[leaf] = "Test Data"
print(record)
Prints:
{
"access": {
"device": {
"java": {"version": "Test Data"},
"python": {"version": "Test Data"},
},
"type": "Test Data",
},
"item1": 1,
"item2": 1,
}
Not very different from the other answer, but recursive.
import json
items = ['access.device.java.version', 'access.device.python.version', 'access.type', 'item1', 'item2']
def populate(di, item):
    """Insert the dotted path ``item`` into ``di`` and return ``di``.

    Intermediate segments become nested dicts (created on demand). The last
    segment receives ``1`` when its name starts with ``"item"`` and
    ``"Test Data"`` otherwise.
    """
    *parents, leaf = item.split(".")
    node = di
    for name in parents:
        # Descend, creating the level if it is not there yet.
        node = node.setdefault(name, {})
    node[leaf] = 1 if leaf.startswith("item") else "Test Data"
    return di
di = {}
for item in items:
populate(di,item)
print(json.dumps(di, indent=4))
output:
{
"access": {
"device": {
"java": {
"version": "Test Data"
},
"python": {
"version": "Test Data"
}
},
"type": "Test Data"
},
"item1": 1,
"item2": 1
}
And here's a version that directly specifies the data, which would probably be more useful (and that return di is also unnecessary in both cases):
import json
items = [('access.device.java.version',"Test Data"), ('access.device.python.version', "Test Data"), ('access.type', "type data"), ('item1',1), ('item2',2)]
def populate(di, item):
    """Store ``item[1]`` in ``di`` at the dotted path given by ``item[0]``.

    Intermediate path segments become nested dicts, created on demand.
    ``di`` is modified in place; nothing is returned.
    """
    dotted_path, value = item[0], item[1]
    *parents, leaf = dotted_path.split(".")
    node = di
    for name in parents:
        # Descend, creating the level if it is not there yet.
        node = node.setdefault(name, {})
    node[leaf] = value
record = {}
for item in items:
populate(record,item)
print(json.dumps(record, indent=4))
{
"access": {
"device": {
"java": {
"version": "Test Data"
},
"python": {
"version": "Test Data"
}
},
"type": "type data"
},
"item1": 1,
"item2": 2
}
FWIW, tried collections.defaultdict for fun and that does not recurse itself.
Related
I am writing Python code to validate a dictionary against a JSON schema. The schema uses $ref references: some point to a path in another schema file, such as ("$ref": "./insurance-schema.json#/components/schemas/Name"), and others point inside the same file, such as ("$ref": "#/components/schemas/Adresse"). My schema looks like this:
And documents-schema.json contains a nested dictionary called components, where I can find my AdditionalDocument properties.
After the validation, I want to delete from my dictionary the attributes that don't match the schema.
Here is my code:
"components": {
"schemas": {
"title": "Data",
"description": "Bloc data de l'objet",
"additionalProperties": false,
"properties": {
"Id": {
"type": "string"
},
"Name": {
"type": "string"
},
"DateNac": {
"type": "string"
},
"Adresse": {
"$ref": "#/components/schemas/Adresse"
},
"MedicalDocuments": {
"type": "array",
"items": {
"$ref": "./documents-schema.json#/components/schemas/AdditionnalDocument"
}
},
"CompanyProperties": {
"type": "Object",
"required": [
"EtsName",
"EtsIdx",
],
"additionalProperties": false,
"properties": {
"startDate": {
"type": "string",
"format": "date"
},
"EtsName": {
"type": "string"
},
"EtsIdx": {
"$ref": "#/components/schemas/EtsIdx"
}
}
},
"EtsIdx": {
"required": [
"Reference",
"Date"
],
"type": "object",
"additionalProperties": false,
"properties": {
"Reference": {
"type": "string"
},
"Complement": {
"type": "string"
},
"Date": {
"type": "string",
"format": "date"
}
}
}
}
}
}
}```
import json
elements_to_exclude = ["$..traceability"]
UTF_8 = 'utf8'
def extract_object(input_payload: dict, path_schema: str) -> list:
    """Return the exclusion list for ``input_payload`` under the given schema.

    The payload is first validated with ``is_valid``. If it validates,
    ``is_exist`` is run and its result is returned, so the declared ``list``
    return type is honoured instead of always yielding ``None``.

    :param input_payload: the dictionary to check.
    :param path_schema: schema location, passed through to ``is_valid`` and
        ``is_exist``.
    :return: whatever ``is_exist`` returns, or an empty list when the payload
        does not validate.
    """
    if not is_valid(path_schema, input_payload):
        return []
    return is_exist(path_schema, input_payload)
def is_valid(path_schema: str, input: dict) -> bool:
    """Validate ``input`` against the JSON schema located at ``path_schema``.

    :param path_schema: either a URL (fetched with ``requests``) or a local
        file path (opened and parsed as JSON).
    :param input: the instance to validate.
    :return: ``True`` when ``jsonschema.validate`` accepts the instance,
        ``False`` when it raises ``ValidationError``.
    """
    import os

    import requests
    import validators
    from jsonschema import RefResolver, exceptions, validate

    if validators.url(path_schema):
        response = requests.get(path_schema)
        # The downloaded body is text; it must be parsed before it can serve as a schema.
        schema_content = json.loads(response.content.decode(UTF_8))
        # Relative "$ref"s are resolved against the URL the schema came from.
        base_uri = path_schema
    else:
        # json.load reads from the file object; the context manager closes it.
        with open(path_schema, encoding=UTF_8) as schema_file:
            schema_content = json.load(schema_file)
        # Relative "$ref"s are resolved against the local "schemas" directory.
        base_uri = 'file://' + os.path.abspath('schemas') + '/'

    # Passing the schema as referrer lets same-file refs ("#/...") resolve
    # without fetching anything.
    resolver = RefResolver(base_uri, schema_content)
    try:
        # The resolver must be a keyword argument: the third positional
        # parameter of validate() is the validator class.
        validate(input, schema_content, resolver=resolver)
    except exceptions.ValidationError:
        return False
    return True
def is_exist(path_schema: dict, input: dict) -> list:
"""Walk ``input`` and record keys that are absent from the schema.

Missing keys are meant to be added to the module-level ``elements_to_exclude``
list, which is also the value returned by the final statement.

NOTE(review): ``path_schema`` is annotated as a dict, yet the first statement
passes it to ``open``; the recursive calls below pass schema fragments.
Confirm what callers are expected to supply.
"""
# NOTE(review): json.loads expects str/bytes, not a file object - json.load was presumably intended.
schema_content = json.loads(open(path_schema))
for k, v in input.items():
if schema_content.get(k) is None:
# the key does not exist in the schema
# NOTE(review): list.append takes exactly one argument; this call raises TypeError.
elements_to_exclude.append("$..", k)
else:
# NOTE(review): the lookups below use the value ``v`` and the builtin ``type`` as keys;
# presumably the key ``k`` and the string "type" were meant - confirm.
if schema_content.get(v, {}).get(type) != type(input.get(v)) and schema_content.get(v, {}).get(
type) is not None:
# the types differ
raise Exception("Types doesnt mach")
elif schema_content.get(v, {}).get(type) == 'object':
# nested object with type object
return is_exist(schema_content.get(v, {}).get("properties"), input.get(v))
elif schema_content.get(v, {}).get("$ref") is not None:
# nested object with ref object or array
ref = schema_content.get(v, {}).get("$ref")
if "#" in ref:
# the path contains "#"
ref_content = ref.split("#")
if len(ref_content) > 1 and len(ref_content[0]) != 0:
# the target lives in another file
path = ref_content[0]
nested_schema_content = json.loads(open(path))
return is_exist(nested_schema_content.get("components", {}).get("schemas"), input.get(v))
elif len(ref_content) > 1 and len(ref_content[0]) == 0:
# ref of the same file
# NOTE(review): a fragment such as "/components/schemas/X" splits into a leading empty string,
# and ``nested_schema_content`` appears to be read before it is assigned on this path.
keys = ref_content[1].split("/")
for key in keys:
nested_schema_content = nested_schema_content[key]
return is_exist(nested_schema_content, input.get(v))
else:
# plain path that does not contain "#"
nested_schema_content = json.loads(open(ref))
return is_exist(nested_schema_content.get("components", {}).get("schemas"), input.get(v))
return elements_to_exclude
Need some help please.
I have a similar json file:
{
"timestamp": "2022-09-20T08:16:00.000Z",
"metadata": {
"orgID": "6780",
"projectId": 0988,
}
},
{
"data":
"workers": [
{
"identifiers": {
"FullName": null,
"NINumber": null,
"CompID": null
},
"lastName": null,
"costCenter": null
},
{
"codes": [
{
"source": {
"name": "net_salary",
"value": 11500
},
"name": "net_salary",
"code": "rt_sa",
"value": 11500
},
{
"identifiers": {
"FullName": null,
"NINumber": null,
Comp ID": null
},
"lastName": null,
"costCenter": null
},
{
"codes": [
{
"source": {
"name": "hiredate",
"value": 3.333
},
"name": "hiredate",
"code": "h_code",
"value": 3.333
},
I want to change the key names under source from name->fieldname and value to fieldvalue.
However, I don't want to change the keys where there are the keys: name, code, value.
I tried this but it is not correct:
with open(r'C:\Users\Administrator\Documents\test\PJSON.json') as f:
payrolldata = json.load(f)
source = payrolldata[1]['data']['workers'][1]['codes'][1]['source']
print(source)
oldvalue = source.keys()
print(str(oldvalue).replace('name', 'newname').replace('value', 'value2'))
payrolldata = str(oldvalue).replace('name', 'newname').replace('value', 'newvalue2')
for d in payrolldata:
d['newName':] = d.pop["'name':"]
with open(r'C:\Users\Administrator\Documents\test\PJSON.json', "w") as f:
json.dump(payrolldata, f, indent=4)
I suggest you don't convert your dict into a string; instead, apply something like this to the dict you read from the JSON file (with json.load):
def deep_replace_key(
    d,
    old_key: str,
    new_key: str,
    branch_name: str = None,
    replace: bool = False,
):
    """Rename ``old_key`` to ``new_key`` inside a nested dict/list, in place.

    When ``branch_name`` is ``None`` the rename is applied in every dict
    that is visited. Otherwise it is applied only in dicts reached directly
    (or through lists) from a key equal to ``branch_name``. A value stored
    under a renamed key is not descended into. ``d`` itself is returned.
    """
    # Without a branch restriction every visited dict is eligible.
    renaming_allowed = True if branch_name is None else replace

    if isinstance(d, list):
        for element in d:
            deep_replace_key(element, old_key, new_key, branch_name, renaming_allowed)
        return d

    if not isinstance(d, dict):
        # Scalars (and anything else) are left untouched.
        return d

    # Iterate over a snapshot because the dict is mutated while renaming.
    for name, child in list(d.items()):
        if renaming_allowed and name == old_key:
            d[new_key] = d.pop(old_key)
            continue
        # The flag is re-evaluated per key: only the branch key switches it on.
        entering_branch = bool(branch_name and name == branch_name)
        deep_replace_key(child, old_key, new_key, branch_name, entering_branch)
    return d
Here is a working test for this code
import unittest
# test
class TestDeepReplaceKey(unittest.TestCase):
    """Checks branch-restricted and unrestricted renaming with deep_replace_key."""

    def test_deep_replace_key(self):
        """Rename inside "source" only, then rename everywhere."""

        def entry():
            # Each entry carries the same pair of keys in and out of "source".
            return {
                "source": {"name": "hiredate", "value": 3.333},
                "not_source": {"name": "hiredate", "value": 3.333},
            }

        payload = {"codes": [entry(), entry()]}

        # Restricted to the "source" branch: "not_source" must keep its key.
        payload = deep_replace_key(payload, "name", "new_name", "source")
        first = payload["codes"][0]
        self.assertEqual(first["source"]["new_name"], "hiredate")
        self.assertEqual(first["not_source"]["name"], "hiredate")

        # No branch given: the rename now reaches "not_source" as well.
        payload = deep_replace_key(payload, "name", "new_name")
        self.assertEqual(payload["codes"][0]["not_source"]["new_name"], "hiredate")
So you can see if I call deep_replace_key(d, "name", "new_name", "source", ) the change only happens in the source block.
If I omit mentioning "source" like this deep_replace_key(d, "name", "new_name", ) change happens everywhere.
I need to create a json string like in the example below and I am thinking of using a dict that I can ultimately json.dumps to a json string. I will build this dict in a loop. This is what the json should look like
{
"big-data-list" :[
{
"indexnum": "1",
"components" :
[
{
"key": "some-key1",
"item" :"item name",
"data" :"some string",
}
]
},
{
"indexnum": "2",
"components" :
[
{
"key": "some-key2",
"item" :"item name 2",
"data" :"some string 2",
},
{
"key": "some-key3",
"item" :"item name 3",
"data" :"some string 3",
}
]
}
}
Here is what I tried without a loop to see how things work
bigdata= {}
indexnum= {}
componentList = {}
indexnum["components"] = {}
indexnum["mileage"] = 20
componentList["key"] = "some-key1"
componentList["item"] = "item name"
componentList["data"] = "some string"
indexnum["components"][0] = componentList
componentList["key"] = "some-key2"
componentList["item"] = "item name 2"
componentList["data"] = "some string 2"
indexnum["components"][1] = componentList
print(json.dumps(indexnum))
What I end up getting looks like this:
{"components": {"0": {"key": "somekey2", "item": "fuel2"}, "1": {"key": "somekey2", "item": "fuel2"}}, "mileage": 20}
How do I build the dict so I can json dump it in the way I need to? Is there a better way to come up with such a json object as represented in the example above?
You basically just need to work on your logic to put your data in the appropriate structure of dicts and lists.
Below is an example of a loop that puts some data into the specified structure:
>>> # say you originally have your data in the following list
... lists_of_components = [
... [("some-key1", "item name", "some string")],
... [("some-key2", "item name 2", "some string 2"),
... ("some-key3", "item name 3", "some string 3")],
... ]
... bigdata = {}
... bigdata["big-data-list"] = []
... for i, components in enumerate(lists_of_components, 1):
... bigdata["big-data-list"].append({
... "indexnum": str(i),
... "components": [
... {k: v for k, v in zip(["key", "item", "data"], component)}
... for component in components]
... })
... print(json.dumps(bigdata, indent=4))
{
"big-data-list": [
{
"indexnum": "1",
"components": [
{
"key": "some-key1",
"item": "item name",
"data": "some string"
}
]
},
{
"indexnum": "2",
"components": [
{
"key": "some-key2",
"item": "item name 2",
"data": "some string 2"
},
{
"key": "some-key3",
"item": "item name 3",
"data": "some string 3"
}
]
}
]
}
[
{
"asset_id": 49,
"status": "success",
"name": "de1",
"app": "CCid",
"action_results": [
{
"status": "success",
"data": [
{
"report": {
"status": {
"origin": "sa",
"status": "Up.",
"sha1": "4a",
"sample_started_at": 159,
"running_on": "mt",
"ran": true,
"auto": true,
"vm": "w"
},
"artifacts": {
"1": {
"size": 599518,
"mime-type": "applic=binary",
"antivirus": {
"reversing_labs": {
"status": "UNKNOWN",
"scanner_count": 0,
"scanner_match": 0,
"threat_name": "",
"query_hash": {
"sha256": "029"
},
"last_seen": "0001-01-01T00:00:00Z"
}
},
"entropy": 7.9870740440306
},
"10": {
"size": 599518,
"mime-type": "applic=binary",
"antivirus": {
"reversing_labs": {
"status": "UNKNOWN",
"scanner_count": 0,
"scanner_match": 0,
"threat_name": "",
"query_hash": {
"sha256": "d38"
},
"last_seen": "0001-01-01T00:00:00Z"
}
},
"entropy": 1
}
}
}
}
],
"app_id": 15
}
]
}
]
I am trying to access scanner count and scanner match values using python But not getting the required result.
action_results = results[0].get('action_results', [])
action_status = action_results[0].get('status', 'failed')
results_data = action_results[0].get('data', [])
sandbox_report = results_data[0].get('report', {})
for key,value in sandbox_report.items():
if key == "artifacts":
artifacts = list()
for each_key, each_value in value.items():
for i in each_value:
if i == "antivirus":
artifact_item = dict()
reversing_labs = i.get('reversing_labs', {})
artifact_item['scanner_count'] = reversing_labs.get('scanner_count', 0)
Can anyone point out whats wrong ?
You can try
action_results = results[0].get('action_results', [])
action_status = action_results[0].get('status', 'failed')
results_data = action_results[0].get('data', [])
sandbox_report = results_data[0].get('report', {})
artifact_item = dict()
for key,value in sandbox_report.items():
if key == "artifacts":
artifacts = list()
for each_key, each_value in value.items():
for k, v in each_value.items():
if k == "antivirus":
reversing_labs = v.get('reversing_labs', {})
artifact_item[key] = [{'scanner_count' : reversing_labs.get('scanner_count', 0)},
{'scanner_match' : reversing_labs.get('scanner_match', 0)}]
print(artifact_item)
Output
{'artifacts': [{'scanner_count': 0}, {'scanner_match': 0}]}
This code extracts the values of scanner_count and scanner_match into a list that is stored in a dictionary under the key artifacts.
this is how i'd do it.
jsonData = json.loads(results)
nested = jsonData[0]['action_results'][0]['data'][0]['report']['artifacts']
for artifact in nested:
print(f"Scanner count = {nested[artifact]['antivirus']['reversing_labs']['scanner_count']}")
print(f"Scanner match = {nested[artifact]['antivirus']['reversing_labs']['scanner_match']}")
In my JSON file how do I replace specific parameter value with key value pair combination?
In the below JSON I want to replace document and code values with by referring dict json sample.
JSON file:
[
{
"_id": "211123",
"_metadata": {
"version": {
"document": "CUS",
"service": "1"
},
"rider": [
{
"code": "01"
}
]
}
},
{
"_id": "211123",
"_metadata": {
"version": {
"document": "POL",
"service": "1"
},
"rider": [
{
"code": "02"
}
]
}
}
]
Referall JSON:
document:
{
"_metadata.version.document.CUS" : "Customer",
"_metadata.version.document.POL" : "Policy"
}
rider:
{
"rider.code.01" : "RIDER01",
"rider.code.02" : "RIDER02"
}
Example:
In the first JSON record, document has CUS value and it should be replaced with Customer.
If code has 01 as value it should be replaced with RIDER01.
Your question is unclear but if I got it right, here is what you are looking for:
import json
json_text = '''
[
{
"_id": "211123",
"_metadata": {
"version": {
"document": "CUS",
"service": "1"
},
"rider": [
{
"code": "01"
}
]
}
},
{
"_id": "211123",
"_metadata": {
"version": {
"document": "POL",
"service": "1"
},
"rider": [
{
"code": "02"
}
]
}
}
]
'''
# Lookup tables: short code -> replacement text.
documents = {'CUS': 'Customer', 'POL': 'Policy'}
riders = {'01': 'RIDER01', '02': 'RIDER02'}

json_dict = json.loads(json_text)
for entry in json_dict:
    version = entry['_metadata']['version']
    # Unknown codes fall back to themselves, i.e. they stay unchanged.
    version['document'] = documents.get(version['document'], version['document'])
    for rider in entry['_metadata']['rider']:
        rider['code'] = riders.get(rider['code'], rider['code'])
json_text = json.dumps(json_dict)
If your JSON text is in a file called file.json, you can use the following code instead:
import json
from pathlib import Path
documents = {
'CUS': 'Customer',
'POL': 'Policy'
}
riders = {
'01': 'RIDER01',
'02': 'RIDER02'
}
json_file = Path('file.json')
json_dict = json.loads(json_file.read_text())
for _id in json_dict:
document = _id['_metadata']['version']['document']
if document in documents:
_id['_metadata']['version']['document'] = documents[document]
for i, rider in enumerate(_id['_metadata']['rider']):
code = rider['code']
if code in riders:
rider['code'] = riders[code]
json_file.write_text(json.dumps(json_dict, indent=4))
I hope it helps.
This will be helpful
import json
document = {"_metadata.version.document.CUS" : "Customer","_metadata.version.document.POL" : "Policy" }
jsons = {"rider.code.01" : "RIDER01","rider.code.02" : "RIDER02" }
with open('jsonfile.json','r') as f:
json_input = json.load(f)
dlist = [x.split('.') for x in document.keys()]
jlist = [['_metadata']+k for k in [x.split('.') for x in jsons.keys()]]
for js in json_input:
for d in dlist:
if js['_metadata']['version']['document'] == d[-1]:
js['_metadata']['version']['document']= document['.'.join(d)]
break
for j in jlist:
if js['_metadata']['rider'][0]['code'] == j[-1]:
js['_metadata']['rider'][0]['code'] = jsons['.'.join(j[1:])]
break
with open('output_json.json','w') as f:
json.dump(json_input, f)