How do I convert a complex JSON document to a Python object? I am having difficulty converting the attached complex JSON to a Python object, and I have to validate this data against the DB later.
Json:
{
"namespace":"Data.Datapoint",
"type":"record",
"name":"Blood Donar",
"fields":[
{
"name":"id",
"type":"int"
},
{
"name":"donor_number",
"type":"string"
},
{
"name":"birth_date",
"type":{
"type":"int",
"logicalType":"date"
},
"doc":"Birth Date"
},
{
"name":"height",
"type":[
"int",
"null"
],
"doc":"Height"
},
{
"name":"applicant_ts",
"type":[
{
"type":"long",
"logicalType":"timestamp-millis"
},
"null"
],
"doc":"Creation Timestamp"
},
{
"name":"arm_preference_ind",
"type":[
"string",
"null"
],
"doc":"Arm Preference; Selection from list"
},
{
"name":"abo_ind",
"type":[
"string",
"null"
],
"doc":"Blood Type/ABO"
},
{
"name":"vein_grading_ind",
"type":[
"string",
"null"
],
"doc":"Vein Grade"
}
]
}
import json
# Raw JSON text from the question, held as a single string so it can be parsed below.
data = '''
{ "namespace": "Data.Datapoint", "type": "record", "name": "Blood Donar", "fields": [ { "name": "id", "type": "int" }, { "name": "donor_number", "type": "string" }, { "name": "birth_date", "type": { "type": "int", "logicalType": "date" }, "doc": "Birth Date" }, { "name": "height", "type": [ "int", "null" ], "doc": "Height" }, { "name": "applicant_ts", "type": [ { "type": "long", "logicalType": "timestamp-millis" }, "null" ], "doc": "Creation Timestamp" }, { "name": "arm_preference_ind", "type": [ "string", "null" ], "doc": "Arm Preference; Selection from list" }, { "name": "abo_ind", "type": [ "string", "null" ], "doc": "Blood Type/ABO" }, { "name": "vein_grading_ind", "type": [ "string", "null" ], "doc": "Vein Grade" } ] }
'''
# Parse the text; the top-level JSON object becomes a dict, with "fields"
# available as a list of per-field dicts.
json_data = json.loads(data)
`json_data` is now a Python dict.
If you want to fetch JSON data from the web, you can try this:
import json
import requests
# Fetch the todo list. requests applies no timeout by default, so set one to
# keep the call from hanging forever if the server never answers.
response = requests.get("https://jsonplaceholder.typicode.com/todos", timeout=10)
# Fail loudly on a 4xx/5xx reply instead of trying to parse an error page as JSON.
response.raise_for_status()
# Decode the JSON body into Python objects.
todos = response.json()
Related
I started using Python Cubes OLAP recently.
I'm trying to sum/avg a JSON Postgres column. How can I do this?
my db structure:
events
id
object_type
sn_name
spectra
id
snx_wavelengths (json column)
event_id
my json:
{
"dimensions": [
{
"name": "event",
"levels": [
{
"name": "object_type",
"label": "Object Type",
"attributes": [
"object_type"
]
},
{
"name": "sn_name",
"label": "name",
"attributes": [
"sn_name"
]
}
]
},
{
"name": "spectra",
"levels": [
{
"name": "catalog_name",
"label": "Catalog Name",
"attributes": [
"catalog_name"
]
},
{
"name": "capture_date",
"label": "Capture Date",
"attributes": [
"capture_date"
]
}
]
},
{
"name": "date"
}
],
"cubes": [
{
"id": "uid",
"name": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5",
"dimensions": [
"event",
"spectra",
"date"
],
"aggregates": [
{
"name": "event_snx_wavelengths_sum",
"function": "sum",
"measure": "event.snx_wavelengths"
},
{
"name": "record_count",
"function": "count"
}
],
"joins": [
{
"master": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5.id",
"detail": "spectra.event_id"
},
],
"mappings": {
"event.sn_name": "sn_name",
"event.object_type": "object_type",
"spectra.catalog_name": "spectra.catalog_name",
"spectra.capture_date": "spectra.capture_date",
"event.snx_wavelengths": "spectra.snx_wavelengths",
"date": "spectra.capture_date"
},
}
]
}
I'm getting the following error:
Unknown attribute ''event.snx_wavelengths''
Can anyone help?
I already tried using MongoDB to do the sum, but I didn't have any success.
This question already has answers here:
How to extract data from dictionary in the list
(3 answers)
Closed 11 months ago.
I have the following json output.
"detections": [
{
"source": "detection",
"uuid": "50594028",
"detectionTime": "2022-03-27T06:50:56Z",
"ingestionTime": "2022-03-27T07:04:50Z",
"filters": [
{
"id": "F2058",
"unique_id": "3638f7c0",
"level": "critical",
"name": "Possible Right-To-Left Override Attack",
"description": "Possible Right-To-Left Override Detected in the Filename",
"tactics": [
"TA0005"
],
"techniques": [
"T1036.002"
],
"highlightedObjects": [
{
"field": "fileName",
"type": "filename",
"value": [
"1465940311.,S=473394(NONAMEFL(Z00057-PIfdp.exe))"
]
},
{
"field": "filePathName",
"type": "fullpath",
"value": "/exports/10_19/mail/12/91/20193/new/1465940311.,S=473394(NONAMEFL(Z00057-PIfdp.exe))"
},
{
"field": "malName",
"type": "detection_name",
"value": "HEUR_RLOTRICK.A"
},
{
"field": "actResult",
"type": "text",
"value": [
"Passed"
]
},
{
"field": "scanType",
"type": "text",
"value": "REALTIME"
}
]
},
{
"id": "F2140",
"unique_id": "5a313874",
"level": "medium",
"name": "Malicious Software",
"description": "A malicious software was detected on an endpoint.",
"tactics": [],
"techniques": [],
"highlightedObjects": [
{
"field": "fileName",
"type": "filename",
"value": [
"1465940311.,S=473394(NONAMEFL(Z00057-PIfdp.exe))"
]
},
{
"field": "filePathName",
"type": "fullpath",
"value": "/exports/10_19/mail/12/91/rs001291-excluido-20193/new/1465940311.,S=473394(NONAMEFL(Z00057-PIfdp.exe))"
},
{
"field": "malName",
"type": "detection_name",
"value": "HEUR_RLOTRICK.A"
},
{
"field": "actResult",
"type": "text",
"value": [
"Passed"
]
},
{
"field": "scanType",
"type": "text",
"value": "REALTIME"
},
{
"field": "endpointIp",
"type": "ip",
"value": [
"xxx.xxx.xxx"
]
}
]
}
],
"entityType": "endpoint",
"entityName": "xxx(xxx.xxx.xxx)",
"endpoint": {
"name": "xxx",
"guid": "d1dd7e61",
"ips": [
"2xx.xxx.xxx"
]
}
}
Inside the 'filters' array there are two entries, one with level critical and one with level medium, and both have a 'name' key.
I want to print only the first name, but when I print 'name', it returns both names:
How do I print only the first one?
If I put the print inside the loop over filters, it returns both names:
If I put the print inside the loop over detections, it only returns the second 'name', and that's not what I want:
If you only want to print the name of the first filter, there is no need to iterate over the filters; just index the list and print the value under "name":
# Assumes `r` is the parsed JSON shown above, i.e. a dict with a "detections" list.
for d in r['detections']:
    # Index 0 picks the first filter only, so exactly one name is printed per detection.
    print(d['filters'][0]['name'])
I have a JSON file that I need to read in a structured way so that I can insert each value into its respective database column. However, in the "customFields" tag the fields change index: for example, "Tribe / Customer" can be at index 0 (row['customFields'][0]) in one JSON block and at index 3 (row['customFields'][3]) in another. So I tried to read the data using the name of the field, row['customFields']['Tribe / Customer'], but I got the error below:
TypeError: list indices must be integers or slices, not str
Script:
def getCustomField(ModelData):
    """Print the identifier and selected custom fields of each card in the first squad.

    This is the snippet that reproduces the error quoted above, so the lookups
    are intentionally left as written. In the JSON below, each card's
    ``customFields`` is a list of ``{"name": ..., "value": ...}`` dicts, so
    indexing it with a field name raises
    ``TypeError: list indices must be integers or slices, not str``.

    Args:
        ModelData: parsed JSON with cards at ``["data"]["squads"][0]["cards"]``.
    """
    for row in ModelData["data"]["squads"][0]["cards"]:
        print(row['identifier'],
              row['customFields']['Tribe / Customer'],
              row['customFields']['Stopped with'],
              row['customFields']['Sub-Activity'],
              row['customFields']['Activity'],
              row['customFields']['Complexity'],
              row['customFields']['Effort'])
if __name__ == "__main__":
    # The context manager closes the file even if parsing raises.
    # JSON text is UTF-8 by specification, so do not rely on the platform default.
    with open('test.json', encoding='utf-8') as f:
        json_file = json.load(f)
    getCustomField(json_file)
JSON:
{
"data": {
"squads": [
{
"name": "TESTE",
"cards": [
{
"identifier": "0102",
"title": "TESTE",
"description": " TESTE ",
"status": "on_track",
"priority": null,
"assignees": [
{
"fullname": "TESTE",
"email": "TESTE"
}
],
"createdAt": "2020-04-16T15:00:31-03:00",
"secondaryLabel": null,
"primaryLabels": [
"TESTE",
"TESTE"
],
"swimlane": "TESTE",
"workstate": "Active",
"customFields": [
{
"name": "Tribe / Customer",
"value": "TESTE 1"
},
{
"name": "Checkpoint",
"value": "GNN"
},
{
"name": "Stopped with",
"value": null
},
{
"name": "Sub-Activity",
"value": "DEPLOY"
},
{
"name": "Activity",
"value": "TOOL"
},
{
"name": "Complexity",
"value": "HIGH"
},
{
"name": "Effort",
"value": "20"
}
]
},
{
"identifier": "0103",
"title": "TESTE",
"description": " TESTE ",
"status": "on_track",
"priority": null,
"assignees": [
{
"fullname": "TESTE",
"email": "TESTE"
}
],
"createdAt": "2020-04-16T15:00:31-03:00",
"secondaryLabel": null,
"primaryLabels": [
"TESTE",
"TESTE"
],
"swimlane": "TESTE",
"workstate": "Active",
"customFields": [
{
"name": "Tribe / Customer",
"value": "TESTE 1"
},
{
"name": "Stopped with",
"value": null
},
{
"name": "Checkpoint",
"value": "GNN"
},
{
"name": "Sub-Activity",
"value": "DEPLOY"
},
{
"name": "Activity",
"value": "TOOL"
},
{
"name": "Complexity",
"value": "HIGH"
},
{
"name": "Effort",
"value": "20"
}
]
}
]
}
]
}
}
You'll have to parse the list of custom fields into something you can access by name. Since you're accessing multiple entries from the same list, a dictionary is the most appropriate choice.
for row in ModelData["data"]["squads"][0]["cards"]:
    # Re-key the list of {"name": ..., "value": ...} entries by field name so
    # each lookup no longer depends on the entry's position in the list.
    custom_fields_dict = {field['name']: field['value'] for field in row['customFields']}
    print(row['identifier'],
          custom_fields_dict['Tribe / Customer'],
          ...
          )
If you only wanted a single field you could traverse the list looking for a match, but it would be less efficient to do that repeatedly.
I'm skipping over dealing with missing fields - you'd probably want to use get('Tribe / Customer', some_reasonable_default) if there's any possibility of the field not being present in the json list.
Got a bit of a puzzle here: I'm trying to build a schema to use in my Python app, but I can't figure out how to get this "we" field to be both required and contain a random string (e.g. "QWERT1").
{
"we": [
{
"finished": "01.23.2020 12:56:31",
"run": "02611",
"scenarios": [
{
"name": "name",
"status": "failed",
"run_id": "42",
"tests": [
{
"test_id": "7",
"name": "TC29",
"status": "success",
"finished": "01.23.2020 12:56:31"
}
]
}
]
}
]
}
The rest of the fields should also be mandatory (name, status, etc.). If I exclude "we" from required, the rest of the fields are treated as non-mandatory, and if I add "we" as mandatory, I can't use any other word there :/
This is the schema I've ended up with (with "we" mandatory):
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"we": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"finished": {
"type": "string"
},
"run": {
"type": "string"
},
"scenarios": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"status": {
"type": "string"
},
"run_id": {
"type": "string"
},
"tests": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"test_id": {
"type": "string"
},
"name": {
"type": "string"
},
"status": {
"type": "string"
},
"finished": {
"type": "string"
}
},
"required": [
"test_id",
"name",
"status",
"finished"
]
}
]
}
},
"required": [
"name",
"status",
"run_id",
"tests"
]
}
]
}
},
"required": [
"finished",
"run",
"scenarios"
]
}
]
}
},
"required": [
"we"
]
}
Any ideas ?
If I understand correctly, the root object key could be any string.
First, you need to replace required with minProperties: 1. If you require only 1 property, you also need maxProperties: 1.
Next, you need to use additionalProperties rather than properties > we.
additionalProperties applies the value subschema to all property values at the JSON instance location object.
Here's a bare version of that schema...
{
"$schema": "http://json-schema.org/draft-07/schema#",
"minProperties": 1,
"additionalProperties": {}
}
You can test it with your schema and instance here: https://jsonschema.dev/s/2kE9y
I'm trying to insert a document into Elasticsearch, but every time I try to insert it from Python, it returns an error. If I insert it from Kibana or cURL, it succeeds.
I already tried elasticsearch-dsl, but I got the same error.
(Sorry for my bad English, I'm from Brazil :D)
The error I got:
elasticsearch.helpers.BulkIndexError: ((...)'status': 400, 'error': {'type':
'illegal_argument_exception', 'reason': "object mapping [prices] can't be changed from nested to non-nested"}}}])
My code:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# A one-element list of bulk actions: a single product document to index.
doc = [{
# NOTE(review): the mapping shown below is keyed by index "test_products" with
# type "products"; the "_index"/"_type" values here look swapped relative to
# it — confirm against the cluster.
"_index": "products",
"_type": "test_products",
"_source": {
# "[...]" marks fields the author elided; the snippet is not runnable as written.
[...]
"prices": {
"latest": {
"value": 89,
"when": 1502795602848
},
"old": [
{
"value": 0,
"when": 1502795602848
}
]
},
"sizes": [
{
"name": "P",
"available": True
},
{
"name": "M",
"available": True
}
],
"created": "2017-08-15T08:13:22.848284"
}
}]
# `self.es` is defined outside this snippet (presumably the Elasticsearch client — confirm).
# `index="products"` matches the "_index" value set on the action above.
bulk(self.es, doc, index="products")
My ES mapping:
{
"test_products": {
"mappings": {
"products": {
"properties": {
"approved": {
"type": "boolean"
},
"available": {
"type": "boolean"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"buyClicks": {
"type": "integer"
},
"category": {
"type": "keyword"
},
"code": {
"type": "keyword"
},
"color": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"created": {
"type": "date"
},
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"gender": {
"type": "keyword"
},
"images": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"likes": {
"type": "integer"
},
"link": {
"type": "keyword"
},
"name": {
"type": "text",
"term_vector": "yes",
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"prices": {
"type": "nested",
"properties": {
"latest": {
"type": "nested",
"properties": {
"value": {
"type": "long"
},
"when": {
"type": "date",
"format": "dd-MM-yyyy||epoch_millis"
}
}
},
"old": {
"type": "nested",
"properties": {
"value": {
"type": "long"
},
"when": {
"type": "date",
"format": "dd-MM-yyyy||epoch_millis"
}
}
}
}
},
"redirectClicks": {
"type": "integer"
},
"sizes": {
"type": "nested",
"properties": {
"available": {
"type": "boolean"
},
"name": {
"type": "keyword"
},
"quantity": {
"type": "integer"
}
}
},
"slug": {
"type": "keyword"
},
"store": {
"type": "keyword"
},
"subCategories": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"tags": {
"type": "text",
"fields": {
"raw": {
"type": "text",
"term_vector": "yes",
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
},
"thumbnails": {
"type": "keyword"
}
}
}
}
}
}