output every attribute, value in an uneven JSON object - python

I have a very long and uneven JSON object and I want to output every attribute, value for the end points (leaves) of the object.
For instance, it could look like this:
data = {
"Response": {
"Version": "2.0",
"Detail": {
"TransactionID": "Ib410c-2",
"Timestamp": "04:00"
},
"Transaction": {
"Severity": "Info",
"ID": "2222",
"Text": "Success"
},
"Detail": {
"InquiryDetail": {
"Value": "804",
"CountryISOAlpha2Code": "US"
},
"Product": {
"ID": "PRD",
"Org": {
"Header": {
"valuer": "804"
},
"Location": {
"Address": [
{
"CountryISOAlpha2Code": "US",
"Address": [
{
"Text": {
"#Value": 2,
"$": "Hill St"
}
}
]
}
]
}
}
}
}
}
}
I want to output each potential leaf. It can output the (final attribute or the entire path) and the value.
I know I just need to add something to this:
data = json.loads(inputFile)
small = repeat(data)
for attribute,value in small.iteritems():
print attribute,value

You could use recursion:
def print_leaf_keyvalues(d):
    """Print the key and value of every leaf of a nested dictionary.

    Nested dictionaries are descended into recursively. Any other value,
    including a list, is treated as a leaf and printed unchanged.

    :param d: dictionary (for example a parsed JSON object) to walk
    :return: None; each leaf is written to stdout as "key value"
    """
    for key, value in d.items():
        # Duck-typed mapping check, so dict-like objects are walked as well.
        if hasattr(value, 'items'):
            # recurse into nested dictionary
            print_leaf_keyvalues(value)
        else:
            print(key, value)
Demo on your sample data:
>>> print_leaf_keyvalues(data)
Version 2.0
valuer 804
Address [{'CountryISOAlpha2Code': 'US', 'Address': [{'Text': {'#Value': 2, '$': 'Hill St'}}]}]
ID PRD
CountryISOAlpha2Code US
Value 804
Text Success
Severity Info
ID 2222
This will not handle the list value of Address however. You can always add an additional test for sequences and iterate and recurse again.

Related

replace nested document array mongodb with python

i have this document in mongodb
{
"_id": {
"$oid": "62644af0368cb0a46d7c2a95"
},
"insertionData": "23/04/2022 19:50:50",
"ipfsMetadata": {
"Name": "data.json",
"Hash": "Qmb3FWgyJHzJA7WCBX1phgkV93GiEQ9UDWUYffDqUCbe7E",
"Size": "431"
},
"metadata": {
"sessionDate": "20220415 17:42:55",
"dataSender": "user345",
"data": {
"height": "180",
"weight": "80"
},
"addtionalInformation": [
{
"name": "poolsize",
"value": "30m"
},
{
"name": "swimStyle",
"value": "mariposa"
},
{
"name": "modality",
"value": "swim"
},
{
"name": "gender-title",
"value": "schoolA"
}
]
},
"fileId": {
"$numberLong": "4"
}
}
I want to update an element of the nested array, for instance the one whose name is gender-title. It currently has the value schoolA and I want to change it to the value sent in the request body (adult). I pass the fileId as a parameter in the request URL, and in the body I pass this:
post request : localhost/sessionUpdate/4
and body:
{
"name": "gender-title",
"value": "adultos"
}
flask
# NOTE(review): the next line reads like a Flask route decorator whose leading "@" was lost;
# as written it is only a comment, so it registers nothing -- confirm against the real file.
#app.route('/sessionUpdate/<string:a>', methods=['PUT'])
def sessionUpdate(a):
# Sets the "value" of one metadata.addtionalInformation entry from the JSON request body.
# a: the fileId taken from the URL; the route pattern above declares it as a string.
# Returns the string form of the update result together with status 200, unconditionally.
datas=request.json
# Both body fields are coerced to str before use.
r=str(datas['name'])
r2=str(datas['value'])
print(r,r2)
# NOTE(review): the filter compares the whole 'metadata.addtionalInformation' field with the
# string r, while the sample document stores {name, value} sub-documents there -- presumably
# nothing matches, so the positional "$" in the $set path has no element to resolve to.
# NOTE(review): a comes from a <string:...> route segment but the sample document stores
# fileId as a $numberLong -- presumably a type mismatch as well; verify.
r3=collection.update_one({'fileId':a, 'metadata.addtionalInformation':r}, {'$set':{'metadata.addtionalInformation.$.value':r2}})
# 200 is returned whether or not any document was matched or modified.
return str(r3),200
I'm getting the 200 response, but the document isn't updated with the new value.
As you are using positional operator $ to work with your array, make sure your select query is targeting array element. You can see in below query that it is targeting metadata.addtionalInformation array with the condition that name: "gender-title"
db.collection.update({
"fileId": 4,
"metadata.addtionalInformation.name": "gender-title"
},
{
"$set": {
"metadata.addtionalInformation.$.value": "junior"
}
})
Here is the Mongo playground for your reference.

How do I extract keys from a dictionary that has {"key":[{"A":"1"},{"B":"2"}]?

I have a python dictionary,
dict = {
"A": [{
"264": "0.1965"
}, {
"289": "0.1509"
}, {
"192": "0.1244"
}]
}
I have a collection in mongoDB that has,
{
"_id": ObjectId("5d5a7f474c55b68a873f9602"),
"A": [{
"264": "0.5700"
}, {
"175": "0.321"
}
}
{
"_id": ObjectId("5d5a7f474c55b68a873f9610"),
"B": [{
"152": "0.2826"
}, {
"012": "0.1234"
}
}
}
I want to see if the key "A" from dict is available in mongodb. If yes, I want to loop over the keys in the list i.e.
[{
"264": "0.19652049960139123"
}, {
"289": "0.1509138215380371"
}, {
"192": "0.12447470015715734"
}]
}
and check if 264 is available in mongodb and update the key value else append.
Expected output in mongodb:
{
"_id": ObjectId("5d5a7f474c55b68a873f9602"),
"A": [{
"264": "0.1965"
}, {
"175": "0.321"
}, {
"289": "0.1509"
}, {
"192": "0.1244"
}
}
{
"_id": ObjectId("5d5a7f474c55b68a873f9610"),
"B": [{
"152": "0.2826"
},{
"012": "0.1234"
}
}
The value for key 264 is updated. Kindly help.
Assuming you are looking for the python part and not the mongoDB, try:
# dict['A'] is a list of single-pair dictionaries such as {"264": "0.1965"},
# so walk the list first and then the pairs of each entry.
for entry in dict['A']:
    for k, v in entry.items():  # k is key, v is value
        process_entry(k, v)  # do what you want with the database
assuming your mongodb collection is called your_collection
data= your_collection.find_one({'A':{'$exists':1}})
if data:
#loop over the keys
for item in data['A']:
#check whether a certain key is available
if 'some_key' not in item:
do_something()# update

Accessing nested value in loop in json using python

I want to fetch the value of each api3 in this json object where each array has api3 value.
{
"count": 10,
"result": [
{
"type": "year",
"year": {
"month": {
"api1": {
"href": "https://Ap1.com"
},
"api2": {
"href": "FETCH-CONTENT"
},
"api3": {
"href": "https://Ap3.com"
},
"api4": {
"href": "https://Ap4.com"
}
},
"id": "sdvnkjsnvj",
"summary": "summeryc",
"type": "REST",
"apiId": "mlksmfmksdfs",
"idProvider": {
"id": "sfsmkfmskf",
"name": "Apikey"
},
"tags": []
}
},
{
"type": "year1",
"year": {
"month": {
"api1": {
"href": "https://Ap11.com"
},
"api2": {
"href": "FETCH-CONTENT-1"
},
"api3": {
"href": "https://Ap13.com"
},
"api4": {
"href": "https://Ap14.com"
}
},
"id": "sdvnkjsnvj",
"summary": "summeryc",
"type": "REST",
"apiId": "mlksmfmksdfs",
"idProvider": {
"id": "sfsmkfmskf",
"name": "Apikey"
},
"tags": []
}
},
I am able to get the whole json object and first value inside it.
with open('C:\python\examplee.json','r+') as fr:
data = json.load(fr)
print(data["result"])
Thank you in advance for helping me figuring this.
For each element in list of result key, get the value for the nested dictionary within item
print([item['year']['month']['api3'] for item in data['result']])
The output will be [{'href': 'https://Ap3.com'}, {'href': 'https://Ap13.com'}]
Or if you want to get the href value as well
print([item['year']['month']['api3']['href'] for item in data['result']])
The output will be
['https://Ap3.com', 'https://Ap13.com']
So your whole code will look like
# Raw string: keeps the backslashes of the Windows path literal as written.
# The file is only read, so it is opened read-only.
with open(r'C:\python\examplee.json', 'r') as fr:
    data = json.load(fr)
# Collect the api3 href of every entry of the "result" list.
print([item['year']['month']['api3']['href'] for item in data['result']])
Looks like your JSON schema is static so you can just use this:
print([x['year']['month']['api3']['href'] for x in data['result']])
will return you:
['https://Ap3.com', 'https://Ap13.com']

Accessing nested json objects using python

I am trying to interact with an API and running into issues accessing nested objects. Below is sample json output that I am working with.
{
"results": [
{
"task_id": "22774853-2b2c-49f4-b044-2d053141b635",
"params": {
"type": "host",
"target": "54.243.80.16",
"source": "malware_analysis"
},
"v": "2.0.2",
"status": "success",
"time": 227,
"data": {
"details": {
"as_owner": "Amazon.com, Inc.",
"asn": "14618",
"country": "US",
"detected_urls": [],
"resolutions": [
{
"hostname": "bumbleride.com",
"last_resolved": "2016-09-15 00:00:00"
},
{
"hostname": "chilitechnology.com",
"last_resolved": "2016-09-16 00:00:00"
}
],
"response_code": 1,
"verbose_msg": "IP address in dataset"
},
"match": true
}
}
]
}
The deepest I am able to access is the data portion which returns too much.... ideally I am just trying access as_owner,asn,country,detected_urls,resolutions
When I try to access details, response_code, etc., I get a KeyError. My nested JSON goes deeper than the examples in the other questions I found, and I have already tried the logic suggested there.
Below is my current code snippet and any help is appreciated!
import requests
import json
headers = {
'Content-Type': 'application/json',
}
params = (
('wait', 'true'),
)
data = '{"target":{"one":{"type": "ip","target": "54.243.80.16", "sources": ["xxx","xxxxx"]}}}'
r=requests.post('https://fakewebsite:8000/api/services/intel/lookup/jobs', headers=headers, params=params, data=data, auth=('apikey', ''))
parsed_json = json.loads(r.text)
#results = parsed_json["results"]
for item in parsed_json["results"]:
print(item['data'])
You just need to index correctly into the converted JSON. Then you can easily loop over a list of the keys you want to fetch, since they are all in the "details" dictionary.
import json
raw = '''\
{
"results": [
{
"task_id": "22774853-2b2c-49f4-b044-2d053141b635",
"params": {
"type": "host",
"target": "54.243.80.16",
"source": "malware_analysis"
},
"v": "2.0.2",
"status": "success",
"time": 227,
"data": {
"details": {
"as_owner": "Amazon.com, Inc.",
"asn": "14618",
"country": "US",
"detected_urls": [],
"resolutions": [
{
"hostname": "bumbleride.com",
"last_resolved": "2016-09-15 00:00:00"
},
{
"hostname": "chilitechnology.com",
"last_resolved": "2016-09-16 00:00:00"
}
],
"response_code": 1,
"verbose_msg": "IP address in dataset"
},
"match": true
}
}
]
}
'''
# Decode the sample document, then report the selected "details" fields
# of every entry in its "results" list.
parsed_json = json.loads(raw)
wanted = ['as_owner', 'asn', 'country', 'detected_urls', 'resolutions']
for result in parsed_json["results"]:
    detail_map = result['data']['details']
    for field in wanted:
        rendered = json.dumps(detail_map[field], indent=4)
        print(field, ':', rendered)
    # Blank line separating one result's details from the next
    print()
output
as_owner : "Amazon.com, Inc."
asn : "14618"
country : "US"
detected_urls : []
resolutions : [
{
"hostname": "bumbleride.com",
"last_resolved": "2016-09-15 00:00:00"
},
{
"hostname": "chilitechnology.com",
"last_resolved": "2016-09-16 00:00:00"
}
]
BTW, when you fetch JSON data using requests there's no need to use json.loads: you can access the converted JSON using the .json method of the returned request object instead of using its .text attribute.
Here's a more robust version of the main loop of the above code. It simply ignores any missing keys. I didn't post this code earlier because the extra if tests make it slightly less efficient, and I didn't know that keys could be missing.
# Same report as the main loop, but tolerant of missing keys: entries
# without 'data' or 'details' are skipped, and absent fields are ignored.
for entry in parsed_json["results"]:
    if 'data' not in entry:
        continue
    payload = entry['data']
    if 'details' not in payload:
        continue
    info = payload['details']
    for name in (field for field in wanted if field in info):
        print(name, ':', json.dumps(info[name], indent=4))
    # Blank line separating one entry's details from the next
    print()

Add #timestamp field in ElasticSearch with Python

I'm using Python to add entries in a local ElasticSearch (localhost:9200)
Currently, I use this method:
def insertintoes(data):
"""
Insert data into Elasticsearch.

The target index is named 'logstash-' followed by the date of the
'#timestamp' entry of data, formatted with "%Y.%m.%d". When that index
does not exist yet, it is created with the mapping defined below.

:param data: dict; its '#timestamp' entry must provide strftime()
    (if the entry is missing, data.get() returns None and strftime fails)
:return: None (the function has no return statement)
"""
timestamp = data.get('#timestamp')
# One index per day, e.g. 'logstash-2017.06.25'
logstashIndex = 'logstash-' + timestamp.strftime("%Y.%m.%d")
# NOTE(review): client built without arguments -- presumably targets the local
# node mentioned in the surrounding text (localhost:9200); confirm.
es = Elasticsearch()
if not es.indices.exists(logstashIndex):
# Setting mappings for index
mapping = '''
{
"mappings": {
"_default_": {
"_all": {
"enabled": true,
"norms": false
},
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"norms": false,
"type": "text"
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"fields": {
"keyword": {
"type": "keyword"
}
},
"norms": false,
"type": "text"
}
}
}
],
"properties": {
"#timestamp": {
"type": "date",
"include_in_all": true
},
"#version": {
"type": "keyword",
"include_in_all": true
}
}
}
}
}
'''
# NOTE(review): ignore=400 presumably keeps an "index already exists"
# response from raising -- confirm against the client documentation.
es.indices.create(logstashIndex, ignore=400, body=mapping)
# NOTE(review): indentation is lost here; presumably this call sits outside the
# "if" above, so the document is indexed whether or not the index was just created.
es.index(index=logstashIndex, doc_type='system', timestamp=timestamp, body=data)
data is a dict structure with a valid #timestamp defined like this data['#timestamp'] = datetime.datetime.now()
The problem is, even if there is a timestamp value in my data, Kibana doesn't show the entry in «discovery» field. :(
Here is an example of a full entry in Elasticsearch:
{
"_index": "logstash-2017.06.25",
"_type": "system",
"_id": "AVzf3QX3iazKBndbIkg4",
"_score": 1,
"_source": {
"priority": 6,
"uid": 0,
"gid": 0,
"systemd_slice": "system.slice",
"cap_effective": "1fffffffff",
"exe": "/usr/bin/bash",
"hostname": "ns3003395",
"syslog_facility": 9,
"comm": "crond",
"systemd_cgroup": "/system.slice/cronie.service",
"systemd_unit": "cronie.service",
"syslog_identifier": "CROND",
"message": "(root) CMD (/usr/local/rtm/bin/rtm 14 > /dev/null 2> /dev/null)",
"systemd_invocation_id": "9228b6c72e6a4624a1806e4c59af8d04",
"syslog_pid": 26652,
"pid": 26652,
"#timestamp": "2017-06-25T17:27:01.734453"
}
}
As you can see, there IS a #timestamp field, but it doesn't seem to be what Kibana expects.
And I don't know what to do to make my entries visible in Kibana.
Any idea ?
Elasticsearch is not recognizing #timestamp as a date, but as a string. If your data['#timestamp'] is a datetime object, you can try converting it to an ISO string, which is recognized automatically. Try:
timestamp = data.get('#timestamp').isoformat()
timestamp should now be a string, but in ISO format

Categories