python parse every loop items in json response - python

i have json response like this :
r = {
"ID": "0001",
"ST": "87549937737",
"DT": "22062022",
"DATA": {
"0": {
"PC": "100",
"NM": "ALEX",
"AMOUNT": "210,912",
"ACT": "123123456"
},
"1": {
"PC": "100",
"NM": "MARCO",
"AMOUNT": "500,200",
"ACT": "123555456"
},
"2": {
"PC": "100",
"NM": "TIFFANY",
"AMOUNT": "712,111",
"ACT": "123666981"
}
},
"RCMSG": {
"0": {
"SEVERITY": "00"
}
},
"RC": "0000"
}
and i want to parse every loop items in DATA, and my scripts are like this :
for items in r['DATA']:
jdata = {'PRODUCTCODE': items['PC'], 'NAME': items['NM'], 'AMOUNTBALANCE': items['AMOUNT'], 'ACCOUNT': items['ACT']}
return jsonify({'status':'success', 'code':'00', 'data': jdata})
but the response is only parsed '0' in 'DATA, how do i loop every items inside 'DATA' ?
{
"PRODUCTCODE": "100",
"NAME": "ALEX",
"AMOUNTBALANCE": "210,912",
"ACCOUNT": "123123456"
}

In your code, there is a return statement inside the for loop, so it will never traverse all elements in JSON data because it will end its execution at the first item. So, you need to traverse all elements accessing them from the original JSON response:
r = {
"ID": "0001",
"ST": "87549937737",
"DT": "22062022",
"DATA": {
"0": {
"PC": "100",
"NM": "ALEX",
"AMOUNT": "210,912",
"ACT": "123123456"
},
"1": {
"PC": "100",
"NM": "MARCO",
"AMOUNT": "500,200",
"ACT": "123555456"
},
"2": {
"PC": "100",
"NM": "TIFFANY",
"AMOUNT": "712,111",
"ACT": "123666981"
}
},
"RCMSG": {
"0": {
"SEVERITY": "00"
}
},
"RC": "0000"
}
d = []
for i in r['DATA']:
d.append(r['DATA'][i])
print(d)
Output:
[{'PC': '100', 'NM': 'ALEX', 'AMOUNT': '210,912', 'ACT': '123123456'}, {'PC': '100', 'NM': 'MARCO', 'AMOUNT': '500,200', 'ACT': '123555456'}, {'PC': '100', 'NM': 'TIFFANY', 'AMOUNT': '712,111', 'ACT': '123666981'}]

results = []
for k, v in r['DATA'].items():
item = {
"PRODUCTCODE": v["PC"],
"NAME": v["NM"],
"AMOUNTBALANCE": v["AMOUNT"],
"ACCOUNT": v["ACT"],
}
results.append(item)

Related

Merge Json with same key value pairs

I got a resultant json from an API in the following format
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
since the Uid and Id are same for multiple entires, can I club them togeather with Details key being the comma seperate key,value pair? Something like mentioned below
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}]
Please Guide me on this for the approach to be followed. Thanks
What you need is the dictionary function update(). Here's an example:
A = [{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
B = []
def find(uid, id_):
for i, d in enumerate(B):
if d['Uid'] == uid and d['Id'] == id_:
return i
return -1
for d in A:
if (i := find(d['Uid'], d['Id'])) < 0:
B.append(d)
else:
B[i]['Details'].update(d['Details'])
print(B)
Prettyfied output:
[
{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Note:
This could be very inefficient if your API response contains very large numbers of dictionaries. You might need a completely different approach
You should iterate over the list and merge with accumulator with (Uid, Id) as key:
from typing import Dict, List
l = [{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
def mergeItem(it: Dict, acc: Dict) -> Dict:
uid = it["Uid"]
id = it["Id"]
if (uid, id) in acc:
acc[(uid, id)] = {"Uid": uid, "Id": id, "Details": {**acc[(uid, id)]["Details"], **it["Details"]}}
else:
acc[(uid, id)] = {"Uid": uid, "Id": id, "Details": it["Details"]}
return acc
def mergeList(a:List) -> Dict:
acc = {}
for v in a:
acc = mergeItem(v, acc)
return acc
print(list(mergeList(l).values()))
# [
# {
# 'Uid': '40cc6103-1cf0-4735-b882-d14d32018e58',
# 'Id': '9e1a0057-4570-4a6e-8ff5-88b2facbaf4e',
# 'Details': {'Name': 'Kiran', 'Age': '24'}},
# {
# 'Uid': '196f5865-e9fe-4847-86ae-97d0bf57b816',
# 'Id': '84909ecb-c92e-48a7-bcaa-d478bf3a9220',
# 'Details': {'Name': 'Shreyas'}
# }
# ]

I am unable to create dictionary

I am unable to create a dictionary whose structure is like :
"aggs": {
"4": {
"date_histogram": {
"field": "#timestamp",
"interval": "1m",
"time_zone": "Asia/Kolkata",
"min_doc_count": 1,
},
"aggs": {
"5": {
"range": {
"field": "system.core.idle.pct",
"ranges": [{"from": 0, "to": 0.001}, {"from": 0.001, "to": 1}],
"keyed": true,
},
"aggs": {
"2": {"max": {"field": "system.core.system.pct"}},
"3": {"max": {"field": "system.core.idle.pct"}},
},
}
},
}
}
Mostly I am getting the issue in creating aggs:5 inside aggs:4 and then looping it. I need to do it for multiple aggs. the number of agg id can be upto 1000 also.
I am trying to derive it from the dictionary:
"aggs": [
{"id": "1", "enabled": true, "type": "count", "schema": "metric", "params": {}},
{
"id": "2",
"enabled": true,
"type": "max",
"schema": "metric",
"params": {"field": "system.core.system.pct", "customLabel": "system"},
},
{
"id": "3",
"enabled": true,
"type": "max",
"schema": "metric",
"params": {"field": "system.core.idle.pct", "customLabel": "idle"},
},
{
"id": "4",
"enabled": true,
"type": "date_histogram",
"schema": "bucket",
"params": {
"field": "#timestamp",
"interval": "m",
"customInterval": "2h",
"min_doc_count": 1,
"extended_bounds": {},
},
},
{
"id": "5",
"enabled": true,
"type": "range",
"schema": "bucket",
"params": {
"field": "system.core.idle.pct",
"ranges": [{"from": 0, "to": 0.001}, {"from": 0.001, "to": 1}],
},
},
]
Can anyone show the code how to execute it. Basically I am failing in creating nested dictionary.
It looks like you are using JSON data.
You can see this link https://realpython.com/python-json/.
You can use the inbuilt JSON lib to covert the whole thing in a dict
Basically something like this
json_string = """
{
"researcher": {
"name": "Ford Prefect",
"species": "Betelgeusian",
"relatives": [
{
"name": "Zaphod Beeblebrox",
"species": "Betelgeusian"
}
]
}
}
"""
data = json.loads(json_string)
add whole structure in {}, and replace true to True.
{"aggs": {
"4": {
"date_histogram": {
"field": "#timestamp",
"interval": "1m",
"time_zone": "Asia/Kolkata",
"min_doc_count": 1,
},
"aggs": {
"5": {
"range": {
"field": "system.core.idle.pct",
"ranges": [{"from": 0, "to": 0.001}, {"from": 0.001, "to": 1}],
"keyed": True,
},
"aggs": {
"2": {"max": {"field": "system.core.system.pct"}},
"3": {"max": {"field": "system.core.idle.pct"}},
},
}
},
}
}}
{'aggs': {'4': {'date_histogram': {'field': '#timestamp',
'interval': '1m',
'time_zone': 'Asia/Kolkata',
'min_doc_count': 1},
'aggs': {'5': {'range': {'field': 'system.core.idle.pct',
'ranges': [{'from': 0, 'to': 0.001}, {'from': 0.001, 'to': 1}],
'keyed': True},
'aggs': {'2': {'max': {'field': 'system.core.system.pct'}},
'3': {'max': {'field': 'system.core.idle.pct'}}}}}}}}

Parsing AWS ATHENA outputs

Relatively new to Python here, coming from a node.js background, having quite a few issues parsing the output I get from get_query_results()
Documentation Link
I have been at this for some hours, i have tried iterating through the ['ResultSetMetadata']['ColumnInfo'] to grab the column names, but i don't know how to tie the ['ResultSet']['Data'] to these items so the code knows which name to apply to each dataValue.
I know i need to select the row headers then add the associated objects to those rows, but the logic on how to do such a thing in python escapes me.
I can see that the first column name always lines up with the first ['Data']['VarCharValue'] so I can get all the values in order, but if I loop through ['ResultSet']['Rows'] how do I isolate the first iteration as the column names to then populate with each other row?
Or is there a better way to do this?
Here is my json.dumps(ATHENAoutput)
{
"ResultSet": {
"Rows": [{
"Data": [{
"VarCharValue": "postcode"
}, {
"VarCharValue": "CountOf"
}]
}, {
"Data": [{
"VarCharValue": "1231"
}, {
"VarCharValue": "2"
}]
}, {
"Data": [{
"VarCharValue": "1166"
}, {
"VarCharValue": "2"
}]
}, {
"Data": [{
"VarCharValue": "3651"
}, {
"VarCharValue": "3"
}]
}, {
"Data": [{
"VarCharValue": "2171"
}, {
"VarCharValue": "2"
}]
}, {
"Data": [{
"VarCharValue": "4697"
}, {
"VarCharValue": "2"
}]
}, {
"Data": [{
"VarCharValue": "4450"
}, {
"VarCharValue": "2"
}]
}, {
"Data": [{
"VarCharValue": "4469"
}, {
"VarCharValue": "1"
}]
}],
"ResultSetMetadata": {
"ColumnInfo": [{
"Scale": 0,
"Name": "postcode",
"Nullable": "UNKNOWN",
"TableName": "",
"Precision": 2147483647,
"Label": "postcode",
"CaseSensitive": true,
"SchemaName": "",
"Type": "varchar",
"CatalogName": "hive"
}, {
"Scale": 0,
"Name": "CountOf",
"Nullable": "UNKNOWN",
"TableName": "",
"Precision": 19,
"Label": "CountOf",
"CaseSensitive": false,
"SchemaName": "",
"Type": "bigint",
"CatalogName": "hive"
}]
}
},
"ResponseMetadata": {
"RetryAttempts": 0,
"HTTPStatusCode": 200,
"RequestId": "18190e7c-901c-40b4-b6ef-10a5013b1a70",
"HTTPHeaders": {
"date": "Mon, 01 Oct 2018 04:51:14 GMT",
"x-amzn-requestid": "18190e7c-901c-40b4-b6ef-10a5013b1a70",
"content-length": "1464",
"content-type": "application/x-amz-json-1.1",
"connection": "keep-alive"
}
}
}
My desired Result is a JSON Array like the following:
[{
"postcode": "2171",
"CountOf": "2"
}, {
"postcode": "4697",
"CountOf": "2"
}, {
"postcode": "1166",
"CountOf": "2"
},
...
]
>>> def get_var_char_values(d):
... return [obj['VarCharValue'] for obj in d['Data']]
...
...
... header, *rows = input_data['ResultSet']['Rows']
... header = get_var_char_values(header)
... result = [dict(zip(header, get_var_char_values(row))) for row in rows]
>>> import json; print(json.dumps(result, indent=2))
[
{
"postcode": "4450",
"CountOf": "2"
},
{
"postcode": "1231",
"CountOf": "2"
},
{
"postcode": "4469",
"CountOf": "1"
},
{
"postcode": "3651",
"CountOf": "3"
},
{
"postcode": "1166",
"CountOf": "2"
},
{
"postcode": "4697",
"CountOf": "2"
},
{
"postcode": "2171",
"CountOf": "2"
}
]

python append dictionary to list

According to this post, I need to use .copy() on a dictionary, if I want to reference a dictionary which gets updated in a loop (instead of always referencing the same dictionary). However, in my code example below this doesn't seem to work:
main.py:
import collections
import json
nodes_list = ['donald', 'daisy', 'mickey', 'minnie']
edges_list = [('donald', 'daisy', '3'), ('mickey', 'minnie', '3'), ('daisy', 'minnie', '2')]
node_dict, edge_dict = collections.defaultdict(dict), collections.defaultdict(dict)
ultimate_list = []
for n in nodes_list:
node_dict["data"]["id"] = str(n)
ultimate_list.append(node_dict.copy())
for e in edges_list:
edge_dict["data"]["id"] = str(e[2])
edge_dict["data"]["source"] = e[0]
edge_dict["data"]["target"] = e[1]
ultimate_list.append(edge_dict.copy())
print(json.dumps(ultimate_list, indent=2))
As a result here I get the following:
[
{
"data": {
"id": "minnie"
}
},
{
"data": {
"id": "minnie"
}
},
{
"data": {
"id": "minnie"
}
},
{
"data": {
"id": "minnie"
}
},
{
"data": {
"target": "minnie",
"id": "2",
"source": "daysi"
}
},
{
"data": {
"target": "minnie",
"id": "2",
"source": "daysi"
}
},
{
"data": {
"target": "minnie",
"id": "2",
"source": "daysi"
}
}
]
Whereas I would actually expect to get this:
[
{
"data": {
"id": "donald"
}
},
{
"data": {
"id": "daisy"
}
},
{
"data": {
"id": "mickey"
}
},
{
"data": {
"id": "minnie"
}
},
{
"data": {
"target": "donald",
"id": "3",
"source": "daysi"
}
},
{
"data": {
"target": "mickey",
"id": "3",
"source": "minnie"
}
},
{
"data": {
"target": "minnie",
"id": "2",
"source": "daysi"
}
}
]
Can anyone please tell me what I'm doing wrong here?
dict.copy only makes a shallow copy of the dict, the nested dictionaries are never copied, you need deep copies to have those copied over too.
However, you can simply define each new dict at each iteration of the loop and append the new dict at that iteration instead:
for n in nodes_list:
node_dict = collections.defaultdict(dict) # create new instance of data structure
node_dict["data"]["id"] = str(n)
ultimate_list.append(node_dict)
Same applies to the edge_dict:
for e in edges_list:
edge_dict = collections.defaultdict(dict)
...
ultimate_list.append(edge_dict)
Use copy.deepcopy(your_dict): deepcopy.
I see a few things. According to your desired results your edge_list is a bit off.
Change:
('daisy', 'minnie', '2')
To:
('minnie', 'daisy', '2')
To create the data the way you would like in your desired output we can do this with a more basic approach to dicts.
If you are trying to match the desired results in your question then you are calling the wrong index in your for e in edges_list function.
It should be:
"target" : e[0]
"id" : str(e[2])
"source" : e[1]
First I removed
node_dict, edge_dict = collections.defaultdict(dict), collections.defaultdict(dict)
as its not needed for my method.
Next I changed how you are defining the data.
Instead of using pre-defined dictionaries we can just append the results of each set of data to the ultimate_list directly. This shortens the code and is a bit easier to set up.
for n in nodes_list:
ultimate_list.append({"data" : {"id" : str(n)}})
for e in edges_list:
ultimate_list.append({"data" : {"target" : e[0], "id" : str(e[2]), "source" : e[1]}})
print(json.dumps(ultimate_list, indent=2))
So the following code:
import collections
import json
nodes_list = ['donald', 'daisy', 'mickey', 'minnie']
edges_list = [('donald', 'daisy', '3'), ('mickey', 'minnie', '3'), ('minnie', 'daisy', '2')]
ultimate_list = []
for n in nodes_list:
ultimate_list.append({"data" : {"id" : str(n)}})
for e in edges_list:
ultimate_list.append({"data" : {"target" : e[0], "id" : str(e[2]), "source" : e[1]}})
print(json.dumps(ultimate_list, indent=2))
Should result in:
[
{
"data": {
"id": "donald"
}
},
{
"data": {
"id": "daisy"
}
},
{
"data": {
"id": "mickey"
}
},
{
"data": {
"id": "minnie"
}
},
{
"data": {
"target": "donald",
"id": "3",
"source": "daisy"
}
},
{
"data": {
"target": "mickey",
"id": "3",
"source": "minnie"
}
},
{
"data": {
"target": "minnie",
"id": "2",
"source": "daisy"
}
}
]

AWS DynamoDB Stream python convert native format

I've a Lambda function triggered by a DynamoDB Stream. My problem is the strange format of the event received(with type for each key/value).
Does exists a workaround to convert a whole document in a native python format(without any types). I'm looking for a dynamic solution because in the future I want use this lambda with other DynamoDB table Streams which have different format(multiple dict/list levels)
Example:
{
"Records": [
{
"eventID": "1",
"eventVersion": "1.0",
"dynamodb": {
"Keys": {
"Id": {
"N": "101"
}
},
"NewImage": {
"Message": {
"S": "New item!"
},
"Id": {
"N": "101"
}
},
"StreamViewType": "NEW_AND_OLD_IMAGES",
"SequenceNumber": "111",
"SizeBytes": 26
},
"awsRegion": "us-west-2",
"eventName": "INSERT",
"eventSourceARN": "arn:aws:dynamodb:us-west-2:account-id:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899",
"eventSource": "aws:dynamodb"
},
{
"eventID": "2",
"eventVersion": "1.0",
"dynamodb": {
"OldImage": {
"Message": {
"S": "New item!"
},
"Id": {
"N": "101"
}
},
"SequenceNumber": "222",
"Keys": {
"Id": {
"N": "101"
}
},
"SizeBytes": 59,
"NewImage": {
"Message": {
"S": "This item has changed"
},
"Id": {
"N": "101"
}
},
"StreamViewType": "NEW_AND_OLD_IMAGES"
},
"awsRegion": "us-west-2",
"eventName": "MODIFY",
"eventSourceARN": "arn:aws:dynamodb:us-west-2:account-id:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899",
"eventSource": "aws:dynamodb"
},
{
"eventID": "3",
"eventVersion": "1.0",
"dynamodb": {
"Keys": {
"Id": {
"N": "101"
}
},
"SizeBytes": 38,
"SequenceNumber": "333",
"OldImage": {
"Message": {
"S": "This item has changed"
},
"Id": {
"N": "101"
}
},
"StreamViewType": "NEW_AND_OLD_IMAGES"
},
"awsRegion": "us-west-2",
"eventName": "REMOVE",
"eventSourceARN": "arn:aws:dynamodb:us-west-2:account-id:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899",
"eventSource": "aws:dynamodb"
}
]
}
Thanks
I've been using this, it served us well until now:
from boto3.dynamodb.types import TypeDeserializer
serializer = TypeDeserializer()
def deserialize(data):
if isinstance(data, list):
return [deserialize(v) for v in data]
if isinstance(data, dict):
try:
return serializer.deserialize(data)
except TypeError:
return {k: deserialize(v) for k, v in data.items()}
else:
return data

Categories