Convert CSV to Nested JSON complex structure using Pandas - python

Converted into a nested JSON file using Pandas
This is the sample csv for one row
name type aitm alitm aaitm adsc1
specs glass 70072187 ESA65Z45 ESA 65Z45 CUT TIP FG 1808-40
I'm trying to achieve the below structure of Nested JSON for every row

import pandas as pd
import json
df = pd.DataFrame([['specs','glass','70072187','ESA65Z45','ESA 65Z45','CUT TIP FG 1808-40'],
['specs','glass','666','ESA6665','ESB 666','CUT TIP FG 66-40']],
columns = ['name', 'type','aitm','alitm','aaitm','adsc1' ])
data = {'entities':[]}
for key,grp in df.groupby('name'):
for idx, row in grp.iterrows():
temp_dict_alpha = {'name':key, 'type':row['type'], 'data':{'attributes':{}}}
attr_row = row[~row.index.isin(['name','type'])]
for idx2, row2 in attr_row.iteritems():
dict_temp = {}
dict_temp[idx2] = {'values':[]}
dict_temp[idx2]['values'].append({'value':row2,'source':'internal','locale':'en_US'})
temp_dict_alpha['data']['attributes'].update(dict_temp)
data['entities'].append(temp_dict_alpha)
print(json.dumps(data, indent= 4))
Output:
print(json.dumps(data, indent= 4))
{
"entities": [
{
"name": "specs",
"type": "glass",
"data": {
"attributes": {
"aitm": {
"values": [
{
"value": "70072187",
"source": "internal",
"locale": "en_US"
}
]
},
"alitm": {
"values": [
{
"value": "ESA65Z45",
"source": "internal",
"locale": "en_US"
}
]
},
"aaitm": {
"values": [
{
"value": "ESA 65Z45",
"source": "internal",
"locale": "en_US"
}
]
},
"adsc1": {
"values": [
{
"value": "CUT TIP FG 1808-40",
"source": "internal",
"locale": "en_US"
}
]
}
}
}
},
{
"name": "specs",
"type": "glass",
"data": {
"attributes": {
"aitm": {
"values": [
{
"value": "666",
"source": "internal",
"locale": "en_US"
}
]
},
"alitm": {
"values": [
{
"value": "ESA6665",
"source": "internal",
"locale": "en_US"
}
]
},
"aaitm": {
"values": [
{
"value": "ESB 666",
"source": "internal",
"locale": "en_US"
}
]
},
"adsc1": {
"values": [
{
"value": "CUT TIP FG 66-40",
"source": "internal",
"locale": "en_US"
}
]
}
}
}
}
]
}

Related

json.decoder.JSONDecodeError - while converting JSON to CSV output

While trying to convert a JSON output below to CSV, getting error
Here is the JSON output
{
"data": [
{
"id": "-1000100591151294842",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "194.950000",
"wavelength": "1537.79",
"channel": "CH-20"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-BBEG-CHLC-P109"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "100.0",
"utilizationPercent": "100",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"linkLabel": "BBEG-ROADM-0101:5-4-1,CHLC-ROADM-0401:7-35-1",
"lastUpdatedAdminStateTimeStamp": "2021-05-03T00:29:24.444Z",
"lastUpdatedOperationalStateTimeStamp": "2022-12-08T22:42:21.567Z",
"userLabel": "J-BBEG-CHLC-P109",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-12-08T22:42:22.123Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "194.950000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1000100591151294842"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6765278351459212874"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1000100591151294842:1"
},
{
"type": "endPoints",
"id": "-1000100591151294842:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "7147507956181395827"
}
]
}
}
},
{
"id": "-1013895107051577774",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "191.600000",
"wavelength": "1564.68",
"channel": "CH-87"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-KFF9-PNTH-P101"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "90.0",
"utilizationPercent": "90",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"tags": [
"J-KFF9-PNTH-P101"
],
"linkLabel": "KFF9-ROADM-0301:1-1-1,PNTH-ROADM-0101:1-1-1",
"lastUpdatedAdminStateTimeStamp": "2021-09-12T20:22:59.334Z",
"lastUpdatedOperationalStateTimeStamp": "2022-10-12T14:20:44.779Z",
"userLabel": "J-KFF9-PNTH-P101",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-10-12T14:20:45.417Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "191.600000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1013895107051577774"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6055685088078365419"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1013895107051577774:1"
},
{
"type": "endPoints",
"id": "-1013895107051577774:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "-6727082893715936342"
}
]
}
}
}
] }
getting below error, not sure what is missing
Here is the python script I used. have been trying different variations but no luck getting different errors in all other instances
filename = Path('fre.json')
data = []
with open(filename,'r') as json_file:
data_str = json_file.read()
data_str = data_str.split('[',1)[-1]
data_str = data_str.rsplit(']',1)[0]
data_str = data_str.split('][')
for jsonStr in data_str:
jsonStr = '[' + jsonStr + ']'
temp_data = json.loads(jsonStr)
for each in temp_data:
data.append(each)
what is wrong?

update nested json object in python

I have a json file name input which as follows
{
"abc": {
"dbc": {
"type": "string",
"metadata": {
"description": "Name of the namespace"
}
},
"fgh": {
"type": "string",
"metadata": {
"description": "Name of the Topic"
}
}
},
"resources": [
{
"sku": {
"name": "[parameters('sku')]"
},
"properties": {},
"resources": [
{
"resources": [
{
"resources": [
{
"properties": {
"filterType": "SqlFilter",
"sqlFilter": {
"sqlExpression": "HAI"
}
}
}
]
}
]
}
]
}
]
}
I want "sqlExpression": "HAI" value to be replaced with BYE as below
"sqlExpression": "BYE"
I want python code to do it, I tried the below code but not working
input['resources'][0]['resources'][0]['resources'][0]['resources'][0][properties][0][sqlFilter][0][sqlExpression][0]='BYE'
inp = {
"abc": {
"dbc": {
"type": "string",
"metadata": {
"description": "Name of the namespace"
}
},
"fgh": {
"type": "string",
"metadata": {
"description": "Name of the Topic"
}
}
},
"resources": [
{
"sku": {
"name": "[parameters('sku')]"
},
"properties": {},
"resources": [
{
"resources": [
{
"resources": [
{
"properties": {
"filterType": "SqlFilter",
"sqlFilter": {
"sqlExpression": "HAI"
}
}
}
]
}
]
}
]
}
]
}
inp['resources'][0]['resources'][0]['resources'][0]['resources'][0]['properties']['sqlFilter']['sqlExpression']='BYE'
print(inp)
Result
{'abc': {'dbc': ...truncated... {'sqlExpression': 'BYE'}}}]}]}]}]}

Python Cubes OLAP Framework - How to sum a json column?

I started using Python Cubes Olap recently.
I'm trying to sum/avg a JSON postgres column, how can i do this?
my db structure:
events
id
object_type
sn_name
spectra
id
snx_wavelengths (json column)
event_id
my json:
{
"dimensions": [
{
"name": "event",
"levels": [
{
"name": "object_type",
"label": "Object Type",
"attributes": [
"object_type"
]
},
{
"name": "sn_name",
"label": "name",
"attributes": [
"sn_name"
]
}
]
},
{
"name": "spectra",
"levels": [
{
"name": "catalog_name",
"label": "Catalog Name",
"attributes": [
"catalog_name"
]
},
{
"name": "capture_date",
"label": "Capture Date",
"attributes": [
"capture_date"
]
}
]
},
{
"name": "date"
}
],
"cubes": [
{
"id": "uid",
"name": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5",
"dimensions": [
"event",
"spectra",
"date"
],
"aggregates": [
{
"name": "event_snx_wavelengths_sum",
"function": "sum",
"measure": "event.snx_wavelengths"
},
{
"name": "record_count",
"function": "count"
}
],
"joins": [
{
"master": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5.id",
"detail": "spectra.event_id"
},
],
"mappings": {
"event.sn_name": "sn_name",
"event.object_type": "object_type",
"spectra.catalog_name": "spectra.catalog_name",
"spectra.capture_date": "spectra.capture_date",
"event.snx_wavelengths": "spectra.snx_wavelengths",
"date": "spectra.capture_date"
},
}
]
}
I'm getting the follow error:
Unknown attribute ''event.snx_wavelengths''
Anyone can help?
I already tried use mongodb to do the sum, i didnt had success.

Using pandas to convert csv into nested json with dynamic strucutre

I am new to python and now want to convert a csv file into json file. Basically the json file is nested with dynamic structure, the structure will be defined using the csv header.
From csv input:
ID, Name, person_id/id_type, person_id/id_value,person_id_expiry_date,additional_info/0/name,additional_info/0/value,additional_info/1/name,additional_info/1/value,salary_info/details/0/grade,salary_info/details/0/payment,salary_info/details/0/amount,salary_info/details/1/next_promotion
1,Peter,PASSPORT,A452817,1-01-2055,Age,19,Gender,M,Manager,Monthly,8956.23,unknown
2,Jane,PASSPORT,B859804,2-01-2035,Age,38,Gender,F,Worker, Monthly,125980.1,unknown
To json output:
[
{
"ID": 1,
"Name": "Peter",
"person_id": {
"id_type": "PASSPORT",
"id_value": "A452817"
},
"person_id_expiry_date": "1-01-2055",
"additional_info": [
{
"name": "Age",
"value": 19
},
{
"name": "Gender",
"value": "M"
}
],
"salary_info": {
"details": [
{
"grade": "Manager",
"payment": "Monthly",
"amount": 8956.23
},
{
"next_promotion": "unknown"
}
]
}
},
{
"ID": 2,
"Name": "Jane",
"person_id": {
"id_type": "PASSPORT",
"id_value": "B859804"
},
"person_id_expiry_date": "2-01-2035",
"additional_info": [
{
"name": "Age",
"value": 38
},
{
"name": "Gender",
"value": "F"
}
],
"salary_info": {
"details": [
{
"grade": "Worker",
"payment": " Monthly",
"amount": 125980.1
},
{
"next_promotion": "unknown"
}
]
}
}
]
Is this something can be done by the existing pandas API or I have to write lots of complex codes to dynamically construct the json object? Thanks.

how to convert multi valued CSV to Json

I have a csv file with 4 columns data as below.
type,MetalType,Date,Acknowledge
Metal,abc123451,2018-05-26,Success
Metal,abc123452,2018-05-27,Success
Metal,abc123454,2018-05-28,Failure
Iron,abc123455,2018-05-29,Success
Iron,abc123456,2018-05-30,Failure
( I just provided header in the above example data but in my case i dont have header in the data)
how can i convert above csv file to Json in the below format...
1st Column : belongs to --> "type": "Metal"
2nd Column : MetalType: "values" : "value": "abc123451"
3rd column : "Date": "values":"value": "2018-05-26"
4th Column : "Acknowledge": "values":"value": "Success"
and remaining all columns are default values.
As per below format ,
{
"entities": [
{
"id": "XXXXXXX",
"type": "Metal",
"data": {
"attributes": {
"MetalType": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": "abc123451"
}
]
},
"Date": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": "2018-05-26"
}
]
},
"Acknowledge": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": "Success"
}
]
}
}
}
}
]
}
Even though jww is right, I built something for you:
I import the csv using pandas:
df = pd.read_csv('data.csv')
then I create a template for the dictionaries you want to add:
d_json = {"entities": []}
template = {
"id": "XXXXXXX",
"type": "",
"data": {
"attributes": {
"MetalType": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": ""
}
]
},
"Date": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": ""
}
]
},
"Acknowledge": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": ""
}
]
}
}
}
}
Now you just need to fill in the dictionary:
for i in range(len(df)):
d = template
d['type'] = df['type'][i]
d['data']['attributes']['MetalType']['values'][0]['value'] = df['MetalType'][i]
d['data']['attributes']['Date']['values'][0]['value'] = df['Date'][i]
d['data']['attributes']['Acknowledge']['values'][0]['value'] = df['Acknowledge'][i]
d_json['entities'].append(d)
I know my way of iterating over the df is kind of ugly, maybe someone knows a cleaner way.
Cheers!

Categories