Converting nested JSON structures to Pandas DataFrames

Converting nested JSON structures to Pandas DataFrames - python

I've been struggling with a nested JSON structure and can't work out how to convert it into the correct tabular form:
{
"id": "0c576f35-d704-4fa8-8cbb-311c6be36358",
"employee_id": null,
"creator_id": "16ca2db9-206c-4e18-891d-a00a5252dbd3",
"closed_by_id": null,
"request_number": 23,
"priority": "2",
"form_id": "urlaub-weitere-abwesenheiten",
"status": "opened",
"name": "Urlaub & weitere Abwesenheiten",
"read_by_employee": false,
"custom_status": {
"id": 15793,
"name": "In Bearbeitung HR"
},
"due_date": null,
"created_at": "2021-03-29T15:18:37.572040+02:00",
"updated_at": "2021-03-29T15:22:15.590156+02:00",
"closed_at": null,
"archived_at": null,
"attachment_count": 1,
"category": {
"id": "payroll-time-management",
"name": "Payroll, Time & Attendance"
},
"public_comment_count": 0,
"form_data": [
{
"field_id": "subcategory",
"values": [
"Time & Attendance - Manage monthly/year-end consolidation and report"
]
},
{
"field_id": "separator-2",
"values": [
null
]
},
{
"field_id": "art-der-massnahme",
"values": [
"Fortbildung"
]
},
{
"field_id": "bezeichnung-der-schulung-kurses",
"values": [
"dfgzhujiko"
]
},
{
"field_id": "startdatum",
"values": [
"2021-03-26"
]
},
{
"field_id": "enddatum",
"values": [
"2021-03-27"
]
},
{
"field_id": "freistellung",
"values": [
"nein"
]
},
{
"field_id": "mit-bildungsurlaub",
"values": [
""
]
},
{
"field_id": "kommentarfeld_fortbildung",
"values": [
""
]
},
{
"field_id": "separator",
"values": [
null
]
},
{
"field_id": "instructions",
"values": [
null
]
},
{
"field_id": "entscheidung-hr-bp",
"values": [
"Zustimmen"
]
},
{
"field_id": "kommentarfeld-hr-bp",
"values": [
"wsdfghjkmhnbgvfcdxsybvnm,"
]
},
{
"field_id": "individuelle-abstimmung",
"values": [
""
]
}
],
"form_files": [
{
"id": 30129,
"filename": "empty_background.png",
"field_id": "anhang"
}
],
"visible_by_employee": false,
"organization_ids": [],
"need_edit_by_employee": false,
"attachments": []
}
Using a simple solution with a pandas DataFrame:
Request = pd.DataFrame.from_dict(pd.json_normalize(data), orient='columns')
the result is displayed almost in its correct form.
How can I split the dictionaries in the form_data and form_files columns? I've done a lot of research, but I'm still having a lot of trouble solving this problem: I want form_data split into columns, not rows, with the ID kept as metadata.

You can do something like this.
Pass the DataFrame and the name of the column to expand to the function as arguments:
def explode_node(child_df, column_value):
    """Expand a column of nested records into one output row per record.

    Rows whose ``column_value`` cell is null are dropped. If the first
    remaining cell is a string, every cell is parsed with
    ``ast.literal_eval`` before expansion. Each cell is then flattened with
    ``pandas.json_normalize`` and joined back to the other columns of its
    parent row.

    Args:
        child_df: DataFrame that contains the nested column. It is not
            modified; the function works on a copy.
        column_value: Name of the column holding the nested records.

    Returns:
        A new DataFrame with a fresh integer index and lower-cased column
        names: the flattened record fields followed by the remaining parent
        columns. Overlapping names get ``_left`` / ``_right`` suffixes.
    """
    import ast

    import pandas as pd

    column = str(column_value)
    # Explicit copy: the frame is modified below (assignment and pop), and
    # doing that on a filtered view can trigger SettingWithCopyWarning.
    child_df = child_df.dropna(subset=[column_value]).copy()

    if child_df.empty:
        # Nothing to expand; pd.concat() raises on an empty mapping and
        # .iloc[0] raises on an empty column, so return early.
        remaining = child_df.drop(columns=[column])
        remaining.columns = [name.lower() for name in remaining.columns]
        return remaining.reset_index(drop=True)

    if isinstance(child_df[column].iloc[0], str):
        # Cells hold the textual repr of the records; literal_eval only
        # accepts Python literals, so no arbitrary code is evaluated.
        child_df[column_value] = child_df[column].apply(ast.literal_eval)

    nested = child_df.pop(column)
    # Keying the concat by the parent index lets the flattened records be
    # joined back to their parent row afterwards.
    flattened = pd.concat(
        {idx: pd.json_normalize(records) for idx, records in nested.items()}
    )
    expanded_child_df = (
        flattened.reset_index(level=1, drop=True)
        # how='right' keeps parent rows whose nested list was empty.
        .join(child_df, how='right', lsuffix='_left', rsuffix='_right')
        .reset_index(drop=True)
    )
    expanded_child_df.columns = [name.lower() for name in expanded_child_df.columns]
    return expanded_child_df

Related

json.decoder.JSONDecodeError - while converting JSON to CSV output

While trying to convert the JSON output below to CSV, I am getting an error.
Here is the JSON output:
{
"data": [
{
"id": "-1000100591151294842",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "194.950000",
"wavelength": "1537.79",
"channel": "CH-20"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-BBEG-CHLC-P109"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "100.0",
"utilizationPercent": "100",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"linkLabel": "BBEG-ROADM-0101:5-4-1,CHLC-ROADM-0401:7-35-1",
"lastUpdatedAdminStateTimeStamp": "2021-05-03T00:29:24.444Z",
"lastUpdatedOperationalStateTimeStamp": "2022-12-08T22:42:21.567Z",
"userLabel": "J-BBEG-CHLC-P109",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-12-08T22:42:22.123Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "194.950000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1000100591151294842"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6765278351459212874"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1000100591151294842:1"
},
{
"type": "endPoints",
"id": "-1000100591151294842:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "7147507956181395827"
}
]
}
}
},
{
"id": "-1013895107051577774",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "191.600000",
"wavelength": "1564.68",
"channel": "CH-87"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-KFF9-PNTH-P101"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "90.0",
"utilizationPercent": "90",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"tags": [
"J-KFF9-PNTH-P101"
],
"linkLabel": "KFF9-ROADM-0301:1-1-1,PNTH-ROADM-0101:1-1-1",
"lastUpdatedAdminStateTimeStamp": "2021-09-12T20:22:59.334Z",
"lastUpdatedOperationalStateTimeStamp": "2022-10-12T14:20:44.779Z",
"userLabel": "J-KFF9-PNTH-P101",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-10-12T14:20:45.417Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "191.600000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1013895107051577774"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6055685088078365419"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1013895107051577774:1"
},
{
"type": "endPoints",
"id": "-1013895107051577774:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "-6727082893715936342"
}
]
}
}
}
] }
I am getting the error below and am not sure what is missing.
Here is the Python script I used. I have been trying different variations, but with no luck; I get different errors in all the other variants.
# Load the records from 'fre.json' into the flat list `data`.
filename = Path('fre.json')
data = []
with open(filename, 'r') as json_file:
    raw_text = json_file.read()

# Keep only the text between the first '[' and the last ']' of the file.
inner_text = raw_text.split('[', 1)[-1].rsplit(']', 1)[0]

# Wherever one array closes and the next opens directly ('][') the text is
# cut into fragments; each fragment is re-wrapped in brackets and parsed as
# a JSON array of its own, and its items are collected into `data`.
for fragment in inner_text.split(']['):
    data.extend(json.loads('[' + fragment + ']'))
what is wrong?

Python Cubes OLAP Framework - How to sum a json column?

I started using the Python Cubes OLAP framework recently.
I'm trying to sum/average a JSON Postgres column. How can I do this?
my db structure:
events
id
object_type
sn_name
spectra
id
snx_wavelengths (json column)
event_id
my json:
{
"dimensions": [
{
"name": "event",
"levels": [
{
"name": "object_type",
"label": "Object Type",
"attributes": [
"object_type"
]
},
{
"name": "sn_name",
"label": "name",
"attributes": [
"sn_name"
]
}
]
},
{
"name": "spectra",
"levels": [
{
"name": "catalog_name",
"label": "Catalog Name",
"attributes": [
"catalog_name"
]
},
{
"name": "capture_date",
"label": "Capture Date",
"attributes": [
"capture_date"
]
}
]
},
{
"name": "date"
}
],
"cubes": [
{
"id": "uid",
"name": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5",
"dimensions": [
"event",
"spectra",
"date"
],
"aggregates": [
{
"name": "event_snx_wavelengths_sum",
"function": "sum",
"measure": "event.snx_wavelengths"
},
{
"name": "record_count",
"function": "count"
}
],
"joins": [
{
"master": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5.id",
"detail": "spectra.event_id"
},
],
"mappings": {
"event.sn_name": "sn_name",
"event.object_type": "object_type",
"spectra.catalog_name": "spectra.catalog_name",
"spectra.capture_date": "spectra.capture_date",
"event.snx_wavelengths": "spectra.snx_wavelengths",
"date": "spectra.capture_date"
},
}
]
}
I'm getting the following error:
Unknown attribute ''event.snx_wavelengths''
Can anyone help?
I already tried using MongoDB to do the sum, but I didn't have any success.

Convert CSV to Nested JSON complex structure using Pandas

Converted into a nested JSON file using Pandas
This is the sample csv for one row
name type aitm alitm aaitm adsc1
specs glass 70072187 ESA65Z45 ESA 65Z45 CUT TIP FG 1808-40
I'm trying to achieve the below structure of Nested JSON for every row

import pandas as pd
import json
# Sample input: one row per entity; every column other than 'name' and
# 'type' becomes an attribute of that entity.
df = pd.DataFrame(
    [
        ['specs', 'glass', '70072187', 'ESA65Z45', 'ESA 65Z45', 'CUT TIP FG 1808-40'],
        ['specs', 'glass', '666', 'ESA6665', 'ESB 666', 'CUT TIP FG 66-40'],
    ],
    columns=['name', 'type', 'aitm', 'alitm', 'aaitm', 'adsc1'],
)

data = {'entities': []}
for key, grp in df.groupby('name'):
    for _, row in grp.iterrows():
        attr_row = row[~row.index.isin(['name', 'type'])]
        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0.
        attributes = {
            attr_name: {
                'values': [
                    {'value': attr_value, 'source': 'internal', 'locale': 'en_US'}
                ]
            }
            for attr_name, attr_value in attr_row.items()
        }
        data['entities'].append(
            {'name': key, 'type': row['type'], 'data': {'attributes': attributes}}
        )
print(json.dumps(data, indent=4))
Output:
print(json.dumps(data, indent= 4))
{
"entities": [
{
"name": "specs",
"type": "glass",
"data": {
"attributes": {
"aitm": {
"values": [
{
"value": "70072187",
"source": "internal",
"locale": "en_US"
}
]
},
"alitm": {
"values": [
{
"value": "ESA65Z45",
"source": "internal",
"locale": "en_US"
}
]
},
"aaitm": {
"values": [
{
"value": "ESA 65Z45",
"source": "internal",
"locale": "en_US"
}
]
},
"adsc1": {
"values": [
{
"value": "CUT TIP FG 1808-40",
"source": "internal",
"locale": "en_US"
}
]
}
}
}
},
{
"name": "specs",
"type": "glass",
"data": {
"attributes": {
"aitm": {
"values": [
{
"value": "666",
"source": "internal",
"locale": "en_US"
}
]
},
"alitm": {
"values": [
{
"value": "ESA6665",
"source": "internal",
"locale": "en_US"
}
]
},
"aaitm": {
"values": [
{
"value": "ESB 666",
"source": "internal",
"locale": "en_US"
}
]
},
"adsc1": {
"values": [
{
"value": "CUT TIP FG 66-40",
"source": "internal",
"locale": "en_US"
}
]
}
}
}
}
]
}

how to convert multi valued CSV to Json

I have a csv file with 4 columns data as below.
type,MetalType,Date,Acknowledge
Metal,abc123451,2018-05-26,Success
Metal,abc123452,2018-05-27,Success
Metal,abc123454,2018-05-28,Failure
Iron,abc123455,2018-05-29,Success
Iron,abc123456,2018-05-30,Failure
(I included a header in the example data above, but in my case the data has no header row.)
How can I convert the above CSV file to JSON in the format below?
1st Column : belongs to --> "type": "Metal"
2nd Column : MetalType: "values" : "value": "abc123451"
3rd column : "Date": "values":"value": "2018-05-26"
4th Column : "Acknowledge": "values":"value": "Success"
and remaining all columns are default values.
As per below format ,
{
"entities": [
{
"id": "XXXXXXX",
"type": "Metal",
"data": {
"attributes": {
"MetalType": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": "abc123451"
}
]
},
"Date": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": "2018-05-26"
}
]
},
"Acknowledge": {
"values": [
{
"source": "XYZ",
"locale": "Australia",
"value": "Success"
}
]
}
}
}
}
]
}

Even though jww is right, I built something for you:
I import the csv using pandas:
# The input file has no header row, so the column names are supplied here;
# otherwise the first record would be consumed as the header.
df = pd.read_csv(
    'data.csv',
    header=None,
    names=['type', 'MetalType', 'Date', 'Acknowledge'],
)
then I create a template for the dictionaries you want to add:
# Container for the final document; one entry is appended per CSV row.
d_json = {"entities": []}

# Skeleton of a single entity. The three attributes share the same shape,
# so they are generated from their names; "type" and each "value" start out
# as empty strings.
template = {
    "id": "XXXXXXX",
    "type": "",
    "data": {
        "attributes": {
            attribute: {
                "values": [
                    {"source": "XYZ", "locale": "Australia", "value": ""}
                ]
            }
            for attribute in ("MetalType", "Date", "Acknowledge")
        }
    },
}
Now you just need to fill in the dictionary:
import copy

# Build one entity per CSV row. Each entity must be a deep copy of the
# template: assigning the template itself would make every list entry refer
# to the same nested dicts, so all entities would end up with the values of
# the last row.
for entity_type, metal_type, date, acknowledge in zip(
    df['type'], df['MetalType'], df['Date'], df['Acknowledge']
):
    d = copy.deepcopy(template)
    d['type'] = entity_type
    attributes = d['data']['attributes']
    attributes['MetalType']['values'][0]['value'] = metal_type
    attributes['Date']['values'][0]['value'] = date
    attributes['Acknowledge']['values'][0]['value'] = acknowledge
    d_json['entities'].append(d)
I know my way of iterating over the df is kind of ugly, maybe someone knows a cleaner way.
Cheers!

API Nested JSON Response TO CSV

I am trying to convert a Nested JSON Response to CSV. Following is the JSON Response
{
"rows": [
[
{
"postId": 188365573,
"messageId": 198365562,
"accountId": 214,
"messageType": 2,
"channelType": "TWITTER",
"accountType": "TWITTER",
"taxonomy": {
"campaignId": "2521_4",
"clientCustomProperties": {
"PromotionChannelAbbreviation": [
"3tw"
],
"PromotionChannels": [
"Twitter"
],
"ContentOwner": [
"Audit"
],
"Location": [
"us"
],
"Sub_Category": [
"dbriefs"
],
"ContentOwnerAbbreviation": [
"aud"
],
"PrimaryPurpose_Outcome": [
"Engagement"
],
"PrimaryPurposeOutcomeAbbv": [
"eng"
]
},
"partnerCustomProperties": {},
"tags": [],
"urlShortnerDomain": "2721_spr.ly"
},
"approval": {
"approvalOption": "NONE",
"comment": ""
},
"status": "SENT",
"createdDate": 1433331585000,
"scheduleDate": 1435783440000,
"version": 4,
"deleted": false,
"publishedDate": 1435783441000,
"statusID": "6163465412728176",
"permalink": "https://twitter.com/Acctg/status/916346541272498176",
"additional": {
"links": []
}
},
0
],
[
{
"postId": 999145171,
"messageId": 109145169,
"accountId": 21388,
"messageType": 2,
"channelType": "TWITTER",
"accountType": "TWITTER",
"taxonomy": {
"campaignId": "2521_4",
"clientCustomProperties": {
"PromotionChannelAbbreviation": [
"3tw"
],
"Eminence_Registry_Number": [
"1000159"
],
"PromotionChannels": [
"Twitter"
],
"ContentOwner": [
"Ctr. Health Solutions"
],
"Location": [
"us"
],
"Sub_Category": [
"fraud"
],
"ContentOwnerAbbreviation": [
"chs"
],
"PrimaryPurpose_Outcome": [
"Awareness"
],
"PrimaryPurposeOutcomeAbbv": [
"awa"
]
},
"partnerCustomProperties": {},
"tags": [],
"urlShortnerDomain": "2521_spr.ly"
},
"approval": {
"approvalOption": "NONE",
"comment": ""
},
"status": "SENT",
"createdDate": 1434983660000,
"scheduleDate": 1435753800000,
"version": 4,
"deleted": false,
"publishedDate": 1435753801000,
"statusID": "616222222198407168",
"permalink": "https://twitter.com/Health/status/6162222221984070968",
"additional": {
"links": []
}
},
0
]
}
And the Python code I am using to convert this is:
import json
import csv
def _flatten(record, parent_key='', sep='.'):
    """Return *record* flattened to a single level.

    Nested dict keys are joined with *sep* (for example
    ``taxonomy.campaignId``), lists are written as their items joined with
    ';', and empty dicts become an empty string.
    """
    flat = {}
    for key, value in record.items():
        column = f'{parent_key}{sep}{key}' if parent_key else key
        if isinstance(value, dict):
            if value:
                flat.update(_flatten(value, column, sep))
            else:
                flat[column] = ''
        elif isinstance(value, list):
            flat[column] = ';'.join(str(item) for item in value)
        else:
            flat[column] = value
    return flat


# importing the data
with open('Post_Insights_test.json') as Test:
    data1 = json.load(Test)

# Each entry of "rows" is a [post, number] pair; only the post dict is
# exported. Iterating over the rows themselves avoids a hard-coded count.
records = [_flatten(row[0]) for row in data1["rows"]]

# Posts do not all carry the same keys, so the header is the union of every
# record's columns, in order of first appearance.
header = list(dict.fromkeys(column for record in records for column in record))

# opening the csv
# newline='' stops the line endings written by the csv module from being
# translated a second time, which is what produces a blank row after every
# record on Windows.
with open('Data_table2.csv', 'w', newline='') as csvdata:
    csvwriter = csv.DictWriter(csvdata, fieldnames=header, restval='', delimiter=',')
    csvwriter.writeheader()
    # Columns a record does not have are filled with restval ('').
    csvwriter.writerows(records)
The problems are the following:
I am unable to get the keys for nested responses like taxonomy.
I am unable to get the values for nested responses like taxonomy.
Many responses have different headers/keys, so ideally all of them should appear as headers in my Excel sheet, but I am not able to figure out how to do that in Python.
My Excel sheet shows an empty row after every entry, and I don't know why.
Please help. All criticism is welcome. Kind regards

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Converting nested JSON structures to Pandas DataFrames - python

Related

json.decoder.JSONDecodeError - while converting JSON to CSV output

Python Cubes OLAP Framework - How to sum a json column?

Convert CSV to Nested JSON complex structure using Pandas

how to convert multi valued CSV to Json

API Nested JSON Response TO CSV

Categories

Resources