urllib—expressing what piece of JSON Data I desire - python

Here is a big piece of JSON data that I fetch in my code below:
{
"status": 200,
"offset": 0,
"limit": 10,
"count": 8,
"total": 8,
"url": "/v2/dictionaries/ldoce5/entries?headword=extra",
"results": [
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 3,
"id": "cqAFDjvvYg",
"part_of_speech": "adverb",
"senses": [
{
"collocation_examples": [
{
"collocation": "one/a few etc extra",
"example": {
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627480.mp3"
}
],
"text": "I got a few extra in case anyone else decides to come."
}
}
],
"definition": [
"in addition to the usual things or the usual amount"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627477.mp3"
}
],
"text": "They need to offer something extra to attract customers."
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDjvvYg"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra-",
"id": "cqAFDk1BDw",
"part_of_speech": "prefix",
"pronunciations": [
{
"audio": [
{
"lang": "American English",
"type": "pronunciation",
"url": "/v2/dictionaries/assets/ldoce/us_pron/extra__pre.mp3"
}
],
"ipa": "ekstrə"
}
],
"senses": [
{
"definition": [
"outside or beyond"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001832333.mp3"
}
],
"text": "extragalactic (=outside our galaxy)"
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDk1BDw"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 1,
"id": "cqAFDjpNZQ",
"part_of_speech": "adjective",
"pronunciations": [
{
"audio": [
{
"lang": "British English",
"type": "pronunciation",
"url": "/v2/dictionaries/assets/ldoce/gb_pron/extra_n0205.mp3"
},
{
"lang": "American English",
"type": "pronunciation",
"url": "/v2/dictionaries/assets/ldoce/us_pron/extra1.mp3"
}
],
"ipa": "ˈekstrə"
}
],
"senses": [
{
"collocation_examples": [
{
"collocation": "an extra ten minutes/three metres etc",
"example": {
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202489.mp3"
}
],
"text": "I asked for an extra two weeks to finish the work."
}
}
],
"definition": [
"more of something, in addition to the usual or standard amount or number"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202484.mp3"
}
],
"text": "Could you get an extra loaf of bread?"
}
],
"gramatical_info": {
"type": "only before noun"
}
}
],
"url": "/v2/dictionaries/entries/cqAFDjpNZQ"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 2,
"id": "cqAFDjsQjH",
"part_of_speech": "pronoun",
"senses": [
{
"collocation_examples": [
{
"collocation": "pay/charge/cost etc extra",
"example": {
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202499.mp3"
}
],
"text": "I earn extra for working on Sunday."
}
}
],
"definition": [
"an amount of something, especially money, in addition to the usual, basic, or necessary amount"
],
"synonym": "more"
}
],
"url": "/v2/dictionaries/entries/cqAFDjsQjH"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 4,
"id": "cqAFDjyTn8",
"part_of_speech": "noun",
"senses": [
{
"definition": [
"something which is added to a basic product or service that improves it and often costs more"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202524.mp3"
}
],
"text": "Tinted windows and a sunroof are optional extras(=something that you can choose to have or not)."
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDjyTn8"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra virgin",
"id": "cqAFDmV2Jw",
"part_of_speech": "adjective",
"senses": [
{
"definition": [
"extra virgin olive oil comes from olives that are pressed for the first time, and is considered to be the best quality olive oil"
]
}
],
"url": "/v2/dictionaries/entries/cqAFDmV2Jw"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra time",
"id": "cqAFDmGZyQ",
"part_of_speech": "noun",
"senses": [
{
"american_equivalent": "overtime",
"definition": [
"a period, usually of 30 minutes, added to the end of a football game in some competitions if neither team has won after normal time"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627835.mp3"
}
],
"text": "The match went into extra time."
}
],
"geography": "especially British English",
"gramatical_examples": [
{
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627834.mp3"
}
],
"text": "Beckham scored in extra time."
}
],
"pattern": "in extra time"
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDmGZyQ"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra-sensory perception",
"id": "cqAFDm6ceW",
"part_of_speech": "noun",
"senses": [
{
"definition": [
"ESP"
]
}
],
"url": "/v2/dictionaries/entries/cqAFDm6ceW"
}
]
}
I want to grab and print the definitions offered in the JSON results. I don't know how to express this and am getting a 'list indices must be integers or slices, not str' error for my sense = data['senses'].
#!/usr/bin/env python
# Fetch dictionary entries for the headword "extra" and try to print each definition.
import urllib.request
import json
# Download the response body and decode it as UTF-8 text.
wp = urllib.request.urlopen("http://api.pearson.com/v2/dictionaries/ldoce5/entries?headword=extra").read().decode('utf8')
jsonData=json.loads(wp)
# In the sample response shown above, 'results' is a list of entry dicts.
data=jsonData['results']
for item in data:
# NOTE(review): this indexes the list `data` with a string instead of using the loop variable `item`; it is the line the reported 'list indices must be integers or slices, not str' error refers to.
sense = data['senses']
# NOTE(review): `senses` is never assigned in this snippet (the variable above is `sense`).
print(senses['definition'])

The value stored under 'senses' is actually a list with a single element, a dictionary. That dictionary contains the key-value pair you want.
For example:
for item in data:
    # `data` is the list of entries, so index the current entry (`item`),
    # not the list itself. Each entry's 'senses' value is a list whose first
    # element is the dict holding the 'definition' key.
    sense = item['senses'][0]
    print(sense['definition'])

Related

json.decoder.JSONDecodeError - while converting JSON to CSV output

While trying to convert a JSON output below to CSV, getting error
Here is the JSON output
{
"data": [
{
"id": "-1000100591151294842",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "194.950000",
"wavelength": "1537.79",
"channel": "CH-20"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-BBEG-CHLC-P109"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "100.0",
"utilizationPercent": "100",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"linkLabel": "BBEG-ROADM-0101:5-4-1,CHLC-ROADM-0401:7-35-1",
"lastUpdatedAdminStateTimeStamp": "2021-05-03T00:29:24.444Z",
"lastUpdatedOperationalStateTimeStamp": "2022-12-08T22:42:21.567Z",
"userLabel": "J-BBEG-CHLC-P109",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-12-08T22:42:22.123Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "194.950000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1000100591151294842"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6765278351459212874"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1000100591151294842:1"
},
{
"type": "endPoints",
"id": "-1000100591151294842:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "7147507956181395827"
}
]
}
}
},
{
"id": "-1013895107051577774",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "191.600000",
"wavelength": "1564.68",
"channel": "CH-87"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-KFF9-PNTH-P101"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "90.0",
"utilizationPercent": "90",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"tags": [
"J-KFF9-PNTH-P101"
],
"linkLabel": "KFF9-ROADM-0301:1-1-1,PNTH-ROADM-0101:1-1-1",
"lastUpdatedAdminStateTimeStamp": "2021-09-12T20:22:59.334Z",
"lastUpdatedOperationalStateTimeStamp": "2022-10-12T14:20:44.779Z",
"userLabel": "J-KFF9-PNTH-P101",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-10-12T14:20:45.417Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "191.600000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1013895107051577774"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6055685088078365419"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1013895107051577774:1"
},
{
"type": "endPoints",
"id": "-1013895107051577774:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "-6727082893715936342"
}
]
}
}
}
] }
I'm getting the error below and I'm not sure what is missing.
Here is the Python script I used. I have been trying different variations with no luck; every other variation produces a different error.
# Read fre.json and collect parsed JSON items into `data`.
# NOTE(review): `Path` and `json` are used here but their imports are not shown in this snippet.
filename = Path('fre.json')
data = []
with open(filename,'r') as json_file:
data_str = json_file.read()
# Keep only the text after the first '[' ...
data_str = data_str.split('[',1)[-1]
# ... and before the last ']'.
data_str = data_str.rsplit(']',1)[0]
# Split the remaining text into pieces wherever the literal '][' occurs.
data_str = data_str.split('][')
for jsonStr in data_str:
# Re-wrap each piece in brackets and parse it as JSON.
jsonStr = '[' + jsonStr + ']'
temp_data = json.loads(jsonStr)
# Append every element of the parsed result to `data`.
for each in temp_data:
data.append(each)
what is wrong?

Python Cubes OLAP Framework - How to sum a json column?

I started using Python Cubes Olap recently.
I'm trying to sum/avg a JSON postgres column, how can i do this?
my db structure:
events
id
object_type
sn_name
spectra
id
snx_wavelengths (json column)
event_id
my json:
{
"dimensions": [
{
"name": "event",
"levels": [
{
"name": "object_type",
"label": "Object Type",
"attributes": [
"object_type"
]
},
{
"name": "sn_name",
"label": "name",
"attributes": [
"sn_name"
]
}
]
},
{
"name": "spectra",
"levels": [
{
"name": "catalog_name",
"label": "Catalog Name",
"attributes": [
"catalog_name"
]
},
{
"name": "capture_date",
"label": "Capture Date",
"attributes": [
"capture_date"
]
}
]
},
{
"name": "date"
}
],
"cubes": [
{
"id": "uid",
"name": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5",
"dimensions": [
"event",
"spectra",
"date"
],
"aggregates": [
{
"name": "event_snx_wavelengths_sum",
"function": "sum",
"measure": "event.snx_wavelengths"
},
{
"name": "record_count",
"function": "count"
}
],
"joins": [
{
"master": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5.id",
"detail": "spectra.event_id"
},
],
"mappings": {
"event.sn_name": "sn_name",
"event.object_type": "object_type",
"spectra.catalog_name": "spectra.catalog_name",
"spectra.capture_date": "spectra.capture_date",
"event.snx_wavelengths": "spectra.snx_wavelengths",
"date": "spectra.capture_date"
},
}
]
}
I'm getting the following error:
Unknown attribute ''event.snx_wavelengths''
Can anyone help?
I already tried using MongoDB to do the sum, but I didn't have any success.

Is there a way to add curly brackets around a list of dictionaries already existing within a JSON file?

I currently have two JSONS that I want to merge into one singular JSON, additionally I want to add in a slight change.
Firstly, these are the two JSONS in question.
An intents JSON:
[
{
"ID": "G1",
"intent": "password_reset",
"examples": [
{
"text": "I forgot my password"
},
{
"text": "I can't log in"
},
{
"text": "I can't access the site"
},
{
"text": "My log in is failing"
},
{
"text": "I need to reset my password"
}
]
},
{
"ID": "G2",
"intent": "account_closure",
"examples": [
{
"text": "I want to close my account"
},
{
"text": "I want to terminate my account"
}
]
},
{
"ID": "G3",
"intent": "account_creation",
"examples": [
{
"text": "I want to open an account"
},
{
"text": "Create account"
}
]
},
{
"ID": "G4",
"intent": "complaint",
"examples": [
{
"text": "A member of staff was being rude"
},
{
"text": "I have a complaint"
}
]
}
]
and an entities JSON:
[
{
"ID": "K1",
"entity": "account_type",
"values": [
{
"type": "synonyms",
"value": "business",
"synonyms": [
"corporate"
]
},
{
"type": "synonyms",
"value": "personal",
"synonyms": [
"vanguard",
"student"
]
}
]
},
{
"ID": "K2",
"entity": "beverage",
"values": [
{
"type": "synonyms",
"value": "hot",
"synonyms": [
"heated",
"warm"
]
},
{
"type": "synonyms",
"value": "cold",
"synonyms": [
"ice",
"freezing"
]
}
]
}
]
The expected outcome is to create a JSON file that mimics this structure:
{
"intents": [
{
"intent": "password_reset",
"examples": [
{
"text": "I forgot my password"
},
{
"text": "I want to reset my password"
}
],
"description": "Reset a user password"
}
],
"entities": [
{
"entity": "account_type",
"values": [
{
"type": "synonyms",
"value": "business",
"synonyms": [
"company",
"corporate",
"enterprise"
]
},
{
"type": "synonyms",
"value": "personal",
"synonyms": []
}
],
"fuzzy_match": true
}
],
"metadata": {
"api_version": {
"major_version": "v2",
"minor_version": "2018-11-08"
}
},
"dialog_nodes": [
{
"type": "standard",
"title": "anything_else",
"output": {
"generic": [
{
"values": [
{
"text": "I didn't understand. You can try rephrasing."
},
{
"text": "Can you reword your statement? I'm not understanding."
},
{
"text": "I didn't get your meaning."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"conditions": "anything_else",
"dialog_node": "Anything else",
"previous_sibling": "node_4_1655399659061",
"disambiguation_opt_out": true
},
{
"type": "event_handler",
"output": {
"generic": [
{
"title": "What type of account do you hold with us?",
"options": [
{
"label": "Personal",
"value": {
"input": {
"text": "personal"
}
}
},
{
"label": "Business",
"value": {
"input": {
"text": "business"
}
}
}
],
"response_type": "option"
}
]
},
"parent": "slot_9_1655398217028",
"event_name": "focus",
"dialog_node": "handler_6_1655398217052",
"previous_sibling": "handler_7_1655398217052"
},
{
"type": "event_handler",
"output": {},
"parent": "slot_9_1655398217028",
"context": {
"account_type": "#account_type"
},
"conditions": "#account_type",
"event_name": "input",
"dialog_node": "handler_7_1655398217052"
},
{
"type": "standard",
"title": "business_account",
"output": {
"generic": [
{
"values": [
{
"text": "We have notified your corporate security team, they will be in touch to reset your password."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"parent": "node_3_1655397279884",
"next_step": {
"behavior": "jump_to",
"selector": "body",
"dialog_node": "node_4_1655399659061"
},
"conditions": "#account_type:business",
"dialog_node": "node_1_1655399028379",
"previous_sibling": "node_3_1655399027429"
},
{
"type": "standard",
"title": "intent_collection",
"output": {
"generic": [
{
"values": [
{
"text": "Thank you for confirming that you want to reset your password."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"next_step": {
"behavior": "jump_to",
"selector": "body",
"dialog_node": "node_3_1655397279884"
},
"conditions": "#password_reset",
"dialog_node": "node_3_1655396920143",
"previous_sibling": "Welcome"
},
{
"type": "frame",
"title": "account_type_confirmation",
"output": {
"generic": [
{
"values": [
{
"text": "Thank you"
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"parent": "node_3_1655396920143",
"context": {},
"next_step": {
"behavior": "skip_user_input"
},
"conditions": "#password_reset",
"dialog_node": "node_3_1655397279884"
},
{
"type": "standard",
"title": "personal_account",
"output": {
"generic": [
{
"values": [
{
"text": "We have sent you an email with a password reset link."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"parent": "node_3_1655397279884",
"next_step": {
"behavior": "jump_to",
"selector": "body",
"dialog_node": "node_4_1655399659061"
},
"conditions": "#account_type:personal",
"dialog_node": "node_3_1655399027429"
},
{
"type": "standard",
"title": "reset_confirmation",
"output": {
"generic": [
{
"values": [
{
"text": "Do you need assistance with anything else today?"
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"digress_in": "does_not_return",
"dialog_node": "node_4_1655399659061",
"previous_sibling": "node_3_1655396920143"
},
{
"type": "slot",
"output": {},
"parent": "node_3_1655397279884",
"variable": "$account_type",
"dialog_node": "slot_9_1655398217028",
"previous_sibling": "node_1_1655399028379"
},
{
"type": "standard",
"title": "welcome",
"output": {
"generic": [
{
"values": [
{
"text": "Hello. How can I help you?"
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"conditions": "welcome",
"dialog_node": "Welcome"
}
],
"counterexamples": [],
"system_settings": {
"off_topic": {
"enabled": true
},
"disambiguation": {
"prompt": "Did you mean:",
"enabled": true,
"randomize": true,
"max_suggestions": 5,
"suggestion_text_policy": "title",
"none_of_the_above_prompt": "None of the above"
},
"human_agent_assist": {
"prompt": "Did you mean:"
},
"intent_classification": {
"training_backend_version": "v2"
},
"spelling_auto_correct": true
},
"learning_opt_out": false,
"name": "Reset Password",
"language": "en",
"description": "Basic Password Reset Request"
}
So what I am missing in my original files, is essentially:
"intents":
and for the entities file:
"entities"
at the start of each list of dictionaries.
Additionally, I would need to wrap the whole thing in curly braces to comply with json formatting.
As seen, the final goal is not just appending these two to one another but the file technically continues with some other JSON code that I have yet to write and deal with.
My question now is as follows; by what method can I either add in these words and the braces to the individual files, then combine them into a singular JSON or alternatively by what method can I read in these files and combine them with the changes all in one go?
The new output file closing on a curly brace after the entities list of dicts is an acceptable outcome for me at the time, so that I can continue to make changes and hopefully further learn from this how to do these changes in future when I get there.
TIA
JSON is only a string format. You can load it into a native language structure (in Python, a list or a dict), do what you need, and then dump it back. So you don't "add strings" or "add brackets"; you only modify the structure.
import json

# Load each input file; both contain a top-level JSON list.
# `with` closes the file handles as soon as parsing is done.
with open('intents.txt') as intents_file:
    intents = json.load(intents_file)
with open('entities.txt') as entities_file:
    entities = json.load(entities_file)

# Wrap the two lists in one dict under the keys the target format expects.
content = {
    "intents": intents,
    "entities": entities,
}

# Write to a separate output file so that neither input file is overwritten.
with open('combined.json', 'w') as output_file:
    json.dump(content, output_file)
If you're reading all the JSON in as a string, you can just prepend '{"intents":' to the start and append a closing "}".
# Placeholder for the raw JSON text read from the file.
myJson = "your json string"
# Wrap the text in an object under the "intents" key.
myWrappedJson = "".join(['{"intents":', myJson, "}"])

Converting nested JSON structures to Pandas DataFrames

I've been struggling with the nested structure in json, how to convert to correct form
{
"id": "0c576f35-d704-4fa8-8cbb-311c6be36358",
"employee_id": null,
"creator_id": "16ca2db9-206c-4e18-891d-a00a5252dbd3",
"closed_by_id": null,
"request_number": 23,
"priority": "2",
"form_id": "urlaub-weitere-abwesenheiten",
"status": "opened",
"name": "Urlaub & weitere Abwesenheiten",
"read_by_employee": false,
"custom_status": {
"id": 15793,
"name": "In Bearbeitung HR"
},
"due_date": null,
"created_at": "2021-03-29T15:18:37.572040+02:00",
"updated_at": "2021-03-29T15:22:15.590156+02:00",
"closed_at": null,
"archived_at": null,
"attachment_count": 1,
"category": {
"id": "payroll-time-management",
"name": "Payroll, Time & Attendance"
},
"public_comment_count": 0,
"form_data": [
{
"field_id": "subcategory",
"values": [
"Time & Attendance - Manage monthly/year-end consolidation and report"
]
},
{
"field_id": "separator-2",
"values": [
null
]
},
{
"field_id": "art-der-massnahme",
"values": [
"Fortbildung"
]
},
{
"field_id": "bezeichnung-der-schulung-kurses",
"values": [
"dfgzhujiko"
]
},
{
"field_id": "startdatum",
"values": [
"2021-03-26"
]
},
{
"field_id": "enddatum",
"values": [
"2021-03-27"
]
},
{
"field_id": "freistellung",
"values": [
"nein"
]
},
{
"field_id": "mit-bildungsurlaub",
"values": [
""
]
},
{
"field_id": "kommentarfeld_fortbildung",
"values": [
""
]
},
{
"field_id": "separator",
"values": [
null
]
},
{
"field_id": "instructions",
"values": [
null
]
},
{
"field_id": "entscheidung-hr-bp",
"values": [
"Zustimmen"
]
},
{
"field_id": "kommentarfeld-hr-bp",
"values": [
"wsdfghjkmhnbgvfcdxsybvnm,"
]
},
{
"field_id": "individuelle-abstimmung",
"values": [
""
]
}
],
"form_files": [
{
"id": 30129,
"filename": "empty_background.png",
"field_id": "anhang"
}
],
"visible_by_employee": false,
"organization_ids": [],
"need_edit_by_employee": false,
"attachments": []
}
using a simple solution with pandas, dataframe
# NOTE(review): pd.json_normalize already returns a DataFrame, so the from_dict wrapper appears redundant — confirm.
Request = pd.DataFrame.from_dict(pd.json_normalize(data), orient='columns')
it's displaying almost in its correct form:
How can I split the dictionaries in the form_data and form_files columns? I've done a lot of research, but I'm still having a lot of trouble solving this problem: how do I split form_data into columns (not rows), with the id kept as metadata?
You can do something like this.
Pass the DataFrame and the name of the column to expand to the function as arguments:
def explode_node(child_df, column_value):
    """Expand a column of nested records into flat columns.

    Each non-null cell of ``column_value`` is flattened with
    ``pd.json_normalize``. The resulting rows are joined back onto the
    remaining columns of ``child_df`` by the original row index. String
    column labels in the result are lower-cased.

    Args:
        child_df: DataFrame containing the nested column. It is not modified.
        column_value: Label of the column to expand. If the first non-null
            cell is a string, every cell is parsed with ``ast.literal_eval``
            before flattening.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. Overlapping column names
        get the suffixes ``_left`` (expanded columns) and ``_right``
        (remaining columns). If no row has a non-null value in
        ``column_value``, an empty frame holding only the remaining columns
        is returned.
    """
    import ast

    def _lowered(labels):
        # Lower-case string labels only; other label types are kept as-is.
        return [
            label.lower() if isinstance(label, str) else label
            for label in labels
        ]

    # Rows without a value in the nested column cannot be expanded.
    child_df = child_df.dropna(subset=[column_value])
    nested = child_df[column_value]
    remaining = child_df.drop(columns=[column_value])

    # Nothing to expand: ``.iloc[0]`` and ``pd.concat`` would both raise here.
    if nested.empty:
        remaining = remaining.reset_index(drop=True)
        remaining.columns = _lowered(remaining.columns)
        return remaining

    # Cells stored as Python-literal strings are parsed back into objects.
    # ``literal_eval`` only accepts literals, so no arbitrary code is run.
    if isinstance(nested.iloc[0], str):
        nested = nested.apply(ast.literal_eval)

    # Keying the concat by the original row label lets the join below match
    # every flattened row to the row it came from.
    flattened = pd.concat(
        {row_label: pd.json_normalize(cell) for row_label, cell in nested.items()}
    ).reset_index(level=1, drop=True)

    expanded_child_df = flattened.join(
        remaining, how='right', lsuffix='_left', rsuffix='_right'
    ).reset_index(drop=True)
    expanded_child_df.columns = _lowered(expanded_child_df.columns)
    return expanded_child_df

Python - Parsing Oxford Dictionary API JSON Output

I would like to extract the first definition value from the following json output. Additionally, it would also be nice to print all of the definition values line by line.
{
"metadata": {
"provider": "Oxford University Press"
},
"results": [
{
"id": "ace",
"language": "en",
"lexicalEntries": [
{
"entries": [
{
"etymologies": [
"Middle English (denoting the ‘one’ on dice): via Old French from Latin as ‘unity, a unit’"
],
"grammaticalFeatures": [
{
"text": "Singular",
"type": "Number"
}
],
"homographNumber": "100",
"senses": [
{
"definitions": [
"a playing card with a single spot on it, ranked as the highest card in its suit in most card games"
],
"domains": [
"Cards"
],
"examples": [
{
"registers": [
"figurative"
],
"text": "life had started dealing him aces again"
},
{
"text": "the ace of diamonds"
}
],
"id": "m_en_gbus0005680.006",
"short_definitions": [
"playing card with single spot on it, ranked as highest card in its suit in most card games"
]
},
{
"definitions": [
"a person who excels at a particular sport or other activity"
],
"domains": [
"Sport"
],
"examples": [
{
"text": "a motorcycle ace"
}
],
"id": "m_en_gbus0005680.010",
"registers": [
"informal"
],
"short_definitions": [
"person who excels at particular sport or other activity"
],
"subsenses": [
{
"definitions": [
"a pilot who has shot down many enemy aircraft"
],
"domains": [
"Air Force"
],
"examples": [
{
"text": "a Battle of Britain ace"
}
],
"id": "m_en_gbus0005680.011",
"short_definitions": [
"pilot who has shot down many enemy aircraft"
]
}
],
"thesaurusLinks": [
{
"entry_id": "ace",
"sense_id": "t_en_gb0000173.001"
}
]
},
{
"definitions": [
"(in tennis and similar games) a service that an opponent is unable to return and thus wins a point"
],
"domains": [
"Tennis"
],
"examples": [
{
"text": "Nadal banged down eight aces in the set"
}
],
"id": "m_en_gbus0005680.013",
"short_definitions": [
"(in tennis and similar games) service that opponent is unable to return and thus wins point"
],
"subsenses": [
{
"definitions": [
"a hole in one"
],
"domains": [
"Golf"
],
"examples": [
{
"text": "his hole in one at the 15th was Senior's second ace as a professional"
}
],
"id": "m_en_gbus0005680.014",
"registers": [
"informal"
],
"short_definitions": [
"hole in one"
]
}
]
}
]
},
{
"etymologies": [
"early 21st century: abbreviation of asexual, with alteration of spelling on the model of ace"
],
"grammaticalFeatures": [
{
"text": "Singular",
"type": "Number"
}
],
"homographNumber": "200",
"senses": [
{
"definitions": [
"a person who has no sexual feelings or desires"
],
"domains": [
"Sex"
],
"examples": [
{
"text": "both asexual, they have managed to connect with other aces offline"
}
],
"id": "m_en_gbus1190638.004",
"short_definitions": [
"asexual person"
]
}
]
}
],
"language": "en",
"lexicalCategory": "Noun",
"pronunciations": [
{
"audioFile": "http://audio.oxforddictionaries.com/en/mp3/ace_1_gb_1_abbr.mp3",
"dialects": [
"British English"
],
"phoneticNotation": "IPA",
"phoneticSpelling": "eɪs"
}
],
"text": "ace"
},
{
"entries": [
{
"grammaticalFeatures": [
{
"text": "Positive",
"type": "Degree"
}
],
"homographNumber": "101",
"senses": [
{
"definitions": [
"very good"
],
"examples": [
{
"text": "Ace! You've done it!"
},
{
"text": "an ace swimmer"
}
],
"id": "m_en_gbus0005680.016",
"registers": [
"informal"
],
"short_definitions": [
"very good"
],
"thesaurusLinks": [
{
"entry_id": "ace",
"sense_id": "t_en_gb0000173.002"
}
]
}
]
},
{
"grammaticalFeatures": [
{
"text": "Positive",
"type": "Degree"
}
],
"homographNumber": "201",
"senses": [
{
"definitions": [
"(of a person) having no sexual feelings or desires; asexual"
],
"domains": [
"Sex"
],
"examples": [
{
"text": "I didn't realize that I was ace for a long time"
}
],
"id": "m_en_gbus1190638.006",
"short_definitions": [
"asexual"
]
}
]
}
],
"language": "en",
"lexicalCategory": "Adjective",
"pronunciations": [
{
"audioFile": "http://audio.oxforddictionaries.com/en/mp3/ace_1_gb_1_abbr.mp3",
"dialects": [
"British English"
],
"phoneticNotation": "IPA",
"phoneticSpelling": "eɪs"
}
],
"text": "ace"
},
{
"entries": [
{
"grammaticalFeatures": [
{
"text": "Transitive",
"type": "Subcategorization"
},
{
"text": "Present",
"type": "Tense"
}
],
"homographNumber": "102",
"senses": [
{
"definitions": [
"(in tennis and similar games) serve an ace against (an opponent)"
],
"domains": [
"Tennis"
],
"examples": [
{
"text": "he can ace opponents with serves of no more than 62 mph"
}
],
"id": "m_en_gbus0005680.020",
"registers": [
"informal"
],
"short_definitions": [
"(in tennis and similar games) serve ace against"
],
"subsenses": [
{
"definitions": [
"score an ace on (a hole) or with (a shot)"
],
"domains": [
"Golf"
],
"examples": [
{
"text": "there was a prize for the first player to ace the hole"
}
],
"id": "m_en_gbus0005680.026",
"short_definitions": [
"score ace on hole or with"
]
}
]
},
{
"definitions": [
"achieve high marks in (a test or exam)"
],
"examples": [
{
"text": "I aced my grammar test"
}
],
"id": "m_en_gbus0005680.028",
"regions": [
"North American"
],
"registers": [
"informal"
],
"short_definitions": [
"achieve high marks in"
],
"subsenses": [
{
"definitions": [
"outdo someone in a competitive situation"
],
"examples": [
{
"text": "the magazine won an award, acing out its rivals"
}
],
"id": "m_en_gbus0005680.029",
"notes": [
{
"text": "\"ace someone out\"",
"type": "wordFormNote"
}
],
"short_definitions": [
"outdo someone in competitive situation"
]
}
]
}
]
}
],
"language": "en",
"lexicalCategory": "Verb",
"pronunciations": [
{
"audioFile": "http://audio.oxforddictionaries.com/en/mp3/ace_1_gb_1_abbr.mp3",
"dialects": [
"British English"
],
"phoneticNotation": "IPA",
"phoneticSpelling": "eɪs"
}
],
"text": "ace"
}
],
"type": "headword",
"word": "ace"
}
]
}
I'm reading that output with something like this:
# Parse the raw API response text into Python objects.
oxford_dict = json.loads(oxford_output)
# Nested keys can be chained directly: metadata -> provider.
print(oxford_dict['metadata']['provider'])
But I'm not really sure how to drill down further and grab the first definition, since it exists multiple times.
You have a multi-nested dictionary.
Try:
# Lazily walk results -> lexicalEntries -> entries -> senses.
all_senses = (
    sense
    for result in oxford_dict["results"]
    for lexical_entry in result["lexicalEntries"]
    for entry in lexical_entry["entries"]
    for sense in entry["senses"]
)
# Print each sense's list of definitions, one list per line.
for sense in all_senses:
    print(sense["definitions"])
Output:
['a playing card with a single spot on it, ranked as the highest card in its suit in most card games']
['a person who excels at a particular sport or other activity']
['(in tennis and similar games) a service that an opponent is unable to return and thus wins a point']
['a person who has no sexual feelings or desires']
['very good']
['(of a person) having no sexual feelings or desires; asexual']
['(in tennis and similar games) serve an ace against (an opponent)']
['achieve high marks in (a test or exam)']

Categories