I am trying to convert a Nested JSON Response to CSV. Following is the JSON Response
{
"rows": [
[
{
"postId": 188365573,
"messageId": 198365562,
"accountId": 214,
"messageType": 2,
"channelType": "TWITTER",
"accountType": "TWITTER",
"taxonomy": {
"campaignId": "2521_4",
"clientCustomProperties": {
"PromotionChannelAbbreviation": [
"3tw"
],
"PromotionChannels": [
"Twitter"
],
"ContentOwner": [
"Audit"
],
"Location": [
"us"
],
"Sub_Category": [
"dbriefs"
],
"ContentOwnerAbbreviation": [
"aud"
],
"PrimaryPurpose_Outcome": [
"Engagement"
],
"PrimaryPurposeOutcomeAbbv": [
"eng"
]
},
"partnerCustomProperties": {},
"tags": [],
"urlShortnerDomain": "2721_spr.ly"
},
"approval": {
"approvalOption": "NONE",
"comment": ""
},
"status": "SENT",
"createdDate": 1433331585000,
"scheduleDate": 1435783440000,
"version": 4,
"deleted": false,
"publishedDate": 1435783441000,
"statusID": "6163465412728176",
"permalink": "https://twitter.com/Acctg/status/916346541272498176",
"additional": {
"links": []
}
},
0
],
[
{
"postId": 999145171,
"messageId": 109145169,
"accountId": 21388,
"messageType": 2,
"channelType": "TWITTER",
"accountType": "TWITTER",
"taxonomy": {
"campaignId": "2521_4",
"clientCustomProperties": {
"PromotionChannelAbbreviation": [
"3tw"
],
"Eminence_Registry_Number": [
"1000159"
],
"PromotionChannels": [
"Twitter"
],
"ContentOwner": [
"Ctr. Health Solutions"
],
"Location": [
"us"
],
"Sub_Category": [
"fraud"
],
"ContentOwnerAbbreviation": [
"chs"
],
"PrimaryPurpose_Outcome": [
"Awareness"
],
"PrimaryPurposeOutcomeAbbv": [
"awa"
]
},
"partnerCustomProperties": {},
"tags": [],
"urlShortnerDomain": "2521_spr.ly"
},
"approval": {
"approvalOption": "NONE",
"comment": ""
},
"status": "SENT",
"createdDate": 1434983660000,
"scheduleDate": 1435753800000,
"version": 4,
"deleted": false,
"publishedDate": 1435753801000,
"statusID": "616222222198407168",
"permalink": "https://twitter.com/Health/status/6162222221984070968",
"additional": {
"links": []
}
},
0
]
]
}
And the Python code I am using to convert this is
import json
import csv
def flatten(record, prefix=''):
    """Flatten a nested dict into a single-level dict with dotted keys.

    Nested dicts contribute ``parent.child`` keys. Lists are joined into one
    ``;``-separated cell so each record still occupies exactly one CSV row.
    Empty dicts contribute no column at all.
    """
    flat = {}
    for key, value in record.items():
        name = prefix + key
        if isinstance(value, dict):
            flat.update(flatten(value, name + '.'))
        elif isinstance(value, list):
            flat[name] = ';'.join(str(item) for item in value)
        else:
            flat[name] = value
    return flat


# importing the data
with open('Post_Insights_test.json') as Test:
    data1 = json.load(Test)

# Each entry of "rows" is a [record, number] pair; only the record is exported.
# Iterating the list itself (instead of a fixed range) works for any row count.
records = [flatten(row[0]) for row in data1["rows"]]

# Records do not all share the same keys, so the header is the union of every
# record's keys, kept in first-seen order.
header = list(dict.fromkeys(key for record in records for key in record))

# opening the csv
# newline='' stops the csv module's own line endings from being translated a
# second time, which is what produces a blank row after every record.
with open('Data_table2.csv', 'w', newline='') as csvdata:
    # restval fills the cells of keys that a given record does not have.
    csvwriter = csv.DictWriter(csvdata, fieldnames=header, restval='', delimiter=',')
    csvwriter.writeheader()
    csvwriter.writerows(records)
The problems are the following:
I am unable to get the keys for nested responses like taxonomy.
I am unable to get the values for nested responses like taxonomy.
Many responses have different headers/keys, so ideally I should have all of them as headers in my Excel sheet, but I am not able to figure out how to do it in Python.
My Excel sheet shows a blank row after every entry, and I don't know why.
Please help. All criticism is welcome. Kind regards
Related
I started using Python Cubes Olap recently.
I'm trying to sum/avg a JSON Postgres column. How can I do this?
my db structure:
events
id
object_type
sn_name
spectra
id
snx_wavelengths (json column)
event_id
my json:
{
"dimensions": [
{
"name": "event",
"levels": [
{
"name": "object_type",
"label": "Object Type",
"attributes": [
"object_type"
]
},
{
"name": "sn_name",
"label": "name",
"attributes": [
"sn_name"
]
}
]
},
{
"name": "spectra",
"levels": [
{
"name": "catalog_name",
"label": "Catalog Name",
"attributes": [
"catalog_name"
]
},
{
"name": "capture_date",
"label": "Capture Date",
"attributes": [
"capture_date"
]
}
]
},
{
"name": "date"
}
],
"cubes": [
{
"id": "uid",
"name": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5",
"dimensions": [
"event",
"spectra",
"date"
],
"aggregates": [
{
"name": "event_snx_wavelengths_sum",
"function": "sum",
"measure": "event.snx_wavelengths"
},
{
"name": "record_count",
"function": "count"
}
],
"joins": [
{
"master": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5.id",
"detail": "spectra.event_id"
}
],
"mappings": {
"event.sn_name": "sn_name",
"event.object_type": "object_type",
"spectra.catalog_name": "spectra.catalog_name",
"spectra.capture_date": "spectra.capture_date",
"event.snx_wavelengths": "spectra.snx_wavelengths",
"date": "spectra.capture_date"
}
}
]
}
I'm getting the following error:
Unknown attribute ''event.snx_wavelengths''
Can anyone help?
I already tried using MongoDB to do the sum, but I didn't have any success.
I've been struggling with the nested structure in this JSON. How do I convert it to the correct form?
{
"id": "0c576f35-d704-4fa8-8cbb-311c6be36358",
"employee_id": null,
"creator_id": "16ca2db9-206c-4e18-891d-a00a5252dbd3",
"closed_by_id": null,
"request_number": 23,
"priority": "2",
"form_id": "urlaub-weitere-abwesenheiten",
"status": "opened",
"name": "Urlaub & weitere Abwesenheiten",
"read_by_employee": false,
"custom_status": {
"id": 15793,
"name": "In Bearbeitung HR"
},
"due_date": null,
"created_at": "2021-03-29T15:18:37.572040+02:00",
"updated_at": "2021-03-29T15:22:15.590156+02:00",
"closed_at": null,
"archived_at": null,
"attachment_count": 1,
"category": {
"id": "payroll-time-management",
"name": "Payroll, Time & Attendance"
},
"public_comment_count": 0,
"form_data": [
{
"field_id": "subcategory",
"values": [
"Time & Attendance - Manage monthly/year-end consolidation and report"
]
},
{
"field_id": "separator-2",
"values": [
null
]
},
{
"field_id": "art-der-massnahme",
"values": [
"Fortbildung"
]
},
{
"field_id": "bezeichnung-der-schulung-kurses",
"values": [
"dfgzhujiko"
]
},
{
"field_id": "startdatum",
"values": [
"2021-03-26"
]
},
{
"field_id": "enddatum",
"values": [
"2021-03-27"
]
},
{
"field_id": "freistellung",
"values": [
"nein"
]
},
{
"field_id": "mit-bildungsurlaub",
"values": [
""
]
},
{
"field_id": "kommentarfeld_fortbildung",
"values": [
""
]
},
{
"field_id": "separator",
"values": [
null
]
},
{
"field_id": "instructions",
"values": [
null
]
},
{
"field_id": "entscheidung-hr-bp",
"values": [
"Zustimmen"
]
},
{
"field_id": "kommentarfeld-hr-bp",
"values": [
"wsdfghjkmhnbgvfcdxsybvnm,"
]
},
{
"field_id": "individuelle-abstimmung",
"values": [
""
]
}
],
"form_files": [
{
"id": 30129,
"filename": "empty_background.png",
"field_id": "anhang"
}
],
"visible_by_employee": false,
"organization_ids": [],
"need_edit_by_employee": false,
"attachments": []
}
using a simple solution with pandas, dataframe
# json_normalize already returns a DataFrame, so no from_dict wrapper is needed.
Request = pd.json_normalize(data)
it's displaying almost in its correct form:
How can I split the dictionaries in the form_data and form_files columns? I've done a lot of research, but I'm still having a lot of trouble solving this problem: how do I split form_data into columns, not rows, with id as the meta key?
You can do something like this.
Pass the DataFrame and the column name to the function as arguments.
def explode_node(child_df, column_value):
    """Expand a column of nested records into flat columns.

    Rows whose ``column_value`` cell is missing are dropped. Each remaining
    cell (a dict or list of dicts, or the string repr of one) is normalized
    with ``json_normalize`` and joined back to the other columns of its parent
    row, so a parent row is repeated once per nested record.

    Args:
        child_df: DataFrame containing the nested column. It is not modified.
        column_value: Name of the column to expand.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and lower-cased column names.
        If no row has a value in ``column_value``, the result is empty and
        simply lacks that column.
    """
    # Work on an explicit copy so the assignment and pop below stay local.
    child_df = child_df.dropna(subset=[column_value]).copy()

    if child_df.empty:
        # Nothing to expand: iloc[0] and pd.concat({}) would both raise here.
        expanded_child_df = child_df.drop(columns=[column_value]).reset_index(drop=True)
    else:
        # Cells may arrive as the string repr of the structure; only the first
        # cell is inspected to decide whether the column needs parsing.
        if isinstance(child_df[column_value].iloc[0], str):
            child_df[column_value] = child_df[column_value].apply(ast.literal_eval)
        nested = child_df.pop(column_value)
        # Keying the concat by the parent index lets the join below line each
        # nested record up with its parent row.
        per_row = pd.concat({i: json_normalize(x) for i, x in nested.items()})
        expanded_child_df = (
            per_row.reset_index(level=1, drop=True)
            .join(child_df, how='right', lsuffix='_left', rsuffix='_right')
            .reset_index(drop=True)
        )

    # str() first so non-string column labels do not break the lower-casing.
    expanded_child_df.columns = [str(name).lower() for name in expanded_child_df.columns]
    return expanded_child_df
I am trying to Extract values from json based on select condition using python.
My Json file looks like below:
{
"bindings": [
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/browser"
},
{
"members": [
"serviceAccount:admin-user#linuxacademy-3.iam.gserviceaccount.com",
"user:rohithmn03#gmail.com"
],
"role": "roles/owner"
},
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/viewer"
}
],
"etag": "BwrRsH-UhJ0=",
"version": 1
}
I am trying to parse this above file in python based on the user. For Example: Get the roles defined for user rohithmn3#gmail.com; as per the json the output should be :
roles/browser
roles/viewer
Regards,
Rohith
Using a list comprehension and dictionary input d:
# Collect the role of every binding whose member list contains the user.
var = 'rohithmn3#gmail.com'
member = 'user:' + var
res = []
for binding in d['bindings']:
    if member in binding['members']:
        res.append(binding['role'])
print(res)
['roles/browser', 'roles/viewer']
Setup
# Sample input for the lookup: a list of role bindings, each naming one role
# and the members it applies to.
d = {
"bindings": [
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/browser"
},
{
"members": [
"serviceAccount:admin-user#linuxacademy-3.iam.gserviceaccount.com",
"user:rohithmn03#gmail.com"
],
"role": "roles/owner"
},
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/viewer"
}
],
"etag": "BwrRsH-UhJ0=",
"version": 1
}
{
"matching_results": 1264,
"results": [
{
"main_image_url": "https://s4.reutersmedia.net/resources_v2/images/rcom-default.png",
"enriched_text": {
"entities": [
{
"relevance": 0.33,
"disambiguation": {
"subtype": [
"Country"
]
},
"sentiment": {
"score": 0
},
"type": "Location",
"count": 1,
"text": "China"
},
{
"relevance": 0.33,
"disambiguation": {
"subtype": [
"Country"
]
},
"sentiment": {
"score": 0
},
The file is very large, so I want to extract the "relevance" and "score" values using Python.
How do I fetch this info?
Regardless of how large it is, it is only a simple dictionary.
Iterate the lists. Extract the key-values.
# Walk every entity of every result, printing its relevance and then its
# sentiment score.
for hit in data['results']:
    entities = hit['enriched_text']['entities']
    for entity in entities:
        print(entity['relevance'])
        sentiment = entity['sentiment']
        print(sentiment['score'])
Here is a big piece of JSON data that I fetch in my code below:
{
"status": 200,
"offset": 0,
"limit": 10,
"count": 8,
"total": 8,
"url": "/v2/dictionaries/ldoce5/entries?headword=extra",
"results": [
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 3,
"id": "cqAFDjvvYg",
"part_of_speech": "adverb",
"senses": [
{
"collocation_examples": [
{
"collocation": "one/a few etc extra",
"example": {
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627480.mp3"
}
],
"text": "I got a few extra in case anyone else decides to come."
}
}
],
"definition": [
"in addition to the usual things or the usual amount"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627477.mp3"
}
],
"text": "They need to offer something extra to attract customers."
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDjvvYg"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra-",
"id": "cqAFDk1BDw",
"part_of_speech": "prefix",
"pronunciations": [
{
"audio": [
{
"lang": "American English",
"type": "pronunciation",
"url": "/v2/dictionaries/assets/ldoce/us_pron/extra__pre.mp3"
}
],
"ipa": "ekstrə"
}
],
"senses": [
{
"definition": [
"outside or beyond"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001832333.mp3"
}
],
"text": "extragalactic (=outside our galaxy)"
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDk1BDw"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 1,
"id": "cqAFDjpNZQ",
"part_of_speech": "adjective",
"pronunciations": [
{
"audio": [
{
"lang": "British English",
"type": "pronunciation",
"url": "/v2/dictionaries/assets/ldoce/gb_pron/extra_n0205.mp3"
},
{
"lang": "American English",
"type": "pronunciation",
"url": "/v2/dictionaries/assets/ldoce/us_pron/extra1.mp3"
}
],
"ipa": "ˈekstrə"
}
],
"senses": [
{
"collocation_examples": [
{
"collocation": "an extra ten minutes/three metres etc",
"example": {
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202489.mp3"
}
],
"text": "I asked for an extra two weeks to finish the work."
}
}
],
"definition": [
"more of something, in addition to the usual or standard amount or number"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202484.mp3"
}
],
"text": "Could you get an extra loaf of bread?"
}
],
"gramatical_info": {
"type": "only before noun"
}
}
],
"url": "/v2/dictionaries/entries/cqAFDjpNZQ"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 2,
"id": "cqAFDjsQjH",
"part_of_speech": "pronoun",
"senses": [
{
"collocation_examples": [
{
"collocation": "pay/charge/cost etc extra",
"example": {
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202499.mp3"
}
],
"text": "I earn extra for working on Sunday."
}
}
],
"definition": [
"an amount of something, especially money, in addition to the usual, basic, or necessary amount"
],
"synonym": "more"
}
],
"url": "/v2/dictionaries/entries/cqAFDjsQjH"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra",
"homnum": 4,
"id": "cqAFDjyTn8",
"part_of_speech": "noun",
"senses": [
{
"definition": [
"something which is added to a basic product or service that improves it and often costs more"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001202524.mp3"
}
],
"text": "Tinted windows and a sunroof are optional extras(=something that you can choose to have or not)."
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDjyTn8"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra virgin",
"id": "cqAFDmV2Jw",
"part_of_speech": "adjective",
"senses": [
{
"definition": [
"extra virgin olive oil comes from olives that are pressed for the first time, and is considered to be the best quality olive oil"
]
}
],
"url": "/v2/dictionaries/entries/cqAFDmV2Jw"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra time",
"id": "cqAFDmGZyQ",
"part_of_speech": "noun",
"senses": [
{
"american_equivalent": "overtime",
"definition": [
"a period, usually of 30 minutes, added to the end of a football game in some competitions if neither team has won after normal time"
],
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627835.mp3"
}
],
"text": "The match went into extra time."
}
],
"geography": "especially British English",
"gramatical_examples": [
{
"examples": [
{
"audio": [
{
"type": "example",
"url": "/v2/dictionaries/assets/ldoce/exa_pron/p008-001627834.mp3"
}
],
"text": "Beckham scored in extra time."
}
],
"pattern": "in extra time"
}
]
}
],
"url": "/v2/dictionaries/entries/cqAFDmGZyQ"
},
{
"datasets": [
"ldoce5",
"dictionary"
],
"headword": "extra-sensory perception",
"id": "cqAFDm6ceW",
"part_of_speech": "noun",
"senses": [
{
"definition": [
"ESP"
]
}
],
"url": "/v2/dictionaries/entries/cqAFDm6ceW"
}
]
}
I want to grab and print the definitions offered in the JSON results. I don't know how to express this and am getting a 'list indices must be integers or slices, not str' error for my sense = data['senses'].
#!/usr/bin/env python
import urllib.request
import json
# Fetch the dictionary entries for "extra" and decode the response body as UTF-8 text.
wp = urllib.request.urlopen("http://api.pearson.com/v2/dictionaries/ldoce5/entries?headword=extra").read().decode('utf8')
jsonData=json.loads(wp)
# 'results' is a JSON array, so `data` is a list of entry dicts, not a dict.
data=jsonData['results']
for item in data:
# NOTE(review): `data` is the list; indexing it with a string raises the reported TypeError. The current entry is `item`.
sense = data['senses']
# NOTE(review): `senses` is never assigned (the variable above is `sense`), and each entry's 'senses' value is itself a list.
print(senses['definition'])
The value under the 'senses' key is actually a list with a single element, a dictionary. The contained dictionary has your desired key-value pair.
For example:
for item in data:
    # `data` is the list of entries, so index the current entry (`item`);
    # indexing the list itself with 'senses' raises the TypeError.
    sense = item['senses'][0]
    # 'definition' is itself a list of strings, so this prints a list.
    print(sense['definition'])