How to add title row to using writer and unicodecsv

How to add title row to using writer and unicodecsv - python

I have the following JSON file - test.json (names, keys and addresses changed for security reasons)
[
{
"accountMode":"Live",
"acquirer":"TEST",
"acquirerConstraints":{
"cardTypes":[
"MASTERCARD",
"MAESTRO",
"VISA"
],
"cvcRegexp":"^[0-9]{3}$",
"cvcRequired":true,
"maxAmount":500000,
"minAmount":50
},
"acquirerDetails":{
"TEST":"Studio",
"ERROR_LIST":[
],
"MERCHANT_CODE":"218331",
"VALID":true,
"_mId":"T712484",
"_status":"INPROCESS",
"email":"test7#gmail.com",
"name":"Studio",
"valid":true
},
"acquirerValidations":null,
"allowedCurrencies":[
"EUR",
"USD",
"GBP"
],
"apiKeyPairs":[
{
"accountMode":"Live",
"label":"Virtual Terminal",
"publishableKey":"niunibiubniunijknkjknj",
"source":"VIRTUAL_TERMINAL"
},
{
"accountMode":"Live",
"label":"Default",
"publishableKey":"iiuhiuhiu",
"source":"ECOMMERCE"
}
],
"appLogoUrl":null,
"applicationId":"541d75e0-7db8b343a31f",
"authorizationCode":"",
"closedDate":null,
"closureReason":null,
"declineAvsAddressFailure":false,
"declineAvsZipFailure":false,
"declineCvcFailure":false,
"defaultCurrency":"EUR",
"descriptor":null,
"email":"test1#gmail.com",
"id":"ddddeff",
"invitationCode":null,
"locale":"en_IE",
"merchantApplication":{
"accountNumber":null,
"acquirer":"TEST",
"annualAmount":null,
"annualVolume":null,
"applicationType":"APPROVAL",
"bankName":"UNKNOWN",
"brand":null,
"businessAddress":"54 My St, 1",
"businessAddress2":null,
"businessCity":"Abbey",
"businessCountry":"IRL",
"businessPhone":null,
"businessState":"DUBLIN",
"businessZip":null,
"data":null,
"email":"test#gmail.com",
"escalationPhone":null,
"fax":null,
"legalName":"UAB \"Studio\"",
"maxTransactionAmount":null,
"mccCode":"5712",
"merchantPromotionCode":null,
"mobile":null,
"monthlyAmount":null,
"monthlyVolume":null,
"ownerFirstName":"tlana",
"ownerLastName":"nava",
"phone":"37647",
"GuideAccepted":null,
"privacyAccepted":true,
"privacyVersion":"1a",
"referenceId":"9104d65i08d071",
"routingNumber":null,
"singleTransactionAmount":null,
"statementName":"UAB \"Studio\"",
"taxId":null,
"termsAccepted":true,
"termsVersion":"1a",
"url":"http://www.design.lt"
},
"merchantId":"12484",
"merchantPromotionCode":null,
"mposEnabled":true,
"name":"Studio",
"netonfiguration":null,
"onboardedDate":1505513232485,
"onboardingMethod":null,
"onboardingStatus":"INPROCESS",
"partner":null,
"saqCompliant":false,
"saqExpires":null,
"settings":[
{
"key":"MERCHANT_DETAILS",
"value":"{\"zip\":\"Wicklow\",\"phone\":\"342647\",\"email\":\"suppoor#outlook.com\",\"address\":\"Bck 6\",\"state\":\"Ireland\",\"addressLine2\":\"Unit 8, Bl Par\",\"city\":\"Wicklow\"}"
},
{
"key":"VAT_NUMBER",
"value":"/evzaqen/"
}
],
"timezone":"Europe/Dublin",
"tinStatus":null
},
{
"accountMode":"Live",
"acquirer":"TEST",
"acquirerConstraints":{
"cardTypes":[
"MASTERCARD",
"MAESTRO",
"VISA"
],
"cvcRegexp":"^[0-9]{3}$",
"cvcRequired":true,
"maxAmount":500000,
"minAmount":50
},
"acquirerDetails":{
"TEST":"test",
"ERROR_LIST":[
],
"MERCHANT_CODE":"594920",
"MID_ASSIGNED":true,
"VALID":true,
"_mId":"103558",
"_status":"APPROVED",
"acquiringMid":"1036598",
"descriptor":"test 8885551212",
"email":"test#gmail.com",
"gatewayMid":"SIMP337",
"id":"SIMP337",
"level4Mid":"76576576",
"name":"test",
"status":"APPROVED",
"transactionCurrency":"USD;EUR;GBP",
"valid":true,
"paymentGatewayKey":"ytfytfytfyt"
},
"acquirerValidations":null,
"allowedCurrencies":[
"EUR",
"USD",
"GBP"
],
"apiKeyPairs":[
],
"appLogoUrl":null,
"applicationId":"949bdde5-07-d8d58f4c3d01",
"authorizationCode":"",
"closedDate":null,
"closureReason":null,
"declineAvsAddressFailure":false,
"declineAvsZipFailure":false,
"declineCvcFailure":false,
"defaultCurrency":"EUR",
"descriptor":"test85551212",
"email":"test#gmail.com",
"id":"9f3a7d7",
"invitationCode":null,
"locale":"en_US",
"merchantApplication":{
"accountNumber":null,
"acquirer":"TEST",
"annualAmount":null,
"annualVolume":null,
"applicationType":"APPROVAL",
"bankName":"UNKNOWN",
"brand":null,
"businessAddress":"123 test",
"businessAddress2":null,
"businessCity":"Atlanta",
"businessCountry":"IRL",
"businessPhone":null,
"businessState":"CARLOW",
"businessZip":null,
"data":null,
"email":"test#gmail.com",
"escalationPhone":null,
"fax":null,
"legalName":"stest",
"maxTransactionAmount":null,
"mccCode":"521",
"merchantPromotionCode":null,
"mobile":null,
"monthlyAmount":null,
"monthlyVolume":null,
"ownerFirstName":"moto",
"ownerLastName":"test",
"phone":"3141212",
"GuideAccepted":null,
"privacyAccepted":true,
"privacyVersion":"1a",
"referenceId":"2920",
"routingNumber":null,
"singleTransactionAmount":null,
"statementName":"test",
"taxId":null,
"termsAccepted":true,
"termsVersion":"1a",
"url":null
},
"merchantId":"1036558",
"merchantPromotionCode":null,
"mposEnabled":true,
"name":"test",
"netonfiguration":null,
"onboardedDate":1456846054925,
"onboardingMethod":null,
"onboardingStatus":"CLOSED",
"partner":null,
"saqCompliant":false,
"saqExpires":null,
"settings":[
],
"timezone":"Europe/Dublin",
"tinStatus":"InCompliance"
}
]
I want to process this file and take some of the information and populate a CSV file with it. To do this I am using the following:
import unicodecsv
import json
# Load the merchant records, then write one CSV row per merchant application.
# Both files are opened with `with` so they are flushed and closed even if a
# record lacks an expected key and the loop raises part-way through.
with open("test.json") as json_data:
    data = json.load(json_data)

# Binary mode is kept from the original: unicodecsv encodes the rows itself.
with open("results.csv", "wb+") as csv_file:
    f = unicodecsv.writer(csv_file)
    for entry in data:
        # Skip records that carry no merchant application at all.
        if "merchantApplication" in entry:
            ma = entry["merchantApplication"]
            # Only applications that have an "email" key are exported.
            if "email" in ma:
                f.writerow([
                    ma["ownerFirstName"],
                    ma["ownerLastName"],
                    ma["email"],
                    ma["legalName"],
                    ma["businessAddress"],
                    ma["businessAddress2"],
                    ma["businessCity"],
                    ma["businessCountry"],
                    ma["businessState"],
                    ma["businessZip"],
                    ma["phone"],
                    ma["mobile"],
                ])
This works fine, but it does not write a header row above the columns. How do I add the headers? I am using Python 2.7.10.

How do I add in the headers?
Quite simply, by calling f.writerow((<your>,<headers>,<here>)) once, before your for loop.

Related

Retrieve a specific value from a JSON data and generate a CSV file

I am retrieving this data from an API, and it comes in JSON format. I only need a certain portion of the data and want to ignore the rest. Please see my output CSV below for how the final file should look. I need the results key, and from each entry in it I need the id, unid and userHierarchies fields.
{
"apiVersion": "3.0",
"loggedInUser": {
"id": "api#api.com",
"unid": "192",
"userHierarchies": [
{
"hierarchyField": "Project",
"value": "Eli-f"
},
{
"hierarchyField": "Division",
"value": "DDD"
},
{
"hierarchyField": "Site",
"value": "RD02"
},
{
"hierarchyField": "Company",
"value": "Core"
},
{
"hierarchyField": "Department",
"value": "Operations"
}
]
},
"results":[
{
"id":"Random_Company_57",
"unid":"75",
"userHierarchies":[
{
"hierarchyField":"Company",
"value":"ABC Company"
},
{
"hierarchyField":"Department",
"value":"gfds"
},
{
"hierarchyField":"Project",
"value":"JKL-SDFGHJW"
},
{
"hierarchyField":"Division",
"value":"Silver RC"
},
{
"hierarchyField":"Site",
"value":"SQ06"
}
],
"preferredLanguage":"en-AU",
"prefName":"Christmas Bells",
},
{
"id":"xyz.abc#safe.net",
"unid":"98",
"userHierarchies":[
{
"hierarchyField":"Company",
"value":"ABC Company"
},
{
"hierarchyField":"Department",
"value":"PUHJ"
},
{
"hierarchyField":"Project",
"value":"RPOJ-SDFGHJW"
},
{
"hierarchyField":"Division",
"value":"Silver RC"
},
{
"hierarchyField":"Site",
"value":"SQ06"
}
],
"preferredLanguage":"en-AU",
"prefName":"Christmas Bells",
}
]
}
My output CSV should look like this:
id,unid,hierarchyField,value
Random_Company_57,75,Company,ABC Company
Random_Company_57,75,Department,gfds
Random_Company_57,75,Project,JKL-SDFGHJW
Random_Company_57,75,Division,Silver RC
Random_Company_57,75,Site,SQ06
xyz.abc#safe.net,98,Company,ABC Company
xyz.abc#safe.net,98,Department,PUHJ
xyz.abc#safe.net,98,Project,RPOJ-SDFGHJW
xyz.abc#safe.net,98,Division,Silver RC
My Python code looks like this:
import requests
from pathlib import Path
from pprint import pprint
import pandas as pd
import time
import os
import argparse
# Command-line interface: an output location and a verbosity option.
parser = argparse.ArgumentParser(description="Process some integers.")
# Single-dash long option; argparse exposes it as args.path_save.
parser.add_argument("-path_save", help="define where to save the file")
# No action= is given, so --verbose expects a value rather than acting as a bare flag.
parser.add_argument("--verbose", help="display processing information")
# Timestamp taken when the module is loaded.
start = time.time()
def GetData(URL, endPoint, path_save, verbose):
    """Fetch one API endpoint and save its ``results`` list as a CSV file.

    Args:
        URL: Full URL of the endpoint to request.
        endPoint: Endpoint name; used in the log line and as the CSV file stem.
        path_save: Directory the ``<endPoint>.csv`` file is written to.
        verbose: Accepted for interface compatibility; currently unused.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the JSON payload has no ``"results"`` key.
    """
    # NOTE(review): credentials and API key are hard-coded; consider loading
    # them from the environment or a config file instead.
    response = requests.get(
        URL,
        auth=('api#api.net', 'uojk00'),
        headers={
            'Content-Type': 'application/json',
            'x-api-key': 'ydVtsni1blwJHb65OJBrrtV',
        },
        # Without a timeout, requests can wait forever on an unresponsive server.
        timeout=30,
    )
    print(endPoint, response)
    # Fail here with a clear HTTP error instead of a confusing KeyError (or a
    # JSON decode error) when the body is an error page.
    response.raise_for_status()
    df = pd.DataFrame(response.json()["results"])
    print(df)
    df.to_csv(os.path.join(path_save, f"{endPoint}.csv"), index=False)
if __name__ == '__main__':
    # Restart the clock so that only the actual run is timed.
    start = time.time()
    args = parser.parse_args()
    path_save, verbose = Path(args.path_save), args.verbose
    # Endpoints to download; each one ends up in its own CSV file.
    endPoint = ['users']
    for endPt in endPoint:
        GetData("https://api.com/v10/chor/" + endPt, endPt, path_save, verbose)
    elapsed = time.time() - start
    print("Processed time:", elapsed)  # Total Time
Any help with how I can generate that CSV?

If data is the API response shown in your question, you can use the following example to save it to CSV in the required format:
# One row per (result, hierarchy entry): explode the list column first.
exploded = pd.DataFrame(data["results"]).explode("userHierarchies")
# Turn each hierarchy dict into its own columns (hierarchyField, value).
hierarchy_cols = exploded.pop("userHierarchies").apply(pd.Series)
df = pd.concat([exploded, hierarchy_cols], axis=1)
# Keep only the requested columns, in the requested order.
df = df.loc[:, ["id", "unid", "hierarchyField", "value"]]
df.to_csv("data.csv", index=False)
Saves data.csv:
id,unid,hierarchyField,value
Random_Company_57,75,Company,ABC Company
Random_Company_57,75,Department,gfds
Random_Company_57,75,Project,JKL-SDFGHJW
Random_Company_57,75,Division,Silver RC
Random_Company_57,75,Site,SQ06
xyz.abc#safe.net,98,Company,ABC Company
xyz.abc#safe.net,98,Department,PUHJ
xyz.abc#safe.net,98,Project,RPOJ-SDFGHJW
xyz.abc#safe.net,98,Division,Silver RC
xyz.abc#safe.net,98,Site,SQ06

Python - How to retrieve element from json

Aloha,
My python routine will retrieve json from site, then check the file and download another json given the first answer and eventually download a zip.
The first json file gives information about doc.
Here's an example :
[
{
"id": "d9789918772f935b2d686f523d066a7b",
"originalName": "130010259_AC2_R44_20200101",
"type": "SUP",
"status": "document.deleted",
"legalStatus": "APPROVED",
"name": "130010259_SUP_R44_AC2",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
3.4212881,
47.6171589,
8.1598899,
50.1338684
],
"documentSource": "UPLOAD",
"uploadDate": "2020-06-25T14:56:27+02:00",
"updateDate": "2021-01-19T14:33:35+01:00",
"fileIdentifier": "SUP-AC2-R44-130010259-20200101",
"legalControlStatus": 101
},
{
"id": "6a9013bdde6acfa632861aeb1a02942b",
"originalName": "130010259_AC2_R44_20210101",
"type": "SUP",
"status": "document.production",
"legalStatus": "APPROVED",
"name": "130010259_SUP_R44_AC2",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
3.4212881,
47.6171589,
8.1598899,
50.1338684
],
"documentSource": "UPLOAD",
"uploadDate": "2021-01-18T16:37:01+01:00",
"updateDate": "2021-01-19T14:33:29+01:00",
"fileIdentifier": "SUP-AC2-R44-130010259-20210101",
"legalControlStatus": 101
},
{
"id": "efd51feaf35b12248966cb82f603e403",
"originalName": "130010259_PM2_R44_20210101",
"type": "SUP",
"status": "document.production",
"legalStatus": "APPROVED",
"name": "130010259_SUP_R44_PM2",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
3.6535762,
47.665021,
7.9509455,
49.907347
],
"documentSource": "UPLOAD",
"uploadDate": "2021-01-28T09:52:31+01:00",
"updateDate": "2021-01-28T18:53:34+01:00",
"fileIdentifier": "SUP-PM2-R44-130010259-20210101",
"legalControlStatus": 101
},
{
"id": "2e1b6104fdc09c84077d54fd9e74a7a7",
"originalName": "444619258_I4_R44_20210211",
"type": "SUP",
"status": "document.pre_production",
"legalStatus": "APPROVED",
"name": "444619258_SUP_R44_I4",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
2.8698336,
47.3373246,
8.0881368,
50.3796449
],
"documentSource": "UPLOAD",
"uploadDate": "2021-04-19T10:20:20+02:00",
"updateDate": "2021-04-19T14:46:21+02:00",
"fileIdentifier": "SUP-I4-R44-444619258-20210211",
"legalControlStatus": 100
}
]
What I try to do is to retrieve "id" from this json file. (ex. "id": "2e1b6104fdc09c84077d54fd9e74a7a7",)
I've tried
import json
from jsonpath_rw import jsonpath, parse
import jsonpath_rw_ext as jp
# Load the document list that was previously saved to disk.
with open('C:/temp/gpu/SUP/20210419/SUPGE.json') as f:
d = json.load(f)
# json.dumps() serializes the parsed value back into a JSON *string*, so `data` is a str here.
data = json.dumps(d)
# NOTE(review): jp.match1 is handed that string and the path "$.id[*]"; presumably it needs
# the parsed object `d` and a path such as "$[*].id" -- confirm against the jsonpath-rw-ext docs.
print("oriName: {}".format( jp.match1("$.id[*]",data) ) )
It doesn't work. In fact, I'm not sure how jsonpath-rw is intended to work. Thankfully there was this blog post, but I'm still stuck.
Does anyone have a clue ?
With the id, I'll be able to download another json and in this json there'll be an archiveUrl to get the zipfile.
Thanks in advance.

import json
# Open the file directly in the `with` statement so it is closed on exit.
with open('SUPGE.json') as f:
    d = json.load(f)

# The top-level JSON value is a list of documents; print each one's "id"
# (.get yields None instead of raising when a document has no "id" key).
for doc in d:
    print(doc.get('id'))
This will print only the id of each document:
d9789918772f935b2d686f523d066a7b
6a9013bdde6acfa632861aeb1a02942b
efd51feaf35b12248966cb82f603e403
2e1b6104fdc09c84077d54fd9e74a7a7

Ok.
Here's what I've done.
import json
import urllib
# not sure it's the best way to load json from url, but it works fine
# and I could test most of code if needed.
def getResponse(url):
    """Download ``url`` and return its body parsed as JSON.

    Args:
        url: Address of a resource that returns a JSON document.

    Returns:
        The decoded JSON value.

    Raises:
        RuntimeError: If the response status is not 200.
        urllib.error.URLError: Propagated from ``urlopen`` on network or HTTP failure.
        ValueError: If the body is not valid JSON.
    """
    # A bare `import urllib` does not load the `request` submodule, so import
    # it explicitly where it is used.
    import urllib.request

    # The context manager closes the connection even if reading or parsing fails.
    with urllib.request.urlopen(url) as operUrl:
        status = operUrl.getcode()
        if status != 200:
            # Previously this branch only printed the status and then crashed
            # with UnboundLocalError on `return jsonData`; fail with a clear error.
            raise RuntimeError("Erreur reçue {}".format(status))
        return json.loads(operUrl.read())
# Fetch the list of approved SUP documents for one grid (R44).
# The query URL is hard-coded for now; the final script is meant to take it
# as a parameter because many territories have to be checked.
d = getResponse('https://www.geoportail-urbanisme.gouv.fr/api/document?documentFamily=SUP&grid=R44&legalStatus=APPROVED')
for doc in d:
    # Only documents currently in production are downloaded.
    if doc['status'] != 'document.production':
        continue
    doc_id = doc.get('id')
    print('id du doc en production :', doc_id)
    # The details endpoint (same server and API, different URL) is keyed by the document id.
    _URL = 'https://www.geoportail-urbanisme.gouv.fr/api/document/' + doc_id + '/details'
    d2 = getResponse(_URL)
    archive_url = d2['archiveUrl']
    print('archive', archive_url)
    urllib.request.urlretrieve(archive_url, 'c:/temp/gpu/SUP/' + d2['metadata'] + '.zip')
# Author's note: wget, with its progress bar, may replace urlretrieve later.
# Works fine as is.
Thanks for your answer. I'm delighted to see that even with only the json library you could do amazing things. Just normal stuff. But amazing.
Feel free to comment if you think I've missed something.

How to convert a list of OrderedDict to nested json with grouped keys in python

I'm working on a project where I need to convert a set of data rows from a database into a list of OrderedDict for another purpose, and then convert that list of OrderedDict into a nested JSON format in Python. I'm just starting to learn Python. I was able to convert the query response from the database, which is a list of lists, into a list of OrderedDict.
I have the list of OrderedDict as below:
{
'OUTBOUND': [
OrderedDict([('Leg', 1), ('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '2'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'A'),('Price', 145.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 1),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '4'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'A'),('Price', 111.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 1),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'BDM'),('SeatGroup', 'null'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'A'),('Price', 111.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 2),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '1'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'U'),('Price', 180.0),('Num_Pax', 1),('Channel', 'Web'))]),
OrderedDict([('Leg', 2),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '4'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'U'),('Price', 97.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 2),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'BDM'),('SeatGroup', 'null'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'U'),('Price', 97.0),('Num_Pax', 1),('Channel', 'Web')])
]
}
And I needed the nested format like below:
{
"OUTBOUND": [
{
"Leg": 1,
"SessionID": "W12231fwfegwcaa2",
"Modality": "VB",
"BookingClass": "A",
"FeeCodes":[
{
"FeeCode": "ATO",
"Prices":
[
{
"SeatGroup": "2",
"Price": 145.0,
"Currency": "MXN"
},
{
"SeatGroup": "4",
"Price": 111.0,
"Currency": "MXN"
}
]
},
{
"FeeCode": "VBABDM",
"Prices":
[
{
"SeatGroup": "null",
"Price": 111.0,
"Currency": "MXN"
}
]
}
],
"Num_Pax": 1,
"Channel": "Web"
},
{
"Leg": 2,
"SessionID": "W12231fwfegwcaa2",
"Modality": "VB",
"BookingClass": "U",
"FeeCodes":[
{
"FeeCode": "ATO",
"Prices":
[
{
"SeatGroup": "1",
"Price": 180.0,
"Currency": "MXN"
},
{
"SeatGroup": "4",
"price": 97.0,
"Currency": "MXN"
}
]
},
{
"FeeCode": "VBABDM",
"Prices":
[
{
"SeatGroup": "null",
"price": 97.0,
"Currency": "MXN"
}
]
}
],
"Num_Pax": 1,
"Channel": "Web"
}
]
}
If I'm not wrong, I need to group by Leg, SessionID, Modality, BookingClass, Num_Pax and Channel, and nest FeeCode, SeatGroup, Price and Currency as shown above, but I can't work out how to loop and group to build the nesting.
It would be great if I could get some help. Thanks

I was able to write a python code to get the format as I needed using simple looping with a couple of changes in the output like the fields SessionID, Num_Pax and Channel is taken outside then the OUTBOUND field and fields within are generated.
Instead of OrderedDict, I used a list of lists as input which I convert into Pandas DataFrame and work with the DataFrame to get the nested format.
Below is the code I used:
# Build the nested "OUTBOUND" payload from the flat query rows.
outbound_df = pd.DataFrame(response_outbound, columns=All_columns)
Common_columns = ['Leg', 'Modality', 'BookingClass']

# SessionID, Num_Pax and Channel are treated as common to all leg-level data,
# so they are lifted out of OUTBOUND and taken from the first distinct value.
response_data['SessionID'] = outbound_df['SessionID'].unique()[0]
response_data['Num_Pax'] = int(outbound_df['Num_Pax'].unique()[0])
response_data['Channel'] = outbound_df['Channel'].unique()[0]

temp_data = []
for leg in outbound_df['Leg'].unique():
    leg_rows = outbound_df[outbound_df['Leg'] == leg]

    # Leg-level fields first (Leg is cast to a plain int), then the fee codes.
    leg_data = collections.OrderedDict()
    for col in Common_columns:
        first_value = leg_rows[col].unique()[0]
        leg_data[col] = int(first_value) if col == 'Leg' else first_value
    leg_data['FeeCodes'] = []

    for fee_code in leg_rows['FeeCode'].unique():
        fee_rows = leg_rows[leg_rows['FeeCode'] == fee_code]
        # One price entry per row that shares this leg and fee code.
        price_list = [
            {
                'SeatGroup': row['SeatGroup'],
                'Price': float(row['Price']),
                'Currency': row['Currency'],
            }
            for _, row in fee_rows.iterrows()
        ]
        leg_data['FeeCodes'].append({'FeeCode': fee_code, "Prices": price_list})

    temp_data.append(leg_data)

response_data["OUTBOUND"] = temp_data
I can just do json.dumps on response_data to get json format which will be sent to the next steps.
Below is the output format I get:
{
"SessionID":"W12231fwfegwcaa2",
"Num_Pax":1,
"Channel":"Web",
"OUTBOUND":[
{
"Leg":1,
"Modality":"VB",
"BookingClass":"A",
"FeeCodes":[
{
"FeeCode":"ATO",
"Prices":[
{
"SeatGroup":"2",
"Price":145.0,
"Currency":"MXN"
},
{
"SeatGroup":"4",
"Price":111.0,
"Currency":"MXN"
}
]
},
{
"FeeCode":"VBABDM",
"Prices":[
{
"SeatGroup":"null",
"Price":111.0,
"Currency":"MXN"
}
]
}
]
},
{
"Leg":2,
"Modality":"VB",
"BookingClass":"U",
"FeeCodes":[
{
"FeeCode":"ATO",
"Prices":[
{
"SeatGroup":"1",
"Price":180.0,
"Currency":"MXN"
},
{
"SeatGroup":"4",
"price":97.0,
"Currency":"MXN"
}
]
},
{
"FeeCode":"VBABDM",
"Prices":[
{
"SeatGroup":"null",
"price":97.0,
"Currency":"MXN"
}
]
}
]
}
]
}
Please let me know if we can shorten the code in terms of lengthy iterations or any other changes. Thanks.
PS: Sorry for my editing mistakes

Assuming that you stored the dictionary to some variable foo, you can do:
import json
# Serializes foo to a JSON-formatted string; the string is returned, not printed.
json.dumps(foo)
Also, be careful: you added an extra closing parenthesis in the 4th element of the OUTBOUND list.

Dictionary length is equal to 3 but when trying to access an index receiving KeyError

I am attempting to parse a json response that looks like this:
{
"links": {
"next": "http://www.neowsapp.com/rest/v1/feed?start_date=2015-09-08&end_date=2015-09-09&detailed=false&api_key=xxx",
"prev": "http://www.neowsapp.com/rest/v1/feed?start_date=2015-09-06&end_date=2015-09-07&detailed=false&api_key=xxx",
"self": "http://www.neowsapp.com/rest/v1/feed?start_date=2015-09-07&end_date=2015-09-08&detailed=false&api_key=xxx"
},
"element_count": 22,
"near_earth_objects": {
"2015-09-08": [
{
"links": {
"self": "http://www.neowsapp.com/rest/v1/neo/3726710?api_key=xxx"
},
"id": "3726710",
"neo_reference_id": "3726710",
"name": "(2015 RC)",
"nasa_jpl_url": "http://ssd.jpl.nasa.gov/sbdb.cgi?sstr=3726710",
"absolute_magnitude_h": 24.3,
"estimated_diameter": {
"kilometers": {
"estimated_diameter_min": 0.0366906138,
"estimated_diameter_max": 0.0820427065
},
"meters": {
"estimated_diameter_min": 36.6906137531,
"estimated_diameter_max": 82.0427064882
},
"miles": {
"estimated_diameter_min": 0.0227984834,
"estimated_diameter_max": 0.0509789586
},
"feet": {
"estimated_diameter_min": 120.3760332259,
"estimated_diameter_max": 269.1689931548
}
},
"is_potentially_hazardous_asteroid": false,
"close_approach_data": [
{
"close_approach_date": "2015-09-08",
"close_approach_date_full": "2015-Sep-08 09:45",
"epoch_date_close_approach": 1441705500000,
"relative_velocity": {
"kilometers_per_second": "19.4850295284",
"kilometers_per_hour": "70146.106302123",
"miles_per_hour": "43586.0625520053"
},
"miss_distance": {
"astronomical": "0.0269230459",
"lunar": "10.4730648551",
"kilometers": "4027630.320552233",
"miles": "2502653.4316094954"
},
"orbiting_body": "Earth"
}
],
"is_sentry_object": false
},
}
I am trying to figure out how to parse through this to get the "miss_distance" dictionary values, but I am unable to wrap my head around it.
Here is what I have been able to do so far:
After I get a Response object from request.get()
response = request.get(url
I convert the response object to json object
data = response.json() #this returns dictionary object
I try to parse the first level of the dictionary:
# Membership test instead of scanning every top-level key by hand.
if "near_earth_objects" in data:
    # The value stored under the date key is a list of near-earth-object dicts.
    dataset1 = data["near_earth_objects"]["2015-09-08"]
Could someone please explain:
1. How to decipher this response in the first place.
2. How can I move forward in parsing the response object and get to miss_distance dictionary ?
Any pointers/help would be appreciated.
Thank you

Your data will have multiple dictionaries for each date, near-earth object, and close approach:
# Walk date -> objects listed for that date -> close approaches of each object.
near_earth_objects = data['near_earth_objects']
for neos_on_date in near_earth_objects.values():
    for neo in neos_on_date:
        for approach in neo['close_approach_data']:
            print(approach['miss_distance'])

The code below gives you a table of date, miss_distances for every object for every date
import json
# Trimmed-down sample of the API response: one date, one object, one close approach.
raw_json = '''
{
"near_earth_objects": {
"2015-09-08": [
{
"close_approach_data": [
{
"miss_distance": {
"astronomical": "0.0269230459",
"lunar": "10.4730648551",
"kilometers": "4027630.320552233",
"miles": "2502653.4316094954"
},
"orbiting_body": "Earth"
}
]
}
]
}
}
'''

if __name__ == "__main__":
    parsed = json.loads(raw_json)
    # Flatten date -> object -> close approach into one record per approach, so
    # the same code handles several objects spread across several dates.
    near_objects = [
        {'date': date, 'miss_distances': appr['miss_distance']}
        for date, near_objs in parsed['near_earth_objects'].items()
        for obj in near_objs
        for appr in obj['close_approach_data']
    ]
    print(near_objects)
output:
[
{'date': '2015-09-08',
'miss_distances': {
'astronomical': '0.0269230459',
'lunar': '10.4730648551',
'kilometers': '4027630.320552233',
'miles': '2502653.4316094954'
}
}
]

Parsing Through Nested JSON Python

# response.json() returns the already-decoded payload (a dict here, per the error quoted below).
data = response.json()
# Pretty-printed, key-sorted JSON text of the same payload.
sortJson = json.dumps(data, sort_keys=True,
indent=2, separators=(',', ':'))
# NOTE(review): json.loads() expects JSON text, but `data` is a dict -- this is the call
# that raises "TypeError: the JSON object must be str, not 'dict'".
result = json.loads (data)
# NOTE(review): in the payload shown below, "tripOption" and "pricing" are lists, so each
# presumably needs an index (e.g. [0]) before the next key -- confirm.
print ('"saleTotal":', result['trips']['tripOption']['pricing']['saleTotal'])
This is the code I have currently. I'm looking to parse through a nested JSON file, but each time I run this I get the following error:
TypeError: the JSON object must be str, not 'dict'
The JSON file when Pretty Printed turns out like this:
{
"kind":"qpxExpress#tripsSearch",
"trips":{
"data":{
"aircraft":[
{
"code":"321",
"kind":"qpxexpress#aircraftData",
"name":"Airbus A321"
}
],
"airport":[
{
"city":"ORL",
"code":"MCO",
"kind":"qpxexpress#airportData",
"name":"Orlando International"
},
{
"city":"CHI",
"code":"ORD",
"kind":"qpxexpress#airportData",
"name":"Chicago O'Hare"
}
],
"carrier":[
{
"code":"F9",
"kind":"qpxexpress#carrierData",
"name":"Frontier Airlines, Inc."
}
],
"city":[
{
"code":"CHI",
"kind":"qpxexpress#cityData",
"name":"Chicago"
},
{
"code":"ORL",
"kind":"qpxexpress#cityData",
"name":"Orlando"
}
],
"kind":"qpxexpress#data",
"tax":[
{
"id":"ZP",
"kind":"qpxexpress#taxData",
"name":"US Flight Segment Tax"
},
{
"id":"AY_001",
"kind":"qpxexpress#taxData",
"name":"US September 11th Security Fee"
},
{
"id":"US_001",
"kind":"qpxexpress#taxData",
"name":"US Transportation Tax"
},
{
"id":"XF",
"kind":"qpxexpress#taxData",
"name":"US Passenger Facility Charge"
}
]
},
"kind":"qpxexpress#tripOptions",
"requestId":"2z1TQ9iVMcSlUH8HW0O0eq",
"tripOption":[
{
"id":"WQZ8ICu2L8RLqt1MyMNFAQ001",
"kind":"qpxexpress#tripOption",
"pricing":[
{
"baseFareTotal":"USD37.11",
"fare":[
{
"basisCode":"Z00ZSS5",
"carrier":"F9",
"destination":"ORL",
"id":"AR5um4n2cToXHml3a125O0CU7toTISvPQER/01Xhbf2E",
"kind":"qpxexpress#fareInfo",
"origin":"CHI"
}
],
"fareCalculation":"ORD F9 MCO Q9.29 Q4.65 23.17Z00ZSS5 USD 37.11 END ZP ORD XT 2.79US 4.00ZP 5.60AY 4.50XF ORD4.50",
"kind":"qpxexpress#pricingInfo",
"latestTicketingTime":"2016-03-22T00:24-04:00",
"passengers":{
"adultCount":1,
"kind":"qpxexpress#passengerCounts"
},
"ptc":"ADT",
"saleFareTotal":"USD37.11",
"saleTaxTotal":"USD16.89",
"saleTotal":"USD54.00",
"segmentPricing":[
{
"fareId":"AR5um4n2cToXHml3a125O0CU7toTISvPQER/01Xhbf2E",
"kind":"qpxexpress#segmentPricing",
"segmentId":"GoIDkawPBE2TZk14"
}
],
"tax":[
{
"chargeType":"GOVERNMENT",
"code":"US",
"country":"US",
"id":"US_001",
"kind":"qpxexpress#taxInfo",
"salePrice":"USD2.79"
},
{
"chargeType":"GOVERNMENT",
"code":"AY",
"country":"US",
"id":"AY_001",
"kind":"qpxexpress#taxInfo",
"salePrice":"USD5.60"
},
{
"chargeType":"GOVERNMENT",
"code":"XF",
"country":"US",
"id":"XF",
"kind":"qpxexpress#taxInfo",
"salePrice":"USD4.50"
},
{
"chargeType":"GOVERNMENT",
"code":"ZP",
"country":"US",
"id":"ZP",
"kind":"qpxexpress#taxInfo",
"salePrice":"USD4.00"
}
]
}
],
"saleTotal":"USD54.00",
"slice":[
{
"duration":167,
"kind":"qpxexpress#sliceInfo",
"segment":[
{
"bookingCode":"Z",
"bookingCodeCount":9,
"cabin":"COACH",
"duration":167,
"flight":{
"carrier":"F9",
"number":"1294"
},
"id":"GoIDkawPBE2TZk14",
"kind":"qpxexpress#segmentInfo",
"leg":[
{
"aircraft":"321",
"arrivalTime":"2016-05-11T09:42-04:00",
"departureTime":"2016-05-11T05:55-05:00",
"destination":"MCO",
"duration":167,
"id":"LQKIza3yQIpaLyDq",
"kind":"qpxexpress#legInfo",
"meal":"Food and Beverages for Purchase",
"mileage":1006,
"origin":"ORD",
"originTerminal":"3",
"secure":true
}
],
"marriedSegmentGroup":"0"
}
]
}
]
}
]
}
}
Ultimately I'm trying to find the saleTotal and have the corresponding value printed alongside it:
saleTotal: 54.00

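tripOption and pricing are lists of objects, so each needs an index. Note also that response.json() already returns a dict, so the json.loads(data) call is what raises the TypeError; use result = data instead. Taking both into account, it will work: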
print ('"saleTotal":', result['trips']['tripOption'][0]['pricing'][0]['saleTotal'])
Output:
"saleTotal": USD54.00

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to add title row to using writer and unicodecsv - python

How do I add in the headers? Quite simply, by calling f.writerow((<your>,<headers>,<here>)) once, before your for loop.

Related

Retrieve a specific value from a JSON data and generate a CSV file

Python - How to retrieve element from json

How to convert a list of OrderedDict to nested json with grouped keys in python

Dictionary length is equal to 3 but when trying to access an index receiving KeyError

Parsing Through Nested JSON Python

Categories

Resources