KeyError with Python dictionary from API - python

How can I access a specific list of items inside a dictionary? I want to use the key name to output Richmond from the station list. A tutorial I was using is outdated, so the best I could manage was this loop that printed the keys and items:
for key, value in data.items():
print(key, value)
That revealed the two outermost keys (?xml and root), but there are nested dictionaries I would like to access.
What I would like:
for item in data['station']
print(item['name'])
>>> Richmond
Instead I get KeyError with station. Seeing as how ?xml and root are the keys identified with the loop, I'm presuming I need a nested loop, first going through the root key, and then accessing station, and then using the key name to print Richmond.
The API result:
{
"?xml": {
"#version": "1.0",
"#encoding": "utf-8"
},
"root": {
"#id": "1",
"uri": {
"#cdata-section": "http://api.bart.gov/api/etd.aspx?cmd=etd&orig=RICH&json=y"
},
"date": "10/14/2017",
"time": "07:50:17 PM PDT",
"station": [{
"name": "Richmond",
"abbr": "RICH",
"etd": [{
"destination": "Warm Springs",
"abbreviation": "WARM",
"limited": "0",
"estimate": [{
"minutes": "4",
"platform": "2",
"direction": "South",
"length": "6",
"color": "ORANGE",
"hexcolor": "#ff9933",
"bikeflag": "1",
"delay": "0"
}, {
"minutes": "24",
"platform": "2",
"direction": "South",
"length": "6",
"color": "ORANGE",
"hexcolor": "#ff9933",
"bikeflag": "1",
"delay": "0"
}]
}]
}],
"message": ""
}
}

Related

How to find empty fields in json file?

I have this sample.json file with me:
{
"details":[
{
"name": "",
"class": "4",
"marks": "72.6"
},
{
"name": "David",
"class": "",
"marks": "78.2"
},
{
"name": "Emily",
"class": "4",
"marks": ""
}
]
}
As you can see, in the first entry "name" (a string) is empty.
In the second entry, "class" (an integer) is empty.
And in the third entry, "marks" (a float) is empty.
Now my task is;
to find the fields which are empty, if string is empty replace it with "BLANK", if integer is empty replace it with 0, and if float is empty replace it with 0.0
P.S: I'm doing this with Python like this:
import json
# Raw string: the Windows path contains backslashes that must not be parsed
# as escape sequences. The `with` block closes the file once it is parsed.
with open(r'D:\github repo\python\sample.json') as sample_file:
    df = json.load(sample_file)

# Print the "name" field of every record under "details".
for i in df["details"]:
    print(i["name"])
Also, I don't want to hard-code the field names. Here there are only 3 fields (name, class, marks), but what if I have more than 3? How will I find which fields are empty?
Like you see here:
{
"code": "AAA",
"lat": "-17.3595",
"lon": "-145.494",
"name": "Anaa Airport",
"city": "Anaa",
"state": "Tuamotu-Gambier",
"country": "French Polynesia",
"woeid": "12512819",
"tz": "Pacific\/Midway",
"phone": "",
"type": "Airports",
"email": "",
"url": "",
"runway_length": "4921",
"elev": "7",
"icao": "NTGA",
"direct_flights": "2",
"carriers": "1"
},
This is just one block; I have N blocks like this. That's why I can't hard-code the values, right?
Can anybody help me with it!
Thank You so much!
Since the type info isn't available anywhere programmatically, and there seem to be only three hard-coded fields, I'd just check each of them explicitly.
Short-circuiting with the or operator would even allow you to achieve this fairly elegantly:
# An empty string is falsy, so `stored or fallback` keeps a non-empty value
# and substitutes the placeholder otherwise.
field_fallbacks = (('name', 'BLANK'), ('class', '0'), ('marks', '0.0'))
for record in df['details']:
    for field, fallback in field_fallbacks:
        record[field] = record[field] or fallback
You could check whether the string is empty with a simple if statement like so.
if i['name'] == "":
    ...  # the name is empty: handle it here
Alternatively, you could also do
if not i['name']:
    ...  # the name is empty (falsy): handle it here
The second if statement makes use of falsy and truthy values in Python. Here's a link to read more about it
You could create a dictionary empty_replacements mapping each key to its corresponding desired empty value:
import json
sample_json = {
    "details": [
        {
            "name": "",
            "class": "4",
            "marks": "72.6"
        },
        {
            "name": "David",
            "class": "",
            "marks": "78.2"
        },
        {
            "name": "Emily",
            "class": "4",
            "marks": ""
        }
    ]
}
# Placeholder to store in a field when its value is empty.
empty_replacements = {"name": "BLANK", "class": "0", "marks": "0.0"}


def fill_empty_fields(records, replacements):
    """Return new records with empty values replaced by their placeholder.

    Args:
        records: iterable of dicts (one per record).
        replacements: dict mapping a field name to the value to use when
            that field is empty.

    Returns:
        A list of new dicts; the input records are not modified. An empty
        field that has no entry in *replacements* is left unchanged instead
        of raising KeyError, so records may carry any number of extra fields.
    """
    return [
        {
            key: value if value else replacements.get(key, value)
            for key, value in record.items()
        }
        for record in records
    ]


sample_json["details"] = fill_empty_fields(
    sample_json["details"], empty_replacements
)
print('sample_json after replacements: ')
print(json.dumps(
    sample_json,
    sort_keys=False,
    indent=4,
))
Output:
sample_json after replacements:
{
"details": [
{
"name": "BLANK",
"class": "4",
"marks": "72.6"
},
{
"name": "David",
"class": "0",
"marks": "78.2"
},
{
"name": "Emily",
"class": "4",
"marks": "0.0"
}
]
}
I'm assuming, from the dictionary you provided, that marks and class are stored as strings.
# Placeholder for each known field; fields not listed here are never touched.
placeholders = {'name': "BLANK", 'class': '0', 'marks': '0.0'}

updated = []
for record in df["details"]:
    for field, current in record.items():
        # Only an exactly-empty string in a known field is replaced.
        if current == '' and field in placeholders:
            record[field] = placeholders[field]
    updated.append(record)
df['details'] = updated

Deleting items in nested dictionaries

I am trying to delete items from my Python dictionary based on a lookup table that I iterate through, but I am not able to delete the values.
This post depends on python 3.8 to work with.
inEvent = {
"event_data": {
"event_name": "Test",
"station_id": "Station1",
"serial_no": "1234",
},
"tests": [
{
"name": "Temperature from Sensor 1",
"status": "Passed",
"value": "21.0",
"limits": [],
"units": "°C",
},
{
"name": "Power Measurement CH1",
"status": "Passed",
"value": "30.9",
"limits": [],
"units": "W",
},
{
"name": "Slope Check: Measured Slope CH1",
"status": "Passed",
"value": "1234.5678",
"limits": {"low": "0", "high": None},
"units": [],
},
{
"name": "Slope Check: Measured Slope CH2",
"status": "Passed",
"value": "-1",
"limits": [],
"units": [],
},
],
"plots": [],
}
While iterating over my dictionary I replace matching names, but in rare cases I want to delete the complete matching test entry. For example, {"name": "Slope Check: Measured Slope CH1", "status":"Passed","value": "1234.5678","limits": {"low": "0", "high": None},"units": [],} should be deleted.
# Iterate over a copy of the list so entries can be removed from the real
# list without disturbing the iteration.
for t in list(inEvent['tests']):
    eventName = t['name']
    for row in self.LookUpTable.itertuples(index=False):  # own lookuptable excel file
        if eventName != row[4]:
            continue
        if pd.isnull(row[5]):
            # No replacement name in the lookup table: drop the whole entry.
            # list.pop() expects an integer index, so passing the dict raises
            # TypeError; list.remove() deletes the entry by value.
            print(inEvent)
            inEvent['tests'].remove(t)
        else:
            t['name'] = row[5]
        break
I recommend using a nested loop for this: loop through all of the keys and values in the outer dictionary, and then loop through all of the keys and values in each nested dictionary. You can then use an if statement to find the key in the dictionary that you want to delete. Here is a simple outline of code that you can use (if this does not work, I recommend searching for the exact error and looking further into it):
def delete_nested_items(data, something):
    """Delete every entry equal to *something* from the dicts nested in *data*.

    Args:
        data: the outer dictionary; values that are not dicts are skipped.
        something: the value to look for inside each nested dictionary.

    The nested dictionaries are modified in place; nothing is returned.
    """
    for value in data.values():
        if not isinstance(value, dict):
            continue  # only nested dictionaries have entries to delete
        # Iterate over a snapshot: deleting from a dict while iterating its
        # live view raises RuntimeError.
        for k, v in list(value.items()):
            if v == something:
                del value[k]

How to read nested Json lists using Python?

How can I get all the accountNumber, name and phoneNumber to be printed separately in Json Response using Python
[{
"msg": "result",
"id": "testdata",
"result": [{
"accountNumber": "123456",
"name": "CHRISfarmece",
"phoneNumber": "2333455"
}, {
"accountNumber": "553222",
"name": "name1",
"phoneNumber": "123456"
}, {
"accountNumber": "34566",
"name": "name2",
"phoneNumber": "24567"
}]
}]
You can simply loop through the result value (it's a list) and store each value in a separate list, like below:
# One list per field, filled in the order the records appear in the response.
data2 = {'accountNumber': [], 'name': [], 'phoneNumber': []}
for x in data1[0]['result']:
    for key, value in x.items():
        # setdefault: a record carrying an extra field starts a new list
        # instead of raising KeyError.
        data2.setdefault(key, []).append(value)

python dictionary/json inception

How do you pull, split, and append an array inside a dictionary inside a dictionary?
This is the data I've got:
data = {
"Event":{
"distribution":"0",
"orgc":"Oxygen",
"Attribute": [{
"type":"ip-dst",
"category":"Network activity",
"to_ids":"true",
"distribution":"3",
"value":["1.1.1.1","2.2.2.2"]
}, {
"type":"url",
"category":"Network activity",
"to_ids":"true",
"distribution":"3",
"value":["msn.com","google.com"]
}]
}
}
This is what I need --
{
"Event": {
"distribution": "0",
"orgc": "Oxygen",
"Attribute": [{
"type": "ip-dst",
"category": "Network activity",
"to_ids": "true",
"distribution": "3",
"value": "1.1.1.1"
}, {
"type": "ip-dst",
"category": "Network activity",
"to_ids": "true",
"distribution": "3",
"value": "2.2.2.2"
}, {
"type": "url",
"category": "Network activity",
"to_ids": "true",
"distribution": "3",
"value": "msn.com"
}, {
"type": "url",
"category": "Network activity",
"to_ids": "true",
"distribution": "3",
"value": "google.com"
}
}
}
Here is where I was just playing around with it and totally lost!!
for item in data["Event"]["Attribute"]:
if "type":"ip-dst" and len("value")>1:
if 'ip-dst' in item["type"] and len(item["value"])>1:
for item in item["value"]:
...and totally lost
How about this?
# Keep a handle on the original attribute list, then install a fresh list in
# its place that will receive the expanded entries.
source_attributes = data["Event"]["Attribute"]
expanded = data["Event"]["Attribute"] = []
for attribute in source_attributes:
    # One shallow copy of the attribute per entry of its "value" list, with
    # "value" set to that single entry.
    expanded.extend(
        dict(attribute, value=single_value)
        for single_value in attribute["value"]
    )
This will work with the data structure you've shown, but not necessarily with all kinds of nested data, since we do a shallow copy of the attribute. That will mean that you have to make sure that apart from the "value" list, it only contains atomic values like numbers, strings, or booleans. The values list may contain nested structures, since we're only moving references there.

How can I dynamically build a path/reference to a nested dictionary in python

I am writing a class to take a Chrome bookmarks file (see example below):
{
"checksum": "452bebcad611a3faffb2c009099139e5",
"roots": {
"bookmark_bar": {
"children": [ {
"date_added": "13028719861473329",
"id": "4",
"name": "first bookmark",
"type": "url",
"url": "chrome://newtab/"
}, {
"children": [ {
"children": [ {
"date_added": "13026904508000000",
"id": "7",
"name": "Getting Started",
"type": "url",
"url": "https://www.mozilla.org/en-GB/firefox/central/"
} ],
"date_added": "13028740260032410",
"date_modified": "0",
"id": "6",
"name": "Bookmarks Toolbar",
"type": "folder"
}, {
"children": [ {
"date_added": "13026904508000000",
"id": "9",
"name": "Help and Tutorials",
"type": "url",
"url": "https://www.mozilla.org/en-GB/firefox/help/"
}, {
"date_added": "13026904508000000",
"id": "10",
"name": "Customise Firefox",
"type": "url",
"url": "https://www.mozilla.org/en-GB/firefox/customize/"
}, {
"date_added": "13026904508000000",
"id": "11",
"name": "Get Involved",
"type": "url",
"url": "https://www.mozilla.org/en-GB/contribute/"
}, {
"date_added": "13026904508000000",
"id": "12",
"name": "About Us",
"type": "url",
"url": "https://www.mozilla.org/en-GB/about/"
} ],
"date_added": "13028740260032410",
"date_modified": "0",
"id": "8",
"name": "Mozilla Firefox",
"type": "folder"
}, {
"date_added": "13026904551000000",
"id": "13",
"name": "Welcome to Firefox",
"type": "url",
"url": "http://www.mozilla.org/en-US/firefox/24.0/firstrun/"
} ],
"date_added": "13028740260004410",
"date_modified": "0",
"id": "5",
"name": "Imported From Firefox",
"type": "folder"
} ],
"date_added": "13028719626916276",
"date_modified": "13028719861473329",
"id": "1",
"name": "Bookmarks bar",
"type": "folder"
},
"other": {
"children": [ ],
"date_added": "13028719626916276",
"date_modified": "0",
"id": "2",
"name": "Other bookmarks",
"type": "folder"
},
"synced": {
"children": [ ],
"date_added": "13028719626916276",
"date_modified": "0",
"id": "3",
"name": "Mobile bookmarks",
"type": "folder"
}
},
"version": 1
}
I convert from JSON to a nested dictionary then extract the bookmark urls under each relevant bookmark folder in my write_data method.
As there can be any number of bookmark folders and/or bookmarks nested within each folder, I want to call the write_data method within itself so that it keeps on extracting child data every time it finds a nested folder. I just can't work out how to pass the relevant child dictionaries into the same method.
I've tried building up the dictionary path with a string. I think I need to pass in a tuple or list of keys to loop through and dynamically build up the path but I can't get it working and my poor head is wrecked!
There is a similar question but the answer uses yield which has confused me totally and was not a totally working solution anyway. Please help!
import json
import sys
import codecs
class FileExtractor(object):
    """Load a bookmarks JSON file and print the entries found under one key.

    Written in Python 2 syntax (print statement, ``iteritems``, ``<>``).
    """

    def __init__(self, input_file):
        # The open handle is kept on the instance; nothing in this class
        # closes it.
        self.infile = codecs.open(input_file, encoding='utf-8')
        # Parsed JSON document.
        self.bookmark_data = json.load(self.infile)

    def write_data(self, my_key):
        """Print the name and fields of each dict under ``bookmark_data[my_key]``.

        A non-empty "children" entry is not descended into; a placeholder
        message is printed for it instead.
        """
        for key, value in self.bookmark_data[my_key].iteritems():
            # Only dict values are reported; other values are ignored.
            if type(self.bookmark_data[my_key][key]) is dict:
                print self.bookmark_data[my_key][key]['name']
                for subkey, subvalue in self.bookmark_data[my_key][key].iteritems():
                    if subkey == "children" and len(self.bookmark_data[my_key][key][subkey]) <> 0:
                        print "this is a child. I can't figure out how to use write_data with this"
                        #self.write_data('[my_key][key][subkey]')
                    else:
                        print subkey, ": ", self.bookmark_data[my_key][key][subkey]
# Script entry point: the bookmarks file path is the first command-line argument.
if(__name__=="__main__"):
    stuff= FileExtractor(sys.argv[1])
    # ('roots') is just the string 'roots'; the parentheses do not make a tuple.
    stuff.write_data(('roots'))
Not sure if this is what you are getting at but if you pass in the dictionary object itself to write_data rather than the key you can recurse as far down as the dictionary goes. WARNING I haven't tested this, it's just to give you an idea.
def write_data(self, my_dict=None):
    """Print each named dict found in *my_dict* and recurse into its children.

    Args:
        my_dict: dictionary whose dict-valued entries are reported. When
            omitted, ``self.bookmark_data['roots']`` is used.

    Raises:
        KeyError: if a dict-valued entry has no 'name' key.
    """
    # Compare with None explicitly: with `my_dict or ...`, an empty dict met
    # during recursion would restart the walk from the roots and never finish.
    if my_dict is None:
        my_dict = self.bookmark_data['roots']
    for key, value in my_dict.items():
        if not isinstance(value, dict):
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(value['name'])
        for subkey, subvalue in value.items():
            if subkey == "children" and len(subvalue) != 0:
                for child in subvalue:
                    self.write_data(child)
            else:
                print("%s :  %s" % (subkey, subvalue))
better version:
def write_data(self, my_dict=None):
    """Recursively print the name and scalar fields of a bookmark tree.

    Args:
        my_dict: dictionary to walk. When omitted,
            ``self.bookmark_data['roots']`` is used.

    Dict values are walked recursively, every item of a list value is walked
    in turn, and any other value is printed as ``key :  value``.
    """
    # Compare with None explicitly: with `my_dict or ...`, an empty dict met
    # during recursion would restart the walk from the roots and never finish.
    if my_dict is None:
        my_dict = self.bookmark_data['roots']
    if 'name' in my_dict:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(my_dict['name'])
    for key, value in my_dict.items():
        if isinstance(value, dict):
            self.write_data(value)
        elif isinstance(value, list):
            for item in value:
                self.write_data(item)
        else:
            print("%s :  %s" % (key, value))

Categories