Default dict keys to avoid KeyError - python

I'm calling some JSON and parsing relevant data as CSV. I cannot figure out how to fill in the intermediate JSON dict file with default keys, as many are unpopulated. The result is a KeyError as I attempt to parse the content into a CSV.
I'm now receiving a 'NoneType' error for (manufacturer):
import urllib2, json, csv, sys, os, codecs, re
from collections import defaultdict
output = 'bb.csv'
csv_writer = csv.writer(open(output, 'w'))
header = ['sku', 'name', 'description', 'image', 'manufacturer', 'upc', 'department', 'class', 'subclass']
csv_writer.writerow(header)
i=1
while i<101:
print i
bb_url = urllib2.Request("http://api.remix.bestbuy.com/v1/products(sku=*)?show=sku,name,description,image,manufacturer,upc,department,class,subclass&format=json&sort=sku.asc&page=" + str(i) + "&pageSize=100&apiKey=*****************")
bb_json = json.load(urllib2.urlopen(bb_url))
print bb_json
for product in bb_json['products']:
row = []
row.append(product['sku'])
if product['name']:
row.append(str((product['name']).encode('utf-8')))
else:
row.append("")
row.append(str(product.get('description',"")))
row.append(str(product['image'])+ " ")
if product['name']:
row.append(str(product.get('manufacturer',"").encode('utf-8')))
else:
row.append("")
row.append(str(product.get('upc','').encode('utf-8')))
row.append(str((product['department']).encode('utf-8')))
row.append(str((product['class']).encode('utf-8')))
row.append(str((product['subclass']).encode('utf-8')))
csv_writer.writerow(row)
i = i+1

You can use your_dict.get(key, "default value") instead of directly referencing a key.

Don't use the "default" argument name. For example, if we want 1.0 as default value,
rank = dict.get(key, 1.0)
For more details:
TypeError: get() takes no keyword arguments

If you can't define a default value and want to do something else (or just omit the entry):
if key in dict:
rank = dict[key]
else:
# do something or just skip the else block entirely

You could use syntax like this: product.get("your field", "default value")

Related

Python 'list' object has no attribute 'keys' when trying to write a row in CSV file

I am trying to write a new row into a CSV file and I can't because I get an error in Python Shell.
Below is the code I am using (I am reading JSON from API and want to put data into CSV file)
# import urllib library
from urllib.request import Request, urlopen
c=1
# import json
import json
# store the URL in url as
# parameter for urlopen
import pandas as pd
import csv
headerList = ['name','id','order','height','weight','speed','special_defense','special_attack','defense','attack','hp']
# open CSV file and assign header
with open("pokemon_stats.csv", 'w') as file:
dw = csv.DictWriter(file, delimiter=',',
fieldnames=headerList)
dw.writeheader()
# display csv file
fileContent = pd.read_csv("pokemon_stats.csv")
for r in range(1,3):
req = Request('https://pokeapi.co/api/v2/pokemon/'+str(r)+'/', headers={'User-Agent': 'Chrome/32.0.1667.0'})
# store the response of URL
response = urlopen(req)
# storing the JSON response
# from url in data
data_json = json.loads(response.read())
#print(data_json)
for key, value in data_json.items():
if key=='name':
name=value
elif key=='id':
id=value
elif key=='order':
order=value
elif key=='height':
height=value
elif key=='weight':
weight=value
elif key == 'stats':
for sub in data_json['stats']:
for i in sub:
if i=='base_stat':
base_stat=sub[i]
if i=='stat':
for j in sub[i]:
if j=='name':
stat_name=sub[i][j]
if stat_name=='hp':
hp=base_stat
elif stat_name=='attack':
attack=base_stat
elif stat_name=='defense':
defense=base_stat
elif stat_name=='special-attack':
special_attack=base_stat
elif stat_name=='special-defense':
special_defense=base_stat
elif stat_name=='speed':
speed=base_stat
data = [name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp]
dw.writerow(data)
After I try the execution of this code I get an error as it follows:
Traceback (most recent call last):
File "C:/Users/sbelcic/Desktop/NANOBIT_API.py", line 117, in <module>
dw.writerow(data)
File "C:\Users\sbelcic\AppData\Local\Programs\Python\Python37\lib\csv.py", line 155, in writerow
return self.writer.writerow(self._dict_to_list(rowdict))
File "C:\Users\sbelcic\AppData\Local\Programs\Python\Python37\lib\csv.py", line 148, in _dict_to_list
wrong_fields = rowdict.keys() - self.fieldnames
AttributeError: 'list' object has no attribute 'keys'*
Can somebody pls help and tell me what I am doing wrong.
I don't have working experience of manipulating JSON response with Python so any comments are welcome. If someone sees a better way to do this he is welcome to share.
Since dw is a DictionaryWriter, data needs to be a dictionary (currently it's a list) as seen in the documentation.
Convert data to a dictionary with your headers
data = [name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp]
data = dict(zip(headerList, data))
dw.writerow(data)
Check the example for using the DictWriter. You need to pass a dictionary to writerow instead of a list, so your last line should be
data =['name':name,'id': id,'order':order,'height': height,'weight':weight,'speed':speed,'special_defense':special_defense,'special_attack':special_attack,'defense':defense,'attack':attack,'hp':hp]
dw.writerow(data)
Note that your whole code can also be simplified if you populate the data dictionary instead of all your if/else:
data={} #empty dictionary
#First extract everything that is on the main level of your dict
for key in ("name", "id", "order", "height", "weight":
if key in data_json:
data[key]=data_json[key]
#Check if the "stats" dict exists in your JSON data
if 'stats' in data_json:
if 'base_stat' in data_json['stats']:
data['base_stat']=data_json['stats']['base_stat']
if 'stat' in data_json['stats']:
statDict = data_json['stats']['stat']
for key in ['hp', 'attack', 'defense', 'special-attack', 'special-defense', 'speed']:
if key in statDict:
data[key]=statDict[key]
Notes:
I did not test this code, check it carefully, but I hope you get the idea
You could add else to all if key in checks to include an error message if a stat is missing
If you are sure that all keys will always be present, then you can skip a few of the if checks
I'm going to ignore the actual error that got you here, and instead propose a radical restructure: I think your code will be simpler and easier to reason about.
I've looked at the JSON returned from that Pokemon API and I can see why you started down the path you did: there's a lot of data, and you only need a small subset of it. So, you're going through a lot of effort to pick out exactly what you want.
The DictWriter interface can really help you here. Consider this really small example:
header = ['name', 'id', 'order']
with open('output.csv', 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=header)
writer.writeheader()
writer.writerow({'name': 'bulbasaur', 'id': 1, 'order': 1, 'species': {}})
Maybe you've run something like this before and got this error:
ValueError: dict contains fields not in fieldnames: 'species'
because the JSON you loaded has keys you didn't include when you created your writer... because you don't want them. And then, maybe you figured, "well, that means I've got to be very selective about what I put in the dict before passing to writerow()?
Since you've already defined which keys you care about for the header, use those keys to pull out what you want from the JSON:
header = ['name', 'id', 'order', 'height', 'weight',
'speed', 'special-defense', 'special-attack',
'defense', 'attack', 'hp']
all_data = json.load(open('1.json')) # bulbasaur, I downloaded this from the API URL
my_data = {}
for key in header:
my_data[key] = all_data.get(key) # will return None for sub-stats keys, which is okay for now
writer = csv.DictWriter(sys.stdout, fieldnames=header)
writer.writeheader()
writer.writerow(my_data)
The get(key_name) method on a dict (the JSON data) will try to find that key in the dict and return that key's value. If the key isn't found, None is returned. Running that I get the following CSV (the sub-stat columns are empty, as expected):
name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp
bulbasaur,1,1,7,69,,,,,,
This has the same effect as your "if this key, then this value" statements, but it's driven by the data (header names) you already defined.
On to the sub-stats...
I think it's safe to assume that if there is a stats key in the JSON, each "stat object" in the list of stats will have the data you want. It's important to make sure you're only copying the stats you've specified in header; and again, you can use your data to drive the process:
for stat in all_data['stats']:
stat_name = stat['stat']['name']
if stat_name not in header:
continue # skip this sub-stat, no column for it in the CSV
base_stat = stat['base_stat']
my_data[stat_name] = base_stat
When I insert that loop, I now get this for my CSV output:
name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp
bulbasaur,1,1,7,69,45,,,49,49,45
Some stats are populated, but some, the "special" stats are blank? That's because in your header you've named them like special_attack (with underscore) but in reality they're like special-attack (with hyphen). I fixed your header, and now I get:
name,id,order,height,weight,speed,special-defense,special-attack,defense,attack,hp
bulbasaur,1,1,7,69,45,65,65,49,49,45
Those are all the pieces you need. To put it together, I recommend the following structure... I'm a big fan of breaking up a process like this into distinct tasks: get all the data, then process all the data, then write all the processed data. It makes debugging easier, and less indentation of code:
# Make all API calls and record their JSON
all_datas = []
# loop over your API calls:
# make the request
# get the JSON data
# append JSON data to all_datas
# Process/transform the API JSON into what you want
my_data_rows = []
for all_data in all_datas:
my_data_row = {}
for key in header:
my_data_row[key] = all_data.get(key)
for stat in all_data['stats']:
stat_name = stat['stat']['name']
if stat_name not in header:
continue # skip this sub-stat
base_stat = stat['base_stat']
my_data[stat_name] = base_stat
# Write your transformed data to CSV
writer = csv.DictWriter(sys.stdout, fieldnames=header)
writer.writeheader()
writer.writerows(my_data_rows)

How do I avoid KeyError when working with dictionaries?

Right now I'm trying to code an assembler but I keep getting this error:
Traceback (most recent call last):
File "/Users/Douglas/Documents/NeWS.py", line 44, in
if item in registerTable[item]:
KeyError: 'LD'
I currently have this code:
functionTable = {"ADD":"00",
"SUB":"01",
"LD" :"10"}
registerTable = {"R0":"00",
"R1":"00",
"R2":"00",
"R3":"00"}
accumulatorTable = {"A" :"00",
"B" :"10",
"A+B":"11"}
conditionTable = {"JH":"1"}
valueTable = {"0":"0000",
"1":"0001",
"2":"0010",
"3":"0011",
"4":"0100",
"5":"0101",
"6":"0110",
"7":"0111",
"8":"1000",
"9":"1001",
"10":"1010",
"11":"1011",
"12":"1100",
"13":"1101",
"14":"1110",
"15":"1111"}
source = "LD R3 15"
newS = source.split(" ")
for item in newS:
if item in functionTable[item]:
functionField = functionTable[item]
else:
functionField = "00"
if item in registerTable[item]:
registerField = registerTable[item]
else:
registerField = "00"
print(functionField + registerField)
Help is appreciated.
You generally use .get with a default
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.
So when you use get the loop would look like this:
for item in newS:
functionField = functionTable.get(item, "00")
registerField = registerTable.get(item, "00")
print(functionField + registerField)
which prints:
1000
0000
0000
If you want to do the explicit check if the key is in the dictionary you have to check if the key is in the dictionary (without indexing!).
For example:
if item in functionTable: # checks if "item" is a *key* in the dict "functionTable"
functionField = functionTable[item] # store the *value* for the *key* "item"
else:
functionField = "00"
But the get method makes the code shorter and faster, so I wouldn't actually use the latter approach. It was just to point out why your code failed.
There is no key 'LD' in registerTable. Can put a try except block :
try:
a=registerTable[item]
...
except KeyError:
pass
You are looking to see if the potential key item exists in in dictionary at item. You simply need to remove the lookup in the test.
if item in functionTable:
...
Though this could even be improved.
It looks like you try to look up the item, or default to '00'. Python dictionaries has the built in function .get(key, default) to try to get a value, or default to something else.
Try:
functionField = functionTable.get(item, '00')
registerField = registerTable.get(item, '00')

Python return value without inverted commas

I have csv file:
shack_imei.csv:
shack, imei
F10, "5555"
code:
reader = csv.reader(open("shack_imei.csv", "rb"))
my_dict = dict(reader)
shack = raw_input('Enter Shack:')
print shack
def get_imei_from_entered_shack(shack):
for key, value in my_dict.iteritems():
if key == shack:
return value
list = str(get_imei_from_entered_shack(shack))
print list
which gives me "5555"
But I need this value in a list structure like this:
["5555"]
I've tried a lot of different methods, and they all end up with extra ' or""
EDIT 1:
new simpler code:
reader = csv.reader(open("shack_imei.csv", "rb"))
my_dict = dict(reader)
shack = raw_input('Enter Shack:')
imei = my_dict[shack]
print imei
"5555"
list(imei) gives me ['"5555"'], I need it to be ["5555"]
You can change your "return" sentence:
shack = raw_input('Enter Shack:')
print shack
def get_imei_from_entered_shack(shack):
for key, value in my_dict.iteritems():
if key == shack:
return [str(value)]
list = get_imei_from_entered_shack(shack)
print list
As far as I understand, you want to create a list containing the returned string, which you do with [ ]
list = [str(get_imei_from_entered_shack(shack))]
There are a few problems with this code, which are too long to tackle in comments
my_dict
my_dict = dict(reader) works only well if this csv is a collection of keys and values. If there are duplicate keys, this might give some problems
get_imei_from_entered_shack
Why this special method, instead of just asking my_dict the correct value. Even if you don't want it to trow an Exception when you ask for a shack that doesn't exists, you can use the dict.get(<key>, <default>) method
my_dict(shack, None)
does the same as your 4-line method
list
don't name variables the same as builtins
list2
if you want a list, you can do [<value>] or list(<value>) (unless you replaced list with your own variable assignment)
reader = csv.reader(open("shack_imei.csv", "rb"))
my_dict = dict(reader)
shack = raw_input('Enter Shack:')
imei = my_dict[shack]
imei = imei.replace('"',"")
IMEI_LIST =[]
IMEI_LIST.append(imei)
print IMEI_LIST
['5555']

How to check for blank fields in input json data in python?

Suppose in my python below function, i am getting the json feeds like below
def mapper_1(self, key, line):
j_feed = json.loads(line)
unicoded = j_feed[u'category_description'].encode("utf-8")
cn = j_feed[u'categoryname']
location = j_feed[u'location']
How to check if there is any blank fields for data in categoryname/categorydescription/location from the input.json.
Say you are unsure of your fields, you can use .get and provide it a default sentinel value
fields = ['categoryname', 'categorydescription', 'location']
for field in fields:
print j_feed.get(field, "not set!")

Python - is there an elegant way to avoid dozens try/except blocks while getting data out of a json object?

I'm looking for ways to write functions like get_profile(js) but without all the ugly try/excepts.
Each assignment is in a try/except because occasionally the json field doesn't exist. I'd be happy with an elegant solution which defaulted everything to None even though I'm setting some defaults to [] and such, if doing so would make the overall code much nicer.
def get_profile(js):
""" given a json object, return a dict of a subset of the data.
what are some cleaner/terser ways to implement this?
There will be many other get_foo(js), get_bar(js) functions which
need to do the same general type of thing.
"""
d = {}
try:
d['links'] = js['entry']['gd$feedLink']
except:
d['links'] = []
try:
d['statisitcs'] = js['entry']['yt$statistics']
except:
d['statistics'] = {}
try:
d['published'] = js['entry']['published']['$t']
except:
d['published'] = ''
try:
d['updated'] = js['entry']['updated']['$t']
except:
d['updated'] = ''
try:
d['age'] = js['entry']['yt$age']['$t']
except:
d['age'] = 0
try:
d['name'] = js['entry']['author'][0]['name']['$t']
except:
d['name'] = ''
return d
Replace each of your try catch blocks with chained calls to the dictionary get(key [,default]) method. All calls to get before the last call in the chain should have a default value of {} (empty dictionary) so that the later calls can be called on a valid object, Only the last call in the chain should have the default value for the key that you are trying to look up.
See the python documentation for dictionairies http://docs.python.org/library/stdtypes.html#mapping-types-dict
For example:
d['links'] = js.get('entry', {}).get('gd$feedLink', [])
d['published'] = js.get('entry', {}).get('published',{}).get('$t', '')
Use get(key[, default]) method of dictionaries
Code generate this boilerplate code and save yourself even more trouble.
Try something like...
import time
def get_profile(js):
def cas(prev, el):
if hasattr(prev, "get") and prev:
return prev.get(el, prev)
return prev
def getget(default, *elements):
return reduce(cas, elements[1:], js.get(elements[0], default))
d = {}
d['links'] = getget([], 'entry', 'gd$feedLink')
d['statistics'] = getget({}, 'entry', 'yt$statistics')
d['published'] = getget('', 'entry', 'published', '$t')
d['updated'] = getget('', 'entry', 'updated', '$t')
d['age'] = getget(0, 'entry', 'yt$age', '$t')
d['name'] = getget('', 'entry', 'author', 0, 'name' '$t')
return d
print get_profile({
'entry':{
'gd$feedLink':range(4),
'yt$statistics':{'foo':1, 'bar':2},
'published':{
"$t":time.strftime("%x %X"),
},
'updated':{
"$t":time.strftime("%x %X"),
},
'yt$age':{
"$t":"infinity years",
},
'author':{0:{'name':{'$t':"I am a cow"}}},
}
})
It's kind of a leap of faith for me to assume that you've got a dictionary with a key of 0 instead of a list but... You get the idea.
You need to familiarise yourself with dictionary methods Check here for how to handle what you're asking.
Two possible solutions come to mind, without knowing more about how your data is structured:
if k in js['entry']:
something = js['entry'][k]
(though this solution wouldn't really get rid of your redundancy problem, it is more concise than a ton of try/excepts)
or
js['entry'].get(k, []) # or (k, None) depending on what you want to do
A much shorter version is just something like...
for k,v in js['entry']:
d[k] = v
But again, more would have to be said about your data.

Categories