Dynamically adding nested dictionaries - python

I want to dynamically add values in a nested dictionary. I am trying to cache similarity score of two words with their part-of-speech-tag.
In short I want to store values as this;
synset_cache[word1][word1_tag][word2][word2_tag] = score
class MyClass(Object):
def __init__(self):
MyClass.synset_cache={} #dict
def set_cache(self,word1, word1_tag, word2, word2_tag, score)
try:
MyClass.synset_cache[word1]
except:
MyClass.synset_cache[word1]={} #create new dict
try:
MyClass.synset_cache[word1][word1_tag]
except:
MyClass.synset_cache[word1][word1_tag]={} #create new dict
try:
MyClass.synset_cache[word1][word1_tag][word2]
except:
MyClass.synset_cache[word1][word1_tag][word2]={} #create new dict
#store the value
MyClass.synset_cache[word1][word1_tag][word2][word2_tag] = score
But I am getting this error.
Type error: list indices must be integers, not unicode
Line number it shows is at MyClass.synset_cache[word1][word1_tag]={} #create new dict.
How can I get this working?
EDIT:
According to the #Robᵩ's comments on his answer; I was assigning a list to this MyClass.synset_cache in another method(note it is at the class-level). So this code part had no errors.

Use dict.setdefault.
This might work:
#UNTESTED
d = MyClass.synset_cache.setdefault(word1, {})
d = d.setdefault(word1_tag, {})
d = d.setdefault(word2, {})
d[word2_tag] = score
Alternatively, you can use this handy recursive defaultdict that springs up new levels of dict automatically. (See: here and here.)
import collections
def tree():
return collections.defaultdict(tree)
class MyClass(Object):
def __init__(self):
MyClass.synset_cache=tree()
def set_cache(self,word1, word1_tag, word2, word2_tag, score)
MyClass.synset_cache[word1][word1_tag][word2][word2_tag] = score

This will be data dependent, as at least for some test data (see below), the code does not produce that error. How are you calling it?
Also, note that as written above, it won't compile due to some syntax errors (i.e. no colon to end the def set_cache line).
Below is some tweaked-to-compile code with some example calling data and how that pretty-prints:
#!/usr/bin/env python
import pprint
class MyClass():
def __init__(self):
MyClass.synset_cache={} #dict
def set_cache(self,word1, word1_tag, word2, word2_tag, score):
try:
MyClass.synset_cache[word1]
except:
MyClass.synset_cache[word1]={} #create new dict
try:
MyClass.synset_cache[word1][word1_tag]
except:
MyClass.synset_cache[word1][word1_tag]={} #create new dict
try:
MyClass.synset_cache[word1][word1_tag][word2]
except:
MyClass.synset_cache[word1][word1_tag][word2]={} #create new dict
#store the value
MyClass.synset_cache[word1][word1_tag][word2][word2_tag] = score
x = MyClass()
x.set_cache('foo', 'foo-tag', 'bar', 'bar-tag', 100)
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(x.synset_cache)
Which outputs:
{ 'foo': { 'foo-tag': { 'bar': { 'bar-tag': 100}}}}
A couple other things of note...
I'd recommend using the in style syntax to check for key presence rather than try-except. It's more compact and more Pythonic.
Also, your main variable, synset_cache, is class-level (i.e. static). Did you mean for that to be the case?

Related

Nested dictionary that acts as defaultdict when setting items but not when getting items

I want to implement a dict-like data structure that has the following properties:
from collections import UserDict
class TestDict(UserDict):
pass
test_dict = TestDict()
# Create empty dictionaries at 'level_1' and 'level_2' and insert 'Hello' at the 'level_3' key.
test_dict['level_1']['level_2']['level_3'] = 'Hello'
>>> test_dict
{
'level_1': {
'level_2': {
'level_3': 'Hello'
}
}
}
# However, this should not return an empty dictionary but raise a KeyError.
>>> test_dict['unknown_key']
KeyError: 'unknown_key'
The problem, to my knowledge, is that python does not know whether __getitem__ is being called in the context of setting an item, i.e. the first example, or in the context of getting and item, the second example.
I have already seen Python `defaultdict`: Use default when setting, but not when getting, but I do not think that this question is a duplicate, or that it answers my question.
Please let me know if you have any ideas.
Thanks in advance.
EDIT:
It is possible to achieve something similar using:
def set_nested_item(dict_in: Union[dict, TestDict], value, keys):
for i, key in enumerate(keys):
is_last = i == (len(keys) - 1)
if is_last:
dict_in[key] = value
else:
if key not in dict_in:
dict_in[key] = {}
else:
if not isinstance(dict_in[key], (dict, TestDict)):
dict_in[key] = {}
dict_in[key] = set_nested_item(dict_in[key], value, keys[(i + 1):])
return dict_in
class TestDict(UserDict):
def __init__(self):
super().__init__()
def __setitem__(self, key, value):
if isinstance(key, list):
self.update(set_nested_item(self, value, key))
else:
super().__setitem__(key, value)
test_dict[['level_1', 'level_2', 'level_3']] = 'Hello'
>>> test_dict
{
'level_1': {
'level_2': {
'level_3': 'Hello'
}
}
}
It's impossible.
test_dict['level_1']['level_2']['level_3'] = 'Hello'
is semantically equivalent to:
temp1 = test_dict['level_1'] # Should this line fail?
temp1['level_2']['level_3'] = 'Hello'
But... if determined to implement it anyway, you could inspect the Python stack to grab/parse the calling line of code, and then vary the behaviour depending on whether the calling line of code contains an assignment! Unfortunately, sometimes the calling code isn't available in the stack trace (e.g. when called interactively), in which case you need to work with Python bytecode.
import dis
import inspect
from collections import UserDict
def get_opcodes(code_object, lineno):
"""Utility function to extract Python VM opcodes for line of code"""
line_ops = []
instructions = dis.get_instructions(code_object).__iter__()
for instruction in instructions:
if instruction.starts_line == lineno:
# found start of our line
line_ops.append(instruction.opcode)
break
for instruction in instructions:
if not instruction.starts_line:
line_ops.append(instruction.opcode)
else:
# start of next line
break
return line_ops
class TestDict(UserDict):
def __getitem__(self, key):
try:
return super().__getitem__(key)
except KeyError:
# inspect the stack to get calling line of code
frame = inspect.stack()[1].frame
opcodes = get_opcodes(frame.f_code, frame.f_lineno)
# STORE_SUBSCR is Python opcode for TOS1[TOS] = TOS2
if dis.opmap['STORE_SUBSCR'] in opcodes:
# calling line of code contains a dict/array assignment
default = TestDict()
super().__setitem__(key, default)
return default
else:
raise
test_dict = TestDict()
test_dict['level_1']['level_2']['level_3'] = 'Hello'
print(test_dict)
# {'level_1': {'level_2': {'level_3': 'Hello'}}}
test_dict['unknown_key']
# KeyError: 'unknown_key'
The above is just a partial solution. It can still be fooled if there are other dictionary/array assignments on the same line, e.g. other['key'] = test_dict['unknown_key']. A more complete solution would need to actually parse the line of code to figure out where the variable occurs in the assignment.

How to parse json to get all values of a specific key within an array?

I'm having trouble trying to get a list of values from a specific key inside an json array using python. Using the JSON example below, I am trying to create a list which consists only the values of the name key.
Original JSON:
[
{
"id": 1,
"name": "Bulbasaur",
"type": [
"grass",
"poison"
]
},
{
"id": 2,
"name": "Ivysaur",
"type": [
"grass",
"poison"
]
}
]
Expected:
["Bulbasaur", "Ivysaur"]
Below is the code of my approach:
import json
try:
with open("./simple.json", 'r') as f:
contents = json.load(f)
except Exception as e:
print(e)
print(contents[:]["name"])
I'm trying to go to an approach where i don't need to loop every single index and append them, something like the code above. Is this approach possible using python' json library?
You cannot do contents[:]["name"] since contents is a list is a dictionary with integer indexes, and you cannot access an element from it using a string name.
To fix that, you would want to iterate over the list and get the value for key name for each item
import json
contents = []
try:
with open("./simple.json", 'r') as f:
contents = json.load(f)
except Exception as e:
print(e)
li = [item.get('name') for item in contents]
print(li)
The output will be
['Bulbasaur', 'Ivysaur']
This is not a real answer to the question. The real answer is to use a list comprehension. However, you can make a class that allows you to use specifically the syntax you tried in the question. The general idea is to subclass list so that a slice like [:] returns a special view (another class) into the list. This special view will then allow retrieval and assignment from all the dictionaries simultaneously.
class DictView:
"""
A special class for getting and setting multiple dictionaries
simultaneously. This class is not meant to be instantiated
in its own, but rather in response to a slice operation on UniformDictList.
"""
def __init__(parent, slice):
self.parent = parent
self.range = range(*slice.indices(len(parent)))
def keys(self):
"""
Retreives a set of all the keys that are shared across all
indexed dictionaries. This method makes `DictView` appear as
a genuine mapping type to `dict`.
"""
key_set = set()
for k in self.range:
key_set &= self.parent.keys()
return key_set
def __getitem__(self, key):
"""
Retreives a list of values corresponding to all the indexed
values for `key` in the parent. Any missing key will raise
a `KeyError`.
"""
return [self.parent[k][key] for k in self.range]
def get(self, key, default=None):
"""
Retreives a list of values corresponding to all the indexed
values for `key` in the parent. Any missing key will return
`default`.
"""
return [self.parent[k].get(key, default) for k in self.range]
def __setitem__(self, key, value):
"""
Set all the values in the indexed dictionaries for `key` to `value`.
"""
for k in self.range:
self.parent[k][key] = value
def update(self, *args, **kwargs):
"""
Update all the indexed dictionaries in the parent with the specified
values. Arguments are the same as to `dict.update`.
"""
for k in self.range:
self.parent[k].update(*args, **kwargs)
class UniformDictList(list):
def __getitem__(self, key):
if isinstance(key, slice):
return DictView(self, key)
return super().__getitem__(key)
Your original code would now work out of the box with just one additional wrap in UniformDictList:
import json
try:
with open("./simple.json", 'r') as f:
contents = UniformDictList(json.load(f))
except Exception as e:
print(e)
print(contents[:]["name"])
Try this with list comprehensions:
print([d["name"] for d in contents])

Detecting Duplicates in Class Instance's Variable Values

class Translator(object):
def __init__(self, tracking_col='tracking_id', coding_col='coding', qualifying_code_col='qualifying_code',
translation_col='translation'):
self._results = []
self.tracking_col = tracking_col
self.data_col = coding_col
self.definition_col = qualifying_code_col
self.translation_col = translation_col
self.__validate_parameters(self.__dict__)
def __validate_parameters(self, variable_values):
class_values = {}
for key, value in variable_values.items():
if type(value) is str:
class_values.setdefault(value, set()).add(key)
for key, values in class_values.items():
# If there is more than one value, there is a duplicate
if len(values) > 1:
raise Exception('Duplicate column names exist in parameters. \'{}\' are set to \'{}\'. '
'Do not use duplicate column names.'.format(values, key))
This class cannot have the duplicate values for any of the 'col' variables. If duplicate values exist, logic further in the class may not crash but will create unpredictable results.
Upon instantiation my function __validate_parameters will detect duplicate values and raise an Exception. The problem is I am dumping all the values out to a dictionary, iterating to create another dictionary, and finally manually raising an exception (which from what I've been told is the wrong thing to do in any situation). It's also rather verbose.
Is there a shorter and more concise way to validate for duplicates while propogating an error up without the complexity above?
There is nothing wrong with manually raising an exception. Collecting your cols in some collection will make validation easier:
class Translator(object):
def __init__(self, tracking_col=..., coding_col=..., qualifying_code_col=...,
translation_col=...):
self._results = []
self.cols = [tracking_col, coding_col, qualifying_code_col, translation_col]
self.validate_cols(self)
def validate_cols(self):
if len(self.cols) > len(set(self.cols)):
raise ...
#property
def tracking_col(self):
return cols[0]
# ...
You could make the constructor take a dictionary instead of individual variables, e.g.
class Translator(object):
def __init__(self, cols={}):
defaults = { "tracking_col" : "tracking_id",
"coding_col" : "coding",
"qualifying_code_col" : "qualifying_code",
"translation_col" : "translation" }
for d in defaults:
if d not in cols:
cols[d] = defaults[d]
self.__validate_parameters(cols)
def __validate_parameters(self, d):
import Counter
c = Counter.Counter(d.values())
if any(cnt > 1 for cnt in c.values()):
raise Exception("Duplicate values found: '%s'" % str(c))
(Code not tested)

Python Pickle not saving entire object

I'm trying to pickle out a list of objects where the objects contain a list. When I open the pickled file I can see any data in my objects except from the list. I'm putting code below so this makes more sense.
Object that contains a list.
class TestPickle:
testNumber = None
testList = []
def addNumber(self, value):
self.testNumber = value
def getNumber(self):
return self.testNumber
def addTestList(self, value):
self.testList.append(value)
def getTestList(self):
return self.testList
This example I create a list of the above object (I'm adding one object to keep it brief)
testPKL = TestPickle()
testList = []
testPKL.addNumber(12)
testPKL.addTestList(1)
testPKL.addTestList(2)
testList.append(testPKL)
with open(os.path.join(os.path.curdir, 'test.pkl'), 'wb') as f:
pickle.dump(testList, f)
Here is an example of me opening the pickled file and trying to access the data, I can only retrieve the testNumber from above, the testList returns a empty list.
pklResult = None
with open(os.path.join(os.path.curdir, 'test.pkl'), 'rb') as f:
pklResult = pickle.load(f)
for result in pklResult:
print result.getNumber() # returns 12
print result.testNumber # returns 12
print result.getTestList() # returns []
print result.testList # returns []
I think i'm missing something obvious here but I'm not having any luck spotting it. Thanks for any guidance.
testNumber and testList both are class attributes initially. testNumber is of immutable type hence modifying it create new instance attribute, But testList is of mutable type and can be modified in place. Hence modifying testList doesn't create new instance attribute and it remains as class attribute.
You can verify it -
print testPKL.__dict__
{'testNumber': 12}
print result.__dict__
{'testNumber': 12}
So when you access result.testList, it looks for class attribute TestPickle.testList, which is [] in your case.
Solution
You are storing instance in pickle so use instance attribute. Modify TestPickle class as below -
class TestPickle:
def __init__(self):
self.testNumber = None
self.testList = []
def addNumber(self, value):
self.testNumber = value
def getNumber(self):
return self.testNumber
def addTestList(self, value):
self.testList.append(value)
def getTestList(self):
return self.testList

How to modify ndb.Query object?

Let's assume that we the following ndb model:
class MyModel(ndb.Model):
x = ndb.StringProperty()
y = ndb.StringProperty()
z = ndb.StringProperty(repeated=True)
We have a method that creates a query for the above model, executes it and fetch the results. However, we want this query to be modified my other functions. Specifically, we have the following:
def method_a():
qry = MyModel.query()
values = {'query':qry}
method_b(**values)
entities = qry.fetch()
def method_b(**kwargs):
k = ['a', 'b', 'c']
qry = kwargs['query']
qry.filter(MyModel.z.IN(k))
The problem is that the Query object is immutable, and thus it cannot be modified by method_b. Also, based on the specific architecture of the code, we cannot have method_b to return the new Query to method_a.
Any ideas on how to achieve the aforementioned functionality in another way??
Update: Please check the architecture of my code as presented below:
First, in a configuration file we specify a list of modules and if they are enabled or not. These modules affect the filters of the query we want to execute.
testparams = {
'Test1': True,
'Test2': True,
'Test3': False,
'Test4': True
}
Then, we have a method somewhere in the code that makes a query after the appropriate modules have been executed. Thus, it seems like this:
def my_func():
qry = MyEntity.query()
# modules
query_wrapper = [qry]
values = {'param':'x', 'query_wrapper':query_wrapper} #other values also
execute_modules(**values)
# get query and add some more things, like ordering
entities = query_wrapper[0].fetch()
The execute_modules function is the following:
def execute_modules(**kwargs):
for k in config.testparams:
if config.testparams[k]:
if kwargs['param'] == 'x':
(globals()[k]).x(**kwargs)
elif kwargs['param'] == 'y':
(globals()[k]).y(**kwargs)
Finally, an indicative module is similar to the following:
class Test1():
#classmethod
def x(cls, *args, **kwargs):
qry = kwargs['query_wrapper'][0]
# do some stuff like adding filters
kwargs['query_wrapper'][0] = qry
Any proposals to modify this architecture to a better approach?
I'm not aware of a way to do this without having method_b either return or change a referenced parameter. You should use a technique to pass a variable by reference, like passing a class with parameters.
You can pass in the args in a refrence object such as a dict/list:
def modify_query(kwargs):
kwargs['qry'] = kwargs['qry'].filter(MyModel.z.IN(k))
qry = MyModel.query()
kwargs = {'qry': qry}
modify_query(kwargs)
result = kwargs['qry'].fetch()
It should be noted that this is an extremly dirty way to accomplish what you want to accomplish. Similarly, if you pass in a list with say one object, then you can modify the contents of said list (through assignment) to modify the object:
def modify_query(list_object):
list_object[0] = list_object[0].filter(...)
You can do some hack for replace it object by other. For example:
def f(args):
qry = args[0]
qry_new = qry.filter(Model.a == 2)
args[0] = qry_new
qry = Model.query()
args = [qry]
f(args)
qry = args[0]

Categories