Overriding a method that yields - python

I am new to Python 3 concepts.
The base class has following functions:
class DocumentFormatter(object):
def transform_element(self, key, value):
if isinstance(value, list):
for li, lv in enumerate(value):
for inner_k, inner_v in self.transform_element(
"%s.%s" % (key, li), lv):
yield inner_k, inner_v
elif isinstance(value, dict):
formatted = self.format_document(value)
for doc_key in formatted:
yield "%s.%s" % (key, doc_key), formatted[doc_key]
else:
# We assume that transform_value will return a 'flat' value,
# not a list or dict
yield key, self.transform_value(value)
def format_document(self, document):
def flatten(doc, path):
top_level = (len(path) == 0)
if not top_level:
path_string = ".".join(path)
for k in doc:
v = doc[k]
if isinstance(v, dict):
path.append(k)
for inner_k, inner_v in flatten(v, path):
yield inner_k, inner_v
path.pop()
else:
transformed = self.transform_element(k, v)
for new_k, new_v in transformed:
if top_level:
yield new_k, new_v
else:
yield "%s.%s" % (path_string, new_k), new_v
return dict(flatten(document, []))
In the derived class, I only want to change a bit of the transform_element function:
def transform_element(self, key, value):
if key=="cats":
yield key, self.transform_value(value)
else:
yield super().transform_element(key,value)
If the key is "cats", I want to yield using my logic. Else, I want the base class implementation to work. Am I supposed to call yield or should I call return?

you should iterate over the overridden method and yield each item of it, otherwise you will end up with a generator of a generator which is not what you need.
So your method should look like:
def transform_element(self, key, value):
if key=="cats":
yield key, self.transform_value(value)
else:
for item in super().transform_element(key,value):
yield item

Related

Dictionary comprehension inside insert() method not working

I am making a MappingList class which is a list implemented as an OrderedDict.
This is the MappingList class (some methods omitted):
class MappingList(MutableSequence):
"""
A MappingList is a regular list implemented as a dictionary
"""
def __repr__(self):
return str(list(self.seq.values()))
def __getitem__(self, item):
try:
return self.seq[item]
except KeyError:
_traceback_from_none(IndexError, "list index out of range")
def __setitem__(self, key, value, *, usage=None):
if key > max(self.seq.keys()) and usage != "append":
raise IndexError("list index out of range")
self.seq[key] = value
def __delitem__(self, key):
try:
del self.seq[key]
except KeyError:
_traceback_from_none(IndexError, "list index out of range")
def __len__(self):
return len(self.seq)
def __eq__(self, other):
if not isinstance(other, MappingList):
return NotImplemented
return self.seq == other.seq
#classmethod
def _dict_from_seq(cls, seq):
return OrderedDict(enumerate(seq))
def _next_available_slot(self):
return max(self.seq) + 1
def insert(self, index, value): # todo: insert() should not overwrite
"""Insert a value into the MappingList"""
if index > max(self.seq.keys()):
raise IndexError("list index out of range")
for k, v in {k: v for k, v in self.seq.items() if k > index}:
del self.seq[k]
self.seq[k + 1] = v
self[index] = value
When I try to insert an item into a MappingList, I get the following error:
File "C:\...\My Python Programs\free_time\mappinglist.py", line 103, in test_insert
self.li.insert(1, MappingList(["blah", 1, 5.8]))
File "C:\...\My Python Programs\free_time\mappinglist.py", line 85, in insert
for k, v in {k: v for k, v in self.seq.items() if k > index}:
TypeError: cannot unpack non-iterable int object
Why is this error happening? Does OrderedDict.items() return an integer?
The error doesn't happen due to that.
When you don't provide keys(), values(), items(), python iterates over the keys by default. You need to provide items() to tell python to get the keys and values.
for k, v in {k: v for k, v in self.seq.items() if k > index}.items():

Recursive function return None

I have a function that runs over an API output and should return the path to specific key.
Here is the function:
def find_path(obj, val, path=''):
if isinstance(obj, dict):
for k, v in obj.items():
if k == val:
return f'{path}[{k!r}]'
return find_path(v, val, path + f'[{k!r}]')
elif isinstance(obj, list):
for i, v in enumerate(obj):
if v == val:
return f'{path}[{i!r}]'
return find_path(v, val, path + f'[{i!r}]')
API output Example:
ex = {'Resources': [{'uschemas': {'emailSelfUpdateAllowed': True,
'emailVerificationDays': 30,
'approvers': {'manager': False,
'secondLevelManager': False,
'owner': False,
'workGroup': 'workgroup'}}}]}
When I run the function I get None:
bla = find_path(ex, 'approvers')
print(bla)
>>> None
I expect to get:
['Resources'][0]['uschemas']['approvers']
I can only get the expected output when I am using the function with print instead of return.
Can someone help me to understand why? and how can I make it work with returns and not prints because I need to use its output.
Thank you.
You need to return value after the for loop is done, this is the value returns from the recursion, if you don't have anything it will return None
def find_path(obj, val, path=''):
p = []
if isinstance(obj, dict):
for k, v in obj.items():
if k == val:
return f'{path}[{k!r}]'
p = find_path(v, val, path + f'[{k!r}]')
return p
elif isinstance(obj, list):
for i, v in enumerate(obj):
if v == val:
return f'{path}[{i!r}]'
p = find_path(v, val, path + f'[{i!r}]')
return p
bla = find_path(ex, 'approvers')
print(bla) # ['Resources'][0]['uschemas']['approvers']

Python dictionary not adding subsequent keys after the first

Fairly new to Python and I can not figure this out. I go to add a key to a dictionary and it adds it fine. I can even update that same key with a new value, however when I go to add a second key to the dictionary, it does not add the second key value pair.
class CountedSet:
def __init__(self):
self.data = {}
def __iadd__(self,other):
if isinstance(other,int):
self.data[other] = self.data.get(other, 0) + 1
return self
elif isinstance(other,CountedSet):
#TODO::iterate through second countedSet and update self
return self
def __add__(self,obj):
for key, value in obj.data.items():
if len(self.data) == 0:
self.data[key] = value
elif self.data[key]:
self.data[key] = self.data[key] + value
else:
self.data[key] = value
return self
def __getitem__(self,item):
if item in self.data:
return self.data.get(item)
else:
return None
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value))
a = CountedSet()
a += 17
a += 4
print(a)
This simply outputs {17,1} when I would expect to see {17,1} {4,1}
Your __str__ implementation returns on the first iteration of the for-loop:
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value)) # here
Maybe you want something like:
def __str__(self):
return " ".join([{"{%s,%s}" % (k,v) for k, v in self.data.items()])
Or, without the comprehension:
def __str__(self):
items = []
for key, value in self.data.items():
items.append("{%s,%s}" % (key,value))
return ' '.join(items)

How can I read a dictionary with a list?

How can I read a list inside a dictionary and try to change string numbers to digits? For example:
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
I use this to change dictionary number strings to integers:
{k:int(v) if v.isdigit() else v for k,v in obj.items()}
But it doesn't work, so I was trying something like this:
for objs in obj:
if objs.isdigit():
k:int(v)
else:
for k,v in objs.items():
print k
But this fails as well.
this seems like a good problem for recursion
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
def fix_ints(obj):
if isinstance(obj,basestring):
try:
return int(obj)
except ValueError:
print "I cant Make %r an int"%obj
return obj
elif isinstance(obj,(list,tuple)):
return [fix_ints(item) for item in obj]
elif isinstance(obj,dict):
return dict((key,fix_ints(value)) for key,value in obj.items())
else:
print "I have no idea what to do with %r"%obj
new_obj = fix_ints(obj)
print new_obj
note that python does not support tail recursion so if this data structure goes very deep (greater than 1k levels of nesting) then recursion may not be appropriate ...
of coarse you can also do silly string tricks with it
import json,re
new_obj = json.loads(re.sub("\"(\d+)\"","\\1",json.dumps(obj)))
(although really you should do it like i do in my first exzample ... this second method is really just for fun)
String to number:
def int_it(obj):
if obj.isdigit():
obj = int(obj)
return obj
Dict to number (regardless of the number of nested dicts or lists):
class Convert(object):
def __init__(self, obj):
self.obj = obj
if isinstance(obj, dict):
self.handle_dict(obj)
def handle_dict(self, obj):
for key, value in obj.items():
if isinstance(value, str) and value.isdigit():
self.obj[key] = int_it(value)
elif isinstance(obj[key], list):
ins = HandleList(obj[key])
self.obj[key] = ins.obj
elif isinstance(obj[key], dict):
ins = Convert(obj.items())
self.obj[key] = ins.obj
return obj
List to numbers, regardless of the number of nested lists or dicts.
class HandleList(object):
def __init__(self, obj):
self.obj = obj
self.handle_list(obj)
def handle_list(self, obj):
for index, item in enumerate(obj):
if isinstance(item, list):
obj.index(index, [HandleList(val).obj for val in item])
elif isinstance(item, str):
obj.index(index, int_it(item))
elif isinstance(item, dict):
Convert(item)
return obj
output = Convert(values)
print(output.obj)
Returns:
{
'amarillo': 'xxx',
'naranja': [{'naranja_1': 1, 'naranja_2': 2}],
'rojo': [{'rojo_b': 2, 'rojo_a': 1}],
'azul': 4
}
Given the input:
values = {
'azul':'4',
'rojo': [
{'rojo_a':'1',
'rojo_b':'2'
}
],
'amarillo':'xxx',
'naranja': [
{'naranja_1':'1',
'naranja_2':'2'
}
]
}

Extracting all path from a multi-level dictionary

I have a dictionary like this:
dirDict = {"DIR1" : {
"DIR11" : {
"DIR111" : "Maki111",
"DIR112" : "Maki112"
},
"DIR12" : "Maki12",
"DIR13" : {
"DIR131" : "Maki131"
}
}
}
Imagine this like a folder structure. And I would like to get similar as os.walk would do with a folder structure. Something like this:
["DIR1/DIR11/DIR111/Maki111",
"DIR1/DIR11/DIR112/Maki112",
"DIR1/DIR12/Maki12",
"DIR1/DIR13/DIR131/Maki131"]
So it is basically all the path for the dictionary values. I tried it many ways with recursive functions but I got lost.
Here is my latest trial:
def walk(input_dict, path_string = "", result = ""):
for key, value in input_dict.items():
if isinstance(value, dict):
path_string += "/" + key
print "==== DICT ====", "\nkey: ", key, "\nvalue: ", value, "\n\t\tpath_string: ", path_string
result = walk(value, path_string)
print "\t\t\t\tresulting: ", result
elif isinstance(value, str):
print "==== NOT DICT ===="
path_string += "/" + value
print "\t\tpath_string: ", path_string, "\nvalue: ", value
return path_string
else:
path_string = "/" + key
result += "\n" + result
return result
Using Python 3:
dirDict = {"DIR1" : {
"DIR11" : {
"DIR111" : "Maki111",
"DIR112" : "Maki112"
},
"DIR12" : "Maki12",
"DIR13" : {
"DIR131" : "Maki131"
}
}
}
def recurse(d, prefix=None, sep='/'):
if prefix is None:
prefix = []
for key, value in d.items():
if isinstance(value, dict):
yield from recurse(value, prefix + [key])
else:
yield sep.join(prefix + [key, value])
print(list(recurse(dirDict)))
Output:
['DIR1/DIR13/DIR131/Maki131', 'DIR1/DIR11/DIR111/Maki111', 'DIR1/DIR11/DIR112/Maki112', 'DIR1/DIR12/Maki12']
def walk(d, path):
paths = []
if len(d) == 0:
return path
for k, v in d.iteritems():
child_path = path + k + '/'
if isinstance(v, basestring):
paths.append(child_path + v)
else:
paths.extend(walk(v, child_path))
return paths
THe walk function I posted at https://gist.github.com/nvie/f304caf3b4f1ca4c3884#gistcomment-1597937 can be used as a helper for your problem:
def walk(obj, parent_first=True):
# Top down?
if parent_first:
yield (), obj
# For nested objects, the key is the path component.
if isinstance(obj, dict):
children = obj.items()
# For nested lists, the position is the path component.
elif isinstance(obj, (list, tuple)):
children = enumerate(obj)
# Scalar values have no children.
else:
children = []
# Recurse into children
for key, value in children:
for child_path, child in walk(value, parent_first):
yield (key,) + child_path, child
# Bottom up?
if not parent_first:
yield (), obj
Your problem can be approached using something like this:
for path, value in walk(obj):
if isinstance(value, str): # leaf node
path_with_value = path + (value,)
print("/".join(path_with_value))
A compact solution with a list comprehension:
def f(v):
if isinstance(v, dict):
return dict_to_list(v)
elif isinstance(v, list):
return v
else:
return [v]
def dict_to_list(d):
return ['{}/{}'.format(k, i) for k, v in d.items() for i in f(v)]
lst = dict_to_list(dirDict)
lst.sort()
print('\n'.join(lst))

Categories