Overriding __getitem__ for a nested list? - python

I'm implementing an experimental QR code parser and I figured it would be handy to override a list's __getitem__ so that it takes a given mask into account, like this:
m = [[1, 0], [0, 1]]
def mask(m, i, j):
    """Return the bit at ``m[i][j]``, inverted where the mask applies.

    The mask applies on even rows (``i % 2 == 0``) and in the first column
    (``j == 0``); everywhere else the stored value is returned unchanged.
    """
    masked = i % 2 == 0 or j == 0
    bit = m[i][j]
    return int(not bit) if masked else bit
m2 = list_with_mask(m, mask)
n = m2[0][0]
How can I achieve it in the most Pythonic way?

Quick & dirty implementation, maybe it's better to inherit from the built-in list class.
Not directly what OP asked, but at least it's a start, and you can customize it for your needs.
class CustomNestedObject:
    """Recursive wrapper giving nested lists/tuples a custom ``__getitem__``.

    Every list or tuple becomes a container node whose children are
    themselves ``CustomNestedObject`` instances; any other object (including
    strings) becomes a leaf node holding the raw value. Indexing a container
    returns the raw value for a leaf child and the wrapper for a container
    child, so ``obj[3][1]`` style access keeps working at any depth.
    """

    ERRORS = {
        'element_doesnt_exist': "You don't have element with such index"
    }

    def __init__(self, obj):
        self._nested = []      # child nodes (only used by container nodes)
        self._value = None     # raw value (only used by leaf nodes)
        self._is_leaf = True   # distinguishes an empty list from a leaf
        # recursively parse obj to CustomNestedObject
        self._parse_to_self(obj)

    def __repr__(self):
        """Return the string form of the wrapped value or nested structure."""
        return str(self._unwrap())

    def __getitem__(self, index):
        """Return the child at ``index``.

        Raises:
            IndexError: if there is no child at ``index``. ``IndexError`` is
                a subclass of ``Exception``, so callers that caught the
                generic exception raised previously keep working.
        """
        try:
            child = self._nested[index]
        except IndexError:
            raise IndexError(self.ERRORS['element_doesnt_exist'])
        # Leaves hand back their raw value; containers stay wrapped so that
        # further indexing goes through this class again.
        return child._value if child._is_leaf else child

    def _parse_to_self(self, obj):
        """Populate this node from ``obj`` (list/tuple -> container, else leaf)."""
        if isinstance(obj, (list, tuple)):
            self._is_leaf = False
            self._nested.extend(CustomNestedObject(item) for item in obj)
        else:
            # save the object itself if it is not a list or tuple
            self._value = obj

    def _unwrap(self):
        """Rebuild the plain Python value (nested lists) this node represents."""
        if self._is_leaf:
            return self._value
        return [child._unwrap() for child in self._nested]


if __name__ == '__main__':
    x = CustomNestedObject([1, 2, 3, [4, 5]])
    print(x[3][1])
    print(x[0])
    # Deliberately out of range: demonstrates the error raised by __getitem__.
    print(x[9])

Related

How to get Python iterators not to communicate with each other?

Here's a simple iterator through the characters of a string.
class MyString:
    """Iterate over the characters of a string, acting as its own iterator.

    The cursor is stored on the instance and ``__iter__`` returns ``self``,
    so every ``iter()`` call yields the same object and shares one position.
    """

    def __init__(self, s):
        self._ix = 0
        self.s = s

    def __iter__(self):
        return self

    def __next__(self):
        try:
            char = self.s[self._ix]
        except IndexError:
            # Rewind before signalling exhaustion so the next loop restarts.
            self._ix = 0
            raise StopIteration
        else:
            self._ix += 1
            return char


string = MyString('abcd')
iter1, iter2 = iter(string), iter(string)
print(next(iter1))
print(next(iter2))
Trying to get this iterator to function like it should. There are a few requirements. First, the __next__ method MUST raise StopIteration and multiple iterators running at the same time must not interact with each other.
I accomplished objective 1, but need help on objective 2. As of right now the output is:
'a'
'b'
When it should be:
'a'
'a'
Any advice would be appreciated.
Thank you!
MyString acts as its own iterator much like a file object
>>> f = open('deleteme', 'w')
>>> iter(f) is f
True
You use this pattern when you want all iterators to affect each other - in this case advancing through the lines of a file.
The other pattern is to use a separate class to iterate much like a list whose iterators are independent.
>>> l = [1, 2, 3]
>>> iter(l) is l
False
To do this, move the _ix indexer to a separate class that references MyString. Have MyString.__iter__ create an instance of the class. Now you have a separate indexer per iterator.
class MyString:
    """Iterable string wrapper; each ``iter()`` call gets its own cursor."""

    def __init__(self, s):
        self.s = s

    def __iter__(self):
        return MyStringIter(self)


class MyStringIter:
    """Independent iterator over the characters of one ``MyString``."""

    def __init__(self, my_string):
        self.my_string = my_string
        self._ix = 0

    def __iter__(self):
        return self

    def __next__(self):
        try:
            char = self.my_string.s[self._ix]
        except IndexError:
            raise StopIteration
        else:
            self._ix += 1
            return char


string = MyString('abcd')
iter1, iter2 = iter(string), iter(string)
print(next(iter1))
print(next(iter2))
Your question title asks how to get iterators, plural, to not communicate with each other, but you don't have multiple iterators, you only have one. If you want to be able to get distinct iterators from MyString, you can add a copy method:
class MyString:
    """Self-iterating string wrapper with ``copy`` for independent cursors.

    An instance is its own iterator, so the position is shared by everything
    that iterates the same instance; ``copy`` returns a fresh instance over
    the same string with its cursor at the start.
    """

    def __init__(self, s):
        self._ix = 0
        self.s = s

    def __iter__(self):
        return self

    def __next__(self):
        try:
            char = self.s[self._ix]
        except IndexError:
            # Rewind before signalling exhaustion so the next loop restarts.
            self._ix = 0
            raise StopIteration
        else:
            self._ix += 1
            return char

    def copy(self):
        return MyString(self.s)


string = MyString('abcd')
iter1, iter2 = string.copy(), string.copy()
print(next(iter1))
print(next(iter2))

__getitem__ a 2d array

I am getting a weird error when trying to make the getitem method.
My code is:
def __getitem__(self, item):
    """Index the flat ``values`` or, for multi-row data, ``twoDim`` by pair.

    When ``self.shape[0]`` is 1, ``item`` indexes ``self.values`` directly.
    Otherwise ``item`` must unpack into exactly two indices ``(x, y)``; a
    plain int (as produced by ``obj[1][0]``) cannot be unpacked and raises
    ``TypeError``.
    """
    if self.shape[0] != 1:
        row, col = item
        return self.twoDim[row][col]
    return self.values[item]
I can't see where my mistake is, when I try
assert my_array[1][0] == 4
I get this error under:
x, y = item
TypeError: cannot unpack non-iterable int object
Any idea what the problem is?
Thanks for any tips
Doing array[0][1] first passes 0 into the __getitem__ function, and then indexes whatever that call returns with 1. With your implementation, you cannot do this, because the first call receives a plain int that cannot be unpacked into x, y. You must pass a tuple of values to the first __getitem__ call instead:
class Array:
    """Minimal 2x2 array showing what ``__getitem__`` receives as ``item``."""

    def __init__(self):
        self.twoDim = [[1, 2], [3, 4]]
        self.values = None
        self.shape = (2, 2)

    def __getitem__(self, item):
        # Show exactly what the subscript expression passed in.
        print(f"{item=}")
        if self.shape[0] != 1:
            row, col = item
            return self.twoDim[row][col]
        return self.values[item]


array = Array()
try:
    # Chained indexing passes the int 1, which cannot be unpacked.
    print(array[1][0])  # item=1
except TypeError:
    pass
print("------")
print(array[1, 1] == 4)  # item=(1, 1)
# True

Objects passing objects

I'm new to python and am currently trying to use an old module to output graphs. The code below is an excerpt from the module that uses rpy to design
standard celeration charts (don't look it up).
I'm having trouble understanding how the class Element and class Vector work together.
I've been trying to pass an Element object to the Vector's get_elements method, but I'm not sure if that's what I should be doing.
Any help would be appreciated. Thanks!
class Element(object):
    """Base class for Chartshare vector elements.

    ``text`` is a property backed by ``value``: reading or assigning either
    name reads or updates the same underlying data.
    """

    def __init__(self, offset=0, value=0):
        self.offset = offset
        # The original also executed ``self.text = ''`` here. Because ``text``
        # is a property that writes to ``value``, that silently replaced the
        # ``value`` argument with an empty string, so it is omitted.
        self.value = value

    def setText(self, value):
        """Store ``value`` (setter behind the ``text`` property)."""
        self.value = value

    def getText(self):
        """Return the stored value (getter behind the ``text`` property)."""
        return self.value

    text = property(getText, setText)
class Vector(object):
    """Base class for Chartshare Vectors.

    A vector maps integer offsets in ``start..end`` (inclusive) to values
    held in ``self.elements``; the string ``'NaN'`` marks an offset without a
    value. When ``continuous`` is false every offset is pre-filled with
    ``'NaN'``. When it is true ``elements`` starts empty and offsets that
    were never assigned are treated as ``'NaN'``.

    ``SymbolOutOfRange``, ``InvalidLinetype``, ``StringIO``, ``XMLGenerator``
    and ``r`` are expected to be provided by the surrounding module.
    """

    def __init__(self, name='', color='black', linetype='o', symbol=1, clutter=0, start=0, end=140, continuous=False, debug=False):
        # NOTE: ``clutter`` is accepted but not used anywhere in this class.
        self.name = name
        self.color = color
        self.linetype = linetype  # validated by the ``linetype`` property
        self.symbol = symbol      # validated by the ``symbol`` property
        self.start = start
        self.end = end
        self.elements = {}
        self.debug = debug
        self.continuous = continuous
        if not self.continuous:
            for i in self._offsets():
                self.elements[i] = 'NaN'

    def _offsets(self):
        """Return the inclusive range of offsets covered by this vector."""
        return range(self.start, self.end + 1)

    def _element_at(self, offset):
        """Return the element at ``offset``, or ``'NaN'`` if none was stored."""
        return self.elements.get(offset, 'NaN')

    def getSymbol(self):
        return self._symbol

    def setSymbol(self, value):
        """Set the plot symbol.

        Ints must lie in 0..18 (else ``SymbolOutOfRange``); for strings the
        first character is used (an empty string falls back to 1); any other
        type falls back to 1.
        """
        if type(value) == int:
            if 0 <= value <= 18:
                self._symbol = value
            else:
                raise SymbolOutOfRange("Symbol should be an integer between 0 and 18.")
        elif type(value) == str:
            try:
                self._symbol = value[0]
            except IndexError:
                self._symbol = 1
        else:
            self._symbol = 1

    symbol = property(getSymbol, setSymbol)

    def getLinetype(self):
        return self._linetype

    def setLinetype(self, value):
        """Set the line type; only ``'p'``, ``'o'`` and ``'l'`` are accepted."""
        if value in ('p', 'o', 'l'):
            self._linetype = value
        else:
            raise InvalidLinetype("Line type should be 'o', 'p', or 'l'")

    linetype = property(getLinetype, setLinetype)

    def get_elements(self):
        """Returns a list with the elements of a Vector.

        Non-continuous vectors return one entry per offset (``'NaN'`` where
        nothing was stored); continuous vectors return only the stored,
        non-``'NaN'`` values.
        """
        values = [self._element_at(i) for i in self._offsets()]
        if self.continuous:
            values = [value for value in values if value != 'NaN']
        return values

    def get_offsets(self):
        """Returns a list of the offsets of a Vector.

        The result lines up one-to-one with ``get_elements()``: every offset
        for a non-continuous vector, and only offsets holding a real value
        for a continuous one. (The original tested ``== 'NaN'`` here, which
        returned exactly the offsets that ``get_elements`` leaves out.)
        """
        if not self.continuous:
            return list(self._offsets())
        return [i for i in self._offsets() if self._element_at(i) != 'NaN']

    def to_xml(self, container=False):
        """Writes an xml representation of the Vector and returns the container.

        If ``container`` is left at its default, a new ``StringIO`` buffer is
        created; returning it lets the caller read the generated XML.
        """
        if container == False:
            container = StringIO.StringIO()
        xml = XMLGenerator(container)
        attrs = {}
        attrs[u'name'] = u"%s" % self.name
        attrs[u'symbol'] = u"%s" % self.symbol
        attrs[u'linetype'] = u"%s" % self.linetype
        attrs[u'color'] = u"%s" % self.color
        xml.startElement(u'vector', attrs)
        for i in self._offsets():
            element = self._element_at(i)
            if element != 'NaN':
                attrs.clear()
                attrs[u'offset'] = u"%s" % i
                xml.startElement(u'element', attrs)
                xml.characters(u"%s" % element)
                xml.endElement(u'element')
        xml.endElement(u'vector')
        return container

    def render(self):
        """Plots the current vector."""
        if self.debug:
            print("Rendering Vector: %s" % self.name)
            print(self.elements)
        # NOTE(review): ``y`` receives the raw ``elements`` dict, as in the
        # original; confirm that the plotting backend accepts it.
        r.points(x=range(self.start, self.end + 1),
                 y=self.elements,
                 col=self.color,
                 type=self.linetype,
                 pch=self.symbol)
        if self.debug:
            print("Finished rendering Vector: %s" % self.name)
Vector's get_elements() doesn't take any arguments. Well, technically it does. It takes self. self is syntactic sugar that lets you do this:
vec = Vector()
vec.get_elements()
It's equivalent to this:
vec = Vector()
Vector.get_elements(vec)
Since get_elements() doesn't take any arguments, you can't pass a to it. Skimming the code, I don't see a set_elements() analog. This means you'll have to modify the vector's element's dictionary directly.
vec = Vector()
vec.elements[a] = ...
print(vec.get_elements()) # >>> [a,...]
As I can see, there is no place in this code where you are assigning self.elements with any input from a function. You are only initialising it or obtaining values
Also note that the .get_elements() function doesn't have any arguments (only self, that is the object where you are calling it in), so of course it won't work.
Unless you can do something such as the following, we would need more code to understand how to manipulate and connect these two objects.
element_obj = Element()
vector_obj = Vector()
position = 4
vector_obj.elements[4] = element_obj
I got to this answer as follows: as far as I can see, the elements attribute of the Vector class is a dictionary which, when you call vector_obj.get_elements(), is converted to a list using the start and end parameters as delimiters.
Unless there is something else missing, this is the only way I can think of to add an element to a vector object. Otherwise, we would need some more code or context to understand how these classes interact with each other!
Hope it helps!

Why does overriding __contains__ break OrderedDict.keys?

I'm subclassing OrderedDict (CPython, 2.7.3) to represent a datafile. __getitem__ pulls a field out of the datafile and sets it on the current instance, similar to the code I've posted below. Now I would like to override __contains__ to return True if the field is in the dictionary or in the file on the disk, since it can be read either way. However, this seems to break OrderedDict's ability to inspect its keys.
from collections import OrderedDict
dictclass = OrderedDict
class Foo(dictclass):
    """Mapping that synthesises missing values and reports extra keys as present.

    A missing key is given the value ``key*2`` on first access, and
    ``__contains__`` also answers True for anything containing ``'bar'``.
    """

    def __getitem__(self, key):
        try:
            return super(Foo, self).__getitem__(key)
        except KeyError:
            # Not stored yet: build the value, store it, and hand it back.
            data = key*2
            self[key] = data
            return data

    def __contains__(self, whatever):
        if super(Foo, self).__contains__(whatever):
            return True
        return 'bar' in whatever
a = Foo()
print a['bar']
print a.keys()
If you run the code above, you'll get this output:
barbar
[]
Note that if you change dictclass = dict in the above code, it still seems to work (giving the following output).
barbar
['bar']
Am I doing something horribly wrong?
When Foo.__contains__ is not defined:
a['bar']
calls Foo.__getitem__, which executes
self[key] = data
This calls OrderedDict.__setitem__, which is defined this way:
def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked list,
# and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[PREV]
last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
dict_setitem(self, key, value)
Since Foo.__contains__ is not defined,
if key not in self:
is True. So the key is properly added to self.__root and self.__map.
When Foo.__contains__ is defined,
if key not in self:
is False. So the key is not properly added to self.__root and self.__map.
Foo.__contains__ effectively fools OrderedDict.__setitem__ into thinking that the 'bar' key has already been added.
I found it helpful to play with the following code (adding print statements in __setitem__ and __iter__):
from collections import OrderedDict
dictclass = OrderedDict
class Foo(dictclass):
    """OrderedDict subclass instrumented with prints to trace key bookkeeping.

    ``__setitem__`` and ``__iter__`` re-implement the linked-list logic using
    the name-mangled ``_OrderedDict__root`` / ``_OrderedDict__map``
    attributes and print intermediate state along the way.
    NOTE(review): these attributes presumably exist only on the pure-Python
    OrderedDict implementation -- confirm before running elsewhere.
    """
    def __getitem__(self,key):
        # Return the stored value if there is one; otherwise build a value
        # (the key doubled), store it through __setitem__, and return it.
        try:
            return dictclass.__getitem__(self,key)
        except KeyError:
            pass
        data = key*2
        self[key] = data
        return data
    def __contains__(self,whatever):
        # Trace every membership test, then answer True for stored keys and
        # for anything that contains 'bar'.
        print('contains: {}'.format(whatever))
        return dictclass.__contains__(self,whatever) or 'bar' in whatever
    def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link at the end of the linked list,
        # and the inherited dictionary is updated with the new key/value pair.
        # Both 'key not in self' expressions below go through __contains__
        # above, so each one prints a 'contains:' line as well.
        print('key not in self: {}'.format(key not in self))
        if key not in self:
            root = self._OrderedDict__root
            last = root[PREV]
            # Each link is a three-item list [previous, next, key], spliced
            # in between the current last link and the root sentinel.
            last[NEXT] = root[PREV] = self._OrderedDict__map[key] = [last, root, key]
        dict_setitem(self, key, value)
    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        # Traverse the linked list in order.
        NEXT, KEY = 1, 2
        root = self._OrderedDict__root
        curr = root[NEXT]
        print('curr: {}'.format(curr))
        print('root: {}'.format(root))
        print('curr is not root: {}'.format(curr is not root))
        # Stop once the walk arrives back at the root sentinel.
        while curr is not root:
            yield curr[KEY]
            curr = curr[NEXT]
a = Foo()
print a['bar']
# barbar
print a.keys()
# ['bar']
Notice that you can avoid this problem by making Foo a subclass of collections.MutableMapping and delegating most of its behavior to a OrderedDict attribute:
import collections

try:
    from collections.abc import MutableMapping
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import MutableMapping

dictclass = collections.OrderedDict


class Foo(MutableMapping):
    """Mapping that delegates storage to an internal ``dictclass`` instance.

    Because ``Foo`` is not itself an OrderedDict, overriding ``__contains__``
    cannot interfere with the OrderedDict's own key bookkeeping. A missing
    key is given the value ``key*2`` on first access.
    """

    def __init__(self, *args, **kwargs):
        self._data = dictclass(*args, **kwargs)

    def __setitem__(self, key, value):
        self._data[key] = value

    def __delitem__(self, key):
        del self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    def __getitem__(self, key):
        try:
            return self._data[key]
        except KeyError:
            pass
        data = key*2
        self[key] = data
        return data

    def __contains__(self, whatever):
        # Test the wrapped mapping: ``dictclass.__contains__(self, ...)``
        # raises TypeError here because ``self`` is not a dict.
        return whatever in self._data or 'bar' in whatever
which yields
a = Foo()
print a['bar']
# barbar
print a.keys()
# ['bar']
even with __contains__ defined.
What breaks your code is the or 'bar' in whatever. If you remove it, it will work as with the change dictclass = dict you mention.
The __setitem__ implementation of OrderedDict is this:
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked list,
# and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[0]
last[1] = root[0] = self.__map[key] = [last, root, key]
return dict_setitem(self, key, value)
So with self["bar"] = "barbar", the test key in self should be False, but it is True even before any item has been inserted, so the if key not in self: branch is skipped. Thus, the key isn't added to self.__root, which is used in OrderedDict.__iter__:
def __iter__(self):
'od.__iter__() <==> iter(od)'
# Traverse the linked list in order.
root = self.__root
curr = root[1] # start at the first node
while curr is not root:
yield curr[2] # yield the curr[KEY]
curr = curr[1] # move to next node
Since the code for retrieving the keys uses this iterator and self.__root does not contain "bar", this particular key cannot be returned by keys().

How can I change in Python the return/input type of a list that is implemented as an class attribute?

EDIT (complete rephrase of the problem as the original version (see "original version", later) is misleading):
Here is the setting: I have a object which has a list of objects of type
<class 'One'>. I would like to access this list but rather work with objects
of type <class 'Two'> which is an enriched version of <class 'One'>.
Background (1):
One could be an object that can be stored easily via a ORM. The ORM would handle the list depending on the data model
Two would be an object like One but enriched by many features or the way it can be accessed
Background (2):
I try to solve a SQLAlchemy related question that I asked here. So, the answer to the present question could be also a solution to that question changing return/input type of SQLAlchemy-lists.
Here is some code for illustration:
import numpy as np
class One(object):
    """
    Data Transfer Object (DTO)

    Holds a name and a data payload, both of which must be plain ``str``.
    """

    def __init__(self, name, data):
        for argument in (name, data):
            assert type(argument) == str
        self.name, self.data = name, data

    def __repr__(self):
        return "{}({!r}, {!r})".format(
            self.__class__.__name__, self.name, self.data)
class Two(np.ndarray):
    """Read-only float array built from a ``One`` DTO's comma-separated data.

    The originating DTO is kept on the instance as ``_dto`` and its name is
    exposed through the read-only ``name`` property.
    """

    _DTO = One

    def __new__(cls, name, data):
        dto = cls._DTO(name, data)
        return cls.newByDTO(dto)

    # The decorator had been reduced to a ``#classmethod`` comment; without
    # it ``cls.newByDTO(dto)`` in ``__new__`` cannot bind ``cls``.
    @classmethod
    def newByDTO(cls, dto):
        """Alternate constructor: build the array from an existing DTO."""
        obj = np.fromstring(dto.data, dtype="float", sep=',').view(cls)
        obj.setflags(write=False)  # Immutable
        obj._dto = dto
        return obj

    @property
    def name(self):
        return self._dto.name
class DataUI(object):
    """Holds a list whose items must all be exactly of type ``One``."""

    def __init__(self, list_of_ones):
        # Reject anything that is not precisely a ``One`` instance.
        assert all(type(one) == One for one in list_of_ones)
        self.list_of_ones = list_of_ones
if __name__ == '__main__':
o1 = One('first object', "1, 3.0, 7, 8,1")
o2 = One('second object', "3.7, 8, 10")
my_data = DataUI ([o1, o2])
How to implement a list_of_twos which operates on list_of_ones but provides the user a list with elements of type Two:
type (my_data.list_of_twos[1]) == Two
>>> True
my_data.list_of_twos.append(Two("test", "1, 7, 4.5"))
print my_data.list_of_ones[-1]
>>> One('test', '1, 7, 4.5')
Original version of the question:
Here is an illustration of the problem:
class Data(object):
    """Named container for a list of data values."""

    def __init__(self, name, data_list):
        self.name, self.data_list = name, data_list
if __name__ == '__main__':
my_data = Data ("first data set", [0, 1, 1.4, 5])
I would like to access my_data.data_list via another list (e.g. my_data.data_np_list) that handles list-elements as a different type (e.g. as numpy.ndarray):
>>> my_data.data_np_list[1]
array(1)
>>> my_data.data_np_list.append(np.array(7))
>>> print my_data.data_list
[0, 1, 1.4, 5, 7]
You should use a property
class Data(object):
    """Named container for a list, with a NumPy view computed on demand."""

    def __init__(self, name, data_list):
        self.name = name
        self.data_list = data_list

    # The decorator had been reduced to a ``#property`` comment, which left
    # ``data_np_list`` as a plain method instead of an attribute.
    @property
    def data_np_list(self):
        """Return a new ``numpy`` array copied from ``data_list``.

        The array is rebuilt on every access, so changes made to it are not
        written back to ``data_list``.
        """
        # Imported locally because this snippet has no module-level import.
        import numpy
        return numpy.array(self.data_list)
if __name__ == '__main__':
my_data = Data ("first data set", [0, 1, 1.4, 5])
print my_data.data_np_list
edit: numpy uses a contiguous memory area. Python lists are arrays of references to separately allocated objects. You can't have both at the same time without paying a performance cost which will make the whole thing useless. They are different data structures.
No, you can't do it easily (or at all without losing any performance gain you might get in using numpy.array). You're wanting two fundamentally different structures mirroring one another, this will mean storing the two and transferring any modifications between the two; subclassing both list and numpy.array to observe modifications will be the only way to do that.
Not sure whether your approach is correct.
A property getter would help achieve what you're doing. Here's something similar using arrays instead of numpy.
I've made the array (or in your case numpy data type) the internal representation, with the conversion to list only done on demand with a temporary object returned.
import unittest
import array
class GotAGetter(object):
    """Stores integers in an ``array.array`` and exposes them as a list.

    ``data_array`` is the internal representation; ``data_list`` is a
    read-only property that builds a fresh list on every access.
    """

    def __init__(self, name, data_list):
        super(GotAGetter, self).__init__()
        self.name = name
        self.data_array = array.array('i', data_list)

    # The decorator had been reduced to a ``#property`` comment, which left
    # ``data_list`` as a plain method that assignment could overwrite.
    @property
    def data_list(self):
        return list(self.data_array)
class TestProperties(unittest.TestCase):
    """Checks the array-backed storage and the read-only ``data_list`` view."""

    def testProperties(self):
        data = [1, 3, 5]
        test = GotAGetter('fred', data)
        aString = str(test.data_array)
        lString = str(test.data_list)  # Here you go.
        # ``data_list`` has no setter, so assignment must fail. The message
        # text differs between Python versions and ``exc.message`` exists
        # only on Python 2, so only the exception type is checked.
        with self.assertRaises(AttributeError):
            test.data_list = 'oops'
        self.assertEqual(aString, "array('i', [1, 3, 5])",
                         "The array doesn't look right")
        self.assertEqual(lString, '[1, 3, 5]',
                         "The list property doesn't look right")
if __name__ == "__main__":
unittest.main()
One solution I just came up with would be to implement a View of the list via class ListView which takes the following arguments:
raw_list: a list of One-objects
raw2new: a function that converts One-objects to Two-objects
new2raw: a function that converts Two-objects to One-objects
Here is the code:
class ListView(list):
    """List-like view that presents the items of ``raw_list`` in converted form.

    Items are stored only in ``raw_list``; ``raw2new`` converts a stored item
    to the form handed to the user and ``new2raw`` converts it back.
    Mutations made through the view are applied to ``raw_list`` itself.
    Slicing and ``+`` return a new view over a *copy* of the raw items.

    NOTE: the inherited ``list`` storage stays empty, so ``list`` methods
    that are not overridden here (``extend``, ``insert``, ``index``, ...)
    do not see the viewed data.
    """

    def __init__(self, raw_list, raw2new, new2raw):
        self._data = raw_list
        self.converters = {'raw2new': raw2new,
                           'new2raw': new2raw}

    def __repr__(self):
        # One converted item per line; joining also yields "[]" for an empty
        # view (the original produced a lone "]").
        to_new = self.converters['raw2new']
        return "[" + ",\n ".join(repr(to_new(item)) for item in self._data) + "]"

    def append(self, item):
        self._data.append(self.converters['new2raw'](item))

    def pop(self, index=-1):
        """Remove the item at ``index`` (default: last) and return it converted."""
        return self.converters['raw2new'](self._data.pop(index))

    def __getitem__(self, index):
        # Python 3 routes slices through __getitem__, so handle them here.
        if isinstance(index, slice):
            return ListView(self._data[index], **self.converters)
        return self.converters['raw2new'](self._data[index])

    def __setitem__(self, key, value):
        self._data[key] = self.converters['new2raw'](value)

    def __delitem__(self, key):
        del self._data[key]

    def __getslice__(self, i, j):
        # Only called by Python 2 for simple ``view[i:j]`` slices.
        return ListView(self._data[i:j], **self.converters)

    def __contains__(self, item):
        return self.converters['new2raw'](item) in self._data

    def __add__(self, other_list_view):
        assert self.converters == other_list_view.converters
        return ListView(
            self._data + other_list_view._data,
            **self.converters
        )

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        if not isinstance(other, ListView):
            return NotImplemented
        return self._data == other._data

    def __ne__(self, other):
        # Without this, ``!=`` uses list.__ne__, which compares the (always
        # empty) inherited storage and therefore always reports "equal".
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __iter__(self):
        # Iterate over a converted snapshot of the raw items.
        to_new = self.converters['raw2new']
        return iter([to_new(item) for item in self._data])
Now, DataUI has to look something like this:
class DataUI(object):
    """Exposes one list of ``One`` objects both raw and as ``Two`` objects.

    ``list_of_twos`` is a ``ListView`` over ``list_of_ones``: it converts
    with ``Two.newByDTO`` on the way out and ``Two.getDTO`` on the way in.
    """

    def __init__(self, list_of_ones):
        # Reject anything that is not precisely a ``One`` instance.
        assert all(type(one) == One for one in list_of_ones)
        self.list_of_ones = list_of_ones
        self.list_of_twos = ListView(
            self.list_of_ones,
            raw2new=Two.newByDTO,
            new2raw=Two.getDTO,
        )
With that, Two needs the following method:
def getDTO(self):
    """Return the object stored in ``self._dto``."""
    return self._dto
The entire example would now look like the following:
import unittest
import numpy as np
class ListView(list):
    """List-like view that presents the items of ``raw_list`` in converted form.

    Items are stored only in ``raw_list``; ``raw2new`` converts a stored item
    to the form handed to the user and ``new2raw`` converts it back.
    Mutations made through the view are applied to ``raw_list`` itself.
    Slicing and ``+`` return a new view over a *copy* of the raw items.

    NOTE: the inherited ``list`` storage stays empty, so ``list`` methods
    that are not overridden here (``extend``, ``insert``, ``index``, ...)
    do not see the viewed data.
    """

    def __init__(self, raw_list, raw2new, new2raw):
        self._data = raw_list
        self.converters = {'raw2new': raw2new,
                           'new2raw': new2raw}

    def __repr__(self):
        # One converted item per line; joining also yields "[]" for an empty
        # view (the original produced a lone "]").
        to_new = self.converters['raw2new']
        return "[" + ",\n ".join(repr(to_new(item)) for item in self._data) + "]"

    def append(self, item):
        self._data.append(self.converters['new2raw'](item))

    def pop(self, index=-1):
        """Remove the item at ``index`` (default: last) and return it converted."""
        return self.converters['raw2new'](self._data.pop(index))

    def __getitem__(self, index):
        # Python 3 routes slices through __getitem__, so handle them here.
        if isinstance(index, slice):
            return ListView(self._data[index], **self.converters)
        return self.converters['raw2new'](self._data[index])

    def __setitem__(self, key, value):
        self._data[key] = self.converters['new2raw'](value)

    def __delitem__(self, key):
        del self._data[key]

    def __getslice__(self, i, j):
        # Only called by Python 2 for simple ``view[i:j]`` slices.
        return ListView(self._data[i:j], **self.converters)

    def __contains__(self, item):
        return self.converters['new2raw'](item) in self._data

    def __add__(self, other_list_view):
        assert self.converters == other_list_view.converters
        return ListView(
            self._data + other_list_view._data,
            **self.converters
        )

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # Iterate over a converted snapshot of the raw items.
        to_new = self.converters['raw2new']
        return iter([to_new(item) for item in self._data])

    def __eq__(self, other):
        if not isinstance(other, ListView):
            return NotImplemented
        return self._data == other._data

    def __ne__(self, other):
        # Without this, ``!=`` uses list.__ne__, which compares the (always
        # empty) inherited storage and therefore always reports "equal".
        result = self.__eq__(other)
        return result if result is NotImplemented else not result
class One(object):
    """
    Data Transfer Object (DTO)

    Holds a name and a data payload, both of which must be plain ``str``.
    """

    def __init__(self, name, data):
        for argument in (name, data):
            assert type(argument) == str
        self.name, self.data = name, data

    def __repr__(self):
        return "{}({!r}, {!r})".format(
            self.__class__.__name__, self.name, self.data)
class Two(np.ndarray):
    """Read-only float array built from a ``One`` DTO's comma-separated data.

    The originating DTO is kept on the instance as ``_dto``; it is exposed
    through ``getDTO()`` and its name through the read-only ``name`` property.
    """

    _DTO = One

    def __new__(cls, name, data):
        dto = cls._DTO(name, data)
        return cls.newByDTO(dto)

    # The decorator had been reduced to a ``#classmethod`` comment; without
    # it ``cls.newByDTO(dto)`` in ``__new__`` cannot bind ``cls``.
    @classmethod
    def newByDTO(cls, dto):
        """Alternate constructor: build the array from an existing DTO."""
        obj = np.fromstring(dto.data, dtype="float", sep=',').view(cls)
        obj.setflags(write=False)  # Immutable
        obj._dto = dto
        return obj

    @property
    def name(self):
        return self._dto.name

    def getDTO(self):
        """Return the DTO this array was built from."""
        return self._dto
class DataUI(object):
    """Exposes one list of ``One`` objects both raw and as ``Two`` objects.

    ``list_of_twos`` is a ``ListView`` over ``list_of_ones``: it converts
    with ``Two.newByDTO`` on the way out and ``Two.getDTO`` on the way in.
    """

    def __init__(self, list_of_ones):
        # Reject anything that is not precisely a ``One`` instance.
        assert all(type(one) == One for one in list_of_ones)
        self.list_of_ones = list_of_ones
        self.list_of_twos = ListView(
            self.list_of_ones,
            raw2new=Two.newByDTO,
            new2raw=Two.getDTO,
        )
class TestListView(unittest.TestCase):
    """Exercises the ``ListView`` operations through ``DataUI.list_of_twos``."""

    def testProperties(self):
        o1 = One('first object', "1, 3.0, 7, 8,1")
        o2 = One('second object', "3.7, 8, 10")
        my_data = DataUI([o1, o2])
        t1 = Two('third object', "4.8, 8.2, 10.3")
        t2 = Two('forth object', "33, 1.8, 1.0")
        # append:
        my_data.list_of_twos.append(t1)
        # __getitem__:
        np.testing.assert_array_equal(my_data.list_of_twos[2], t1)
        # __add__:
        np.testing.assert_array_equal(
            (my_data.list_of_twos + my_data.list_of_twos)[5], t1)
        # __getslice__:
        np.testing.assert_array_equal(
            my_data.list_of_twos[1:],
            my_data.list_of_twos[1:2] + my_data.list_of_twos[2:]
        )
        # __contains__:
        self.assertEqual(my_data.list_of_twos.__contains__(t1), True)
        # __setitem__ (the original line ended in a stray comma, which only
        # built and discarded a one-element tuple):
        my_data.list_of_twos.__setitem__(1, t1)
        np.testing.assert_array_equal(my_data.list_of_twos[1], t1)
        # __delitem__:
        l1 = len(my_data.list_of_twos)
        my_data.list_of_twos.__delitem__(1)
        l2 = len(my_data.list_of_twos)
        self.assertEqual(l1 - 1, l2)
        # __iter__: copying item by item must reproduce every element.
        my_data_2 = DataUI([])
        for two in my_data.list_of_twos:
            my_data_2.list_of_twos.append(two)
        self.assertEqual(len(my_data_2.list_of_twos),
                         len(my_data.list_of_twos))
if __name__ == '__main__':
unittest.main()

Categories