Python - next method not working properly with generator - python

I made a class in python that splits a stream of code in tokens and advances token by token to work with them
import re
class Tokenizer:
def __init__(self, input_file):
self.in_file = input_file
self.tokens = []
self.current_token = None
self.next_token = None
self.line = 1
def split_tokens(self):
''' Create a list with all the tokens of the input file '''
self.tokens = re.findall("\w+|[{}()\[\].;,+\-*/&|<>=~\n]", self.in_file)
def __iter__(self):
for token in self.tokens:
if token != '\n':
yield token
else:
self.line += 1
def advance(self):
self.current_token = self.next_token
self.next_token = next(self.__iter__())
After initialization:
text = 'constructor SquareGame03 new()\n\
{let square=square;\n\
let direction=direction;\n\
return square;\n\
}'
t = Tokenizer(text)
t.split_tokens()
t.advance()
It seems to work if i print the tokens
print(t.current_token, t.next_token)
None constructor
but every other call of the advance method give those results:
t.advance()
print(t.current_token, t.next_token)
constructor constructor
t.advance()
print(t.current_token, t.next_token)
constructor constructor
So it's not advancing and i can't understand why.

In this case, .__iter__ is implemented as a generator function (instead of a generator iterator) which returns a generator iterator.
Every time Tokenizer.advance is called, a new generator iterator is created and returned by .__iter__. Instead, an iterator should be stored by a Tokenizer object at the initialization stage for all subsequent usage.
For example:
import re
class Tokenizer:
def __init__(self, input_file):
self.in_file = input_file
self.tokens = []
self.current_token = None
self.next_token = None
self.line = 1
def split_tokens(self):
''' Create a list with all the tokens of the input file '''
self.tokens = re.findall("\w+|[{}()\[\].;,+\-*/&|<>=~\n]", self.in_file)
self.iterator = self.__iter__()
def __iter__(self):
for token in self.tokens:
if token != '\n':
yield token
else:
self.line += 1
def advance(self):
self.current_token = self.next_token
self.next_token = next(self.iterator)
Another minimal example that may explain:
def fib():
a = 0
b = 1
while True:
yield b
a, b = b, a + b
# 1, 1, 2, ...
fibs = fib()
next(fibs)
next(fibs)
next(fibs)
# 1, 1, 1, ...
next(fib())
next(fib())
next(fib())
By the way, I cannot see the reason to mixed the usage of a .__iter__ magic method and a separate .advance method. It might introduce some confusion.

Related

How to get Python iterators not to communicate with each other?

Here's a simple iterator through the characters of a string.
class MyString:
def __init__(self,s):
self.s = s
self._ix = 0
def __iter__(self):
return self
def __next__(self):
try:
item = self.s[self._ix]
except IndexError:
self._ix = 0
raise StopIteration
self._ix += 1
return item
string = MyString('abcd')
iter1 = iter(string)
iter2 = iter(string)
print(next(iter1))
print(next(iter2))
Trying to get this iterator to function like it should. There are a few requirements. First, the __next__ method MUST raise StopIteration and multiple iterators running at the same time must not interact with each other.
I accomplished objective 1, but need help on objective 2. As of right now the output is:
'a'
'b'
When it should be:
'a'
'a'
Any advice would be appreciated.
Thank you!
MyString acts as its own iterator much like a file object
>>> f = open('deleteme', 'w')
>>> iter(f) is f
True
You use this pattern when you want all iterators to affect each other - in this case advancing through the lines of a file.
The other pattern is to use a separate class to iterate much like a list whose iterators are independent.
>>> l = [1, 2, 3]
>>> iter(l) is l
False
To do this, move the _ix indexer to a separate class that references MyString. Have MyString.__iter__ create an instance of the class. Now you have a separate indexer per iterator.
class MyString:
def __init__(self,s):
self.s = s
def __iter__(self):
return MyStringIter(self)
class MyStringIter:
def __init__(self, my_string):
self._ix = 0
self.my_string = my_string
def __iter__(self):
return self
def __next__(self):
try:
item = self.my_string.s[self._ix]
except IndexError:
raise StopIteration
self._ix += 1
return item
string = MyString('abcd')
iter1 = iter(string)
iter2 = iter(string)
print(next(iter1))
print(next(iter2))
Your question title asks how to get iterators, plural, to not communicate with each other, but you don't have multiple iterators, you only have one. If you want to be able to get distinct iterators from MyString, you can add a copy method:
class MyString:
def __init__(self,s):
self.s = s
self._ix = 0
def __iter__(self):
return self
def __next__(self):
try:
item = self.s[self._ix]
except IndexError:
self._ix = 0
raise StopIteration
self._ix += 1
return item
def copy(self):
return MyString(self.s)
string = MyString('abcd')
iter1 = string.copy()
iter2 = string.copy()
print(next(iter1))
print(next(iter2))

Python Printing a Deque

I have an entire Deque Array class that looks like this:
from collections import deque
import ctypes
class dequeArray:
DEFAULT_CAPACITY = 10 #moderate capacity for all new queues
def __init__(self):
self.capacity = 5
capacity = self.capacity
self._data = self._make_array(self.capacity)
self._size = 0
self._front = 0
def __len__(self):
return self._size
def __getitem__(self, k): #Return element at index k
if not 0 <= k < self._size:
raise IndexError('invalid index')
return self._data[k]
def isEmpty(self):
if self._data == 0:
return False
else:
return True
def append(self, item): #add an element to the back of the queue
if self._size == self.capacity:
self._data.pop(0)
else:
avail = (self._front + self._size) % len(self._data)
self._data[avail] = item
self._size += 1
#def _resize(self, c):
#B = self._make_array(c)
#for k in range(self._size):
#B[k] = self._A[k]
#self._data = B
#self.capacity = capacity
def _make_array(self, c):
capacity = self.capacity
return (capacity * ctypes.py_object)()
def removeFirst(self):
if self._size == self.capacity:
self._data.pop(0)
else:
answer = self._data[self._front]
self._data[self._front] = None
self._front = (self._front + 1) % len(self._data)
self._size -= 1
print(answer)
def removeLast(self):
return self._data.popleft()
def __str__(self):
return str(self._data)
and when I try to print the deque in the main it prints out something like this,
<bound method dequeArray.__str__ of <__main__.dequeArray object at 0x1053aec88>>
when it should be printing the entire array. I think i need to use the str function and i tried adding
def __str__(self):
return str(self._data)
and that failed to give me the output. I also tried just
def __str__(self):
return str(d)
d being the deque array but I still am not having any success. How do I do i get it to print correctly?
you should call the str function of each element of the array that is not NULL, can be done with the following str function:
def __str__(self):
contents = ", ".join(map(str, self._data[:self._size]))
return "dequeArray[{}]".format(contents)
What I get when I try to q = dequeArray(); print(q) is <__main__.py_object_Array_5 object at 0x006188A0> which makes sense. If you want it list-like, use something like this (print uses __str__ method implicitly):
def __str__(self):
values = []
for i in range(5):
try:
values.append(self._data[i])
except ValueError: # since accessing ctypes array by index
# prior to assignment to this index raises
# the exception
values.append('NULL (never used)')
return repr(values)
Also, several things about the code:
from collections import deque
This import is never user and should be removed.
DEFAULT_CAPACITY = 10
is never used. Consider using it in the __init__:
def __init__(self, capacity=None):
self.capacity = capacity or self.DEFAULT_CAPACITY
This variable inside __init__ is never user and should be removed:
capacity = self.capacity
def _make_array(self, c):
capacity = self.capacity
return (capacity * ctypes.py_object)()
Though this is a valid code, you're doing it wrong unless you're absolutely required to do it in your assignment. Ctypes shouldn't be used like this, Python is a language with automated memory management. Just return [] would be fine. And yes, variable c is never used and should be removed from the signature.
if self._data == 0
In isEmpty always evaluates to False because you're comparing ctypes object with zero, and ctypes object is definitely not a zero.

updating a python generator after it has been created

Is there any way to do something like this in python 2.7?
def scaleit(g, k):
for item in g:
yield item*k
promise = ??????
# defines a generator for reference but not use:
# other functions can make use of it,
# but it requires a call to promise.fulfill() to
# define what the generator is going to yield;
# promise raises an error if next() is called
# before the promise is fulfilled
f = scaleit(promise, 3)
promise.fulfill(range(10))
for item in f:
print item
Yes; generators don't run until they're actually iterated, so you can just defer iterating the fulfilled promise's value until requested:
class Promise(object):
def fulfill(self, result):
self.result = result
def __iter__(self):
return iter(self.result)
def scaleit(g, k):
for item in g:
yield item*k
promise = Promise()
f = scaleit(promise, 3)
promise.fulfill(range(10))
print list(f)
Is this what you want?
def scaleit(g, k):
for item in g:
yield item * k
class Promise(object):
def __init__(self):
self.g = None
def fulfill(self, g):
self.g = iter(g)
def __iter__(self):
return self
def next(self):
return next(self.g)
promise = Promise()
f = scaleit(promise, 3)
promise.fulfill(range(10))
for item in f:
print item
I think you want the send() method on generators:
def gen():
reply = yield None
if not reply: # reply will be None if send() wasn't called
raise ValueError("promise not fulfilled")
yield 5
g1 = gen()
next(g1) # advance to the first yield
g1.send(True)
print(next(g1)) # prints 5
g2 = gen()
next(g2)
# forget to send
print(next(g2)) # raises ValueError

stay on same value in python iterator

I'm creating an interator like so:
some_list = [1,2,5,12,30,75,180,440]
iter(some_list)
I have a need to access the current value of an iterator again. Is there a current() method that allows me to stay on the same position?
You certainly can make a class which will allow you to do this:
from collections import deque
class RepeatableIter(object):
def __init__(self,iterable):
self.iter = iter(iterable)
self.deque = deque([])
def __iter__(self):
return self
#define `next` and `__next__` for forward/backward compatability
def next(self):
if self.deque:
return self.deque.popleft()
else:
return next(self.iter)
__next__ = next
def requeue(self,what):
self.deque.append(what)
x = RepeatableIter([1, 2, 3, 4, 5, 6])
count = 0
for i in x:
print i
if i == 4 and count == 0:
count += 1
x.requeue(i)
The question is really why would you want to?
You can use numpy.nditer to build your iterator, then you have many amazing options including the current value.
import numpy
rng = range(100)
itr = numpy.nditer([rng])
print itr.next() #0
print itr.next() #1
print itr.next() #2
print itr.value #2 : current iterator value
Adapting the third example from this answer:
class CurrentIterator():
_sentinal = object()
_current = _sentinal
#property
def current(self):
if self._current is self._sentinal:
raise ValueError('No current value')
return self._current
def __init__(self, iterable):
self.it = iter(iterable)
def __iter__(self):
return self
def __next__(self):
try:
self._current = current = next(self.it)
except StopIteration:
self._current = self._sentinal
raise
return current
next = __next__ # for python2.7 compatibility
Some interesting points:
use of _sentinal so an error can be raised if no current value exists
use of property so current looks like a simple attribute
use of __next__ and next = __next__ for Python 2&3 compatibility

Adapt an iterator to behave like a file-like object in Python

I have a generator producing a list of strings. Is there a utility/adapter in Python that could make it look like a file?
For example,
>>> def str_fn():
... for c in 'a', 'b', 'c':
... yield c * 3
...
>>> for s in str_fn():
... print s
...
aaa
bbb
ccc
>>> stream = some_magic_adaptor(str_fn())
>>> while True:
... data = stream.read(4)
... if not data:
... break
... print data
aaab
bbcc
c
Because data may be big and needs to be streamable (each fragment is a few kilobytes, the entire stream is tens of megabytes), I do not want to eagerly evaluate the whole generator before passing it to stream adaptor.
The "correct" way to do this is inherit from a standard Python io abstract base class. However it doesn't appear that Python allows you to provide a raw text class, and wrap this with a buffered reader of any kind.
The best class to inherit from is TextIOBase. Here's such an implementation, handling readline, and read while being mindful of performance. (gist)
import io
class StringIteratorIO(io.TextIOBase):
def __init__(self, iter):
self._iter = iter
self._left = ''
def readable(self):
return True
def _read1(self, n=None):
while not self._left:
try:
self._left = next(self._iter)
except StopIteration:
break
ret = self._left[:n]
self._left = self._left[len(ret):]
return ret
def read(self, n=None):
l = []
if n is None or n < 0:
while True:
m = self._read1()
if not m:
break
l.append(m)
else:
while n > 0:
m = self._read1(n)
if not m:
break
n -= len(m)
l.append(m)
return ''.join(l)
def readline(self):
l = []
while True:
i = self._left.find('\n')
if i == -1:
l.append(self._left)
try:
self._left = next(self._iter)
except StopIteration:
self._left = ''
break
else:
l.append(self._left[:i+1])
self._left = self._left[i+1:]
break
return ''.join(l)
Here's a solution that should read from your iterator in chunks.
class some_magic_adaptor:
def __init__( self, it ):
self.it = it
self.next_chunk = ""
def growChunk( self ):
self.next_chunk = self.next_chunk + self.it.next()
def read( self, n ):
if self.next_chunk == None:
return None
try:
while len(self.next_chunk)<n:
self.growChunk()
rv = self.next_chunk[:n]
self.next_chunk = self.next_chunk[n:]
return rv
except StopIteration:
rv = self.next_chunk
self.next_chunk = None
return rv
def str_fn():
for c in 'a', 'b', 'c':
yield c * 3
ff = some_magic_adaptor( str_fn() )
while True:
data = ff.read(4)
if not data:
break
print data
The problem with StringIO is that you have to load everything into the buffer up front. This can be a problem if the generator is infinite :)
from itertools import chain, islice
class some_magic_adaptor(object):
def __init__(self, src):
self.src = chain.from_iterable(src)
def read(self, n):
return "".join(islice(self.src, None, n))
Here's a modified version of John and Matt's answer that can read a list/generator of strings and output bytearrays
import itertools as it
from io import TextIOBase
class IterStringIO(TextIOBase):
def __init__(self, iterable=None):
iterable = iterable or []
self.iter = it.chain.from_iterable(iterable)
def not_newline(self, s):
return s not in {'\n', '\r', '\r\n'}
def write(self, iterable):
to_chain = it.chain.from_iterable(iterable)
self.iter = it.chain.from_iterable([self.iter, to_chain])
def read(self, n=None):
return bytearray(it.islice(self.iter, None, n))
def readline(self, n=None):
to_read = it.takewhile(self.not_newline, self.iter)
return bytearray(it.islice(to_read, None, n))
usage:
ff = IterStringIO(c * 3 for c in ['a', 'b', 'c'])
while True:
data = ff.read(4)
if not data:
break
print data
aaab
bbcc
c
alternate usage:
ff = IterStringIO()
ff.write('ddd')
ff.write(c * 3 for c in ['a', 'b', 'c'])
while True:
data = ff.read(4)
if not data:
break
print data
ddda
aabb
bccc
There is one called werkzeug.contrib.iterio.IterIO but note that it stores the entire iterator in its memory (up to the point you have read it as a file) so it might not be suitable.
http://werkzeug.pocoo.org/docs/contrib/iterio/
Source: https://github.com/mitsuhiko/werkzeug/blob/master/werkzeug/contrib/iterio.py
An open bug on readline/iter: https://github.com/mitsuhiko/werkzeug/pull/500
Looking at Matt's answer, I can see that it's not always necessary to implement all the read methods. read1 may be sufficient, which is described as:
Read and return up to size bytes, with at most one call to the underlying raw stream’s read()...
Then it can be wrapped with io.TextIOWrapper which, for instance, has implementation of readline. As an example here's streaming of CSV-file from S3's (Amazon Simple Storage Service) boto.s3.key.Key which implements iterator for reading.
import io
import csv
from boto import s3
class StringIteratorIO(io.TextIOBase):
def __init__(self, iter):
self._iterator = iter
self._buffer = ''
def readable(self):
return True
def read1(self, n=None):
while not self._buffer:
try:
self._buffer = next(self._iterator)
except StopIteration:
break
result = self._buffer[:n]
self._buffer = self._buffer[len(result):]
return result
conn = s3.connect_to_region('some_aws_region')
bucket = conn.get_bucket('some_bucket')
key = bucket.get_key('some.csv')
fp = io.TextIOWrapper(StringIteratorIO(key))
reader = csv.DictReader(fp, delimiter = ';')
for row in reader:
print(row)
Update
Here's an answer to related question which looks a little better. It inherits io.RawIOBase and overrides readinto. In Python 3 it's sufficient, so instead of wrapping IterStream in io.BufferedReader one can wrap it in io.TextIOWrapper. In Python 2 read1 is needed but it can be simply expressed though readinto.
If you only need a read method, then this can be enough
def to_file_like_obj(iterable, base):
chunk = base()
offset = 0
it = iter(iterable)
def up_to_iter(size):
nonlocal chunk, offset
while size:
if offset == len(chunk):
try:
chunk = next(it)
except StopIteration:
break
else:
offset = 0
to_yield = min(size, len(chunk) - offset)
offset = offset + to_yield
size -= to_yield
yield chunk[offset - to_yield:offset]
class FileLikeObj:
def read(self, size=-1):
return base().join(up_to_iter(float('inf') if size is None or size < 0 else size))
return FileLikeObj()
which can be used for an iterable yielding str
my_file = to_file_like_object(str_fn, str)
or if you have an iterable yielding bytes rather than str, and you want a file-like object whose read method returns bytes
my_file = to_file_like_object(bytes_fn, bytes)
This pattern has a few nice properties I think:
Not much code, which can be used for both str and bytes
Returns exactly what has been asked for in terms of length, in both of the cases of the iterable yielding small chunks, and big chunks (other than at the end of the iterable)
Does not append str/bytes - so avoids copying
Leverages slicing - so also avoids copying because a slice of a str/bytes that should be the entire instance will return exactly that same instance
For the bytes case, it's enough of a file-like object to pass through to boto3's upload_fileobj for multipart upload to S3
this is exactly what stringIO is for ..
>>> import StringIO
>>> some_var = StringIO.StringIO("Hello World!")
>>> some_var.read(4)
'Hell'
>>> some_var.read(4)
'o Wo'
>>> some_var.read(4)
'rld!'
>>>
Or if you wanna do what it sounds like
Class MyString(StringIO.StringIO):
def __init__(self,*args):
StringIO.StringIO.__init__(self,"".join(args))
then you can simply
xx = MyString(*list_of_strings)

Categories