Python multiple context managers in one class

I would like to be able to write code like this:
with obj.in_batch_mode:
    obj.some_attr = "some_value"
    obj.some_int = 142
    ...
when I want obj to hold off on sending updates about itself until several changes have been made. I have hooks on __setattr__ that take some time to run, and the changes can be sent together.
I do not want to use code like this, since it increases the risk of forgetting to leave batch_mode (which is what the with keyword is good for):
obj.enter_batch_mode()
obj.some_attr = "some_value"
obj.some_int = 142
...
obj.exit_batch_mode()
I have not been able to figure out how to implement this. Just writing with obj: (and implementing __enter__/__exit__ directly on obj) does not read anywhere near as descriptively.

Generally, a very simple way to implement context managers is to use the contextlib module. Writing a context manager becomes as simple as writing a single-yield generator: the section before the yield replaces the __enter__ method, the object yielded is the return value of __enter__, and the section after the yield replaces the __exit__ method. Any function on your class can be a context manager; it just needs to be decorated as such. For instance, take this simple ConsoleWriter class:
from contextlib import contextmanager
from sys import stdout
from io import StringIO
from functools import partial

class ConsoleWriter:
    def __init__(self, out=stdout, fmt=None):
        self._out = out
        self._fmt = fmt

    @property
    @contextmanager
    def batch(self):
        original_out = self._out
        self._out = StringIO()
        try:
            yield self
        except Exception:
            # There was a problem. Ignore batch commands.
            # (do not swallow the exception though)
            raise
        else:
            # no problem
            original_out.write(self._out.getvalue())
        finally:
            self._out = original_out

    @contextmanager
    def verbose(self, fmt="VERBOSE: {!r}"):
        original_fmt = self._fmt
        self._fmt = fmt
        try:
            yield self
        finally:
            # don't care about errors, just restore the old format
            self._fmt = original_fmt

    def __getattr__(self, attr):
        """creates function that writes capitalised attribute three times"""
        return partial(self.write, attr.upper()*3)

    def write(self, arg):
        if self._fmt:
            arg = self._fmt.format(arg)
        print(arg, file=self._out)
Example usage:
writer = ConsoleWriter()

with writer.batch:
    print("begin batch")
    writer.a()
    writer.b()
    with writer.verbose():
        writer.c()
    print("before reentrant block")
    with writer.batch:
        writer.d()
    print("after reentrant block")
    print("end batch -- all data is now flushed")
Output:
begin batch
before reentrant block
after reentrant block
end batch -- all data is now flushed
AAA
BBB
VERBOSE: 'CCC'
DDD
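The same @property + @contextmanager combination maps directly onto the batching use case from the question. Here is a minimal sketch under assumptions of my own: a hypothetical _batching flag and a send_updates() method standing in for the slow __setattr__ hooks:

from contextlib import contextmanager

class Obj:
    def __init__(self):
        self._batching = False

    @property
    @contextmanager
    def in_batch_mode(self):
        self._batching = True
        try:
            yield self
        finally:
            self._batching = False
            self.send_updates()  # flush all pending changes at once

    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        # skip private attributes, and suppress updates while batching
        if not name.startswith('_') and not self._batching:
            self.send_updates()

    def send_updates(self):
        print("sending updates")  # stand-in for the real slow hook

With this, with obj.in_batch_mode: reads exactly as the question wants, and the updates are sent once when the block exits.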

If you are after a simple solution and do not need nested mode changes (e.g. from STD to BATCH to VERBOSE, back to BATCH, back to STD):
class A(object):
    STD_MODE = 'std'
    BATCH_MODE = 'batch'
    VERBOSE_MODE = 'verb'

    def __init__(self):
        self.mode = self.STD_MODE

    def in_mode(self, mode):
        self.mode = mode
        return self

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.mode = self.STD_MODE
obj = A()
print obj.mode

with obj.in_mode(obj.BATCH_MODE) as x:
    print x.mode

print obj.mode
outputs
std
batch
std

This builds on Pynchia's answer, but adds support for multiple modes and allows nesting of with statements, even with the same mode multiple times. It scales O(#nested_modes) which is basically O(1).
Just remember to use stacks for data storage related to the modes.
class A:
    _batch_mode = "batch_mode"

    def __init__(self):
        # use a per-instance stack; a class-level list would be
        # shared between all instances
        self._mode_stack = []

    @property
    def in_batch_mode(self):
        self._mode_stack.append(self._batch_mode)
        return self

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self._mode_stack.pop()
        if self._batch_mode not in self._mode_stack:
            self.apply_edits()
and then I have these checks wherever I need them:
if self._batch_mode not in self._mode_stack:
    self.apply_edits()
It is also possible to use methods for modes:
with x.in_some_mode(my_arg):
    ...
Just remember to save my_arg on a stack within x, and to clear it from that stack when the mode is popped from the mode stack; see the sketch below.
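A minimal sketch of such a parameterised mode (my own illustration, not from the answer above): it subclasses the A above and keeps a hypothetical _arg_stack in step with the mode stack:

class B(A):
    _some_mode = "some_mode"

    def __init__(self):
        super().__init__()
        self._arg_stack = []  # one entry per active some_mode

    def in_some_mode(self, my_arg):
        self._mode_stack.append(self._some_mode)
        self._arg_stack.append(my_arg)
        return self

    def __exit__(self, type, value, tb):
        # pop the argument together with its mode
        if self._mode_stack and self._mode_stack[-1] == self._some_mode:
            self._arg_stack.pop()
        super().__exit__(type, value, tb)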
The code using this object can now be
with obj.in_batch_mode:
    obj.some_property = "some_value"
and there are no problems with nesting, so we can add another with obj.in_some_mode: anywhere, without hard-to-debug errors or having to check every function called to make sure the object's with-statements are never nested:
def b(obj):
    with obj.in_batch_mode:
        obj.some_property = "some_value"

x = A()
with x.in_batch_mode:
    x.my_property = "my_value"
    b(x)

Maybe something like this:
Implement a helper class:
class WithHelperObj(object):
    def __init__(self, obj):
        self.obj = obj

    def __enter__(self):
        self.obj.impl_enter_batch()

    def __exit__(self, exc_type, exc_value, traceback):
        self.obj.impl_exit_batch()

class MyObject(object):
    def in_batch_mode(self):
        return WithHelperObj(self)
In the class itself, implement the methods the helper calls (methods rather than fields), for use with the with statement:
    def impl_enter_batch(self):
        print 'In impl_enter_batch'

    def impl_exit_batch(self):
        print 'In impl_exit_batch'

    def doing(self):
        print 'doing'
Then use it:
o = MyObject()
with o.in_batch_mode():
    o.doing()


How to create a python class with a single use context

If we look at the Python docs, they state:
Most context managers are written in a way that means they can only be used effectively in a with statement once. These single use context managers must be created afresh each time they’re used - attempting to use them a second time will trigger an exception or otherwise not work correctly.
This common limitation means that it is generally advisable to create context managers directly in the header of the with statement where they are used (as shown in all of the usage examples above).
Yet, the example most commonly shared for creating context managers inside classes is:
from contextlib import ContextDecorator
import logging

logging.basicConfig(level=logging.INFO)

class track_entry_and_exit(ContextDecorator):
    def __init__(self, name):
        self.name = name

    def __enter__(self):
        logging.info('Entering: %s', self.name)

    def __exit__(self, exc_type, exc, exc_tb):
        logging.info('Exiting: %s', self.name)
But, when I instantiate this class, I can pass it several times to a with statement:
In [8]: test_context = track_entry_and_exit('test')
In [9]: with test_context:
...: pass
...:
INFO:root:Entering: test
INFO:root:Exiting: test
In [10]: with test_context:
...: pass
...:
INFO:root:Entering: test
INFO:root:Exiting: test
How can I create a class that fails on the second call to the with statement?
Here is a possible solution:
from functools import wraps

class MultipleCallToCM(Exception):
    pass

def single_use(cls):
    if not ("__enter__" in vars(cls) and "__exit__" in vars(cls)):
        raise TypeError(f"{cls} is not a Context Manager.")

    org_new = cls.__new__

    @wraps(org_new)
    def new(clss, *args, **kwargs):
        instance = org_new(clss)
        instance._called = False
        return instance

    cls.__new__ = new

    org_enter = cls.__enter__

    @wraps(org_enter)
    def enter(self):
        if self._called:
            raise MultipleCallToCM("You can't call this CM twice!")
        self._called = True
        return org_enter(self)

    cls.__enter__ = enter
    return cls

@single_use
class CM:
    def __enter__(self):
        print("Enter to the CM")

    def __exit__(self, exc_type, exc_value, exc_tb):
        print("Exit from the CM")

with CM():
    print("Inside.")
print("-----------------------------------")

with CM():
    print("Inside.")
print("-----------------------------------")

cm = CM()
with cm:
    print("Inside.")
print("-----------------------------------")

with cm:
    print("Inside.")
output:
Enter to the CM
Inside.
Exit from the CM
-----------------------------------
Enter to the CM
Inside.
Exit from the CM
-----------------------------------
Enter to the CM
Inside.
Exit from the CM
-----------------------------------
Traceback (most recent call last):
File "...", line 51, in <module>
with cm:
File "...", line 24, in enter
raise MultipleCallToCM("You can't call this CM twice!")
__main__.MultipleCallToCM: You can't call this CM twice!
I used a class decorator so that you can apply it to other context manager classes. I wrapped the __new__ method to give every instance a flag called _called, then replaced the original __enter__ with a version that checks whether this object has already been used in a with-statement.
How robust is this? I don't know. It seems to work; I hope it gives you an idea at least.
Arguably the simplest method is mentioned two paragraphs further down in the documentation you have cited:
Context managers created using contextmanager() are also single use context managers, and will complain about the underlying generator failing to yield if an attempt is made to use them a second time
Here is the corresponding invocation for your example:
>>> from contextlib import contextmanager
>>> @contextmanager
... def track_entry_and_exit(name):
...     print('Entering', name)
...     yield
...     print('Exiting', name)
...
...
>>> c = track_entry_and_exit('test')
>>> with c:
... pass
...
Entering test
Exiting test
>>> with c:
... pass
...
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python3.9/contextlib.py", line 115, in __enter__
del self.args, self.kwds, self.func
AttributeError: args
It's even a class although it is written as a function:
>>> type(c)
<class 'contextlib._GeneratorContextManager'>
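So the fix on the usage side is simply to create the manager afresh in the header of each with statement, as the docs advise:
>>> with track_entry_and_exit('test'):
...     pass
...
Entering test
Exiting test
>>> with track_entry_and_exit('test'):
...     pass
...
Entering test
Exiting test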
I suggest considering an iterable class instead of a context manager, like this:
class Iterable:
    """Iterable that can be iterated only once."""

    def __init__(self, name):
        self.name = name
        self.it = iter([self])

    def __iter__(self):
        # code to acquire resource
        print('enter')
        yield next(self.it)
        print('exit')
        # code to release resource

    def __repr__(self):
        return f'{self.__class__.__name__}({self.name})'
It can be iterated only once:
>>> it = Iterable('iterable')
>>> for item in it:
...     print('entered', item)
enter
entered Iterable(iterable)
exit
>>> for item in it:
...     print('entered', item)
RuntimeError: generator raised StopIteration
A context manager can be written in the same manner:
class Context:
    """Context manager that can be used only once."""

    def __init__(self, name):
        self.name = name
        self.it = iter([self])

    def __enter__(self):
        print('enter')
        return next(self.it)

    def __exit__(self, exc_type, exc, exc_tb):
        print('exit')

    def __repr__(self):
        return f'{self.__class__.__name__}({self.name})'
It works only once
>>> ctx = Context('context')
>>> with ctx as c:
...     print('entered', c)
enter
entered Context(context)
exit
>>> with ctx as c:
...     print('entered', c)
enter
StopIteration:

Replacing macro-style class method with a decorator?

I'm having a lot of trouble getting a good grasp on decorators despite having read many an article on the subject (including a very popular one on SO). I'm suspecting I must be stupid, but with all the stubbornness that comes with being stupid, I've decided to try to figure this out.
That, and I suspect I have a good use case...
Below is some code from a project of mine that extracts text from PDF files. Processing involves three steps:
1. Set up the PDFMiner objects needed for processing of the PDF file (boilerplate initializations).
2. Apply a processing function to the PDF file.
3. No matter what happens, close the file.
I recently learned about context managers and the with statement, and this seemed like a good use case for them. As such, I started by defining the PDFMinerWrapper class:
class PDFMinerWrapper(object):
    '''
    Usage:
    with PDFMinerWrapper('/path/to/file.pdf') as doc:
        doc.dosomething()
    '''
    def __init__(self, pdf_doc, pdf_pwd=''):
        self.pdf_doc = pdf_doc
        self.pdf_pwd = pdf_pwd

    def __enter__(self):
        self.pdf = open(self.pdf_doc, 'rb')
        parser = PDFParser(self.pdf)   # create a parser object associated with the file object
        doc = PDFDocument()            # create a PDFDocument object that stores the document structure
        parser.set_document(doc)       # connect the parser and document objects
        doc.set_parser(parser)
        doc.initialize(self.pdf_pwd)   # pass '' if no password required
        return doc

    def __exit__(self, type, value, traceback):
        self.pdf.close()
        # if we have an error, catch it, log it, and return the info
        if isinstance(value, Exception):
            self.logError()
            print traceback
            return value
Now I can easily work with a PDF file and be sure that it will handle errors gracefully. In theory, all I need to do is something like this:
with PDFMinerWrapper('/path/to/pdf') as doc:
    foo(doc)
This is great, except that I need to check that the PDF document is extractable before applying a function to the object returned by PDFMinerWrapper. My current solution involves an intermediate step.
I'm working with a class I call Pamplemousse which serves as an interface to work with the PDFs. It, in turn, uses PDFMinerWrapper each time an operation must be performed on the file to which the object has been linked.
Here is some (abridged) code that demonstrates its use:
class Pamplemousse(object):
    def __init__(self, inputfile, passwd='', enc='utf-8'):
        self.pdf_doc = inputfile
        self.passwd = passwd
        self.enc = enc

    def with_pdf(self, fn, *args):
        result = None
        with PDFMinerWrapper(self.pdf_doc, self.passwd) as doc:
            if doc.is_extractable:  # This is the test I need to perform
                # apply function and return result
                result = fn(doc, *args)
        return result

    def _parse_toc(self, doc):
        toc = []
        try:
            toc = [(level, title) for level, title, dest, a, se in doc.get_outlines()]
        except PDFNoOutlines:
            pass
        return toc

    def get_toc(self):
        return self.with_pdf(self._parse_toc)
Any time I wish to perform an operation on the PDF file, I pass the relevant function to the with_pdf method along with its arguments. The with_pdf method, in turn, uses the with statement to exploit the context manager of PDFMinerWrapper (thus ensuring graceful handling of exceptions) and executes the check before actually applying the function it has been passed.
My question is as follows:
I would like to simplify this code such that I do not have to explicitly call Pamplemousse.with_pdf. My understanding is that decorators could be of help here, so:
How would I implement a decorator whose job would be to call the with statement and execute the extractability check?
Is it possible for a decorator to be a class method, or must my decorator be a free-form function or class?
The way I interpreted your goal was to be able to define multiple methods on your Pamplemousse class without constantly having to wrap them in that call. Here is a really simplified version of what it might be:
def if_extractable(fn):
    # this expects to be wrapping a Pamplemousse object
    def wrapped(self, *args):
        print "wrapper(): Calling %s with" % fn, args
        result = None
        with PDFMinerWrapper(self.pdf_doc) as doc:
            if doc.is_extractable:
                result = fn(self, doc, *args)
        return result
    return wrapped

class Pamplemousse(object):
    def __init__(self, inputfile):
        self.pdf_doc = inputfile

    # get_toc will only get called if the wrapper check
    # passes the extractable test
    @if_extractable
    def get_toc(self, doc, *args):
        print "get_toc():", self, doc, args
The decorator if_extractable is defined as just a function, but it expects to be used on instance methods of your class.
The decorated get_toc, which used to delegate to a private method, now simply expects to receive a doc object and the args if the check passes; otherwise it doesn't get called and the wrapper returns None.
With this, you can keep defining your operation functions to expect a doc argument.
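For instance, hypothetical usage (assuming PDFMinerWrapper is importable and the file exists):

p = Pamplemousse('/path/to/file.pdf')
toc = p.get_toc()  # the wrapper opens the PDF, runs the extractability
                   # check, and passes doc to get_toc automatically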
You could even add some type checking to make sure it's wrapping the expected class:
def if_extractable(fn):
    def wrapped(self, *args):
        if not hasattr(self, 'pdf_doc'):
            raise TypeError('if_extractable() is wrapping '
                            'a non-Pamplemousse object')
        ...
A decorator is just a function that takes a function and returns another. You can do anything you like:
def my_func():
    return 'banana'

def my_decorator(f):  # see, it takes a function as an argument
    def wrapped():
        res = None
        with PDFMinerWrapper(pdf_doc, passwd) as doc:
            res = f()
        return res
    return wrapped  # see, I return a function that also calls f
Now if you apply the decorator:
@my_decorator
def my_func():
    return 'banana'
The wrapped function will replace my_func, so the extra code will be called.
You might want to try along the lines of this:
def with_pdf(self, fn, *args):
    def wrappedfunc(*args):
        result = None
        with PDFMinerWrapper(self.pdf_doc, self.passwd) as doc:
            if doc.is_extractable:  # This is the test I need to perform
                # apply function and return result
                result = fn(doc, *args)
        return result
    return wrappedfunc
and when you need to wrap the function, just do this:
@pamplemousseinstance.with_pdf
def foo(doc, *args):
    print 'I am doing stuff with', doc
    print 'I also got some good args. Take a look!', args
Here is some demonstration code:
#! /usr/bin/python
class Doc(object):
    """Dummy PDFParser Object"""
    is_extractable = True
    text = ''

class PDFMinerWrapper(object):
    '''
    Usage:
    with PDFMinerWrapper('/path/to/file.pdf') as doc:
        doc.dosomething()
    '''
    def __init__(self, pdf_doc, pdf_pwd=''):
        self.pdf_doc = pdf_doc
        self.pdf_pwd = pdf_pwd

    def __enter__(self):
        return self.pdf_doc

    def __exit__(self, type, value, traceback):
        pass

def safe_with_pdf(fn):
    """
    This is the decorator, it gets passed the fn we want
    to decorate.
    However as it is also a class method it also gets passed
    the class. This appears as the first argument and the
    function as the second argument.
    """
    print "---- Decorator ----"
    print "safe_with_pdf: First arg (fn):", fn

    def wrapper(self, *args, **kargs):
        """
        This will get passed the function's arguments and kargs,
        which means that we can intercept them here.
        """
        print "--- We are now in the wrapper ---"
        print "wrapper: First arg (self):", self
        print "wrapper: Other args (*args):", args
        print "wrapper: Other kargs (**kargs):", kargs
        # This function is accessible because this function is
        # a closure, thus still has access to the decorator's
        # ivars.
        print "wrapper: The function we run (fn):", fn
        # This wrapper is now pretending to be the original function.
        # Perform all the checks and stuff.
        with PDFMinerWrapper(self.pdf, self.passwd) as doc:
            if doc.is_extractable:
                # Now call the original function with its
                # arguments and pass it the doc
                result = fn(doc, *args, **kargs)
            else:
                result = None
        print "--- End of the Wrapper ---"
        return result

    # Decorators are expected to return a function; this
    # function is then run instead of the decorated function.
    # So instead of returning the original function we return the
    # wrapper. The wrapper will be run with the original function's
    # arguments.
    # Now by using closures we can still access the original
    # function by looking up fn (the argument that was passed
    # to this function) inside of the wrapper.
    print "--- Decorator ---"
    return wrapper

class SomeKlass(object):
    @safe_with_pdf
    def pdf_thing(doc, some_argument):
        print ''
        print "-- The Function --"
        # This function is now passed the doc from the wrapper.
        print 'The contents of the pdf:', doc.text
        print 'some_argument', some_argument
        print "-- End of the Function --"
        print ''

doc = Doc()
doc.text = 'PDF contents'
klass = SomeKlass()
klass.pdf = doc
klass.passwd = ''
klass.pdf_thing('arg')
I recommend running that code to see how it works. Some of the interesting points to look out for, though:
First you will notice that we only pass a single argument to pdf_thing() but if you look at the method it takes two arguments:
@safe_with_pdf
def pdf_thing(doc, some_argument):
    print ''
    print "-- The Function --"
This is because if you look at the wrapper where we call the function:
with PDFMinerWrapper(self.pdf, self.passwd) as doc:
    if doc.is_extractable:
        # Now call the original function with its
        # arguments and pass it the doc
        result = fn(doc, *args, **kargs)
We generate the doc argument and pass it in, along with the original arguments (*args, **kargs). This means that every method or function wrapped with this decorator receives an additional doc argument on top of the arguments listed in its declaration (def pdf_thing(doc, some_argument):).
Another thing to note is that the wrapper:
def wrapper(self, *args, **kargs):
    """
    This will get passed the function's arguments and kargs,
    which means that we can intercept them here.
    """
Also captures the self argument and does not pass it to the method being called. You could change this behaviour by modifying the function call from:
    result = fn(doc, *args, **kargs)
else:
    result = None
To:
    result = fn(self, doc, *args, **kargs)
else:
    result = None
and then changing the method itself to:
def pdf_thing(self, doc, some_argument):
Hope that helps, feel free to ask for more clarification.
EDIT:
To answer the second part of your question: yes, it can be a class method. Just place safe_with_pdf inside SomeKlass above, e.g. as the first method in the class.
Also here is a reduced version of the above code, with the decorator in the class.
class SomeKlass(object):
    def safe_with_pdf(fn):
        """The decorator which will wrap the method"""
        def wrapper(self, *args, **kargs):
            """The wrapper which will call the method with a doc"""
            with PDFMinerWrapper(self.pdf, self.passwd) as doc:
                if doc.is_extractable:
                    result = fn(doc, *args, **kargs)
                else:
                    result = None
            return result
        return wrapper

    @safe_with_pdf
    def pdf_thing(doc, some_argument):
        """The method to decorate"""
        print 'The contents of the pdf:', doc.text
        print 'some_argument', some_argument
        return '%s - Result' % doc.text

print klass.pdf_thing('arg')

Nesting Python context managers

In this question, I defined a context manager that contains a context manager. What is the easiest correct way to accomplish this nesting? I ended up calling self.temporary_file.__enter__() in self.__enter__(). However, in self.__exit__, I am pretty sure I have to call self.temporary_file.__exit__(type_, value, traceback) in a finally block in case an exception is raised. Should I be setting the type_, value, and traceback parameters if something goes wrong in self.__exit__? I checked contextlib, but couldn't find any utilities to help with this.
Original code from question:
import itertools as it
import tempfile

class WriteOnChangeFile:
    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        self.temporary_file = tempfile.TemporaryFile('r+')
        self.f = self.temporary_file.__enter__()
        return self.f

    def __exit__(self, type_, value, traceback):
        try:
            try:
                with open(self.filename, 'r') as real_f:
                    self.f.seek(0)
                    overwrite = any(
                        l != real_l
                        for l, real_l in it.zip_longest(self.f, real_f))
            except IOError:
                overwrite = True
            if overwrite:
                with open(self.filename, 'w') as real_f:
                    self.f.seek(0)
                    for l in self.f:
                        real_f.write(l)
        finally:
            self.temporary_file.__exit__(type_, value, traceback)
The easy way to create context managers is with contextlib.contextmanager. Something like this:
@contextlib.contextmanager
def write_on_change_file(filename):
    with tempfile.TemporaryFile('r+') as temporary_file:
        yield temporary_file
        # ... some saving logic that you had in __exit__ ...
Then use with write_on_change_file(...) as f:.
The body of the with statement will be executed “instead of” the yield. Wrap the yield itself in a try block if you want to catch any exceptions that happen in the body.
The temporary file will always be properly closed (when its with block ends).
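For example, a variation of the same generator with the yield wrapped in try (my sketch, assuming the same contextlib and tempfile imports):

@contextlib.contextmanager
def write_on_change_file(filename):
    with tempfile.TemporaryFile('r+') as temporary_file:
        try:
            yield temporary_file
        except Exception:
            raise  # the body failed: skip the save, let the error propagate
        else:
            ...  # the saving logic runs only if the body succeeded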
contextlib.contextmanager works great for functions, but when I need a class as a context manager, I'm using the following util:
import abc
import contextlib

class ContextManager(metaclass=abc.ABCMeta):
    """Class which can be used as `contextmanager`."""

    def __init__(self):
        self.__cm = None

    @abc.abstractmethod
    @contextlib.contextmanager
    def contextmanager(self):
        raise NotImplementedError('Abstract method')

    def __enter__(self):
        self.__cm = self.contextmanager()
        return self.__cm.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        return self.__cm.__exit__(exc_type, exc_value, traceback)
This allows declaring context manager classes with the generator syntax from @contextlib.contextmanager. It makes it much more natural to nest context managers, without having to manually call __enter__ and __exit__. Example:
class MyClass(ContextManager):
    def __init__(self, filename):
        self._filename = filename

    @contextlib.contextmanager
    def contextmanager(self):
        with tempfile.TemporaryFile() as temp_file:
            yield temp_file
            ...  # Post-processing you previously had in __exit__

with MyClass('filename') as x:
    print(x)
I wish this was in the standard library...
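For what it's worth, the standard library did later grow a utility for exactly the delegation part: contextlib.ExitStack (Python 3.3+). With it, __enter__ can enter the inner context manager and __exit__ can close it, with exception info forwarded correctly, without calling the dunder methods by hand. A sketch applied to the class from the original question:

import contextlib
import tempfile

class WriteOnChangeFile:
    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        with contextlib.ExitStack() as stack:
            self.f = stack.enter_context(tempfile.TemporaryFile('r+'))
            # Transfer ownership out of this block: the new stack now
            # owns the temporary file and will close it in __exit__.
            self._stack = stack.pop_all()
        return self.f

    def __exit__(self, exc_type, exc_value, traceback):
        # ... the comparison/saving logic from the question goes here ...
        return self._stack.__exit__(exc_type, exc_value, traceback)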

How to lazy load a data structure (python)

I have some way of building a data structure (out of some file contents, say):
def loadfile(FILE):
    return  # some data structure created from the contents of FILE
So I can do things like
puppies = loadfile("puppies.csv") # wait for loadfile to work
kitties = loadfile("kitties.csv") # wait some more
print len(puppies)
print puppies[32]
In the above example, I wasted a bunch of time actually reading kitties.csv and creating a data structure that I never used. I'd like to avoid that waste without constantly checking if not kitties whenever I want to do something. I'd like to be able to do
puppies = lazyload("puppies.csv") # instant
kitties = lazyload("kitties.csv") # instant
print len(puppies) # wait for loadfile
print puppies[32]
So if I don't ever try to do anything with kitties, loadfile("kitties.csv") never gets called.
Is there some standard way to do this?
After playing around with it for a bit, I produced the following solution, which appears to work correctly and is quite brief. Are there some alternatives? Are there drawbacks to using this approach that I should keep in mind?
class lazyload:
    def __init__(self, FILE):
        self.FILE = FILE
        self.F = None

    def __getattr__(self, name):
        if not self.F:
            print "loading %s" % self.FILE
            self.F = loadfile(self.FILE)
        return object.__getattribute__(self.F, name)
What might be even better is if something like this worked:
class lazyload:
    def __init__(self, FILE):
        self.FILE = FILE

    def __getattr__(self, name):
        # this never gets called again since self is no longer
        # a lazyload instance
        self = loadfile(self.FILE)
        return object.__getattribute__(self, name)
But this doesn't work because self is local. It actually ends up calling loadfile every time you do anything.
The csv module in the Python standard library will not load the data until you start iterating over it, so it is in fact lazy.
Edit: If you need to read through the whole file to build the datastructure, having a complex Lazy load object that proxies things is overkill. Just do this:
class Lazywrapper(object):
    def __init__(self, filename):
        self.filename = filename
        self._data = None

    def get_data(self):
        if self._data is None:
            self._build_data()
        return self._data

    def _build_data(self):
        # Now open and iterate over the file to build a datastructure, and
        # put that datastructure as self._data
        pass
With the above class you can do this:
puppies = Lazywrapper("puppies.csv")  # Instant
kitties = Lazywrapper("kitties.csv")  # Instant

print len(puppies.get_data())  # Wait
print puppies.get_data()[32]  # instant
Also
allkitties = kitties.get_data()  # wait
print len(allkitties)
print allkitties[32]
If you have a lot of data, and you don't really need to load all of it, you could also implement something like a class that reads the file only until it finds the doggie called "Froufrou" and then stops, but at that point it's likely better to stick the data in a database once and for all and access it from there.
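As an aside: on newer Pythons, if you can put the access behind an attribute, functools.cached_property (Python 3.8+) gives the same load-once-on-first-use behaviour with no custom machinery. A minimal sketch, reusing loadfile from the question:

from functools import cached_property

class LazyFile:
    def __init__(self, filename):
        self.filename = filename

    @cached_property
    def data(self):
        # computed on first access, then cached on the instance
        return loadfile(self.filename)

puppies = LazyFile("puppies.csv")  # instant; nothing is read yet
print(len(puppies.data))           # loadfile runs here, once
print(puppies.data[32])            # cached; no wait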
If you're really worried about the if statement, you can use a Stateful object:
from collections import MutableMapping

class LazyLoad(MutableMapping):
    def __init__(self, source):
        self.source = source
        self.process = LoadMe(self)
        self.data = None

    def __getitem__(self, key):
        self.process = self.process.load()
        return self.data[key]

    def __setitem__(self, key, value):
        self.process = self.process.load()
        self.data[key] = value

    def __contains__(self, key):
        self.process = self.process.load()
        return key in self.data
This class delegates the work to a process object which is either a LoadMe or a DoneLoading object. The LoadMe object will actually load; the DoneLoading object will not.
Note that there are no if-statements.
class LoadMe(object):
    def __init__(self, parent):
        self.parent = parent

    def load(self):
        ## Actually load, setting self.parent.data
        return DoneLoading(self.parent)

class DoneLoading(object):
    def __init__(self, parent):
        self.parent = parent

    def load(self):
        return self
Wouldn't if not self.F lead to another call to __getattr__, putting you into an infinite loop? I think your approach makes sense, but to be on the safe side, I'd make that line into:
if name == "F" and not self.F:
Also, you could make loadfile a method on the class, depending on what you're doing.
Here's a solution that uses a class decorator to defer initialisation until the first time an object is used:
def lazyload(cls):
    original_init = cls.__init__
    original_getattribute = cls.__getattribute__

    def newinit(self, *args, **kwargs):
        # Just cache the arguments for the eventual initialization.
        self._init_args = args
        self._init_kwargs = kwargs
        self.initialized = False
    newinit.__doc__ = original_init.__doc__

    def performinit(self):
        # We call object's __getattribute__ rather than super(...).__getattribute__
        # or original_getattribute so that no custom __getattribute__ implementations
        # can interfere with what we are doing.
        original_init(self,
                      *object.__getattribute__(self, "_init_args"),
                      **object.__getattribute__(self, "_init_kwargs"))
        del self._init_args
        del self._init_kwargs
        self.initialized = True

    def newgetattribute(self, name):
        if not object.__getattribute__(self, "initialized"):
            performinit(self)
        return original_getattribute(self, name)

    if hasattr(cls, "__getitem__"):
        original_getitem = cls.__getitem__
        def newgetitem(self, key):
            if not object.__getattribute__(self, "initialized"):
                performinit(self)
            return original_getitem(self, key)
        newgetitem.__doc__ = original_getitem.__doc__
        cls.__getitem__ = newgetitem

    if hasattr(cls, "__len__"):
        original_len = cls.__len__
        def newlen(self):
            if not object.__getattribute__(self, "initialized"):
                performinit(self)
            return original_len(self)
        newlen.__doc__ = original_len.__doc__
        cls.__len__ = newlen

    cls.__init__ = newinit
    cls.__getattribute__ = newgetattribute
    return cls
@lazyload
class FileLoader(dict):
    def __init__(self, filename):
        self.filename = filename
        print "Performing expensive load operation"
        self[32] = "Felix"
        self[33] = "Eeek"

kittens = FileLoader("kitties.csv")
print "kittens is instance of FileLoader: %s" % isinstance(kittens, FileLoader)  # Well obviously
print len(kittens)  # Wait
print kittens[32]  # No wait
print kittens[33]  # No wait
print kittens.filename  # Still no wait
print kittens.filename
The output:
kittens is instance of FileLoader: True
Performing expensive load operation
2
Felix
Eeek
kitties.csv
kitties.csv
I tried to actually restore the original magic methods after the initialization, but it wasn't working out. It may be necessary to proxy additional magic methods; I didn't investigate every scenario.
Note that kittens.initialized will always return True because it kicks off the initialization if it hasn't already been performed. Obviously it would be possible to add an exemption for this attribute so that it would return False if no other operation had been performed on the object, or the check could be changed to the equivalent of a hasattr call and the initialized attribute could be deleted after the initialization.
Here's a hack that makes the "even better" solution work, but I think it's annoying enough that it's probably better to just use the first solution. The idea is to execute the step self = loadfile(self.FILE) by passing the variable name as an attribute:
class lazyload:
    def __init__(self, FILE, var):
        self.FILE = FILE
        self.var = var

    def __getattr__(self, name):
        x = loadfile(self.FILE)
        globals()[self.var] = x
        return object.__getattribute__(x, name)
Then you can do
kitties = lazyload("kitties.csv", "kitties")
where the name of the variable being assigned and the second argument must match exactly.
After you call any method on kitties (aside from kitties.FILE or kitties.var), it will become completely indistinguishable from what you'd have gotten with kitties = loadfile("kitties.csv"). In particular, it will no longer be an instance of lazyload and kitties.FILE and kitties.var will no longer exist.
If you need to use puppies[32], you also need to define a __getitem__ method, because __getattr__ doesn't catch that behaviour.
I implemented lazy load for my needs; here is the (non-adapted) code:
class lazy_mask(object):
    '''Fake object, which is substituted in
    place of masked object'''
    def __init__(self, master, id):
        self.master = master
        self.id = id
        self._result = None
        self.master.add(self)

    def _res(self):
        '''Run lazy job'''
        if not self._result:
            self._result = self.master.get(self.id)
        return self._result

    def __getattribute__(self, name):
        '''proxy all queries to masked object'''
        name = name.replace('_lazy_mask', '')
        #print 'attr', name
        if name in ['_result', '_res', 'master', 'id']:  # don't proxy requests for own properties
            return super(lazy_mask, self).__getattribute__(name)
        else:  # but proxy requests for masked object
            return self._res().__getattribute__(name)

    def __getitem__(self, key):
        '''provide object["key"] access. Else can raise
        TypeError: 'lazy_mask' object is unsubscriptable'''
        return self._res().__getitem__(key)
(master is a registry object that loads data when I run its get() method)
This implementation works OK with isinstance(), str(), and json.dumps().

What's the preferred way to implement a hook or callback in Python?

I'd like to provide the capability for users of one of my modules to extend its capabilities by providing an interface to call a user's function. For example, I want to give users the capability to be notified when an instance of a class is created and given the opportunity to modify the instance before it is used.
The way I've implemented it is to declare a module-level factory function that does the instantiation:
# in mymodule.py
def factory(cls, *args, **kwargs):
    return cls(*args, **kwargs)
Then when I need an instance of a class in mymodule, I do factory(cls, arg1, arg2) rather than cls(arg1, arg2).
To extend it, a programmer would write in another module a function like this:
def myFactory(cls, *args, **kwargs):
    instance = myFactory.chain(cls, *args, **kwargs)
    # do something with the instance here if desired
    return instance
Installation of the above callback looks like this:
myFactory.chain, mymodule.factory = mymodule.factory, myFactory
This seems straightforward enough to me, but I was wondering if you, as a Python programmer, would expect a function to register a callback rather than doing it with an assignment, or if there were other methods you would expect. Does my solution seem workable, idiomatic, and clear to you?
I am looking to keep it as simple as possible; I don't think most applications will actually need to chain more than one user callback, for example (though unlimited chaining comes "for free" with the above pattern). I doubt they will need to remove callbacks or specify priorities or order. Modules like python-callbacks or PyDispatcher seem to me like overkill, especially the latter, but if there are compelling benefits to a programmer working with my module, I'm open to them.
Taking aaronsterling's idea a bit further:
class C(object):
    _oncreate = []

    def __new__(cls):
        return reduce(lambda x, y: y(x), cls._oncreate, super(C, cls).__new__(cls))

    @classmethod
    def oncreate(cls, func):
        cls._oncreate.append(func)

c = C()
print hasattr(c, 'spew')

@C.oncreate
def spew(obj):
    obj.spew = 42
    return obj

c = C()
print c.spew
Combining Aaron's idea of using a decorator and Ignacio's idea of a class that maintains a list of attached callbacks, plus a concept borrowed from C#, I came up with this:
class delegate(object):
    def __init__(self, func):
        self.callbacks = []
        self.basefunc = func

    def __iadd__(self, func):
        if callable(func):
            self.__isub__(func)
            self.callbacks.append(func)
        return self

    def callback(self, func):
        if callable(func):
            self.__isub__(func)
            self.callbacks.append(func)
        return func

    def __isub__(self, func):
        try:
            self.callbacks.remove(func)
        except ValueError:
            pass
        return self

    def __call__(self, *args, **kwargs):
        result = self.basefunc(*args, **kwargs)
        for func in self.callbacks:
            newresult = func(result)
            result = result if newresult is None else newresult
        return result
Decorating a function with @delegate allows other functions to be "attached" to it.
@delegate
def intfactory(num):
    return int(num)
Functions can be added to the delegate with += (and removed with -=). You can also decorate with funcname.callback to add a callback function.
@intfactory.callback
def notify(num):
    print "notify:", num

def increment(num):
    return num + 1

intfactory += increment
intfactory += lambda num: num * 2

print intfactory(3)  # outputs 8
Does this feel Pythonic?
I might use a decorator so that the user could just write:
@new_factory
def myFactory(cls, *args, **kwargs):
    instance = myFactory.chain(cls, *args, **kwargs)
    # do something with the instance here if desired
    return instance
Then in your module,
import sys

def new_factory(f):
    mod = sys.modules[__name__]
    f.chain = mod.factory
    mod.factory = f
    return f
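Hypothetical usage from the user's side, assuming mymodule defines factory and new_factory as above:

import mymodule

@mymodule.new_factory
def my_factory(cls, *args, **kwargs):
    instance = my_factory.chain(cls, *args, **kwargs)
    instance.created_by_hook = True  # modify the instance if desired
    return instance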
