How can I create a dead weakref in python? - python

Is there a better way of doing this than:
def create_expired_weakref():
class Tmp: pass
ref = weakref.ref(Tmp())
assert ref() is None
return ref
Context: I want a default state for my weakref, so that my class can do:
def __init__(self):
self._ref = create_expired_weakref()
def get_thing(self):
r = self._ref() # I need an empty weakref for this to work the first time
if r is None:
r = SomethingExpensive()
self._ref = weakref.ref(r)
return r

Another approach is to use duck typing here. If all you care about is that it behaves like a dead weakref with respect to the self._ref() call, then you can do
self._ref = lambda : None
This is what I ended up using when I had a similar desire to have a property that would return a cached value if it was available, but None otherwise. I initialized it with this lambda function. Then the property was
#property
def ref(self):
return self._ref()
Update: Credit to #Aran-Fey, who I see posted this idea as a comment to the question, rather than as an answer.

Use you a weakref.finalize for great good:
import weakref
def create_expired_weakref(type_=type("", (object,), {'__slots__':
('__weakref__',)})):
obj = type_()
ref = weakref.ref(obj)
collected = False
def on_collect():
nonlocal collected
collected = True
final = weakref.finalize(obj, on_collect)
del obj
while not collected:
pass
return ref
This might block the thread for a while if you're debugging, and might even deadlock in some obscure situations, but it's guaranteed to return an expired weakref.

Related

Is there something like the threading macro from Clojure in Python?

In Clojure I can do something like this:
(-> path
clojure.java.io/resource
slurp
read-string)
instead of doing this:
(read-string (slurp (clojure.java.io/resource path)))
This is called threading in Clojure terminology and helps getting rid of a lot of parentheses.
In Python if I try to use functional constructs like map, any, or filter I have to nest them to each other. Is there a construct in Python with which I can do something similar to threading (or piping) in Clojure?
I'm not looking for a fully featured version since there are no macros in Python, I just want to do away with a lot of parentheses when I'm doing functional programming in Python.
Edit: I ended up using toolz which supports pipeing.
Here is a simple implementation of #deceze's idea (although, as #Carcigenicate points out, it is at best a partial solution):
import functools
def apply(x,f): return f(x)
def thread(*args):
return functools.reduce(apply,args)
For example:
def f(x): return 2*x+1
def g(x): return x**2
thread(5,f,g) #evaluates to 121
I wanted to take this to the extreme and do it all dynamically.
Basically, the below Chain class lets you chain functions together similar to Clojure's -> and ->> macros. It supports both threading into the first and last arguments.
Functions are resolved in this order:
Object method
Local defined variable
Built-in variable
The code:
class Chain(object):
def __init__(self, value, index=0):
self.value = value
self.index = index
def __getattr__(self, item):
append_arg = True
try:
prop = getattr(self.value, item)
append_arg = False
except AttributeError:
try:
prop = locals()[item]
except KeyError:
prop = getattr(__builtins__, item)
if callable(prop):
def fn(*args, **kwargs):
orig = list(args)
if append_arg:
if self.index == -1:
orig.append(self.value)
else:
orig.insert(self.index, self.value)
return Chain(prop(*orig, **kwargs), index=self.index)
return fn
else:
return Chain(prop, index=self.index)
Thread each result as first arg
file = Chain(__file__).open('r').readlines().value
Thread each result as last arg
result = Chain(range(0, 100), index=-1).map(lambda x: x * x).reduce(lambda x, y: x + y).value

Identifying pure functions in python

I have a decorator #pure that registers a function as pure, for example:
#pure
def rectangle_area(a,b):
return a*b
#pure
def triangle_area(a,b,c):
return ((a+(b+c))(c-(a-b))(c+(a-b))(a+(b-c)))**0.5/4
Next, I want to identify a newly defined pure function
def house_area(a,b,c):
return rectangle_area(a,b) + triangle_area(a,b,c)
Obviously house_area is pure, since it only calls pure functions.
How can I discover all pure functions automatically (perhaps by using ast)
Assuming operators are all pure, then essentially you only need to check all the functions calls. This can indeed be done with the ast module.
First I defined the pure decorator as:
def pure(f):
f.pure = True
return f
Adding an attribute telling that it's pure, allows skipping early or "forcing" a function to identify as pure. This is useful if you'd need a function like math.sin to identify as pure. Additionally since you can't add attributes to builtin functions.
#pure
def sin(x):
return math.sin(x)
All in all. Use the ast module to visit all the nodes. Then for each Call node check whether the function being called is pure.
import ast
class PureVisitor(ast.NodeVisitor):
def __init__(self, visited):
super().__init__()
self.pure = True
self.visited = visited
def visit_Name(self, node):
return node.id
def visit_Attribute(self, node):
name = [node.attr]
child = node.value
while child is not None:
if isinstance(child, ast.Attribute):
name.append(child.attr)
child = child.value
else:
name.append(child.id)
break
name = ".".join(reversed(name))
return name
def visit_Call(self, node):
if not self.pure:
return
name = self.visit(node.func)
if name not in self.visited:
self.visited.append(name)
try:
callee = eval(name)
if not is_pure(callee, self.visited):
self.pure = False
except NameError:
self.pure = False
Then check whether the function has the pure attribute. If not get code and check if all the functions calls can be classified as pure.
import inspect, textwrap
def is_pure(f, _visited=None):
try:
return f.pure
except AttributeError:
pass
try:
code = inspect.getsource(f.__code__)
except AttributeError:
return False
code = textwrap.dedent(code)
node = compile(code, "<unknown>", "exec", ast.PyCF_ONLY_AST)
if _visited is None:
_visited = []
visitor = PureVisitor(_visited)
visitor.visit(node)
return visitor.pure
Note that print(is_pure(lambda x: math.sin(x))) doesn't work since inspect.getsource(f.__code__) returns code on a line by line basis. So the source returned by getsource would include the print and is_pure call, thus yielding False. Unless those functions are overridden.
To verify that it works, test it by doing:
print(house_area) # Prints: True
To list through all the functions in the current module:
import sys, types
for k in dir(sys.modules[__name__]):
v = globals()[k]
if isinstance(v, types.FunctionType):
print(k, is_pure(v))
The visited list keeps track of which functions have already been verified pure. This help circumvent problems related to recursion. Since the code isn't executed, the evaluation would recursively visit factorial.
#pure
def factorial(n):
return 1 if n == 1 else n * factorial(n - 1)
Note that you might need to revise the following code. Choosing another way to obtain a function from its name.
try:
callee = eval(name)
if not is_pure(callee, self.visited):
self.pure = False
except NameError:
self.pure = False

Static classes being initialised on import. How does python 2 initialise static classes on import

I am trying to introduce python 3 support for the package mime and the code is doing something I have never seen before.
There is a class Types() that is used in the package as a static class.
class Types(with_metaclass(ItemMeta, object)): # I changed this for 2-3 compatibility
type_variants = defaultdict(list)
extension_index = defaultdict(list)
# __metaclass__ = ItemMeta # unnessecary now
def __init__(self, data_version=None):
self.data_version = data_version
The type_variants defaultdict is what is getting filled in python 2 but not in 3.
It very much seems to be getting filled by this class when is in a different file called mime_types.py.
class MIMETypes(object):
_types = Types(VERSION)
def __repr__(self):
return '<MIMETypes version:%s>' % VERSION
#classmethod
def load_from_file(cls, type_file):
data = open(type_file).read()
data = data.split('\n')
mime_types = Types()
for index, line in enumerate(data):
item = line.strip()
if not item:
continue
try:
ret = TEXT_FORMAT_RE.match(item).groups()
except Exception as e:
__parsing_error(type_file, index, line, e)
(unregistered, obsolete, platform, mediatype, subtype, extensions,
encoding, urls, docs, comment) = ret
if mediatype is None:
if comment is None:
__parsing_error(type_file, index, line, RuntimeError)
continue
extensions = extensions and extensions.split(',') or []
urls = urls and urls.split(',') or []
mime_type = Type('%s/%s' % (mediatype, subtype))
mime_type.extensions = extensions
...
mime_type.url = urls
mime_types.add(mime_type) # instance of Type() is being filled?
return mime_types
The function startup() is being run whenever mime_types.py is imported and it does this.
def startup():
global STARTUP
if STARTUP:
type_files = glob(join(DIR, 'types', '*'))
type_files.sort()
for type_file in type_files:
MIMETypes.load_from_file(type_file) # class method is filling Types?
STARTUP = False
This all seems pretty weird to me. The MIMETypes class first creates an instance of Types() on the first line. _types = Types(VERSION). It then seems to do nothing with this instance and only use the mime_types instance created in the load_from_file() class method. mime_types = Types().
This sort of thing vaguely reminds me of javascript class construction. How is the instance mime_types filling Types.type_variants so that when it is imported like this.
from mime import Type, Types
The class's type_variants defaultdict can be used. And why isn't this working in python 3?
EDIT:
Adding extra code to show how type_variants is filled
(In "Types" Class)
#classmethod
def add_type_variant(cls, mime_type):
cls.type_veriants[mime_type.simplified].append(mime_type)
#classmethod
def add(cls, *types):
for mime_type in types:
if isinstance(mime_type, Types):
cls.add(*mime_type.defined_types())
else:
mts = cls.type_veriants.get(mime_type.simplified)
if mts and mime_type in mts:
Warning('Type %s already registered as a variant of %s.',
mime_type, mime_type.simplified)
cls.add_type_variant(mime_type)
cls.index_extensions(mime_type)
You can see that MIMETypes uses the add() classmethod.
Without posting more of your code, it's hard to say. I will say that I was able to get that package ported to Python 3 with only a few changes (print statement -> function, basestring -> str, adding a dot before same-package imports, and a really ugly hack to compensate for their love of cmp:
def cmp(x,y):
if isinstance(x, Type): return x.__cmp__(y)
if isinstance(y, Type): return y.__cmp__(x) * -1
return 0 if x == y else (1 if x > y else -1)
Note, I'm not even sure this is correct.
Then
import mime
print(mime.Types.type_veriants) # sic
printed out a 1590 entry defaultdict.
Regarding your question about MIMETypes._types not being used, I agree, it's not.
Regarding your question about how the dictionary is being populated, it's quite simple, and you've identified most of it.
import mime
Imports the package's __init__.py which contains the line:
from .mime_types import MIMETypes, VERSION
And mime_types.py includes the lines:
def startup():
global STARTUP
if STARTUP:
type_files = glob(join(DIR, 'types', '*'))
type_files.sort()
for type_file in type_files:
MIMETypes.load_from_file(type_file)
STARTUP = False
startup()
And MIMETypes.load_from_file() has the lines:
mime_types = Types()
#...
for ... in ...:
mime_types.add(mime_type)
And Types.add(): has the line:
cls.add_type_variant(mime_type)
And that classmethod contains:
cls.type_veriants[mime_type.simplified].append(mime_type)

conditional python with

I have a five or six resources that have nice 'with' handlers, and normally I'd do this:
with res1, res2, res3, res4, res5, res6:
do1
do2
However, sometimes one or more of these resources should not be activated. Which leads to very ugly repetitive code:
with res1, res3, res4, res6: # these always acquired
if res2_enabled:
with res2:
if res5_enabled:
with res5:
do1
do2
else:
do1
do2
else if res5_enabled:
with res5:
...
There must be clean easy ways to do this surely?
You could create a wrapper object that supports the with statement, and do the checking in there. Something like:
with wrapper(res1), wrapper(res2), wrapper(res3):
...
or a wrapper than handles all of them:
with wrapper(res1, res2, res3):
...
The definition for you wrapper would be:
class wrapper(object):
def __init__(self, *objs):
...
def __enter__(self):
initialize objs here
def __exit__(self):
release objects here
If I understand you correctly you can do this:
from contextlib import contextmanager, nested
def enabled_resources(*resources):
return nested(*(res for res,enabled in resources if enabled))
# just for testing
#contextmanager
def test(n):
print n, "entered"
yield
resources = [(test(n), n%2) for n in range(10)]
# you want
# resources = [(res1, res1_enabled), ... ]
with enabled_resources(*resources):
# do1, do2
pass
Original Poster here; here is my approach refined so far:
I can add (or monkey-patch) the bool operator __nonzero__ onto the with objects, returning whether they are enabled. Then, when objects are mutually exclusive, I can have:
with res1 or res2 or res3 or res4:
...
When an resource is togglable, I can create an empty withable that is a nop; wither seems a nice name for it:
class sither:
#classmethod
def __enter__(cls): pass
#classmethod
def __exit__(cls,*args): pass
...
with res1 or wither, res2 or wither:
...
I can also use this keeping the toggling out of the withable objects:
with res1 if res1enabled else wither, res2 if res2enabled else wither:
..
Finally, those I have most control over, I can integrate the enabled checking into the class itself such that when used and not enabled, they are nop:
with res1, res2, res3:
...
The with statement is absolutely adorable, it just seems a bit unentrenched yet. It will be interesting to see what finesse others come up with in this regard...

has_next in Python iterators?

Have Python iterators got a has_next method?
There's an alternative to the StopIteration by using next(iterator, default_value).
For exapmle:
>>> a = iter('hi')
>>> print next(a, None)
h
>>> print next(a, None)
i
>>> print next(a, None)
None
So you can detect for None or other pre-specified value for end of the iterator if you don't want the exception way.
No, there is no such method. The end of iteration is indicated by an exception. See the documentation.
If you really need a has-next functionality, it's easy to obtain it with a little wrapper class. For example:
class hn_wrapper(object):
def __init__(self, it):
self.it = iter(it)
self._hasnext = None
def __iter__(self): return self
def next(self):
if self._hasnext:
result = self._thenext
else:
result = next(self.it)
self._hasnext = None
return result
def hasnext(self):
if self._hasnext is None:
try: self._thenext = next(self.it)
except StopIteration: self._hasnext = False
else: self._hasnext = True
return self._hasnext
now something like
x = hn_wrapper('ciao')
while x.hasnext(): print next(x)
emits
c
i
a
o
as required.
Note that the use of next(sel.it) as a built-in requires Python 2.6 or better; if you're using an older version of Python, use self.it.next() instead (and similarly for next(x) in the example usage). [[You might reasonably think this note is redundant, since Python 2.6 has been around for over a year now -- but more often than not when I use Python 2.6 features in a response, some commenter or other feels duty-bound to point out that they are 2.6 features, thus I'm trying to forestall such comments for once;-)]]
===
For Python3, you would make the following changes:
from collections.abc import Iterator # since python 3.3 Iterator is here
class hn_wrapper(Iterator): # need to subclass Iterator rather than object
def __init__(self, it):
self.it = iter(it)
self._hasnext = None
def __iter__(self):
return self
def __next__(self): # __next__ vs next in python 2
if self._hasnext:
result = self._thenext
else:
result = next(self.it)
self._hasnext = None
return result
def hasnext(self):
if self._hasnext is None:
try:
self._thenext = next(self.it)
except StopIteration:
self._hasnext = False
else: self._hasnext = True
return self._hasnext
In addition to all the mentions of StopIteration, the Python "for" loop simply does what you want:
>>> it = iter("hello")
>>> for i in it:
... print i
...
h
e
l
l
o
Try the __length_hint__() method from any iterator object:
iter(...).__length_hint__() > 0
You can tee the iterator using, itertools.tee, and check for StopIteration on the teed iterator.
hasNext somewhat translates to the StopIteration exception, e.g.:
>>> it = iter("hello")
>>> it.next()
'h'
>>> it.next()
'e'
>>> it.next()
'l'
>>> it.next()
'l'
>>> it.next()
'o'
>>> it.next()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration
StopIteration docs: http://docs.python.org/library/exceptions.html#exceptions.StopIteration
Some article about iterators and generator in python: http://www.ibm.com/developerworks/library/l-pycon.html
No. The most similar concept is most likely a StopIteration exception.
I believe python just has next() and according to the doc, it throws an exception is there are no more elements.
http://docs.python.org/library/stdtypes.html#iterator-types
The use case that lead me to search for this is the following
def setfrom(self,f):
"""Set from iterable f"""
fi = iter(f)
for i in range(self.n):
try:
x = next(fi)
except StopIteration:
fi = iter(f)
x = next(fi)
self.a[i] = x
where hasnext() is available, one could do
def setfrom(self,f):
"""Set from iterable f"""
fi = iter(f)
for i in range(self.n):
if not hasnext(fi):
fi = iter(f) # restart
self.a[i] = next(fi)
which to me is cleaner. Obviously you can work around issues by defining utility classes, but what then happens is you have a proliferation of twenty-odd different almost-equivalent workarounds each with their quirks, and if you wish to reuse code that uses different workarounds, you have to either have multiple near-equivalent in your single application, or go around picking through and rewriting code to use the same approach. The 'do it once and do it well' maxim fails badly.
Furthermore, the iterator itself needs to have an internal 'hasnext' check to run to see if it needs to raise an exception. This internal check is then hidden so that it needs to be tested by trying to get an item, catching the exception and running the handler if thrown. This is unnecessary hiding IMO.
Maybe it's just me, but while I like https://stackoverflow.com/users/95810/alex-martelli 's answer, I find this a bit easier to read:
from collections.abc import Iterator # since python 3.3 Iterator is here
class MyIterator(Iterator): # need to subclass Iterator rather than object
def __init__(self, it):
self._iter = iter(it)
self._sentinel = object()
self._next = next(self._iter, self._sentinel)
def __iter__(self):
return self
def __next__(self): # __next__ vs next in python 2
if not self.has_next():
next(self._iter) # raises StopIteration
val = self._next
self._next = next(self._iter, self._sentinel)
return val
def has_next(self):
return self._next is not self._sentinel
No, there is no such method. The end of iteration is indicated by a StopIteration (more on that here).
This follows the python principle EAFP (easier to ask for forgiveness than permission). A has_next method would follow the principle of LBYL (look before you leap) and contradicts this core python principle.
This interesting article explains the two concepts in more detail.
Suggested way is StopIteration.
Please see Fibonacci example from tutorialspoint
#!usr/bin/python3
import sys
def fibonacci(n): #generator function
a, b, counter = 0, 1, 0
while True:
if (counter > n):
return
yield a
a, b = b, a + b
counter += 1
f = fibonacci(5) #f is iterator object
while True:
try:
print (next(f), end=" ")
except StopIteration:
sys.exit()
It is also possible to implement a helper generator that wraps any iterator and answers question if it has next value:
Try it online!
def has_next(it):
first = True
for e in it:
if not first:
yield True, prev
else:
first = False
prev = e
if not first:
yield False, prev
for has_next_, e in has_next(range(4)):
print(has_next_, e)
Which outputs:
True 0
True 1
True 2
False 3
The main and probably only drawback of this method is that it reads ahead one more element, for most of tasks it is totally alright, but for some tasks it may be disallowed, especially if user of has_next() is not aware of this read-ahead logic and may missuse it.
Code above works for infinite iterators too.
Actually for all cases that I ever programmed such kind of has_next() was totally enough and didn't cause any problems and in fact was very helpful. You just have to be aware of its read-ahead logic.
The way has solved it based on handling the "StopIteration" execption is pretty straightforward in order to read all iterations :
end_cursor = False
while not end_cursor:
try:
print(cursor.next())
except StopIteration:
print('end loop')
end_cursor = True
except:
print('other exceptions to manage')
end_cursor = True
I think there are valid use cases for when you may want some sort of has_next functionality, in which case you should decorate an iterator with a has_next defined.
Combining concepts from the answers to this question here is my implementation of that which feels like a nice concise solution to me (python 3.9):
_EMPTY_BUF = object()
class BufferedIterator(Iterator[_T]):
def __init__(self, real_it: Iterator[_T]):
self._real_it = real_it
self._buf = next(self._real_it, _EMPTY_BUF)
def has_next(self):
return self._buf is not _EMPTY_BUF
def __next__(self) -> _T_co:
v = self._buf
self._buf = next(self._real_it, _EMPTY_BUF)
if v is _EMPTY_BUF:
raise StopIteration()
return v
The main difference is that has_next is just a boolean expression, and also handles iterators with None values.
Added this to a gist here with tests and example usage.
With 'for' one can implement his own version of 'next' avoiding exception
def my_next(it):
for x in it:
return x
return None
very interesting question, but this "hasnext" design had been put into leetcode:
https://leetcode.com/problems/iterator-for-combination/
here is my implementation:
class CombinationIterator:
def __init__(self, characters: str, combinationLength: int):
from itertools import combinations
from collections import deque
self.iter = combinations(characters, combinationLength)
self.res = deque()
def next(self) -> str:
if len(self.res) == 0:
return ''.join(next(self.iter))
else:
return ''.join(self.res.pop())
def hasNext(self) -> bool:
try:
self.res.insert(0, next(self.iter))
return True
except:
return len(self.res) > 0
The way I solved my problem is to keep the count of the number of objects iterated over, so far. I wanted to iterate over a set using calls to an instance method. Since I knew the length of the set, and the number of items counted so far, I effectively had an hasNext method.
A simple version of my code:
class Iterator:
# s is a string, say
def __init__(self, s):
self.s = set(list(s))
self.done = False
self.iter = iter(s)
self.charCount = 0
def next(self):
if self.done:
return None
self.char = next(self.iter)
self.charCount += 1
self.done = (self.charCount < len(self.s))
return self.char
def hasMore(self):
return not self.done
Of course, the example is a toy one, but you get the idea. This won't work in cases where there is no way to get the length of the iterable, like a generator etc.

Categories