Detect all global variables within a python function?

I am trying to analyze some messy code that happens to use global variables quite heavily within functions (I am trying to refactor the code so that functions use only local variables). Is there any way to detect global variables within a function?
For example:
def f(x):
    x = x + 1
    z = x + y
    return z
Here the global variable is y, since it isn't passed as an argument and isn't created within the function.
I tried to detect global variables within the function using string parsing, but it was getting a bit messy; I was wondering if there was a better way to do this?
Edit: If anyone is interested this is the code I am using to detect global variables (based on kindall's answer and Paolo's answer to this question: Capture stdout from a script in Python):
from dis import dis

def capture(f):
    """
    Decorator to capture standard output
    """
    def captured(*args, **kwargs):
        import sys
        from cStringIO import StringIO
        # setup the environment
        backup = sys.stdout
        try:
            sys.stdout = StringIO()     # capture output
            f(*args, **kwargs)
            out = sys.stdout.getvalue() # release output
        finally:
            sys.stdout.close()  # close the stream
            sys.stdout = backup # restore original stdout
        return out              # captured output wrapped in a string
    return captured

def return_globals(f):
    """
    Prints all of the global variables in function f
    """
    x = dis_(f)
    for i in x.splitlines():
        if "LOAD_GLOBAL" in i:
            print i

dis_ = capture(dis)

dis_(f)
dis by default does not return output, so if you want to manipulate the output of dis as a string, you have to use the capture decorator written by Paolo and posted here: Capture stdout from a script in Python
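On Python 3 the hand-rolled decorator isn't needed: contextlib.redirect_stdout (3.4+) can capture what dis prints. A minimal sketch, assuming the same f as above:

import io
from contextlib import redirect_stdout
from dis import dis

def dis_to_string(func):
    buf = io.StringIO()
    with redirect_stdout(buf):  # dis prints to stdout; route it into the buffer
        dis(func)
    return buf.getvalue()

print([line for line in dis_to_string(f).splitlines() if "LOAD_GLOBAL" in line])

(Since Python 3.4, dis.dis also accepts a file= argument, so dis(f, file=buf) achieves the same thing without any redirection.)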

Inspect the bytecode.
from dis import dis
dis(f)
Result:
  2           0 LOAD_FAST                0 (x)
              3 LOAD_CONST               1 (1)
              6 BINARY_ADD
              7 STORE_FAST               0 (x)

  3          10 LOAD_FAST                0 (x)
             13 LOAD_GLOBAL              0 (y)
             16 BINARY_ADD
             17 STORE_FAST               1 (z)

  4          20 LOAD_FAST                1 (z)
             23 RETURN_VALUE
The global variables will have a LOAD_GLOBAL opcode instead of LOAD_FAST. (If the function changes any global variables, there will be STORE_GLOBAL opcodes as well.)
With a little work, you could even write a function that scans the bytecode of a function and returns a list of the global variables it uses. In fact:
from dis import HAVE_ARGUMENT, opmap

def getglobals(func):
    GLOBAL_OPS = opmap["LOAD_GLOBAL"], opmap["STORE_GLOBAL"]
    EXTENDED_ARG = opmap["EXTENDED_ARG"]

    func = getattr(func, "im_func", func)
    code = func.func_code
    names = code.co_names
    op = (ord(c) for c in code.co_code)

    globs = set()
    extarg = 0

    for c in op:
        if c in GLOBAL_OPS:
            globs.add(names[next(op) + next(op) * 256 + extarg])
        elif c == EXTENDED_ARG:
            extarg = (next(op) + next(op) * 256) * 65536
            continue
        elif c >= HAVE_ARGUMENT:
            next(op)
            next(op)
        extarg = 0

    return sorted(globs)

print getglobals(f)  # ['y']
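The byte-walking above targets Python 2 bytecode (a str of bytes, two-byte arguments, im_func/func_code attributes). On Python 3.4+, dis.get_instructions does the decoding for you, EXTENDED_ARG handling included; a sketch of an equivalent:

import dis

def getglobals_py3(func):
    # Instruction.argval is the name already resolved from co_names
    ops = {"LOAD_GLOBAL", "STORE_GLOBAL"}
    return sorted({ins.argval for ins in dis.get_instructions(func) if ins.opname in ops})

print(getglobals_py3(f))  # ['y']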

As mentioned in the LOAD_GLOBAL documentation:
LOAD_GLOBAL(namei)
Loads the global named co_names[namei] onto the stack.
This means you can inspect the code object for your function to find globals:
>>> f.__code__.co_names
('y',)
Note that this isn't sufficient for nested functions (nor is the dis.dis method in @kindall's answer). In that case, you will need to look at the constants too:
# Define a function containing a nested function
>>> def foo():
...     def bar():
...         return some_global

# It doesn't contain LOAD_GLOBAL, so .co_names is empty.
>>> dis.dis(foo)
  2           0 LOAD_CONST               1 (<code object bar at 0x2b70440c84b0, file "<ipython-input-106-77ead3dc3fb7>", line 2>)
              3 MAKE_FUNCTION            0
              6 STORE_FAST               0 (bar)
              9 LOAD_CONST               0 (None)
             12 RETURN_VALUE

# Instead, we need to walk the constants to find nested functions:
# (if bar contained a nested function too, we'd need to recurse)
>>> from types import CodeType
>>> for constant in foo.__code__.co_consts:
...     if isinstance(constant, CodeType):
...         print constant.co_names
('some_global',)
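Putting that together, a small recursive sketch (treat the result as a candidate list, not a definitive one: co_names also holds attribute and imported-module names, so it can over-report):

from types import CodeType

def all_names(code):
    # Collect co_names from a code object and every nested code object.
    names = set(code.co_names)
    for const in code.co_consts:
        if isinstance(const, CodeType):
            names |= all_names(const)
    return names

print(all_names(foo.__code__))  # {'some_global'}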

Related

AttributeError: Can't pickle local object 'parallel_operations.<locals>.process'

import multiprocessing
from multiprocessing import Pool, Manager
import torch

def parallel_operations(points, primitives):
    batch_size, number_of_points, _ = points.shape
    _, _, number_of_primitives = primitives.shape
    gradient = torch.zeros(batch_size, number_of_points, number_of_primitives)

    def process(_lock, i):
        _lock.acquire()
        temp_points = points[i,:,:]
        temp_primitives = primitives[i,:,:].transpose(1,0)  # [7,1024]
        #print("temp_shape{}".format(temp_primitives.shape))
        temp = torch.zeros(number_of_points, number_of_primitives)
        for k in range(number_of_points):
            for j in range(number_of_primitives):
                temp[k,j] = torch.norm(temp_points[k,:]*temp_primitives[j,:3]+temp_primitives[j,3:6])
        gradient[i,:,:] = temp
        print("gradient update {} {}".format(i, gradient))
        _lock.release()
        return (i, gradient[i,:,:])

    result = []
    pool = Pool(multiprocessing.cpu_count())
    lock = Manager().Lock()
    for i in range(10):
        result.append(pool.apply_async(process, args=(lock, i)))
    pool.close()
    pool.join()
    print(len(result))
    for i in result:
        print(i.get())

if __name__ == "__main__":
    points = torch.randn(10,3,3)
    primitives = torch.randn(10,7,3)
    result1 = parallel_operations(points, primitives)
The above is my parallelized code, but when I run it, it throws an error: AttributeError: Can't pickle local object 'parallel_operations.<locals>.process'. Why is that?
When multiprocessing executes a function in a subprocess, it serializes the function and its parameters via the pickle protocol. But pickle doesn't serialize the code object itself, just its module and name. The unpickler loads the module and can get the right function via its name in that module.
But inner functions can't be reached by name like that. They are compiled once but they are only assigned to a function's local variable namespace when the function is executed. Otherwise, they are anonymous objects known to the function byte code. You can see this by disassembling a very simple program:
from dis import dis

def foo():
    def bar(x):
        return x

dis(foo)
The output is
  5           0 LOAD_CONST               1 (<code object bar at 0x7f18c7f5b890, file "/home/td/tmp/l/w1.py", line 5>)
              2 LOAD_CONST               2 ('foo.<locals>.bar')
              4 MAKE_FUNCTION            0
              6 STORE_FAST               0 (bar)
              8 LOAD_CONST               0 (None)
             10 RETURN_VALUE

Disassembly of <code object bar at 0x7f18c7f5b890, file "/home/td/tmp/l/w1.py", line 5>:
  6           0 LOAD_FAST                0 (x)
              2 RETURN_VALUE
The first section is the outer function. It binds an anonymous code object to local variable "bar" each time the function is run. When no instance of the outer function is running, the inner function has no name. The second section is the anonymous code object itself.
You should move process outside of parallel_operations so that pickle can find it.
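A minimal sketch of that fix pattern (the worker here is a stand-in, not the original computation): define the worker at module level so pickle can locate it by name, and pass everything it needs as arguments instead of closing over locals:

import multiprocessing
from multiprocessing import Pool

def square(i):
    # Module-level: pickle records this function by its module and name,
    # and the subprocess re-imports it.
    return i * i

def run():
    with Pool(multiprocessing.cpu_count()) as pool:
        results = [pool.apply_async(square, args=(i,)) for i in range(10)]
        return [r.get() for r in results]

if __name__ == "__main__":
    print(run())  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]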

Why am I able to global a non-existing variable in python

First, I fully understand what the global statement means and how to use it.
Now, let's look at this:
x = 100

def f():
    global x
    global xxx
    x = 99
    return x

print(f())
# >>> 99
print(x)
# >>> 99
You can see that by using global x, I successfully changed the value of x in the global environment.
But xxx does not exist at all, so why am I allowed to declare it global, and why doesn't it raise an error even when the function is executed?
global x does not define, declare, or otherwise create x. It simply states that if and when x is assigned to in the current function scope, the assignment is made to a global variable of that name, not a local variable. (Whether the assignment comes before or after the global statement makes no difference, which is why it is strongly recommended to put global statements at the beginning of the function.) The actual creation is still the job of an actual assignment.
Put another way, global doesn't generate any byte code by itself; it simply modifies what byte code other assignment statements might generate. Consider these two functions:
def f():
    global x
    x = 99

def g():
    x = 99
The only difference in the byte code for these two functions is that f uses STORE_GLOBAL as a result of the global statement, while g uses STORE_FAST.
>>> dis.dis(f)
  5           0 LOAD_CONST               1 (99)
              3 STORE_GLOBAL             0 (x)
              6 LOAD_CONST               0 (None)
              9 RETURN_VALUE
>>> dis.dis(g)
  8           0 LOAD_CONST               1 (99)
              3 STORE_FAST               0 (x)
              6 LOAD_CONST               0 (None)
              9 RETURN_VALUE
If you were to add an "unused" global statement, such as in

def h():
    global xxx
    x = 99

the resulting byte code is indistinguishable from that of g:
>>> dis.dis(h)
  3           0 LOAD_CONST               1 (99)
              2 STORE_FAST               0 (x)
              4 LOAD_CONST               0 (None)
              6 RETURN_VALUE
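To make the "creation happens at assignment" point concrete, a small illustration (the names here are made up for the example):

def create_xxx():
    global xxx
    xxx = "now I exist"  # this assignment is what actually creates the global

print("xxx" in globals())  # False -- the global statement alone created nothing
create_xxx()
print(xxx)                 # now I exist
print("xxx" in globals())  # True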

Python: Nested functions and variable scope

This code doesn't work:
def lol():
    i = 1
    def _lol():
        i += 1
    _lol()

lol()
Error:
local variable 'i' referenced before assignment
But, the following code works fine:
def lol():
    i = [1]
    def _lol():
        i[0] += 1
    _lol()

lol()
Why is that?
Python scopes fit into 3 categories -- local, nonlocal and global. By default, a function can only change a reference in the local scope (references are created with the assignment operator).
You're free to mutate an object that you have a reference to, which is why the second example works (i is a reference to the list [1], and you then mutate its first item). In short, you're mutating the object that i references; you're not trying to change the reference. Note that you can give a function access to change the reference in the global scope via the global keyword:
i = 1

def func():
    global i  # If you comment this out, i doesn't get changed in the global scope
    i = 2

func()
print(i)  # 2 -- 1 if the global statement is commented out.
Note that Python 3.x adds the nonlocal keyword. It does the same thing as global, but for the nearest enclosing function scope. e.g.
def foo():
    i = 1  # nonlocal to bar
    def bar():
        nonlocal i
        print(i)
        i += 1
    return bar

bar1 = foo()
bar1()  # 1
bar1()  # 2
bar1()  # 3

bar2 = foo()
bar2()  # 1
bar2()  # 2

bar1()  # 4 -- bar2 doesn't influence bar1 at all.
augmented operators
This is a bit more advanced, but provided to hopefully help answer questions regarding operators like +=. Consider the case:
x = []

def func():
    x += [1]
You might expect this to work -- after all, x += [1] for a list x is really just x.extend([1]), right? Unfortunately, it's not quite that simple. We can disassemble func using dis.dis to see a little more of what's going on.
>>> dis.dis(func)
  2           0 LOAD_FAST                0 (x)
              3 LOAD_CONST               1 (1)
              6 BUILD_LIST               1
              9 INPLACE_ADD
             10 STORE_FAST               0 (x)  ### IMPORTANT!
             13 LOAD_CONST               0 (None)
             16 RETURN_VALUE
Notice the byte-code instruction STORE_FAST? That basically says: store the result of INPLACE_ADD under the name x in the local namespace. In other words, you write:
x += [1]
but Python executes1:
x = x.__iadd__([1])
Why? __iadd__ should operate in place, so why does it need to rebind the name to __iadd__'s return value? The rebinding part is the problem -- i.e., this code would work:
x = []

def func():
    x.__iadd__([1])
The answer is that Python has immutable objects, and __iadd__ needs to work with them too. Because of this, augmented assignment is allowed to produce an object other than "self" -- for immutable types it must. This ends up being incredibly useful. Consider i = 1; i += 1. Ints don't define __iadd__ at all, so the augmented assignment falls back to __add__, which returns a new integer that then gets rebound to i.
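A short demonstration of both cases (variable names are arbitrary): list.__iadd__ mutates in place and returns self, so aliases see the change, while ints fall back to __add__ and the name is simply rebound to a fresh object:

a = [1]
b = a        # b aliases the same list
a += [2]     # in place: extends the list, then rebinds a to the same object
print(b)     # [1, 2] -- the alias sees the mutation

i = 1
j = i
i += 1       # no int.__iadd__: falls back to __add__, i is rebound to a new int
print(i, j)  # 2 1 -- j is untouched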
1Discussing this in even more depth is actually my all-time most upvoted answer on StackOverflow and can be found here

What is the difference between locals and globals when using Python's eval()?

Why does it make a difference if variables are passed as globals or as locals to Python's function eval()?
As also described in the documentation, Python will copy __builtins__ into globals if it is not given explicitly. But there must also be some other difference which I cannot see.
Consider the following example function. It takes a string code and returns a function object. Builtins are not allowed (e.g. abs()), but all functions from the math package.
def make_fn(code):
    import math
    ALLOWED_LOCALS = {v: getattr(math, v)
                      for v in filter(lambda x: not x.startswith('_'), dir(math))}
    return eval('lambda x: %s' % code, {'__builtins__': None}, ALLOWED_LOCALS)
It works as expected not using any local or global objects:
fn = make_fn('x + 3')
fn(5) # outputs 8
But it does not work using the math functions:
fn = make_fn('cos(x)')
fn(5)
This outputs the following exception:
<string> in <lambda>(x)
NameError: global name 'cos' is not defined
But when passing the same mapping as globals it works:
def make_fn(code):
    import math
    ALLOWED = {v: getattr(math, v)
               for v in filter(lambda x: not x.startswith('_'), dir(math))}
    ALLOWED['__builtins__'] = None
    return eval('lambda x: %s' % code, ALLOWED, {})
Same example as above:
fn = make_fn('cos(x)')
fn(5) # outputs 0.28366218546322625
What happens here in detail?
Python looks up names as globals by default; only names assigned to in functions are looked up as locals (so any name that is a parameter to the function or was assigned to in the function).
You can see this when you use the dis.dis() function to decompile code objects or functions:
>>> import dis
>>> def func(x):
...     return cos(x)
...
>>> dis.dis(func)
  2           0 LOAD_GLOBAL              0 (cos)
              3 LOAD_FAST                0 (x)
              6 CALL_FUNCTION            1
              9 RETURN_VALUE
LOAD_GLOBAL loads cos as a global name, only looking in the globals namespace. The LOAD_FAST opcode uses the current namespace (function locals) to look up names by index (function local namespaces are highly optimized and stored as a C array).
There are three more opcodes to look up names: LOAD_CONST (reserved for true constants, such as None and literal definitions for immutable values), LOAD_DEREF (to reference a closure) and LOAD_NAME. The latter does look at both locals and globals, and is only used when a function code object could not be optimized, as LOAD_NAME is a lot slower.
If you really wanted cos to be looked up in locals, you'd have to force the code to be unoptimised; this only works in Python 2, by adding an exec() call (or exec statement):
>>> def unoptimized(x):
...     exec('pass')
...     return cos(x)
...
>>> dis.dis(unoptimized)
  2           0 LOAD_CONST               1 ('pass')
              3 LOAD_CONST               0 (None)
              6 DUP_TOP
              7 EXEC_STMT

  3           8 LOAD_NAME                0 (cos)
             11 LOAD_FAST                0 (x)
             14 CALL_FUNCTION            1
             17 RETURN_VALUE
Now LOAD_NAME is used for cos because for all Python knows, the exec() call added that name as a local.
Even in this case, the locals that LOAD_NAME looks into will be the locals of the function itself, not the locals passed to eval(), which apply only to the parent scope.
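A small demonstration (math.cos stands in for any allowed function): the lambda's body compiles its free names to LOAD_GLOBAL, which is resolved through the function's __globals__ -- and eval() sets that from its globals argument, never from its locals argument:

import math

fn = eval('lambda x: cos(x)', {'cos': math.cos}, {'cos': 'ignored'})
print(fn(0.0))                # 1.0 -- resolved through the globals mapping
print(fn.__globals__['cos'])  # <built-in function cos>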

exec() bytecode with arbitrary locals?

Suppose I want to execute code, for example
value += 5
inside a namespace of my own (so the result is essentially mydict['value'] += 5). There's a function exec(), but I have to pass a string there:
exec('value += 5', mydict)
and passing statements as strings seems strange (e.g. it's not colorized that way).
Can it be done like:
def block():
    value += 5

???(block, mydict)
? The obvious candidate for last line was exec(block.__code__, mydict), but no luck: it raises UnboundLocalError about value. I believe it basically executes block(), not the code inside block, so assignments aren't easy – is that correct?
Of course, another possible solution would be to disassemble block.__code__...
FYI, I got the question because of this thread. Also, this is why some (I'm undecided myself) call for new syntax:

using mydict:
    value += 5
Note how this doesn't throw an error, but doesn't change mydict either:

def block(value=0):
    value += 5

block(**mydict)
You can pass a code object instead of a string to exec; you just need to make the right bytecode for the purpose:
>>> bytecode = compile('value += 5', '<string>', 'exec')
>>> mydict = {'value': 23}
>>> exec(bytecode, mydict)
>>> mydict['value']
28
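A hedged variant of the same idea: module-level code compiles to LOAD_NAME/STORE_NAME (see the disassembly below), and LOAD_NAME consults the locals mapping first, so passing mydict as the locals works too:

bytecode = compile('value += 5', '<string>', 'exec')
mydict = {'value': 23}
exec(bytecode, {}, mydict)  # empty globals, mydict as the locals mapping
print(mydict['value'])      # 28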
Specifically, ...:
>>> import dis
>>> dis.dis(bytecode)
  1           0 LOAD_NAME                0 (value)
              3 LOAD_CONST               0 (5)
              6 INPLACE_ADD
              7 STORE_NAME               0 (value)
             10 LOAD_CONST               1 (None)
             13 RETURN_VALUE
the load and store instructions must be of the _NAME persuasion, and this compile makes them so, while...:
>>> def f(): value += 5
...
>>> dis.dis(f.func_code)
  1           0 LOAD_FAST                0 (value)
              3 LOAD_CONST               1 (5)
              6 INPLACE_ADD
              7 STORE_FAST               0 (value)
             10 LOAD_CONST               0 (None)
             13 RETURN_VALUE
...code in a function is optimized to use the _FAST versions, and those don't work on a dict passed to exec. If you started somehow with a bytecode using the _FAST instructions, you could patch it to use the _NAME kind instead, e.g. with bytecodehacks or some similar approach.
Use the global keyword to force global (rather than fast/local) name resolution for any variables you want to modify from within the block:
def block():
    global value
    value += 5

mydict = {"value": 42}
exec(block.__code__, mydict)
print(mydict["value"])
Here is a crazy decorator to create such a block that uses "custom locals". In reality it is a quick hack to turn all variable access inside the function to global access, and evaluate the result with the custom locals dictionary as environment.
import dis
import functools
import types
import string

def withlocals(func):
    """Decorator for executing a block with custom "local" variables.
    The decorated function takes one argument: its scope dictionary.

    >>> @withlocals
    ... def block():
    ...     counter += 1
    ...     luckynumber = 88
    >>> d = {"counter": 1}
    >>> block(d)
    >>> d["counter"]
    2
    >>> d["luckynumber"]
    88
    """
    def opstr(*opnames):
        return "".join([chr(dis.opmap[N]) for N in opnames])

    # Python 2: co_code is a str of bytes, so string translation rewrites
    # the opcodes in place.
    translation_table = string.maketrans(
        opstr("LOAD_FAST", "STORE_FAST"),
        opstr("LOAD_GLOBAL", "STORE_GLOBAL"))

    c = func.func_code
    newcode = types.CodeType(c.co_argcount,
                             0,                # co_nlocals
                             c.co_stacksize,
                             c.co_flags,
                             c.co_code.translate(translation_table),
                             c.co_consts,
                             c.co_varnames,    # co_names, names of global vars
                             (),               # co_varnames
                             c.co_filename,
                             c.co_name,
                             c.co_firstlineno,
                             c.co_lnotab)

    @functools.wraps(func)
    def wrapper(mylocals):
        return eval(newcode, mylocals)
    return wrapper

if __name__ == '__main__':
    import doctest
    doctest.testmod()
This is just a monkey-patching adaptation of someone's brilliant recipe for a goto decorator.
From S.Lott's comment above I think I get the idea for an answer using the creation of a new class:

class _(metaclass=change(mydict)):
    value += 1
    ...

where change is a metaclass whose __prepare__ reads the dictionary and whose __new__ updates the dictionary.
For reuse, the snippet below would work, but it's kind of ugly:

def increase_value(d):
    class _(metaclass=change(d)):
        value += 1
        ...

increase_value(mydict)
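For the curious, here is a hedged, runnable Python 3 sketch of what that hypothetical change() could look like (it is not an existing library; this is one possible implementation): __prepare__ seeds the class-body namespace from the dictionary, and the final metaclass call writes the body's assignments back instead of building a class:

class change:
    def __init__(self, d):
        self.d = d

    def __prepare__(self, name, bases):
        # The class body executes with LOAD_NAME/STORE_NAME against this
        # mapping, so seed it with the current bindings.
        return dict(self.d)

    def __call__(self, name, bases, namespace):
        # Called where a metaclass's __new__ would run; copy plain
        # assignments back, skipping class-body bookkeeping like __module__.
        self.d.update({k: v for k, v in namespace.items()
                       if not k.startswith('__')})
        return None  # the class object itself is never needed

mydict = {"value": 42}

class _(metaclass=change(mydict)):
    value += 1

print(mydict["value"])  # 43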
