Can't pickle local object 'ArgumentParser.__init__.<locals>.identity' - python

import argparse
import pickle
parser = argparse.ArgumentParser(description='Process some integers.')
_ = pickle.dumps(parser)
In my code, the ArgumentParser object is serialized, but at runtime I get the error Can't pickle local object 'ArgumentParser.__init__.<locals>.identity'.
In Lib/argparse.py, identity is a function defined locally inside the __init__ method, and this prevents serialization. If I convert this function to a method, serialization succeeds. But I don't think that is the best solution, since it means changing the Python library file. What is the best way to serialize the parser object?

I had a program that used sub-parsers, and initializing the ArgumentParser noticeably delayed startup, even for --help. While experimenting with ways around that, I ran into this same error, and I found the following to work.
from argparse import ArgumentParser
from pickle import dumps
# module-level replacement for the local identity function
def identity(string):
    return string

p = ArgumentParser()
# re-register the default type converter with the picklable identity
p.register('type', None, identity)
x = dumps(p)
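One caveat (my addition, not part of the answer above): this only replaces the converter argparse registers by default. Pickling will still fail if any add_argument call passes an unpicklable callable, e.g. type=lambda s: s.upper(), so such converters also need to be module-level functions.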

I created a subclass, class ArgumentParserSerializable(ArgumentParser), and defined the identity method as static. It also works.
import os as _os
import sys as _sys
from argparse import ArgumentParser, HelpFormatter, SUPPRESS
from gettext import gettext as _

class ArgumentParserSerializable(ArgumentParser):
    def __init__(self,
                 prog=None,
                 usage=None,
                 description=None,
                 epilog=None,
                 parents=[],
                 formatter_class=HelpFormatter,
                 prefix_chars='-',
                 fromfile_prefix_chars=None,
                 argument_default=None,
                 conflict_handler='error',
                 add_help=True,
                 allow_abbrev=True):

        superinit = super(ArgumentParser, self).__init__
        superinit(description=description,
                  prefix_chars=prefix_chars,
                  argument_default=argument_default,
                  conflict_handler=conflict_handler)

        # default setting for prog
        if prog is None:
            prog = _os.path.basename(_sys.argv[0])

        self.prog = prog
        self.usage = usage
        self.epilog = epilog
        self.formatter_class = formatter_class
        self.fromfile_prefix_chars = fromfile_prefix_chars
        self.add_help = add_help
        self.allow_abbrev = allow_abbrev

        add_group = self.add_argument_group
        self._positionals = add_group(_('positional arguments'))
        self._optionals = add_group(_('optional arguments'))
        self._subparsers = None

        # register the picklable static method instead of argparse's local function
        self.register('type', None, self.identity)

        # add help argument if necessary
        # (using explicit default to override global argument_default)
        default_prefix = '-' if '-' in prefix_chars else prefix_chars[0]
        if self.add_help:
            self.add_argument(
                default_prefix + 'h', default_prefix * 2 + 'help',
                action='help', default=SUPPRESS,
                help=_('show this help message and exit'))

        # add parent arguments and defaults
        for parent in parents:
            self._add_container_actions(parent)
            try:
                defaults = parent._defaults
            except AttributeError:
                pass
            else:
                self._defaults.update(defaults)

    @staticmethod
    def identity(string):
        return string
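If you would rather not copy the whole __init__ from argparse, a shorter variant of the same idea is to call the parent constructor and then overwrite the registered default type converter with a picklable, module-level function. This is only a sketch of that idea (the names PicklableArgumentParser and _identity are mine, not from the answer above):
import pickle
from argparse import ArgumentParser

def _identity(string):
    # module-level, so pickle can find it by name
    return string

class PicklableArgumentParser(ArgumentParser):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # overwrite the local identity function that argparse registered in __init__
        self.register('type', None, _identity)

p = PicklableArgumentParser(description='Process some integers.')
data = pickle.dumps(p)  # no longer raises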


Avoid redundancy in argparse defaults

import argparse

class A:
    def __init__(self, val=1):
        self.val = val

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--val', default=1)
    args = parser.parse_args()
    a = A(val=args.val)
Quite often I find myself with a structure as shown above, where a class takes an optional argument with a default value. For argparse I then need to define this default value again, and if it changes, I have to change it in multiple places.
Leaving the default value only in the class is not an option, as the class might be initialised from somewhere else as well.
I considered setting the argparse default value to None and only passing it when it's not None, but with multiple variables that leads to convoluted code.
The only other thing coming to my mind is a section at the top of the file that sets constants like "DEFAULT_val", so the defaults are defined only once, but I'm wondering if there's a better solution.
import argparse
import inspect

def get_default_args(func):
    # collect the default values declared in func's signature
    signature = inspect.signature(func)
    return {
        k: v.default
        for k, v in signature.parameters.items()
        if v.default is not inspect.Parameter.empty
    }

class A:
    def __init__(self, val=1):
        self.val = val

if __name__ == '__main__':
    default_values = get_default_args(A.__init__)
    parser = argparse.ArgumentParser()
    parser.add_argument('--val', default=default_values['val'])
    args = parser.parse_args()
    a = A(val=args.val)
Found the solution in the answers to another question

Custom conflict handling for ArgumentParser

What I need
I need an ArgumentParser with a conflict-handling scheme that resolves a registered set of duplicate arguments but raises on all other conflicts.
What I tried
My initial approach (see also the code example at the bottom) was to subclass ArgumentParser, add a _handle_conflict_custom method, and then instantiate the subclass with ArgumentParser(conflict_handler='custom'), thinking that the _get_handler method would pick it up.
The Problem
This raises an error, because the ArgumentParser inherits from _ActionsContainer, which provides the _get_handler and the _handle_conflict_{strategy} methods, and then internally instantiates an _ArgumentGroup (that also inherits from _ActionsContainer), which in turn doesn't know about the newly defined method on ArgumentParser and thus fails to get the custom handler.
Overriding the _get_handler method is not feasible for the same reasons.
I have created a (rudimentary) class diagram illustrating the relationships, and therefore hopefully the problem in subclassing ArgumentParser to achieve what I want.
Motivation
I (think I) need this because I have two scripts that handle distinct parts of a workflow. I would like to be able to use them separately as scripts, but also have one script that imports the methods of both and does everything in one go.
This combined script should support all the options of the two individual scripts, but I don't want to duplicate the (extensive) argument definitions, so that I would have to make changes in multiple places.
This is easily solved by importing the ArgumentParsers of the (part) scripts and using them as parents, like so combined_parser = ArgumentParser(parents=[arg_parser1, arg_parser2]).
In the scripts I have duplicate options, e.g. for the work directory, so I need to resolve those conflicts.
This could also be done, with conflict_handler='resolve'.
But because there are a lot of possible arguments (which is not up to our team, because we have to maintain compatibility), I also want the script to raise an error if something that causes a conflict gets defined without being explicitly allowed to, instead of quietly overriding the other flag and potentially causing unwanted behavior.
Other suggestions to achieve these goals (keeping both scripts separate, enabling use of one script that wraps both, avoiding code duplication and raising on unexpected duplicates) are welcome.
Example Code
from argparse import ArgumentParser

class CustomParser(ArgumentParser):
    def _handle_conflict_custom(self, action, conflicting_actions):
        registered = ['-h', '--help', '-f']
        conflicts = conflicting_actions[:]
        use_error = False
        while conflicts:
            option_string, action = conflicts.pop()
            if option_string in registered:
                continue
            else:
                use_error = True
                break
        if use_error:
            self._handle_conflict_error(action, conflicting_actions)
        else:
            self._handle_conflict_resolve(action, conflicting_actions)

if __name__ == '__main__':
    ap1 = ArgumentParser()
    ap2 = ArgumentParser()

    ap1.add_argument('-f')  # registered, so should be resolved
    ap2.add_argument('-f')

    ap1.add_argument('-g')  # not registered, so should raise
    ap2.add_argument('-g')

    # this raises before ever resolving anything, for the stated reasons
    ap3 = CustomParser(parents=[ap1, ap2], conflict_handler='custom')
Other questions
I am aware of these similar questions:
python argparse subcommand with dependency and conflict
argparse conflict when used with two connected python3 scripts
Handling argparse conflicts
... and others
But even though some of them provide interesting insights into argparse usage and conflicts, they seem to address issues that are not related to mine.
While I agree that FMc's approach is probably the better one in terms of long-term viability, I have found a way to inject a custom handler into the ArgumentParser.
The key is to subclass the _ActionsContainer class, which actually defines the handler functions, and then to replace the base classes that ArgumentParser and _ArgumentGroup inherit from.
In the case below, I've simply added a handler that ignores any conflicts, but you could add any custom logic you want.
import argparse

class IgnorantActionsContainer(argparse._ActionsContainer):
    def _handle_conflict_ignore(self, action, conflicting_actions):
        pass

# swap in the new base class so both the parser and its argument groups see the handler
argparse.ArgumentParser.__bases__ = (argparse._AttributeHolder, IgnorantActionsContainer)
argparse._ArgumentGroup.__bases__ = (IgnorantActionsContainer,)

parser = argparse.ArgumentParser(conflict_handler="ignore")
parser.add_argument("-a", type=int, default=1)
parser.add_argument("-a", type=int, default=2)
parser.add_argument("-a", type=int, default=3)
parser.add_argument("-a", type=int, default=4)
print(parser.parse_args())
Running python custom_conflict_handler.py -h prints:
usage: custom_conflict_handler.py [-h] [-a A] [-a A] [-a A] [-a A]
optional arguments:
-h, --help show this help message and exit
-a A
-a A
-a A
-a A
Running python custom_conflict_handler.py prints:
Namespace(a=1)
Running python custom_conflict_handler.py -a 5 prints:
Namespace(a=5)
For various reasons -- notably the needs of testing -- I have adopted the habit of always defining argparse configuration in the form of a data structure, typically a sequence of dicts. The actual creation of the ArgumentParser is done in a reusable function that simply builds the parser from the dicts. This approach has many benefits, especially for more complex projects.
If each of your scripts were to shift to that model, I would think that you might be able to detect any configuration conflicts in that function and raise accordingly, thus avoiding the need to inherit from ArgumentParser and mess around with understanding its internals.
I'm not certain I understand your conflict-handling needs very well, so the demo below simply hunts for duplicate options and raises if it sees one, but I think you should be able to understand the approach and assess whether it might work for your case. The basic idea is to solve your problem in the realm of ordinary data structures rather than in the byzantine world of argparse.
import sys
import argparse
from collections import Counter

OPTS_CONFIG1 = (
    {
        'names': 'path',
        'metavar': 'PATH',
    },
    {
        'names': '--nums',
        'nargs': '+',
        'type': int,
    },
    {
        'names': '--dryrun',
        'action': 'store_true',
    },
)

OPTS_CONFIG2 = (
    {
        'names': '--foo',
        'metavar': 'FOO',
    },
    {
        'names': '--bar',
        'metavar': 'BAR',
    },
    {
        'names': '--dryrun',
        'action': 'store_true',
    },
)

def main(args):
    ap = define_parser(OPTS_CONFIG1, OPTS_CONFIG2)
    opts = ap.parse_args(args)
    print(opts)

def define_parser(*configs):
    # Validation: adjust as needed.
    tally = Counter(
        nm
        for config in configs
        for d in config
        for nm in d['names'].split()
    )
    for k, n in tally.items():
        if n > 1:
            raise Exception(f'Duplicate argument configurations: {k}')

    # Define and return parser.
    ap = argparse.ArgumentParser()
    for config in configs:
        for d in config:
            kws = dict(d)
            xs = kws.pop('names').split()
            ap.add_argument(*xs, **kws)
    return ap

if __name__ == '__main__':
    main(sys.argv[1:])
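Note that, as written, both config tuples define --dryrun, so running this demo raises the duplicate-configuration exception; drop one of those entries (or extend the validation with a whitelist of allowed duplicates) to get a parsed Namespace instead.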
There is an answer that's marginally less hacky than Hans's approach. You can simply subclass argparse._ActionsContainer and argparse._ArgumentGroup, making sure your container subclass is listed after argparse.ArgumentParser in the bases; it still ends up ahead of argparse._ActionsContainer in the MRO, so its methods take precedence. Here's an example:
import argparse
from typing import Iterable, Any

class ArgumentGroup(argparse._ArgumentGroup):
    def _handle_conflict_custom(
        self,
        action: argparse.Action,
        conflicting_actions: Iterable[tuple[str, argparse.Action]],
    ) -> None:
        ...

class ActionsContainer(argparse._ActionsContainer):
    def _handle_conflict_custom(
        self,
        action: argparse.Action,
        conflicting_actions: Iterable[tuple[str, argparse.Action]],
    ) -> None:
        ...

    def add_argument_group(self, *args: Any, **kwargs: Any) -> ArgumentGroup:
        group = ArgumentGroup(self, *args, **kwargs)
        self._action_groups.append(group)
        return group

class ArgumentParser(argparse.ArgumentParser, ActionsContainer):
    ...
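As a hedged usage sketch (my addition; with the no-op `...` bodies above, conflicts are effectively ignored, so fill in resolve-or-raise logic as needed), the combined parser can then be built with conflict_handler='custom', and both the parser and its argument groups will find _handle_conflict_custom:
if __name__ == '__main__':
    ap1 = argparse.ArgumentParser()
    ap2 = argparse.ArgumentParser()
    ap1.add_argument('-f')
    ap2.add_argument('-f')  # duplicate option; triggers the custom handler

    combined = ArgumentParser(parents=[ap1, ap2], conflict_handler='custom')
    print(combined.parse_args(['-f', 'value']))  # e.g. Namespace(f='value')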
Based on FMc's approach, I have created something a little more elaborate. I know this isn't Code Review, but feedback is still welcome. Also, maybe it helps someone to see this fleshed out a bit more.
import argparse
from collections import Counter, OrderedDict
from typing import List, Dict, Any
from copy import deepcopy

class OptionConf:
    def __init__(self):
        self._conf = OrderedDict()  # type: Dict[str, List[Dict[str, Any]]]
        self._allowed_dupes = list()  # type: List[str]

    def add_conf(self, command, *conf_args, **conf_kwargs):
        if command not in self._conf:
            self._conf[command] = []
        conf_kwargs['*'] = conf_args
        self._conf[command].append(conf_kwargs)

    def add_argument(self, *conf_args, **conf_kwargs):
        self.add_conf('add_argument', *conf_args, **conf_kwargs)

    def register_allowed_duplicate(self, flag):
        self._allowed_dupes.append(flag)

    def generate_parser(self, **kwargs):
        argument_parser = argparse.ArgumentParser(**kwargs)
        for command, conf_kwargs_list in self._conf.items():
            command_func = getattr(argument_parser, command)
            for conf_kwargs in conf_kwargs_list:
                list_args = conf_kwargs.pop('*', [])
                command_func(*list_args, **conf_kwargs)
                conf_kwargs['*'] = list_args
        return argument_parser

    def _get_add_argument_conf_args(self):
        for command, kwargs_list in self._conf.items():
            if command != 'add_argument':
                continue
            return kwargs_list
        return []

    def resolve_registered(self, other):
        if self.__class__ == other.__class__:
            conf_args_list = self._get_add_argument_conf_args()  # type: List[Dict[str, Any]]
            other_conf_args_list = other._get_add_argument_conf_args()  # type: List[Dict[str, Any]]
            # find all argument names of both parsers
            all_names = []
            for conf_args in conf_args_list:
                all_names += conf_args.get('*', [])
            all_other_names = []
            for other_conf_args in other_conf_args_list:
                all_other_names += other_conf_args.get('*', [])
            # check for dupes and throw if appropriate
            found_allowed_dupes = []
            tally = Counter(all_names + all_other_names)
            for name, count in tally.items():
                if count > 1 and name not in self._allowed_dupes:
                    raise Exception(f'Duplicate argument configurations: {name}')
                elif count > 1:
                    found_allowed_dupes.append(name)
            # merge them in a new OptionConf, preferring the args of self (AS OPPOSED TO ORIGINAL RESOLVE)
            new_opt_conf = OptionConf()
            for command, kwargs_list in self._conf.items():
                for kwargs in kwargs_list:
                    list_args = kwargs.get('*', [])
                    new_opt_conf.add_conf(command, *list_args, **kwargs)
            for command, kwargs_list in other._conf.items():
                for kwargs in kwargs_list:
                    # if it's another argument, we remove dupe names
                    if command == 'add_argument':
                        all_names = kwargs.pop('*', [])
                        names = [name for name in all_names if name not in found_allowed_dupes]
                        # and only add if there are names left
                        if names:
                            new_opt_conf.add_argument(*deepcopy(names), **deepcopy(kwargs))
                        # put names back
                        kwargs['*'] = all_names
                    else:
                        # if not, we just add it
                        list_args = kwargs.pop('*', [])
                        new_opt_conf.add_conf(command, *deepcopy(list_args), **deepcopy(kwargs))
                        # put list args back
                        kwargs['*'] = list_args
            return new_opt_conf
        raise NotImplementedError()

if __name__ == '__main__':
    opts_conf = OptionConf()
    opts_conf.add_argument('pos_arg')
    opts_conf.add_argument('-n', '--number', metavar='N', type=int)
    opts_conf.add_argument('-i', '--index')
    opts_conf.add_argument('-v', '--verbose', action='store_true')

    opts_conf2 = OptionConf()
    opts_conf2.add_argument('-n', '--number', metavar='N', type=int)
    opts_conf2.add_argument('-v', action='store_true')

    opts_conf.register_allowed_duplicate('-n')
    opts_conf.register_allowed_duplicate('--number')
    try:
        resolved_opts = opts_conf.resolve_registered(opts_conf2)
    except Exception as e:
        print(e)  # raises on -v

    opts_conf.register_allowed_duplicate('-v')
    resolved_opts = opts_conf.resolve_registered(opts_conf2)
    ap = resolved_opts.generate_parser(description='does it work?')
    ap.parse_args(['-h'])

python: generating methods for a convenience class programmatically

So I've written a module that contains a bunch of functions to easily interact with a subprocess. This subprocess has a whole bunch of settings that let you change how it formats and behaves. I realized that it'd be nice to have a convenience class that you could use as a handler to store the settings you prefer and pass them on to the module-level functions. Here's the example code I'm testing with:
import inspect

class MyHandler(object):
    def __init__(self):
        self.format_string = 'class format string'
        self.database = 'class database'
        self.mode = "class mode"

    def rename(self, *args, **kwargs):
        self._pass_to_function(rename, *args, **kwargs)

    def _pass_to_function(self, function, *overrided_args, **overrided_kwargs):
        # get the function's remaining arguments with the inspect module
        functon_kwargs = inspect.getargspec(function)[0][len(overrided_args):]
        handler_vars = vars(self)
        kwargs_to_pass = {}
        for arg in functon_kwargs:
            if arg in handler_vars:
                kwargs_to_pass[arg] = handler_vars[arg]
        for arg in overrided_kwargs:
            kwargs_to_pass[arg] = overrided_kwargs[arg]
        return function(*overrided_args, **kwargs_to_pass)

def rename(targets, format_string=None, database=None, mode=None,
           not_in_class='None'):
    print 'targets = {}'.format(targets)
    print 'format_string = {}'.format(format_string)
    print 'database = {}'.format(database)
    print 'mode = {}'.format(mode)
    print 'not_in_class = {}\n'.format(not_in_class)
    return
The thing I like about this solution is that it uses the attributes stored in the class, but you can easily override them by simply adding them to the method call if you want a one-off with a different setting. To do this I have the _pass_to_function as a kind of wrapper function to parse and fill in the needed settings and overrides. Here's how it looks:
>>> import argstest
>>> argstest.rename('some_file.avi', database='some database')
targets = some_file.avi
format_string = None
database = some database
mode = None
not_in_class = None
>>> tst = argstest.MyHandler()
>>> tst.rename('some_file.avi')
targets = some_file.avi
format_string = class format string
database = class database
mode = class mode
not_in_class = None
>>> tst.rename('some_file.avi', 'one off format string', not_in_class=True)
targets = some_file.avi
format_string = one off format string
database = class database
mode = class mode
not_in_class = True
Now in my real module I have dozens of module-level functions that I want to access from the handler class. Ideally the methods would be generated automatically based on the functions in the module. Seeing as how all the methods are only going to be passing everything to _pass_to_function, I get the sense that this shouldn't be very difficult, but I'm having a lot of trouble figuring out exactly how.
I've read about using type to generate a meta-class, but I don't see how I would use it in this situation. Am I not seeing how I could use type? Should I use some sort of module level script that adds the functions with setattr? Is what I was doing the better/clearer way to do things?
Any and all advice would be appreciated.
Okay, I think I've answered my own question for now. This is how the module looks:
import inspect
import sys
from types import MethodType

class MyHandler(object):
    def __init__(self):
        self.format_string = 'class format string'
        self.database = 'class database'
        self.mode = "class mode"
        self._populate_methods()

    def _populate_methods(self):
        to_add = inspect.getmembers(sys.modules[__name__], inspect.isfunction)
        to_add = [x[0] for x in to_add if not x[0].startswith('_')]
        for func_name in to_add:
            func = getattr(sys.modules[__name__], func_name)  # strings to functions
            self._add_function_as_method(func_name, func)

    def _add_function_as_method(self, func_name, func):
        def f(self, *args, **kwargs):  # the template for the method we'll add
            return self._pass_to_function(func, *args, **kwargs)
        setattr(MyHandler, func_name, MethodType(f, None, MyHandler))

    def _pass_to_function(self, function, *overrided_args, **overrided_kwargs):
        functon_kwargs = inspect.getargspec(function)[0][len(overrided_args):]
        handler_vars = vars(self)
        kwargs_to_pass = {}
        for arg in functon_kwargs:
            if arg in handler_vars:
                kwargs_to_pass[arg] = handler_vars[arg]
        for arg in overrided_kwargs:
            kwargs_to_pass[arg] = overrided_kwargs[arg]
        return function(*overrided_args, **kwargs_to_pass)

def rename(targets, format_string=None, database=None, mode=None,
           not_in_class='None'):
    print 'targets = {}'.format(targets)
    print 'format_string = {}'.format(format_string)
    print 'database = {}'.format(database)
    print 'mode = {}'.format(mode)
    print 'not_in_class = {}\n'.format(not_in_class)
    return

def something_else():
    print "this function should become a method"

def _not_a_member():
    print "this function should not become a method"
I've added the _populate_methods and _add_function_as_method member functions. The _populate_methods function gets the names of all "public" functions in the module, dereferences them to their function objects, and passes each one through _add_function_as_method. All that method does is use an internal function to capture the arguments and send them to _pass_to_function, and then set that function as a method using setattr.
phew
So it works, but I'm still wondering if there isn't a clearer or more straightforward way to get this done. I'd be very grateful if anyone could chime in.

Python / YAML: How to initialize additional objects not just from the YAML file, within loadConfig?

I have what I think is a small misconception with loading some YAML objects. I defined the class below.
What I want to do is load some objects with the overridden loadConfig function for YAMLObjects. Some of these come from my .yaml file, but others should be built out of objects loaded from the YAML file.
For instance, in the class below, I load a member object named "keep" which is a string naming some items to keep in the region. But I want to also parse this into a list and have the list stored as a member object too. And I don't want the user to have to give both the string and list version of this parameter in the YAML.
My current work around has been to override the __getattr__ function inside Region and make it create the defaults if it looks and doesn't find them. But this is clunky and more complicated than needed for just initializing objects.
What convention am I misunderstanding here? Why doesn't the loadConfig method create the additional members not found in the YAML?
import yaml, pdb

class Region(yaml.YAMLObject):
    yaml_tag = u'!Region'

    def __init__(self, name, keep, drop):
        self.name = name
        self.keep = keep
        self.drop = drop
        self.keep_list = self.keep.split("+")
        self.drop_list = self.drop.split("+")
        self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
    ###

    def loadConfig(self, yamlConfig):
        yml = yaml.load_all(file(yamlConfig))
        for data in yml:
            # These get created fine
            self.name = data["name"]
            self.keep = data["keep"]
            self.drop = data["drop"]
            # These do not get created.
            self.keep_list = self.keep.split("+")
            self.drop_list = self.drop.split("+")
            self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
    ###
### End Region

if __name__ == "__main__":
    my_yaml = "/home/path/to/test.yaml"
    region_iterator = yaml.load_all(file(my_yaml))
    # Set a debug breakpoint to play with region_iterator and
    # confirm the extra stuff isn't created.
    pdb.set_trace()
And here is test.yaml so you can run all of this and see what I mean:
Regions:
  # Note: the string conventions below are for an
  # existing system. This is a shortened, representative
  # example.
  Market1:
    !Region
    name: USAndGB
    keep: US+GB
    drop: !!null
  Market2:
    !Region
    name: CanadaAndAustralia
    keep: CA+AU
    drop: !!null
And here, for example, is what it looks like for me when I run this in an IPython shell and explore the loaded object:
In [57]: %run "/home/espears/testWorkspace/testRegions.py"
--Return--
> /home/espears/testWorkspace/testRegions.py(38)<module>()->None
-> pdb.set_trace()
(Pdb) region_iterator
<generator object load_all at 0x1139d820>
(Pdb) tmp = region_iterator.next()
(Pdb) tmp
{'Regions': {'Market2': <__main__.Region object at 0x1f858550>, 'Market1': <__main__.Region object at 0x11a91e50>}}
(Pdb) us = tmp['Regions']['Market1']
(Pdb) us
<__main__.Region object at 0x11a91e50>
(Pdb) us.name
'USAndGB'
(Pdb) us.keep
'US+GB'
(Pdb) us.keep_list
*** AttributeError: 'Region' object has no attribute 'keep_list'
A pattern I have found useful when working with YAML for classes that are basically storage is to have the loader use the constructor, so that objects are created the same way as when you construct them normally. If I understand what you are attempting to do correctly, this kind of structure might be useful:
import inspect
import yaml
import numpy as np  # needed for the ndarray check below
from collections import OrderedDict

class Serializable(yaml.YAMLObject):
    __metaclass__ = yaml.YAMLObjectMetaclass

    @property
    def _dict(self):
        dump_dict = OrderedDict()
        for var in inspect.getargspec(self.__init__).args[1:]:
            if getattr(self, var, None) is not None:
                item = getattr(self, var)
                if isinstance(item, np.ndarray) and item.ndim == 1:
                    item = list(item)
                dump_dict[var] = item
        return dump_dict

    @classmethod
    def to_yaml(cls, dumper, data):
        return ordered_dump(dumper, '!{0}'.format(data.__class__.__name__),
                            data._dict)

    @classmethod
    def from_yaml(cls, loader, node):
        fields = loader.construct_mapping(node, deep=True)
        return cls(**fields)

def ordered_dump(dumper, tag, data):
    value = []
    node = yaml.nodes.MappingNode(tag, value)
    for key, item in data.iteritems():
        node_key = dumper.represent_data(key)
        node_value = dumper.represent_data(item)
        value.append((node_key, node_value))
    return node
You would then want to have your Region class inherit from Serializable, and remove the loadConfig stuff. The code I posted inspects the constructor to see what data to save to the yaml file, and then when loading a yaml file calls the constructor with that same set of data. That way you just have to get the logic right in your constructor and the yaml loading should get it for free.
That code was ripped from one of my projects, apologies in advance if it doesn't quite work. It is also slightly more complicated than it needs to be because I wanted to control the order of output by using OrderedDict. You could replace my ordered_dump function with a call to dumper.represent_dict.
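For concreteness, here is a sketch of what the question's Region class might look like on top of Serializable. The tag and fields mirror the question's YAML, but the exact code (including the None guards for the null drop value) is my assumption rather than part of the original answer:
class Region(Serializable):
    yaml_tag = u'!Region'

    def __init__(self, name, keep, drop):
        self.name = name
        self.keep = keep
        self.drop = drop
        # derived members are rebuilt here, so loading from YAML through the
        # constructor recreates them for free
        self.keep_list = self.keep.split("+") if self.keep else []
        self.drop_list = self.drop.split("+") if self.drop else []
        self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)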

Python decorator with multiprocessing fails

I would like to use a decorator on a function that I will subsequently pass to a multiprocessing pool. However, the code fails with "PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed". I don't quite see why it fails here. I feel certain that it's something simple, but I can't find it. Below is a minimal "working" example. I thought that using functools.wraps would be enough to let this work.
If I comment out the function decoration, it works without an issue. What is it about multiprocessing that I'm misunderstanding here? Is there any way to make this work?
Edit: After adding both a callable class decorator and a function decorator, it turns out that the function decorator works as expected. The callable class decorator continues to fail. What is it about the callable class version that keeps it from being pickled?
import random
import multiprocessing
import functools

class my_decorator_class(object):
    def __init__(self, target):
        self.target = target
        try:
            functools.update_wrapper(self, target)
        except:
            pass

    def __call__(self, elements):
        f = []
        for element in elements:
            f.append(self.target([element])[0])
        return f

def my_decorator_function(target):
    @functools.wraps(target)
    def inner(elements):
        f = []
        for element in elements:
            f.append(target([element])[0])
        return f
    return inner

@my_decorator_function
def my_func(elements):
    f = []
    for element in elements:
        f.append(sum(element))
    return f

if __name__ == '__main__':
    elements = [[random.randint(0, 9) for _ in range(5)] for _ in range(10)]
    pool = multiprocessing.Pool(processes=4)
    results = [pool.apply_async(my_func, ([e],)) for e in elements]
    pool.close()
    f = [r.get()[0] for r in results]
    print(f)
The problem is that pickle needs to have some way to reassemble everything that you pickle. See here for a list of what can be pickled:
http://docs.python.org/library/pickle.html#what-can-be-pickled-and-unpickled
When pickling my_func, the following components need to be pickled:
An instance of my_decorator_class, called my_func.
This is fine. Pickle will store the name of the class and pickle its __dict__ contents. When unpickling, it uses the name to find the class, then creates an instance and fills in the __dict__ contents. However, the __dict__ contents present a problem...
The instance of the original my_func that's stored in my_func.target.
This isn't so good. It's a function at the top level, and normally these can be pickled. Pickle will store the name of the function. The problem, however, is that the name "my_func" is no longer bound to the undecorated function; it's bound to the decorated one. This means that pickle won't be able to look up the undecorated function to recreate the object. Sadly, pickle doesn't have any way to know that the object it's trying to pickle can always be found under the name __main__.my_func.
You can change it like this and it will work:
import random
import multiprocessing
import functools

class my_decorator(object):
    def __init__(self, target):
        self.target = target
        try:
            functools.update_wrapper(self, target)
        except:
            pass

    def __call__(self, candidates, args):
        f = []
        for candidate in candidates:
            f.append(self.target([candidate], args)[0])
        return f

def old_my_func(candidates, args):
    f = []
    for c in candidates:
        f.append(sum(c))
    return f

# apply the decorator explicitly, so old_my_func stays reachable under its own name
my_func = my_decorator(old_my_func)

if __name__ == '__main__':
    candidates = [[random.randint(0, 9) for _ in range(5)] for _ in range(10)]
    pool = multiprocessing.Pool(processes=4)
    results = [pool.apply_async(my_func, ([c], {})) for c in candidates]
    pool.close()
    f = [r.get()[0] for r in results]
    print(f)
You have observed that the decorator function works when the class does not. I believe this is because functools.wraps modifies the decorated function so that it has the name and other properties of the function it wraps. As far as the pickle module can tell, it is indistinguishable from a normal top-level function, so it pickles it by storing its name. Upon unpickling, the name is bound to the decorated function so everything works out.
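To see that difference in isolation, here is a small self-contained sketch (my addition, not part of the original answer): the function-decorated version pickles because pickle finds it under its module-level name, while the class-decorated version fails because the wrapped function is no longer reachable under its own name.
import functools
import pickle

class class_deco(object):
    def __init__(self, target):
        self.target = target
        functools.update_wrapper(self, target)
    def __call__(self, *args, **kwargs):
        return self.target(*args, **kwargs)

def func_deco(target):
    @functools.wraps(target)
    def inner(*args, **kwargs):
        return target(*args, **kwargs)
    return inner

@func_deco
def f(x):
    return x + 1

@class_deco
def g(x):
    return x + 1

if __name__ == '__main__':
    pickle.dumps(f)      # works: looked up like a normal top-level function
    try:
        pickle.dumps(g)  # fails: g is an instance holding the now-hidden original function
    except pickle.PicklingError as exc:
        print(exc)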
I also had some problem using decorators in multiprocessing. I'm not sure if it's the same problem as yours:
My code looked like this:
from multiprocessing import Pool

def decorate_func(f):
    def _decorate_func(*args, **kwargs):
        print "I'm decorating"
        return f(*args, **kwargs)
    return _decorate_func

@decorate_func
def actual_func(x):
    return x ** 2

my_swimming_pool = Pool()
result = my_swimming_pool.apply_async(actual_func, (2,))
print result.get()
and when I run the code I get this:
Traceback (most recent call last):
  File "test.py", line 15, in <module>
    print result.get()
  File "somedirectory_too_lengthy_to_put_here/lib/python2.7/multiprocessing/pool.py", line 572, in get
    raise self._value
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
I fixed it by defining a new function to wrap the function in the decorator function, instead of using the decorator syntax
from multiprocessing import Pool

def decorate_func(f):
    def _decorate_func(*args, **kwargs):
        print "I'm decorating"
        return f(*args, **kwargs)
    return _decorate_func

def actual_func(x):
    return x ** 2

def wrapped_func(*args, **kwargs):
    return decorate_func(actual_func)(*args, **kwargs)

my_swimming_pool = Pool()
result = my_swimming_pool.apply_async(wrapped_func, (2,))
print result.get()
The code ran perfectly and I got:
I'm decorating
4
I'm not very experienced at Python, but this solution solved my problem for me
If you want the decorators badly enough (like me), you can also use exec() on the function's source string to circumvent the pickling problem mentioned above.
I wanted to be able to pass all the arguments to an original function and then use them successively. The following is my code for it.
At first, I made a make_functext() function to convert the target function object to a string. For that, I used the getsource() function from the inspect module (see the documentation, and note that it can't retrieve source code from compiled code, etc.). Here it is:
from inspect import getsource

def make_functext(func):
    ft = '\n'.join(getsource(func).split('\n')[1:])  # Removing the decorator, of course
    ft = ft.replace(func.__name__, 'func')  # Making function callable with 'func'
    ft = ft.replace('#§ ', '').replace('#§', '')  # For using commented code starting with '#§'
    ft = ft.strip()  # In case the function code was indented
    return ft
It is used in the following _worker() function that will be the target of the processes:
def _worker(functext, args):
    scope = {}  # This is needed to keep executed definitions
    exec(functext, scope)
    scope['func'](args)  # Using func from scope
And finally, here's my decorator:
from multiprocessing import Process

def parallel(num_processes, **kwargs):
    def parallel_decorator(func, num_processes=num_processes):
        functext = make_functext(func)
        print('This is the parallelized function:\n', functext)
        def function_wrapper(funcargs, num_processes=num_processes):
            workers = []
            print('Launching processes...')
            for k in range(num_processes):
                p = Process(target=_worker, args=(functext, funcargs[k]))  # use args here
                p.start()
                workers.append(p)
        return function_wrapper
    return parallel_decorator
The code can finally be used by defining a function like this:
@parallel(4)
def hello(args):
    #§ from time import sleep  # use '#§' to avoid unnecessary (re)imports in main program
    name, seconds = tuple(args)  # unpack args-list here
    sleep(seconds)
    print('Hi', name)
... which can now be called like this:
hello([['Marty', 0.5],
       ['Catherine', 0.9],
       ['Tyler', 0.7],
       ['Pavel', 0.3]])
... which outputs:
This is the parallelized function:
def func(args):
    from time import sleep
    name, seconds = tuple(args)
    sleep(seconds)
    print('Hi', name)
Launching processes...
Hi Pavel
Hi Marty
Hi Tyler
Hi Catherine
Thanks for reading, this is my very first post. If you find any mistakes or bad practices, feel free to leave a comment. I know that these string conversions are quite dirty, though...
If you use this code for your decorator:
import multiprocessing
from types import MethodType

DEFAULT_POOL = []

def run_parallel(_func=None, *, name: str = None, context_pool: list = DEFAULT_POOL):
    class RunParallel:
        def __init__(self, func):
            self.func = func

        def __call__(self, *args, **kwargs):
            process = multiprocessing.Process(target=self.func, name=name, args=args, kwargs=kwargs)
            context_pool.append(process)
            process.start()

        def __get__(self, instance, owner):
            return self if instance is None else MethodType(self, instance)

    if _func is None:
        return RunParallel
    else:
        return RunParallel(_func)

def wait_context(context_pool: list = DEFAULT_POOL, kill_others_if_one_fails: bool = False):
    finished = []
    for process in context_pool:
        process.join()
        finished.append(process)
        if kill_others_if_one_fails and process.exitcode != 0:
            break

    if kill_others_if_one_fails:
        # kill unfinished processes
        for process in context_pool:
            if process not in finished:
                process.kill()
        # wait for every process to be dead
        for process in context_pool:
            process.join()
Then you can use it like this, in these 4 examples:
@run_parallel
def m1(a, b="b"):
    print(f"m1 -- {a=} {b=}")

@run_parallel(name="mym2", context_pool=DEFAULT_POOL)
def m2(d, cc="cc"):
    print(f"m2 -- {d} {cc=}")
    a = 1/0

class M:
    @run_parallel
    def c3(self, k, n="n"):
        print(f"c3 -- {k=} {n=}")

    @run_parallel(name="Mc4", context_pool=DEFAULT_POOL)
    def c4(self, x, y="y"):
        print(f"c4 -- {x=} {y=}")

if __name__ == "__main__":
    m1(11)
    m2(22)
    M().c3(33)
    M().c4(44)
    wait_context(kill_others_if_one_fails=True)
The output will be:
m1 -- a=11 b='b'
m2 -- 22 cc='cc'
c3 -- k=33 n='n'
(followed by the exception raised in method m2)
