Normally you can concatenate options like '-abbb', which expands to '-a -b -b -b'. The counts would be 1 for a and 3 for b.
However, when mixing prefix_chars I see something different:
import argparse
parser = argparse.ArgumentParser( prefix_chars='-+' )
parser.add_argument( '-x', action='count', dest='counter1' )
parser.add_argument( '+x', action='count', dest='counter2' )
args = parser.parse_args( '-xxx +xxx -xxx'.split() )
print( 'counter1 = ' + str(args.counter1) )
print( 'counter2 = ' + str(args.counter2) )
Running this results in:
counter1 = 8
counter2 = 1
Apparently '+xxx' doesn't expand to '+x +x +x', but to '+x -x -x'. That accounts for the totals: the two '-xxx' clusters contribute 3 each to counter1, while '+xxx' contributes 1 to counter2 and 2 more to counter1, so counter1 = 3 + 2 + 3 = 8 and counter2 = 1.
Changing the prefix_chars to '+-' results in:
counter1 = 2
counter2 = 7
Now '-xxx' expands to '-x +x +x'.
Is this defined behaviour, or am I missing something?
This was patched in late 2010, early in the 2.7 series:
http://bugs.python.org/issue9352
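On a fixed Python, each cluster keeps its own prefix character, so the script above should print (expected output, inferred from the fix rather than captured from a run):
counter1 = 6
counter2 = 3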
================
I'm not aware of bug/issues or code changes that would affect this, but I could dig into it.
For a start, strings of single prefix options are handled rather deeply in the parsing. In the current argparse.py the relevant code is:
class ArgumentParser:
    def _parse_known_args(self, arg_strings, namespace):
        ...
        # function to convert arg_strings into an optional action
        def consume_optional(start_index):
            match_argument = self._match_argument
            action_tuples = []
            while True:
                ...
                chars = self.prefix_chars  # e.g. the '-+' parameter
                if arg_count == 0 and option_string[1] not in chars:
                    action_tuples.append((action, [], option_string))
                    char = option_string[0]
                    option_string = char + explicit_arg[0]
                    new_explicit_arg = explicit_arg[1:] or None
                    optionals_map = self._option_string_actions
                    if option_string in optionals_map:
                        action = optionals_map[option_string]
                        explicit_arg = new_explicit_arg
                    else:
                        msg = _('ignored explicit argument %r')
                        raise ArgumentError(action, msg % explicit_arg)
It's the pair of lines:
char = option_string[0]
option_string = char + explicit_arg[0]
that preserves the initial -/+ when handling the repeated characters (in the unparsed explicit_arg string).
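As a minimal sketch of the fixed behaviour (my own illustration, not code from argparse), each character peeled off a cluster is re-prefixed with the cluster's own leading character:

def expand_cluster(option_string):
    # '+xxx' -> ['+x', '+x', '+x']; '-xxx' -> ['-x', '-x', '-x']
    char = option_string[0]  # the cluster's own prefix, kept for every piece
    return [char + c for c in option_string[1:]]

print(expand_cluster('+xxx'))  # ['+x', '+x', '+x']
print(expand_cluster('-xxx'))  # ['-x', '-x', '-x']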
I can imagine the code originally splitting '+xyz' into +x,-y,-z and being corrected to produce +x,+y,+z, but it would take some digging into the bug tracker and/or the Python repository to find out if and when that change was made.
What does your problem argparse.py have at this point?
I'm working on a bazel rule (using version 5.2.0) that uses SWIG (version 4.0.1) to make a python library from C++ code, adapted from a rule in the tensorflow library. The problem I've run into is that, depending on the contents of ctx.file.source.path, the swig invocation might produce a necessary .h file. If it does, the rule below works great. If it doesn't, I get:
ERROR: BUILD:31:11: output 'foo_swig_h.h' was not created
ERROR: BUILD:31:11: SWIGing foo.i. failed: not all outputs were created or valid
If the h_out stuff is removed from _py_swig_gen_impl, the rule below works great when swig doesn't produce the .h file. But if swig does produce one, bazel seems to ignore it, and it isn't available for native.cc_binary to compile, so gcc fails with a 'no such file or directory' error on an #include <foo_swig_cc.h> line in foo_swig_cc.cc.
(The presence or absence of the .h file in the output is determined by whether the .i file at ctx.file.source.path uses SWIG's "directors" feature.)
def _include_dirs(deps):
    return depset(transitive = [dep[CcInfo].compilation_context.includes for dep in deps]).to_list()

def _headers(deps):
    return depset(transitive = [dep[CcInfo].compilation_context.headers for dep in deps]).to_list()

# Bazel rules for building swig files.
def _py_swig_gen_impl(ctx):
    module_name = ctx.attr.module_name
    cc_out = ctx.actions.declare_file(module_name + "_swig_cc.cc")
    h_out = ctx.actions.declare_file(module_name + "_swig_h.h")
    py_out = ctx.actions.declare_file(module_name + ".py")

    args = ["-c++", "-python", "-py3"]
    args += ["-module", module_name]
    args += ["-I" + x for x in _include_dirs(ctx.attr.deps)]
    args += ["-I" + x.dirname for x in ctx.files.swig_includes]
    args += ["-o", cc_out.path]
    args += ["-outdir", py_out.dirname]
    args += ["-oh", h_out.path]
    args.append(ctx.file.source.path)

    outputs = [cc_out, h_out, py_out]

    ctx.actions.run(
        executable = "swig",
        arguments = args,
        mnemonic = "Swig",
        inputs = [ctx.file.source] + _headers(ctx.attr.deps) + ctx.files.swig_includes,
        outputs = outputs,
        progress_message = "SWIGing %{input}.",
    )

    return [DefaultInfo(files = depset(direct = [cc_out, py_out]))]

_py_swig_gen = rule(
    attrs = {
        "source": attr.label(
            mandatory = True,
            allow_single_file = True,
        ),
        "swig_includes": attr.label_list(
            allow_files = [".i"],
        ),
        "deps": attr.label_list(
            allow_files = True,
            providers = [CcInfo],
        ),
        "module_name": attr.string(mandatory = True),
    },
    implementation = _py_swig_gen_impl,
)

def py_wrap_cc(name, source, module_name = None, deps = [], copts = [], **kwargs):
    if module_name == None:
        module_name = name

    python_deps = [
        "@local_config_python//:python_headers",
        "@local_config_python//:python_lib",
    ]

    # First, invoke the _py_swig_gen rule, which runs swig. This outputs:
    # `module_name.cc`, `module_name.py`, and, sometimes, `module_name.h` files.
    swig_rule_name = "swig_gen_" + name
    _py_swig_gen(
        name = swig_rule_name,
        source = source,
        swig_includes = ["//third_party/swig_rules:swig_includes"],
        deps = deps + python_deps,
        module_name = module_name,
    )

    # Next, we need to compile the `module_name.cc` and `module_name.h` files
    # from the previous rule. The `module_name.py` file already generated
    # expects there to be a `_module_name.so` file, so we name the cc_binary
    # rule this way to make sure that's the resulting file name.
    cc_lib_name = "_" + module_name + ".so"
    native.cc_binary(
        name = cc_lib_name,
        srcs = [":" + swig_rule_name],
        linkopts = ["-dynamic", "-L/usr/local/lib/"],
        linkshared = True,
        deps = deps + python_deps,
    )

    # Finally, package everything up as a python library that can be depended
    # on. Note that this rule uses the user-given `name`.
    native.py_library(
        name = name,
        srcs = [":" + swig_rule_name],
        srcs_version = "PY3",
        data = [":" + cc_lib_name],
        imports = ["./"],
    )
My question, broadly, is how best to handle this with a single rule. I've tried adding a ctx.actions.write before the ctx.actions.run, thinking that I could generate a dummy '.h' file that would be overwritten if needed. That gives me:
ERROR: BUILD:41:11: for foo_swig_h.h, previous action: action 'Writing file foo_swig_h.h', attempted action: action 'SWIGing foo.i.'
My next idea is to remove the h_out stuff and then try to capture the h file for the cc_binary rule with some kind of glob invocation.
I've seen two approaches: add an attribute to indicate whether it applies, or write a wrapper script to generate it unconditionally.
Adding an attribute means something like "has_h": attr.bool(), and then using that in _py_swig_gen_impl to make the ctx.actions.declare_file(module_name + "_swig_h.h") call conditional, as sketched below.
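A minimal sketch of that variant (the "has_h" attribute name is hypothetical, and only the parts of _py_swig_gen_impl that change are shown):

def _py_swig_gen_impl(ctx):
    module_name = ctx.attr.module_name
    cc_out = ctx.actions.declare_file(module_name + "_swig_cc.cc")
    py_out = ctx.actions.declare_file(module_name + ".py")
    outputs = [cc_out, py_out]
    args = ["-c++", "-python", "-py3", "-module", module_name, "-o", cc_out.path]
    # Only declare (and ask swig for) the header when the .i file will produce one.
    if ctx.attr.has_h:
        h_out = ctx.actions.declare_file(module_name + "_swig_h.h")
        args += ["-oh", h_out.path]
        outputs.append(h_out)
    # ... remaining args and the ctx.actions.run(...) call as before ...

with "has_h": attr.bool(default = False) added to the rule's attrs. The cost is that every BUILD target has to know whether its .i file uses directors.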
The wrapper script option means using something like this for the executable:
#!/bin/bash
set -e
touch the_path_of_the_header
exec swig "$@"
That will unconditionally create the output, and then swig will overwrite it if applicable. If it's not applicable, then passing around an empty header file in the Bazel rules should be harmless.
For posterity, this is what my _py_swig_gen_impl looks like after implementing @Brian's suggestion above:
def _py_swig_gen_impl(ctx):
    module_name = ctx.attr.module_name
    cc_out = ctx.actions.declare_file(module_name + "_swig_cc.cc")
    h_out = ctx.actions.declare_file(module_name + "_swig_h.h")
    py_out = ctx.actions.declare_file(module_name + ".py")

    include_dirs = _include_dirs(ctx.attr.deps)
    headers = _headers(ctx.attr.deps)

    args = ["-c++", "-python", "-py3"]
    args += ["-module", module_name]
    args += ["-I" + x for x in include_dirs]
    args += ["-I" + x.dirname for x in ctx.files.swig_includes]
    args += ["-o", cc_out.path]
    args += ["-outdir", py_out.dirname]
    args += ["-oh", h_out.path]
    args.append(ctx.file.source.path)

    outputs = [cc_out, h_out, py_out]

    # Depending on the contents of `ctx.file.source`, swig may or may not
    # output a .h file needed by subsequent rules. Bazel doesn't like optional
    # outputs, so instead of invoking swig directly we're going to make a
    # lightweight executable script that first `touch`es the .h file that may
    # get generated, and then execute that. This means we may be propagating
    # an empty .h file around as a "dependency" sometimes, but that's okay.
    swig_script_file = ctx.actions.declare_file("swig_exec.sh")
    ctx.actions.write(
        output = swig_script_file,
        is_executable = True,
        content = "#!/bin/bash\n\nset -e\ntouch " + h_out.path + "\nexec swig \"$@\"",
    )

    ctx.actions.run(
        executable = swig_script_file,
        arguments = args,
        mnemonic = "Swig",
        inputs = [ctx.file.source] + headers + ctx.files.swig_includes,
        outputs = outputs,
        progress_message = "SWIGing %{input}.",
    )

    return [
        DefaultInfo(files = depset(direct = outputs)),
    ]
The ctx.actions.write generates the suggested bash script:
#!/bin/bash
set -e
touch %{h_out.path}
exec swig "$@"
This guarantees that the expected h_out will always be produced by ctx.actions.run, whether or not swig actually generates it.
I am trying to write tests (using pytest) for a script, but I don't know how to create/pass arguments to main(), especially when it doesn't receive arguments. And even if I change it to def main(args=None):, it will still run the parser on the first line.
tests.py
def test_main(capfd):
    main()
    out, err = capfd.readouterr()
    assert out == "my expected output"
script.py
import argparse
import re
import sys

def init_parser():
    parser = argparse.ArgumentParser(
        description="The script searches one or more named input files "
                    "for lines containing a match to a regular expression "
                    "pattern.")
    parser.add_argument('regex', help='the regular expression.')
    parser.add_argument('infile', nargs='*', type=argparse.FileType('r'),
                        default=[sys.stdin],
                        help='the name of the file(s) to search.')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-u', '--underscore', action='store_true',
                       help='prints "^" under the matching text.')
    group.add_argument('-c', '--color', action='store_true',
                       help='highlights matching text.')
    group.add_argument('-m', '--machine', action='store_true',
                       help='generates machine readable output.')
    return parser

def main():
    args = init_parser().parse_args()
    for file in args.infile:
        for i, line in enumerate(iter(file.readline, '')):
            for substring in re.finditer(args.regex, line):
                if args.underscore:
                    underscore_output(file.name, i + 1, line[:-1],
                                      substring.start(), substring.end())
                elif args.color:
                    color_output(file.name, i + 1, line[:-1],
                                 substring.group())
                elif args.machine:
                    machine_output(file.name, i + 1, substring.start(),
                                   substring.group())
                else:
                    print_line(file.name, i + 1, line[:-1])
In the test code, you would patch out sys.argv before calling main:

def test_main(monkeypatch):
    monkeypatch.setattr("sys.argv", ["script.py", "test_regex", "test_infile"])
    main()
    # put your assertions here

The leading "script.py" element only stands in for the program name; parse_args() reads sys.argv[1:], so it is never parsed as an argument.
parse_args can take an explicit None argument to tell it to parse sys.argv[1:]. Write your code so that it can be easily tested:
def main(args=None):
    args = init_parser().parse_args(args)
    ...
In production use, you'll call main() to let it parse sys.argv. For tests, you'll pass a specific set of arguments.
def test_main(capfd):
    main(['[a-z]*', 'foo.txt', '-u'])  # for example
    out, err = capfd.readouterr()
    assert out == "my expected output"
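For completeness, the script's entry point then stays trivial (a sketch, assuming script.py is run directly):

if __name__ == '__main__':
    main()  # args is None here, so parse_args falls back to sys.argv[1:]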
I am simplifying my problem here. I need to do this:
python test.py --arg1 '--no-route53'
I added the parser like this:
parser.add_argument("--arg1", nargs="?", type=str, help="option")
args = parser.parse_args()
I wanted to get the '--no-route53' as a whole string and use it later in my script. But I keep getting this error:
test.py: error: unrecognized arguments: --no-route53
How can I work around it?
UPDATE 1: if I add an extra space after '--no-route53', like this, it works:
python test.py --arg1 '--no-route53 '
I had the exact same issue a while ago. I ended up pre-parsing the sys.argv list:
def bug_workaround(argv):
    # arg needs to be prepended by space in order to not be interpreted as
    # an option
    add_space = False
    args = []
    for arg in argv[1:]:  # Skip argv[0] as that should not be passed to parse_args
        if add_space:
            arg = " " + arg
        add_space = True if arg == "--args" else False
        args.append(arg)
    return args

parser = argparse.ArgumentParser(description='My program.')
.
.
.
parser.add_argument('--args', type=str,
                    help='Extra args (in "quotes")')

args = parser.parse_args(bug_workaround(sys.argv))

# Prune added whitespace (for bug workaround)
if args.args:
    args.args = args.args[1:]
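As a lighter-weight alternative (standard argparse behaviour, not specific to this workaround): attaching the value with '=' keeps it from being scanned as a separate option, so the pre-parsing pass can often be skipped entirely:

python test.py --arg1='--no-route53'

or, from code:

args = parser.parse_args(['--arg1=--no-route53'])
print(args.arg1)  # --no-route53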
I am attempting to use argparse to convert an argument into a timedelta object. My program reads in strings supplied by the user and converts them to various datetime objects for later usage. I cannot get the filter_length argument to process correctly though. My code:
import datetime
import time
import argparse

def mkdate(datestring):
    return datetime.datetime.strptime(datestring, '%Y-%m-%d').date()

def mktime(timestring):
    return datetime.datetime.strptime(timestring, '%I:%M%p').time()

def mkdelta(deltatuple):
    return datetime.timedelta(deltatuple)

parser = argparse.ArgumentParser()
parser.add_argument('start_date', type=mkdate, nargs=1)
parser.add_argument('start_time', type=mktime, nargs=1)
parser.add_argument('filter_length', type=mkdelta, nargs=1,
                    default=datetime.timedelta(1))  # default filter length is 1 day
I run the program, passing 1 as the timedelta value (I only want it to be one day):
> python program.py 2012-09-16 11:00am 1
But I get the following error:
program.py: error: argument filter_length: invalid mkdelta value: '1'
I don't understand why the value is invalid. If I call the mkdelta function on its own, like this:
mkdelta(1)
print mkdelta(1)
It returns:
datetime.timedelta(1)
1 day, 0:00:00
This is exactly the value that I'm looking for. Can someone help me figure out how to do this conversion properly using argparse?
Notice the quotes around '1' in your error message? argparse passes a string to mkdelta, whereas in your test code you pass an integer.
Your function doesn't handle a string argument, which is what argparse is handing it; call int() on it:
def mkdelta(deltatuple):
    return datetime.timedelta(int(deltatuple))
If you need to support more than days, you'll have to find a way to parse the argument passed in into timedelta arguments.
You could, for example, support d, h, m or s postfixes to denote days, hours, minutes or seconds:
_units = dict(d=60*60*24, h=60*60, m=60, s=1)

def mkdelta(deltavalue):
    seconds = 0
    defaultunit = unit = _units['d']  # default to days
    value = ''
    for ch in list(str(deltavalue).strip()):
        if ch.isdigit():
            value += ch
            continue
        if ch in _units:
            unit = _units[ch]
            if value:
                seconds += unit * int(value)
                value = ''
            unit = defaultunit
            continue
        if ch in ' \t':
            # skip whitespace
            continue
        raise ValueError('Invalid time delta: %s' % deltavalue)
    if value:
        seconds += unit * int(value)
    return datetime.timedelta(seconds=seconds)
Now your mkdelta function accepts more complete deltas, and still accepts plain integers:
>>> mkdelta('1d')
datetime.timedelta(1)
>>> mkdelta('10s')
datetime.timedelta(0, 10)
>>> mkdelta('5d 10h 3m 10s')
datetime.timedelta(5, 36190)
>>> mkdelta(5)
datetime.timedelta(5)
>>> mkdelta('1')
datetime.timedelta(1)
The default unit is days.
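To tie this back to the question, the improved mkdelta plugs straight into the type= parameter of the original add_argument call (a sketch; I've switched nargs=1 to nargs='?' so the attribute holds the timedelta itself rather than a one-element list):

parser.add_argument('filter_length', type=mkdelta, nargs='?',
                    default=datetime.timedelta(1))

After that, both `program.py 2012-09-16 11:00am 1` and `program.py 2012-09-16 11:00am '5d 10h'` (quoted so the shell passes it as one argument) parse cleanly.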
You could use a custom action to collect all the remaining args and parse them into a timedelta.
This will allow you to write CLI commands such as
% test.py 2012-09-16 11:00am 2 3 4 5
datetime.timedelta(2, 3, 5004) # args.filter_length
You could also provide optional arguments for --days, --seconds, etc, so you can write CLI commands such as
% test.py 2012-09-16 11:00am --weeks 6 --days 0
datetime.timedelta(42) # args.filter_length
% test.py 2012-09-16 11:00am --weeks 6.5 --days 0
datetime.timedelta(45, 43200)
import datetime as dt
import argparse

def mkdate(datestring):
    return dt.datetime.strptime(datestring, '%Y-%m-%d').date()

def mktime(timestring):
    return dt.datetime.strptime(timestring, '%I:%M%p').time()

class TimeDeltaAction(argparse.Action):
    def __call__(self, parser, args, values, option_string=None):
        # print '{n} {v} {o}'.format(n=args, v=values, o=option_string)
        setattr(args, self.dest, dt.timedelta(*map(float, values)))

parser = argparse.ArgumentParser()
parser.add_argument('start_date', type=mkdate)
parser.add_argument('start_time', type=mktime)
parser.add_argument('--days', type=float, default=1)
parser.add_argument('--seconds', type=float, default=0)
parser.add_argument('--microseconds', type=float, default=0)
parser.add_argument('--milliseconds', type=float, default=0)
parser.add_argument('--minutes', type=float, default=0)
parser.add_argument('--hours', type=float, default=0)
parser.add_argument('--weeks', type=float, default=0)
parser.add_argument('filter_length', nargs='*', action=TimeDeltaAction)

args = parser.parse_args()
if not args.filter_length:
    args.filter_length = dt.timedelta(
        args.days, args.seconds, args.microseconds, args.milliseconds,
        args.minutes, args.hours, args.weeks)
print(repr(args.filter_length))
This gist seems to solve your problem: https://gist.github.com/jnothman/4057689
Just in case somebody lands here looking to add a pandas.Timedelta to an argument parser (as I just did), it turns out the following works just fine:
import argparse
import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument('--timeout', type=pd.Timedelta)
Then:
>>> parser.parse_args(['--timeout', '24 hours'])
Namespace(timeout=Timedelta('1 days 00:00:00'))
I am a Python re-newbie. I would like advice on handling program parameters which are in a file in json format. Currently, I am doing something like what is shown below, however, it seems too wordy, and the idea of typing the same literal string multiple times (sometimes with dashes and sometimes with underscores) seems juvenile - error prone - stinky... :-) (I do have many more parameters!)
#!/usr/bin/env python
import sys
import os
import json  # for control file parsing

# control parameters
mpi_nodes = 1
cluster_size = None
initial_cutoff = None
# ...

# process the arguments
if len(sys.argv) != 2:
    raise Exception(
        """Usage:
        run_foo <controls.json>
        Where:
        <control.json> is a dictionary of run parameters
        """
    )

# We expect a .json file with our parameters
controlsFileName = sys.argv[1]
err = ""
err += ""  # validateFileArgument(controlsFileName, exists=True)

# read in the control parameters from the .json file
try:
    controls = json.load(open(controlsFileName, "r"))
except:
    err += "Could not process the file '" + controlsFileName + "'!\n"

# check each control parameter. The first one is optional
if "mpi-nodes" in controls:
    mpi_nodes = controls["mpi-nodes"]
else:
    mpi_nodes = controls["mpi-nodes"] = 1

if "cluster-size" in controls:
    cluster_size = controls["cluster-size"]
else:
    err += "Missing control definition for \"cluster-size\".\n"

if "initial-cutoff" in controls:
    initial_cutoff = controls["initial-cutoff"]
else:
    err += "Missing control definition for \"initial-cutoff\".\n"

# ...
# Quit if any of these things were not true
if len(err) > 0:
    print err
    exit()

# ...
This works, but it seems like there must be a better way. I am stuck with the requirements to use a json file and to use the hyphenated parameter names. Any ideas?
I was looking for something with more static binding. Perhaps this is as good as it gets.
Usually, we do things like this.
def get_parameters(some_file_name):
    source = json.load(open(some_file_name))  # json.load, not loads: we have a file, not a string
    return dict(
        mpi_nodes=source.get('mpi-nodes', 1),
        cluster_size=source['cluster-size'],
        initial_cutoff=source['initial-cutoff'],
    )

controlsFileName = sys.argv[1]
try:
    params = get_parameters(controlsFileName)
except IOError:
    print "Could not process the file '{0}'!".format(controlsFileName)
    sys.exit(1)
except KeyError, e:
    print "Missing control definition for '{0}'.".format(e.message)
    sys.exit(2)
At the end, params['mpi_nodes'] holds the value read from 'mpi-nodes'.
If you want a simple variable, you do this: mpi_nodes = params['mpi_nodes']
If you want a namedtuple, change get_parameters like this:

from collections import namedtuple

def get_parameters(some_file_name):
    source = json.load(open(some_file_name))
    Parameters = namedtuple('Parameters', 'mpi_nodes, cluster_size, initial_cutoff')
    return Parameters(
        source.get('mpi-nodes', 1),
        source['cluster-size'],
        source['initial-cutoff'],
    )
I don't know if you'd find that better or not.
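For what it's worth, the namedtuple version then reads attribute-style instead of via dict lookups (a small usage sketch, assuming a controls.json as described):

params = get_parameters('controls.json')
print params.cluster_size   # instead of params['cluster_size']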
The argparse library is nice; it can handle most of the argument parsing and validation for you, as well as printing pretty help screens.
[1] http://docs.python.org/dev/library/argparse.html
I will knock up a quick demo showing how you'd want to use it this arvo.
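In the meantime, here is a minimal sketch of the idea (my own illustration, not the promised demo): declare each control once with its hyphenated name and let argparse supply the types, defaults, and missing-parameter errors. argparse stores --mpi-nodes as args.mpi_nodes, which takes care of the hyphen/underscore mismatch.

import argparse
import json
import sys

parser = argparse.ArgumentParser()
parser.add_argument('--mpi-nodes', type=int, default=1)
parser.add_argument('--cluster-size', type=int, required=True)
parser.add_argument('--initial-cutoff', type=float, required=True)

# Re-shape the json dict {"cluster-size": 8, ...} into a synthetic command
# line so argparse can apply its validation to the file's contents.
controls = json.load(open(sys.argv[1]))
argv = []
for name, value in controls.items():
    argv += ['--' + name, str(value)]
args = parser.parse_args(argv)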
Assuming you have many more parameters to process, something like this could work:
def underscore(s):
    return s.replace('-', '_')

# parameters with default values
for name, default in (("mpi-nodes", 1),):
    globals()[underscore(name)] = controls.get(name, default)

# mandatory parameters
for name in ("cluster-size", "initial-cutoff"):
    try:
        globals()[underscore(name)] = controls[name]
    except KeyError:
        err += "Missing control definition for %r" % name
Instead of manipulating globals, you can also make this more explicit:
def underscore(s):
    return s.replace('-', '_')

settings = {}

# parameters with default values
for name, default in (("mpi-nodes", 1),):
    settings[underscore(name)] = controls.get(name, default)

# mandatory parameters
for name in ("cluster-size", "initial-cutoff"):
    try:
        settings[underscore(name)] = controls[name]
    except KeyError:
        err += "Missing control definition for %r" % name

# print out err if necessary
mpi_nodes = settings['mpi_nodes']
cluster_size = settings['cluster_size']
initial_cutoff = settings['initial_cutoff']
I learned something from all of these responses - thanks! I would like to get feedback on my approach which incorporates something from each suggestion. In addition to the conditions imposed by the client, I want something:
1) that is fairly obvious to use and to debug
2) that is easy to maintain and modify
I decided to incorporate str.replace, namedtuple, and globals(), creating a ControlParameters namedtuple in the globals() namespace.
#!/usr/bin/env python
import sys
import os
import collections
import json

def get_parameters(parameters_file_name):
    """
    Access all of the control parameters from the json filename given. A
    variable of type namedtuple named "ControlParameters" is injected
    into the global namespace. Parameter validation is not performed. Both
    the names and the defaults, if any, are defined herein. Parameters not
    found in the json file will get values of None.

    Parameter usage example: ControlParameters.cluster_size
    """
    parameterValues = json.load(open(parameters_file_name, "r"))
    Parameters = collections.namedtuple('Parameters',
        """
        mpi_nodes
        cluster_size
        initial_cutoff
        truncation_length
        """
    )
    parameters = Parameters(
        parameterValues.get(Parameters._fields[0].replace('_', '-'), 1),
        parameterValues.get(Parameters._fields[1].replace('_', '-')),
        parameterValues.get(Parameters._fields[2].replace('_', '-')),
        parameterValues.get(Parameters._fields[3].replace('_', '-'))
    )
    globals()["ControlParameters"] = parameters

# process the program argument(s)
err = ""
if len(sys.argv) != 2:
    raise Exception(
        """Usage:
        foo <control.json>
        Where:
        <control.json> is a dictionary of run parameters
        """
    )

# We expect a .json file with our parameters
parameters_file_name = sys.argv[1]
err += ""  # validateFileArgument(parameters_file_name, exists=True)

if err == "":
    get_parameters(parameters_file_name)
    cp_dict = ControlParameters._asdict()
    for name in ControlParameters._fields:
        if cp_dict[name] is None:
            err += "Missing control parameter '%s'\r\n" % name

print err
print "Done"