evaluate logical expression from string inputs - python

i am trying to rewrite a conditional operation purely using functions and introduce operators and arguments as variables.
for example something like 4>5 will be replaced with
import operator
arg1 = 4
arg2 = 5
result = operator.gt(arg1,arg2)
i would want to replicate the same for a complex expression that use multiple operators
for example in below case data is the input numbers will need to replaced with a function that can have values and operator supplied
data = {'val1':1,'val2':2,'val3':3,'val4':4,'val5':5,'val6':6}
exression_output = (data['val1'] > 0.5) & (data['val2'] == 2) & (data['val3'] >= 2)
what i did first is manually replace them like below
import operator
ops = {
'+' : operator.add, '-' : operator.sub, '*' : operator.mul, '/' : operator.truediv, # use operator.div for Python 2
'%' : operator.mod, '^' : operator.xor, '<' : operator.lt, '<=' : operator.le,
'==' : operator.eq, '!=' : operator.ne, '>' : operator.gt, '>=' : operator.ge,
'&' :operator.and_
}
def eval_logical_operator(data,operations):
if (len(operations)==1):
return ops[operations[0]['operation']](data[operations[0]['op1']], operations[0]['op2'])
if (len(operations)==2):
return operator.and_(
ops[operations[0]['operation']](data[operations[0]['op1']], operations[0]['op2']),
ops[operations[1]['operation']](data[operations[1]['op1']], operations[1]['op2'])
)
if (len(operations)==3):
return operator.and_(
operator.and_(
ops[operations[0]['operation']](data[operations[0]['op1']], operations[0]['op2']),
ops[operations[1]['operation']](data[operations[1]['op1']], operations[1]['op2'])
),
ops[operations[2]['operation']](data[operations[2]['op1']], operations[2]['op2'])
)
#return new_df[operator.gt(op1, op2)]
eval_logical_operator(data,[{'op1':'val1','operation':'>','op2':0.5},])
eval_logical_operator(data,[
{'op1':'val1','operation':'>','op2':0.5},
{'op1':'val2','operation':'==','op2':2},
]
)
eval_logical_operator(data,[
{'op1':'val1','operation':'>','op2':0.5},
{'op1':'val2','operation':'==','op2':2},
{'op1':'val3','operation':'>=','op2':2},
]
)
to remove limitation of numbers of condition(if blocks) i replaced it with
below expression. it works, but can work only for And operations
def eval_expression(data, operations):
output = []
for exp in operations:
expression_output = ops[exp['operation']](data[exp['op1']],exp['op2'])
output.append(expression_output)
return all(output)
eval_expression(data,[
{'op1':'val1','operation':'>','op2':0.5},
{'op1':'val2','operation':'==','op2':2},
{'op1':'val3','operation':'>=','op2':2},
]
)
this would fail if an 'or' is introduced like below
(data['val1'] > 0.5) & (data['val2'] == 2) | (data['val3'] >= 2)
can someone suggest a workaround this so that i can do this for a large complex expression like
data = {'val1':1,'val2':2,'val3':3,'val4':4,'val5':5,'val6':6}
output = (((data['val1'] > 0.5) & (data['val2'] == 2)) | (data['val3'] >= 2))&(data['val4'] >= 3)
i am willing to rewrite my input expression to accommodate this.
eval_logical_operator(data,[
{'op1':'val1','operation':'>','op2':0.5},
{'op1':'val2','operation':'==','op2':2},
{'op1':'val3','operation':'>=','op2':2},
]
)

This is how this problem could be approached with Abstract Syntax Tree (AST)
Firstly we will parse the python code into a tree, one standard implementation include ast in the python standard library (see: https://docs.python.org/3/library/ast.html)
Then we convert every BinOp Node to a Call node mapping from operations like +, - to operator.add and operator.sub for example. This can be done by subclassing ast.NodeTrasnformer then implementing visit_BinOp to return appropriate Call object.
This is an example of code a source to source compilation:
from ast import (
BinOp,
Call,
ImportFrom,
Load,
Module,
Name,
NodeTransformer,
alias,
fix_missing_locations,
parse,
unparse,
)
from collections.abc import Iterable
from typing import Any
bin_op_special_cases = {
"Mult": "mul",
"Div": "truediv",
"BitOr": "or_",
"BitXor": "xor",
"BitAnd": "and_",
"MatMult": "matmul",
}
def _get_bin_conversion(op_name: str) -> str:
return bin_op_special_cases.get(op_name, op_name.lower())
def _get_cls_name_of(obj: Any) -> str:
cls = type(obj)
return cls.__name__
class OperationNodeTransformer(NodeTransformer):
def __init__(self, tree) -> None:
self.operator_import_symbols = set()
self.result = fix_missing_locations(self.visit(tree))
def visit_BinOp(self, node: BinOp) -> Call:
lhs, rhs = node.left, node.right
op_name = _get_bin_conversion(_get_cls_name_of(node.op))
self.operator_import_symbols.add(op_name)
new_node = OperationNodeTransformer(
Call(
func=Name(id=op_name, ctx=Load()),
args=[lhs, rhs],
keywords=[],
)
)
self.operator_import_symbols.update(new_node.operator_import_symbols)
return new_node.result
def _is_ImportFromFuture(node) -> bool:
return isinstance(node, ImportFrom) and node.module == "__future__"
def _add_ImportFromNode(
mod: Module, mod_name: str, symbols: Iterable[str], level: int = 0
) -> None:
node = ImportFrom(
module=mod_name,
names=[alias(name=name) for name in symbols],
level=level,
)
body = mod.body
first_expr = body[0]
body.insert(int(_is_ImportFromFuture(first_expr)), node)
def convert_operations(py_code: str) -> str:
tree: Module = parse(py_code)
new = OperationNodeTransformer(tree)
_add_ImportFromNode(new.result, "operator", new.operator_import_symbols)
return unparse(new.result)
sample = """\
def foo(a, b):
return a + b
"""
print(convert_operations(sample))
# Result :
# from operator import add
# def foo(a, b):
# return add(a, b)
Now we can successfully convert python into new python code, although the above code has only been implemented for the following types :
class ast.Add
class ast.Sub
class ast.Mult
class ast.Div
class ast.FloorDiv
class ast.Mod
class ast.Pow
class ast.LShift
class ast.RShift
class ast.BitOr
class ast.BitXor
class ast.BitAnd
class ast.MatMult
(does not fully cover the operator alternatives, but the code is already way too big to be solitarily included in a single answer)
Example of a Fibonacci function being converted using convert_operation :
Old :
def fib(n):
if n <= 1:
return n
return fib(n - 1) + fib(n - 2)
print(fib(5))
Converted :
from operator import sub, add
def fib(n):
if n <= 1:
return n
return add(fib(sub(n, 1)), fib(sub(n, 2)))
print(fib(5))
(For <= to be converted, you'll have to implemented ast.Compare with visit_Compare which was too much to be included in this answer)
Hope that helps...

Related

How do I convert a string containing a simple math expression into a integer? [duplicate]

stringExp = "2^4"
intVal = int(stringExp) # Expected value: 16
This returns the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int()
with base 10: '2^4'
I know that eval can work around this, but isn't there a better and - more importantly - safer method to evaluate a mathematical expression that is being stored in a string?
eval is evil
eval("__import__('os').remove('important file')") # arbitrary commands
eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory
Note: even if you use set __builtins__ to None it still might be possible to break out using introspection:
eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None})
Evaluate arithmetic expression using ast
import ast
import operator as op
# supported operators
operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
ast.USub: op.neg}
def eval_expr(expr):
"""
>>> eval_expr('2^6')
4
>>> eval_expr('2**6')
64
>>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
return eval_(ast.parse(expr, mode='eval').body)
def eval_(node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return operators[type(node.op)](eval_(node.left), eval_(node.right))
elif isinstance(node, ast.UnaryOp): # <operator> <operand> e.g., -1
return operators[type(node.op)](eval_(node.operand))
else:
raise TypeError(node)
You can easily limit allowed range for each operation or any intermediate result, e.g., to limit input arguments for a**b:
def power(a, b):
if any(abs(n) > 100 for n in [a, b]):
raise ValueError((a,b))
return op.pow(a, b)
operators[ast.Pow] = power
Or to limit magnitude of intermediate results:
import functools
def limit(max_=None):
"""Return decorator that limits allowed returned values."""
def decorator(func):
#functools.wraps(func)
def wrapper(*args, **kwargs):
ret = func(*args, **kwargs)
try:
mag = abs(ret)
except TypeError:
pass # not applicable
else:
if mag > max_:
raise ValueError(ret)
return ret
return wrapper
return decorator
eval_ = limit(max_=10**100)(eval_)
Example
>>> evil = "__import__('os').remove('important file')"
>>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:
>>> eval_expr("9**9")
387420489
>>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:
Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py
shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.
from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator
__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''
class NumericStringParser(object):
'''
Most of this code comes from the fourFn.py pyparsing example
'''
def pushFirst(self, strg, loc, toks):
self.exprStack.append(toks[0])
def pushUMinus(self, strg, loc, toks):
if toks and toks[0] == '-':
self.exprStack.append('unary -')
def __init__(self):
"""
expop :: '^'
multop :: '*' | '/'
addop :: '+' | '-'
integer :: ['+' | '-'] '0'..'9'+
atom :: PI | E | real | fn '(' expr ')' | '(' expr ')'
factor :: atom [ expop factor ]*
term :: factor [ multop factor ]*
expr :: term [ addop term ]*
"""
point = Literal(".")
e = CaselessLiteral("E")
fnumber = Combine(Word("+-" + nums, nums) +
Optional(point + Optional(Word(nums))) +
Optional(e + Word("+-" + nums, nums)))
ident = Word(alphas, alphas + nums + "_$")
plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
pi = CaselessLiteral("PI")
expr = Forward()
atom = ((Optional(oneOf("- +")) +
(ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
| Optional(oneOf("- +")) + Group(lpar + expr + rpar)
).setParseAction(self.pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of
# "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + \
ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
term = factor + \
ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
expr << term + \
ZeroOrMore((addop + term).setParseAction(self.pushFirst))
# addop_term = ( addop + term ).setParseAction( self.pushFirst )
# general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
# expr << general_term
self.bnf = expr
# map operator symbols to corresponding arithmetic operations
epsilon = 1e-12
self.opn = {"+": operator.add,
"-": operator.sub,
"*": operator.mul,
"/": operator.truediv,
"^": operator.pow}
self.fn = {"sin": math.sin,
"cos": math.cos,
"tan": math.tan,
"exp": math.exp,
"abs": abs,
"trunc": lambda a: int(a),
"round": round,
"sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}
def evaluateStack(self, s):
op = s.pop()
if op == 'unary -':
return -self.evaluateStack(s)
if op in "+-*/^":
op2 = self.evaluateStack(s)
op1 = self.evaluateStack(s)
return self.opn[op](op1, op2)
elif op == "PI":
return math.pi # 3.1415926535
elif op == "E":
return math.e # 2.718281828
elif op in self.fn:
return self.fn[op](self.evaluateStack(s))
elif op[0].isalpha():
return 0
else:
return float(op)
def eval(self, num_string, parseAll=True):
self.exprStack = []
results = self.bnf.parseString(num_string, parseAll)
val = self.evaluateStack(self.exprStack[:])
return val
You can use it like this
nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0
result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Some safer alternatives to eval() and sympy.sympify().evalf()*:
asteval
numexpr
*SymPy sympify is also unsafe according to the following warning from the documentation.
Warning: Note that this function uses eval, and thus shouldn’t be used on unsanitized input.
Okay, so the problem with eval is that it can escape its sandbox too easily, even if you get rid of __builtins__. All the methods for escaping the sandbox come down to using getattr or object.__getattribute__ (via the . operator) to obtain a reference to some dangerous object via some allowed object (''.__class__.__bases__[0].__subclasses__ or similar). getattr is eliminated by setting __builtins__ to None. object.__getattribute__ is the difficult one, since it cannot simply be removed, both because object is immutable and because removing it would break everything. However, __getattribute__ is only accessible via the . operator, so purging that from your input is sufficient to ensure eval cannot escape its sandbox.
In processing formulas, the only valid use of a decimal is when it is preceded or followed by [0-9], so we just remove all other instances of ..
import re
inp = re.sub(r"\.(?![0-9])","", inp)
val = eval(inp, {'__builtins__':None})
Note that while python normally treats 1 + 1. as 1 + 1.0, this will remove the trailing . and leave you with 1 + 1. You could add ),, and EOF to the list of things allowed to follow ., but why bother?
The reason eval and exec are so dangerous is that the default compile function will generate bytecode for any valid python expression, and the default eval or exec will execute any valid python bytecode. All the answers to date have focused on restricting the bytecode that can be generated (by sanitizing input) or building your own domain-specific-language using the AST.
Instead, you can easily create a simple eval function that is incapable of doing anything nefarious and can easily have runtime checks on memory or time used. Of course, if it is simple math, than there is a shortcut.
c = compile(stringExp, 'userinput', 'eval')
if c.co_code[0]==b'd' and c.co_code[3]==b'S':
return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256]
The way this works is simple, any constant mathematic expression is safely evaluated during compilation and stored as a constant. The code object returned by compile consists of d, which is the bytecode for LOAD_CONST, followed by the number of the constant to load (usually the last one in the list), followed by S, which is the bytecode for RETURN_VALUE. If this shortcut doesn't work, it means that the user input isn't a constant expression (contains a variable or function call or similar).
This also opens the door to some more sophisticated input formats. For example:
stringExp = "1 + cos(2)"
This requires actually evaluating the bytecode, which is still quite simple. Python bytecode is a stack oriented language, so everything is a simple matter of TOS=stack.pop(); op(TOS); stack.put(TOS) or similar. The key is to only implement the opcodes that are safe (loading/storing values, math operations, returning values) and not unsafe ones (attribute lookup). If you want the user to be able to call functions (the whole reason not to use the shortcut above), simple make your implementation of CALL_FUNCTION only allow functions in a 'safe' list.
from dis import opmap
from Queue import LifoQueue
from math import sin,cos
import operator
globs = {'sin':sin, 'cos':cos}
safe = globs.values()
stack = LifoQueue()
class BINARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get(),stack.get()))
class UNARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get()))
def CALL_FUNCTION(context, arg):
argc = arg[0]+arg[1]*256
args = [stack.get() for i in range(argc)]
func = stack.get()
if func not in safe:
raise TypeError("Function %r now allowed"%func)
stack.put(func(*args))
def LOAD_CONST(context, arg):
cons = arg[0]+arg[1]*256
stack.put(context['code'].co_consts[cons])
def LOAD_NAME(context, arg):
name_num = arg[0]+arg[1]*256
name = context['code'].co_names[name_num]
if name in context['locals']:
stack.put(context['locals'][name])
else:
stack.put(context['globals'][name])
def RETURN_VALUE(context):
return stack.get()
opfuncs = {
opmap['BINARY_ADD']: BINARY(operator.add),
opmap['UNARY_INVERT']: UNARY(operator.invert),
opmap['CALL_FUNCTION']: CALL_FUNCTION,
opmap['LOAD_CONST']: LOAD_CONST,
opmap['LOAD_NAME']: LOAD_NAME
opmap['RETURN_VALUE']: RETURN_VALUE,
}
def VMeval(c):
context = dict(locals={}, globals=globs, code=c)
bci = iter(c.co_code)
for bytecode in bci:
func = opfuncs[ord(bytecode)]
if func.func_code.co_argcount==1:
ret = func(context)
else:
args = ord(bci.next()), ord(bci.next())
ret = func(context, args)
if ret:
return ret
def evaluate(expr):
return VMeval(compile(expr, 'userinput', 'eval'))
Obviously, the real version of this would be a bit longer (there are 119 opcodes, 24 of which are math related). Adding STORE_FAST and a couple others would allow for input like 'x=5;return x+x or similar, trivially easily. It can even be used to execute user-created functions, so long as the user created functions are themselves executed via VMeval (don't make them callable!!! or they could get used as a callback somewhere). Handling loops requires support for the goto bytecodes, which means changing from a for iterator to while and maintaining a pointer to the current instruction, but isn't too hard. For resistance to DOS, the main loop should check how much time has passed since the start of the calculation, and certain operators should deny input over some reasonable limit (BINARY_POWER being the most obvious).
While this approach is somewhat longer than a simple grammar parser for simple expressions (see above about just grabbing the compiled constant), it extends easily to more complicated input, and doesn't require dealing with grammar (compile take anything arbitrarily complicated and reduces it to a sequence of simple instructions).
You can use the ast module and write a NodeVisitor that verifies that the type of each node is part of a whitelist.
import ast, math
locals = {key: value for (key,value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max, "pow": pow, "round": round})
class Visitor(ast.NodeVisitor):
def visit(self, node):
if not isinstance(node, self.whitelist):
raise ValueError(node)
return super().visit(node)
whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add, ast.Sub, ast.UnaryOp, ast.Num, ast.BinOp,
ast.Mult, ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor, ast.USub, ast.UAdd, ast.FloorDiv, ast.Mod,
ast.LShift, ast.RShift, ast.Invert, ast.Call, ast.Name)
def evaluate(expr, locals = {}):
if any(elem in expr for elem in '\n#') : raise ValueError(expr)
try:
node = ast.parse(expr.strip(), mode='eval')
Visitor().visit(node)
return eval(compile(node, "<string>", "eval"), {'__builtins__': None}, locals)
except Exception: raise ValueError(expr)
Because it works via a whitelist rather than a blacklist, it is safe. The only functions and variables it can access are those you explicitly give it access to. I populated a dict with math-related functions so you can easily provide access to those if you want, but you have to explicitly use it.
If the string attempts to call functions that haven't been provided, or invoke any methods, an exception will be raised, and it will not be executed.
Because this uses Python's built in parser and evaluator, it also inherits Python's precedence and promotion rules as well.
>>> evaluate("7 + 9 * (2 << 2)")
79
>>> evaluate("6 // 2 + 0.0")
3.0
The above code has only been tested on Python 3.
If desired, you can add a timeout decorator on this function.
I think I would use eval(), but would first check to make sure the string is a valid mathematical expression, as opposed to something malicious. You could use a regex for the validation.
eval() also takes additional arguments which you can use to restrict the namespace it operates in for greater security.
[I know this is an old question, but it is worth pointing out new useful solutions as they pop up]
Since python3.6, this capability is now built into the language, coined "f-strings".
See: PEP 498 -- Literal String Interpolation
For example (note the f prefix):
f'{2**4}'
=> '16'
Based on Perkins' amazing approach, I've updated and improved his "shortcut" for simple algebraic expressions (no functions or variables). Now it works on Python 3.6+ and avoids some pitfalls:
import re
# Kept outside simple_eval() just for performance
_re_simple_eval = re.compile(rb'd([\x00-\xFF]+)S\x00')
def simple_eval(expr):
try:
c = compile(expr, 'userinput', 'eval')
except SyntaxError:
raise ValueError(f"Malformed expression: {expr}")
m = _re_simple_eval.fullmatch(c.co_code)
if not m:
raise ValueError(f"Not a simple algebraic expression: {expr}")
try:
return c.co_consts[int.from_bytes(m.group(1), sys.byteorder)]
except IndexError:
raise ValueError(f"Expression not evaluated as constant: {expr}")
Testing, using some of the examples in other answers:
for expr, res in (
('2^4', 6 ),
('2**4', 16 ),
('1 + 2*3**(4^5) / (6 + -7)', -5.0 ),
('7 + 9 * (2 << 2)', 79 ),
('6 // 2 + 0.0', 3.0 ),
('2+3', 5 ),
('6+4/2*2', 10.0 ),
('3+2.45/8', 3.30625),
('3**3*3/3+3', 30.0 ),
):
result = simple_eval(expr)
ok = (result == res and type(result) == type(res))
print("{} {} = {}".format("OK!" if ok else "FAIL!", expr, result))
OK! 2^4 = 6
OK! 2**4 = 16
OK! 1 + 2*3**(4^5) / (6 + -7) = -5.0
OK! 7 + 9 * (2 << 2) = 79
OK! 6 // 2 + 0.0 = 3.0
OK! 2+3 = 5
OK! 6+4/2*2 = 10.0
OK! 3+2.45/8 = 3.30625
OK! 3**3*3/3+3 = 30.0
Testing bad input:
for expr in (
'foo bar',
'print("hi")',
'2*x',
'lambda: 10',
'2**1234',
):
try:
result = simple_eval(expr)
except ValueError as e:
print(e)
continue
print("OK!") # will never happen
Malformed expression: foo bar
Not a simple algebraic expression: print("hi")
Expression not evaluated as constant: 2*x
Expression not evaluated as constant: lambda: 10
Expression not evaluated as constant: 2**1234
This is a massively late reply, but I think useful for future reference. Rather than write your own math parser (although the pyparsing example above is great) you could use SymPy. I don't have a lot of experience with it, but it contains a much more powerful math engine than anyone is likely to write for a specific application and the basic expression evaluation is very easy:
>>> import sympy
>>> x, y, z = sympy.symbols('x y z')
>>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3})
0.858879991940133
Very cool indeed! A from sympy import * brings in a lot more function support, such as trig functions, special functions, etc., but I've avoided that here to show what's coming from where.
Use eval in a clean namespace:
>>> ns = {'__builtins__': None}
>>> eval('2 ** 4', ns)
16
The clean namespace should prevent injection. For instance:
>>> eval('__builtins__.__import__("os").system("echo got through")', ns)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__import__'
Otherwise you would get:
>>> eval('__builtins__.__import__("os").system("echo got through")')
got through
0
You might want to give access to the math module:
>>> import math
>>> ns = vars(math).copy()
>>> ns['__builtins__'] = None
>>> eval('cos(pi/3)', ns)
0.50000000000000011
Here's my solution to the problem without using eval. Works with Python2 and Python3. It doesn't work with negative numbers.
$ python -m pytest test.py
test.py
from solution import Solutions
class SolutionsTestCase(unittest.TestCase):
def setUp(self):
self.solutions = Solutions()
def test_evaluate(self):
expressions = [
'2+3=5',
'6+4/2*2=10',
'3+2.45/8=3.30625',
'3**3*3/3+3=30',
'2^4=6'
]
results = [x.split('=')[1] for x in expressions]
for e in range(len(expressions)):
if '.' in results[e]:
results[e] = float(results[e])
else:
results[e] = int(results[e])
self.assertEqual(
results[e],
self.solutions.evaluate(expressions[e])
)
solution.py
class Solutions(object):
def evaluate(self, exp):
def format(res):
if '.' in res:
try:
res = float(res)
except ValueError:
pass
else:
try:
res = int(res)
except ValueError:
pass
return res
def splitter(item, op):
mul = item.split(op)
if len(mul) == 2:
for x in ['^', '*', '/', '+', '-']:
if x in mul[0]:
mul = [mul[0].split(x)[1], mul[1]]
if x in mul[1]:
mul = [mul[0], mul[1].split(x)[0]]
elif len(mul) > 2:
pass
else:
pass
for x in range(len(mul)):
mul[x] = format(mul[x])
return mul
exp = exp.replace(' ', '')
if '=' in exp:
res = exp.split('=')[1]
res = format(res)
exp = exp.replace('=%s' % res, '')
while '^' in exp:
if '^' in exp:
itm = splitter(exp, '^')
res = itm[0] ^ itm[1]
exp = exp.replace('%s^%s' % (str(itm[0]), str(itm[1])), str(res))
while '**' in exp:
if '**' in exp:
itm = splitter(exp, '**')
res = itm[0] ** itm[1]
exp = exp.replace('%s**%s' % (str(itm[0]), str(itm[1])), str(res))
while '/' in exp:
if '/' in exp:
itm = splitter(exp, '/')
res = itm[0] / itm[1]
exp = exp.replace('%s/%s' % (str(itm[0]), str(itm[1])), str(res))
while '*' in exp:
if '*' in exp:
itm = splitter(exp, '*')
res = itm[0] * itm[1]
exp = exp.replace('%s*%s' % (str(itm[0]), str(itm[1])), str(res))
while '+' in exp:
if '+' in exp:
itm = splitter(exp, '+')
res = itm[0] + itm[1]
exp = exp.replace('%s+%s' % (str(itm[0]), str(itm[1])), str(res))
while '-' in exp:
if '-' in exp:
itm = splitter(exp, '-')
res = itm[0] - itm[1]
exp = exp.replace('%s-%s' % (str(itm[0]), str(itm[1])), str(res))
return format(exp)
Using lark parser library https://stackoverflow.com/posts/67491514/edit
from operator import add, sub, mul, truediv, neg, pow
from lark import Lark, Transformer, v_args
calc_grammar = f"""
?start: sum
?sum: product
| sum "+" product -> {add.__name__}
| sum "-" product -> {sub.__name__}
?product: power
| product "*" power -> {mul.__name__}
| product "/" power -> {truediv.__name__}
?power: atom
| power "^" atom -> {pow.__name__}
?atom: NUMBER -> number
| "-" atom -> {neg.__name__}
| "(" sum ")"
%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""
#v_args(inline=True)
class CalculateTree(Transformer):
add = add
sub = sub
neg = neg
mul = mul
truediv = truediv
pow = pow
number = float
calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateTree())
calc = calc_parser.parse
def eval_expr(expression: str) -> float:
return calc(expression)
print(eval_expr("2^4"))
print(eval_expr("-1*2^4"))
print(eval_expr("-2^3 + 1"))
print(eval_expr("2**4")) # Error
I came here looking for a mathematic expression parser as well. Reading through some of the answers and looking up libraries, I came across py-expression which I am now using. It basically handles a lot of operators and formula constructs, but if you're missing something you can easily add new operators/functions to it.
The basic syntax is:
from py_expression.core import Exp
exp = Exp()
parsed_formula = exp.parse('a+4')
result = exp.eval(parsed_formula, {"a":2})
The only issue that I've had with it so far is that it doesn't come with built-in mathematical constants nor a mechanism to add them in. I just proposed a solution to that however: https://github.com/FlavioLionelRita/py-expression/issues/7

How to convert a string to standard input? [duplicate]

stringExp = "2^4"
intVal = int(stringExp) # Expected value: 16
This returns the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int()
with base 10: '2^4'
I know that eval can work around this, but isn't there a better and - more importantly - safer method to evaluate a mathematical expression that is being stored in a string?
eval is evil
eval("__import__('os').remove('important file')") # arbitrary commands
eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory
Note: even if you use set __builtins__ to None it still might be possible to break out using introspection:
eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None})
Evaluate arithmetic expression using ast
import ast
import operator as op
# supported operators
operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
ast.USub: op.neg}
def eval_expr(expr):
"""
>>> eval_expr('2^6')
4
>>> eval_expr('2**6')
64
>>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
return eval_(ast.parse(expr, mode='eval').body)
def eval_(node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return operators[type(node.op)](eval_(node.left), eval_(node.right))
elif isinstance(node, ast.UnaryOp): # <operator> <operand> e.g., -1
return operators[type(node.op)](eval_(node.operand))
else:
raise TypeError(node)
You can easily limit allowed range for each operation or any intermediate result, e.g., to limit input arguments for a**b:
def power(a, b):
if any(abs(n) > 100 for n in [a, b]):
raise ValueError((a,b))
return op.pow(a, b)
operators[ast.Pow] = power
Or to limit magnitude of intermediate results:
import functools
def limit(max_=None):
"""Return decorator that limits allowed returned values."""
def decorator(func):
#functools.wraps(func)
def wrapper(*args, **kwargs):
ret = func(*args, **kwargs)
try:
mag = abs(ret)
except TypeError:
pass # not applicable
else:
if mag > max_:
raise ValueError(ret)
return ret
return wrapper
return decorator
eval_ = limit(max_=10**100)(eval_)
Example
>>> evil = "__import__('os').remove('important file')"
>>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:
>>> eval_expr("9**9")
387420489
>>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:
Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py
shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.
from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator
__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''
class NumericStringParser(object):
'''
Most of this code comes from the fourFn.py pyparsing example
'''
def pushFirst(self, strg, loc, toks):
self.exprStack.append(toks[0])
def pushUMinus(self, strg, loc, toks):
if toks and toks[0] == '-':
self.exprStack.append('unary -')
def __init__(self):
"""
expop :: '^'
multop :: '*' | '/'
addop :: '+' | '-'
integer :: ['+' | '-'] '0'..'9'+
atom :: PI | E | real | fn '(' expr ')' | '(' expr ')'
factor :: atom [ expop factor ]*
term :: factor [ multop factor ]*
expr :: term [ addop term ]*
"""
point = Literal(".")
e = CaselessLiteral("E")
fnumber = Combine(Word("+-" + nums, nums) +
Optional(point + Optional(Word(nums))) +
Optional(e + Word("+-" + nums, nums)))
ident = Word(alphas, alphas + nums + "_$")
plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
pi = CaselessLiteral("PI")
expr = Forward()
atom = ((Optional(oneOf("- +")) +
(ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
| Optional(oneOf("- +")) + Group(lpar + expr + rpar)
).setParseAction(self.pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of
# "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + \
ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
term = factor + \
ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
expr << term + \
ZeroOrMore((addop + term).setParseAction(self.pushFirst))
# addop_term = ( addop + term ).setParseAction( self.pushFirst )
# general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
# expr << general_term
self.bnf = expr
# map operator symbols to corresponding arithmetic operations
epsilon = 1e-12
self.opn = {"+": operator.add,
"-": operator.sub,
"*": operator.mul,
"/": operator.truediv,
"^": operator.pow}
self.fn = {"sin": math.sin,
"cos": math.cos,
"tan": math.tan,
"exp": math.exp,
"abs": abs,
"trunc": lambda a: int(a),
"round": round,
"sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}
def evaluateStack(self, s):
op = s.pop()
if op == 'unary -':
return -self.evaluateStack(s)
if op in "+-*/^":
op2 = self.evaluateStack(s)
op1 = self.evaluateStack(s)
return self.opn[op](op1, op2)
elif op == "PI":
return math.pi # 3.1415926535
elif op == "E":
return math.e # 2.718281828
elif op in self.fn:
return self.fn[op](self.evaluateStack(s))
elif op[0].isalpha():
return 0
else:
return float(op)
def eval(self, num_string, parseAll=True):
self.exprStack = []
results = self.bnf.parseString(num_string, parseAll)
val = self.evaluateStack(self.exprStack[:])
return val
You can use it like this
nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0
result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Some safer alternatives to eval() and sympy.sympify().evalf()*:
asteval
numexpr
*SymPy sympify is also unsafe according to the following warning from the documentation.
Warning: Note that this function uses eval, and thus shouldn’t be used on unsanitized input.
Okay, so the problem with eval is that it can escape its sandbox too easily, even if you get rid of __builtins__. All the methods for escaping the sandbox come down to using getattr or object.__getattribute__ (via the . operator) to obtain a reference to some dangerous object via some allowed object (''.__class__.__bases__[0].__subclasses__ or similar). getattr is eliminated by setting __builtins__ to None. object.__getattribute__ is the difficult one, since it cannot simply be removed, both because object is immutable and because removing it would break everything. However, __getattribute__ is only accessible via the . operator, so purging that from your input is sufficient to ensure eval cannot escape its sandbox.
In processing formulas, the only valid use of a decimal is when it is preceded or followed by [0-9], so we just remove all other instances of ..
import re
inp = re.sub(r"\.(?![0-9])","", inp)
val = eval(inp, {'__builtins__':None})
Note that while python normally treats 1 + 1. as 1 + 1.0, this will remove the trailing . and leave you with 1 + 1. You could add ),, and EOF to the list of things allowed to follow ., but why bother?
The reason eval and exec are so dangerous is that the default compile function will generate bytecode for any valid python expression, and the default eval or exec will execute any valid python bytecode. All the answers to date have focused on restricting the bytecode that can be generated (by sanitizing input) or building your own domain-specific-language using the AST.
Instead, you can easily create a simple eval function that is incapable of doing anything nefarious and can easily have runtime checks on memory or time used. Of course, if it is simple math, than there is a shortcut.
c = compile(stringExp, 'userinput', 'eval')
if c.co_code[0]==b'd' and c.co_code[3]==b'S':
return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256]
The way this works is simple, any constant mathematic expression is safely evaluated during compilation and stored as a constant. The code object returned by compile consists of d, which is the bytecode for LOAD_CONST, followed by the number of the constant to load (usually the last one in the list), followed by S, which is the bytecode for RETURN_VALUE. If this shortcut doesn't work, it means that the user input isn't a constant expression (contains a variable or function call or similar).
This also opens the door to some more sophisticated input formats. For example:
stringExp = "1 + cos(2)"
This requires actually evaluating the bytecode, which is still quite simple. Python bytecode is a stack oriented language, so everything is a simple matter of TOS=stack.pop(); op(TOS); stack.put(TOS) or similar. The key is to only implement the opcodes that are safe (loading/storing values, math operations, returning values) and not unsafe ones (attribute lookup). If you want the user to be able to call functions (the whole reason not to use the shortcut above), simple make your implementation of CALL_FUNCTION only allow functions in a 'safe' list.
from dis import opmap
from Queue import LifoQueue
from math import sin,cos
import operator
globs = {'sin':sin, 'cos':cos}
safe = globs.values()
stack = LifoQueue()
class BINARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get(),stack.get()))
class UNARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get()))
def CALL_FUNCTION(context, arg):
argc = arg[0]+arg[1]*256
args = [stack.get() for i in range(argc)]
func = stack.get()
if func not in safe:
raise TypeError("Function %r now allowed"%func)
stack.put(func(*args))
def LOAD_CONST(context, arg):
cons = arg[0]+arg[1]*256
stack.put(context['code'].co_consts[cons])
def LOAD_NAME(context, arg):
name_num = arg[0]+arg[1]*256
name = context['code'].co_names[name_num]
if name in context['locals']:
stack.put(context['locals'][name])
else:
stack.put(context['globals'][name])
def RETURN_VALUE(context):
return stack.get()
opfuncs = {
opmap['BINARY_ADD']: BINARY(operator.add),
opmap['UNARY_INVERT']: UNARY(operator.invert),
opmap['CALL_FUNCTION']: CALL_FUNCTION,
opmap['LOAD_CONST']: LOAD_CONST,
opmap['LOAD_NAME']: LOAD_NAME
opmap['RETURN_VALUE']: RETURN_VALUE,
}
def VMeval(c):
context = dict(locals={}, globals=globs, code=c)
bci = iter(c.co_code)
for bytecode in bci:
func = opfuncs[ord(bytecode)]
if func.func_code.co_argcount==1:
ret = func(context)
else:
args = ord(bci.next()), ord(bci.next())
ret = func(context, args)
if ret:
return ret
def evaluate(expr):
return VMeval(compile(expr, 'userinput', 'eval'))
Obviously, the real version of this would be a bit longer (there are 119 opcodes, 24 of which are math related). Adding STORE_FAST and a couple others would allow for input like 'x=5;return x+x or similar, trivially easily. It can even be used to execute user-created functions, so long as the user created functions are themselves executed via VMeval (don't make them callable!!! or they could get used as a callback somewhere). Handling loops requires support for the goto bytecodes, which means changing from a for iterator to while and maintaining a pointer to the current instruction, but isn't too hard. For resistance to DOS, the main loop should check how much time has passed since the start of the calculation, and certain operators should deny input over some reasonable limit (BINARY_POWER being the most obvious).
While this approach is somewhat longer than a simple grammar parser for simple expressions (see above about just grabbing the compiled constant), it extends easily to more complicated input, and doesn't require dealing with grammar (compile take anything arbitrarily complicated and reduces it to a sequence of simple instructions).
You can use the ast module and write a NodeVisitor that verifies that the type of each node is part of a whitelist.
import ast, math
locals = {key: value for (key,value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max, "pow": pow, "round": round})
class Visitor(ast.NodeVisitor):
def visit(self, node):
if not isinstance(node, self.whitelist):
raise ValueError(node)
return super().visit(node)
whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add, ast.Sub, ast.UnaryOp, ast.Num, ast.BinOp,
ast.Mult, ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor, ast.USub, ast.UAdd, ast.FloorDiv, ast.Mod,
ast.LShift, ast.RShift, ast.Invert, ast.Call, ast.Name)
def evaluate(expr, locals = {}):
if any(elem in expr for elem in '\n#') : raise ValueError(expr)
try:
node = ast.parse(expr.strip(), mode='eval')
Visitor().visit(node)
return eval(compile(node, "<string>", "eval"), {'__builtins__': None}, locals)
except Exception: raise ValueError(expr)
Because it works via a whitelist rather than a blacklist, it is safe. The only functions and variables it can access are those you explicitly give it access to. I populated a dict with math-related functions so you can easily provide access to those if you want, but you have to explicitly use it.
If the string attempts to call functions that haven't been provided, or invoke any methods, an exception will be raised, and it will not be executed.
Because this uses Python's built in parser and evaluator, it also inherits Python's precedence and promotion rules as well.
>>> evaluate("7 + 9 * (2 << 2)")
79
>>> evaluate("6 // 2 + 0.0")
3.0
The above code has only been tested on Python 3.
If desired, you can add a timeout decorator on this function.
I think I would use eval(), but would first check to make sure the string is a valid mathematical expression, as opposed to something malicious. You could use a regex for the validation.
eval() also takes additional arguments which you can use to restrict the namespace it operates in for greater security.
[I know this is an old question, but it is worth pointing out new useful solutions as they pop up]
Since python3.6, this capability is now built into the language, coined "f-strings".
See: PEP 498 -- Literal String Interpolation
For example (note the f prefix):
f'{2**4}'
=> '16'
Based on Perkins' amazing approach, I've updated and improved his "shortcut" for simple algebraic expressions (no functions or variables). Now it works on Python 3.6+ and avoids some pitfalls:
import re
# Kept outside simple_eval() just for performance
_re_simple_eval = re.compile(rb'd([\x00-\xFF]+)S\x00')
def simple_eval(expr):
try:
c = compile(expr, 'userinput', 'eval')
except SyntaxError:
raise ValueError(f"Malformed expression: {expr}")
m = _re_simple_eval.fullmatch(c.co_code)
if not m:
raise ValueError(f"Not a simple algebraic expression: {expr}")
try:
return c.co_consts[int.from_bytes(m.group(1), sys.byteorder)]
except IndexError:
raise ValueError(f"Expression not evaluated as constant: {expr}")
Testing, using some of the examples in other answers:
for expr, res in (
('2^4', 6 ),
('2**4', 16 ),
('1 + 2*3**(4^5) / (6 + -7)', -5.0 ),
('7 + 9 * (2 << 2)', 79 ),
('6 // 2 + 0.0', 3.0 ),
('2+3', 5 ),
('6+4/2*2', 10.0 ),
('3+2.45/8', 3.30625),
('3**3*3/3+3', 30.0 ),
):
result = simple_eval(expr)
ok = (result == res and type(result) == type(res))
print("{} {} = {}".format("OK!" if ok else "FAIL!", expr, result))
OK! 2^4 = 6
OK! 2**4 = 16
OK! 1 + 2*3**(4^5) / (6 + -7) = -5.0
OK! 7 + 9 * (2 << 2) = 79
OK! 6 // 2 + 0.0 = 3.0
OK! 2+3 = 5
OK! 6+4/2*2 = 10.0
OK! 3+2.45/8 = 3.30625
OK! 3**3*3/3+3 = 30.0
Testing bad input:
for expr in (
'foo bar',
'print("hi")',
'2*x',
'lambda: 10',
'2**1234',
):
try:
result = simple_eval(expr)
except ValueError as e:
print(e)
continue
print("OK!") # will never happen
Malformed expression: foo bar
Not a simple algebraic expression: print("hi")
Expression not evaluated as constant: 2*x
Expression not evaluated as constant: lambda: 10
Expression not evaluated as constant: 2**1234
This is a massively late reply, but I think useful for future reference. Rather than write your own math parser (although the pyparsing example above is great) you could use SymPy. I don't have a lot of experience with it, but it contains a much more powerful math engine than anyone is likely to write for a specific application and the basic expression evaluation is very easy:
>>> import sympy
>>> x, y, z = sympy.symbols('x y z')
>>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3})
0.858879991940133
Very cool indeed! A from sympy import * brings in a lot more function support, such as trig functions, special functions, etc., but I've avoided that here to show what's coming from where.
Use eval in a clean namespace:
>>> ns = {'__builtins__': None}
>>> eval('2 ** 4', ns)
16
The clean namespace should prevent injection. For instance:
>>> eval('__builtins__.__import__("os").system("echo got through")', ns)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__import__'
Otherwise you would get:
>>> eval('__builtins__.__import__("os").system("echo got through")')
got through
0
You might want to give access to the math module:
>>> import math
>>> ns = vars(math).copy()
>>> ns['__builtins__'] = None
>>> eval('cos(pi/3)', ns)
0.50000000000000011
Here's my solution to the problem without using eval. Works with Python2 and Python3. It doesn't work with negative numbers.
$ python -m pytest test.py
test.py
from solution import Solutions
class SolutionsTestCase(unittest.TestCase):
def setUp(self):
self.solutions = Solutions()
def test_evaluate(self):
expressions = [
'2+3=5',
'6+4/2*2=10',
'3+2.45/8=3.30625',
'3**3*3/3+3=30',
'2^4=6'
]
results = [x.split('=')[1] for x in expressions]
for e in range(len(expressions)):
if '.' in results[e]:
results[e] = float(results[e])
else:
results[e] = int(results[e])
self.assertEqual(
results[e],
self.solutions.evaluate(expressions[e])
)
solution.py
class Solutions(object):
def evaluate(self, exp):
def format(res):
if '.' in res:
try:
res = float(res)
except ValueError:
pass
else:
try:
res = int(res)
except ValueError:
pass
return res
def splitter(item, op):
mul = item.split(op)
if len(mul) == 2:
for x in ['^', '*', '/', '+', '-']:
if x in mul[0]:
mul = [mul[0].split(x)[1], mul[1]]
if x in mul[1]:
mul = [mul[0], mul[1].split(x)[0]]
elif len(mul) > 2:
pass
else:
pass
for x in range(len(mul)):
mul[x] = format(mul[x])
return mul
exp = exp.replace(' ', '')
if '=' in exp:
res = exp.split('=')[1]
res = format(res)
exp = exp.replace('=%s' % res, '')
while '^' in exp:
if '^' in exp:
itm = splitter(exp, '^')
res = itm[0] ^ itm[1]
exp = exp.replace('%s^%s' % (str(itm[0]), str(itm[1])), str(res))
while '**' in exp:
if '**' in exp:
itm = splitter(exp, '**')
res = itm[0] ** itm[1]
exp = exp.replace('%s**%s' % (str(itm[0]), str(itm[1])), str(res))
while '/' in exp:
if '/' in exp:
itm = splitter(exp, '/')
res = itm[0] / itm[1]
exp = exp.replace('%s/%s' % (str(itm[0]), str(itm[1])), str(res))
while '*' in exp:
if '*' in exp:
itm = splitter(exp, '*')
res = itm[0] * itm[1]
exp = exp.replace('%s*%s' % (str(itm[0]), str(itm[1])), str(res))
while '+' in exp:
if '+' in exp:
itm = splitter(exp, '+')
res = itm[0] + itm[1]
exp = exp.replace('%s+%s' % (str(itm[0]), str(itm[1])), str(res))
while '-' in exp:
if '-' in exp:
itm = splitter(exp, '-')
res = itm[0] - itm[1]
exp = exp.replace('%s-%s' % (str(itm[0]), str(itm[1])), str(res))
return format(exp)
Using lark parser library https://stackoverflow.com/posts/67491514/edit
from operator import add, sub, mul, truediv, neg, pow
from lark import Lark, Transformer, v_args
calc_grammar = f"""
?start: sum
?sum: product
| sum "+" product -> {add.__name__}
| sum "-" product -> {sub.__name__}
?product: power
| product "*" power -> {mul.__name__}
| product "/" power -> {truediv.__name__}
?power: atom
| power "^" atom -> {pow.__name__}
?atom: NUMBER -> number
| "-" atom -> {neg.__name__}
| "(" sum ")"
%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""
#v_args(inline=True)
class CalculateTree(Transformer):
add = add
sub = sub
neg = neg
mul = mul
truediv = truediv
pow = pow
number = float
calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateTree())
calc = calc_parser.parse
def eval_expr(expression: str) -> float:
return calc(expression)
print(eval_expr("2^4"))
print(eval_expr("-1*2^4"))
print(eval_expr("-2^3 + 1"))
print(eval_expr("2**4")) # Error
I came here looking for a mathematic expression parser as well. Reading through some of the answers and looking up libraries, I came across py-expression which I am now using. It basically handles a lot of operators and formula constructs, but if you're missing something you can easily add new operators/functions to it.
The basic syntax is:
from py_expression.core import Exp
exp = Exp()
parsed_formula = exp.parse('a+4')
result = exp.eval(parsed_formula, {"a":2})
The only issue that I've had with it so far is that it doesn't come with built-in mathematical constants nor a mechanism to add them in. I just proposed a solution to that however: https://github.com/FlavioLionelRita/py-expression/issues/7

int to str but with variables [duplicate]

stringExp = "2^4"
intVal = int(stringExp) # Expected value: 16
This returns the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int()
with base 10: '2^4'
I know that eval can work around this, but isn't there a better and - more importantly - safer method to evaluate a mathematical expression that is being stored in a string?
eval is evil
eval("__import__('os').remove('important file')") # arbitrary commands
eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory
Note: even if you use set __builtins__ to None it still might be possible to break out using introspection:
eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None})
Evaluate arithmetic expression using ast
import ast
import operator as op
# supported operators
operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
ast.USub: op.neg}
def eval_expr(expr):
"""
>>> eval_expr('2^6')
4
>>> eval_expr('2**6')
64
>>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
return eval_(ast.parse(expr, mode='eval').body)
def eval_(node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return operators[type(node.op)](eval_(node.left), eval_(node.right))
elif isinstance(node, ast.UnaryOp): # <operator> <operand> e.g., -1
return operators[type(node.op)](eval_(node.operand))
else:
raise TypeError(node)
You can easily limit allowed range for each operation or any intermediate result, e.g., to limit input arguments for a**b:
def power(a, b):
if any(abs(n) > 100 for n in [a, b]):
raise ValueError((a,b))
return op.pow(a, b)
operators[ast.Pow] = power
Or to limit magnitude of intermediate results:
import functools
def limit(max_=None):
"""Return decorator that limits allowed returned values."""
def decorator(func):
#functools.wraps(func)
def wrapper(*args, **kwargs):
ret = func(*args, **kwargs)
try:
mag = abs(ret)
except TypeError:
pass # not applicable
else:
if mag > max_:
raise ValueError(ret)
return ret
return wrapper
return decorator
eval_ = limit(max_=10**100)(eval_)
Example
>>> evil = "__import__('os').remove('important file')"
>>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:
>>> eval_expr("9**9")
387420489
>>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:
Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py
shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.
from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator
__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''
class NumericStringParser(object):
'''
Most of this code comes from the fourFn.py pyparsing example
'''
def pushFirst(self, strg, loc, toks):
self.exprStack.append(toks[0])
def pushUMinus(self, strg, loc, toks):
if toks and toks[0] == '-':
self.exprStack.append('unary -')
def __init__(self):
"""
expop :: '^'
multop :: '*' | '/'
addop :: '+' | '-'
integer :: ['+' | '-'] '0'..'9'+
atom :: PI | E | real | fn '(' expr ')' | '(' expr ')'
factor :: atom [ expop factor ]*
term :: factor [ multop factor ]*
expr :: term [ addop term ]*
"""
point = Literal(".")
e = CaselessLiteral("E")
fnumber = Combine(Word("+-" + nums, nums) +
Optional(point + Optional(Word(nums))) +
Optional(e + Word("+-" + nums, nums)))
ident = Word(alphas, alphas + nums + "_$")
plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
pi = CaselessLiteral("PI")
expr = Forward()
atom = ((Optional(oneOf("- +")) +
(ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
| Optional(oneOf("- +")) + Group(lpar + expr + rpar)
).setParseAction(self.pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of
# "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + \
ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
term = factor + \
ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
expr << term + \
ZeroOrMore((addop + term).setParseAction(self.pushFirst))
# addop_term = ( addop + term ).setParseAction( self.pushFirst )
# general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
# expr << general_term
self.bnf = expr
# map operator symbols to corresponding arithmetic operations
epsilon = 1e-12
self.opn = {"+": operator.add,
"-": operator.sub,
"*": operator.mul,
"/": operator.truediv,
"^": operator.pow}
self.fn = {"sin": math.sin,
"cos": math.cos,
"tan": math.tan,
"exp": math.exp,
"abs": abs,
"trunc": lambda a: int(a),
"round": round,
"sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}
def evaluateStack(self, s):
op = s.pop()
if op == 'unary -':
return -self.evaluateStack(s)
if op in "+-*/^":
op2 = self.evaluateStack(s)
op1 = self.evaluateStack(s)
return self.opn[op](op1, op2)
elif op == "PI":
return math.pi # 3.1415926535
elif op == "E":
return math.e # 2.718281828
elif op in self.fn:
return self.fn[op](self.evaluateStack(s))
elif op[0].isalpha():
return 0
else:
return float(op)
def eval(self, num_string, parseAll=True):
self.exprStack = []
results = self.bnf.parseString(num_string, parseAll)
val = self.evaluateStack(self.exprStack[:])
return val
You can use it like this
nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0
result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Some safer alternatives to eval() and sympy.sympify().evalf()*:
asteval
numexpr
*SymPy sympify is also unsafe according to the following warning from the documentation.
Warning: Note that this function uses eval, and thus shouldn’t be used on unsanitized input.
The reason eval and exec are so dangerous is that the default compile function will generate bytecode for any valid python expression, and the default eval or exec will execute any valid python bytecode. All the answers to date have focused on restricting the bytecode that can be generated (by sanitizing input) or building your own domain-specific-language using the AST.
Instead, you can easily create a simple eval function that is incapable of doing anything nefarious and can easily have runtime checks on memory or time used. Of course, if it is simple math, than there is a shortcut.
c = compile(stringExp, 'userinput', 'eval')
if c.co_code[0]==b'd' and c.co_code[3]==b'S':
return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256]
The way this works is simple, any constant mathematic expression is safely evaluated during compilation and stored as a constant. The code object returned by compile consists of d, which is the bytecode for LOAD_CONST, followed by the number of the constant to load (usually the last one in the list), followed by S, which is the bytecode for RETURN_VALUE. If this shortcut doesn't work, it means that the user input isn't a constant expression (contains a variable or function call or similar).
This also opens the door to some more sophisticated input formats. For example:
stringExp = "1 + cos(2)"
This requires actually evaluating the bytecode, which is still quite simple. Python bytecode is a stack oriented language, so everything is a simple matter of TOS=stack.pop(); op(TOS); stack.put(TOS) or similar. The key is to only implement the opcodes that are safe (loading/storing values, math operations, returning values) and not unsafe ones (attribute lookup). If you want the user to be able to call functions (the whole reason not to use the shortcut above), simple make your implementation of CALL_FUNCTION only allow functions in a 'safe' list.
from dis import opmap
from Queue import LifoQueue
from math import sin,cos
import operator
globs = {'sin':sin, 'cos':cos}
safe = globs.values()
stack = LifoQueue()
class BINARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get(),stack.get()))
class UNARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get()))
def CALL_FUNCTION(context, arg):
argc = arg[0]+arg[1]*256
args = [stack.get() for i in range(argc)]
func = stack.get()
if func not in safe:
raise TypeError("Function %r now allowed"%func)
stack.put(func(*args))
def LOAD_CONST(context, arg):
cons = arg[0]+arg[1]*256
stack.put(context['code'].co_consts[cons])
def LOAD_NAME(context, arg):
name_num = arg[0]+arg[1]*256
name = context['code'].co_names[name_num]
if name in context['locals']:
stack.put(context['locals'][name])
else:
stack.put(context['globals'][name])
def RETURN_VALUE(context):
return stack.get()
opfuncs = {
opmap['BINARY_ADD']: BINARY(operator.add),
opmap['UNARY_INVERT']: UNARY(operator.invert),
opmap['CALL_FUNCTION']: CALL_FUNCTION,
opmap['LOAD_CONST']: LOAD_CONST,
opmap['LOAD_NAME']: LOAD_NAME
opmap['RETURN_VALUE']: RETURN_VALUE,
}
def VMeval(c):
context = dict(locals={}, globals=globs, code=c)
bci = iter(c.co_code)
for bytecode in bci:
func = opfuncs[ord(bytecode)]
if func.func_code.co_argcount==1:
ret = func(context)
else:
args = ord(bci.next()), ord(bci.next())
ret = func(context, args)
if ret:
return ret
def evaluate(expr):
return VMeval(compile(expr, 'userinput', 'eval'))
Obviously, the real version of this would be a bit longer (there are 119 opcodes, 24 of which are math related). Adding STORE_FAST and a couple others would allow for input like 'x=5;return x+x or similar, trivially easily. It can even be used to execute user-created functions, so long as the user created functions are themselves executed via VMeval (don't make them callable!!! or they could get used as a callback somewhere). Handling loops requires support for the goto bytecodes, which means changing from a for iterator to while and maintaining a pointer to the current instruction, but isn't too hard. For resistance to DOS, the main loop should check how much time has passed since the start of the calculation, and certain operators should deny input over some reasonable limit (BINARY_POWER being the most obvious).
While this approach is somewhat longer than a simple grammar parser for simple expressions (see above about just grabbing the compiled constant), it extends easily to more complicated input, and doesn't require dealing with grammar (compile take anything arbitrarily complicated and reduces it to a sequence of simple instructions).
Okay, so the problem with eval is that it can escape its sandbox too easily, even if you get rid of __builtins__. All the methods for escaping the sandbox come down to using getattr or object.__getattribute__ (via the . operator) to obtain a reference to some dangerous object via some allowed object (''.__class__.__bases__[0].__subclasses__ or similar). getattr is eliminated by setting __builtins__ to None. object.__getattribute__ is the difficult one, since it cannot simply be removed, both because object is immutable and because removing it would break everything. However, __getattribute__ is only accessible via the . operator, so purging that from your input is sufficient to ensure eval cannot escape its sandbox.
In processing formulas, the only valid use of a decimal is when it is preceded or followed by [0-9], so we just remove all other instances of ..
import re
inp = re.sub(r"\.(?![0-9])","", inp)
val = eval(inp, {'__builtins__':None})
Note that while python normally treats 1 + 1. as 1 + 1.0, this will remove the trailing . and leave you with 1 + 1. You could add ),, and EOF to the list of things allowed to follow ., but why bother?
You can use the ast module and write a NodeVisitor that verifies that the type of each node is part of a whitelist.
import ast, math
locals = {key: value for (key,value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max, "pow": pow, "round": round})
class Visitor(ast.NodeVisitor):
def visit(self, node):
if not isinstance(node, self.whitelist):
raise ValueError(node)
return super().visit(node)
whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add, ast.Sub, ast.UnaryOp, ast.Num, ast.BinOp,
ast.Mult, ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor, ast.USub, ast.UAdd, ast.FloorDiv, ast.Mod,
ast.LShift, ast.RShift, ast.Invert, ast.Call, ast.Name)
def evaluate(expr, locals = {}):
if any(elem in expr for elem in '\n#') : raise ValueError(expr)
try:
node = ast.parse(expr.strip(), mode='eval')
Visitor().visit(node)
return eval(compile(node, "<string>", "eval"), {'__builtins__': None}, locals)
except Exception: raise ValueError(expr)
Because it works via a whitelist rather than a blacklist, it is safe. The only functions and variables it can access are those you explicitly give it access to. I populated a dict with math-related functions so you can easily provide access to those if you want, but you have to explicitly use it.
If the string attempts to call functions that haven't been provided, or invoke any methods, an exception will be raised, and it will not be executed.
Because this uses Python's built in parser and evaluator, it also inherits Python's precedence and promotion rules as well.
>>> evaluate("7 + 9 * (2 << 2)")
79
>>> evaluate("6 // 2 + 0.0")
3.0
The above code has only been tested on Python 3.
If desired, you can add a timeout decorator on this function.
I think I would use eval(), but would first check to make sure the string is a valid mathematical expression, as opposed to something malicious. You could use a regex for the validation.
eval() also takes additional arguments which you can use to restrict the namespace it operates in for greater security.
[I know this is an old question, but it is worth pointing out new useful solutions as they pop up]
Since python3.6, this capability is now built into the language, coined "f-strings".
See: PEP 498 -- Literal String Interpolation
For example (note the f prefix):
f'{2**4}'
=> '16'
Based on Perkins' amazing approach, I've updated and improved his "shortcut" for simple algebraic expressions (no functions or variables). Now it works on Python 3.6+ and avoids some pitfalls:
import re
# Kept outside simple_eval() just for performance
_re_simple_eval = re.compile(rb'd([\x00-\xFF]+)S\x00')
def simple_eval(expr):
try:
c = compile(expr, 'userinput', 'eval')
except SyntaxError:
raise ValueError(f"Malformed expression: {expr}")
m = _re_simple_eval.fullmatch(c.co_code)
if not m:
raise ValueError(f"Not a simple algebraic expression: {expr}")
try:
return c.co_consts[int.from_bytes(m.group(1), sys.byteorder)]
except IndexError:
raise ValueError(f"Expression not evaluated as constant: {expr}")
Testing, using some of the examples in other answers:
for expr, res in (
('2^4', 6 ),
('2**4', 16 ),
('1 + 2*3**(4^5) / (6 + -7)', -5.0 ),
('7 + 9 * (2 << 2)', 79 ),
('6 // 2 + 0.0', 3.0 ),
('2+3', 5 ),
('6+4/2*2', 10.0 ),
('3+2.45/8', 3.30625),
('3**3*3/3+3', 30.0 ),
):
result = simple_eval(expr)
ok = (result == res and type(result) == type(res))
print("{} {} = {}".format("OK!" if ok else "FAIL!", expr, result))
OK! 2^4 = 6
OK! 2**4 = 16
OK! 1 + 2*3**(4^5) / (6 + -7) = -5.0
OK! 7 + 9 * (2 << 2) = 79
OK! 6 // 2 + 0.0 = 3.0
OK! 2+3 = 5
OK! 6+4/2*2 = 10.0
OK! 3+2.45/8 = 3.30625
OK! 3**3*3/3+3 = 30.0
Testing bad input:
for expr in (
'foo bar',
'print("hi")',
'2*x',
'lambda: 10',
'2**1234',
):
try:
result = simple_eval(expr)
except ValueError as e:
print(e)
continue
print("OK!") # will never happen
Malformed expression: foo bar
Not a simple algebraic expression: print("hi")
Expression not evaluated as constant: 2*x
Expression not evaluated as constant: lambda: 10
Expression not evaluated as constant: 2**1234
This is a massively late reply, but I think useful for future reference. Rather than write your own math parser (although the pyparsing example above is great) you could use SymPy. I don't have a lot of experience with it, but it contains a much more powerful math engine than anyone is likely to write for a specific application and the basic expression evaluation is very easy:
>>> import sympy
>>> x, y, z = sympy.symbols('x y z')
>>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3})
0.858879991940133
Very cool indeed! A from sympy import * brings in a lot more function support, such as trig functions, special functions, etc., but I've avoided that here to show what's coming from where.
Use eval in a clean namespace:
>>> ns = {'__builtins__': None}
>>> eval('2 ** 4', ns)
16
The clean namespace should prevent injection. For instance:
>>> eval('__builtins__.__import__("os").system("echo got through")', ns)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__import__'
Otherwise you would get:
>>> eval('__builtins__.__import__("os").system("echo got through")')
got through
0
You might want to give access to the math module:
>>> import math
>>> ns = vars(math).copy()
>>> ns['__builtins__'] = None
>>> eval('cos(pi/3)', ns)
0.50000000000000011
Here's my solution to the problem without using eval. Works with Python2 and Python3. It doesn't work with negative numbers.
$ python -m pytest test.py
test.py
from solution import Solutions
class SolutionsTestCase(unittest.TestCase):
def setUp(self):
self.solutions = Solutions()
def test_evaluate(self):
expressions = [
'2+3=5',
'6+4/2*2=10',
'3+2.45/8=3.30625',
'3**3*3/3+3=30',
'2^4=6'
]
results = [x.split('=')[1] for x in expressions]
for e in range(len(expressions)):
if '.' in results[e]:
results[e] = float(results[e])
else:
results[e] = int(results[e])
self.assertEqual(
results[e],
self.solutions.evaluate(expressions[e])
)
solution.py
class Solutions(object):
def evaluate(self, exp):
def format(res):
if '.' in res:
try:
res = float(res)
except ValueError:
pass
else:
try:
res = int(res)
except ValueError:
pass
return res
def splitter(item, op):
mul = item.split(op)
if len(mul) == 2:
for x in ['^', '*', '/', '+', '-']:
if x in mul[0]:
mul = [mul[0].split(x)[1], mul[1]]
if x in mul[1]:
mul = [mul[0], mul[1].split(x)[0]]
elif len(mul) > 2:
pass
else:
pass
for x in range(len(mul)):
mul[x] = format(mul[x])
return mul
exp = exp.replace(' ', '')
if '=' in exp:
res = exp.split('=')[1]
res = format(res)
exp = exp.replace('=%s' % res, '')
while '^' in exp:
if '^' in exp:
itm = splitter(exp, '^')
res = itm[0] ^ itm[1]
exp = exp.replace('%s^%s' % (str(itm[0]), str(itm[1])), str(res))
while '**' in exp:
if '**' in exp:
itm = splitter(exp, '**')
res = itm[0] ** itm[1]
exp = exp.replace('%s**%s' % (str(itm[0]), str(itm[1])), str(res))
while '/' in exp:
if '/' in exp:
itm = splitter(exp, '/')
res = itm[0] / itm[1]
exp = exp.replace('%s/%s' % (str(itm[0]), str(itm[1])), str(res))
while '*' in exp:
if '*' in exp:
itm = splitter(exp, '*')
res = itm[0] * itm[1]
exp = exp.replace('%s*%s' % (str(itm[0]), str(itm[1])), str(res))
while '+' in exp:
if '+' in exp:
itm = splitter(exp, '+')
res = itm[0] + itm[1]
exp = exp.replace('%s+%s' % (str(itm[0]), str(itm[1])), str(res))
while '-' in exp:
if '-' in exp:
itm = splitter(exp, '-')
res = itm[0] - itm[1]
exp = exp.replace('%s-%s' % (str(itm[0]), str(itm[1])), str(res))
return format(exp)
Using lark parser library https://stackoverflow.com/posts/67491514/edit
from operator import add, sub, mul, truediv, neg, pow
from lark import Lark, Transformer, v_args
calc_grammar = f"""
?start: sum
?sum: product
| sum "+" product -> {add.__name__}
| sum "-" product -> {sub.__name__}
?product: power
| product "*" power -> {mul.__name__}
| product "/" power -> {truediv.__name__}
?power: atom
| power "^" atom -> {pow.__name__}
?atom: NUMBER -> number
| "-" atom -> {neg.__name__}
| "(" sum ")"
%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""
#v_args(inline=True)
class CalculateTree(Transformer):
add = add
sub = sub
neg = neg
mul = mul
truediv = truediv
pow = pow
number = float
calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateTree())
calc = calc_parser.parse
def eval_expr(expression: str) -> float:
return calc(expression)
print(eval_expr("2^4"))
print(eval_expr("-1*2^4"))
print(eval_expr("-2^3 + 1"))
print(eval_expr("2**4")) # Error
I came here looking for a mathematic expression parser as well. Reading through some of the answers and looking up libraries, I came across py-expression which I am now using. It basically handles a lot of operators and formula constructs, but if you're missing something you can easily add new operators/functions to it.
The basic syntax is:
from py_expression.core import Exp
exp = Exp()
parsed_formula = exp.parse('a+4')
result = exp.eval(parsed_formula, {"a":2})
The only issue that I've had with it so far is that it doesn't come with built-in mathematical constants nor a mechanism to add them in. I just proposed a solution to that however: https://github.com/FlavioLionelRita/py-expression/issues/7

Is there a way to pass an equation as a string to a Python function? [duplicate]

stringExp = "2^4"
intVal = int(stringExp) # Expected value: 16
This returns the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int()
with base 10: '2^4'
I know that eval can work around this, but isn't there a better and - more importantly - safer method to evaluate a mathematical expression that is being stored in a string?
eval is evil
eval("__import__('os').remove('important file')") # arbitrary commands
eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory
Note: even if you use set __builtins__ to None it still might be possible to break out using introspection:
eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None})
Evaluate arithmetic expression using ast
import ast
import operator as op
# supported operators
operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
ast.USub: op.neg}
def eval_expr(expr):
"""
>>> eval_expr('2^6')
4
>>> eval_expr('2**6')
64
>>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
return eval_(ast.parse(expr, mode='eval').body)
def eval_(node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return operators[type(node.op)](eval_(node.left), eval_(node.right))
elif isinstance(node, ast.UnaryOp): # <operator> <operand> e.g., -1
return operators[type(node.op)](eval_(node.operand))
else:
raise TypeError(node)
You can easily limit allowed range for each operation or any intermediate result, e.g., to limit input arguments for a**b:
def power(a, b):
if any(abs(n) > 100 for n in [a, b]):
raise ValueError((a,b))
return op.pow(a, b)
operators[ast.Pow] = power
Or to limit magnitude of intermediate results:
import functools
def limit(max_=None):
"""Return decorator that limits allowed returned values."""
def decorator(func):
#functools.wraps(func)
def wrapper(*args, **kwargs):
ret = func(*args, **kwargs)
try:
mag = abs(ret)
except TypeError:
pass # not applicable
else:
if mag > max_:
raise ValueError(ret)
return ret
return wrapper
return decorator
eval_ = limit(max_=10**100)(eval_)
Example
>>> evil = "__import__('os').remove('important file')"
>>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:
>>> eval_expr("9**9")
387420489
>>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:
Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py
shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.
from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator
__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''
class NumericStringParser(object):
'''
Most of this code comes from the fourFn.py pyparsing example
'''
def pushFirst(self, strg, loc, toks):
self.exprStack.append(toks[0])
def pushUMinus(self, strg, loc, toks):
if toks and toks[0] == '-':
self.exprStack.append('unary -')
def __init__(self):
"""
expop :: '^'
multop :: '*' | '/'
addop :: '+' | '-'
integer :: ['+' | '-'] '0'..'9'+
atom :: PI | E | real | fn '(' expr ')' | '(' expr ')'
factor :: atom [ expop factor ]*
term :: factor [ multop factor ]*
expr :: term [ addop term ]*
"""
point = Literal(".")
e = CaselessLiteral("E")
fnumber = Combine(Word("+-" + nums, nums) +
Optional(point + Optional(Word(nums))) +
Optional(e + Word("+-" + nums, nums)))
ident = Word(alphas, alphas + nums + "_$")
plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
pi = CaselessLiteral("PI")
expr = Forward()
atom = ((Optional(oneOf("- +")) +
(ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
| Optional(oneOf("- +")) + Group(lpar + expr + rpar)
).setParseAction(self.pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of
# "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + \
ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
term = factor + \
ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
expr << term + \
ZeroOrMore((addop + term).setParseAction(self.pushFirst))
# addop_term = ( addop + term ).setParseAction( self.pushFirst )
# general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
# expr << general_term
self.bnf = expr
# map operator symbols to corresponding arithmetic operations
epsilon = 1e-12
self.opn = {"+": operator.add,
"-": operator.sub,
"*": operator.mul,
"/": operator.truediv,
"^": operator.pow}
self.fn = {"sin": math.sin,
"cos": math.cos,
"tan": math.tan,
"exp": math.exp,
"abs": abs,
"trunc": lambda a: int(a),
"round": round,
"sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}
def evaluateStack(self, s):
op = s.pop()
if op == 'unary -':
return -self.evaluateStack(s)
if op in "+-*/^":
op2 = self.evaluateStack(s)
op1 = self.evaluateStack(s)
return self.opn[op](op1, op2)
elif op == "PI":
return math.pi # 3.1415926535
elif op == "E":
return math.e # 2.718281828
elif op in self.fn:
return self.fn[op](self.evaluateStack(s))
elif op[0].isalpha():
return 0
else:
return float(op)
def eval(self, num_string, parseAll=True):
self.exprStack = []
results = self.bnf.parseString(num_string, parseAll)
val = self.evaluateStack(self.exprStack[:])
return val
You can use it like this
nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0
result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Some safer alternatives to eval() and sympy.sympify().evalf()*:
asteval
numexpr
*SymPy sympify is also unsafe according to the following warning from the documentation.
Warning: Note that this function uses eval, and thus shouldn’t be used on unsanitized input.
The reason eval and exec are so dangerous is that the default compile function will generate bytecode for any valid python expression, and the default eval or exec will execute any valid python bytecode. All the answers to date have focused on restricting the bytecode that can be generated (by sanitizing input) or building your own domain-specific-language using the AST.
Instead, you can easily create a simple eval function that is incapable of doing anything nefarious and can easily have runtime checks on memory or time used. Of course, if it is simple math, than there is a shortcut.
c = compile(stringExp, 'userinput', 'eval')
if c.co_code[0]==b'd' and c.co_code[3]==b'S':
return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256]
The way this works is simple, any constant mathematic expression is safely evaluated during compilation and stored as a constant. The code object returned by compile consists of d, which is the bytecode for LOAD_CONST, followed by the number of the constant to load (usually the last one in the list), followed by S, which is the bytecode for RETURN_VALUE. If this shortcut doesn't work, it means that the user input isn't a constant expression (contains a variable or function call or similar).
This also opens the door to some more sophisticated input formats. For example:
stringExp = "1 + cos(2)"
This requires actually evaluating the bytecode, which is still quite simple. Python bytecode is a stack oriented language, so everything is a simple matter of TOS=stack.pop(); op(TOS); stack.put(TOS) or similar. The key is to only implement the opcodes that are safe (loading/storing values, math operations, returning values) and not unsafe ones (attribute lookup). If you want the user to be able to call functions (the whole reason not to use the shortcut above), simple make your implementation of CALL_FUNCTION only allow functions in a 'safe' list.
from dis import opmap
from Queue import LifoQueue
from math import sin,cos
import operator
globs = {'sin':sin, 'cos':cos}
safe = globs.values()
stack = LifoQueue()
class BINARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get(),stack.get()))
class UNARY(object):
def __init__(self, operator):
self.op=operator
def __call__(self, context):
stack.put(self.op(stack.get()))
def CALL_FUNCTION(context, arg):
argc = arg[0]+arg[1]*256
args = [stack.get() for i in range(argc)]
func = stack.get()
if func not in safe:
raise TypeError("Function %r now allowed"%func)
stack.put(func(*args))
def LOAD_CONST(context, arg):
cons = arg[0]+arg[1]*256
stack.put(context['code'].co_consts[cons])
def LOAD_NAME(context, arg):
name_num = arg[0]+arg[1]*256
name = context['code'].co_names[name_num]
if name in context['locals']:
stack.put(context['locals'][name])
else:
stack.put(context['globals'][name])
def RETURN_VALUE(context):
return stack.get()
opfuncs = {
opmap['BINARY_ADD']: BINARY(operator.add),
opmap['UNARY_INVERT']: UNARY(operator.invert),
opmap['CALL_FUNCTION']: CALL_FUNCTION,
opmap['LOAD_CONST']: LOAD_CONST,
opmap['LOAD_NAME']: LOAD_NAME
opmap['RETURN_VALUE']: RETURN_VALUE,
}
def VMeval(c):
context = dict(locals={}, globals=globs, code=c)
bci = iter(c.co_code)
for bytecode in bci:
func = opfuncs[ord(bytecode)]
if func.func_code.co_argcount==1:
ret = func(context)
else:
args = ord(bci.next()), ord(bci.next())
ret = func(context, args)
if ret:
return ret
def evaluate(expr):
return VMeval(compile(expr, 'userinput', 'eval'))
Obviously, the real version of this would be a bit longer (there are 119 opcodes, 24 of which are math related). Adding STORE_FAST and a couple others would allow for input like 'x=5;return x+x or similar, trivially easily. It can even be used to execute user-created functions, so long as the user created functions are themselves executed via VMeval (don't make them callable!!! or they could get used as a callback somewhere). Handling loops requires support for the goto bytecodes, which means changing from a for iterator to while and maintaining a pointer to the current instruction, but isn't too hard. For resistance to DOS, the main loop should check how much time has passed since the start of the calculation, and certain operators should deny input over some reasonable limit (BINARY_POWER being the most obvious).
While this approach is somewhat longer than a simple grammar parser for simple expressions (see above about just grabbing the compiled constant), it extends easily to more complicated input, and doesn't require dealing with grammar (compile take anything arbitrarily complicated and reduces it to a sequence of simple instructions).
Okay, so the problem with eval is that it can escape its sandbox too easily, even if you get rid of __builtins__. All the methods for escaping the sandbox come down to using getattr or object.__getattribute__ (via the . operator) to obtain a reference to some dangerous object via some allowed object (''.__class__.__bases__[0].__subclasses__ or similar). getattr is eliminated by setting __builtins__ to None. object.__getattribute__ is the difficult one, since it cannot simply be removed, both because object is immutable and because removing it would break everything. However, __getattribute__ is only accessible via the . operator, so purging that from your input is sufficient to ensure eval cannot escape its sandbox.
In processing formulas, the only valid use of a decimal is when it is preceded or followed by [0-9], so we just remove all other instances of ..
import re
inp = re.sub(r"\.(?![0-9])","", inp)
val = eval(inp, {'__builtins__':None})
Note that while python normally treats 1 + 1. as 1 + 1.0, this will remove the trailing . and leave you with 1 + 1. You could add ),, and EOF to the list of things allowed to follow ., but why bother?
You can use the ast module and write a NodeVisitor that verifies that the type of each node is part of a whitelist.
import ast, math
locals = {key: value for (key,value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max, "pow": pow, "round": round})
class Visitor(ast.NodeVisitor):
def visit(self, node):
if not isinstance(node, self.whitelist):
raise ValueError(node)
return super().visit(node)
whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add, ast.Sub, ast.UnaryOp, ast.Num, ast.BinOp,
ast.Mult, ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor, ast.USub, ast.UAdd, ast.FloorDiv, ast.Mod,
ast.LShift, ast.RShift, ast.Invert, ast.Call, ast.Name)
def evaluate(expr, locals = {}):
if any(elem in expr for elem in '\n#') : raise ValueError(expr)
try:
node = ast.parse(expr.strip(), mode='eval')
Visitor().visit(node)
return eval(compile(node, "<string>", "eval"), {'__builtins__': None}, locals)
except Exception: raise ValueError(expr)
Because it works via a whitelist rather than a blacklist, it is safe. The only functions and variables it can access are those you explicitly give it access to. I populated a dict with math-related functions so you can easily provide access to those if you want, but you have to explicitly use it.
If the string attempts to call functions that haven't been provided, or invoke any methods, an exception will be raised, and it will not be executed.
Because this uses Python's built in parser and evaluator, it also inherits Python's precedence and promotion rules as well.
>>> evaluate("7 + 9 * (2 << 2)")
79
>>> evaluate("6 // 2 + 0.0")
3.0
The above code has only been tested on Python 3.
If desired, you can add a timeout decorator on this function.
I think I would use eval(), but would first check to make sure the string is a valid mathematical expression, as opposed to something malicious. You could use a regex for the validation.
eval() also takes additional arguments which you can use to restrict the namespace it operates in for greater security.
[I know this is an old question, but it is worth pointing out new useful solutions as they pop up]
Since python3.6, this capability is now built into the language, coined "f-strings".
See: PEP 498 -- Literal String Interpolation
For example (note the f prefix):
f'{2**4}'
=> '16'
Based on Perkins' amazing approach, I've updated and improved his "shortcut" for simple algebraic expressions (no functions or variables). Now it works on Python 3.6+ and avoids some pitfalls:
import re
# Kept outside simple_eval() just for performance
_re_simple_eval = re.compile(rb'd([\x00-\xFF]+)S\x00')
def simple_eval(expr):
try:
c = compile(expr, 'userinput', 'eval')
except SyntaxError:
raise ValueError(f"Malformed expression: {expr}")
m = _re_simple_eval.fullmatch(c.co_code)
if not m:
raise ValueError(f"Not a simple algebraic expression: {expr}")
try:
return c.co_consts[int.from_bytes(m.group(1), sys.byteorder)]
except IndexError:
raise ValueError(f"Expression not evaluated as constant: {expr}")
Testing, using some of the examples in other answers:
for expr, res in (
('2^4', 6 ),
('2**4', 16 ),
('1 + 2*3**(4^5) / (6 + -7)', -5.0 ),
('7 + 9 * (2 << 2)', 79 ),
('6 // 2 + 0.0', 3.0 ),
('2+3', 5 ),
('6+4/2*2', 10.0 ),
('3+2.45/8', 3.30625),
('3**3*3/3+3', 30.0 ),
):
result = simple_eval(expr)
ok = (result == res and type(result) == type(res))
print("{} {} = {}".format("OK!" if ok else "FAIL!", expr, result))
OK! 2^4 = 6
OK! 2**4 = 16
OK! 1 + 2*3**(4^5) / (6 + -7) = -5.0
OK! 7 + 9 * (2 << 2) = 79
OK! 6 // 2 + 0.0 = 3.0
OK! 2+3 = 5
OK! 6+4/2*2 = 10.0
OK! 3+2.45/8 = 3.30625
OK! 3**3*3/3+3 = 30.0
Testing bad input:
for expr in (
'foo bar',
'print("hi")',
'2*x',
'lambda: 10',
'2**1234',
):
try:
result = simple_eval(expr)
except ValueError as e:
print(e)
continue
print("OK!") # will never happen
Malformed expression: foo bar
Not a simple algebraic expression: print("hi")
Expression not evaluated as constant: 2*x
Expression not evaluated as constant: lambda: 10
Expression not evaluated as constant: 2**1234
This is a massively late reply, but I think useful for future reference. Rather than write your own math parser (although the pyparsing example above is great) you could use SymPy. I don't have a lot of experience with it, but it contains a much more powerful math engine than anyone is likely to write for a specific application and the basic expression evaluation is very easy:
>>> import sympy
>>> x, y, z = sympy.symbols('x y z')
>>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3})
0.858879991940133
Very cool indeed! A from sympy import * brings in a lot more function support, such as trig functions, special functions, etc., but I've avoided that here to show what's coming from where.
Use eval in a clean namespace:
>>> ns = {'__builtins__': None}
>>> eval('2 ** 4', ns)
16
The clean namespace should prevent injection. For instance:
>>> eval('__builtins__.__import__("os").system("echo got through")', ns)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__import__'
Otherwise you would get:
>>> eval('__builtins__.__import__("os").system("echo got through")')
got through
0
You might want to give access to the math module:
>>> import math
>>> ns = vars(math).copy()
>>> ns['__builtins__'] = None
>>> eval('cos(pi/3)', ns)
0.50000000000000011
Here's my solution to the problem without using eval. Works with Python2 and Python3. It doesn't work with negative numbers.
$ python -m pytest test.py
test.py
from solution import Solutions
class SolutionsTestCase(unittest.TestCase):
def setUp(self):
self.solutions = Solutions()
def test_evaluate(self):
expressions = [
'2+3=5',
'6+4/2*2=10',
'3+2.45/8=3.30625',
'3**3*3/3+3=30',
'2^4=6'
]
results = [x.split('=')[1] for x in expressions]
for e in range(len(expressions)):
if '.' in results[e]:
results[e] = float(results[e])
else:
results[e] = int(results[e])
self.assertEqual(
results[e],
self.solutions.evaluate(expressions[e])
)
solution.py
class Solutions(object):
def evaluate(self, exp):
def format(res):
if '.' in res:
try:
res = float(res)
except ValueError:
pass
else:
try:
res = int(res)
except ValueError:
pass
return res
def splitter(item, op):
mul = item.split(op)
if len(mul) == 2:
for x in ['^', '*', '/', '+', '-']:
if x in mul[0]:
mul = [mul[0].split(x)[1], mul[1]]
if x in mul[1]:
mul = [mul[0], mul[1].split(x)[0]]
elif len(mul) > 2:
pass
else:
pass
for x in range(len(mul)):
mul[x] = format(mul[x])
return mul
exp = exp.replace(' ', '')
if '=' in exp:
res = exp.split('=')[1]
res = format(res)
exp = exp.replace('=%s' % res, '')
while '^' in exp:
if '^' in exp:
itm = splitter(exp, '^')
res = itm[0] ^ itm[1]
exp = exp.replace('%s^%s' % (str(itm[0]), str(itm[1])), str(res))
while '**' in exp:
if '**' in exp:
itm = splitter(exp, '**')
res = itm[0] ** itm[1]
exp = exp.replace('%s**%s' % (str(itm[0]), str(itm[1])), str(res))
while '/' in exp:
if '/' in exp:
itm = splitter(exp, '/')
res = itm[0] / itm[1]
exp = exp.replace('%s/%s' % (str(itm[0]), str(itm[1])), str(res))
while '*' in exp:
if '*' in exp:
itm = splitter(exp, '*')
res = itm[0] * itm[1]
exp = exp.replace('%s*%s' % (str(itm[0]), str(itm[1])), str(res))
while '+' in exp:
if '+' in exp:
itm = splitter(exp, '+')
res = itm[0] + itm[1]
exp = exp.replace('%s+%s' % (str(itm[0]), str(itm[1])), str(res))
while '-' in exp:
if '-' in exp:
itm = splitter(exp, '-')
res = itm[0] - itm[1]
exp = exp.replace('%s-%s' % (str(itm[0]), str(itm[1])), str(res))
return format(exp)
Using lark parser library https://stackoverflow.com/posts/67491514/edit
from operator import add, sub, mul, truediv, neg, pow
from lark import Lark, Transformer, v_args
calc_grammar = f"""
?start: sum
?sum: product
| sum "+" product -> {add.__name__}
| sum "-" product -> {sub.__name__}
?product: power
| product "*" power -> {mul.__name__}
| product "/" power -> {truediv.__name__}
?power: atom
| power "^" atom -> {pow.__name__}
?atom: NUMBER -> number
| "-" atom -> {neg.__name__}
| "(" sum ")"
%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""
#v_args(inline=True)
class CalculateTree(Transformer):
add = add
sub = sub
neg = neg
mul = mul
truediv = truediv
pow = pow
number = float
calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateTree())
calc = calc_parser.parse
def eval_expr(expression: str) -> float:
return calc(expression)
print(eval_expr("2^4"))
print(eval_expr("-1*2^4"))
print(eval_expr("-2^3 + 1"))
print(eval_expr("2**4")) # Error
I came here looking for a mathematic expression parser as well. Reading through some of the answers and looking up libraries, I came across py-expression which I am now using. It basically handles a lot of operators and formula constructs, but if you're missing something you can easily add new operators/functions to it.
The basic syntax is:
from py_expression.core import Exp
exp = Exp()
parsed_formula = exp.parse('a+4')
result = exp.eval(parsed_formula, {"a":2})
The only issue that I've had with it so far is that it doesn't come with built-in mathematical constants nor a mechanism to add them in. I just proposed a solution to that however: https://github.com/FlavioLionelRita/py-expression/issues/7

parsing nested functions in python

line = "add(multiply(add(2,3),add(4,5)),1)"
def readLine(line):
countLeftBracket=0
string = ""
for char in line:
if char !=")":
string += char
else:
string +=char
break
for i in string:
if i=="(":
countLeftBracket+=1
if countLeftBracket>1:
cutString(string)
else:
return execute(string)
def cutString(string):
countLeftBracket=0
for char in string:
if char!="(":
string.replace(char,'')
elif char=="(":
string.replace(char,'')
break
for char in string:
if char=="(":
countLeftBracket+=1
if countLeftBracket>1:
cutString(string)
elif countLeftBracket==1:
return execute(string)
def add(num1,num2):
return print(num1+num2)
def multiply(num1,num2):
return print(num1*num2)
readLines(line)
I need to execute the whole line string. I tried to cut each function inside of brackets one by one and replace them with the result, but I am kind of lost. Not sure how to continue, my code gets the error:
File "main.py", line 26, in cutString
if char!="(":
RuntimeError: maximum recursion depth exceeded in comparison
Give me an idea of where to move, which method to use?
Here is a solution that uses pyparsing, and as such will be much easier to expand:
from pyparsing import *
first a convenience function (use the second tag function and print the parse tree to see why)
def tag(name):
"""This version converts ["expr", 4] => 4
comment in the version below to see the original parse tree
"""
def tagfn(tokens):
tklist = tokens.asList()
if name == 'expr' and len(tklist) == 1:
# LL1 artifact removal
return tklist
return tuple([name] + tklist)
return tagfn
# def tag(name):
# return lambda tokens: tuple([name] + tokens.asList())
Our lexer needs ot recognize left and right parenthesis, integers, and names. This is how you define them with pyparsing:
LPAR = Suppress("(")
RPAR = Suppress(")")
integer = Word(nums).setParseAction(lambda s,l,t: [int(t[0])])
name = Word(alphas)
our parser has function calls, which take zero or more expressions as parameters. A function call is also an expression, so to deal with the circularity we have to forward declare expr and fncall:
expr = Forward()
fncall = Forward()
expr << (integer | fncall).setParseAction(tag('expr'))
fnparams = delimitedList(expr)
fncall << (name + Group(LPAR + Optional(fnparams, default=[]) + RPAR)).setParseAction(tag('fncall'))
Now we can parse our string (we can add spaces and more or less than two parameters to functions as well):
line = "add(multiply(add(2,3),add(4,5)),1)"
res = fncall.parseString(line)
to see what is returned you can print it, this is called the parse-tree (or, since our tag function has simplified it, an abstract syntax tree):
import pprint
pprint.pprint(list(res))
which outputs:
[('fncall',
'add',
[('fncall',
'multiply',
[('fncall', 'add', [2, 3]), ('fncall', 'add', [4, 5])]),
1])]
with the commented out tag function it would be (which is just more work to deal with for no added benefit):
[('fncall',
'add',
[('expr',
('fncall',
'multiply',
[('expr', ('fncall', 'add', [('expr', 2), ('expr', 3)])),
('expr', ('fncall', 'add', [('expr', 4), ('expr', 5)]))])),
('expr', 1)])]
Now define the functions that are available to our program:
FUNCTIONS = {
'add': lambda *args: sum(args, 0),
'multiply': lambda *args: reduce(lambda a, b: a*b, args, 1),
}
# print FUNCTIONS['multiply'](1,2,3,4) # test that it works ;-)
Our parser is now very simple to write:
def parse(ast):
if not ast: # will not happen in our program, but it's good practice to exit early on no input
return
if isinstance(ast, tuple) and ast[0] == 'fncall':
# ast is here ('fncall', <name-of-function>, [list-of-arguments])
fn_name = ast[1] # get the function name
fn_args = parse(ast[2]) # parse each parameter (see elif below)
return FUNCTIONS[fn_name](*fn_args) # find and apply the function to its arguments
elif isinstance(ast, list):
# this is called when we hit a parameter list
return [parse(item) for item in ast]
elif isinstance(ast, int):
return ast
Now call the parser on the result of the lexing phase:
>>> print parse(res[0]) # the outermost item is an expression
46
Sounds like this could be solved with regex.
So this is an example of a single reduction
import re, operator
def apply(match):
func_name = match.group(1) # what's outside the patentesis
func_args = [int(x) for x in match.group(2).split(',')]
func = {"add": operator.add, "multiply": operator.mul}
return str(func[func_name](*func_args))
def single_step(line):
return re.sub(r"([a-z]+)\(([^()]+)\)",apply,line)
For example:
line = "add(multiply(add(2,3),add(4,5)),1)"
print(single_step(line))
Would output:
add(multiply(5,9),1)
All that is left to do, is to loop until the expression is a number
while not line.isdigit():
line = single_step(line)
print (line)
Will show
46
You can use a generator function to build a very simple parser:
import re, operator
line, f = "add(multiply(add(2,3),add(4,5)),1)", {'add':operator.add, 'multiply':operator.mul}
def parse(d):
n = next(d, None)
if n is not None and n != ')':
if n == '(':
yield iter(parse(d))
else:
yield n
yield from parse(d)
parsed = parse(iter(re.findall('\(|\)|\w+', line)))
def _eval(d):
_r = []
n = next(d, None)
while n is not None:
if n.isdigit():
_r.append(int(n))
else:
_r.append(f[n](*_eval(next(d))))
n = next(d, None)
return _r
print(_eval(parsed)[0])
Output:
46

Categories