I have a string that is a mathematical equation, but with some custom functions. I need to find all such functions and replace them with some code.
For example, I have a string:
a+b+f1(f2(x,y),x)
I want code that will replace (say) f2(x,y) with x+y^2 and f1(x,y) with sin(x+y).
It would be ideal if nested functions were supported, like in the example. However, it would still be useful if nesting was not supported.
As I understand from similar topics this can be done using a compiler module like compiler.parse(eq). How I can work with AST object created by compiler.parse(eq) to reconstruct my string back, replacing all found functions?
I need only to perform substitution and then string will be used in other program. Evaluation is not needed.
Here is a minimal working example (+, - , *, /, ** binary and unary operations and function call implemented). The priority of operations are set with parenthesis.
A little bit more than the functionality for the example given is done:
from __future__ import print_function
import ast
def transform(eq,functions):
class EqVisitor(ast.NodeVisitor):
def visit_BinOp(self,node):
#generate("=>BinOp")
generate("(")
self.visit(node.left)
self.visit(node.op)
#generate("ici",str(node.op),node._fields,node._attributes)
#generate(dir(node.op))
self.visit(node.right)
generate(")")
#ast.NodeVisitor.generic_visit(self,node)
def visit_USub(self,node):
generate("-")
def visit_UAdd(self,node):
generate("+")
def visit_Sub(self,node):
generate("-")
def visit_Add(self,node):
generate("+")
def visit_Pow(self,node):
generate("**")
def visit_Mult(self,node):
generate("*")
def visit_Div(self,node):
generate("/")
def visit_Name(self,node):
generate(node.id)
def visit_Call(self,node):
debug("function",node.func.id)
if node.func.id in functions:
debug("defined function")
func_visit(functions[node.func.id],node.args)
return
debug("not defined function",node.func.id)
#generate(node._fields)
#generate("args")
generate(node.func.id)
generate("(")
sep = ""
for arg in node.args:
generate (sep)
self.visit(arg)
sep=","
generate(")")
def visit_Num(self,node):
generate(node.n)
def generic_visit(self, node):
debug ("\n",type(node).__name__)
debug (node._fields)
ast.NodeVisitor.generic_visit(self, node)
def func_visit(definition,concrete_args):
class FuncVisitor(EqVisitor):
def visit_arguments(self,node):
#generate("visit arguments")
#generate(node._fields)
self.arguments={}
for concrete_arg,formal_arg in zip(concrete_args,node.args):
#generate(formal_arg._fields)
self.arguments[formal_arg.id]=concrete_arg
debug(self.arguments)
def visit_Name(self,node):
debug("visit Name",node.id)
if node.id in self.arguments:
eqV.visit(self.arguments[node.id])
else:
generate(node.id)
funcV=FuncVisitor()
funcV.visit(ast.parse(definition))
eqV=EqVisitor()
result = []
def generate(s):
#following line maybe usefull for debug
debug(str(s))
result.append(str(s))
eqV.visit(ast.parse(eq,mode="eval"))
return "".join(result)
def debug(*args,**kwargs):
#print(*args,**kwargs)
pass
Usage:
functions= {
"f1":"def f1(x,y):return x+y**2",
"f2":"def f2(x,y):return sin(x+y)",
}
eq="-(a+b)+f1(f2(+x,y),z)*4/365.12-h"
print(transform(eq,functions))
Result
((-(a+b)+(((sin((+x+y))+(z**2))*4)/365.12))-h)
WARNING
The code works with Python 2.7 and as it is AST dependent is not guaranteed to work with another version of Python. The Python 3 version doesn't work.
The full substitution is quite tricky. Here is my attempt to do it. Here we can successfully inline expressions,
but not in all scenarios. This code works on AST only, made by ast module. And uses codegen to stringify it back to code. The stringifying of ast and modifying ast in general is covered in other SO Q/A: "Parse a .py file, read the AST, modify it, then write back the modified source code".
First we define few helpers:
import ast
import codegen
import copy
def parseExpr(expr):
# Strip:
# Module(body=[Expr(value=
return ast.parse(expr).body[0].value
def toSource(expr):
return codegen.to_source(expr)
After that we define a substitution function using NodeTransformer.
For example:
substitute(parseExpr("a + b"), { "a": parseExpr("1") }) # 1 + b
The simulatenous substitution of multiple variables is needed to properly avoid nasty situations.
For example substituting both a and b for a + b in a + b.
The result should be (a + b) + (a + b), but if we substitute first a for a + b, we'll get (a + b) + b, and then substitute b, we'll get (a + (a + b)) + b which is the wrong result! So simultaneous is important:
class NameTransformer(ast.NodeTransformer):
def __init__(self, names):
self.names = names
def visit_Name(self, node):
if node.id in self.names:
return self.names[node.id]
else:
return node
def substitute(expr, names):
print "substitute"
for varName, varValue in names.iteritems():
print " name " + varName + " for " + toSource(varValue)
print " in " + toSource(expr)
return NameTransformer(names).visit(expr)
Then we write similar NodeTransformer to find calls, where we can inline function definitions:
class CallTransformer(ast.NodeTransformer):
def __init__(self, fnName, varNames, fnExpr):
self.fnName = fnName
self.varNames = varNames
# substitute in new fn expr for each CallTransformer
self.fnExpr = copy.deepcopy(fnExpr)
self.modified = False
def visit_Call(self, node):
if (node.func.id == self.fnName):
if len(node.args) == len(self.varNames):
print "expand call to " + self.fnName + "(" + (", ".join(self.varNames)) + ")" + " with arguments "+ ", ".join(map(toSource, node.args))
# We substitute in args too!
old_node = node
args = map(self.visit, node.args)
names = dict(zip(self.varNames, args))
node = substitute(self.fnExpr, names)
self.modified = True
return node
else:
raise Exception("invalid arity " + toSource(node))
else:
return self.generic_visit(node)
def substituteCalls(expr, definitions, n = 3):
while True:
if (n <= 0):
break
n -= 1
modified = False
for fnName, varNames, fnExpr in definitions:
transformer = CallTransformer(fnName, varNames, fnExpr)
expr = transformer.visit(expr)
modified = modified or transformer.modified
if not modified:
break
return expr
The substituteCalls is recursive so we can inline recursive functions too. Also there is an explicit limit, because some definitions might be infinitely recursive (as fact below). There is a bit of ugly looking copying, but it is required to separate different subtrees.
And the example code:
if True:
print "f1 first, unique variable names"
ex = parseExpr("a+b+f1(f2(x, y), x)")
ex = substituteCalls(ex, [
("f1", ["u", "v"], parseExpr("sin(u + v)")),
("f2", ["i", "j"], parseExpr("i + j ^ 2"))])
print toSource(ex)
print "---"
if True:
print "f1 first"
ex = parseExpr("a+b+f1(f2(x, y), x)")
ex = substituteCalls(ex, [
("f1", ["x", "y"], parseExpr("sin(x + y)")),
("f2", ["x", "y"], parseExpr("x + y ^ 2"))])
print toSource(ex)
print "---"
if True:
print "f2 first"
ex = parseExpr("f1(f1(x, x), y)")
ex = substituteCalls(ex, [
("f1", ["x", "y"], parseExpr("x + y"))])
print toSource(ex)
print "---"
if True:
print "fact"
ex = parseExpr("fact(n)")
ex = substituteCalls(ex, [
("fact", ["n"], parseExpr("n if n == 0 else n * fact(n-1)"))])
print toSource(ex)
print "---"
Which prints out:
f1 first, unique variable names
expand call to f1(u, v) with arguments f2(x, y), x
substitute
name u for f2(x, y)
name v for x
in sin((u + v))
expand call to f2(i, j) with arguments x, y
substitute
name i for x
name j for y
in ((i + j) ^ 2)
((a + b) + sin((((x + y) ^ 2) + x)))
---
f1 first
expand call to f1(x, y) with arguments f2(x, y), x
substitute
name y for x
name x for f2(x, y)
in sin((x + y))
expand call to f2(x, y) with arguments x, y
substitute
name y for y
name x for x
in ((x + y) ^ 2)
((a + b) + sin((((x + y) ^ 2) + x)))
---
f2 first
expand call to f1(x, y) with arguments f1(x, x), y
expand call to f1(x, y) with arguments x, x
substitute
name y for x
name x for x
in (x + y)
substitute
name y for y
name x for (x + x)
in (x + x)
((x + x) + ((x + x) + x))
---
fact
expand call to fact(n) with arguments n
substitute
name n for n
in n if (n == 0) else (n * fact((n - 1)))
expand call to fact(n) with arguments (n - 1)
substitute
name n for (n - 1)
in n if (n == 0) else (n * fact((n - 1)))
expand call to fact(n) with arguments ((n - 1) - 1)
substitute
name n for ((n - 1) - 1)
in n if (n == 0) else (n * fact((n - 1)))
n if (n == 0) else (n * (n - 1) if ((n - 1) == 0) else ((n - 1) * ((n - 1) - 1) if (((n - 1) - 1) == 0) else (((n - 1) - 1) * fact((((n - 1) - 1) - 1)))))
Unfortunately codegen version in pypi is buggy. It doesn't parenthesise expressions properly, even AST says they should. I used jbremer/codegen (pip install git+git://github.com/jbremer/codegen). It adds unnecessary parenthesis too, but it's better than no at all. Thanks to #XavierCombelle for the tip.
The substitution gets trickier if you have anonymous functions, i.e lambda. Then you need to rename variables. You could try to search for lambda calculus with substitution or implementation. Yet I had bad luck to find any articles which use Python for the task.
Do you know the variables beforehand?
I recommend using SymPy!
Take for example the following:
import sympy
a,b,x,y = sympy.symbols('a b x y')
f1 = sympy.Function('f1')
f2 = sympy.Function('f2')
readString = "a+b+f1(f2(x,y),x)"
z = eval(readString)
'z' will now be a symbolic term representing the mathematical formula. You can print it out. You can then use subs to replace symbolic terms or functions. You can either represent sine symbolically again (like f1 and f2) or you can possibly use the sin() in sympy.mpmath.
Depending on your needs, this approach is great because you can eventually compute, evaluate or simplify this expression.
What is your long term goal? Is it to evaluate the function or simply perform substitution? In the former case you can simply try this (note that f1 and f2 could also be dynamically defined):
import math
math.sin
def f2(x, y):
return x + y ** 2
def f1(x, y):
return math.sin(x + y)
a, b = 1, 2
x, y = 3, 4
eval('a + b + f1(f2(x, y), x)')
# 2.991148690709596
If you want to replace the functions and get back the modified version, you will indeed have to resort to some sort of AST parser. Be careful though with the use of eval, as this opens up a security hole for malicious user input code.
(Using sympy as adrianX suggested, with some extra code.)
Code below converts a given string to a new string after combining given functions. It's hasty and poorly documented, but it works.
WARNING!
Contains exec eval, malicious code could probably have an effected, if input is provided by external users.
UPDATE:
Rewrote the whole code. Works in Python 2.7.
Function arguments can be separated by comma or whitespace or both.
All examples in question and comments are working.
import re
import sympy
##################################################
# Input string and functions
initial_str = 'a1+myf1(myf2(a, b),y)'
given_functions = {'myf1(x,y)': 'cross(x,y)', 'myf2(a, b)': 'value(a,b)'}
##################################################
print '\nEXECUTED/EVALUATED STUFF:\n'
processed_str = initial_str
def fixed_power_op(str_to_fix):
return str_to_fix.replace('^', '**')
def fixed_multiplication(str_to_fix):
"""
Inserts multiplication symbol wherever omitted.
"""
pattern_digit_x = r"(\d)([A-Za-z])" # 4x -> 4*x
pattern_par_digit = r"(\))(\d)" # )4 -> )*4
pattern_digit_par = r"[^a-zA-Z]?_?(\d)(\()" # 4( -> 4*(
for patt in (pattern_digit_x, pattern_par_digit, pattern_digit_par):
str_to_fix = re.sub(patt, r'\1*\2', str_to_fix)
return str_to_fix
processed_str = fixed_power_op(processed_str)
class FProcessing(object):
def __init__(self, func_key, func_body):
self.func_key = func_key
self.func_body = func_body
def sliced_func_name(self):
return re.sub(r'(.+)\(.+', r'\1', self.func_key)
def sliced_func_args(self):
return re.search(r'\((.*)\)', self.func_key).group()
def sliced_args(self):
"""
Returns arguments found for given function. Arguments can be separated by comma or whitespace.
:returns (list)
"""
if ',' in self.sliced_func_args():
arg_separator = ','
else:
arg_separator = ' '
return self.sliced_func_args().replace('(', '').replace(')', '').split(arg_separator)
def num_of_sliced_args(self):
"""
Returns number of arguments found for given function.
"""
return len(self.sliced_args())
def functions_in_function_body(self):
"""
Detects functions in function body.
e.g. f1(x,y): sin(x+y**2), will result in "sin"
:returns (set)
"""
return set(re.findall(r'([a-zA-Z]+_?\w*)\(', self.func_body))
def symbols_in_func_body(self):
"""
Detects non argument symbols in function body.
"""
symbols_in_body = set(re.findall(r'[a-zA-Z]+_\w*', self.func_body))
return symbols_in_body - self.functions_in_function_body()
# --------------------------------------------------------------------------------------
# SYMBOL DETECTION (x, y, z, mz,..)
# Prohibited symbols
prohibited_symbol_names = set()
# Custom function names are prohibited symbol names.
for key in given_functions.keys():
prohibited_symbol_names |= {FProcessing(func_key=key, func_body=None).sliced_func_name()}
def symbols_in_str(provided_str):
"""
Returns a set of symbol names that are contained in provided string.
Allowed symbols start with a letter followed by 0 or more letters,
and then 0 or more numbers (eg. x, x1, Na, Xaa_sd, xa123)
"""
symbol_pattern = re.compile(r'[A-Za-z]+\d*')
symbol_name_set = re.findall(symbol_pattern, provided_str)
# Filters out prohibited.
symbol_name_set = {i for i in symbol_name_set if (i not in prohibited_symbol_names)}
return symbol_name_set
# ----------------------------------------------------------------
# EXEC SYMBOLS
symbols_in_given_str = symbols_in_str(initial_str)
# e.g. " x, y, sd = sympy.symbols('x y sd') "
symbol_string_to_exec = ', '.join(symbols_in_given_str)
symbol_string_to_exec += ' = '
symbol_string_to_exec += "sympy.symbols('%s')" % ' '.join(symbols_in_given_str)
exec symbol_string_to_exec
# -----------------------------------------------------------------------------------------
# FUNCTIONS
# Detects secondary functions (functions contained in body of given_functions dict)
sec_functions = set()
for key, val in given_functions.items():
sec_functions |= FProcessing(func_key=key, func_body=val).functions_in_function_body()
def secondary_function_as_exec_str(func_key):
"""
Used for functions that are contained in the function body of given_functions.
E.g. given_functions = {f1(x): sin(4+x)}
"my_f1 = sympy.Function('sin')(x)"
:param func_key: (str)
:return: (str)
"""
returned_str = "%s = sympy.Function('%s')" % (func_key, func_key)
print returned_str
return returned_str
def given_function_as_sympy_class_as_str(func_key, func_body):
"""
Converts given_function to sympy class and executes it.
E.g. class f1(sympy.Function):
nargs = (1, 2)
#classmethod
def eval(cls, x, y):
return cross(x+y**2)
:param func_key: (str)
:return: (None)
"""
func_proc_instance = FProcessing(func_key=func_key, func_body=func_body)
returned_str = 'class %s(sympy.Function): ' % func_proc_instance.sliced_func_name()
returned_str += '\n\tnargs = %s' % func_proc_instance.num_of_sliced_args()
returned_str += '\n\t#classmethod'
returned_str += '\n\tdef eval(cls, %s):' % ','.join(func_proc_instance.sliced_args())
returned_str = returned_str.replace("'", '')
returned_str += '\n\t\treturn %s' % func_body
returned_str = fixed_power_op(returned_str)
print '\n', returned_str
return returned_str
# Executes functions in given_functions' body
for name in sec_functions:
exec secondary_function_as_exec_str(func_key=name)
# Executes given_functions
for key, val in given_functions.items():
exec given_function_as_sympy_class_as_str(func_key=key, func_body=val)
final_result = eval(initial_str)
# PRINTING
print '\n' + ('-'*40)
print '\nRESULTS'
print '\nInitial string: \n%s' % initial_str
print '\nGiven functions:'
for key, val in given_functions.iteritems():
print '%s: ' % key, val
print '\nResult: \n%s' % final_result
I think you want to use something like PyBison which is a parser generator.
See an example that contains the basic code you need here:
http://freenet.mcnabhosting.com/python/pybison/calc.py
You need to add a token type for functions, and a rule for functions, and then what happens with that function if it is encountered.
If you need other information about parsing and so on, try to read some basic tutorials on Lex and (Yacc or Bison).
Related
I have a list of objects, each object is a mathematical function, and those functions may be dependent on each other, for example:
[
Formula("G", ["y"], "1 - y"),
Formula("V", ["x", "y"], "G(y) * x / 3"),
Formula("U", ["x", "y"],"(G(y)**2) / (9 * V(x, y)) + V(x, y)")
]
Where first argument is function name, second one is list of used variables, and third one is string - the function's expression.
Is there a simple way to evaluate value of function U(x, y) at a given point, for example, at [2, 3] and recursively call G(3) and V(2, 3), and get the final result?
I have tried to do this in Sympy, but couldn't call for example function G(y) in function V(x,y)
Thanks for all your suggestions. In example I have given, the Formulas are in topological order, but in practice it will not always be so.
I could use #Stef's solution, but I would have to format the formula expressions and then eval() it
x,y = sympy.symbols('x y'); G = 1-y; V = G * x / 3; U = G**2 / (9*V+V)
Then #OscarBenjamin suggested to use sympy's parse_expr which worked just fine, until I realized that formulas will not always be given in topological order. So I found out, that trying to put it in topological order and then parse it, would take too much execution time.
Eventually, I decided to make my own parser, which looks something like this (test classes and variables):
import re
import copy
class Formula():
function_name = ""
variables = []
expression = ""
__expr__ = ""
other_func_calls = 0
def __init__(self, function_name:str, variables:list, fun:str) -> None:
self.function_name = function_name
self.variables = variables
self.expression = fun
other_func = []
for i in fun:
if ord(i) in range(ord("A"), ord("Z") + 1):
self.other_func_calls += 1
other_func.append(i)
self.__expr__ = re.sub('[A-Z]?\((\w|, )*\)','_TBR_', fun) # _TBR_ is being replaced later
self.other_func = other_func # list of other functions in chronological order
class Pyramid():
name:str
functions:dict[str:Formula]
def __init__(self, name:str, funs:dict[str:Formula]) -> None:
self.name = name
self.functions = funs
def get_result(self, fun:str, values:dict[str:int]):
if (self.functions[fun].other_func_calls == 0): # Function does not call other functions
return eval(self.functions[fun].expression, values)
other_funcs = copy.deepcopy(self.functions[fun].other_func)
s = self.functions[fun].__expr__
for i in range(len(other_funcs)):
other_funcs[i] = self.get_result(other_funcs[i], values)
s = re.sub("_TBR_", f"({str(other_funcs[i])})", s, count=1)
return eval(s, values)
a = {
"V": Formula("V", ["x", "y"], "G(y) * x / 3"),
"U": Formula("U", ["x", "y"], "G(y)**2 / (9 * V(x, y)) + V(x, y)"),
"G": Formula("G", ["y"], "1 - y")
}
p = Pyramid("Testing", a)
print(p.get_result("U", {"x":2,"y":3}))
Maybe something like this?
>>> def Formula(*args):
... return parse_expr('{%s(*%s): %s}' % args)
...
>>> f =[
... Formula("G", ["y"], "1 - y"),
... Formula("V", ["x", "y"], "G(y) * x / 3"),
... Formula("U", ["x", "y"],"(G(y)**2) / (9 * V(x, y)) + V(x, y)")
... ]
>>> f
[{G(y): 1 - y}, {V(x, y): x*G(y)/3}, {U(x, y): G(y)**2/(9*V(x, y)) + V(x, y)}]
f is already topologically sorted so back substitute
>>> from sympy import Dict
>>> e=Dict(f[-1])
>>> e=e.subs(f[-2])
>>> e=e.subs(f[-3])
>>> a,b=dict(e).popitem()
>>> U = Lambda(a.args,b)
>>> U(2,3)
-5/3
If it's not sorted, you can use the topological_sort, perhaps via repsort here to do so:
>>> repsort(*[tuple(i.items()) for i in f])
[(U(x, y), G(y)**2/(9*V(x, y)) + V(x, y)), (V(x, y), x*G(y)/3), (G(y), 1 - y)]
>>> s = _
>>> expr = s[0][0]
>>> for i in s:
... expr = expr.subs(*i)
...
>>> expr
x*(1 - y)/3 + (1 - y)/(3*x)
>>> U = Lambda(tuple(ordered(expr.free_symbols)), _)
>>> U(2,3)
-5/3
I am student of MCS and learning python and stuck in one problem. I am trying to merge all overllaping strings.
I am using following algorithm but output is not as expected?
(1) find max overlap between all possible pairs.
(2) Store all the overlaps in dictionary with key as amount of overlap and values as start, stringa, stringb
(3) pick the maximum set of overlap and merge the string. I have implemnted my algorithm using following code but out does not produce expected output.
def overlap(a, b):
overlaps = []
for i in range(len(b)):
for j in range(len(a)):
if a.endswith(b[:i + 1], j):
overlaps.append((i, j))
return max(overlaps) if overlaps else (0, -1)
def get_merged_string(lst):
overlaps = defaultdict(list)
while len(lst) > 1:
overlaps.clear()
for a in lst:
for b in lst:
if a == b:
continue
amount, start = overlap(a, b)
overlaps[amount].append((start, a, b))
maximum = max(overlaps)
if maximum == 0:
break
start, a, b = choice(overlaps[maximum]) # pick one among equals
lst.remove(a)
lst.remove(b)
lst.append(a[:start] + b)
str1 = ''.join(lst)
return (urllib.parse.unquote_plus(urllib.parse.unquote_plus(str1)))
Input:
%23%21%2Fusr%2Fbin%2Fpyth
n%2Fpython3%0A%0A%23%0A%23+
%0A%0A%23%0A%23+Python+fu
+Python+functio
unctions+start+
+start+with+def
th+def.++They+t
hey+take+parame
parameters%2C+whi
+which+are%0A%23+un
are%0A%23+un-typed%2C
n-typed%2C+as+oth
+as+other+varia
her+variables.%0A
es.%0A%0A%23+The+stri
string+at+the+s
the+start+of+th
rt+of+the+funct
function+is+for
n+is+for+docume
documentation.%0A
tation.%0Adef+prh
f+prhello%28%29%3A%0A++
%28%29%3A%0A++++%22Print+
+%22Print+hello%22%0A
hello%22%0A++++prin
+print%28%22Hello%2C+
llo%2C+World%21%22%29%0A%0A
World%21%22%29%0A%0Aprhel
%29%0A%0Aprhello%28%29%0A%0A%23
%28%29%0A%0A%23%0A%23%0Adef+prl
f+prlines%28str%2C+
ines%28str%2C+num%29%3A
num%29%3A%0A++++%22Prin
++++%22Print+num+
nt+num+lines+co
ines+consisting
onsisting+of+st
ing+of+str%2C+rep
+str%2C+repeating
epeating+str+on
r+once+more+on+
+on+each+line.%22
ine.%22%0A++++for+n
+for+n+in+range
in+range%280%2Cnum%29
num%29%3A%0A++++++++p
++++print%28str+%2A
%28str+%2A+%28n+%2B+1%29%29
+%28n+%2B+1%29%29%0A%0Aprli
+1%29%29%0A%0Aprlines%28%27
rlines%28%27z%27%2C+5%29%0A
%2C+5%29%0Aprint%28%29%0Apr
print%28%29%0Aprlines
rlines%28%27fred+%27%2C
red+%27%2C+4%29%0A
My output:
hello()
#
#
def prlines(str, num):
"Print hello"
print("Hello, World!")
prhellhe string at the start of the functions start with def. They take parameters, which are
# un-typed, as other variables.
# The s#!/usr/bin/python3
#
# Python function is for documentation.
def prhello():
"Print num lines consisting of str, repeating str once more on each line."
for n in range(0,num):
print(str * (n 1))
prlines('z', 5)
print()
prlines('fred ', 4)
Expected Output: It is after merging overlapping string.
#!/usr/bin/python3
#
# Python functions start with def. They take parameters, which are
# un-typed, as other variables.
# The string at the start of the function is for documentation.
def prhello():
"Print hello"
print("Hello, World!")
prhello()
#
#
def prlines(str, num):
"Print num lines consisting of str, repeating str once more on each line."
for n in range(0,num):
print(str * (n + 1))
prlines('z', 5)
print()
prlines('fred ', 4)
Above issue is caused by ambiguity of overlapping. How can i fix such issue?
I want to write a Python code that will evaluate an expression using stack. I have the following code, where numStk is a stack that holds number and optStk that holds operators. In the expression 2+3*4-6, at the end of for loop, numStack contains 2, 12, and 6; and optStk contains - and +. Now how can I make my setOps() function to pop elements from the two stacks to do the evaluate the expression?
def main():
raw_expression = input("Enter an expression: ")
expression = raw_expression.replace(" ", "")
for i in expression:
if (i in numbers):
numStk.push(i)
else:
setOps(i)
optStk.push(i)
## code needed to evaluate the rest of the elements in stackstack
return valStk.top()
My setOps(i) function is as follow:
def repeatOps(refOp):
while (len(valStk) > 1 and operators.index(refOp) <= operators.index(optStk.top())):
x = numStk.pop()
y = numStk.pop()
op = optStk.pop()
numStk.push(str(eval(x+op+y)))
Even if I fill in all the stuff you left out, there are issues with your code: setOps() appears to be called repeatOps(); numStk is sometimes called valStk; you evaluate in the wrong order, e.g. "6-5" is evaluated "5-6"; you're calling eval()!
Below's my filling out and reworking of your code to address the above issues:
from collections import OrderedDict
DIGITS = "0123456789"
# position implies (PEMDAS) priority, low to high
OPERATORS = OrderedDict([ \
['+', lambda a, b: a + b], \
['-', lambda a, b: a - b], \
['*', lambda a, b: a * b], \
['/', lambda a, b: a / b], \
])
def operator_priority(character):
return list(OPERATORS.keys()).index(character)
class Stack(list):
""" minimalist stack implementation """
def push(self, thing):
self.append(thing)
def top(self):
return self[-1]
def evaluate(expression):
numStk = Stack()
optStk = Stack()
def setOps(refOp):
while numStk and optStk and operator_priority(refOp) <= operator_priority(optStk.top()):
y = numStk.pop()
x = numStk.pop()
op = optStk.pop()
print(x, op, y) # debugging
numStk.push(OPERATORS[op](x, y))
for i in expression:
if i in DIGITS:
numStk.push(int(i))
else:
setOps(i)
optStk.push(i)
if optStk:
# evaluate the rest of the elements in stacks
setOps(list(OPERATORS.keys())[0]) # trigger using lowest priority operator
return numStk.top()
if __name__ == "__main__":
raw_expression = input("Enter an expression: ")
expression = raw_expression.replace(" ", "")
print(evaluate(expression))
Far from perfect but something to get you going:
EXAMPLE
> python3 test.py
Enter an expression: 2+3*4-6
3 * 4
12 - 6
2 + 6
8
>
To address your original question, the key to finishing the evaluation seems to be running setOps() with a fictitious, low priority operator if there's anything left in the optStk.
Using sympy, I need to replace all occurrence of exp(C+anything) with C*exp(anything). Because exp(C) is constant, I just write at as C.
I can do this for one occurrence of exp in the expression. But do not how to do it if there are than one instance.
For example, for one instance, as in x+exp(C_0+3*x)+3*y, I need to change it to x+C_0*exp(3*x)+3*y
For one instance, this seems to work after some trial and error
from sympy import *
x,y,C_0 = symbols('x y C_0')
expr=x+exp(C_0+3*x)+3*y
#first check if exp is in the expression
if any([isinstance(a, exp) for a in preorder_traversal(expr)]):
p_1=Wild('p1');p_2=Wild('p_2');p_3=Wild('p_3')
r=(p_1+exp(C_0+p_2)+p_3).matches(expr)
expr.subs(exp(C_0+r[p_2]),C_0*exp(r[p_2]))
Which gives
C_0*exp(3*x) + x + 3*y
But what about something like x+exp(C_0+3*x)+3*y+exp(C_0+30*x+y) which I need to change to x+C_0*exp(3*x)+3*y+C_0*exp(30*x+y) I can't make special pattern match for each possible case. I need a way to change all occurrences
In Mathematica, I do the above as follows
expr = x + Exp[c + 3*x]*3*y + 3*y + Exp[c + 30*x + y]
expr /. Exp[c + any_] :> (c Exp[any])
Which gives
I actually prefer to tell Python just to change exp(C+anything) to C*exp(anything) without having to give pattern for the overall expression, since that can change in many way.
I am sure the above is also possible in python/sympy. Any hints how to do it?
I would look for function exp inside of the expression, check whether its argument is Add, and then whether C_0 is among the arguments of Add. Then build a thing to replace exp with. Consider the following:
from sympy import *
x, y, C_0 = symbols('x y C_0')
expr = x + exp(C_0+3*x) + 3*y + exp(y+C_0+30*x) - exp(x+y-C_0) + exp(x*y)
exp_sum = [(a, a.args[0].args) for a in preorder_traversal(expr) if a.func == exp and a.args[0].func == Add]
exp_sum = [p for p in exp_sum if C_0 in p[1]]
new_exp = [C_0*exp(Add(*[x for x in p[1] if x != C_0])) for p in exp_sum]
for (old, new) in zip(exp_sum, new_exp):
expr = expr.subs(old[0], new)
Initially, exp_sum contains all parts of the form exp(Add(...)). After that it's filtered down to sums containing C_0. New exponentials are formed by taking all summands that are not C_0, adding them, applying exp and multiplying by C_0. Then substitution happens.
To clarify the process, here is what exp_sum is in the above example: a list of tuples (exponential and the summands inside):
[(exp(C_0 + 3*x), (C_0, 3*x)), (exp(C_0 + 30*x + y), (C_0, y, 30*x))]
And this is new_exp
[C_0*exp(3*x), C_0*exp(30*x + y)]
Finally, expr at the end:
C_0*exp(3*x) + C_0*exp(30*x + y) + x + 3*y + exp(x*y) - exp(-C_0 + x + y)
Notice that exp(-C_0...) is not affected by the change; it's not a part of the pattern.
I have a class that was taking in lists of 1's and 0's and performing GF(2) finite field arithmetic operations. It used to work until I tried to make it take the input in polynomial format. As for how the finite arithmetic will be done after fixing the regex issue, I was thinking about overloading the operators.
The actual code in parsePolyToListInput(input) works when outside the class. The problem seems to be in the regex, which errors that it will only take in a string (this makes sense), but does not seem to initialize with self.expr as a parameter (that's a problem). The #staticmethod just before the initialization was an attempt to salvage the unbound error as it the polynomial was passed in, but this is apparently completely wrong. Just to save you time if you decide to look at any of the arithmetic operations, modular inverse does not work (seems to be due to the formatting issue after every iteration of that while loop for division in the function and what the return type is):
import re
class gf2poly:
#binary arithemtic on polynomials
##staticmethod
def __init__(self,expr):
self.expr = expr
#self.expr = [int(i) for i in expr]
self.expr = gf2poly.parsePolyToListInput(self.expr)
def convert(self): #to clarify the input if necessary
convertToString = str(self.expr)
print "expression is %s"%(convertToString)
def id(self): #returns modulus 2 (1,0,0,1,1,....) for input lists
return [int(self.expr[i])%2 for i in range(len(self.expr))]
def listToInt(self): #converts list to integer for later use
result = gf2poly.id(self)
return int(''.join(map(str,result)))
def prepBinary(a,b): #converts to base 2 and orders min and max for use
a = gf2poly.listToInt(a); b = gf2poly.listToInt(b)
bina = int(str(a),2); binb = int(str(b),2)
a = min(bina,binb); b = max(bina,binb);
return a,b
#staticmethod
def outFormat(raw):
raw = str(raw[::-1]); g = [] #reverse binary string for enumeration
[g.append(i) for i,c in enumerate(raw) if c == '1']
processed = "x**"+' + x**'.join(map(str, g[::-1]))
if len(g) == 0: return 0 #return 0 if list empty
return processed #returns result in gf(2) polynomial form
def parsePolyToListInput(poly):
c = [int(i.group(0)) for i in re.finditer(r'\d+', poly)] #re.finditer returns an iterator
#m = max(c)
return [1 if x in c else 0 for x in xrange(max(c), -1, -1)]
#return d
def add(self,other): #accepts 2 lists as parameters
a = gf2poly.listToInt(self); b = gf2poly.listToInt(other)
bina = int(str(a),2); binb = int(str(b),2)
m = bina^binb; z = "{0:b}".format(m)
return z #returns binary string
def subtract(self,other): #basically same as add() but built differently
result = [self.expr[i] ^ other.expr[i] for i in range(len(max(self.expr,other.expr)))]
return int(''.join(map(str,result)))
def multiply(a,b): #a,b are lists like (1,0,1,0,0,1,....)
a,b = gf2poly.prepBinary(a,b)
g = []; bitsa = "{0:b}".format(a)
[g.append((b<<i)*int(bit)) for i,bit in enumerate(bitsa)]
m = reduce(lambda x,y: x^y,g); z = "{0:b}".format(m)
return z #returns product of 2 polynomials in gf2
def divide(a,b): #a,b are lists like (1,0,1,0,0,1,....)
a,b = gf2poly.prepBinary(a,b)
bitsa = "{0:b}".format(a); bitsb = "{0:b}".format(b)
difflen = len(str(bitsb)) - len(str(bitsa))
c = a<<difflen; q=0
while difflen >= 0 and b != 0: #a is divisor, b is dividend, b/a
q+=1<<difflen; b = b^c # b/a because of sorting in prep
lendif = abs(len(str(bin(b))) - len(str(bin(c))))
c = c>>lendif; difflen -= lendif
r = "{0:b}".format(b); q = "{0:b}".format(q)
return r,q #returns r remainder and q quotient in gf2 division
def remainder(a,b): #separate function for clarity when calling
r = gf2poly.divide(a,b)[0]; r = int(str(r),2)
return "{0:b}".format(r)
def quotient(a,b): #separate function for clarity when calling
q = gf2poly.divide(a,b)[1]; q = int(str(q),2)
return "{0:b}".format(q)
def extendedEuclideanGF2(a,b): # extended euclidean. a,b are GF(2) polynomials in list form
inita,initb=a,b; x,prevx=0,1; y,prevy = 1,0
while sum(b) != 0:
q = gf2poly.quotient(a,b);
q = list(q); q = [int(x) for x in q]
#q = list(q);
#q = tuple([int(i) for i in q])
q = gf2poly(q)
a,b = b,gf2poly.remainder(a,b);
#a = map(list, a);
#b = [list(x) for x in a];
#a = [int(x) for x in a]; b = [int(x) for x in b];
b = list(b); b = [int(x) for x in b]
#b = list(b);
#b = tuple([int(i) for i in b])
b = gf2poly(b)
#x,prevx = (prevx-q*x, x);
#y,prevy=(prevy-q*y, y)
print "types ",type(q),type(a),type(b)
#q=a//b; a,b = b,a%b; x,prevx = (prevx-q*x, x); y,prevy=(prevy-q*y, y)
#print("%d * %d + %d * %d = %d" % (inita,prevx,initb,prevy,a))
return a,prevx,prevy # returns gcd of (a,b), and factors s and t
def modular_inverse(a,mod): # where a,mod are GF(2) polynomials in list form
gcd,s,t = gf2poly.extendedEuclideanGF2(a,mod); mi = gf2poly.remainder(s,mod)
#gcd,s,t = ext_euc_alg_i(a,mod); mi = s%mod
if gcd !=1: return False
#print ("%d * %d mod %d = 1"%(a,mi,mod))
return mi # returns modular inverse of a,mod
I usually test it with this input:
a = x**14 + x**1 + x**0
p1 = gf2poly(a)
b = x**6 + x**2 + x**1
p2 = gf2poly(b)
The first thing you might notice about my code is that it's not very good. There are 2 reasons for that:
1) I wrote it so that the 1st version could do work in the finite field GF(2), and output in polynomial format. Then the next versions were supposed to be able to take polynomial inputs, and also perform the crucial 'modular inverse' function which is not working as planned (this means it's actually not working at all).
2) I'm teaching myself Python (I'm actually teaching myself programming overall), so any constructive criticism from pro Python programmers is welcome as I'm trying to break myself of beginner habits as quickly as possible.
EDIT:
Maybe some more of the code I've been testing with will help clarify what works and what doesn't:
t1 = [1,1,1]; t2 = [1,0,1]; t3 = [1,1]; t4 = [1, 0, 1, 1, 1, 1, 1]
t5 = [1,1,1,1]; t6 = [1,1,0,1]; t7 = [1,0,1,1,0]
f1 = gf2poly(t1); f2 = gf2poly(t2); f3 = gf2poly(t3); f4 = gf2poly(t4)
f5 = gf2poly(t5);f6 = gf2poly(t6);f7 = gf2poly(t7)
##print "subtract: ",a.subtract(b)
##print "add: ",a.add(b)
##print "multiply: ",gf2poly.multiply(f1,f3)
##print "multiply: ",gf2poly.multiply(f1,f2)
##print "multiply: ",gf2poly.multiply(f3,f4)
##print "degree a: ",a.degree()
##print "degree c: ",c.degree()
##print "divide: ",gf2poly.divide(f1,b)
##print "divide: ",gf2poly.divide(f4,a)
##print "divide: ",gf2poly.divide(f4,f2)
##print "divide: ",gf2poly.divide(f2,a)
##print "***********************************"
##print "quotient: ",gf2poly.quotient(f2,f5)
##print "remainder: ",gf2poly.remainder(f2,f5)
##testq = gf2poly.quotient(f4,f2)
##testr = gf2poly.remainder(f4,f2)
##print "quotient: ",testq,type(testq)
##print "remainder: ",testr,type(testr)
##print "***********************************"
##print "outFormat testp: ",gf2poly.outFormat(testq)
##print "outFormat testr: ",gf2poly.outFormat(testr)
##print "***********************************"
#print "gf2poly.modular_inverse(): ",gf2poly.modular_inverse(f2,f3)
print "p1 ",p1 #,type(f2),type(f3)
#print "parsePolyToListInput ",gf2poly.parsePolyToListInput(a)
Part of your problem is that you haven't declared self as an argument for parsePolyToListInput. When you call a method, the instance you call it on is implicitly bound as the first argument. Naming the first argument self is a convention, not a strict requirement - the instance is being bound to poly, which you then try to run a regexp over.
It looks me like there's some confusion in your design here about what's behavior of individual instances of the class and what's class-level or module-level behavior. In Python, it's perfectly acceptable to leave something that doesn't take an instance of a class as a parameter defined as a module-level function rather than shoehorning it in awkwardly. parsePolyToListInput might be one such function.
Your add implementation, similarly, has a comment saying it "accepts 2 lists as parameters". In fact, it's going to get a gf2poly instance as its first argument - this is probably right if you're planning to do operator overloading, but it means the second argument should also be a gf2poly instance as well.
EDIT:
Yeah, your example code shows a breakdown between class behavior and instance behavior. Either your multiply call should look something like this:
print "multiply: ",f1.multiply(f3)
Or multiply shouldn't be a method at all:
gfpoly.py:
def multiply(f1, f2):
a,b = prepBinary(a,b)
g = []; bitsa = "{0:b}".format(a)
[g.append((b<<i)*int(bit)) for i,bit in enumerate(bitsa)]
m = reduce(lambda x,y: x^y,g); z = "{0:b}".format(m)
return z #returns product of 2 polynomials in gf2
That latter approach is, for instance, how the standard math library does things.
The advantage of defining a multiplication method is that you could name it appropriately (http://docs.python.org/2/reference/datamodel.html#special-method-names) and use it with the * operator:
print "multiply: ",f1 *f3