ANTLR4 Arith Expression visitor order operation

ANTLR4 Arith Expression visitor order operation - python

I am trying to understand how visitor context work in python version of ANTLR4
I am using the Python3 grammar and I trying to visit "arith_expr"
with this input example x = 10 + 50 - 50 I have this context in the visitor
len(ctx.term()) = 3 is the value of atom [10,50,50]
len(ctx.ADD()) = 1
len(ctx.MINUS()) = 1
What is the order of the operator?
arith_expr
: term (('+'|'-') term)*
;
class Arith_exprContext(ParserRuleContext):
def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
super().__init__(parent, invokingState)
self.parser = parser
def term(self, i:int=None):
if i is None:
return self.getTypedRuleContexts(MBIParser.TermContext)
else:
return self.getTypedRuleContext(MBIParser.TermContext,i)
def ADD(self, i:int=None):
if i is None:
return self.getTokens(MBIParser.ADD)
else:
return self.getToken(MBIParser.ADD, i)
def MINUS(self, i:int=None):
if i is None:
return self.getTokens(MBIParser.MINUS)
else:
return self.getToken(MBIParser.MINUS, i)
def getRuleIndex(self):
return MBIParser.RULE_arith_expr
def enterRule(self, listener:ParseTreeListener):
if hasattr( listener, "enterArith_expr" ):
listener.enterArith_expr(self)
def exitRule(self, listener:ParseTreeListener):
if hasattr( listener, "exitArith_expr" ):
listener.exitArith_expr(self)
def accept(self, visitor:ParseTreeVisitor):
if hasattr( visitor, "visitArith_expr" ):
return visitor.visitArith_expr(self)
else:
return visitor.visitChildren(self)
this is the override visitor def
def visitArith_expr(self, ctx:MBIParser.Arith_exprContext):
print(len(ctx.term())) #3
self.visit(ctx.term(0)) #10
self.visit(ctx.term(1)) #50
self.visit(ctx.term(2)) #50
print(len(ctx.ADD())) #1
print(len(ctx.MINUS())) #1
return self.visitChildren(ctx)
Thank you for your explanation

You could collect all operators inside a list like this:
arith_expr
: term (operators+=('+'|'-') term)*
;
The operators is a list and contains all operators. You can then do something like this in your visitor:
def visitArith_expr(self, ctx):
result = self.visit(ctx.term(0))
for i in range(len(ctx.operators)):
if ctx.operators[i].type == MBILexer.ADD:
result += self.visit(ctx.term(i + 1))
else:
result -= self.visit(ctx.term(i + 1))
return result
def visitTerm(self, ctx):
return int(ctx.getText())
Or better, do something like this:
expr
: expr ('+' | '-') expr #add_expr
| term #term_expr
;
in which case the visitor will get a lot easier:
# expr ('+' | '-') expr #add_expr
def visitAdd_expr(self, ctx):
if ctx.ADD() is not None:
return self.visit(ctx.expr(0)) + self.visit(ctx.expr(1))
return self.visit(ctx.expr(0)) - self.visit(ctx.expr(1))
# term #term_expr
def visitTerm_expr(self, ctx):
return self.visit(ctx.term())
def visitTerm(self, ctx):
return int(ctx.getText())

Related

Is there a way to have a object type of your choice (i.e. LinkedEdge) hash as part of a 'union.set()' processing?

I have the following code and it works until it gets to the 'union.set()' part. It says, "unhashable type: 'LinkedEdge' " . I am not sure why this is the case since I have looked at other sources on the web and in reference books to know that the 'g.addVertex()' method and the 'g.addEdge()' method as well as the arguments being passed should lead correctly to an output like this:
5 Vertices: A C B E D
5 Edges: A>B:3 A>C:2 B>D:1 C>D:1 D>E:2
class LinkedEdge(object):
def __init__(self, fromVertex, toVertex, weight = None):
self._vertex1 = fromVertex
self._vertex2 = toVertex
self._weight = weight
self._mark = False
def clearMark(self):
self._mark = False
def __eq__(self, other):
if self is other: return True
if type(self) != type(other):
return False
return self._vertex1 == other._vertex1 and self._vertex2 == other._vertex2
def getOtherVertex(self, thisVertex):
if thisVertex == None or thisVertex == self._vertex2:
return self._vertex1
else:
return self._vertex2
def getToVertex(self):
return self._vertex2
def getWeight(self):
return self._weight
def isMarked(self):
return self._mark
def setMark(self):
self._mark = True
def setWeight(self, weight):
self._weight = weight
def __str__(self):
return str(self._vertex1) + ">" + str(self._vertex2) + ":" + str(self._weight)
class LinkedVertex(object):
def __init__(self, label):
self._label = label
self._edgeList = []
self._mark = False
def clearMark(self):
self._mark = False;
def getLabel(self):
return self._label
def isMarked(self):
return self._mark
def setLabel(self, label, g):
g._vertices.pop(self._label, None)
g._vertices[label] = self
self._label = label
def setMark(self):
self._mark = True
def __str__(self):
return str(self._label)
def addEdgeTo(self, toVertex, weight):
edge = LinkedEdge(self, toVertex, weight)
self._edgeList.append(edge)
def getEdgeTo(self, toVertex):
edge = LinkedEdge(self, toVertex)
try:
return self._edgeList[self._edgeList.index(edge)]
except:
return None
def incidentEdges(self):
return iter(self._edgeList)
def neighboringVertices(self):
vertices = []
for edge in self._edgeList:
vertices.append(edge.getOtherVertex(self))
return iter(vertices)
def removeEdgeTo(self, toVertex):
edge = LinkedEdge(self, toVertex)
if edge in self._edgeList:
self._edgeList.remove(edge)
return True
else:
return False
class LinkedDirectedGraph(object):
def __init__(self, collection = None):
self._vertexCount = 0
self._edgeCount = 0
self._vertices = {}
if collection != None:
for label in collection:
self.addVertex(label)
# Methods for clearing, marks, sizes, string rep
def clear(self):
self._vertexCount = 0
self._edgeCount = 0
self._vertices = {}
def clearEdgeMarks(self):
for edge in self.edges():
edge.clearMark()
def clearVertexMarks(self):
for vertex in self.vertices():
vertex.clearMark()
def isEmpty(self):
return self._vertexCount == 0;
def sizeEdges(self):
return self._edgeCount
def sizeVertices(self):
return self._vertexCount
def __str__(self):
result = str(self.sizeVertices()) + " Vertices: "
for vertex in self._vertices:
result += " " + str(vertex)
result += "\n";
result += str(self.sizeEdges()) + " Edges: "
for edge in self.edges():
result += " " + str(edge)
return result
def addVertex(self, label):
self._vertices[label] = LinkedVertex(label)
self._vertexCount += 1
def containsVertex (self, label):
return label in self._vertices
def getVertex(self, label):
return self._vertices[label]
def removeVertex(self, label):
removedVertex = self._vertices.pop(label, None)
if removedVertex is None:
return False
# Examine all vertices
for vertex in self.vertices():
if vertex.removeEdgeTo(removedVertex):
self._edgeCount -= 1
self._vertexCount -= 1
return True
def addEdge(self, fromLabel, toLabel, weight):
fromVertex = self.getVertex(fromLabel)
toVertex = self.getVertex(toLabel)
fromVertex.addEdgeTo(toVertex, weight)
self._edgeCount += 1
def containsEdge(self, fromLabel, toLabel):
return self.getEdge(fromLabel, toLabel) != None
def getEdge(self, fromLabel, toLabel):
fromVertex = self._vertices[fromLabel]
toVertex = self._vertices[toLabel]
return fromVertex.getEdgeTo(toVertex)
def removeEdge (self, fromLabel, toLabel):
fromVertex = self.getVertex(fromLabel)
toVertex = self.getVertex(toLabel)
edgeRemovedFlg = fromVertex.removeEdgeTo(toVertex)
if edgeRemovedFlg:
self._edgeCount -= 1
return edgeRemovedFlg
# Iterators
def edges(self):
result = set()
for vertex in self.vertices():
edges = vertex.incidentEdges()
result = result.union(set(edges))
return iter(result)
def vertices(self):
return iter(self._vertices.values())
def incidentEdges(self, label):
return self._vertices[label].incidentEdges()
def neighboringVertices(self, label):
return self._vertices[label].neighboringVertices
g = LinkedDirectedGraph()
# Insert vertices
g.addVertex("John")
g.addVertex("Sam")
g.addVertex("Megan")
g.addVertex("Jennifer")
g.addVertex("Rick")
# Insert weighted edges
g.addEdge("John", "Sam", 3)
g.addEdge("John", "Megan", 2)
g.addEdge("Sam", "Jennifer", 1)
g.addEdge("Megan", "Jennifer", 1)
g.addEdge("Jennifer", "Rick", 2)
print(g)

If you override __eq__, then Python intentionally makes your class unhashable, since there is no longer a guarantee that the default hashing algorithm (based on the object's location in memory) is compatible with your __eq__ algorithm. "Compatible" here just means that equal objects must have equal hashes. By default, nothing is equal, so when you make some things equal using an __eq__ method, you impose a requirement on what a proper hash function must do.
If you want a custom class with a custom __eq__ method to be hashable, you must implement a __hash__ method yourself.
It could be as simple as being based on the hash of the corresponding tuple:
def __hash__(self):
return hash((type(self), self._vertex1, self._vertex2))
The Python docs explain this here.

Finding Method for Binary Tree doesn't return anything

I have a Binary Tree and want to search for elements in it.
But for some reason i'm not getting any output and no error. Tried debugging as well but the code is always being executed until the end.
def find(self, i):
if self.__head is None:
return -1
else:
return self.__find(self.__head, i)
def __find(self, element, i):
if element is not None:
if i == element.value():
return 1
elif i < element.value():
self.__find(element.getLeft(), i)
else:
self.__find(element.getRight(), i)
else:
return -1
The code for the Elements is the following:
class BElement:
def __init__(self, i, v):
self.__id = i
self.__value = v
self.__left = None
self.__right = None
def id(self):
return self.__id
def value(self):
return self.__value
def getLeft(self):
return self.__left
def setLeft(self, l):
self.__left = l
def getRight(self):
return self.__right
def setRight(self, r):
self.__right = r
def __str__(self):
s = "{"
if self.__left is not None:
s = s + str(self.__left)
s = s + str(self.__id) + ":" + str(self.__value)
if self.__right is not None:
s = s + str(self.__right)
s = s + "}"
return s

remove the brackets() after value,left and right they are not methods.

Optimal way to apply a conditional If statement

I'm basically defining a set of parameters below in the Start() method. Most of the params will be the same except for the request.Continuation which can have a value of either 6 or 3. My thinking was to have a list of the symbols that would get the value 6 (in self.continuation) and just say something like "If in self.continuation, value=6, else 3".
I'm fairly new to python so this could be way off but I wasn't sure if this would be a good use case for a lambda expression.
class BarsRequestSample(CELEnvironment.CELSinkBase):
def __init__(self):
self.symbols = ['CLES12Z', 'HOE']
self.continuation=['ClES12Z', 'CLES6M']
def Start(self):
for symbol in self.symbols:
request.Symbol = symbol
request.RangeStart = 0
request.RangeEnd = -60
request.IntradayPeriod = 5
request.Continuation = 6
request.UpdatesEnabled = True
request.EqualizeCloses = True
request.SessionsFilter = 31
What I know I could do:
def Start(self):
for symbol in self.symbols:
request.Symbol = symbol
request.RangeStart = 0
request.RangeEnd = -60
request.IntradayPeriod = 5
#potential attempt
if symbol in self.continuation:
request.Continuation = 6
else:
request.Continuation = 3
request.UpdatesEnabled = True
request.EqualizeCloses = True
request.SessionsFilter = 31
I'm just getting an intuition that there may be a more concise way to do the same thing - would appreciate any thoughts!

I would make the list of symbols a set. Sets containment is an O(1) operation versus lists which are O(n). It doesn't matter for two elements, but it's a good habit to get into.
self.continuation = {'ClES12Z', 'CLES6M'}
You can use a conditional expression to condense the assignment to one line:
request.Continuation = 6 if symbol in self.continuation else 3

For lambda:
https://github.com/baruchel/continuation/blob/master/continuation/init.py
"""
A module for adding a call/cc feature to Python.
"""
__version__ = '0.1'
__internal_type_error__ = (
"Argument of a continuation must be a continuable function."
)
class _Continuation(Exception):
def __init__(self, f, args, kwargs):
self.func, self.args, self.kwargs = f, args, kwargs
class Continuation():
def __init__(self, calling_id):
self.calling_id = calling_id
def __callback__(self, f):
if isinstance(f, _Continuation):
f.uid = id(self)
raise f
raise TypeError(__internal_type_error__)
def __call__(self, f):
return lambda *args, **kwargs: self.__callback__(f(*args, **kwargs))
__lshift__ = __call__
def with_continuation(func):
def _w(*args, **kwargs):
return _Continuation(func, args, kwargs)
_w.__with_continuation_signature__ = True
return _w
class _with_CC():
def __call__(self, f):
try:
_ = f.__with_continuation_signature__
except AttributeError:
raise TypeError(__internal_type_error__)
return (
lambda *args, **kwargs:
_with_CC().__callback__(f(*args, **kwargs)))
__rshift__ = __call__
def __callback__(self, f):
k = Continuation(id(self))
i = id(k)
while True:
try:
return (f.func)(k)(*f.args, **f.kwargs)
except _Continuation as e:
if i == e.uid: f = e
else: raise e
with_CC = _with_CC()

Paired Delimeter Matching

I would like to write a class that will return true if and only if the expression entered contains only correctly matching delimiters such as " (), <>, [], {} " What I have written thus far will do exactly that.
delim_openers = '{([<'
delim_closers = '})]>'
def check_delimiters(expr):
val = Stack()
for c in expr:
if c == (any(x) for x in delim_openers):
val.push(c)
elif c == (any(y) for y in delim_closers):
try:
val.pop()
except:
return False
return val.empty()
I am a little confused however if I were test this case against specific cases such as the one below, it'll return true when I assumed it would return False, my stack shouldn't be popping the delimiter in this case. Test case:
from unittest import TestCase
tc = TestCase()
tc.assertFalse(check_delimiters('[ ( ] )'))
tc.assertFalse(check_delimiters('((((((( ))))))'))
How might I edit the `check_delimiters' method to correctly return False in these scenarios? The stacks first-in-last-out capability tends to confuse me. Also here's the stack class code in case anything else is confusing.
class Stack:
class Node:
def __init__(self, val, next=None):
self.val = val
self.next = next
def __init__(self):
self.top = None
def push(self, val):
self.top = Stack.Node(val, self.top)
def pop(self):
assert self.top, 'Stack is empty'
val = self.top.val
self.top = self.top.next
return val
def peek(self):
return self.top.val if self.top else None
def empty(self):
return self.top == None
def __bool__(self):
return not self.empty()
def __repr__(self):
if not self.top:
return ''
return '--> ' + ', '.join(str(x) for x in self)
def __iter__(self):
n = self.top
while n:
yield n.val
n = n.next

No need for any, a simple contains-check will suffice. Then you have to check whether the closer matches the opener, otherwise you are just checking whether closers and openers are equal in numbers and that the closers never take the lead count-wise:
class Stack(list):
push = list.append
def peek(self):
return self[-1] if self else None
# ...
match = dict(zip('{([<', '})]>')) # dict makes the matching check faster
delim_openers = set(match.keys()) # set makes the 'in' check faster
delim_closers = set(match.values())
def check_delimiters(expr):
val = Stack()
for c in expr:
if c in delim_openers:
val.push(c)
elif c in delim_closers:
try:
assert match[val.pop()] == c
# fails if stack is empty or brackets don't match
except:
return False
return not val
> check_delimiters('[ ( ] )')
False
> check_delimiters('[ ( ) ]')
True

Python compiler for simple language to java vm code algorithm

I have a simple language that I am trying to write a compiler for (yes it is homework) to compile a simple language I shall describe if necessary to java vm code.
It currently works pretty well I've just hit a bump with logical AND's and OR's.
Each work fine in a single if/while condition, but if I try and chain them things go wrong, correct me if I am wrong but I believe that AND has precedence, but I was wondering if there are logical ways of arranging them? I think is what I'm trying to ask, the java vm code output just has the compare and jump statements one after the other (which seems wrong). I realise it's quite abstract so maybe what I'm after is a pseudo code/algorithm for how to structure chained AND's and OR's.
EDIT: Currently just treats any combination of AND and OR as AND's. Comparing the factor/term/expression connection (compared to booleanfactor etc) I believe that AND has precedence? Just a thought.
Apologies if this is poorly understood :/
So i figure ill include relevant info just incase.
compiler
import re
import sys
# Restrictions:
# Integer constants must be short.
# Stack size must not exceed 1024.
# Integer is the only type.
# Logical operators cannot be nested.
class Scanner:
'''The interface comprises the methods lookahead and consume.
Other methods should not be called from outside of this class.'''
def __init__(self, input_file):
'''Reads the whole input_file to input_string.'''
# source code of the program to be compiled
self.input_string = input_file.read()
# index where the unprocessed part of input_string starts
self.current_char_index = 0
# a pair (most recently read token, matched substring of input_string)
self.current_token = self.get_token()
def skip_white_space(self):
'''Consumes all characters in input_string up to the next
non-white-space character.'''
if (self.current_char_index >= len(self.input_string) - 1):
# bad fix for it over-running the end of the file
return
while self.input_string[self.current_char_index].isspace():
self.current_char_index += 1
return
def get_token(self):
'''Returns the next token and the part of input_string it matched.
Returns None if there is no next token.
The characters up to the end of the token are consumed.'''
self.skip_white_space()
# find the longest prefix of input_string that matches a token
token, longest = None, ''
for (t, r) in Token.token_regexp:
match = re.match(r, self.input_string[self.current_char_index:])
if match and match.end() > len(longest):
token, longest = t, match.group()
# consume the token by moving the index to the end of the matched part
self.current_char_index += len(longest)
return (token, longest)
def lookahead(self):
'''Returns the next token without consuming it.
Returns None if there is no next token.'''
return self.current_token[0]
def consume(self, *tokens):
'''Returns the next token and consumes it, if it is in tokens.
Raises an exception otherwise.
If the token is a number or an identifier, its value is returned.'''
if self.current_token[0] not in tokens:
print('Token ' + self.current_token[0] + ' isn\'t in the tokens: ')
for token in tokens:
print(token)
raise Exception('Token is not in tokens this shouldn\'t happen much')
if self.current_token[0] == 'ID':
symbol_table.location(self.current_token[1])
value = self.current_token[1]
elif (self.current_token[0] == 'NUM'):
value = self.current_token[1]
else:
value = self.current_token[0]
self.current_token = self.get_token()
return value
class Token:
DO = 'DO';
ELSE = 'ELSE';
END = 'END';
IF = 'IF';
THEN = 'THEN';
WHILE = 'WHILE';
SEM = 'SEM';
BEC = 'BEC';
LESS = 'LESS';
EQ = 'EQ';
GRTR = 'GRTR';
LEQ = 'LEQ';
NEQ = 'NEQ';
GEQ = 'GEQ';
ADD = 'ADD';
SUB = 'SUB';
MUL = 'MUL';
DIV = 'DIV';
LPAR = 'LPAR';
RPAR = 'RPAR';
NUM = 'NUM';
ID = 'ID';
READ = 'READ';
WRITE = 'WRITE';
OR = 'OR';
AND = 'AND';
NOT = 'NOT';
# The following list gives the regular expression to match a token.
# The order in the list matters for mimicking Flex behaviour.
# Longer matches are preferred over shorter ones.
# For same-length matches, the first in the list is preferred.
token_regexp = [
(DO, 'do'),
(ELSE, 'else'),
(END, 'end'),
(IF, 'if'),
(THEN, 'then'),
(WHILE, 'while'),
(READ, 'read'),
(WRITE, 'write'),
(OR, 'or'),
(AND, 'and'),
(NOT, 'not'),
(SEM, ';'),
(BEC, ':='),
(LESS, '<'),
(EQ, '='),
(NEQ, '!='),
(GRTR, '>'),
(LEQ, '<='),
(GEQ, '>='),
(ADD, '[+]'), # + is special in regular expressions
(SUB, '-'),
(MUL, '[*]'),
(DIV, '/'),
(LPAR, '[(]'), # ( is special in regular expressions
(RPAR, '[)]'), # ) is special in regular expressions
(ID, '[a-z]+'),
(NUM, '[0-9]+'),
]
class Symbol_Table:
'''A symbol table maps identifiers to locations.'''
def __init__(self):
self.symbol_table = {}
def size(self):
'''Returns the number of entries in the symbol table.'''
return len(self.symbol_table)
def location(self, identifier):
'''Returns the location of an identifier. If the identifier is not in
the symbol table, it is entered with a new location. Locations are
numbered sequentially starting with 0.'''
if identifier in self.symbol_table:
return self.symbol_table[identifier]
index = len(self.symbol_table)
self.symbol_table[identifier] = index
return index
class Label:
def __init__(self):
self.current_label = 0
def next(self):
'''Returns a new, unique label.'''
self.current_label += 1
return 'l' + str(self.current_label)
def indent(s, level):
return ' '*level + s + '\n'
# Each of the following classes is a kind of node in the abstract syntax tree.
# indented(level) returns a string that shows the tree levels by indentation.
# code() returns a string with JVM bytecode implementing the tree fragment.
# true_code/false_code(label) jumps to label if the condition is/is not true.
# Execution of the generated code leaves the value of expressions on the stack.
class Program_AST:
def __init__(self, program):
self.program = program
def __repr__(self):
return repr(self.program)
def indented(self, level):
return self.program.indented(level)
def code(self):
program = self.program.code()
local = symbol_table.size()
java_scanner = symbol_table.location('Java Scanner')
return '.class public Program\n' + \
'.super java/lang/Object\n' + \
'.method public <init>()V\n' + \
'aload_0\n' + \
'invokenonvirtual java/lang/Object/<init>()V\n' + \
'return\n' + \
'.end method\n' + \
'.method public static main([Ljava/lang/String;)V\n' + \
'.limit locals ' + str(local) + '\n' + \
'.limit stack 1024\n' + \
'new java/util/Scanner\n' + \
'dup\n' + \
'getstatic java/lang/System.in Ljava/io/InputStream;\n' + \
'invokespecial java/util/Scanner.<init>(Ljava/io/InputStream;)V\n' + \
'astore ' + str(java_scanner) + '\n' + \
program + \
'return\n' + \
'.end method\n'
class Statements_AST:
def __init__(self, statements):
self.statements = statements
def __repr__(self):
result = repr(self.statements[0])
for st in self.statements[1:]:
result += '; ' + repr(st)
return result
def indented(self, level):
result = indent('Statement(s)', level)
for st in self.statements:
result += st.indented(level+1)
return result
def code(self):
result = ''
for st in self.statements:
result += st.code()
return result
class If_AST:
def __init__(self, boolean_expression, then):
self.boolean_expression = boolean_expression
self.then = then
def __repr__(self):
return 'if ' + repr(self.boolean_expression) + ' then ' + \
repr(self.then) + ' end'
def indented(self, level):
return indent('If-Then', level) + \
self.boolean_expression.indented(level+1) + \
self.then.indented(level+1)
def code(self):
l1 = label_generator.next()
return self.boolean_expression.code(l1) + \
self.then.code() + \
l1 + ':\n'
class If_Else_AST:
def __init__(self, boolean_expression, then, _else):
self.boolean_expression = boolean_expression;
self.then = then;
self._else = _else;
def __repr__(self):
return 'if ' + repr(self.boolean_expression) + ' then ' + \
repr(self.then) + ' else ' + \
repr(self._else) + ' end'
def indented(self, level):
return indent('If-Then-Else', level) + \
self.boolean_expression.indented(level+1) + \
self.then.indented(level+1) + \
indent('Else', level+1) + \
self._else.indented(level+1)
def code(self):
l1 = label_generator.next()
l2 = label_generator.next()
return self.boolean_expression.code(l1) + \
self.then.code() + \
'goto ' + l2 + '\n' + \
l1 + ':\n' + \
self._else.code() + \
l2 + ':\n'
class While_AST:
def __init__(self, boolean_term, body):
self.boolean_term = boolean_term
self.body = body
def __repr__(self):
return 'while ' + repr(self.boolean_term) + ' do ' + \
repr(self.body) + ' end'
def indented(self, level):
return indent('While-Do', level) + \
self.boolean_term.indented(level+1) + \
self.body.indented(level+2)
def code(self):
l1 = label_generator.next()
l2 = label_generator.next()
return l1 + ':\n' + \
self.boolean_term.code(l2) + \
self.body.code() + \
'goto ' + l1 + '\n' + \
l2 + ':\n'
class Assign_AST:
def __init__(self, identifier, expression):
self.identifier = identifier
self.expression = expression
def __repr__(self):
return repr(self.identifier) + ':=' + repr(self.expression)
def indented(self, level):
return indent('Assign', level) + \
self.identifier.indented(level+1) + \
self.expression.indented(level+1)
def code(self):
loc = symbol_table.location(self.identifier.identifier)
return self.expression.code() + \
'istore ' + str(loc) + '\n'
class Write_AST:
def __init__(self, expression):
self.expression = expression
def __repr__(self):
return 'write ' + repr(self.expression)
def indented(self, level):
return indent('Write', level) + self.expression.indented(level+1)
def code(self):
return 'getstatic java/lang/System/out Ljava/io/PrintStream;\n' + \
self.expression.code() + \
'invokestatic java/lang/String/valueOf(I)Ljava/lang/String;\n' + \
'invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V\n'
class Read_AST:
def __init__(self, identifier):
self.identifier = identifier
def __repr__(self):
return 'read ' + repr(self.identifier)
def indented(self, level):
return indent('Read', level) + self.identifier.indented(level+1)
def code(self):
java_scanner = symbol_table.location('Java Scanner')
loc = symbol_table.location(self.identifier.identifier)
return 'aload ' + str(java_scanner) + '\n' + \
'invokevirtual java/util/Scanner.nextInt()I\n' + \
'istore ' + str(loc) + '\n'
class Comparison_AST:
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
def __repr__(self):
op = { Token.LESS:'<', Token.EQ:'=', Token.GRTR:'>',
Token.LEQ:'<=', Token.NEQ:'!=', Token.GEQ:'>=' }
return repr(self.left) + op[self.op] + repr(self.right)
def indented(self, level):
return indent(self.op, level) + \
self.left.indented(level+1) + \
self.right.indented(level+1)
def true_code(self, label):
op = { Token.LESS:'if_icmplt', Token.EQ:'if_icmpeq',
Token.GRTR:'if_icmpgt', Token.LEQ:'if_icmple',
Token.NEQ:'if_icmpne', Token.GEQ:'if_icmpge' }
return self.left.code() + \
self.right.code() + \
op[self.op] + ' ' + label + '\n'
def false_code(self, label):
# Negate each comparison because of jump to "false" label.
op = { Token.LESS:'if_icmpge', Token.EQ:'if_icmpne',
Token.GRTR:'if_icmple', Token.LEQ:'if_icmpgt',
Token.NEQ:'if_icmpeq', Token.GEQ:'if_icmplt' }
return self.left.code() + \
self.right.code() + \
op[self.op] + ' ' + label + '\n'
class Expression_AST:
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
def __repr__(self):
op = { Token.ADD:'+', Token.SUB:'-', Token.MUL:'*', Token.DIV:'/' }
return '(' + repr(self.left) + op[self.op] + repr(self.right) + ')'
def indented(self, level):
return indent(self.op, level) + \
self.left.indented(level+1) + \
self.right.indented(level+1)
def code(self):
op = { Token.ADD:'iadd', Token.SUB:'isub',
Token.MUL:'imul', Token.DIV:'idiv' }
return self.left.code() + \
self.right.code() + \
op[self.op] + '\n'
class Number_AST:
def __init__(self, number):
self.number = number
def __repr__(self):
return self.number
def indented(self, level):
return indent(self.number, level)
def code(self): # works only for short numbers
return 'sipush ' + self.number + '\n'
class Identifier_AST:
def __init__(self, identifier):
self.identifier = identifier
def __repr__(self):
return self.identifier
def indented(self, level):
return indent(self.identifier, level)
def code(self):
loc = symbol_table.location(self.identifier)
return 'iload ' + str(loc) + '\n'
class BooleanFactor_AST:
def __init__(self, condition, logic):
self.condition = condition
self.logic = logic
def __repr__(self):
if self.logic == False:
return 'NOT ' + repr(self.condition)
else:
return repr(self.condition)
def indented(self, level):
if self.logic == False:
return indent('NOT ', level) + self.condition.indented(level + 1)
else:
return self.condition.indented(level)
def false_code(self, label):
if self.logic == True:
return self.condition.false_code(label)
else:
return self.condition.true_code(label)
return
def true_code(self, label):
if self.logic == True:
return self.condition.true_code(label)
else:
return self.condition.false_code(label)
class BooleanTerm_AST:
def __init__(self, terms):
self.terms = terms
def __repr__(self):
result = repr(self.terms[0])
for term in self.terms[1:]:
result = result + ' AND ' + repr(term)
return result
def indented(self, level):
result = self.terms[0].indented(level)
for term in self.terms[1:]:
result = result + indent('AND', level)
result = result + term.indented(level)
return result
def code(self, label):
result = ''
for term in self.terms:
result = result + term.false_code(label)
return result
class BooleanExpression_AST:
def __init__(self, expressions):
self.expressions = expressions
def __repr__(self):
result = repr(self.expressions[0])
for expression in self.expressions[1:]:
result = result + ' OR ' + repr(expression)
return result
def indented(self, level):
result = self.expressions[0].indented(level)
indentation = 0
for expression in self.expressions[1:]:
indentation += 1
result = result + indent('OR', level + indentation)
result = result + expression.indented(level + indentation)
return result
def code(self, label):
result = ''
for expression in self.expressions:
result = result + expression.code(label)
return result
# The following methods comprise the recursive-descent parser.
def program():
sts = statements()
return Program_AST(sts)
def statements():
result = [statement()]
while scanner.lookahead() == Token.SEM:
scanner.consume(Token.SEM)
st = statement()
result.append(st)
return Statements_AST(result)
def statement():
if scanner.lookahead() == Token.IF:
return if_statement()
elif scanner.lookahead() == Token.WHILE:
return while_statement()
elif scanner.lookahead() == Token.ID:
return assignment()
elif scanner.lookahead() == Token.READ:
return read();
elif scanner.lookahead() == Token.WRITE:
return write();
else: # error
return scanner.consume(Token.IF, Token.WHILE, Token.ID)
def if_statement():
scanner.consume(Token.IF)
condition = boolean_expression()
scanner.consume(Token.THEN)
then = statements()
if scanner.lookahead() == Token.END:
scanner.consume(Token.END)
return If_AST(condition, then)
else:
scanner.consume(Token.ELSE)
_else = statements()
scanner.consume(Token.END)
return If_Else_AST(condition, then, _else)
def while_statement():
scanner.consume(Token.WHILE)
condition = boolean_expression()
scanner.consume(Token.DO)
body = statements()
scanner.consume(Token.END)
return While_AST(condition, body)
def assignment():
ident = identifier()
scanner.consume(Token.BEC)
expr = expression()
return Assign_AST(ident, expr)
def read():
scanner.consume(Token.READ)
variable = identifier()
return Read_AST(variable)
def write():
scanner.consume(Token.WRITE)
expr = expression()
return Write_AST(expr)
def comparison():
left = expression()
op = scanner.consume(Token.LESS, Token.EQ, Token.GRTR,
Token.LEQ, Token.NEQ, Token.GEQ)
right = expression()
return Comparison_AST(left, op, right)
def expression():
result = term()
while scanner.lookahead() in [Token.ADD, Token.SUB]:
op = scanner.consume(Token.ADD, Token.SUB)
tree = term()
result = Expression_AST(result, op, tree)
return result
def term():
result = factor()
while scanner.lookahead() in [Token.MUL, Token.DIV]:
op = scanner.consume(Token.MUL, Token.DIV)
tree = factor()
result = Expression_AST(result, op, tree)
return result
def factor():
if scanner.lookahead() == Token.LPAR:
scanner.consume(Token.LPAR)
result = expression()
scanner.consume(Token.RPAR)
return result
elif scanner.lookahead() == Token.NUM:
value = scanner.consume(Token.NUM)
return Number_AST(value)
elif scanner.lookahead() == Token.ID:
return identifier()
else: # error
return scanner.consume(Token.LPAR, Token.NUM, Token.ID)
def identifier():
value = scanner.consume(Token.ID)
return Identifier_AST(value)
def boolean_factor():
if scanner.lookahead() == Token.NOT:
scanner.consume(Token.NOT)
logic = False
else:
logic = True
result = comparison()
return BooleanFactor_AST(result, logic)
def boolean_term():
result = [boolean_factor()]
while scanner.lookahead() in [Token.AND]:
scanner.consume(scanner.lookahead())
temp = boolean_factor()
result.append(temp)
return BooleanTerm_AST(result)
def boolean_expression():
result = [boolean_term()]
while scanner.lookahead() in [Token.OR]:
scanner.consume(scanner.lookahead())
temp = boolean_term()
result.append(temp)
return BooleanExpression_AST(result)
# Initialise scanner, symbol table and label generator.
#scanner = Scanner(open('test.txt'))
scanner = Scanner(sys.stdin)
symbol_table = Symbol_Table()
symbol_table.location('Java Scanner') # fix a location for the Java Scanner
label_generator = Label()
# Uncomment the following to test the scanner without the parser.
# This shows a list of all tokens in the input.
#
#token = scanner.lookahead()
#while token != None:
# print(token)
# scanner.consume(token)
# token = scanner.lookahead()
#exit()
# Call the parser.
ast = program()
assert scanner.lookahead() == None
# Uncomment the following to test the parser without the code generator.
# The first line gives back the program by calling __repr__ of the AST classes.
# The second line shows the syntax tree with levels indicated by indentation.
#
#print(ast)
#print(ast.indented(0))
#exit()
# Call the code generator.
# This translates the abstract syntax tree to JVM bytecode.
# It can be assembled to a class file by Jasmin: http://jasmin.sourceforge.net/
print(ast.code())
testing bat file
python compiler.py <test.txt> Program.j
java -Xmx100m -jar jasmin.jar Program.j
java -Xmx100m Program < testInput.txt > test_output.txt
and language (BNF)
Program = Statements
Statements = Statement (; Statement)
Statement = If | While | Assignment
If = if Comparison then Statements end
While = while Comparison do Statements end
Assignment = identifier := Expression
Comparison = Expression Relation Expression
Relation = = | != | < | <= | > | >=
Expression = Term ((+ | -) Term)
Term = Factor ((* | /) Factor)
Factor = (Expression) | number | identifier
BooleanExpression = BooleanTerm (or BooleanTerm)*
BooleanTerm = BooleanFactor (and BooleanFactor)*
BooleanFactor = not BooleanFactor | Comparison
I think thats all that is relevant, cheers if you take a go at helping me on this

if you want a method to chain OR's and AND'syou can use this property:
p v q === ¬p ^ ¬q
Is equivalent, you can process all in the AND form. for example.
p v q ^ r v s === ¬p ^ ¬q ^ ¬r ^ ¬s
So evaluate the expression in AND form is simple with an algorithm.
I guess the expression doesn't have any parenthesis, in other way you need prioritize the grouping symbols (), [], {}.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

ANTLR4 Arith Expression visitor order operation - python

Related

Is there a way to have a object type of your choice (i.e. LinkedEdge) hash as part of a 'union.set()' processing?

Finding Method for Binary Tree doesn't return anything

Optimal way to apply a conditional If statement

Paired Delimeter Matching

Python compiler for simple language to java vm code algorithm

Categories

Resources