I was reading about Rope(or cord) data structure https://en.wikipedia.org/wiki/Rope_(data_structure) and trying to implement it, but I am struggling to implement the split operation. I tried to look it up but all related answers I was able to find were incorrect.
Below is the split operation:
We want to find the character and return two nodes before and after the split. For example, if we want to split at index 5 of'MyNameIsSimon' then we should return the root of two ropes 'MyName' and 'IsSimon' respectively. Finding the index is easy as given by the pseudo-code in wiki. But I'm struggling the split part especially how to join and return the 2nd half as a new rope. Anyone can help with pseudo-code or any language is much appreciated.
Wikipedia’s diagram looks muddled to me. Here’s a working implementation in Python (without balancing).
class Leaf:
def __init__(self, s):
self._s = s
def __len__(self):
return len(self._s)
def __str__(self):
return self._s
def inspect(self, indent=0):
print(" " * indent + repr(self._s))
def split(self, i):
return Leaf(self._s[:i]), Leaf(self._s[i:])
class Branch:
def __init__(self, a, b):
self._a = a
self._b = b
self._l = len(a) + len(b)
def __len__(self):
return self._l
def __str__(self):
return str(self._a) + str(self._b)
def inspect(self, indent=0):
self._a.inspect(indent + 2)
print(" " * indent + str(len(self._a)))
self._b.inspect(indent + 2)
def split(self, i):
if i < len(self._a):
a0, a1 = self._a.split(i)
return a0, Branch(a1, self._b)
elif i == len(self._a):
return self._a, self._b
else:
assert i > len(self._a)
b0, b1 = self._b.split(i - len(self._a))
return Branch(self._a, b0), b1
def make_test_rope():
e = Leaf("Hello ")
f = Leaf("my ")
c = Branch(e, f)
j = Leaf("na")
k = Leaf("me i")
g = Branch(j, k)
m = Leaf("s")
n = Leaf(" Simon")
h = Branch(m, n)
d = Branch(g, h)
b = Branch(c, d)
a = Branch(b, Leaf(""))
return a
def test():
a = make_test_rope()
a.inspect()
b, c = a.split(11)
print("--")
b.inspect()
print("--")
c.inspect()
test()
Output:
'Hello '
6
'my '
9
'na'
2
'me i'
6
's'
1
' Simon'
22
''
--
'Hello '
6
'my '
9
'na'
--
'me i'
4
's'
1
' Simon'
11
''
I am coding a huffman coding tree in python, I have used one class for tree nodes, but I want the whole program to be object oriented. I just cant seem to be able to turn my functions into classes and run the whole thing as OOP. Is it possible to convert functions into classes/methods or does it involve rewriting the entire code in OOP style. The code works ok, im just trying to get my head around OOP and how to implement it. Any help would be great! Code below.
'''
import heapq
class TreeNode(object):
def __init__(self, freq, char=None, left=None, right=None):
self.char = char
self.freq = freq
self.left = left
self.right = right
def __lt__(self, other):
return self.freq < other.freq
def isLeaf(self):
return (self.left == None and self.right == None)
def createTree(freqData):
huffmanNodes = []
for char in freqData:
huffmanNodes.append(TreeNode(freqData[char], char))
heapq.heapify(huffmanNodes)
while (len(huffmanNodes) > 1):
# obtain the two minimum-frequency Huffman nodes
child1 = heapq.heappop(huffmanNodes)
child2 = heapq.heappop(huffmanNodes)
parent = TreeNode(child1.freq + child2.freq, left=child1, right=child2)
heapq.heappush(huffmanNodes, parent)
return None if huffmanNodes == [] else heapq.heappop(huffmanNodes)
def hTreeToHCode(hTree):
code = dict()
def getCode(hNode, curCode=""):
if (hNode == None): return
if (hNode.left == None and hNode.right == None):
code[hNode.char] = curCode
getCode(hNode.left, curCode + "0")
getCode(hNode.right, curCode + "1")
if hNode.char == None:
print("")
else:
print('Character = {} : Freq = {} --- Encoded into {}'.format(hNode.char, hNode.freq, curCode))
getCode(hTree)
return code
def encode(s, freqData):
hTree = createTree(freqData)
hCode = hTreeToHCode(hTree)
hEncoded = ""
for char in s:
hEncoded += hCode[char]
return hEncoded.strip()
def decode(s, freqData):
hTree = createTree(freqData)
decodedStr = ""
curTreeNode = hTree
for charCode in s:
if (charCode == "0"):
curTreeNode = curTreeNode.left
else:
curTreeNode = curTreeNode.right
if (curTreeNode.isLeaf()):
decodedStr += curTreeNode.char
curTreeNode = hTree
return decodedStr
words = "hello welcome to my huffman algorithm code"
charlst = {}
for char in words:
charlst[char] = charlst.get(char,0) + 1
freqData = charlst
encodedStr = encode(words, freqData)
print("encodedStr", encodedStr)
decodedStr = decode(encodedStr, freqData)
print("decodedStr", decodedStr)
'''
you can put function outside the NodeTree class in a Main class and add a run method with var initialisation etc and put at the end of your program a
if __name__=='__main__':
Main.run()
so I have been working on this class project implementing a binary search tree. The professor wants us to make the private recursive while make the public one simple. (like when to insert_element(50), it calls a private function recursive_insert(50, self.__root) to solve).
My insertion function runs for no error yet the test case always return empty, and here are my codes for the private functions:
class Binary_Search_Tree:
class __BST_Node:
def __init__(self, value):
self.value = value
self.left=None
self.right=None
def __init__(self):
self.__root = None
self.__height=0
self.__size=0
def _in_order_str(self, root):
if root is None:
outcome= "[ ]"
elif self.__size==1:
outcome = "[ " + str(root.value) + " ]"
else:
outcome = "[ "
self._in_order_str(root.left)
outcome += str(root.value) +", "
self._in_order_str(root.right)
outcome+= " ]"
return outcome
def _recur_ins(self, val,root):
if root is None:
root=Binary_Search_Tree.__BST_Node(val)
elif root.value>val:
root.left = _recur_ins(val,root.left) #do I need self here?
elif root.value <val:
root.right = _recur_ins(val,root.right)
return root
And this one is for the public:
def insert_element(self, value):
self._recur_ins(value,self.__root)
self.__size+=1
My Test Case:
def test_insertion_from_empty(self):
root=None
self.__bst.insert_element(50)
self.__bst.insert_element(30)
self.__bst.insert_element(70)
self.assertEqual('[ 30, 50, 70 ]', self.__bst.in_order())
UPDATE: I think the problem comes from my _in_order_str(self, root): method. The general case I found online is:
def inorder(root):
if root is not None:
inorder(root.left)
print root.key
inorder(root.right)
I know this could be a very silly question, but I really failed to figure it our by myself. Any help will be appreciated so thank you so much!!!
After changing your code as little as possible, I think I have managed to get it working.
from pprint import pprint # For debugging
class Binary_Search_Tree:
class __BST_Node:
def __init__(self, value):
self.value = value
self.left = None
self.right = None
def __init__(self):
self.__root = None
self.__height = 0
self.__size = 0
def __in_order_str(self, root):
if root is None:
outcome = "[ ]"
elif self.__size == 1:
outcome = "[ " + str(root.value) + " ]"
else:
outcome = "[ "
self.__in_order_str(root.left)
outcome += str(root.value) + ", "
self.__in_order_str(root.right)
outcome += " ]"
return outcome
def __recur_ins(self, val, root):
if root is None:
root = Binary_Search_Tree.__BST_Node(val)
elif root.value > val:
root.left = self.__recur_ins(val, root.left)
elif root.value < val:
root.right = self.__recur_ins(val, root.right)
return root
def insert_element(self, value):
self.__root = self.__recur_ins(value, self.__root)
self.__size += 1
def test_insertion_from_empty(self):
self.insert_element(50)
self.insert_element(60)
self.insert_element(70)
# self.assertEqual('[ 30, 50, 70 ]', self.__bst.in_order())
test = Binary_Search_Tree()
test.test_insertion_from_empty()
pprint(vars(test))
Notes on the changes:
changed some functions(_recur_ins, _in_order_str) from using '_' to '__' to make them private functions. I did it based on Python Official Documentation, private functions use at least two leading underscores and at most one trailing underscore.
First line in insert_element, added 'self.__root= ' so that the returned root value will be stored as the new root
Added 'self.' in front of '__recur_ins', since as far as I know, you must use self whenever you need to call a function which is located at the same class.
I did not modify anything much in __in_order_str, since I think the author only asked for the insertion (?)
commented assertEqual, since no function is provided in the question (?)
Modified the spaces so that it can be more readable
If I put the debug mode right before I dumped the variable, this is what I get:
Which I think should be correct. 50 is inserted first, thus it is used as the root, then 60 is inserted on the right child of 50, and 70 is put on the right child of 60.
Note: I'm also just a novice, please tell me any mistakes that I have done and I will rectify it :)
I have a simple language that I am trying to write a compiler for (yes it is homework) to compile a simple language I shall describe if necessary to java vm code.
It currently works pretty well I've just hit a bump with logical AND's and OR's.
Each work fine in a single if/while condition, but if I try and chain them things go wrong, correct me if I am wrong but I believe that AND has precedence, but I was wondering if there are logical ways of arranging them? I think is what I'm trying to ask, the java vm code output just has the compare and jump statements one after the other (which seems wrong). I realise it's quite abstract so maybe what I'm after is a pseudo code/algorithm for how to structure chained AND's and OR's.
EDIT: Currently just treats any combination of AND and OR as AND's. Comparing the factor/term/expression connection (compared to booleanfactor etc) I believe that AND has precedence? Just a thought.
Apologies if this is poorly understood :/
So i figure ill include relevant info just incase.
compiler
import re
import sys
# Restrictions:
# Integer constants must be short.
# Stack size must not exceed 1024.
# Integer is the only type.
# Logical operators cannot be nested.
class Scanner:
'''The interface comprises the methods lookahead and consume.
Other methods should not be called from outside of this class.'''
def __init__(self, input_file):
'''Reads the whole input_file to input_string.'''
# source code of the program to be compiled
self.input_string = input_file.read()
# index where the unprocessed part of input_string starts
self.current_char_index = 0
# a pair (most recently read token, matched substring of input_string)
self.current_token = self.get_token()
def skip_white_space(self):
'''Consumes all characters in input_string up to the next
non-white-space character.'''
if (self.current_char_index >= len(self.input_string) - 1):
# bad fix for it over-running the end of the file
return
while self.input_string[self.current_char_index].isspace():
self.current_char_index += 1
return
def get_token(self):
'''Returns the next token and the part of input_string it matched.
Returns None if there is no next token.
The characters up to the end of the token are consumed.'''
self.skip_white_space()
# find the longest prefix of input_string that matches a token
token, longest = None, ''
for (t, r) in Token.token_regexp:
match = re.match(r, self.input_string[self.current_char_index:])
if match and match.end() > len(longest):
token, longest = t, match.group()
# consume the token by moving the index to the end of the matched part
self.current_char_index += len(longest)
return (token, longest)
def lookahead(self):
'''Returns the next token without consuming it.
Returns None if there is no next token.'''
return self.current_token[0]
def consume(self, *tokens):
'''Returns the next token and consumes it, if it is in tokens.
Raises an exception otherwise.
If the token is a number or an identifier, its value is returned.'''
if self.current_token[0] not in tokens:
print('Token ' + self.current_token[0] + ' isn\'t in the tokens: ')
for token in tokens:
print(token)
raise Exception('Token is not in tokens this shouldn\'t happen much')
if self.current_token[0] == 'ID':
symbol_table.location(self.current_token[1])
value = self.current_token[1]
elif (self.current_token[0] == 'NUM'):
value = self.current_token[1]
else:
value = self.current_token[0]
self.current_token = self.get_token()
return value
class Token:
DO = 'DO';
ELSE = 'ELSE';
END = 'END';
IF = 'IF';
THEN = 'THEN';
WHILE = 'WHILE';
SEM = 'SEM';
BEC = 'BEC';
LESS = 'LESS';
EQ = 'EQ';
GRTR = 'GRTR';
LEQ = 'LEQ';
NEQ = 'NEQ';
GEQ = 'GEQ';
ADD = 'ADD';
SUB = 'SUB';
MUL = 'MUL';
DIV = 'DIV';
LPAR = 'LPAR';
RPAR = 'RPAR';
NUM = 'NUM';
ID = 'ID';
READ = 'READ';
WRITE = 'WRITE';
OR = 'OR';
AND = 'AND';
NOT = 'NOT';
# The following list gives the regular expression to match a token.
# The order in the list matters for mimicking Flex behaviour.
# Longer matches are preferred over shorter ones.
# For same-length matches, the first in the list is preferred.
token_regexp = [
(DO, 'do'),
(ELSE, 'else'),
(END, 'end'),
(IF, 'if'),
(THEN, 'then'),
(WHILE, 'while'),
(READ, 'read'),
(WRITE, 'write'),
(OR, 'or'),
(AND, 'and'),
(NOT, 'not'),
(SEM, ';'),
(BEC, ':='),
(LESS, '<'),
(EQ, '='),
(NEQ, '!='),
(GRTR, '>'),
(LEQ, '<='),
(GEQ, '>='),
(ADD, '[+]'), # + is special in regular expressions
(SUB, '-'),
(MUL, '[*]'),
(DIV, '/'),
(LPAR, '[(]'), # ( is special in regular expressions
(RPAR, '[)]'), # ) is special in regular expressions
(ID, '[a-z]+'),
(NUM, '[0-9]+'),
]
class Symbol_Table:
'''A symbol table maps identifiers to locations.'''
def __init__(self):
self.symbol_table = {}
def size(self):
'''Returns the number of entries in the symbol table.'''
return len(self.symbol_table)
def location(self, identifier):
'''Returns the location of an identifier. If the identifier is not in
the symbol table, it is entered with a new location. Locations are
numbered sequentially starting with 0.'''
if identifier in self.symbol_table:
return self.symbol_table[identifier]
index = len(self.symbol_table)
self.symbol_table[identifier] = index
return index
class Label:
def __init__(self):
self.current_label = 0
def next(self):
'''Returns a new, unique label.'''
self.current_label += 1
return 'l' + str(self.current_label)
def indent(s, level):
return ' '*level + s + '\n'
# Each of the following classes is a kind of node in the abstract syntax tree.
# indented(level) returns a string that shows the tree levels by indentation.
# code() returns a string with JVM bytecode implementing the tree fragment.
# true_code/false_code(label) jumps to label if the condition is/is not true.
# Execution of the generated code leaves the value of expressions on the stack.
class Program_AST:
def __init__(self, program):
self.program = program
def __repr__(self):
return repr(self.program)
def indented(self, level):
return self.program.indented(level)
def code(self):
program = self.program.code()
local = symbol_table.size()
java_scanner = symbol_table.location('Java Scanner')
return '.class public Program\n' + \
'.super java/lang/Object\n' + \
'.method public <init>()V\n' + \
'aload_0\n' + \
'invokenonvirtual java/lang/Object/<init>()V\n' + \
'return\n' + \
'.end method\n' + \
'.method public static main([Ljava/lang/String;)V\n' + \
'.limit locals ' + str(local) + '\n' + \
'.limit stack 1024\n' + \
'new java/util/Scanner\n' + \
'dup\n' + \
'getstatic java/lang/System.in Ljava/io/InputStream;\n' + \
'invokespecial java/util/Scanner.<init>(Ljava/io/InputStream;)V\n' + \
'astore ' + str(java_scanner) + '\n' + \
program + \
'return\n' + \
'.end method\n'
class Statements_AST:
def __init__(self, statements):
self.statements = statements
def __repr__(self):
result = repr(self.statements[0])
for st in self.statements[1:]:
result += '; ' + repr(st)
return result
def indented(self, level):
result = indent('Statement(s)', level)
for st in self.statements:
result += st.indented(level+1)
return result
def code(self):
result = ''
for st in self.statements:
result += st.code()
return result
class If_AST:
def __init__(self, boolean_expression, then):
self.boolean_expression = boolean_expression
self.then = then
def __repr__(self):
return 'if ' + repr(self.boolean_expression) + ' then ' + \
repr(self.then) + ' end'
def indented(self, level):
return indent('If-Then', level) + \
self.boolean_expression.indented(level+1) + \
self.then.indented(level+1)
def code(self):
l1 = label_generator.next()
return self.boolean_expression.code(l1) + \
self.then.code() + \
l1 + ':\n'
class If_Else_AST:
def __init__(self, boolean_expression, then, _else):
self.boolean_expression = boolean_expression;
self.then = then;
self._else = _else;
def __repr__(self):
return 'if ' + repr(self.boolean_expression) + ' then ' + \
repr(self.then) + ' else ' + \
repr(self._else) + ' end'
def indented(self, level):
return indent('If-Then-Else', level) + \
self.boolean_expression.indented(level+1) + \
self.then.indented(level+1) + \
indent('Else', level+1) + \
self._else.indented(level+1)
def code(self):
l1 = label_generator.next()
l2 = label_generator.next()
return self.boolean_expression.code(l1) + \
self.then.code() + \
'goto ' + l2 + '\n' + \
l1 + ':\n' + \
self._else.code() + \
l2 + ':\n'
class While_AST:
def __init__(self, boolean_term, body):
self.boolean_term = boolean_term
self.body = body
def __repr__(self):
return 'while ' + repr(self.boolean_term) + ' do ' + \
repr(self.body) + ' end'
def indented(self, level):
return indent('While-Do', level) + \
self.boolean_term.indented(level+1) + \
self.body.indented(level+2)
def code(self):
l1 = label_generator.next()
l2 = label_generator.next()
return l1 + ':\n' + \
self.boolean_term.code(l2) + \
self.body.code() + \
'goto ' + l1 + '\n' + \
l2 + ':\n'
class Assign_AST:
def __init__(self, identifier, expression):
self.identifier = identifier
self.expression = expression
def __repr__(self):
return repr(self.identifier) + ':=' + repr(self.expression)
def indented(self, level):
return indent('Assign', level) + \
self.identifier.indented(level+1) + \
self.expression.indented(level+1)
def code(self):
loc = symbol_table.location(self.identifier.identifier)
return self.expression.code() + \
'istore ' + str(loc) + '\n'
class Write_AST:
def __init__(self, expression):
self.expression = expression
def __repr__(self):
return 'write ' + repr(self.expression)
def indented(self, level):
return indent('Write', level) + self.expression.indented(level+1)
def code(self):
return 'getstatic java/lang/System/out Ljava/io/PrintStream;\n' + \
self.expression.code() + \
'invokestatic java/lang/String/valueOf(I)Ljava/lang/String;\n' + \
'invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V\n'
class Read_AST:
def __init__(self, identifier):
self.identifier = identifier
def __repr__(self):
return 'read ' + repr(self.identifier)
def indented(self, level):
return indent('Read', level) + self.identifier.indented(level+1)
def code(self):
java_scanner = symbol_table.location('Java Scanner')
loc = symbol_table.location(self.identifier.identifier)
return 'aload ' + str(java_scanner) + '\n' + \
'invokevirtual java/util/Scanner.nextInt()I\n' + \
'istore ' + str(loc) + '\n'
class Comparison_AST:
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
def __repr__(self):
op = { Token.LESS:'<', Token.EQ:'=', Token.GRTR:'>',
Token.LEQ:'<=', Token.NEQ:'!=', Token.GEQ:'>=' }
return repr(self.left) + op[self.op] + repr(self.right)
def indented(self, level):
return indent(self.op, level) + \
self.left.indented(level+1) + \
self.right.indented(level+1)
def true_code(self, label):
op = { Token.LESS:'if_icmplt', Token.EQ:'if_icmpeq',
Token.GRTR:'if_icmpgt', Token.LEQ:'if_icmple',
Token.NEQ:'if_icmpne', Token.GEQ:'if_icmpge' }
return self.left.code() + \
self.right.code() + \
op[self.op] + ' ' + label + '\n'
def false_code(self, label):
# Negate each comparison because of jump to "false" label.
op = { Token.LESS:'if_icmpge', Token.EQ:'if_icmpne',
Token.GRTR:'if_icmple', Token.LEQ:'if_icmpgt',
Token.NEQ:'if_icmpeq', Token.GEQ:'if_icmplt' }
return self.left.code() + \
self.right.code() + \
op[self.op] + ' ' + label + '\n'
class Expression_AST:
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
def __repr__(self):
op = { Token.ADD:'+', Token.SUB:'-', Token.MUL:'*', Token.DIV:'/' }
return '(' + repr(self.left) + op[self.op] + repr(self.right) + ')'
def indented(self, level):
return indent(self.op, level) + \
self.left.indented(level+1) + \
self.right.indented(level+1)
def code(self):
op = { Token.ADD:'iadd', Token.SUB:'isub',
Token.MUL:'imul', Token.DIV:'idiv' }
return self.left.code() + \
self.right.code() + \
op[self.op] + '\n'
class Number_AST:
def __init__(self, number):
self.number = number
def __repr__(self):
return self.number
def indented(self, level):
return indent(self.number, level)
def code(self): # works only for short numbers
return 'sipush ' + self.number + '\n'
class Identifier_AST:
def __init__(self, identifier):
self.identifier = identifier
def __repr__(self):
return self.identifier
def indented(self, level):
return indent(self.identifier, level)
def code(self):
loc = symbol_table.location(self.identifier)
return 'iload ' + str(loc) + '\n'
class BooleanFactor_AST:
def __init__(self, condition, logic):
self.condition = condition
self.logic = logic
def __repr__(self):
if self.logic == False:
return 'NOT ' + repr(self.condition)
else:
return repr(self.condition)
def indented(self, level):
if self.logic == False:
return indent('NOT ', level) + self.condition.indented(level + 1)
else:
return self.condition.indented(level)
def false_code(self, label):
if self.logic == True:
return self.condition.false_code(label)
else:
return self.condition.true_code(label)
return
def true_code(self, label):
if self.logic == True:
return self.condition.true_code(label)
else:
return self.condition.false_code(label)
class BooleanTerm_AST:
def __init__(self, terms):
self.terms = terms
def __repr__(self):
result = repr(self.terms[0])
for term in self.terms[1:]:
result = result + ' AND ' + repr(term)
return result
def indented(self, level):
result = self.terms[0].indented(level)
for term in self.terms[1:]:
result = result + indent('AND', level)
result = result + term.indented(level)
return result
def code(self, label):
result = ''
for term in self.terms:
result = result + term.false_code(label)
return result
class BooleanExpression_AST:
def __init__(self, expressions):
self.expressions = expressions
def __repr__(self):
result = repr(self.expressions[0])
for expression in self.expressions[1:]:
result = result + ' OR ' + repr(expression)
return result
def indented(self, level):
result = self.expressions[0].indented(level)
indentation = 0
for expression in self.expressions[1:]:
indentation += 1
result = result + indent('OR', level + indentation)
result = result + expression.indented(level + indentation)
return result
def code(self, label):
result = ''
for expression in self.expressions:
result = result + expression.code(label)
return result
# The following methods comprise the recursive-descent parser.
def program():
sts = statements()
return Program_AST(sts)
def statements():
result = [statement()]
while scanner.lookahead() == Token.SEM:
scanner.consume(Token.SEM)
st = statement()
result.append(st)
return Statements_AST(result)
def statement():
if scanner.lookahead() == Token.IF:
return if_statement()
elif scanner.lookahead() == Token.WHILE:
return while_statement()
elif scanner.lookahead() == Token.ID:
return assignment()
elif scanner.lookahead() == Token.READ:
return read();
elif scanner.lookahead() == Token.WRITE:
return write();
else: # error
return scanner.consume(Token.IF, Token.WHILE, Token.ID)
def if_statement():
scanner.consume(Token.IF)
condition = boolean_expression()
scanner.consume(Token.THEN)
then = statements()
if scanner.lookahead() == Token.END:
scanner.consume(Token.END)
return If_AST(condition, then)
else:
scanner.consume(Token.ELSE)
_else = statements()
scanner.consume(Token.END)
return If_Else_AST(condition, then, _else)
def while_statement():
scanner.consume(Token.WHILE)
condition = boolean_expression()
scanner.consume(Token.DO)
body = statements()
scanner.consume(Token.END)
return While_AST(condition, body)
def assignment():
ident = identifier()
scanner.consume(Token.BEC)
expr = expression()
return Assign_AST(ident, expr)
def read():
scanner.consume(Token.READ)
variable = identifier()
return Read_AST(variable)
def write():
scanner.consume(Token.WRITE)
expr = expression()
return Write_AST(expr)
def comparison():
left = expression()
op = scanner.consume(Token.LESS, Token.EQ, Token.GRTR,
Token.LEQ, Token.NEQ, Token.GEQ)
right = expression()
return Comparison_AST(left, op, right)
def expression():
result = term()
while scanner.lookahead() in [Token.ADD, Token.SUB]:
op = scanner.consume(Token.ADD, Token.SUB)
tree = term()
result = Expression_AST(result, op, tree)
return result
def term():
result = factor()
while scanner.lookahead() in [Token.MUL, Token.DIV]:
op = scanner.consume(Token.MUL, Token.DIV)
tree = factor()
result = Expression_AST(result, op, tree)
return result
def factor():
if scanner.lookahead() == Token.LPAR:
scanner.consume(Token.LPAR)
result = expression()
scanner.consume(Token.RPAR)
return result
elif scanner.lookahead() == Token.NUM:
value = scanner.consume(Token.NUM)
return Number_AST(value)
elif scanner.lookahead() == Token.ID:
return identifier()
else: # error
return scanner.consume(Token.LPAR, Token.NUM, Token.ID)
def identifier():
value = scanner.consume(Token.ID)
return Identifier_AST(value)
def boolean_factor():
if scanner.lookahead() == Token.NOT:
scanner.consume(Token.NOT)
logic = False
else:
logic = True
result = comparison()
return BooleanFactor_AST(result, logic)
def boolean_term():
result = [boolean_factor()]
while scanner.lookahead() in [Token.AND]:
scanner.consume(scanner.lookahead())
temp = boolean_factor()
result.append(temp)
return BooleanTerm_AST(result)
def boolean_expression():
result = [boolean_term()]
while scanner.lookahead() in [Token.OR]:
scanner.consume(scanner.lookahead())
temp = boolean_term()
result.append(temp)
return BooleanExpression_AST(result)
# Initialise scanner, symbol table and label generator.
#scanner = Scanner(open('test.txt'))
scanner = Scanner(sys.stdin)
symbol_table = Symbol_Table()
symbol_table.location('Java Scanner') # fix a location for the Java Scanner
label_generator = Label()
# Uncomment the following to test the scanner without the parser.
# This shows a list of all tokens in the input.
#
#token = scanner.lookahead()
#while token != None:
# print(token)
# scanner.consume(token)
# token = scanner.lookahead()
#exit()
# Call the parser.
ast = program()
assert scanner.lookahead() == None
# Uncomment the following to test the parser without the code generator.
# The first line gives back the program by calling __repr__ of the AST classes.
# The second line shows the syntax tree with levels indicated by indentation.
#
#print(ast)
#print(ast.indented(0))
#exit()
# Call the code generator.
# This translates the abstract syntax tree to JVM bytecode.
# It can be assembled to a class file by Jasmin: http://jasmin.sourceforge.net/
print(ast.code())
testing bat file
python compiler.py <test.txt> Program.j
java -Xmx100m -jar jasmin.jar Program.j
java -Xmx100m Program < testInput.txt > test_output.txt
and language (BNF)
Program = Statements
Statements = Statement (; Statement)
Statement = If | While | Assignment
If = if Comparison then Statements end
While = while Comparison do Statements end
Assignment = identifier := Expression
Comparison = Expression Relation Expression
Relation = = | != | < | <= | > | >=
Expression = Term ((+ | -) Term)
Term = Factor ((* | /) Factor)
Factor = (Expression) | number | identifier
BooleanExpression = BooleanTerm (or BooleanTerm)*
BooleanTerm = BooleanFactor (and BooleanFactor)*
BooleanFactor = not BooleanFactor | Comparison
I think thats all that is relevant, cheers if you take a go at helping me on this
if you want a method to chain OR's and AND'syou can use this property:
p v q === ¬p ^ ¬q
Is equivalent, you can process all in the AND form. for example.
p v q ^ r v s === ¬p ^ ¬q ^ ¬r ^ ¬s
So evaluate the expression in AND form is simple with an algorithm.
I guess the expression doesn't have any parenthesis, in other way you need prioritize the grouping symbols (), [], {}.