I am trying to write a compiler and read in a file one character at a time. If I were to find "/" followed by another "/" then I want to interpret the rest of my line as a comment. I am using file.read(1) each time I want another character. However, if I see that I have "/" followed by something that is not a "/" is there any way I can move the file stream back one character so as to not lose that character?
def tokenType(self):
# PAGE 108
if (self.current == '{' or self.current == '}' or self.current == '(' or self.current == ')' or self.current == '[' or self.current == ']' or self.current == '.' or self.current == ',' or self.current == ';' or self.current == '-' or self.current == '*' or self.current == '/' or self.current == '&' or self.current == '|' or self.current == '<' or self.current == '>' or self.current == '=' or self.current == '~'):
if (self.current == '/'):
next = self.file.read(1)
if (next == '/'):
while (next != "\n"):
next = self.file.read(1)
return "IGNORE"
if (next == '*'):
while (True):
next = self.file.read(1)
if (next == '*'):
next = self.file.read(1)
if (next == '/'):
break
return "IGNORE"
else:
return "SYMBOL"
return "SYMBOL"
elif (self.current == " " or self.current == "\n"):
return "IGNORE"
elif (self.current == "'"):
while(next != "'"):
self.current = self.current + next
return "STRING_CONST"
elif (type(self.current) == int):
next = self.file.read(1)
while(next != " "):
self.current = self.current + next
return "INT_CONST"
else:
next = self.file.read(1)
while(next != " " and next != ""):
self.current = self.current + next
next = self.file.read(1)
if (self.current == 'class' or self.current == 'constructor' or self.current == 'function' or self.current == 'method' or self.current == 'field' or self.current == 'static' or self.current == 'var' or self.current == 'int' or self.current == 'char' or self.current == 'boolean' or self.current == 'void' or self.current == 'true' or self.current == 'false' or self.current == 'null' or self.current == 'this' or self.current == 'let' or self.current == 'do' or self.current == 'if' or self.current == 'else' or self.current == 'while' or self.current == 'return'):
return "KEYWORD"
else:
return "IDENTIFIER"
My problem seems to be when I have something like 10/5 and my program checks to see if the next character is a "/". Then on the next pass through my character interpreting function, the 5 has already been removed when it was checking for a comment.
So, is there any way I can get a character from a file stream without it being "removed" from the stream or is there a way I can move it back a character when I hit a case like this?
It's not pretty, but you could pass the current and next symbol into your function if you don't mind reading the entire file into memory. You would only need some way to track ignoring the next character.
data = open('input_file').read()
for first, second in zip(data, data[1:]):
tokenType(first, second)
But like roippi says, there are better ways to lex input.
Related
def toktok(self) -> object:
tokens = []
while self.current is not None:
if self.current == " ":
self.next()
elif self.current == '=':
tokens.append(Token(TT_equal))
print("C1:", tokens[0], self.current, "C2")
self.next()
elif self.current == '+':
tokens.append(Token(self.current))
self.Methods()
elif self.current == '-':
tokens.append(Token(TT_subtraction))
self.Methods()
elif self.current == '*':
tokens.append(Token(TT_multiplication))
self.Methods()
elif self.current == '/':
tokens.append(Token(TT_division))
self.Methods()
elif self.current == '(':
tokens.append(Token(TT_parenthesis_left))
self.Methods()
elif self.current == ')':
tokens.append(Token(TT_parenthesis_right))
self.next()
elif self.current in TT_identifier:
tokens.append(Token(self.current))
self.next()
elif self.current in TT_numbers:
tokens.append(self.digitm())
else:
char = self.current
self.next()
print("Not found: ", char)
return tokens
is there any way I can shorten this or make it cleaner looking? I think it looks funky so if there is a way to shorten that would be greatly appreciated!
You can use dictionary to avoid the chain if-else conditions.
def toktok(self) -> object:
tokens = []
mapping = {
'+': Token(TT_addition),
'-': Token(TT_subtraction),
'*': Token(TT_multiplication),
'/': Token(TT_division),
'(': Token(TT_parenthesis_left),
}
mapping_2 = {
'=': Token(TT_equal),
')': Token(TT_parenthesis_right)
}
while self.current is not None:
if self.current == " ":
self.next()
elif self.current in mapping:
tokens.append(mapping.get(self.current))
self.Methods()
elif self.current in mapping_2:
tokens.append(mapping_2.get(self.current))
self.next()
elif self.current in TT_identifier:
tokens.append(Token(self.current))
self.next()
elif self.current in TT_numbers:
tokens.append(self.digitm())
else:
char = self.current
self.next()
print("Not found: ", char)
return tokens
I need to write a function that given a string with parenthesis and/or square brackets it is able to evaluate if they appear in the correct order. For example, in this string '([b])(aa)' you can see that every time a parenthesis or square bracket is open, it is closed in the correct position. However, a string like '[(a])' it is not closing the parenthesis or square brackets in the correct order as it should be '[(a)]'.
The function should return True or False depending on this correct position of both elements. I have tried the following code, but this logic seems to be infinite and it is not working if I have more than two parenthesis or square brackets opened.
def parenthesis(string):
for a in range(len(string)):
if string[a] == "(":
for b in range(a,len(string)):
if string[b] == "[":
for c in range(b,len(string)):
if string[c] == "]":
for d in range(c,len(string)):
if string[d] == ")":
return True
elif string[b] == ")":
return True
else:
return False
If I run the function over the string "([b])(aa)" it is returning false as output.
parenthesis("([b])(aa)")
How can I rewrite this function so it evaluates all the parenthesis and square brackets combinations properly?
If a right parenthesis is open before a left, you got -1 and return False
def is_balanced(string):
cnt = 0
for char in string:
if char == '(': cnt += 1
if char == ')': cnt -= 1
if cnt < 0: return False
return True if cnt == 0 else False
This is one of the stack implementations I know:
def is_balanced(s):
stack = []
for char in s:
if char == "(" or char == "{" or char == "[":
stack.append(char)
elif len(stack) <= 0:
return False
elif char == ")" and stack.pop() != "(":
return False
elif char == "]" and stack.pop() != "[":
return False
elif char == "}" and stack.pop() != "{":
return False
if len(stack) == 0:
return True
return False
This version is more DRY than the prior answer:
def is_balanced(parens: str) -> bool:
# Link: https://stackoverflow.com/a/73341167/
parens_map ={'(':')','{':'}','[':']'}
stack = []
for paren in parens:
if paren in parens_map: # is open
stack.append(paren)
elif paren in parens_map.values(): # is close
if (not stack) or (paren != parens_map[stack.pop()]):
return False
return not stack
i've implemented the infix-to-prefix converter using stack , the power operator has right to left association , which drove me nuts trying to implement it.
here is my code :
def infix_to_postfix_converter(string):
def pop_untill_open_parentheses():
while not stack.is_empty() and stack.peek() != '(':
output.append(stack.pop())
stack.pop()
def pop_all_the_higher_precedences(operator):
while not stack.is_empty() and precedence(stack.peek()) >= precedence(operator):
if stack.peek() != '(':
output.append(stack.pop())
else:
break
stack.push(operator)
def precedence(operator):
operators = {'(': 3,
')': 3,
'^':2,
'*': 1,
'/': 1,
'+': 0,
'-': 0,
}
return operators[operator]
stack = Stack()
output = []
for literal in string:
if literal.isalpha():
output.append(literal)
elif literal == '(':
stack.push(literal)
elif literal == ')':
pop_untill_open_parentheses()
else:
pop_all_the_higher_precedences(literal)
while not stack.is_empty():
output.append(stack.pop())
return ''.join(output)
so the solution is , if there is a '^' in the stack and another '^' came to be pushed it'll be pushed on top of the first '^' without poping it .
my solution :
add this compare function:
def compare(operatora,operatorb):
if operatora==operatorb=='^':
return False
else:
return precedence(operatora)>=precedence(operatorb)
and modify the pop_all_the_higher_precedence function:
def pop_all_the_higher_precedences(operator):
while not stack.is_empty() and compare(stack.peek(),operator):
if stack.peek() != '(':
output.append(stack.pop())
else:
break
stack.push(operator)
I have a class Stack that looks like this.
I have this function that checks if given string of parenthesis is valid or not.
After debugging and printing current character and character at peak:
This output matches condition at line 40 ans is supposed to pop the element. But does not.
Here is the full code.
class Stack:
def __init__(self):
self.item = []
def push(self, item):
self.item.append(item)
def pop(self):
self.item.pop()
def isEmpty(self):
return self.item == []
def peek(self):
if not self.isEmpty():
return self.item[-1]
else:
return "Stack is Empty."
def getStack(self):
return self.item
s = Stack()
string = "{[{()}][]}"
print(list(string))
def isValid(String):
for char in string:
# print(char)
print("Peak -> " +s.peek())
print("char -> " + char)
if char == "(" or char == "[" or char == "{":
s.push(char)
elif (char == ")" and s.peek == "("):
s.pop()
elif (char == "]" and not s.isEmpty() and s.peek == "["):
s.pop()
elif (char == "}" and not s.isEmpty() and s.peek == "{"):
s.pop()
else:
return False
return s.isEmpty()
print(isValid(string))
Before checking if statement, char -> ) and s.peak returns -> (.
So, it should be popped but instead doesnt run any if statement and returns false.
(P.S if I use or instead of and, it works(at least for couple I've tried). Shouldn't it work for and and not for or )
Am I missing something? help, someone!
You are comparing strings with function object s.peek == "[". You need to call s.peek().
Change your elif conditions to this
elif char == ")" and s.peek() == "(":
s.pop()
elif (char == "]" and not s.isEmpty() and s.peek() == "["):
s.pop()
elif (char == "}" and not s.isEmpty() and s.peek() == "{"):
s.pop()
Here is my code for converting infix to postfix using a previous stack code. The stack code is functional but I don't understand what is wrong with my postix converter code. It is returning "RecursionError: maximum recursion depth exceeded". But I don't think that is the problem. I think it might be the PrecedenceCheck function
def postfix(expr):
if len(expr)<=0 or not isinstance(expr,str):
print("expr error: postfix")
return "error"
expr=expr.strip()
items=Stack()
outputVariable = ""
pos=0
for i in range(len(expr)):
if expr[i] in '+-*/^()':
items.push(expr[pos:i].strip())
items.push(expr[i]) # operator
pos=i+1
items.push(expr[pos:].strip())
elif expr[i] is '1234567890':
outputVariable += i
elif expr[i] is '(':
items.push(expr[i])
elif expr[i] is ')':
while True:
temp = items.pop()
if temp is None or temp == '(':
break
else:
outputVariable += temp
elif expr[i] in '+-*/^' and (items is False or items.peek() is '('):
items.push(expr[i])
elif PrecedenceCheck(expr[i]) is True:
items.push(expr[i])
elif expr[i] in '+-*/^' and PrecedenceCheck(expr[i]) is False:
while True:
items.pop()
if items.isEmpty() is True:
items.push(expr[i])
break
return outputVariable
precedence = {}
precedence['*'] = 3
precedence['/'] = 3
precedence['+'] = 2
precedence['-'] = 2
precedence['('] = 1
precedence['^'] = 1
def PrecedenceCheck(char):
items=Stack()
outputVariable= " "
if char in '1234567890':
return False
elif PrecedenceCheck(char) is True:
while precedence[char] < precedence[items.peek()]:
x = items.pop()
x += outputVariable
items.push(char)
return outputVariable
else:
return 'error message'
Input: ('1 ^ 2')
Expected output: '1.0 2.0 ^'
Actual Output: RecursionError: maximum recursion depth exceeded
if PrecedenceCheck(char) is True: occurs several times, yet PrecedenceCheck can return False or a string, neither of which will be True. Probably better to clean up PrecedenceCheck to always return a bool, or at least change your conditionals to if PrecedenceCheck(char): and cross your fingers that it never returns ''.