I'm using Antlr4 with Python3.
I have a getTerminalPos method which returns a tuple of the line and column number of a given terminal. To do this, I first get the token using .getSymbol() and I then use the .getLine() and .getCharPositionInLine() methods to get the positions.
def getTerminalPos(self, terminal):
token = terminal.getSymbol()
return (token.getLine(), token.getCharPositionInLine())
An example of calling getTerminalPos inside an antlr visitor:
def visitAtom(self, ctx):
if ctx.LPAREN():
return self.visitExpr(ctx.expr())
if ctx.INT():
return nodes.Number(int(ctx.INT().getText()), getTerminalPos(ctx.INT()))
if ctx.FLOAT():
return nodes.Number(float(ctx.FLOAT().getText()), getTerminalPos(ctx.FLOAT()))
When I run the code, I get the following error message:
File ".../py-antlr4-lmaspl/AntlrVisitor.py", line 55, in getTerminalPos
return (token.getLine(), token.getCharPositionInLine())
AttributeError: 'CommonToken' object has no attribute 'getLine'
According to the Antlr4 Java Runtime, these methods exist: https://www.antlr.org/api/Java/org/antlr/v4/runtime/CommonToken.html
According to the Antlr3 Python Runtime, these methods exist: https://www.antlr3.org/api/Python/classantlr3_1_1_common_token.html
So, they should exist for the Antlr4 Python Runtime too?
How do I fix this error? Is there a different set of methods I should use instead to get the line and column numbers?
Edit: I meant to say that I found a similar issue here: https://github.com/antlr/antlr4/issues/1529. It is marked as a bug, but closed for now...
If I look at the source of the Python 3 runtime, I see CommonToken like this:
class CommonToken(Token):
# An empty {#link Pair} which is used as the default value of
# {#link #source} for tokens that do not have a source.
EMPTY_SOURCE = (None, None)
def __init__(self, source = EMPTY_SOURCE, type = None, channel=Token.DEFAULT_CHANNEL, start=-1, stop=-1):
super(CommonToken, self).__init__()
self.source = source
self.type = type
self.channel = channel
self.start = start
self.stop = stop
self.tokenIndex = -1
if source[0] is not None:
self.line = source[0].line
self.column = source[0].column
else:
self.column = -1
...
and Token like this:
class Token (object):
...
def __init__(self):
self.source = None
self.type = None # token type of the token
self.channel = None # The parser ignores everything not on DEFAULT_CHANNEL
self.start = None # optional; return -1 if not implemented.
self.stop = None # optional; return -1 if not implemented.
self.tokenIndex = None # from 0..n-1 of the token object in the input stream
self.line = None # line=1..n of the 1st character
self.column = None # beginning of the line at which it occurs, 0..n-1
self._text = None # text of the token.
...
So, my guess is this should do it for you:
return (token.line, token.column)
Related
I'm trying to implement an RTD client using this project as an example, but without success.
Instance as RTD server the example contained in the win32com package below, and in Excel it works perfectly, but in the RTD client used as a template, it generates this error.
RTD client code
import functools
import pythoncom
import win32com.client
from win32com import universal
from win32com.client import gencache
from win32com.server.util import wrap
EXCEL_TLB_GUID = '{00020813-0000-0000-C000-000000000046}'
EXCEL_TLB_LCID = 0
EXCEL_TLB_MAJOR = 1
EXCEL_TLB_MINOR = 4
gencache.EnsureModule(EXCEL_TLB_GUID, EXCEL_TLB_LCID, EXCEL_TLB_MAJOR, EXCEL_TLB_MINOR)
universal.RegisterInterfaces(EXCEL_TLB_GUID,
EXCEL_TLB_LCID, EXCEL_TLB_MAJOR, EXCEL_TLB_MINOR,
['IRtdServer', 'IRTDUpdateEvent'])
# noinspection PyProtectedMember
class ObjectWrapperCOM:
LCID = 0x0
def __init__(self, obj):
self._impl = obj # type: win32com.client.CDispatch
def __getattr__(self, item):
flags, dispid = self._impl._find_dispatch_type_(item)
if dispid is None:
raise AttributeError("{} is not a valid property or method for this object.".format(item))
return functools.partial(self._impl._oleobj_.Invoke, dispid, self.LCID, flags, True)
# noinspection PyPep8Naming
class RTDUpdateEvent:
_com_interfaces_ = ['IRTDUpdateEvent']
_public_methods_ = ['Disconnect', 'UpdateNotify']
_public_attrs_ = ['HeartbeatInterval']
# Implementation of IRTDUpdateEvent.
HeartbeatInterval = -1
def __init__(self, event_driven=True):
self.ready = False
self._event_driven = event_driven
def UpdateNotify(self):
if self._event_driven:
self.ready = True
def Disconnect(self):
pass
class RTDClient:
MAX_REGISTERED_TOPICS = 1024
def __init__(self, class_id):
"""
:param classid: can either be class ID or program ID
"""
self._class_id = class_id
self._rtd = None
self._update_event = None
self._topic_to_id = {}
self._id_to_topic = {}
self._topic_values = {}
self._last_topic_id = 0
def connect(self, event_driven=True):
"""
Connects to the RTD server.
Set event_driven to false if you to disable update notifications.
In this case you'll need to call refresh_data manually.
"""
dispatch = win32com.client.Dispatch(self._class_id)
self._update_event = RTDUpdateEvent(event_driven)
try:
self._rtd = win32com.client.CastTo(dispatch, 'IRtdServer')
except TypeError:
# Automated makepy failed...no detailed construction available for the class
self._rtd = ObjectWrapperCOM(dispatch)
self._rtd.ServerStart(wrap(self._update_event))
def update(self):
"""
Check if there is data waiting and call RefreshData if necessary. Returns True if new data has been received.
Note that you should call this following a call to pythoncom.PumpWaitingMessages(). If you neglect to
pump the message loop you'll never receive UpdateNotify callbacks.
"""
# noinspection PyUnresolvedReferences
pythoncom.PumpWaitingMessages()
if self._update_event.ready:
self._update_event.ready = False
self.refresh_data()
return True
else:
return False
def refresh_data(self):
"""
Grabs new data from the RTD server.
"""
(ids, values) = self._rtd.RefreshData(self.MAX_REGISTERED_TOPICS)
for id_, value in zip(ids, values):
if id_ is None and value is None:
# This is probably the end of message
continue
assert id_ in self._id_to_topic, "Topic ID {} is not registered.".format(id_)
topic = self._id_to_topic[id_]
self._topic_values[topic] = value
def get(self, topic: tuple):
"""
Gets the value of a registered topic. Returns None if no value is available. Throws an exception if
the topic isn't registered.
"""
assert topic in self._topic_to_id, 'Topic %s not registered.' % (topic,)
return self._topic_values.get(topic)
def register_topic(self, topic: tuple):
"""
Registers a topic with the RTD server. The topic's value will be updated in subsequent data refreshes.
"""
if topic not in self._topic_to_id:
id_ = self._last_topic_id
self._last_topic_id += 1
self._topic_to_id[topic] = id_
self._id_to_topic[id_] = topic
self._rtd.ConnectData(id_, topic, True)
def unregister_topic(self, topic: tuple):
"""
Un-register topic so that it will not get updated.
:param topic:
:return:
"""
assert topic in self._topic_to_id, 'Topic %s not registered.' % (topic,)
self._rtd.DisconnectData(self._topic_to_id[topic])
def disconnect(self):
"""
Closes RTD server connection.
:return:
"""
self._rtd.ServerTerminate()
The example RTD Server is Python.RTD.TimeServer and it works great in Excel, but the RTD client in the above example throws this error:
File "C:\Users\XXXXXX\AppData\Local\Temp\gen_py\3.9\00020813-0000-0000-C000-000000000046x0x1x9.py", line 20963, in UpdateNotify
return self.oleobj.InvokeTypes(10, LCID, 1, (24, 0), (),)
pywintypes.com_error: (-2147352573, 'Member not found.', None, None)
I have no knowledge of COM, but in the struggle to learn.
Any suggestions from friends?
You need to implement all the methods defined by the IRTDServer interface.
https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.excel.irtdserver?view=excel-pia
Once you do that excel should be able to find all methods it needs to work with your server.
import operator
import re
from ply import lex, yacc
class Lexer(object):
tokens = [
'COMMA',
'TILDE',
'PARAM',
'LP',
'RP',
'FUNC'
]
# Regular expression rules for simple tokens
t_COMMA = r'\,'
t_TILDE = r'\~'
t_PARAM = r'[^\s\(\),&:\"\'~]+'
def __init__(self, dict_obj):
self.dict_obj = dict_obj
def t_LP(self, t):
r'\('
return t
def t_RP(self, t):
r'\)'
return t
def t_FUNC(self, t):
# I want to generate token for this FUNC from the keys of model map
# For eg: r'key1|key2'
r'(?i)FUNC'
return t
# Define a rule so we can track line numbers
def t_newline(self, t):
r'\n+'
t.lexer.lineno += len(t.value)
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'
# Error handling rule
def t_error(self, t):
print("Illegal character '%s' on line %d, column %d" % (t.value[0], t.lexer.lineno, t.lexer.lexpos))
t.lexer.skip(1)
# Build the lexer
def build_lexer(self, **kwargs):
self.lexer = lex.lex(module=self, **kwargs)
return self.lexer
class Parser(object):
tokens = Lexer.tokens
def __init__(self, **kwargs):
self.parser = yacc.yacc(module=self, **kwargs)
self.lexer = None
self._dict_obj = None
self.error = ""
self.result = ""
#property
def dict_obj(self):
return self._dict_obj
#dict_obj.setter
def dict_obj(self, dict_obj):
self._dict_obj = dict_obj
self.lexer = Lexer(self._dict_obj).build_lexer()
# Handles LP expression RP
def p_expression(self, p):
"""
expression : LP expression RP
"""
# Handles TILDE PARAM - call search
def p_tilde_param(self, p):
"""
expression : TILDE PARAM
"""
p[0] = p[2]
return p[0]
# Handles ANY LP PARAM RP - call search
def p_expression_any(self, p):
"""
expression : FUNC LP PARAM RP
"""
p[0] = p[3]
return p[0]
# Error handling rule
def p_error(self, p):
if p:
stack_state_str = " ".join([symbol.type for symbol in self.parser.symstack[1:]])
self.error = "Syntax error at %s, type %s, on line %d, Parser state: %s %s . %s" % (
p.value, p.type, p.lineno, self.parser.state, stack_state_str, p
)
else:
self.error = "SYNTAX ERROR IN INPUT"
def get_result(self, input_):
input_ = input_.strip()
if input_:
self.result = self.parser.parse(input_, lexer=self.lexer)
return self.result
else:
raise ValueError("EMPTY EXPRESSION ERROR")
def parser(input_):
par_obj = Parser()
par_obj.dict_obj = {
'key1' : 'value1',
'key2' : 'value2'
}
return par_obj.get_result(input_)
result = parser("~hello")
Above is the code of lexer and parser using ply library. I have just encapsulated all of my code in the class form. Problems which i am facing:
1.) I'm trying to pass a dict_obj to the parser class. I don't know what i am doing wrong and getting an error like :
AttributeError: 'Parser' object has no attribute 'dict_obj'
2.) What I'm trying to do?
I want to pass this dict_obj to the parser class and then pass it to the lexer class as well and then make use of it in the lexer one of the tokens methods (t_FUNC) method. In this method my regex will return keys of the this dict obj.
I think i'm doing something wrong and hence failing to implement it. Please help.
In your constructor (__init__) for the Parser object, you ask Ply to generate a parser before the Parser object is fully constructed:
def __init__(self, **kwargs):
self.parser = yacc.yacc(module=self, **kwargs)
# This is the critical line:
self._dict_obj = None
In order to construct a parser from the object (yacc.yacc(module=self)), Ply needs to iterate over all the object's attributes. For example, it needs to find all the parser functions in order to extract their docstrings in order to determine the grammar.
Ply uses the dir built-in function to make a dictionary of all the object's attributes. Because your Parser object has a custom attribute dict_obj, that key is returned from dir and so Ply tries to cache that attribute with its value. But when it calls gettattr(module, 'dict_obj'), the getter is called, and the getter tries to return self._dict_obj. However, self._dict_obj has not yet been defined, so that ends up throwing an error:
AttributeError: 'Parser' object has no attribute '_dict_obj'
Note that this is not the error message you reported in your question; that error says that there is no attribute dict_obj. Perhaps that was a copy-and-paste error.
If you move the call to yacc.yacc to the end of the initialiser, that particular problem goes away:
def __init__(self, **kwargs):
self.lexer = None
self._dict_obj = None
self.error = ""
self.result = ""
self.parser = yacc.yacc(module=self, **kwargs)
However, there are a number of other problems in the code excerpt which make it difficult to verify this solution. These include:
There is no LexerNmsysSearch. I assumed you meant Lexer.
There is no node_expression. I have no idea what that is supposed to be so I just removed the test.
Your grammar does not match the input you are testing, so the parser immediately throws a syntax error. I changed the input to "(~hello)" in an attempt to produce something parseable.
The parser actions do not set semantic values, so self.parse.parse() doesn't return any value. This causes get_result to throw an error.
At that point, I gave up on trying to produce anything sensible out of the code. For future reference, please ensure that error messages are quoted exactly and that sample code included in the question can be run.
Below is the code for the Validator, Why None None is print? This same code is in django 1.6.
In [1]: %paste
class SVNPathValidator(object):
message = 'Enter a valid value.'
code = 'invalid'
def __init__(self, verify_existed=True, message=None, code=None, path_type=0, rev=None):
'''
file_type:
0 file
1 dir
'''
if message is None:
self.message = message
if code is None:
self.code = code
print self.message, self.code
self.path_type = path_type
self.rev = rev
def __call__(self, value):
print self.message, self.code
## -- End pasted text --
In [2]: validator=SVNPathValidator()
None None
In [3]: validator('svn://10.10.10.10/repo')
None None
I'm not sure why you think it would do anything else. You don't pass either the message or the code when you instantiate the object, and you don't set them in the __call__ method.
The code is not the same as the Django code you link to. On the contrary, all the Django versions have the opposite of your code: if self.message is not None etc.
The function of you code is to set those two variables to None -- though I assume it wasn't meant to be.
def __init__(self, verify_existed=True, message=None, code=None, path_type=0, rev=None):
if message is None:
self.message = message
if code is None:
self.code = code
First it checks whether the arguments are None and if they are, it assigns them as instance variables. Nowhere does self.message or self.code get assigned to anything else.
I've just been reading an article that talks about implementing a parser in python:
http://effbot.org/zone/simple-top-down-parsing.htm
The general idea behind the code is described in this paper: http://mauke.hopto.org/stuff/papers/p41-pratt.pdf
Being fairly new to writing parsers in python so I'm trying to write something similar as a learning exercise. However when I attempted to try to code up something similar to what was found in the article I am getting an TypeError: unbound method TypeError. This is the first time I've encountered such an error and I've spent all day trying to figure this out but I haven't solved the issue. Here is a minimal code example (in it's entirety) that has this problem:
import re
class Symbol_base(object):
""" A base class for all symbols"""
id = None # node/token type name
value = None #used by literals
first = second = third = None #used by tree nodes
def nud(self):
""" A default implementation for nud """
raise SyntaxError("Syntax error (%r)." % self.id)
def led(self,left):
""" A default implementation for led """
raise SyntaxError("Unknown operator (%r)." % self.id)
def __repr__(self):
if self.id == "(name)" or self.id == "(literal)":
return "(%s %s)" % (self.id[1:-1], self.value)
out = [self.id, self.first, self.second, self.third]
out = map(str, filter(None,out))
return "(" + " ".join(out) + ")"
symbol_table = {}
def symbol(id, bindingpower=0):
""" If a given symbol is found in the symbol_table return it.
If the symblo cannot be found theni create the appropriate class
and add that to the symbol_table."""
try:
s = symbol_table[id]
except KeyError:
class s(Symbol_base):
pass
s.__name__ = "symbol:" + id #for debugging purposes
s.id = id
s.lbp = bindingpower
symbol_table[id] = s
else:
s.lbp = max(bindingpower,s.lbp)
return s
def infix(id, bp):
""" Helper function for defining the symbols for infix operations """
def infix_led(self, left):
self.first = left
self.second = expression(bp)
return self
symbol(id, bp).led = infix_led
#define all the symbols
infix("+", 10)
symbol("(literal)").nud = lambda self: self #literal values must return the symbol itself
symbol("(end)")
token_pat = re.compile("\s*(?:(\d+)|(.))")
def tokenize(program):
for number, operator in token_pat.findall(program):
if number:
symbol = symbol_table["(literal)"]
s = symbol()
s.value = number
yield s
else:
symbol = symbol_table.get(operator)
if not symbol:
raise SyntaxError("Unknown operator")
yield symbol
symbol = symbol_table["(end)"]
yield symbol()
def expression(rbp = 0):
global token
t = token
token = next()
left = t.nud()
while rbp < token.lbp:
t = token
token = next()
left = t.led(left)
return left
def parse(program):
global token, next
next = tokenize(program).next
token = next()
return expression()
def __main__():
print parse("1 + 2")
if __name__ == "__main__":
__main__()
When I try to run this with pypy:
Traceback (most recent call last):
File "app_main.py", line 72, in run_toplevel
File "parser_code_issue.py", line 93, in <module>
__main__()
File "parser_code_issue.py", line 90, in __main__
print parse("1 + 2")
File "parser_code_issue.py", line 87, in parse
return expression()
File "parser_code_issue.py", line 81, in expression
left = t.led(left)
TypeError: unbound method infix_led() must be called with symbol:+ instance as first argument (got symbol:(literal) instance instead)
I'm guessing this happens because I don't create an instance for the infix operations but I'm not really wanting to create an instance at that point. Is there some way I can change those methods without creating instances?
Any help explaining why this is happening and what I can do to fix the code is greatly appreciated!
Also is this behaviour going to change in python 3?
You forgot to create an instance of the symbol in your tokenize() function; when not a number, yield symbol(), not symbol:
else:
symbol = symbol_table.get(operator)
if not symbol:
raise SyntaxError("Unknown operator")
yield symbol()
With that one change your code prints:
(+ (literal 1) (literal 2))
You haven't bound new function to the instance of your object.
import types
obj = symbol(id, bp)
obj.led = types.MethodType(infix_led, obj)
See accepted answer to another SO question
I wrote a simple program to read through a log and to parse through and obtain the lowest beginning number (the head) and to print it. I am now editing that program and combining it with a class I wrote to parse an actual logfile. Essentially, as opposed to sorting based off of the simple number from the log from my previous program, I now need to reference the parsed information from one class into another class. I was wondering what the most convenient way to do this. I am a beginner programmer in python and don't know if I can explicitly reference the class.
Here are the classes.
Parser
class LogLine:
SEVERITIES = ['EMERG','ALERT','CRIT','ERR','WARNING','NOTICE','INFO','DEBUG']
severity = 1
def __init__(self, line):
try:
m = re.match(r"^(\d{4}-\d{2}-\d{2}\s*\d{2}:\d{2}:\d{2}),?(\d{3}),?(\s+\[(?:[^\]]+)\])+\s+[A-Z]+\s+(\s?[a-zA-Z0-9\.])+\s?(\((?:\s?\w)+\))\s?(\s?.)+", line)
timestr, msstr, sevstr, self.filename, linestr, self.message = m.groups()
self.line = int(linestr)
self.sev = self.SEVERITIES.index(sevstr)
self.time = float(calendar.timegm(time.strptime(timestr, "%Y-%m-%d %H:%M:%S,%f"))) + float(msstr)/1000.0
dt = datetime.strptime(t, "%Y-%m-%d %H:%M:%S,%f")
except Exception:
print 'error',self.filename
def get_time(self):
return self.time
def get_severity(self):
return self.sev
def get_message(self):
return self.message
def get_filename(self):
return self.filename
def get_line(self):
return self.line
Sorter
class LogFile:
def __init__(self,filepath):
self.logfile = open(filepath, "r")
self.head = None
def __str__(self):
return "x=" + str(self.x) + "y="+str(self.y)
def readline(self):
if self.head != None:
h = self.head
self.head = None
return h
else:
return self.logfile.readline().rstrip(' ')
def get_line(self):
if self.head == None:
self.head = self.readline().rstrip(' ')
return self.head.get.line()
else:
return self.head.get.line()
def close (self):
self.logfile.close()
I have begun to edit my second class by adding the get_line function. Don't know if I'm on the right track.
In simpler terms, I need the head to become "LogLine"
It is okay to use one class from another class. You have one class that parses a single line from a log file and builds an object that represents the line; and you have another class that reads lines from a log file. It would be very natural for the second class to call the first class.
Here is a very simple class that reads all lines from a log file and builds a list:
class LogFile(object):
def __init__(self,filepath):
with open(filepath, "r") as f:
self.lst = [LogLine(line) for line in f]
You can see that self.lst is being set to a list of lines from the input log file, but not just the text of the line; the code is calling LogLine(line) to store instances of LogLine. If you want, you can sort the list after you build it:
self.lst.sort(key=LogLine.get_line)
If the log files are very large, it might not be practical to build the list. You have a .get_line() method function, and we can use that:
class LogFile(object):
def __init__(self,filepath):
self.logfile = open(filepath, "r")
def get_line(self):
try:
line = next(self.logfile) # get next line from open file object
return LogLine(line)
except StopIteration: # next() raises this when you reach the end of the file
return None # return
def close(self):
self.logfile.close()
An open file object (returned by the open() function) can be iterated. We can call next() on this object and it will give us the next input line. When the end of file is reached, Python will raise StopIteration to signal the end of the file.
Here the code will catch the StopIteration exception and return None when the end of the log file is reached. But I think this isn't the best way to handle this problem. Let's make the LogFile class work in for loops and such:
class LogFile(object):
def __init__(self,filepath):
self.f = open(filepath)
def __next__(self): # Python 3.x needs this to be named "__next__"
try:
line = next(self.f)
return LogLine(line)
except StopIteration:
# when we reach the end of input, close the file object
self.f.close()
# re-raise the exception
raise
next = __next__ # Python 2.x needs this to be named "next"
A for loop in Python will repeatedly call the .__next__() method function (Python 3.x) or else the .next() method function (Python 2.x) until the StopIteration exception is raised. Here we have defined both method function names so this code should work in Python 2.x or in Python 3.x.
Now you can do this:
for ll in LogFile("some_log_file"):
... # do something with ll, which will always be a LogLine instance