I'm writing a simplified MODULA-2 grammar using Python PLY.
But I'm getting a syntax error:
$ python3 m2.py
Syntax error at 'MODULE'
and I cannot figure out what the problem with the rules is.
Here is the grammar:
import ply.lex as lex
import ply.yacc as yacc
# =============================================================================
# Lexer rules
# =============================================================================
tokens = (
    # Keywords
    'RETURN', 'IF', 'THEN', 'VAR', 'MODULE', 'BEGIN', 'END',
    # Constants
    'NUMBER',
    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIV', 'MOD', 'ASSIGN_OP',
    # Separators
    'LPAR', 'RPAR', 'PERIOD', 'COLON', 'SEMICOLON',
    # Identifier
    'IDENT',
)
# Tokens
t_NUMBER = r'\d+'
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_LPAR = r'\('
t_RPAR = r'\)'
t_PERIOD = r'\.'
t_COLON = r':'
t_SEMICOLON = r';'
t_ASSIGN_OP = r':='
t_IDENT = r'[a-zA-Z][a-zA-Z0-9]*'
# Ignored characters
t_ignore = ' \t'
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
# Build the lexer
lexer = lex.lex()
# =============================================================================
# Parser rules
# =============================================================================
precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIV'),
)
def p_add_operator(t):
    """ add_operator : PLUS
                     | MINUS
    """
    pass

def p_mul_operator(t):
    """ mul_operator : TIMES
                     | DIV
                     | MOD
    """
    pass

def p_simple_expression(t):
    """ expression : term
                   | expression add_operator term
    """
    pass

def p_term(t):
    """ term : factor
             | term mul_operator factor
    """
    pass

def p_factor(t):
    """ factor : NUMBER
               | IDENT
               | LPAR expression RPAR
    """
    pass

def p_statement(t):
    """ statement : IDENT
                  | IDENT ASSIGN_OP expression
                  | IF expression THEN statement_sequence END
                  | RETURN expression
    """
    pass

def p_statement_sequence(t):
    """ statement_sequence : statement
                           | statement_sequence SEMICOLON statement
    """
    pass

def p_block(t):
    """ block : declaration_list BEGIN statement_sequence END
    """
    pass

def p_declaration_list(t):
    """ declaration_list : declaration
                         | declaration_list declaration
    """
    pass

def p_declaration(t):
    """ declaration : VAR IDENT COLON IDENT SEMICOLON
    """
    pass

def p_program_module(t):
    """ program_module : MODULE IDENT SEMICOLON block IDENT PERIOD
    """
    pass

def p_error(t):
    print("Syntax error at '%s'" % t.value)
parser = yacc.yacc(start='program_module')
if __name__ == "__main__":
    s = "MODULE test; VAR x: INTEGER; BEGIN x := 10 END test."
    parser.parse(s)
The interesting thing is that the same grammar rules written for lex/yacc work fine. Can somebody help me with this?
AFAIK, ply.lex doesn't have enough magic to know that you want the special word MODULE to be the token MODULE.
With your definition, the simple test:
lexer.input("MODULE test; VAR x: INTEGER; BEGIN x := 10 END test.")
for tok in lexer:
    print(tok)
outputs:
LexToken(IDENT,'MODULE',1,0)
LexToken(IDENT,'test',1,7)
LexToken(SEMICOLON,';',1,11)
LexToken(IDENT,'VAR',1,13)
LexToken(IDENT,'x',1,17)
LexToken(COLON,':',1,18)
LexToken(IDENT,'INTEGER',1,20)
LexToken(SEMICOLON,';',1,27)
LexToken(IDENT,'BEGIN',1,29)
LexToken(IDENT,'x',1,35)
LexToken(ASSIGN_OP,':=',1,37)
LexToken(NUMBER,'10',1,40)
LexToken(IDENT,'END',1,43)
LexToken(IDENT,'test',1,47)
LexToken(PERIOD,'.',1,51)
The correct way to process keywords is to identify them inside the IDENT token rule:
# =============================================================================
# Lexer rules
# =============================================================================
# Keywords
keywords = ( 'RETURN', 'IF', 'THEN', 'VAR', 'MODULE', 'BEGIN', 'END' )
tokens = keywords + (
    # Constants
    'NUMBER',
    ...
and
def t_IDENT(t):
    r'[a-zA-Z][a-zA-Z0-9]*'
    if t.value in keywords:  # is this a keyword?
        t.type = t.value
    return t
The same lexer test now correctly gives:
LexToken(MODULE,'MODULE',1,0)
LexToken(IDENT,'test',1,7)
LexToken(SEMICOLON,';',1,11)
LexToken(VAR,'VAR',1,13)
LexToken(IDENT,'x',1,17)
LexToken(COLON,':',1,18)
LexToken(IDENT,'INTEGER',1,20)
LexToken(SEMICOLON,';',1,27)
LexToken(BEGIN,'BEGIN',1,29)
LexToken(IDENT,'x',1,35)
LexToken(ASSIGN_OP,':=',1,37)
LexToken(NUMBER,'10',1,40)
LexToken(END,'END',1,43)
LexToken(IDENT,'test',1,47)
LexToken(PERIOD,'.',1,51)
and the parsing shows no error.
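For the record, the PLY documentation recommends the same idea via a reserved-word dictionary, which also works when a token name differs from the keyword's spelling. A sketch of that variant for this grammar:

reserved = {
    'RETURN': 'RETURN', 'IF': 'IF', 'THEN': 'THEN', 'VAR': 'VAR',
    'MODULE': 'MODULE', 'BEGIN': 'BEGIN', 'END': 'END',
}
tokens = tuple(reserved.values()) + (
    'NUMBER',
    'PLUS', 'MINUS', 'TIMES', 'DIV', 'MOD', 'ASSIGN_OP',
    'LPAR', 'RPAR', 'PERIOD', 'COLON', 'SEMICOLON',
    'IDENT',
)

def t_IDENT(t):
    r'[a-zA-Z][a-zA-Z0-9]*'
    t.type = reserved.get(t.value, 'IDENT')  # keywords override IDENT
    return t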
I want to use pyparsing to parse a nested function call which starts with a particular function name.
Just like this:
tag("tag_name_1", value_equal("proxy.province", "value", "return_value", test(1,2)))
The string to be parsed starts with the function named 'tag'.
The problem is: why doesn't exprStack contain the "tag" function?
import pyparsing as pp
from typing import Any, List, Dict
def debug(*args, **kwargs):
    print("debug" + "---"*10)
    print(*args, **kwargs)
    print("debug" + "---"*10)
    # return "debug"
    return "debug"

def insert_fn_argcount_tuple(t):
    fn = t.pop(0)
    num_args = len(t)
    print((fn, num_args))
    t.insert(0, (fn, num_args))

def push_first(toks):
    exprStack.append(toks[0])

def to_string(toks):
    pass
LPAREN, RPAREN, COMMA = map(pp.Suppress, '(),')
ident = pp.Word(pp.alphas, pp.alphanums+"_")
integer = pp.Word(pp.nums)
string = (pp.QuotedString("'") | pp.QuotedString('"')).setParseAction()
expr = pp.Forward()
expr_list = pp.delimitedList(pp.Group(expr))
tag_fn = ("tag" + LPAREN + expr_list + RPAREN).setParseAction(insert_fn_argcount_tuple)
fn_call = (ident + LPAREN + expr_list + RPAREN).setParseAction(insert_fn_argcount_tuple)
atom = ( (fn_call | string | integer) | pp.Group(LPAREN+expr+RPAREN)).addParseAction(push_first)
# atom = ( fn_call | pp.Group(LPAREN+expr+RPAREN)).addParseAction(push_first)
expr <<= atom
bnf = pp.Forward()
bnf <<= tag_fn
funcs = """tag
value_equal
value_contain
value_match
value
"""
# functions
def tag(tag_name: str, value: Any) -> Dict:
    if not tag_name or not value:
        return {}
    return {"tag_name": tag_name, "tag_value": value}

def test(*args, **kwargs):
    return ""

def value_equal(key: str, value, default=None, test=None):
    print(f"---{value_equal}---")
    print(f"key: {key}, value: {value}, default: {default}, test: {test}")
    return "value-1"

fns = {
    "tag": tag,
    "value_equal": value_equal,
    "test": test
}
exprStack = []
def evaluate_stack(s: List):  # note: the list is consumed (mutated) by evaluate_stack
    fn, arg_nums = s.pop(), 0
    if isinstance(fn, tuple):
        fn, arg_nums = fn
    if fn in fns:
        args = reversed([evaluate_stack(s) for _ in range(arg_nums)])
        return fns[fn](*args)
    else:
        return fn
test_str = """tag("tag_name_1", value_equal("proxy.province", "value", "return_value", test(1,2)))"""
# test_str = "123"
p = bnf.parse_string(test_str)
print(f"\nexprStack:{exprStack}\n")
t = evaluate_stack(exprStack)
print(f"tag:{t}")
The output of above code is:
('test', 2)
('value_equal', 4)
('tag', 2)
exprStack:['tag_name_1', 'proxy.province', 'value', 'return_value', '1', '2', ('test', 2), ('value_equal', 4)]
I expect that exprStack contains the tag function, maybe like this:
exprStack:['tag_name_1', 'proxy.province', 'value', 'return_value', '1', '2', ('test', 2), ('value_equal', 4), ('tag', 2)]
You are really pretty close. The thing is, the push_first parse action is attached to atom, but tag_fn is not an atom, so it won't get its data pushed to exprStack.
To fix this:
Change atom to include tag_fn, something like this:
atom = ((tag_fn | fn_call | string | integer) | pp.Group(LPAREN+expr+RPAREN)).addParseAction(push_first)
Change bnf to expr instead of tag_fn:
bnf <<= expr
With these two changes, I get this for exprStack:
exprStack:['tag_name_1', 'proxy.province', 'value', 'return_value', '1', '2', ('test', 2), ('value_equal', 4), ('tag', 2)]
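As a quick check (a sketch reusing test_str, fns and evaluate_stack from the question), the stack now evaluates all the way down to the tag call:

exprStack.clear()              # start from an empty stack
bnf.parse_string(test_str)     # the parse actions repopulate it
t = evaluate_stack(exprStack)
print(t)                       # -> {'tag_name': 'tag_name_1', 'tag_value': 'value-1'}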
I am trying to parse the dbus-monitor output messages. Most of the messages are multi-line entries (including parameters). I need to parse and concatenate individual log messages to a single-line entry.
The dbus-monitor output messages appear as below,
method call time=462.117843 sender=:1.62 -> destination=org.freedesktop.filehandler serial=122 path=/org/freedesktop/filehandler/routing; interface=org.freedesktop.filehandler.routing; member=start
int16 29877
uint16 0
method return time=462.117844 sender=org.freedesktop.filehandler -> destination=:1.62 serial=2210 reply_serial=122
int16 29877
uint16 0
method call time=462.117845 sender=:1.62 -> destination=org.freedesktop.filehandler serial=123 path=/org/freedesktop/filehandler/routing; interface=org.freedesktop.filehandler.routing; member=comment
string "starting .."
string "routing"
method return time=462.117846 sender=:1.19 -> destination=:1.62 serial=2212 reply_serial=123
int12 -23145
signal time=463.11223 sender=:1.64 -> destination=(null destination) serial=124 path=/org/freedesktop/fileserver; interface=org.freedesktop.DBus.Properties; member=PropertiesChanged
string "com.freedesktop.Systemserver"
array[
dict entry(
string "SystemTime"
variant struct{
byte 12
byte 9
byte 0
}
)
]
array [
]
This is the regex I tried to group the dbus messages (parameters not grouped):
\b(signal|method call|method return)\b time=([\d,.]*) sender=([\w,.,:,(,), ]*) -> destination=([\w,.,:,(,), ]*) serial=([(,),\w]*) (?:path=([\w,\/]*); interface=([\w,.]*); member=([\w,_,-]*))?(?:reply_serial=([\d]*))?
I expect the output in the format below:
C [sender,serial] path interface+member (parameter1, parameter2, ...)
R [destination,reply_serial] interface+member (parameter1, parameter2, ...)
S [sender, serial] path interface+member (parameter1, parameter2, ...)
A sample output for the above dbus-monitor messages is shown below,
C [:1.62,122] /org/freedesktop/filehandler/routing org.freedesktop.filehandler.routing.start (29877,0)
R [:1.62,122] org.freedesktop.filehandler.routing.start (29877,0)
C [:1.62,123] /org/freedesktop/filehandler/routing org.freedesktop.filehandler.routing.comment ("starting", "routing")
R [:1.62,123] org.freedesktop.filehandler.routing.comment (-23145)
S [:1.64, 124] /org/freedesktop/fileserver org.freedesktop.DBus.Properties.PropertiesChanged ("com.freedesktop.Systemserver"[("SystemTime",{12,9,0})][])
How can the above expected result be achieved when the entries are usually multi-line? Also, the signals have multiple encapsulations, making it difficult to access the parameters. Can someone help with the parsing of these dbus messages into the expected format?
Can you suggest how the code can be rewritten to process line by line?
Here I rearranged it accordingly:
import re

regex = r'\b(signal|method call|method return)\b time=([\d,.]*) sender=([\w,.,:,(,), ]*) -> destination=([\w,.,:,(,), ]*) serial=([(,),\w]*) (?:path=([\w,\/]*); interface=([\w,.]*); member=([\w,_,-]*))?(?:reply_serial=([\d]*))?'
remember = dict()
sep = None
for line in open('dbusl.in'):
    m = re.match(regex, line)
    if m:
        if sep is not None: print(")")   # end the previous parameter group
        m = list(m.groups())             # each match is 9 capturing groups
        if m[0] == 'method call':
            print("C [{2},{4}] {5} {6}.{7}".format(*m), end=' ')
            remember[m[4]] = m[6:8]      # store interface+member for return
        if m[0] == 'method return':
            m[6:8] = remember.pop(m[8])  # recall stored interface+member
            print("R [{3},{8}] {6}.{7}".format(*m), end=' ')
        if m[0] == 'signal':
            print("S [{2}, {4}] {5} {6}.{7}".format(*m), end=' ')
        sep = "("
    else:
        p = line.rstrip()                # now handle parameters
        if p[-1] in "[](){}":            # with "encapsulations":
            p = p[-1]                    # delete spaces, "array", "dict ..."
        p = re.sub(r'^\s*\w*\s*', '', p) # delete spaces and data type
        if p[-1] in "])}":
            sep = ''                     # no separator before closing
        print(sep + p, end='')
        if p[-1] in "[](){}": sep = ''
        else: sep = ', '                 # separator after data item
print(")")                               # end the last parameter group
Note that I also changed m[6:8] = remember[m[8]] to m[6:8] = remember.pop(m[8]) in order to free the memory of no longer needed interface+member data.
If you absolutely have to use dbus-monitor, it’s probably best to use its PCAP output mode by passing the --pcap option to it. That outputs in a well-documented structured format which can be read by libpcap.
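If you go that route, the capture file can be walked with, say, the third-party dpkt module. A minimal sketch, assuming dbus-monitor --pcap > dbus.pcap was run first; each record payload is one raw D-Bus message, and decoding its arguments still needs a D-Bus unmarshaller:

import dpkt

with open('dbus.pcap', 'rb') as f:
    for ts, buf in dpkt.pcap.Reader(f):  # one capture record per D-Bus message
        # buf[:1] is the message's endianness marker, b'l' or b'B'
        print(ts, len(buf), buf[:1])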
As you already have a usable regex, you can build on it by using it with re.split to get the needed message parts. Note that this yields a separate string for each capture group plus one string with the parameters, for each message entry. This example assumes that all the messages are in the string messages:
import re

regex = r'\b(signal|method call|method return)\b time=([\d,.]*) sender=([\w,.,:,(,), ]*) -> destination=([\w,.,:,(,), ]*) serial=([(,),\w]*) (?:path=([\w,\/]*); interface=([\w,.]*); member=([\w,_,-]*))?(?:reply_serial=([\d]*))?'
m = re.split(regex, messages)
m = m[1:]  # discard (possibly empty) text before the first match
remember = dict()
while m:   # each match group is 9 capturing groups + 1 parameter group
    if m[0] == 'method call':
        print("C [{2},{4}] {5} {6}.{7}".format(*m), end=' ')
        remember[m[4]] = m[6:8]          # store interface+member for return
    if m[0] == 'method return':
        m[6:8] = remember[m[8]]          # recall stored interface+member
        print("R [{3},{8}] {6}.{7}".format(*m), end=' ')
    if m[0] == 'signal':
        print("S [{2}, {4}] {5} {6}.{7}".format(*m), end=' ')
    # now handle parameters
    sep = "("
    for p in m[9].split('\n')[1:-1]:     # skip the empty strings at start and end
        if p[-1] in "[](){}":            # with "encapsulations":
            p = p[-1]                    # delete spaces, "array", "dict ..."
        p = re.sub(r'^\s*\w*\s*', '', p) # delete spaces and data type
        if p[-1] in "])}":
            sep = ''                     # no separator before closing
        print(sep + p, end='')
        if p[-1] in "[](){}": sep = ''
        else: sep = ', '                 # separator after data item
    print(")")
    m = m[10:]  # delete the processed match group of 10
The output with your sample data is:
C [:1.62,122] /org/freedesktop/filehandler/routing org.freedesktop.filehandler.routing.start (29877, 0)
R [:1.62,122] org.freedesktop.filehandler.routing.start (29877, 0)
C [:1.62,123] /org/freedesktop/filehandler/routing org.freedesktop.filehandler.routing.comment ("starting ..", "routing")
R [:1.62,123] org.freedesktop.filehandler.routing.comment (-23145)
S [:1.64, 124] /org/freedesktop/fileserver org.freedesktop.DBus.Properties.PropertiesChanged ("com.freedesktop.Systemserver", [("SystemTime", {12, 9, 0})][])
I have files with incorrect JSON that I want to start fixing by getting it into properly grouped chunks.
The brace grouping {{ {} {} } } {{}} {{{}}} should already be correct.
How can I grab all the top-level braces, correctly grouped, as separate strings?
If you don't want to install any extra modules, a simple function will do:
def top_level(s):
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                start = i
            depth += 1
        elif c == '}' and depth:
            depth -= 1
            if depth == 0:
                yield s[start:i+1]

print(list(top_level('{{ {} {} } } {{}} {{{}}}')))
Output:
['{{ {} {} } }', '{{}}', '{{{}}}']
It will skip invalid braces but could be easily modified to report an error when they are spotted.
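For example, a strict variant (a sketch; top_level_strict is a made-up name) that raises on unbalanced braces instead of skipping them:

def top_level_strict(s):
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                start = i
            depth += 1
        elif c == '}':
            if depth == 0:
                raise ValueError("unmatched '}' at index %d" % i)
            depth -= 1
            if depth == 0:
                yield s[start:i+1]
    if depth:
        raise ValueError("unclosed '{' opened at index %d" % start)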
Using the regex module:
In [1]: import regex
In [2]: braces = regex.compile(r"\{(?:[^{}]++|(?R))*\}")
In [3]: braces.findall("{{ {} {} } } {{}} {{{}}}")
Out[3]: ['{{ {} {} } }', '{{}}', '{{{}}}']
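(In the regex module's syntax, [^{}]++ possessively matches runs of non-brace characters and (?R) recursively matches the entire pattern, which is what handles the nesting; the standard library's re module has no (?R) construct, hence the third-party import.)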
pyparsing can be really helpful here. It will handle pathological cases where you have braces inside strings, etc. It might be a little tricky to do all of this work yourself, but fortunately, somebody (the author of the library) has already done the hard stuff for us.... I'll reproduce the code here to prevent link-rot:
# jsonParser.py
#
# Implementation of a simple JSON parser, returning a hierarchical
# ParseResults object supporting both list- and dict-style data access.
#
# Copyright 2006, by Paul McGuire
#
# Updated 8 Jan 2007 - fixed dict grouping bug, and made elements and
# members optional in array and object collections
#
json_bnf = """
object
    { members }
    {}
members
    string : value
    members , string : value
array
    [ elements ]
    []
elements
    value
    elements , value
value
    string
    number
    object
    array
    true
    false
    null
"""
from pyparsing import *
TRUE = Keyword("true").setParseAction( replaceWith(True) )
FALSE = Keyword("false").setParseAction( replaceWith(False) )
NULL = Keyword("null").setParseAction( replaceWith(None) )
jsonString = dblQuotedString.setParseAction( removeQuotes )
jsonNumber = Combine( Optional('-') + ( '0' | Word('123456789',nums) ) +
                      Optional( '.' + Word(nums) ) +
                      Optional( Word('eE',exact=1) + Word(nums+'+-',nums) ) )
jsonObject = Forward()
jsonValue = Forward()
jsonElements = delimitedList( jsonValue )
jsonArray = Group(Suppress('[') + Optional(jsonElements) + Suppress(']') )
jsonValue << ( jsonString | jsonNumber | Group(jsonObject) | jsonArray | TRUE | FALSE | NULL )
memberDef = Group( jsonString + Suppress(':') + jsonValue )
jsonMembers = delimitedList( memberDef )
jsonObject << Dict( Suppress('{') + Optional(jsonMembers) + Suppress('}') )
jsonComment = cppStyleComment
jsonObject.ignore( jsonComment )
def convertNumbers(s, l, toks):
    n = toks[0]
    try:
        return int(n)
    except ValueError:
        return float(n)

jsonNumber.setParseAction( convertNumbers )
Phew! That's a lot ... Now how do we use it? The general strategy here will be to scan the string for matches and then slice those matches out of the original string. Each scan result is a tuple of the form (parse tokens, start_index, end_index). For our use, we don't care about the parse tokens, just the start and end. We could do string[result[1]:result[2]] and it would work. We can also do string[slice(*result[1:])] -- take your pick.
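For completeness, testdata is never defined above; any string containing JSON objects will do, e.g.:

# Hypothetical input; `testdata` is not defined in the original post.
testdata = '{"a": 1} stray text {"b": {"c": [true, null]}}'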
results = jsonObject.scanString(testdata)
for result in results:
    print('*' * 80)
    print(testdata[slice(*result[1:])])
I have a lexer for HTML tokens which returns and prints LexToken objects for a given HTML string.
I have a parser which takes a list of tokens and a grammar as input and returns True if the tokens form a valid string in the grammar.
I want to combine these programs into a complete lexer-parser program.
But the problem is that the second program expects the tokens as a list, while the output of the first program is LexToken objects.
Lexer
import ply.lex as lex
tokens = (
    'LANGLE',       # <
    'LANGLESLASH',  # </
    'RANGLE',       # >
    'SLASHRANGLE',  # />
    'EQUAL',        # =
    'STRING',       # "144"
    'WORD',         # 'Welcome' in "Welcome to my webpage."
    'NUMBER'        # 12, 5.6, -1., 3.14159, -8.1, 867.5309
)
t_ignore = ' \t\v\r' # shortcut for whitespace
states = (
    ('htmlcomment', 'exclusive'),  # <!--
)
def t_htmlcomment(t):
    r'<!--'
    t.lexer.begin('htmlcomment')

def t_htmlcomment_end(t):
    r'-->'
    t.lexer.lineno += t.value.count('\n')
    t.lexer.begin('INITIAL')
    pass

def t_htmlcomment_error(t):
    t.lexer.skip(1)

def t_LANGLESLASH(t):
    r'</'
    return t

def t_LANGLE(t):
    r'<'
    return t

def t_SLASHRANGLE(t):
    r'/>'
    return t

def t_RANGLE(t):
    r'>'
    return t

def t_EQUAL(t):
    r'='
    return t

def t_STRING(t):
    r'"[^"]*"'
    t.value = t.value[1:-1]  # drop "surrounding quotes"
    return t

def t_WORD(t):
    r'[^ <>]+'
    return t
webpage = "hello <!-- comment --> 123456 <b> Bushra </b> all"
htmllexer = lex.lex()
htmllexer.input(webpage)
while True:
    tok = htmllexer.token()
    if not tok: break
    print(tok)
This is my parser
work_count = 0  # track one notion of "time taken"

def addtoset(theset, index, elt):
    if not (elt in theset[index]):
        theset[index] = [elt] + theset[index]
        return True
    return False

def parse(tokens, grammar):
    global work_count
    work_count = 0
    tokens = tokens + ["end_of_input_marker"]
    chart = {}
    start_rule = grammar[0]
    for i in range(len(tokens) + 1):
        chart[i] = []
    start_state = (start_rule[0], [], start_rule[1], 0)
    chart[0] = [start_state]
    for i in range(len(tokens)):
        while True:
            changes = False
            for state in chart[i]:
                # State === x -> a b . c d , j
                x = state[0]
                ab = state[1]
                cd = state[2]
                j = state[3]
                next_states = [(rule[0], [], rule[1], i)
                               for rule in grammar if cd != [] and cd[0] == rule[0]]
                work_count = work_count + len(grammar)
                for next_state in next_states:
                    changes = addtoset(chart, i, next_state) or changes
                if cd != [] and tokens[i] == cd[0]:
                    next_state = (x, ab + [cd[0]], cd[1:], j)
                    changes = addtoset(chart, i + 1, next_state) or changes
                next_states = [(jstate[0], jstate[1] + [x], (jstate[2])[1:],
                                jstate[3])
                               for jstate in chart[j]
                               if cd == [] and jstate[2] != [] and (jstate[2])[0] == x]
                work_count = work_count + len(chart[j])
                for next_state in next_states:
                    changes = addtoset(chart, i, next_state) or changes
            # We're done if nothing changed!
            if not changes:
                break
    accepting_state = (start_rule[0], start_rule[1], [], 0)
    return accepting_state in chart[len(tokens) - 1]
grammar = [
    ("html", ["element", "html"]),
    ("html", []),
    ("element", ["word"]),
    ("element", ["tag-open", "word", "tag-close"]),
    ("tag-open", ["<", "word", ">"]),
    ("tag-close", ["<", "/", "word", ">"])
]

tokens = ["<", "b", ">", "Hello", "<", "/", "b", ">"]
result = parse(tokens, grammar)
print(result)
You can do this by using the value attribute of the LexToken:
webpage = "hello <!-- comment --> 123456 <b> Bushra </b> all"
htmllexer = lex.lex()
htmllexer.input(webpage)
tokens = []
while True:
    tok = htmllexer.token()
    if not tok: break
    tokens.append(tok.value)
print(tokens)  # ['hello', '123456', '<', 'b', '>', 'Bushra', '</', 'b', '>', 'all']
All available attributes may be obtained by using the dir() function:
print(dir(tok))
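One caveat (not from the original answer): the extracted values will not parse with the question's grammar as-is, because the lexer emits a single '</' where the tag-close rule expects separate '<' and '/' tokens, so the token stream and grammar need aligning first. Once they agree, the two programs chain directly:

result = parse(tokens, grammar)  # tokens as extracted above
print(result)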
I'm trying to get a limited syntax for domain names to work. The syntax is defined at https://www.rfc-editor.org/rfc/rfc1035 Section 2.3.1. A subset of it is as under
<label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
<ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
<let-dig-hyp> ::= <let-dig> | "-"
<let-dig> ::= <letter> | <digit>
<letter> ::= any one of the 52 alphabetic characters A through Z in upper case and a through z in lower case
<digit> ::= any one of the ten digits 0 through 9
My attempt is below; I'm trying to match label.
from parsley import makeGrammar
import ometa
domain = makeGrammar('''
letdighyp = (letterOrDigit|-)
label = letterOrDigit letdighyp+ letterOrDigit
''', {})
tests = ('abcd1000',)
for t in tests:
    try:
        print(domain(t).label())
    except ometa.runtime.ParseError as e:
        print('parse failed for', t)
        print(e)
running that gives me
parse failed for abcd1000
abcd1000
^
Parse error at line 2, column 0: expected EOF. trail: [digit letdig letdighyp]
What am I doing wrong?
P.S.
label = letterOrDigit letdighyp+ letterOrDigit
is the line I'm not able to get working. It matches the string if the last letterOrDigit isn't there.
Try:
from parsley import makeGrammar
import ometa
def check_the_rest(s):
    if '-' in s:
        return s[-1].isalnum()
    return True
domain = makeGrammar('''
letdighyp = (letterOrDigit|'-')
# label = <letterOrDigit (letdighyp* letterOrDigit)*>
label = <letter the_rest>
the_rest = letdighyp*:r ?(check_the_rest(r)) -> r
''', dict(check_the_rest=check_the_rest))
tests = ('a', 'abcd1000', 'a-',)
for t in tests:
    try:
        print('✓', domain(t).label())
    except ometa.runtime.ParseError as e:
        print('parse failed for', t)
        print(e)
(The last test should fail)
I'm using the py3 branch by vsajip.
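For readers new to Parsley's notation: <expr> returns the text consumed by expr, :r binds a result to the name r, ?(cond) is a semantic predicate that fails the match when cond is false, and -> r makes the rule return r.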