Why does using Combine(...) preserve the whitespace, whereas Keyword(...) removes the whitespace?
I need to preserve the whitespace after the matched token.
The test is as follows:
from pyparsing import *
def parse(string, refpattern):
    """Show how ``refpattern`` matches inside ``string``.

    First prints every match found by ``searchString``; then prints the
    result of splitting the whole string into the text before the match
    ('previous'), the match itself ('ref') and the remainder ('rest').
    """
    found = refpattern.searchString(string)
    print(found)

    # Parenthesised continuation instead of backslashes; same expression.
    splitter = (
        StringStart()
        + SkipTo(refpattern)('previous')
        + refpattern('ref')
        + SkipTo(StringEnd())('rest')
    )
    print(splitter.parseString(string))
# Sample input: the reference marker is preceded and followed by a space.
string = "With #ref to_something"

# Identifier with an optional ".suffix", returned as a single token.
identifier = Combine(
    Word(alphas + '_', alphanums + '_') + Optional('.' + Word(alphas))
)

# Variant 1: keyword alternation, normalised to the token 'ref'.
pattern_without_space = (
    (CaselessKeyword('#ref') | CaselessKeyword(r'\ref')).setParseAction(lambda s, l, t: ['ref'])
    + White().suppress()
    + identifier
)

# Variant 2: Combine of a suppressed marker character and the literal 'ref'.
pattern_with_space = (
    Combine((Literal('#') | Literal('\\')).suppress() + 'ref')
    + White().suppress()
    + identifier
)

parse(string, pattern_without_space)
parse(string, pattern_with_space)
will output:
[['ref', 'to_something']]
['With', 'ref', 'to_something', '']
[['ref', 'to_something']]
['With ', 'ref', 'to_something', '']
# ^ space i need is preserved here
The problem happens when using alternation (the | operator) with CaselessKeyword. See these examples:
from pyparsing import *
# Shared input for every parser variant below.
theString = 'This is #Foo Bar'

# Identifier with an optional ".suffix", returned as a single token.
identifier = Combine(
    Word(alphas + '_', alphanums + '_') + Optional('.' + Word(alphas))
)


def testParser(p):
    """Split ``theString`` around the first match of ``p``.

    Returns the parse results holding the text before the match
    ("previous"), the match ("body") and the trailing text ("rest").
    """
    leading = SkipTo(p)("previous")
    trailing = SkipTo(StringEnd())("rest")
    q = StringStart() + leading + p("body") + trailing
    return q.parseString(theString)
def test7():
    """Print the split produced by five ways of matching the '#Foo' marker."""
    tail = White().suppress() + identifier

    # All variants are built before any of them is run, as in the original.
    variants = [
        ("p0", (CaselessKeyword('#Foo') | Literal('#qwe')) + tail),
        ("p1", (CaselessKeyword('#Foo') | CaselessKeyword('#qwe')) + tail),
        ("p2", (Literal('#qwe') | CaselessKeyword('#Foo')) + tail),
        ("p3", (CaselessKeyword('#Foo')) + tail),
        ("p4", Combine((Literal('#') | Literal('\\')).suppress() + 'Foo') + tail),
    ]
    for label, parser in variants:
        print("%s: %s" % (label, testParser(parser)))


test7()
The output is:
p0: ['This is', '#Foo', 'Bar', '']
p1: ['This is', '#Foo', 'Bar', '']
p2: ['This is', '#Foo', 'Bar', '']
p3: ['This is ', '#Foo', 'Bar', '']
p4: ['This is ', 'Foo', 'Bar', '']
Perhaps this is a bug?
Update: This is how you could define your own parser to match either #Foo or \Foo as a keyword:
from pyparsing import *
import string
class FooKeyWord(Token):
    """Match a keyword introduced by either '#' or a backslash.

    The match only succeeds when the keyword is not glued to identifier
    characters on either side, mirroring how a keyword boundary is checked.
    The returned token is the marker character followed by the keyword,
    e.g. '#Foo' or '\\Foo'.
    """

    alphas = string.ascii_lowercase + string.ascii_uppercase
    nums = "0123456789"
    alphanums = alphas + nums

    def __init__(self, keyword="Foo"):
        """Create the matcher.

        keyword -- text expected right after the marker character;
                   defaults to "Foo" so ``FooKeyWord()`` keeps working.
        """
        super(FooKeyWord, self).__init__()
        self.keyword = keyword
        # Use the class-level character set rather than relying on a
        # same-named module global being in scope.
        self.identChars = self.alphanums + "_$"
        # setName also fills in errmsg ("Expected ..."); assigning
        # self.name directly leaves the error message empty.
        self.setName("#" + keyword)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (next_loc, token) on a match, else raise ParseException."""
        end = loc + 1 + len(self.keyword)
        starts_with_marker = instring[loc] in ('#', '\\')
        if (starts_with_marker
                and instring.startswith(self.keyword, loc + 1)
                # nothing identifier-like directly after the keyword
                and (end >= len(instring) or instring[end] not in self.identChars)
                # nothing identifier-like directly before the marker
                and (loc == 0 or instring[loc - 1] not in self.identChars)):
            return end, instring[loc] + self.keyword
        raise ParseException(instring, loc, self.errmsg, self)
def test8():
    """Print the split around the keyword for the '#' and backslash markers."""
    p = FooKeyWord() + White().suppress() + identifier
    q = (
        StringStart()
        + SkipTo(p)("previous")
        + p("body")
        + SkipTo(StringEnd())("rest")
    )
    for marker in ('#', '\\'):
        text = "This is %sFoo Bar" % marker
        print("with %sFoo: %s" % (marker, q.parseString(text)))
And the output:
with #Foo: ['This is ', '#Foo', 'Bar', '']
with \Foo: ['This is ', '\\Foo', 'Bar', '']
Related
I have a stringized array that I am receiving from an external system. It is stripped of quotes and separated by a comma and a space.
I'm trying to use pyparsing, but I'm only getting the first element of the array. How do I specify that a word must end in an alphanumeric character?
# Input as received: entries are separated by a comma followed by a space,
# while commas without a space occur inside an entry.
value = '[AaAa=Aaa_xx_12,Bxfm=djfn_13, ldfjk=ddd,ttt=ddfs_ddfj_99]'

LBR = pp.Suppress("[")
RBR = pp.Suppress("]")

# A word may contain '_', '=' and ',' after its first letter.
qs = pp.Word(pp.alphas, pp.alphanums + pp.srange("[_=,]"))
qsList = LBR + pp.delimitedList(qs, delim=', ') + RBR

print(value)
print(qsList.parseString(value).asList())
[AaAa=Aaa_xx_12,Bxfm=djfn_13, ldfjk=ddd,ttt=ddfs_ddfj_99]
# pyparsing.exceptions.ParseException: Expected ']', found 'ldfjk' (at char 30), (line:1, col:31)
BR
Thanks for the mental support :D
LBR = pp.Suppress("[")
RBR = pp.Suppress("]")

# One "key=value" element; the value may contain underscores.
element = (
    pp.Word(pp.alphanums)
    + pp.Literal('=')
    + pp.Word(pp.alphanums + pp.srange("[_]"))
)

# An entry is an element followed by one or more further elements, each
# optionally preceded by a bare comma; Combine returns it as one string.
qs = pp.Combine(
    element + pp.OneOrMore(pp.Optional(pp.Literal(',')) + element)
)
qsList = LBR + pp.delimitedList(qs, delim=', ') + RBR

print(value)
print(qsList.parseString(value).asList())
#[AaAa=Aaa_xx_12,Bxfm=djfn_13, ldfjk=ddd,ttt=ddfs_ddfj_99]
#['AaAa=Aaa_xx_12,Bxfm=djfn_13', 'ldfjk=ddd,ttt=ddfs_ddfj_99']
I have the following code:
from scipy.constants import R as Rgi
from numpy import *
import numpy as np
import matplotlib.pyplot as plt
Constants
# Per-component property tables. Every array has one entry per substance and
# the same position refers to the same substance in all of them; Nombre gives
# the substance name for each position.
# Critical temperatures (unit per the trailing comment).
Tccomp = array([190.56,305.32,369.83,425.12,469.7,507.6,282.34,365.57,511.76,553.58,562.16,591.8,512.64,513.92,536.78,400.1,408.00,508.2,591.95,469.15,132.45,154.58,126.2,405.65,430.75,304.26,647.13,535.5]) #k
# Critical pressures (unit per the trailing comment).
Pccomp = array([45.90,48.50,42.10,37.70,33.60,30.40,50.30,46.30,45.00,41.00,48.00,41.30,81.40,61.20,51.20,52.70,65.90,47.10,57.40,72.60,37.90,50.20,33.98,113.00,78.60,73.90,219.40,41.50]) # bar
# Critical volumes (unit per the trailing comment).
Vccomp = array([0.099,0.146,0.2,0.255,0.315,0.373,0.132,0.188,0.257,0.308,0.261,0.314,0.117,0.168,0.22,0.171,0.115,0.21,0.179,0.142,0.092,0.074,0.089,0.072,0.123,0.095,0.056,0.267]) # m^3/kmol
# Presumably critical compressibility factors (dimensionless) -- TODO confirm.
Zccomp = array([0.286,0.279,0.273,0.272,0.271,0.269,0.283,0.286,0.272,0.274,0.273,0.262,0.224,0.24,0.252,0.271,0.223,0.234,0.208,0.264,0.318,0.287,0.288,0.241,0.269,0.277,0.228,0.249])
# Presumably acentric factors (they feed the kappa correlation) -- TODO confirm.
Wcomp = array([0.011,0.098,0.149,0.197,0.251,0.304,0.086,0.137,0.196,0.212,0.209,0.262,0.566,0.643,0.617,0.192,0.282,0.307,0.463,0.201,0.0,0.02,0.037,0.253,0.244,0.244,0.343,0.323])
# Substance names, in table order.
Nombre = array(['Metano','Etano','Propano','Butano','Pentano','Hexano','Etileno','Propileno','Ciclopentano','Ciclohexano','Benceno','Tolueno','Metanol','Etanol','1-Propanol','Dimetileter','Formaldehido','Acetona','Acido Actico','Oxido de etileno','Aire','Oxigeno','Nitrogeno','Amoniaco','Dioxido de azufre','Dioxido de carbono','Agua','Metiletilcetona'])
# Gas constant rescaled by 10 (target unit per the trailing comment).
Rbar = Rgi*10 # bar*cm^3/K*mol
# Gas constant exactly as imported from scipy.constants.
Rgas = Rgi # Kj / Kmol * K
Program interface
# Component menu. The number shown next to each name must equal the
# substance's position in the property tables, because the user's answer is
# used directly as an index. The original menu left out "Dióxido de carbono",
# so the last two entries were displayed one position too low.
print('Cálculo de la fugacidad de una mezcla de gases reales utilizando Peng Robinson')
print('-----------------------------------------------------------------------------------')
print(' Metano |(0)| ')
print(' Etano |(1)| ')
print(' Propano |(2)| ')
print(' Butano |(3)| ')
print(' Pentano |(4)| ')
print(' Hexano |(5)| ')
print(' Etileno |(6)| ')
print(' Propileno |(7)| ')
print(' Ciclopentano |(8)| ')
print(' Ciclohexano |(9)| ')
print(' Benceno |(10)|')
print(' Tolueno |(11)|')
print(' Metanol |(12)|')
print(' Etanol |(13)|')
print(' 1-propanol |(14)|')
print(' Dimetileter |(15)|')
print(' Formaldehido |(16)|')
print(' Acetona |(17)|')
print(' Ácido acético |(18)|')
print(' Óxido de etileno |(19)|')
print(' Aire |(20)|')
print(' Oxígeno |(21)|')
print(' Nitrógeno |(22)|')
print(' Amoniaco |(23)|')
print(' Dióxido de azufre |(24)|')
print(' Dióxido de carbono |(25)|')
print(' Agua |(26)|')
print(' Metiletilcetona |(27)|')
print('-------------------------------------------------------------------------------------')
print(' ')
print(' ')
# Number of components in the mixture; read once and used by PRMix.
ncomp = int(input('numero de componentes del sistema : '))
def PRMix(T, P):
    """Fugacity coefficients of a gas mixture from the Peng-Robinson equation.

    Prompts for ``ncomp`` component indices and mole fractions, looks the
    components up in the module-level tables ``Tccomp``, ``Pccomp`` and
    ``Wcomp``, and solves the cubic in the compressibility factor Z.

    Parameters
    ----------
    T : temperature, in the unit of ``Tccomp``.
    P : pressure, in the unit of ``Pccomp`` (only the ratios T/Tc and P/Pc
        enter the dimensionless A and B terms).

    Returns
    -------
    (PhiV, PhiL) : per-component fugacity coefficient arrays evaluated at the
        largest and the smallest real root of the cubic respectively.
        (The original returned nothing; callers that ignore the result are
        unaffected.)
    """
    comp = np.zeros(ncomp, dtype=int)
    y = np.zeros(ncomp)
    for i in range(ncomp):
        comp[i] = int(input('Escoja el componente : '))
        y[i] = float(input(' Fraccion : '))
        print(' ')

    # Pick the chosen rows out of the global tables under NEW local names.
    # Re-using the names Wcomp/Tccomp/Pccomp for zero-filled locals hid the
    # tables and caused "index out of bounds for axis 0 with size ncomp".
    w_sel = Wcomp[comp]
    tc_sel = Tccomp[comp]
    pc_sel = Pccomp[comp]

    kappa = 0.37464 + 1.54226 * w_sel - 0.26992 * w_sel ** 2
    alpha = (1 + kappa * (1 - np.sqrt(T / tc_sel))) ** 2
    a = 0.45724 * (((Rgi ** 2) * (tc_sel ** 2)) / pc_sel) * alpha
    # NOTE(review): the usual Peng-Robinson coefficient is 0.07780; 0.0788 is
    # kept as found -- confirm whether it is a typo.
    b = 0.0788 * ((Rgi * tc_sel) / pc_sel)

    # Mixing rules: a_ij = sqrt(a_i a_j), a_mix = sum_ij y_i y_j a_ij,
    # b_mix = sum_i y_i b_i.
    aij = np.sqrt(np.outer(a, a))
    amix = np.sum(np.outer(y, y) * aij)
    bmix = np.sum(y * b)

    RT = Rgi * T
    Aij = (aij * P) / (RT ** 2)
    Bi = (b * P) / RT
    Amix = (amix * P) / (RT ** 2)
    Bmix = (bmix * P) / RT

    pol = [1,
           Bmix - 1,
           Amix - 2 * Bmix - 3 * Bmix ** 2,
           Bmix ** 2 + Bmix ** 3 - Amix * Bmix]
    Z = np.roots(pol)
    # Keep only genuinely real roots; the real part of a complex-conjugate
    # pair is not a physical compressibility factor.
    Z = Z.real[np.abs(Z.imag) < 1e-12]
    Zvmix = Z.max()
    Zlmix = Z.min()

    sqrt2 = np.sqrt(2)  # the original called an undefined name "Sqrt"

    def _ln_phi(Zmix):
        # ln(phi_i) for one root of the cubic.
        return ((Bi / Bmix) * (Zmix - 1)
                - np.log(Zmix - Bmix)
                - (Amix / (2 * sqrt2 * Bmix))
                * ((2 * (np.dot(Aij, y) / Amix) - (Bi / Bmix))
                   * np.log((Zmix + (1 + sqrt2) * Bmix)
                            / (Zmix + (1 - sqrt2) * Bmix))))

    PhiV = np.exp(_ln_phi(Zvmix))
    PhiL = np.exp(_ln_phi(Zlmix))
    return PhiV, PhiL
However, when I run it I get this error:
Traceback (most recent call last):
line 82, in PRMix
Wcomp[i] = Wcomp[int(comp[i])]
IndexError: index 5 is out of bounds for axis 0 with size 2
>>> IndexError: index 5 is out of bounds for axis 0 with size 2
I can't figure out what's wrong. I tried reading about the error but didn't find anything that applied to my code.
I have the following list of lists of dicts:
l = [[{'close': 'TRUE'}], [{'error': 'FALSE'}], [{'close': 'TRUE', 'error': 'TRUE'}, {'close': 'TRUE', 'error': 'FALSE'}]]
and I would like to print it this way:
(close = TRUE) & (error = FALSE) & ((close = TRUE & error = TRUE) | (close = TRUE & error = FALSE))
For the moment, I have the following function, which nearly does the job:
def pretty_string(l):
    """Print ``l`` (a list of lists of dicts) as a boolean expression.

    Each dict becomes '(k = v & ...)', the dicts of one inner list are joined
    with ' | ' and wrapped in parentheses, and the inner lists are joined
    with ' & '. Every level is always parenthesised.
    """
    clauses = []
    for group in l:
        alternatives = []
        for mapping in group:
            pairs = ['{0} = {1}'.format(key, value)
                     for key, value in mapping.items()]
            alternatives.append('({0})'.format(' & '.join(pairs)))
        clauses.append('({0})'.format(' | '.join(alternatives)))
    print(' & '.join(clauses))
But it gives me:
((close = TRUE)) & ((error = FALSE)) & ((close = TRUE & error = TRUE) | (close = TRUE & error = FALSE))
Notice the extra parentheses around "(close = TRUE)" and "(error = FALSE)".
How can these be removed?
Use a conditional expression to choose the format string, e.g. ('({0})' if len(disjuncts) > 1 else '{0}'), so that the inner parentheses are only added when they are actually needed. Like this:
def pretty_string(l):
    """Print ``l`` (a list of lists of dicts) as a boolean expression.

    Each inner list becomes one parenthesised clause; clauses are joined with
    ' & '. Inside a clause the dicts are joined with ' | ', and each dict is
    rendered as 'k = v & k = v'.

    A dict gets its own parentheses only when it has more than one key AND
    shares its clause with other dicts. Checking the key count alone would
    print '((a = 1 & b = 2))' for a clause made of a single multi-key dict.
    """
    clauses = []
    for clause in l:
        several_terms = len(clause) > 1
        rendered_terms = []
        for term in clause:
            body = ' & '.join(
                '{0} = {1}'.format(key, value) for key, value in term.items()
            )
            needs_parens = several_terms and len(term) > 1
            rendered_terms.append(('({0})' if needs_parens else '{0}').format(body))
        clauses.append('({0})'.format(' | '.join(rendered_terms)))
    print(' & '.join(clauses))
def conv(item):
    """Yield one '(k = v & ...)' string per dict found in ``item``.

    Lists are walked recursively in order; anything that is neither a dict
    nor a list yields nothing.
    """
    if isinstance(item, dict):
        pairs = ["{} = {}".format(key, val) for key, val in item.items()]
        yield '(' + ' & '.join(pairs) + ')'
        return
    if isinstance(item, list):
        for child in item:
            for rendered in conv(child):
                yield rendered


def pretty_string(l):
    """Return every dict in ``l`` rendered by ``conv``, joined with ' | '."""
    fragments = list(conv(l))
    return ' | '.join(fragments)
I'm trying to build a grammar to parse an Erlang tagged tuple list, and map this to a Dict in pyparsing. I'm having problems when I have a list of Dicts. The grammar works if the Dict has just one element, but when I add a second I can't work out how to get it to parse.
Current simplified grammar code (I removed the bits of the language not necessary in this case):
#!/usr/bin/env python2.7
from pyparsing import *
# Erlang config file definition:
# Atom: one or more letters or underscores.
erlangAtom = Word( alphas + '_')
# Double-quoted string with the quotes stripped from the result.
# NOTE(review): this attaches the parse action to the shared dblQuotedString
# object itself rather than to a copy -- confirm that is intended.
erlangString = dblQuotedString.setParseAction( removeQuotes )
# Forward declarations: values and lists refer to each other recursively.
erlangValue = Forward()
erlangList = Forward()
# Comma-separated values, then the same wrapped in suppressed brackets.
erlangElements = delimitedList( erlangValue )
erlangCSList = Suppress('[') + erlangElements + Suppress(']')
# A generic list is returned as one grouped (nested) result.
erlangList <<= Group( erlangCSList )
# {atom, value} -> grouped pair [atom, value].
erlangTaggedTuple = Group( Suppress('{') + erlangAtom + Suppress(',') +
erlangValue + Suppress('}') )
# [ {atom, value}, ... ] with each pair also exposed as a named result.
# NOTE(review): unlike erlangList, this expression is not wrapped in Group();
# the sample output shows the pairs of nested dicts ending up in one flat
# list, which looks like the consequence -- confirm.
erlangDict = Dict( Suppress('[') + delimitedList( erlangTaggedTuple ) +
Suppress(']') )
# Alternatives are tried in this order; erlangDict is tried before erlangList.
erlangValue <<= ( erlangAtom | erlangString |
erlangTaggedTuple |
erlangDict | erlangList )
if __name__ == "__main__":
    # A single list of tagged tuples.
    working = """
[{foo,"bar"}, {baz, "bar2"}]
"""
    # A list containing two lists of tagged tuples.
    broken = """
[
[{foo,"bar"}, {baz, "bar2"}],
[{foo,"bob"}, {baz, "fez"}]
]
"""
    w = erlangValue.parseString(working)
    print(w.dump())

    b = erlangValue.parseString(broken)
    # Dump each expected sub-list in turn; indexing fails if it is missing.
    for idx in (0, 1):
        print("b[%d]: %s" % (idx, b[idx].dump()))
This gives:
[['foo', 'bar'], ['baz', 'bar2']]
- baz: bar2
- foo: bar
b[0]: [['foo', 'bar'], ['baz', 'bar2'], ['foo', 'bob'], ['baz', 'fez']]
- baz: fez
- foo: bob
b[1]:
Traceback (most recent call last):
File "./erl_testcase.py", line 39, in <module>
print "b[1]:", b[1].dump()
File "/Library/Python/2.7/site-packages/pyparsing.py", line 317, in __getitem__
return self.__toklist[i]
IndexError: list index out of range
i.e. working works, but broken doesn't parse as two lists.
Any ideas?
Edit: Tweaked testcase to be more explicit about expected output.
Ok, so I have never worked with pyparsing before, so excuse me if my solution does not make sense. Here we go:
As far as I understand what you need is three main structures. The most common mistake you made was grouping delimitedLists. They are already grouped, so you have an issue of double grouping. Here are my definitions:
for {a,"b"}:
erlangTaggedTuple = Dict(Group(Suppress('{') + erlangAtom + Suppress(',') + erlangValue + Suppress('}') ))
for [{a,"b"}, {c,"d"}]:
erlangDict = Suppress('[') + delimitedList( erlangTaggedTuple ) + Suppress(']')
for the rest:
erlangList <<= Suppress('[') + delimitedList( Group(erlangDict|erlangList) ) + Suppress(']')
So my fix for your code is:
#!/usr/bin/env python2.7
from pyparsing import *
# Erlang config file definition:
# Atom: one or more letters or underscores.
erlangAtom = Word( alphas + '_')
# Double-quoted string with the quotes stripped from the result.
erlangString = dblQuotedString.setParseAction( removeQuotes )
# Forward declarations: values and lists refer to each other recursively.
erlangValue = Forward()
erlangList = Forward()
# {atom, value}: grouped pair, wrapped in Dict so the atom names the value.
erlangTaggedTuple = Dict(Group(Suppress('{') + erlangAtom + Suppress(',') +
erlangValue + Suppress('}') ))
# [ {atom, value}, ... ]: brackets suppressed, no extra grouping here.
erlangDict = Suppress('[') + delimitedList( erlangTaggedTuple ) + Suppress(']')
# A list whose elements are dicts or further lists; each element is grouped
# so that neighbouring elements stay separate.
# NOTE(review): elements are limited to erlangDict|erlangList, so a list of
# plain atoms or strings would not match this rule -- confirm acceptable.
erlangList <<= Suppress('[') + delimitedList( Group(erlangDict|erlangList) ) + Suppress(']')
# Alternatives are tried in this order.
erlangValue <<= ( erlangAtom | erlangString |
erlangTaggedTuple |
erlangDict| erlangList )
if __name__ == "__main__":
    # A single list of tagged tuples.
    working = """
[{foo,"bar"}, {baz, "bar2"}]
"""
    # A list containing two lists of tagged tuples.
    broken = """
[
[{foo,"bar"}, {baz, "bar2"}],
[{foo,"bob"}, {baz, "fez"}]
]
"""
    w = erlangValue.parseString(working)
    print(w.dump())

    b = erlangValue.parseString(broken)
    # Dump both sub-lists, first then second.
    for idx in (0, 1):
        print("b[%d]: %s" % (idx, b[idx].dump()))
Which gives the output:
[['foo', 'bar'], ['baz', 'bar2']]
- baz: bar2
- foo: bar
b[0]: [['foo', 'bar'], ['baz', 'bar2']]
- baz: bar2
- foo: bar
b[1]: [['foo', 'bob'], ['baz', 'fez']]
- baz: fez
- foo: bob
Hope that helps, cheers!
I can't understand why it's not working, because your code looks very much like the JSON example, which handles nested lists just fine.
But the problem seems to happen at this line
erlangElements = delimitedList( erlangValue )
where if the erlangValues are lists, they get appended instead of cons'd. You can kludge around this with
erlangElements = delimitedList( Group(erlangValue) )
which adds an extra layer of list around the top-most element, but keeps your sub-lists from merging.
Sorry for the sorry title. I could not think of anything better
I am trying to implement a DSL with pyparsing that has the following requirements:
Variables: all of them begin with v_
Unary operators: +, -
Binary operators: +,-,*,/,%
Constant numbers
Functions, like normal functions when they have just one variable
Functions need to work like this: foo(v_1+v_2) = foo(v_1) + foo(v_2), foo(bar(10*v_6))=foo(bar(10))*foo(bar(v_6)). It should be the case for any binary operation
I am able to get 1-5 working
This is the code I have so far
from pyparsing import *
# Module-level stack filled by the parse actions while parsing.
exprstack = []


#~ #traceParseAction
def pushFirst(tokens):
    """Parse action: place the first matched token at the front of exprstack."""
    head = tokens[0]
    exprstack[:0] = [head]
# define grammar
# --- numeric literals ---
point = Literal( '.' )
plusorminus = Literal( '+' ) | Literal( '-' )
number = Word( nums )
# Optionally signed run of digits, returned as one token.
integer = Combine( Optional( plusorminus ) + number )
# integer, optional fractional part, then an optional second integer.
# NOTE(review): the trailing Optional(integer) looks like an exponent part
# without an 'E' marker -- confirm what it is meant to match.
floatnumber = Combine( integer +
Optional( point + Optional( number ) ) +
Optional( integer )
)
# Variable names: the prefix "v_" followed by digits, as one token.
ident = Combine("v_" + Word(nums))
# --- operators and punctuation ---
plus = Literal( "+" )
minus = Literal( "-" )
mult = Literal( "*" )
div = Literal( "/" )
cent = Literal( "%" )
# Parentheses are matched but dropped from the results.
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
addop = plus | minus
multop = mult | div | cent
expop = Literal( "^" )
band = Literal( "#" )
# define expr as Forward, since we will reference it in atom
expr = Forward()
# Function name: letters only.
fn = Word( alphas )
# An atom is a number, a variable, or a function call -- pushFirst is attached
# to that whole alternation and pushes its first token (the function name for
# a call) -- or a parenthesised expression whose tokens are suppressed.
atom = ( ( floatnumber | integer | ident | ( fn + lpar + expr + rpar ) ).setParseAction(pushFirst) |
( lpar + expr.suppress() + rpar ))
# factor: atom followed by either one "#" factor or any number of "^" factor;
# recursive, hence the Forward.
factor = Forward()
factor << atom + ( ( band + factor ).setParseAction( pushFirst ) | ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) ) )
# term: factors joined by * / %; each operator is pushed after its operand.
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
# expr: terms joined by + and -.
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
# Debug output of the assembled expression.
print(expr)
bnf = expr
# NOTE(review): "pattern" is defined here, but the test() helper in this
# snippet parses with bnf and parseAll=True instead -- confirm it is needed.
pattern = bnf + StringEnd()
def test(s):
    """Parse ``s`` completely and print the stack built by the parse actions."""
    # Empty the shared stack in place so earlier runs do not leak through.
    exprstack[:] = []
    bnf.parseString(s, parseAll=True)
    print(exprstack)


for sample in ("avg(+10)", "v_1+8", "avg(v_1+10)+10"):
    test(sample)
Here is what I want.
My functions are of this type:
foo(v_1+v_2) = foo(v_1) + foo(v_2)
The same behaviour is expected for any other binary operation as well. I have no idea how to make the parser do this automatically.
Break out the function call as a separate sub expression:
function_call = fn + lpar + expr + rpar
Then add a parse action to function_call that pops the operators and operands from exprstack, then pushes them back onto the stack:
if an operand, push operand then function
if an operator, push the operator
Since you are only doing binary operations, you might be better off doing a simple approach first:
# Simplified grammar: an expression is either a single operand or
# "operand binop operand". Results are nested groups, so no side stack is
# needed.

# Suppressed parentheses. These upper-case names were used below without ever
# being defined (the earlier grammar only defines lower-case lpar/rpar).
LPAR, RPAR = map(Suppress, "()")

identifier = Word(alphas+'_', alphanums+'_')
# Declared once; it is referenced by function_call before being defined.
expr = Forward()
# name(expr) -> ['name', [expr tokens]]
function_call = Group(identifier + LPAR + Group(expr) + RPAR)
unop = oneOf("+ -")
binop = oneOf("+ - * / %")
# "number" is the Word(nums) expression from the original grammar.
# function_call is tried before identifier so that "avg(" is not consumed as
# a bare identifier.
operand = Group(Optional(unop) + (function_call | number | identifier))
binexpr = operand + binop + operand
expr << (binexpr | operand)
bnf = expr
This gives you a simpler structure to work with, without having to mess with exprstack.
def test(s):
    """Parse ``s`` completely with ``bnf`` and print the resulting tokens."""
    print(bnf.parseString(s, parseAll=True))


for sample in (
    "10",
    "10+20",
    "avg(10)",
    "avg(+10)",
    "column_1+8",
    "avg(column_1+10)+10",
):
    test(sample)
Gives:
[['10']]
[['10'], '+', ['20']]
[[['avg', [['10']]]]]
[[['avg', [['+', '10']]]]]
[['column_1'], '+', ['8']]
[[['avg', [['column_1'], '+', ['10']]]], '+', ['10']]
You want to expand fn(a op b) to fn(a) op fn(b), but fn(a) should be left alone, so you need to test on the length of the parsed expression argument:
def distribute_function(tokens):
    """Parse action: rewrite fn(a op b) as fn(a) op fn(b).

    ``tokens[0]`` is the grouped call [name, [argument tokens]]. A call whose
    argument holds a single item is left as parsed (nothing is returned, so
    the tokens stay unchanged).
    """
    fname, args = tokens[0]

    # Nothing to distribute over for a single-operand argument.
    if len(args) <= 1:
        return None

    distributed = ParseResults([])
    for position, piece in enumerate(args):
        if position % 2:
            # Odd positions hold operators; copy them through.
            distributed += ParseResults([piece])
        else:
            # Even positions hold operands; wrap each in its own call.
            distributed += ParseResults([ParseResults([fname, ParseResults([piece])])])
    return ParseResults([distributed])


function_call.setParseAction(distribute_function)
Now your calls to test will look like:
[['10']]
[['10'], '+', ['20']]
[[['avg', [['10']]]]]
[[['avg', [['+', '10']]]]]
[['column_1'], '+', ['8']]
[[[['avg', [['column_1']]], '+', ['avg', [['10']]]]], '+', ['10']]
This should even work recursively with a call like fna(fnb(3+2)+fnc(4+9)).