Tkinter: restrict entry data to float - python

I need a function that restricts the user's input to numeric values.
I've been looking for an answer, but the one I found does not allow '-', '+' and ',' (comma).
Here's the code for the validation method:
def __init__(self, master1):
    self.panel2 = tk.Frame(master1)
    self.panel2.grid()
    vcmd = (master1.register(self.validate),
            '%d', '%i', '%P', '%s', '%S', '%v', '%V', '%W')
    self.text1 = tk.Entry(self.panel2, validate='key', validatecommand=vcmd)
    self.text1.grid()
    self.text1.focus()

def validate(self, action, index, value_if_allowed,
             prior_value, text, validation_type, trigger_type, widget_name):
    # action=1 -> insert
    if action == '1':
        if text in '0123456789,-+':
            try:
                float(value_if_allowed)
                return True
            except ValueError:
                return False
        else:
            return False
    else:
        return True
Again, this only seems to work with plain numbers; it rejects commas and plus and minus signs, which is not intended.
How could this be fixed?

The right tool is definitely the re module.
Here is a regular expression that should do the job:
(\+|\-)?\d+(,\d+)?$
Let's break it down:
(\+|\-)?\d+(,\d+)?$
\+|\- Starts with a + or a -...
( )? ... but not necessarily
\d+ Contains at least one digit
,\d+ Is followed by a comma and at least one digit...
( )? ... but not necessarily
$ Stops here: nothing follows the trailing digits
Now, your validate function only has to return True if the input matches that regex, and False if it does not.
def validate(string):
    result = re.match(r"(\+|\-)?\d+(,\d+)?$", string)
    return result is not None
Some tests:
# Valid inputs
>>> validate("123")
True
>>> validate("123,2")
True
>>> validate("+123,2")
True
>>> validate("-123,2")
True
# Invalid inputs
>>> validate("123,")
False
>>> validate("123,2,")
False
>>> validate("+123,2,")
False
>>> validate("hello")
False
Edit
I understand now that you want to check in real-time if the input is valid.
So here is an example of what you can do:
import tkinter as tk
import re

def validate(string):
    regex = re.compile(r"(\+|\-)?[0-9,]*$")
    result = regex.match(string)
    return (string == ""
            or (string.count('+') <= 1
                and string.count('-') <= 1
                and string.count(',') <= 1
                and result is not None
                and result.group(0) != ""))

def on_validate(P):
    return validate(P)

root = tk.Tk()
entry = tk.Entry(root, validate="key")
vcmd = (entry.register(on_validate), '%P')
entry.config(validatecommand=vcmd)
entry.pack()
root.mainloop()
The validate function now checks more or less the same thing, but more loosely.
Then, if the regex results in a match, some additional checks are performed:
Allow the input to be empty;
Prevent the input from having more than one '+'/'-' and more than one ',';
Ignore a match against the empty string, which the pattern would otherwise allow.
The command is registered and works with the '%P' parameter, which corresponds to the value of the entry if the edit were allowed.
Please note, however, that forcing the input to always be correct is somewhat harsh and might be counter-intuitive.
A more commonly used approach is to update a label next to the entry so the user knows whether their input is valid or invalid, as sketched below.
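For instance, here is a minimal sketch of that approach (the pattern and widget layout are illustrative assumptions, not taken from the question): the entry accepts anything, and a neighbouring label reports whether the current text parses as a number.

import re
import tkinter as tk

pattern = re.compile(r"[+-]?\d+(,\d+)?$")

root = tk.Tk()
entry_var = tk.StringVar()
status = tk.StringVar(value="empty")

def on_change(*_):
    # update the label on every keystroke instead of rejecting the input
    text = entry_var.get()
    if not text:
        status.set("empty")
    else:
        status.set("valid" if pattern.match(text) else "invalid")

entry_var.trace_add("write", on_change)
tk.Entry(root, textvariable=entry_var).pack(side="left")
tk.Label(root, textvariable=status).pack(side="left")
root.mainloop()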

Related

Allow alphabet characters to be the only inputs into entrybox tkinter

#Want this code but to only allow str (alphabet)
def is_type_int(*args):
    item = var2.get()
    try:
        item_type = type(int(item))
        if item_type == type(int(1)):
            # Had to have some action here, so just made a bare print that does nothing
            print
    except:
        IDEntry.delete(0, tk.END)

var2 = tk.StringVar()
var2.trace("w", is_type_int)
So, the first option I was thinking about is to compare the type to str instead of int. But this solution would not always work, because it depends on how the argument is passed.
I'd try to use regex instead: a simple function to check whether there is any number in the input text, as below:
import re

def numberInText(input_string):
    # Using regex, look for a number in the string
    if re.search("[0-9]", input_string):
        return True
    # if no number is found, return False
    return False

test_with_number = "TEST 123"
print(numberInText(test_with_number))

test_without_number = "TEST"
print(numberInText(test_without_number))
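If the goal really is letters only (no digits at all), a self-contained sketch along the same lines could simply wipe the entry whenever a digit shows up; the widget names below are illustrative, not the question's:

import tkinter as tk

root = tk.Tk()
var2 = tk.StringVar()
id_entry = tk.Entry(root, textvariable=var2)
id_entry.pack()

def reject_digits(*args):
    # clear the entry as soon as any digit appears in it
    if any(ch.isdigit() for ch in var2.get()):
        id_entry.delete(0, tk.END)

var2.trace("w", reject_digits)
root.mainloop()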

How to parse a BinaryTree from a string?

I want to code a BinaryTree parser. I don't know how to solve this problem. I've tried using regular expressions recursively but I can't find good resources. My goal is:
BinaryTree.from_string("('a') 'b' ('c')") --> BinaryTree("a", "b", "c")
BinaryTree.from_string("") --> None
BinaryTree.from_string("() ()") --> BinaryTree(None, None, None)
BinaryTree.from_string("((1) 2 (3)) 4 (5)") --> BinaryTree(BinaryTree(1, 2, 3), 4, 5)
Here is some source code:
class BinaryTree:
    def __init__(self, left=None, name=None, right=None):
        self.left = left
        self.name = name
        self.right = right

    def __str__(self):
        return f"({self.left}) {self.name} ({self.right})"

    def __repr__(self):
        return f"BinaryTree({repr(self.left)}, {repr(self.name)}, {repr(self.right)})"

    def __len__(self):
        if self.name is not None:
            output = 1
        else:
            output = 0
        if self.left is not None:
            output += len(self.left)
        if self.right is not None:
            output += len(self.right)
        return output

    @staticmethod
    def from_string(string):
        # "(x) y (z)" --> BinaryTree("x", "y", "z")
        # "((a) b (c)) y (z)" --> BinaryTree(BinaryTree("a", "b", "c"), "y", "z")
        # "" --> None
        # "() ()" --> BinaryTree("", "", "")
        pass
First, I believe that you need to drop the idea of regular expressions and concentrate on simply matching the parentheses. You have a very simple expression grammar here. Rather than reproducing such a well-travelled exercise, I simply direct you to research how to parse a binary tree expression with parentheses.
The basic expression is
left root right
where each of left and right is either
a sub-tree (first char is a left parenthesis)
a leaf-node label (first char is something else)
null (white space)
Note that you have some ambiguities. For instance, given a b, is the resulting tree (a, b, None), (None, a, b), or an error?
In any case, if you focus on simple string processing, you should be able to do this without external packages:
Find the first left parenthesis and its matching right.
In the string after that, look again for a left-right pair.
If there's anything before that first left-paren, then it must be a leaf node for the left and the node label for the root.
Either way, there must be a root node in the middle (unless this is a degenerate tree).
Recur on each of the paren matches you made.
Can you take it from there?
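If it helps, here is a rough sketch of the first matching step described above (illustrative scaffolding only, not the full parser):

def matching_paren(s, start):
    # 'start' must point at a '('; return the index of its matching ')'
    depth = 0
    for i in range(start, len(s)):
        if s[i] == '(':
            depth += 1
        elif s[i] == ')':
            depth -= 1
            if depth == 0:
                return i
    raise ValueError("Unbalanced parentheses")

print(matching_paren("((1) 2 (3)) 4 (5)", 0))  # 10, the ')' closing the left subtree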
You can use regular expressions, but only use them to tokenize the input, i.e. the regex should match all parentheses as individual matches, and match quoted or non-quoted literals. Some care has to be taken to support backslash escaping inside quoted substrings.
For converting quoted strings and numbers to the corresponding Python values, you can use ast.literal_eval. Of course, if the input format had been a valid Python expression (using comma separators, etc.), you could have left the parsing completely to ast.literal_eval. But as this is not the case, you'll have to tokenize the input and iterate over the tokens.
So import these:
import re
import ast
And then:
@staticmethod
def from_string(string):
    tokens = re.finditer(r"[()]|'(?:\\.|[^'])*'|[^\s()]+|$", string)

    def format(token):
        return f"'{token}'" if token else "end of input"

    def take(end, expect=None, forbid=None):
        token = next(tokens).group(0)
        if expect is not None and token != expect:
            raise ValueError("Expected {}, got {}".format(format(expect), format(token)))
        if end != "" and token == "" or token == forbid:
            raise ValueError("Unexpected {}".format(format(token)))
        if token not in ("(", ")", ""):
            token = ast.literal_eval(token)
        return token

    def recur(end=")"):
        token = take(end)
        if token == end:  # it is an empty leaf
            return None
        if token != "(":  # it is a leaf
            take(end, end)
            return token
        # It is a (left)-name-(right) sequence:
        left = recur()
        name = None
        token = take(end, None, end)
        if token != "(":
            name = token
            take(end, "(")
        right = recur()
        take(end, end)
        return BinaryTree(left, name, right)

    return recur("")
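As a quick sanity check (assuming the BinaryTree class above together with this from_string implementation), the examples from the question come out as expected:

print(repr(BinaryTree.from_string("('a') 'b' ('c')")))    # BinaryTree('a', 'b', 'c')
print(repr(BinaryTree.from_string("((1) 2 (3)) 4 (5)")))  # BinaryTree(BinaryTree(1, 2, 3), 4, 5)
print(BinaryTree.from_string(""))                         # None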

click custom option prompt function

I have noticed that click's prompt accepts inputs with trailing spaces:
ftp_server = click.prompt("FTP Server")
Is there a way to use a custom return function like this to reject trailing spaces?
def custom_prompt(value):
    if value.strip():
        return True
    else:
        return False

ftp_server = click.prompt("FTP Server", custom_prompt)
I have already used this:
while not ftp_server.strip():
    ftp_server = click.prompt("FTP Server")
But I'm looking for a better way because I don't want to use a while loop each time I use prompt.
To reject invalid user input, you can use the value_proc parameter to click.prompt. A validator to reject input with trailing spaces could look like:
Prompt Validator
import click

def validate_no_trailing_space(value):
    if value != value.rstrip():
        raise click.UsageError("Trailing space is invalid!")
    return value

ftp_server = click.prompt("FTP Server",
                          value_proc=validate_no_trailing_space)
Trim Spaces
You might also consider a validator which trims leading and trailing spaces but rejects spaces inside the name:
def validate_no_internal_space(value):
    value = value.strip()
    if ' ' in value:
        raise click.UsageError("Spaces are not valid here!")
    return value
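It is used the same way as the first validator; when the function raises click.UsageError, click should print the message and prompt again, so a session would look roughly like this (the server name is made up):

ftp_server = click.prompt("FTP Server",
                          value_proc=validate_no_internal_space)
# FTP Server: my server
# Error: Spaces are not valid here!
# FTP Server: myserver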

Processing Strings from data file containing punctuation coding in python

I am trying to make a simple programme that can help make army lists for a popular tabletop wargame, more as an exercise for my own experience, as there are plenty of pre-made software packages that do this, but the idea behind it seems fairly straightforward.
The programme reads the data for all the units available in an army from a spreadsheet and creates various classes for each unit. The main bit I am looking at now is the options/upgrades.
In the file I want a straightforward syntax for the option field for each unit, i.e. the following options string itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ would mean:
1. you may take itemA (X pts per model)
2. for every 3 models, you may exchange itemB with
   a) itemC (net X pts per model)
3. each model may take 2 of itemD (X pts per model)
4. each model may take one of either
   a) itemE (X pts per model)
   b) itemF (X pts per model)
   c) itemG (X pts per model)
5. each model may take either
   a) itemH (X points per model)
   b) itemI and itemJ (X points per model)
At the moment I am processing the string using lots of splits and if statements, which makes it very hard to keep track of everything and assign it correctly once the user inputs their choice.
for index, option in enumerate(self.options):
    output = "{}.".format(index + 1)
    if '-' in option:
        sub_option, no_models = option.split('-')
        no_models = int(no_models)
        print(sub_option)
        print(no_models)
        output += "For every {} models ".format(no_models)
        if '/' in sub_option:
            temp_str, temp_options, points_list = exchange_option(sub_option)
        else:
            temp_str, temp_options, points_list = standard_option(sub_option)
        index_points.append(points_list)
        temp_options.append(no_models)
        index_options.append(temp_options)
    else:
        if '/' in option:
            temp_str, temp_options, points_list = exchange_option(option)
        else:
            temp_str, temp_options, points_list = standard_option(option)
        index_points.append(points_list)
        index_options.append(temp_options)
    output += temp_str
The *_option() functions are additional helper functions I have defined above; they have a similar structure with further if statements within them.
The main question I am asking is: is there an easier way to process a code-like string such as this? While it works to produce the output in the example above, it seems awfully cumbersome to then deal with the user input.
What I am aiming to do is first output the string as given in my example at the top of the question, and then, taking the user's input index for a given option, modify the associated unit class to have the correct wargear and points value.
I thought about trying to make some kind of options class, but again, labelling and defining each option so that they can interact with one another properly seems equally complex, and I feel there must be something more pythonic, or just generally better coding practice, for processing encoded strings such as this.
So, here's a full blown parser to do that! Now, this only outputs the list as in the previous version of your question, but it shouldn't be too hard to add more features as you want. Also please note that at the moment, the lexer does not error out when a string contains invalid tokens, but that's just a proof-of-concept, so it should be fine.
Part I: the lexer
This tokenises the input string - looks through it from left to right and attempts to classify non-overlapping substrings as instances of tokens. It's to be used before parsing. When given a string, Lexer.tokenize yields a stream of Tokens.
# FILE: lex.py

import re
import enum


class Token:
    def __init__(self, type, value: str, lineno: int, pos: int):
        self.type, self.value, self.lineno, self.pos = type, value, lineno, pos

    def __str__(self):
        v = f'({self.value!r})' if self.value else ''
        return f'{self.type.name}{v} at {self.lineno}:{self.pos}'

    __repr__ = __str__


class Lexer:
    def __init__(self, token_types: enum.Enum, tokens_regexes: dict):
        self.token_types = token_types
        regex = '|'.join(map('(?P<{}>{})'.format, *zip(*((tok.name, regex) for tok, regex in tokens_regexes.items()))))
        self.regex = re.compile(regex)

    def tokenize(self, string, skip=['space']):
        # TODO: detect invalid input
        lineno, pos = 0, 0
        skip = set(map(self.token_types.__getitem__, skip))
        for matchobj in self.regex.finditer(string):
            type_name = matchobj.lastgroup
            value = matchobj.groupdict()[type_name]
            Type = self.token_types[type_name]
            if Type == self.token_types.newline:
                # track line/position for error reporting
                lineno += 1
                pos = 0
                continue
            pos = matchobj.end()
            if Type not in skip:
                yield Token(Type, value, lineno, pos)
        yield Token(self.token_types.EOF, '', lineno, pos)
Part II: the parser (with syntax-driven evaluation):
This parses the given stream of tokens provided by lex.Lexer.tokenize and translates individual symbols to English according to the following grammar:
Opt_list -> Option Opt_list_
Opt_list_ -> comma Option Opt_list_ | empty
Option -> Choice | Mult
Choice -> Compound More_choices Exchange
Compound -> item Add_item
Add_item -> plus item Add_item | empty
More_choices -> slash Compound More_choices | empty
Exchange -> minus num | empty
Mult -> num star Compound
The uppercase symbols are nonterminals, the lowercase ones are terminals. There's also a special symbol EOF that's not present here.
This grammar is LL(1), so we can use an LL(1) recursive descent predictive parser, as shown below.
If you modify the grammar, you should modify the parser accordingly! The methods that do the actual parsing are called parse_<something>, and to change the output of the parser (the Parser.parse function, actually) you should change the return values of these parse_<something> functions.
# FILE: parse.py

import lex


class Parser:
    def __init__(self, lexer):
        self.string, self.tokens = None, None
        self.lexer = lexer
        self.t = self.lexer.token_types
        self.__lookahead = None

    @property
    def lookahead(self):
        if not self.__lookahead:
            try:
                self.__lookahead = next(self.tokens)
            except StopIteration:
                self.__lookahead = lex.Token(self.t.EOF, '', 0, -1)
        return self.__lookahead

    def next(self):
        if self.__lookahead and self.__lookahead.type == self.t.EOF:
            return self.__lookahead
        self.__lookahead = None
        return self.lookahead

    def match(self, token_type):
        if self.lookahead.type == token_type:
            return self.next()
        raise SyntaxError(f'Expected {token_type}, got {self.lookahead.type}',
                          ('<string>', self.lookahead.lineno, self.lookahead.pos, self.string))

    # THE PARSING STARTS HERE

    def parse(self, string):
        # setup
        self.string = string
        self.tokens = self.lexer.tokenize(string)
        self.__lookahead = None
        self.next()
        # do parsing
        ret = [''] + self.parse_opt_list()
        return ' '.join(ret)

    def parse_opt_list(self) -> list:
        ret = self.parse_option(1)
        ret.extend(self.parse_opt_list_(1))
        return ret

    def parse_opt_list_(self, curr_opt_number) -> list:
        if self.lookahead.type in {self.t.EOF}:
            return []
        self.match(self.t.comma)
        ret = self.parse_option(curr_opt_number + 1)
        ret.extend(self.parse_opt_list_(curr_opt_number + 1))
        return ret

    def parse_option(self, opt_number) -> list:
        ret = [f'{opt_number}.']
        if self.lookahead.type == self.t.item:
            ret.extend(self.parse_choice())
        elif self.lookahead.type == self.t.num:
            ret.extend(self.parse_mult())
        else:
            raise SyntaxError(f'Expected {self.t.item} or {self.t.num}, got {self.lookahead.type}',
                              ('<string>', self.lookahead.lineno, self.lookahead.pos, self.string))
        ret[-1] += '\n'
        return ret

    def parse_choice(self) -> list:
        c = self.parse_compound()
        m = self.parse_more_choices()
        e = self.parse_exchange()
        if not m:
            if not e:
                ret = f'You may take {" ".join(c)}'
            else:
                ret = f'for every {e} models you may take item {" ".join(c)}'
        elif m:
            c.extend(m)
            if not e:
                ret = f'each model may take one of: {", ".join(c)}'
            else:
                ret = f'for every {e} models you may exchange the following items with each other: {", ".join(c)}'
        else:
            ret = 'Semantic error!'
        return [ret]

    def parse_compound(self) -> list:
        ret = [self.lookahead.value]
        self.match(self.t.item)
        _ret = self.parse_add_item()
        return [' '.join(ret + _ret)]

    def parse_add_item(self) -> list:
        if self.lookahead.type in {self.t.comma, self.t.minus, self.t.slash, self.t.EOF}:
            return []
        ret = ['with']
        self.match(self.t.plus)
        ret.append(self.lookahead.value)
        self.match(self.t.item)
        return ret + self.parse_add_item()

    def parse_more_choices(self) -> list:
        if self.lookahead.type in {self.t.comma, self.t.minus, self.t.EOF}:
            return []
        self.match(self.t.slash)
        ret = self.parse_compound()
        return ret + self.parse_more_choices()

    def parse_exchange(self) -> str:
        if self.lookahead.type in {self.t.comma, self.t.EOF}:
            return ''
        self.match(self.t.minus)
        ret = self.lookahead.value
        self.match(self.t.num)
        return ret

    def parse_mult(self) -> list:
        ret = [f'each model may take {self.lookahead.value} of:']
        self.match(self.t.num)
        self.match(self.t.star)
        return ret + self.parse_compound()
Part III: usage
Here's how to use all of that code:
# FILE: evaluate.py

import enum
from lex import Lexer
from parse import Parser

# these are all the types of tokens present in our grammar
token_types = enum.Enum('Types', 'item num plus minus star slash comma space newline empty EOF')
t = token_types

# these are the regexes that the lexer uses to recognise the tokens
terminals_regexes = {
    t.item: r'[a-zA-Z_]\w*',
    t.num: '0|[1-9][0-9]*',
    t.plus: r'\+',
    t.minus: '-',
    t.star: r'\*',
    t.slash: '/',
    t.comma: ',',
    t.space: r'[ \t]',
    t.newline: r'\n'
}

lexer = Lexer(token_types, terminals_regexes)
parser = Parser(lexer)

string = 'itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ'
print(f'STRING FROM THE QUESTION: {string!r}\nRESULT:')
print(parser.parse(string), '\n\n')

string = input('Enter a command: ')
while string and string.lower() not in {'q', 'quit', 'e', 'exit'}:
    try:
        print(parser.parse(string))
    except SyntaxError as e:
        print(f'    Syntax error: {e}\n    {e.text}\n' + ' ' * (4 + e.offset - 1) + '^\n')
    string = input('Enter a command: ')
Example session:
# python3 evaluate.py
STRING FROM THE QUESTION: 'itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ'
RESULT:
1. You may take itemA
2. for every 3 models you may exchange the following items with each other: itemB, itemC
3. each model may take 2 of: itemD
4. each model may take one of: itemE, itemF, itemG
5. each model may take one of: itemH, itemI with itemJ
Enter a command: itemA/b/c/stuff
1. each model may take one of: itemA, b, c, stuff
Enter a command: 4 * anything
1. each model may take 4 of: anything
Enter a command: 5 * anything + more
1. each model may take 5 of: anything with more
Enter a command: a + b + c+ d
1. You may take a with b with c with d
Enter a command: a+b/c
1. each model may take one of: a with b, c
Enter a command: itemA/itemB-2
1. for every 2 models you may exchange the following items with each other: itemA, itemB
Enter a command: itemA+itemB/itemC - 5
1. for every 5 models you may exchange the following items with each other: itemA with itemB, itemC
Enter a command: q

Dynamically evaluating simple boolean logic in Python

I've got some dynamically-generated boolean logic expressions, like:
(A or B) and (C or D)
A or (A and B)
A
empty - evaluates to True
The placeholders get replaced with booleans. Should I,
Convert this information to a Python expression like True or (True or False) and eval it?
Create a binary tree where a node is either a bool or Conjunction/Disjunction object and recursively evaluate it?
Convert it into nested S-expressions and use a Lisp parser?
Something else?
Suggestions welcome.
Here's a small module (about 74 lines including whitespace) that I built in roughly an hour and a half (plus almost an hour of refactoring):
str_to_token = {'True': True,
                'False': False,
                'and': lambda left, right: left and right,
                'or': lambda left, right: left or right,
                '(': '(',
                ')': ')'}

empty_res = True


def create_token_lst(s, str_to_token=str_to_token):
    """create token list:
    'True or False' -> [True, lambda..., False]"""
    s = s.replace('(', ' ( ')
    s = s.replace(')', ' ) ')
    return [str_to_token[it] for it in s.split()]


def find(lst, what, start=0):
    return [i for i, it in enumerate(lst) if it == what and i >= start]


def parens(token_lst):
    """returns:
    (bool)parens_exist, left_paren_pos, right_paren_pos
    """
    left_lst = find(token_lst, '(')
    if not left_lst:
        return False, -1, -1
    left = left_lst[-1]
    # can not occur earlier, hence there are args and op.
    right = find(token_lst, ')', left + 4)[0]
    return True, left, right


def bool_eval(token_lst):
    """token_lst has length 3 and format: [left_arg, operator, right_arg]
    operator(left_arg, right_arg) is returned"""
    return token_lst[1](token_lst[0], token_lst[2])


def formatted_bool_eval(token_lst, empty_res=empty_res):
    """eval a formatted (i.e. of the form 'ToFa(ToF)') string"""
    if not token_lst:
        return empty_res
    if len(token_lst) == 1:
        return token_lst[0]
    has_parens, l_paren, r_paren = parens(token_lst)
    if not has_parens:
        return bool_eval(token_lst)
    token_lst[l_paren:r_paren + 1] = [bool_eval(token_lst[l_paren + 1:r_paren])]
    return formatted_bool_eval(token_lst, empty_res)


def nested_bool_eval(s):
    """The actual 'eval' routine:
    if 's' is empty, 'True' is returned,
    otherwise 's' is evaluated according to parentheses nesting.
    The format assumed:
        [1] 'LEFT OPERATOR RIGHT',
    where LEFT and RIGHT are either:
        True or False or '(' [1] ')' (subexpression in parentheses)
    """
    return formatted_bool_eval(create_token_lst(s))
The simple tests give:
>>> print(nested_bool_eval(''))
True
>>> print(nested_bool_eval('False'))
False
>>> print(nested_bool_eval('True or False'))
True
>>> print(nested_bool_eval('True and False'))
False
>>> print(nested_bool_eval('(True or False) and (True or False)'))
True
>>> print(nested_bool_eval('(True or False) and (True and False)'))
False
>>> print(nested_bool_eval('(True or False) or (True and False)'))
True
>>> print(nested_bool_eval('(True and False) or (True and False)'))
False
>>> print(nested_bool_eval('(True and False) or (True and (True or False))'))
True
[Partially off-topic possibly]
Note that you can easily configure the tokens (both operands and operators) with the poor man's dependency injection provided (str_to_token=str_to_token and friends) to have multiple different evaluators at the same time (just resetting the "injected-by-default" globals will leave you with a single behaviour).
For example:
def fuzzy_bool_eval(s):
    """as normal, but:
    - an argument 'Maybe' may be :)) present
    - algebra is:
      [one of 'True', 'False', 'Maybe'] [one of 'or', 'and'] 'Maybe' -> 'Maybe'
    """
    Maybe = 'Maybe'  # just an object with a nice __str__

    def or_op(left, right):
        return (Maybe if Maybe in [left, right] else (left or right))

    def and_op(left, right):
        args = [left, right]
        if Maybe in args:
            if True in args:
                return Maybe  # Maybe and True -> Maybe
            else:
                return False  # Maybe and False -> False
        return left and right

    str_to_token = {'True': True,
                    'False': False,
                    'Maybe': Maybe,
                    'and': and_op,
                    'or': or_op,
                    '(': '(',
                    ')': ')'}

    token_lst = create_token_lst(s, str_to_token=str_to_token)
    return formatted_bool_eval(token_lst)
gives:
>>> print(fuzzy_bool_eval(''))
True
>>> print(fuzzy_bool_eval('Maybe'))
Maybe
>>> print(fuzzy_bool_eval('True or False'))
True
>>> print(fuzzy_bool_eval('True or Maybe'))
Maybe
>>> print(fuzzy_bool_eval('False or (False and Maybe)'))
False
It shouldn't be difficult at all to write an evaluator that can handle this, for example using pyparsing. You only have a few operations to handle (and, or, and grouping?), so you should be able to parse and evaluate it yourself.
You shouldn't need to explicitly form the binary tree to evaluate the expression.
If you set up dicts with the locals and globals you care about then you should be able to safely pass them along with the expression into eval().
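For example, a minimal sketch of that eval() approach (the helper name and the empty-means-True rule are assumptions taken from the question):

def eval_bool(expression, values):
    # 'values' maps placeholder names such as "A" to booleans
    if not expression.strip():
        return True  # an empty expression evaluates to True, per the question
    # emptying __builtins__ reduces the attack surface, but eval is still only
    # appropriate for expressions you generate and trust yourself
    return eval(expression, {"__builtins__": {}}, dict(values))

print(eval_bool("(A or B) and (C or D)", {"A": True, "B": False, "C": False, "D": True}))  # True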
Sounds like a piece of cake using the SymPy logic module. They even have an example of that in the docs: http://docs.sympy.org/0.7.1/modules/logic.html
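For instance, a small sketch (note that SymPy's boolean operators are &, | and ~ rather than and/or/not, so the expression strings would need to use that form):

from sympy import sympify

# parse into a SymPy boolean expression, then substitute concrete values
expr = sympify("(A | B) & (C | D)")
result = expr.subs({"A": True, "B": False, "C": False, "D": True})
print(bool(result))  # True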
I am writing this because I had to solve a similar problem today and I was here when I was looking for clues (a boolean parser with arbitrary string tokens that get converted to boolean values later).
After considering different options (implementing a solution myself or using some package), I settled on using Lark: https://github.com/lark-parser/lark
It's easy to use and pretty fast if you use LALR(1).
Here is an example that could match your syntax
from lark import Lark, Tree, Transformer

base_parser = Lark("""
    expr: and_expr
        | or_expr
    and_expr: token
        | "(" expr ")"
        | and_expr " " and " " and_expr
    or_expr: token
        | "(" expr ")"
        | or_expr " " or " " or_expr
    token: LETTER
    and: "and"
    or: "or"
    LETTER: /[A-Z]+/
""", start="expr")


class Cleaner(Transformer):
    def expr(self, children):
        num_children = len(children)
        if num_children == 1:
            return children[0]
        else:
            raise RuntimeError()

    def and_expr(self, children):
        num_children = len(children)
        if num_children == 1:
            return children[0]
        elif num_children == 3:
            first, middle, last = children
            return Tree(data="and_expr", children=[first, last])
        else:
            raise RuntimeError()

    def or_expr(self, children):
        num_children = len(children)
        if num_children == 1:
            return children[0]
        elif num_children == 3:
            first, middle, last = children
            return Tree(data="or_expr", children=[first, last])
        else:
            raise RuntimeError()


def get_syntax_tree(expression):
    return Cleaner().transform(base_parser.parse(expression))


print(get_syntax_tree("A and (B or C)").pretty())
Note: the regex I chose doesn't match the empty string on purpose (Lark for some reason doesn't allow it).
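To plug in actual boolean values later, a small walker over the cleaned tree could look like this (assuming the token/and_expr/or_expr shape produced by Cleaner above; the values mapping is hypothetical):

def evaluate(tree, values):
    # 'values' maps letters such as "A" to booleans
    if tree.data == "token":
        return values[str(tree.children[0])]
    left = evaluate(tree.children[0], values)
    right = evaluate(tree.children[1], values)
    return (left and right) if tree.data == "and_expr" else (left or right)

print(evaluate(get_syntax_tree("A and (B or C)"), {"A": True, "B": False, "C": True}))  # True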
You can do this with the Lark grammar library: https://github.com/lark-parser/lark
from lark import Lark, Transformer, v_args, Token, Tree
from operator import or_, and_, not_

calc_grammar = f"""
    ?start: disjunction

    ?disjunction: conjunction
        | disjunction "or" conjunction -> {or_.__name__}

    ?conjunction: atom
        | conjunction "and" atom -> {and_.__name__}

    ?atom: BOOLEAN_LITTERAL -> bool_lit
        | "not" atom -> {not_.__name__}
        | "(" disjunction ")"

    BOOLEAN_LITTERAL: TRUE | FALSE
    TRUE: "True"
    FALSE: "False"

    %import common.WS_INLINE
    %ignore WS_INLINE
"""


@v_args(inline=True)
class CalculateBoolTree(Transformer):
    or_ = or_
    not_ = not_
    and_ = and_

    allowed_value = {"True": True, "False": False}

    def bool_lit(self, val: Token) -> bool:
        return self.allowed_value[val]


calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateBoolTree())
calc = calc_parser.parse


def eval_bool_expression(bool_expression: str) -> bool:
    return calc(bool_expression)


print(eval_bool_expression("(True or False) and (False and True)"))
print(eval_bool_expression("not (False and True)"))
print(eval_bool_expression("not True or False and True and True"))
