import argparse

from sly import Lexer, Parser


def findMatchedParenthesis(string: str, openchar: str, closechar: str) -> int:
    """
    Finds matching control token in string and returns the offset.
    The string's first character must match openchar. Otherwise, 0 is returned.

    Args:
        string (str): input string to scan
        openchar (str): opening char e.g. '{'
        closechar (str): closing char e.g. '}'

    Returns:
        int: position of matching closing char in string.
    """
    end = 1
    if string[0] == openchar:
        count = 1
        while end < len(string) and count > 0:
            if string[end] == openchar:
                count += 1
            elif string[end] == closechar:
                count -= 1
            end += 1
        if count > 0:
            raise SyntaxError(
                "Matched parenthesis for metavariable could not be found.")
    return end - 1


class B2ParameterLexer(Lexer):
    """
    Lexer class responsible for changing the default scanning behavior.
    It disables token scanning and captures everything within the matched
    parenthesis. Call pop_state to return to the default scanning state.
    """
    #: token list, contains only the ARGUMENTTUPLE token
    tokens = {ARGUMENTTUPLE, }  # noqa: F821

    @_(r"\(")  # noqa: F821
    def ARGUMENTTUPLE(self, t):
        """
        Capture metavariable parameters by finding the matched parenthesis.

        Args:
            t (sly.lex.token): token of type LPAREN

        Returns:
            t (sly.lex.token): ARGUMENTTUPLE token
        """
        # Find the offset of the matching closing parenthesis
        pos = findMatchedParenthesis(self.text[self.index-1:], "(", ")")
        # Token value is the complete argument tuple, parentheses included
        t.value = self.text[self.index-1: self.index+pos]
        # Increment current scanning position past the captured tuple
        self.index = self.index + pos
        return t
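

# Minimal usage sketch (not part of the original module; B2Lexer is defined
# below): B2Lexer switches to B2ParameterLexer when an identifier is directly
# followed by '(', so the whole argument tuple of a metavariable arrives as one
# ARGUMENTTUPLE token whose value keeps the parentheses, e.g. "(0, p)".
def _example_argument_tuple_capture():
    lexer = B2Lexer()
    tokens = list(lexer.tokenize("daughter(0, p)"))
    print([(tok.type, tok.value) for tok in tokens])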


class B2Lexer(Lexer):
    """
    Class responsible for scanning the cut and generating a stream of tokens.
    The token stream can be passed to `B2Parser` to generate a syntax tree.
    """

    def __init__(self):
        """Initialize Lexer"""
        #: control_token_stack (list): stack for keeping track of
        #: seen brackets and parenthesis
        self.control_token_stack = list()

    #: cut specific tokens
    cut_tokens = {
        # structure tokens
        RBRACK, LBRACK,  # noqa: F821
        # boolean operators
        AND, OR, NOT,  # noqa: F821
        # comparison operators
        EQUALEQUAL, GREATEREQUAL, LESSEQUAL, GREATER, LESS,  # noqa: F821
        NOTEQUAL,  # noqa: F821
    }
    #: expression tokens, also needed for cuts
    expression_tokens = {
        LPAREN, RPAREN,  # noqa: F821
        # data types
        DOUBLE, INTEGER, IDENTIFIER, BOOLEAN,  # noqa: F821
        # arithmetic operators
        POWER, TIMES, DIVIDE, PLUS, MINUS,  # noqa: F821
    }
    #: set of all tokens
    tokens = expression_tokens.union(cut_tokens)

    @_(r"\[")  # noqa: F821
    def LBRACK(self, t):
        """
        Scan opening bracket.

        Parameters:
            t (sly.lex.token): token of type LBRACK

        Raises:
            SyntaxError: if no following closing bracket is found.

        Side Effect:
            Pushes 'BRACK' onto control_token_stack

        Returns:
            t (sly.lex.token): token of type LBRACK
        """
        if "]" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '[' in cut.")
        self.control_token_stack.append("BRACK")
        return t

    @_(r"\]")  # noqa: F821
    def RBRACK(self, t):
        """
        Scan closing bracket.

        Parameters:
            t (sly.lex.token): token of type RBRACK

        Raises:
            SyntaxError: 1. If control_token_stack is empty, which means
                no bracket was opened previously.
            SyntaxError: 2. If state of control_token_stack is 'PAREN', which
                means a closing parenthesis is expected.

        Side Effect:
            Pops object from control_token_stack

        Returns:
            t (sly.lex.token): token of type RBRACK
        """
        try:
            state = self.control_token_stack.pop()
        except IndexError:  # pop from empty list
            raise SyntaxError("Unmatched ']' in cut.")
        if state == "BRACK":
            return t
        elif state == "PAREN":
            raise SyntaxError("Illegal ']', expected ')'.")

    @_(r"\(")  # noqa: F821
    def LPAREN(self, t):
        """
        Scan opening parenthesis.

        Parameters:
            t (sly.lex.token): token of type LPAREN

        Raises:
            SyntaxError: if no following closing parenthesis is found.

        Side Effect:
            Pushes 'PAREN' onto control_token_stack

        Returns:
            t (sly.lex.token): token of type LPAREN
        """
        if ")" not in self.text[self.index:]:
            raise SyntaxError("Unmatched '('")
        self.control_token_stack.append("PAREN")
        return t
219 @_(r"\)") # noqa: F821
222 Scan closing parenthesis.
225 t (sly.lex.token): token of type RPAREN
228 SyntaxError: 1. If control_token_stack is empty, which means
229 no parenthesis was opened previously.
230 2. If state of control_token_stack
is 'BRACK', which
231 means a closing bracket
is expected.
234 Pops state
from control_token_stack
242 raise SyntaxError(
"Unmatched ')' in cut.")
244 raise SyntaxError(
"Illegal ')', expected ']'.")
245 elif state ==
"PAREN":
248 @_(r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))
") # noqa: E501, F821
251 Scanning function for double values
254 t (sly.lex.Token): initial token generated by the scanner library.
255 The value attribute
is of type str initially, equals
256 the matched sequence
and is casted to float.
258 Possible notations covered by this regular expression:
259 Normal decimal notation e.g 0.1
260 Hanging decimal seperator notation e.g 1.
261 Preceding decimal seperator notation e.g .1
262 Scientific notation
with (signed) exponents e.g 1.0E4, 1.e-4, .1E+3
263 Exponents are case insensitive e.g 1.e4, 1.E4
264 Integer
with exponent e.g 1E4
269 t.value = float(t.value)
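
    # Sketch of what the DOUBLE pattern above accepts (comments only, not part
    # of the original module; the pattern is repeated here for illustration):
    #
    #   import re
    #   _DOUBLE = r"((\d+\.\d*|\d*\.\d+)(e(-|\+)?\d+|E(-|\+)?\d+)?|\d+(e(-|\+)?\d+|E(-|\+)?\d+))"
    #   re.fullmatch(_DOUBLE, "0.1")    # normal decimal notation
    #   re.fullmatch(_DOUBLE, "1.")     # hanging decimal separator
    #   re.fullmatch(_DOUBLE, ".1")     # preceding decimal separator
    #   re.fullmatch(_DOUBLE, "1.0E4")  # scientific notation, signed exponents also work
    #   re.fullmatch(_DOUBLE, "1E4")    # integer with exponent
    #   re.fullmatch(_DOUBLE, "1")      # no match: plain integers are scanned as INTEGER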
272 @_(r"(0(x|X)[0-9A-Fa-f]+)|\d+
") # noqa: F821
273 def INTEGER(self, t):
275 Scanning function for integer values
276 Allows normal
and hex notation (case insensitive)
279 t (sly.lex.Token): initial token generated by the scanner library.
280 The value attribute
is of type str initially, equals
281 the matched sequence
and is casted to int.
284 python int-objects are converted
285 to the standard c++ int datatype (32bit).
286 Overflows can happen because numerical limits
287 of python int
and c++ int datatypes differ.
288 If you need to input large values write it
as double.
294 t.value = int(t.value)
297 t.value = int(t.value, base=16)
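
    # Sketch of the two conversion paths above (comments only, not part of the
    # original module):
    #
    #   int("98")             # -> 98, decimal notation
    #   int("0xFF")           # -> ValueError, falls through to the except branch
    #   int("0xFF", base=16)  # -> 255, hex notation (case insensitive)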
300 @_(r"[a-zA-Z_][a-zA-Z_0-9]*")
301 def IDENTIFIER(self, t):
303 Scaning function for identifiers
305 If a matched sequence equals reserved keywords of other tokens
306 the token type
and value
is remapped via the reserved dictionary.
309 t (sly.lex.Token): initial token generated by the scanner library.
310 value attribute equals the matched sequence.
324 "infinity":
"DOUBLE",
328 t.type = reserved.get(t.value,
"IDENTIFIER")
331 if t.type ==
"BOOLEAN":
332 t.value = t.value ==
"True" or t.value ==
"true"
334 if t.type ==
"DOUBLE":
335 t.value = float(t.value)
336 if t.type ==
"IDENTIFIER":
338 if self.text[self.index] ==
"(":
340 if ")" not in self.text[self.index:]:
341 raise SyntaxError(
"Unmatched '('")
343 self.push_state(B2ParameterLexer)
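
    # Sketch of the remapping behaviour (comments only, not part of the
    # original module):
    #   "true"  -> token type BOOLEAN, value True
    #   "inf"   -> token type DOUBLE,  value float("inf")
    #   "p"     -> token type IDENTIFIER, value "p"
    #   "daughter" followed directly by "(" additionally switches the lexer to
    #   B2ParameterLexer so the argument tuple is captured as a single token.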


def parser_class_decorator(cls, parser_type):
    """
    Class decorator which allows creating a Parser class object
    for the B2Parser and B2ExpressionParser without repeating the class body.

    Parameters:
        parser_type (str): choice of parser type, 'cut' or 'expression'

    Returns:
        (type): returns a parser class object
    """
    assert parser_type in (
        "cut",
        "expression",
    ), "Invalid parser type, valid choices are 'cut' or 'expression'"

    class B2ParserMixin(cls):
        """
        Parser class implementing the grammar specified below.

        Full Grammar Specification:

        <cut> ::= EMPTY
            | <boolean_expression>

        <boolean_expression> ::= <disjunction>

        <disjunction> ::= <conjunction>
            | <disjunction> OR <conjunction>

        <conjunction> ::= <negation>
            | <conjunction> AND <negation>

        <negation> ::= <bracket_expression>
            | NOT <negation>

        <bracket_expression> ::= <relational_expression>
            | LBRACK <boolean_expression> RBRACK

        <relational_expression> ::= <expression>
            | <expression> <comparison_operator> <expression>
            | <expression> <comparison_operator> <expression>
              <comparison_operator> <expression>

        <comparison_operator> ::= EQUALEQUAL | GREATER | LESS
            | GREATEREQUAL | LESSEQUAL | NOTEQUAL

        <expression> ::= <sum>

        <sum> ::= <term>
            | <sum> PLUS <term>
            | <sum> MINUS <term>

        <term> ::= <factor>
            | <term> TIMES <factor>
            | <term> DIVIDE <factor>

        <factor> ::= <power>
            | PLUS <factor>
            | MINUS <factor>

        <power> ::= <primary>
            | <primary> POWER <factor>

        <primary> ::= LPAREN <expression> RPAREN
            | <function> | IDENTIFIER | INTEGER | BOOLEAN | DOUBLE

        <function> ::= IDENTIFIER ARGUMENTTUPLE
        """
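
        # Example derivation under the grammar above (a sketch, not from the
        # original module): the cut "x > 1 and [y != 2]" is scanned as
        #   IDENTIFIER GREATER INTEGER AND LBRACK IDENTIFIER NOTEQUAL INTEGER RBRACK
        # and reduces via <relational_expression>, <bracket_expression> and
        # <conjunction> into a single <boolean_expression>.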

        def __init__(self, verbose=False):
            """
            Initialize Parser
            @param verbose run parser in verbose mode. The nodetype names in
                the parsed tuple are written out and not encoded
                as integers. Useful for debugging parsing errors.
            """
            super().__init__()
            #: verbose setting, creates more human-readable tuple output,
            #: only for testing and debugging purposes
            self.verbose = verbose
            #: parameter state stack,
            #: used for scope detection of variables and metavariables
            self.parameter_stack = list()

        if parser_type == "cut":
            #: token list for B2Parser, includes cut specific tokens
            tokens = B2Lexer.tokens.union(B2ParameterLexer.tokens)
        else:
            #: token list for B2ExpressionParser, excludes cut specific tokens
            tokens = B2Lexer.expression_tokens.union(B2ParameterLexer.tokens)

        if parser_type == "cut":
            #: Precedence ordering of operators, beginning with the lowest
            precedence = (
                ("left", "EQUALEQUAL", "GREATER", "LESS",
                 "GREATEREQUAL", "LESSEQUAL", "NOTEQUAL"),
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
            )
        else:
            #: Precedence ordering for the expression-only parser
            precedence = (
                ("left", "PLUS", "MINUS"),
                ("left", "TIMES", "DIVIDE"),
            )
473 "UnaryBooleanNode": 0,
474 "BinaryBooleanNode": 1,
475 "UnaryRelationalNode": 2,
476 "BinaryRelationalNode": 3,
477 "TernaryRelationalNode": 4,
478 "UnaryExpressionNode": 5,
479 "BinaryExpressionNode": 6,
507 a_operation_types = {

        def get_node_type(self, node_name: str):
            """
            Return the node type integer value,
            or the node name if the verbose setting is chosen.
            """
            return node_name if self.verbose else self.node_types[node_name]

        def get_coper_type(self, coper_name: str):
            """
            Return the comparison operator type integer value,
            or the comparison operator name if the verbose setting is chosen.
            """
            return coper_name if self.verbose else self.c_operator_types[coper_name]

        def get_boper_type(self, boper_name: str):
            """
            Return the boolean operator type integer value,
            or the boolean operator name if the verbose setting is chosen.
            """
            return boper_name if self.verbose else self.b_operator_types[boper_name]

        def get_a_operation_type(self, operation_name: str):
            """
            Return the arithmetic operation type integer value,
            or the arithmetic operation token if the verbose setting is chosen.
            """
            return operation_name if self.verbose \
                else self.a_operation_types[operation_name]
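
        # Sketch (comments only, not part of the original module): with the
        # default verbose=False these helpers return the integer encodings,
        # e.g. get_node_type("BinaryExpressionNode") -> 6, while verbose=True
        # returns the name itself, which makes debugging parser output easier.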

        if parser_type == "cut":
            @_(r"", r"boolean_expression",)  # noqa: F821
            def cut(self, p):
                """
                Parsing function for <cut> nonterminal

                Grammar rules:
                    <cut> ::= EMPTY
                        | <boolean_expression>
                """
                try:
                    return p.boolean_expression
                except AttributeError:
                    # An empty cut evaluates to a constant true boolean node.
                    return (
                        self.get_node_type("UnaryRelationalNode"),
                        (self.get_node_type("BooleanNode"), True),
                    )

        @_(r"disjunction")  # noqa: F821
        def boolean_expression(self, p):
            """
            Parsing function for <boolean_expression> nonterminal

            Grammar rule:
                <boolean_expression> ::= <disjunction>
            """
            return p.disjunction

        @_(r"disjunction OR conjunction", r"conjunction")  # noqa: F821
        def disjunction(self, p):
            """
            Parsing function for <disjunction> nonterminal

            Grammar rules:
                <disjunction> ::= <conjunction>
                    | <disjunction> OR <conjunction>
            """
            try:
                return (
                    self.get_node_type("BinaryBooleanNode"),
                    p.disjunction,
                    p.conjunction,
                    self.get_boper_type(p.OR),
                )
            except AttributeError:
                return p.conjunction

        @_(r"conjunction AND negation", r"negation")  # noqa: F821
        def conjunction(self, p):
            """
            Parsing function for <conjunction> nonterminal

            Grammar rules:
                <conjunction> ::= <negation>
                    | <conjunction> AND <negation>
            """
            try:
                return (
                    self.get_node_type("BinaryBooleanNode"),
                    p.conjunction,
                    p.negation,
                    self.get_boper_type(p.AND),
                )
            except AttributeError:
                return p.negation

        @_(r"bracket_expression", r"NOT negation")  # noqa: F821
        def negation(self, p):
            """
            Parsing function for <negation> nonterminal

            Grammar rules:
                <negation> ::= <bracket_expression>
                    | NOT <negation>
            """
            try:
                return p.bracket_expression
            except AttributeError:
                return (
                    self.get_node_type("UnaryBooleanNode"),
                    p.negation,
                    True,   # negation flag
                    False,  # bracketized flag
                )
633 r"relational_expression",
634 r"LBRACK boolean_expression RBRACK")
635 def bracket_expression(self, p):
637 Parsing function for <bracket_expression> nonterminal
640 <bracket_expression> ::= <relational_expression>
641 | LBRACK <boolean_expression> RBRACK
644 return p.relational_expression
645 except AttributeError:
647 self.get_node_type(
"UnaryBooleanNode"),
648 p.boolean_expression,
654 def relational_expression(self, p):
656 Parsing function for <relational_expression> nonterminal
659 <relational_expression> ::= <expression>
662 return (self.get_node_type(
"UnaryRelationalNode"), p.expression)
664 @_(r"expression comparison_operator expression")
665 def relational_expression(self, p):
667 Parsing function for <relational_expression> nonterminal
670 <relational_expression> ::= <expression> <comparison_operator>
674 self.get_node_type(
"BinaryRelationalNode"),
677 self.get_coper_type(p.comparison_operator),
680 @_(r"expression comparison_operator expression comparison_operator expression")
681 def relational_expression(self, p):
683 Parsing function for <relational_expression> nonterminal
686 <relational_expression> ::= expression> <comparison_operator>
687 <expression> <comparison_operator> <expression>
690 self.get_node_type(
"TernaryRelationalNode"),
694 self.get_coper_type(p.comparison_operator0),
695 self.get_coper_type(p.comparison_operator1),
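
        # Sketch (comments only): a chained comparison such as "0 < x < 5"
        # reduces to a single TernaryRelationalNode carrying both comparison
        # operators, instead of two binary comparisons joined by AND.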

        @_(r"EQUALEQUAL", r"GREATER", r"LESS",
           r"GREATEREQUAL", r"LESSEQUAL", r"NOTEQUAL")  # noqa: F821
        def comparison_operator(self, p):
            """
            Parsing function for <comparison_operator> nonterminal

            Grammar rules:
                <comparison_operator> ::= EQUALEQUAL | GREATER | LESS
                    | GREATEREQUAL | LESSEQUAL | NOTEQUAL
            """
            return p[0]

        @_(r"sum")  # noqa: F821
        def expression(self, p):
            """
            Parsing function for <expression> nonterminal

            Grammar rule:
                <expression> ::= <sum>
            """
            return p.sum

        @_(r"sum PLUS term", r"sum MINUS term", r"term")  # noqa: F821
        def sum(self, p):
            """
            Parsing function for <sum> nonterminal

            Grammar rules:
                <sum> ::= <term>
                    | <sum> PLUS <term>
                    | <sum> MINUS <term>
            """
            try:
                return (
                    self.get_node_type("BinaryExpressionNode"),
                    p.sum,
                    p.term,
                    self.get_a_operation_type(p[1]),
                )
            except AttributeError:
                return p.term
750 @_(r"term TIMES factor", r"term DIVIDE factor", r"factor")
753 Parsing function for <term> nonterminal
757 | <term> TIMES <factor>
758 | <term> DIVIDE <factor>
762 self.get_node_type(
"BinaryExpressionNode"),
765 self.get_a_operation_type(p[1]),
767 except AttributeError:
773 Parsing function for <power> nonterminal
783 Parsing function for <factor> nonterminal
786 <factor> ::= PLUS <factor>
789 self.get_node_type(
"UnaryExpressionNode"),
798 Parsing function for <factor> nonterminal
801 <factor> ::= MINUS factor
804 self.get_node_type(
"UnaryExpressionNode"),
813 Parsing function for <power> nonterminal
816 <power> ::= <primary>
820 @_(r"primary POWER factor")
823 Parsing function for <power> nonterminal
826 <power> ::= <primary> POWER <factor>
829 self.get_node_type(
"BinaryExpressionNode"),
832 self.get_a_operation_type(p.POWER),

        @_(r"function")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= <function>
            """
            return p.function

        @_(r"LPAREN expression RPAREN")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= LPAREN <expression> RPAREN
            """
            return (
                self.get_node_type("UnaryExpressionNode"),
                p.expression,
                False,  # unary minus flag
                True,   # parenthesized flag
            )

        @_(r"INTEGER")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= INTEGER
            """
            return (self.get_node_type("IntegerNode"), p.INTEGER)

        @_(r"DOUBLE")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= DOUBLE
            """
            return (self.get_node_type("DoubleNode"), p.DOUBLE)

        @_(r"BOOLEAN")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= BOOLEAN
            """
            return (self.get_node_type("BooleanNode"), p.BOOLEAN)

        @_(r"IDENTIFIER")  # noqa: F821
        def primary(self, p):
            """
            Parsing function for <primary> nonterminal

            Grammar rule:
                <primary> ::= IDENTIFIER
            """
            if self.parameter_stack:
                # Identifier is scanned inside a metavariable argument tuple
                return (
                    self.get_node_type("IdentifierNode"),
                    p.IDENTIFIER,
                )
            else:
                return (
                    self.get_node_type("IdentifierNode"),
                    p.IDENTIFIER,
                )

        @_(r"IDENTIFIER ARGUMENTTUPLE")  # noqa: F821
        def function(self, p):
            """
            Parsing function for <function> nonterminal

            Grammar rule:
                <function> ::= IDENTIFIER ARGUMENTTUPLE
            """
            if self.parameter_stack:
                # Function is scanned inside a metavariable argument tuple;
                # its arguments are passed on without the parentheses.
                return (
                    self.get_node_type("FunctionNode"),
                    p.IDENTIFIER,
                    p.ARGUMENTTUPLE[1:-1],
                )
            else:
                return (
                    self.get_node_type("FunctionNode"),
                    p.IDENTIFIER,
                    p.ARGUMENTTUPLE[1:-1],
                )

        def error(self, p):
            """
            Error function, called immediately if syntax error is detected
            @param p (sly.token) offending token p
                p is None if syntax error occurs at EOF.
            """
            try:
                # Get position of the offending token in the cut
                error_pos = p.index
            except AttributeError:  # syntax error at EOF, p is None
                error_pos = len(self.cut) - 1
            try:
                # Get the type of the offending token
                error_token = p.type
            except AttributeError:  # syntax error at EOF, p is None
                error_token = self.symstack[-1].type
            # Format error message with a caret marking the error position
            error_msg = f"detected at:\n{self.cut}\n{' '*error_pos}^\n"
            error_msg += f"Unexpected token '{error_token}'"
            raise SyntaxError(error_msg)
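
        # Sketch of the resulting message (illustrative only) for a cut that
        # ends unexpectedly, e.g. "x >":
        #   SyntaxError: detected at:
        #   x >
        #     ^
        #   Unexpected token '<token type>'
        # The caret points at the error position inside the original cut string.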

        def parse(self, cut: str, token_generator) -> tuple:
            """
            Overwrite sly.Parser parse function.
            @param cut unparsed cut input which is used to
                indicate where the error occurred
            @param token_generator generator object which yields tokens.
                Produced by the lexer from the cut input.
            """
            #: unparsed cut input, used by the error function
            self.cut = cut
            return super().parse(token_generator)

    return B2ParserMixin


B2Parser = parser_class_decorator(Parser, parser_type="cut")

B2ExpressionParser = parser_class_decorator(Parser, parser_type="expression")


def parse(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut

    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2Parser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))


def parse_expression(cut: str, verbose=False) -> tuple:
    """
    Initialize a parser and lexer object and parse cut

    @param cut cut string which should be parsed
    @param verbose provide verbose parsing output for
        parser debugging purposes, not to be set true in production
    """
    lexer = B2Lexer()
    parser = B2ExpressionParser(verbose)
    return parser.parse(cut, lexer.tokenize(cut))
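

# Minimal usage sketch (not part of the original module); the cut strings are
# arbitrary examples and the exact tuple layout follows the node type encodings
# defined above:
def _example_parse_usage():
    print(parse("x > 5 and y < 3", verbose=True))
    print(parse_expression("1.2e3 * daughter(0, p)", verbose=True))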


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-e", "--expression", action="store_const", default=0, const=1
    )
    args = argparser.parse_args()
    if args.expression:
        cut = input("Please input expression:\n")
        print(parse_expression(cut))
    else:
        cut = input("Please input cut:\n")
        print(parse(cut))